diff --git a/.hlint.yaml b/.hlint.yaml index 176ac6ab4..939564cf0 100644 --- a/.hlint.yaml +++ b/.hlint.yaml @@ -36,6 +36,10 @@ - ignore: {name: "Redundant =="} - ignore: {name: "Hoist not"} - ignore: {name: "Use /="} +- ignore: {name: "Use unless"} +- ignore: {name: "Use notElem"} +- ignore: {name: "Use elem"} +- ignore: {name: "Use infix"} # Specify additional command line arguments # diff --git a/bench/macro/lsm-tree-bench-bloomfilter.hs b/bench/macro/lsm-tree-bench-bloomfilter.hs index 300a625e8..368c19d4d 100644 --- a/bench/macro/lsm-tree-bench-bloomfilter.hs +++ b/bench/macro/lsm-tree-bench-bloomfilter.hs @@ -9,10 +9,8 @@ import Control.Monad import Control.Monad.ST import Control.Monad.ST.Unsafe import Data.Bits ((.&.)) -import Data.BloomFilter (Bloom) -import qualified Data.BloomFilter as Bloom -import qualified Data.BloomFilter.Hash as Bloom -import qualified Data.BloomFilter.Mutable as MBloom +import Data.BloomFilter.Blocked (Bloom, BloomSize) +import qualified Data.BloomFilter.Blocked as Bloom import Data.Time import Data.Vector (Vector) import qualified Data.Vector as V @@ -28,7 +26,6 @@ import Text.Printf (printf) import Database.LSMTree.Extras.Orphans () import Database.LSMTree.Internal.Assertions (fromIntegralChecked) import qualified Database.LSMTree.Internal.BloomFilterQuery1 as Bloom1 -import Database.LSMTree.Internal.RunAcc (numHashFunctions) import Database.LSMTree.Internal.Serialise (SerialisedKey, serialiseKey) @@ -60,7 +57,7 @@ benchmarkNumLookups = 25_000_000 benchmarkBatchSize :: Int benchmarkBatchSize = 256 -benchmarkNumBitsPerEntry :: Integer +benchmarkNumBitsPerEntry :: RequestedBitsPerEntry benchmarkNumBitsPerEntry = 10 benchmarks :: IO () @@ -76,7 +73,7 @@ benchmarks = do let filterSizes = lsmStyleBloomFilters benchmarkSizeBase benchmarkNumBitsPerEntry putStrLn "Bloom filter stats:" - putStrLn "(numEntries, sizeFactor, numBits, numHashFuncs)" + putStrLn "(numEntries, sizeFactor, BloomSize { sizeBits, sizeHashes })" mapM_ print 
filterSizes putStrLn $ "total number of entries:\t " ++ show (totalNumEntries filterSizes) putStrLn $ "total filter size in bytes:\t " ++ show (totalNumBytes filterSizes) @@ -94,19 +91,19 @@ benchmarks = do putStrLn "" hashcost <- - benchmark "makeCheapHashes" + benchmark "makeHashes" "(This baseline is the cost of computing and hashing the keys)" (benchInBatches benchmarkBatchSize rng0 - (benchMakeCheapHashes vbs)) + (benchMakeHashes vbs)) (fromIntegralChecked benchmarkNumLookups) (0, 0) 289 _ <- - benchmark "elemCheapHashes" + benchmark "elemHashes" "(this is the simple one-by-one lookup, less the cost of computing and hashing the keys)" (benchInBatches benchmarkBatchSize rng0 - (benchElemCheapHashes vbs)) + (benchElemHashes vbs)) (fromIntegralChecked benchmarkNumLookups) hashcost 0 @@ -180,10 +177,10 @@ benchmark name description action n (subtractTime, subtractAlloc) expectedAlloc putStrLn "" return (timeNet, allocNet) --- | (numEntries, sizeFactor, numBits, numHashFuncs) -type BloomFilterSizeInfo = (Integer, Integer, Integer, Integer) +-- | (numEntries, sizeFactor, (BloomSize numBits numHashFuncs)) +type BloomFilterSizeInfo = (Integer, Integer, BloomSize) type SizeBase = Int -type RequestedBitsPerEntry = Integer +type RequestedBitsPerEntry = Double -- | Calculate the sizes of a realistic LSM style set of Bloom filters, one -- for each LSM run. 
This uses base 4, with 4 disk levels, using tiering @@ -194,28 +191,29 @@ type RequestedBitsPerEntry = Integer -- lsmStyleBloomFilters :: SizeBase -> RequestedBitsPerEntry -> [BloomFilterSizeInfo] lsmStyleBloomFilters l1 requestedBitsPerEntry = - [ (numEntries, sizeFactor, nbits, nhashes) + [ (numEntries, sizeFactor, bsize) | (numEntries, sizeFactor) <- replicate 8 (2^(l1+0), 1) -- 8 runs at level 1 (tiering) ++ replicate 8 (2^(l1+2), 4) -- 8 runs at level 2 (tiering) ++ replicate 8 (2^(l1+4),16) -- 8 runs at level 3 (tiering) ++ [(2^(l1+8),256)] -- 1 run at level 4 (leveling) - , let nbits = numEntries * requestedBitsPerEntry - nhashes = numHashFunctions nbits numEntries + , let bsize = Bloom.sizeForBits requestedBitsPerEntry (fromIntegral numEntries) ] totalNumEntries, totalNumBytes :: [BloomFilterSizeInfo] -> Integer totalNumEntries filterSizes = - sum [ numEntries | (numEntries, _, _, _) <- filterSizes ] + sum [ numEntries | (numEntries, _, _) <- filterSizes ] totalNumBytes filterSizes = - sum [ nbits | (_,_,nbits,_) <- filterSizes ] `div` 8 + sum [ toInteger (Bloom.sizeBits bsize) + | (_,_,bsize) <- filterSizes ] + `div` 8 totalNumEntriesSanityCheck :: SizeBase -> [BloomFilterSizeInfo] -> Bool totalNumEntriesSanityCheck l1 filterSizes = totalNumEntries filterSizes == - sum [ 2^l1 * sizeFactor | (_, sizeFactor, _, _) <- filterSizes ] + sum [ 2^l1 * sizeFactor | (_, sizeFactor, _) <- filterSizes ] -- | Input environment for benchmarking 'Bloom.elemMany'. 
@@ -240,9 +238,7 @@ elemManyEnv :: [BloomFilterSizeInfo] elemManyEnv filterSizes rng0 = stToIO $ do -- create the filters - mbs <- sequence - [ MBloom.new (fromIntegralChecked numHashFuncs) (fromIntegralChecked numBits) - | (_, _, numBits, numHashFuncs) <- filterSizes ] + mbs <- sequence [ Bloom.new bsize | (_, _, bsize) <- filterSizes ] -- add elements foldM_ (\rng (i, mb) -> do @@ -251,13 +247,13 @@ elemManyEnv filterSizes rng0 = -- insert n elements into filter b let k :: Word256 (!k, !rng') = uniform rng - MBloom.insert mb (serialiseKey k) + Bloom.insert mb (serialiseKey k) return rng' ) rng0 (zip [0 .. totalNumEntries filterSizes - 1] (cycle [ mb' - | (mb, (_, sizeFactor, _, _)) <- zip mbs filterSizes + | (mb, (_, sizeFactor, _)) <- zip mbs filterSizes , mb' <- replicate (fromIntegralChecked sizeFactor) mb ])) V.fromList <$> mapM Bloom.unsafeFreeze mbs @@ -280,21 +276,21 @@ benchInBatches !b !rng0 !action = -- | This gives us a combined cost of calculating the series of keys and their -- hashes (when used with 'benchInBatches'). -benchMakeCheapHashes :: Vector (Bloom SerialisedKey) -> BatchBench -benchMakeCheapHashes !_bs !ks = - let khs :: VP.Vector (Bloom.CheapHashes SerialisedKey) - !khs = V.convert (V.map Bloom.makeHashes ks) +benchMakeHashes :: Vector (Bloom SerialisedKey) -> BatchBench +benchMakeHashes !_bs !ks = + let khs :: VP.Vector (Bloom.Hashes SerialisedKey) + !khs = V.convert (V.map Bloom.hashes ks) in khs `seq` () -- | This gives us a combined cost of calculating the series of keys, their --- hashes, and then using 'Bloom.elemCheapHashes' with each filter (when used +-- hashes, and then using 'Bloom.elemHashes' with each filter (when used -- with 'benchInBatches'). 
-benchElemCheapHashes :: Vector (Bloom SerialisedKey) -> BatchBench -benchElemCheapHashes !bs !ks = - let khs :: VP.Vector (Bloom.CheapHashes SerialisedKey) - !khs = V.convert (V.map Bloom.makeHashes ks) +benchElemHashes :: Vector (Bloom SerialisedKey) -> BatchBench +benchElemHashes !bs !ks = + let khs :: VP.Vector (Bloom.Hashes SerialisedKey) + !khs = V.convert (V.map Bloom.hashes ks) in V.foldl' (\_ b -> VP.foldl' - (\_ kh -> Bloom.elemHashes kh b `seq` ()) + (\_ kh -> Bloom.elemHashes b kh `seq` ()) () khs) () bs diff --git a/bench/macro/lsm-tree-bench-lookups.hs b/bench/macro/lsm-tree-bench-lookups.hs index 5edebc588..457d5246b 100644 --- a/bench/macro/lsm-tree-bench-lookups.hs +++ b/bench/macro/lsm-tree-bench-lookups.hs @@ -9,9 +9,8 @@ import Control.Monad.Primitive import Control.Monad.ST.Strict (ST, runST) import Control.RefCount import Data.Bits ((.&.)) -import Data.BloomFilter (Bloom) -import qualified Data.BloomFilter as Bloom -import qualified Data.BloomFilter.Internal as Bloom +import Data.BloomFilter.Blocked (Bloom) +import qualified Data.BloomFilter.Blocked as Bloom import Data.Time import qualified Data.Vector as V import Data.Vector.Algorithms.Merge as Merge @@ -167,14 +166,16 @@ benchmarks !caching = withFS $ \hfs hbio -> do traceMarkerIO "Computing statistics for generated runs" let numEntries = V.map Run.size runs numPages = V.map Run.sizeInPages runs - nhashes = V.map Bloom.hashesN blooms + nhashes = V.map (Bloom.sizeHashes . Bloom.size) blooms bitsPerEntry = V.zipWith - (\b (NumEntries n) -> fromIntegral (Bloom.length b) / fromIntegral n :: Double) + (\b (NumEntries n) -> + fromIntegral (Bloom.sizeBits (Bloom.size b)) + / fromIntegral n :: Double) blooms numEntries stats = V.zip4 numEntries numPages nhashes bitsPerEntry putStrLn "Actual stats for generated runs:" - putStrLn "(numEntries, numPages, hashesN, bits per entry)" + putStrLn "(numEntries, numPages, numHashes, bits per entry)" mapM_ print stats _ <- putStr "Pausing. Drop caches now! 
When ready, press enter." >> getLine diff --git a/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs b/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs index a854fe41a..00cafd08f 100644 --- a/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs +++ b/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs @@ -9,9 +9,9 @@ module Bench.Database.LSMTree.Internal.BloomFilter ( ) where import Criterion.Main +import qualified Data.Bifoldable as BiFold import Data.BloomFilter (Bloom) import qualified Data.BloomFilter as Bloom -import qualified Data.BloomFilter.Easy as Bloom.Easy import Data.BloomFilter.Hash (Hashable) import qualified Data.Foldable as Fold import Data.Map.Strict (Map) @@ -38,8 +38,11 @@ benchmarks = bgroup "Bench.Database.LSMTree.Internal.BloomFilter" [ ] , env (constructionEnv 2_500_000) $ \ m -> bgroup "construction" [ - bench "easyList 0.1" $ whnf (constructBloom Bloom.Easy.easyList 0.1) m - , bench "easyList 0.9" $ whnf (constructBloom Bloom.Easy.easyList 0.9) m + bench "FPR = 0.1" $ + whnf (constructBloom 0.1) m + + , bench "FPR = 0.9" $ + whnf (constructBloom 0.9) m ] ] @@ -57,7 +60,9 @@ elemEnv fpr nbloom nelemsPositive nelemsNegative = do $ uniformWithoutReplacement @UTxOKey stdgen (nbloom + nelemsNegative) ys2 = sampleUniformWithReplacement @UTxOKey stdgen' nelemsPositive xs zs <- generate $ shuffle (ys1 ++ ys2) - pure (Bloom.Easy.easyList fpr (fmap serialiseKey xs), fmap serialiseKey zs) + pure ( Bloom.fromList (Bloom.policyForFPR fpr) (fmap serialiseKey xs) + , fmap serialiseKey zs + ) -- | Used for benchmarking 'Bloom.elem'. elems :: Hashable a => Bloom a -> [a] -> () @@ -74,8 +79,11 @@ constructionEnv n = do -- | Used for benchmarking the construction of bloom filters from write buffers. 
constructBloom :: - (Double -> [SerialisedKey] -> Bloom SerialisedKey) - -> Double + Double -> Map SerialisedKey SerialisedKey -> Bloom SerialisedKey -constructBloom mkBloom fpr m = mkBloom fpr (Map.keys m) +constructBloom fpr m = + -- For faster construction, avoid going via lists and use Bloom.create, + -- traversing the map inserting the keys + Bloom.create (Bloom.sizeForFPR fpr (Map.size m)) $ \b -> + BiFold.bifoldMap (\k -> Bloom.insert b k) (\_v -> pure ()) m diff --git a/bloomfilter/bench/bloomfilter-bench.hs b/bloomfilter/bench/bloomfilter-bench.hs new file mode 100644 index 000000000..5a7265ed9 --- /dev/null +++ b/bloomfilter/bench/bloomfilter-bench.hs @@ -0,0 +1,58 @@ +module Main where + +import qualified Data.BloomFilter.Blocked as B.Blocked +import qualified Data.BloomFilter.Classic as B.Classic +import Data.BloomFilter.Hash (Hashable (..), hash64) + +import Data.Word (Word64) +import System.Random + +import Criterion.Main + +main :: IO () +main = + defaultMain [ + bgroup "Data.BloomFilter.Classic" [ + env newStdGen $ \g0 -> + bench "construct m=1e6 fpr=1%" $ + whnf (constructBloom_classic 1_000_000 0.01) g0 + + , env newStdGen $ \g0 -> + bench "construct m=1e6 fpr=0.1%" $ + whnf (constructBloom_classic 1_000_000 0.001) g0 + + , env newStdGen $ \g0 -> + bench "construct m=1e7 fpr=0.1%" $ + whnf (constructBloom_classic 10_000_000 0.001) g0 + ] + , bgroup "Data.BloomFilter.Blocked" [ + env newStdGen $ \g0 -> + bench "construct m=1e6 fpr=1%" $ + whnf (constructBloom_blocked 1_000_000 0.01) g0 + + , env newStdGen $ \g0 -> + bench "construct m=1e6 fpr=0.1%" $ + whnf (constructBloom_blocked 1_000_000 0.001) g0 + + , env newStdGen $ \g0 -> + bench "construct m=1e7 fpr=0.1%" $ + whnf (constructBloom_blocked 10_000_000 0.001) g0 + ] + ] + +constructBloom_classic :: Int -> Double -> StdGen -> B.Classic.Bloom Word64 +constructBloom_classic n fpr g0 = + B.Classic.unfold (B.Classic.sizeForFPR fpr n) (nextElement n) (g0, 0) + +constructBloom_blocked :: Int -> Double 
-> StdGen -> B.Blocked.Bloom Word64 +constructBloom_blocked n fpr g0 = + B.Blocked.unfold (B.Blocked.sizeForFPR fpr n) (nextElement n) (g0, 0) + +{-# INLINE nextElement #-} +nextElement :: Int -> (StdGen, Int) -> Maybe (Word64, (StdGen, Int)) +nextElement !n (!g, !i) + | i >= n = Nothing + | otherwise = Just (x, (g', i+1)) + where + (!x, !g') = uniform g + diff --git a/bloomfilter/examples/Words.hs b/bloomfilter/examples/Words.hs deleted file mode 100644 index 5dc385029..000000000 --- a/bloomfilter/examples/Words.hs +++ /dev/null @@ -1,41 +0,0 @@ --- This program is intended for performance analysis. It simply --- builds a Bloom filter from a list of words, one per line, and --- queries it exhaustively. - -module Main (main) where - -import Control.Monad (forM_, mapM_) -import qualified Data.BloomFilter as BF -import Data.BloomFilter.Easy (easyList, suggestSizing) -import Data.BloomFilter.Hash (cheapHashes) -import qualified Data.ByteString.Lazy.Char8 as B -import Data.Time.Clock (diffUTCTime, getCurrentTime) -import System.Environment (getArgs) - -conservative, aggressive :: Double -> [B.ByteString] -> BF.Bloom B.ByteString -conservative = easyList - -aggressive fpr xs - = let (size, numHashes) = suggestSizing (length xs) fpr - k = 3 - in BF.fromList (cheapHashes (numHashes - k)) (size * k) xs - -testFunction = conservative - -main = do - args <- getArgs - let files | null args = ["/usr/share/dict/words"] - | otherwise = args - forM_ files $ \file -> do - a <- getCurrentTime - words <- B.lines `fmap` B.readFile file - putStrLn $ {-# SCC "words/length" #-} (show (length words) ++ " words") - b <- getCurrentTime - putStrLn $ show (diffUTCTime b a) ++ "s to count words" - let filt = {-# SCC "construct" #-} testFunction 0.01 words - print filt - c <- getCurrentTime - putStrLn $ show (diffUTCTime c b) ++ "s to construct filter" - {-# SCC "query" #-} mapM_ print $ filter (not . 
(`BF.elem` filt)) words - d <- getCurrentTime - putStrLn $ show (diffUTCTime d c) ++ "s to query every element" diff --git a/bloomfilter/examples/spell.hs b/bloomfilter/examples/spell.hs index e6816b92d..a1cf356a4 100644 --- a/bloomfilter/examples/spell.hs +++ b/bloomfilter/examples/spell.hs @@ -1,22 +1,16 @@ {-# LANGUAGE BangPatterns #-} module Main (main) where -import Control.Exception (IOException, catch) import Control.Monad (forM_, when) -import Data.Char (isLetter, toLower) import System.Environment (getArgs) -import Data.BloomFilter.Easy (easyList, notElem) -import Prelude hiding (notElem) +import qualified Data.BloomFilter as B main :: IO () main = do files <- getArgs - dictionary <- readFile "/usr/share/dict/words" `catchIO` \_ -> return "yes no" - let !bloom = easyList 0.01 (words dictionary) - forM_ files $ \file -> do - ws <- words <$> readFile file - forM_ ws $ \w -> when (w `notElem` bloom) $ putStrLn w - -catchIO :: IO a -> (IOException -> IO a) -> IO a -catchIO = catch + dictionary <- readFile "/usr/share/dict/words" + let !bloom = B.fromList (B.policyForFPR 0.01) (words dictionary) + forM_ files $ \file -> + putStrLn . unlines . filter (`B.notElem` bloom) . 
words + =<< readFile file diff --git a/bloomfilter/fpr.blocked.gnuplot.data b/bloomfilter/fpr.blocked.gnuplot.data new file mode 100644 index 000000000..07dae151f --- /dev/null +++ b/bloomfilter/fpr.blocked.gnuplot.data @@ -0,0 +1,999 @@ +2.0 0.39201843320666596 0.3880831046648373 +2.0 0.39201843320666596 0.3825950607604861 +2.0 0.39201843320666596 0.3782830262642101 +2.0 0.39201843320666596 0.3986671893375147 +2.0 0.39201843320666596 0.39984319874559 +2.0 0.39201843320666596 0.40493923951391614 +2.0 0.39201843320666596 0.4057232457859663 +2.0 0.39201843320666596 0.39317914543316346 +2.0 0.39201843320666596 0.3884751078008624 +2.2 0.3545234090532114 0.31903580290677064 +2.2 0.3545234090532114 0.3310882665721375 +2.2 0.3545234090532114 0.33286068769939736 +2.2 0.3545234090532114 0.3317972350230415 +2.2 0.3545234090532114 0.33073378234668555 +2.2 0.3545234090532114 0.3555476781283233 +2.2 0.3545234090532114 0.3456221198156682 +2.2 0.3545234090532114 0.34207727756114853 +2.2 0.3545234090532114 0.3342786246012052 +2.4000000000000004 0.32074266536538576 0.31173829377806284 +2.4000000000000004 0.32074266536538576 0.31462475946119306 +2.4000000000000004 0.32074266536538576 0.32200128287363694 +2.4000000000000004 0.32074266536538576 0.3008338678640154 +2.4000000000000004 0.32074266536538576 0.3114175753688262 +2.4000000000000004 0.32074266536538576 0.3296985246953175 +2.4000000000000004 0.32074266536538576 0.3024374599101988 +2.4000000000000004 0.32074266536538576 0.3236048749198204 +2.4000000000000004 0.32074266536538576 0.3290570878768441 +2.6000000000000005 0.29029659365451377 0.2783744557329463 +2.6000000000000005 0.29029659365451377 0.2780841799709724 +2.6000000000000005 0.29029659365451377 0.2708272859216255 +2.6000000000000005 0.29029659365451377 0.2763425253991292 +2.6000000000000005 0.29029659365451377 0.27169811320754716 +2.6000000000000005 0.29029659365451377 0.2879535558780842 +2.6000000000000005 0.29029659365451377 0.2841799709724238 +2.6000000000000005 
0.29029659365451377 0.28592162554426703 +2.6000000000000005 0.29029659365451377 0.2896952104499274 +2.8000000000000007 0.262845493076701 0.2578186596583443 +2.8000000000000007 0.262845493076701 0.24362680683311433 +2.8000000000000007 0.262845493076701 0.25098554533508544 +2.8000000000000007 0.262845493076701 0.26701708278580816 +2.8000000000000007 0.262845493076701 0.25624178712220763 +2.8000000000000007 0.262845493076701 0.2507227332457293 +2.8000000000000007 0.262845493076701 0.2614980289093298 +2.8000000000000007 0.262845493076701 0.2604467805519054 +2.8000000000000007 0.262845493076701 0.2680683311432326 +3.000000000000001 0.23808526530534094 0.22833333333333333 +3.000000000000001 0.23808526530534094 0.2311904761904762 +3.000000000000001 0.23808526530534094 0.235 +3.000000000000001 0.23808526530534094 0.23333333333333334 +3.000000000000001 0.23808526530534094 0.22333333333333333 +3.000000000000001 0.23808526530534094 0.23166666666666666 +3.000000000000001 0.23808526530534094 0.22976190476190475 +3.000000000000001 0.23808526530534094 0.2361904761904762 +3.000000000000001 0.23808526530534094 0.24928571428571428 +3.200000000000001 0.21574358502064847 0.21963322545846817 +3.200000000000001 0.21574358502064847 0.2127292340884574 +3.200000000000001 0.21574358502064847 0.2168284789644013 +3.200000000000001 0.21574358502064847 0.22071197411003235 +3.200000000000001 0.21574358502064847 0.2161812297734628 +3.200000000000001 0.21574358502064847 0.21877022653721684 +3.200000000000001 0.21574358502064847 0.21725997842502698 +3.200000000000001 0.21574358502064847 0.21121898597626754 +3.200000000000001 0.21574358502064847 0.23106796116504855 +3.4000000000000012 0.19557649229605542 0.20457656952865244 +3.4000000000000012 0.19557649229605542 0.1875611187169959 +3.4000000000000012 0.19557649229605542 0.21181302562096616 +3.4000000000000012 0.19557649229605542 0.19812243301388618 +3.4000000000000012 0.19557649229605542 0.19264619597105417 +3.4000000000000012 0.19557649229605542 
0.19968707216898102 +3.4000000000000012 0.19557649229605542 0.19362409544298845 +3.4000000000000012 0.19557649229605542 0.1985135928026599 +3.4000000000000012 0.19557649229605542 0.19597105417563074 +3.6000000000000014 0.1773653593556774 0.18109258602341255 +3.6000000000000014 0.1773653593556774 0.18268889677190492 +3.6000000000000014 0.1773653593556774 0.18570415040794608 +3.6000000000000014 0.1773653593556774 0.1777225966654842 +3.6000000000000014 0.1773653593556774 0.17630365377793544 +3.6000000000000014 0.1773653593556774 0.1727562965590635 +3.6000000000000014 0.1773653593556774 0.1784320681092586 +3.6000000000000014 0.1773653593556774 0.17949627527492018 +3.6000000000000014 0.1773653593556774 0.17878680383114579 +3.8000000000000016 0.1609141896431677 0.15561635017701964 +3.8000000000000016 0.1609141896431677 0.15561635017701964 +3.8000000000000016 0.1609141896431677 0.16124879304795622 +3.8000000000000016 0.1609141896431677 0.15770840038622466 +3.8000000000000016 0.1609141896431677 0.16124879304795622 +3.8000000000000016 0.1609141896431677 0.16253620856131315 +3.8000000000000016 0.1609141896431677 0.16076601223044737 +3.8000000000000016 0.1609141896431677 0.153363373028645 +3.8000000000000016 0.1609141896431677 0.16929514000643708 +4.000000000000002 0.1460472119673121 0.14575726595589308 +4.000000000000002 0.1460472119673121 0.1421060318387615 +4.000000000000002 0.1460472119673121 0.1444428216737257 +4.000000000000002 0.1460472119673121 0.14838615452022783 +4.000000000000002 0.1460472119673121 0.14590331532057835 +4.000000000000002 0.1460472119673121 0.15130714181393312 +4.000000000000002 0.1460472119673121 0.14707171023806045 +4.000000000000002 0.1460472119673121 0.13801664962757412 +4.000000000000002 0.1460472119673121 0.15159924054330365 +4.200000000000002 0.13260673675027865 0.13168014852141627 +4.200000000000002 0.13260673675027865 0.13128232329929718 +4.200000000000002 0.13260673675027865 0.13406709985413076 +4.200000000000002 0.13260673675027865 
0.13300623259514655 +4.200000000000002 0.13260673675027865 0.13592361755735313 +4.200000000000002 0.13260673675027865 0.13300623259514655 +4.200000000000002 0.13260673675027865 0.13446492507624982 +4.200000000000002 0.13260673675027865 0.1299562392255669 +4.200000000000002 0.13260673675027865 0.13353666622463864 +4.400000000000002 0.12045124516777768 0.11780293905083113 +4.400000000000002 0.12045124516777768 0.115152975186702 +4.400000000000002 0.12045124516777768 0.12238014936159962 +4.400000000000002 0.12045124516777768 0.12370513129366417 +4.400000000000002 0.12045124516777768 0.12430739580823898 +4.400000000000002 0.12045124516777768 0.12141652613827993 +4.400000000000002 0.12045124516777768 0.1187665622741508 +4.400000000000002 0.12045124516777768 0.11443025776921224 +4.400000000000002 0.12045124516777768 0.1257528306432185 +4.600000000000002 0.10945368529563608 0.10245183887915937 +4.600000000000002 0.10945368529563608 0.10956654991243432 +4.600000000000002 0.10945368529563608 0.11055166374781086 +4.600000000000002 0.10945368529563608 0.11241243432574431 +4.600000000000002 0.10945368529563608 0.11427320490367776 +4.600000000000002 0.10945368529563608 0.10825306479859895 +4.600000000000002 0.10945368529563608 0.1108800350262697 +4.600000000000002 0.10945368529563608 0.10573555166374782 +4.600000000000002 0.10945368529563608 0.12095008756567426 +4.8000000000000025 9.949995231639147e-2 0.10019900497512438 +4.8000000000000025 9.949995231639147e-2 9.412935323383084e-2 +4.8000000000000025 9.949995231639147e-2 0.10228855721393035 +4.8000000000000025 9.949995231639147e-2 0.10378109452736319 +4.8000000000000025 9.949995231639147e-2 0.1054726368159204 +4.8000000000000025 9.949995231639147e-2 0.10228855721393035 +4.8000000000000025 9.949995231639147e-2 0.10577114427860697 +4.8000000000000025 9.949995231639147e-2 9.681592039800994e-2 +4.8000000000000025 9.949995231639147e-2 0.103681592039801 +5.000000000000003 9.048753243817137e-2 8.994661116641028e-2 +5.000000000000003 
9.048753243817137e-2 8.91322052302959e-2 +5.000000000000003 9.048753243817137e-2 8.958465297258167e-2 +5.000000000000003 9.048753243817137e-2 9.655234820378246e-2 +5.000000000000003 9.048753243817137e-2 9.419961994389647e-2 +5.000000000000003 9.048753243817137e-2 9.302325581395349e-2 +5.000000000000003 9.048753243817137e-2 9.727626459143969e-2 +5.000000000000003 9.048753243817137e-2 9.003710071486744e-2 +5.000000000000003 9.048753243817137e-2 9.727626459143969e-2 +5.200000000000003 8.232429247650848e-2 8.355972668148515e-2 +5.200000000000003 8.232429247650848e-2 8.10899810652836e-2 +5.200000000000003 8.232429247650848e-2 8.150160533465053e-2 +5.200000000000003 8.232429247650848e-2 8.792294393677451e-2 +5.200000000000003 8.232429247650848e-2 8.224252901951098e-2 +5.200000000000003 8.232429247650848e-2 8.150160533465053e-2 +5.200000000000003 8.232429247650848e-2 8.602947229768668e-2 +5.200000000000003 8.232429247650848e-2 8.767596937515436e-2 +5.200000000000003 8.232429247650848e-2 8.183090475014407e-2 +5.400000000000003 7.492739908322692e-2 7.934961786303012e-2 +5.400000000000003 7.492739908322692e-2 7.5827963434737e-2 +5.400000000000003 7.492739908322692e-2 7.62026075228533e-2 +5.400000000000003 7.492739908322692e-2 7.695189569908586e-2 +5.400000000000003 7.492739908322692e-2 7.747639742244868e-2 +5.400000000000003 7.492739908322692e-2 7.44043158998951e-2 +5.400000000000003 7.492739908322692e-2 7.717668215195564e-2 +5.400000000000003 7.492739908322692e-2 7.590289225236026e-2 +5.400000000000003 7.492739908322692e-2 7.425445826464859e-2 +5.600000000000003 6.82223534062247e-2 6.972301814708691e-2 +5.600000000000003 6.82223534062247e-2 6.719879929048983e-2 +5.600000000000003 6.82223534062247e-2 6.4538136171374e-2 +5.600000000000003 6.82223534062247e-2 6.719879929048983e-2 +5.600000000000003 6.82223534062247e-2 6.972301814708691e-2 +5.600000000000003 6.82223534062247e-2 6.815390912812117e-2 +5.600000000000003 6.82223534062247e-2 6.863146404693683e-2 +5.600000000000003 
6.82223534062247e-2 7.0814572247237e-2 +5.600000000000003 6.82223534062247e-2 6.52203574839678e-2 +5.800000000000003 6.21421285572768e-2 6.232910763112105e-2 +5.800000000000003 6.21421285572768e-2 6.307481978622918e-2 +5.800000000000003 6.21421285572768e-2 6.152125279642058e-2 +5.800000000000003 6.21421285572768e-2 6.829480487198608e-2 +5.800000000000003 6.21421285572768e-2 6.232910763112105e-2 +5.800000000000003 6.21421285572768e-2 6.1086254039274174e-2 +5.800000000000003 6.21421285572768e-2 6.487695749440715e-2 +5.800000000000003 6.21421285572768e-2 6.717623663932389e-2 +5.800000000000003 6.21421285572768e-2 6.102411135968183e-2 +6.0000000000000036 5.66263986760163e-2 5.7701019252548134e-2 +6.0000000000000036 5.66263986760163e-2 5.492638731596829e-2 +6.0000000000000036 5.66263986760163e-2 5.65118912797282e-2 +6.0000000000000036 5.66263986760163e-2 5.9173272933182336e-2 +6.0000000000000036 5.66263986760163e-2 5.951302378255945e-2 +6.0000000000000036 5.66263986760163e-2 5.475651189127973e-2 +6.0000000000000036 5.66263986760163e-2 5.8776896942242356e-2 +6.0000000000000036 5.66263986760163e-2 6.098527746319366e-2 +6.0000000000000036 5.66263986760163e-2 5.311438278595697e-2 +6.200000000000004 5.162084962827559e-2 5.229196778856081e-2 +6.200000000000004 5.162084962827559e-2 5.14660334503407e-2 +6.200000000000004 5.162084962827559e-2 5.203386330786702e-2 +6.200000000000004 5.162084962827559e-2 5.1362791658063185e-2 +6.200000000000004 5.162084962827559e-2 5.280817674994838e-2 +6.200000000000004 5.162084962827559e-2 5.151765434647945e-2 +6.200000000000004 5.162084962827559e-2 5.662812306421639e-2 +6.200000000000004 5.162084962827559e-2 5.440842453024985e-2 +6.200000000000004 5.162084962827559e-2 5.244683047697708e-2 +6.400000000000004 4.7076562484715895e-2 4.891253177666886e-2 +6.400000000000004 4.7076562484715895e-2 4.67470106392995e-2 +6.400000000000004 4.7076562484715895e-2 4.792392430091329e-2 +6.400000000000004 4.7076562484715895e-2 4.7829771207984186e-2 
+6.400000000000004 4.7076562484715895e-2 4.7971000847377834e-2 +6.400000000000004 4.7076562484715895e-2 4.599378589586668e-2 +6.400000000000004 4.7076562484715895e-2 4.9289144148385276e-2 +6.400000000000004 4.7076562484715895e-2 4.844176631202335e-2 +6.400000000000004 4.7076562484715895e-2 4.7547311929196874e-2 +6.600000000000004 4.294946190765297e-2 4.453893398617017e-2 +6.600000000000004 4.294946190765297e-2 4.488253231971825e-2 +6.600000000000004 4.294946190765297e-2 4.148949877593094e-2 +6.600000000000004 4.294946190765297e-2 4.372288794399347e-2 +6.600000000000004 4.294946190765297e-2 4.432418502770261e-2 +6.600000000000004 4.294946190765297e-2 4.25632435682687e-2 +6.600000000000004 4.294946190765297e-2 4.655757419576515e-2 +6.600000000000004 4.294946190765297e-2 4.518318086157282e-2 +6.600000000000004 4.294946190765297e-2 4.526908044495984e-2 +6.800000000000004 3.919982244476177e-2 3.9709917679341435e-2 +6.800000000000004 3.919982244476177e-2 3.947471579772638e-2 +6.800000000000004 3.919982244476177e-2 3.7671501372010974e-2 +6.800000000000004 3.919982244476177e-2 3.8729909839278716e-2 +6.800000000000004 3.919982244476177e-2 4.108192865542924e-2 +6.800000000000004 3.919982244476177e-2 4.049392395139161e-2 +6.800000000000004 3.919982244476177e-2 3.9670717365738926e-2 +6.800000000000004 3.919982244476177e-2 3.888671109368875e-2 +6.800000000000004 3.919982244476177e-2 3.90827126617013e-2 +7.000000000000004 3.5791826500754534e-2 3.7939797415798705e-2 +7.000000000000004 3.5791826500754534e-2 3.4575324814775044e-2 +7.000000000000004 3.5791826500754534e-2 3.4253194459357886e-2 +7.000000000000004 3.5791826500754534e-2 3.7975589677511724e-2 +7.000000000000004 3.5791826500754534e-2 3.5505923619313505e-2 +7.000000000000004 3.5791826500754534e-2 3.704499087297326e-2 +7.000000000000004 3.5791826500754534e-2 3.6830237302695155e-2 +7.000000000000004 3.5791826500754534e-2 3.6508106947278e-2 +7.000000000000004 3.5791826500754534e-2 3.543433909588747e-2 +7.200000000000005 
3.269316844350807e-2 3.4361003040507404e-2 +7.200000000000005 3.269316844350807e-2 3.269362801190048e-2 +7.200000000000005 3.269316844350807e-2 3.0960865727269755e-2 +7.200000000000005 3.269316844350807e-2 3.226861084774577e-2 +7.200000000000005 3.269316844350807e-2 3.0993559355281655e-2 +7.200000000000005 3.269316844350807e-2 3.220322359172197e-2 +7.200000000000005 3.269316844350807e-2 3.4361003040507404e-2 +7.200000000000005 3.269316844350807e-2 3.494948834472161e-2 +7.200000000000005 3.269316844350807e-2 3.190898093961487e-2 +7.400000000000005 2.9874699909469064e-2 2.969557553849371e-2 +7.400000000000005 2.9874699909469064e-2 3.0741194395482927e-2 +7.400000000000005 2.9874699909469064e-2 2.984494966092074e-2 +7.400000000000005 2.9874699909469064e-2 3.0771069219968333e-2 +7.400000000000005 2.9874699909469064e-2 2.885908045290234e-2 +7.400000000000005 2.9874699909469064e-2 2.9635825889522897e-2 +7.400000000000005 2.9874699909469064e-2 3.008394825680399e-2 +7.400000000000005 2.9874699909469064e-2 3.139844053416186e-2 +7.400000000000005 2.9874699909469064e-2 3.065156992202671e-2 +7.600000000000005 2.7310111913322593e-2 2.733777583570024e-2 +7.600000000000005 2.7310111913322593e-2 2.782936421236618e-2 +7.600000000000005 2.7310111913322593e-2 2.701005025125628e-2 +7.600000000000005 2.7310111913322593e-2 2.7911295608477167e-2 +7.600000000000005 2.7310111913322593e-2 2.690080838977496e-2 +7.600000000000005 2.7310111913322593e-2 2.810246886606948e-2 +7.600000000000005 2.7310111913322593e-2 2.788398514310684e-2 +7.600000000000005 2.7310111913322593e-2 2.8621367708105746e-2 +7.600000000000005 2.7310111913322593e-2 2.7911295608477167e-2 +7.800000000000005 2.4975649846666052e-2 2.4576038362596468e-2 +7.800000000000005 2.4975649846666052e-2 2.5100526986188466e-2 +7.800000000000005 2.4975649846666052e-2 2.5874772097205224e-2 +7.800000000000005 2.4975649846666052e-2 2.5100526986188466e-2 +7.800000000000005 2.4975649846666052e-2 2.5175453932415895e-2 +7.800000000000005 
2.4975649846666052e-2 2.547516171732561e-2 +7.800000000000005 2.4975649846666052e-2 2.6074577287145035e-2 +7.800000000000005 2.4975649846666052e-2 2.5375259122355703e-2 +7.800000000000005 2.4975649846666052e-2 2.5999650340917606e-2 +8.000000000000005 2.2849857876681937e-2 2.4197970935015082e-2 +8.000000000000005 2.2849857876681937e-2 2.415227127319258e-2 +8.000000000000005 2.2849857876681937e-2 2.3352527191298784e-2 +8.000000000000005 2.2849857876681937e-2 2.4106571611370076e-2 +8.000000000000005 2.2849857876681937e-2 2.4449319075038844e-2 +8.000000000000005 2.2849857876681937e-2 2.3489626176766292e-2 +8.000000000000005 2.2849857876681937e-2 2.472351704597386e-2 +8.000000000000005 2.2849857876681937e-2 2.3946622794991317e-2 +8.000000000000005 2.2849857876681937e-2 2.454071839868385e-2 +8.200000000000006 2.091334963463375e-2 1.9240421616195415e-2 +8.200000000000006 2.091334963463375e-2 2.082984774970721e-2 +8.200000000000006 2.091334963463375e-2 2.1143550276058222e-2 +8.200000000000006 2.091334963463375e-2 2.1310858290112096e-2 +8.200000000000006 2.091334963463375e-2 2.147816630416597e-2 +8.200000000000006 2.091334963463375e-2 2.1603647314706376e-2 +8.200000000000006 2.091334963463375e-2 2.254475489375941e-2 +8.200000000000006 2.091334963463375e-2 2.1101723272544753e-2 +8.200000000000006 2.091334963463375e-2 2.1708214823490043e-2 +8.400000000000006 1.9148602420807403e-2 1.8957164467763247e-2 +8.400000000000006 1.9148602420807403e-2 2.031671868716849e-2 +8.400000000000006 1.9148602420807403e-2 1.8995461769718322e-2 +8.400000000000006 1.9148602420807403e-2 1.9646515902954638e-2 +8.400000000000006 1.9148602420807403e-2 1.7903988663998623e-2 +8.400000000000006 1.9148602420807403e-2 1.9167799628516172e-2 +8.400000000000006 1.9148602420807403e-2 2.085288091453957e-2 +8.400000000000006 1.9148602420807403e-2 1.9799705110774946e-2 +8.400000000000006 1.9148602420807403e-2 1.98762997146851e-2 +8.600000000000005 1.7539772452040647e-2 1.7399540455685546e-2 +8.600000000000005 
1.7539772452040647e-2 1.745216003367653e-2 +8.600000000000005 1.7539772452040647e-2 1.766263834564047e-2 +8.600000000000005 1.7539772452040647e-2 1.68733446757757e-2 +8.600000000000005 1.7539772452040647e-2 1.7101362847069968e-2 +8.600000000000005 1.7539772452040647e-2 1.7417080315015872e-2 +8.600000000000005 1.7539772452040647e-2 1.839931243751425e-2 +8.600000000000005 1.7539772452040647e-2 1.7610018767649484e-2 +8.600000000000005 1.7539772452040647e-2 1.8416852296844578e-2 +8.800000000000006 1.6072528944441514e-2 1.5815358899353885e-2 +8.800000000000006 1.6072528944441514e-2 1.604037416824713e-2 +8.800000000000006 1.6072528944441514e-2 1.6956507763026776e-2 +8.800000000000006 1.6072528944441514e-2 1.6265389437140378e-2 +8.800000000000006 1.6072528944441514e-2 1.6538622263653605e-2 +8.800000000000006 1.6072528944441514e-2 1.5574271111253977e-2 +8.800000000000006 1.6072528944441514e-2 1.6586839821273586e-2 +8.800000000000006 1.6072528944441514e-2 1.592786653380051e-2 +8.800000000000006 1.6072528944441514e-2 1.5783213860940563e-2 +9.000000000000007 1.4733905061014388e-2 1.4336019802271957e-2 +9.000000000000007 1.4733905061014388e-2 1.4984308467533998e-2 +9.000000000000007 1.4733905061014388e-2 1.4954840800931178e-2 +9.000000000000007 1.4733905061014388e-2 1.4365487468874777e-2 +9.000000000000007 1.4733905061014388e-2 1.4115012302750807e-2 +9.000000000000007 1.4733905061014388e-2 1.4615962634998748e-2 +9.000000000000007 1.4733905061014388e-2 1.4498091968587467e-2 +9.000000000000007 1.4733905061014388e-2 1.4365487468874777e-2 +9.000000000000007 1.4733905061014388e-2 1.5131646800548099e-2 +9.200000000000006 1.351216396499029e-2 1.3147404975205048e-2 +9.200000000000006 1.351216396499029e-2 1.3390625211128677e-2 +9.200000000000006 1.351216396499029e-2 1.3633845447052305e-2 +9.200000000000006 1.351216396499029e-2 1.295823368059778e-2 +9.200000000000006 1.351216396499029e-2 1.3282527328495953e-2 +9.200000000000006 1.351216396499029e-2 1.3823016741659573e-2 
+9.200000000000006 1.351216396499029e-2 1.4268920507519559e-2 +9.200000000000006 1.351216396499029e-2 1.3823016741659573e-2 +9.200000000000006 1.351216396499029e-2 1.4066236977583201e-2 +9.400000000000006 1.23966784076142e-2 1.263217920587105e-2 +9.400000000000006 1.23966784076142e-2 1.3351184499237606e-2 +9.400000000000006 1.23966784076142e-2 1.2458626204023951e-2 +9.400000000000006 1.23966784076142e-2 1.2954491923587092e-2 +9.400000000000006 1.23966784076142e-2 1.211152020032975e-2 +9.400000000000006 1.23966784076142e-2 1.2880112065652621e-2 +9.400000000000006 1.23966784076142e-2 1.2768542278750914e-2 +9.400000000000006 1.23966784076142e-2 1.2260279916198694e-2 +9.400000000000006 1.23966784076142e-2 1.3239614712335899e-2 +9.600000000000007 1.1377822446563804e-2 1.1878484469222893e-2 +9.600000000000007 1.1377822446563804e-2 1.2344976675389691e-2 +9.600000000000007 1.1377822446563804e-2 1.1503015132552055e-2 +9.600000000000007 1.1377822446563804e-2 1.1218568665377175e-2 +9.600000000000007 1.1377822446563804e-2 1.1173057230629196e-2 +9.600000000000007 1.1377822446563804e-2 1.1173057230629196e-2 +9.600000000000007 1.1377822446563804e-2 1.1798839458413927e-2 +9.600000000000007 1.1377822446563804e-2 1.109341221982023e-2 +9.600000000000007 1.1377822446563804e-2 1.2015018773466833e-2 +9.800000000000008 1.0446874040350686e-2 1.1157309709366708e-2 +9.800000000000008 1.0446874040350686e-2 1.0864795971667954e-2 +9.800000000000008 1.0446874040350686e-2 1.0237980819456342e-2 +9.800000000000008 1.0446874040350686e-2 1.0540941476358623e-2 +9.800000000000008 1.0446874040350686e-2 1.0509600718748041e-2 +9.800000000000008 1.0446874040350686e-2 1.0405131526712772e-2 +9.800000000000008 1.0446874040350686e-2 1.1021499759720858e-2 +9.800000000000008 1.0446874040350686e-2 1.0540941476358623e-2 +9.800000000000008 1.0446874040350686e-2 1.064541066839389e-2 +10.000000000000007 9.595927397018434e-3 9.76864246576657e-3 +10.000000000000007 9.595927397018434e-3 9.624703726094174e-3 
+10.000000000000007 9.595927397018434e-3 9.567128230225217e-3 +10.000000000000007 9.595927397018434e-3 9.845409793591848e-3 +10.000000000000007 9.595927397018434e-3 9.356018078705702e-3 +10.000000000000007 9.595927397018434e-3 9.077736515339071e-3 +10.000000000000007 9.595927397018434e-3 1.0162075020871117e-2 +10.000000000000007 9.595927397018434e-3 9.240867086967787e-3 +10.000000000000007 9.595927397018434e-3 9.979752617286083e-3 +10.200000000000006 8.817814074001557e-3 8.817797843166648e-3 +10.200000000000006 8.817814074001557e-3 8.914793619441481e-3 +10.200000000000006 8.817814074001557e-3 9.055878384932147e-3 +10.200000000000006 8.817814074001557e-3 9.04706058708898e-3 +10.200000000000006 8.817814074001557e-3 8.429814738067315e-3 +10.200000000000006 8.817814074001557e-3 8.650259684146481e-3 +10.200000000000006 8.817814074001557e-3 8.747255460421315e-3 +10.200000000000006 8.817814074001557e-3 8.78252665179398e-3 +10.200000000000006 8.817814074001557e-3 8.75607325826448e-3 +10.400000000000007 8.10603193174018e-3 7.976330401653629e-3 +10.400000000000007 8.10603193174018e-3 8.543752279820046e-3 +10.400000000000007 8.10603193174018e-3 8.024966562639322e-3 +10.400000000000007 8.10603193174018e-3 8.25193531390589e-3 +10.400000000000007 8.10603193174018e-3 7.943906294329834e-3 +10.400000000000007 8.10603193174018e-3 8.10602683094881e-3 +10.400000000000007 8.10603193174018e-3 8.527540226158148e-3 +10.400000000000007 8.10603193174018e-3 7.830421918696552e-3 +10.400000000000007 8.10603193174018e-3 8.203299152920196e-3 +10.600000000000009 7.45468113797498e-3 7.40249284351145e-3 +10.600000000000009 7.45468113797498e-3 7.290672709923664e-3 +10.600000000000009 7.45468113797498e-3 7.454675572519084e-3 +10.600000000000009 7.45468113797498e-3 7.477039599236642e-3 +10.600000000000009 7.45468113797498e-3 7.231035305343511e-3 +10.600000000000009 7.45468113797498e-3 7.186307251908397e-3 +10.600000000000009 7.45468113797498e-3 7.074487118320611e-3 +10.600000000000009 
7.45468113797498e-3 7.573950381679389e-3 +10.600000000000009 7.45468113797498e-3 7.797590648854962e-3 +10.800000000000008 6.858406503829848e-3 6.721259756114289e-3 +10.800000000000008 6.858406503829848e-3 6.872145179210732e-3 +10.800000000000008 6.858406503829848e-3 6.673250757856329e-3 +10.800000000000008 6.858406503829848e-3 6.920154177468691e-3 +10.800000000000008 6.858406503829848e-3 6.542940619727583e-3 +10.800000000000008 6.858406503829848e-3 6.652675472888633e-3 +10.800000000000008 6.858406503829848e-3 6.789844039339945e-3 +10.800000000000008 6.858406503829848e-3 6.933871034113822e-3 +10.800000000000008 6.858406503829848e-3 7.0916148855328315e-3 +11.000000000000007 6.312345507942478e-3 6.381769978538064e-3 +11.000000000000007 6.312345507942478e-3 6.508016664562555e-3 +11.000000000000007 6.312345507942478e-3 6.451205655851534e-3 +11.000000000000007 6.312345507942478e-3 6.274460295417245e-3 +11.000000000000007 6.312345507942478e-3 6.268147961116021e-3 +11.000000000000007 6.312345507942478e-3 6.1040272692841815e-3 +11.000000000000007 6.312345507942478e-3 6.590077010478475e-3 +11.000000000000007 6.312345507942478e-3 6.19871228380255e-3 +11.000000000000007 6.312345507942478e-3 6.306021966923368e-3 +11.200000000000008 5.812081432014394e-3 5.939961058963703e-3 +11.200000000000008 5.812081432014394e-3 6.125948097991921e-3 +11.200000000000008 5.812081432014394e-3 5.602859550725059e-3 +11.200000000000008 5.812081432014394e-3 5.951585248902967e-3 +11.200000000000008 5.812081432014394e-3 5.823719159571067e-3 +11.200000000000008 5.812081432014394e-3 6.067827148295603e-3 +11.200000000000008 5.812081432014394e-3 5.841155444479963e-3 +11.200000000000008 5.812081432014394e-3 5.858591729388858e-3 +11.200000000000008 5.812081432014394e-3 6.364243991746825e-3 +11.40000000000001 5.353601091101685e-3 5.364312864714385e-3 +11.40000000000001 5.353601091101685e-3 5.6105787247711335e-3 +11.40000000000001 5.353601091101685e-3 5.401788104288238e-3 +11.40000000000001 
5.353601091101685e-3 5.557042668237058e-3 +11.40000000000001 5.353601091101685e-3 5.535628245623428e-3 +11.40000000000001 5.353601091101685e-3 5.701590020879062e-3 +11.40000000000001 5.353601091101685e-3 5.385727287328015e-3 +11.40000000000001 5.353601091101685e-3 5.342898442100755e-3 +11.40000000000001 5.353601091101685e-3 5.487445794742759e-3 +11.600000000000009 4.933256695536919e-3 4.992452122778802e-3 +11.600000000000009 4.933256695536919e-3 4.913520073406806e-3 +11.600000000000009 4.933256695536919e-3 5.06645091906505e-3 +11.600000000000009 4.933256695536919e-3 5.239114777066293e-3 +11.600000000000009 4.933256695536919e-3 5.06645091906505e-3 +11.600000000000009 4.933256695536919e-3 4.9036535672353065e-3 +11.600000000000009 4.933256695536919e-3 4.790188746263061e-3 +11.600000000000009 4.933256695536919e-3 4.721123203062563e-3 +11.600000000000009 4.933256695536919e-3 4.972719110435803e-3 +11.800000000000008 4.547731429255463e-3 4.561371594888354e-3 +11.800000000000008 4.547731429255463e-3 4.4658692982855065e-3 +11.800000000000008 4.547731429255463e-3 4.693255718768475e-3 +11.800000000000008 4.547731429255463e-3 4.402201100550275e-3 +11.800000000000008 4.547731429255463e-3 4.429487471008232e-3 +11.800000000000008 4.547731429255463e-3 4.5431806812497155e-3 +11.800000000000008 4.547731429255463e-3 4.72963754604575e-3 +11.800000000000008 4.547731429255463e-3 4.379462458501978e-3 +11.800000000000008 4.547731429255463e-3 4.402201100550275e-3 +12.000000000000009 4.1940083721126e-3 4.399521882273995e-3 +12.000000000000009 4.1940083721126e-3 4.202403170675446e-3 +12.000000000000009 4.1940083721126e-3 3.988508398515319e-3 +12.000000000000009 4.1940083721126e-3 3.946568247111372e-3 +12.000000000000009 4.1940083721126e-3 3.9214041562690045e-3 +12.000000000000009 4.1940083721126e-3 4.038836580200054e-3 +12.000000000000009 4.1940083721126e-3 4.181433094973473e-3 +12.000000000000009 4.1940083721126e-3 4.1059408224463695e-3 +12.000000000000009 4.1940083721126e-3 
4.1898211252542625e-3 +12.20000000000001 3.8693424320706163e-3 3.675873116598695e-3 +12.20000000000001 3.8693424320706163e-3 3.8074306807717013e-3 +12.20000000000001 3.8693424320706163e-3 3.8538627622445266e-3 +12.20000000000001 3.8693424320706163e-3 3.5791396135303084e-3 +12.20000000000001 3.8693424320706163e-3 3.830646721508114e-3 +12.20000000000001 3.8693424320706163e-3 3.8577321023672622e-3 +12.20000000000001 3.8693424320706163e-3 3.7996920005262304e-3 +12.20000000000001 3.8693424320706163e-3 3.5868782937757797e-3 +12.20000000000001 3.8693424320706163e-3 3.8848174832264108e-3 +12.40000000000001 3.5712349873948854e-3 3.7533703551595452e-3 +12.40000000000001 3.5712349873948854e-3 3.510526221809546e-3 +12.40000000000001 3.5712349873948854e-3 3.692659321822045e-3 +12.40000000000001 3.5712349873948854e-3 3.692659321822045e-3 +12.40000000000001 3.5712349873948854e-3 3.599807153188222e-3 +12.40000000000001 3.5712349873948854e-3 3.4712426120029286e-3 +12.40000000000001 3.5712349873948854e-3 3.6390907629948394e-3 +12.40000000000001 3.5712349873948854e-3 3.731942931628663e-3 +12.40000000000001 3.5712349873948854e-3 3.7212292198632218e-3 +12.600000000000009 3.2974109696596596e-3 3.3633617790205365e-3 +12.600000000000009 3.2974109696596596e-3 3.4457971167416277e-3 +12.600000000000009 3.2974109696596596e-3 3.162219554981073e-3 +12.600000000000009 3.2974109696596596e-3 3.317197989896725e-3 +12.600000000000009 3.2974109696596596e-3 3.3633617790205365e-3 +12.600000000000009 3.2974109696596596e-3 3.4326074627062534e-3 +12.600000000000009 3.2974109696596596e-3 3.3007109223525065e-3 +12.600000000000009 3.2974109696596596e-3 3.1655169684899165e-3 +12.600000000000009 3.2974109696596596e-3 3.2050859305960406e-3 +12.80000000000001 3.0457981458141417e-3 3.2102728731942215e-3 +12.80000000000001 3.0457981458141417e-3 3.0183874927281534e-3 +12.80000000000001 3.0457981458141417e-3 3.0183874927281534e-3 +12.80000000000001 3.0457981458141417e-3 3.204181273814346e-3 +12.80000000000001 
3.0457981458141417e-3 3.1371736806357193e-3 +12.80000000000001 3.0457981458141417e-3 3.1219446821860314e-3 +12.80000000000001 3.0457981458141417e-3 3.088440885596718e-3 +12.80000000000001 3.0457981458141417e-3 3.1067156837363435e-3 +12.80000000000001 3.0457981458141417e-3 3.115853082806156e-3 +13.00000000000001 2.8145083821412512e-3 2.780733010227919e-3 +13.00000000000001 2.8145083821412512e-3 2.8651682230890906e-3 +13.00000000000001 2.8145083821412512e-3 2.7863620244186634e-3 +13.00000000000001 2.8145083821412512e-3 2.8820552656613247e-3 +13.00000000000001 2.8145083821412512e-3 2.7441444179880778e-3 +13.00000000000001 2.8145083821412512e-3 2.9580469572363793e-3 +13.00000000000001 2.8145083821412512e-3 2.8229506166585045e-3 +13.00000000000001 2.8145083821412512e-3 3.0115225920484545e-3 +13.00000000000001 2.8145083821412512e-3 2.81732160246776e-3 +13.20000000000001 2.6018206949624846e-3 2.4951476013800066e-3 +13.20000000000001 2.6018206949624846e-3 2.5627949816051164e-3 +13.20000000000001 2.6018206949624846e-3 2.5029530683290577e-3 +13.20000000000001 2.6018206949624846e-3 2.7189043205861387e-3 +13.20000000000001 2.6018206949624846e-3 2.6408496510956273e-3 +13.20000000000001 2.6018206949624846e-3 2.6304423618302257e-3 +13.20000000000001 2.6018206949624846e-3 2.476934845165554e-3 +13.20000000000001 2.6018206949624846e-3 2.5549895146560653e-3 +13.20000000000001 2.6018206949624846e-3 2.5549895146560653e-3 +13.40000000000001 2.4061659126745766e-3 2.281044949578801e-3 +13.40000000000001 2.4061659126745766e-3 2.30029427404782e-3 +13.40000000000001 2.4061659126745766e-3 2.305106605165075e-3 +13.40000000000001 2.4061659126745766e-3 2.4085717241860544e-3 +13.40000000000001 2.4061659126745766e-3 2.350823750778996e-3 +13.40000000000001 2.4061659126745766e-3 2.4085717241860544e-3 +13.40000000000001 2.4061659126745766e-3 2.276232618461546e-3 +13.40000000000001 2.4061659126745766e-3 2.2858572806960555e-3 +13.40000000000001 2.4061659126745766e-3 2.46872586315174e-3 
+13.60000000000001 2.2261127913912448e-3 2.045795545107677e-3 +13.60000000000001 2.2261127913912448e-3 2.1214833019451754e-3 +13.60000000000001 2.2261127913912448e-3 2.1125788599642933e-3 +13.60000000000001 2.2261127913912448e-3 2.070282760555103e-3 +13.60000000000001 2.2261127913912448e-3 2.152648848878263e-3 +13.60000000000001 2.2261127913912448e-3 2.1170310809547344e-3 +13.60000000000001 2.2261127913912448e-3 2.1259355229356165e-3 +13.60000000000001 2.2261127913912448e-3 2.1593271803639247e-3 +13.60000000000001 2.2261127913912448e-3 2.1081266389738522e-3 +13.800000000000011 2.0603554423226206e-3 2.0006057446847964e-3 +13.800000000000011 2.0603554423226206e-3 2.0624164268068807e-3 +13.800000000000011 2.0603554423226206e-3 2.0995028360801314e-3 +13.800000000000011 2.0603554423226206e-3 2.136589245353382e-3 +13.800000000000011 2.0603554423226206e-3 2.095382123938659e-3 +13.800000000000011 2.0603554423226206e-3 2.1118649725045484e-3 +13.800000000000011 2.0603554423226206e-3 2.155132449990007e-3 +13.800000000000011 2.0603554423226206e-3 2.0438732221702556e-3 +13.800000000000011 2.0603554423226206e-3 2.136589245353382e-3 +14.00000000000001 1.9077019432494815e-3 1.9172400899672066e-3 +14.00000000000001 1.9077019432494815e-3 1.955394121608345e-3 +14.00000000000001 1.9077019432494815e-3 1.9992712579956543e-3 +14.00000000000001 1.9077019432494815e-3 1.9363171057877758e-3 +14.00000000000001 1.9077019432494815e-3 1.9210554931313204e-3 +14.00000000000001 1.9077019432494815e-3 1.9210554931313204e-3 +14.00000000000001 1.9077019432494815e-3 2.006902064323882e-3 +14.00000000000001 1.9077019432494815e-3 1.812316502954076e-3 +14.00000000000001 1.9077019432494815e-3 1.98973275008537e-3 +14.20000000000001 1.7670640192116816e-3 1.7794348924740683e-3 +14.20000000000001 1.7670640192116816e-3 1.6893145553179834e-3 +14.20000000000001 1.7670640192116816e-3 1.717587602268912e-3 +14.20000000000001 1.7670640192116816e-3 1.761764238129738e-3 +14.20000000000001 1.7670640192116816e-3 
1.7458606492198406e-3 +14.20000000000001 1.7670640192116816e-3 1.763531303564171e-3 +14.20000000000001 1.7670640192116816e-3 1.851884575285823e-3 +14.20000000000001 1.7670640192116816e-3 1.6557403120637556e-3 +14.20000000000001 1.7670640192116816e-3 1.7723666307363363e-3 +14.400000000000011 1.6374476889839651e-3 1.5965102741576566e-3 +14.400000000000011 1.6374476889839651e-3 1.5261000774512163e-3 +14.400000000000011 1.6374476889839651e-3 1.6587332386889294e-3 +14.400000000000011 1.6374476889839651e-3 1.640721327903561e-3 +14.400000000000011 1.6374476889839651e-3 1.5866855955474558e-3 +14.400000000000011 1.6374476889839651e-3 1.6898447209545658e-3 +14.400000000000011 1.6374476889839651e-3 1.7144064174800682e-3 +14.400000000000011 1.6374476889839651e-3 1.5211877381461159e-3 +14.400000000000011 1.6374476889839651e-3 1.637446435033494e-3 +14.600000000000012 1.5179447841949282e-3 1.4891049431908741e-3 +14.600000000000012 1.5179447841949282e-3 1.4541921871323725e-3 +14.600000000000012 1.5179447841949282e-3 1.5088382400935054e-3 +14.600000000000012 1.5179447841949282e-3 1.479997267697352e-3 +14.600000000000012 1.5179447841949282e-3 1.537679212489659e-3 +14.600000000000012 1.5179447841949282e-3 1.476961375866178e-3 +14.600000000000012 1.5179447841949282e-3 1.5179459155870276e-3 +14.600000000000012 1.5179447841949282e-3 1.4526742412167854e-3 +14.600000000000012 1.5179447841949282e-3 1.434458890229741e-3 +14.800000000000011 1.4077252571790926e-3 1.3598623808008829e-3 +14.800000000000011 1.4077252571790926e-3 1.3260769800356436e-3 +14.800000000000011 1.4077252571790926e-3 1.3852014313748124e-3 +14.800000000000011 1.4077252571790926e-3 1.3739396311197327e-3 +14.800000000000011 1.4077252571790926e-3 1.4471413327777512e-3 +14.800000000000011 1.4077252571790926e-3 1.3725319060878477e-3 +14.800000000000011 1.4077252571790926e-3 1.3866091564066973e-3 +14.800000000000011 1.4077252571790926e-3 1.3866091564066973e-3 +14.800000000000011 1.4077252571790926e-3 1.3739396311197327e-3 
+15.00000000000001 1.30603020194702e-3 1.2041599678194125e-3 +15.00000000000001 1.30603020194702e-3 1.2694614845124392e-3 +15.00000000000001 1.30603020194702e-3 1.2916640001880684e-3 +15.00000000000001 1.30603020194702e-3 1.2263624834950417e-3 +15.00000000000001 1.30603020194702e-3 1.2995001821912316e-3 +15.00000000000001 1.30603020194702e-3 1.22244439249346e-3 +15.00000000000001 1.30603020194702e-3 1.2407288171675075e-3 +15.00000000000001 1.30603020194702e-3 1.180651421809923e-3 +15.00000000000001 1.30603020194702e-3 1.2746856058478814e-3 +15.200000000000012 1.2121655201122257e-3 1.24004509254882e-3 +15.200000000000012 1.2121655201122257e-3 1.2097409602773435e-3 +15.200000000000012 1.2121655201122257e-3 1.2073166296956252e-3 +15.200000000000012 1.2121655201122257e-3 1.2570154066208468e-3 +15.200000000000012 1.2121655201122257e-3 1.2764100512745917e-3 +15.200000000000012 1.2121655201122257e-3 1.260651902493424e-3 +15.200000000000012 1.2121655201122257e-3 1.2521667454574105e-3 +15.200000000000012 1.2121655201122257e-3 1.2545910760391288e-3 +15.200000000000012 1.2121655201122257e-3 1.1770124974241488e-3 +15.400000000000013 1.125496170313487e-3 1.1457551347950528e-3 +15.400000000000013 1.125496170313487e-3 1.1828965094986253e-3 +15.400000000000013 1.125496170313487e-3 1.0849783398255706e-3 +15.400000000000013 1.125496170313487e-3 1.1873984943111795e-3 +15.400000000000013 1.125496170313487e-3 1.1772690284829325e-3 +15.400000000000013 1.125496170313487e-3 1.1896494867174566e-3 +15.400000000000013 1.125496170313487e-3 1.1558846006232998e-3 +15.400000000000013 1.125496170313487e-3 1.1209942183260046e-3 +15.400000000000013 1.125496170313487e-3 1.055715438543968e-3 +15.600000000000012 1.0454409456952054e-3 1.0423048213654715e-3 +15.600000000000012 1.0454409456952054e-3 1.05694099739267e-3 +15.600000000000012 1.0454409456952054e-3 1.0611227619718693e-3 +15.600000000000012 1.0454409456952054e-3 1.0109415870214755e-3 +15.600000000000012 1.0454409456952054e-3 
1.0809861437230668e-3 +15.600000000000012 1.0454409456952054e-3 1.038123056786272e-3 +15.600000000000012 1.0454409456952054e-3 1.1039858489086639e-3 +15.600000000000012 1.0454409456952054e-3 1.0621682031166692e-3 +15.600000000000012 1.0454409456952054e-3 1.0893496728814657e-3 +15.800000000000011 9.714677294265883e-4 9.695250493020003e-4 +15.800000000000011 9.714677294265883e-4 9.520386255670944e-4 +15.800000000000011 9.714677294265883e-4 9.85068537066361e-4 +15.800000000000011 9.714677294265883e-4 9.180372460825554e-4 +15.800000000000011 9.714677294265883e-4 9.510671575818219e-4 +15.800000000000011 9.714677294265883e-4 9.83125601095816e-4 +15.800000000000011 9.714677294265883e-4 1.0132411086392647e-3 +15.800000000000011 9.714677294265883e-4 9.510671575818219e-4 +15.800000000000011 9.714677294265883e-4 9.74382389228363e-4 +16.000000000000014 9.03089183114959e-4 8.994771111974063e-4 +16.000000000000014 9.03089183114959e-4 8.976709322592589e-4 +16.000000000000014 9.03089183114959e-4 9.401161373057228e-4 +16.000000000000014 9.03089183114959e-4 8.118774326972573e-4 +16.000000000000014 9.03089183114959e-4 8.678689797798268e-4 +16.000000000000014 9.03089183114959e-4 9.184419900479541e-4 +16.000000000000014 9.03089183114959e-4 8.859307691613008e-4 +16.000000000000014 9.03089183114959e-4 8.516133693365002e-4 +16.000000000000014 9.03089183114959e-4 8.669658903107531e-4 +16.200000000000014 8.398588273554193e-4 8.348198838306979e-4 +16.200000000000014 8.398588273554193e-4 7.8862763673745e-4 +16.200000000000014 8.398588273554193e-4 8.197024211456349e-4 +16.200000000000014 8.398588273554193e-4 8.121436898031034e-4 +16.200000000000014 8.398588273554193e-4 8.222219982598121e-4 +16.200000000000014 8.398588273554193e-4 8.516170645918789e-4 +16.200000000000014 8.398588273554193e-4 8.003856632702767e-4 +16.200000000000014 8.398588273554193e-4 7.987059451941586e-4 +16.200000000000014 8.398588273554193e-4 7.8862763673745e-4 +16.400000000000013 7.813674776082505e-4 7.548007201080162e-4 
+16.400000000000013 7.813674776082505e-4 7.376106415962394e-4 +16.400000000000013 7.813674776082505e-4 7.938690803620544e-4 +16.400000000000013 7.813674776082505e-4 7.524566184927739e-4 +16.400000000000013 7.813674776082505e-4 7.172950942641396e-4 +16.400000000000013 7.813674776082505e-4 7.376106415962394e-4 +16.400000000000013 7.813674776082505e-4 7.508938840826124e-4 +16.400000000000013 7.813674776082505e-4 7.837113066960044e-4 +16.400000000000013 7.813674776082505e-4 8.009013852077812e-4 +16.600000000000012 7.272400021518117e-4 7.694198516139636e-4 +16.600000000000012 7.272400021518117e-4 7.679653717432378e-4 +16.600000000000012 7.272400021518117e-4 7.286944152336403e-4 +16.600000000000012 7.272400021518117e-4 7.243309756214629e-4 +16.600000000000012 7.272400021518117e-4 7.672381318078749e-4 +16.600000000000012 7.272400021518117e-4 7.628746921956973e-4 +16.600000000000012 7.272400021518117e-4 7.74510531161504e-4 +16.600000000000012 7.272400021518117e-4 7.272399353629146e-4 +16.600000000000012 7.272400021518117e-4 7.614202123249716e-4 +16.80000000000001 6.771323720620374e-4 7.272402249163065e-4 +16.80000000000001 6.771323720620374e-4 6.98800663048071e-4 +16.80000000000001 6.771323720620374e-4 6.378587447589951e-4 +16.80000000000001 6.771323720620374e-4 6.669754390526647e-4 +16.80000000000001 6.771323720620374e-4 7.062491197278469e-4 +16.80000000000001 6.771323720620374e-4 6.947378684954659e-4 +16.80000000000001 6.771323720620374e-4 7.048948548769786e-4 +16.80000000000001 6.771323720620374e-4 7.143747088330571e-4 +16.80000000000001 6.771323720620374e-4 6.818723524122165e-4 +17.000000000000014 6.307289760561569e-4 6.326211772304312e-4 +17.000000000000014 6.307289760561569e-4 6.452357570356242e-4 +17.000000000000014 6.307289760561569e-4 6.294675322791329e-4 +17.000000000000014 6.307289760561569e-4 6.136993075226416e-4 +17.000000000000014 6.307289760561569e-4 7.013706371687332e-4 +17.000000000000014 6.307289760561569e-4 6.162222234836802e-4 +17.000000000000014 
6.307289760561569e-4 6.439742990551049e-4 +17.000000000000014 6.307289760561569e-4 6.263138873278347e-4 +17.000000000000014 6.307289760561569e-4 6.521737759284804e-4 +17.200000000000014 5.877401756398498e-4 5.589409391618354e-4 +17.200000000000014 5.877401756398498e-4 6.088988569628407e-4 +17.200000000000014 5.877401756398498e-4 5.859769887953207e-4 +17.200000000000014 5.877401756398498e-4 5.983195331932161e-4 +17.200000000000014 5.877401756398498e-4 5.871524692141678e-4 +17.200000000000014 5.877401756398498e-4 5.62467380418377e-4 +17.200000000000014 5.877401756398498e-4 5.818628073293555e-4 +17.200000000000014 5.877401756398498e-4 5.542390174864467e-4 +17.200000000000014 5.877401756398498e-4 5.759854052351195e-4 +17.400000000000013 5.479000783427062e-4 5.292715667205435e-4 +17.400000000000013 5.479000783427062e-4 5.54474974659617e-4 +17.400000000000013 5.479000783427062e-4 5.363942689641947e-4 +17.400000000000013 5.479000783427062e-4 5.473522724159658e-4 +17.400000000000013 5.479000783427062e-4 5.637892775936224e-4 +17.400000000000013 5.479000783427062e-4 5.298194668931321e-4 +17.400000000000013 5.479000783427062e-4 5.522833739692628e-4 +17.400000000000013 5.479000783427062e-4 5.4954387310632e-4 +17.400000000000013 5.479000783427062e-4 5.28723666547955e-4 +17.600000000000016 5.109645089423057e-4 4.915478801869926e-4 +17.600000000000016 5.109645089423057e-4 5.068768161595599e-4 +17.600000000000016 5.109645089423057e-4 5.038110289650465e-4 +17.600000000000016 5.109645089423057e-4 5.06365851627141e-4 +17.600000000000016 5.109645089423057e-4 5.135193550810058e-4 +17.600000000000016 5.109645089423057e-4 5.027890999002087e-4 +17.600000000000016 5.109645089423057e-4 5.216947875997084e-4 +17.600000000000016 5.109645089423057e-4 5.053439225623031e-4 +17.600000000000016 5.109645089423057e-4 4.900149865897359e-4 +17.800000000000015 4.767091604856612e-4 4.37619028323676e-4 +17.800000000000015 4.767091604856612e-4 4.7527905363693355e-4 +17.800000000000015 4.767091604856612e-4 
4.547805588461731e-4 +17.800000000000015 4.767091604856612e-4 4.357121915989541e-4 +17.800000000000015 4.767091604856612e-4 4.733722169122116e-4 +17.800000000000015 4.767091604856612e-4 4.485833394908269e-4 +17.800000000000015 4.767091604856612e-4 4.605010690203388e-4 +17.800000000000015 4.767091604856612e-4 4.4095599259193934e-4 +17.800000000000015 4.767091604856612e-4 4.6717499755686545e-4 +18.000000000000014 4.449279086389735e-4 4.1244819370382484e-4 +18.000000000000014 4.449279086389735e-4 4.168974730318057e-4 +18.000000000000014 4.449279086389735e-4 4.2668588755336354e-4 +18.000000000000014 4.449279086389735e-4 4.1200326577102675e-4 +18.000000000000014 4.449279086389735e-4 4.1422790543501714e-4 +18.000000000000014 4.449279086389735e-4 4.213467523597865e-4 +18.000000000000014 4.449279086389735e-4 4.373641579405176e-4 +18.000000000000014 4.449279086389735e-4 4.373641579405176e-4 +18.000000000000014 4.449279086389735e-4 4.106684819726325e-4 +18.200000000000014 4.154312744512661e-4 4.2914051007483166e-4 +18.200000000000014 4.154312744512661e-4 4.1750843429351963e-4 +18.200000000000014 4.154312744512661e-4 4.303868039085436e-4 +18.200000000000014 4.154312744512661e-4 4.1833929684932766e-4 +18.200000000000014 4.154312744512661e-4 4.0421463340059165e-4 +18.200000000000014 4.154312744512661e-4 4.1958559068303963e-4 +18.200000000000014 4.154312744512661e-4 3.988140267878397e-4 +18.200000000000014 4.154312744512661e-4 4.2997137263063964e-4 +18.200000000000014 4.154312744512661e-4 4.2124731579465564e-4 +18.400000000000013 3.8804502202118517e-4 3.6941879790657506e-4 +18.400000000000013 3.8804502202118517e-4 3.705829327739277e-4 +18.400000000000013 3.8804502202118517e-4 3.806721016243174e-4 +18.400000000000013 3.8804502202118517e-4 3.861047310052964e-4 +18.400000000000013 3.8804502202118517e-4 3.7834383188961205e-4 +18.400000000000013 3.8804502202118517e-4 3.826123264032385e-4 +18.400000000000013 3.8804502202118517e-4 3.9192540534205967e-4 +18.400000000000013 
3.8804502202118517e-4 4.1598419260068117e-4 +18.400000000000013 3.8804502202118517e-4 3.880449557842175e-4 +18.600000000000016 3.626088788241667e-4 3.61521100938722e-4 +18.600000000000016 3.626088788241667e-4 3.5716979380605936e-4 +18.600000000000016 3.626088788241667e-4 3.535437045288405e-4 +18.600000000000016 3.626088788241667e-4 3.54994140239728e-4 +18.600000000000016 3.626088788241667e-4 3.821898098188696e-4 +18.600000000000016 3.626088788241667e-4 3.782011116139288e-4 +18.600000000000016 3.626088788241667e-4 3.658724080713846e-4 +18.600000000000016 3.626088788241667e-4 3.651471902159409e-4 +18.600000000000016 3.626088788241667e-4 3.68048061637716e-4 +18.800000000000015 3.389753676027032e-4 3.345686725081159e-4 +18.800000000000015 3.389753676027032e-4 3.115183485663207e-4 +18.800000000000015 3.389753676027032e-4 3.352466232122864e-4 +18.800000000000015 3.389753676027032e-4 3.227045351851331e-4 +18.800000000000015 3.389753676027032e-4 3.2914506687475235e-4 +18.800000000000015 3.389753676027032e-4 3.1355220067883204e-4 +18.800000000000015 3.389753676027032e-4 3.1558605279134337e-4 +18.800000000000015 3.389753676027032e-4 3.443989577185874e-4 +18.800000000000015 3.389753676027032e-4 3.3490764786020115e-4 +19.000000000000014 3.170087397577456e-4 2.9545216068413026e-4 +19.000000000000014 3.170087397577456e-4 2.9545216068413026e-4 +19.000000000000014 3.170087397577456e-4 3.062304583914912e-4 +19.000000000000014 3.170087397577456e-4 3.068644759036889e-4 +19.000000000000014 3.170087397577456e-4 2.9703720446462453e-4 +19.000000000000014 3.170087397577456e-4 2.9925626575731646e-4 +19.000000000000014 3.170087397577456e-4 3.10985589732974e-4 +19.000000000000014 3.170087397577456e-4 3.052794321231947e-4 +19.000000000000014 3.170087397577456e-4 3.10985589732974e-4 +19.200000000000017 2.9658400111523623e-4 2.7256070036533217e-4 +19.200000000000017 2.9658400111523623e-4 2.781957964555839e-4 +19.200000000000017 2.9658400111523623e-4 2.8116163650308476e-4 +19.200000000000017 
2.9658400111523623e-4 2.856103965743361e-4 +19.200000000000017 2.9658400111523623e-4 2.88576236621837e-4 +19.200000000000017 2.9658400111523623e-4 2.948045007215889e-4 +19.200000000000017 2.9658400111523623e-4 2.9213524467883807e-4 +19.200000000000017 2.9658400111523623e-4 2.734504523795824e-4 +19.200000000000017 2.9658400111523623e-4 2.998464288023404e-4 +19.400000000000016 2.7758602178815306e-4 2.556567171512347e-4 +19.400000000000016 2.7758602178815306e-4 2.670377436476523e-4 +19.400000000000016 2.7758602178815306e-4 2.6676015763554454e-4 +19.400000000000016 2.7758602178815306e-4 2.5704464721177343e-4 +19.400000000000016 2.7758602178815306e-4 2.687032597202988e-4 +19.400000000000016 2.7758602178815306e-4 2.5843257727231215e-4 +19.400000000000016 2.7758602178815306e-4 2.6731532965976006e-4 +19.400000000000016 2.7758602178815306e-4 2.6453946953868256e-4 +19.400000000000016 2.7758602178815306e-4 2.692584317445143e-4 +19.600000000000016 2.5990872262005797e-4 2.5471052019425573e-4 +19.600000000000016 2.5990872262005797e-4 2.6042851146392275e-4 +19.600000000000016 2.5990872262005797e-4 2.645870505691351e-4 +19.600000000000016 2.5990872262005797e-4 2.596487853816954e-4 +19.600000000000016 2.5990872262005797e-4 2.661465027335897e-4 +19.600000000000016 2.5990872262005797e-4 2.7550321572031744e-4 +19.600000000000016 2.5990872262005797e-4 2.762829418025448e-4 +19.600000000000016 2.5990872262005797e-4 2.614681462402258e-4 +19.600000000000016 2.5990872262005797e-4 2.5834924191131656e-4 +19.800000000000015 2.4345433138892443e-4 2.337161327673183e-4 +19.800000000000015 2.4345433138892443e-4 2.4028939900139914e-4 +19.800000000000015 2.4345433138892443e-4 2.475930281503778e-4 +19.800000000000015 2.4345433138892443e-4 2.4345430496595656e-4 +19.800000000000015 2.4345433138892443e-4 2.5148829702983315e-4 +19.800000000000015 2.4345433138892443e-4 2.475930281503778e-4 +19.800000000000015 2.4345433138892443e-4 2.495406625901055e-4 +19.800000000000015 2.4345433138892443e-4 
2.449150307957523e-4 +19.800000000000015 2.4345433138892443e-4 2.5343593146956076e-4 +20.000000000000014 2.2813270257705028e-4 2.2699202037498626e-4 +20.000000000000014 2.2813270257705028e-4 2.3064214331568956e-4 +20.000000000000014 2.2813270257705028e-4 2.2630762232360438e-4 +20.000000000000014 2.2813270257705028e-4 2.23113764750489e-4 +20.000000000000014 2.2813270257705028e-4 2.33607868205011e-4 +20.000000000000014 2.2813270257705028e-4 2.2653575500739836e-4 +20.000000000000014 2.2813270257705028e-4 2.2357003011807693e-4 +20.000000000000014 2.2813270257705028e-4 2.2357003011807693e-4 +20.000000000000014 2.2813270257705028e-4 2.345203989401868e-4 +20.200000000000017 2.1386069508055506e-4 2.1621316436627408e-4 +20.200000000000017 2.1386069508055506e-4 2.1086664694871043e-4 +20.200000000000017 2.1386069508055506e-4 2.1835177133329954e-4 +20.200000000000017 2.1386069508055506e-4 2.0872803998168497e-4 +20.200000000000017 2.1386069508055506e-4 1.9846272653996277e-4 +20.200000000000017 2.1386069508055506e-4 2.1043892555530535e-4 +20.200000000000017 2.1386069508055506e-4 2.1321911461243844e-4 +20.200000000000017 2.1386069508055506e-4 2.1450227879265372e-4 +20.200000000000017 2.1386069508055506e-4 2.2305670666075556e-4 +20.400000000000016 2.0056160274611903e-4 1.9835543488877554e-4 +20.400000000000016 2.0056160274611903e-4 1.9935824295191394e-4 +20.400000000000016 2.0056160274611903e-4 1.8712398458162546e-4 +20.400000000000016 2.0056160274611903e-4 1.9695150360038178e-4 +20.400000000000016 2.0056160274611903e-4 1.8732454619425315e-4 +20.400000000000016 2.0056160274611903e-4 1.9715206521300947e-4 +20.400000000000016 2.0056160274611903e-4 1.997593661771693e-4 +20.400000000000016 2.0056160274611903e-4 1.9915768133928628e-4 +20.400000000000016 2.0056160274611903e-4 1.9053353199629603e-4 +20.600000000000016 1.8816463308837033e-4 1.6859551095635615e-4 +20.600000000000016 1.8816463308837033e-4 1.9249241931735753e-4 +20.600000000000016 1.8816463308837033e-4 1.7649842553243536e-4 
+20.600000000000016 1.8816463308837033e-4 1.7969722428941978e-4 +20.600000000000016 1.8816463308837033e-4 1.751812731030888e-4 +20.600000000000016 1.8816463308837033e-4 1.7066532191675784e-4 +20.600000000000016 1.8816463308837033e-4 1.7706291943072672e-4 +20.600000000000016 1.8816463308837033e-4 1.8571849253786108e-4 +20.600000000000016 1.8816463308837033e-4 1.7988538892218356e-4 +20.80000000000002 1.766044299634105e-4 1.6318249666394226e-4 +20.80000000000002 1.766044299634105e-4 1.7625122475174715e-4 +20.80000000000002 1.766044299634105e-4 1.7024667400870164e-4 +20.80000000000002 1.766044299634105e-4 1.649485410001321e-4 +20.80000000000002 1.766044299634105e-4 1.6530174986737007e-4 +20.80000000000002 1.766044299634105e-4 1.7413197154831934e-4 +20.80000000000002 1.766044299634105e-4 1.746617848491763e-4 +20.80000000000002 1.766044299634105e-4 1.6759760750441688e-4 +20.80000000000002 1.766044299634105e-4 1.6689118976994093e-4 +21.000000000000018 1.6582063635655071e-4 1.7477496479627606e-4 +21.000000000000018 1.6582063635655071e-4 1.7842301908993648e-4 +21.000000000000018 1.6582063635655071e-4 1.7195601375117483e-4 +21.000000000000018 1.6582063635655071e-4 1.6333333996615933e-4 +21.000000000000018 1.6582063635655071e-4 1.8107614948532588e-4 +21.000000000000018 1.6582063635655071e-4 1.6217259541817647e-4 +21.000000000000018 1.6582063635655071e-4 1.8008122558705485e-4 +21.000000000000018 1.6582063635655071e-4 1.6830795945751444e-4 +21.000000000000018 1.6582063635655071e-4 1.754382473951234e-4 +21.200000000000017 1.5575749378922264e-4 1.5653627739910535e-4 +21.200000000000017 1.5575749378922264e-4 1.6494718185637073e-4 +21.200000000000017 1.5575749378922264e-4 1.556017324594092e-4 +21.200000000000017 1.5575749378922264e-4 1.5871688225839637e-4 +21.200000000000017 1.5575749378922264e-4 1.5933991221819382e-4 +21.200000000000017 1.5575749378922264e-4 1.6074172962773805e-4 +21.200000000000017 1.5575749378922264e-4 1.556017324594092e-4 +21.200000000000017 
1.5575749378922264e-4 1.5233082517047267e-4 +21.200000000000017 1.5575749378922264e-4 1.6245506201718098e-4 +21.400000000000016 1.463634751646593e-4 1.5587709332690896e-4 +21.400000000000016 1.463634751646593e-4 1.5807254534559782e-4 +21.400000000000016 1.463634751646593e-4 1.4855891993127942e-4 +21.400000000000016 1.463634751646593e-4 1.4182620040730025e-4 +21.400000000000016 1.463634751646593e-4 1.535352778403075e-4 +21.400000000000016 1.463634751646593e-4 1.5294982396865715e-4 +21.400000000000016 1.463634751646593e-4 1.4987619114249274e-4 +21.400000000000016 1.463634751646593e-4 1.4211892734312544e-4 +21.400000000000016 1.463634751646593e-4 1.4387528895807653e-4 +21.600000000000016 1.375909481573892e-4 1.3057380928793933e-4 +21.600000000000016 1.375909481573892e-4 1.4281940362579666e-4 +21.600000000000016 1.375909481573892e-4 1.45158449735275e-4 +21.600000000000016 1.375909481573892e-4 1.3029862739270657e-4 +21.600000000000016 1.375909481573892e-4 1.3855408424968905e-4 +21.600000000000016 1.375909481573892e-4 1.3951722088300367e-4 +21.600000000000016 1.375909481573892e-4 1.3924203898777092e-4 +21.600000000000016 1.375909481573892e-4 1.3951722088300367e-4 +21.600000000000016 1.375909481573892e-4 1.4103072130678378e-4 +21.80000000000002 1.2939586651062514e-4 1.2616097208387647e-4 +21.80000000000002 1.2939586651062514e-4 1.3392472421211503e-4 +21.80000000000002 1.2939586651062514e-4 1.2538459687105261e-4 +21.80000000000002 1.2939586651062514e-4 1.2007936625008961e-4 +21.80000000000002 1.2939586651062514e-4 1.2900768119756395e-4 +21.80000000000002 1.2939586651062514e-4 1.2409063818301285e-4 +21.80000000000002 1.2939586651062514e-4 1.2887828532875996e-4 +21.80000000000002 1.2939586651062514e-4 1.3353653660570309e-4 +21.80000000000002 1.2939586651062514e-4 1.3340714073689912e-4 +22.000000000000018 1.2173748684079103e-4 1.2076358130730716e-4 +22.000000000000018 1.2173748684079103e-4 1.2502439314778673e-4 +22.000000000000018 1.2173748684079103e-4 1.2465918070431707e-4 
+22.000000000000018 1.2173748684079103e-4 1.1516365717410541e-4 +22.000000000000018 1.2173748684079103e-4 1.1528539465526198e-4 +22.000000000000018 1.2173748684079103e-4 1.1808535672186284e-4 +22.000000000000018 1.2173748684079103e-4 1.1930273153342844e-4 +22.000000000000018 1.2173748684079103e-4 1.2636350544050891e-4 +22.000000000000018 1.2173748684079103e-4 1.2039836886383749e-4 +22.200000000000017 1.1457810876195154e-4 1.1297401105059987e-4 +22.200000000000017 1.1457810876195154e-4 1.0460980942109298e-4 +22.200000000000017 1.1457810876195154e-4 1.1366147967768262e-4 +22.200000000000017 1.1457810876195154e-4 1.0735968392942401e-4 +22.200000000000017 1.1457810876195154e-4 1.0483896563012057e-4 +22.200000000000017 1.1457810876195154e-4 1.1033871464678263e-4 +22.200000000000017 1.1457810876195154e-4 1.1274485484157229e-4 +22.200000000000017 1.1457810876195154e-4 1.1847376006726193e-4 +22.200000000000017 1.1457810876195154e-4 1.1182823000546194e-4 +22.40000000000002 1.0788283633687184e-4 1.210445432051883e-4 +22.40000000000002 1.0788283633687184e-4 1.1877900362648157e-4 +22.40000000000002 1.0788283633687184e-4 1.1726864390734375e-4 +22.40000000000002 1.0788283633687184e-4 1.1209026772744264e-4 +22.40000000000002 1.0788283633687184e-4 1.1543463567696211e-4 +22.40000000000002 1.0788283633687184e-4 1.0971684531165464e-4 +22.40000000000002 1.0788283633687184e-4 1.1770017525566884e-4 +22.40000000000002 1.0788283633687184e-4 1.1834747227815648e-4 +22.40000000000002 1.0788283633687184e-4 1.170528782331812e-4 +22.60000000000002 1.0161935903764002e-4 1.0395660040576609e-4 +22.60000000000002 1.0161935903764002e-4 1.0497279395811961e-4 +22.60000000000002 1.0161935903764002e-4 1.0365174234006003e-4 +22.60000000000002 1.0161935903764002e-4 1.0355012298482467e-4 +22.60000000000002 1.0161935903764002e-4 1.095456649437105e-4 +22.60000000000002 1.0161935903764002e-4 1.0466793589241355e-4 +22.60000000000002 1.0161935903764002e-4 1.0619222622094385e-4 +22.60000000000002 
1.0161935903764002e-4 1.0578574880000244e-4 +22.60000000000002 1.0161935903764002e-4 1.1035861978559332e-4 +22.80000000000002 9.57577505589414e-5 1.0150321473129562e-4 +22.80000000000002 9.57577505589414e-5 9.882199773839347e-5 +22.80000000000002 9.57577505589414e-5 1.0246079222876067e-4 +22.80000000000002 9.57577505589414e-5 9.671532724397035e-5 +22.80000000000002 9.57577505589414e-5 9.642805399473084e-5 +22.80000000000002 9.57577505589414e-5 9.738563149219589e-5 +22.80000000000002 9.57577505589414e-5 9.75771469916889e-5 +22.80000000000002 9.57577505589414e-5 1.0083291048307008e-4 +22.80000000000002 9.57577505589414e-5 9.853472448915395e-5 +23.000000000000018 9.027028397266895e-5 9.41519030374415e-5 +23.000000000000018 9.027028397266895e-5 9.315892994692198e-5 +23.000000000000018 9.027028397266895e-5 9.55962275327426e-5 +23.000000000000018 9.027028397266895e-5 9.505460584700469e-5 +23.000000000000018 9.027028397266895e-5 9.37005516326599e-5 +23.000000000000018 9.027028397266895e-5 9.027028095631975e-5 +23.000000000000018 9.027028397266895e-5 8.990919983249446e-5 +23.000000000000018 9.027028397266895e-5 9.686001146613108e-5 +23.000000000000018 9.027028397266895e-5 9.523514640891733e-5 +23.200000000000017 8.513126184494805e-5 8.989861377359136e-5 +23.200000000000017 8.513126184494805e-5 8.972835124750502e-5 +23.200000000000017 8.513126184494805e-5 8.010851852362639e-5 +23.200000000000017 8.513126184494805e-5 8.30029814670943e-5 +23.200000000000017 8.513126184494805e-5 8.828111977577107e-5 +23.200000000000017 8.513126184494805e-5 8.206653757361939e-5 +23.200000000000017 8.513126184494805e-5 8.274758767796478e-5 +23.200000000000017 8.513126184494805e-5 8.989861377359136e-5 +23.200000000000017 8.513126184494805e-5 8.71744133562098e-5 +23.40000000000002 8.031686005707997e-5 8.047749336863849e-5 +23.40000000000002 8.031686005707997e-5 8.601935668444293e-5 +23.40000000000002 8.031686005707997e-5 7.838925501775565e-5 +23.40000000000002 8.031686005707997e-5 
7.951369105284641e-5 +23.40000000000002 8.031686005707997e-5 8.336890031601471e-5 +23.40000000000002 8.031686005707997e-5 7.525689749143139e-5 +23.40000000000002 8.031686005707997e-5 7.774672014056092e-5 +23.40000000000002 8.031686005707997e-5 7.903178989495036e-5 +23.40000000000002 8.031686005707997e-5 7.99152753510931e-5 +23.60000000000002 7.580498418149386e-5 7.239375836934385e-5 +23.60000000000002 7.580498418149386e-5 7.178731850865826e-5 +23.60000000000002 7.580498418149386e-5 7.603239753345748e-5 +23.60000000000002 7.580498418149386e-5 7.785171711551428e-5 +23.60000000000002 7.580498418149386e-5 7.262117331710095e-5 +23.60000000000002 7.580498418149386e-5 7.239375836934385e-5 +23.60000000000002 7.580498418149386e-5 7.262117331710095e-5 +23.60000000000002 7.580498418149386e-5 7.663883739414308e-5 +23.60000000000002 7.580498418149386e-5 7.610820251604317e-5 +23.80000000000002 7.157513736351763e-5 6.921315734247816e-5 +23.80000000000002 7.157513736351763e-5 7.193301254311328e-5 +23.80000000000002 7.157513736351763e-5 6.620700159440775e-5 +23.80000000000002 7.157513736351763e-5 6.856898111074878e-5 +23.80000000000002 7.157513736351763e-5 7.221931309054856e-5 +23.80000000000002 7.157513736351763e-5 7.064466007965453e-5 +23.80000000000002 7.157513736351763e-5 6.949945788991344e-5 +23.80000000000002 7.157513736351763e-5 7.1646711995678e-5 +23.80000000000002 7.157513736351763e-5 7.028678439536044e-5 +24.00000000000002 6.760829875058038e-5 7.024502058131777e-5 +24.00000000000002 6.760829875058038e-5 7.470716818321092e-5 +24.00000000000002 6.760829875058038e-5 7.308456905524977e-5 +24.00000000000002 6.760829875058038e-5 7.240848608526597e-5 +24.00000000000002 6.760829875058038e-5 7.308456905524977e-5 +24.00000000000002 6.760829875058038e-5 7.430151840122062e-5 +24.00000000000002 6.760829875058038e-5 7.031262887831615e-5 +24.00000000000002 6.760829875058038e-5 7.024502058131777e-5 +24.00000000000002 6.760829875058038e-5 7.078588695730482e-5 diff --git 
a/bloomfilter/fpr.classic.gnuplot.data b/bloomfilter/fpr.classic.gnuplot.data new file mode 100644 index 000000000..583ffd95e --- /dev/null +++ b/bloomfilter/fpr.classic.gnuplot.data @@ -0,0 +1,183 @@ +2.0 0.3934693402873666 0.3900039354584809 +2.0 0.3934693402873666 0.37504919323101144 +2.0 0.3934693402873666 0.3892168437622983 +2.3 0.3374056100322293 0.3434547908232119 +2.3 0.3374056100322293 0.32388663967611336 +2.3 0.3374056100322293 0.340080971659919 +2.5999999999999996 0.28797243387934673 0.28217679239850274 +2.5999999999999996 0.28797243387934673 0.2824647279009502 +2.5999999999999996 0.28797243387934673 0.2830405989058451 +2.8999999999999995 0.2482540030425363 0.2477656405163853 +2.8999999999999995 0.2482540030425363 0.23857994041708044 +2.8999999999999995 0.2482540030425363 0.24056603773584906 +3.1999999999999993 0.21598193982220967 0.21706263498920086 +3.1999999999999993 0.21598193982220967 0.21036717062634988 +3.1999999999999993 0.21598193982220967 0.21036717062634988 +3.499999999999999 0.18947031330845207 0.18075028419856007 +3.499999999999999 0.18947031330845207 0.1911708980674498 +3.499999999999999 0.18947031330845207 0.1964759378552482 +3.799999999999999 0.16269646923733003 0.16254474454930035 +3.799999999999999 0.16269646923733003 0.15929059550927432 +3.799999999999999 0.16269646923733003 0.15554832411324437 +4.099999999999999 0.1397281316954663 0.14559172837781192 +4.099999999999999 0.1397281316954663 0.13846583764146989 +4.099999999999999 0.1397281316954663 0.13706860416375577 +4.399999999999999 0.12077596326307868 0.12065217391304348 +4.399999999999999 0.12077596326307868 0.1213768115942029 +4.399999999999999 0.12077596326307868 0.1178743961352657 +4.699999999999998 0.10502663985837676 0.10272030248923432 +4.699999999999998 0.10502663985837676 0.10629135595000525 +4.699999999999998 0.10502663985837676 9.904421804432308e-2 +4.999999999999998 9.184883923294052e-2 9.396527969137504e-2 +4.999999999999998 9.184883923294052e-2 9.06585836318545e-2 
+4.999999999999998 9.184883923294052e-2 9.176081565169468e-2 +5.299999999999998 7.881929681834592e-2 8.291952392212501e-2 +5.299999999999998 7.881929681834592e-2 7.283045637266493e-2 +5.299999999999998 7.881929681834592e-2 7.448569401749823e-2 +5.599999999999998 6.789553531345656e-2 6.91832439405255e-2 +5.599999999999998 6.789553531345656e-2 6.544911399280331e-2 +5.599999999999998 6.789553531345656e-2 6.477018127503564e-2 +5.899999999999998 5.876263252151765e-2 5.858502761781643e-2 +5.899999999999998 5.876263252151765e-2 5.688094958279469e-2 +5.899999999999998 5.876263252151765e-2 5.699847220589964e-2 +6.1999999999999975 5.108781980733247e-2 5.057729641360989e-2 +6.1999999999999975 5.108781980733247e-2 4.955553284969858e-2 +6.1999999999999975 5.108781980733247e-2 4.930009195872075e-2 +6.499999999999997 4.450177077923493e-2 4.570335098571492e-2 +6.499999999999997 4.450177077923493e-2 4.28552356370433e-2 +6.499999999999997 4.450177077923493e-2 4.20987049975524e-2 +6.799999999999997 3.825331720048197e-2 3.756407313901002e-2 +6.799999999999997 3.825331720048197e-2 3.80613572029684e-2 +6.799999999999997 3.825331720048197e-2 3.699028383444266e-2 +7.099999999999997 3.300976153640641e-2 3.231663035584604e-2 +7.099999999999997 3.300976153640641e-2 3.26467287251601e-2 +7.099999999999997 3.300976153640641e-2 3.32409057899254e-2 +7.399999999999997 2.859146946961458e-2 2.8162973552537527e-2 +7.399999999999997 2.859146946961458e-2 2.7962830593280915e-2 +7.399999999999997 2.859146946961458e-2 2.9049320943531094e-2 +7.699999999999997 2.485365957813612e-2 2.5424992543990457e-2 +7.699999999999997 2.485365957813612e-2 2.3884083904960732e-2 +7.699999999999997 2.485365957813612e-2 2.4381151207873545e-2 +7.9999999999999964 2.157714146321929e-2 2.2073578595317726e-2 +7.9999999999999964 2.157714146321929e-2 2.0606322149099146e-2 +7.9999999999999964 2.157714146321929e-2 2.1814650987161507e-2 +8.299999999999997 1.858180941904642e-2 1.789430652594024e-2 +8.299999999999997 
1.858180941904642e-2 1.735543332837818e-2 +8.299999999999997 1.858180941904642e-2 1.9325107774639513e-2 +8.599999999999996 1.6053280201294692e-2 1.595684908416676e-2 +8.599999999999996 1.6053280201294692e-2 1.4929446326232483e-2 +8.599999999999996 1.6053280201294692e-2 1.651870996741207e-2 +8.899999999999995 1.3911659303848264e-2 1.3299574302328816e-2 +8.899999999999995 1.3911659303848264e-2 1.35638963857433e-2 +8.899999999999995 1.3911659303848264e-2 1.402298210956846e-2 +9.199999999999996 1.2091803064275024e-2 1.2152210976892662e-2 +9.199999999999996 1.2091803064275024e-2 1.2007109950302899e-2 +9.199999999999996 1.2091803064275024e-2 1.2599605808877765e-2 +9.499999999999996 1.0472810419700971e-2 1.0923181651568309e-2 +9.499999999999996 1.0472810419700971e-2 1.0776561763627795e-2 +9.499999999999996 1.0472810419700971e-2 1.0713724668796146e-2 +9.799999999999995 9.030714850304539e-3 8.922362800610477e-3 +9.799999999999995 9.030714850304539e-3 8.804963290076129e-3 +9.799999999999995 9.030714850304539e-3 8.714655974280476e-3 +10.099999999999994 7.808242114948139e-3 8.222066057624737e-3 +10.099999999999994 7.808242114948139e-3 7.652065276801749e-3 +10.099999999999994 7.808242114948139e-3 7.816038104161786e-3 +10.399999999999995 6.769015491520877e-3 6.653940920044405e-3 +10.399999999999995 6.769015491520877e-3 6.80962824574229e-3 +10.399999999999995 6.769015491520877e-3 6.477946551864186e-3 +10.699999999999996 5.883120099197549e-3 5.81251691395357e-3 +10.699999999999996 5.883120099197549e-3 5.759568885385168e-3 +10.699999999999996 5.883120099197549e-3 5.724270199672899e-3 +10.999999999999995 5.086410643627119e-3 4.877875097913552e-3 +10.999999999999995 5.086410643627119e-3 5.132196010213528e-3 +10.999999999999995 5.086410643627119e-3 5.132196010213528e-3 +11.299999999999994 4.390250871934632e-3 4.706357533903774e-3 +11.299999999999994 4.390250871934632e-3 4.346356304631284e-3 +11.299999999999994 4.390250871934632e-3 4.188306984462873e-3 +11.599999999999994 
3.7982630662866026e-3 3.6463358123352504e-3 +11.599999999999994 3.7982630662866026e-3 3.5589756834980514e-3 +11.599999999999994 3.7982630662866026e-3 3.718502875287719e-3 +11.899999999999995 3.293628806737463e-3 3.2738722596964585e-3 +11.899999999999995 3.293628806737463e-3 3.2837531618887016e-3 +11.899999999999995 3.293628806737463e-3 3.204705944350759e-3 +12.199999999999994 2.862426553117111e-3 2.9397115819483964e-3 +12.199999999999994 2.862426553117111e-3 2.765103591199757e-3 +12.199999999999994 2.862426553117111e-3 2.885325486469312e-3 +12.499999999999993 2.4714022749062954e-3 2.429385931309916e-3 +12.499999999999993 2.4714022749062954e-3 2.5405989190097596e-3 +12.499999999999993 2.4714022749062954e-3 2.3601867389633465e-3 +12.799999999999994 2.1347378701230876e-3 2.047211821314058e-3 +12.799999999999994 2.1347378701230876e-3 2.132601261202027e-3 +12.799999999999994 2.1347378701230876e-3 2.224394909081594e-3 +13.099999999999994 1.8477570412903156e-3 1.8606902846837658e-3 +13.099999999999994 1.8477570412903156e-3 1.9438393043568238e-3 +13.099999999999994 1.8477570412903156e-3 1.8311261888000118e-3 +13.399999999999993 1.602601363396337e-3 1.5240750979590855e-3 +13.399999999999993 1.602601363396337e-3 1.5481141373590711e-3 +13.399999999999993 1.602601363396337e-3 1.5577297531190653e-3 +13.699999999999992 1.392732355820269e-3 1.3955179084501257e-3 +13.699999999999992 1.392732355820269e-3 1.3551286675867985e-3 +13.699999999999992 1.392732355820269e-3 1.2980266374007156e-3 +13.999999999999993 1.2011660314775851e-3 1.2323969038730415e-3 +13.999999999999993 1.2011660314775851e-3 1.1783444080891363e-3 +13.999999999999993 1.2011660314775851e-3 1.2239887378622117e-3 +14.299999999999994 1.0381521121625098e-3 1.0859070853880093e-3 +14.299999999999994 1.0381521121625098e-3 1.0454191539060473e-3 +14.299999999999994 1.0381521121625098e-3 9.841681806384635e-4 +14.599999999999993 8.989266559699968e-4 8.665651479003774e-4 +14.599999999999993 8.989266559699968e-4 
9.088146934930307e-4 +14.599999999999993 8.989266559699968e-4 8.189220432958961e-4 +14.899999999999991 7.79789817603333e-4 7.84468460235013e-4 +14.899999999999991 7.79789817603333e-4 8.179994182768675e-4 +14.899999999999991 7.79789817603333e-4 7.875876191226274e-4 +15.199999999999992 6.764405168941774e-4 6.135313770228103e-4 +15.199999999999992 6.764405168941774e-4 6.771167678057697e-4 +15.199999999999992 6.764405168941774e-4 6.9673353730264e-4 +15.499999999999993 5.839219194030841e-4 5.79250454583144e-4 +15.499999999999993 5.839219194030841e-4 5.827539855584453e-4 +15.499999999999993 5.839219194030841e-4 6.020234059226023e-4 +15.799999999999992 5.049190197991896e-4 5.089582714807656e-4 +15.799999999999992 5.049190197991896e-4 4.806828119540564e-4 +15.799999999999992 5.049190197991896e-4 4.8118773087417623e-4 +16.09999999999999 4.3733863083801914e-4 4.369012288339067e-4 +16.09999999999999 4.3733863083801914e-4 4.3602655169910405e-4 +16.09999999999999 4.3733863083801914e-4 4.264051032162753e-4 +16.39999999999999 3.7942896537169595e-4 3.9574447579792977e-4 +16.39999999999999 3.7942896537169595e-4 3.691844438651828e-4 +16.39999999999999 3.7942896537169595e-4 3.8663817913527364e-4 +16.699999999999992 3.287090342802393e-4 3.379129078786301e-4 +16.699999999999992 3.287090342802393e-4 3.3988516220476996e-4 +16.699999999999992 3.287090342802393e-4 3.306813086827839e-4 +16.999999999999993 2.839064245426713e-4 2.770926957400121e-4 +16.999999999999993 2.839064245426713e-4 3.0150865048759513e-4 +16.999999999999993 2.839064245426713e-4 2.9072020536657006e-4 +17.29999999999999 2.455927593376433e-4 2.384705857215798e-4 +17.29999999999999 2.455927593376433e-4 2.5639885838653895e-4 +17.29999999999999 2.455927593376433e-4 2.522237811905896e-4 +17.59999999999999 2.1277570578396985e-4 1.9745584106507e-4 +17.59999999999999 2.1277570578396985e-4 2.140523449476944e-4 +17.59999999999999 2.1277570578396985e-4 2.11286260967257e-4 +17.89999999999999 1.8462225664709928e-4 
1.8536074727705247e-4 +17.89999999999999 1.8462225664709928e-4 1.8222216888690316e-4 +17.89999999999999 1.8462225664709928e-4 1.868377253430051e-4 +18.19999999999999 1.597658501117391e-4 1.6455882258961267e-4 +18.19999999999999 1.597658501117391e-4 1.6871273461614657e-4 +18.19999999999999 1.597658501117391e-4 1.5673029607806799e-4 +18.49999999999999 1.3805376639064915e-4 1.3266966289342732e-4 +18.49999999999999 1.3805376639064915e-4 1.421953722999273e-4 +18.49999999999999 1.3805376639064915e-4 1.38467920793036e-4 +18.79999999999999 1.1946336764483079e-4 1.1492375788264087e-4 +18.79999999999999 1.1946336764483079e-4 1.1277341729855819e-4 +18.79999999999999 1.1946336764483079e-4 1.250781439741424e-4 +19.09999999999999 1.0352241360319877e-4 9.907094846405867e-5 +19.09999999999999 1.0352241360319877e-4 1.0538581560753577e-4 +19.09999999999999 1.0352241360319877e-4 1.018660535931387e-4 +19.39999999999999 8.983346052166448e-5 8.749778785110284e-5 +19.39999999999999 8.983346052166448e-5 8.43536168297593e-5 +19.39999999999999 8.983346052166448e-5 9.180979382323111e-5 +19.69999999999999 7.766526701752384e-5 7.525764092226258e-5 +19.69999999999999 7.766526701752384e-5 7.618962409157853e-5 +19.69999999999999 7.766526701752384e-5 7.945156518418433e-5 +19.99999999999999 6.713708129260107e-5 7.04267998344131e-5 +19.99999999999999 6.713708129260107e-5 6.599575237104678e-5 +19.99999999999999 6.713708129260107e-5 6.693567152994266e-5 diff --git a/bloomfilter/fpr.gnuplot b/bloomfilter/fpr.gnuplot new file mode 100644 index 000000000..8449c8530 --- /dev/null +++ b/bloomfilter/fpr.gnuplot @@ -0,0 +1,20 @@ +set term png size 1800, 1200 +set output "fpr.png" +set title "Bloom filter false positive rates (FPR) vs bits per entry\nclassic and block-structured implementations" +# set subtitle "blah" + +set xlabel "Bits per entry" +set xrange [1:25] +set grid xtics +set xtics 0,2,24 + +set ylabel "False Positive Rate (FPR), log scale" +set yrange [1e-5:1] +set logscale y +set format y 
"10^{%L}" +set grid ytics + +plot "fpr.classic.gnuplot.data" using 1 : 3 title "Classic, actual FPR" with points pointtype 1 pointsize 2, \ + "fpr.classic.gnuplot.data" using 1 : 2 title "Classic, calculated FPR" with lines linewidth 2, \ + "fpr.blocked.gnuplot.data" using 1 : 3 title "Blocked, actual FPR" with points pointtype 1 pointsize 2, \ + "fpr.blocked.gnuplot.data" using 1 : 2 title "Blocked, calculated FPR" with lines linewidth 3 diff --git a/bloomfilter/fpr.png b/bloomfilter/fpr.png new file mode 100644 index 000000000..1edc1e9e2 Binary files /dev/null and b/bloomfilter/fpr.png differ diff --git a/bloomfilter/src/Data/BloomFilter.hs b/bloomfilter/src/Data/BloomFilter.hs index 587dd99cf..064a3349b 100644 --- a/bloomfilter/src/Data/BloomFilter.hs +++ b/bloomfilter/src/Data/BloomFilter.hs @@ -1,262 +1,5 @@ --- | --- --- A fast, space efficient Bloom filter implementation. A Bloom --- filter is a set-like data structure that provides a probabilistic --- membership test. --- --- * Queries do not give false negatives. When an element is added to --- a filter, a subsequent membership test will definitely return --- 'True'. --- --- * False positives /are/ possible. If an element has not been added --- to a filter, a membership test /may/ nevertheless indicate that --- the element is present. --- --- This module provides low-level control. For an easier to use --- interface, see the "Data.BloomFilter.Easy" module. 
- module Data.BloomFilter ( - -- * Overview - -- $overview - - -- ** Ease of use - -- $ease - - -- ** Performance - -- $performance - - -- ** Differences from bloomfilter package - -- $differences - - -- * Types - Hash, - Bloom, - MBloom, - Bloom', - MBloom', - CheapHashes, - RealHashes, - - -- * Immutable Bloom filters - - -- ** Conversion - freeze, - thaw, - unsafeFreeze, - - -- ** Creation - unfold, - - fromList, - empty, - singleton, - - -- ** Accessors - length, - elem, - elemHashes, - notElem, -) where - -import Control.Exception (assert) -import Control.Monad (forM_, liftM) -import Control.Monad.ST (ST, runST) -import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, - Hashes (..), RealHashes) -import Data.BloomFilter.Internal (Bloom' (..), bloomInvariant) -import Data.BloomFilter.Mutable (MBloom, MBloom', insert, new) -import qualified Data.BloomFilter.Mutable.Internal as MB -import Data.Word (Word64) - -import Prelude hiding (elem, length, notElem) - -import qualified Data.BloomFilter.BitVec64 as V - --- | Bloom filter using 'CheapHashes' hashing scheme. -type Bloom = Bloom' CheapHashes - --- | Create an immutable Bloom filter, using the given setup function --- which executes in the 'ST' monad. --- --- Example: --- --- @ --- TODO ---import "Data.BloomFilter.Hash" (cheapHashes) --- ---filter = create (cheapHashes 3) 1024 $ \mf -> do --- insertMB mf \"foo\" --- insertMB mf \"bar\" --- @ --- --- Note that the result of the setup function is not used. -create :: Int -- ^ number of hash functions to use - -> Word64 -- ^ number of bits in filter - -> (forall s. (MBloom' s h a -> ST s ())) -- ^ setup function - -> Bloom' h a -{-# INLINE create #-} -create hash numBits body = runST $ do - mb <- new hash numBits - body mb - unsafeFreeze mb - --- | Create an immutable Bloom filter from a mutable one. The mutable --- filter may be modified afterwards. 
-freeze :: MBloom' s h a -> ST s (Bloom' h a) -freeze mb = do - ba <- V.freeze (MB.bitArray mb) - let !bf = Bloom (MB.hashesN mb) (MB.size mb) ba - assert (bloomInvariant bf) $ pure bf - --- | Create an immutable Bloom filter from a mutable one. The mutable --- filter /must not/ be modified afterwards, or a runtime crash may --- occur. For a safer creation interface, use 'freeze' or 'create'. -unsafeFreeze :: MBloom' s h a -> ST s (Bloom' h a) -unsafeFreeze mb = do - ba <- V.unsafeFreeze (MB.bitArray mb) - let !bf = Bloom (MB.hashesN mb) (MB.size mb) ba - assert (bloomInvariant bf) $ pure bf - --- | Copy an immutable Bloom filter to create a mutable one. There is --- no non-copying equivalent. -thaw :: Bloom' h a -> ST s (MBloom' s h a) -thaw ub = MB.MBloom (hashesN ub) (size ub) `liftM` V.thaw (bitArray ub) - --- | Create an empty Bloom filter. -empty :: Int -- ^ number of hash functions to use - -> Word64 -- ^ number of bits in filter - -> Bloom' h a -{-# INLINE [1] empty #-} -empty hash numBits = create hash numBits (\_ -> return ()) - --- | Create a Bloom filter with a single element. -singleton :: (Hashes h, Hashable a) - => Int -- ^ number of hash functions to use - -> Word64 -- ^ number of bits in filter - -> a -- ^ element to insert - -> Bloom' h a -singleton hash numBits elt = create hash numBits (\mb -> insert mb elt) - --- | Query an immutable Bloom filter for membership. If the value is --- present, return @True@. If the value is not present, there is --- /still/ some possibility that @True@ will be returned. -elem :: (Hashes h, Hashable a) => a -> Bloom' h a -> Bool -elem elt ub = elemHashes (makeHashes elt) ub -{-# SPECIALISE elem :: Hashable a => a -> Bloom a -> Bool #-} - --- | Query an immutable Bloom filter for membership using already constructed 'Hashes' value. 
-elemHashes :: Hashes h => h a -> Bloom' h a -> Bool -elemHashes !ch !ub = go 0 where - go :: Int -> Bool - go !i | i >= hashesN ub - = True - go !i = let idx' :: Word64 - !idx' = evalHashes ch i in - let idx :: Int - !idx = fromIntegral (idx' `V.unsafeRemWord64` size ub) in - -- While the idx' can cover the full Word64 range, - -- after taking the remainder, it now must fit in - -- and Int because it's less than the filter size. - if V.unsafeIndex (bitArray ub) idx - then go (i + 1) - else False -{-# SPECIALISE elemHashes :: CheapHashes a -> Bloom a -> Bool #-} - --- | Query an immutable Bloom filter for non-membership. If the value --- /is/ present, return @False@. If the value is not present, there --- is /still/ some possibility that @False@ will be returned. -notElem :: (Hashes h, Hashable a) => a -> Bloom' h a -> Bool -notElem elt ub = notElemHashes (makeHashes elt) ub - --- | Query an immutable Bloom filter for non-membership using already constructed 'Hashes' value. -notElemHashes :: Hashes h => h a -> Bloom' h a -> Bool -notElemHashes !ch !ub = not (elemHashes ch ub) - --- | Return the size of an immutable Bloom filter, in bits. -length :: Bloom' h a -> Word64 -length = size - --- | Build an immutable Bloom filter from a seed value. The seeding --- function populates the filter as follows. --- --- * If it returns 'Nothing', it is finished producing values to --- insert into the filter. --- --- * If it returns @'Just' (a,b)@, @a@ is added to the filter and --- @b@ is used as a new seed. -unfold :: forall a b h. (Hashes h, Hashable a) - => Int -- ^ number of hash functions to use - -> Word64 -- ^ number of bits in filter - -> (b -> Maybe (a, b)) -- ^ seeding function - -> b -- ^ initial seed - -> Bloom' h a -{-# INLINE unfold #-} -unfold hs numBits f k = create hs numBits (loop k) - where loop :: forall s. 
b -> MBloom' s h a -> ST s () - loop j mb = case f j of - Just (a, j') -> insert mb a >> loop j' mb - _ -> return () - --- | Create an immutable Bloom filter, populating it from a list of --- values. --- --- Here is an example that uses the @cheapHashes@ function from the --- "Data.BloomFilter.Hash" module to create a hash function that --- returns three hashes. --- --- @ --- filt = fromList 3 1024 [\"foo\", \"bar\", \"quux\"] --- @ -fromList :: (Hashes h, Hashable a) - => Int -- ^ number of hash functions to use - -> Word64 -- ^ number of bits in filter - -> [a] -- ^ values to populate with - -> Bloom' h a -fromList hs numBits list = create hs numBits $ forM_ list . insert - --- $overview --- --- Each of the functions for creating Bloom filters accepts two parameters: --- --- * The number of bits that should be used for the filter. Note that --- a filter is fixed in size; it cannot be resized after creation. --- --- * A number of hash functions, /k/, to be used for the filter. --- --- By choosing these parameters with care, it is possible to tune for --- a particular false positive rate. --- The 'Data.BloomFilter.Easy.suggestSizing' function in --- the "Data.BloomFilter.Easy" module calculates useful estimates for --- these parameters. - --- $ease --- --- This module provides immutable interfaces for working with a --- query-only Bloom filter, and for converting to and from mutable --- Bloom filters. --- --- For a higher-level interface that is easy to use, see the --- "Data.BloomFilter.Easy" module. - --- $performance --- --- The implementation has been carefully tuned for high performance --- and low space consumption. + module Data.BloomFilter.Classic + ) where --- $differences --- --- This package is (almost entirely rewritten) fork of --- [bloomfilter](https://hackage.haskell.org/package/bloomfilter) package. --- --- The main differences are --- --- * This packages support bloomfilters of arbitrary sizes --- (not limited to powers of two). 
Also sizes over 2^32 are supported. --- --- * The 'Bloom' and 'MBloom' types are parametrised over 'Hashes' variable, --- instead of having a @a -> ['Hash']@ typed field. --- This separation allows clean de/serialization of Bloom filters in this --- package, as the hashing scheme is a static. --- --- * [XXH3 hash](https://xxhash.com/) is used instead of Jenkins' --- lookup3. +import Data.BloomFilter.Classic diff --git a/bloomfilter/src/Data/BloomFilter/BitVec64.hs b/bloomfilter/src/Data/BloomFilter/BitVec64.hs deleted file mode 100644 index 1160c9831..000000000 --- a/bloomfilter/src/Data/BloomFilter/BitVec64.hs +++ /dev/null @@ -1,121 +0,0 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE UnboxedTuples #-} --- | Minimal bit vector implementation. -module Data.BloomFilter.BitVec64 ( - BitVec64 (..), - unsafeIndex, - prefetchIndex, - MBitVec64 (..), - new, - unsafeWrite, - unsafeRead, - freeze, - unsafeFreeze, - thaw, - unsafeRemWord64, -) where - -import Control.Monad.ST (ST) -import Data.Bits -import Data.Primitive.ByteArray (ByteArray (ByteArray), - newPinnedByteArray, setByteArray) -import qualified Data.Vector.Primitive as VP -import qualified Data.Vector.Primitive.Mutable as VPM -import Data.Word (Word64, Word8) - -import GHC.Exts (Int (I#), prefetchByteArray0#, uncheckedIShiftRL#, - (+#)) -import qualified GHC.Exts -import GHC.ST (ST (ST)) -import GHC.Word (Word64 (W64#)) - - --- | Bit vector backed up by an array of Word64 --- --- This vector's offset and length are multiples of 64 -newtype BitVec64 = BV64 (VP.Vector Word64) - deriving (Eq, Show) - -{-# INLINE unsafeIndex #-} -unsafeIndex :: BitVec64 -> Int -> Bool -unsafeIndex (BV64 bv) i = - unsafeTestBit (VP.unsafeIndex bv j) k - where - !j = unsafeShiftR i 6 -- `div` 64, bit index to Word64 index. - !k = i .&. 
63 -- `mod` 64, bit within Word64 - -{-# INLINE unsafeTestBit #-} --- like testBit but using unsafeShiftL instead of shiftL -unsafeTestBit :: Word64 -> Int -> Bool -unsafeTestBit w k = w .&. (1 `unsafeShiftL` k) /= 0 - -{-# INLINE prefetchIndex #-} -prefetchIndex :: BitVec64 -> Int -> ST s () -prefetchIndex (BV64 (VP.Vector (I# off#) _ (ByteArray ba#))) (I# i#) = - ST (\s -> case prefetchByteArray0# ba# (off# +# uncheckedIShiftRL# i# 3#) s of - s' -> (# s', () #)) - -- We only need to shiftR 3 here, not 6, because we're going from a bit - -- offset to a byte offset for prefetch. Whereas in unsafeIndex, we go from - -- a bit offset to a Word64 offset, so an extra shiftR 3, for 6 total. - -newtype MBitVec64 s = MBV64 (VP.MVector s Word64) - --- | Will create an explicitly pinned byte array if it is larger than 1 kB. --- This is done because pinned byte arrays allow for more efficient --- serialisation, but the definition of 'isByteArrayPinned' changed in GHC 9.6, --- see . --- --- TODO: remove this workaround once a solution exists, e.g. a new primop that --- allows checking for implicit pinning. -new :: Word64 -> ST s (MBitVec64 s) -new s - | numWords >= 128 = do - mba <- newPinnedByteArray numBytes - setByteArray mba 0 numBytes (0 :: Word8) - return (MBV64 (VP.MVector 0 numWords mba)) - | otherwise = - MBV64 <$> VPM.new numWords - where - !numWords = w2i (roundUpTo64 s) - !numBytes = unsafeShiftL numWords 3 -- * 8 - -unsafeWrite :: MBitVec64 s -> Word64 -> Bool -> ST s () -unsafeWrite (MBV64 mbv) i x = do - VPM.unsafeModify mbv (\w -> if x then setBit w (w2i k) else clearBit w (w2i k)) (w2i j) - where - !j = unsafeShiftR i 6 -- `div` 64 - !k = i .&. 63 -- `mod` 64 - -unsafeRead :: MBitVec64 s -> Word64 -> ST s Bool -unsafeRead (MBV64 mbv) i = do - !w <- VPM.unsafeRead mbv (w2i j) - return $! testBit w (w2i k) - where - !j = unsafeShiftR i 6 -- `div` 64 - !k = i .&. 
63 -- `mod` 64 - -freeze :: MBitVec64 s -> ST s BitVec64 -freeze (MBV64 mbv) = BV64 <$> VP.freeze mbv - -unsafeFreeze :: MBitVec64 s -> ST s BitVec64 -unsafeFreeze (MBV64 mbv) = BV64 <$> VP.unsafeFreeze mbv - -thaw :: BitVec64 -> ST s (MBitVec64 s) -thaw (BV64 bv) = MBV64 <$> VP.thaw bv - --- this may overflow, but so be it (1^64 bits is a lot) -roundUpTo64 :: Word64 -> Word64 -roundUpTo64 i = unsafeShiftR (i + 63) 6 - --- | Like 'rem' but does not check for division by 0. -unsafeRemWord64 :: Word64 -> Word64 -> Word64 -#if MIN_VERSION_base(4,17,0) -unsafeRemWord64 (W64# x#) (W64# y#) = W64# (x# `GHC.Exts.remWord64#` y#) -#else -unsafeRemWord64 (W64# x#) (W64# y#) = W64# (x# `GHC.Exts.remWord#` y#) -#endif - -w2i :: Word64 -> Int -w2i = fromIntegral -{-# INLINE w2i #-} diff --git a/bloomfilter/src/Data/BloomFilter/Blocked.hs b/bloomfilter/src/Data/BloomFilter/Blocked.hs new file mode 100644 index 000000000..359efd6b2 --- /dev/null +++ b/bloomfilter/src/Data/BloomFilter/Blocked.hs @@ -0,0 +1,191 @@ +-- | +-- +-- A fast, space efficient Bloom filter implementation. A Bloom +-- filter is a set-like data structure that provides a probabilistic +-- membership test. +-- +-- * Queries do not give false negatives. When an element is added to +-- a filter, a subsequent membership test will definitely return +-- 'True'. +-- +-- * False positives /are/ possible. If an element has not been added +-- to a filter, a membership test /may/ nevertheless indicate that +-- the element is present. 
+-- + +module Data.BloomFilter.Blocked ( + -- * Types + Hash, + Hashable, + + -- * Immutable Bloom filters + Bloom, + + -- ** Creation + create, + unfold, + fromList, + + -- ** (De)Serialisation + formatVersion, + serialise, + deserialise, + + -- ** Sizes + NumEntries, + BloomSize (..), + FPR, + sizeForFPR, + BitsPerEntry, + sizeForBits, + sizeForPolicy, + BloomPolicy (..), + policyFPR, + policyForFPR, + policyForBits, + + -- ** Accessors + size, + elem, + notElem, + (?), + + -- * Mutable Bloom filters + MBloom, + new, + insert, + + -- ** Conversion + freeze, + thaw, + unsafeFreeze, + + -- * Low level variants + Hashes, + hashes, + insertHashes, + elemHashes, + -- ** Prefetching + prefetchInsert, + prefetchElem, +) where + +import Control.Monad.Primitive (PrimMonad, PrimState, RealWorld, + stToPrim) +import Control.Monad.ST (ST, runST) +import Data.Primitive.ByteArray (MutableByteArray) + +import Data.BloomFilter.Blocked.Calc +import Data.BloomFilter.Blocked.Internal hiding (deserialise) +import qualified Data.BloomFilter.Blocked.Internal as Internal +import Data.BloomFilter.Hash + +import Prelude hiding (elem, notElem) + +-- | Create an immutable Bloom filter, using the given setup function +-- which executes in the 'ST' monad. +-- +-- Example: +-- +-- @ +--filter = create (sizeForBits 16 2) $ \mf -> do +-- insert mf \"foo\" +-- insert mf \"bar\" +-- @ +-- +-- Note that the result of the setup function is not used. +create :: BloomSize + -> (forall s. (MBloom s a -> ST s ())) -- ^ setup function + -> Bloom a +{-# INLINE create #-} +create bloomsize body = + runST $ do + mb <- new bloomsize + body mb + unsafeFreeze mb + +{-# INLINEABLE insert #-} +-- | Insert a value into a mutable Bloom filter. Afterwards, a +-- membership query for the same value is guaranteed to return @True@. +insert :: Hashable a => MBloom s a -> a -> ST s () +insert = \ !mb !x -> insertHashes mb (hashes x) + +{-# INLINE elem #-} +-- | Query an immutable Bloom filter for membership. 
If the value is +-- present, return @True@. If the value is not present, there is +-- /still/ some possibility that @True@ will be returned. +elem :: Hashable a => a -> Bloom a -> Bool +elem = \ !x !b -> elemHashes b (hashes x) + +-- | Same as 'elem' but with the opposite argument order: +-- +-- > x `elem` bfilter +-- +-- versus +-- +-- > bfilter ? x +-- +(?) :: Hashable a => Bloom a -> a -> Bool +(?) = flip elem + +{-# INLINE notElem #-} +-- | Query an immutable Bloom filter for non-membership. If the value +-- /is/ present, return @False@. If the value is not present, there +-- is /still/ some possibility that @False@ will be returned. +notElem :: Hashable a => a -> Bloom a -> Bool +notElem = \x b -> not (x `elem` b) + +-- | Build an immutable Bloom filter from a seed value. The seeding +-- function populates the filter as follows. +-- +-- * If it returns 'Nothing', it is finished producing values to +-- insert into the filter. +-- +-- * If it returns @'Just' (a,b)@, @a@ is added to the filter and +-- @b@ is used as a new seed. +unfold :: forall a b. + Hashable a + => BloomSize + -> (b -> Maybe (a, b)) -- ^ seeding function + -> b -- ^ initial seed + -> Bloom a +{-# INLINE unfold #-} +unfold bloomsize f k = + create bloomsize body + where + body :: forall s. MBloom s a -> ST s () + body mb = loop k + where + loop :: b -> ST s () + loop !j = case f j of + Nothing -> return () + Just (a, j') -> insert mb a >> loop j' + +-- | Create a Bloom filter, populating it from a sequence of values. 
+-- +-- For example +-- +-- @ +-- filt = fromList (policyForBits 10) [\"foo\", \"bar\", \"quux\"] +-- @ +fromList :: (Foldable t, Hashable a) + => BloomPolicy + -> t a -- ^ values to populate with + -> Bloom a +fromList policy xs = + create bsize (\b -> mapM_ (insert b) xs) + where + bsize = sizeForPolicy policy (length xs) + +{-# SPECIALISE deserialise :: BloomSize + -> (MutableByteArray RealWorld -> Int -> Int -> IO ()) + -> IO (Bloom a) #-} +deserialise :: PrimMonad m + => BloomSize + -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) + -> m (Bloom a) +deserialise bloomsize fill = do + mbloom <- stToPrim $ new bloomsize + Internal.deserialise mbloom fill + stToPrim $ unsafeFreeze mbloom + diff --git a/bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs b/bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs new file mode 100644 index 000000000..d9c4e58ec --- /dev/null +++ b/bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs @@ -0,0 +1,185 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +-- | Blocked bit array implementation. This uses blocks of 64 bytes, aligned +-- to 64byte boundaries to match typical cache line sizes. This means that +-- multiple accesses to the same block only require a single cache line load +-- or store. +module Data.BloomFilter.Blocked.BitArray ( + bitsToBlocks, + blocksToBits, + BlockIx (..), + BitIx (..), + BitArray (..), + unsafeIndex, + prefetchIndex, + MBitArray (..), + new, + unsafeSet, + prefetchSet, + freeze, + unsafeFreeze, + thaw, + serialise, + deserialise, +) where + +import Control.Exception (assert) +import Control.Monad.Primitive (PrimMonad, PrimState) +import Control.Monad.ST (ST) +import Data.Bits +import Data.Primitive.ByteArray +import Data.Primitive.PrimArray +import Data.Word (Word64, Word8) + +import GHC.Exts (Int (I#), prefetchByteArray0#, + prefetchMutableByteArray3#) +import GHC.ST (ST (ST)) + +-- | An array of blocks of bits. 
+-- +-- Each block is 512 bits (64 bytes large), corresponding to a cache line on +-- most current architectures. +-- +-- It is represented by an array of 'Word64'. This array is aligned to 64 bytes +-- so that multiple accesses within a single block will use only one cache line. +-- +newtype BitArray = BitArray (PrimArray Word64) + deriving stock (Eq, Show) + +-- | The number of 512-bit blocks for the given number of bits. This rounds +-- up to the nearest multiple of 512. +bitsToBlocks :: Int -> Int +bitsToBlocks n = (n+511) `div` 512 -- rounded up + +blocksToBits :: Int -> Int +blocksToBits n = n * 512 + +newtype BlockIx = BlockIx Word +newtype BitIx = BitIx Int + +{-# INLINE unsafeIndex #-} +unsafeIndex :: BitArray -> BlockIx -> BitIx -> Bool +unsafeIndex (BitArray arr) blockIx blockBitIx = + assert (wordIx >= 0 && wordIx < sizeofPrimArray arr) $ + indexPrimArray arr wordIx `unsafeTestBit` wordBitIx + where + (wordIx, wordBitIx) = wordAndBitIndex blockIx blockBitIx + +{-# INLINE prefetchIndex #-} +prefetchIndex :: BitArray -> BlockIx -> ST s () +prefetchIndex (BitArray (PrimArray ba#)) (BlockIx blockIx) = + -- For reading, we want to prefetch such that we cause the least disturbance to + -- the caches. We will typically not keep this cache line longer than one + -- read. + let !i@(I# i#) = fromIntegral blockIx `shiftL` 6 in + -- blockIx * 64 to go from block index to the byte offset of the beginning + -- of the block. This offset is in bytes, not words. + + assert (i >= 0 && i <= sizeofByteArray (ByteArray ba#)) $ + + ST (\s -> case prefetchByteArray0# ba# i# s of + s' -> (# s', () #)) + +newtype MBitArray s = MBitArray (MutablePrimArray s Word64) + +-- | We create an explicitly pinned byte array, aligned to 64 bytes.
+-- +new :: Int -> ST s (MBitArray s) +new numBlocks = do + mba@(MutableByteArray mba#) <- newAlignedPinnedByteArray numBytes 64 + setByteArray mba 0 numBytes (0 :: Word8) + return (MBitArray (MutablePrimArray mba#)) + where + !numBytes = numBlocks * 64 + +serialise :: BitArray -> (ByteArray, Int, Int) +serialise bitArray = + let ba = asByteArray bitArray + in (ba, 0, sizeofByteArray ba) + where + asByteArray (BitArray (PrimArray ba#)) = ByteArray ba# + +-- | Do an in-place overwrite of the byte array representing the bit block. +deserialise :: PrimMonad m + => MBitArray (PrimState m) + -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) + -> m () +deserialise bitArray fill = do + let mba = asMutableByteArray bitArray + len <- getSizeofMutableByteArray mba + fill mba 0 len + where + asMutableByteArray (MBitArray (MutablePrimArray mba#)) = + MutableByteArray mba# + +unsafeSet :: MBitArray s -> BlockIx -> BitIx -> ST s () +unsafeSet (MBitArray arr) blockIx blockBitIx = do +#ifdef NO_IGNORE_ASSERTS + sz <- getSizeofMutablePrimArray arr + assert (wordIx >= 0 && wordIx < sz) $ return () -- valid word indexes are [0, sz), as in unsafeIndex +#endif + w <- readPrimArray arr wordIx + writePrimArray arr wordIx (unsafeSetBit w wordBitIx) + where + (wordIx, wordBitIx) = wordAndBitIndex blockIx blockBitIx + +{-# INLINE prefetchSet #-} +prefetchSet :: MBitArray s -> BlockIx -> ST s () +prefetchSet (MBitArray (MutablePrimArray mba#)) (BlockIx blockIx) = do + -- For setting, we will do several writes to the same cache line, so + -- read it into all 3 levels of cache. + let !(I# i#) = fromIntegral blockIx `shiftL` 6 + -- blockIx * 64 to go from block index to the byte offset of the beginning + -- of the block. This offset is in bytes, not words.
+ +#ifdef NO_IGNORE_ASSERTS + sz <- getSizeofMutableByteArray (MutableByteArray mba#) + assert (let i = I# i# in i >= 0 && i <= sz) $ return () +#endif + + ST (\s -> case prefetchMutableByteArray3# mba# i# s of + s' -> (# s', () #)) + +freeze :: MBitArray s -> ST s BitArray +freeze (MBitArray arr) = do + len <- getSizeofMutablePrimArray arr + BitArray <$> freezePrimArray arr 0 len + +unsafeFreeze :: MBitArray s -> ST s BitArray +unsafeFreeze (MBitArray arr) = + BitArray <$> unsafeFreezePrimArray arr + +thaw :: BitArray -> ST s (MBitArray s) +thaw (BitArray arr) = + MBitArray <$> thawPrimArray arr 0 (sizeofPrimArray arr) + +{-# INLINE wordAndBitIndex #-} +-- | Given the index of the 512 bit block, and the index of the bit within the +-- block, compute the index of the word in the array, and index of the bit +-- within the word. +-- +wordAndBitIndex :: BlockIx -> BitIx -> (Int, Int) +wordAndBitIndex (BlockIx blockIx) (BitIx blockBitIx) = + assert (blockBitIx < 512) $ + (wordIx, wordBitIx) + where + -- Select the Word64 in the underlying array based on the block index + -- and the bit index. + -- * There are 8 Word64s in each 64byte block. + -- * Use 3 bits (bits 6..8) to select the Word64 within the block + wordIx = fromIntegral blockIx `shiftL` 3 -- * 8 + + (blockBitIx `shiftR` 6) .&. 7 -- `div` 64, `mod` 8 + + -- Bits 0..5 of blockBitIx select the bit within Word64 + wordBitIx = blockBitIx .&. 63 -- `mod` 64 + +{-# INLINE unsafeTestBit #-} +-- like testBit but using unsafeShiftL instead of shiftL +unsafeTestBit :: Word64 -> Int -> Bool +unsafeTestBit w k = w .&. (1 `unsafeShiftL` k) /= 0 + +{-# INLINE unsafeSetBit #-} +-- like setBit but using unsafeShiftL instead of shiftL +unsafeSetBit :: Word64 -> Int -> Word64 +unsafeSetBit w k = w .|. 
(1 `unsafeShiftL` k) diff --git a/bloomfilter/src/Data/BloomFilter/Blocked/Calc.hs b/bloomfilter/src/Data/BloomFilter/Blocked/Calc.hs new file mode 100644 index 000000000..4e2720013 --- /dev/null +++ b/bloomfilter/src/Data/BloomFilter/Blocked/Calc.hs @@ -0,0 +1,98 @@ +-- | Various formulas for working with bloomfilters. +module Data.BloomFilter.Blocked.Calc ( + NumEntries, + BloomSize (..), + FPR, + sizeForFPR, + BitsPerEntry, + sizeForBits, + sizeForPolicy, + BloomPolicy (..), + policyFPR, + policyForFPR, + policyForBits, +) where + +import Data.BloomFilter.Classic.Calc (BitsPerEntry, BloomPolicy (..), + BloomSize (..), FPR, NumEntries) + +policyForFPR :: FPR -> BloomPolicy +policyForFPR fpr | fpr <= 0 || fpr >= 1 = + error "policyForFPR: fpr out of range (0,1)" + +policyForFPR fpr = + BloomPolicy { + policyBits = c, + policyHashes = k + } + where + k :: Int + k = max 1 (round (recip_log2 * log_fpr)) + c = log_fpr * log_fpr * f2 + + log_fpr * f1 + + f0 + log_fpr = negate (log fpr) + + -- These parameters are from a (quadratic) linear regression in log space + -- of samples of the actual FPR between 1 and 20 bits. This is with log FPR + -- as the independent variable and bits as the dependent variable.
+ f2,f1,f0 :: Double + f2 = 8.035531421107756e-2 + f1 = 1.653017726702572 + f0 = 0.5343568065075601 +{- +Regression, FPR independent, bits dependent: +Fit {fitParams = V3 8.035531421107756e-2 1.653017726702572 0.5343568065075601, fitErrors = V3 7.602655075308541e-4 8.422591688796256e-3 2.0396917012822195e-2, fitNDF = 996, fitWSSR = 18.362899348627252} +Fit {fitParams = V3 8.079418894776325e-2 1.6462569292513933 0.5550062950289885, fitErrors = V3 7.713375250014809e-4 8.542261871094414e-3 2.0678969159415226e-2, fitNDF = 996, fitWSSR = 19.00125036371992} +-} + +policyForBits :: BitsPerEntry -> BloomPolicy +policyForBits c | c < 0 || c > 64 = + error "policyForBits: out of range [0,64]" + +policyForBits c = + BloomPolicy { + policyBits = c, + policyHashes = k + } + where + k = max 1 (round (c * log2)) + +policyFPR :: BloomPolicy -> FPR +policyFPR BloomPolicy { + policyBits = c + } = + exp (negate (c*c*f2 + c*f1 + f0)) + where + -- These parameters are from a (quadratic) linear regression in log space + -- of samples of the actual FPR between 2 and 24 bits. This is with bits as + -- the independent variable and log FPR as the dependent variable. + f2,f1,f0 :: Double + f2 = -4.990533525011442e-3 + f1 = 0.5236326626983274 + f0 = -9.08567744857578e-2 +{- +Regression, bits independent, FPR dependent: +Fit {fitParams = V3 (-4.990533525011442e-3) 0.5236326626983274 (-9.08567744857578e-2), fitErrors = V3 3.2672398863476205e-5 8.69874829861453e-4 4.98365450607998e-3, fitNDF = 996, fitWSSR = 1.4326826384055948} +Fit {fitParams = V3 (-5.03623760876204e-3) 0.5251544487138062 (-0.10110451821280719), fitErrors = V3 3.344945010267228e-5 8.905631581753235e-4 5.102181306816477e-3, fitNDF = 996, fitWSSR = 1.5016403117905384} +-} + +sizeForFPR :: FPR -> NumEntries -> BloomSize +sizeForFPR = sizeForPolicy . policyForFPR + +sizeForBits :: BitsPerEntry -> NumEntries -> BloomSize +sizeForBits = sizeForPolicy .
policyForBits + +sizeForPolicy :: BloomPolicy -> NumEntries -> BloomSize +sizeForPolicy BloomPolicy { + policyBits = c, + policyHashes = k + } n = + BloomSize { + sizeBits = max 1 (ceiling (fromIntegral n * c)), + sizeHashes = max 1 k + } + +log2, recip_log2 :: Double +log2 = log 2 +recip_log2 = recip log2 diff --git a/bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs b/bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs new file mode 100644 index 000000000..ed12968a2 --- /dev/null +++ b/bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs @@ -0,0 +1,330 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +{-# OPTIONS_HADDOCK not-home #-} + +-- | This module defines the 'Bloom' and 'MBloom' types and all the functions +-- that need direct knowledge of and access to the representation. This forms +-- the trusted base. +module Data.BloomFilter.Blocked.Internal ( + -- * Mutable Bloom filters + MBloom, + new, + + -- * Immutable Bloom filters + Bloom, + bloomInvariant, + size, + + -- * Hash-based operations + Hashes, + hashes, + insertHashes, + prefetchInsert, + elemHashes, + prefetchElem, + + -- * Conversion + freeze, + unsafeFreeze, + thaw, + + -- * (De)Serialisation + formatVersion, + serialise, + deserialise, + ) where + +import Control.DeepSeq (NFData (..)) +import Control.Exception (assert) +import Control.Monad.Primitive (PrimMonad, PrimState) +import Control.Monad.ST (ST) +import Data.Bits +import Data.Kind (Type) +import Data.Primitive.ByteArray +import Data.Primitive.PrimArray +import Data.Primitive.Types (Prim (..)) + +import Data.BloomFilter.Blocked.BitArray (BitArray, BitIx (..), + BlockIx (..), MBitArray, bitsToBlocks, blocksToBits) +import qualified Data.BloomFilter.Blocked.BitArray as BitArray +import Data.BloomFilter.Classic.Calc +import Data.BloomFilter.Hash + +-- | The version of the format used by 'serialise' and 'deserialise'. 
The +-- format number will change when there is an incompatible change in the +-- library, such that deserialising and using the filter will not work. +-- This can include more than just changes to the serialised format, for +-- example changes to hash functions or how the hash is mapped to bits. +-- +-- Note that the format produced does not include this version. Version +-- checking is the responsibility of the user of the library. +-- +formatVersion :: Int +formatVersion = 1000 + +------------------------------------------------------------------------------- +-- Mutable Bloom filters +-- + +type MBloom :: Type -> Type -> Type +-- | A mutable Bloom filter, for use within the 'ST' monad. +data MBloom s a = MBloom { + mbNumBlocks :: {-# UNPACK #-} !Int -- ^ non-zero + , mbNumHashes :: {-# UNPACK #-} !Int + , mbBitArray :: {-# UNPACK #-} !(MBitArray s) + } +type role MBloom nominal nominal + +instance Show (MBloom s a) where + show mb = "MBloom { " ++ show numBits ++ " bits } " + where + numBits = blocksToBits (mbNumBlocks mb) + +instance NFData (MBloom s a) where + rnf !_ = () + +-- | Create a new mutable Bloom filter. +-- +-- The maximum size is just under 2^41 bits (256 Gbytes): the number of 512-bit +-- blocks is clamped to 2^32-1. Tell us if you need bigger bloom filters. +-- +new :: BloomSize -> ST s (MBloom s a) +new BloomSize { sizeBits, sizeHashes } = do + let numBlocks :: Int + numBlocks = max 1 (bitsToBlocks sizeBits) + `min` 0xffff_ffff -- clamp rather than mask, so the count can never wrap to 0 + mbBitArray <- BitArray.new numBlocks + pure MBloom { + mbNumBlocks = numBlocks, + mbNumHashes = max 1 sizeHashes, + mbBitArray + } + +{-# NOINLINE insertHashes #-} +insertHashes :: forall s a.
MBloom s a -> Hashes a -> ST s () +insertHashes MBloom { mbNumBlocks, mbNumHashes, mbBitArray } !h = + go g0 mbNumHashes + where + blockIx :: BlockIx + (!blockIx, !g0) = blockIxAndBitGen h mbNumBlocks + + go :: BitIxGen -> Int -> ST s () + go !_ 0 = return () + go !g !i = do + let blockBitIx :: BitIx + (!blockBitIx, !g') = genBitIndex g + assert (let BlockIx b = blockIx + in b >= 0 && b < fromIntegral mbNumBlocks) $ + BitArray.unsafeSet mbBitArray blockIx blockBitIx + go g' (i-1) + +prefetchInsert :: MBloom s a -> Hashes a -> ST s () +prefetchInsert MBloom { mbNumBlocks, mbBitArray } !h = + BitArray.prefetchSet mbBitArray blockIx + where + blockIx :: BlockIx + (!blockIx, _) = blockIxAndBitGen h mbNumBlocks + +-- | Overwrite the filter's bit array. Use 'new' to create a filter of the +-- expected size and then use this function to fill in the bit data. +-- +-- The callback is expected to read (exactly) the given number of bytes into +-- the given byte array buffer. +-- +-- See also 'formatVersion' for compatibility advice. +-- +deserialise :: PrimMonad m + => MBloom (PrimState m) a + -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) + -> m () +deserialise MBloom {mbBitArray} fill = + BitArray.deserialise mbBitArray fill + + +------------------------------------------------------------------------------- +-- Immutable Bloom filters +-- + +type Bloom :: Type -> Type +-- | An immutable Bloom filter. 
+data Bloom a = Bloom { + numBlocks :: {-# UNPACK #-} !Int -- ^ non-zero + , numHashes :: {-# UNPACK #-} !Int + , bitArray :: {-# UNPACK #-} !BitArray + } + deriving stock Eq +type role Bloom nominal + +bloomInvariant :: Bloom a -> Bool +bloomInvariant Bloom { numBlocks, bitArray = BitArray.BitArray pa } = + numBlocks * 8 == sizeofPrimArray pa + +instance Show (Bloom a) where + show mb = "Bloom { " ++ show numBits ++ " bits } " + where + numBits = blocksToBits (numBlocks mb) + +instance NFData (Bloom a) where + rnf !_ = () + +-- | Return the size of the Bloom filter. +size :: Bloom a -> BloomSize +size Bloom { numBlocks, numHashes } = + BloomSize { + sizeBits = blocksToBits numBlocks, + sizeHashes = numHashes + } + +-- | Query an immutable Bloom filter for membership using already constructed +-- 'Hash' value. +elemHashes :: Bloom a -> Hashes a -> Bool +elemHashes Bloom { numBlocks, numHashes, bitArray } !h = + go g0 numHashes + where + blockIx :: BlockIx + (!blockIx, !g0) = blockIxAndBitGen h numBlocks + + go :: BitIxGen -> Int -> Bool + go !_ 0 = True + go !g !i + | let blockBitIx :: BitIx + (!blockBitIx, !g') = genBitIndex g + , assert (let BlockIx b = blockIx + in b >= 0 && b < fromIntegral numBlocks) $ + BitArray.unsafeIndex bitArray blockIx blockBitIx + = go g' (i-1) + + | otherwise = False + +prefetchElem :: Bloom a -> Hashes a -> ST s () +prefetchElem Bloom { numBlocks, bitArray } !h = + BitArray.prefetchIndex bitArray blockIx + where + blockIx :: BlockIx + (!blockIx, _) = blockIxAndBitGen h numBlocks + +-- | Serialise the bloom filter to a 'BloomSize' (which is needed to +-- deserialise) and a 'ByteArray' along with the offset and length containing +-- the filter's bit data. +-- +-- See also 'formatVersion' for compatibility advice. 
+-- +serialise :: Bloom a -> (BloomSize, ByteArray, Int, Int) +serialise b@Bloom{bitArray} = + (size b, ba, off, len) + where + (ba, off, len) = BitArray.serialise bitArray + + +------------------------------------------------------------------------------- +-- Conversions between mutable and immutable Bloom filters +-- + +-- | Create an immutable Bloom filter from a mutable one. The mutable +-- filter may be modified afterwards. +freeze :: MBloom s a -> ST s (Bloom a) +freeze MBloom { mbNumBlocks, mbNumHashes, mbBitArray } = do + bitArray <- BitArray.freeze mbBitArray + let !bf = Bloom { + numBlocks = mbNumBlocks, + numHashes = mbNumHashes, + bitArray + } + assert (bloomInvariant bf) $ pure bf + +-- | Create an immutable Bloom filter from a mutable one without copying. The +-- mutable filter /must not/ be modified afterwards. For a safer creation +-- interface, use 'freeze' or 'create'. +unsafeFreeze :: MBloom s a -> ST s (Bloom a) +unsafeFreeze MBloom { mbNumBlocks, mbNumHashes, mbBitArray } = do + bitArray <- BitArray.unsafeFreeze mbBitArray + let !bf = Bloom { + numBlocks = mbNumBlocks, + numHashes = mbNumHashes, + bitArray + } + assert (bloomInvariant bf) $ pure bf + +-- | Copy an immutable Bloom filter to create a mutable one. There is +-- no non-copying equivalent. +thaw :: Bloom a -> ST s (MBloom s a) +thaw Bloom { numBlocks, numHashes, bitArray } = do + mbBitArray <- BitArray.thaw bitArray + pure MBloom { + mbNumBlocks = numBlocks, + mbNumHashes = numHashes, + mbBitArray + } + + +------------------------------------------------------------------------------- +-- Low level utils +-- + +{-# INLINE reduceRange32 #-} +-- | Given a word sampled uniformly from the full 'Word32' range, such as a +-- hash, reduce it fairly to a value in the range @[0,n)@. 
+-- +-- See (link target missing here; presumably Lemire's "A fast alternative to the modulo reduction" - TODO confirm) +-- +reduceRange32 :: Word -- ^ Sample from 0..2^32-1 + -> Word -- ^ upper bound of range [0,n) + -> Word -- ^ result within range +reduceRange32 x n = + assert (n > 0) $ + let w :: Word + w = x * n + in w `shiftR` 32 + +------------------------------------------------------------------------------- +-- Hashes +-- + +-- | A small family of hashes, for probing bits in a (blocked) bloom filter. +-- +newtype Hashes a = Hashes Hash + deriving stock Show + deriving newtype Prim +type role Hashes nominal + +{-# INLINE hashes #-} +hashes :: Hashable a => a -> Hashes a +hashes = Hashes . hash64 + +{-# INLINE blockIxAndBitGen #-} +-- | The scheme for turning 'Hashes' into block and bit indexes is as follows: +-- the high 32bits of the 64bit hash select the block of bits, while the low +-- 32bits are used with a simpler PRNG to produce a sequence of probe points +-- within the selected 512bit block. +-- +blockIxAndBitGen :: Hashes a -> Int -> (BlockIx, BitIxGen) +blockIxAndBitGen (Hashes w64) numBlocks = + assert (numBlocks > 0) $ + (blockIx, bitGen) + where + blockIx = BlockIx (high32 `reduceRange32` fromIntegral numBlocks) + bitGen = BitIxGen low32 + + high32, low32 :: Word + high32 = fromIntegral (w64 `shiftR` 32) + low32 = fromIntegral w64 .&. 0xffff_ffff + +newtype BitIxGen = BitIxGen Word + +{-# INLINE genBitIndex #-} +-- | Generate the next in a short sequence of pseudo-random 9-bit +-- values. This is used for selecting the probe bit within the 512 bit block. +-- +-- This simple generator works by multiplying a 32bit value by the golden ratio +-- (as a fraction of a 32bit value). This is only suitable for short sequences +-- using the top few bits each time. +genBitIndex :: BitIxGen -> (BitIx, BitIxGen) +genBitIndex (BitIxGen h) = + (BitIx i, BitIxGen h') + where + i :: Int + i = fromIntegral (h `shiftR` (32-9)) -- top 9 bits + + h' :: Word + h' = (h * 0x9e37_79b9) .&.
0xffff_ffff -- keep least significant 32 bits diff --git a/bloomfilter/src/Data/BloomFilter/Calc.hs b/bloomfilter/src/Data/BloomFilter/Calc.hs deleted file mode 100644 index 814f49889..000000000 --- a/bloomfilter/src/Data/BloomFilter/Calc.hs +++ /dev/null @@ -1,38 +0,0 @@ --- | Various formulas for working with bloomfilters. -module Data.BloomFilter.Calc ( - falsePositiveProb, - filterSize, -) where - -import Numeric (expm1) - --- $setup --- >>> import Numeric (showFFloat) - --- | Approximate probability of false positives --- \[ --- {\displaystyle \varepsilon =\left(1-\left[1-{\frac {1}{m}}\right]^{kn}\right)^{k}\approx \left(1-e^{-kn/m}\right)^{k}} --- \] --- --- >>> [ showFFloat (Just 5) (falsePositiveProb 10_000 100_000 k) "" | k <- [1..5] ] --- ["0.09516","0.03286","0.01741","0.01181","0.00943"] --- -falsePositiveProb :: - Double -- ^ /n/, number of elements - -> Double -- ^ /m/, size of bloom filter - -> Double -- ^ /k/, number of hash functions - -> Double -falsePositiveProb n m k = - -- (1 - (1 - recip m) ** (k * n)) ** k - negate (expm1 (negate (k * n / m))) ** k - --- | Filter size for given number of elements, false positive rate and --- number of hash functions. -filterSize :: - Double -- ^ /n/, number of elements - -> Double -- ^ /e/, false positive rate - -> Double -- ^ /k/, number of hash functions - -> Double -filterSize n e k = - -- recip (1 - (1 - e ** recip k) ** recip (k * n)) - negate k * n / log (1 - e ** recip k) diff --git a/bloomfilter/src/Data/BloomFilter/Classic.hs b/bloomfilter/src/Data/BloomFilter/Classic.hs new file mode 100644 index 000000000..000cf204e --- /dev/null +++ b/bloomfilter/src/Data/BloomFilter/Classic.hs @@ -0,0 +1,259 @@ +-- | +-- +-- A fast, space efficient Bloom filter implementation. A Bloom +-- filter is a set-like data structure that provides a probabilistic +-- membership test. +-- +-- * Queries do not give false negatives. 
When an element is added to +-- a filter, a subsequent membership test will definitely return +-- 'True'. +-- +-- * False positives /are/ possible. If an element has not been added +-- to a filter, a membership test /may/ nevertheless indicate that +-- the element is present. +-- +-- This module provides low-level control. For an easier to use +-- interface, see the "Data.BloomFilter.Classic.Easy" module. + +module Data.BloomFilter.Classic ( + -- * Overview + -- $overview + + -- ** Example: a spell checker + -- $example + + -- ** Differences from bloomfilter package + -- $differences + + -- * Types + Hash, + Hashable, + + -- * Immutable Bloom filters + Bloom, + + -- ** Creation + create, + unfold, + fromList, + + -- ** (De)Serialisation + formatVersion, + serialise, + deserialise, + + -- ** Sizes + NumEntries, + BloomSize (..), + FPR, + sizeForFPR, + BitsPerEntry, + sizeForBits, + sizeForPolicy, + BloomPolicy (..), + policyFPR, + policyForFPR, + policyForBits, + + -- ** Accessors + size, + elem, + notElem, + (?), + + -- * Mutable Bloom filters + MBloom, + new, + insert, + + -- ** Conversion + freeze, + thaw, + unsafeFreeze, + + -- * Low level variants + Hashes, + hashes, + insertHashes, + elemHashes, +) where + +import Control.Monad.Primitive (PrimMonad, PrimState, RealWorld, + stToPrim) +import Control.Monad.ST (ST, runST) +import Data.Primitive.ByteArray (MutableByteArray) + +import Data.BloomFilter.Classic.Calc +import Data.BloomFilter.Classic.Internal hiding (deserialise) +import qualified Data.BloomFilter.Classic.Internal as Internal +import Data.BloomFilter.Hash + +import Prelude hiding (elem, notElem) + +-- | Create an immutable Bloom filter, using the given setup function +-- which executes in the 'ST' monad. +-- +-- Example: +-- +-- @ +--filter = create (sizeForBits 16 2) $ \mf -> do +-- insert mf \"foo\" +-- insert mf \"bar\" +-- @ +-- +-- Note that the result of the setup function is not used. +create :: BloomSize + -> (forall s. 
(MBloom s a -> ST s ())) -- ^ setup function + -> Bloom a +{-# INLINE create #-} +create bloomsize body = + runST $ do + mb <- new bloomsize + body mb + unsafeFreeze mb + +-- | Insert a value into a mutable Bloom filter. Afterwards, a +-- membership query for the same value is guaranteed to return @True@. +insert :: Hashable a => MBloom s a -> a -> ST s () +insert !mb !x = insertHashes mb (hashes x) + +-- | Query an immutable Bloom filter for membership. If the value is +-- present, return @True@. If the value is not present, there is +-- /still/ some possibility that @True@ will be returned. +elem :: Hashable a => a -> Bloom a -> Bool +elem = \ !x !b -> elemHashes b (hashes x) + +-- | Same as 'elem' but with the opposite argument order: +-- +-- > x `elem` bfilter +-- +-- versus +-- +-- > bfilter ? x +-- +(?) :: Hashable a => Bloom a -> a -> Bool +(?) = flip elem + +-- | Query an immutable Bloom filter for non-membership. If the value +-- /is/ present, return @False@. If the value is not present, there +-- is /still/ some possibility that @False@ will be returned. +notElem :: Hashable a => a -> Bloom a -> Bool +notElem = \ x b -> not (x `elem` b) + +-- | Build an immutable Bloom filter from a seed value. The seeding +-- function populates the filter as follows. +-- +-- * If it returns 'Nothing', it is finished producing values to +-- insert into the filter. +-- +-- * If it returns @'Just' (a,b)@, @a@ is added to the filter and +-- @b@ is used as a new seed. +unfold :: forall a b. + Hashable a + => BloomSize + -> (b -> Maybe (a, b)) -- ^ seeding function + -> b -- ^ initial seed + -> Bloom a +{-# INLINE unfold #-} +unfold bloomsize f k = + create bloomsize body + where + body :: forall s. MBloom s a -> ST s () + body mb = loop k + where + loop :: b -> ST s () + loop !j = case f j of + Nothing -> return () + Just (a, j') -> insert mb a >> loop j' + +-- | Create a Bloom filter, populating it from a sequence of values. 
+-- +-- For example +-- +-- @ +-- filt = fromList (policyForBits 10) [\"foo\", \"bar\", \"quux\"] +-- @ +fromList :: (Foldable t, Hashable a) + => BloomPolicy + -> t a -- ^ values to populate with + -> Bloom a +fromList policy xs = + create bsize (\b -> mapM_ (insert b) xs) + where + bsize = sizeForPolicy policy (length xs) + +{-# SPECIALISE deserialise :: BloomSize + -> (MutableByteArray RealWorld -> Int -> Int -> IO ()) + -> IO (Bloom a) #-} +deserialise :: PrimMonad m + => BloomSize + -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) + -> m (Bloom a) +deserialise bloomsize fill = do + mbloom <- stToPrim $ new bloomsize + Internal.deserialise mbloom fill + stToPrim $ unsafeFreeze mbloom + +-- $overview +-- +-- Each of the functions for creating Bloom filters accepts a 'BloomSize'. The +-- size determines the number of bits that should be used for the filter. Note +-- that a filter is fixed in size; it cannot be resized after creation. +-- +-- The size can be specified by asking for a target false positive rate (FPR) +-- or a number of bits per element, and the number of elements in the filter. +-- For example: +-- +-- * @'sizeForFPR' 1e-3 10_000@ for a Bloom filter sized for 10,000 elements +-- with a false positive rate of 1 in 1000 +-- * @'sizeForBits' 10 10_000@ for a Bloom filter sized for 10,000 elements +-- with 10 bits per element +-- +-- Depending on the application it may be more important to target a fixed +-- amount of memory to use, or target a specific FPR.
+-- +-- As a very rough guide for filter sizes, here is a range of FPRs and bits +-- per element: +-- +-- * FPR of 1e-1 requires approximately 4.8 bits per element +-- * FPR of 1e-2 requires approximately 9.6 bits per element +-- * FPR of 1e-3 requires approximately 14.4 bits per element +-- * FPR of 1e-4 requires approximately 19.2 bits per element +-- * FPR of 1e-5 requires approximately 24.0 bits per element +-- + +-- $example +-- +-- This example reads a dictionary file containing one word per line, +-- constructs a Bloom filter with a 1% false positive rate, and +-- spellchecks its standard input. Like the Unix @spell@ command, it +-- prints each word that it does not recognize. +-- +-- @ +-- import Data.Maybe (mapMaybe) +-- import qualified Data.BloomFilter as B +-- +-- main = do +-- filt \<- B.fromList (B.policyForFPR 0.01) . words \<$> readFile "\/usr\/share\/dict\/words" +-- let check word | B.elem word filt = Nothing +-- | otherwise = Just word +-- interact (unlines . mapMaybe check . lines) +-- @ + +-- $differences +-- +-- This package is an (almost entirely rewritten) fork of the +-- [bloomfilter](https://hackage.haskell.org/package/bloomfilter) package. +-- +-- The main differences are +-- +-- * This package supports Bloom filters of arbitrary sizes +-- (not limited to powers of two). Also sizes over 2^32 are supported. +-- +-- * The 'Bloom' and 'MBloom' types use a fixed hashing scheme ('Hashes'), +-- instead of having a @a -> ['Hash']@ typed field. +-- This separation allows clean de/serialization of Bloom filters in this +-- package, as the hashing scheme is static. +-- +-- * [XXH3 hash](https://xxhash.com/) is used instead of Jenkins' +-- lookup3.
diff --git a/bloomfilter/src/Data/BloomFilter/Classic/BitArray.hs b/bloomfilter/src/Data/BloomFilter/Classic/BitArray.hs new file mode 100644 index 000000000..9f53d952d --- /dev/null +++ b/bloomfilter/src/Data/BloomFilter/Classic/BitArray.hs @@ -0,0 +1,130 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +-- | Minimal bit array implementation. +module Data.BloomFilter.Classic.BitArray ( + BitArray (..), + unsafeIndex, + prefetchIndex, + MBitArray (..), + new, + unsafeSet, + freeze, + unsafeFreeze, + thaw, + serialise, + deserialise, +) where + +import Control.Exception (assert) +import Control.Monad.Primitive (PrimMonad, PrimState) +import Control.Monad.ST (ST) +import Data.Bits +import Data.Primitive.ByteArray +import Data.Primitive.PrimArray +import Data.Word (Word64, Word8) + +import GHC.Exts (Int (I#), prefetchByteArray0#) +import GHC.ST (ST (ST)) + +-- | Bit vector backed up by an array of Word64 +-- +-- This vector's offset and length are multiples of 64 +newtype BitArray = BitArray (PrimArray Word64) + deriving stock (Eq, Show) + +{-# INLINE unsafeIndex #-} +unsafeIndex :: BitArray -> Int -> Bool +unsafeIndex (BitArray arr) !i = + assert (j >= 0 && j < sizeofPrimArray arr) $ + unsafeTestBit (indexPrimArray arr j) k + where + !j = unsafeShiftR i 6 -- `div` 64, bit index to Word64 index. + !k = i .&. 63 -- `mod` 64, bit within Word64 + +{-# INLINE prefetchIndex #-} +prefetchIndex :: BitArray -> Int -> ST s () +prefetchIndex (BitArray (PrimArray ba#)) !i = + let !(I# bi#) = i `unsafeShiftR` 3 in + ST (\s -> case prefetchByteArray0# ba# bi# s of + s' -> (# s', () #)) + -- We only need to shiftR 3 here, not 6, because we're going from a bit + -- offset to a byte offset for prefetch. Whereas in unsafeIndex, we go from + -- a bit offset to a Word64 offset, so an extra shiftR 3, for 6 total. + +newtype MBitArray s = MBitArray (MutablePrimArray s Word64) + +-- | Will create an explicitly pinned byte array. 
+-- This is done because pinned byte arrays allow for more efficient +-- serialisation, but the definition of 'isByteArrayPinned' changed in GHC 9.6, +-- see . +-- +-- TODO: remove this workaround once a solution exists, e.g. a new primop that +-- allows checking for implicit pinning. +new :: Int -> ST s (MBitArray s) +new s = do + mba@(MutableByteArray mba#) <- newPinnedByteArray numBytes + setByteArray mba 0 numBytes (0 :: Word8) + return (MBitArray (MutablePrimArray mba#)) + where + !numWords = roundUpTo64 s + !numBytes = unsafeShiftL numWords 3 -- * 8 + + -- this may overflow, but so be it (2^64 bits is a lot) + roundUpTo64 :: Int -> Int + roundUpTo64 i = unsafeShiftR (i + 63) 6 -- `div` 64, rounded up + +serialise :: BitArray -> (ByteArray, Int, Int) +serialise bitArray = + let ba = asByteArray bitArray + in (ba, 0, sizeofByteArray ba) + where + asByteArray (BitArray (PrimArray ba#)) = ByteArray ba# + +-- | Do an in-place overwrite of the byte array representing the bit block. +deserialise :: PrimMonad m + => MBitArray (PrimState m) + -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) + -> m () +deserialise bitArray fill = do + let mba = asMutableByteArray bitArray + len <- getSizeofMutableByteArray mba + fill mba 0 len + where + asMutableByteArray (MBitArray (MutablePrimArray mba#)) = + MutableByteArray mba# + +unsafeSet :: MBitArray s -> Int -> ST s () +unsafeSet (MBitArray arr) i = do +#ifdef NO_IGNORE_ASSERTS + sz <- getSizeofMutablePrimArray arr + assert (j >= 0 && j < sz) $ return () +#endif + w <- readPrimArray arr j + writePrimArray arr j (unsafeSetBit w k) + where + !j = unsafeShiftR i 6 -- `div` 64 + !k = i .&.
63 -- `mod` 64 + +freeze :: MBitArray s -> ST s BitArray +freeze (MBitArray arr) = do + len <- getSizeofMutablePrimArray arr + BitArray <$> freezePrimArray arr 0 len + +unsafeFreeze :: MBitArray s -> ST s BitArray +unsafeFreeze (MBitArray arr) = + BitArray <$> unsafeFreezePrimArray arr + +thaw :: BitArray -> ST s (MBitArray s) +thaw (BitArray arr) = + MBitArray <$> thawPrimArray arr 0 (sizeofPrimArray arr) + +{-# INLINE unsafeTestBit #-} +-- like testBit but using unsafeShiftL instead of shiftL +unsafeTestBit :: Word64 -> Int -> Bool +unsafeTestBit w k = w .&. (1 `unsafeShiftL` k) /= 0 + +{-# INLINE unsafeSetBit #-} +-- like setBit but using unsafeShiftL instead of shiftL +unsafeSetBit :: Word64 -> Int -> Word64 +unsafeSetBit w k = w .|. (1 `unsafeShiftL` k) diff --git a/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs b/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs new file mode 100644 index 000000000..bdac5c0b1 --- /dev/null +++ b/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs @@ -0,0 +1,119 @@ +-- | Various formulas for working with bloomfilters. +module Data.BloomFilter.Classic.Calc ( + NumEntries, + BloomSize (..), + FPR, + sizeForFPR, + BitsPerEntry, + sizeForBits, + sizeForPolicy, + BloomPolicy (..), + policyFPR, + policyForFPR, + policyForBits, +) where + +import Numeric + +type FPR = Double +type BitsPerEntry = Double +type NumEntries = Int + +-- | A policy on intended bloom filter size -- independent of the number of +-- elements. +-- +-- We can decide a policy based on: +-- +-- 1. a target false positive rate (FPR) using 'policyForFPR' +-- 2. a number of bits per entry using 'policyForBits' +-- +-- A policy can be turned into a 'BloomSize' given a target 'NumEntries' using +-- 'sizeForPolicy'. +-- +-- Either way we define the policy, we can inspect the result to see: +-- +-- 1. The bits per entry 'policyBits'. This will determine the +-- size of the bloom filter in bits. In general the bits per entry can be +-- fractional. 
The final bloom filter size will be rounded to a whole +-- number of bits. +-- 2. The number of hashes 'policyHashes'. +-- 3. The expected FPR for the policy using 'policyFPR'. +-- +data BloomPolicy = BloomPolicy { + policyBits :: !Double, + policyHashes :: !Int + } + deriving stock Show + +policyForFPR :: FPR -> BloomPolicy +policyForFPR fpr | fpr <= 0 || fpr >= 1 = + error "policyForFPR: fpr out of range (0,1)" + +policyForFPR fpr = + BloomPolicy { + policyBits = c, + policyHashes = k + } + where + -- There's a simpler formula to compute the number of bits, but it assumes + -- that k is a real. We must however round k to the nearest natural, and + -- so we have to use a more precise approximation, using the actual value + -- of k. + k :: Int; k' :: Double + k = min 64 (max 1 (round ((-recip_log2) * log_fpr))) + k' = fromIntegral k + c = negate k' / log1mexp (log_fpr / k') + log_fpr = log fpr + +policyForBits :: BitsPerEntry -> BloomPolicy +policyForBits c | c < 0 || c > 64 = + error "policyForBits: out of range [0,64]" + +policyForBits c = + BloomPolicy { + policyBits = c, + policyHashes = k + } + where + k = max 1 (round (c * log2)) + +policyFPR :: BloomPolicy -> FPR +policyFPR BloomPolicy { + policyBits = c, + policyHashes = k + } = + negate (expm1 (negate (k' / c))) ** k' + where + k' = fromIntegral k + +-- | Parameters for constructing a Bloom filter. +-- +data BloomSize = BloomSize { + -- | The requested number of bits in filter. + -- The backing bit array is allocated in whole 64-bit words. + sizeBits :: !Int, + + -- | The number of hash functions to use. + sizeHashes :: !Int + } + deriving stock Show + +sizeForFPR :: FPR -> NumEntries -> BloomSize +sizeForFPR = sizeForPolicy . policyForFPR + +sizeForBits :: BitsPerEntry -> NumEntries -> BloomSize +sizeForBits = sizeForPolicy .
policyForBits + +sizeForPolicy :: BloomPolicy -> NumEntries -> BloomSize +sizeForPolicy BloomPolicy { + policyBits = c, + policyHashes = k + } n = + BloomSize { + sizeBits = max 1 (ceiling (fromIntegral n * c)), + sizeHashes = max 1 k + } + +log2, recip_log2 :: Double +log2 = log 2 +recip_log2 = recip log2 diff --git a/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs b/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs new file mode 100644 index 000000000..b6eeec50a --- /dev/null +++ b/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs @@ -0,0 +1,397 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +{-# OPTIONS_HADDOCK not-home #-} +-- | This module defines the 'Bloom' and 'MBloom' types and all the functions +-- that need direct knowledge of and access to the representation. This forms +-- the trusted base. +module Data.BloomFilter.Classic.Internal ( + -- * Mutable Bloom filters + MBloom, + new, + + -- * Immutable Bloom filters + Bloom, + bloomInvariant, + size, + + -- * Hash-based operations + Hashes, + hashes, + insertHashes, + elemHashes, + + -- * Conversion + freeze, + unsafeFreeze, + thaw, + + -- * (De)Serialisation + formatVersion, + serialise, + deserialise, + ) where + +import Control.DeepSeq (NFData (..)) +import Control.Exception (assert) +import Control.Monad.Primitive (PrimMonad, PrimState) +import Control.Monad.ST (ST) +import Data.Bits +import Data.Kind (Type) +import Data.Primitive.ByteArray +import Data.Primitive.PrimArray +import Data.Primitive.Types (Prim (..)) +import Data.Word (Word64) + +import GHC.Exts (Int (I#), Int#, int2Word#, timesWord2#, + uncheckedIShiftL#, word2Int#, (+#)) +import qualified GHC.Exts as Exts +import GHC.Word (Word64 (W64#)) + +import Data.BloomFilter.Classic.BitArray (BitArray, MBitArray) +import qualified Data.BloomFilter.Classic.BitArray as BitArray +import Data.BloomFilter.Classic.Calc +import Data.BloomFilter.Hash + +-- | The version of the format used by 'serialise' and 
'deserialise'. The +-- format number will change when there is an incompatible change in the +-- library, such that deserialising and using the filter will not work. +-- This can include more than just changes to the serialised format, for +-- example changes to hash functions or how the hash is mapped to bits. +-- +-- Note that the format produced does not include this version. Version +-- checking is the responsibility of the user of the library. +-- +-- History: +-- +-- * Version 0: original +-- +-- * Version 1: changed range reduction (of hash to bit index) from remainder +-- to method based on multiplication. +-- +formatVersion :: Int +formatVersion = 1 + +------------------------------------------------------------------------------- +-- Mutable Bloom filters +-- + +type MBloom :: Type -> Type -> Type +-- | A mutable Bloom filter, for use within the 'ST' monad. +data MBloom s a = MBloom { + mbNumBits :: {-# UNPACK #-} !Int -- ^ non-zero + , mbNumHashes :: {-# UNPACK #-} !Int + , mbBitArray :: {-# UNPACK #-} !(MBitArray s) + } +type role MBloom nominal nominal + +instance Show (MBloom s a) where + show mb = "MBloom { " ++ show (mbNumBits mb) ++ " bits } " + +instance NFData (MBloom s a) where + rnf !_ = () + +-- | Create a new mutable Bloom filter. +-- +-- The size is ceiled at $2^48$. Tell us if you need bigger bloom filters. +-- +new :: BloomSize -> ST s (MBloom s a) +new BloomSize { sizeBits, sizeHashes } = do + let !mbNumBits = max 1 (min 0x1_0000_0000_0000 sizeBits) + mbBitArray <- BitArray.new mbNumBits + pure MBloom { + mbNumBits, + mbNumHashes = max 1 sizeHashes, + mbBitArray + } + +insertHashes :: MBloom s a -> Hashes a -> ST s () +insertHashes MBloom { mbNumBits, mbNumHashes, mbBitArray } !ch = + go 0 + where + go !i | i >= mbNumHashes = return () + go !i = do + let idx = reduceRange64 (evalHashes ch i) mbNumBits + BitArray.unsafeSet mbBitArray idx + go (i + 1) + +-- | Overwrite the filter's bit array. 
Use 'new' to create a filter of the +-- expected size and then use this function to fill in the bit data. +-- +-- The callback is expected to read (exactly) the given number of bytes into +-- the given byte array buffer. +-- +-- See also 'formatVersion' for compatibility advice. +-- +deserialise :: PrimMonad m + => MBloom (PrimState m) a + -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) + -> m () +deserialise MBloom {mbBitArray} fill = + BitArray.deserialise mbBitArray fill + + +------------------------------------------------------------------------------- +-- Immutable Bloom filters +-- + +type Bloom :: Type -> Type +-- | An immutable Bloom filter. +data Bloom a = Bloom { + numBits :: {-# UNPACK #-} !Int -- ^ non-zero + , numHashes :: {-# UNPACK #-} !Int + , bitArray :: {-# UNPACK #-} !BitArray + } + deriving stock Eq +type role Bloom nominal + +bloomInvariant :: Bloom a -> Bool +bloomInvariant Bloom { numBits, bitArray = BitArray.BitArray pa } = + numBits > 0 + && numBits <= 2^(48 :: Int) + && ceilDiv64 numBits == sizeofPrimArray pa + where + ceilDiv64 x = unsafeShiftR (x + 63) 6 + +instance Show (Bloom a) where + show mb = "Bloom { " ++ show (numBits mb) ++ " bits } " + +instance NFData (Bloom a) where + rnf !_ = () + +-- | Return the size of the Bloom filter. +size :: Bloom a -> BloomSize +size Bloom { numBits, numHashes } = + BloomSize { + sizeBits = numBits, + sizeHashes = numHashes + } + +-- | Query an immutable Bloom filter for membership using already constructed 'Hashes' value. 
+elemHashes :: Bloom a -> Hashes a -> Bool +elemHashes Bloom { numBits, numHashes, bitArray } !ch = + go 0 + where + go :: Int -> Bool + go !i | i >= numHashes + = True + go !i = + let idx = reduceRange64 (evalHashes ch i) numBits + in if BitArray.unsafeIndex bitArray idx + then go (i + 1) + else False + +-- | Serialise the bloom filter to a 'BloomSize' (which is needed to +-- deserialise) and a 'ByteArray' along with the offset and length containing +-- the filter's bit data. +-- +-- See also 'formatVersion' for compatibility advice. +-- +serialise :: Bloom a -> (BloomSize, ByteArray, Int, Int) +serialise b@Bloom{bitArray} = + (size b, ba, off, len) + where + (ba, off, len) = BitArray.serialise bitArray + + +------------------------------------------------------------------------------- +-- Conversions between mutable and immutable Bloom filters +-- + +-- | Create an immutable Bloom filter from a mutable one. The mutable +-- filter may be modified afterwards. +freeze :: MBloom s a -> ST s (Bloom a) +freeze MBloom { mbNumBits, mbNumHashes, mbBitArray } = do + bitArray <- BitArray.freeze mbBitArray + let !bf = Bloom { + numBits = mbNumBits, + numHashes = mbNumHashes, + bitArray + } + assert (bloomInvariant bf) $ pure bf + +-- | Create an immutable Bloom filter from a mutable one without copying. The +-- mutable filter /must not/ be modified afterwards. For a safer creation +-- interface, use 'freeze' or 'create'. +unsafeFreeze :: MBloom s a -> ST s (Bloom a) +unsafeFreeze MBloom { mbNumBits, mbNumHashes, mbBitArray } = do + bitArray <- BitArray.unsafeFreeze mbBitArray + let !bf = Bloom { + numBits = mbNumBits, + numHashes = mbNumHashes, + bitArray + } + assert (bloomInvariant bf) $ pure bf + +-- | Copy an immutable Bloom filter to create a mutable one. There is +-- no non-copying equivalent. 
+thaw :: Bloom a -> ST s (MBloom s a) +thaw Bloom { numBits, numHashes, bitArray } = do + mbBitArray <- BitArray.thaw bitArray + pure MBloom { + mbNumBits = numBits, + mbNumHashes = numHashes, + mbBitArray + } + + +------------------------------------------------------------------------------- +-- Low level utils +-- + +-- | Given a word sampled uniformly from the full 'Word64' range, such as a +-- hash, reduce it fairly to a value in the range @[0,n)@. +-- +-- See +-- +{-# INLINE reduceRange64 #-} +reduceRange64 :: Word64 -- ^ Sample from 0..2^64-1 + -> Int -- ^ upper bound of range [0,n) + -> Int -- ^ result within range +reduceRange64 (W64# x) (I# n) = + -- Note that we use widening multiplication of two 64bit numbers, with a + -- 128bit result. GHC provides a primop which returns the 128bit result as + -- a pair of 64bit words. There are (as of 2025) no high level wrappers in + -- the base or primitive packages, so we use the primops directly. + case timesWord2# (word64ToWordShim# x) (int2Word# n) of + (# high, _low #) -> I# (word2Int# high) + -- Note that while x can cover the full Word64 range, since the result is + -- less than n, and since n was an Int then the result fits an Int too. + +{-# INLINE word64ToWordShim# #-} + +#if MIN_VERSION_base(4,17,0) +word64ToWordShim# :: Exts.Word64# -> Exts.Word# +word64ToWordShim# = Exts.word64ToWord# +#else +word64ToWordShim# :: Exts.Word# -> Exts.Word# +word64ToWordShim# x# = x# +#endif + +------------------------------------------------------------------------------- +-- Hashes +-- + +-- | A pair of hashes used for a double hashing scheme. +-- +-- See 'evalHashes'. 
+data Hashes a = Hashes !Hash !Hash +type role Hashes nominal + +instance Prim (Hashes a) where + sizeOfType# _ = 16# + alignmentOfType# _ = 8# + + indexByteArray# ba i = Hashes + (indexByteArray# ba (indexLo i)) + (indexByteArray# ba (indexHi i)) + readByteArray# ba i s1 = + case readByteArray# ba (indexLo i) s1 of { (# s2, lo #) -> + case readByteArray# ba (indexHi i) s2 of { (# s3, hi #) -> + (# s3, Hashes lo hi #) + }} + writeByteArray# ba i (Hashes lo hi) s = + writeByteArray# ba (indexHi i) hi (writeByteArray# ba (indexLo i) lo s) + + indexOffAddr# ba i = Hashes + (indexOffAddr# ba (indexLo i)) + (indexOffAddr# ba (indexHi i)) + readOffAddr# ba i s1 = + case readOffAddr# ba (indexLo i) s1 of { (# s2, lo #) -> + case readOffAddr# ba (indexHi i) s2 of { (# s3, hi #) -> + (# s3, Hashes lo hi #) + }} + writeOffAddr# ba i (Hashes lo hi) s = + writeOffAddr# ba (indexHi i) hi (writeOffAddr# ba (indexLo i) lo s) + +indexLo :: Int# -> Int# +indexLo i = uncheckedIShiftL# i 1# + +indexHi :: Int# -> Int# +indexHi i = uncheckedIShiftL# i 1# +# 1# + +{- Note [Original Hashes] + +Compute a list of 32-bit hashes relatively cheaply. The value to +hash is inspected at most twice, regardless of the number of hashes +requested. + +We use a variant of Kirsch and Mitzenmacher's technique from \"Less +Hashing, Same Performance: Building a Better Bloom Filter\", +. + +Where Kirsch and Mitzenmacher multiply the second hash by a +coefficient, we shift right by the coefficient. This offers better +performance (as a shift is much cheaper than a multiply), and the +low order bits of the final hash stay well mixed. + +-} + +{- Note: [Hashes] + +On the first glance the 'evalHashes' scheme seems dubious. + +Firstly, it's original performance motivation is dubious. 
+ +> multiply the second hash by a coefficient + +While the double hashing scheme is presented in +theoretical analysis as + + g(i) = a + i * b + +In practice it's implemented in a loop which looks like + + g[0] = a + for (i = 1; i < k; i++) { + a += b; + g[i] = a; + } + +I.e. with just an addition. + +Secondly there is no analysis anywhere about the +'evalHashes' scheme. + +Peter Dillinger's thesis (Adaptive Approximate State Storage) +discusses various fast hashing schemes (section 6.5), +mentioning why ordinary "double hashing" is a weak scheme. + +Issue 1: when second hash value is bad, e.g. not coprime with bloom filter's size in bits, +we can get repetitions (worst case 0, or m/2). + +Issue 2: in bloom filter scenario, whether we do a + i * b or h0 - i * b' (with b' = -b) +as we probe all indices (as set) doesn't matter, not sequentially (like in hash table). +So we lose one bit of entropy. + +Issue 3: the scheme is prone to partial overlap. +Two values with the same second hash value could overlap on many indices. + +Then Dillinger discusses various schemes which solve this issue. + +The Hashes scheme seems to avoid these culprits. +This is probably because it uses most of the bits of the second hash, even in m = 2^n scenarios. +(normal double hashing and enhanced double hashing don't use the high bits or original hash then). +TL;DR Hashes seems to work well in practice. + +For the record: RocksDB uses its own scheme as well, +where first hash is used to pick a cache line, and second one to generate probes inside it. +https://github.com/facebook/rocksdb/blob/096fb9b67d19a9a180e7c906b4a0cdb2b2d0c1f6/util/bloom_impl.h + +-} + +-- | Evaluate the 'Hashes' family. +-- +-- \[ +-- g_i = h_0 + \left\lfloor h_1 / 2^i \right\rfloor +-- \] +-- +evalHashes :: Hashes a -> Int -> Hash +evalHashes (Hashes h1 h2) i = h1 + (h2 `unsafeShiftR` i) + +-- | Create 'Hashes' structure. +-- +-- It simply hashes the value twice, using salts 0 and 1.
+hashes :: Hashable a => a -> Hashes a +hashes v = Hashes (hashSalt64 0 v) (hashSalt64 1 v) +{-# INLINE hashes #-} diff --git a/bloomfilter/src/Data/BloomFilter/Easy.hs b/bloomfilter/src/Data/BloomFilter/Easy.hs deleted file mode 100644 index e06ea6012..000000000 --- a/bloomfilter/src/Data/BloomFilter/Easy.hs +++ /dev/null @@ -1,172 +0,0 @@ --- | An easy-to-use Bloom filter interface. -module Data.BloomFilter.Easy ( - -- * Easy creation and querying - Bloom, - easyList, - B.elem, - B.notElem, - B.length, - - -- * Mutable bloom filter - MBloom, - easyNew, - MB.new, - MB.insert, - B.freeze, - - -- ** Example: a spell checker - -- $example - - -- * Useful defaults for creation - safeSuggestSizing, - suggestSizing, -) where - -import Control.Monad.ST (ST) -import Data.BloomFilter (Bloom) -import qualified Data.BloomFilter as B -import Data.BloomFilter.Calc -import Data.BloomFilter.Hash (Hashable) -import Data.BloomFilter.Mutable (MBloom) -import qualified Data.BloomFilter.Mutable as MB -import qualified Data.ByteString as SB -import Data.Word (Word64) - -------------------------------------------------------------------------------- --- Easy interface -------------------------------------------------------------------------------- - --- | Create a Bloom filter with the desired false positive rate and --- members. The hash functions used are computed by the @cheapHashes@ --- function from the 'Data.BloomFilter.Hash' module. -easyList :: Hashable a - => Double -- ^ desired false positive rate (0 < /ε/ < 1) - -> [a] -- ^ values to populate with - -> Bloom a -{-# SPECIALISE easyList :: Double -> [SB.ByteString] -> Bloom SB.ByteString #-} -easyList errRate xs = B.fromList numHashes numBits xs - where - capacity = length xs - (numBits, numHashes) - | capacity > 0 = suggestSizing capacity errRate - | otherwise = (1, 1) - --- | Create a Bloom filter with the desired false positive rate, /ε/ --- and expected maximum size, /n/. 
-easyNew :: Double -- ^ desired false positive rate (0 < /ε/ < 1) - -> Int -- ^ expected maximum size, /n/ - -> ST s (MBloom s a) -easyNew errRate capacity = MB.new numHashes numBits - where - (numBits, numHashes) = suggestSizing capacity errRate - -------------------------------------------------------------------------------- --- Size suggestions -------------------------------------------------------------------------------- - --- | Suggest a good combination of filter size and number of hash --- functions for a Bloom filter, based on its expected maximum --- capacity and a desired false positive rate. --- --- The false positive rate is the rate at which queries against the --- filter should return 'True' when an element is not actually --- present. It should be a fraction between 0 and 1, so a 1% false --- positive rate is represented by 0.01. --- --- This function will suggest to use a bloom filter of prime size. --- These theoretically behave the best. --- Also it won't suggest to use over 63 hash functions, --- because CheapHashes work only up to 63 functions. --- --- Note that while creating bloom filters with extremely small (or --- even negative) capacity is allowed for convenience, it is often --- not very useful. --- This function will always suggest to use at least 61 bits. --- --- >>> safeSuggestSizing 10000 0.01 --- Right (99317,7) --- -safeSuggestSizing :: - Int -- ^ expected maximum capacity - -> Double -- ^ desired false positive rate (0 < /e/ < 1) - -> Either String (Word64, Int) -safeSuggestSizing (fromIntegral -> capacity) errRate - | capacity <= 0 = Right (61, 1) - | errRate <= 0 || errRate >= 1 = Left "invalid error rate" - | otherwise = pickSize primes - where - bits :: Double - hashes :: Int - (bits, hashes) = minimum - [ (filterSize capacity errRate k, k') - | k' <- [1 .. 
63] - , let k = fromIntegral k' - ] - - pickSize [] = Left "capacity too large" - pickSize (w:ws) - | fromIntegral w >= bits = Right (w, hashes) - | otherwise = pickSize ws - --- primes from around 2^6 to 2^40, with five primes per "octave", --- --- * 61, 73, 83, 97, 109 --- * 127, 139, ... --- * 257, 293, ... --- * ... --- --- The third next element is around 1.5 times larger: --- 97/63 = 1.59; 109/73 = 1.49; 127/83 = 1.52 --- --- The approximate growth rate is 1.14. --- -primes :: [Word64] -primes = - [61,73,83,97,109,127,139,167,193,223,257,293,337,389,443,509,587,673,773 - ,887,1021,1171,1327,1553,1783,2039,2351,2699,3089,3559,4093,4703,5399,6203 - ,7129,8191,9403,10799,12413,14251,16381,18803,21617,24821,28517,32749 - ,37633,43237,49667,57047,65537,75277,86467,99317,114089,131071,150559 - ,172933,198659,228203,262139,301123,345889,397337,456409,524287,602233 - ,691799,794669,912839,1048573,1204493,1383593,1589333,1825673,2097143 - ,2408993,2767201,3178667,3651341,4194301,4817977,5534413,6357353,7302683 - ,8388593,9635981,11068817,12714749,14605411,16777213,19271957,22137667 - ,25429499,29210821,33554393,38543917,44275331,50858999,58421653,67108859 - ,77087833,88550677,101718013,116843297,134217689,154175663,177101321 - ,203436029,233686637,268435399,308351357,354202703,406872031,467373223 - ,536870909,616702721,708405407,813744131,934746541,1073741789,1233405449 - ,1416810797,1627488229,1869493097,2147483647,2466810893,2833621657 - ,3254976541,3738986131,4294967291,4933621843,5667243317,6509953069 - ,7477972391,8589934583,9867243719,11334486629,13019906153,14955944737 - ,17179869143,19734487471,22668973277,26039812297,29911889569,34359738337 - ,39468974939,45337946581,52079624657,59823779149,68719476731,78937949837 - ,90675893137,104159249321,119647558343,137438953447,157875899707 - ,181351786333,208318498651,239295116717,274877906899,315751799521 - ,362703572681,416636997289,478590233419,549755813881,631503599063 - 
,725407145383,833273994643,957180466901,1099511627689 - ] - --- | Behaves as 'safeSuggestSizing', but calls 'error' if given --- invalid or out-of-range inputs. -suggestSizing :: Int -- ^ expected maximum capacity - -> Double -- ^ desired false positive rate (0 < /e/ < 1) - -> (Word64, Int) -suggestSizing cap errs = either fatal id (safeSuggestSizing cap errs) - where fatal = error . ("Data.BloomFilter.Util.suggestSizing: " ++) - --- $example --- --- This example reads a dictionary file containing one word per line, --- constructs a Bloom filter with a 1% false positive rate, and --- spellchecks its standard input. Like the Unix @spell@ command, it --- prints each word that it does not recognize. --- --- @ --- import Data.Maybe (mapMaybe) --- import qualified Data.BloomFilter.Easy as B --- --- main = do --- filt \<- B.'easyList' 0.01 . words \<$> readFile "\/usr\/share\/dict\/words" --- let check word | B.'B.elem' word filt = Nothing --- | otherwise = Just word --- interact (unlines . mapMaybe check . lines) --- @ diff --git a/bloomfilter/src/Data/BloomFilter/Hash.hs b/bloomfilter/src/Data/BloomFilter/Hash.hs index 0dc6a0bf1..f778b4799 100644 --- a/bloomfilter/src/Data/BloomFilter/Hash.hs +++ b/bloomfilter/src/Data/BloomFilter/Hash.hs @@ -15,26 +15,15 @@ module Data.BloomFilter.Hash ( Incremental (..), HashState, incrementalHash, - -- * Hashing - Hashes (..), - RealHashes (..), - -- * Compute a family of hash values - CheapHashes (..), - evalCheapHashes, - makeCheapHashes, ) where import Control.Monad (forM_) import Control.Monad.ST (ST, runST) -import Data.Array.Byte (ByteArray (..)) -import Data.Bits (unsafeShiftR) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LBS import Data.Char (ord) import qualified Data.Primitive.ByteArray as P -import Data.Primitive.Types (Prim (..)) import Data.Word (Word32, Word64) -import GHC.Exts (Int#, uncheckedIShiftL#, (+#)) import qualified XXH3 -- | A hash value is 64 bits wide. 
@@ -75,7 +64,7 @@ instance Hashable LBS.ByteString where forM_ (LBS.toChunks lbs) $ \bs -> update s bs -instance Hashable ByteArray where +instance Hashable P.ByteArray where hashSalt64 salt ba = XXH3.xxh3_64bit_withSeed_ba ba 0 (P.sizeofByteArray ba) salt instance Hashable Word64 where @@ -84,6 +73,13 @@ instance Hashable Word64 where instance Hashable Word32 where hashSalt64 salt w = XXH3.xxh3_64bit_withSeed_w32 w salt +instance Hashable Word where + hashSalt64 salt n = hashSalt64 salt (fromIntegral n :: Word64) + --32bit support would need some CPP here to select based on word size + +instance Hashable Int where + hashSalt64 salt n = hashSalt64 salt (fromIntegral n :: Word) + {- Note [Tree hashing] We recursively hash inductive types (instead e.g. just serially hashing @@ -108,8 +104,8 @@ instance (Hashable a, Hashable b) => Hashable (a, b) where update s (hash64 x) update s (hash64 y) --- | Hash a (part of) 'ByteArray'. -hashByteArray :: ByteArray -> Int -> Int -> Word64 -> Word64 +-- | Hash a (part of) 'P.ByteArray'. +hashByteArray :: P.ByteArray -> Int -> Int -> Word64 -> Word64 hashByteArray = XXH3.xxh3_64bit_withSeed_ba ------------------------------------------------------------------------------- @@ -142,156 +138,3 @@ incrementalHash seed f = runST $ do XXH3.xxh3_64bit_reset_withSeed s seed f (HashState s) XXH3.xxh3_64bit_digest s - -------------------------------------------------------------------------------- --- Hashes -------------------------------------------------------------------------------- - --- | A type class abstracting over different hashing schemes.b -class Hashes h where - makeHashes :: Hashable a => a -> h a - - evalHashes :: h a -> Int -> Hash - --- | A closure of real hashing function. 
-newtype RealHashes a = RealHashes (Word64 -> Hash) - -instance Hashes RealHashes where - makeHashes x = RealHashes (\salt -> hashSalt64 salt x) - evalHashes (RealHashes f) i = f (fromIntegral i) - -------------------------------------------------------------------------------- --- CheapHashes -------------------------------------------------------------------------------- - --- | A pair of hashes used for a double hashing scheme. --- --- See 'evalCheapHashes'. -data CheapHashes a = CheapHashes !Hash !Hash - deriving Show -type role CheapHashes nominal - -instance Hashes CheapHashes where - makeHashes = makeCheapHashes - {-# INLINE makeHashes #-} - evalHashes = evalCheapHashes - {-# INLINE evalHashes #-} - -instance Prim (CheapHashes a) where - sizeOfType# _ = 16# - alignmentOfType# _ = 8# - - indexByteArray# ba i = CheapHashes - (indexByteArray# ba (indexLo i)) - (indexByteArray# ba (indexHi i)) - readByteArray# ba i s1 = - case readByteArray# ba (indexLo i) s1 of { (# s2, lo #) -> - case readByteArray# ba (indexHi i) s2 of { (# s3, hi #) -> - (# s3, CheapHashes lo hi #) - }} - writeByteArray# ba i (CheapHashes lo hi) s = - writeByteArray# ba (indexHi i) hi (writeByteArray# ba (indexLo i) lo s) - - indexOffAddr# ba i = CheapHashes - (indexOffAddr# ba (indexLo i)) - (indexOffAddr# ba (indexHi i)) - readOffAddr# ba i s1 = - case readOffAddr# ba (indexLo i) s1 of { (# s2, lo #) -> - case readOffAddr# ba (indexHi i) s2 of { (# s3, hi #) -> - (# s3, CheapHashes lo hi #) - }} - writeOffAddr# ba i (CheapHashes lo hi) s = - writeOffAddr# ba (indexHi i) hi (writeOffAddr# ba (indexLo i) lo s) - -indexLo :: Int# -> Int# -indexLo i = uncheckedIShiftL# i 1# - -indexHi :: Int# -> Int# -indexHi i = uncheckedIShiftL# i 1# +# 1# - -{- Note [Original CheapHashes] - -Compute a list of 32-bit hashes relatively cheaply. The value to -hash is inspected at most twice, regardless of the number of hashes -requested. 
- -We use a variant of Kirsch and Mitzenmacher's technique from \"Less -Hashing, Same Performance: Building a Better Bloom Filter\", -. - -Where Kirsch and Mitzenmacher multiply the second hash by a -coefficient, we shift right by the coefficient. This offers better -performance (as a shift is much cheaper than a multiply), and the -low order bits of the final hash stay well mixed. - --} - -{- Note: [CheapHashes] - -On the first glance the 'evalCheapHashes' scheme seems dubious. - -Firstly, it's original performance motivation is dubious. - -> multiply the second hash by a coefficient - -While the scheme double hashing scheme is presented in -theoretical analysis as - - g(i) = a + i * b - -In practice it's implemented in a loop which looks like - - g[0] = a - for (i = 1; i < k; i++) { - a += b; - g[i] = a; - } - -I.e. with just an addition. - -Secondly there is no analysis anywhere about the -'evalCheapHashes' scheme. - -Peter Dillinger's thesis (Adaptive Approximate State Storage) -discusses various fast hashing schemes (section 6.5), -mentioning why ordinary "double hashing" is weak scheme. - -Issue 1: when second hash value is bad, e.g. not coprime with bloom filters size in bits, -we can get repetitions (worst case 0, or m/2). - -Issue 2: in bloom filter scenario, whether we do a + i * b or h0 - i * b' (with b' = -b) -as we probe all indices (as set) doesn't matter, not sequentially (like in hash table). -So we lose one bit entropy. - -Issue 3: the scheme is prone to partial overlap. -Two values with the same second hash value could overlap on many indices. - -Then Dillinger discusses various schemes which solve this issue. - -The CheapHashes scheme seems to avoid these cuprits. -This is probably because it uses most of the bits of the second hash, even in m = 2^n scenarios. -(normal double hashing and enhances double hashing don't use the high bits or original hash then). -TL;DR CheapHashes seems to work well in practice. 
- -For the record: RocksDB uses an own scheme as well, -where first hash is used to pick a cache line, and second one to generate probes inside it. -https://github.com/facebook/rocksdb/blob/096fb9b67d19a9a180e7c906b4a0cdb2b2d0c1f6/util/bloom_impl.h - --} - --- | Evalute 'CheapHashes' family. --- --- \[ --- g_i = h_0 + \left\lfloor h_1 / 2^i \right\rfloor --- \] --- -evalCheapHashes :: CheapHashes a -> Int -> Hash -evalCheapHashes (CheapHashes h1 h2) i = h1 + (h2 `unsafeShiftR` i) - --- | Create 'CheapHashes' structure. --- --- It's simply hashes the value twice using seed 0 and 1. -makeCheapHashes :: Hashable a => a -> CheapHashes a -makeCheapHashes v = CheapHashes (hashSalt64 0 v) (hashSalt64 1 v) -{-# SPECIALISE makeCheapHashes :: BS.ByteString -> CheapHashes BS.ByteString #-} -{-# INLINEABLE makeCheapHashes #-} diff --git a/bloomfilter/src/Data/BloomFilter/Internal.hs b/bloomfilter/src/Data/BloomFilter/Internal.hs deleted file mode 100644 index 48b33f4de..000000000 --- a/bloomfilter/src/Data/BloomFilter/Internal.hs +++ /dev/null @@ -1,56 +0,0 @@ -{-# OPTIONS_HADDOCK not-home #-} --- | This module exports 'Bloom'' definition. 
-module Data.BloomFilter.Internal ( - Bloom'(..), - bloomInvariant, -) where - -import Control.DeepSeq (NFData (..)) -import Data.Bits -import qualified Data.BloomFilter.BitVec64 as V -import Data.Kind (Type) -import Data.Primitive.ByteArray (sizeofByteArray) -import qualified Data.Vector.Primitive as VP -import Data.Word (Word64) - -type Bloom' :: (Type -> Type) -> Type -> Type -data Bloom' h a = Bloom { - hashesN :: {-# UNPACK #-} !Int - , size :: {-# UNPACK #-} !Word64 -- ^ size is non-zero - , bitArray :: {-# UNPACK #-} !V.BitVec64 - } -type role Bloom' nominal nominal - -bloomInvariant :: Bloom' h a -> Bool -bloomInvariant (Bloom _ s (V.BV64 (VP.Vector off len ba))) = - s > 0 - && s <= 2^(48 :: Int) - && off >= 0 - && ceilDiv64 s == fromIntegral len - && (off + len) * 8 <= sizeofByteArray ba - where - ceilDiv64 x = unsafeShiftR (x + 63) 6 - -instance Eq (Bloom' h a) where - -- We support arbitrary sized bitvectors, - -- therefore an equality is a bit involved: - -- we need to be careful when comparing the last bits of bitArray. - Bloom k n (V.BV64 v) == Bloom k' n' (V.BV64 v') = - k == k' && - n == n' && - VP.take w v == VP.take w v' && -- compare full words - if l == 0 then True else unsafeShiftL x s == unsafeShiftL x' s -- compare last words - where - !w = fromIntegral (unsafeShiftR n 6) :: Int -- n `div` 64 - !l = fromIntegral (n .&. 63) :: Int -- n `mod` 64 - !s = 64 - l - - -- last words - x = VP.unsafeIndex v w - x' = VP.unsafeIndex v' w - -instance Show (Bloom' h a) where - show mb = "Bloom { " ++ show (size mb) ++ " bits } " - -instance NFData (Bloom' h a) where - rnf !_ = () diff --git a/bloomfilter/src/Data/BloomFilter/Mutable.hs b/bloomfilter/src/Data/BloomFilter/Mutable.hs deleted file mode 100644 index 0d986bded..000000000 --- a/bloomfilter/src/Data/BloomFilter/Mutable.hs +++ /dev/null @@ -1,129 +0,0 @@ --- | --- A fast, space efficient Bloom filter implementation. 
A Bloom --- filter is a set-like data structure that provides a probabilistic --- membership test. --- --- * Queries do not give false negatives. When an element is added to --- a filter, a subsequent membership test will definitely return --- 'True'. --- --- * False positives /are/ possible. If an element has not been added --- to a filter, a membership test /may/ nevertheless indicate that --- the element is present. --- --- This module provides low-level control. For an easier to use --- interface, see the "Data.BloomFilter.Easy" module. - -module Data.BloomFilter.Mutable ( - -- * Overview - -- $overview - - -- ** Ease of use - -- $ease - - -- ** Performance - -- $performance - - -- * Types - Hash, - MBloom, - MBloom', - CheapHashes, - RealHashes, - -- * Mutable Bloom filters - - -- ** Creation - new, - - -- ** Accessors - length, - elem, - - -- ** Mutation - insert, -) where - -import Control.Monad (liftM) -import Control.Monad.ST (ST) -import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, - Hashes (..), RealHashes) -import Data.BloomFilter.Mutable.Internal -import Data.Word (Word64) - -import qualified Data.BloomFilter.BitVec64 as V - -import Prelude hiding (elem, length) - --- | Mutable Bloom filter using CheapHashes hashing scheme. -type MBloom s = MBloom' s CheapHashes - --- | Create a new mutable Bloom filter. --- --- The size is ceiled at $2^48$. Tell us if you need bigger bloom filters. --- -new :: Int -- ^ number of hash functions to use - -> Word64 -- ^ number of bits in filter - -> ST s (MBloom' s h a) -new hash numBits = MBloom hash numBits' `liftM` V.new numBits' - where numBits' | numBits == 0 = 1 - | numBits >= 0xffff_ffff_ffff = 0x1_0000_0000_0000 - | otherwise = numBits - --- | Insert a value into a mutable Bloom filter. Afterwards, a --- membership query for the same value is guaranteed to return @True@. 
-insert :: (Hashes h, Hashable a) => MBloom' s h a -> a -> ST s () -insert !mb !x = insertHashes mb (makeHashes x) - -insertHashes :: Hashes h => MBloom' s h a -> h a -> ST s () -insertHashes (MBloom k m v) !h = go 0 - where - go !i | i >= k = return () - | otherwise = let !idx = evalHashes h i `rem` m - in V.unsafeWrite v idx True >> go (i + 1) - --- | Query a mutable Bloom filter for membership. If the value is --- present, return @True@. If the value is not present, there is --- /still/ some possibility that @True@ will be returned. -elem :: (Hashes h, Hashable a) => a -> MBloom' s h a -> ST s Bool -elem elt mb = elemHashes (makeHashes elt) mb - -elemHashes :: forall h s a. Hashes h => h a -> MBloom' s h a -> ST s Bool -elemHashes !ch (MBloom k m v) = go 0 where - go :: Int -> ST s Bool - go !i | i >= k = return True - | otherwise = do let !idx' = evalHashes ch i - let !idx = idx' `rem` m - b <- V.unsafeRead v idx - if b - then go (i + 1) - else return False - --- | Return the size of a mutable Bloom filter, in bits. -length :: MBloom' s h a -> Word64 -length = size - --- $overview --- --- Each of the functions for creating Bloom filters accepts two parameters: --- --- * The number of bits that should be used for the filter. Note that --- a filter is fixed in size; it cannot be resized after creation. --- --- * A number of hash functions, /k/, to be used for the filter. --- --- By choosing these parameters with care, it is possible to tune for --- a particular false positive rate. --- The 'Data.BloomFilter.Easy.suggestSizing' function in --- the "Data.BloomFilter.Easy" module calculates useful estimates for --- these parameters. - --- $ease --- --- This module provides both mutable interfaces for creating and --- querying a Bloom filter. It is most useful as a low-level way to --- manage a Bloom filter with a custom set of characteristics. - --- $performance --- --- The implementation has been carefully tuned for high performance --- and low space consumption. 
diff --git a/bloomfilter/src/Data/BloomFilter/Mutable/Internal.hs b/bloomfilter/src/Data/BloomFilter/Mutable/Internal.hs deleted file mode 100644 index 0c61fc4be..000000000 --- a/bloomfilter/src/Data/BloomFilter/Mutable/Internal.hs +++ /dev/null @@ -1,24 +0,0 @@ -{-# OPTIONS_HADDOCK not-home #-} --- | This module exports 'MBloom'' internals. -module Data.BloomFilter.Mutable.Internal ( - MBloom'(..), -) where - -import qualified Data.BloomFilter.BitVec64 as V -import Data.Kind (Type) -import Data.Word (Word64) - -import Prelude hiding (div, divMod, elem, length, mod, notElem, rem, - (*), (/)) - --- | A mutable Bloom filter, for use within the 'ST' monad. -type MBloom' :: Type -> (Type -> Type) -> Type -> Type -data MBloom' s h a = MBloom { - hashesN :: {-# UNPACK #-} !Int - , size :: {-# UNPACK #-} !Word64 -- ^ size is non-zero - , bitArray :: {-# UNPACK #-} !(V.MBitVec64 s) - } -type role MBloom' nominal nominal nominal - -instance Show (MBloom' s h a) where - show mb = "MBloom { " ++ show (size mb) ++ " bits } " diff --git a/bloomfilter/tests/QCSupport.hs b/bloomfilter/tests/QCSupport.hs deleted file mode 100644 index f26888616..000000000 --- a/bloomfilter/tests/QCSupport.hs +++ /dev/null @@ -1,11 +0,0 @@ -module QCSupport (P(..)) where - -import Test.QuickCheck (Arbitrary (..), choose) -import Test.QuickCheck.Instances () - -newtype P = P { unP :: Double } - deriving (Eq, Ord, Show) - -instance Arbitrary P where - arbitrary = P <$> choose (epsilon, 1 - epsilon) - where epsilon = 1e-6 :: Double diff --git a/bloomfilter/tests/bloomfilter-tests.hs b/bloomfilter/tests/bloomfilter-tests.hs index e1710d987..d8223fb69 100644 --- a/bloomfilter/tests/bloomfilter-tests.hs +++ b/bloomfilter/tests/bloomfilter-tests.hs @@ -1,63 +1,200 @@ -{-# LANGUAGE NumericUnderscores #-} -module Main where +module Main (main) where -import Control.Monad (forM_) -import qualified Data.BloomFilter.BitVec64 as BV64 -import qualified Data.BloomFilter.Easy as B +import qualified 
Data.BloomFilter.Blocked as Bloom.Blocked +import qualified Data.BloomFilter.Classic as B +import qualified Data.BloomFilter.Classic as Bloom.Classic import Data.BloomFilter.Hash (Hashable (..), hash64) -import qualified Data.BloomFilter.Internal as BI + import Data.ByteString (ByteString) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LBS -import Data.Int (Int16, Int32, Int64, Int8) -import qualified Data.Vector.Primitive as VP +import Data.Int (Int64) +import Data.Proxy (Proxy (..)) import Data.Word (Word32, Word64) -import System.IO (BufferMode (..), hSetBuffering, stdout) + +import Test.QuickCheck.Instances () import Test.Tasty import Test.Tasty.QuickCheck -import QCSupport (P (..)) - +import Prelude hiding (elem, notElem) main :: IO () main = defaultMain tests tests :: TestTree -tests = testGroup "bloomfilter" - [ testGroup "easyList" - [ testProperty "()" $ prop_pai () - , testProperty "Char" $ prop_pai (undefined :: Char) - , testProperty "Word32" $ prop_pai (undefined :: Word32) - , testProperty "Word64" $ prop_pai (undefined :: Word64) - , testProperty "ByteString" $ prop_pai (undefined :: ByteString) - , testProperty "LBS.ByteString" $ prop_pai (undefined :: LBS.ByteString) - , testProperty "LBS.ByteString" $ prop_pai (undefined :: String) +tests = + testGroup "Data.BloomFilter" $ + [ testGroup "Classic" + [ testGroup "calculations" $ + test_calculations proxyClassic + (FPR 1e-6, FPR 1) (BitsPerEntry 1, BitsPerEntry 50) 1e-6 + ++ test_calculations_classic + , test_fromList proxyClassic + ] + , testGroup "Blocked" + [ testGroup "calculations" $ + -- for the Blocked impl, the calculations are approximations + -- based on regressions, so we have to use much looser tolerances: + test_calculations proxyBlocked + (FPR 1e-4, FPR 1e-1) (BitsPerEntry 3, BitsPerEntry 24) 1e-2 + , test_fromList proxyBlocked + ] + , tests_hashes + ] + where + test_calculations proxy fprRrange bitsRange tolerance = + [ testProperty 
"prop_calc_policy_fpr" $ + prop_calc_policy_fpr proxy fprRrange tolerance + + , testProperty "prop_calc_policy_bits" $ + prop_calc_policy_bits proxy bitsRange tolerance + + , testProperty "prop_calc_size_hashes_bits" $ + prop_calc_size_hashes_bits proxy + ] + + test_calculations_classic = + [ testProperty "prop_calc_size_fpr_fpr" $ + prop_calc_size_fpr_fpr proxyClassic + + , testProperty "prop_calc_size_fpr_bits" $ + prop_calc_size_fpr_bits proxyClassic + ] + + test_fromList proxy = + testGroup "fromList" + [ testProperty "()" $ prop_elem proxy (Proxy :: Proxy ()) + , testProperty "Char" $ prop_elem proxy (Proxy :: Proxy Char) + , testProperty "Word32" $ prop_elem proxy (Proxy :: Proxy Word32) + , testProperty "Word64" $ prop_elem proxy (Proxy :: Proxy Word64) + , testProperty "ByteString" $ prop_elem proxy (Proxy :: Proxy ByteString) + , testProperty "LBS.ByteString" $ prop_elem proxy (Proxy :: Proxy LBS.ByteString) + , testProperty "String" $ prop_elem proxy (Proxy :: Proxy String) ] - , testGroup "hashes" + + tests_hashes = + testGroup "hashes" [ testProperty "prop_rechunked_eq" prop_rechunked_eq , testProperty "prop_tuple_ex" $ hash64 (BS.empty, BS.pack [120]) =/= hash64 (BS.pack [120], BS.empty) , testProperty "prop_list_ex" $ hash64 [[],[],[BS.empty]] =/= hash64 [[],[BS.empty],[]] ] - , testGroup "equality" - [ testProperty "doesn't care about leftover bits a" $ - BI.Bloom 1 48 (BV64.BV64 (VP.singleton 0xffff_0000_1234_5678)) === - BI.Bloom 1 48 (BV64.BV64 (VP.singleton 0xeeee_0000_1234_5678)) - - , testProperty "doesn't care about leftover bits b" $ - BI.Bloom 1 49 (BV64.BV64 (VP.singleton 0xffff_0000_1234_5678)) =/= - BI.Bloom 1 49 (BV64.BV64 (VP.singleton 0xeeee_0000_1234_5678)) - ] - ] + +proxyClassic :: Proxy Bloom.Classic.Bloom +proxyClassic = Proxy + +proxyBlocked :: Proxy Bloom.Blocked.Bloom +proxyBlocked = Proxy ------------------------------------------------------------------------------- -- Element is in a Bloom filter 
------------------------------------------------------------------------------- -prop_pai :: (Hashable a) => a -> a -> [a] -> P -> Property -prop_pai _ x xs (P q) = let bf = B.easyList q (x:xs) in - B.elem x bf .&&. not (B.notElem x bf) +prop_elem :: forall bloom a. (BloomFilter bloom, Hashable a) + => Proxy bloom -> Proxy a + -> a -> [a] -> FPR -> Property +prop_elem proxy _ x xs (FPR q) = + let bf :: bloom a + bf = fromList (policyForFPR proxy q) (x:xs) + in elem x bf .&&. not (notElem x bf) + +------------------------------------------------------------------------------- +-- Bloom filter size calculations +------------------------------------------------------------------------------- + +prop_calc_policy_fpr :: BloomFilter bloom => Proxy bloom + -> (FPR, FPR) -> Double + -> FPR -> Property +prop_calc_policy_fpr proxy (FPR lb, FPR ub) t (FPR fpr) = + fpr > lb && fpr < ub ==> + let policy = policyForFPR proxy fpr + in policyFPR proxy policy ~~~ fpr + where + (~~~) = withinTolerance t + +prop_calc_policy_bits :: BloomFilter bloom => Proxy bloom + -> (BitsPerEntry, BitsPerEntry) -> Double + -> BitsPerEntry -> Property +prop_calc_policy_bits proxy (BitsPerEntry lb, BitsPerEntry ub) t + (BitsPerEntry c) = + c >= lb && c <= ub ==> + let policy = policyForBits proxy c + c' = B.policyBits policy + fpr = policyFPR proxy policy + policy' = policyForFPR proxy fpr + fpr' = policyFPR proxy policy' + in c === c' .&&. fpr ~~~ fpr' + where + (~~~) = withinTolerance t + +-- | Compare @sizeHashes . 
sizeForBits@ against @numHashFunctions@ +prop_calc_size_hashes_bits :: BloomFilter bloom => Proxy bloom + -> BitsPerEntry -> NumEntries -> Property +prop_calc_size_hashes_bits proxy (BitsPerEntry c) (NumEntries numEntries) = + let bsize = sizeForBits proxy c numEntries + in numHashFunctions (fromIntegral (B.sizeBits bsize)) + (fromIntegral numEntries) + === fromIntegral (B.sizeHashes bsize) + +-- | Compare @sizeForFPR@ against @falsePositiveRate@ with some tolerance for deviations +prop_calc_size_fpr_fpr :: BloomFilter bloom => Proxy bloom + -> FPR -> NumEntries -> Property +prop_calc_size_fpr_fpr proxy (FPR fpr) (NumEntries numEntries) = + let bsize = sizeForFPR proxy fpr numEntries + in falsePositiveRate (fromIntegral (B.sizeBits bsize)) + (fromIntegral numEntries) + (fromIntegral (B.sizeHashes bsize)) + ~~~ fpr + where + (~~~) = withinTolerance 1e-6 + +-- | Compare @sizeForBits@ against @falsePositiveRate@ with some tolerance for deviations +prop_calc_size_fpr_bits :: BloomFilter bloom => Proxy bloom + -> BitsPerEntry -> NumEntries -> Property +prop_calc_size_fpr_bits proxy (BitsPerEntry c) (NumEntries numEntries) = + let policy = policyForBits proxy c + bsize = sizeForPolicy proxy policy numEntries + in falsePositiveRate (fromIntegral (B.sizeBits bsize)) + (fromIntegral numEntries) + (fromIntegral (B.sizeHashes bsize)) + ~~~ policyFPR proxy policy + where + (~~~) = withinTolerance 1e-6 + +-- reference implementations used for sanity checks + +-- | Computes the optimal number of hash functions that minimises the false +-- positive rate for a bloom filter. +-- +-- See Niv Dayan, Manos Athanassoulis, Stratos Idreos, +-- /Optimal Bloom Filters and Adaptive Merging for LSM-Trees/, +-- Footnote 2, page 6. +numHashFunctions :: + Double -- ^ Number of bits assigned to the bloom filter. + -> Double -- ^ Number of entries inserted into the bloom filter. 
+ -> Integer +numHashFunctions bits nentries = + round $ + max 1 ((bits / nentries) * log 2) + +-- | False positive rate +-- +-- See +-- +falsePositiveRate :: + Double -- ^ Number of bits assigned to the bloom filter. + -> Double -- ^ Number of entries inserted into the bloom filter. + -> Double -- ^ Number of hash functions + -> Double +falsePositiveRate m n k = + (1 - exp (-(k * n / m))) ** k + +withinTolerance :: Double -> Double -> Double -> Property +withinTolerance t a b = + counterexample (show a ++ " /= " ++ show b ++ + " and not within (abs) tolerance of " ++ show t) $ + abs (a - b) < t ------------------------------------------------------------------------------- -- Chunking @@ -84,3 +221,78 @@ prop_rechunked f s = prop_rechunked_eq :: LBS.ByteString -> Property prop_rechunked_eq = prop_rechunked hash64 + +------------------------------------------------------------------------------- +-- Class to allow testing two filter implementations +------------------------------------------------------------------------------- + +class BloomFilter bloom where + fromList :: Hashable a => B.BloomPolicy -> [a] -> bloom a + elem :: Hashable a => a -> bloom a -> Bool + notElem :: Hashable a => a -> bloom a -> Bool + + sizeForFPR :: Proxy bloom -> B.FPR -> B.NumEntries -> B.BloomSize + sizeForBits :: Proxy bloom -> B.BitsPerEntry -> B.NumEntries -> B.BloomSize + sizeForPolicy :: Proxy bloom -> B.BloomPolicy -> B.NumEntries -> B.BloomSize + policyForFPR :: Proxy bloom -> B.FPR -> B.BloomPolicy + policyForBits :: Proxy bloom -> B.BitsPerEntry -> B.BloomPolicy + policyFPR :: Proxy bloom -> B.BloomPolicy -> B.FPR + +instance BloomFilter Bloom.Classic.Bloom where + fromList = Bloom.Classic.fromList + elem = Bloom.Classic.elem + notElem = Bloom.Classic.notElem + + sizeForFPR _ = Bloom.Classic.sizeForFPR + sizeForBits _ = Bloom.Classic.sizeForBits + sizeForPolicy _ = Bloom.Classic.sizeForPolicy + policyForFPR _ = Bloom.Classic.policyForFPR + policyForBits _ = 
Bloom.Classic.policyForBits + policyFPR _ = Bloom.Classic.policyFPR + +instance BloomFilter Bloom.Blocked.Bloom where + fromList = Bloom.Blocked.fromList + elem = Bloom.Blocked.elem + notElem = Bloom.Blocked.notElem + + sizeForFPR _ = Bloom.Blocked.sizeForFPR + sizeForBits _ = Bloom.Blocked.sizeForBits + sizeForPolicy _ = Bloom.Blocked.sizeForPolicy + policyForFPR _ = Bloom.Blocked.policyForFPR + policyForBits _ = Bloom.Blocked.policyForBits + policyFPR _ = Bloom.Blocked.policyFPR + +------------------------------------------------------------------------------- +-- QC generators +------------------------------------------------------------------------------- + +newtype FPR = FPR Double + deriving stock Show + +instance Arbitrary FPR where + -- The most significant effect of the FPR is from its (negative) exponent, + -- which influences both filter bits and number of hashes. So we generate + -- values with an exponent from 10^0 to 10^-6 + arbitrary = do + m <- choose (epsilon, 1-epsilon) + e <- choose (0, 6) + pure (FPR (m * 10 ** (-e))) + where + epsilon = 1e-6 :: Double + +newtype BitsPerEntry = BitsPerEntry Double + deriving stock Show + +instance Arbitrary BitsPerEntry where + arbitrary = BitsPerEntry <$> choose (1, 50) + +newtype NumEntries = NumEntries Int + deriving stock Show + +-- | The FPR calculations are approximations and are not expected to be +-- accurate for low numbers of entries or bits. 
+-- +instance Arbitrary NumEntries where + arbitrary = NumEntries <$> choose (1_000, 100_000_000) + shrink (NumEntries n) = + [ NumEntries n' | n' <- shrink n, n' >= 1000 ] diff --git a/bloomfilter/tests/fpr-calc.hs b/bloomfilter/tests/fpr-calc.hs new file mode 100644 index 000000000..8d17db755 --- /dev/null +++ b/bloomfilter/tests/fpr-calc.hs @@ -0,0 +1,184 @@ +{-# LANGUAGE ParallelListComp #-} +module Main (main) where + +import qualified Data.BloomFilter as B (BitsPerEntry, BloomPolicy, BloomSize, + FPR, Hashable) +import qualified Data.BloomFilter.Blocked as B.Blocked +import qualified Data.BloomFilter.Classic as B.Classic + +import Control.Parallel.Strategies +import Data.IntSet (IntSet) +import qualified Data.IntSet as IntSet +import Data.List (unfoldr) +import Math.Regression.Simple +import System.IO +import System.Random + +import Prelude hiding (elem) + +-- | Write out data files used by gnuplot fpr.plot +main :: IO () +main = do + hSetBuffering stdout NoBuffering --for progress reporting + + withFile "bloomfilter/fpr.classic.gnuplot.data" WriteMode $ \h -> do + hSetBuffering h LineBuffering --for incremental output + mapM_ (\l -> hPutStrLn h l >> putChar '.') $ + [ unwords [show bitsperkey, show y1, show y2] + | (bitsperkey, _) <- xs_classic + | y1 <- ys_classic_calc + | y2 <- ys_classic_actual + ] + putStrLn "Wrote bloomfilter/fpr.classic.gnuplot.data" + + withFile "bloomfilter/fpr.blocked.gnuplot.data" WriteMode $ \h -> do + hSetBuffering h LineBuffering --for incremental output + mapM_ (\l -> hPutStrLn h l >> putChar '.') $ + [ unwords [show bitsperkey, show y1, show y2] + | (bitsperkey, _) <- xs_blocked + | y1 <- ys_blocked_calc + | y2 <- ys_blocked_actual + ] + putStrLn "Wrote bloomfilter/fpr.blocked.gnuplot.data" + + let regressionData :: [(Double, Double)] + regressionData = zip (map fst xs_blocked) + (map (negate . 
log) ys_blocked_actual) + regressionBitsToFPR = quadraticFit (\(x,y)->(x,y)) regressionData + regressionFPRToBits = quadraticFit (\(x,y)->(y,x)) regressionData + putStrLn "" + putStrLn "Blocked bloom filter quadratic regressions:" + putStrLn "bits indepedent, FPR depedent:" + print regressionBitsToFPR + putStrLn "" + putStrLn "FPR indepedent, bits depedent:" + print regressionFPRToBits + where + -- x axis values + xs_classic = + [ (bitsperkey, g) + | bitsperkey <- [2,2.3..20] + , g <- mkStdGen <$> [1..3] + ] + -- We use fewer points for classic, as it's slower and there's less need. + + xs_blocked = + [ (bitsperkey, g) + | bitsperkey <- [2,2.2..24] + , g <- mkStdGen <$> [1..9] + ] + + ys_classic_calc, ys_classic_actual, + ys_blocked_calc, ys_blocked_actual :: [Double] + + ys_classic_calc = ys_calc classicBloomImpl xs_classic + ys_blocked_calc = ys_calc blockedBloomImpl xs_blocked + + ys_classic_actual = ys_actual classicBloomImpl xs_classic + ys_blocked_actual = ys_actual blockedBloomImpl xs_blocked + + ys_calc :: BloomImpl b -> [(Double, StdGen)] -> [Double] + ys_calc BloomImpl{..} xs = + [ fpr + | (bitsperkey, _) <- xs + , let policy = policyForBits bitsperkey + fpr = policyFPR policy + ] + + ys_actual :: BloomImpl b -> [(Double, StdGen)] -> [Double] + ys_actual impl@BloomImpl{..} xs = + withStrategy (parList rseq) -- eval in parallel + [ fpr + | (bitsperkey, g) <- xs + , let policy = policyForBits bitsperkey + fpr_est = policyFPR policy + nentries = round (1000 * recip fpr_est) + fpr = actualFalsePositiveRate impl policy nentries g + ] +{- + -- fpr values in the range 1e-1 .. 1e-6 + ys = [ exp (-log_fpr) + | log_fpr <- [2.3,2.4 .. 
13.8] ] + + xs_classic_calc = xs_calc classicBloomImpl + xs_blocked_calc = xs_calc blockedBloomImpl + + xs_calc BloomImpl{..} = + [ bits + | fpr <- ys + , let policy = policyForFPR fpr + bits = policyBits policy + ] +-} + +actualFalsePositiveRate :: BloomImpl bloom + -> B.BloomPolicy -> Int -> StdGen -> Double +actualFalsePositiveRate bloomimpl policy n g0 = + fromIntegral (countFalsePositives bloomimpl policy n g0) + / fromIntegral n + +countFalsePositives :: forall bloom. BloomImpl bloom + -> B.BloomPolicy -> Int -> StdGen -> Int +countFalsePositives BloomImpl{..} policy n g0 = + let (!g0', !g0'') = split g0 + + -- create a bloom filter from n elements from g0 + size = sizeForPolicy policy n + + xs_b :: bloom Int + !xs_b = unfold size nextElement (g0', 0) + + -- and a set, so we can make sure we don't count true positives + xs_s :: IntSet + !xs_s = IntSet.fromList (unfoldr nextElement (g0', 0)) + + -- now for a different random sequence (that will mostly not overlap) + -- count the number of false positives + in length + [ () + | y <- unfoldr nextElement (g0'', 0) + , y `elem` xs_b -- Bloom filter reports positive + , not (y `IntSet.member` xs_s) -- but it is not a true positive + ] + where + nextElement :: (StdGen, Int) -> Maybe (Int, (StdGen, Int)) + nextElement (!g, !i) + | i >= n = Nothing + | otherwise = Just (x, (g', i+1)) + where + (!x, !g') = uniform g + +data BloomImpl bloom = BloomImpl { + policyForBits :: B.BitsPerEntry -> B.BloomPolicy, + policyForFPR :: B.FPR -> B.BloomPolicy, + policyBits :: B.BloomPolicy -> B.BitsPerEntry, + policyFPR :: B.BloomPolicy -> B.FPR, + sizeForPolicy :: B.BloomPolicy -> Int -> B.BloomSize, + unfold :: forall a b. B.Hashable a + => B.BloomSize -> (b -> Maybe (a, b)) -> b -> bloom a, + elem :: forall a. 
B.Hashable a => a -> bloom a -> Bool + } + +classicBloomImpl :: BloomImpl B.Classic.Bloom +classicBloomImpl = + BloomImpl { + policyForBits = B.Classic.policyForBits, + policyForFPR = B.Classic.policyForFPR, + policyBits = B.Classic.policyBits, + policyFPR = B.Classic.policyFPR, + sizeForPolicy = B.Classic.sizeForPolicy, + unfold = B.Classic.unfold, + elem = B.Classic.elem + } + +blockedBloomImpl :: BloomImpl B.Blocked.Bloom +blockedBloomImpl = + BloomImpl { + policyForBits = B.Blocked.policyForBits, + policyForFPR = B.Blocked.policyForFPR, + policyBits = B.Blocked.policyBits, + policyFPR = B.Blocked.policyFPR, + sizeForPolicy = B.Blocked.sizeForPolicy, + unfold = B.Blocked.unfold, + elem = B.Blocked.elem + } diff --git a/bloomfilter/tests/primes.hs b/bloomfilter/tests/primes.hs deleted file mode 100644 index 18f340b4a..000000000 --- a/bloomfilter/tests/primes.hs +++ /dev/null @@ -1,26 +0,0 @@ -{-# LANGUAGE BangPatterns #-} -module Main (main) where - -import Data.Bits ((.|.)) -import Data.Numbers.Primes - -steps :: Int -steps = 5 - --- calculate some primes exponentially spaced between 64..2^40 -sparsePrimes :: [Int] -sparsePrimes = go (6 * steps) where - go :: Int -> [Int] - go !e = if e > 40 * steps then [] else go1 e (truncate' (k ^ e)) - - go1 :: Int -> Int -> [Int] - go1 !e !n = if isPrime n then n : go (e + 1) else go1 e (n - 2) -- we count down! - - k :: Double - k = exp (log 2 / fromIntegral steps) - - -- truncate to odd - truncate' n = truncate n .|. 1 - -main :: IO () -main = print sparsePrimes diff --git a/cabal.project.release b/cabal.project.release index 99c81e0a7..895b82d2e 100644 --- a/cabal.project.release +++ b/cabal.project.release @@ -14,6 +14,9 @@ benchmarks: True -- Luckily, bloomfilter is not commonly used package, so this is good enough. 
constraints: bloomfilter <0 +-- regression-simple is used by the bloomfilter-fpr-calc test executable +allow-newer: regression-simple:base + -- comment me if you don't have liburing installed -- -- TODO: it is slightly unfortunate that one has to manually remove this file diff --git a/lsm-tree.cabal b/lsm-tree.cabal index 1f3baa3fe..333b9d846 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -268,7 +268,9 @@ flag bloom-query-fast common bloom-query-fast if (flag(bloom-query-fast) && impl(ghc >=9.4)) - cpp-options: -DBLOOM_QUERY_FAST + +--TODO: temporarily disabled: +--cpp-options: -DBLOOM_QUERY_FAST library import: language, warnings, wno-x-partial, bloom-query-fast @@ -359,10 +361,10 @@ library if (flag(bloom-query-fast) && impl(ghc >=9.4)) -- The bulk bloom filter query uses some fancy stuff - exposed-modules: - Database.LSMTree.Internal.BloomFilterQuery2 - Database.LSMTree.Internal.StrictArray + exposed-modules: Database.LSMTree.Internal.StrictArray + --TODO: temporarily disabled + -- Database.LSMTree.Internal.BloomFilterQuery2 build-depends: data-elevator ^>=0.1.0.2 || ^>=0.2 -- this exists due windows @@ -404,60 +406,72 @@ test-suite xxhash-tests -- this fork doesn't work on 32bit systems library bloomfilter - import: language + import: language, warnings visibility: private hs-source-dirs: bloomfilter/src build-depends: - , base >=4.16 && <5 - , bitvec ^>=1.1.5.0 + , base >=4.16 && <5 , bytestring >=0.9 - , data-array-byte , deepseq , lsm-tree:xxhash , primitive - , vector ^>=0.13.0.0 exposed-modules: Data.BloomFilter - Data.BloomFilter.BitVec64 - Data.BloomFilter.Calc - Data.BloomFilter.Easy + Data.BloomFilter.Blocked + Data.BloomFilter.Classic Data.BloomFilter.Hash - Data.BloomFilter.Internal - Data.BloomFilter.Mutable - Data.BloomFilter.Mutable.Internal - ghc-options: -O2 -Wall + other-modules: + Data.BloomFilter.Blocked.BitArray + Data.BloomFilter.Blocked.Calc + Data.BloomFilter.Blocked.Internal + Data.BloomFilter.Classic.BitArray + 
Data.BloomFilter.Classic.Calc + Data.BloomFilter.Classic.Internal + + ghc-options: -O2 test-suite bloomfilter-tests - import: language + import: language, warnings type: exitcode-stdio-1.0 hs-source-dirs: bloomfilter/tests main-is: bloomfilter-tests.hs - other-modules: QCSupport build-depends: , base <5 , bytestring , lsm-tree:bloomfilter - , QuickCheck , quickcheck-instances - , random , tasty , tasty-quickcheck - , vector -test-suite bloomfilter-primes +benchmark bloomfilter-bench import: language type: exitcode-stdio-1.0 + hs-source-dirs: bloomfilter/bench + main-is: bloomfilter-bench.hs + build-depends: + , base + , criterion + , lsm-tree:bloomfilter + , random + +executable bloomfilter-fpr-calc + import: language, warnings + scope: private hs-source-dirs: bloomfilter/tests - main-is: primes.hs + main-is: fpr-calc.hs build-depends: - , base <5 - , primes ^>=0.2.1.0 + , base + , containers + , lsm-tree:bloomfilter + , parallel + , random + , regression-simple -test-suite bloomfilter-spell +executable bloomfilter-spell import: language - type: exitcode-stdio-1.0 + scope: private hs-source-dirs: bloomfilter/examples main-is: spell.hs build-depends: @@ -494,7 +508,6 @@ library extras , io-classes:strict-stm , lsm-tree , lsm-tree:blockio-api - , lsm-tree:bloomfilter , lsm-tree:control , lsm-tree:kmerge , lsm-tree:prototypes diff --git a/src-extras/Database/LSMTree/Extras/NoThunks.hs b/src-extras/Database/LSMTree/Extras/NoThunks.hs index 0715fd142..8d257a54f 100644 --- a/src-extras/Database/LSMTree/Extras/NoThunks.hs +++ b/src-extras/Database/LSMTree/Extras/NoThunks.hs @@ -21,7 +21,6 @@ import Control.Monad.ST.Unsafe (unsafeIOToST, unsafeSTToIO) import Control.RefCount import Control.Tracer import Data.Bit -import Data.BloomFilter import Data.Map.Strict import Data.Primitive import Data.Primitive.PrimVar @@ -35,6 +34,7 @@ import Data.Word import Database.LSMTree.Internal.Arena import Database.LSMTree.Internal.BlobFile import Database.LSMTree.Internal.BlobRef +import 
Database.LSMTree.Internal.BloomFilter (Bloom, MBloom) import Database.LSMTree.Internal.ChecksumHandle import Database.LSMTree.Internal.Chunk import Database.LSMTree.Internal.Config diff --git a/src/Database/LSMTree/Internal/BloomFilter.hs b/src/Database/LSMTree/Internal/BloomFilter.hs index 471fa9f7c..99c6b52e6 100644 --- a/src/Database/LSMTree/Internal/BloomFilter.hs +++ b/src/Database/LSMTree/Internal/BloomFilter.hs @@ -1,96 +1,128 @@ {-# OPTIONS_HADDOCK not-home #-} module Database.LSMTree.Internal.BloomFilter ( + BF.Bloom, + BF.MBloom, + bloomFilterVersion, bloomFilterToLBS, - bloomFilterFromSBS, + bloomFilterFromFile, ) where -import Control.Exception (assert) -import Control.Monad (when) -import qualified Data.BloomFilter as BF -import qualified Data.BloomFilter.BitVec64 as BV64 -import qualified Data.BloomFilter.Internal as BF +import Control.Monad (void, when) +import Control.Monad.Class.MonadThrow +import Control.Monad.Primitive (PrimMonad) +import qualified Data.ByteString as BS import qualified Data.ByteString.Builder.Extra as B import qualified Data.ByteString.Lazy as LBS -import Data.ByteString.Short (ShortByteString (SBS)) -import qualified Data.Primitive as P -import Data.Primitive.ByteArray (ByteArray (ByteArray)) -import qualified Data.Vector.Primitive as VP +import qualified Data.Primitive.ByteArray as P import Data.Word (Word32, Word64, byteSwap32) -import Database.LSMTree.Internal.BitMath (ceilDiv64, mul8) +import System.FS.API + +import qualified Data.BloomFilter.Blocked as BF import Database.LSMTree.Internal.ByteString (byteArrayToByteString) -import Database.LSMTree.Internal.Vector +import Database.LSMTree.Internal.CRC32C (FileCorruptedError (..), + FileFormat (..)) -- serialising ----------------------------------------------------------- -- | By writing out the version in host endianness, we also indicate endianness. -- During deserialisation, we would discover an endianness mismatch. 
+-- +-- We base our version number on the 'BF.formatVersion' from the @bloomfilter@ +-- library, plus our own version here. This accounts both for changes in the +-- format code here, and changes in the library. +-- bloomFilterVersion :: Word32 -bloomFilterVersion = 1 +bloomFilterVersion = 1 + fromIntegral BF.formatVersion bloomFilterToLBS :: BF.Bloom a -> LBS.ByteString -bloomFilterToLBS b@(BF.Bloom _ _ bv) = - header b <> LBS.fromStrict (bitVec bv) +bloomFilterToLBS bf = + let (size, ba, off, len) = BF.serialise bf + in header size <> byteArrayToLBS ba off len where - header (BF.Bloom hashesN len _) = + header BF.BloomSize { sizeBits, sizeHashes } = -- creates a single 16 byte chunk B.toLazyByteStringWith (B.safeStrategy 16 B.smallChunkSize) mempty $ B.word32Host bloomFilterVersion - <> B.word32Host (fromIntegral hashesN) - <> B.word64Host len + <> B.word32Host (fromIntegral sizeHashes) + <> B.word64Host (fromIntegral sizeBits) - bitVec (BV64.BV64 (VP.Vector off len ba)) = - byteArrayToByteString (mul8 off) (mul8 len) ba + byteArrayToLBS :: P.ByteArray -> Int -> Int -> LBS.ByteString + byteArrayToLBS ba off len = + LBS.fromStrict (byteArrayToByteString off len ba) -- deserialising ----------------------------------------------------------- --- | Read 'BF.Bloom' from a 'ShortByteString'. --- --- The input must be 64 bit aligned and exactly contain the serialised bloom --- filter. In successful case the data portion of bloom filter is /not/ copied --- (the short bytestring has only 16 bytes of extra data in the header). +{-# SPECIALISE bloomFilterFromFile :: + HasFS IO h + -> FsPath + -> Handle h + -> IO (BF.Bloom a) #-} +-- | Read a 'BF.Bloom' from a file. 
-- -bloomFilterFromSBS :: ShortByteString -> Either String (BF.Bloom a) -bloomFilterFromSBS (SBS ba') = do - when (P.sizeofByteArray ba < 16) $ Left "Doesn't contain a header" - - let ver = P.indexPrimArray word32pa 0 - hsn = P.indexPrimArray word32pa 1 - len = P.indexPrimArray word64pa 1 -- length in bits - - when (ver /= bloomFilterVersion) $ Left $ - if byteSwap32 ver == bloomFilterVersion +bloomFilterFromFile :: + (PrimMonad m, MonadCatch m) + => HasFS m h + -> FsPath -- ^ File path just for error reporting + -> Handle h -- ^ The open file, in read mode + -> m (BF.Bloom a) +bloomFilterFromFile hfs fp h = do + header <- rethrowEOFError "Doesn't contain a header" $ + hGetByteArrayExactly hfs h 16 + + let !version = P.indexByteArray header 0 :: Word32 + !nhashes = P.indexByteArray header 1 :: Word32 + !nbits = P.indexByteArray header 1 :: Word64 + + when (version /= bloomFilterVersion) $ throwFormatError $ + if byteSwap32 version == bloomFilterVersion then "Different byte order" else "Unsupported version" - when (len <= 0) $ Left "Length is zero" + when (nbits <= 0) $ throwFormatError "Length is zero" -- limit to 2^48 bits - when (len >= 0x1_0000_0000_0000) $ Left "Too large bloomfilter" - - -- we need to round the size of vector up - let len64 = fromIntegral (ceilDiv64 len) - -- make sure the bit vector exactly fits into the byte array - -- (smaller bit vector could work, but wastes memory and should not happen) - let bytesUsed = mul8 (2 + len64) - when (bytesUsed > P.sizeofByteArray ba) $ - Left "Byte array is too small for components" - when (bytesUsed < P.sizeofByteArray ba) $ - Left "Byte array is too large for components" - - let vec64 :: VP.Vector Word64 - vec64 = mkPrimVector 2 len64 ba - - let bloom = BF.Bloom (fromIntegral hsn) len (BV64.BV64 vec64) - assert (BF.bloomInvariant bloom) $ return bloom + when (nbits >= 0x1_0000_0000_0000) $ throwFormatError "Too large bloomfilter" + --TODO: get max size from bloomfilter lib + + -- read the filter data from 
the file directly into the bloom filter + bloom <- + BF.deserialise + BF.BloomSize { + BF.sizeBits = fromIntegral nbits, + BF.sizeHashes = fromIntegral nhashes + } + (\buf off len -> + rethrowEOFError "bloom filter file too short" $ + void $ hGetBufExactly hfs + h buf (BufferOffset off) (fromIntegral len)) + + -- check we're now at EOF + trailing <- hGetSome hfs h 1 + when (not (BS.null trailing)) $ + throwFormatError "Byte array is too large for components" + return bloom where - ba :: ByteArray - ba = ByteArray ba' - - word32pa :: P.PrimArray Word32 - word32pa = P.PrimArray ba' + throwFormatError = throwIO + . ErrFileFormatInvalid + (mkFsErrorPath hfs fp) FormatBloomFilterFile + rethrowEOFError msg = + handleJust + (\e -> if isFsErrorType FsReachedEOF e then Just e else Nothing) + (\e -> throwIO $ + ErrFileFormatInvalid + (fsErrorPath e) FormatBloomFilterFile msg) + +hGetByteArrayExactly :: + (PrimMonad m, MonadThrow m) + => HasFS m h + -> Handle h + -> Int + -> m P.ByteArray +hGetByteArrayExactly hfs h len = do + buf <- P.newByteArray (fromIntegral len) + _ <- hGetBufExactly hfs h buf 0 (fromIntegral len) + P.unsafeFreezeByteArray buf - word64pa :: P.PrimArray Word64 - word64pa = P.PrimArray ba' diff --git a/src/Database/LSMTree/Internal/BloomFilterQuery1.hs b/src/Database/LSMTree/Internal/BloomFilterQuery1.hs index 3ec17ddfd..b442a257a 100644 --- a/src/Database/LSMTree/Internal/BloomFilterQuery1.hs +++ b/src/Database/LSMTree/Internal/BloomFilterQuery1.hs @@ -20,9 +20,8 @@ import Data.Word (Word32) import Control.Exception (assert) import Control.Monad.ST (ST) -import Data.BloomFilter (Bloom) -import qualified Data.BloomFilter as Bloom -import qualified Data.BloomFilter.Hash as Bloom +import Data.BloomFilter.Blocked (Bloom) +import qualified Data.BloomFilter.Blocked as Bloom import Database.LSMTree.Internal.Serialise (SerialisedKey) @@ -94,8 +93,8 @@ bloomQueries !blooms !ks !rsN = V.length blooms !ksN = V.length ks - hs :: VP.Vector (Bloom.CheapHashes 
SerialisedKey) - !hs = VP.generate ksN $ \i -> Bloom.makeCheapHashes (V.unsafeIndex ks i) + hs :: VP.Vector (Bloom.Hashes SerialisedKey) + !hs = VP.generate ksN $ \i -> Bloom.hashes (V.unsafeIndex ks i) -- Loop over all run indexes loop1 :: @@ -120,7 +119,7 @@ bloomQueries !blooms !ks loop2 !res2 !resix2 !kix !b | kix == ksN = pure (res2, resix2) | let !h = hs `VP.unsafeIndex` kix - , Bloom.elemHashes h b = do + , Bloom.elemHashes b h = do -- Double the vector if we've reached the end. -- Note unsafeGrow takes the number to grow by, not the new size. res2' <- if resix2 == VPM.length res2 diff --git a/src/Database/LSMTree/Internal/BloomFilterQuery2.hs b/src/Database/LSMTree/Internal/BloomFilterQuery2.hs index a8fda56ab..83f711c2e 100644 --- a/src/Database/LSMTree/Internal/BloomFilterQuery2.hs +++ b/src/Database/LSMTree/Internal/BloomFilterQuery2.hs @@ -218,7 +218,7 @@ prepKeyHashes :: V.Vector SerialisedKey -> P.PrimArray (Bloom.CheapHashes SerialisedKey) prepKeyHashes keys = P.generatePrimArray (V.length keys) $ \i -> - Bloom.makeCheapHashes (V.unsafeIndex keys i) + Bloom.makeHashes (V.unsafeIndex keys i) prepInitialCandidateProbes :: P.StrictArray (Bloom SerialisedKey) @@ -246,7 +246,7 @@ prepInitialCandidateProbes !keyhash = P.indexPrimArray keyhashes kix !hn = BF.hashesN filter - 1 !bix = (fromIntegral :: Word64 -> Int) $ - Bloom.evalCheapHashes keyhash hn + Bloom.evalHashes keyhash hn `BV64.unsafeRemWord64` -- size must be > 0 BF.size filter -- bloomInvariant ensures this BV64.prefetchIndex (BF.bitArray filter) bix @@ -390,7 +390,7 @@ bloomQueriesBody !filters !keyhashes !candidateProbes = assert (hn >= 0 && hn < BF.hashesN filter) $ do let !keyhash = P.indexPrimArray keyhashes kix !bix = (fromIntegral :: Word64 -> Int) $ - Bloom.evalCheapHashes keyhash hn + Bloom.evalHashes keyhash hn `BV64.unsafeRemWord64` -- size must be > 0 BF.size filter -- bloomInvariant ensures this BV64.prefetchIndex (BF.bitArray filter) bix diff --git 
a/src/Database/LSMTree/Internal/CRC32C.hs b/src/Database/LSMTree/Internal/CRC32C.hs index 5bd62ee94..5a995efeb 100644 --- a/src/Database/LSMTree/Internal/CRC32C.hs +++ b/src/Database/LSMTree/Internal/CRC32C.hs @@ -282,24 +282,30 @@ newtype ChecksumsFileName = ChecksumsFileName {unChecksumsFileName :: BSC.ByteSt {-# SPECIALISE getChecksum :: - FsPath + HasFS IO h + -> FsPath -> ChecksumsFile -> ChecksumsFileName -> IO CRC32C #-} getChecksum :: MonadThrow m - => FsPath + => HasFS m h + -> FsPath -> ChecksumsFile -> ChecksumsFileName -> m CRC32C -getChecksum fsPath checksumsFile checksumsFileName = +getChecksum hfs fsPath checksumsFile checksumsFileName = case Map.lookup checksumsFileName checksumsFile of Just checksum -> pure checksum Nothing -> - throwIO . ErrFileFormatInvalid fsPath FormatChecksumsFile $ - "could not find checksum for " <> show (unChecksumsFileName checksumsFileName) + throwIO $ + ErrFileFormatInvalid + (mkFsErrorPath hfs fsPath) + FormatChecksumsFile + ("could not find checksum for " <> + show (unChecksumsFileName checksumsFileName)) {-# SPECIALISE readChecksumsFile :: @@ -314,7 +320,7 @@ readChecksumsFile :: -> m ChecksumsFile readChecksumsFile fs path = do str <- withFile fs path ReadMode (\h -> hGetAll fs h) - expectValidFile path FormatChecksumsFile (parseChecksumsFile (BSL.toStrict str)) + expectValidFile fs path FormatChecksumsFile (parseChecksumsFile (BSL.toStrict str)) {-# SPECIALISE writeChecksumsFile :: HasFS IO h -> FsPath -> ChecksumsFile -> IO () #-} writeChecksumsFile :: MonadThrow m @@ -414,24 +420,30 @@ checkCRC fs hbio dropCache expected fp = withFile fs fp ReadMode $ \h -> do hAdviseAll hbio h AdviceSequential !checksum <- hGetAllCRC32C' fs h defaultChunkSize initialCRC32C when dropCache $ hDropCacheAll hbio h - expectChecksum fp expected checksum + expectChecksum fs fp expected checksum {-# SPECIALISE expectChecksum :: - FsPath + HasFS IO h + -> FsPath -> CRC32C -> CRC32C -> IO () #-} expectChecksum :: MonadThrow m - => FsPath + 
=> HasFS m h + -> FsPath -> CRC32C -> CRC32C -> m () -expectChecksum fp expected checksum = +expectChecksum hfs fp expected checksum = when (expected /= checksum) $ - throwIO $ ErrFileChecksumMismatch fp (unCRC32C expected) (unCRC32C checksum) + throwIO $ + ErrFileChecksumMismatch + (mkFsErrorPath hfs fp) + (unCRC32C expected) + (unCRC32C checksum) {------------------------------------------------------------------------------- @@ -451,7 +463,7 @@ data FileCorruptedError = -- | The file fails to parse. ErrFileFormatInvalid -- | File. - !FsPath + !FsErrorPath -- | File format. !FileFormat -- | Error message. @@ -459,7 +471,7 @@ data FileCorruptedError | -- | The file CRC32 checksum is invalid. ErrFileChecksumMismatch -- | File. - !FsPath + !FsErrorPath -- | Expected checksum. !Word32 -- | Actual checksum. @@ -469,19 +481,21 @@ data FileCorruptedError {-# SPECIALISE expectValidFile :: - FsPath + HasFS IO h + -> FsPath -> FileFormat -> Either String a -> IO a #-} expectValidFile :: - (MonadThrow m) - => FsPath + MonadThrow m + => HasFS f h + -> FsPath -> FileFormat -> Either String a -> m a -expectValidFile _file _format (Right x) = +expectValidFile _hfs _file _format (Right x) = pure x -expectValidFile file format (Left msg) = - throwIO $ ErrFileFormatInvalid file format msg +expectValidFile hfs file format (Left msg) = + throwIO $ ErrFileFormatInvalid (mkFsErrorPath hfs file) format msg diff --git a/src/Database/LSMTree/Internal/ChecksumHandle.hs b/src/Database/LSMTree/Internal/ChecksumHandle.hs index 6aea6ae01..69897e7e0 100644 --- a/src/Database/LSMTree/Internal/ChecksumHandle.hs +++ b/src/Database/LSMTree/Internal/ChecksumHandle.hs @@ -24,13 +24,12 @@ module Database.LSMTree.Internal.ChecksumHandle import Control.Monad.Class.MonadSTM (MonadSTM (..)) import Control.Monad.Class.MonadThrow (MonadThrow) import Control.Monad.Primitive -import Data.BloomFilter (Bloom) import qualified Data.ByteString.Lazy as BSL import Data.Primitive.PrimVar import Data.Word (Word64) 
import Database.LSMTree.Internal.BlobRef (BlobSpan (..), RawBlobRef) import qualified Database.LSMTree.Internal.BlobRef as BlobRef -import Database.LSMTree.Internal.BloomFilter (bloomFilterToLBS) +import Database.LSMTree.Internal.BloomFilter (Bloom, bloomFilterToLBS) import Database.LSMTree.Internal.Chunk (Chunk) import qualified Database.LSMTree.Internal.Chunk as Chunk (toByteString) import Database.LSMTree.Internal.CRC32C (CRC32C) diff --git a/src/Database/LSMTree/Internal/Lookup.hs b/src/Database/LSMTree/Internal/Lookup.hs index eaab22597..07f43a55c 100644 --- a/src/Database/LSMTree/Internal/Lookup.hs +++ b/src/Database/LSMTree/Internal/Lookup.hs @@ -19,7 +19,6 @@ module Database.LSMTree.Internal.Lookup ( ) where import Data.Bifunctor -import Data.BloomFilter (Bloom) import Data.Primitive.ByteArray import qualified Data.Vector as V import qualified Data.Vector.Mutable as VM @@ -37,6 +36,7 @@ import Control.Monad.ST.Strict import Control.RefCount import Database.LSMTree.Internal.BlobRef (WeakBlobRef (..)) +import Database.LSMTree.Internal.BloomFilter (Bloom) import Database.LSMTree.Internal.Entry import Database.LSMTree.Internal.Index (Index) import qualified Database.LSMTree.Internal.Index as Index (search) diff --git a/src/Database/LSMTree/Internal/MergeSchedule.hs b/src/Database/LSMTree/Internal/MergeSchedule.hs index 829cd7974..863658d42 100644 --- a/src/Database/LSMTree/Internal/MergeSchedule.hs +++ b/src/Database/LSMTree/Internal/MergeSchedule.hs @@ -53,10 +53,10 @@ import Control.Monad.Class.MonadThrow (MonadMask, MonadThrow (..)) import Control.Monad.Primitive import Control.RefCount import Control.Tracer -import Data.BloomFilter (Bloom) import Data.Foldable (fold, traverse_) import qualified Data.Vector as V import Database.LSMTree.Internal.Assertions (assert) +import Database.LSMTree.Internal.BloomFilter (Bloom) import Database.LSMTree.Internal.Config import Database.LSMTree.Internal.Entry (Entry, NumEntries (..), unNumEntries) diff --git 
a/src/Database/LSMTree/Internal/Run.hs b/src/Database/LSMTree/Internal/Run.hs index d04b0990c..44811b24c 100644 --- a/src/Database/LSMTree/Internal/Run.hs +++ b/src/Database/LSMTree/Internal/Run.hs @@ -36,14 +36,14 @@ import Control.Monad.Class.MonadSTM (MonadSTM (..)) import Control.Monad.Class.MonadThrow import Control.Monad.Primitive import Control.RefCount -import Data.BloomFilter (Bloom) import qualified Data.ByteString.Short as SBS import Data.Foldable (for_) import Database.LSMTree.Internal.BlobFile import Database.LSMTree.Internal.BlobRef hiding (mkRawBlobRef, mkWeakBlobRef) import qualified Database.LSMTree.Internal.BlobRef as BlobRef -import Database.LSMTree.Internal.BloomFilter (bloomFilterFromSBS) +import Database.LSMTree.Internal.BloomFilter (Bloom, + bloomFilterFromFile) import qualified Database.LSMTree.Internal.CRC32C as CRC import Database.LSMTree.Internal.Entry (NumEntries (..)) import Database.LSMTree.Internal.Index (Index, IndexType (..)) @@ -294,7 +294,8 @@ openFromDisk :: -- TODO: make exception safe openFromDisk fs hbio runRunDataCaching indexType runRunFsPaths = do expectedChecksums <- - CRC.expectValidFile (runChecksumsPath runRunFsPaths) CRC.FormatChecksumsFile . fromChecksumsFile + CRC.expectValidFile fs (runChecksumsPath runRunFsPaths) CRC.FormatChecksumsFile + . fromChecksumsFile =<< CRC.readChecksumsFile fs (runChecksumsPath runRunFsPaths) -- verify checksums of files we don't read yet @@ -303,11 +304,14 @@ openFromDisk fs hbio runRunDataCaching indexType runRunFsPaths = do checkCRC runRunDataCaching (forRunBlobRaw expectedChecksums) (forRunBlobRaw paths) -- read and try parsing files - runFilter <- - CRC.expectValidFile (forRunFilterRaw paths) CRC.FormatBloomFilterFile . 
bloomFilterFromSBS - =<< readCRC (forRunFilterRaw expectedChecksums) (forRunFilterRaw paths) + let filterPath = forRunFilterRaw paths + checkCRC CacheRunData (forRunFilterRaw expectedChecksums) filterPath + runFilter <- FS.withFile fs filterPath FS.ReadMode $ + bloomFilterFromFile fs filterPath + (runNumEntries, runIndex) <- - CRC.expectValidFile (forRunIndexRaw paths) CRC.FormatIndexFile . Index.fromSBS indexType + CRC.expectValidFile fs (forRunIndexRaw paths) CRC.FormatIndexFile + . Index.fromSBS indexType =<< readCRC (forRunIndexRaw expectedChecksums) (forRunIndexRaw paths) runKOpsFile <- FS.hOpen fs (runKOpsPath runRunFsPaths) FS.ReadMode @@ -336,5 +340,5 @@ openFromDisk fs hbio runRunDataCaching indexType runRunFsPaths = do (sbs, !checksum) <- CRC.hGetExactlyCRC32C_SBS fs h (fromIntegral n) CRC.initialCRC32C -- drop the file from the OS page cache FS.hAdviseAll hbio h FS.AdviceDontNeed - CRC.expectChecksum fp expected checksum + CRC.expectChecksum fs fp expected checksum return sbs diff --git a/src/Database/LSMTree/Internal/RunAcc.hs b/src/Database/LSMTree/Internal/RunAcc.hs index 5cce81cca..9ad2ed52f 100644 --- a/src/Database/LSMTree/Internal/RunAcc.hs +++ b/src/Database/LSMTree/Internal/RunAcc.hs @@ -29,23 +29,16 @@ module Database.LSMTree.Internal.RunAcc ( , PageAcc.entryWouldFitInPage -- * Bloom filter allocation , RunBloomFilterAlloc (..) 
- -- ** Exposed for testing - , newMBloom - , numHashFunctions - , falsePositiveRate ) where import Control.DeepSeq (NFData (..)) import Control.Exception (assert) import Control.Monad.ST.Strict -import Data.BloomFilter (Bloom, MBloom) -import qualified Data.BloomFilter as Bloom -import qualified Data.BloomFilter.Easy as Bloom.Easy -import qualified Data.BloomFilter.Mutable as MBloom +import Data.BloomFilter.Blocked (Bloom, MBloom) +import qualified Data.BloomFilter.Blocked as Bloom import Data.Primitive.PrimVar (PrimVar, modifyPrimVar, newPrimVar, readPrimVar) import Data.Word (Word64) -import Database.LSMTree.Internal.Assertions (fromIntegralChecked) import Database.LSMTree.Internal.BlobRef (BlobSpan (..)) import Database.LSMTree.Internal.Chunk (Chunk) import Database.LSMTree.Internal.Entry (Entry (..), NumEntries (..)) @@ -88,8 +81,12 @@ new :: -> RunBloomFilterAlloc -> IndexType -> ST s (RunAcc s) -new nentries alloc indexType = do - mbloom <- newMBloom nentries alloc +new (NumEntries nentries) alloc indexType = do + --TODO: it'd be possible to cache this BloomPolicy, since it is independent + -- of the NumEntries, avoiding recalculating the policy every time. + let policy = bloomFilterAllocPolicy alloc + bsize = Bloom.sizeForPolicy policy nentries + mbloom <- Bloom.new bsize mindex <- Index.newWithDefaults indexType mpageacc <- PageAcc.newPageAcc entryCount <- newPrimVar 0 @@ -171,7 +168,7 @@ addSmallKeyOp :: addSmallKeyOp racc@RunAcc{..} k e = assert (PageAcc.entryWouldFitInPage k e) $ do modifyPrimVar entryCount (+1) - MBloom.insert mbloom k + Bloom.insert mbloom k pageBoundaryNeeded <- -- Try adding the key/op to the page accumulator to see if it fits.
If @@ -217,7 +214,7 @@ addLargeKeyOp :: addLargeKeyOp racc@RunAcc{..} k e = assert (not (PageAcc.entryWouldFitInPage k e)) $ do modifyPrimVar entryCount (+1) - MBloom.insert mbloom k + Bloom.insert mbloom k -- If the existing page accumulator is non-empty, we flush it, since the -- new large key/op will need more than one page to itself. @@ -271,7 +268,7 @@ addLargeSerialisedKeyOp racc@RunAcc{..} k page overflowPages = assert (RawPage.rawPageOverflowPages page > 0) $ assert (RawPage.rawPageOverflowPages page == length overflowPages) $ do modifyPrimVar entryCount (+1) - MBloom.insert mbloom k + Bloom.insert mbloom k -- If the existing page accumulator is non-empty, we flush it, since the -- new large key/op will need more than one page to itself. @@ -325,7 +322,7 @@ selectPagesAndChunks mpagemchunkPre page chunks = -- | See 'Database.LSMTree.Internal.Config.BloomFilterAlloc' data RunBloomFilterAlloc = -- | Bits per element in a filter - RunAllocFixed !Word64 + RunAllocFixed !Word64 --TODO: this could be Double too | RunAllocRequestFPR !Double deriving stock (Show, Eq) @@ -333,42 +330,8 @@ instance NFData RunBloomFilterAlloc where rnf (RunAllocFixed a) = rnf a rnf (RunAllocRequestFPR a) = rnf a -newMBloom :: NumEntries -> RunBloomFilterAlloc -> ST s (MBloom s a) -newMBloom (NumEntries nentries) = \case - RunAllocFixed !bitsPerEntry -> - let !nbits = fromIntegral bitsPerEntry * fromIntegral nentries - in MBloom.new - (fromIntegralChecked $ numHashFunctions nbits (fromIntegralChecked nentries)) - (fromIntegralChecked nbits) - RunAllocRequestFPR !fpr -> - Bloom.Easy.easyNew fpr nentries - --- | Computes the optimal number of hash functions that minimises the false --- positive rate for a bloom filter. --- --- See Niv Dayan, Manos Athanassoulis, Stratos Idreos, --- /Optimal Bloom Filters and Adaptive Merging for LSM-Trees/, --- Footnote 2, page 6. -numHashFunctions :: - Integer -- ^ Number of bits assigned to the bloom filter. 
- -> Integer -- ^ Number of entries inserted into the bloom filter. - -> Integer -numHashFunctions nbits nentries = truncate @Double $ max 1 $ - (fromIntegral nbits / fromIntegral nentries) * log 2 - --- | False positive rate --- --- Assumes that the bloom filter uses 'numHashFunctions' hash functions. --- --- See Niv Dayan, Manos Athanassoulis, Stratos Idreos, --- /Optimal Bloom Filters and Adaptive Merging for LSM-Trees/, --- Equation 2. -falsePositiveRate :: - Floating a - => a -- ^ entries - -> a -- ^ bits - -> a -falsePositiveRate entries bits = exp ((-(bits / entries)) * sq (log 2)) - -sq :: Num a => a -> a -sq x = x * x +--TODO: RunBloomFilterAlloc could probably be replaced by Bloom.BloomPolicy +bloomFilterAllocPolicy :: RunBloomFilterAlloc -> Bloom.BloomPolicy +bloomFilterAllocPolicy = \case + RunAllocFixed bitsPerEntry -> Bloom.policyForBits (fromIntegral bitsPerEntry) + RunAllocRequestFPR fpr -> Bloom.policyForFPR fpr diff --git a/src/Database/LSMTree/Internal/RunBuilder.hs b/src/Database/LSMTree/Internal/RunBuilder.hs index 7583dde4a..b440c52fc 100644 --- a/src/Database/LSMTree/Internal/RunBuilder.hs +++ b/src/Database/LSMTree/Internal/RunBuilder.hs @@ -22,11 +22,11 @@ import qualified Control.Monad.Class.MonadST as ST import Control.Monad.Class.MonadSTM (MonadSTM (..)) import Control.Monad.Class.MonadThrow (MonadThrow) import Control.Monad.Primitive -import Data.BloomFilter (Bloom) import Data.Foldable (for_, traverse_) import Data.Primitive.PrimVar import Data.Word (Word64) import Database.LSMTree.Internal.BlobRef (RawBlobRef) +import Database.LSMTree.Internal.BloomFilter (Bloom) import Database.LSMTree.Internal.ChecksumHandle import qualified Database.LSMTree.Internal.CRC32C as CRC import Database.LSMTree.Internal.Entry diff --git a/src/Database/LSMTree/Internal/Snapshot.hs b/src/Database/LSMTree/Internal/Snapshot.hs index 1d1ee8786..0c806c964 100644 --- a/src/Database/LSMTree/Internal/Snapshot.hs +++ b/src/Database/LSMTree/Internal/Snapshot.hs @@ 
-517,7 +517,8 @@ openWriteBuffer reg resolve hfs hbio uc activeDir snapWriteBufferPaths = do -- TODO: This reads the blobfile twice: once to check the CRC and once more -- to copy it from the snapshot directory to the active directory. (expectedChecksumForKOps, expectedChecksumForBlob) <- - CRC.expectValidFile (writeBufferChecksumsPath snapWriteBufferPaths) CRC.FormatWriteBufferFile . fromChecksumsFileForWriteBufferFiles + CRC.expectValidFile hfs (writeBufferChecksumsPath snapWriteBufferPaths) CRC.FormatWriteBufferFile + . fromChecksumsFileForWriteBufferFiles =<< CRC.readChecksumsFile hfs (writeBufferChecksumsPath snapWriteBufferPaths) checkCRC hfs hbio False (unForKOps expectedChecksumForKOps) (writeBufferKOpsPath snapWriteBufferPaths) checkCRC hfs hbio False (unForBlob expectedChecksumForBlob) (writeBufferBlobPath snapWriteBufferPaths) diff --git a/src/Database/LSMTree/Internal/Snapshot/Codec.hs b/src/Database/LSMTree/Internal/Snapshot/Codec.hs index eef4b6b23..732506e46 100644 --- a/src/Database/LSMTree/Internal/Snapshot/Codec.hs +++ b/src/Database/LSMTree/Internal/Snapshot/Codec.hs @@ -22,7 +22,6 @@ import Codec.CBOR.Decoding import Codec.CBOR.Encoding import Codec.CBOR.Read import Codec.CBOR.Write -import Control.Monad (when) import Control.Monad.Class.MonadThrow (Exception (displayException), MonadThrow (..)) import Data.Bifunctor (Bifunctor (..)) @@ -130,16 +129,15 @@ readFileSnapshotMetaData hfs contentPath checksumPath = do checksumFile <- readChecksumsFile hfs checksumPath let checksumFileName = ChecksumsFileName (BSC.pack "metadata") - expectedChecksum <- getChecksum checksumPath checksumFile checksumFileName + expectedChecksum <- getChecksum hfs checksumPath checksumFile checksumFileName (lbs, actualChecksum) <- FS.withFile hfs contentPath FS.ReadMode $ \h -> do n <- FS.hGetSize hfs h FS.hGetExactlyCRC32C hfs h n initialCRC32C - when (expectedChecksum /= actualChecksum) . 
throwIO $ - ErrFileChecksumMismatch contentPath (unCRC32C expectedChecksum) (unCRC32C actualChecksum) + expectChecksum hfs contentPath expectedChecksum actualChecksum - expectValidFile contentPath FormatSnapshotMetaData (decodeSnapshotMetaData lbs) + expectValidFile hfs contentPath FormatSnapshotMetaData (decodeSnapshotMetaData lbs) decodeSnapshotMetaData :: ByteString -> Either String SnapshotMetaData decodeSnapshotMetaData lbs = bimap displayException (getVersioned . snd) (deserialiseFromBytes decode lbs) diff --git a/test/Test/Database/LSMTree/Internal/BloomFilter.hs b/test/Test/Database/LSMTree/Internal/BloomFilter.hs index f36ce3336..1cdbfb9ba 100644 --- a/test/Test/Database/LSMTree/Internal/BloomFilter.hs +++ b/test/Test/Database/LSMTree/Internal/BloomFilter.hs @@ -3,22 +3,29 @@ module Test.Database.LSMTree.Internal.BloomFilter (tests) where import Control.DeepSeq (deepseq) -import Data.Bits (unsafeShiftL, unsafeShiftR, (.&.)) +import Control.Exception (displayException) +import Control.Monad (void) +import qualified Control.Monad.IOSim as IOSim +import Data.Bits ((.&.)) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Builder as BS.Builder +import qualified Data.ByteString.Builder.Extra as BS.Builder import qualified Data.ByteString.Lazy as LBS -import qualified Data.ByteString.Short as SBS -import Data.Primitive.ByteArray (ByteArray (..), byteArrayFromList) import qualified Data.Set as Set import qualified Data.Vector as V import qualified Data.Vector.Primitive as VP import Data.Word (Word32, Word64) +import qualified System.FS.API as FS +import qualified System.FS.API.Strict as FS +import qualified System.FS.Sim.MockFS as MockFS +import qualified System.FS.Sim.STM as FSSim import Test.QuickCheck.Gen (genDouble) +import Test.QuickCheck.Instances () import Test.Tasty (TestTree, testGroup) import Test.Tasty.QuickCheck hiding ((.&.)) -import qualified Data.BloomFilter as BF -import qualified Data.BloomFilter.Easy as BF -import 
qualified Data.BloomFilter.Internal as BF (bloomInvariant) +import qualified Data.BloomFilter.Blocked as BF import Database.LSMTree.Internal.BloomFilter import qualified Database.LSMTree.Internal.BloomFilterQuery1 as Bloom1 import Database.LSMTree.Internal.Serialise (SerialisedKey, @@ -34,7 +41,7 @@ tests :: TestTree tests = testGroup "Database.LSMTree.Internal.BloomFilter" [ testProperty "roundtrip" roundtrip_prop -- a specific case: 300 bits is just under 5x 64 bit words - , testProperty "roundtrip-3-300" $ roundtrip_prop (Positive (Small 3)) 300 + , testProperty "roundtrip-3-300" $ roundtrip_prop (Positive (Small 3)) (Positive 300) , testProperty "total-deserialisation" $ withMaxSuccess 10000 $ prop_total_deserialisation , testProperty "total-deserialisation-whitebox" $ withMaxSuccess 10000 $ @@ -48,42 +55,52 @@ tests = testGroup "Database.LSMTree.Internal.BloomFilter" #endif ] -roundtrip_prop :: Positive (Small Int) -> Word64 -> [Word64] -> Property -roundtrip_prop (Positive (Small hfN)) (limitBits -> bits) ws = - counterexample (show sbs) $ - Right lhs === rhs +roundtrip_prop :: Positive (Small Int) -> Positive Int -> [Word64] -> Property +roundtrip_prop (Positive (Small hfN)) (Positive bits) ws = + counterexample (show bs) $ + case bloomFilterFromBS bs of + Left err -> label (displayException err) $ property True + Right rhs -> lhs === rhs where - lhs = BF.fromList hfN bits ws - sbs = SBS.toShort (LBS.toStrict (bloomFilterToLBS lhs)) - rhs = bloomFilterFromSBS sbs + sz = BF.BloomSize { sizeBits = limitBits bits, + sizeHashes = hfN } + lhs = BF.create sz (\b -> mapM_ (BF.insert b) ws) + bs = LBS.toStrict (bloomFilterToLBS lhs) -limitBits :: Word64 -> Word64 +limitBits :: Int -> Int limitBits b = b .&. 
0xffffff -prop_total_deserialisation :: [Word32] -> Property -prop_total_deserialisation word32s = - case bloomFilterFromSBS (SBS.SBS ba) of - Left err -> label err $ property True - Right bf -> label "parsed successfully" $ property $ - -- Just forcing the filter is not enough (e.g. the bit vector might - -- point outside of the byte array). - bf `deepseq` BF.bloomInvariant bf - where - !(ByteArray ba) = byteArrayFromList word32s +prop_total_deserialisation :: BS.ByteString -> Property +prop_total_deserialisation bs = + case bloomFilterFromBS bs of + Left err -> label (displayException err) $ property True + Right bf -> label "parsed successfully" $ deepseq bf $ property True + +-- | Write the bytestring to a file in the mock file system and then use +-- 'bloomFilterFromFile'. +bloomFilterFromBS :: BS.ByteString -> Either IOSim.Failure (BF.Bloom a) +bloomFilterFromBS bs = + IOSim.runSim $ do + hfs <- FSSim.simHasFS' MockFS.empty + let file = FS.mkFsPath ["filter"] + -- write the bytestring + FS.withFile hfs file (FS.WriteMode FS.MustBeNew) $ \h -> do + void $ FS.hPutAllStrict hfs h bs + -- deserialise from file + FS.withFile hfs file FS.ReadMode $ \h -> + bloomFilterFromFile hfs file h -- Length is in Word64s. A large length would require significant amount of -- memory, so we make it 'Small'. 
prop_total_deserialisation_whitebox :: Word32 -> Small Word32 -> Property -prop_total_deserialisation_whitebox hsn (Small len64) = - forAll (vector (fromIntegral len64 * 2)) $ \word32s -> - prop_total_deserialisation (prefix <> word32s) +prop_total_deserialisation_whitebox hsn (Small nword64s) = + forAll (vector (fromIntegral nword64s * 8)) $ \bytes -> + prop_total_deserialisation (prefix <> BS.pack bytes) where - prefix = - [ 1 {- version -} - , hsn - , unsafeShiftL len64 6 -- len64 * 64 (lower 32 bits) - , unsafeShiftR len64 (32 - 6) -- len64 * 64 (upper 32 bits) - ] + prefix = LBS.toStrict $ BS.Builder.toLazyByteString $ + BS.Builder.word32Host bloomFilterVersion + <> BS.Builder.word32Host hsn + <> BS.Builder.word64Host (fromIntegral nword64s) newtype FPR = FPR Double deriving stock Show @@ -99,7 +116,7 @@ prop_bloomQueries1 :: FPR -> Property prop_bloomQueries1 (FPR fpr) filters keys = let filters' :: [BF.Bloom SerialisedKey] - filters' = map (BF.easyList fpr . map (\(Small k) -> serialiseKey k)) + filters' = map (BF.fromList (BF.policyForFPR fpr) . map (\(Small k) -> serialiseKey k)) filters keys' :: [SerialisedKey] @@ -144,7 +161,8 @@ prop_bloomQueries2 :: FPR -> Property prop_bloomQueries2 (FPR fpr) filters keys = let filters' :: [BF.Bloom SerialisedKey] - filters' = map (BF.easyList fpr . map (\(Small k) -> serialiseKey k)) filters + filters' = map (BF.fromList (BF.policyForFPR fpr) . 
+ map (\(Small k) -> serialiseKey k)) filters keys' :: [SerialisedKey] keys' = map (\(Small k) -> serialiseKey k) keys diff --git a/test/Test/Database/LSMTree/Internal/Lookup.hs b/test/Test/Database/LSMTree/Internal/Lookup.hs index 1df8cf339..26d39c13a 100644 --- a/test/Test/Database/LSMTree/Internal/Lookup.hs +++ b/test/Test/Database/LSMTree/Internal/Lookup.hs @@ -24,8 +24,8 @@ import Control.Exception import Control.Monad.ST.Strict import Control.RefCount import Data.Bifunctor -import Data.BloomFilter (Bloom) -import qualified Data.BloomFilter as Bloom +import Data.BloomFilter.Blocked (Bloom) +import qualified Data.BloomFilter.Blocked as Bloom import Data.Coerce (coerce) import Data.Either (rights) import qualified Data.Foldable as F diff --git a/test/Test/Database/LSMTree/Internal/Merge.hs b/test/Test/Database/LSMTree/Internal/Merge.hs index be7c87365..29de6629e 100644 --- a/test/Test/Database/LSMTree/Internal/Merge.hs +++ b/test/Test/Database/LSMTree/Internal/Merge.hs @@ -3,7 +3,7 @@ module Test.Database.LSMTree.Internal.Merge (tests) where import Control.Exception (evaluate) import Control.RefCount import Data.Bifoldable (bifoldMap) -import qualified Data.BloomFilter as Bloom +import qualified Data.BloomFilter.Blocked as Bloom import Data.Foldable (traverse_) import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map @@ -103,7 +103,8 @@ prop_MergeDistributes fs hbio mergeType stepSize (SmallList rds) = do (lhsSize === rhsSize) .&&. -- we can't just test bloom filter equality, their sizes may differ. counterexample "runFilter" - (Bloom.length lhsFilter >= Bloom.length rhsFilter) + ( Bloom.sizeBits (Bloom.size lhsFilter) + >= Bloom.sizeBits (Bloom.size rhsFilter)) .&&. -- the index is equal, but only because range finder precision is -- always 0 for the numbers of entries we are dealing with. 
counterexample "runIndex" diff --git a/test/Test/Database/LSMTree/Internal/RunAcc.hs b/test/Test/Database/LSMTree/Internal/RunAcc.hs index 9172fcbd5..4be8fc1b0 100644 --- a/test/Test/Database/LSMTree/Internal/RunAcc.hs +++ b/test/Test/Database/LSMTree/Internal/RunAcc.hs @@ -8,7 +8,7 @@ module Test.Database.LSMTree.Internal.RunAcc (tests) where import Control.Exception (assert) import Control.Monad.ST import Data.Bifunctor (Bifunctor (..)) -import qualified Data.BloomFilter as Bloom +import qualified Data.BloomFilter.Blocked as Bloom import qualified Data.ByteString as BS import qualified Data.ByteString.Short as SBS import Data.Maybe diff --git a/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs b/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs index 386ba548c..5d4cbc847 100644 --- a/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs +++ b/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs @@ -11,32 +11,21 @@ module Test.Database.LSMTree.Internal.RunBloomFilterAlloc ( -- * Main test tree tests - -- * Bloom filter construction - -- - -- A common interface to bloom filter construction, based on expected false - -- positive rates. - , BloomMaker - , mkBloomFromAlloc -- * Verifying FPRs , measureApproximateFPR - , measureExactFPR + , measureExactFPR --TODO: this is not currently used for anything, delete? 
) where import Control.Exception (assert) -import Control.Monad.ST -import Data.BloomFilter (Bloom) +import Data.BloomFilter (Bloom, Hashable) import qualified Data.BloomFilter as Bloom -import Data.BloomFilter.Hash (Hashable) -import qualified Data.BloomFilter.Mutable as MBloom import Data.Foldable (Foldable (..)) import Data.Proxy (Proxy (..)) import Data.Set (Set) import qualified Data.Set as Set import Data.Word (Word64) import Database.LSMTree.Extras.Random -import qualified Database.LSMTree.Internal.Entry as LSMT -import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..), - falsePositiveRate, newMBloom) +import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..)) import System.Random import Test.QuickCheck import Test.QuickCheck.Gen @@ -80,16 +69,14 @@ prop_verifyFPR p alloc (NumEntries numEntries) (Seed seed) = let stdgen = mkStdGen seed measuredFPR = measureApproximateFPR p (mkBloomFromAlloc alloc) numEntries stdgen expectedFPR = case alloc of - RunAllocFixed bits -> - falsePositiveRate (fromIntegral numEntries) - (fromIntegral bits * fromIntegral numEntries) + RunAllocFixed bits -> Bloom.policyFPR (Bloom.policyForBits (fromIntegral bits)) RunAllocRequestFPR requestedFPR -> requestedFPR -- error margins lb = expectedFPR - 0.1 ub = expectedFPR + 0.03 in assert (fprInvariant True measuredFPR) $ -- measured FPR is in the range [0,1] assert (fprInvariant True expectedFPR) $ -- expected FPR is in the range [0,1] - counterexample (printf "expected $f <= %f <= %f" lb measuredFPR ub) $ + counterexample (printf "expected %f <= %f <= %f" lb measuredFPR ub) $ lb <= measuredFPR .&&. measuredFPR <= ub {------------------------------------------------------------------------------- @@ -287,12 +274,10 @@ instance Monoid Counts where type BloomMaker a = [a] -> Bloom a --- | Create a bloom filter through the 'newMBloom' interface. Tunes the bloom --- filter according to 'RunBloomFilterAlloc'. 
+-- | Create a bloom filter, with size determined by a 'RunBloomFilterAlloc'. mkBloomFromAlloc :: Hashable a => RunBloomFilterAlloc -> BloomMaker a -mkBloomFromAlloc alloc xs = runST $ do - mb <- newMBloom n alloc - mapM_ (MBloom.insert mb) xs - Bloom.unsafeFreeze mb +mkBloomFromAlloc alloc = Bloom.fromList policy where - n = LSMT.NumEntries $ length xs + policy = case alloc of + RunAllocFixed bits -> Bloom.policyForBits (fromIntegral bits) + RunAllocRequestFPR fpr -> Bloom.policyForFPR fpr