diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 6527f070..8266dc5a 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -29,7 +29,8 @@ module Data.HashMap.Internal ( - HashMap(..) + Tree(..) + , HashMap(..) , Leaf(..) -- * Construction @@ -125,11 +126,13 @@ module Data.HashMap.Internal , sparseIndex , two , unionArrayBy + , unionArrayByInternal , update32 , update32M , update32With' + , update32WithInternal' , updateOrConcatWithKey - , filterMapAux + , filterMapAuxInternal , equalKeys , equalKeys1 , lookupRecordCollision @@ -183,7 +186,7 @@ hash :: H.Hashable a => a -> Hash hash = fromIntegral . H.hash data Leaf k v = L !k v - deriving (Eq) + deriving Eq instance (NFData k, NFData v) => NFData (Leaf k v) where rnf (L k v) = rnf k `seq` rnf v @@ -206,12 +209,12 @@ instance NFData2 Leaf where -- | A map from keys to values. A map cannot contain duplicate keys; -- each key can map to at most one value. -data HashMap k v +data Tree k v = Empty -- ^ Invariants: -- -- * 'Empty' is not a valid sub-node. It can only appear at the root. (INV1) - | BitmapIndexed !Bitmap !(A.Array (HashMap k v)) + | BitmapIndexed !Bitmap !(A.Array (Tree k v)) -- ^ Invariants: -- -- * Only the lower @maxChildren@ bits of the 'Bitmap' may be set. The @@ -229,7 +232,7 @@ data HashMap k v -- compatible with its 'Hash'. (INV6) -- (TODO: Document this properly (#425)) -- * The 'Hash' of a 'Leaf' node must be the 'hash' of its key. (INV7) - | Full !(A.Array (HashMap k v)) + | Full !(A.Array (Tree k v)) -- ^ Invariants: -- -- * The array of a 'Full' node stores exactly 'maxChildren' sub-nodes. (INV8) @@ -245,30 +248,42 @@ data HashMap k v -- * No two keys stored in a 'Collision' can be equal according to their -- 'Eq' instance. 
(INV10) -type role HashMap nominal representational +type role Tree nominal representational --- | @since 0.2.17.0 -deriving instance (TH.Lift k, TH.Lift v) => TH.Lift (HashMap k v) - -instance (NFData k, NFData v) => NFData (HashMap k v) where +instance (NFData k, NFData v) => NFData (Tree k v) where rnf Empty = () rnf (BitmapIndexed _ ary) = rnf ary rnf (Leaf _ l) = rnf l rnf (Full ary) = rnf ary rnf (Collision _ ary) = rnf ary +deriving instance (TH.Lift k, TH.Lift v) => TH.Lift (Tree k v) + -- | @since 0.2.14.0 -instance NFData k => NFData1 (HashMap k) where +instance NFData k => NFData1 (Tree k) where liftRnf = liftRnf2 rnf -- | @since 0.2.14.0 -instance NFData2 HashMap where +instance NFData2 Tree where liftRnf2 _ _ Empty = () liftRnf2 rnf1 rnf2 (BitmapIndexed _ ary) = liftRnf (liftRnf2 rnf1 rnf2) ary liftRnf2 rnf1 rnf2 (Leaf _ l) = liftRnf2 rnf1 rnf2 l liftRnf2 rnf1 rnf2 (Full ary) = liftRnf (liftRnf2 rnf1 rnf2) ary liftRnf2 rnf1 rnf2 (Collision _ ary) = liftRnf (liftRnf2 rnf1 rnf2) ary +-- | A wrapper over 'Tree'. The 'A.Size' field stores the hashmap's +-- size. 
+data HashMap k v = HashMap {-# UNPACK #-} !A.Size !(Tree k v) + +instance NFData2 HashMap where + liftRnf2 rnf1 rnf2 (HashMap sz hm) = rnf sz `seq` liftRnf2 rnf1 rnf2 hm + +-- | @since 0.2.17.0 +deriving instance (TH.Lift k, TH.Lift v) => TH.Lift (HashMap k v) + +instance (NFData k, NFData v) => NFData (HashMap k v) where + rnf (HashMap !_ m) = rnf m + instance Functor (HashMap k) where fmap = map @@ -328,7 +343,7 @@ instance (Eq k, Hashable k) => Monoid (HashMap k v) where {-# INLINE mappend #-} instance (Data k, Data v, Eq k, Hashable k) => Data (HashMap k v) where - gfoldl f z m = z fromList `f` toList m + gfoldl f z hw = z fromList `f` toList hw toConstr _ = fromListConstr gunfold k z c = case Data.constrIndex c of 1 -> k (z fromList) @@ -421,7 +436,7 @@ instance (Eq k, Eq v) => Eq (HashMap k v) where equal1 :: Eq k => (v -> v' -> Bool) -> HashMap k v -> HashMap k v' -> Bool -equal1 eq = go +equal1 eq (HashMap s1 t1) (HashMap s2 t2) = s1 == s2 && go t1 t2 where go Empty Empty = True go (BitmapIndexed bm1 ary1) (BitmapIndexed bm2 ary2) @@ -436,7 +451,8 @@ equal1 eq = go equal2 :: (k -> k' -> Bool) -> (v -> v' -> Bool) -> HashMap k v -> HashMap k' v' -> Bool -equal2 eqk eqv t1 t2 = go (leavesAndCollisions t1 []) (leavesAndCollisions t2 []) +equal2 eqk eqv (HashMap s1 t1) (HashMap s2 t2) = + (s1 == s2) && go (leavesAndCollisions t1 []) (leavesAndCollisions t2 []) where -- If the two trees are the same, then their lists of 'Leaf's and -- 'Collision's read from left to right should be the same (modulo the @@ -470,7 +486,8 @@ instance (Ord k, Ord v) => Ord (HashMap k v) where cmp :: (k -> k' -> Ordering) -> (v -> v' -> Ordering) -> HashMap k v -> HashMap k' v' -> Ordering -cmp cmpk cmpv t1 t2 = go (leavesAndCollisions t1 []) (leavesAndCollisions t2 []) +cmp cmpk cmpv (HashMap s1 t1) (HashMap s2 t2) = + compare s1 s2 `mappend` go (leavesAndCollisions t1 []) (leavesAndCollisions t2 []) where go (Leaf k1 l1 : tl1) (Leaf k2 l2 : tl2) = compare k1 k2 `mappend` @@ -492,7 
+509,8 @@ cmp cmpk cmpv t1 t2 = go (leavesAndCollisions t1 []) (leavesAndCollisions t2 []) -- Same as 'equal2' but doesn't compare the values. equalKeys1 :: (k -> k' -> Bool) -> HashMap k v -> HashMap k' v' -> Bool -equalKeys1 eq t1 t2 = go (leavesAndCollisions t1 []) (leavesAndCollisions t2 []) +equalKeys1 eq (HashMap s1 t1) (HashMap s2 t2) = + (s1 == s2) && go (leavesAndCollisions t1 []) (leavesAndCollisions t2 []) where go (Leaf k1 l1 : tl1) (Leaf k2 l2 : tl2) | k1 == k2 && leafEq l1 l2 @@ -508,9 +526,9 @@ equalKeys1 eq t1 t2 = go (leavesAndCollisions t1 []) (leavesAndCollisions t2 []) -- Same as 'equal1' but doesn't compare the values. equalKeys :: Eq k => HashMap k v -> HashMap k v' -> Bool -equalKeys = go +equalKeys (HashMap s1 t1) (HashMap s2 t2) = s1 == s2 && go t1 t2 where - go :: Eq k => HashMap k v -> HashMap k v' -> Bool + go :: Eq k => Tree k v -> Tree k v' -> Bool go Empty Empty = True go (BitmapIndexed bm1 ary1) (BitmapIndexed bm2 ary2) = bm1 == bm2 && A.sameArray1 go ary1 ary2 @@ -523,9 +541,9 @@ equalKeys = go leafEq (L k1 _) (L k2 _) = k1 == k2 instance Hashable2 HashMap where - liftHashWithSalt2 hk hv salt hm = go salt (leavesAndCollisions hm []) + liftHashWithSalt2 hk hv salt (HashMap _ hm) = go salt (leavesAndCollisions hm []) where - -- go :: Int -> [HashMap k v] -> Int + -- go :: Int -> [Tree k v] -> Int go s [] = s go s (Leaf _ l : tl) = s `hashLeafWithSalt` l `go` tl @@ -549,9 +567,9 @@ instance (Hashable k) => Hashable1 (HashMap k) where liftHashWithSalt = H.liftHashWithSalt2 H.hashWithSalt instance (Hashable k, Hashable v) => Hashable (HashMap k v) where - hashWithSalt salt hm = go salt hm + hashWithSalt salt (HashMap _ t) = go salt t where - go :: Int -> HashMap k v -> Int + go :: Int -> Tree k v -> Int go s Empty = s go s (BitmapIndexed _ a) = A.foldl' go s a go s (Leaf h (L _ v)) @@ -573,7 +591,7 @@ instance (Hashable k, Hashable v) => Hashable (HashMap k v) where arrayHashesSorted s = List.sort . List.map (hashLeafWithSalt s) . 
A.toList -- | Helper to get 'Leaf's and 'Collision's as a list. -leavesAndCollisions :: HashMap k v -> [HashMap k v] -> [HashMap k v] +leavesAndCollisions :: Tree k v -> [Tree k v] -> [Tree k v] leavesAndCollisions (BitmapIndexed _ ary) a = A.foldr leavesAndCollisions a ary leavesAndCollisions (Full ary) a = A.foldr leavesAndCollisions a ary leavesAndCollisions l@(Leaf _ _) a = l : a @@ -581,7 +599,7 @@ leavesAndCollisions c@(Collision _ _) a = c : a leavesAndCollisions Empty a = a -- | Helper function to detect 'Leaf's and 'Collision's. -isLeafOrCollision :: HashMap k v -> Bool +isLeafOrCollision :: Tree k v -> Bool isLeafOrCollision (Leaf _ _) = True isLeafOrCollision (Collision _ _) = True isLeafOrCollision _ = False @@ -591,29 +609,23 @@ isLeafOrCollision _ = False -- | \(O(1)\) Construct an empty map. empty :: HashMap k v -empty = Empty +empty = HashMap 0 Empty -- | \(O(1)\) Construct a map with a single element. singleton :: (Hashable k) => k -> v -> HashMap k v -singleton k v = Leaf (hash k) (L k v) +singleton k v = HashMap 1 (Leaf (hash k) (L k v)) ------------------------------------------------------------------------ -- * Basic interface -- | \(O(1)\) Return 'True' if this map is empty, 'False' otherwise. null :: HashMap k v -> Bool -null Empty = True -null _ = False +null (HashMap _ Empty) = True +null _ = False --- | \(O(n)\) Return the number of key-value mappings in this map. +-- | \(O(1)\) Return the number of key-value mappings in this map. size :: HashMap k v -> Int -size t = go t 0 - where - go Empty !n = n - go (Leaf _ _) n = n + 1 - go (BitmapIndexed _ ary) n = A.foldl' (flip go) n ary - go (Full ary) n = A.foldl' (flip go) n ary - go (Collision _ ary) n = n + A.length ary +size (HashMap sz _) = A.unSize sz -- | \(O(\log n)\) Return 'True' if the specified key is present in the -- map, 'False' otherwise. 
@@ -629,12 +641,12 @@ lookup :: (Eq k, Hashable k) => k -> HashMap k v -> Maybe v -- GHC does not yet perform a worker-wrapper transformation on -- unboxed sums automatically. That seems likely to happen at some -- point (possibly as early as GHC 8.6) but for now we do it manually. -lookup k m = case lookup# k m of +lookup k (HashMap _ m) = case lookup# k m of (# (# #) | #) -> Nothing (# | a #) -> Just a {-# INLINE lookup #-} -lookup# :: (Eq k, Hashable k) => k -> HashMap k v -> (# (# #) | v #) +lookup# :: (Eq k, Hashable k) => k -> Tree k v -> (# (# #) | v #) lookup# k m = lookupCont (\_ -> (# (# #) | #)) (\v _i -> (# | v #)) (hash k) k 0 m {-# INLINABLE lookup# #-} @@ -647,7 +659,7 @@ lookup' :: Eq k => Hash -> k -> HashMap k v -> Maybe v -- lookup' would probably prefer to be implemented in terms of its own -- lookup'#, but it's not important enough and we don't want too much -- code. -lookup' h k m = case lookupRecordCollision# h k m of +lookup' h k (HashMap _ m) = case lookupRecordCollision# h k m of (# (# #) | #) -> Nothing (# | (# a, _i #) #) -> Just a {-# INLINE lookup' #-} @@ -674,7 +686,7 @@ lookupResToMaybe (Present x _) = Just x -- Key not in map => Absent -- Key in map, no collision => Present v (-1) -- Key in map, collision => Present v position -lookupRecordCollision :: Eq k => Hash -> k -> HashMap k v -> LookupRes v +lookupRecordCollision :: Eq k => Hash -> k -> Tree k v -> LookupRes v lookupRecordCollision h k m = case lookupRecordCollision# h k m of (# (# #) | #) -> Absent (# | (# a, i #) #) -> Present a (I# i) -- GHC will eliminate the I# @@ -685,7 +697,7 @@ lookupRecordCollision h k m = case lookupRecordCollision# h k m of -- may be changing in GHC 8.6 or so (there is some work in progress), but -- for now we use Int# explicitly here. We don't need to push the Int# -- into lookupCont because inlining takes care of that. 
-lookupRecordCollision# :: Eq k => Hash -> k -> HashMap k v -> (# (# #) | (# v, Int# #) #) +lookupRecordCollision# :: Eq k => Hash -> k -> Tree k v -> (# (# #) | (# v, Int# #) #) lookupRecordCollision# h k m = lookupCont (\_ -> (# (# #) | #)) (\v (I# i) -> (# | (# v, i #) #)) h k 0 m -- INLINABLE to specialize to the Eq instance. @@ -711,10 +723,10 @@ lookupCont :: -> Hash -- The hash of the key -> k -> Int -- The offset of the subkey in the hash. - -> HashMap k v -> r + -> Tree k v -> r lookupCont absent present !h0 !k0 !s0 !m0 = go h0 k0 s0 m0 where - go :: Eq k => Hash -> k -> Int -> HashMap k v -> r + go :: Eq k => Hash -> k -> Int -> Tree k v -> r go !_ !_ !_ Empty = absent (# #) go h k _ (Leaf hx (L kx x)) | h == hx && k == kx = present x (-1) @@ -777,7 +789,7 @@ lookupDefault = findWithDefault infixl 9 ! -- | Create a 'Collision' value with two 'Leaf' values. -collision :: Hash -> Leaf k v -> Leaf k v -> HashMap k v +collision :: Hash -> Leaf k v -> Leaf k v -> Tree k v collision h !e1 !e2 = let v = A.run $ do mary <- A.new 2 e1 A.write mary 1 e2 @@ -786,7 +798,7 @@ collision h !e1 !e2 = {-# INLINE collision #-} -- | Create a 'BitmapIndexed' or 'Full' node. -bitmapIndexedOrFull :: Bitmap -> A.Array (HashMap k v) -> HashMap k v +bitmapIndexedOrFull :: Bitmap -> A.Array (Tree k v) -> Tree k v -- The strictness in @ary@ helps achieve a nice code size reduction in -- @unionWith[Key]@ with GHC 9.2.2. See the Core diffs in -- https://github.com/haskell-unordered-containers/unordered-containers/pull/376. @@ -797,45 +809,56 @@ bitmapIndexedOrFull b !ary -- | \(O(\log n)\) Associate the specified value with the specified -- key in this map. If this map previously contained a mapping for --- the key, the old value is replaced. +-- the key, the old value is replaced. The stored size of the map is +-- updated to account for a newly added key. 
insert :: (Eq k, Hashable k) => k -> v -> HashMap k v -> HashMap k v insert k v m = insert' (hash k) k v m {-# INLINABLE insert #-} -insert' :: Eq k => Hash -> k -> v -> HashMap k v -> HashMap k v -insert' h0 k0 v0 m0 = go h0 k0 v0 0 m0 +insert' :: (Eq k, Hashable k) => Hash -> k -> v -> HashMap k v -> HashMap k v +insert' h k v (HashMap sz m) = + let A.Sized diff m' = insertInternal h k v m + in HashMap (sz + diff) m' +{-# INLINABLE insert' #-} + +insertInternal :: Eq k => Hash -> k -> v -> Tree k v -> A.Sized (Tree k v) +insertInternal h0 k0 v0 m0 = go h0 k0 v0 0 m0 where - go !h !k x !_ Empty = Leaf h (L k x) + go !h !k x !_ Empty = A.Sized 1 (Leaf h (L k x)) go h k x s t@(Leaf hy l@(L ky y)) | hy == h = if ky == k then if x `ptrEq` y - then t - else Leaf h (L k x) - else collision h l (L k x) - | otherwise = runST (two s h k x hy t) + then A.Sized 0 t + else A.Sized 0 (Leaf h (L k x)) + else A.Sized 1 (collision h l (L k x)) + | otherwise = A.Sized 1 (runST (two s h k x hy t)) go h k x s t@(BitmapIndexed b ary) | b .&. m == 0 = let !ary' = A.insert ary i $! Leaf h (L k x) - in bitmapIndexedOrFull (b .|. m) ary' + in A.Sized 1 (bitmapIndexedOrFull (b .|. 
m) ary') | otherwise = let !st = A.index ary i - !st' = go h k x (nextShift s) st + A.Sized sz !st' = go h k x (nextShift s) st in if st' `ptrEq` st - then t - else BitmapIndexed b (A.update ary i st') + then A.Sized sz t + else A.Sized sz (BitmapIndexed b (A.update ary i st')) where m = mask h s i = sparseIndex b m go h k x s t@(Full ary) = let !st = A.index ary i - !st' = go h k x (nextShift s) st + A.Sized sz !st' = go h k x (nextShift s) st in if st' `ptrEq` st - then t - else Full (update32 ary i st') + then A.Sized sz t + else A.Sized sz (Full (update32 ary i st')) where i = index h s go h k x s t@(Collision hy v) - | h == hy = Collision h (updateOrSnocWith (\a _ -> (# a #)) k x v) + | h == hy = + let !start = A.length v + !newV = updateOrSnocWith (\a _ -> (# a #)) k x v + !end = A.length newV + in A.Sized (A.Size (end - start)) (Collision h newV) | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) -{-# INLINABLE insert' #-} +{-# INLINABLE insertInternal #-} -- Insert optimized for the case when we know the key is not in the map. -- @@ -844,7 +867,7 @@ insert' h0 k0 v0 m0 = go h0 k0 v0 0 m0 -- We can skip: -- - the key equality check on a Leaf -- - check for its existence in the array for a hash collision -insertNewKey :: Hash -> k -> v -> HashMap k v -> HashMap k v +insertNewKey :: Hash -> k -> v -> Tree k v -> Tree k v insertNewKey !h0 !k0 x0 !m0 = go h0 k0 x0 0 m0 where go !h !k x !_ Empty = Leaf h (L k x) @@ -879,7 +902,7 @@ insertNewKey !h0 !k0 x0 !m0 = go h0 k0 x0 0 m0 -- hash collision position if there was one. This information can be obtained -- from 'lookupRecordCollision'. If there is no collision, pass (-1) as collPos -- (first argument). 
-insertKeyExists :: Int -> Hash -> k -> v -> HashMap k v -> HashMap k v +insertKeyExists :: Int -> Hash -> k -> v -> Tree k v -> Tree k v insertKeyExists !collPos0 !h0 !k0 x0 !m0 = go collPos0 h0 k0 x0 m0 where go !_collPos !_shiftedHash !k x (Leaf h _kx) @@ -925,38 +948,50 @@ setAtPosition i k x ary = A.update ary i (L k x) -- | In-place update version of insert unsafeInsert :: (Eq k, Hashable k) => k -> v -> HashMap k v -> HashMap k v -unsafeInsert k0 v0 m0 = runST (go h0 k0 v0 0 m0) +unsafeInsert k0 v0 (HashMap sz m0) = + let A.Sized diff m0' = unsafeInsertInternal k0 v0 m0 + in HashMap (diff + sz) m0' +{-# INLINABLE unsafeInsert #-} + +-- | In-place update version of insert. Returns a tuple with the +-- HashMap's change in size and the hashmap itself. +unsafeInsertInternal :: (Eq k, Hashable k) => k -> v -> Tree k v -> A.Sized (Tree k v) +unsafeInsertInternal k0 v0 m0 = runST (go h0 k0 v0 0 m0) where h0 = hash k0 - go !h !k x !_ Empty = return $! Leaf h (L k x) + go !h !k x !_ Empty = return $! A.Sized 1 (Leaf h (L k x)) go h k x s t@(Leaf hy l@(L ky y)) | hy == h = if ky == k then if x `ptrEq` y - then return t - else return $! Leaf h (L k x) - else return $! collision h l (L k x) - | otherwise = two s h k x hy t + then return $! A.Sized 0 t + else return $! A.Sized 0 (Leaf h (L k x)) + else return $! A.Sized 1 (collision h l (L k x)) + | otherwise = A.Sized 1 <$> two s h k x hy t go h k x s t@(BitmapIndexed b ary) | b .&. m == 0 = do ary' <- A.insertM ary i $! Leaf h (L k x) - return $! bitmapIndexedOrFull (b .|. m) ary' + return $! A.Sized 1 (bitmapIndexedOrFull (b .|. 
m) ary') | otherwise = do st <- A.indexM ary i - st' <- go h k x (nextShift s) st + A.Sized sz st' <- go h k x (nextShift s) st A.unsafeUpdateM ary i st' - return t + return (A.Sized sz t) where m = mask h s i = sparseIndex b m go h k x s t@(Full ary) = do st <- A.indexM ary i - st' <- go h k x (nextShift s) st + A.Sized sz st' <- go h k x (nextShift s) st A.unsafeUpdateM ary i st' - return t + return (A.Sized sz t) where i = index h s go h k x s t@(Collision hy v) - | h == hy = return $! Collision h (updateOrSnocWith (\a _ -> (# a #)) k x v) + | h == hy = + let !start = A.length v + !newV = updateOrSnocWith (\a _ -> (# a #)) k x v + !end = A.length newV + in return $! A.Sized (A.Size (end - start)) (Collision h newV) | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) -{-# INLINABLE unsafeInsert #-} +{-# INLINABLE unsafeInsertInternal #-} -- | Create a map from two key-value pairs which hashes don't collide. To -- enhance sharing, the second key-value pair is represented by the hash of its @@ -966,7 +1001,7 @@ unsafeInsert k0 v0 m0 = runST (go h0 k0 v0 0 m0) -- key. See issue #232. We don't need to force the HashMap argument -- because it's already in WHNF (having just been matched) and we -- just put it directly in an array. -two :: Shift -> Hash -> k -> v -> Hash -> HashMap k v -> ST s (HashMap k v) +two :: Shift -> Hash -> k -> v -> Hash -> Tree k v -> ST s (Tree k v) two = go where go s h1 k1 v1 h2 t2 @@ -1013,47 +1048,65 @@ insertWith f k new m = insertModifying new (\old -> (# f new old #)) k m -- to the unboxed unary tuple, we avoid introducing any unnecessary -- thunks in the tree. insertModifying :: (Eq k, Hashable k) => v -> (v -> (# v #)) -> k -> HashMap k v - -> HashMap k v -insertModifying x f k0 m0 = go h0 k0 0 m0 + -> HashMap k v +-- We're not going to worry about allocating a function closure +-- to pass to insertModifying. See comments at 'adjust'. 
+insertModifying x f k (HashMap sz m) = + let A.Sized diff m' = insertWithInternal x f k m + in HashMap (sz + diff) m' +{-# INLINE insertModifying #-} + +-- | @insertModifying@ is a lot like insertWith; we use it to implement alterF. +-- It takes a value to insert when the key is absent and a function +-- to apply to calculate a new value when the key is present. Thanks +-- to the unboxed unary tuple, we avoid introducing any unnecessary +-- thunks in the tree. +-- | /O(log n)/ Associate the specified value with the specified +-- key in this map. If this map previously contained a mapping for +-- the key, the old value is replaced. Returns a tuple containing the +-- hashmap's change in size, and the hashmap after the insertion. +insertWithInternal :: (Eq k, Hashable k) => v -> (v -> (# v #)) -> k -> Tree k v + -> A.Sized (Tree k v) +insertWithInternal x f k0 m0 = go h0 k0 0 m0 where !h0 = hash k0 - go !h !k !_ Empty = Leaf h (L k x) + go !h !k !_ Empty = A.Sized 1 (Leaf h (L k x)) go h k s t@(Leaf hy l@(L ky y)) | hy == h = if ky == k - then case f y of + then A.Sized 0 (case f y of (# v' #) | ptrEq y v' -> t - | otherwise -> Leaf h (L k v') - else collision h l (L k x) - | otherwise = runST (two s h k x hy t) + | otherwise -> Leaf h (L k v')) + else A.Sized 1 (collision h l (L k x)) + | otherwise = A.Sized 1 (runST (two s h k x hy t)) go h k s t@(BitmapIndexed b ary) | b .&. m == 0 = let ary' = A.insert ary i $! Leaf h (L k x) - in bitmapIndexedOrFull (b .|. m) ary' + in A.Sized 1 (bitmapIndexedOrFull (b .|. m) ary') | otherwise = - let !st = A.index ary i - !st' = go h k (nextShift s) st - ary' = A.update ary i $! st' + let !st = A.index ary i + A.Sized sz !st' = go h k (nextShift s) st + ary' = A.update ary i $! 
st' in if ptrEq st st' - then t - else BitmapIndexed b ary' + then A.Sized 0 t + else A.Sized sz (BitmapIndexed b ary') where m = mask h s i = sparseIndex b m go h k s t@(Full ary) = - let !st = A.index ary i - !st' = go h k (nextShift s) st - ary' = update32 ary i $! st' + let !st = A.index ary i + A.Sized sz !st' = go h k (nextShift s) st + ary' = update32 ary i $! st' in if ptrEq st st' - then t - else Full ary' + then A.Sized 0 t + else A.Sized sz (Full ary') where i = index h s go h k s t@(Collision hy v) | h == hy = let !v' = insertModifyingArr x f k v in if A.unsafeSameArray v v' - then t - else Collision h v' + then A.Sized 0 t + else A.Sized (A.Size (A.length v' - A.length v)) (Collision h v') | otherwise = go h k s $ BitmapIndexed (mask hy s) (A.singleton t) -{-# INLINABLE insertModifying #-} +{-# INLINABLE insertWithInternal #-} -- Like insertModifying for arrays; used to implement insertModifying insertModifyingArr :: Eq k => v -> (v -> (# v #)) -> k -> A.Array (Leaf k v) @@ -1072,67 +1125,84 @@ insertModifyingArr x f k0 ary0 = go k0 ary0 0 (A.length ary0) {-# INLINE insertModifyingArr #-} -- | In-place update version of insertWith -unsafeInsertWith :: forall k v. (Eq k, Hashable k) +unsafeInsertWith :: forall k v . (Eq k, Hashable k) => (v -> v -> v) -> k -> v -> HashMap k v -> HashMap k v -unsafeInsertWith f k0 v0 m0 = unsafeInsertWithKey (\_ a b -> (# f a b #)) k0 v0 m0 +unsafeInsertWith f k0 v0 (HashMap sz m0) = + let A.Sized diff m0' = unsafeInsertWithKey (\_ a b -> (# f a b #)) k0 v0 m0 + in HashMap (diff + sz) m0' {-# INLINABLE unsafeInsertWith #-} unsafeInsertWithKey :: forall k v. (Eq k, Hashable k) - => (k -> v -> v -> (# v #)) -> k -> v -> HashMap k v - -> HashMap k v + => (k -> v -> v -> (# v #)) -> k -> v -> Tree k v + -> A.Sized (Tree k v) unsafeInsertWithKey f k0 v0 m0 = runST (go h0 k0 v0 0 m0) where h0 = hash k0 - go :: Hash -> k -> v -> Shift -> HashMap k v -> ST s (HashMap k v) - go !h !k x !_ Empty = return $! 
Leaf h (L k x) + go :: Hash -> k -> v -> Shift -> Tree k v -> ST s (A.Sized (Tree k v)) + go !h !k x !_ Empty = return $! A.Sized 1 (Leaf h (L k x)) go h k x s t@(Leaf hy l@(L ky y)) | hy == h = if ky == k then case f k x y of - (# v #) -> return $! Leaf h (L k v) - else return $! collision h l (L k x) - | otherwise = two s h k x hy t + (# v #) -> return $! A.Sized 0 (Leaf h (L k v)) + else return $! A.Sized 1 (collision h l (L k x)) + | otherwise = do + twoHM <- two s h k x hy t + return $! A.Sized 1 twoHM go h k x s t@(BitmapIndexed b ary) | b .&. m == 0 = do ary' <- A.insertM ary i $! Leaf h (L k x) - return $! bitmapIndexedOrFull (b .|. m) ary' + return $! A.Sized 1 (bitmapIndexedOrFull (b .|. m) ary') | otherwise = do st <- A.indexM ary i - st' <- go h k x (nextShift s) st + A.Sized sz st' <- go h k x (nextShift s) st A.unsafeUpdateM ary i st' - return t + return (A.Sized sz t) where m = mask h s i = sparseIndex b m go h k x s t@(Full ary) = do st <- A.indexM ary i - st' <- go h k x (nextShift s) st + A.Sized sz st' <- go h k x (nextShift s) st A.unsafeUpdateM ary i st' - return t + return (A.Sized sz t) where i = index h s go h k x s t@(Collision hy v) - | h == hy = return $! Collision h (updateOrSnocWithKey f k x v) + | h == hy = + let !start = A.Size (A.length v) + !newV = updateOrSnocWithKey f k x v + !end = A.Size (A.length newV) + in return $! A.Sized (end - start) (Collision h newV) | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) {-# INLINABLE unsafeInsertWithKey #-} -- | \(O(\log n)\) Remove the mapping for the specified key from this map --- if present. +-- if present. The stored size of the map is updated to account for a +-- removed key. 
delete :: (Eq k, Hashable k) => k -> HashMap k v -> HashMap k v delete k m = delete' (hash k) k m {-# INLINABLE delete #-} -delete' :: Eq k => Hash -> k -> HashMap k v -> HashMap k v -delete' h0 k0 m0 = go h0 k0 0 m0 + +delete' :: (Eq k, Hashable k) => Hash -> k -> HashMap k v -> HashMap k v +delete' h k (HashMap sz m) = + let A.Sized diff m' = deleteInternal h k m + in HashMap (sz + diff) m' +{-# INLINABLE delete' #-} + + +deleteInternal :: Eq k => Hash -> k -> Tree k v -> A.Sized (Tree k v) +deleteInternal h0 k0 m0 = go h0 k0 0 m0 where - go !_ !_ !_ Empty = Empty + go !_ !_ !_ Empty = A.Sized 0 Empty go h k _ t@(Leaf hy (L ky _)) - | hy == h && ky == k = Empty - | otherwise = t + | hy == h && ky == k = A.Sized (-1) Empty + | otherwise = A.Sized 0 t go h k s t@(BitmapIndexed b ary) - | b .&. m == 0 = t + | b .&. m == 0 = A.Sized 0 t | otherwise = let !st = A.index ary i - !st' = go h k (nextShift s) st - in if st' `ptrEq` st + A.Sized sz !st' = go h k (nextShift s) st + in A.Sized sz $! if st' `ptrEq` st then t else case st' of Empty | A.length ary == 1 -> Empty @@ -1150,8 +1220,8 @@ delete' h0 k0 m0 = go h0 k0 0 m0 i = sparseIndex b m go h k s t@(Full ary) = let !st = A.index ary i - !st' = go h k (nextShift s) st - in if st' `ptrEq` st + A.Sized sz !st' = go h k (nextShift s) st + in A.Sized sz $! if st' `ptrEq` st then t else case st' of Empty -> @@ -1164,23 +1234,23 @@ delete' h0 k0 m0 = go h0 k0 0 m0 | h == hy = case indexOf k v of Just i | A.length v == 2 -> - if i == 0 + A.Sized (-1) $! if i == 0 then Leaf h (A.index v 1) else Leaf h (A.index v 0) - | otherwise -> Collision h (A.delete v i) - Nothing -> t - | otherwise = t -{-# INLINABLE delete' #-} + | otherwise -> A.Sized (-1) (Collision h (A.delete v i)) + Nothing -> A.Sized 0 t + | otherwise = A.Sized 0 t +{-# INLINABLE deleteInternal #-} -- | Delete optimized for the case when we know the key is in the map. 
-- -- It is only valid to call this when the key exists in the map and you know the -- hash collision position if there was one. This information can be obtained -- from 'lookupRecordCollision'. If there is no collision, pass (-1) as collPos. -deleteKeyExists :: Int -> Hash -> k -> HashMap k v -> HashMap k v +deleteKeyExists :: Int -> Hash -> k -> Tree k v -> Tree k v deleteKeyExists !collPos0 !h0 !k0 !m0 = go collPos0 h0 k0 m0 where - go :: Int -> Word -> k -> HashMap k v -> HashMap k v + go :: Int -> Word -> k -> Tree k v -> Tree k v go !_collPos !_shiftedHash !_k (Leaf _ _) = Empty go collPos shiftedHash k (BitmapIndexed b ary) = let !st = A.index ary i @@ -1247,7 +1317,7 @@ adjust f k m = adjust# (\v -> (# f v #)) k m -- | Much like 'adjust', but not inherently leaky. adjust# :: (Eq k, Hashable k) => (v -> (# v #)) -> k -> HashMap k v -> HashMap k v -adjust# f k0 m0 = go h0 k0 0 m0 +adjust# f k0 (HashMap sz m0) = HashMap sz (go h0 k0 0 m0) where h0 = hash k0 go !_ !_ !_ Empty = Empty @@ -1299,19 +1369,19 @@ update f = alter (>>= f) -- 'lookup' k ('alter' f k m) = f ('lookup' k m) -- @ alter :: (Eq k, Hashable k) => (Maybe v -> Maybe v) -> k -> HashMap k v -> HashMap k v -alter f k m = +alter f k hm@(HashMap sz m) = let !h = hash k !lookupRes = lookupRecordCollision h k m in case f (lookupResToMaybe lookupRes) of Nothing -> case lookupRes of - Absent -> m - Present _ collPos -> deleteKeyExists collPos h k m + Absent -> hm + Present _ collPos -> HashMap (sz - 1) $! deleteKeyExists collPos h k m Just v' -> case lookupRes of - Absent -> insertNewKey h k v' m + Absent -> HashMap (sz + 1) $! insertNewKey h k v' m Present v collPos -> if v `ptrEq` v' - then m - else insertKeyExists collPos h k v' m + then hm + else HashMap sz $! 
insertKeyExists collPos h k v' m {-# INLINABLE alter #-} -- | \(O(\log n)\) The expression @('alterF' f k map)@ alters the value @x@ at @@ -1425,32 +1495,32 @@ alterFWeird _ _ f = alterFEager f -- eagerly, whether or not the given function requires that information. alterFEager :: (Functor f, Eq k, Hashable k) => (Maybe v -> f (Maybe v)) -> k -> HashMap k v -> f (HashMap k v) -alterFEager f !k m = (<$> f mv) $ \case +alterFEager f !k hm@(HashMap sz m) = (<$> f mv) $ \case ------------------------------ -- Delete the key from the map. Nothing -> case lookupRes of - -- Key did not exist in the map to begin with, no-op - Absent -> m + -- Key did not exist in the map to begin with, no-op + Absent -> hm - -- Key did exist - Present _ collPos -> deleteKeyExists collPos h k m + -- Key did exist + Present _ collPos -> HashMap (sz - 1) (deleteKeyExists collPos h k m) ------------------------------ -- Update value Just v' -> case lookupRes of - -- Key did not exist before, insert v' under a new key - Absent -> insertNewKey h k v' m + -- Key did not exist before, insert v' under a new key + Absent -> HashMap (sz + 1) (insertNewKey h k v' m) - -- Key existed before - Present v collPos -> - if v `ptrEq` v' - -- If the value is identical, no-op - then m - -- If the value changed, update the value. - else insertKeyExists collPos h k v' m + -- Key existed before + Present v collPos -> + if v `ptrEq` v' + -- If the value is identical, no-op + then hm + -- If the value changed, update the value. + else HashMap sz (insertKeyExists collPos h k v' m) where !h = hash k !lookupRes = lookupRecordCollision h k m @@ -1501,7 +1571,9 @@ isSubmapOfBy :: (Eq k, Hashable k) => (v1 -> v2 -> Bool) -> HashMap k v1 -> Hash -- and m2 are collision nodes for the same hash. Since collision nodes are -- unsorted arrays, it requires for every key in m1 a linear search to to find a -- matching key in m2, hence O(n*m). 
-isSubmapOfBy comp !m1 !m2 = go 0 m1 m2 +isSubmapOfBy comp (HashMap !sz1 !m1) (HashMap !sz2 !m2) + | sz1 > sz2 = False + | otherwise = go 0 m1 m2 where -- An empty map is always a submap of any other map. go _ Empty _ = True @@ -1549,7 +1621,7 @@ isSubmapOfBy comp !m1 !m2 = go 0 m1 m2 {-# INLINABLE isSubmapOfBy #-} -- | \(O(\min n m))\) Checks if a bitmap indexed node is a submap of another. -submapBitmapIndexed :: (HashMap k v1 -> HashMap k v2 -> Bool) -> Bitmap -> A.Array (HashMap k v1) -> Bitmap -> A.Array (HashMap k v2) -> Bool +submapBitmapIndexed :: (Tree k v1 -> Tree k v2 -> Bool) -> Bitmap -> A.Array (Tree k v1) -> Bitmap -> A.Array (Tree k v2) -> Bool submapBitmapIndexed comp !b1 !ary1 !b2 !ary2 = subsetBitmaps && go 0 0 (b1Orb2 .&. negate b1Orb2) where go :: Int -> Int -> Bitmap -> Bool @@ -1597,89 +1669,159 @@ unionWith f = unionWithKey (const f) -- | \(O(n+m)\) The union of two maps. If a key occurs in both maps, -- the provided function (first argument) will be used to compute the -- result. -unionWithKey :: Eq k => (k -> v -> v -> v) -> HashMap k v -> HashMap k v - -> HashMap k v -unionWithKey f = go 0 +unionWithKey + :: Eq k + => (k -> v -> v -> v) + -> HashMap k v + -> HashMap k v + -> HashMap k v +unionWithKey f (HashMap x tree1) (HashMap y tree2) = if x < y + then let A.Sized diff m' = unionWithKeyInternal f y tree1 tree2 + in HashMap (diff + x) m' + else let A.Sized diff m' = unionWithKeyInternal (\k v v' -> f k v' v) x tree2 tree1 + in HashMap (diff + y) m' +{-# INLINE unionWithKey #-} + +-- | /O(n+m)/ The union of two maps. If a key occurs in both maps, +-- the provided function (first argument) will be used to compute the +-- result. +-- Returns a tuple where the first component is how many elements were added +-- to the first hashmap and the second is the union hashmap itself. +unionWithKeyInternal + :: forall k v . 
Eq k + => (k -> v -> v -> v) + -> A.Size + -- ^ Size of the second tree; seeds the size accumulator + -> Tree k v + -> Tree k v + -> A.Sized (Tree k v) +unionWithKeyInternal f siz hm1 hm2 = go 0 siz hm1 hm2 where + go :: Int -- ^ Shift (offset of the subkey in the hash) + -> A.Size -- ^ Size accumulator. + -- Counts down from the second hashmap's size. + -> Tree k v + -> Tree k v + -> A.Sized (Tree k v) -- empty vs. anything - go !_ t1 Empty = t1 - go _ Empty t2 = t2 + go !_ !sz t1 Empty = A.Sized sz t1 + go _ !sz Empty t2 = A.Sized sz t2 -- leaf vs. leaf - go s t1@(Leaf h1 l1@(L k1 v1)) t2@(Leaf h2 l2@(L k2 v2)) + go s !sz t1@(Leaf h1 l1@(L k1 v1)) t2@(Leaf h2 l2@(L k2 v2)) | h1 == h2 = if k1 == k2 - then Leaf h1 (L k1 (f k1 v1 v2)) - else collision h1 l1 l2 - | otherwise = goDifferentHash s h1 h2 t1 t2 - go s t1@(Leaf h1 (L k1 v1)) t2@(Collision h2 ls2) - | h1 == h2 = Collision h1 (updateOrSnocWithKey (\k a b -> (# f k a b #)) k1 v1 ls2) - | otherwise = goDifferentHash s h1 h2 t1 t2 - go s t1@(Collision h1 ls1) t2@(Leaf h2 (L k2 v2)) - | h1 == h2 = Collision h1 (updateOrSnocWithKey (\k a b -> (# f k b a #)) k2 v2 ls1) - | otherwise = goDifferentHash s h1 h2 t1 t2 - go s t1@(Collision h1 ls1) t2@(Collision h2 ls2) - | h1 == h2 = Collision h1 (updateOrConcatWithKey (\k a b -> (# f k a b #)) ls1 ls2) - | otherwise = goDifferentHash s h1 h2 t1 t2 + then A.Sized (sz - 1) (Leaf h1 (L k1 (f k1 v1 v2))) + else A.Sized sz (collision h1 l1 l2) + | otherwise = goDifferentHash s sz h1 h2 t1 t2 + go s !sz t1@(Leaf h1 (L k1 v1)) t2@(Collision h2 ls2) + | h1 == h2 = + let !start = A.Size (A.length ls2) + !newV = updateOrSnocWithKey (\k a b -> (# f k a b #)) k1 v1 ls2 + !end = A.Size (A.length newV) + in A.Sized (sz + (end - start - 1)) (Collision h1 newV) + | otherwise = goDifferentHash s sz h1 h2 t1 t2 + go s !sz t1@(Collision h1 ls1) t2@(Leaf h2 (L k2 v2)) + | h1 == h2 = + let !start = A.Size (A.length ls1) + !newV = updateOrSnocWithKey (\k a b -> (# f k b a #)) k2 v2 ls1 + !end = A.Size (A.length newV) + in A.Sized (sz + (end 
- start - 1)) (Collision h1 newV) + | otherwise = goDifferentHash s sz h1 h2 t1 t2 + go s !sz t1@(Collision h1 ls1) t2@(Collision h2 ls2) + | h1 == h2 = + let !start = A.Size (A.length ls1) + !newV = updateOrConcatWithKey (\k a b -> (# f k a b #)) ls1 ls2 + !end = A.Size (A.length newV) + !len_ls2 = A.Size (A.length ls2) + in A.Sized (sz + (end - start - len_ls2)) (Collision h1 newV) + | otherwise = goDifferentHash s sz h1 h2 t1 t2 -- branch vs. branch - go s (BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2) = - let b' = b1 .|. b2 - ary' = unionArrayBy (go (nextShift s)) b1 b2 ary1 ary2 - in bitmapIndexedOrFull b' ary' - go s (BitmapIndexed b1 ary1) (Full ary2) = - let ary' = unionArrayBy (go (nextShift s)) b1 fullBitmap ary1 ary2 - in Full ary' - go s (Full ary1) (BitmapIndexed b2 ary2) = - let ary' = unionArrayBy (go (nextShift s)) fullBitmap b2 ary1 ary2 - in Full ary' - go s (Full ary1) (Full ary2) = - let ary' = unionArrayBy (go (nextShift s)) fullBitmap fullBitmap - ary1 ary2 - in Full ary' + go s !sz (BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2) = + let b' = b1 .|. b2 + A.RunResA dsz ary' = + unionArrayByInternal sz + (go (nextShift s)) + b1 + b2 + ary1 + ary2 + in A.Sized dsz (bitmapIndexedOrFull b' ary') + go s !sz (BitmapIndexed b1 ary1) (Full ary2) = + let A.RunResA dsz ary' = + unionArrayByInternal sz + (go (nextShift s)) + b1 + fullBitmap + ary1 + ary2 + in A.Sized dsz (Full ary') + go s !sz (Full ary1) (BitmapIndexed b2 ary2) = + let A.RunResA dsz ary' = + unionArrayByInternal sz + (go (nextShift s)) + fullBitmap + b2 + ary1 + ary2 + in A.Sized dsz (Full ary') + go s !sz (Full ary1) (Full ary2) = + let A.RunResA dsz ary' = + unionArrayByInternal sz + (go (nextShift s)) + fullBitmap + fullBitmap + ary1 + ary2 + in A.Sized dsz (Full ary') -- leaf vs. branch - go s (BitmapIndexed b1 ary1) t2 + go s !sz (BitmapIndexed b1 ary1) t2 | b1 .&. m2 == 0 = let ary' = A.insert ary1 i t2 b' = b1 .|. 
m2 - in bitmapIndexedOrFull b' ary' - | otherwise = let ary' = A.updateWith' ary1 i $ \st1 -> - go (nextShift s) st1 t2 - in BitmapIndexed b1 ary' + in A.Sized sz (bitmapIndexedOrFull b' ary') + | otherwise = let A.RunResA dsz ary' = A.updateWithInternal' ary1 i $ \st1 -> + go (nextShift s) sz st1 t2 + in A.Sized dsz (BitmapIndexed b1 ary') where h2 = leafHashCode t2 m2 = mask h2 s i = sparseIndex b1 m2 - go s t1 (BitmapIndexed b2 ary2) + go s !sz t1 (BitmapIndexed b2 ary2) | b2 .&. m1 == 0 = let ary' = A.insert ary2 i $! t1 b' = b2 .|. m1 - in bitmapIndexedOrFull b' ary' - | otherwise = let ary' = A.updateWith' ary2 i $ \st2 -> - go (nextShift s) t1 st2 - in BitmapIndexed b2 ary' + in A.Sized sz (bitmapIndexedOrFull b' ary') + | otherwise = let A.RunResA dsz ary' = A.updateWithInternal' ary2 i $ \st2 -> + go (nextShift s) sz t1 st2 + in A.Sized dsz (BitmapIndexed b2 ary') where h1 = leafHashCode t1 m1 = mask h1 s i = sparseIndex b2 m1 - go s (Full ary1) t2 = + go s !sz (Full ary1) t2 = let h2 = leafHashCode t2 i = index h2 s - ary' = update32With' ary1 i $ \st1 -> go (nextShift s) st1 t2 - in Full ary' - go s t1 (Full ary2) = - let h1 = leafHashCode t1 - i = index h1 s - ary' = update32With' ary2 i $ \st2 -> go (nextShift s) t1 st2 - in Full ary' + A.RunResA dsz ary' = + update32WithInternal' ary1 i $ \st1 -> + go (nextShift s) sz st1 t2 + in A.Sized dsz (Full ary') + go s !sz t1 (Full ary2) = + let h1 = leafHashCode t1 + i = index h1 s + A.RunResA dsz ary' = update32WithInternal' ary2 i $ \st2 -> + go (nextShift s) sz t1 st2 + in A.Sized dsz (Full ary') leafHashCode (Leaf h _) = h leafHashCode (Collision h _) = h leafHashCode _ = error "leafHashCode" - goDifferentHash s h1 h2 t1 t2 - | m1 == m2 = BitmapIndexed m1 (A.singleton $! goDifferentHash (nextShift s) h1 h2 t1 t2) - | m1 < m2 = BitmapIndexed (m1 .|. m2) (A.pair t1 t2) - | otherwise = BitmapIndexed (m1 .|. 
m2) (A.pair t2 t1) + goDifferentHash s sz h1 h2 t1 t2 + | m1 == m2 = let A.Sized dsz hm = goDifferentHash (nextShift s) sz h1 h2 t1 t2 + in A.Sized dsz $! BitmapIndexed m1 (A.singleton hm) + | m1 < m2 = A.Sized sz (BitmapIndexed (m1 .|. m2) (A.pair t1 t2)) + | otherwise = A.Sized sz (BitmapIndexed (m1 .|. m2) (A.pair t2 t1)) where m1 = mask h1 s m2 = mask h2 s -{-# INLINE unionWithKey #-} +{-# INLINE unionWithKeyInternal #-} -- | Strict in the result of @f@. unionArrayBy :: (a -> a -> a) -> Bitmap -> Bitmap -> A.Array a -> A.Array a @@ -1715,6 +1857,41 @@ unionArrayBy f !b1 !b2 !ary1 !ary2 = A.run $ do -- where we copy one array, and then update. {-# INLINE unionArrayBy #-} +-- | Strict in the result of @f@. +unionArrayByInternal + :: A.Size + -> (A.Size -> a -> a -> A.Sized a) + -> Bitmap + -> Bitmap + -> A.Array a + -> A.Array a + -> A.RunResA a +unionArrayByInternal siz f b1 b2 ary1 ary2 = A.runInternal $ do + let b' = b1 .|. b2 + mary <- A.new_ (popCount b') + -- iterate over nonzero bits of b1 .|. b2 + -- it would be nice if we could shift m by more than 1 each time + let ba = b1 .&. b2 + go !sz !i !i1 !i2 !m + | m > b' = return sz + | b' .&. m == 0 = go sz i i1 i2 (m `unsafeShiftL` 1) + | ba .&. m /= 0 = do + let A.Sized dsz hm = f sz (A.index ary1 i1) (A.index ary2 i2) + A.write mary i hm + go dsz (i+1) (i1+1) (i2+1) (m `unsafeShiftL` 1) + | b1 .&. m /= 0 = do + A.write mary i =<< A.indexM ary1 i1 + go sz (i+1) (i1+1) (i2 ) (m `unsafeShiftL` 1) + | otherwise = do + A.write mary i =<< A.indexM ary2 i2 + go sz (i+1) (i1 ) (i2+1) (m `unsafeShiftL` 1) + d <- go siz 0 0 0 (b' .&. negate b') -- XXX: b' must be non-zero + return (A.RunResM d mary) + -- TODO: For the case where b1 .&. b2 == b1, i.e. when one is a + -- subset of the other, we could use a slightly simpler algorithm, + -- where we copy one array, and then update. +{-# INLINE unionArrayByInternal #-} + -- TODO: Figure out the time complexity of 'unions'. 
-- | Construct a set containing all elements from a list of sets. @@ -1750,7 +1927,7 @@ compose bc !ab -- | \(O(n)\) Transform this map by applying a function to every value. mapWithKey :: (k -> v1 -> v2) -> HashMap k v1 -> HashMap k v2 -mapWithKey f = go +mapWithKey f (HashMap sz m) = HashMap sz (go m) where go Empty = Empty go (Leaf h (L k v)) = Leaf h $ L k (f k v) @@ -1778,7 +1955,7 @@ traverseWithKey :: Applicative f => (k -> v1 -> f v2) -> HashMap k v1 -> f (HashMap k v2) -traverseWithKey f = go +traverseWithKey f (HashMap sz m) = HashMap sz <$> go m where go Empty = pure Empty go (Leaf h (L k v)) = Leaf h . L k <$> f k v @@ -1823,8 +2000,13 @@ difference a b = foldlWithKey' go empty a -- encountered, the combining function is applied to the values of these keys. -- If it returns 'Nothing', the element is discarded (proper set difference). If -- it returns (@'Just' y@), the element is updated with a new value @y@. -differenceWith :: (Eq k, Hashable k) => (v -> w -> Maybe v) -> HashMap k v -> HashMap k w -> HashMap k v -differenceWith f a b = foldlWithKey' go empty a +differenceWith + :: (Eq k, Hashable k) + => (v -> w -> Maybe v) + -> HashMap k v + -> HashMap k w + -> HashMap k v +differenceWith f a b =foldlWithKey' go empty a where go m k v = case lookup k b of Nothing -> unsafeInsert k v m @@ -1848,109 +2030,134 @@ intersectionWith f = Exts.inline intersectionWithKey $ const f -- the provided function is used to combine the values from the two -- maps. 
intersectionWithKey :: Eq k => (k -> v1 -> v2 -> v3) -> HashMap k v1 -> HashMap k v2 -> HashMap k v3 -intersectionWithKey f = intersectionWithKey# $ \k v1 v2 -> (# f k v1 v2 #) +intersectionWithKey f (HashMap _ tree1) (HashMap _ tree2) = + let A.Sized newSz m' = intersectionWithKey# (\k v1 v2 -> (# f k v1 v2 #)) tree1 tree2 + in HashMap newSz m' + {-# INLINABLE intersectionWithKey #-} -intersectionWithKey# :: Eq k => (k -> v1 -> v2 -> (# v3 #)) -> HashMap k v1 -> HashMap k v2 -> HashMap k v3 -intersectionWithKey# f = go 0 +intersectionWithKey# + :: forall k v1 v2 v3 . Eq k + => (k -> v1 -> v2 -> (# v3 #)) + -> Tree k v1 + -> Tree k v2 + -> A.Sized (Tree k v3) +intersectionWithKey# f = go 0 0 where + go :: Int -- ^ Bitmask accumulator + -> A.Size -- ^ Size accumulator. + -> Tree k v1 + -> Tree k v2 + -> A.Sized (Tree k v3) -- empty vs. anything - go !_ _ Empty = Empty - go _ Empty _ = Empty + go !_ !sz _ Empty = A.Sized sz Empty + go _ !sz Empty _ = A.Sized sz Empty -- leaf vs. anything - go s (Leaf h1 (L k1 v1)) t2 = + go s !sz (Leaf h1 (L k1 v1)) t2 = lookupCont - (\_ -> Empty) - (\v _ -> case f k1 v1 v of (# v' #) -> Leaf h1 $ L k1 v') + (\_ -> A.Sized sz Empty) + (\v _ -> case f k1 v1 v of (# v' #) -> A.Sized (sz + 1) $! Leaf h1 $ L k1 v') h1 k1 s t2 - go s t1 (Leaf h2 (L k2 v2)) = + go s !sz t1 (Leaf h2 (L k2 v2)) = lookupCont - (\_ -> Empty) - (\v _ -> case f k2 v v2 of (# v' #) -> Leaf h2 $ L k2 v') + (\_ -> A.Sized sz Empty) + (\v _ -> case f k2 v v2 of (# v' #) -> A.Sized (sz + 1) $! Leaf h2 $ L k2 v') h2 k2 s t1 -- collision vs. collision - go _ (Collision h1 ls1) (Collision h2 ls2) = intersectionCollisions f h1 h2 ls1 ls2 + -- stopped here + go _ !sz (Collision h1 ls1) (Collision h2 ls2) = intersectionCollisions f sz h1 h2 ls1 ls2 -- branch vs. 
branch - go s (BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2) = - intersectionArrayBy (go (nextShift s)) b1 b2 ary1 ary2 - go s (BitmapIndexed b1 ary1) (Full ary2) = - intersectionArrayBy (go (nextShift s)) b1 fullBitmap ary1 ary2 - go s (Full ary1) (BitmapIndexed b2 ary2) = - intersectionArrayBy (go (nextShift s)) fullBitmap b2 ary1 ary2 - go s (Full ary1) (Full ary2) = - intersectionArrayBy (go (nextShift s)) fullBitmap fullBitmap ary1 ary2 + go s !sz (BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2) = + intersectionArrayBy (go (nextShift s)) sz b1 b2 ary1 ary2 + go s !sz (BitmapIndexed b1 ary1) (Full ary2) = + intersectionArrayBy (go (nextShift s)) sz b1 fullBitmap ary1 ary2 + go s !sz (Full ary1) (BitmapIndexed b2 ary2) = + intersectionArrayBy (go (nextShift s)) sz fullBitmap b2 ary1 ary2 + go s !sz (Full ary1) (Full ary2) = + intersectionArrayBy (go (nextShift s)) sz fullBitmap fullBitmap ary1 ary2 -- collision vs. branch - go s (BitmapIndexed b1 ary1) t2@(Collision h2 _ls2) - | b1 .&. m2 == 0 = Empty - | otherwise = go (nextShift s) (A.index ary1 i) t2 + go s !sz (BitmapIndexed b1 ary1) t2@(Collision h2 _ls2) + | b1 .&. m2 == 0 = A.Sized sz Empty + | otherwise = go (nextShift s) sz (A.index ary1 i) t2 where m2 = mask h2 s i = sparseIndex b1 m2 - go s t1@(Collision h1 _ls1) (BitmapIndexed b2 ary2) - | b2 .&. m1 == 0 = Empty - | otherwise = go (nextShift s) t1 (A.index ary2 i) + go s !sz t1@(Collision h1 _ls1) (BitmapIndexed b2 ary2) + | b2 .&. 
m1 == 0 = A.Sized sz Empty + | otherwise = go (nextShift s) sz t1 (A.index ary2 i) where m1 = mask h1 s i = sparseIndex b2 m1 - go s (Full ary1) t2@(Collision h2 _ls2) = go (nextShift s) (A.index ary1 i) t2 + go s !sz (Full ary1) t2@(Collision h2 _ls2) = go (nextShift s) sz (A.index ary1 i) t2 where i = index h2 s - go s t1@(Collision h1 _ls1) (Full ary2) = go (nextShift s) t1 (A.index ary2 i) + go s !sz t1@(Collision h1 _ls1) (Full ary2) = go (nextShift s) sz t1 (A.index ary2 i) where i = index h1 s {-# INLINE intersectionWithKey# #-} intersectionArrayBy :: - ( HashMap k v1 -> - HashMap k v2 -> - HashMap k v3 + ( A.Size -> + Tree k v1 -> + Tree k v2 -> + A.Sized (Tree k v3) ) -> + A.Size -> Bitmap -> Bitmap -> - A.Array (HashMap k v1) -> - A.Array (HashMap k v2) -> - HashMap k v3 -intersectionArrayBy f !b1 !b2 !ary1 !ary2 - | b1 .&. b2 == 0 = Empty + A.Array (Tree k v1) -> + A.Array (Tree k v2) -> + A.Sized (Tree k v3) +intersectionArrayBy f !sze !b1 !b2 !ary1 !ary2 + | b1 .&. b2 == 0 = A.Sized sze Empty | otherwise = runST $ do mary <- A.new_ $ popCount bIntersect -- iterate over nonzero bits of b1 .|. b2 - let go !i !i1 !i2 !b !bFinal - | b == 0 = pure (i, bFinal) + let go !i !i1 !i2 !sz !b !bFinal + | b == 0 = pure (i, bFinal, sz) | testBit $ b1 .&. b2 = do x1 <- A.indexM ary1 i1 x2 <- A.indexM ary2 i2 - case f x1 x2 of - Empty -> go i (i1 + 1) (i2 + 1) b' (bFinal .&. complement m) + let A.Sized newSz hm = f sz x1 x2 + case hm of + Empty -> go i (i1 + 1) (i2 + 1) newSz b' (bFinal .&. complement m) _ -> do - A.write mary i $! f x1 x2 - go (i + 1) (i1 + 1) (i2 + 1) b' bFinal - | testBit b1 = go i (i1 + 1) i2 b' bFinal - | otherwise = go i i1 (i2 + 1) b' bFinal + A.write mary i $! hm + go (i + 1) (i1 + 1) (i2 + 1) newSz b' bFinal + | testBit b1 = go i (i1 + 1) i2 sz b' bFinal + | otherwise = go i i1 (i2 + 1) sz b' bFinal where m = 1 `unsafeShiftL` countTrailingZeros b testBit x = x .&. m /= 0 b' = b .&. 
complement m - (len, bFinal) <- go 0 0 0 bCombined bIntersect + (len, bFinal, hmSize) <- go 0 0 0 sze bCombined bIntersect case len of - 0 -> pure Empty + 0 -> pure . A.Sized hmSize $ Empty 1 -> do l <- A.read mary 0 if isLeafOrCollision l - then pure l - else BitmapIndexed bFinal <$> (A.unsafeFreeze =<< A.shrink mary 1) - _ -> bitmapIndexedOrFull bFinal <$> (A.unsafeFreeze =<< A.shrink mary len) + then pure . A.Sized hmSize $! l + else A.Sized hmSize . BitmapIndexed bFinal <$> (A.unsafeFreeze =<< A.shrink mary 1) + _ -> A.Sized hmSize . bitmapIndexedOrFull bFinal <$> (A.unsafeFreeze =<< A.shrink mary len) where bCombined = b1 .|. b2 bIntersect = b1 .&. b2 {-# INLINE intersectionArrayBy #-} -intersectionCollisions :: Eq k => (k -> v1 -> v2 -> (# v3 #)) -> Hash -> Hash -> A.Array (Leaf k v1) -> A.Array (Leaf k v2) -> HashMap k v3 -intersectionCollisions f h1 h2 ary1 ary2 +intersectionCollisions + :: Eq k + => (k -> v1 -> v2 -> (# v3 #)) + -> A.Size + -> Hash + -> Hash + -> A.Array (Leaf k v1) + -> A.Array (Leaf k v2) + -> A.Sized (Tree k v3) +intersectionCollisions f sz h1 h2 ary1 ary2 | h1 == h2 = runST $ do mary2 <- A.thaw ary2 0 $ A.length ary2 mary <- A.new_ $ min (A.length ary1) (A.length ary2) - let go i j + let go !i !j | i >= A.length ary1 || j >= A.lengthM mary2 = pure j | otherwise = do L k1 v1 <- A.indexM ary1 i @@ -1960,13 +2167,13 @@ intersectionCollisions f h1 h2 ary1 ary2 A.write mary j $ L k1 v3 go (i + 1) (j + 1) Nothing -> do - go (i + 1) j + go (i + 1) j len <- go 0 0 - case len of + A.Sized (sz + A.Size len) <$> case len of 0 -> pure Empty 1 -> Leaf h1 <$> A.read mary 0 _ -> Collision h1 <$> (A.unsafeFreeze =<< A.shrink mary len) - | otherwise = Empty + | otherwise = A.Sized sz Empty {-# INLINE intersectionCollisions #-} -- | Say we have @@ -2019,7 +2226,7 @@ foldr' f = foldrWithKey' (\ _ v z -> f v z) -- is evaluated before using the result in the next application. -- This function is strict in the starting value. 
foldlWithKey' :: (a -> k -> v -> a) -> a -> HashMap k v -> a -foldlWithKey' f = go +foldlWithKey' f acc (HashMap _ m) = go acc m where go !z Empty = z go z (Leaf _ (L k v)) = f z k v @@ -2034,7 +2241,7 @@ foldlWithKey' f = go -- is evaluated before using the result in the next application. -- This function is strict in the starting value. foldrWithKey' :: (k -> v -> a -> a) -> a -> HashMap k v -> a -foldrWithKey' f = flip go +foldrWithKey' f a (HashMap _ m) = go m a where go Empty z = z go (Leaf _ (L k v)) !z = f k v z @@ -2061,7 +2268,7 @@ foldl f = foldlWithKey (\a _k v -> f a v) -- elements, using the given starting value (typically the -- right-identity of the operator). foldrWithKey :: (k -> v -> a -> a) -> a -> HashMap k v -> a -foldrWithKey f = flip go +foldrWithKey f a (HashMap _ m) = go m a where go Empty z = z go (Leaf _ (L k v)) z = f k v z @@ -2074,7 +2281,7 @@ foldrWithKey f = flip go -- elements, using the given starting value (typically the -- left-identity of the operator). foldlWithKey :: (a -> k -> v -> a) -> a -> HashMap k v -> a -foldlWithKey f = go +foldlWithKey f a (HashMap _ m) = go a m where go z Empty = z go z (Leaf _ (L k v)) = f z k v @@ -2086,7 +2293,7 @@ foldlWithKey f = go -- | \(O(n)\) Reduce the map by applying a function to each element -- and combining the results with a monoid operation. foldMapWithKey :: Monoid m => (k -> v -> m) -> HashMap k v -> m -foldMapWithKey f = go +foldMapWithKey f (HashMap _ m) = go m where go Empty = mempty go (Leaf _ (L k v)) = f k v @@ -2101,12 +2308,14 @@ foldMapWithKey f = go -- | \(O(n)\) Transform this map by applying a function to every value -- and retaining only some of them. 
mapMaybeWithKey :: (k -> v1 -> Maybe v2) -> HashMap k v1 -> HashMap k v2 -mapMaybeWithKey f = filterMapAux onLeaf onColl +mapMaybeWithKey f (HashMap _ m) = HashMap size' m' where onLeaf (Leaf h (L k v)) | Just v' <- f k v = Just (Leaf h (L k v')) onLeaf _ = Nothing onColl (L k v) | Just v' <- f k v = Just (L k v') | otherwise = Nothing + + A.Sized size' m' = filterMapAuxInternal onLeaf onColl m {-# INLINE mapMaybeWithKey #-} -- | \(O(n)\) Transform this map by applying a function to every value @@ -2118,83 +2327,86 @@ mapMaybe f = mapMaybeWithKey (const f) -- | \(O(n)\) Filter this map by retaining only elements satisfying a -- predicate. filterWithKey :: forall k v. (k -> v -> Bool) -> HashMap k v -> HashMap k v -filterWithKey pred = filterMapAux onLeaf onColl +filterWithKey pred (HashMap _ m) = HashMap size' m' where onLeaf t@(Leaf _ (L k v)) | pred k v = Just t onLeaf _ = Nothing onColl el@(L k v) | pred k v = Just el onColl _ = Nothing -{-# INLINE filterWithKey #-} + A.Sized size' m' = filterMapAuxInternal onLeaf onColl m +{-# INLINE filterWithKey #-} -- | Common implementation for 'filterWithKey' and 'mapMaybeWithKey', --- allowing the former to former to reuse terms. -filterMapAux :: forall k v1 v2 - . (HashMap k v1 -> Maybe (HashMap k v2)) +-- allowing the former and latter to reuse terms. +-- Returns the result hashmap's size, and the hashmap itself. +filterMapAuxInternal :: forall k v1 v2 + . 
(Tree k v1 -> Maybe (Tree k v2)) -> (Leaf k v1 -> Maybe (Leaf k v2)) - -> HashMap k v1 - -> HashMap k v2 -filterMapAux onLeaf onColl = go + -> Tree k v1 + -> A.Sized (Tree k v2) +filterMapAuxInternal onLeaf onColl = go 0 where - go Empty = Empty - go t@Leaf{} - | Just t' <- onLeaf t = t' - | otherwise = Empty - go (BitmapIndexed b ary) = filterA ary b - go (Full ary) = filterA ary fullBitmap - go (Collision h ary) = filterC ary h - - filterA ary0 b0 = + go !sz Empty = A.Sized sz Empty + go !sz t@Leaf{} + | Just t' <- onLeaf t = A.Sized (sz + 1) t' + | otherwise = A.Sized sz Empty + go !sz (BitmapIndexed b ary) = filterA sz ary b + go !sz (Full ary) = filterA sz ary fullBitmap + go !sz (Collision h ary) = filterC sz ary h + + filterA sze ary0 b0 = let !n = A.length ary0 in runST $ do mary <- A.new_ n - step ary0 mary b0 0 0 1 n + step ary0 mary b0 0 0 1 n sze where - step :: A.Array (HashMap k v1) -> A.MArray s (HashMap k v2) - -> Bitmap -> Int -> Int -> Bitmap -> Int - -> ST s (HashMap k v2) - step !ary !mary !b i !j !bi n + step :: A.Array (Tree k v1) -> A.MArray s (Tree k v2) + -> Bitmap -> Int -> Int -> Bitmap -> Int -> A.Size + -> ST s (A.Sized (Tree k v2)) + step !ary !mary !b i !j !bi n !siz | i >= n = case j of - 0 -> return Empty + 0 -> return $! A.Sized siz Empty 1 -> do ch <- A.read mary 0 case ch of - t | isLeafOrCollision t -> return t - _ -> BitmapIndexed b <$> (A.unsafeFreeze =<< A.shrink mary 1) + t | isLeafOrCollision t -> return . A.Sized siz $! t + _ -> A.Sized siz . BitmapIndexed b <$> (A.unsafeFreeze =<< A.shrink mary 1) _ -> do ary2 <- A.unsafeFreeze =<< A.shrink mary j - return $! if j == maxChildren + return . A.Sized siz $! if j == maxChildren then Full ary2 else BitmapIndexed b ary2 - | bi .&. b == 0 = step ary mary b i j (bi `unsafeShiftL` 1) n - | otherwise = case go (A.index ary i) of - Empty -> step ary mary (b .&. 
complement bi) (i+1) j - (bi `unsafeShiftL` 1) n - t -> do A.write mary j t - step ary mary b (i+1) (j+1) (bi `unsafeShiftL` 1) n - - filterC ary0 h = + | bi .&. b == 0 = step ary mary b i j (bi `unsafeShiftL` 1) n siz + | otherwise = case go siz (A.index ary i) of + A.Sized dsz Empty -> step ary mary (b .&. complement bi) (i+1) j + (bi `unsafeShiftL` 1) n dsz + A.Sized dsz t -> do + A.write mary j t + step ary mary b (i+1) (j+1) (bi `unsafeShiftL` 1) n dsz + + filterC siz ary0 h = let !n = A.length ary0 in runST $ do mary <- A.new_ n - step ary0 mary 0 0 n + step ary0 mary 0 0 n siz where step :: A.Array (Leaf k v1) -> A.MArray s (Leaf k v2) - -> Int -> Int -> Int - -> ST s (HashMap k v2) - step !ary !mary i !j n + -> Int -> Int -> Int -> A.Size + -> ST s (A.Sized (Tree k v2)) + step !ary !mary i !j n !sze | i >= n = case j of - 0 -> return Empty + 0 -> return (A.Sized sze Empty) 1 -> do l <- A.read mary 0 - return $! Leaf h l + return . A.Sized sze $! Leaf h l _ | i == j -> do ary2 <- A.unsafeFreeze mary - return $! Collision h ary2 + return . A.Sized sze $! Collision h ary2 | otherwise -> do ary2 <- A.unsafeFreeze =<< A.shrink mary j - return $! Collision h ary2 + return . A.Sized sze $! Collision h ary2 | Just el <- onColl $! A.index ary i - = A.write mary j el >> step ary mary (i+1) (j+1) n - | otherwise = step ary mary (i+1) j n -{-# INLINE filterMapAux #-} + = A.write mary j el >> step ary mary (i+1) (j+1) n (sze + 1) + | otherwise = step ary mary (i+1) j n sze +{-# INLINE filterMapAuxInternal #-} -- | \(O(n)\) Filter this map by retaining only elements which values -- satisfy a predicate. 
@@ -2296,7 +2508,9 @@ fromListWith f = List.foldl' (\ m (k, v) -> unsafeInsertWith f k v m) empty -- -- @since 0.2.11 fromListWithKey :: (Eq k, Hashable k) => (k -> v -> v -> v) -> [(k, v)] -> HashMap k v -fromListWithKey f = List.foldl' (\ m (k, v) -> unsafeInsertWithKey (\k' a b -> (# f k' a b #)) k v m) empty +fromListWithKey f = List.foldl' (\ (HashMap sz m) (k, v) -> + let A.Sized diff m' = unsafeInsertWithKey (\k' a b -> (# f k' a b #)) k v m + in HashMap (sz + diff) m') empty {-# INLINE fromListWithKey #-} ------------------------------------------------------------------------ @@ -2425,6 +2639,13 @@ update32With' ary idx f = update32 ary idx $! f x {-# INLINE update32With' #-} +-- | /O(n)/ Update the element at the given position in this array, by applying a function to it. +update32WithInternal' :: A.Array e -> Int -> (e -> A.Sized e) -> A.RunResA e +update32WithInternal' ary idx f = + let A.Sized s x = f $! A.index ary idx + in A.RunResA s (update32 ary idx x) +{-# INLINE update32WithInternal' #-} + -- | Unsafely clone an array of (2^bitsPerSubkey) elements. The length of the input -- array is not checked. clone :: A.Array e -> ST s (A.MArray s e) @@ -2503,7 +2724,7 @@ nextShift s = s + bitsPerSubkey ------------------------------------------------------------------------ -- Pointer equality --- | Check if two the two arguments are the same value. N.B. This +-- | Check if the two arguments are the same value. N.B. This -- function might give false negatives (due to GC moving objects.) 
ptrEq :: a -> a -> Bool ptrEq x y = Exts.isTrue# (Exts.reallyUnsafePtrEquality# x y ==# 1#) diff --git a/Data/HashMap/Internal/Array.hs b/Data/HashMap/Internal/Array.hs index dda9edcf..ebfdea53 100644 --- a/Data/HashMap/Internal/Array.hs +++ b/Data/HashMap/Internal/Array.hs @@ -1,10 +1,12 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE CPP #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE Rank2Types #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TemplateHaskellQuotes #-} -{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE DeriveLift #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE Rank2Types #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TemplateHaskellQuotes #-} +{-# LANGUAGE UnboxedTuples #-} {-# OPTIONS_GHC -fno-full-laziness -funbox-strict-fields #-} {-# OPTIONS_HADDOCK not-home #-} @@ -28,6 +30,10 @@ module Data.HashMap.Internal.Array ( Array(..) , MArray(..) + , RunResA (..) + , RunResM (..) + , Size (..) + , Sized (..) -- * Creation , new @@ -47,6 +53,7 @@ module Data.HashMap.Internal.Array , index# , update , updateWith' + , updateWithInternal' , unsafeUpdateM , insert , insertM @@ -57,6 +64,8 @@ module Data.HashMap.Internal.Array , unsafeThaw , unsafeSameArray , run + , runInternal + , run2 , copy , copyM , cloneM @@ -288,10 +297,27 @@ unsafeThaw ary (# s', mary #) -> (# s', MArray mary #) {-# INLINE unsafeThaw #-} +data RunResA e = RunResA !Size !(Array e) + +data RunResM s e = RunResM !Size !(MArray s e) + run :: (forall s . ST s (MArray s e)) -> Array e run act = runST $ act >>= unsafeFreeze {-# INLINE run #-} +runInternal :: (forall s . ST s (RunResM s e)) -> RunResA e +runInternal act = runST $ do + RunResM s mary <- act + ary <- unsafeFreeze mary + return (RunResA s ary) +{-# INLINE runInternal #-} + +run2 :: (forall s. 
ST s (MArray s e, a)) -> (Array e, a) +run2 k = runST (do + (marr,b) <- k + arr <- unsafeFreeze marr + return (arr,b)) + -- | Unsafely copy the elements of an array. Array bounds are not checked. copy :: Array e -> Int -> MArray s e -> Int -> Int -> ST s () copy !src !_sidx@(I# sidx#) !dst !_didx@(I# didx#) _n@(I# n#) = @@ -360,6 +386,26 @@ updateWith' ary idx f = update ary idx $! f x {-# INLINE updateWith' #-} +-- | This newtype wrapper is to avoid confusion when local functions +-- take more than one parameter of 'Int' type (see 'go' in +-- 'Data.HashMap.Internal.unionWithKeyInternal'). +newtype Size = Size { unSize :: Int } + deriving (Eq, Ord, Num, Integral, Enum, Real, NFData, TH.Lift) + +-- | Helper datatype used in 'updateWithInternal''. Used when a change in +-- a value's size must be returned along with the value itself (typically +-- a hashmap). +data Sized a = Sized {-# UNPACK #-} !Size !a + +-- | /O(n)/ Update the element at the given position in this array, by +-- applying a function to it. Evaluates the element to WHNF before +-- inserting it into the array. +updateWithInternal' :: Array e -> Int -> (e -> Sized e) -> RunResA e +updateWithInternal' ary idx f = + let Sized sz e = f (index ary idx) + in RunResA sz (update ary idx e) +{-# INLINE updateWithInternal' #-} + -- | \(O(1)\) Update the element at the given position in this array, -- without copying. 
unsafeUpdateM :: Array e -> Int -> e -> ST s () diff --git a/Data/HashMap/Internal/Debug.hs b/Data/HashMap/Internal/Debug.hs index e09901e8..7a133689 100644 --- a/Data/HashMap/Internal/Debug.hs +++ b/Data/HashMap/Internal/Debug.hs @@ -28,7 +28,7 @@ module Data.HashMap.Internal.Debug import Data.Bits (complement, countTrailingZeros, popCount, shiftL, unsafeShiftL, (.&.), (.|.)) import Data.Hashable (Hashable) -import Data.HashMap.Internal (Bitmap, Hash, HashMap (..), Leaf (..), +import Data.HashMap.Internal (Bitmap, Hash, HashMap (..), Leaf (..), Tree (..), bitsPerSubkey, fullBitmap, hash, isLeafOrCollision, maxChildren, sparseIndex) import Data.Semigroup (Sum (..)) @@ -65,6 +65,7 @@ data Error k | INV8_bad_Full_size !Int | INV9_Collision_size !Int | INV10_Collision_duplicate_key k !Hash + | INV11_Negative_HM_Size !Int deriving (Eq, Show) -- TODO: Name this 'Index'?! @@ -95,55 +96,60 @@ hashMatchesSubHashPath (SubHashPath ph l) h = maskToLength h l == ph maskToLength h' l' = h' .&. complement (complement 0 `shiftL` l') valid :: Hashable k => HashMap k v -> Validity k -valid Empty = Valid -valid t = validInternal initialSubHashPath t +valid (HashMap sz hm) = if sz >= 0 + then valid' hm + else Invalid (INV11_Negative_HM_Size $ A.unSize sz) initialSubHashPath where - validInternal p Empty = Invalid INV1_internal_Empty p - validInternal p (Leaf h l) = validHash p h <> validLeaf p h l - validInternal p (Collision h ary) = validHash p h <> validCollision p h ary - validInternal p (BitmapIndexed b ary) = validBitmapIndexed p b ary - validInternal p (Full ary) = validFull p ary + valid' :: Hashable k => Tree k v -> Validity k + valid' Empty = Valid + valid' t = validInternal initialSubHashPath t + where + validInternal p Empty = Invalid INV1_internal_Empty p + validInternal p (Leaf h l) = validHash p h <> validLeaf p h l + validInternal p (Collision h ary) = validHash p h <> validCollision p h ary + validInternal p (BitmapIndexed b ary) = validBitmapIndexed p b ary + 
validInternal p (Full ary) = validFull p ary - validHash p h | hashMatchesSubHashPath p h = Valid - | otherwise = Invalid (INV6_misplaced_hash h) p + validHash p h | hashMatchesSubHashPath p h = Valid + | otherwise = Invalid (INV6_misplaced_hash h) p - validLeaf p h (L k _) | hash k == h = Valid - | otherwise = Invalid (INV7_key_hash_mismatch k h) p + validLeaf p h (L k _) | hash k == h = Valid + | otherwise = Invalid (INV7_key_hash_mismatch k h) p - validCollision p h ary = validCollisionSize <> A.foldMap (validLeaf p h) ary <> distinctKeys - where - n = A.length ary - validCollisionSize | n < 2 = Invalid (INV9_Collision_size n) p - | otherwise = Valid - distinctKeys = A.foldMap (\(L k _) -> appearsOnce k) ary - appearsOnce k | A.foldMap (\(L k' _) -> if k' == k then Sum @Int 1 else Sum 0) ary == 1 = Valid - | otherwise = Invalid (INV10_Collision_duplicate_key k h) p - - validBitmapIndexed p b ary = validBitmap <> validArraySize <> validSubTrees p b ary - where - validBitmap | b .&. complement fullBitmap == 0 = Valid - | otherwise = Invalid (INV2_Bitmap_unexpected_1_bits b) p - n = A.length ary - validArraySize | n < 1 || n >= maxChildren = Invalid (INV3_bad_BitmapIndexed_size n) p - | popCount b == n = Valid - | otherwise = Invalid (INV4_bitmap_array_size_mismatch b n) p - - validSubTrees p b ary - | A.length ary == 1 - , isLeafOrCollision (A.index ary 0) - = Invalid INV5_BitmapIndexed_invalid_single_subtree p - | otherwise = go b - where - go 0 = Valid - go b' = validInternal (addSubHash p (fromIntegral c)) (A.index ary i) <> go b'' + validCollision p h ary = validCollisionSize <> A.foldMap (validLeaf p h) ary <> distinctKeys where - c = countTrailingZeros b' - m = 1 `unsafeShiftL` c - i = sparseIndex b m - b'' = b' .&. 
complement m - - validFull p ary = validArraySize <> validSubTrees p fullBitmap ary - where - n = A.length ary - validArraySize | n == maxChildren = Valid - | otherwise = Invalid (INV8_bad_Full_size n) p + n = A.length ary + validCollisionSize | n < 2 = Invalid (INV9_Collision_size n) p + | otherwise = Valid + distinctKeys = A.foldMap (\(L k _) -> appearsOnce k) ary + appearsOnce k | A.foldMap (\(L k' _) -> if k' == k then Sum @Int 1 else Sum 0) ary == 1 = Valid + | otherwise = Invalid (INV10_Collision_duplicate_key k h) p + + validBitmapIndexed p b ary = validBitmap <> validArraySize <> validSubTrees p b ary + where + validBitmap | b .&. complement fullBitmap == 0 = Valid + | otherwise = Invalid (INV2_Bitmap_unexpected_1_bits b) p + n = A.length ary + validArraySize | n < 1 || n >= maxChildren = Invalid (INV3_bad_BitmapIndexed_size n) p + | popCount b == n = Valid + | otherwise = Invalid (INV4_bitmap_array_size_mismatch b n) p + + validSubTrees p b ary + | A.length ary == 1 + , isLeafOrCollision (A.index ary 0) + = Invalid INV5_BitmapIndexed_invalid_single_subtree p + | otherwise = go b + where + go 0 = Valid + go b' = validInternal (addSubHash p (fromIntegral c)) (A.index ary i) <> go b'' + where + c = countTrailingZeros b' + m = 1 `unsafeShiftL` c + i = sparseIndex b m + b'' = b' .&. 
complement m + + validFull p ary = validArraySize <> validSubTrees p fullBitmap ary + where + n = A.length ary + validArraySize | n == maxChildren = Valid + | otherwise = Invalid (INV8_bad_Full_size n) p diff --git a/Data/HashMap/Internal/Strict.hs b/Data/HashMap/Internal/Strict.hs index a76bffb8..524b33bf 100644 --- a/Data/HashMap/Internal/Strict.hs +++ b/Data/HashMap/Internal/Strict.hs @@ -1,11 +1,12 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE CPP #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE PatternGuards #-} -{-# LANGUAGE Trustworthy #-} -{-# LANGUAGE UnboxedTuples #-} -{-# OPTIONS_HADDOCK not-home #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE Trustworthy #-} +{-# LANGUAGE UnboxedTuples #-} +{-# OPTIONS_HADDOCK not-home #-} ------------------------------------------------------------------------ -- | @@ -129,7 +130,7 @@ import Data.Coerce (coerce) import Data.Functor.Identity (Identity (..)) -- See Note [Imports from Data.HashMap.Internal] import Data.Hashable (Hashable) -import Data.HashMap.Internal (Hash, HashMap (..), Leaf (..), LookupRes (..), +import Data.HashMap.Internal (Hash, HashMap (..), Leaf (..), LookupRes (..), Tree (..), fullBitmap, hash, index, mask, nextShift, ptrEq, sparseIndex) import Prelude hiding (lookup, map) @@ -187,83 +188,113 @@ insert k !v = HM.insert k v -- > insertWith f k v map -- > where f new old = new + old insertWith :: (Eq k, Hashable k) => (v -> v -> v) -> k -> v -> HashMap k v - -> HashMap k v -insertWith f k0 v0 m0 = go h0 k0 v0 0 m0 + -> HashMap k v +insertWith f k0 v0 (HashMap sz m0) = + let A.Sized diff m0' = insertWithInternal f k0 v0 m0 + in HashMap (diff + sz) m0' +{-# INLINABLE insertWith #-} + +-- | /O(log n)/ Associate the value with the key in this map. 
If +-- this map previously contained a mapping for the key, the old value +-- is replaced by the result of applying the given function to the new +-- and old value. +-- Returns an 'A.Sized' whose first component is the +-- difference in size between the old and new trees, and whose second +-- component is the new tree. Example: +-- +-- > insertWithInternal f k v tree +-- > where f new old = new + old +insertWithInternal :: (Eq k, Hashable k) => (v -> v -> v) -> k -> v -> Tree k v + -> A.Sized (Tree k v) +insertWithInternal f k0 v0 m0 = go h0 k0 v0 0 m0 where h0 = hash k0 - go !h !k x !_ Empty = leaf h k x + go !h !k x !_ Empty = A.Sized 1 (leaf h k x) go h k x s t@(Leaf hy l@(L ky y)) | hy == h = if ky == k - then leaf h k (f x y) - else x `seq` HM.collision h l (L k x) - | otherwise = x `seq` runST (HM.two s h k x hy t) + then A.Sized 0 (leaf h k (f x y)) + else A.Sized 1 (x `seq` HM.collision h l (L k x)) + | otherwise = x `seq` A.Sized 1 $!runST (HM.two s h k x hy t) go h k x s (BitmapIndexed b ary) | b .&. m == 0 = let ary' = A.insert ary i $! leaf h k x - in HM.bitmapIndexedOrFull (b .|. m) ary' + in A.Sized 1 (HM.bitmapIndexedOrFull (b .|. m) ary') | otherwise = - let st = A.index ary i - st' = go h k x (nextShift s) st + let st = A.index ary i + A.Sized sz st' = go h k x (nextShift s) st ary' = A.update ary i $! st' - in BitmapIndexed b ary' + in A.Sized sz (BitmapIndexed b ary') where m = mask h s i = sparseIndex b m go h k x s (Full ary) = - let st = A.index ary i - st' = go h k x (nextShift s) st - ary' = HM.update32 ary i $! st' - in Full ary' + let st = A.index ary i + A.Sized sz st' = go h k x (nextShift s) st + ary' = HM.update32 ary i $! 
st' + in A.Sized sz (Full ary') where i = index h s go h k x s t@(Collision hy v) - | h == hy = Collision h (updateOrSnocWith f k x v) + | h == hy = + let !start = A.length v + !newV = updateOrSnocWith f k x v + !end = A.length newV + in A.Sized (A.Size (end - start)) (Collision h newV) | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) -{-# INLINABLE insertWith #-} +{-# INLINABLE insertWithInternal #-} -- | In-place update version of insertWith -unsafeInsertWith :: (Eq k, Hashable k) => (v -> v -> v) -> k -> v -> HashMap k v +unsafeInsertWith :: forall k v . (Eq k, Hashable k) + => (v -> v -> v) -> k -> v -> HashMap k v -> HashMap k v -unsafeInsertWith f k0 v0 m0 = unsafeInsertWithKey (const f) k0 v0 m0 +unsafeInsertWith f k0 v0 (HashMap sz m0) = + let A.Sized diff m0' = unsafeInsertWithKey (const f) k0 v0 m0 + in HashMap (diff + sz) m0' {-# INLINABLE unsafeInsertWith #-} -unsafeInsertWithKey :: (Eq k, Hashable k) => (k -> v -> v -> v) -> k -> v -> HashMap k v - -> HashMap k v +unsafeInsertWithKey :: (Eq k, Hashable k) => (k -> v -> v -> v) -> k -> v -> Tree k v + -> A.Sized (Tree k v) unsafeInsertWithKey f k0 v0 m0 = runST (go h0 k0 v0 0 m0) where h0 = hash k0 - go !h !k x !_ Empty = return $! leaf h k x + go !h !k x !_ Empty = return $! A.Sized 1 (leaf h k x) go h k x s t@(Leaf hy l@(L ky y)) | hy == h = if ky == k - then return $! leaf h k (f k x y) + then return $! A.Sized 0 (leaf h k (f k x y)) else do let l' = x `seq` L k x - return $! HM.collision h l l' - | otherwise = x `seq` HM.two s h k x hy t + return $! A.Sized 1 (HM.collision h l l') + | otherwise = do + two <- x `seq` HM.two s h k x hy t + return $! A.Sized 1 two go h k x s t@(BitmapIndexed b ary) | b .&. m == 0 = do ary' <- A.insertM ary i $! leaf h k x - return $! HM.bitmapIndexedOrFull (b .|. m) ary' + return . A.Sized 1 $! HM.bitmapIndexedOrFull (b .|. 
m) ary' | otherwise = do st <- A.indexM ary i - st' <- go h k x (nextShift s) st + A.Sized sz st' <- go h k x (nextShift s) st A.unsafeUpdateM ary i st' - return t + return . A.Sized sz $! t where m = mask h s i = sparseIndex b m go h k x s t@(Full ary) = do st <- A.indexM ary i - st' <- go h k x (nextShift s) st + A.Sized sz st' <- go h k x (nextShift s) st A.unsafeUpdateM ary i st' - return t + return .A.Sized sz $! t where i = index h s go h k x s t@(Collision hy v) - | h == hy = return $! Collision h (updateOrSnocWithKey f k x v) + | h == hy = + let !start = A.Size (A.length v) + !newV = updateOrSnocWithKey f k x v + !end = A.Size (A.length newV) + in return . A.Sized (end - start) $! Collision h newV | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) {-# INLINABLE unsafeInsertWithKey #-} -- | \(O(\log n)\) Adjust the value tied to a given key in this map only -- if it is present. Otherwise, leave the map alone. adjust :: (Eq k, Hashable k) => (v -> v) -> k -> HashMap k v -> HashMap k v -adjust f k0 m0 = go h0 k0 0 m0 +adjust f k0 (HashMap sz m0) = HashMap sz (go h0 k0 0 m0) where h0 = hash k0 go !_ !_ !_ Empty = Empty @@ -305,19 +336,19 @@ update f = alter (>>= f) -- 'lookup' k ('alter' f k m) = f ('lookup' k m) -- @ alter :: (Eq k, Hashable k) => (Maybe v -> Maybe v) -> k -> HashMap k v -> HashMap k v -alter f k m = +alter f k hm@(HashMap sz m) = let !h = hash k !lookupRes = HM.lookupRecordCollision h k m in case f (HM.lookupResToMaybe lookupRes) of Nothing -> case lookupRes of - Absent -> m - Present _ collPos -> HM.deleteKeyExists collPos h k m + Absent -> hm + Present _ collPos -> HashMap (sz - 1) $! HM.deleteKeyExists collPos h k m Just !v' -> case lookupRes of - Absent -> HM.insertNewKey h k v' m + Absent -> HashMap (sz + 1) $! HM.insertNewKey h k v' m Present v collPos -> if v `ptrEq` v' - then m - else HM.insertKeyExists collPos h k v' m + then hm + else HashMap sz $! 
HM.insertKeyExists collPos h k v' m {-# INLINABLE alter #-} -- | \(O(\log n)\) The expression (@'alterF' f k map@) alters the value @x@ at @@ -408,33 +439,31 @@ alterFWeird _ _ f = alterFEager f -- eagerly, whether or not the given function requires that information. alterFEager :: (Functor f, Eq k, Hashable k) => (Maybe v -> f (Maybe v)) -> k -> HashMap k v -> f (HashMap k v) -alterFEager f !k !m = (<$> f mv) $ \fres -> - case fres of - +alterFEager f !k !hm@(HashMap sz m) = (<$> f mv) $ \case ------------------------------ -- Delete the key from the map. Nothing -> case lookupRes of -- Key did not exist in the map to begin with, no-op - Absent -> m + Absent -> hm -- Key did exist, no collision - Present _ collPos -> HM.deleteKeyExists collPos h k m + Present _ collPos -> HashMap (sz - 1) (HM.deleteKeyExists collPos h k m) ------------------------------ -- Update value Just !v' -> case lookupRes of -- Key did not exist before, insert v' under a new key - Absent -> HM.insertNewKey h k v' m + Absent -> HashMap (sz + 1) (HM.insertNewKey h k v' m) -- Key existed before, no hash collision Present v collPos -> if v `ptrEq` v' -- If the value is identical, no-op - then m + then hm -- If the value changed, update the value. - else HM.insertKeyExists collPos h k v' m + else HashMap sz (HM.insertKeyExists collPos h k v' m) where !h = hash k !lookupRes = HM.lookupRecordCollision h k m @@ -453,96 +482,134 @@ unionWith f = unionWithKey (const f) -- | \(O(n+m)\) The union of two maps. If a key occurs in both maps, -- the provided function (first argument) will be used to compute the result. 
-unionWithKey :: Eq k => (k -> v -> v -> v) -> HashMap k v -> HashMap k v - -> HashMap k v -unionWithKey f = go 0 +unionWithKey + :: Eq k + => (k -> v -> v -> v) + -> HashMap k v + -> HashMap k v + -> HashMap k v +unionWithKey f (HashMap sz m) hw = + let A.Sized diff m' = unionWithKeyInternal f m hw + in HashMap (diff + sz) m' +{-# INLINE unionWithKey #-} + +-- | \(O(n+m)\) The union of two maps. If a key occurs in both maps, +-- the provided function (first argument) will be used to compute the result. +unionWithKeyInternal + :: Eq k + => (k -> v -> v -> v) + -> Tree k v + -> HashMap k v + -> A.Sized (Tree k v) +unionWithKeyInternal f hm1 (HashMap siz hm2) = go 0 siz hm1 hm2 where -- empty vs. anything - go !_ t1 Empty = t1 - go _ Empty t2 = t2 + go !_ !sz t1 Empty = A.Sized sz t1 + go _ !sz Empty t2 = A.Sized sz t2 -- leaf vs. leaf - go s t1@(Leaf h1 l1@(L k1 v1)) t2@(Leaf h2 l2@(L k2 v2)) + go s !sz t1@(Leaf h1 l1@(L k1 v1)) t2@(Leaf h2 l2@(L k2 v2)) | h1 == h2 = if k1 == k2 - then leaf h1 k1 (f k1 v1 v2) - else HM.collision h1 l1 l2 - | otherwise = goDifferentHash s h1 h2 t1 t2 - go s t1@(Leaf h1 (L k1 v1)) t2@(Collision h2 ls2) - | h1 == h2 = Collision h1 (updateOrSnocWithKey f k1 v1 ls2) - | otherwise = goDifferentHash s h1 h2 t1 t2 - go s t1@(Collision h1 ls1) t2@(Leaf h2 (L k2 v2)) - | h1 == h2 = Collision h1 (updateOrSnocWithKey (flip . 
f) k2 v2 ls1) - | otherwise = goDifferentHash s h1 h2 t1 t2 - go s t1@(Collision h1 ls1) t2@(Collision h2 ls2) - | h1 == h2 = Collision h1 (HM.updateOrConcatWithKey (\k a b -> let !v = f k a b in (# v #)) ls1 ls2) - | otherwise = goDifferentHash s h1 h2 t1 t2 + then A.Sized (sz - 1) (leaf h1 k1 (f k1 v1 v2)) + else A.Sized sz (HM.collision h1 l1 l2) + | otherwise = goDifferentHash s sz h1 h2 t1 t2 + go s !sz t1@(Leaf h1 (L k1 v1)) t2@(Collision h2 ls2) + | h1 == h2 = + let !start = A.Size (A.length ls2) + !newV = updateOrSnocWithKey f k1 v1 ls2 + !end = A.Size (A.length newV) + in A.Sized (sz + (end - start - 1)) (Collision h1 newV) + | otherwise = goDifferentHash s sz h1 h2 t1 t2 + go s !sz t1@(Collision h1 ls1) t2@(Leaf h2 (L k2 v2)) + | h1 == h2 = + let !start = A.Size (A.length ls1) + !newV = updateOrSnocWithKey (flip . f) k2 v2 ls1 + !end = A.Size (A.length newV) + in A.Sized (sz + (end - start - 1)) (Collision h1 newV) + | otherwise = goDifferentHash s sz h1 h2 t1 t2 + go s !sz t1@(Collision h1 ls1) t2@(Collision h2 ls2) + | h1 == h2 = + let !start = A.Size (A.length ls1) + !newV = HM.updateOrConcatWithKey (\k a b -> let !v = f k a b in (# v #)) ls1 ls2 + !end = A.Size (A.length newV) + !len_ls2 = A.Size (A.length ls2) + in A.Sized (sz + (end - start - len_ls2)) (Collision h1 newV) + | otherwise = goDifferentHash s sz h1 h2 t1 t2 -- branch vs. branch - go s (BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2) = - let b' = b1 .|. 
b2 - ary' = HM.unionArrayBy (go (nextShift s)) b1 b2 ary1 ary2 - in HM.bitmapIndexedOrFull b' ary' - go s (BitmapIndexed b1 ary1) (Full ary2) = - let ary' = HM.unionArrayBy (go (nextShift s)) b1 fullBitmap ary1 ary2 - in Full ary' - go s (Full ary1) (BitmapIndexed b2 ary2) = - let ary' = HM.unionArrayBy (go (nextShift s)) fullBitmap b2 ary1 ary2 - in Full ary' - go s (Full ary1) (Full ary2) = - let ary' = HM.unionArrayBy (go (nextShift s)) fullBitmap fullBitmap - ary1 ary2 - in Full ary' + go s !sz (BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2) = + let b' = b1 .|. b2 + A.RunResA dsz ary' = HM.unionArrayByInternal sz (go (nextShift s)) b1 b2 ary1 ary2 + in A.Sized dsz (HM.bitmapIndexedOrFull b' ary') + go s !sz (BitmapIndexed b1 ary1) (Full ary2) = + let A.RunResA dsz ary' = HM.unionArrayByInternal sz (go (nextShift s)) b1 fullBitmap ary1 ary2 + in A.Sized dsz (Full ary') + go s !sz (Full ary1) (BitmapIndexed b2 ary2) = + let A.RunResA dsz ary' = HM.unionArrayByInternal sz (go (nextShift s)) fullBitmap b2 ary1 ary2 + in A.Sized dsz (Full ary') + go s !sz (Full ary1) (Full ary2) = + let A.RunResA dsz ary' = + HM.unionArrayByInternal sz (go (nextShift s)) fullBitmap fullBitmap ary1 ary2 + in A.Sized dsz (Full ary') -- leaf vs. branch - go s (BitmapIndexed b1 ary1) t2 + go s !sz (BitmapIndexed b1 ary1) t2 | b1 .&. m2 == 0 = let ary' = A.insert ary1 i t2 b' = b1 .|. m2 - in HM.bitmapIndexedOrFull b' ary' - | otherwise = let ary' = A.updateWith' ary1 i $ \st1 -> - go (nextShift s) st1 t2 - in BitmapIndexed b1 ary' + in A.Sized sz (HM.bitmapIndexedOrFull b' ary') + | otherwise = let A.RunResA dsz ary' = + A.updateWithInternal' ary1 i $ \st1 -> + go (nextShift s) sz st1 t2 + in A.Sized dsz (BitmapIndexed b1 ary') where h2 = leafHashCode t2 m2 = mask h2 s i = sparseIndex b1 m2 - go s t1 (BitmapIndexed b2 ary2) + go s !sz t1 (BitmapIndexed b2 ary2) | b2 .&. m1 == 0 = let ary' = A.insert ary2 i $! t1 b' = b2 .|. 
m1 - in HM.bitmapIndexedOrFull b' ary' - | otherwise = let ary' = A.updateWith' ary2 i $ \st2 -> - go (nextShift s) t1 st2 - in BitmapIndexed b2 ary' + in A.Sized sz (HM.bitmapIndexedOrFull b' ary') + | otherwise = let A.RunResA dsz ary' = + A.updateWithInternal' ary2 i $ \st2 -> + go (nextShift s) sz t1 st2 + in A.Sized dsz (BitmapIndexed b2 ary') where h1 = leafHashCode t1 m1 = mask h1 s i = sparseIndex b2 m1 - go s (Full ary1) t2 = + go s !sz (Full ary1) t2 = let h2 = leafHashCode t2 i = index h2 s - ary' = HM.update32With' ary1 i $ \st1 -> go (nextShift s) st1 t2 - in Full ary' - go s t1 (Full ary2) = + A.RunResA dsz ary' = + HM.update32WithInternal' ary1 i $ \st1 -> + go (nextShift s) sz st1 t2 + in A.Sized dsz (Full ary') + go s !sz t1 (Full ary2) = let h1 = leafHashCode t1 i = index h1 s - ary' = HM.update32With' ary2 i $ \st2 -> go (nextShift s) t1 st2 - in Full ary' + A.RunResA dsz ary' = + HM.update32WithInternal' ary2 i $ \st2 -> + go (nextShift s) sz t1 st2 + in A.Sized dsz (Full ary') leafHashCode (Leaf h _) = h leafHashCode (Collision h _) = h leafHashCode _ = error "leafHashCode" - goDifferentHash s h1 h2 t1 t2 - | m1 == m2 = BitmapIndexed m1 (A.singleton $! goDifferentHash (nextShift s) h1 h2 t1 t2) - | m1 < m2 = BitmapIndexed (m1 .|. m2) (A.pair t1 t2) - | otherwise = BitmapIndexed (m1 .|. m2) (A.pair t2 t1) + goDifferentHash s sz h1 h2 t1 t2 + | m1 == m2 = + let A.Sized dsz hm = goDifferentHash (nextShift s) sz h1 h2 t1 t2 + in A.Sized dsz (BitmapIndexed m1 (A.singleton $! hm)) + | m1 < m2 = A.Sized sz (BitmapIndexed (m1 .|. m2) (A.pair t1 t2)) + | otherwise = A.Sized sz (BitmapIndexed (m1 .|. m2) (A.pair t2 t1)) where m1 = mask h1 s m2 = mask h2 s -{-# INLINE unionWithKey #-} +{-# INLINE unionWithKeyInternal #-} ------------------------------------------------------------------------ -- * Transformations -- | \(O(n)\) Transform this map by applying a function to every value. 
mapWithKey :: (k -> v1 -> v2) -> HashMap k v1 -> HashMap k v2 -mapWithKey f = go +mapWithKey f (HashMap sz m) = HashMap sz (go m) where go Empty = Empty go (Leaf h (L k v)) = leaf h k (f k v) @@ -564,12 +631,14 @@ map f = mapWithKey (const f) -- | \(O(n)\) Transform this map by applying a function to every value -- and retaining only some of them. mapMaybeWithKey :: (k -> v1 -> Maybe v2) -> HashMap k v1 -> HashMap k v2 -mapMaybeWithKey f = HM.filterMapAux onLeaf onColl +mapMaybeWithKey f (HashMap _ m) = HashMap size' m' where onLeaf (Leaf h (L k v)) | Just v' <- f k v = Just (leaf h k v') onLeaf _ = Nothing onColl (L k v) | Just !v' <- f k v = Just (L k v') | otherwise = Nothing + + A.Sized size' m' = HM.filterMapAuxInternal onLeaf onColl m {-# INLINE mapMaybeWithKey #-} -- | \(O(n)\) Transform this map by applying a function to every value @@ -594,7 +663,7 @@ traverseWithKey :: Applicative f => (k -> v1 -> f v2) -> HashMap k v1 -> f (HashMap k v2) -traverseWithKey f = go +traverseWithKey f (HashMap sz m) = HashMap sz <$> go m where go Empty = pure Empty go (Leaf h (L k v)) = leaf h k <$> f k v @@ -630,9 +699,10 @@ intersectionWith f = Exts.inline intersectionWithKey $ const f -- | \(O(n+m)\) Intersection of two maps. If a key occurs in both maps -- the provided function is used to combine the values from the two -- maps. 
-intersectionWithKey :: Eq k => (k -> v1 -> v2 -> v3) - -> HashMap k v1 -> HashMap k v2 -> HashMap k v3 -intersectionWithKey f = HM.intersectionWithKey# $ \k v1 v2 -> let !v3 = f k v1 v2 in (# v3 #) +intersectionWithKey :: Eq k => (k -> v1 -> v2 -> v3) -> HashMap k v1 -> HashMap k v2 -> HashMap k v3 +intersectionWithKey f (HashMap _ tree1) (HashMap _ tree2) = + let A.Sized newSz m' = HM.intersectionWithKey# (\k v1 v2 -> let !v3 = f k v1 v2 in (# v3 #)) tree1 tree2 + in HashMap newSz m' {-# INLINABLE intersectionWithKey #-} ------------------------------------------------------------------------ @@ -706,7 +776,9 @@ fromListWith f = List.foldl' (\ m (k, v) -> unsafeInsertWith f k v m) HM.empty -- -- @since 0.2.11 fromListWithKey :: (Eq k, Hashable k) => (k -> v -> v -> v) -> [(k, v)] -> HashMap k v -fromListWithKey f = List.foldl' (\ m (k, v) -> unsafeInsertWithKey f k v m) HM.empty +fromListWithKey f = List.foldl' (\ (HashMap sz m) (k, v) -> + let A.Sized diff m' = unsafeInsertWithKey f k v m + in HashMap (sz + diff) m') HM.empty {-# INLINE fromListWithKey #-} ------------------------------------------------------------------------ @@ -755,6 +827,6 @@ updateOrSnocWithKey f k0 v0 ary0 = go k0 v0 ary0 0 (A.length ary0) -- These constructors make sure the value is in WHNF before it's -- inserted into the constructor. -leaf :: Hash -> k -> v -> HashMap k v +leaf :: Hash -> k -> v -> Tree k v leaf h k = \ !v -> Leaf h (L k v) {-# INLINE leaf #-} diff --git a/Data/HashSet/Internal.hs b/Data/HashSet/Internal.hs index da0713c3..a01f3fbe 100644 --- a/Data/HashSet/Internal.hs +++ b/Data/HashSet/Internal.hs @@ -327,7 +327,7 @@ null :: HashSet a -> Bool null = H.null . asMap {-# INLINE null #-} --- | \(O(n)\) Return the number of elements in this set. +-- | \(O(1)\) Return the number of elements in this set. 
-- -- >>> HashSet.size HashSet.empty -- 0 diff --git a/benchmarks/Benchmarks.hs b/benchmarks/Benchmarks.hs index ae05c422..f5e56b9d 100644 --- a/benchmarks/Benchmarks.hs +++ b/benchmarks/Benchmarks.hs @@ -20,8 +20,16 @@ import Test.Tasty.Bench (bench, bgroup, defaultMain, env, nf, whnf) import qualified Data.ByteString as BS import qualified "hashmap" Data.HashMap as IHM import qualified Data.HashMap.Strict as HM +import qualified "unordered-containers" Data.HashSet as HS import qualified Data.IntMap as IM +import Data.List (foldl') import qualified Data.Map as M +import Data.Maybe (fromMaybe) +import qualified Data.Set as S +import qualified Data.Vector as V +import GHC.Generics (Generic) +import Prelude hiding (lookup) + import qualified Util.ByteString as UBS import qualified Util.Int as UI import qualified Util.String as US @@ -37,6 +45,8 @@ instance NFData B where data Env = Env { n :: !Int, + csz :: !Int, -- container size + elems :: ![(String, Int)], keys :: ![String], elemsBS :: ![(BS.ByteString, Int)], @@ -49,6 +59,11 @@ data Env = Env { keysBS' :: ![BS.ByteString], keysI' :: ![Int], + listOfHMs :: ![HM.HashMap Int Int], + vecOfHMs :: !(V.Vector (HM.HashMap Int Int)), + hsetOfHMs :: !(HS.HashSet (HM.HashMap Int Int)), + setOfHMs :: !(S.Set (HM.HashMap Int Int)), + keysDup :: ![String], keysDupBS :: ![BS.ByteString], keysDupI :: ![Int], @@ -79,6 +94,20 @@ setupEnv :: IO Env setupEnv = do let n = 2^(12 :: Int) + -- When building a container of hashmaps, 'cn' will be the size of each. + cn = n `div` 16 + -- 'csz' is the size of the container of hashmaps. 
+ csz = 2^(7 :: Int) + + values = [1..csz*cn] + + chop _ [] = [] + chop k l = + let (taken, left) = splitAt k l + in taken : chop k left + + vals = chop cn values + elems = zip keys [1..n] keys = US.rnd 8 n elemsBS = zip keysBS [1..n] @@ -91,6 +120,11 @@ setupEnv = do keysBS' = UBS.rnd' 8 n keysI' = UI.rnd' (n+n) n + listOfHMs = zipWith (\x y -> HM.fromList (zip x y)) (repeat keysI) vals + vecOfHMs = V.fromList listOfHMs + hsetOfHMs = HS.fromList listOfHMs + setOfHMs = S.fromList listOfHMs + keysDup = US.rnd 2 n keysDupBS = UBS.rnd 2 n keysDupI = UI.rnd (n`div`4) n @@ -128,8 +162,8 @@ main = do [ #ifdef BENCH_containers_Map env setupEnv $ \ ~(Env{..}) -> - -- * Comparison to other data structures - -- ** Map + -- Comparison to other data structures + -- Map bgroup "Map" [ bgroup "lookup" [ bench "String" $ whnf (lookupM keys) m @@ -231,7 +265,7 @@ main = do env setupEnv $ \ ~(Env{..}) -> bgroup "HashMap" - [ -- * Basic interface + [ -- Basic interface bgroup "lookup" [ bench "String" $ whnf (lookup keys) hm , bench "ByteString" $ whnf (lookup keysBS) hmbs @@ -313,6 +347,51 @@ main = do , bench "Int" $ whnf (isSubmapOfNaive hmiSubset) hmi ] + , bgroup "containerized" + [ bgroup "lookup" + [ bench "List" $ nf (lookupC keysI) listOfHMs + , bench "Vector" $ nf (lookupC keysI) vecOfHMs + , bench "HashSet" $ nf (lookupHS keysI) hsetOfHMs + , bench "Set" $ nf (lookupS keysI) setOfHMs + ] + , bgroup "insert" + [ bench "List" $ nf (insertC elemsI) listOfHMs + , bench "Vector" $ nf (insertC elemsI) vecOfHMs + , bench "HashSet" $ nf (insertHS elemsI) hsetOfHMs + , bench "Set" $ nf (insertS elemsI) setOfHMs + ] + , bgroup "delete" + [ bench "List" $ nf (deleteC keysI) listOfHMs + , bench "Vector" $ nf (deleteC keysI) vecOfHMs + , bench "HashSet" $ nf (deleteHS keysI) hsetOfHMs + , bench "Set" $ nf (deleteS keysI) setOfHMs + ] + , bgroup "union" + [ bench "List" $ whnf unionC listOfHMs + , bench "Vector" $ whnf unionC vecOfHMs + , bench "HashSet" $ whnf unionC hsetOfHMs + , 
bench "Set" $ whnf unionC setOfHMs + ] + , bgroup "map" + [ bench "List" $ nf (mapC (\ v -> v + 1)) listOfHMs + , bench "Vector" $ nf (mapC (\ v -> v + 1)) vecOfHMs + , bench "HashSet" $ nf (mapHS (\ v -> v + 1)) hsetOfHMs + , bench "Set" $ nf (mapS (\ v -> v + 1)) setOfHMs + ] + , bgroup "intersection" + [ bench "List" $ whnf intersectionC listOfHMs + , bench "Vector" $ whnf intersectionC vecOfHMs + , bench "HashSet" $ whnf intersectionC hsetOfHMs + , bench "Set" $ whnf intersectionC setOfHMs + ] + , bgroup "size" + [ bench "List" $ nf sizeC listOfHMs + , bench "Vector" $ nf sizeC vecOfHMs + , bench "HashSet" $ nf sizeHS hsetOfHMs + , bench "Set" $ nf sizeS setOfHMs + ] + ] + -- Combine , bgroup "union" [ bench "Int" $ whnf (HM.union hmi) hmi2 @@ -327,7 +406,7 @@ main = do -- Transformations , bench "map" $ whnf (HM.map (\ v -> v + 1)) hmi - -- * Difference and intersection + -- Difference and intersection , bench "difference" $ whnf (HM.difference hmi) hmi2 -- Folds @@ -389,6 +468,18 @@ lookup xs m = foldl' (\z k -> fromMaybe z (HM.lookup k m)) 0 xs {-# SPECIALIZE lookup :: [BS.ByteString] -> HM.HashMap BS.ByteString Int -> Int #-} +lookupC :: (Eq k, Hashable k, Traversable f) => [k] -> f (HM.HashMap k Int) -> f Int +lookupC = fmap . lookup +{-# SPECIALIZE lookupC :: [Int] -> [HM.HashMap Int Int] -> [Int] #-} +{-# SPECIALIZE lookupC :: [Int] -> V.Vector (HM.HashMap Int Int) + -> V.Vector Int #-} + +lookupHS :: [Int] -> HS.HashSet (HM.HashMap Int Int) -> HS.HashSet Int +lookupHS = HS.map . lookup + +lookupS :: [Int] -> S.Set (HM.HashMap Int Int) -> S.Set Int +lookupS = S.map . 
lookup + insert :: (Eq k, Hashable k) => [(k, Int)] -> HM.HashMap k Int -> HM.HashMap k Int insert xs m0 = foldl' (\m (k, v) -> HM.insert k v m) m0 xs @@ -399,6 +490,21 @@ insert xs m0 = foldl' (\m (k, v) -> HM.insert k v m) m0 xs {-# SPECIALIZE insert :: [(BS.ByteString, Int)] -> HM.HashMap BS.ByteString Int -> HM.HashMap BS.ByteString Int #-} +insertC :: (Eq k, Hashable k, Traversable f) => [(k, Int)] -> f (HM.HashMap k Int) + -> f (HM.HashMap k Int) +insertC l = fmap (insert l) +{-# SPECIALIZE insertC :: [(Int, Int)] -> [HM.HashMap Int Int] + -> [HM.HashMap Int Int] #-} +{-# SPECIALIZE insertC :: [(Int, Int)] -> V.Vector (HM.HashMap Int Int) + -> V.Vector (HM.HashMap Int Int) #-} + +insertHS :: [(Int, Int)] -> HS.HashSet (HM.HashMap Int Int) + -> HS.HashSet (HM.HashMap Int Int) +insertHS l = HS.map (insert l) + +insertS :: [(Int, Int)] -> S.Set (HM.HashMap Int Int) -> S.Set (HM.HashMap Int Int) +insertS l = S.map (insert l) + delete :: (Eq k, Hashable k) => [k] -> HM.HashMap k Int -> HM.HashMap k Int delete xs m0 = foldl' (\m k -> HM.delete k m) m0 xs {-# SPECIALIZE delete :: [Int] -> HM.HashMap Int Int -> HM.HashMap Int Int #-} @@ -407,6 +513,21 @@ delete xs m0 = foldl' (\m k -> HM.delete k m) m0 xs {-# SPECIALIZE delete :: [BS.ByteString] -> HM.HashMap BS.ByteString Int -> HM.HashMap BS.ByteString Int #-} +deleteC :: (Eq k, Hashable k, Functor f) => [k] -> f (HM.HashMap k Int) + -> f (HM.HashMap k Int) +deleteC = fmap . delete +{-# SPECIALIZE deleteC :: [Int] -> [HM.HashMap Int Int] + -> [HM.HashMap Int Int] #-} +{-# SPECIALIZE deleteC :: [Int] -> V.Vector (HM.HashMap Int Int) + -> V.Vector (HM.HashMap Int Int) #-} + +deleteHS :: [Int] -> HS.HashSet (HM.HashMap Int Int) + -> HS.HashSet (HM.HashMap Int Int) +deleteHS = HS.map . delete + +deleteS :: [Int] -> S.Set (HM.HashMap Int Int) -> S.Set (HM.HashMap Int Int) +deleteS = S.map . 
delete + alterInsert :: (Eq k, Hashable k) => [(k, Int)] -> HM.HashMap k Int -> HM.HashMap k Int alterInsert xs m0 = @@ -451,6 +572,54 @@ alterFDelete xs m0 = {-# SPECIALIZE alterFDelete :: [BS.ByteString] -> HM.HashMap BS.ByteString Int -> HM.HashMap BS.ByteString Int #-} +unionC :: (Eq k, Hashable k, Foldable f) => f (HM.HashMap k Int) + -> HM.HashMap k Int +unionC = foldl' HM.union mempty +{-# SPECIALIZE unionC :: [HM.HashMap Int Int] -> HM.HashMap Int Int #-} +{-# SPECIALIZE unionC :: V.Vector (HM.HashMap Int Int) -> HM.HashMap Int Int #-} +{-# SPECIALIZE unionC :: HS.HashSet (HM.HashMap Int Int) -> HM.HashMap Int Int #-} +{-# SPECIALIZE unionC :: S.Set (HM.HashMap Int Int) -> HM.HashMap Int Int #-} + +mapC :: (Eq k, Hashable k, Functor f) => (Int -> Int) -> f (HM.HashMap k Int) + -> f (HM.HashMap k Int) +mapC f = fmap (HM.map f) +{-# SPECIALIZE mapC :: (Int -> Int) -> [HM.HashMap Int Int] + -> [HM.HashMap Int Int] #-} +{-# SPECIALIZE mapC :: (Int -> Int) -> V.Vector (HM.HashMap Int Int) + -> V.Vector (HM.HashMap Int Int) #-} + +mapHS :: (Int -> Int) -> HS.HashSet (HM.HashMap Int Int) + -> HS.HashSet (HM.HashMap Int Int) +mapHS f = HS.map (HM.map f) + +mapS :: (Int -> Int) -> S.Set (HM.HashMap Int Int) -> S.Set (HM.HashMap Int Int) +mapS f = S.map (HM.map f) + +-- | Intersect all maps in the container. The fold is seeded with the first +-- map, because a 'mempty' seed would make every intersection empty. +intersectionC :: (Eq k, Hashable k, Foldable f) => f (HM.HashMap k Int) + -> HM.HashMap k Int +intersectionC = fromMaybe mempty . foldl' (\acc m -> Just $! maybe m (`HM.intersection` m) acc) Nothing +{-# SPECIALIZE intersectionC :: [HM.HashMap Int Int] + -> HM.HashMap Int Int #-} +{-# SPECIALIZE intersectionC :: V.Vector (HM.HashMap Int Int) + -> HM.HashMap Int Int #-} +{-# SPECIALIZE intersectionC :: HS.HashSet (HM.HashMap Int Int) + -> HM.HashMap Int Int #-} +{-# SPECIALIZE intersectionC :: S.Set (HM.HashMap Int Int) + -> HM.HashMap Int Int #-} + +sizeC :: (Eq k, Hashable k, Functor f) => f (HM.HashMap k Int) -> f Int +sizeC = fmap HM.size +{-# SPECIALIZE sizeC :: [HM.HashMap Int Int] -> [Int] #-} +{-# SPECIALIZE sizeC :: V.Vector (HM.HashMap Int Int) -> 
V.Vector Int #-} + +sizeHS :: HS.HashSet (HM.HashMap Int Int) -> HS.HashSet Int +sizeHS = HS.map HM.size + +sizeS :: S.Set (HM.HashMap Int Int) -> S.Set Int +sizeS = S.map HM.size + isSubmapOfNaive :: (Eq k, Hashable k) => HM.HashMap k Int -> HM.HashMap k Int -> Bool isSubmapOfNaive m1 m2 = and [ Just v1 == HM.lookup k1 m2 | (k1,v1) <- HM.toList m1 ] {-# SPECIALIZE isSubmapOfNaive :: HM.HashMap Int Int -> HM.HashMap Int Int -> Bool #-} diff --git a/tests/Properties.hs b/tests/Properties.hs index 01acc420..e07ca736 100644 --- a/tests/Properties.hs +++ b/tests/Properties.hs @@ -6,6 +6,7 @@ import qualified Properties.HashMapLazy import qualified Properties.HashMapStrict import qualified Properties.HashSet import qualified Properties.List +import qualified Properties.Size tests :: TestTree tests = testGroup "Properties" @@ -13,4 +14,5 @@ tests = testGroup "Properties" , Properties.HashMapStrict.tests , Properties.HashSet.tests , Properties.List.tests + , Properties.Size.tests ] diff --git a/tests/Properties/Size.hs b/tests/Properties/Size.hs new file mode 100644 index 00000000..3b6cd028 --- /dev/null +++ b/tests/Properties/Size.hs @@ -0,0 +1,234 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE RecordWildCards #-} + +{-# OPTIONS_GHC -fno-warn-orphans #-} -- because of Arbitrary (HashMap k v) + +-- | Tests for size field invariant in @HashMap@ wrapper introduced in GitHub +-- PR #170. 
+ +module Properties.Size (tests) where + +import Data.Maybe (isJust, isNothing) +import Data.Hashable (Hashable) +#if defined(STRICT) +import Data.HashMap.Strict (HashMap) +import qualified Data.HashMap.Strict as HM +#else +import Data.HashMap.Lazy (HashMap) +import qualified Data.HashMap.Lazy as HM +#endif +import qualified Data.Map as M + +import GHC.Generics (Generic) + +import Test.QuickCheck (Arbitrary (..), Property, conjoin, frequency, (===), + genericShrink) +import Test.Tasty (TestTree, testGroup) +import Test.Tasty.QuickCheck (testProperty) +import Util.Key (Key (..), keyToInt) + +instance (Eq k, Hashable k, Arbitrary k, Arbitrary v) => Arbitrary (HashMap k v) where + arbitrary = HM.fromList <$> arbitrary + shrink = fmap HM.fromList . shrink . HM.toList + +-- | Property to check that the hashmap built by @fromList@ applied to a list +-- without repeating keys will have the right size, i.e. equal to the list's +-- length. +fromListProperty :: M.Map Key Int -> Bool +fromListProperty m = + let sz = M.size m + list = M.toList m + hm = HM.fromList list + in sz == HM.size hm + +-- | Property to check that the hashmap built by @fromListWith@ applied to a +-- list without repeating keys will have the right size, i.e. equal to the list's +-- length. +fromListWithProperty :: M.Map Key Int -> Bool +fromListWithProperty m = + let sz = M.size m + list = M.toList m + hm = HM.fromListWith (+) list + in sz == HM.size hm + +{- Note on @HashMapAction@ datatype +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some actions correspond to functions from the @HashMap@ API (imported here as @HM@) that require +function arguments, e.g. @insertWith@ requires a @v -> v -> v@ argument. +However, function values do not have a @Show@ instance, and QuickCheck needs one to print the values for which a test failed. +To get around this, simple functions like @(+)@ are used instead. 
+ +Furthermore, when functions have a @Bool@ or a @Maybe v@ argument and/or +result value, simple predicates like @even/odd@ are used to "mimic" such +functions. An example: @mapMaybe@ has an argument @f::(a -> Maybe b)@, but in +these tests all hashmaps are instantiated as @HashMap Key Int@, so no parameter +is passed to @MapMaybe@ in @HashMapAction@ and @f = \v -> if odd v then Just +(succ v) else Nothing@ is used instead. +-} + +-- Datatype representing the actions that can potentially change a hashmap's +-- size. +data HashMapAction + = Insert Key Int + | InsertWith Key Int + | Adjust Key + | Update (Maybe Int) Key + | Alter (Maybe Int) Key + | Delete Key + | Union (HM.HashMap Key Int) + | UnionWith (HM.HashMap Key Int) + | UnionWithKey (HM.HashMap Key Int) + | Intersection (HM.HashMap Key Int) + | IntersectionWith (HM.HashMap Key Int) + | IntersectionWithKey (HM.HashMap Key Int) + | Difference (HM.HashMap Key Int) + | DifferenceWith (HM.HashMap Key Int) + | Filter + | FilterWithKey + | Map + | MapMaybe + | MapMaybeWithKey + deriving (Eq, Show, Generic) + +-- Here, higher weights are used for operations that increase the size of the +-- hashmap so that its size is more likely to grow instead of nearing and +-- staying 0, creating more interesting sequences of actions to be tested. 
+instance Arbitrary HashMapAction where + arbitrary = frequency + [ (4, Insert <$> arbitrary <*> arbitrary) + , (4, InsertWith <$> arbitrary <*> arbitrary) + , (4, Union <$> arbitrary) + , (4, UnionWith <$> arbitrary) + , (4, UnionWithKey <$> arbitrary) + , (1, Adjust <$> arbitrary) + , (1, Update <$> arbitrary <*> arbitrary) + , (1, Alter <$> arbitrary <*> arbitrary) + , (1, Delete <$> arbitrary) + , (1, Intersection <$> arbitrary) + , (1, IntersectionWith <$> arbitrary) + , (1, IntersectionWithKey <$> arbitrary) + , (1, Difference <$> arbitrary) + , (1, DifferenceWith <$> arbitrary) + , (1, pure Filter) + , (1, pure FilterWithKey) + , (1, pure Map) + , (1, pure MapMaybe) + , (1, pure MapMaybeWithKey) + ] + shrink = genericShrink + +-- Simple way of representing a hashmap and its size without having to +-- use @size@, which is the function to be tested. As such, its use is +-- avoided and the @sz@ field of this record is used instead. +data HashMapState = HashMapState + { sz :: Int -- ^ The size of the @hm@ hashmap in this + -- record, tracked without using @size@. + , hm :: HM.HashMap Key Int -- ^ The hashmap resulting from every + -- @HashMapAction@ applied since the start of the test. + } deriving (Show, Eq) + +-- | Applies a @HashMapAction@ to a @HashMapState@, updating the tracked +-- size to match the operation. 
+applyActionToState :: HashMapState -> HashMapAction -> HashMapState
+applyActionToState (HashMapState n m) action = case action of
+    -- Point updates: the expected count moves by at most one, depending on
+    -- whether the key is already present in the current map.
+    Insert k v ->
+      HashMapState (afterInsert k) (HM.insert k v m)
+    InsertWith k v ->
+      HashMapState (afterInsert k) (HM.insertWith (+) k v m)
+    Adjust k ->
+      HashMapState n (HM.adjust succ k m)
+    Update mk k ->
+      HashMapState
+        (if isNothing mk then afterDelete k else n)
+        (HM.update (const mk) k m)
+    Alter mv k ->
+      -- 'Nothing' behaves like a delete, 'Just' like an insert.
+      HashMapState
+        (maybe (afterDelete k) (const (afterInsert k)) mv)
+        (HM.alter (const mv) k m)
+    Delete k ->
+      HashMapState (afterDelete k) (HM.delete k m)
+
+    -- Unions: both sizes added together, minus the keys counted twice.
+    Union other ->
+      HashMapState (unionSize other) (HM.union m other)
+    UnionWith other ->
+      HashMapState (unionSize other) (HM.unionWith (+) m other)
+    UnionWithKey other ->
+      HashMapState (unionSize other) (HM.unionWithKey plusKey m other)
+
+    -- Intersections: exactly the keys present in both maps.
+    Intersection other ->
+      HashMapState (overlap other) (HM.intersection m other)
+    IntersectionWith other ->
+      HashMapState (overlap other) (HM.intersectionWith (+) m other)
+    IntersectionWithKey other ->
+      HashMapState (overlap other) (HM.intersectionWithKey plusKey m other)
+
+    -- Differences: keys of the current map that survive the subtraction.
+    Difference other ->
+      HashMapState
+        (length (filter (not . (`HM.member` other)) (HM.keys m)))
+        (HM.difference m other)
+    DifferenceWith other ->
+      let combine v w = if odd v then Just (v + w) else Nothing
+          -- An entry is dropped only when its key is in @other@ and
+          -- @combine@ yields 'Nothing', i.e. when its value is even.
+          survives (k, v) = not (HM.member k other && even v)
+      in HashMapState (countPairs survives) (HM.differenceWith combine m other)
+
+    -- Filters and maps: count the entries that pass the same predicate
+    -- that is handed to the map operation.
+    Filter ->
+      HashMapState (countPairs (even . snd)) (HM.filter even m)
+    FilterWithKey ->
+      HashMapState
+        (countPairs (uncurry keyedEven))
+        (HM.filterWithKey keyedEven m)
+    Map ->
+      HashMapState n (HM.map succ m)
+    MapMaybe ->
+      HashMapState (countPairs (isJust . bump . snd)) (HM.mapMaybe bump m)
+    MapMaybeWithKey ->
+      HashMapState
+        (countPairs (isJust . uncurry keyedBump))
+        (HM.mapMaybeWithKey keyedBump m)
+  where
+    present k = HM.member k m
+
+    -- Expected count after inserting at @k@.
+    afterInsert k
+      | present k = n
+      | otherwise = n + 1
+
+    -- Expected count after removing @k@.
+    afterDelete k
+      | present k = n - 1
+      | otherwise = n
+
+    -- Number of keys of the current map that also occur in @other@.
+    overlap other = length (filter (`HM.member` other) (HM.keys m))
+
+    unionSize other = n + length (HM.toList other) - overlap other
+
+    -- Number of key/value pairs of the current map satisfying @keep@.
+    countPairs keep = length (filter keep (HM.toList m))
+
+    plusKey k v1 v2 = keyToInt k + v1 + v2
+    keyedEven k v = even (keyToInt k + v)
+    bump v = if odd v then Just (succ v) else Nothing
+    keyedBump k v = if odd v then Just (keyToInt k + succ v) else Nothing
+
+-- | Replays a list of actions from an empty map and checks, after every
+-- step (including the initial empty state), that the hand-maintained count
+-- equals what @HM.size@ reports for the resulting hashmap.
+sizeInvariantProperty :: [HashMapAction] -> Property
+sizeInvariantProperty actionList = conjoin (map sizeMatches states)
+  where
+    states = scanl applyActionToState (HashMapState 0 mempty) actionList
+    sizeMatches (HashMapState expected hmap) = expected === HM.size hmap
+
+------------------------------------------------------------------------
+-- * Test list
+
+tests :: TestTree
+tests = testGroup "Data.HashMap.size" [sizeInvariantChecks]
+  where
+    sizeInvariantChecks = testGroup "size invariant checks"
+      [ testProperty "size" sizeInvariantProperty
+      , testProperty "fromList" fromListProperty
+      , testProperty "fromListWith" fromListWithProperty
+      ]
\ No newline at end of file
diff --git a/tests/Util/Key.hs b/tests/Util/Key.hs
index a3d1476b..e89a82d1 100644
--- a/tests/Util/Key.hs
+++ b/tests/Util/Key.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE DeriveGeneric #-}
 {-# LANGUAGE TypeApplications #-}
 
-module Util.Key (Key(..), keyToInt, incKey, collisionAtHash) where
+module Util.Key (Key(..), SmallSum (..), keyToInt, incKey, collisionAtHash) where
 
 import Data.Bits (bit, (.&.))
 import Data.Hashable (Hashable (hashWithSalt))
diff --git a/unordered-containers.cabal b/unordered-containers.cabal
index d16adc73..20fc43b9 100644
--- a/unordered-containers.cabal
+++ b/unordered-containers.cabal
@@ -90,6 +90,7 @@
test-suite unordered-containers-tests Properties.HashMapStrict Properties.HashSet Properties.List + Properties.Size Strictness Util.Key @@ -130,6 +131,7 @@ benchmark benchmarks containers, deepseq, hashable, + vector, hashmap, mtl, random, diff --git a/utils/Stats.hs b/utils/Stats.hs index 7278ecc3..46ed9102 100644 --- a/utils/Stats.hs +++ b/utils/Stats.hs @@ -30,7 +30,7 @@ instance Monoid Histogram where mempty = H 0 0 0 0 0 -- | Count the number of node types at each level -nodeHistogram :: HM.HashMap k v -> [Histogram] +nodeHistogram :: HM.Tree k v -> [Histogram] nodeHistogram Empty = [mempty { empty = 1 }] nodeHistogram (Leaf {}) = [mempty { leaf = 1 }] nodeHistogram (BitmapIndexed _ ary) =