Skip to content

Make size run in O(1) #170

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
869 changes: 545 additions & 324 deletions Data/HashMap/Internal.hs

Large diffs are not rendered by default.

60 changes: 53 additions & 7 deletions Data/HashMap/Internal/Array.hs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskellQuotes #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveLift #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskellQuotes #-}
{-# LANGUAGE UnboxedTuples #-}
{-# OPTIONS_GHC -fno-full-laziness -funbox-strict-fields #-}
{-# OPTIONS_HADDOCK not-home #-}

Expand All @@ -28,6 +30,10 @@
module Data.HashMap.Internal.Array
( Array(..)
, MArray(..)
, RunResA (..)
, RunResM (..)
, Size (..)
, Sized (..)

-- * Creation
, new
Expand All @@ -47,6 +53,7 @@ module Data.HashMap.Internal.Array
, index#
, update
, updateWith'
, updateWithInternal'
, unsafeUpdateM
, insert
, insertM
Expand All @@ -57,6 +64,8 @@ module Data.HashMap.Internal.Array
, unsafeThaw
, unsafeSameArray
, run
, runInternal
, run2
, copy
, copyM
, cloneM
Expand Down Expand Up @@ -288,10 +297,27 @@ unsafeThaw ary
(# s', mary #) -> (# s', MArray mary #)
{-# INLINE unsafeThaw #-}

-- | A strict pair of a 'Size' and an immutable 'Array'.
--
-- This is the result type of 'runInternal' and 'updateWithInternal''.
-- Both fields are strict, so evaluating a 'RunResA' to WHNF also
-- evaluates the 'Size' and the array.
--
-- NOTE(review): the 'Size' presumably carries the size (or size change)
-- of the structure the array belongs to -- confirm against the callers.
data RunResA e = RunResA !Size !(Array e)

-- | A strict pair of a 'Size' and a mutable 'MArray' living in state
-- thread @s@.
--
-- This is what the 'ST' action passed to 'runInternal' must return; it
-- is the mutable counterpart of 'RunResA'. Both fields are strict.
data RunResM s e = RunResM !Size !(MArray s e)

-- | Run an 'ST' action that produces a mutable array and hand the
-- result back as an immutable 'Array', freezing it with 'unsafeFreeze'.
run :: (forall s . ST s (MArray s e)) -> Array e
run act = runST (do
    mary <- act
    unsafeFreeze mary)
{-# INLINE run #-}

-- | Like 'run', but for actions that also report a 'Size'.
--
-- The action yields a 'RunResM'; its mutable array is frozen with
-- 'unsafeFreeze' and repackaged, together with the unchanged 'Size',
-- as a 'RunResA'.
runInternal :: (forall s . ST s (RunResM s e)) -> RunResA e
runInternal act = runST (act >>= \(RunResM sz mary) ->
    unsafeFreeze mary >>= \ary ->
    return (RunResA sz ary))
{-# INLINE runInternal #-}

-- | Like 'run', but the action returns an extra value alongside the
-- mutable array. The array is frozen with 'unsafeFreeze' and the extra
-- value is passed through untouched.
run2 :: (forall s. ST s (MArray s e, a)) -> (Array e, a)
run2 k = runST $ do
    (mary, extra) <- k
    ary <- unsafeFreeze mary
    return (ary, extra)

-- | Unsafely copy the elements of an array. Array bounds are not checked.
copy :: Array e -> Int -> MArray s e -> Int -> Int -> ST s ()
copy !src !_sidx@(I# sidx#) !dst !_didx@(I# didx#) _n@(I# n#) =
Expand Down Expand Up @@ -360,6 +386,26 @@ updateWith' ary idx f
= update ary idx $! f x
{-# INLINE updateWith' #-}

-- | This newtype wrapper is to avoid confusion when local functions
-- take more than one parameter of 'Int' type (see 'go' in
-- 'Data.HashMap.Base.unionWithKeyInternal').
newtype Size = Size { unSize :: Int }
    deriving (Eq, Ord, Num, Integral, Enum, Real, NFData, TH.Lift)

-- | Helper datatype used in 'updateWithInternal''. Used when a change in
-- a value's size must be returned along with the value itself (typically
-- a hashmap).
--
-- Both fields are strict: matching on the 'Sized' constructor yields a
-- 'Size' and a value that are already in WHNF. The 'Size' field is
-- additionally unpacked into the constructor.
data Sized a = Sized {-# UNPACK #-} !Size !a

-- | /O(n)/ Replace the element at the given index with the result of
-- applying a function to it, returning the new array together with the
-- 'Size' reported by that function.
--
-- The new element is in WHNF before it is written into the array: it
-- comes out of the strict value field of 'Sized'.
updateWithInternal' :: Array e -> Int -> (e -> Sized e) -> RunResA e
updateWithInternal' ary idx f =
    case f (index ary idx) of
        Sized delta new -> RunResA delta (update ary idx new)
{-# INLINE updateWithInternal' #-}

-- | \(O(1)\) Update the element at the given position in this array,
-- without copying.
unsafeUpdateM :: Array e -> Int -> e -> ST s ()
Expand Down
102 changes: 54 additions & 48 deletions Data/HashMap/Internal/Debug.hs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ module Data.HashMap.Internal.Debug
import Data.Bits (complement, countTrailingZeros, popCount, shiftL,
unsafeShiftL, (.&.), (.|.))
import Data.Hashable (Hashable)
import Data.HashMap.Internal (Bitmap, Hash, HashMap (..), Leaf (..),
import Data.HashMap.Internal (Bitmap, Hash, HashMap (..), Leaf (..), Tree (..),
bitsPerSubkey, fullBitmap, hash,
isLeafOrCollision, maxChildren, sparseIndex)
import Data.Semigroup (Sum (..))
Expand Down Expand Up @@ -65,6 +65,7 @@ data Error k
| INV8_bad_Full_size !Int
| INV9_Collision_size !Int
| INV10_Collision_duplicate_key k !Hash
| INV11_Negative_HM_Size !Int
deriving (Eq, Show)

-- TODO: Name this 'Index'?!
Expand Down Expand Up @@ -95,55 +96,60 @@ hashMatchesSubHashPath (SubHashPath ph l) h = maskToLength h l == ph
maskToLength h' l' = h' .&. complement (complement 0 `shiftL` l')

valid :: Hashable k => HashMap k v -> Validity k
valid Empty = Valid
valid t = validInternal initialSubHashPath t
valid (HashMap sz hm) = if sz >= 0
then valid' hm
else Invalid (INV11_Negative_HM_Size $ A.unSize sz) initialSubHashPath
where
validInternal p Empty = Invalid INV1_internal_Empty p
validInternal p (Leaf h l) = validHash p h <> validLeaf p h l
validInternal p (Collision h ary) = validHash p h <> validCollision p h ary
validInternal p (BitmapIndexed b ary) = validBitmapIndexed p b ary
validInternal p (Full ary) = validFull p ary
valid' :: Hashable k => Tree k v -> Validity k
valid' Empty = Valid
valid' t = validInternal initialSubHashPath t
where
validInternal p Empty = Invalid INV1_internal_Empty p
validInternal p (Leaf h l) = validHash p h <> validLeaf p h l
validInternal p (Collision h ary) = validHash p h <> validCollision p h ary
validInternal p (BitmapIndexed b ary) = validBitmapIndexed p b ary
validInternal p (Full ary) = validFull p ary

validHash p h | hashMatchesSubHashPath p h = Valid
| otherwise = Invalid (INV6_misplaced_hash h) p
validHash p h | hashMatchesSubHashPath p h = Valid
| otherwise = Invalid (INV6_misplaced_hash h) p

validLeaf p h (L k _) | hash k == h = Valid
| otherwise = Invalid (INV7_key_hash_mismatch k h) p
validLeaf p h (L k _) | hash k == h = Valid
| otherwise = Invalid (INV7_key_hash_mismatch k h) p

validCollision p h ary = validCollisionSize <> A.foldMap (validLeaf p h) ary <> distinctKeys
where
n = A.length ary
validCollisionSize | n < 2 = Invalid (INV9_Collision_size n) p
| otherwise = Valid
distinctKeys = A.foldMap (\(L k _) -> appearsOnce k) ary
appearsOnce k | A.foldMap (\(L k' _) -> if k' == k then Sum @Int 1 else Sum 0) ary == 1 = Valid
| otherwise = Invalid (INV10_Collision_duplicate_key k h) p

validBitmapIndexed p b ary = validBitmap <> validArraySize <> validSubTrees p b ary
where
validBitmap | b .&. complement fullBitmap == 0 = Valid
| otherwise = Invalid (INV2_Bitmap_unexpected_1_bits b) p
n = A.length ary
validArraySize | n < 1 || n >= maxChildren = Invalid (INV3_bad_BitmapIndexed_size n) p
| popCount b == n = Valid
| otherwise = Invalid (INV4_bitmap_array_size_mismatch b n) p

validSubTrees p b ary
| A.length ary == 1
, isLeafOrCollision (A.index ary 0)
= Invalid INV5_BitmapIndexed_invalid_single_subtree p
| otherwise = go b
where
go 0 = Valid
go b' = validInternal (addSubHash p (fromIntegral c)) (A.index ary i) <> go b''
validCollision p h ary = validCollisionSize <> A.foldMap (validLeaf p h) ary <> distinctKeys
where
c = countTrailingZeros b'
m = 1 `unsafeShiftL` c
i = sparseIndex b m
b'' = b' .&. complement m

validFull p ary = validArraySize <> validSubTrees p fullBitmap ary
where
n = A.length ary
validArraySize | n == maxChildren = Valid
| otherwise = Invalid (INV8_bad_Full_size n) p
n = A.length ary
validCollisionSize | n < 2 = Invalid (INV9_Collision_size n) p
| otherwise = Valid
distinctKeys = A.foldMap (\(L k _) -> appearsOnce k) ary
appearsOnce k | A.foldMap (\(L k' _) -> if k' == k then Sum @Int 1 else Sum 0) ary == 1 = Valid
| otherwise = Invalid (INV10_Collision_duplicate_key k h) p

validBitmapIndexed p b ary = validBitmap <> validArraySize <> validSubTrees p b ary
where
validBitmap | b .&. complement fullBitmap == 0 = Valid
| otherwise = Invalid (INV2_Bitmap_unexpected_1_bits b) p
n = A.length ary
validArraySize | n < 1 || n >= maxChildren = Invalid (INV3_bad_BitmapIndexed_size n) p
| popCount b == n = Valid
| otherwise = Invalid (INV4_bitmap_array_size_mismatch b n) p

validSubTrees p b ary
| A.length ary == 1
, isLeafOrCollision (A.index ary 0)
= Invalid INV5_BitmapIndexed_invalid_single_subtree p
| otherwise = go b
where
go 0 = Valid
go b' = validInternal (addSubHash p (fromIntegral c)) (A.index ary i) <> go b''
where
c = countTrailingZeros b'
m = 1 `unsafeShiftL` c
i = sparseIndex b m
b'' = b' .&. complement m

validFull p ary = validArraySize <> validSubTrees p fullBitmap ary
where
n = A.length ary
validArraySize | n == maxChildren = Valid
| otherwise = Invalid (INV8_bad_Full_size n) p
Loading