From 028e187120c2cd6ed9f74d67a03bec1431f74be3 Mon Sep 17 00:00:00 2001 From: Soumik Sarkar Date: Wed, 11 Sep 2024 20:14:06 +0530 Subject: [PATCH] More efficient Eq, Ord for Seq (#1035) * Add benchmarks * Keep the list based implementation, for now, but define the list comparisons ourself to avoid base's performance issues. On Seq Int and with GHC 9.6.3, benchmark times improve by ~40%. --- containers-tests/benchmarks/Sequence.hs | 12 +++++- containers/src/Data/Sequence/Internal.hs | 48 ++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/containers-tests/benchmarks/Sequence.hs b/containers-tests/benchmarks/Sequence.hs index feea81ae2..5355a21e7 100644 --- a/containers-tests/benchmarks/Sequence.hs +++ b/containers-tests/benchmarks/Sequence.hs @@ -4,7 +4,7 @@ import Control.Applicative import Control.DeepSeq (rnf) import Control.Exception (evaluate) import Control.Monad.Trans.State.Strict -import Test.Tasty.Bench (bench, bgroup, defaultMain, nf) +import Test.Tasty.Bench (bench, bgroup, defaultMain, nf, whnf) import Data.Foldable (foldl', foldr') import qualified Data.Sequence as S import qualified Data.Foldable @@ -174,6 +174,16 @@ main = do , bench "1000" $ nf (S.unstableSortOn id) rs1000 , bench "10000" $ nf (S.unstableSortOn id) rs10000] ] + , bgroup "eq" + [ bench "100/100" $ whnf (\s' -> s' == s') s100 + , bench "10000/10000" $ whnf (\s' -> s' == s') s10000 + ] + , bgroup "compare" + [ bench "100/100" $ whnf (uncurry compare) (s100, s100) + , bench "10000/10000" $ whnf (uncurry compare) (s10000, s10000) + , bench "100/10000" $ whnf (uncurry compare) (s100, s10000) + , bench "10000/100" $ whnf (uncurry compare) (s10000, s100) + ] ] {- diff --git a/containers/src/Data/Sequence/Internal.hs b/containers/src/Data/Sequence/Internal.hs index 2f9266ff1..d3222d869 100644 --- a/containers/src/Data/Sequence/Internal.hs +++ b/containers/src/Data/Sequence/Internal.hs @@ -908,10 +908,12 @@ instance Alternative Seq where (<|>) = (><) instance Eq a => Eq (Seq a) where - xs == ys = length xs == length ys && toList xs == toList ys + xs == ys = liftEq (==) xs ys + {-# INLINABLE (==) #-} instance Ord a => Ord (Seq a) where - compare xs ys = compare (toList xs) (toList ys) + compare xs ys = liftCompare compare xs ys + {-# INLINABLE compare #-} #ifdef TESTING instance Show a => Show (Seq a) where @@ -929,11 +931,49 @@ instance Show1 Seq where -- | @since 0.5.9 instance Eq1 Seq where - liftEq eq xs ys = length xs == length ys && liftEq eq (toList xs) (toList ys) + liftEq eq xs ys = + sameSize xs ys && sameSizeLiftEqLists eq (toList xs) (toList ys) + {-# INLINE liftEq #-} -- | @since 0.5.9 instance Ord1 Seq where - liftCompare cmp xs ys = liftCompare cmp (toList xs) (toList ys) + liftCompare f xs ys = liftCmpLists f (toList xs) (toList ys) + {-# INLINE liftCompare #-} + +-- Note [Eq and Ord] +-- ~~~~~~~~~~~~~~~~~ +-- Eq and Ord for Seq are implemented by converting to lists, which turns out +-- to be quite efficient. +-- However, we define our own functions to work with lists because the relevant +-- list functions in base have performance issues (liftEq and liftCompare are +-- recursive and cannot inline, (==) and compare are not INLINABLE and cannot +-- specialize). + +-- Same as `length xs == length ys` but uses the structure invariants to skip +-- unnecessary cases. +sameSize :: Seq a -> Seq b -> Bool +sameSize (Seq t1) (Seq t2) = case (t1, t2) of + (EmptyT, EmptyT) -> True + (Single _, Single _) -> True + (Deep v1 _ _ _, Deep v2 _ _ _) -> v1 == v2 + _ -> False + +-- Assumes the lists are of equal size to skip some cases. +sameSizeLiftEqLists :: (a -> b -> Bool) -> [a] -> [b] -> Bool +sameSizeLiftEqLists eq = go + where + go (x:xs) (y:ys) = eq x y && go xs ys + go _ _ = True +{-# INLINE sameSizeLiftEqLists #-} + +liftCmpLists :: (a -> b -> Ordering) -> [a] -> [b] -> Ordering +liftCmpLists cmp = go + where + go [] [] = EQ + go [] (_:_) = LT + go (_:_) [] = GT + go (x:xs) (y:ys) = cmp x y <> go xs ys +{-# INLINE liftCmpLists #-} instance Read a => Read (Seq a) where #ifdef __GLASGOW_HASKELL__