From 028e187120c2cd6ed9f74d67a03bec1431f74be3 Mon Sep 17 00:00:00 2001
From: Soumik Sarkar <soumiksarkar.3120@gmail.com>
Date: Wed, 11 Sep 2024 20:14:06 +0530
Subject: [PATCH] More efficient Eq, Ord for Seq (#1035)

* Add benchmarks
* Keep the list-based implementation, for now, but define the list
  comparisons ourselves to avoid base's performance issues.
  On Seq Int and with GHC 9.6.3, benchmark times improve by ~40%.
---
 containers-tests/benchmarks/Sequence.hs  | 12 +++++-
 containers/src/Data/Sequence/Internal.hs | 48 ++++++++++++++++++++++--
 2 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/containers-tests/benchmarks/Sequence.hs b/containers-tests/benchmarks/Sequence.hs
index feea81ae2..5355a21e7 100644
--- a/containers-tests/benchmarks/Sequence.hs
+++ b/containers-tests/benchmarks/Sequence.hs
@@ -4,7 +4,7 @@ import Control.Applicative
 import Control.DeepSeq (rnf)
 import Control.Exception (evaluate)
 import Control.Monad.Trans.State.Strict
-import Test.Tasty.Bench (bench, bgroup, defaultMain, nf)
+import Test.Tasty.Bench (bench, bgroup, defaultMain, nf, whnf)
 import Data.Foldable (foldl', foldr')
 import qualified Data.Sequence as S
 import qualified Data.Foldable
@@ -174,6 +174,16 @@ main = do
             , bench "1000"  $ nf (S.unstableSortOn id) rs1000
             , bench "10000" $ nf (S.unstableSortOn id) rs10000]
          ]
+      , bgroup "eq"
+        [ bench "100/100" $ whnf (\s' -> s' == s') s100
+        , bench "10000/10000" $  whnf (\s' -> s' == s') s10000
+        ]
+      , bgroup "compare"
+        [ bench "100/100" $ whnf (uncurry compare) (s100, s100)
+        , bench "10000/10000" $  whnf (uncurry compare) (s10000, s10000)
+        , bench "100/10000" $  whnf (uncurry compare) (s100, s10000)
+        , bench "10000/100" $  whnf (uncurry compare) (s10000, s100)
+        ]
       ]
 
 {-
diff --git a/containers/src/Data/Sequence/Internal.hs b/containers/src/Data/Sequence/Internal.hs
index 2f9266ff1..d3222d869 100644
--- a/containers/src/Data/Sequence/Internal.hs
+++ b/containers/src/Data/Sequence/Internal.hs
@@ -908,10 +908,12 @@ instance Alternative Seq where
     (<|>) = (><)
 
 instance Eq a => Eq (Seq a) where
-    xs == ys = length xs == length ys && toList xs == toList ys
+  xs == ys = liftEq (==) xs ys
+  {-# INLINABLE (==) #-}
 
 instance Ord a => Ord (Seq a) where
-    compare xs ys = compare (toList xs) (toList ys)
+  compare xs ys = liftCompare compare xs ys
+  {-# INLINABLE compare #-}
 
 #ifdef TESTING
 instance Show a => Show (Seq a) where
@@ -929,11 +931,49 @@ instance Show1 Seq where
 
 -- | @since 0.5.9
 instance Eq1 Seq where
-    liftEq eq xs ys = length xs == length ys && liftEq eq (toList xs) (toList ys)
+  liftEq eq xs ys =
+    sameSize xs ys && sameSizeLiftEqLists eq (toList xs) (toList ys)
+  {-# INLINE liftEq #-}
 
 -- | @since 0.5.9
 instance Ord1 Seq where
-    liftCompare cmp xs ys = liftCompare cmp (toList xs) (toList ys)
+  liftCompare f xs ys = liftCmpLists f (toList xs) (toList ys)
+  {-# INLINE liftCompare #-}
+
+-- Note [Eq and Ord]
+-- ~~~~~~~~~~~~~~~~~
+-- Eq and Ord for Seq are implemented by converting to lists, which turns out
+-- to be quite efficient.
+-- However, we define our own functions to work with lists because the relevant
+-- list functions in base have performance issues (liftEq and liftCompare are
+-- recursive and cannot inline, (==) and compare are not INLINABLE and cannot
+-- specialize).
+
+-- Same as `length xs == length ys` but uses the structure invariants to skip
+-- unnecessary cases.
+sameSize :: Seq a -> Seq b -> Bool
+sameSize (Seq t1) (Seq t2) = case (t1, t2) of
+  (EmptyT, EmptyT) -> True
+  (Single _, Single _) -> True
+  (Deep v1 _ _ _, Deep v2 _ _ _) -> v1 == v2
+  _ -> False
+
+-- Assumes the lists have equal length, so mismatched-length cases are not checked.
+sameSizeLiftEqLists :: (a -> b -> Bool) -> [a] -> [b] -> Bool
+sameSizeLiftEqLists eq = go
+  where
+    go (x:xs) (y:ys) = eq x y && go xs ys
+    go _ _ = True
+{-# INLINE sameSizeLiftEqLists #-}
+
+liftCmpLists :: (a -> b -> Ordering) -> [a] -> [b] -> Ordering
+liftCmpLists cmp = go
+  where
+    go [] [] = EQ
+    go [] (_:_) = LT
+    go (_:_) [] = GT
+    go (x:xs) (y:ys) = cmp x y <> go xs ys
+{-# INLINE liftCmpLists #-}
 
 instance Read a => Read (Seq a) where
 #ifdef __GLASGOW_HASKELL__