diff --git a/benchmarks/haskell/Benchmarks/Pure.hs b/benchmarks/haskell/Benchmarks/Pure.hs index cec8a2d1..b411c17d 100644 --- a/benchmarks/haskell/Benchmarks/Pure.hs +++ b/benchmarks/haskell/Benchmarks/Pure.hs @@ -208,6 +208,14 @@ benchmark kind ~Env{..} = [ benchT $ nf (T.zipWith min tb) ta , benchTL $ nf (TL.zipWith min tlb) tla ] + , bgroup "length . unpack" -- length should fuse with unpack + [ benchT $ nf (L.length . T.unpack) ta + , benchTL $ nf (L.length . TL.unpack) tla + ] + , bgroup "length . drop 1 . unpack" -- no list fusion because of drop 1 + [ benchT $ nf (L.length . L.drop 1 . T.unpack) ta + , benchTL $ nf (L.length . L.drop 1 . TL.unpack) tla + ] , bgroup "length" [ bgroup "cons" [ benchT $ nf (T.length . T.cons c) ta diff --git a/src/Data/Text/Lazy.hs b/src/Data/Text/Lazy.hs index 4ea92ac7..0bb73972 100644 --- a/src/Data/Text/Lazy.hs +++ b/src/Data/Text/Lazy.hs @@ -424,7 +424,23 @@ unpack :: #endif Text -> String unpack t = S.unstreamList (stream t) -{-# INLINE [1] unpack #-} +{-# NOINLINE unpack #-} + +foldrFB :: (Char -> b -> b) -> b -> Text -> b +foldrFB = foldr +{-# INLINE [0] foldrFB #-} + +-- List fusion rules for `unpack`: +-- * `unpack` rewrites to `build` up till (but not including) phase 1. `build` +-- fuses if `foldr` is applied to it. +-- * If it doesn't fuse: In phase 1, `build` inlines to give us `foldrFB (:) []` +-- and we rewrite that back to `unpack`. +-- * If it fuses: In phase 0, `foldrFB` inlines and `foldr` inlines. GHC +-- optimizes the fused code. +{-# RULES +"Text.Lazy.unpack" [~1] forall t. unpack t = Exts.build (\lcons lnil -> foldrFB lcons lnil t) +"Text.Lazy.unpackBack" [1] foldrFB (:) [] = unpack + #-} -- | /O(n)/ Convert a literal string into a Text. unpackCString# :: Addr# -> Text diff --git a/src/Data/Text/Show.hs b/src/Data/Text/Show.hs index 696d1354..6a0efe70 100644 --- a/src/Data/Text/Show.hs +++ b/src/Data/Text/Show.hs @@ -31,6 +31,7 @@ import Data.Text.Internal.Encoding.Utf8 (utf8Length) import Data.Text.Internal.Unsafe.Char (unsafeWrite) import Data.Text.Unsafe (Iter(..), iterArray) import GHC.Exts (Ptr(..), Int(..), Addr#, indexWord8OffAddr#) +import qualified GHC.Exts as Exts import GHC.Word (Word8(..)) import qualified Data.Text.Array as A #if !MIN_VERSION_ghc_prim(0,7,0) @@ -53,12 +54,32 @@ unpack :: HasCallStack => #endif Text -> String -unpack (Text arr off len) = go off +unpack t = foldrText (:) [] t +{-# NOINLINE unpack #-} + +foldrText :: (Char -> b -> b) -> b -> Text -> b +foldrText f z (Text arr off len) = go off where go !i - | i >= off + len = [] - | otherwise = let !(Iter c l) = iterArray arr i in c : go (i + l) -{-# INLINE [1] unpack #-} + | i >= off + len = z + | otherwise = let !(Iter c l) = iterArray arr i in f c (go (i + l)) +{-# INLINE foldrText #-} + +foldrTextFB :: (Char -> b -> b) -> b -> Text -> b +foldrTextFB = foldrText +{-# INLINE [0] foldrTextFB #-} + +-- List fusion rules for `unpack`: +-- * `unpack` rewrites to `build` up till (but not including) phase 1. `build` +-- fuses if `foldr` is applied to it. +-- * If it doesn't fuse: In phase 1, `build` inlines to give us +-- `foldrTextFB (:) []` and we rewrite that back to `unpack`. +-- * If it fuses: In phase 0, `foldrTextFB` inlines and `foldrText` inlines. GHC +-- optimizes the fused code. +{-# RULES +"Text.unpack" [~1] forall t. unpack t = Exts.build (\lcons lnil -> foldrTextFB lcons lnil t) +"Text.unpackBack" [1] foldrTextFB (:) [] = unpack + #-} -- | /O(n)/ Convert a null-terminated --