Skip to content

Commit

Permalink
Implement pure-Haskell reverse
Browse files Browse the repository at this point in the history
  • Loading branch information
chreekat committed Oct 22, 2023
1 parent 9fc523c commit 6204458
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 27 deletions.
3 changes: 3 additions & 0 deletions cbits/measure_off.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ static inline const ssize_t measure_off_naive(const uint8_t *src, const uint8_t
while (src < srcend - 7){
uint64_t w64;
memcpy(&w64, src, sizeof(uint64_t));
// find leading bytes by finding every byte that is not a continuation
// byte. The bit twiddle only results in a 0 if the original byte starts
// with 0b11...
w64 = ((w64 << 1) | ~w64) & 0x8080808080808080ULL;
// compute the popcount of w64 with two bit shifts and a multiplication
size_t leads = ( (w64 >> 7) // w64 >> 7 = Sum{0<= i <= 7} x_i * 256^i (x_i \in {0,1})
Expand Down
27 changes: 2 additions & 25 deletions src/Data/Text.hs
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ import Prelude (Char, Bool(..), Int, Maybe(..), String,
Eq, (==), (/=), Ord(..), Ordering(..), (++),
Monad(..), pure, Read(..),
(&&), (||), (+), (-), (.), ($), ($!), (>>),
not, return, otherwise, quot, IO)
not, return, otherwise, quot)
import Control.DeepSeq (NFData(rnf))
#if defined(ASSERTS)
import Control.Exception (assert)
Expand All @@ -224,13 +224,13 @@ import Data.Data (Data(gfoldl, toConstr, gunfold, dataTypeOf), constrIndex,
Constr, mkConstr, DataType, mkDataType, Fixity(Prefix))
import Control.Monad (foldM)
import Control.Monad.ST (ST, runST)
import Control.Monad.ST.Unsafe (unsafeIOToST)
import qualified Data.Text.Array as A
import qualified Data.List as L hiding (head, tail)
import Data.Binary (Binary(get, put))
import Data.Monoid (Monoid(..))
import Data.Semigroup (Semigroup(..))
import Data.String (IsString(..))
import Data.Text.Internal.Reverse (reverse)
import Data.Text.Internal.Encoding.Utf8 (utf8Length, utf8LengthByLeader, chr2, chr3, chr4, ord2, ord3, ord4)
import qualified Data.Text.Internal.Fusion as S
import Data.Text.Internal.Fusion.CaseMapping (foldMapping, lowerMapping, upperMapping)
Expand Down Expand Up @@ -746,29 +746,6 @@ intersperse c t@(Text src o l) = if l == 0 then mempty else runST $ do
return (Text arr 0 (dstLen - cLen))
{-# INLINE [1] intersperse #-}

-- | /O(n)/ Reverse the characters of a string.
--
-- Example:
--
-- >>> T.reverse "desrever"
-- "reversed"
reverse ::
#if defined(ASSERTS)
HasCallStack =>
#endif
Text -> Text
reverse (Text (A.ByteArray ba) off len) = runST $ do
marr@(A.MutableByteArray mba) <- A.new len
unsafeIOToST $ c_reverse mba ba (intToCSize off) (intToCSize len)
brr <- A.unsafeFreeze marr
return $ Text brr 0 len
{-# INLINE reverse #-}

-- | The input buffer (src :: ByteArray#, off :: CSize, len :: CSize)
-- must specify a valid UTF-8 sequence, this condition is not checked.
foreign import ccall unsafe "_hs_text_reverse" c_reverse
:: Exts.MutableByteArray# s -> ByteArray# -> CSize -> CSize -> IO ()

-- | /O(m+n)/ Replace every non-overlapping occurrence of @needle@ in
-- @haystack@ with @replacement@.
--
Expand Down
102 changes: 102 additions & 0 deletions src/Data/Text/Internal/Reverse.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnliftedFFITypes #-}
{-# LANGUAGE NoImplicitPrelude #-}
#if defined(PURE_HASKELL)
{-# LANGUAGE BangPatterns #-}
#endif

{-# OPTIONS_HADDOCK not-home #-}

{-# OPTIONS_GHC -ddump-to-file -ddump-simpl -dsuppress-all -dno-suppress-type-signatures #-}

-- | Implements 'reverse', using efficient C routines by default.
module Data.Text.Internal.Reverse (reverse) where

#if !defined(PURE_HASKELL)
import GHC.Exts as Exts
import Control.Monad.ST.Unsafe (unsafeIOToST)
import Foreign.C.Types (CSize(..))
#else
import Control.Monad.ST (ST)
import Data.Text.Internal.Encoding.Utf8 (utf8LengthByLeader)
#endif
#if defined(ASSERTS)
import GHC.Stack (HasCallStack)
#endif
import Prelude hiding (reverse)
import Data.Text.Internal (Text(..))
import Control.Monad.ST (runST)
import qualified Data.Text.Array as A

-- | /O(n)/ Reverse the characters of a string.
--
-- Example:
--
-- $setup
-- >>> T.reverse "desrever"
-- "reversed"
reverse ::
#if defined(ASSERTS)
HasCallStack =>
#endif
Text -> Text
#if defined(PURE_HASKELL)
reverse (Text src off len) = runST $ do
dest <- A.new len
_ <- reversePoints src off dest len
result <- A.unsafeFreeze dest
pure $ Text result 0 len

-- Step 0:
--
-- Input: R E D R U M
-- ^
-- x
-- Output: _ _ _ _ _ _
-- ^
-- y
--
-- Step 1:
--
-- Input: R E D R U M
-- ^
-- x
--
-- Output: _ _ _ _ _ R
-- ^
-- y
reversePoints
:: A.Array -- ^ Input array
-> Int -- ^ Input index
-> A.MArray s -- ^ Output array
-> Int -- ^ Output index
-> ST s ()
reversePoints src xx dest yy = go xx yy where
go !_ y | y <= 0 = pure ()
go x y =
let pLen = utf8LengthByLeader (A.unsafeIndex src x)
-- The next y is also the start of the current point in the output
yNext = y - pLen
in do
A.copyI pLen dest yNext src x
go (x + pLen) yNext
#else
reverse (Text (A.ByteArray ba) off len) = runST $ do
marr@(A.MutableByteArray mba) <- A.new len
unsafeIOToST $ c_reverse mba ba (fromIntegral off) (fromIntegral len)
brr <- A.unsafeFreeze marr
return $ Text brr 0 len
#endif
{-# INLINE reverse #-}

#if !defined(PURE_HASKELL)
-- | The input buffer (src :: ByteArray#, off :: CSize, len :: CSize)
-- must specify a valid UTF-8 sequence, this condition is not checked.
foreign import ccall unsafe "_hs_text_reverse" c_reverse
:: Exts.MutableByteArray# s -> ByteArray# -> CSize -> CSize -> IO ()
#endif

-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import qualified Data.Text.Internal.Reverse as T
26 changes: 24 additions & 2 deletions text.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,35 @@ flag simdutf
default: True
manual: True

flag pure-haskell
description: Don't use text's standard C routines
NB: This feature is not fully implemented. Several C routines are still in
use.

When this flag is true, text will use pure Haskell variants of the
routines. This is not recommended except for use with GHC's JavaScript
backend.

This flag also disables simdutf.

default: False
manual: True

library
c-sources: cbits/is_ascii.c
if arch(javascript) || flag(pure-haskell)
cpp-options: -DPURE_HASKELL
c-sources: cbits/is_ascii.c
cbits/measure_off.c
cbits/utils.c
else
c-sources: cbits/is_ascii.c
cbits/measure_off.c
cbits/reverse.c
cbits/utils.c

hs-source-dirs: src

if flag(simdutf)
if flag(simdutf) && !(arch(javascript) || flag(pure-haskell))
exposed-modules: Data.Text.Internal.Validate.Simd
include-dirs: simdutf
cxx-sources: simdutf/simdutf.cpp
Expand Down Expand Up @@ -184,6 +205,7 @@ library

other-modules:
Data.Text.Show
Data.Text.Internal.Reverse

build-depends:
array >= 0.3 && < 0.6,
Expand Down

0 comments on commit 6204458

Please sign in to comment.