diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs index decd4a35b..9d4de3484 100644 --- a/Data/ByteString/Lazy.hs +++ b/Data/ByteString/Lazy.hs @@ -1,8 +1,11 @@ -{-# LANGUAGE BangPatterns #-} {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} {-# OPTIONS_HADDOCK prune #-} {-# LANGUAGE Trustworthy #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE ScopedTypeVariables #-} + -- | -- Module : Data.ByteString.Lazy -- Copyright : (c) Don Stewart 2006 @@ -237,9 +240,9 @@ import qualified Data.ByteString as P (ByteString) -- type name only import qualified Data.ByteString as S -- S for strict (hmm...) import qualified Data.ByteString.Internal.Type as S import qualified Data.ByteString.Unsafe as S -import qualified Data.ByteString.Lazy.Internal.Deque as D import Data.ByteString.Lazy.Internal +import Control.Exception (assert) import Control.Monad (mplus) import Data.Word (Word8) import Data.Int (Int64) @@ -790,15 +793,75 @@ take i cs0 = take' i cs0 -- -- @since 0.11.2.0 takeEnd :: Int64 -> ByteString -> ByteString -takeEnd i _ | i <= 0 = Empty -takeEnd i cs0 = takeEnd' i cs0 - where takeEnd' 0 _ = Empty - takeEnd' n cs = - snd $ foldrChunks takeTuple (n,Empty) cs - takeTuple _ (0, cs) = (0, cs) - takeTuple c (n, cs) - | n > fromIntegral (S.length c) = (n - fromIntegral (S.length c), Chunk c cs) - | otherwise = (0, Chunk (S.takeEnd (fromIntegral n) c) cs) +takeEnd i bs + | i <= 0 = Empty + | otherwise = splitAtEndFold (\_ res -> res) id i bs + +-- | Helper function for implementing 'takeEnd' and 'dropEnd' +splitAtEndFold + :: forall result + . (S.StrictByteString -> result -> result) + -- ^ What to do when one chunk of output is ready + -- (The StrictByteString will not be empty.) + -> (ByteString -> result) + -- ^ What to do when the split-point is reached + -> Int64 + -- ^ Number of bytes to leave at the end (must be strictly positive) + -> ByteString -- ^ Input ByteString + -> result +{-# INLINE splitAtEndFold #-} +splitAtEndFold step end len bs0 = assert (len > 0) $ case bs0 of + Empty -> end Empty + Chunk c t -> goR len c t t + where + -- Idea: Keep two references into the input ByteString: + -- "toSplit" tracks the current split point, + -- "toScan" tracks the yet-unprocessed tail. + -- When they are closer than "len" bytes apart, process more input. ("goR") + -- When they are at least "len" bytes apart, produce more output. ("goL") + -- We always have that "toScan" is a suffix of "toSplit", + -- and "toSplit" is a suffix of the original input (bs0). + goR :: Int64 -> S.StrictByteString -> ByteString -> ByteString -> result + goR !undershoot nextOutput@(S.BS noFp noLen) toSplit toScan = + assert (undershoot > 0) $ + -- INVARIANT: length toSplit == length toScan + len - undershoot + -- (not 'assert'ed because that would break our laziness properties) + case toScan of + Empty + | undershoot >= intToInt64 noLen + -> end (Chunk nextOutput toSplit) + | undershootW <- fromIntegral @Int64 @Int undershoot + -- conversion Int64->Int is OK because 0 < undershoot < noLen + , splitIndex <- noLen - undershootW + , beforeSplit <- S.BS noFp splitIndex + , afterSplit <- S.BS (noFp `S.plusForeignPtr` splitIndex) undershootW + -> step beforeSplit $ end (Chunk afterSplit toSplit) + + Chunk (S.BS _ cLen) newBsR + | cLen64 <- intToInt64 cLen + , undershoot > cLen64 + -> goR (undershoot - cLen64) nextOutput toSplit newBsR + | undershootW <- fromIntegral @Int64 @Int undershoot + -> step nextOutput $ goL (cLen - undershootW) toSplit newBsR + + goL :: Int -> ByteString -> ByteString -> result + goL !overshoot toSplit toScan = + assert (overshoot >= 0) $ + -- INVARIANT: length toSplit == length toScan + len + intToInt64 overshoot + -- (not 'assert'ed because that would break our laziness properties) + case toSplit of + Empty -> splitAtEndFoldInvariantFailed + Chunk c@(S.BS _ cLen) newBsL + | overshoot >= cLen + -> step c $ goL (overshoot - cLen) newBsL toScan + | otherwise + -> goR (intToInt64 $ cLen - overshoot) c newBsL toScan + +splitAtEndFoldInvariantFailed :: a +-- See Note [Float error calls out of INLINABLE things] in D.B.Internal.Type +splitAtEndFoldInvariantFailed = + moduleError "splitAtEndFold" + "internal error: toSplit not longer than toScan" -- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@ -- elements, or 'empty' if @n > 'length' xs@. @@ -824,44 +887,9 @@ drop i cs0 = drop' i cs0 -- -- @since 0.11.2.0 dropEnd :: Int64 -> ByteString -> ByteString -dropEnd i p | i <= 0 = p -dropEnd i p = go D.empty p - where go :: D.Deque -> ByteString -> ByteString - go deque (Chunk c cs) - | D.byteLength deque < i = go (D.snoc c deque) cs - | otherwise = - let (output, deque') = getOutput empty (D.snoc c deque) - in foldrChunks Chunk (go deque' cs) output - go deque Empty = fromDeque $ dropEndBytes deque i - - len c = fromIntegral (S.length c) - - -- get a `ByteString` from all the front chunks of the accumulating deque - -- for which we know they won't be dropped - getOutput :: ByteString -> D.Deque -> (ByteString, D.Deque) - getOutput out deque = case D.popFront deque of - Nothing -> (reverseChunks out, deque) - Just (x, deque') | D.byteLength deque' >= i -> - getOutput (Chunk x out) deque' - _ -> (reverseChunks out, deque) - - -- reverse a `ByteString`s chunks, keeping all internal `S.StrictByteString`s - -- unchanged - reverseChunks = foldlChunks (flip Chunk) empty - - -- drop n elements from the rear of the accumulating `deque` - dropEndBytes :: D.Deque -> Int64 -> D.Deque - dropEndBytes deque n = case D.popRear deque of - Nothing -> deque - Just (deque', x) | len x <= n -> dropEndBytes deque' (n - len x) - | otherwise -> - D.snoc (S.dropEnd (fromIntegral n) x) deque' - - -- build a lazy ByteString from an accumulating `deque` - fromDeque :: D.Deque -> ByteString - fromDeque deque = - List.foldr chunk Empty (D.front deque) `append` - List.foldl' (flip chunk) Empty (D.rear deque) +dropEnd i p + | i <= 0 = p + | otherwise = splitAtEndFold Chunk (const Empty) i p -- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. splitAt :: Int64 -> ByteString -> (ByteString, ByteString) @@ -1688,6 +1716,9 @@ revNonEmptyChunks = List.foldl' (flip Chunk) Empty revChunks :: [P.ByteString] -> ByteString revChunks = List.foldl' (flip chunk) Empty +intToInt64 :: Int -> Int64 +intToInt64 = fromIntegral @Int @Int64 + -- $IOChunk -- -- ⚠ Using lazy I\/O functions like 'readFile' or 'hGetContents' diff --git a/Data/ByteString/Lazy/Internal/Deque.hs b/Data/ByteString/Lazy/Internal/Deque.hs deleted file mode 100644 index d3b436878..000000000 --- a/Data/ByteString/Lazy/Internal/Deque.hs +++ /dev/null @@ -1,65 +0,0 @@ -{- | - A Deque used for accumulating `S.StrictByteString`s in `Data.ByteString.Lazy.dropEnd`. --} -module Data.ByteString.Lazy.Internal.Deque ( - Deque (..), - empty, - null, - cons, - snoc, - popFront, - popRear, -) where - -import qualified Data.ByteString as S -import Data.Int (Int64) -import Prelude hiding (head, tail, length, null) - --- A `S.StrictByteString` Deque used as an accumulator for lazy --- Bytestring operations -data Deque = Deque - { front :: [S.StrictByteString] - , rear :: [S.StrictByteString] - , -- | Total length in bytes - byteLength :: !Int64 - } - --- An empty Deque -empty :: Deque -empty = Deque [] [] 0 - --- Is the `Deque` empty? --- O(1) -null :: Deque -> Bool -null deque = byteLength deque == 0 - --- Add a `S.StrictByteString` to the front of the `Deque` --- O(1) -cons :: S.StrictByteString -> Deque -> Deque -cons x (Deque fs rs acc) = Deque (x : fs) rs (acc + len x) - --- Add a `S.StrictByteString` to the rear of the `Deque` --- O(1) -snoc :: S.StrictByteString -> Deque -> Deque -snoc x (Deque fs rs acc) = Deque fs (x : rs) (acc + len x) - -len :: S.StrictByteString -> Int64 -len x = fromIntegral $ S.length x - --- Pop a `S.StrictByteString` from the front of the `Deque` --- Returns the bytestring and the updated Deque, or Nothing if the Deque is empty --- O(1) , occasionally O(n) -popFront :: Deque -> Maybe (S.StrictByteString, Deque) -popFront (Deque [] rs acc) = case reverse rs of - [] -> Nothing - x : xs -> Just (x, Deque xs [] (acc - len x)) -popFront (Deque (x : xs) rs acc) = Just (x, Deque xs rs (acc - len x)) - --- Pop a `S.StrictByteString` from the rear of the `Deque` --- Returns the bytestring and the updated Deque, or Nothing if the Deque is empty --- O(1) , occasionally O(n) -popRear :: Deque -> Maybe (Deque, S.StrictByteString) -popRear (Deque fs [] acc) = case reverse fs of - [] -> Nothing - x : xs -> Just (Deque [] xs (acc - len x), x) -popRear (Deque fs (x : xs) acc) = Just (Deque fs xs (acc - len x), x) diff --git a/bench/BenchAll.hs b/bench/BenchAll.hs index 3daa09463..85f348748 100644 --- a/bench/BenchAll.hs +++ b/bench/BenchAll.hs @@ -20,6 +20,8 @@ import Data.Semigroup import Data.String import Test.Tasty.Bench import Prelude hiding (words) +import qualified Data.List as List +import Control.DeepSeq import qualified Data.ByteString as S import qualified Data.ByteString.Char8 as S8 @@ -99,9 +101,12 @@ lazyByteStringData = case S.splitAt (nRepl `div` 2) byteStringData of {-# NOINLINE smallChunksData #-} smallChunksData :: L.ByteString -smallChunksData - = L.fromChunks [S.take sz (S.drop n byteStringData) - | let sz = 48, n <- [0, sz .. S.length byteStringData]] +smallChunksData = L.fromChunks $ List.unfoldr step (byteStringData, 1) + where + step (!s, !i) + | S.null s = Nothing + | otherwise = case S.splitAt i s of + (!s1, !s2) -> Just (s1, (s2, i * 71 `mod` 97)) {-# NOINLINE byteStringChunksData #-} byteStringChunksData :: [S.ByteString] @@ -419,6 +424,19 @@ main = do [ bench "strict" $ nf S.tails byteStringData , bench "lazy" $ nf L.tails lazyByteStringData ] + , bgroup "splitAtEnd (lazy)" $ let + testSAE op = \bs -> [op i bs | i <- [0,5..L.length bs]] `deepseq` () + {-# INLINE testSAE #-} + in + [ bench "takeEnd" $ + nf (testSAE L.takeEnd) lazyByteStringData + , bench "takeEnd (small chunks)" $ + nf (testSAE L.takeEnd) smallChunksData + , bench "dropEnd" $ + nf (testSAE L.dropEnd) lazyByteStringData + , bench "dropEnd (small chunks)" $ + nf (testSAE L.dropEnd) smallChunksData + ] , bgroup "sort" $ map (\s -> bench (S8.unpack s) $ nf S.sort s) sortInputs , bgroup "stimes" $ let st = stimes :: Int -> S.ByteString -> S.ByteString in diff --git a/bytestring.cabal b/bytestring.cabal index eea29d17b..1b2b5ebe8 100644 --- a/bytestring.cabal +++ b/bytestring.cabal @@ -106,7 +106,6 @@ library Data.ByteString.Builder.RealFloat.Internal Data.ByteString.Builder.RealFloat.TableGenerator Data.ByteString.Internal.Type - Data.ByteString.Lazy.Internal.Deque Data.ByteString.Lazy.ReadInt Data.ByteString.Lazy.ReadNat Data.ByteString.ReadInt diff --git a/tests/Properties/ByteString.hs b/tests/Properties/ByteString.hs index ef3bbc77b..fec5d7f0a 100644 --- a/tests/Properties/ByteString.hs +++ b/tests/Properties/ByteString.hs @@ -80,6 +80,10 @@ import Test.Tasty import Test.Tasty.QuickCheck import QuickCheckUtils +#ifdef BYTESTRING_LAZY +import Data.Int +#endif + #ifndef BYTESTRING_CHAR8 toElem :: Word8 -> Word8 toElem = id @@ -114,16 +118,25 @@ instance Arbitrary Natural where testRdInt :: forall a. (Arbitrary a, RdInt a) => String -> TestTree testRdInt s = testGroup s $ - [ testProperty "from string" $ \ prefix value suffix -> + [ testProperty "from string" $ int64OK $ \value prefix suffix -> let si = show @a value b = prefix <> B.pack si <> suffix in fmap (second B.unpack) (bread @a b) === sread @a (B.unpack prefix ++ si ++ B.unpack suffix) - , testProperty "from number" $ \n -> + , testProperty "from number" $ int64OK $ \n -> bread @a (B.pack (show n)) === Just (n, B.empty) ] #endif +intToIndexTy :: Int -> IndexTy +#ifdef BYTESTRING_LAZY +type IndexTy = Int64 +intToIndexTy = fromIntegral @Int @Int64 +#else +type IndexTy = Int +intToIndexTy = id +#endif + tests :: [TestTree] tests = [ testProperty "pack . unpack" $ @@ -308,7 +321,7 @@ tests = #endif , testProperty "drop" $ - \n x -> B.unpack (B.drop n x) === List.genericDrop n (B.unpack x) + \(intToIndexTy -> n) x -> B.unpack (B.drop n x) === List.genericDrop n (B.unpack x) , testProperty "drop 10" $ \x -> let n = 10 in B.unpack (B.drop n x) === List.genericDrop n (B.unpack x) , testProperty "drop 2^31" $ @@ -325,7 +338,7 @@ tests = #endif , testProperty "take" $ - \n x -> B.unpack (B.take n x) === List.genericTake n (B.unpack x) + \(intToIndexTy -> n) x -> B.unpack (B.take n x) === List.genericTake n (B.unpack x) , testProperty "take 10" $ \x -> let n = 10 in B.unpack (B.take n x) === List.genericTake n (B.unpack x) , testProperty "take 2^31" $ @@ -342,11 +355,11 @@ tests = #endif , testProperty "dropEnd" $ - \n x -> B.dropEnd n x === B.take (B.length x - n) x + \(intToIndexTy -> n) x -> B.dropEnd n x === B.take (B.length x - n) x , testProperty "dropWhileEnd" $ \f x -> B.dropWhileEnd f x === B.reverse (B.dropWhile f (B.reverse x)) , testProperty "takeEnd" $ - \n x -> B.takeEnd n x === B.drop (B.length x - n) x + \(intToIndexTy -> n) x -> B.takeEnd n x === B.drop (B.length x - n) x , testProperty "takeWhileEnd" $ \f x -> B.takeWhileEnd f x === B.reverse (B.takeWhile f (B.reverse x)) @@ -366,7 +379,7 @@ tests = , testProperty "compareLength 4" $ \x (toElem -> c) -> B.compareLength (B.snoc x c <> undefined) (B.length x) === GT , testProperty "compareLength 5" $ - \x n -> B.compareLength x n === compare (B.length x) n + \x (intToIndexTy -> n) -> B.compareLength x n === compare (B.length x) n , testProperty "dropEnd lazy" $ \(toElem -> c) -> B.take 1 (B.dropEnd 1 (B.singleton c <> B.singleton c <> B.singleton c <> undefined)) === B.singleton c , testProperty "dropWhileEnd lazy" $ @@ -470,7 +483,8 @@ tests = (l1 == l2 || l1 == l2 + 1) && sum (map B.length splits) + l2 == B.length x , testProperty "splitAt" $ - \n x -> (B.unpack *** B.unpack) (B.splitAt n x) === List.genericSplitAt n (B.unpack x) + \(intToIndexTy -> n) x -> (B.unpack *** B.unpack) (B.splitAt n x) + === List.genericSplitAt n (B.unpack x) , testProperty "splitAt 10" $ \x -> let n = 10 in (B.unpack *** B.unpack) (B.splitAt n x) === List.genericSplitAt n (B.unpack x) , testProperty "splitAt (2^31)" $ @@ -594,13 +608,13 @@ tests = #endif , testProperty "index" $ - \(NonNegative n) x -> fromIntegral n < B.length x ==> B.index x (fromIntegral n) === B.unpack x !! n + \(NonNegative n) x -> intToIndexTy n < B.length x ==> B.index x (intToIndexTy n) === B.unpack x !! n , testProperty "indexMaybe" $ - \(NonNegative n) x -> fromIntegral n < B.length x ==> B.indexMaybe x (fromIntegral n) === Just (B.unpack x !! n) + \(NonNegative n) x -> intToIndexTy n < B.length x ==> B.indexMaybe x (intToIndexTy n) === Just (B.unpack x !! n) , testProperty "indexMaybe Nothing" $ - \n x -> (n :: Int) < 0 || fromIntegral n >= B.length x ==> B.indexMaybe x (fromIntegral n) === Nothing + \n x -> n < 0 || intToIndexTy n >= B.length x ==> B.indexMaybe x (intToIndexTy n) === Nothing , testProperty "!?" $ - \n x -> B.indexMaybe x (fromIntegral (n :: Int)) === x B.!? (fromIntegral n) + \(intToIndexTy -> n) x -> B.indexMaybe x n === x B.!? n #ifdef BYTESTRING_CHAR8 , testProperty "isString" $ diff --git a/tests/QuickCheckUtils.hs b/tests/QuickCheckUtils.hs index db885c672..64bb1d59a 100644 --- a/tests/QuickCheckUtils.hs +++ b/tests/QuickCheckUtils.hs @@ -1,12 +1,16 @@ +{-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE UndecidableInstances #-} module QuickCheckUtils ( Char8(..) , String8(..) , CByteString(..) , Sqrt(..) + , int64OK ) where import Test.Tasty.QuickCheck @@ -18,6 +22,7 @@ import Data.Word import Data.Int import System.IO import Foreign.C (CChar) +import GHC.TypeLits (TypeError, ErrorMessage(..)) import qualified Data.ByteString.Short as SB import qualified Data.ByteString as P @@ -112,3 +117,22 @@ instance Arbitrary SB.ShortByteString where instance CoArbitrary SB.ShortByteString where coarbitrary s = coarbitrary (SB.unpack s) + +-- | This /poison instance/ exists to make accidental mis-use +-- of the @Arbitrary Int64@ instance a bit less likely. +instance {-# OVERLAPPING #-} + TypeError (Text "Found a test taking a raw Int64 argument." + :$$: Text "'instance Arbitrary Int64' by default is likely to" + :$$: Text "produce very large numbers after the first few tests," + :$$: Text "which doesn't make great indices into a LazyByteString." + :$$: Text "For indices, try 'intToIndexTy' in Properties/ByteString.hs." + :$$: Text "" + :$$: Text "If very few small-numbers tests is OK, use" + :$$: Text "'int64OK' to bypass this poison-instance." + ) => Testable (Int64 -> prop) where + property = error "poison instance Testable (Int64 -> prop)" + +-- | Use this to bypass the poison instance for @Testable (Int64 -> prop)@ +-- defined in "QuickCheckUtils". +int64OK :: (Arbitrary a, Show a, Testable b) => (a -> b) -> Property +int64OK f = propertyForAllShrinkShow arbitrary shrink (\v -> [show v]) f diff --git a/tests/builder/Data/ByteString/Builder/Tests.hs b/tests/builder/Data/ByteString/Builder/Tests.hs index a7ab9131a..0d5afc6ba 100644 --- a/tests/builder/Data/ByteString/Builder/Tests.hs +++ b/tests/builder/Data/ByteString/Builder/Tests.hs @@ -52,9 +52,9 @@ import System.Posix.Internals (c_unlink) import Test.Tasty (TestTree, TestName, testGroup) import Test.Tasty.QuickCheck - ( Arbitrary(..), oneof, choose, listOf, elements, forAll + ( Arbitrary(..), oneof, choose, listOf, elements , counterexample, ioProperty, Property, testProperty - , (===), (.&&.), conjoin + , (===), (.&&.), conjoin, forAll, forAllShrink , UnicodeString(..), NonNegative(..) ) import QuickCheckUtils @@ -538,7 +538,8 @@ testBuilderConstr :: (Arbitrary a, Show a) testBuilderConstr name ref mkBuilder = testProperty name check where - check x = forAll (choose (0, maxPaddingAmount)) $ \paddingAmount -> let + check = int64OK $ \x -> + forAllShrink genPaddingAmount shrink $ \paddingAmount -> let -- use padding to make sure we test at unaligned positions ws = ref x b1 = mkBuilder x @@ -548,6 +549,7 @@ testBuilderConstr name ref mkBuilder = maxPaddingAmount = 15 padBuf = S.replicate maxPaddingAmount (S.c2w ' ') + genPaddingAmount = choose (0, maxPaddingAmount) testsBinary :: [TestTree]