Skip to content

Commit

Permalink
Fast and correct decoding of doubles
Browse files Browse the repository at this point in the history
  • Loading branch information
gwils committed Jan 8, 2019
1 parent d096448 commit 40a08bb
Show file tree
Hide file tree
Showing 14 changed files with 75 additions and 53 deletions.
6 changes: 3 additions & 3 deletions benchmarks/src/Data/Sv/Random.hs
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,11 @@ shortRowEnc =

productEnc :: E.Encode Product
productEnc =
contramap i E.int <> contramap f E.float <> contramap d E.double
contramap i E.int <> contramap f E.float <> contramap d E.doubleFast

coproductEnc :: E.Encode Coproduct
coproductEnc =
E.encodeOf _I E.int <> E.encodeOf _B E.byteString <> E.encodeOf _D E.double
E.encodeOf _I E.int <> E.encodeOf _B E.byteString <> E.encodeOf _D E.doubleFast

longRowEnc :: E.Encode LongRow
longRowEnc = mconcat [
Expand All @@ -163,7 +163,7 @@ longRowEnc = mconcat [
, E.encodeOf lrI E.int
, E.encodeOf lrG E.integer
, E.encodeOf lrF E.float
, E.encodeOf lrD E.double
, E.encodeOf lrD E.doubleFast
, E.encodeOf lrP productEnc
, E.encodeOf lrC coproductEnc
]
Expand Down
4 changes: 2 additions & 2 deletions examples/csv/encoding-header.expected.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Bytestring,Text,Int,Double,Text2,Double2,Sum
Hello,Goodbye,5,5.1,text,0.5,20
Yes,no,200,-4.5,,22.0,19.3
a,b,0,0.0,"words ""words"" words",15.0,More words
Yes,no,200,-4.5,,22,19.3
a,b,0,0,"words ""words"" words",15,More words
4 changes: 2 additions & 2 deletions examples/csv/encoding.expected.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Hello,Goodbye,5,5.1,text,0.5,20
Yes,no,200,-4.5,,22.0,19.3
a,b,0,0.0,"words ""words"" words",15.0,More words
Yes,no,200,-4.5,,22,19.3
a,b,0,0,"words ""words"" words",15,More words
16 changes: 8 additions & 8 deletions examples/src/Data/Sv/Example/Encoding.hs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ makeLenses ''Example
-- divide :: (Product -> (Text, Int)) -> Encode Text -> Encode Int -> Encode Product
-- @
productEnc :: Encode Product
productEnc = divide (\(Product t d) -> (t,d)) E.text E.double
productEnc = divide (\(Product t d) -> (t,d)) E.text E.doubleFast

-- | Here we're defining an encoder for a 'Sum' using the 'choose' combinator.
--
Expand All @@ -92,11 +92,11 @@ productEnc = divide (\(Product t d) -> (t,d)) E.text E.double
-- @
-- split :: Sum -> Either Int (Either Double Text)
-- choose split :: Encode Int -> Encode (Either Double Text) -> Encode Sum
-- chosen E.double E.text :: Encode (Either Double Text)
-- chosen E.doubleFast E.text :: Encode (Either Double Text)
-- @
--
sumEnc :: Encode Sum
sumEnc = choose split E.int $ chosen E.double E.text
sumEnc = choose split E.int $ chosen E.doubleFast E.text
where
split s =
case s of
Expand All @@ -119,7 +119,7 @@ exampleEnc =
divide (\(Example b t i d p s) -> (b,(t,(i,(d,(p,s)))))) E.byteString $
divided E.text $
divided E.int $
divided E.double $
divided E.doubleFast $
divided productEnc sumEnc

examples :: [Example]
Expand Down Expand Up @@ -160,7 +160,7 @@ main = do
exampleEncContravariantExtras :: Encode Example
exampleEncContravariantExtras =
contramap (\(Example b t i d p s) -> (b,t,i,d,p,s)) $
contrazip6 E.byteString E.text E.int E.double productEnc sumEnc
contrazip6 E.byteString E.text E.int E.doubleFast productEnc sumEnc

-- | Bonus Round #2
--
Expand All @@ -177,17 +177,17 @@ exampleEncContravariantExtras =
-- This version is pretty clean. It's my favourite of the three :)
productEncLens :: Encode Product
productEncLens =
E.encodeOf p1 E.text <> E.encodeOf p2 E.double
E.encodeOf p1 E.text <> E.encodeOf p2 E.doubleFast

sumEncLens :: Encode Sum
sumEncLens =
E.encodeOf _Sum1 E.int <> E.encodeOf _Sum2 E.double <> E.encodeOf _Sum3 E.text
E.encodeOf _Sum1 E.int <> E.encodeOf _Sum2 E.doubleFast <> E.encodeOf _Sum3 E.text

exampleEncLens :: Encode Example
exampleEncLens =
E.encodeOf e1 E.byteString
<> E.encodeOf e2 E.text
<> E.encodeOf e3 E.int
<> E.encodeOf e4 E.double
<> E.encodeOf e4 E.doubleFast
<> E.encodeOf e5 productEncLens
<> E.encodeOf e6 sumEncLens
4 changes: 2 additions & 2 deletions examples/src/Data/Sv/Example/EncodingWithHeader.hs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ exampleEnc =
"Bytestring" =: E.encodeOf e1 E.byteString
<> "Text" =: E.encodeOf e2 E.text
<> "Int" =: E.encodeOf e3 E.int
<> "Double" =: E.encodeOf e4 E.double
<> "Double" =: E.encodeOf e4 E.doubleFast
-- Notice that we have had to inline the definition of productEnc to
-- attach a name to each component of it.
--
Expand All @@ -42,7 +42,7 @@ exampleEnc =
-- This could also be avoided by not using ADTs within the data
-- type to be encoded.
<> "Text2" =: E.encodeOf (e5.p1) E.text
<> "Double2" =: E.encodeOf (e5.p2) E.double
<> "Double2" =: E.encodeOf (e5.p2) E.doubleFast
<> "Sum" =: E.encodeOf e6 sumEnc


Expand Down
2 changes: 1 addition & 1 deletion examples/src/Data/Sv/Example/Numbers.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ opts :: ParseOptions
opts = defaultParseOptions & headedness .~ Unheaded

num :: Decode' ByteString (Maybe Double)
num = D.orEmpty D.rational
num = D.orEmpty D.double
<!> (D.exactly "unknown" <!> D.exactly "NULL") $> Nothing

main :: IO ()
Expand Down
17 changes: 7 additions & 10 deletions sv-core/src/Data/Sv/Decode/Core.hs
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ import Control.Monad.Reader (ReaderT (ReaderT, runReaderT))
import Control.Monad.State (state)
import Control.Monad.Writer.Strict (runWriter)
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.ByteString.Char8 as AC8
import Data.Bifunctor (first, second)
import Data.ByteString (ByteString)
import qualified Data.ByteString.UTF8 as UTF8
Expand All @@ -159,6 +160,7 @@ import qualified Data.Text.Read as TR (Reader, rational)
import qualified Data.Text.Lazy as LT
import Data.Vector (Vector, (!))
import qualified Data.Vector as V
import GHC.Float (double2Float)
import Text.Parsec (Parsec)
import qualified Text.Parsec as P (parse)
import Text.Read (readMaybe)
Expand Down Expand Up @@ -254,22 +256,17 @@ integer = named "integer"

-- | Decode a UTF-8 'ByteString' field as a 'Float'
--
-- The field is decoded with 'double' and the result is then narrowed with
-- 'double2Float'. Because the decoding itself is delegated to 'double', a
-- field that fails to decode is reported with the error produced by 'double'.
float :: Decode' ByteString Float
float = named "float"
float = double2Float <$> double

-- | Decode a UTF-8 'ByteString' field as a 'Double'
--
-- This is currently the fastest but least precise way to decode doubles.
-- 'rational' is more precise but slower. 'read' is the most precise, but
-- slower still.
--
-- If you aren't sure which to use, use 'read'.
-- This is currently the fastest and most precise way to decode doubles.
double :: Decode' ByteString Double
double = named "double"
double = withAttoparsec AC8.double <!> (
contents >>== \s -> badDecode $ "Couldn't decode \"" <> s <> "\" as a double"
)

-- | Decode a UTF-8 'ByteString' as any 'Floating' type (usually 'Double')
--
-- This is slower than 'double' but more precise. It is not as precise as
-- 'read'.
rational :: Floating a => Decode' ByteString a
rational = rat `o` utf8
where
Expand Down
10 changes: 10 additions & 0 deletions sv-core/src/Data/Sv/Encode/Core.hs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ module Data.Sv.Encode.Core (
, integer
, float
, double
, doubleFast
, boolTrueFalse
, booltruefalse
, boolyesno
Expand Down Expand Up @@ -137,6 +138,7 @@ import qualified Data.Bool as B (bool)
import qualified Data.ByteString as Strict
import qualified Data.ByteString.Builder as BS
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Double.Conversion.ByteString as DC
import Data.Foldable (fold)
import Data.Functor.Contravariant (Contravariant (contramap))
import Data.Functor.Contravariant.Compose (ComposeFC (ComposeFC, getComposeFC))
Expand Down Expand Up @@ -349,9 +351,17 @@ float :: Encode Float
float = unsafeBuilder BS.floatDec

-- | Encode a 'Double'
--
-- This version satisfies the roundtrip property. If that doesn't matter to you,
-- use the faster version, 'doubleFast'.
double :: Encode Double
double = unsafeBuilder BS.doubleDec

-- | Encode a 'Double' really quickly. This version uses @toShortest@ from the
-- @double-conversion@ package.
--
-- The text produced differs from that of 'double': whole numbers are written
-- without a fractional part (@2@ rather than @2.0@).
doubleFast :: Encode Double
doubleFast = contramap DC.toShortest unsafeByteString

-- | Encode a 'String'
string :: Encode String
string = escaped BS.stringUtf8
Expand Down
1 change: 1 addition & 0 deletions sv-core/sv-core.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ library
, containers >= 0.4 && < 0.7
, contravariant >= 1.2 && < 1.6
, deepseq >= 1.1 && < 1.5
, double-conversion >= 2 && < 2.1
, lens >= 4 && < 4.18
, mtl >= 2.0.1 && < 2.3
, parsec >= 3.1 && < 3.2
Expand Down
4 changes: 3 additions & 1 deletion sv/bench/bench.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ main :: IO ()
main =
defaultMain
[ bench "double" $ nf (pd D.double) doublesC
, bench "rational" $ nf (pd (D.rational :: Decode' BS.ByteString Double)) doublesC
, bench "rational double" $ nf (pd (D.rational :: Decode' BS.ByteString Double)) doublesC
, bench "read double" $ nf (pd (D.read :: Decode' BS.ByteString Double)) doublesC
, bench "float" $ nf (pd D.float) doublesC
, bench "rational float" $ nf (pd (D.rational :: Decode' BS.ByteString Float)) doublesC
]

doubles :: LBS.ByteString
Expand Down
13 changes: 8 additions & 5 deletions sv/test/Data/Sv/CassavaTest.hs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import Data.Sv.Encode (Encode)
import qualified Data.Sv.Encode as E
import Data.Vector as V
import Data.Tuple.Only (Only (fromOnly))
import Hedgehog (Gen, (===), failure, forAll, property)
import Hedgehog (Gen, TestLimit, (===), failure, forAll, property, withTests)
import qualified Hedgehog.Gen as Gen
import qualified Hedgehog.Range as Range
import Test.Tasty (TestName, TestTree, testGroup)
Expand All @@ -25,17 +25,20 @@ test_CassavaAgreement =
, cassavaAgreement "char" Sv.char E.char (Gen.unicode)
, cassavaAgreement "integer" Sv.integer E.integer (Gen.integral (Range.linear (-10000000) 10000000))
, cassavaAgreement "string" Sv.string E.string (Gen.string (Range.linear 1 500) Gen.unicode)
, cassavaAgreement "bytestring" Sv.byteString E.byteString (Gen.utf8 (Range.linear 1 600) Gen.unicode)
, cassavaAgreement "float" Sv.float E.float (Gen.float (Range.exponentialFloat (-10000000) 10000000))
, cassavaAgreement "double" Sv.double E.double (Gen.double (Range.exponentialFloat (-10000000) 10000000))
, cassavaAgreement' "bytestring" 5000 Sv.byteString E.byteString (Gen.utf8 (Range.linear 1 600) Gen.unicode)
, cassavaAgreement' "float" 5000 Sv.float E.float (Gen.float (Range.exponentialFloat (-1000000000) 1000000000))
, cassavaAgreement' "double" 5000 Sv.double E.double (Gen.double (Range.exponentialFloat (-1000000000) 1000000000))
]

opts :: ParseOptions
opts = defaultParseOptions { _headedness = Unheaded }

-- | Test that decoding with sv gets the same result as decoding with cassava
cassavaAgreement :: forall a . (Csv.FromField a, Csv.ToField a, Show a, Eq a) => TestName -> Decode' BS.ByteString a -> Encode a -> Gen a -> TestTree
cassavaAgreement name dec enc gen = testProperty name $ property $ do
cassavaAgreement t = cassavaAgreement' t 100

cassavaAgreement' :: forall a . (Csv.FromField a, Csv.ToField a, Show a, Eq a) => TestName -> TestLimit -> Decode' BS.ByteString a -> Encode a -> Gen a -> TestTree
cassavaAgreement' name reps dec enc gen = testProperty name $ withTests reps $ property $ do
a <- forAll gen
let sa = Sv.encode enc defaultEncodeOptions [a]
let cassava :: Either String [a]
Expand Down
18 changes: 9 additions & 9 deletions sv/test/Data/Sv/DecodeTest.hs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ parseDecoder = D.contents D.>>==
data Semi = Semi Text Int Double Text deriving (Eq, Show)

semiD :: D.Decode' ByteString Semi
semiD = Semi <$> D.utf8 <*> (parseDecoder `o` D.contents) <*> D.rational <*> D.utf8
semiD = Semi <$> D.utf8 <*> (parseDecoder `o` D.contents) <*> D.double <*> D.utf8

semigroupoidTest :: TestTree
semigroupoidTest = testGroup "Semigroupoid Decode"
Expand All @@ -120,7 +120,7 @@ semigroupoidTest = testGroup "Semigroupoid Decode"
Failure (DecodeErrors (pure (BadDecode "no")))
, testCase "Does the right thing in the case of right failure" $
parseDecode semiD opts semiTestString3 @?=
Failure (DecodeErrors (pure (BadDecode "Couldn't decode \"false\": input does not start with a digit")))
Failure (DecodeErrors (pure (BadDecode "Couldn't decode \"false\" as a double")))
]

-- This CSV has enough columns to make an Item, it has more columns than a
Expand All @@ -143,34 +143,34 @@ data SuperItem2 = SuperItem2 Int Text Text Double Int Double deriving (Eq, Show)
inOrder :: NameDecode' ByteString Item
inOrder =
Item <$> D.column "id" D.int <*> D.column "name" D.utf8
<*> D.column "cost" D.rational <*> D.column "units" D.int
<*> D.column "cost" D.double <*> D.column "units" D.int

outOrder :: NameDecode' ByteString Item
outOrder =
(\n u c i -> Item i n c u) <$> D.column "name" D.utf8
<*> D.column "units" D.int <*> D.column "cost" D.rational
<*> D.column "units" D.int <*> D.column "cost" D.double
<*> D.column "id" D.int

inOrderSemi :: NameDecode' ByteString SemiItem
inOrderSemi =
SemiItem <$> D.column "name" D.utf8 <*> D.column "cost" D.rational
SemiItem <$> D.column "name" D.utf8 <*> D.column "cost" D.double

outOrderSemi :: NameDecode' ByteString SemiItem2
outOrderSemi =
SemiItem2 <$> D.column "units" D.int <*> D.column "cost" D.rational
SemiItem2 <$> D.column "units" D.int <*> D.column "cost" D.double

super :: NameDecode' ByteString SuperItem
super =
SuperItem <$> D.column "id" D.int <*> D.column "name" D.utf8
<*> D.column "manufacturer" D.utf8
<*> D.column "cost" D.rational <*> D.column "units" D.int
<*> D.column "cost" D.double <*> D.column "units" D.int

super2 :: NameDecode' ByteString SuperItem2
super2 =
SuperItem2 <$> D.column "id" D.int <*> D.column "name" D.utf8
<*> D.column "manufacturer" D.utf8
<*> D.column "cost" D.rational <*> D.column "units" D.int
<*> D.column "profit" D.rational
<*> D.column "cost" D.double <*> D.column "units" D.int
<*> D.column "profit" D.double

namedTest :: TestTree
namedTest = testGroup "Named decodes"
Expand Down
6 changes: 3 additions & 3 deletions sv/test/Data/Sv/EncodeTest.hs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ data Three = Three {
three :: NameEncode Three
three =
E.named "first" (contramap int E.int)
<> E.named "\"Second\"" (contramap double E.double)
<> E.named "\"Second\"" (contramap double E.doubleFast)
<> E.named "third" (contramap text E.text)

myInt :: NameEncode Int
Expand All @@ -120,8 +120,8 @@ encodeNamedTests = testGroup "named" [
encodeNamed three opts [] @?= "first,\"\"\"Second\"\"\",third"
, testCase "multiple columns, one row" $
encodeNamed three opts [Three 1 2 "th\"ree"]
@?= "first,\"\"\"Second\"\"\",third\n1,2.0,\"th\"\"ree\""
@?= "first,\"\"\"Second\"\"\",third\n1,2,\"th\"\"ree\""
, testCase "multiple columns, multiple rows" $
encodeNamed three opts [Three 1 2 "three", Three 4 5 "SIX"]
@?= "first,\"\"\"Second\"\"\",third\n1,2.0,three\n4,5.0,SIX"
@?= "first,\"\"\"Second\"\"\",third\n1,2,three\n4,5,SIX"
]
23 changes: 16 additions & 7 deletions sv/test/Data/Sv/RoundTripsDecodeEncode.hs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ test_Roundtrips =
, integer
, float
, double
, rational
, doubleFast
, readDouble
, string
, byteString
Expand Down Expand Up @@ -114,19 +114,28 @@ float :: TestTree
float = roundTripCodecIso "float" D.float E.float
floatingTests

-- | Round-trip cases shared by the 'Double' codecs: everything in
-- 'floatingTests' plus one value with 16 significant digits.
doubleTests :: (IsString s, Fractional a) => [(s,a)]
doubleTests = ("7.845860130857695", 7.845860130857695) : floatingTests

double :: TestTree
double = roundTripCodecIso "double" D.double E.double
floatingTests
( ("1.0000000000034547e-2", 1.0000000000034547e-2)
: doubleTests
)

rational :: TestTree
rational = roundTripCodecIso "rational" D.rational E.double
(("7.845860130857695", 7.845860130857695) : floatingTests)
-- | Round trip between the 'D.double' decoder and the 'E.doubleFast' encoder.
--
-- The expected text is the form 'E.doubleFast' emits: whole numbers carry no
-- fractional part (@5@), and @1.0000000000034547e-2@ is written in plain
-- decimal notation rather than with an exponent.
doubleFast :: TestTree
doubleFast = roundTripCodecIso "doubleFast" D.double E.doubleFast
[ ("5", 5)
, ("10.5", 10.5)
, ("12345.678", 12345.678)
, ("7.845860130857695", 7.845860130857695)
, ("0.010000000000034547", 1.0000000000034547e-2)
]

readDouble :: TestTree
readDouble = roundTripCodecIso "read double" D.read (E.show :: Encode Double)
( ("1.0000000000034547e-2", 1.0000000000034547e-2)
: ("7.845860130857695", 7.845860130857695)
: floatingTests
: doubleTests
)

text :: TestTree
Expand Down

0 comments on commit 40a08bb

Please sign in to comment.