Fast and correct decoding of doubles

qfpl · Jan 8, 2019 · commit 40a08bb
1 parent d096448

Showing 14 changed files with 75 additions and 53 deletions.
diff --git a/benchmarks/src/Data/Sv/Random.hs b/benchmarks/src/Data/Sv/Random.hs
@@ -150,11 +150,11 @@ shortRowEnc =
 
 productEnc :: E.Encode Product
 productEnc =
-  contramap i E.int <> contramap f E.float <> contramap d E.double
+  contramap i E.int <> contramap f E.float <> contramap d E.doubleFast
 
 coproductEnc :: E.Encode Coproduct
 coproductEnc =
-  E.encodeOf _I  E.int <> E.encodeOf _B E.byteString <> E.encodeOf _D E.double
+  E.encodeOf _I  E.int <> E.encodeOf _B E.byteString <> E.encodeOf _D E.doubleFast
 
 longRowEnc :: E.Encode LongRow
 longRowEnc = mconcat [
@@ -163,7 +163,7 @@ longRowEnc = mconcat [
   , E.encodeOf lrI E.int
   , E.encodeOf lrG E.integer
   , E.encodeOf lrF E.float
-  , E.encodeOf lrD E.double
+  , E.encodeOf lrD E.doubleFast
   , E.encodeOf lrP productEnc
   , E.encodeOf lrC coproductEnc
   ]

diff --git a/examples/csv/encoding-header.expected.csv b/examples/csv/encoding-header.expected.csv
@@ -1,4 +1,4 @@
 Bytestring,Text,Int,Double,Text2,Double2,Sum
 Hello,Goodbye,5,5.1,text,0.5,20
-Yes,no,200,-4.5,,22.0,19.3
-a,b,0,0.0,"words ""words"" words",15.0,More words
+Yes,no,200,-4.5,,22,19.3
+a,b,0,0,"words ""words"" words",15,More words
diff --git a/examples/csv/encoding.expected.csv b/examples/csv/encoding.expected.csv
@@ -1,3 +1,3 @@
 Hello,Goodbye,5,5.1,text,0.5,20
-Yes,no,200,-4.5,,22.0,19.3
-a,b,0,0.0,"words ""words"" words",15.0,More words
+Yes,no,200,-4.5,,22,19.3
+a,b,0,0,"words ""words"" words",15,More words
diff --git a/examples/src/Data/Sv/Example/Encoding.hs b/examples/src/Data/Sv/Example/Encoding.hs
@@ -70,7 +70,7 @@ makeLenses ''Example
 -- divide :: (Product -> (Text, Int)) -> Encode Text -> Encode Int -> Encode Product
 -- @
 productEnc :: Encode Product
-productEnc = divide (\(Product t d) -> (t,d)) E.text E.double
+productEnc = divide (\(Product t d) -> (t,d)) E.text E.doubleFast
 
 -- | Here we're defining an encoder for a 'Sum' using the 'choose' combinator.
 --
@@ -92,11 +92,11 @@ productEnc = divide (\(Product t d) -> (t,d)) E.text E.double
 -- @
 -- split :: Sum -> Either Int (Either Double Text)
 -- choose split :: Encode Int -> Encode (Either Double Text) -> Encode Sum
--- chosen E.double E.text :: Encode (Either Double Text)
+-- chosen E.doubleFast E.text :: Encode (Either Double Text)
 -- @
 --
 sumEnc :: Encode Sum
-sumEnc = choose split E.int $ chosen E.double E.text
+sumEnc = choose split E.int $ chosen E.doubleFast E.text
   where
     split s =
       case s of
@@ -119,7 +119,7 @@ exampleEnc =
   divide (\(Example b t i d p s) -> (b,(t,(i,(d,(p,s)))))) E.byteString $
     divided E.text $
     divided E.int $
-    divided E.double $
+    divided E.doubleFast $
     divided productEnc sumEnc
 
 examples :: [Example]
@@ -160,7 +160,7 @@ main = do
 exampleEncContravariantExtras :: Encode Example
 exampleEncContravariantExtras =
   contramap (\(Example b t i d p s) -> (b,t,i,d,p,s)) $
-    contrazip6 E.byteString E.text E.int E.double productEnc sumEnc
+    contrazip6 E.byteString E.text E.int E.doubleFast productEnc sumEnc
 
 -- | Bonus Round #2
 --
@@ -177,17 +177,17 @@ exampleEncContravariantExtras =
 -- This version is pretty clean. It's my favourite of the three :)
 productEncLens :: Encode Product
 productEncLens =
-  E.encodeOf p1 E.text <> E.encodeOf p2 E.double
+  E.encodeOf p1 E.text <> E.encodeOf p2 E.doubleFast
 
 sumEncLens :: Encode Sum
 sumEncLens =
-    E.encodeOf _Sum1 E.int <> E.encodeOf _Sum2 E.double <> E.encodeOf _Sum3 E.text
+    E.encodeOf _Sum1 E.int <> E.encodeOf _Sum2 E.doubleFast <> E.encodeOf _Sum3 E.text
 
 exampleEncLens :: Encode Example
 exampleEncLens =
       E.encodeOf e1 E.byteString
   <>  E.encodeOf e2 E.text
   <>  E.encodeOf e3 E.int
-  <>  E.encodeOf e4 E.double
+  <>  E.encodeOf e4 E.doubleFast
   <>  E.encodeOf e5 productEncLens
   <>  E.encodeOf e6 sumEncLens
diff --git a/examples/src/Data/Sv/Example/EncodingWithHeader.hs b/examples/src/Data/Sv/Example/EncodingWithHeader.hs
@@ -32,7 +32,7 @@ exampleEnc =
       "Bytestring" =: E.encodeOf e1 E.byteString
   <>  "Text" =: E.encodeOf e2 E.text
   <>  "Int" =: E.encodeOf e3 E.int
-  <>  "Double" =: E.encodeOf e4 E.double
+  <>  "Double" =: E.encodeOf e4 E.doubleFast
   -- Notice that we have had to inline the definition of productEnc to
   -- attach a name to each component of it.
   --
@@ -42,7 +42,7 @@ exampleEnc =
   -- This could also be avoided by not using ADTs within the data
   -- type to be encoded.
   <>  "Text2" =: E.encodeOf (e5.p1) E.text
-  <>  "Double2" =: E.encodeOf (e5.p2) E.double
+  <>  "Double2" =: E.encodeOf (e5.p2) E.doubleFast
   <>  "Sum" =: E.encodeOf e6 sumEnc
 
 

diff --git a/examples/src/Data/Sv/Example/Numbers.hs b/examples/src/Data/Sv/Example/Numbers.hs
@@ -21,7 +21,7 @@ opts :: ParseOptions
 opts = defaultParseOptions & headedness .~ Unheaded
 
 num :: Decode' ByteString (Maybe Double)
-num = D.orEmpty D.rational
+num = D.orEmpty D.double
   <!> (D.exactly "unknown" <!> D.exactly "NULL") $> Nothing
 
 main :: IO ()

diff --git a/sv-core/src/Data/Sv/Decode/Core.hs b/sv-core/src/Data/Sv/Decode/Core.hs
@@ -134,6 +134,7 @@ import Control.Monad.Reader (ReaderT (ReaderT, runReaderT))
 import Control.Monad.State (state)
 import Control.Monad.Writer.Strict (runWriter)
 import qualified Data.Attoparsec.ByteString as A
+import qualified Data.Attoparsec.ByteString.Char8 as AC8
 import Data.Bifunctor (first, second)
 import Data.ByteString (ByteString)
 import qualified Data.ByteString.UTF8 as UTF8
@@ -159,6 +160,7 @@ import qualified Data.Text.Read as TR (Reader, rational)
 import qualified Data.Text.Lazy as LT
 import Data.Vector (Vector, (!))
 import qualified Data.Vector as V
+import GHC.Float (double2Float)
 import Text.Parsec (Parsec)
 import qualified Text.Parsec as P (parse)
 import Text.Read (readMaybe)
@@ -254,22 +256,17 @@ integer = named "integer"
 
 -- | Decode a UTF-8 'ByteString' field as a 'Float'
 float :: Decode' ByteString Float
-float = named "float"
+float = double2Float <$> double
 
 -- | Decode a UTF-8 'ByteString' field as a 'Double'
 --
--- This is currently the fastest but least precise way to decode doubles.
--- 'rational' is more precise but slower. 'read' is the most precise, but
--- slower still.
---
--- If you aren't sure which to use, use 'read'.
+-- This is currently the fastest and most precise way to decode doubles.
 double :: Decode' ByteString Double
-double = named "double"
+double = withAttoparsec AC8.double <!> (
+    contents >>== \s -> badDecode $ "Couldn't decode \"" <> s <> "\" as a double"
+  )
 
 -- | Decode a UTF-8 'ByteString' as any 'Floating' type (usually 'Double')
---
--- This is slower than 'double' but more precise. It is not as precise as
--- 'read'.
 rational :: Floating a => Decode' ByteString a
 rational = rat `o` utf8
   where

diff --git a/sv-core/src/Data/Sv/Encode/Core.hs b/sv-core/src/Data/Sv/Encode/Core.hs
@@ -96,6 +96,7 @@ module Data.Sv.Encode.Core (
 , integer
 , float
 , double
+, doubleFast
 , boolTrueFalse
 , booltruefalse
 , boolyesno
@@ -137,6 +138,7 @@ import qualified Data.Bool as B (bool)
 import qualified Data.ByteString as Strict
 import qualified Data.ByteString.Builder as BS
 import qualified Data.ByteString.Lazy as LBS
+import qualified Data.Double.Conversion.ByteString as DC
 import Data.Foldable (fold)
 import Data.Functor.Contravariant (Contravariant (contramap))
 import Data.Functor.Contravariant.Compose (ComposeFC (ComposeFC, getComposeFC))
@@ -349,9 +351,17 @@ float :: Encode Float
 float = unsafeBuilder BS.floatDec
 
 -- | Encode a 'Double'
+--
+-- This version satisfies the roundtrip property. If that doesn't matter to you,
+-- use the faster version 'doubleFast'.
 double :: Encode Double
 double = unsafeBuilder BS.doubleDec
 
+-- | Encode a 'Double' really quickly. This version uses the @double-conversion@
+-- package.
+doubleFast :: Encode Double
+doubleFast = contramap DC.toShortest unsafeByteString
+
 -- | Encode a 'String'
 string :: Encode String
 string = escaped BS.stringUtf8

diff --git a/sv-core/sv-core.cabal b/sv-core/sv-core.cabal
@@ -55,6 +55,7 @@ library
                        , containers >= 0.4 && < 0.7
                        , contravariant >= 1.2 && < 1.6
                        , deepseq >= 1.1 && < 1.5
+                       , double-conversion >= 2 && < 2.1
                        , lens >= 4 && < 4.18
                        , mtl >= 2.0.1 && < 2.3
                        , parsec >= 3.1 && < 3.2

diff --git a/sv/bench/bench.hs b/sv/bench/bench.hs
@@ -20,8 +20,10 @@ main :: IO ()
 main =
   defaultMain
       [ bench "double" $ nf (pd D.double) doublesC
-      , bench "rational" $ nf (pd (D.rational :: Decode' BS.ByteString Double)) doublesC
+      , bench "rational double" $ nf (pd (D.rational :: Decode' BS.ByteString Double)) doublesC
       , bench "read double" $ nf (pd (D.read :: Decode' BS.ByteString Double)) doublesC
+      , bench "float" $ nf (pd D.float) doublesC
+      , bench "rational float" $ nf (pd (D.rational :: Decode' BS.ByteString Float)) doublesC
       ]
 
 doubles :: LBS.ByteString

diff --git a/sv/test/Data/Sv/CassavaTest.hs b/sv/test/Data/Sv/CassavaTest.hs
@@ -12,7 +12,7 @@ import Data.Sv.Encode (Encode)
 import qualified Data.Sv.Encode as E
 import Data.Vector as V
 import Data.Tuple.Only (Only (fromOnly))
-import Hedgehog (Gen, (===), failure, forAll, property)
+import Hedgehog (Gen, TestLimit, (===), failure, forAll, property, withTests)
 import qualified Hedgehog.Gen as Gen
 import qualified Hedgehog.Range as Range
 import Test.Tasty (TestName, TestTree, testGroup)
@@ -25,17 +25,20 @@ test_CassavaAgreement =
     , cassavaAgreement "char" Sv.char E.char (Gen.unicode)
     , cassavaAgreement "integer" Sv.integer E.integer (Gen.integral (Range.linear (-10000000) 10000000))
     , cassavaAgreement "string" Sv.string E.string (Gen.string (Range.linear 1 500) Gen.unicode)
-    , cassavaAgreement "bytestring" Sv.byteString E.byteString (Gen.utf8 (Range.linear 1 600) Gen.unicode)
-    , cassavaAgreement "float" Sv.float E.float (Gen.float (Range.exponentialFloat (-10000000) 10000000))
-    , cassavaAgreement "double" Sv.double E.double (Gen.double (Range.exponentialFloat (-10000000) 10000000))
+    , cassavaAgreement' "bytestring" 5000 Sv.byteString E.byteString (Gen.utf8 (Range.linear 1 600) Gen.unicode)
+    , cassavaAgreement' "float" 5000 Sv.float E.float (Gen.float (Range.exponentialFloat (-1000000000) 1000000000))
+    , cassavaAgreement' "double" 5000 Sv.double E.double (Gen.double (Range.exponentialFloat (-1000000000) 1000000000))
     ]
 
 opts :: ParseOptions
 opts = defaultParseOptions { _headedness = Unheaded }
 
 -- | Test that decoding with sv gets the same result as decoding with cassava
 cassavaAgreement :: forall a . (Csv.FromField a, Csv.ToField a, Show a, Eq a) => TestName -> Decode' BS.ByteString a -> Encode a -> Gen a -> TestTree
-cassavaAgreement name dec enc gen = testProperty name $ property $ do
+cassavaAgreement t = cassavaAgreement' t 100
+
+cassavaAgreement' :: forall a . (Csv.FromField a, Csv.ToField a, Show a, Eq a) => TestName -> TestLimit -> Decode' BS.ByteString a -> Encode a -> Gen a -> TestTree
+cassavaAgreement' name reps dec enc gen = testProperty name $ withTests reps $ property $ do
   a <- forAll gen
   let sa = Sv.encode enc defaultEncodeOptions [a]
   let cassava :: Either String [a]

diff --git a/sv/test/Data/Sv/DecodeTest.hs b/sv/test/Data/Sv/DecodeTest.hs
@@ -108,7 +108,7 @@ parseDecoder = D.contents D.>>==
 data Semi = Semi Text Int Double Text deriving (Eq, Show)
 
 semiD :: D.Decode' ByteString Semi
-semiD = Semi <$> D.utf8 <*> (parseDecoder `o` D.contents) <*> D.rational <*> D.utf8
+semiD = Semi <$> D.utf8 <*> (parseDecoder `o` D.contents) <*> D.double <*> D.utf8
 
 semigroupoidTest :: TestTree
 semigroupoidTest = testGroup "Semigroupoid Decode"
@@ -120,7 +120,7 @@ semigroupoidTest = testGroup "Semigroupoid Decode"
         Failure (DecodeErrors (pure (BadDecode "no")))
   , testCase "Does the right thing in the case of right failure" $
       parseDecode semiD opts semiTestString3 @?=
-        Failure (DecodeErrors (pure (BadDecode "Couldn't decode \"false\": input does not start with a digit")))
+        Failure (DecodeErrors (pure (BadDecode "Couldn't decode \"false\" as a double")))
   ]
 
 -- This CSV has enough columns to make an Item, it has more columns than a
@@ -143,34 +143,34 @@ data SuperItem2 = SuperItem2 Int Text Text Double Int Double deriving (Eq, Show)
 inOrder :: NameDecode' ByteString Item
 inOrder =
   Item <$> D.column "id" D.int <*> D.column "name" D.utf8
-    <*> D.column "cost" D.rational <*> D.column "units" D.int
+    <*> D.column "cost" D.double <*> D.column "units" D.int
 
 outOrder :: NameDecode' ByteString Item
 outOrder =
   (\n u c i -> Item i n c u) <$> D.column "name" D.utf8
-    <*> D.column "units" D.int <*> D.column "cost" D.rational
+    <*> D.column "units" D.int <*> D.column "cost" D.double
     <*> D.column "id" D.int
 
 inOrderSemi :: NameDecode' ByteString SemiItem
 inOrderSemi =
-  SemiItem <$> D.column "name" D.utf8 <*> D.column "cost" D.rational
+  SemiItem <$> D.column "name" D.utf8 <*> D.column "cost" D.double
 
 outOrderSemi :: NameDecode' ByteString SemiItem2
 outOrderSemi =
-  SemiItem2 <$> D.column "units" D.int <*> D.column "cost" D.rational
+  SemiItem2 <$> D.column "units" D.int <*> D.column "cost" D.double
 
 super :: NameDecode' ByteString SuperItem
 super =
   SuperItem <$> D.column "id" D.int <*> D.column "name" D.utf8
     <*> D.column "manufacturer" D.utf8
-    <*> D.column "cost" D.rational <*> D.column "units" D.int
+    <*> D.column "cost" D.double <*> D.column "units" D.int
 
 super2 :: NameDecode' ByteString SuperItem2
 super2 =
   SuperItem2 <$> D.column "id" D.int <*> D.column "name" D.utf8
     <*> D.column "manufacturer" D.utf8
-    <*> D.column "cost" D.rational <*> D.column "units" D.int
-    <*> D.column "profit" D.rational
+    <*> D.column "cost" D.double <*> D.column "units" D.int
+    <*> D.column "profit" D.double
 
 namedTest :: TestTree
 namedTest = testGroup "Named decodes"

diff --git a/sv/test/Data/Sv/EncodeTest.hs b/sv/test/Data/Sv/EncodeTest.hs
@@ -100,7 +100,7 @@ data Three = Three {
 three :: NameEncode Three
 three =
   E.named "first" (contramap int E.int)
-    <> E.named "\"Second\"" (contramap double E.double)
+    <> E.named "\"Second\"" (contramap double E.doubleFast)
     <> E.named "third" (contramap text E.text)
 
 myInt :: NameEncode Int
@@ -120,8 +120,8 @@ encodeNamedTests = testGroup "named" [
       encodeNamed three opts [] @?= "first,\"\"\"Second\"\"\",third"
   , testCase "multiple columns, one row" $
       encodeNamed three opts [Three 1 2 "th\"ree"]
-        @?= "first,\"\"\"Second\"\"\",third\n1,2.0,\"th\"\"ree\""
+        @?= "first,\"\"\"Second\"\"\",third\n1,2,\"th\"\"ree\""
   , testCase "multiple columns, multiple rows" $
       encodeNamed three opts [Three 1 2 "three", Three 4 5 "SIX"]
-        @?= "first,\"\"\"Second\"\"\",third\n1,2.0,three\n4,5.0,SIX"
+        @?= "first,\"\"\"Second\"\"\",third\n1,2,three\n4,5,SIX"
   ]
diff --git a/sv/test/Data/Sv/RoundTripsDecodeEncode.hs b/sv/test/Data/Sv/RoundTripsDecodeEncode.hs
@@ -26,7 +26,7 @@ test_Roundtrips =
     , integer
     , float
     , double
-    , rational
+    , doubleFast
     , readDouble
     , string
     , byteString
@@ -114,19 +114,28 @@ float :: TestTree
 float = roundTripCodecIso "float" D.float E.float
   floatingTests
 
+doubleTests :: (IsString s, Fractional a) => [(s,a)]
+doubleTests = ("7.845860130857695", 7.845860130857695) : floatingTests
+
 double :: TestTree
 double = roundTripCodecIso "double" D.double E.double
-  floatingTests
+  ( ("1.0000000000034547e-2", 1.0000000000034547e-2)
+  : doubleTests
+  )
 
-rational :: TestTree
-rational = roundTripCodecIso "rational" D.rational E.double
-  (("7.845860130857695", 7.845860130857695) : floatingTests)
+doubleFast :: TestTree
+doubleFast = roundTripCodecIso "doubleFast" D.double E.doubleFast
+  [ ("5", 5)
+  , ("10.5", 10.5)
+  , ("12345.678", 12345.678)
+  , ("7.845860130857695", 7.845860130857695)
+  , ("0.010000000000034547", 1.0000000000034547e-2)
+  ]
 
 readDouble :: TestTree
 readDouble = roundTripCodecIso "read double" D.read (E.show :: Encode Double)
   ( ("1.0000000000034547e-2", 1.0000000000034547e-2)
-  : ("7.845860130857695", 7.845860130857695)
-  : floatingTests
+  : doubleTests
   )
 
 text :: TestTree