From 629b88d49935e274a0581975b4d0a9ef770186a3 Mon Sep 17 00:00:00 2001 From: Oleg Grenrus Date: Fri, 9 Jun 2023 23:45:18 +0300 Subject: [PATCH] Resolve #998. Drop attoparsec.Number instances. More fore: Drop `attoparsec` dependency alltogether. We parse scientific from Text manually now. Notice, `scientific` parser in `attoparsec` is quadratic (uses `decimal`). https://github.com/haskell/attoparsec/issues/217 --- .hlint.yaml | 1 + aeson.cabal | 5 +- changelog.md | 1 + src/Data/Aeson/Internal/Scientific.hs | 113 ++++++++++++++++++++++++++ src/Data/Aeson/Types/FromJSON.hs | 9 +- src/Data/Aeson/Types/ToJSON.hs | 13 --- 6 files changed, 121 insertions(+), 21 deletions(-) create mode 100644 src/Data/Aeson/Internal/Scientific.hs diff --git a/.hlint.yaml b/.hlint.yaml index 52676c03d..277920c8e 100644 --- a/.hlint.yaml +++ b/.hlint.yaml @@ -17,6 +17,7 @@ name: "Avoid lambda" within: - Data.Time.FromText + - Data.Aeson.Internal.Scientific - ignore: name: "Use isDigit" within: diff --git a/aeson.cabal b/aeson.cabal index 7726a11bb..4f14e8fa9 100644 --- a/aeson.cabal +++ b/aeson.cabal @@ -76,6 +76,7 @@ library Data.Aeson.Internal.ByteString Data.Aeson.Internal.Functions Data.Aeson.Internal.Prelude + Data.Aeson.Internal.Scientific Data.Aeson.Internal.Text Data.Aeson.Internal.TH Data.Aeson.Internal.Unescape @@ -109,8 +110,7 @@ library -- Other dependencies build-depends: - attoparsec >=0.14.2 && <0.15 - , data-fix >=0.3.2 && <0.4 + data-fix >=0.3.2 && <0.4 , dlist >=1.0 && <1.1 , hashable >=1.3.5.0 && <1.5 , indexed-traversable >=0.1.2 && <0.2 @@ -171,7 +171,6 @@ test-suite aeson-tests build-depends: aeson - , attoparsec , base , base-compat , base-orphans >=0.5.3 && <0.10 diff --git a/changelog.md b/changelog.md index 97503a81b..cf27cd0b6 100644 --- a/changelog.md +++ b/changelog.md @@ -6,6 +6,7 @@ For the latest version of this document, please see [https://github.com/haskell/ * Move `Data.Aeson.Parser` module into separate `attoparsec-aeson` package, as these parsers are not used by `aeson` itself anymore. * Remove `cffi` flag. Then the C implementation for string unescaping was used for `text <2` versions. The new native Haskell implementation introduced in version 2.0.3.0 is at least as fast. +* Drop instance for `attoparsec.Number`. ### 2.1.2.1 diff --git a/src/Data/Aeson/Internal/Scientific.hs b/src/Data/Aeson/Internal/Scientific.hs new file mode 100644 index 000000000..b03b296ec --- /dev/null +++ b/src/Data/Aeson/Internal/Scientific.hs @@ -0,0 +1,113 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ScopedTypeVariables #-} +module Data.Aeson.Internal.Scientific ( + scanScientific, +) where + +import Data.Integer.Conversion (textToInteger) +import Data.Scientific (Scientific) +import Data.Text (Text) + +import qualified Data.Scientific as Sci +import qualified Data.Text as T + +-- | Parse 'Scientific' number from 'Text'. +-- +-- This is different from how JSON numbers are parsed: arbitrary leading zeroes are accepted. +-- +scanScientific + :: forall r. (Scientific -> Text -> r) + -> (String -> r) + -> Text + -> r +scanScientific kont err input0 = case T.uncons input0 of + Nothing -> errEnd + Just (c, text') + | c == '+' -> scanScientific' kont err text' + | c == '-' -> scanScientific' (\sci -> kont (negate sci)) err text' + | otherwise -> scanScientific' kont err input0 + where + errEnd = err "Unexpected end-of-input while parsing number literal" + +scanScientific' + :: forall r. (Scientific -> Text -> r) + -> (String -> r) + -> Text + -> r +scanScientific' kont err input0 = state_start input0 where + state_start :: Text -> r + state_start !text = case T.uncons text of + Nothing -> errEnd + Just (c, text') + | '0' <= c, c <= '9' -> state_i 1 text' + | otherwise -> err $ "Unexpected " ++ show c ++ " while parsing number literal" + + state_i :: Int -> Text -> r + state_i !n !text = case T.uncons text of + Nothing -> kont (fromInteger int) text + Just (c, text') + | '0' <= c, c <= '9' -> state_i (n + 1) text' + | '.' == c -> go_dec int text' + | 'e' == c || 'E' == c -> go_sci int 0 text' + | otherwise -> kont (fromInteger int) text + where + int = textToInteger (T.take n input0) + + go_dec :: Integer -> Text -> r + go_dec !int !text1 = case T.uncons text1 of + Nothing -> errEnd + Just (c, text') + | '0' <= c, c <= '9' -> state_dec 1 text' + | otherwise -> err $ "Unexpected " ++ show c ++ " while parsing number literal" + where + state_dec :: Int -> Text -> r + state_dec !n !text = case T.uncons text of + Nothing -> kont dec text + Just (c, text') + | '0' <= c, c <= '9' -> state_dec (n + 1) text' + | 'e' == c || 'E' == c -> go_sci coef (negate n) text' + | otherwise -> kont dec text + where + frac = textToInteger (T.take n text1) + coef = int * 10 ^ n + frac + dec = Sci.scientific coef (negate n) + + go_sci :: Integer -> Int -> Text -> r + go_sci !coef !exp10 !text2 = case T.uncons text2 of + Nothing -> errEnd + Just (c, text') + | '0' <= c, c <= '9' -> go_sci_pos coef exp10 text2 1 text' + | '+' == c -> case T.uncons text' of + Nothing -> errEnd + Just (c', text'') + | '0' <= c', c' <= '9' -> go_sci_pos coef exp10 text' 1 text'' + | otherwise -> errUnx c' + | '-' == c -> case T.uncons text' of + Nothing -> errEnd + Just (c', text'') + | '0' <= c', c' <= '9' -> go_sci_neg coef exp10 text' 1 text'' + | otherwise -> errUnx c' + | otherwise -> errUnx c + + go_sci_pos :: Integer -> Int -> Text -> Int -> Text -> r + go_sci_pos !coef !exp10 !text2 !n !text = case T.uncons text of + Nothing -> kont sci text + Just (c, text') + | '0' <= c, c <= '9' -> go_sci_pos coef exp10 text2 (n + 1) text' + | otherwise -> kont sci text + where + exp10' = fromInteger (textToInteger (T.take n text2)) + sci = Sci.scientific coef (exp10 + exp10') + + go_sci_neg :: Integer -> Int -> Text -> Int -> Text -> r + go_sci_neg !coef !exp10 !text2 !n !text = case T.uncons text of + Nothing -> kont sci text + Just (c, text') + | '0' <= c, c <= '9' -> go_sci_neg coef exp10 text2 (n + 1) text' + | otherwise -> kont sci text + where + exp10' = fromInteger (textToInteger (T.take n text2)) + sci = Sci.scientific coef (exp10 - exp10') + + errEnd = err "Unexpected end-of-input while parsing number literal" + errUnx c = err $ "Unexpected " ++ show c ++ " while parsing number literal" diff --git a/src/Data/Aeson/Types/FromJSON.hs b/src/Data/Aeson/Types/FromJSON.hs index 0af7ead04..7417ca4d3 100644 --- a/src/Data/Aeson/Types/FromJSON.hs +++ b/src/Data/Aeson/Types/FromJSON.hs @@ -79,6 +79,7 @@ import Data.Aeson.Internal.Prelude import Control.Monad (zipWithM) import Data.Aeson.Internal.Functions (mapKey, mapKeyO) +import Data.Aeson.Internal.Scientific import Data.Aeson.Types.Generic import Data.Aeson.Types.Internal import Data.Aeson.Decoding.ByteString.Lazy @@ -116,7 +117,6 @@ import Unsafe.Coerce (unsafeCoerce) import qualified Data.Aeson.Parser.Time as Time import qualified Data.Aeson.Key as Key import qualified Data.Aeson.KeyMap as KM -import qualified Data.Attoparsec.ByteString.Char8 as A (endOfInput, parseOnly, scientific) import qualified Data.ByteString.Lazy as L import qualified Data.DList as DList import qualified Data.DList.DNonEmpty as DNE @@ -199,10 +199,9 @@ parseBoundedIntegral name = prependContext name . withScientific' parseBoundedIntegralFromScientific parseScientificText :: Text -> Parser Scientific -parseScientificText - = either fail pure - . A.parseOnly (A.scientific <* A.endOfInput) - . T.encodeUtf8 +parseScientificText = scanScientific + (\sci rest -> if T.null rest then return sci else fail $ "Expecting end-of-input, got " ++ show (T.take 10 rest)) + fail parseIntegralText :: Integral a => String -> Text -> Parser a parseIntegralText name t = diff --git a/src/Data/Aeson/Types/ToJSON.hs b/src/Data/Aeson/Types/ToJSON.hs index f846088ab..1547079e1 100644 --- a/src/Data/Aeson/Types/ToJSON.hs +++ b/src/Data/Aeson/Types/ToJSON.hs @@ -15,9 +15,6 @@ {-# LANGUAGE TypeOperators #-} {-# LANGUAGE UndecidableInstances #-} --- TODO: Drop this when we remove support for Data.Attoparsec.Number -{-# OPTIONS_GHC -fno-warn-deprecations #-} - module Data.Aeson.Types.ToJSON ( -- * Core JSON classes @@ -66,7 +63,6 @@ import Data.Aeson.Types.Generic (AllNullary, False, IsRecord, One, ProductSize, import Data.Aeson.Types.Internal import qualified Data.Aeson.Key as Key import qualified Data.Aeson.KeyMap as KM -import Data.Attoparsec.Number (Number(..)) import Data.Bits (unsafeShiftR) import Data.DList (DList) import Data.Fixed (Fixed, HasResolution, Nano) @@ -1332,15 +1328,6 @@ instance ToJSON Double where instance ToJSONKey Double where toJSONKey = toJSONKeyTextEnc E.doubleText - -instance ToJSON Number where - toJSON (D d) = toJSON d - toJSON (I i) = toJSON i - - toEncoding (D d) = toEncoding d - toEncoding (I i) = toEncoding i - - instance ToJSON Float where toJSON = realFloatToJSON toEncoding = E.float