diff --git a/.stylish-haskell.yaml b/.stylish-haskell.yaml new file mode 100644 index 0000000..72b82b9 --- /dev/null +++ b/.stylish-haskell.yaml @@ -0,0 +1,225 @@ +# stylish-haskell configuration file +# ================================== + +# The stylish-haskell tool is mainly configured by specifying steps. These steps +# are a list, so they have an order, and one specific step may appear more than +# once (if needed). Each file is processed by these steps in the given order. +steps: + # Convert some ASCII sequences to their Unicode equivalents. This is disabled + # by default. + # - unicode_syntax: + # # In order to make this work, we also need to insert the UnicodeSyntax + # # language pragma. If this flag is set to true, we insert it when it's + # # not already present. You may want to disable it if you configure + # # language extensions using some other method than pragmas. Default: + # # true. + # add_language_pragma: true + + # Align the right hand side of some elements. This is quite conservative + # and only applies to statements where each element occupies a single + # line. + - simple_align: + cases: true + top_level_patterns: true + records: true + + # Import cleanup + - imports: + # There are different ways we can align names and lists. + # + # - global: Align the import names and import list throughout the entire + # file. + # + # - file: Like global, but don't add padding when there are no qualified + # imports in the file. + # + # - group: Only align the imports per group (a group is formed by adjacent + # import lines). + # + # - none: Do not perform any alignment. + # + # Default: global. + align: global + + # The following options affect only import list alignment. + # + # List align has following options: + # + # - after_alias: Import list is aligned with end of import including + # 'as' and 'hiding' keywords. 
+ # + # > import qualified Data.List as List (concat, foldl, foldr, head, + # > init, last, length) + # + # - with_alias: Import list is aligned with start of alias or hiding. + # + # > import qualified Data.List as List (concat, foldl, foldr, head, + # > init, last, length) + # + # - new_line: Import list always starts on a new line. + # + # > import qualified Data.List as List + # > (concat, foldl, foldr, head, init, last, length) + # + # Default: after_alias + list_align: after_alias + + # Right-pad the module names to align imports in a group: + # + # - true: a little more readable + # + # > import qualified Data.List as List (concat, foldl, foldr, + # > init, last, length) + # > import qualified Data.List.Extra as List (concat, foldl, foldr, + # > init, last, length) + # + # - false: diff-safe + # + # > import qualified Data.List as List (concat, foldl, foldr, init, + # > last, length) + # > import qualified Data.List.Extra as List (concat, foldl, foldr, + # > init, last, length) + # + # Default: true + pad_module_names: true + + # Long list align style takes effect when import is too long. This is + # determined by 'columns' setting. + # + # - inline: This option will put as many specs on the same line as possible. + # + # - new_line: Import list will start on new line. + # + # - new_line_multiline: Import list will start on new line when it's + # short enough to fit to single line. Otherwise it'll be multiline. + # + # - multiline: One line per import list entry. + # Type with constructor list acts like single import. + # + # > import qualified Data.Map as M + # > ( empty + # > , singleton + # > , ... 
+ # > , delete + # > ) + # + # Default: inline + long_list_align: new_line_multiline + + # Align empty list (importing instances) + # + # Empty list align has following options + # + # - inherit: inherit list_align setting + # + # - right_after: () is right after the module name: + # + # > import Vector.Instances () + # + # Default: inherit + empty_list_align: inherit + + # List padding determines indentation of import list on lines after import. + # This option affects 'long_list_align'. + # + # - <integer>: constant value + # + # - module_name: align under start of module name. + # Useful for 'file' and 'group' align settings. + list_padding: 4 + + # Separate lists option affects formatting of import list for type + # or class. The only difference is single space between type and list + # of constructors, selectors and class functions. + # + # - true: There is single space between Foldable type and list of its + # functions. + # + # > import Data.Foldable (Foldable (fold, foldl, foldMap)) + # + # - false: There is no space between Foldable type and list of its + # functions. + # + # > import Data.Foldable (Foldable(fold, foldl, foldMap)) + # + # Default: true + separate_lists: false + + # Space surround option affects formatting of import lists on a single + # line. The only difference is single space after the initial + # parenthesis and a single space before the terminal parenthesis. + # + # - true: There is single space associated with the enclosing + # parenthesis. + # + # > import Data.Foo ( foo ) + # + # - false: There is no space associated with the enclosing parenthesis + # + # > import Data.Foo (foo) + # + # Default: false + space_surround: true + + # Language pragmas + - language_pragmas: + # We can generate different styles of language pragma lists. + # + # - vertical: Vertical-spaced language pragmas, one per line. + # + # - compact: A more compact style. + # + # - compact_line: Similar to compact, but wrap each line with + # `{-#LANGUAGE #-}'. 
+ # + # Default: vertical. + style: vertical + + # Align affects alignment of closing pragma brackets. + # + # - true: Brackets are aligned in same column. + # + # - false: Brackets are not aligned together. There is only one space + # between actual import and closing bracket. + # + # Default: true + align: true + + # stylish-haskell can detect redundancy of some language pragmas. If this + # is set to true, it will remove those redundant pragmas. Default: true. + remove_redundant: true + + # Replace tabs by spaces. This is disabled by default. + # - tabs: + # # Number of spaces to use for each tab. Default: 8, as specified by the + # # Haskell report. + # spaces: 8 + + # Remove trailing whitespace + - trailing_whitespace: {} + +# A common setting is the number of columns (parts of) code will be wrapped +# to. Different steps take this into account. Default: 80. +columns: 80 + +# By default, line endings are converted according to the OS. You can override +# preferred format here. +# +# - native: Native newline format. CRLF on Windows, LF on other OSes. +# +# - lf: Convert to LF ("\n"). +# +# - crlf: Convert to CRLF ("\r\n"). +# +# Default: native. +newline: lf + +# Sometimes, language extensions are specified in a cabal file or from the +# command line instead of using language pragmas in the file. stylish-haskell +# needs to be aware of these, so it can parse the file correctly. +# +# No language extensions are enabled by default. 
+language_extensions: + - OverloadedStrings + - MultiParamTypeClasses + - FlexibleContexts diff --git a/package.yaml b/package.yaml index ee675f2..0a0777c 100644 --- a/package.yaml +++ b/package.yaml @@ -21,6 +21,7 @@ default-extensions: - FlexibleInstances - MultiParamTypeClasses - OverloadedStrings +- FlexibleContexts library: source-dirs: src diff --git a/src/Data/Text/Conversions.hs b/src/Data/Text/Conversions.hs index 692a50f..0d65cd7 100644 --- a/src/Data/Text/Conversions.hs +++ b/src/Data/Text/Conversions.hs @@ -41,25 +41,28 @@ module Data.Text.Conversions ( , DecodeText(..) , convertText , decodeConvertText + -- * UTF8 utility methods + , fromUTF8 + , toUTF8 -- * Encoding newtypes , UTF8(..) , Base16(..) , Base64(..) ) where -import Control.Error.Util (hush) +import Control.Error.Util ( hush ) -import qualified Data.Text as T -import qualified Data.Text.Encoding as T -import qualified Data.Text.Lazy as TL -import qualified Data.Text.Lazy.Encoding as TL +import qualified Data.Text as T +import qualified Data.Text.Encoding as T +import qualified Data.Text.Lazy as TL +import qualified Data.Text.Lazy.Encoding as TL -import qualified Data.ByteString as B -import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as BL -import qualified Data.ByteString.Base16 as Base16 +import qualified Data.ByteString.Base16 as Base16 import qualified Data.ByteString.Base16.Lazy as Base16L -import qualified Data.ByteString.Base64 as Base64 +import qualified Data.ByteString.Base64 as Base64 import qualified Data.ByteString.Base64.Lazy as Base64L {-| @@ -143,6 +146,42 @@ convertText = fromText . toText decodeConvertText :: (DecodeText f a, FromText b) => a -> f b decodeConvertText = fmap fromText . decodeText +{-| + A convenience function for the common case of converting from UTF8 bytes + to text-like representations. 
+ + >>> fromUTF8 ("hello" :: ByteString) :: Maybe Text + Just "hello" + >>> fromUTF8 ("invalid \xc3\x28" :: ByteString) :: Maybe Text + Nothing +-} +fromUTF8 :: (DecodeText f (UTF8 a), FromText b) => a -> f b +fromUTF8 = decodeConvertText . UTF8 +{-# INLINEABLE fromUTF8 #-} +{-# SPECIALIZE INLINE fromUTF8 :: B.ByteString -> Maybe String #-} +{-# SPECIALIZE INLINE fromUTF8 :: B.ByteString -> Maybe T.Text #-} +{-# SPECIALIZE INLINE fromUTF8 :: B.ByteString -> Maybe TL.Text #-} +{-# SPECIALIZE INLINE fromUTF8 :: BL.ByteString -> Maybe String #-} +{-# SPECIALIZE INLINE fromUTF8 :: BL.ByteString -> Maybe T.Text #-} +{-# SPECIALIZE INLINE fromUTF8 :: BL.ByteString -> Maybe TL.Text #-} + +{-| + A convenience function for the common case of converting to UTF8 bytes + from text-like representations. + + >>> toUTF8 ("hello" :: Text) :: ByteString + "hello" +-} +toUTF8 :: (ToText a, FromText (UTF8 b)) => a -> b +toUTF8 = unUTF8 . fromText . toText +{-# INLINEABLE toUTF8 #-} +{-# SPECIALIZE INLINE toUTF8 :: String -> B.ByteString #-} +{-# SPECIALIZE INLINE toUTF8 :: String -> BL.ByteString #-} +{-# SPECIALIZE INLINE toUTF8 :: T.Text -> B.ByteString #-} +{-# SPECIALIZE INLINE toUTF8 :: T.Text -> BL.ByteString #-} +{-# SPECIALIZE INLINE toUTF8 :: TL.Text -> B.ByteString #-} +{-# SPECIALIZE INLINE toUTF8 :: TL.Text -> BL.ByteString #-} + instance ToText T.Text where toText = id instance FromText T.Text where fromText = id instance ToText String where toText = T.pack diff --git a/test/Data/Text/ConversionsSpec.hs b/test/Data/Text/ConversionsSpec.hs index f5d5f06..0c75758 100644 --- a/test/Data/Text/ConversionsSpec.hs +++ b/test/Data/Text/ConversionsSpec.hs @@ -1,13 +1,15 @@ module Data.Text.ConversionsSpec (spec) where -import Test.Hspec -import Data.Text.Conversions +import Data.Text.Conversions +import Test.Hspec -import qualified Data.Text as T -import qualified Data.Text.Lazy as TL +import qualified Data.Text as T +import qualified Data.Text.Lazy as TL -import qualified 
Data.ByteString as B -import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as BL + +import Data.Text.Encoding ( encodeUtf8 ) newtype Upper = Upper T.Text deriving (Eq, Show) newtype Lower = Lower T.Text deriving (Eq, Show) @@ -87,3 +89,23 @@ spec = do it "fails to decode improperly encoded bytestrings" $ do decodeConvertText (UTF8 ("invalid \xc3\x28" :: B.ByteString)) `shouldBe` (Nothing :: Maybe T.Text) decodeConvertText (UTF8 ("invalid \xc3\x28" :: BL.ByteString)) `shouldBe` (Nothing :: Maybe T.Text) + + describe "fromUTF8" $ do + it "successfully decodes properly encoded bytestrings" $ do + fromUTF8 ("hello" :: B.ByteString) `shouldBe` Just ("hello" :: T.Text) + fromUTF8 ("hello" :: B.ByteString) `shouldBe` Just ("hello" :: TL.Text) + fromUTF8 ("hello" :: BL.ByteString) `shouldBe` Just ("hello" :: T.Text) + fromUTF8 ("hello" :: BL.ByteString) `shouldBe` Just ("hello" :: TL.Text) + + it "fails to decode improperly encoded bytestrings" $ do + fromUTF8 ("invalid \xc3\x28" :: B.ByteString) `shouldBe` (Nothing :: Maybe T.Text) + fromUTF8 ("invalid \xc3\x28" :: BL.ByteString) `shouldBe` (Nothing :: Maybe T.Text) + fromUTF8 ("invalid \xc3\x28" :: B.ByteString) `shouldBe` (Nothing :: Maybe TL.Text) + fromUTF8 ("invalid \xc3\x28" :: BL.ByteString) `shouldBe` (Nothing :: Maybe TL.Text) + + describe "toUTF8" $ do + it "successfully encodes to bytestrings" $ do + toUTF8 ("hello" :: T.Text) `shouldBe` (encodeUtf8 "hello") + toUTF8 ("hello" :: TL.Text) `shouldBe` (encodeUtf8 "hello") + toUTF8 ("hello" :: T.Text) `shouldBe` (BL.fromStrict $ encodeUtf8 "hello") + toUTF8 ("hello" :: TL.Text) `shouldBe` (BL.fromStrict $ encodeUtf8 "hello")