Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UTF8 utility functions #4

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
225 changes: 225 additions & 0 deletions .stylish-haskell.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
# stylish-haskell configuration file
# ==================================

# The stylish-haskell tool is mainly configured by specifying steps. These steps
# are a list, so they have an order, and one specific step may appear more than
# once (if needed). Each file is processed by these steps in the given order.
steps:
# Convert some ASCII sequences to their Unicode equivalents. This is disabled
# by default.
# - unicode_syntax:
# # In order to make this work, we also need to insert the UnicodeSyntax
# # language pragma. If this flag is set to true, we insert it when it's
# # not already present. You may want to disable it if you configure
# # language extensions using some other method than pragmas. Default:
# # true.
# add_language_pragma: true

# Align the right hand side of some elements. This is quite conservative
# and only applies to statements where each element occupies a single
# line.
- simple_align:
cases: true
top_level_patterns: true
records: true

# Import cleanup
- imports:
# There are different ways we can align names and lists.
#
# - global: Align the import names and import list throughout the entire
# file.
#
# - file: Like global, but don't add padding when there are no qualified
# imports in the file.
#
# - group: Only align the imports per group (a group is formed by adjacent
# import lines).
#
# - none: Do not perform any alignment.
#
# Default: global.
align: global

# The following options affect only import list alignment.
#
# List align has the following options:
#
# - after_alias: Import list is aligned with end of import including
# 'as' and 'hiding' keywords.
#
# > import qualified Data.List as List (concat, foldl, foldr, head,
# > init, last, length)
#
# - with_alias: Import list is aligned with start of alias or hiding.
#
# > import qualified Data.List as List (concat, foldl, foldr, head,
# > init, last, length)
#
# - new_line: Import list starts always on new line.
#
# > import qualified Data.List as List
# > (concat, foldl, foldr, head, init, last, length)
#
# Default: after_alias
list_align: after_alias

# Right-pad the module names to align imports in a group:
#
# - true: a little more readable
#
# > import qualified Data.List as List (concat, foldl, foldr,
# > init, last, length)
# > import qualified Data.List.Extra as List (concat, foldl, foldr,
# > init, last, length)
#
# - false: diff-safe
#
# > import qualified Data.List as List (concat, foldl, foldr, init,
# > last, length)
# > import qualified Data.List.Extra as List (concat, foldl, foldr,
# > init, last, length)
#
# Default: true
pad_module_names: true

# Long list align style takes effect when import is too long. This is
# determined by 'columns' setting.
#
# - inline: This option will put as many specs on the same line as possible.
#
# - new_line: Import list will start on new line.
#
# - new_line_multiline: Import list will start on new line when it's
# short enough to fit to single line. Otherwise it'll be multiline.
#
# - multiline: One line per import list entry.
# Type with constructor list acts like single import.
#
# > import qualified Data.Map as M
# > ( empty
# > , singleton
# > , ...
# > , delete
# > )
#
# Default: inline
long_list_align: new_line_multiline

# Align empty list (importing instances)
#
# Empty list align has the following options:
#
# - inherit: inherit list_align setting
#
# - right_after: () is right after the module name:
#
# > import Vector.Instances ()
#
# Default: inherit
empty_list_align: inherit

# List padding determines indentation of import list on lines after import.
# This option affects 'long_list_align'.
#
# - <integer>: constant value
#
# - module_name: align under start of module name.
# Useful for 'file' and 'group' align settings.
list_padding: 4

# Separate lists option affects formatting of import list for type
# or class. The only difference is single space between type and list
# of constructors, selectors and class functions.
#
# - true: There is single space between Foldable type and list of its
# functions.
#
# > import Data.Foldable (Foldable (fold, foldl, foldMap))
#
# - false: There is no space between Foldable type and list of its
# functions.
#
# > import Data.Foldable (Foldable(fold, foldl, foldMap))
#
# Default: true
separate_lists: false

# Space surround option affects formatting of import lists on a single
# line. The only difference is single space after the initial
# parenthesis and a single space before the terminal parenthesis.
#
# - true: There is single space associated with the enclosing
# parenthesis.
#
# > import Data.Foo ( foo )
#
# - false: There is no space associated with the enclosing parenthesis
#
# > import Data.Foo (foo)
#
# Default: false
space_surround: true

# Language pragmas
- language_pragmas:
# We can generate different styles of language pragma lists.
#
# - vertical: Vertical-spaced language pragmas, one per line.
#
# - compact: A more compact style.
#
# - compact_line: Similar to compact, but wrap each line with
# `{-#LANGUAGE #-}'.
#
# Default: vertical.
style: vertical

# Align affects alignment of closing pragma brackets.
#
# - true: Brackets are aligned in same column.
#
# - false: Brackets are not aligned together. There is only one space
# between the pragma and its closing bracket.
#
# Default: true
align: true

# stylish-haskell can detect redundancy of some language pragmas. If this
# is set to true, it will remove those redundant pragmas. Default: true.
remove_redundant: true

# Replace tabs by spaces. This is disabled by default.
# - tabs:
# # Number of spaces to use for each tab. Default: 8, as specified by the
# # Haskell report.
# spaces: 8

# Remove trailing whitespace
- trailing_whitespace: {}

# A common setting is the number of columns (parts of) code will be wrapped
# to. Different steps take this into account. Default: 80.
columns: 80

# By default, line endings are converted according to the OS. You can override
# preferred format here.
#
# - native: Native newline format. CRLF on Windows, LF on other OSes.
#
# - lf: Convert to LF ("\n").
#
# - crlf: Convert to CRLF ("\r\n").
#
# Default: native.
newline: lf

# Sometimes, language extensions are specified in a cabal file or from the
# command line instead of using language pragmas in the file. stylish-haskell
# needs to be aware of these, so it can parse the file correctly.
#
# No language extensions are enabled by default.
language_extensions:
- OverloadedStrings
- MultiParamTypeClasses
- FlexibleContexts
1 change: 1 addition & 0 deletions package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ default-extensions:
- FlexibleInstances
- MultiParamTypeClasses
- OverloadedStrings
- FlexibleContexts

library:
source-dirs: src
Expand Down
57 changes: 48 additions & 9 deletions src/Data/Text/Conversions.hs
Original file line number Diff line number Diff line change
Expand Up @@ -41,25 +41,28 @@ module Data.Text.Conversions (
, DecodeText(..)
, convertText
, decodeConvertText
-- * UTF8 utility functions
, fromUTF8
, toUTF8
-- * Encoding newtypes
, UTF8(..)
, Base16(..)
, Base64(..)
) where

import Control.Error.Util (hush)
import Control.Error.Util ( hush )

import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL

import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL

import qualified Data.ByteString.Base16 as Base16
import qualified Data.ByteString.Base16 as Base16
import qualified Data.ByteString.Base16.Lazy as Base16L
import qualified Data.ByteString.Base64 as Base64
import qualified Data.ByteString.Base64 as Base64
import qualified Data.ByteString.Base64.Lazy as Base64L

{-|
Expand Down Expand Up @@ -143,6 +146,42 @@ convertText = fromText . toText
decodeConvertText :: (DecodeText f a, FromText b) => a -> f b
decodeConvertText = fmap fromText . decodeText

{-|
  A convenience function for the common case of decoding UTF-8 encoded bytes
  into a text-like representation. It wraps the input in 'UTF8' and hands it
  to 'decodeConvertText', so a decoding failure is reported through the
  functor @f@ chosen by the 'DecodeText' instance (for example 'Maybe', as in
  the specializations below).

  >>> fromUTF8 ("hello" :: ByteString) :: Maybe Text
  Just "hello"
  >>> fromUTF8 ("invalid \xc3\x28" :: ByteString) :: Maybe Text
  Nothing
-}
fromUTF8 :: (DecodeText f (UTF8 a), FromText b) => a -> f b
fromUTF8 = decodeConvertText . UTF8
{-# INLINEABLE fromUTF8 #-}
{-# SPECIALIZE INLINE fromUTF8 :: B.ByteString -> Maybe String #-}
{-# SPECIALIZE INLINE fromUTF8 :: B.ByteString -> Maybe T.Text #-}
{-# SPECIALIZE INLINE fromUTF8 :: B.ByteString -> Maybe TL.Text #-}
{-# SPECIALIZE INLINE fromUTF8 :: BL.ByteString -> Maybe String #-}
{-# SPECIALIZE INLINE fromUTF8 :: BL.ByteString -> Maybe T.Text #-}
{-# SPECIALIZE INLINE fromUTF8 :: BL.ByteString -> Maybe TL.Text #-}

{-|
  The counterpart of decoding: turns any 'ToText' value into UTF-8 encoded
  bytes. The input is converted with 'convertText' into a 'UTF8'-wrapped
  result, and the wrapper is then removed with 'unUTF8'.

  >>> toUTF8 ("hello" :: Text) :: ByteString
  "hello"
-}
toUTF8 :: (ToText a, FromText (UTF8 b)) => a -> b
toUTF8 = unUTF8 . convertText
{-# INLINEABLE toUTF8 #-}
{-# SPECIALIZE INLINE toUTF8 :: String -> B.ByteString #-}
{-# SPECIALIZE INLINE toUTF8 :: String -> BL.ByteString #-}
{-# SPECIALIZE INLINE toUTF8 :: T.Text -> B.ByteString #-}
{-# SPECIALIZE INLINE toUTF8 :: T.Text -> BL.ByteString #-}
{-# SPECIALIZE INLINE toUTF8 :: TL.Text -> B.ByteString #-}
{-# SPECIALIZE INLINE toUTF8 :: TL.Text -> BL.ByteString #-}

instance ToText T.Text where toText = id
instance FromText T.Text where fromText = id
instance ToText String where toText = T.pack
Expand Down
34 changes: 28 additions & 6 deletions test/Data/Text/ConversionsSpec.hs
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
module Data.Text.ConversionsSpec (spec) where

import Test.Hspec
import Data.Text.Conversions
import Data.Text.Conversions
import Test.Hspec

import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL

import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL

import Data.Text.Encoding ( encodeUtf8 )

newtype Upper = Upper T.Text deriving (Eq, Show)
newtype Lower = Lower T.Text deriving (Eq, Show)
Expand Down Expand Up @@ -87,3 +89,23 @@ spec = do
it "fails to decode improperly encoded bytestrings" $ do
decodeConvertText (UTF8 ("invalid \xc3\x28" :: B.ByteString)) `shouldBe` (Nothing :: Maybe T.Text)
decodeConvertText (UTF8 ("invalid \xc3\x28" :: BL.ByteString)) `shouldBe` (Nothing :: Maybe T.Text)

describe "fromUTF8" $ do
it "successfully decodes properly encoded bytestrings" $ do
fromUTF8 ("hello" :: B.ByteString) `shouldBe` Just ("hello" :: T.Text)
fromUTF8 ("hello" :: B.ByteString) `shouldBe` Just ("hello" :: TL.Text)
fromUTF8 ("hello" :: BL.ByteString) `shouldBe` Just ("hello" :: T.Text)
fromUTF8 ("hello" :: BL.ByteString) `shouldBe` Just ("hello" :: TL.Text)

it "fails to decode improperly encoded bytestrings" $ do
fromUTF8 ("invalid \xc3\x28" :: B.ByteString) `shouldBe` (Nothing :: Maybe T.Text)
fromUTF8 ("invalid \xc3\x28" :: BL.ByteString) `shouldBe` (Nothing :: Maybe T.Text)
fromUTF8 ("invalid \xc3\x28" :: B.ByteString) `shouldBe` (Nothing :: Maybe TL.Text)
fromUTF8 ("invalid \xc3\x28" :: BL.ByteString) `shouldBe` (Nothing :: Maybe TL.Text)

describe "toUTF8" $ do
it "successfully encodes to bytestrings" $ do
toUTF8 ("hello" :: T.Text) `shouldBe` (encodeUtf8 "hello")
toUTF8 ("hello" :: TL.Text) `shouldBe` (encodeUtf8 "hello")
toUTF8 ("hello" :: T.Text) `shouldBe` (BL.fromStrict $ encodeUtf8 "hello")
toUTF8 ("hello" :: TL.Text) `shouldBe` (BL.fromStrict $ encodeUtf8 "hello")