From caed573c98cd8bfe14fa4fbf793e30c42ebdcc6f Mon Sep 17 00:00:00 2001 From: William Rusnack Date: Sat, 22 Jun 2024 19:08:37 -0400 Subject: [PATCH] Integrate utf8 hPutStr to standard hPutStr (#589) * integrate utf8 hPutStr to standard hPutStr * comparing encoding pointers instead of strings of encoding names * Simplify import of utf8 and add a comment about pointer comparison in hPutStr * Add a comment about non-atomic B.hPutStrLn --------- Co-authored-by: Li-yao Xia Co-authored-by: Bodigrim --- src/Data/Text/IO.hs | 11 +-------- src/Data/Text/Internal/IO.hs | 47 +++++++++++++++++++++++++++++++----- 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/src/Data/Text/IO.hs b/src/Data/Text/IO.hs index 3d5a4446..64f1346c 100644 --- a/src/Data/Text/IO.hs +++ b/src/Data/Text/IO.hs @@ -51,8 +51,7 @@ import qualified Control.Exception as E import Control.Monad (liftM2, when) import Data.IORef (readIORef) import qualified Data.Text as T -import Data.Text.Internal.Fusion (stream, streamLn) -import Data.Text.Internal.IO (hGetLineWith, readChunk, hPutStream) +import Data.Text.Internal.IO (hGetLineWith, readChunk, hPutStr, hPutStrLn) import GHC.IO.Buffer (CharBuffer, isEmptyBuffer) import GHC.IO.Exception (IOException(ioe_type), IOErrorType(InappropriateType)) import GHC.IO.Handle.Internals (augmentIOError, hClose_help, wantReadableHandle) @@ -166,14 +165,6 @@ chooseGoodBuffering h = do hGetLine :: Handle -> IO Text hGetLine = hGetLineWith T.concat --- | Write a string to a handle. -hPutStr :: Handle -> Text -> IO () -hPutStr h = hPutStream h . stream - --- | Write a string to a handle, followed by a newline. -hPutStrLn :: Handle -> Text -> IO () -hPutStrLn h = hPutStream h . streamLn - -- | The 'interact' function takes a function of type @Text -> Text@ -- as its argument. The entire input from the standard input device is -- passed to this function as its argument, and the resulting string diff --git a/src/Data/Text/Internal/IO.hs b/src/Data/Text/Internal/IO.hs index 5c9fea8b..546d3ab6 100644 --- a/src/Data/Text/Internal/IO.hs +++ b/src/Data/Text/Internal/IO.hs @@ -1,4 +1,5 @@ {-# LANGUAGE BangPatterns, RecordWildCards #-} +{-# LANGUAGE MagicHash #-} -- | -- Module : Data.Text.Internal.IO -- Copyright : (c) 2009, 2010 Bryan O'Sullivan, @@ -19,16 +20,22 @@ module Data.Text.Internal.IO hGetLineWith , readChunk , hPutStream + , hPutStr + , hPutStrLn ) where import qualified Control.Exception as E +import qualified Data.ByteString as B +import Data.ByteString.Builder (hPutBuilder, charUtf8) import Data.IORef (readIORef, writeIORef) import Data.Text (Text) -import Data.Text.Internal.Fusion (unstream) +import Data.Text.Encoding (encodeUtf8, encodeUtf8Builder) +import Data.Text.Internal.Fusion (stream, streamLn, unstream) import Data.Text.Internal.Fusion.Types (Step(..), Stream(..)) import Data.Text.Internal.Fusion.Size (exactSize, maxSize) import Data.Text.Unsafe (inlinePerformIO) import Foreign.Storable (peekElemOff) +import GHC.Exts (reallyUnsafePtrEquality#, isTrue#) import GHC.IO.Buffer (Buffer(..), BufferState(..), CharBuffer, RawCharBuffer, bufferAdjustL, bufferElems, charSize, emptyBuffer, isEmptyBuffer, newCharBuffer, readCharBuf, withRawBuffer, @@ -37,7 +44,7 @@ import GHC.IO.Handle.Internals (ioe_EOF, readTextDevice, wantReadableHandle_, wantWritableHandle) import GHC.IO.Handle.Text (commitBuffer') import GHC.IO.Handle.Types (BufferList(..), BufferMode(..), Handle__(..), Newline(..)) -import System.IO (Handle, hPutChar) +import System.IO (Handle, hPutChar, utf8) import System.IO.Error (isEOFError) import qualified Data.Text as T @@ -168,17 +175,45 @@ readChunk hh@Handle__{..} buf = do -- | Print a @Stream Char@. hPutStream :: Handle -> Stream Char -> IO () --- This function is lifted almost verbatim from GHC.IO.Handle.Text. -hPutStream h str = do - (buffer_mode, nl) <- +hPutStream h str = hPutStreamOrUtf8 h str Nothing + +-- | Write a string to a handle. +hPutStr :: Handle -> Text -> IO () +hPutStr h t = hPutStreamOrUtf8 h (stream t) (Just putUtf8) + where + putUtf8 = B.hPutStr h (encodeUtf8 t) + +-- | Write a string to a handle, followed by a newline. +hPutStrLn :: Handle -> Text -> IO () +hPutStrLn h t = hPutStreamOrUtf8 h (streamLn t) (Just putUtf8) + where + -- Not using B.hPutStrLn because it's not necessarily atomic: + -- https://github.com/haskell/bytestring/issues/200 + putUtf8 = hPutBuilder h (encodeUtf8Builder t <> charUtf8 '\n') + +-- | 'hPutStream' with an optional special case when the output encoding is +-- UTF-8 and without newline conversion. +hPutStreamOrUtf8 :: Handle -> Stream Char -> Maybe (IO ()) -> IO () +-- This function is modified from GHC.IO.Handle.Text. +hPutStreamOrUtf8 h str mPutUtf8 = do + (buffer_mode, nl, isUtf8) <- wantWritableHandle "hPutStr" h $ \h_ -> do bmode <- getSpareBuffer h_ - return (bmode, haOutputNL h_) + return (bmode, haOutputNL h_, eqUTF8 h_) case buffer_mode of + _ | Just putUtf8 <- mPutUtf8, nl == LF && isUtf8 -> putUtf8 (NoBuffering, _) -> hPutChars h str (LineBuffering, buf) -> writeLines h nl buf str (BlockBuffering _, buf) -> writeBlocks (nl == CRLF) h buf str + where + -- If the encoding is UTF-8, it's most likely pointer-equal to + -- 'System.IO.utf8', letting us avoid a String comparison. + -- If it is somehow UTF-8 but not pointer-equal to 'utf8', + -- we will just take a slower branch, but the result is still correct. + eqUTF8 = maybe False (\enc -> isTrue# (reallyUnsafePtrEquality# utf8 enc)) . haCodec +{-# INLINE hPutStreamOrUtf8 #-} + hPutChars :: Handle -> Stream Char -> IO () hPutChars h (Stream next0 s0 _len) = loop s0 where