From 603dffc71543dbab4d02f777e3c3b0ab8058e445 Mon Sep 17 00:00:00 2001 From: William Rusnack Date: Wed, 29 May 2024 20:13:20 -0400 Subject: [PATCH 1/2] integrate utf8 hPutStr to standard hPutStr --- src/Data/Text/IO.hs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Data/Text/IO.hs b/src/Data/Text/IO.hs index a4569bc3..ba054acd 100644 --- a/src/Data/Text/IO.hs +++ b/src/Data/Text/IO.hs @@ -49,14 +49,17 @@ import System.IO (Handle, IOMode(..), hPutChar, openFile, stdin, stdout, withFile) import qualified Control.Exception as E import Control.Monad (liftM2, when) +import qualified Data.ByteString as B import Data.IORef (readIORef, writeIORef) import qualified Data.Text as T +import Data.Text.Encoding (encodeUtf8) import Data.Text.Internal.Fusion (stream) import Data.Text.Internal.Fusion.Types (Step(..), Stream(..)) import Data.Text.Internal.IO (hGetLineWith, readChunk) import GHC.IO.Buffer (Buffer(..), BufferState(..), RawCharBuffer, CharBuffer, emptyBuffer, isEmptyBuffer, newCharBuffer) import qualified GHC.IO.Buffer +import GHC.IO.Encoding (textEncodingName) import GHC.IO.Exception (IOException(ioe_type), IOErrorType(InappropriateType)) import GHC.IO.Handle.Internals (augmentIOError, hClose_help, wantReadableHandle, wantWritableHandle) @@ -176,12 +179,14 @@ hGetLine = hGetLineWith T.concat hPutStr :: Handle -> Text -> IO () -- This function is lifted almost verbatim from GHC.IO.Handle.Text. hPutStr h t = do - (buffer_mode, nl) <- + (buffer_mode, nl, isUTF8) <- wantWritableHandle "hPutStr" h $ \h_ -> do bmode <- getSpareBuffer h_ - return (bmode, haOutputNL h_) + let isUTF8 = maybe False ((== "UTF-8") . textEncodingName) $ haCodec h_ + return (bmode, haOutputNL h_, isUTF8) let str = stream t case buffer_mode of + _ | nl == LF && isUTF8 -> B.hPutStr h $ encodeUtf8 t (NoBuffering, _) -> hPutChars h str (LineBuffering, buf) -> writeLines h nl buf str (BlockBuffering _, buf) -> writeBlocks (nl == CRLF) h buf str From b56f07280c1463c59bfcaa6be2f55a9f601d1407 Mon Sep 17 00:00:00 2001 From: William Rusnack Date: Sat, 1 Jun 2024 09:24:40 -0400 Subject: [PATCH 2/2] comparing encoding pointers instead of strings of encoding names --- src/Data/Text/IO.hs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Data/Text/IO.hs b/src/Data/Text/IO.hs index ba054acd..c6fa7706 100644 --- a/src/Data/Text/IO.hs +++ b/src/Data/Text/IO.hs @@ -1,5 +1,6 @@ {-# LANGUAGE BangPatterns, CPP, RecordWildCards, ScopedTypeVariables #-} {-# LANGUAGE Trustworthy #-} +{-# LANGUAGE MagicHash #-} -- | -- Module : Data.Text.IO -- Copyright : (c) 2009, 2010 Bryan O'Sullivan, @@ -56,10 +57,11 @@ import Data.Text.Encoding (encodeUtf8) import Data.Text.Internal.Fusion (stream) import Data.Text.Internal.Fusion.Types (Step(..), Stream(..)) import Data.Text.Internal.IO (hGetLineWith, readChunk) +import GHC.Exts (reallyUnsafePtrEquality#, isTrue#) import GHC.IO.Buffer (Buffer(..), BufferState(..), RawCharBuffer, CharBuffer, emptyBuffer, isEmptyBuffer, newCharBuffer) import qualified GHC.IO.Buffer -import GHC.IO.Encoding (textEncodingName) +import GHC.IO.Encoding (utf8) import GHC.IO.Exception (IOException(ioe_type), IOErrorType(InappropriateType)) import GHC.IO.Handle.Internals (augmentIOError, hClose_help, wantReadableHandle, wantWritableHandle) @@ -182,8 +184,7 @@ hPutStr h t = do (buffer_mode, nl, isUTF8) <- wantWritableHandle "hPutStr" h $ \h_ -> do bmode <- getSpareBuffer h_ - let isUTF8 = maybe False ((== "UTF-8") . textEncodingName) $ haCodec h_ - return (bmode, haOutputNL h_, isUTF8) + return (bmode, haOutputNL h_, eqUTF8 h_) let str = stream t case buffer_mode of _ | nl == LF && isUTF8 -> B.hPutStr h $ encodeUtf8 t @@ -191,6 +192,9 @@ hPutStr h t = do (LineBuffering, buf) -> writeLines h nl buf str (BlockBuffering _, buf) -> writeBlocks (nl == CRLF) h buf str + where + eqUTF8 = maybe False (\enc -> isTrue# (reallyUnsafePtrEquality# utf8 enc)) . haCodec + hPutChars :: Handle -> Stream Char -> IO () hPutChars h (Stream next0 s0 _len) = loop s0 where