Skip to content

Commit 9fc7dc1

Browse files
committed
Decode as UTF-8 in ExitCodeException's Show instance
Partial fix for #86. This isn't perfect (for retrocomputing you may want another character encoding) but improves the behavior in many circumstances.
1 parent 685a67a commit 9fc7dc1

File tree

3 files changed

+117
-6
lines changed

3 files changed

+117
-6
lines changed

package.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ dependencies:
2121
- stm
2222
- transformers
2323
- unliftio-core
24+
- text
2425

2526
library:
2627
source-dirs: src

src/System/Process/Typed/Internal.hs

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ import qualified Control.Exception as E
1717
import Control.Exception hiding (bracket, finally, handle)
1818
import Control.Monad (void)
1919
import qualified System.Process as P
20+
import qualified Data.Text as T
21+
import Data.Text.Encoding.Error (lenientDecode)
22+
import qualified Data.Text.Lazy as TL (toStrict)
23+
import qualified Data.Text.Lazy.Encoding as TLE
2024
import Data.Typeable (Typeable)
2125
import System.IO (Handle, hClose, IOMode(ReadWriteMode), withBinaryFile)
2226
import Control.Concurrent.Async (async)
@@ -616,17 +620,20 @@ data ExitCodeException = ExitCodeException
616620
instance Exception ExitCodeException
617621
instance Show ExitCodeException where
618622
show ece =
619-
let stdout = L8.unpack $ eceStdout ece
620-
stderr = L8.unpack $ eceStderr ece
621-
stdout' = if L.null (eceStdout ece)
623+
let decode = TL.toStrict . TLE.decodeUtf8With lenientDecode
624+
625+
stdout = decode $ eceStdout ece
626+
stderr = decode $ eceStderr ece
627+
628+
stdout' = if T.null stdout
622629
then []
623630
else [ "\n\nStandard output:\n"
624-
, stdout
631+
, T.unpack stdout
625632
]
626-
stderr' = if L.null (eceStderr ece)
633+
stderr' = if T.null stderr
627634
then []
628635
else [ "\nStandard error:\n"
629-
, stderr
636+
, T.unpack stderr
630637
]
631638
in concat $
632639
[ "Received "

test/System/Process/TypedSpec.hs

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,3 +341,106 @@ spec = do
341341
++ "puppy\n"
342342
++ "Standard error:\n"
343343
++ "doggy"
344+
345+
it "decodes UTF-8" $ do
346+
let exitCodeException =
347+
ExitCodeException
348+
{ eceExitCode = ExitFailure 1
349+
, eceProcessConfig = proc "puppy" []
350+
, eceStdout = L.pack [0x61, 0xc2, 0xa9, 0xe2, 0x82, 0xac, 0xf0, 0x9f, 0x92, 0xa9, 0x0a]
351+
, eceStderr = L.pack [0x61, 0xc2, 0xa9, 0xe2, 0x82, 0xac, 0xf0, 0x9f, 0x92, 0xa9, 0x0a]
352+
}
353+
show exitCodeException `shouldBe`
354+
"Received ExitFailure 1 when running\n"
355+
++ "Raw command: puppy\n"
356+
++ "\n"
357+
++ "Standard output:\n"
358+
++ "a©€💩\n"
359+
++ "\n"
360+
++ "Standard error:\n"
361+
++ "a©€💩\n"
362+
363+
it "decodes UTF-8 leniently (overlong)" $ do
364+
let exitCodeException =
365+
ExitCodeException
366+
{ eceExitCode = ExitFailure 1
367+
, eceProcessConfig = proc "puppy" []
368+
, -- Overlong sequence, U+20AC € encoded as 4 bytes.
369+
-- We get four U+FFFD � replacement characters out, one
370+
-- for each byte in the sequence.
371+
eceStdout = L.pack [ 0xf0, 0x82, 0x82, 0xac, 0x0a ]
372+
, eceStderr = L.empty
373+
}
374+
show exitCodeException `shouldBe`
375+
"Received ExitFailure 1 when running\n"
376+
++ "Raw command: puppy\n"
377+
++ "\n"
378+
++ "Standard output:\n"
379+
++ "����\n"
380+
381+
it "decodes UTF-8 leniently (lone surrogate)" $ do
382+
let exitCodeException =
383+
ExitCodeException
384+
{ eceExitCode = ExitFailure 1
385+
, eceProcessConfig = proc "puppy" []
386+
, -- Half of a surrogate pair, invalid in UTF-8. (U+D800)
387+
eceStdout = L.pack [ 0xed, 0xa0, 0x80, 0x0a]
388+
, eceStderr = L.empty
389+
}
390+
show exitCodeException `shouldBe`
391+
"Received ExitFailure 1 when running\n"
392+
++ "Raw command: puppy\n"
393+
++ "\n"
394+
++ "Standard output:\n"
395+
++ "���\n"
396+
397+
it "decodes UTF-8 leniently (unexpected continuation)" $ do
398+
let exitCodeException =
399+
ExitCodeException
400+
{ eceExitCode = ExitFailure 1
401+
, eceProcessConfig = proc "puppy" []
402+
, -- An unexpected continuation byte.
403+
eceStdout = L.pack [ 0xa0, 0x80, 0x0a]
404+
, eceStderr = L.empty
405+
}
406+
show exitCodeException `shouldBe`
407+
"Received ExitFailure 1 when running\n"
408+
++ "Raw command: puppy\n"
409+
++ "\n"
410+
++ "Standard output:\n"
411+
++ "��\n"
412+
413+
it "decodes UTF-8 leniently (missing continuation)" $ do
414+
let exitCodeException =
415+
ExitCodeException
416+
{ eceExitCode = ExitFailure 1
417+
, eceProcessConfig = proc "puppy" []
418+
, -- Missing a continuation byte.
419+
eceStdout = L.pack [ 0xf0, 0x9f, 0x90, 0x0a]
420+
, eceStderr = L.empty
421+
}
422+
show exitCodeException `shouldBe`
423+
"Received ExitFailure 1 when running\n"
424+
++ "Raw command: puppy\n"
425+
++ "\n"
426+
++ "Standard output:\n"
427+
++ "���\n"
428+
429+
it "decodes UTF-8 leniently (invalid byte)" $ do
430+
let exitCodeException =
431+
ExitCodeException
432+
{ eceExitCode = ExitFailure 1
433+
, eceProcessConfig = proc "puppy" []
434+
, -- Invalid bytes (no defined meaning in UTF-8).
435+
eceStdout = L.pack [ 0xc0, 0x0a, 0xc1, 0x0a, 0xf5, 0x0a, 0xff, 0x0a]
436+
, eceStderr = L.empty
437+
}
438+
show exitCodeException `shouldBe`
439+
"Received ExitFailure 1 when running\n"
440+
++ "Raw command: puppy\n"
441+
++ "\n"
442+
++ "Standard output:\n"
443+
++ "�\n"
444+
++ "�\n"
445+
++ "�\n"
446+
++ "�\n"

0 commit comments

Comments
 (0)