From c06c3da3c9394075650adcbf80c878a2b56364d2 Mon Sep 17 00:00:00 2001 From: Bodigrim Date: Wed, 14 Feb 2024 21:59:48 +0000 Subject: [PATCH 1/3] Do not use Data.ByteString.isValidUtf8 unless bytestring >= 0.12.1 (unreleased atm) --- src/Data/Text/Internal/Encoding.hs | 6 ++++-- src/Data/Text/Internal/Validate.hs | 5 ++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Data/Text/Internal/Encoding.hs b/src/Data/Text/Internal/Encoding.hs index 59a01caf2..cb6bd7fd1 100644 --- a/src/Data/Text/Internal/Encoding.hs +++ b/src/Data/Text/Internal/Encoding.hs @@ -244,8 +244,10 @@ validateUtf8Chunk bs = validateUtf8ChunkFrom 0 bs (,) {-# INLINE validateUtf8ChunkFrom #-} validateUtf8ChunkFrom :: forall r. Int -> ByteString -> (Int -> Maybe Utf8State -> r) -> r validateUtf8ChunkFrom ofs bs k - -- B.isValidUtf8 is buggy before bytestring-0.11.5.0 -#if defined(SIMDUTF) || MIN_VERSION_bytestring(0,11,5) + -- B.isValidUtf8 is buggy before bytestring-0.11.5.3 / bytestring-0.12.1.0. + -- MIN_VERSION_bytestring does not allow us to differentiate + -- between 0.11.5.2 and 0.11.5.3 so no choice except demanding 0.12.1+. +#if defined(SIMDUTF) || MIN_VERSION_bytestring(0,12,1) | guessUtf8Boundary > 0 && -- the rest of the bytestring is valid utf-8 up to the boundary ( diff --git a/src/Data/Text/Internal/Validate.hs b/src/Data/Text/Internal/Validate.hs index e011e16e8..cc0e9cbaa 100644 --- a/src/Data/Text/Internal/Validate.hs +++ b/src/Data/Text/Internal/Validate.hs @@ -47,7 +47,10 @@ isValidUtf8ByteString :: ByteString -> Bool isValidUtf8ByteString bs = withBS bs $ \fp len -> unsafeDupablePerformIO $ unsafeWithForeignPtr fp $ \ptr -> (/= 0) <$> c_is_valid_utf8_ptr_unsafe ptr (fromIntegral len) #else -#if MIN_VERSION_bytestring(0,11,2) +-- B.isValidUtf8 is buggy before bytestring-0.11.5.3 / bytestring-0.12.1.0. +-- MIN_VERSION_bytestring does not allow us to differentiate +-- between 0.11.5.2 and 0.11.5.3 so no choice except demanding 0.12.1+. +#if MIN_VERSION_bytestring(0,12,1) isValidUtf8ByteString = B.isValidUtf8 #else isValidUtf8ByteString = N.isValidUtf8ByteStringHaskell From 29396ca0c0cc56ee73d45e16174704db7bf8e5c8 Mon Sep 17 00:00:00 2001 From: Bodigrim Date: Wed, 14 Feb 2024 22:05:32 +0000 Subject: [PATCH 2/3] Update changelog --- changelog.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/changelog.md b/changelog.md index b61749b1c..0e50c6876 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,18 @@ +### 2.1.1 + +* Add pure Haskell implementations as an alternative to C-based ones, + suitable for JavaScript backend. + +* [Add type synonyms for lazy and strict text flavours](https://github.com/haskell/text/pull/547) + +* [Share empty `Text` values](https://github.com/haskell/text/pull/493) + +* [Fix bug in `isValidUtf8ByteArray`](https://github.com/haskell/text/pull/553) + +* [Optimize the implementation of `Data.Text.concat`](https://github.com/haskell/text/pull/551) + +* [Fix `filter/filter` rules for `Text` and lazy `Text`](https://github.com/haskell/text/pull/560) + ### 2.1 * [Switch `Data.Text.Array` to `Data.Array.Byte`](https://github.com/haskell/text/pull/474) From ebd445d09583776d21600d8c1459ace2f06e364b Mon Sep 17 00:00:00 2001 From: Bodigrim Date: Wed, 14 Feb 2024 22:05:54 +0000 Subject: [PATCH 3/3] Bump version to 2.1.1 --- text.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text.cabal b/text.cabal index 3ddd4c8c5..af868d222 100644 --- a/text.cabal +++ b/text.cabal @@ -1,6 +1,6 @@ cabal-version: 2.2 name: text -version: 2.1 +version: 2.1.1 homepage: https://github.com/haskell/text bug-reports: https://github.com/haskell/text/issues