From 43c74f9d92a3a5ab388cfaccef228382b057f9bb Mon Sep 17 00:00:00 2001
From: Matthew Craven <clyring@gmail.com>
Date: Fri, 12 Sep 2025 15:03:48 -0400
Subject: [PATCH 1/6] Add a few basic tests for unboxing and RULES working

This uses tasty-inspection-testing, which is a plugin and so
introduces a circular dependency:
   bytestring -> plugin -> ghc -> bytestring
This is worked around with an ugly but simple hack:
Guard the part of the testsuite that uses the plugin behind a flag,
and change the name of the (local) package before trying to build
the testsuite with that flag enabled.
---
 .github/workflows/ci.yml     | 23 ++++++++-
 bytestring.cabal             | 25 ++++++++-
 run-plugin-tests.sh          | 11 ++++
 tests/Main.hs                |  8 +++
 tests/PluginTests.hs         | 98 ++++++++++++++++++++++++++++++++++++
 tests/PluginTests/Splices.hs | 45 +++++++++++++++++
 6 files changed, 207 insertions(+), 3 deletions(-)
 create mode 100644 run-plugin-tests.sh
 create mode 100644 tests/PluginTests.hs
 create mode 100644 tests/PluginTests/Splices.hs

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b683dfc40..315bbfe55 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -158,7 +158,7 @@ jobs:
         path: |
           ${{ steps.setup-haskell-cabal.outputs.cabal-store }}
           dist-newstyle
-        key: ${{ runner.os }}-latest
+        key: ${{ runner.os }}-latest-bounds-checking
     - name: Test
       run: cabal test --ghc-options='-fcheck-prim-bounds -fno-ignore-asserts -DHS_BYTESTRING_ASSERTIONS'
 
@@ -183,6 +183,27 @@ jobs:
     - name: Test
       run: cabal test -fpure-haskell --ghc-options=-fno-ignore-asserts --enable-tests --test-show-details=direct all
 
+  inspection-testing:
+    needs: build
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - uses: haskell-actions/setup@v2
+      id: setup-haskell-cabal
+      with:
+        ghc-version: 'latest'
+    - name: Update cabal package database
+      run: cabal update
+    - uses: actions/cache@v3
+      name: Cache cabal stuff
+      with:
+        path: |
+          ${{ steps.setup-haskell-cabal.outputs.cabal-store }}
+          dist-newstyle
+        key: ${{ runner.os }}-latest-inspection-testing
+    - name: Test
+      run: sh run-plugin-tests.sh
+
   i386:
     needs: build
     runs-on: ubuntu-latest
diff --git a/bytestring.cabal b/bytestring.cabal
index dcdd35a0c..35e6e5ac1 100644
--- a/bytestring.cabal
+++ b/bytestring.cabal
@@ -80,6 +80,14 @@ Flag pure-haskell
   default: False
   manual: True
 
+Flag plugin-tests
+  description: Also build and run the tests that use plugins.
+
+    (Due to circular dependencies, these tests currently
+    cannot be built without renaming the library.)
+  default: False
+  manual: True
+
 source-repository head
   type:     git
   location: https://github.com/haskell/bytestring
@@ -216,16 +224,27 @@ test-suite bytestring-tests
                     QuickCheckUtils
   hs-source-dirs:   tests,
                     tests/builder
+  build-depends:    bytestring
+                    -- Keep 'bytestring' on the same line as 'build-depends:'
+                    -- this is used by our hack to allow plugin tests
   build-depends:    base,
-                    bytestring,
                     deepseq,
                     QuickCheck,
                     tasty,
+                    tasty-expected-failure ^>= 0.12.3,
                     tasty-quickcheck >= 0.8.1,
                     template-haskell,
                     transformers >= 0.3,
                     syb
 
+  if flag(plugin-tests)
+    cpp-options:   -DBYTESTRING_PLUGIN_TESTS=1
+    build-depends: tasty-inspection-testing ^>= 0.2.1
+    other-modules: PluginTests
+                   PluginTests.Splices
+  else
+    cpp-options:   -DBYTESTRING_PLUGIN_TESTS=0
+
   ghc-options:      -fwarn-unused-binds
                     -rtsopts
   if !arch(wasm32)
@@ -249,8 +268,10 @@ benchmark bytestring-bench
 
   ghc-options:      -O2 "-with-rtsopts=-A32m"
                     -fproc-alignment=64
+  build-depends:    bytestring
+                    -- Keep 'bytestring' on the same line as 'build-depends:'
+                    -- this is used by our hack to allow plugin tests
   build-depends:    base,
-                    bytestring,
                     deepseq,
                     tasty-bench,
                     random
diff --git a/run-plugin-tests.sh b/run-plugin-tests.sh
new file mode 100644
index 000000000..7720d1bd9
--- /dev/null
+++ b/run-plugin-tests.sh
@@ -0,0 +1,11 @@
+# small script to hackily work around the dependency cycle
+# 'bytestring -> [plugin] -> ghc -> bytestring' that prevents
+# the testsuite from using plugins, by renaming the library
+# to 'bytestring-plugins-hack'
+
+sed -E '/Name:|build-depends:/s/bytestring/bytestring-plugins-hack/' bytestring.cabal > bytestring-plugins-hack.cabal
+
+mv bytestring.cabal bytestring.cabal.__BACKUP__
+cabal test -fplugin-tests --test-show-details=direct "$@"
+mv bytestring.cabal.__BACKUP__ bytestring.cabal
+rm bytestring-plugins-hack.cabal
diff --git a/tests/Main.hs b/tests/Main.hs
index 043b5c415..b519e086c 100644
--- a/tests/Main.hs
+++ b/tests/Main.hs
@@ -1,3 +1,5 @@
+{-# LANGUAGE CPP #-}
+
 module Main (main) where
 
 import Test.Tasty
@@ -7,6 +9,9 @@ import qualified IsValidUtf8
 import qualified LazyHClose
 import qualified Lift
 import qualified Properties
+#if BYTESTRING_PLUGIN_TESTS
+import qualified PluginTests
+#endif
 
 main :: IO ()
 main = defaultMain $ testGroup "All"
@@ -15,4 +20,7 @@ main = defaultMain $ testGroup "All"
   , LazyHClose.testSuite
   , Lift.testSuite
   , Properties.testSuite
+#if BYTESTRING_PLUGIN_TESTS
+  , PluginTests.testSuite
+#endif
   ]
diff --git a/tests/PluginTests.hs b/tests/PluginTests.hs
new file mode 100644
index 000000000..70ceec904
--- /dev/null
+++ b/tests/PluginTests.hs
@@ -0,0 +1,98 @@
+{-# LANGUAGE TemplateHaskell #-}
+
+{-# OPTIONS_GHC -O -dsuppress-all -dno-suppress-type-signatures -fplugin=Test.Tasty.Inspection.Plugin #-}
+
+module PluginTests (testSuite) where
+
+import qualified Data.ByteString as S
+import qualified Data.ByteString.Char8 as S8
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString.Lazy.Char8 as BL8
+import Data.ByteString.Builder
+import Data.Word
+
+import Test.Tasty
+import Test.Tasty.ExpectedFailure
+import Test.Tasty.Inspection
+
+import PluginTests.Splices
+
+testSuite :: TestTree
+testSuite = testGroup "Inspection plugin tests"
+  [ testGroup "Literals" $
+    [ testGroup "StrictByteString"
+      [ $(hasNoStringyStuff 'pack_strict_foo)
+      , $(inspectTest $ 'len_pack_strict_foo === 'literal_three)
+      , $(hasNoStringyStuff 'pack_strict_literal)
+      , $(inspectTest $ 'len_pack_strict_literal === 'literal_thirtyOne)
+      , expectFail $ $(hasNoStringyStuff 'pack_strict_nonAscii)
+      , expectFail $ $(hasNoStringyStuff 'pack_strict_literal_nonAscii)
+      ]
+
+    , testGroup "Builder"
+      [ $(hasNoStringyStuff 'builder_string8_foo)
+      , $(hasNoStringyStuff 'builder_string8_literal)
+      , $(hasNoStringyStuff 'builder_stringUtf8_foo)
+      , $(hasNoStringyStuff 'builder_stringUtf8_literal)
+      , $(hasNoStringyStuff 'builder_stringUtf8_nonAscii)
+      , $(hasNoStringyStuff 'builder_stringUtf8_literal_nonAscii)
+      ]
+    ]
+
+  , $(inspectTest $ 'append_pack_replicate_unboxing `hasNoType` ''S.ByteString)
+  ]
+
+foo_string_literal :: [Char]
+foo_string_literal = "foo"
+
+unicode_string_literal :: [Char]
+unicode_string_literal = "\0example\0  ... \xff \x1f530"
+
+pack_strict_foo :: S.ByteString
+pack_strict_foo = S8.pack foo_string_literal
+
+len_pack_strict_foo :: Int
+len_pack_strict_foo = S.length pack_strict_foo
+
+pack_strict_literal :: S.ByteString
+pack_strict_literal = S8.pack "some ascii literal of length 31"
+
+len_pack_strict_literal :: Int
+len_pack_strict_literal = S.length pack_strict_literal
+
+pack_strict_nonAscii :: S.ByteString
+pack_strict_nonAscii = S8.pack unicode_string_literal
+
+pack_strict_literal_nonAscii :: S.ByteString
+pack_strict_literal_nonAscii
+  = S8.pack "this\0literal contains\x80\xf0\xff non-ascii characters"
+
+literal_three :: Int
+literal_three = 3
+
+literal_thirtyOne :: Int
+literal_thirtyOne = 31
+
+builder_string8_foo :: Builder
+builder_string8_foo = string8 foo_string_literal
+
+builder_string8_literal :: Builder
+builder_string8_literal = string8 "some ascii string literal"
+
+builder_stringUtf8_foo :: Builder
+builder_stringUtf8_foo = stringUtf8 foo_string_literal
+
+builder_stringUtf8_literal :: Builder
+builder_stringUtf8_literal = stringUtf8 "some other ascii string literal"
+
+builder_stringUtf8_nonAscii :: Builder
+builder_stringUtf8_nonAscii = stringUtf8 unicode_string_literal
+
+builder_stringUtf8_literal_nonAscii :: Builder
+builder_stringUtf8_literal_nonAscii
+  = stringUtf8 "inline literal string containing \0special\0 and non-ASCII characters like \x2139"
+
+append_pack_replicate_unboxing :: Int -> Word8 -> Int
+append_pack_replicate_unboxing n c
+  = S.count c $ S.append (S.pack [0..c]) (S.replicate n c)
+
diff --git a/tests/PluginTests/Splices.hs b/tests/PluginTests/Splices.hs
new file mode 100644
index 000000000..7cd30f5a0
--- /dev/null
+++ b/tests/PluginTests/Splices.hs
@@ -0,0 +1,45 @@
+{-# LANGUAGE TemplateHaskellQuotes #-}
+
+module PluginTests.Splices where
+
+import Test.Tasty
+import Test.Tasty.Inspection
+import Language.Haskell.TH
+
+import GHC.Base
+  ( unpackCString#
+  , unpackAppendCString#
+  , unpackCStringUtf8#
+  , unpackAppendCStringUtf8#
+  , unpackNBytes#
+  , unpackFoldrCString#
+  , unpackFoldrCStringUtf8#
+  )
+import Language.Haskell.TH (Name)
+
+unpackCString_functions_without_foldr :: [Name]
+unpackCString_functions_without_foldr =
+  [ 'unpackCString#
+  , 'unpackAppendCString#
+  , 'unpackCStringUtf8#
+  , 'unpackAppendCStringUtf8#
+  , 'unpackNBytes#
+  ]
+
+unpackCString_functions_all :: [Name]
+unpackCString_functions_all =
+  [ 'unpackFoldrCString#
+  , 'unpackFoldrCStringUtf8#
+  ] ++ unpackCString_functions_without_foldr
+
+hasNoStringyStuff :: Name -> Q Exp
+hasNoStringyStuff n = flip inspectObligations n
+  [ (`hasNoTypes` [''Char, ''[]])
+  , (`doesNotUseAnyOf` unpackCString_functions_all)
+  ]
+
+hasNoStringyStuffExceptFolds :: Name -> Q Exp
+hasNoStringyStuffExceptFolds n = flip inspectObligations n
+  [ (`hasNoTypes` [''Char, ''[]])
+  , (`doesNotUseAnyOf` unpackCString_functions_without_foldr)
+  ]

From dbfbc9c15c23e9364f31daa162c1280b4bdc35df Mon Sep 17 00:00:00 2001
From: Matthew Craven <clyring@gmail.com>
Date: Sat, 13 Sep 2025 20:16:11 -0400
Subject: [PATCH 2/6] Replace 'plugin-tests' flag with more sed hackery

---
 bytestring.cabal    | 12 +++---------
 run-plugin-tests.sh | 11 +++++++----
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/bytestring.cabal b/bytestring.cabal
index 35e6e5ac1..62abbb4b1 100644
--- a/bytestring.cabal
+++ b/bytestring.cabal
@@ -80,14 +80,6 @@ Flag pure-haskell
   default: False
   manual: True
 
-Flag plugin-tests
-  description: Also build and run the tests that use plugins.
-
-    (Due to circular dependencies, these tests currently
-    cannot be built without renaming the library.)
-  default: False
-  manual: True
-
 source-repository head
   type:     git
   location: https://github.com/haskell/bytestring
@@ -237,7 +229,9 @@ test-suite bytestring-tests
                     transformers >= 0.3,
                     syb
 
-  if flag(plugin-tests)
+  -- The following intentionally-funnily-spelled condition
+  -- is changed to 'true' by our hack to allow plugin-based tests
+  if false && impl(pluginTestsHack)
     cpp-options:   -DBYTESTRING_PLUGIN_TESTS=1
     build-depends: tasty-inspection-testing ^>= 0.2.1
     other-modules: PluginTests
diff --git a/run-plugin-tests.sh b/run-plugin-tests.sh
index 7720d1bd9..4f1bc306e 100644
--- a/run-plugin-tests.sh
+++ b/run-plugin-tests.sh
@@ -3,9 +3,12 @@
 # the testsuite from using plugins, by renaming the library
 # to 'bytestring-plugins-hack'
 
-sed -E '/Name:|build-depends:/s/bytestring/bytestring-plugins-hack/' bytestring.cabal > bytestring-plugins-hack.cabal
+sed -E '
+  /Name:|build-depends:/s/bytestring/bytestring-plugins-hack/ ;
+  s/if false && impl\(pluginTestsHack\)/if true/' \
+    bytestring.cabal > bytestring-plugins-hack.cabal
 
-mv bytestring.cabal bytestring.cabal.__BACKUP__
-cabal test -fplugin-tests --test-show-details=direct "$@"
-mv bytestring.cabal.__BACKUP__ bytestring.cabal
+mv bytestring.cabal bytestring.cabal.__MOVED_DURING_PLUGIN_TESTS__
+cabal test --test-show-details=direct "$@"
+mv bytestring.cabal.__MOVED_DURING_PLUGIN_TESTS__ bytestring.cabal
 rm bytestring-plugins-hack.cabal

From 62d6da3604c5160be10da7190a33bebbe19a4d8e Mon Sep 17 00:00:00 2001
From: Matthew Craven <clyring@gmail.com>
Date: Sat, 13 Sep 2025 20:18:42 -0400
Subject: [PATCH 3/6] Fix emulated CI jobs

---
 .github/workflows/ci.yml | 2 +-
 bytestring.cabal         | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 315bbfe55..92b8d4138 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -138,7 +138,7 @@ jobs:
           apt-get install -y ghc libghc-tasty-quickcheck-dev libghc-syb-dev
         run: |
           ghc --version
-          ghc --make -fPIC -XHaskell2010 -XBangPatterns -XDeriveDataTypeable -XDeriveGeneric -XDeriveLift -XFlexibleContexts -XFlexibleInstances -XLambdaCase -XMagicHash -XMultiWayIf -XNamedFieldPuns -XPatternSynonyms -XRankNTypes -XScopedTypeVariables -XStandaloneDeriving -XTupleSections -XTypeApplications -XTypeOperators -XUnboxedTuples -optP-Wall -optP-Werror=undef -DPURE_HASKELL=0 -Iinclude -itests:tests/builder -o Main cbits/*.c tests/Main.hs +RTS -s
+          ghc --make -fPIC -XHaskell2010 -XBangPatterns -XDeriveDataTypeable -XDeriveGeneric -XDeriveLift -XFlexibleContexts -XFlexibleInstances -XLambdaCase -XMagicHash -XMultiWayIf -XNamedFieldPuns -XPatternSynonyms -XRankNTypes -XScopedTypeVariables -XStandaloneDeriving -XTupleSections -XTypeApplications -XTypeOperators -XUnboxedTuples -optP-Wall -optP-Werror=undef -DPURE_HASKELL=0 -DBYTESTRING_PLUGIN_TESTS=0 -Iinclude -itests:tests/builder -o Main cbits/*.c tests/Main.hs +RTS -s
           ./Main +RTS -s
 
   bounds-checking:
diff --git a/bytestring.cabal b/bytestring.cabal
index 62abbb4b1..06305d649 100644
--- a/bytestring.cabal
+++ b/bytestring.cabal
@@ -223,7 +223,6 @@ test-suite bytestring-tests
                     deepseq,
                     QuickCheck,
                     tasty,
-                    tasty-expected-failure ^>= 0.12.3,
                     tasty-quickcheck >= 0.8.1,
                     template-haskell,
                     transformers >= 0.3,
@@ -233,7 +232,8 @@ test-suite bytestring-tests
   -- is changed to 'true' by our hack to allow plugin-based tests
   if false && impl(pluginTestsHack)
     cpp-options:   -DBYTESTRING_PLUGIN_TESTS=1
-    build-depends: tasty-inspection-testing ^>= 0.2.1
+    build-depends: tasty-inspection-testing ^>= 0.2.1,
+                   tasty-expected-failure ^>= 0.12.3
     other-modules: PluginTests
                    PluginTests.Splices
   else

From af12adcb35d8171618b7b56a9375a820442042ee Mon Sep 17 00:00:00 2001
From: Matthew Craven <clyring@gmail.com>
Date: Sat, 13 Sep 2025 20:18:57 -0400
Subject: [PATCH 4/6] fiddle with wording

---
 bytestring.cabal | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bytestring.cabal b/bytestring.cabal
index 06305d649..09879788c 100644
--- a/bytestring.cabal
+++ b/bytestring.cabal
@@ -218,7 +218,7 @@ test-suite bytestring-tests
                     tests/builder
   build-depends:    bytestring
                     -- Keep 'bytestring' on the same line as 'build-depends:'
-                    -- this is used by our hack to allow plugin tests
+                    -- this is used by our hack to allow plugin-based tests
   build-depends:    base,
                     deepseq,
                     QuickCheck,
@@ -264,7 +264,7 @@ benchmark bytestring-bench
                     -fproc-alignment=64
   build-depends:    bytestring
                     -- Keep 'bytestring' on the same line as 'build-depends:'
-                    -- this is used by our hack to allow plugin tests
+                    -- this is used by our hack to allow plugin-based tests
   build-depends:    base,
                     deepseq,
                     tasty-bench,

From 1f4666e2ccfa6c28eb8dc97b1d5d7cb4ad609170 Mon Sep 17 00:00:00 2001
From: Viktor Dukhovni <ietf-dane@dukhovni.org>
Date: Thu, 12 Jan 2023 21:59:41 -0500
Subject: [PATCH 5/6] Avoid per-byte loop in cstring{,Utf8} builders
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Copy chunks of the input to the output buffer with, up to the shorter
of the available buffer space and the "null-free" portion of the remaining
string.  Actually "null-free" here means not containing any denormalised
two-byte encodings starting with 0xC0 (so possibly also other ASCII
bytes if the UTF-8 encoding is oddball).

This substantially improves performance, with just one "15%" increase
that looks like a spurious measurement error (perhaps code layout
difference artefact).

      UTF-8 String (12B):                            OK
        16.7 ns ± 1.3 ns, 60% less than baseline
      UTF-8 String (64B, one null):                  OK
        22.6 ns ± 1.3 ns, 87% less than baseline
      UTF-8 String (64B, one null, no shared work):  OK
        30.1 ns ± 2.6 ns, 83% less than baseline
      UTF-8 String (64B, half nulls):                OK
        92.6 ns ± 5.3 ns, 49% less than baseline
      UTF-8 String (64B, all nulls):                 OK
        76.3 ns ± 4.5 ns, 57% less than baseline
      UTF-8 String (64B, all nulls, no shared work): OK
        82.3 ns ± 5.6 ns, 54% less than baseline
      ASCII String (12B):                            OK
        6.50 ns ± 326 ps, 76% less than baseline
      ASCII String (64B):                            OK
        8.03 ns ± 334 ps, 94% less than baseline
      AsciiLit:                                      OK
        8.02 ns ± 648 ps, 94% less than baseline
      Utf8Lit:                                       OK
        21.8 ns ± 1.3 ns, 88% less than baseline
      strLit:                                        OK
        8.90 ns ± 788 ps, 94% less than baseline
      stringUtf8:                                    OK
        22.4 ns ± 1.3 ns, 87% less than baseline
      strLitInline:                                  OK
        8.26 ns ± 676 ps, 94% less than baseline
      utf8LitInline:                                 OK
        23.2 ns ± 1.3 ns, 87% less than baseline
      foldMap byteStringInsert (10000):              OK
        46.0 μs ± 4.0 μs, 15% less than baseline
-->   lazyByteStringHex (10000):                     OK
-->     4.74 μs ± 337 ns, 15% more than baseline
      foldMap integerDec (small) (10000):            OK
        205  μs ±  12 μs,  9% less than baseline
    char8 (10000):                                   OK
      2.58 μs ± 234 ns, 30% less than baseline
      foldMap (left-assoc) (10000):                  OK
        73.2 μs ± 2.9 μs, 54% less than baseline
      foldMap (right-assoc) (10000):                 OK
        43.0 μs ± 4.2 μs, 65% less than baseline
      foldMap [manually fused, left-assoc] (10000):  OK
        81.4 μs ± 5.3 μs, 48% less than baseline
      foldMap [manually fused, right-assoc] (10000): OK
        47.3 μs ± 785 ns, 61% less than baseline
---
 Data/ByteString/Builder/Internal.hs           | 110 +++++++++++++++++-
 Data/ByteString/Builder/Prim.hs               |  77 +++++-------
 bench/BenchAll.hs                             |   6 +
 .../Data/ByteString/Builder/Prim/Tests.hs     |  22 +++-
 4 files changed, 160 insertions(+), 55 deletions(-)

diff --git a/Data/ByteString/Builder/Internal.hs b/Data/ByteString/Builder/Internal.hs
index 8bb6278bd..a699a545c 100644
--- a/Data/ByteString/Builder/Internal.hs
+++ b/Data/ByteString/Builder/Internal.hs
@@ -1,6 +1,8 @@
-{-# LANGUAGE Unsafe #-}
-{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE MagicHash #-}
 {-# LANGUAGE NoMonoLocalBinds #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE Unsafe #-}
+{-# LANGUAGE ViewPatterns #-}
 
 {-# OPTIONS_HADDOCK not-home #-}
 
@@ -87,6 +89,8 @@ module Data.ByteString.Builder.Internal (
   -- , sizedChunksInsert
 
   , byteStringCopy
+  , asciiLiteralCopy
+  , modUtf8LitCopy
   , byteStringInsert
   , byteStringThreshold
 
@@ -816,6 +820,7 @@ ensureFree :: Int -> Builder
 ensureFree minFree =
     builder step
   where
+    step :: forall r. BuildStep r -> BuildStep r
     step k br@(BufferRange op ope)
       | ope `minusPtr` op < minFree = return $ bufferFull minFree op k
       | otherwise                   = k br
@@ -839,6 +844,25 @@ wrappedBytesCopyStep bs0 k =
       where
         outRemaining = ope `minusPtr` op
 
+-- | Copy the bytes from a 'BufferRange' into the output stream.
+wrappedBufferRangeCopyStep :: BufferRange  -- ^ Input 'BufferRange'.
+                           -> BuildStep a -> BuildStep a
+wrappedBufferRangeCopyStep (BufferRange ip0 ipe) k =
+    go ip0
+  where
+    go !ip (BufferRange op ope)
+      | inpRemaining <= outRemaining = do
+          copyBytes op ip inpRemaining
+          let !br' = BufferRange (op `plusPtr` inpRemaining) ope
+          k br'
+      | otherwise = do
+          copyBytes op ip outRemaining
+          let !ip' = ip `plusPtr` outRemaining
+          return $ bufferFull 1 ope (go ip')
+      where
+        outRemaining = ope `minusPtr` op
+        inpRemaining = ipe `minusPtr` ip
+
 
 -- Strict ByteStrings
 ------------------------------------------------------------------------------
@@ -858,6 +882,7 @@ byteStringThreshold :: Int -> S.StrictByteString -> Builder
 byteStringThreshold maxCopySize =
     \bs -> builder $ step bs
   where
+    step :: forall r. S.ByteString -> BuildStep r -> BuildStep r
     step bs@(S.BS _ len) k br@(BufferRange !op _)
       | len <= maxCopySize = byteStringCopyStep bs k br
       | otherwise          = return $ insertChunk op bs k
@@ -949,6 +974,87 @@ byteStringInsert :: S.StrictByteString -> Builder
 byteStringInsert =
     \bs -> builder $ \k (BufferRange op _) -> return $ insertChunk op bs k
 
+
+------------------------------------------------------------------------------
+-- Raw CString encoding
+------------------------------------------------------------------------------
+
+-- | Builder for raw 'Addr#' pointers to null-terminated primitive ASCII
+-- strings that are free of embedded (overlong-encoded as the two-byte sequence
+-- @0xC0 0x80@) null characters.
+--
+-- @since 0.11.5.0
+{-# INLINABLE asciiLiteralCopy #-}
+asciiLiteralCopy :: Ptr Word8 -> Int -> Builder
+asciiLiteralCopy = \ !ip !len -> builder $ \k br@(BufferRange op ope) ->
+    if len <= ope `minusPtr` op
+    then copyBytes op ip len >> k (BufferRange (op `plusPtr` len) ope)
+    else wrappedBufferRangeCopyStep (BufferRange ip (ip `plusPtr` len)) k br
+
+-- | Builder for pointers to /null-terminated/ primitive UTF-8 encoded strings
+-- that may contain embedded overlong two-byte encodings of the NUL character
+-- as @0xC0 0x80@.  Other deviations from strict UTF-8 are tolerated, but the
+-- result is not well defined.
+--
+-- @since 0.11.5.0
+{-# INLINABLE modUtf8LitCopy #-}
+modUtf8LitCopy :: Ptr Word8 -> Int -> Builder
+modUtf8LitCopy !ip !len
+    | len > 0   = builder (modUtf8_step ip len)
+    | otherwise = builder id
+
+-- | Copy a /non-empty/ UTF-8 input possibly containing denormalised 2-octet
+-- sequences.  While only the NUL byte should ever encoded that way (as @0xC0
+-- 80@), this handles other denormalised @0xC0 0x??@  sequences by keeping the
+-- bottom 6 bits of the second byte.  If the input is non-UTF8 garbage, the the
+-- result may not be what the user expected.
+--
+modUtf8_step :: Ptr Word8 -> Int -> BuildStep r -> BuildStep r
+modUtf8_step !ip !len k (BufferRange op ope)
+    | op == ope = return $ bufferFull 1 op (modUtf8_step ip len k)
+    | otherwise = do
+        let !avail = ope `minusPtr` op
+            !usable = avail `min` len
+        -- null-termination makes it possible to read one more byte than the
+        -- nominal input length, with any unexpected 0xC000 ending interpreted
+        -- as a NUL.  More typically, this simplifies hanlding of inputs where
+        -- 0xC0 0x80 might otherwise be split across the "usable" input window.
+        !ch <- peekElemOff ip (usable - 1)
+        let !use | ch /= 0xC0 = usable
+                 | otherwise  = usable + 1
+        !n <- utf8_copyBytes (ip `plusPtr` use) ip op
+        let !op' = op `plusPtr` n
+            !len' = len - use
+            ip' = ip `plusPtr` use
+        if | len' <= 0 -> k (BufferRange op' ope)
+           | op' < ope -> modUtf8_step ip' len' k (BufferRange op' ope)
+           | otherwise -> return $ bufferFull 1 op' (modUtf8_step ip' len' k)
+
+-- | Consume the supplied input returning the number of bytes written
+utf8_copyBytes :: Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO Int
+utf8_copyBytes !ipe = \ ip op -> go 0 ip op
+  where
+    go !n !ip@((< ipe) -> True) !op = do
+        !ch <- peek ip
+        let !ip' = ip `plusPtr` 1
+            !op' = op `plusPtr` 1
+        if | ch /= 0xC0 -> do
+               poke op ch
+               let !cnt = ipe `minusPtr` ip'
+               !runend <- S.memchr ip' 0xC0 (fromIntegral cnt)
+               let !runlen | runend == nullPtr = cnt
+                           | otherwise = runend `minusPtr` ip'
+               if (runlen == 0)
+               then go (n + 1) ip' op'
+               else do
+                   copyBytes op' ip' runlen
+                   go (n + 1 + runlen) (ip' `plusPtr` runlen) (op' `plusPtr` runlen)
+           | otherwise -> do
+               !ch' <- peek ip'
+               poke op (ch' .&. 0x3f)
+               go (n + 1) (ip' `plusPtr` 1) op'
+    go !n _ _ = pure n
+
 -- Short bytestrings
 ------------------------------------------------------------------------------
 
diff --git a/Data/ByteString/Builder/Prim.hs b/Data/ByteString/Builder/Prim.hs
index 82f5d18a9..acb3eb69b 100644
--- a/Data/ByteString/Builder/Prim.hs
+++ b/Data/ByteString/Builder/Prim.hs
@@ -1,5 +1,8 @@
+{-# LANGUAGE CPP #-}
 {-# LANGUAGE Trustworthy #-}
 
+#include "bytestring-cpp-macros.h"
+
 {- | Copyright : (c) 2010-2011 Simon Meier
                  (c) 2010      Jasper van der Jeugt
 License        : BSD3-style (see LICENSE)
@@ -463,10 +466,11 @@ import           Data.ByteString.Builder.Prim.Binary
 import           Data.ByteString.Builder.Prim.ASCII
 
 import           Foreign
+#if !MIN_VERSION_base(4,15,0)
+import           Foreign.C (CSize(..), CString)
+#endif
 import           Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr)
-import           GHC.Word (Word8 (..))
 import           GHC.Exts
-import           GHC.IO
 
 ------------------------------------------------------------------------------
 -- Creating Builders from bounded primitives
@@ -658,59 +662,36 @@ primMapLazyByteStringBounded w =
     L.foldrChunks (\x b -> primMapByteStringBounded w x `mappend` b) mempty
 
 
-------------------------------------------------------------------------------
--- Raw CString encoding
-------------------------------------------------------------------------------
-
--- | A null-terminated ASCII encoded 'Foreign.C.String.CString'.
--- Null characters are not representable.
+-- | Builder for raw 'Addr#' pointers to null-terminated primitive ASCII
+-- strings that are free of embedded null characters.
 --
 -- @since 0.11.0.0
 cstring :: Addr# -> Builder
-cstring =
-    \addr0 -> builder $ step addr0
-  where
-    step :: Addr# -> BuildStep r -> BuildStep r
-    step !addr !k br@(BufferRange op0@(Ptr op0#) ope)
-      | W8# ch == 0 = k br
-      | op0 == ope =
-          return $ bufferFull 1 op0 (step addr k)
-      | otherwise = do
-          IO $ \s -> case writeWord8OffAddr# op0# 0# ch s of
-                       s' -> (# s', () #)
-          let br' = BufferRange (op0 `plusPtr` 1) ope
-          step (addr `plusAddr#` 1#) k br'
-      where
-        !ch = indexWord8OffAddr# addr 0#
+cstring s = asciiLiteralCopy (Ptr s) (byteCountLiteral s)
+{-# INLINE cstring #-}
 
--- | A null-terminated UTF-8 encoded 'Foreign.C.String.CString'.
--- Null characters can be encoded as @0xc0 0x80@.
+-- | Builder for raw 'Addr#' pointers to null-terminated primitive UTF-8
+-- encoded strings in which any emebded null characters are represented via
+-- the two-byte overlong-encoding: @0xC0 0x80@.
 --
 -- @since 0.11.0.0
 cstringUtf8 :: Addr# -> Builder
-cstringUtf8 =
-    \addr0 -> builder $ step addr0
-  where
-    step :: Addr# -> BuildStep r -> BuildStep r
-    step !addr !k br@(BufferRange op0@(Ptr op0#) ope)
-      | W8# ch == 0 = k br
-      | op0 == ope =
-          return $ bufferFull 1 op0 (step addr k)
-        -- NULL is encoded as 0xc0 0x80
-      | W8# ch == 0xc0
-      , W8# (indexWord8OffAddr# addr 1#) == 0x80 = do
-          let !(W8# nullByte#) = 0
-          IO $ \s -> case writeWord8OffAddr# op0# 0# nullByte# s of
-                       s' -> (# s', () #)
-          let br' = BufferRange (op0 `plusPtr` 1) ope
-          step (addr `plusAddr#` 2#) k br'
-      | otherwise = do
-          IO $ \s -> case writeWord8OffAddr# op0# 0# ch s of
-                       s' -> (# s', () #)
-          let br' = BufferRange (op0 `plusPtr` 1) ope
-          step (addr `plusAddr#` 1#) k br'
-      where
-        !ch = indexWord8OffAddr# addr 0#
+cstringUtf8 s = modUtf8LitCopy (Ptr s) (byteCountLiteral s)
+{-# INLINE cstringUtf8 #-}
+
+-- | Byte count of null-terminated primitive literal string excluding the
+-- terminating null byte.
+byteCountLiteral :: Addr# -> Int
+byteCountLiteral addr# =
+#if HS_cstringLength_AND_FinalPtr_AVAILABLE
+  I# (cstringLength# addr#)
+#else
+  fromIntegral (pure_strlen (Ptr addr#))
+
+foreign import ccall unsafe "string.h strlen" pure_strlen
+    :: CString -> CSize
+#endif
+{-# INLINE byteCountLiteral #-}
 
 ------------------------------------------------------------------------------
 -- Char8 encoding
diff --git a/bench/BenchAll.hs b/bench/BenchAll.hs
index 7f95a3e6f..845365aad 100644
--- a/bench/BenchAll.hs
+++ b/bench/BenchAll.hs
@@ -327,6 +327,12 @@ main = do
         , benchB'_ "ASCII String (12B)" $ asciiLit (Ptr "hello wurld!"#)
         , benchB'  "ASCII String (64B, naive)" asciiStr fromString
         , benchB'_ "ASCII String (64B)" $ asciiLit asciiBuf
+        , benchB'_ "AsciiLit"      $ asciiLit asciiBuf
+        , benchB'_ "Utf8Lit"       $ utf8Lit utf8Buf
+        , benchB'_ "strLit"        $ string8 asciiStr
+        , benchB'_ "stringUtf8"    $ stringUtf8 utf8Str
+        , benchB'_ "strLitInline"  $ string8 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
+        , benchB'_ "utf8LitInline" $ stringUtf8 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\0XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
         ]
 
       , bgroup "Encoding wrappers"
diff --git a/tests/builder/Data/ByteString/Builder/Prim/Tests.hs b/tests/builder/Data/ByteString/Builder/Prim/Tests.hs
index 60e0cf4a8..c9f5d5abd 100644
--- a/tests/builder/Data/ByteString/Builder/Prim/Tests.hs
+++ b/tests/builder/Data/ByteString/Builder/Prim/Tests.hs
@@ -14,6 +14,7 @@ import           Data.Char  (ord)
 import qualified Data.ByteString.Lazy                  as L
 import qualified Data.ByteString.Lazy.Char8            as LC
 import           Data.ByteString.Builder
+import           Data.ByteString.Builder.Extra         as BE
 import qualified Data.ByteString.Builder.Prim          as BP
 import           Data.ByteString.Builder.Prim.TestUtils
 
@@ -22,17 +23,28 @@ import           Test.Tasty.QuickCheck
 
 tests :: [TestTree]
 tests = concat [ testsBinary, testsASCII, testsChar8, testsUtf8
-               , testsCombinatorsB, [testCString, testCStringUtf8] ]
+               , testsCombinatorsB
+               , [ testCString
+                 , testCStringUtf8 1
+                 , testCStringUtf8 6
+                 , testCStringUtf8 64
+                 ]
+               ]
 
 testCString :: TestTree
 testCString = testProperty "cstring" $
     toLazyByteString (BP.cstring "hello world!"#) ==
       LC.pack "hello" `L.append` L.singleton 0x20 `L.append` LC.pack "world!"
 
-testCStringUtf8 :: TestTree
-testCStringUtf8 = testProperty "cstringUtf8" $
-    toLazyByteString (BP.cstringUtf8 "hello\xc0\x80world!"#) ==
-      LC.pack "hello" `L.append` L.singleton 0x00 `L.append` LC.pack "world!"
+testCStringUtf8 :: Int -> TestTree
+testCStringUtf8 sz = testProperty "cstringUtf8" $
+    BE.toLazyByteStringWith (BE.untrimmedStrategy sz sz) L.empty
+      (BP.cstringUtf8 "hello\xc0\x80\xc0\x80\xd0\xbc\xd0\xb8\xd1\x80\xc0\x80\xC0"#) ==
+      LC.pack "hello" `L.append` L.singleton 0x00
+                      `L.append` L.singleton 0x00
+                      `L.append` LC.pack "\xd0\xbc\xd0\xb8\xd1\x80"
+                      `L.append` L.singleton 0x00
+                      `L.append` L.singleton 0x00
 
 ------------------------------------------------------------------------------
 -- Binary

From d63b19885ab0a396b9e4014910e61c34e378416f Mon Sep 17 00:00:00 2001
From: Viktor Dukhovni <ietf-dane@dukhovni.org>
Date: Wed, 27 Aug 2025 16:45:08 +1000
Subject: [PATCH 6/6] fixup! Avoid per-byte loop in cstring{,Utf8} builders

Matthew Craven review fixes
---
 Data/ByteString/Builder/Internal.hs | 19 ++++++++++---------
 Data/ByteString/Builder/Prim.hs     | 25 +++----------------------
 Data/ByteString/Internal/Type.hs    | 13 +++++++++++++
 bench/BenchAll.hs                   |  2 --
 4 files changed, 26 insertions(+), 33 deletions(-)

diff --git a/Data/ByteString/Builder/Internal.hs b/Data/ByteString/Builder/Internal.hs
index a699a545c..12862016d 100644
--- a/Data/ByteString/Builder/Internal.hs
+++ b/Data/ByteString/Builder/Internal.hs
@@ -1,7 +1,7 @@
-{-# LANGUAGE MagicHash #-}
-{-# LANGUAGE NoMonoLocalBinds #-}
-{-# LANGUAGE TypeFamilies #-}
 {-# LANGUAGE Unsafe #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE NoMonoLocalBinds #-}
 {-# LANGUAGE ViewPatterns #-}
 
 {-# OPTIONS_HADDOCK not-home #-}
@@ -979,11 +979,11 @@ byteStringInsert =
 -- Raw CString encoding
 ------------------------------------------------------------------------------
 
--- | Builder for raw 'Addr#' pointers to null-terminated primitive ASCII
--- strings that are free of embedded (overlong-encoded as the two-byte sequence
--- @0xC0 0x80@) null characters.
+-- | Builder for raw pointers to static data of known length that will never be
+-- moved or freed.  (This is used with the static buffers GHC uses to implement
+-- ASCII string literals that do not contain null characters.)
 --
--- @since 0.11.5.0
+-- @since 0.13.0.0
 {-# INLINABLE asciiLiteralCopy #-}
 asciiLiteralCopy :: Ptr Word8 -> Int -> Builder
 asciiLiteralCopy = \ !ip !len -> builder $ \k br@(BufferRange op ope) ->
@@ -996,7 +996,7 @@ asciiLiteralCopy = \ !ip !len -> builder $ \k br@(BufferRange op ope) ->
 -- as @0xC0 0x80@.  Other deviations from strict UTF-8 are tolerated, but the
 -- result is not well defined.
 --
--- @since 0.11.5.0
+-- @since 0.13.0.0
 {-# INLINABLE modUtf8LitCopy #-}
 modUtf8LitCopy :: Ptr Word8 -> Int -> Builder
 modUtf8LitCopy !ip !len
@@ -1034,6 +1034,7 @@ modUtf8_step !ip !len k (BufferRange op ope)
 utf8_copyBytes :: Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO Int
 utf8_copyBytes !ipe = \ ip op -> go 0 ip op
   where
+    go :: Int -> Ptr Word8 -> Ptr Word8 -> IO Int
     go !n !ip@((< ipe) -> True) !op = do
         !ch <- peek ip
         let !ip' = ip `plusPtr` 1
@@ -1041,7 +1042,7 @@ utf8_copyBytes !ipe = \ ip op -> go 0 ip op
         if | ch /= 0xC0 -> do
                poke op ch
                let !cnt = ipe `minusPtr` ip'
-               !runend <- S.memchr ip' 0xC0 (fromIntegral cnt)
+               !runend <- S.memchr ip' 0xC0 (fromIntegral @Int cnt)
                let !runlen | runend == nullPtr = cnt
                            | otherwise = runend `minusPtr` ip'
                if (runlen == 0)
diff --git a/Data/ByteString/Builder/Prim.hs b/Data/ByteString/Builder/Prim.hs
index acb3eb69b..6f0cd09e5 100644
--- a/Data/ByteString/Builder/Prim.hs
+++ b/Data/ByteString/Builder/Prim.hs
@@ -1,8 +1,5 @@
-{-# LANGUAGE CPP #-}
 {-# LANGUAGE Trustworthy #-}
 
-#include "bytestring-cpp-macros.h"
-
 {- | Copyright : (c) 2010-2011 Simon Meier
                  (c) 2010      Jasper van der Jeugt
 License        : BSD3-style (see LICENSE)
@@ -456,6 +453,7 @@ import           Data.ByteString.Builder.Internal
 
 import qualified Data.ByteString               as S
 import qualified Data.ByteString.Internal      as S
+import qualified Data.ByteString.Internal.Type as S
 import qualified Data.ByteString.Lazy.Internal as L
 
 import           Data.Char (ord)
@@ -466,9 +464,6 @@ import           Data.ByteString.Builder.Prim.Binary
 import           Data.ByteString.Builder.Prim.ASCII
 
 import           Foreign
-#if !MIN_VERSION_base(4,15,0)
-import           Foreign.C (CSize(..), CString)
-#endif
 import           Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr)
 import           GHC.Exts
 
@@ -667,7 +662,7 @@ primMapLazyByteStringBounded w =
 --
 -- @since 0.11.0.0
 cstring :: Addr# -> Builder
-cstring s = asciiLiteralCopy (Ptr s) (byteCountLiteral s)
+cstring s = asciiLiteralCopy (Ptr s) (S.byteCountLiteral s)
 {-# INLINE cstring #-}
 
 -- | Builder for raw 'Addr#' pointers to null-terminated primitive UTF-8
@@ -676,23 +671,9 @@ cstring s = asciiLiteralCopy (Ptr s) (byteCountLiteral s)
 --
 -- @since 0.11.0.0
 cstringUtf8 :: Addr# -> Builder
-cstringUtf8 s = modUtf8LitCopy (Ptr s) (byteCountLiteral s)
+cstringUtf8 s = modUtf8LitCopy (Ptr s) (S.byteCountLiteral s)
 {-# INLINE cstringUtf8 #-}
 
--- | Byte count of null-terminated primitive literal string excluding the
--- terminating null byte.
-byteCountLiteral :: Addr# -> Int
-byteCountLiteral addr# =
-#if HS_cstringLength_AND_FinalPtr_AVAILABLE
-  I# (cstringLength# addr#)
-#else
-  fromIntegral (pure_strlen (Ptr addr#))
-
-foreign import ccall unsafe "string.h strlen" pure_strlen
-    :: CString -> CSize
-#endif
-{-# INLINE byteCountLiteral #-}
-
 ------------------------------------------------------------------------------
 -- Char8 encoding
 ------------------------------------------------------------------------------
diff --git a/Data/ByteString/Internal/Type.hs b/Data/ByteString/Internal/Type.hs
index 0927771e1..725b43ec2 100644
--- a/Data/ByteString/Internal/Type.hs
+++ b/Data/ByteString/Internal/Type.hs
@@ -44,6 +44,7 @@ module Data.ByteString.Internal.Type (
         unsafePackAddress, unsafePackLenAddress,
         unsafePackLiteral, unsafePackLenLiteral,
         literalFromOctetString, literalFromHex,
+        byteCountLiteral,
 
         -- * Low level imperative construction
         empty,
@@ -486,6 +487,18 @@ unsafePackLenAddress len addr# = do
 #endif
 {-# INLINE unsafePackLenAddress #-}
 
+-- | Byte count of null-terminated primitive literal string excluding the
+-- terminating null byte.
+byteCountLiteral :: Addr# -> Int
+byteCountLiteral addr# =
+#if HS_cstringLength_AND_FinalPtr_AVAILABLE
+  I# (cstringLength# addr#)
+#else
+  fromIntegral @CSize @Int $
+    accursedUnutterablePerformIO (c_strlen (Ptr addr#))
+#endif
+{-# INLINE byteCountLiteral #-}
+
 -- | See 'unsafePackAddress'. This function has similar behavior. Prefer
 -- this function when the address in known to be an @Addr#@ literal. In
 -- that context, there is no need for the sequencing guarantees that 'IO'
diff --git a/bench/BenchAll.hs b/bench/BenchAll.hs
index 845365aad..e81f0e8d2 100644
--- a/bench/BenchAll.hs
+++ b/bench/BenchAll.hs
@@ -327,8 +327,6 @@ main = do
         , benchB'_ "ASCII String (12B)" $ asciiLit (Ptr "hello wurld!"#)
         , benchB'  "ASCII String (64B, naive)" asciiStr fromString
         , benchB'_ "ASCII String (64B)" $ asciiLit asciiBuf
-        , benchB'_ "AsciiLit"      $ asciiLit asciiBuf
-        , benchB'_ "Utf8Lit"       $ utf8Lit utf8Buf
         , benchB'_ "strLit"        $ string8 asciiStr
         , benchB'_ "stringUtf8"    $ stringUtf8 utf8Str
         , benchB'_ "strLitInline"  $ string8 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"