diff --git a/.hgignore b/.hgignore
index 4f96a314af80af422647add2909dd9709df76175..3550e5beb476c4a83a6570d16f0b4947488bd3f3 100644
--- a/.hgignore
+++ b/.hgignore
@@ -1,8 +1,12 @@
-^(?:dist|tests/benchmarks/dist|tests/tests/dist)$
-^tests/benchmarks/.*\.txt$
+^(?:dist|benchmarks/dist|tests/coverage|tests/dist)$
+^benchmarks/.*\.txt$
 ^tests/text-testdata.tar.bz2$
 ^tests/(?:\.hpc|bm|qc|qc-hpc|stdio-hpc|text/test)$
-\.(?:aux|eventlog|h[ip]|log|[oa]|orig|prof|ps|rej|swp)$
+\.(?:aux|eventlog|h[ip]|log|[oa]|orig|prof|ps|rej|swp|tix)$
 ~$
+
 syntax: glob
 .\#*
+cabal-dev
+cabal.sandbox.config
+\.cabal-sandbox
diff --git a/.hgtags b/.hgtags
index ba773dc170b71822804801a5c4d2e3e3d4907662..6790a54981b0f26a53ca5db0428cd15452bda380 100644
--- a/.hgtags
+++ b/.hgtags
@@ -36,3 +36,9 @@ b75d3041d275e8e76b26605b0d426a572ddf1249 0.11.1.3
 407937739e9e764f1ae0f1f9ca454c42dca38772 0.11.1.10
 8b981edd27befa4c2dd334fcb7db22ac67e22b67 0.11.1.11
 204da16b5098531bdf858c388e2620238ef2aa5e 0.11.1.12
+6a3d847a56a69d0514a79cb212cb218271ad0917 0.11.1.13
+1d2c6fa9092c6a4000b2abdd9d01f3efcd477be5 0.11.2.0
+78219784cf3652cc662805bf2971bd62d80210a9 0.11.2.1
+4297307ebc11ad677cfba6b40319e7e5e2c0cfee 0.11.2.3
+7fa79662b66aade97fe49394977213fe6432942e 0.11.3.0
+d99cd091cdf71ce807a4255f6cc509c3154f51ea 0.11.3.1
diff --git a/Data/Text.hs b/Data/Text.hs
index 805454a278cf2cc07055e13cedf971871ccd7776..f8d21ffbe537399d6675e422a4ac590d614fcf3e 100644
--- a/Data/Text.hs
+++ b/Data/Text.hs
@@ -1,9 +1,12 @@
-{-# LANGUAGE BangPatterns, CPP, Rank2Types, UnboxedTuples #-}
+{-# LANGUAGE BangPatterns, CPP, MagicHash, Rank2Types, UnboxedTuples #-}
 {-# OPTIONS_GHC -fno-warn-orphans #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 
 -- |
 -- Module      : Data.Text
--- Copyright   : (c) 2009, 2010, 2011 Bryan O'Sullivan,
+-- Copyright   : (c) 2009, 2010, 2011, 2012 Bryan O'Sullivan,
 --               (c) 2009 Duncan Coutts,
 --               (c) 2008, 2009 Tom Harper
 --
@@ -167,26 +170,29 @@ module Data.Text
     , partition
 
     -- , findSubstring
-    
+
     -- * Indexing
     -- $index
     , index
     , findIndex
     , count
 
-    -- * Zipping and unzipping
+    -- * Zipping
     , zip
     , zipWith
 
     -- -* Ordered text
     -- , sort
+
+    -- * Low level operations
+    , copy
     ) where
 
 import Prelude (Char, Bool(..), Int, Maybe(..), String,
                 Eq(..), Ord(..), Ordering(..), (++),
                 Read(..), Show(..),
                 (&&), (||), (+), (-), (.), ($), ($!), (>>), (*),
-                div, maxBound, not, return, otherwise)
+                maxBound, not, return, otherwise, quot)
 #if defined(HAVE_DEEPSEQ)
 import Control.DeepSeq (NFData)
 #endif
@@ -195,11 +201,7 @@ import Control.Exception (assert)
 #endif
 import Data.Char (isSpace)
 import Data.Data (Data(gfoldl, toConstr, gunfold, dataTypeOf))
-#if __GLASGOW_HASKELL__ >= 612
 import Data.Data (mkNoRepType)
-#else
-import Data.Data (mkNorepType)
-#endif
 import Control.Monad (foldM)
 import qualified Data.Text.Array as A
 import qualified Data.List as L
@@ -222,6 +224,12 @@ import Data.ByteString (ByteString)
 import qualified Data.Text.Lazy as L
 import Data.Int (Int64)
 #endif
+#if __GLASGOW_HASKELL__ >= 702
+import qualified GHC.CString as GHC
+#else
+import qualified GHC.Base as GHC
+#endif
+import GHC.Prim (Addr#)
 
 -- $strict
 --
@@ -324,7 +332,7 @@ instance NFData Text
 
 -- This instance preserves data abstraction at the cost of inefficiency.
 -- We omit reflection services for the sake of data abstraction.
--- 
+--
 -- This instance was created by copying the behavior of Data.Set and
 -- Data.Map. If you feel a mistake has been made, please feel free to
 -- submit improvements.
@@ -338,11 +346,7 @@ instance Data Text where
   gfoldl f z txt = z pack `f` (unpack txt)
   toConstr _     = P.error "Data.Text.Text.toConstr"
   gunfold _ _    = P.error "Data.Text.Text.gunfold"
-#if __GLASGOW_HASKELL__ >= 612
   dataTypeOf _   = mkNoRepType "Data.Text.Text"
-#else
-  dataTypeOf _   = mkNorepType "Data.Text.Text"
-#endif
 
 -- | /O(n)/ Compare two 'Text' values lexicographically.
 compareText :: Text -> Text -> Ordering
@@ -364,7 +368,7 @@ compareText ta@(Text _arrA _offA lenA) tb@(Text _arrB _offB lenB)
 -- | /O(n)/ Convert a 'String' into a 'Text'.  Subject to
 -- fusion.  Performs replacement on invalid scalar values.
 pack :: String -> Text
-pack = unstream . S.streamList . L.map safe
+pack = unstream . S.map safe . S.streamList
 {-# INLINE [1] pack #-}
 
 -- | /O(n)/ Convert a Text into a String.  Subject to fusion.
@@ -372,6 +376,19 @@ unpack :: Text -> String
 unpack = S.unstreamList . stream
 {-# INLINE [1] unpack #-}
 
+-- | /O(n)/ Convert a literal string into a Text.  Subject to fusion.
+unpackCString# :: Addr# -> Text
+unpackCString# addr# = unstream (S.streamCString# addr#)
+{-# NOINLINE unpackCString# #-}
+
+{-# RULES "TEXT literal" forall a.
+    unstream (S.map safe (S.streamList (GHC.unpackCString# a)))
+      = unpackCString# a #-}
+
+{-# RULES "TEXT literal UTF8" forall a.
+    unstream (S.map safe (S.streamList (GHC.unpackCStringUtf8# a)))
+      = unpackCString# a #-}
+
 -- | /O(1)/ Convert a character into a Text.  Subject to fusion.
 -- Performs replacement on invalid scalar values.
 singleton :: Char -> Text
@@ -712,7 +729,7 @@ center k c t
     | otherwise = replicateChar l c `append` t `append` replicateChar r c
   where len = length t
         d   = k - len
-        r   = d `div` 2
+        r   = d `quot` 2
         l   = d - r
 {-# INLINE center #-}
 
@@ -884,11 +901,11 @@ mapAccumR f z0 = second reverse . S.mapAccumL g z0 . reverseStream
 -- @t@ repeated @n@ times.
 replicate :: Int -> Text -> Text
 replicate n t@(Text a o l)
-    | n <= 0 || l <= 0      = empty
-    | n == 1                = t
-    | isSingleton t         = replicateChar n (unsafeHead t)
-    | n <= maxBound `div` l = Text (A.run x) 0 len
-    | otherwise             = overflowError "replicate"
+    | n <= 0 || l <= 0       = empty
+    | n == 1                 = t
+    | isSingleton t          = replicateChar n (unsafeHead t)
+    | n <= maxBound `quot` l = Text (A.run x) 0 len
+    | otherwise              = overflowError "replicate"
   where
     len = l * n
     x = do
@@ -1110,7 +1127,7 @@ findAIndexOrEnd q t@(Text _arr _off len) = go 0
     where go !i | i >= len || q c       = i
                 | otherwise             = go (i+d)
                 where Iter c d          = iter t i
-    
+
 -- | /O(n)/ Group characters in a string by equality.
 group :: Text -> [Text]
 group = groupBy (==)
@@ -1143,7 +1160,7 @@ tails t | null t    = [empty]
 -- > splitOn "\r\n" "a\r\nb\r\nd\r\ne" == ["a","b","d","e"]
 -- > splitOn "aaa"  "aaaXaaaXaaaXaaa"  == ["","X","X","X",""]
 -- > splitOn "x"    "x"                == ["",""]
--- 
+--
 -- and
 --
 -- > intercalate s . splitOn s         == id
@@ -1439,8 +1456,6 @@ isPrefixOf a@(Text _ _ alen) b@(Text _ _ blen) =
 {-# RULES
 "TEXT isPrefixOf -> fused" [~1] forall s t.
     isPrefixOf s t = S.isPrefixOf (stream s) (stream t)
-"TEXT isPrefixOf -> unfused" [1] forall s t.
-    S.isPrefixOf (stream s) (stream t) = isPrefixOf s t
   #-}
 
 -- | /O(n)/ The 'isSuffixOf' function takes two 'Text's and returns
@@ -1557,3 +1572,19 @@ emptyError fun = P.error $ "Data.Text." ++ fun ++ ": empty input"
 
 overflowError :: String -> a
 overflowError fun = P.error $ "Data.Text." ++ fun ++ ": size overflow"
+
+-- | /O(n)/ Make a distinct copy of the given string, sharing no
+-- storage with the original string.
+--
+-- As an example, suppose you read a large string, of which you need
+-- only a small portion.  If you do not use 'copy', the entire original
+-- array will be kept alive in memory by the smaller string. Making a
+-- copy \"breaks the link\" to the original array, allowing it to be
+-- garbage collected if there are no other live references to it.
+copy :: Text -> Text
+copy (Text arr off len) = Text (A.run go) 0 len
+  where
+    go = do
+      marr <- A.new len
+      A.copyI marr 0 arr off len
+      return marr
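
The new Data.Text.copy added above is easiest to appreciate with a usage sketch. The following is illustrative only and not part of the patch; the function name firstLineOf and the choice of takeWhile are assumptions made to show why breaking sharing with a large source array matters:

import qualified Data.Text as T
import qualified Data.Text.IO as TIO

firstLineOf :: FilePath -> IO T.Text
firstLineOf path = do
  whole <- TIO.readFile path
  -- T.takeWhile returns a slice that shares the array backing 'whole';
  -- T.copy gives the slice its own, appropriately sized array so the
  -- rest of the file contents can be garbage collected.
  return $! T.copy (T.takeWhile (/= '\n') whole)
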
diff --git a/Data/Text/Array.hs b/Data/Text/Array.hs
index 046eb9dda10d281e36ee1a6ca42f9e2f079c59a7..e2c4274fcd7a3baa9cdd7966719f0d7a08fc8c0b 100644
--- a/Data/Text/Array.hs
+++ b/Data/Text/Array.hs
@@ -114,7 +114,7 @@ instance IArray (MArray s) where
 -- | Create an uninitialized mutable array.
 new :: forall s. Int -> ST s (MArray s)
 new n
-  | n < 0 || n .&. highBit /= 0 = error $ "Data.Text.Array.new: size overflow"
+  | n < 0 || n .&. highBit /= 0 = array_size_error
   | otherwise = ST $ \s1# ->
        case newByteArray# len# s1# of
          (# s2#, marr# #) -> (# s2#, MArray marr#
@@ -126,6 +126,9 @@ new n
         highBit    = maxBound `xor` (maxBound `shiftR` 1)
 {-# INLINE new #-}
 
+array_size_error :: a
+array_size_error = error "Data.Text.Array.new: size overflow"
+
 -- | Freeze a mutable array. Do not mutate the 'MArray' afterwards!
 unsafeFreeze :: MArray s -> ST s Array
 unsafeFreeze MArray{..} = ST $ \s# ->
@@ -181,6 +184,7 @@ run2 k = runST (do
                  (marr,b) <- k
                  arr <- unsafeFreeze marr
                  return (arr,b))
+{-# INLINE run2 #-}
 
 -- | Copy some elements of a mutable array.
 copyM :: MArray s               -- ^ Destination
diff --git a/Data/Text/Encoding.hs b/Data/Text/Encoding.hs
index 256b68724eb5bd3a8c629f5dffbbef54bd2ace92..4d7fc4cd427c2c5e83d63b5259fe491d0906a875 100644
--- a/Data/Text/Encoding.hs
+++ b/Data/Text/Encoding.hs
@@ -1,5 +1,8 @@
-{-# LANGUAGE BangPatterns, CPP, ForeignFunctionInterface, MagicHash,
+{-# LANGUAGE BangPatterns, CPP, ForeignFunctionInterface, GeneralizedNewtypeDeriving, MagicHash,
     UnliftedFFITypes #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 -- |
 -- Module      : Data.Text.Encoding
 -- Copyright   : (c) 2009, 2010, 2011 Bryan O'Sullivan,
@@ -23,6 +26,7 @@ module Data.Text.Encoding
     -- * Decoding ByteStrings to Text
     -- $strict
       decodeASCII
+    , decodeLatin1
     , decodeUtf8
     , decodeUtf16LE
     , decodeUtf16BE
@@ -39,17 +43,23 @@ module Data.Text.Encoding
     , decodeUtf32LEWith
     , decodeUtf32BEWith
 
+    -- ** Stream oriented decoding
+    -- $stream
+    , streamDecodeUtf8
+    , streamDecodeUtf8With
+    , Decoding(..)
+
     -- * Encoding Text to ByteStrings
     , encodeUtf8
     , encodeUtf16LE
     , encodeUtf16BE
     , encodeUtf32LE
     , encodeUtf32BE
-    
+
     -- * Generic encoding of Text
-    , encodeStreamWithB
-    , encodeTextWithB
-    , encodeUtf8Builder
+    -- , encodeStreamWithB
+    -- , encodeTextWithB
+    -- , encodeUtf8Builder
     , encodeUtf8Escaped
     ) where
 
@@ -59,28 +69,35 @@ import Control.Monad.ST.Unsafe (unsafeIOToST, unsafeSTToIO)
 #else
 import Control.Monad.ST (unsafeIOToST, unsafeSTToIO)
 #endif
+import Control.Monad.ST (runST)
 import Data.Bits ((.&.))
 import Data.ByteString as B
 import Data.ByteString.Internal as B
-import Data.ByteString.Lazy.Builder.Internal as B
-import qualified Data.ByteString.Lazy.Builder.BasicEncoding.Internal as B
-import qualified Data.ByteString.Lazy.Builder.BasicEncoding          as B
+import qualified Data.ByteString.Lazy                  as BL
+import qualified Data.ByteString.Builder               as B
+import qualified Data.ByteString.Builder.Internal      as B
+import qualified Data.ByteString.Builder.Prim.Internal as BP
+import qualified Data.ByteString.Builder.Prim          as BP
+import Data.Text ()
 import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode)
-import Data.Text.Internal (Text(..), textP)
+import Data.Text.Internal (Text(..), safe, textP)
+import Data.Text.Private (runText)
 import Data.Text.UnsafeChar (ord, unsafeWrite)
 import Data.Text.UnsafeShift (shiftL, shiftR)
-import Data.Word (Word8)
+import Data.Word (Word8, Word32)
 import Foreign.C.Types (CSize)
 import Foreign.ForeignPtr (withForeignPtr)
 import Foreign.Marshal.Utils (with)
-import Foreign.Ptr (Ptr, minusPtr, plusPtr)
-import Foreign.Storable (peek, poke)
+import Foreign.Ptr (Ptr, minusPtr, nullPtr, plusPtr)
+import Foreign.Storable (Storable, peek, poke)
 import GHC.Base (MutableByteArray#)
-import System.IO.Unsafe (unsafePerformIO)
 import qualified Data.Text.Array as A
 import qualified Data.Text.Encoding.Fusion as E
 import qualified Data.Text.Encoding.Utf16 as U16
 import qualified Data.Text.Fusion as F
+import Data.Text.Unsafe (unsafeDupablePerformIO)
+
+#include "text_cbits.h"
 
 -- $strict
 --
@@ -98,39 +115,171 @@ import qualified Data.Text.Fusion as F
 -- | /Deprecated/.  Decode a 'ByteString' containing 7-bit ASCII
 -- encoded text.
 --
--- This function is deprecated.  Use 'decodeUtf8' instead.
+-- This function is deprecated.  Use 'decodeLatin1' instead.
 decodeASCII :: ByteString -> Text
 decodeASCII = decodeUtf8
 {-# DEPRECATED decodeASCII "Use decodeUtf8 instead" #-}
 
+-- | Decode a 'ByteString' containing Latin-1 (aka ISO-8859-1) encoded text.
+--
+-- 'decodeLatin1' is semantically equivalent to
+--  @Data.Text.pack . Data.ByteString.Char8.unpack@
+decodeLatin1 :: ByteString -> Text
+decodeLatin1 (PS fp off len) = textP a 0 len
+ where
+  a = A.run (A.new len >>= unsafeIOToST . go)
+  go dest = withForeignPtr fp $ \ptr -> do
+    c_decode_latin1 (A.maBA dest) (ptr `plusPtr` off) (ptr `plusPtr` (off+len))
+    return dest
+
 -- | Decode a 'ByteString' containing UTF-8 encoded text.
 decodeUtf8With :: OnDecodeError -> ByteString -> Text
-decodeUtf8With onErr (PS fp off len) = textP (fst a) 0 (snd a)
+decodeUtf8With onErr (PS fp off len) = runText $ \done -> do
+  let go dest = withForeignPtr fp $ \ptr ->
+        with (0::CSize) $ \destOffPtr -> do
+          let end = ptr `plusPtr` (off + len)
+              loop curPtr = do
+                curPtr' <- c_decode_utf8 (A.maBA dest) destOffPtr curPtr end
+                if curPtr' == end
+                  then do
+                    n <- peek destOffPtr
+                    unsafeSTToIO (done dest (fromIntegral n))
+                  else do
+                    x <- peek curPtr'
+                    case onErr desc (Just x) of
+                      Nothing -> loop $ curPtr' `plusPtr` 1
+                      Just c -> do
+                        destOff <- peek destOffPtr
+                        w <- unsafeSTToIO $
+                             unsafeWrite dest (fromIntegral destOff) (safe c)
+                        poke destOffPtr (destOff + fromIntegral w)
+                        loop $ curPtr' `plusPtr` 1
+          loop (ptr `plusPtr` off)
+  (unsafeIOToST . go) =<< A.new len
  where
-  a = A.run2 (A.new len >>= unsafeIOToST . go)
   desc = "Data.Text.Encoding.decodeUtf8: Invalid UTF-8 stream"
-  go dest = withForeignPtr fp $ \ptr ->
-    with (0::CSize) $ \destOffPtr -> do
-      let end = ptr `plusPtr` (off + len)
-          loop curPtr = do
-            curPtr' <- c_decode_utf8 (A.maBA dest) destOffPtr curPtr end
-            if curPtr' == end
-              then do
-                n <- peek destOffPtr
-                return (dest,fromIntegral n)
-              else do
-                x <- peek curPtr'
-                case onErr desc (Just x) of
-                  Nothing -> loop $ curPtr' `plusPtr` 1
-                  Just c -> do
-                    destOff <- peek destOffPtr
-                    w <- unsafeSTToIO $
-                         unsafeWrite dest (fromIntegral destOff) c
-                    poke destOffPtr (destOff + fromIntegral w)
-                    loop $ curPtr' `plusPtr` 1
-      loop (ptr `plusPtr` off)
 {- INLINE[0] decodeUtf8With #-}
 
+-- $stream
+--
+-- The 'streamDecodeUtf8' and 'streamDecodeUtf8With' functions accept
+-- a 'ByteString' that represents a possibly incomplete input (e.g. a
+-- packet from a network stream) that may not end on a UTF-8 boundary.
+--
+-- The first element of the result is the maximal chunk of 'Text' that
+-- can be decoded from the given input. The second is a function which
+-- accepts another 'ByteString'. That string will be assumed to
+-- directly follow the string that was passed as input to the original
+-- function, and it will in turn be decoded.
+--
+-- To help understand the use of these functions, consider the Unicode
+-- string @\"hi &#9731;\"@. If encoded as UTF-8, this becomes @\"hi
+-- \\xe2\\x98\\x83\"@; the final @\'&#9731;\'@ is encoded as 3 bytes.
+--
+-- Now suppose that we receive this encoded string as 3 packets that
+-- are split up on untidy boundaries: @[\"hi \\xe2\", \"\\x98\",
+-- \"\\x83\"]@. We cannot decode the entire Unicode string until we
+-- have received all three packets, but we would like to make progress
+-- as we receive each one.
+--
+-- @
+-- let 'Some' t0 f0 = 'streamDecodeUtf8' \"hi \\xe2\"
+-- t0 == \"hi \" :: 'Text'
+-- @
+--
+-- We use the continuation @f0@ to decode our second packet.
+--
+-- @
+-- let 'Some' t1 f1 = f0 \"\\x98\"
+-- t1 == \"\"
+-- @
+--
+-- We could not give @f0@ enough input to decode anything, so it
+-- returned an empty string. Once we feed our second continuation @f1@
+-- the last byte of input, it will make progress.
+--
+-- @
+-- let 'Some' t2 f2 = f1 \"\\x83\"
+-- t2 == \"&#9731;\"
+-- @
+--
+-- If given invalid input, an exception will be thrown by the function
+-- or continuation where it is encountered.
+
+-- | A stream oriented decoding result.
+data Decoding = Some Text ByteString (ByteString -> Decoding)
+
+instance Show Decoding where
+    showsPrec d (Some t bs _) = showParen (d > prec) $
+                                showString "Some " . showsPrec prec' t .
+                                showChar ' ' . showsPrec prec' bs .
+                                showString " _"
+      where prec = 10; prec' = prec + 1
+
+newtype CodePoint = CodePoint Word32 deriving (Eq, Show, Num, Storable)
+newtype DecoderState = DecoderState Word32 deriving (Eq, Show, Num, Storable)
+
+-- | Decode, in a stream oriented way, a 'ByteString' containing UTF-8
+-- encoded text that is known to be valid.
+--
+-- If the input contains any invalid UTF-8 data, an exception will be
+-- thrown (either by this function or a continuation) that cannot be
+-- caught in pure code.  For more control over the handling of invalid
+-- data, use 'streamDecodeUtf8With'.
+streamDecodeUtf8 :: ByteString -> Decoding
+streamDecodeUtf8 = streamDecodeUtf8With strictDecode
+
+-- | Decode, in a stream oriented way, a 'ByteString' containing UTF-8
+-- encoded text.
+streamDecodeUtf8With :: OnDecodeError -> ByteString -> Decoding
+streamDecodeUtf8With onErr = decodeChunk 0 0
+ where
+  -- We create a slightly larger than necessary buffer to accommodate a
+  -- potential surrogate pair started in the last buffer
+  decodeChunk :: CodePoint -> DecoderState -> ByteString -> Decoding
+  decodeChunk codepoint0 state0 bs@(PS fp off len) =
+    runST $ (unsafeIOToST . decodeChunkToBuffer) =<< A.new (len+1)
+   where
+    decodeChunkToBuffer :: A.MArray s -> IO Decoding
+    decodeChunkToBuffer dest = withForeignPtr fp $ \ptr ->
+      with (0::CSize) $ \destOffPtr ->
+      with codepoint0 $ \codepointPtr ->
+      with state0 $ \statePtr ->
+      with nullPtr $ \curPtrPtr ->
+        let end = ptr `plusPtr` (off + len)
+            loop curPtr = do
+              poke curPtrPtr curPtr
+              curPtr' <- c_decode_utf8_with_state (A.maBA dest) destOffPtr
+                         curPtrPtr end codepointPtr statePtr
+              state <- peek statePtr
+              case state of
+                UTF8_REJECT -> do
+                  -- We encountered an encoding error
+                  x <- peek curPtr'
+                  case onErr desc (Just x) of
+                    Nothing -> loop $ curPtr' `plusPtr` 1
+                    Just c -> do
+                      destOff <- peek destOffPtr
+                      w <- unsafeSTToIO $
+                           unsafeWrite dest (fromIntegral destOff) (safe c)
+                      poke destOffPtr (destOff + fromIntegral w)
+                      poke statePtr 0
+                      loop $ curPtr' `plusPtr` 1
+
+                _ -> do
+                  -- We encountered the end of the buffer while decoding
+                  n <- peek destOffPtr
+                  codepoint <- peek codepointPtr
+                  chunkText <- unsafeSTToIO $ do
+                      arr <- A.unsafeFreeze dest
+                      return $! textP arr 0 (fromIntegral n)
+                  lastPtr <- peek curPtrPtr
+                  let left = lastPtr `minusPtr` curPtr
+                  return $ Some chunkText (B.drop left bs)
+                           (decodeChunk codepoint state)
+        in loop (ptr `plusPtr` off)
+  desc = "Data.Text.Encoding.streamDecodeUtf8With: Invalid UTF-8 stream"
+
 -- | Decode a 'ByteString' containing UTF-8 encoded text that is known
 -- to be valid.
 --
@@ -144,17 +293,22 @@ decodeUtf8 = decodeUtf8With strictDecode
 {-# RULES "STREAM stream/decodeUtf8 fusion" [1]
     forall bs. F.stream (decodeUtf8 bs) = E.streamUtf8 strictDecode bs #-}
 
--- | Decode a 'ByteString' containing UTF-8 encoded text..
+-- | Decode a 'ByteString' containing UTF-8 encoded text.
 --
 -- If the input contains any invalid UTF-8 data, the relevant
 -- exception will be returned, otherwise the decoded text.
 decodeUtf8' :: ByteString -> Either UnicodeException Text
-decodeUtf8' = unsafePerformIO . try . evaluate . decodeUtf8With strictDecode
+decodeUtf8' = unsafeDupablePerformIO . try . evaluate . decodeUtf8With strictDecode
 {-# INLINE decodeUtf8' #-}
 
 -- | Encode text using UTF-8 encoding.
 encodeUtf8 :: Text -> ByteString
-encodeUtf8 (Text arr off len) = unsafePerformIO $ do
+encodeUtf8 =
+    BL.toStrict . B.toLazyByteString
+  . encodeUtf8Escaped (BP.liftFixedToBounded BP.word8)
+
+{-
+encodeUtf8 (Text arr off len) = unsafeDupablePerformIO $ do
   let size0 = max len 4
   mallocByteString size0 >>= start size0 off 0
  where
@@ -207,6 +361,55 @@ encodeUtf8 (Text arr off len) = unsafePerformIO $ do
                   poke8 (m+1) $ ((w `shiftR` 6) .&. 0x3F) + 0x80
                   poke8 (m+2) $ (w .&. 0x3F) + 0x80
                   go (n+1) (m+3)
+-}
+
+-- | Encode text using UTF-8 encoding and escape the ASCII characters using
+-- a 'BP.BoundedPrim'.
+encodeUtf8Escaped :: BP.BoundedPrim Word8 -> Text -> B.Builder
+encodeUtf8Escaped be (Text arr off len) =
+    B.builder step
+  where
+    bound   = max 4 $ BP.sizeBound be
+    iend    = off + len
+    step !k =
+        outerLoop off
+      where
+        outerLoop !i0 !br@(B.BufferRange op0 ope)
+          | i0 >= iend                = k br
+          | op0 `plusPtr` bound < ope =
+              goPartial (i0 + min outRemaining inpRemaining)
+          | otherwise  = return $ B.bufferFull bound op0 (outerLoop i0)
+          where
+            outRemaining = (ope `minusPtr` op0) `div` bound
+            inpRemaining = iend - i0
+
+            goPartial !iendTmp = go i0 op0
+              where
+                go !i !op
+                  | i < iendTmp = case A.unsafeIndex arr i of
+                      w | w <= 0x7F -> do
+                            BP.runB be (fromIntegral w) op >>= go (i + 1)
+                        | w <= 0x7FF -> do
+                            poke8 0 $ (w `shiftR` 6) + 0xC0
+                            poke8 1 $ (w .&. 0x3f) + 0x80
+                            go (i + 1) (op `plusPtr` 2)
+                        | 0xD800 <= w && w <= 0xDBFF -> do
+                            let c = ord $ U16.chr2 w (A.unsafeIndex arr (i+1))
+                            poke8 0 $ (c `shiftR` 18) + 0xF0
+                            poke8 1 $ ((c `shiftR` 12) .&. 0x3F) + 0x80
+                            poke8 2 $ ((c `shiftR` 6) .&. 0x3F) + 0x80
+                            poke8 3 $ (c .&. 0x3F) + 0x80
+                            go (i + 2) (op `plusPtr` 4)
+                        | otherwise -> do
+                            poke8 0 $ (w `shiftR` 12) + 0xE0
+                            poke8 1 $ ((w `shiftR` 6) .&. 0x3F) + 0x80
+                            poke8 2 $ (w .&. 0x3F) + 0x80
+                            go (i + 1) (op `plusPtr` 3)
+                  | otherwise =
+                      outerLoop i (B.BufferRange op ope)
+                  where
+                    poke8 j v = poke (op `plusPtr` j) (fromIntegral v :: Word8)
+
 
 -- | Decode text from little endian UTF-16 encoding.
 decodeUtf16LEWith :: OnDecodeError -> ByteString -> Text
@@ -288,14 +491,24 @@ foreign import ccall unsafe "_hs_text_decode_utf8" c_decode_utf8
     :: MutableByteArray# s -> Ptr CSize
     -> Ptr Word8 -> Ptr Word8 -> IO (Ptr Word8)
 
+foreign import ccall unsafe "_hs_text_decode_utf8_state" c_decode_utf8_with_state
+    :: MutableByteArray# s -> Ptr CSize
+    -> Ptr (Ptr Word8) -> Ptr Word8
+    -> Ptr CodePoint -> Ptr DecoderState -> IO (Ptr Word8)
+
+foreign import ccall unsafe "_hs_text_decode_latin1" c_decode_latin1
+    :: MutableByteArray# s -> Ptr Word8 -> Ptr Word8 -> IO ()
+
+
+{-
 -- | Encode all elements of a 'F.Stream' using a 'B.BoundedEncoding'.
 {-# INLINE encodeStreamWithB #-}
 encodeStreamWithB :: B.BoundedEncoding a -> F.Stream a -> B.Builder
-encodeStreamWithB be = 
+encodeStreamWithB be =
     \(F.Stream next s0 _) -> B.builder $ step next s0
   where
     bound = B.sizeBound be
-    step next s0 k (B.BufferRange op0 ope0) = 
+    step next s0 k (B.BufferRange op0 ope0) =
         go s0 op0
       where
         go s !op = case next s of
@@ -303,11 +516,11 @@ encodeStreamWithB be =
           F.Skip s'    -> go s' op
           F.Yield x s'
             | op `plusPtr` bound <= ope0 -> B.runB be x op >>= go s'
-            | otherwise                  -> 
+            | otherwise                  ->
                 return $ B.bufferFull bound op (step next s k)
 
 
--- | 
+-- |
 -- | /Subject to fusion./
 -- Encode all 'Char's of a 'T.Text' using a 'B.BoundedEncoding'.
 {-# INLINE encodeTextWithB #-}
@@ -317,51 +530,6 @@ encodeTextWithB be = encodeStreamWithB be . F.stream
 -- | Encode text using UTF-8 encoding.
 encodeUtf8Builder :: Text -> B.Builder
 encodeUtf8Builder = encodeUtf8Escaped (B.fromF B.word8)
+-}
 
--- | Encode text using UTF-8 encoding and escape the ASCII characters using
--- a 'BoundedEncoding'.
-encodeUtf8Escaped :: B.BoundedEncoding Word8 -> Text -> B.Builder
-encodeUtf8Escaped be (Text arr off len) =
-    B.builder step
-  where
-    bound   = max 4 $ B.sizeBound be
-    iend    = off + len
-    step !k =
-        outerLoop off
-      where
-        outerLoop !i0 !br@(BufferRange op0 ope)
-          | i0 >= iend                = k br
-          | op0 `plusPtr` bound < ope =
-              goPartial (i0 + min outRemaining inpRemaining)
-          | otherwise  = return $ bufferFull bound op0 (outerLoop i0)
-          where
-            outRemaining = (ope `minusPtr` op0) `div` bound
-            inpRemaining = iend - i0
-
-            goPartial !iendTmp = go i0 op0
-              where
-                go !i !op
-                  | i < iendTmp = case A.unsafeIndex arr i of
-                      w | w <= 0x7F -> do
-                            B.runB be (fromIntegral w) op >>= go (i + 1)
-                        | w <= 0x7FF -> do
-                            poke8 0 $ (w `shiftR` 6) + 0xC0
-                            poke8 1 $ (w .&. 0x3f) + 0x80
-                            go (i + 1) (op `plusPtr` 2)
-                        | 0xD800 <= w && w <= 0xDBFF -> do
-                            let c = ord $ U16.chr2 w (A.unsafeIndex arr (i+1))
-                            poke8 0 $ (c `shiftR` 18) + 0xF0
-                            poke8 1 $ ((c `shiftR` 12) .&. 0x3F) + 0x80
-                            poke8 2 $ ((c `shiftR` 6) .&. 0x3F) + 0x80
-                            poke8 3 $ (c .&. 0x3F) + 0x80
-                            go (i + 2) (op `plusPtr` 4)
-                        | otherwise -> do
-                            poke8 0 $ (w `shiftR` 12) + 0xE0
-                            poke8 1 $ ((w `shiftR` 6) .&. 0x3F) + 0x80
-                            poke8 2 $ (w .&. 0x3F) + 0x80
-                            go (i + 1) (op `plusPtr` 3)
-                  | otherwise =
-                      outerLoop i (BufferRange op ope)
-                  where
-                    poke8 j v = poke (op `plusPtr` j) (fromIntegral v :: Word8)
 
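
A minimal sketch of the stream-oriented decoder documented in the $stream section above; it is not part of the patch, and the helper name decodeAll is an assumption. It simply threads the continuation carried by 'Some' over a list of packets:

import qualified Data.ByteString as B
import qualified Data.Text as T
import Data.Text.Encoding (Decoding(..), streamDecodeUtf8)

decodeAll :: [B.ByteString] -> [T.Text]
decodeAll = go streamDecodeUtf8
  where
    go _ []       = []
    go f (bs:bss) = let Some t _ f' = f bs   -- decode what we can, keep the continuation
                    in t : go f' bss

-- With OverloadedStrings, the example from the documentation above gives:
--   decodeAll ["hi \xe2", "\x98", "\x83"]  ==>  ["hi ", "", "☃"]
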
diff --git a/Data/Text/Encoding/Error.hs b/Data/Text/Encoding/Error.hs
index 732fa7518f4660d02190e8b777f32c917ea3ed5f..d38d9a107c6b1ec58aca282e08770b6c31732862 100644
--- a/Data/Text/Encoding/Error.hs
+++ b/Data/Text/Encoding/Error.hs
@@ -1,4 +1,7 @@
 {-# LANGUAGE CPP, DeriveDataTypeable #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 -- |
 -- Module      : Data.Text.Encoding.Error
 -- Copyright   : (c) Bryan O'Sullivan 2009
@@ -35,11 +38,8 @@ module Data.Text.Encoding.Error
     , replace
     ) where
 
-#if __GLASGOW_HASKELL__ >= 610
+import Control.DeepSeq (NFData (..))
 import Control.Exception (Exception, throw)
-#else
-import Control.Exception.Extensible (Exception, throw)
-#endif
 import Data.Typeable (Typeable)
 import Data.Word (Word8)
 import Numeric (showHex)
@@ -88,12 +88,16 @@ showUnicodeException (EncodeError desc (Just c))
     = "Cannot encode character '\\x" ++ showHex (fromEnum c) ("': " ++ desc)
 showUnicodeException (EncodeError desc Nothing)
     = "Cannot encode input: " ++ desc
-                     
+
 instance Show UnicodeException where
     show = showUnicodeException
 
 instance Exception UnicodeException
 
+instance NFData UnicodeException where
+    rnf (DecodeError desc w) = rnf desc `seq` rnf w `seq` ()
+    rnf (EncodeError desc c) = rnf desc `seq` rnf c `seq` ()
+
 -- | Throw a 'UnicodeException' if decoding fails.
 strictDecode :: OnDecodeError
 strictDecode desc c = throw (DecodeError desc c)
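
A small sketch of what the new NFData instance enables (illustrative, not part of the patch; the name decodeUtf8Forced is an assumption): a decoding result, including a caught 'UnicodeException', can now be forced completely before leaving IO.

import Control.DeepSeq (force)
import Control.Exception (evaluate)
import qualified Data.ByteString as B
import qualified Data.Text as T
import Data.Text.Encoding (decodeUtf8')
import Data.Text.Encoding.Error (UnicodeException)

-- Fully evaluate either the decoded Text or the decoding error.
decodeUtf8Forced :: B.ByteString -> IO (Either UnicodeException T.Text)
decodeUtf8Forced = evaluate . force . decodeUtf8'
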
diff --git a/Data/Text/Encoding/Fusion.hs b/Data/Text/Encoding/Fusion.hs
index ee580507b3dd7cb282a6ed06b75f35cbd8a749bc..b9e5862dc7a0d52fa8c83f731f87706fd283cb21 100644
--- a/Data/Text/Encoding/Fusion.hs
+++ b/Data/Text/Encoding/Fusion.hs
@@ -44,12 +44,12 @@ import Data.Text.UnsafeShift (shiftL, shiftR)
 import Data.Word (Word8, Word16, Word32)
 import Foreign.ForeignPtr (withForeignPtr, ForeignPtr)
 import Foreign.Storable (pokeByteOff)
-import System.IO.Unsafe (unsafePerformIO)
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Unsafe as B
 import qualified Data.Text.Encoding.Utf8 as U8
 import qualified Data.Text.Encoding.Utf16 as U16
 import qualified Data.Text.Encoding.Utf32 as U32
+import Data.Text.Unsafe (unsafeDupablePerformIO)
 
 streamASCII :: ByteString -> Stream Char
 streamASCII bs = Stream next 0 (maxSize l)
@@ -163,7 +163,7 @@ streamUtf32LE onErr bs = Stream next 0 (maxSize (l `shiftR` 2))
 
 -- | /O(n)/ Convert a 'Stream' 'Word8' to a 'ByteString'.
 unstream :: Stream Word8 -> ByteString
-unstream (Stream next s0 len) = unsafePerformIO $ do
+unstream (Stream next s0 len) = unsafeDupablePerformIO $ do
     let mlen = upperBound 4 len
     mallocByteString mlen >>= loop mlen 0 s0
     where
diff --git a/Data/Text/Encoding/Fusion/Common.hs b/Data/Text/Encoding/Fusion/Common.hs
index 06807a7fbbef2f33b84d3ae5ecd31f1b3d211b58..30e4b07ca5ff70972f32bc52b7ff1e8f1949f64d 100644
--- a/Data/Text/Encoding/Fusion/Common.hs
+++ b/Data/Text/Encoding/Fusion/Common.hs
@@ -21,8 +21,7 @@ module Data.Text.Encoding.Fusion.Common
     -- * Restreaming
     -- Restreaming is the act of converting from one 'Stream'
     -- representation to another.
-      restreamUtf8
-    , restreamUtf16LE
+      restreamUtf16LE
     , restreamUtf16BE
     , restreamUtf32LE
     , restreamUtf32BE
@@ -34,31 +33,6 @@ import Data.Text.Fusion.Internal (RS(..))
 import Data.Text.UnsafeChar (ord)
 import Data.Text.UnsafeShift (shiftR)
 import Data.Word (Word8)
-import qualified Data.Text.Encoding.Utf8 as U8
-
--- | /O(n)/ Convert a Stream Char into a UTF-8 encoded Stream Word8.
-restreamUtf8 :: Stream Char -> Stream Word8
-restreamUtf8 (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
-  where
-    next (RS0 s) = case next0 s of
-        Done              -> Done
-        Skip s'           -> Skip (RS0 s')
-        Yield x s'
-            | n <= 0x7F   -> Yield c  (RS0 s')
-            | n <= 0x07FF -> Yield a2 (RS1 s' b2)
-            | n <= 0xFFFF -> Yield a3 (RS2 s' b3 c3)
-            | otherwise   -> Yield a4 (RS3 s' b4 c4 d4)
-          where
-            n  = ord x
-            c  = fromIntegral n
-            (a2,b2) = U8.ord2 x
-            (a3,b3,c3) = U8.ord3 x
-            (a4,b4,c4,d4) = U8.ord4 x
-    next (RS1 s x2)       = Yield x2 (RS0 s)
-    next (RS2 s x2 x3)    = Yield x2 (RS1 s x3)
-    next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
-    {-# INLINE next #-}
-{-# INLINE restreamUtf8 #-}
 
 restreamUtf16BE :: Stream Char -> Stream Word8
 restreamUtf16BE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
diff --git a/Data/Text/Encoding/Utf8.hs b/Data/Text/Encoding/Utf8.hs
index ba0d73664e12643841b4322fbfff390823ab0417..6a1bd53d0705b024e3a766dfac2933bbadd7ca9e 100644
--- a/Data/Text/Encoding/Utf8.hs
+++ b/Data/Text/Encoding/Utf8.hs
@@ -146,7 +146,7 @@ validate3 x1 x2 x3 = validate3_1 || validate3_2 || validate3_3 || validate3_4
 validate4 :: Word8 -> Word8 -> Word8 -> Word8 -> Bool
 {-# INLINE validate4 #-}
 validate4 x1 x2 x3 x4 = validate4_1 || validate4_2 || validate4_3
-  where 
+  where
     validate4_1 = x1 == 0xF0 &&
                   between x2 0x90 0xBF &&
                   between x3 0x80 0xBF &&
diff --git a/Data/Text/Foreign.hs b/Data/Text/Foreign.hs
index 58760e0d3d02ef6a93e676ab3630e42be05ca43d..9f006bb6aaa7eaeb5cc676b18fcc0c92307d5783 100644
--- a/Data/Text/Foreign.hs
+++ b/Data/Text/Foreign.hs
@@ -21,6 +21,9 @@ module Data.Text.Foreign
     , fromPtr
     , useAsPtr
     , asForeignPtr
+    -- ** Encoding as UTF-8
+    , peekCStringLen
+    , withCStringLen
     -- * Unsafe conversion code
     , lengthWord16
     , unsafeCopyToPtr
@@ -38,14 +41,17 @@ import Control.Monad.ST.Unsafe (unsafeIOToST)
 #else
 import Control.Monad.ST (unsafeIOToST)
 #endif
+import Data.ByteString.Unsafe (unsafePackCStringLen, unsafeUseAsCStringLen)
+import Data.Text.Encoding (decodeUtf8, encodeUtf8)
 import Data.Text.Internal (Text(..), empty)
 import Data.Text.Unsafe (lengthWord16)
-import qualified Data.Text.Array as A
 import Data.Word (Word16)
+import Foreign.C.String (CStringLen)
+import Foreign.ForeignPtr (ForeignPtr, mallocForeignPtrArray, withForeignPtr)
 import Foreign.Marshal.Alloc (allocaBytes)
 import Foreign.Ptr (Ptr, castPtr, plusPtr)
-import Foreign.ForeignPtr (ForeignPtr, mallocForeignPtrArray, withForeignPtr)
 import Foreign.Storable (peek, poke)
+import qualified Data.Text.Array as A
 
 -- $interop
 --
@@ -148,3 +154,21 @@ asForeignPtr t@(Text _arr _off len) = do
   fp <- mallocForeignPtrArray len
   withForeignPtr fp $ unsafeCopyToPtr t
   return (fp, I16 len)
+
+-- | /O(n)/ Decode a C string with explicit length, which is assumed
+-- to have been encoded as UTF-8. If decoding fails, a
+-- 'UnicodeException' is thrown.
+peekCStringLen :: CStringLen -> IO Text
+peekCStringLen cs = do
+  bs <- unsafePackCStringLen cs
+  return $! decodeUtf8 bs
+
+-- | Marshal a 'Text' into a C string encoded as UTF-8 in temporary
+-- storage, with explicit length information. The encoded string may
+-- contain NUL bytes, and is not followed by a trailing NUL byte.
+--
+-- The temporary storage is freed when the subcomputation terminates
+-- (either normally or via an exception), so the pointer to the
+-- temporary storage must /not/ be used after this function returns.
+withCStringLen :: Text -> (CStringLen -> IO a) -> IO a
+withCStringLen t act = unsafeUseAsCStringLen (encodeUtf8 t) act
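
A minimal sketch of the two UTF-8 'CStringLen' helpers added above, not part of the patch; the name utf8RoundTrip is an assumption. Because 'peekCStringLen' decodes into a fresh buffer, the result does not reference the temporary storage freed by 'withCStringLen':

import qualified Data.Text as T
import Data.Text.Foreign (peekCStringLen, withCStringLen)

-- Marshal a Text out as a UTF-8 C string and immediately decode it back.
utf8RoundTrip :: T.Text -> IO T.Text
utf8RoundTrip t = withCStringLen t peekCStringLen
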
diff --git a/Data/Text/Fusion.hs b/Data/Text/Fusion.hs
index 6e1fe253189d0f17a87b29ea000d33846f2515c1..275530f4cb472f8b1d4baca181c2dc8676177aca 100644
--- a/Data/Text/Fusion.hs
+++ b/Data/Text/Fusion.hs
@@ -51,6 +51,7 @@ import Prelude (Bool(..), Char, Maybe(..), Monad(..), Int,
                 fromIntegral, otherwise)
 import Data.Bits ((.&.))
 import Data.Text.Internal (Text(..))
+import Data.Text.Private (runText)
 import Data.Text.UnsafeChar (ord, unsafeChr, unsafeWrite)
 import Data.Text.UnsafeShift (shiftL, shiftR)
 import qualified Data.Text.Array as A
@@ -59,7 +60,6 @@ import Data.Text.Fusion.Internal
 import Data.Text.Fusion.Size
 import qualified Data.Text.Internal as I
 import qualified Data.Text.Encoding.Utf16 as U16
-import qualified Prelude as P
 
 default(Int)
 
@@ -94,15 +94,14 @@ reverseStream (Text arr off len) = Stream next (off+len-1) (maxSize len)
 
 -- | /O(n)/ Convert a 'Stream Char' into a 'Text'.
 unstream :: Stream Char -> Text
-unstream (Stream next0 s0 len) = I.textP (P.fst a) 0 (P.snd a)
-  where
-    a = A.run2 (A.new mlen >>= \arr -> outer arr mlen s0 0)
-      where mlen = upperBound 4 len
-    outer arr top = loop
-      where
+unstream (Stream next0 s0 len) = runText $ \done -> do
+  let mlen = upperBound 4 len
+  arr0 <- A.new mlen
+  let outer arr top = loop
+       where
         loop !s !i =
             case next0 s of
-              Done          -> return (arr, i)
+              Done          -> done arr i
               Skip s'       -> loop s' i
               Yield x s'
                 | j >= top  -> {-# SCC "unstream/resize" #-} do
@@ -114,6 +113,7 @@ unstream (Stream next0 s0 len) = I.textP (P.fst a) 0 (P.snd a)
                                   loop s' (i+d)
                 where j | ord x < 0x10000 = i
                         | otherwise       = i + 1
+  outer arr0 mlen s0 0
 {-# INLINE [0] unstream #-}
 {-# RULES "STREAM stream/unstream fusion" forall s. stream (unstream s) = s #-}
 
@@ -223,9 +223,9 @@ mapAccumL f z0 (Stream next0 s0 len) = (nz,I.textP na 0 nl)
                                arr' <- A.new top'
                                A.copyM arr' 0 arr 0 top
                                outer arr' top' z s i
-                | otherwise -> do let (z',c) = f z x
-                                  d <- unsafeWrite arr i c
+                | otherwise -> do d <- unsafeWrite arr i c
                                   loop z' s' (i+d)
-                where j | ord x < 0x10000 = i
+                where (z',c) = f z x
+                      j | ord c < 0x10000 = i
                         | otherwise       = i + 1
 {-# INLINE [0] mapAccumL #-}
diff --git a/Data/Text/Fusion/Common.hs b/Data/Text/Fusion/Common.hs
index 4ce008291c1fe4d9b7ffa4b7afa3f2ee5347d921..ade520783257111aa45e16783ba4c8fcc7feb866 100644
--- a/Data/Text/Fusion/Common.hs
+++ b/Data/Text/Fusion/Common.hs
@@ -1,7 +1,7 @@
-{-# LANGUAGE BangPatterns, Rank2Types #-}
+{-# LANGUAGE BangPatterns, MagicHash, Rank2Types #-}
 -- |
 -- Module      : Data.Text.Fusion.Common
--- Copyright   : (c) Bryan O'Sullivan 2009
+-- Copyright   : (c) Bryan O'Sullivan 2009, 2012
 --
 -- License     : BSD-style
 -- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
@@ -17,6 +17,7 @@ module Data.Text.Fusion.Common
       singleton
     , streamList
     , unstreamList
+    , streamCString#
 
     -- * Basic interface
     , cons
@@ -104,10 +105,13 @@ import Prelude (Bool(..), Char, Eq(..), Int, Integral, Maybe(..),
                 (&&), fromIntegral, otherwise)
 import qualified Data.List as L
 import qualified Prelude as P
+import Data.Bits (shiftL)
 import Data.Int (Int64)
 import Data.Text.Fusion.Internal
 import Data.Text.Fusion.CaseMapping (foldMapping, lowerMapping, upperMapping)
 import Data.Text.Fusion.Size
+import GHC.Prim (Addr#, chr#, indexCharOffAddr#, ord#)
+import GHC.Types (Char(..), Int(..))
 
 singleton :: Char -> Stream Char
 singleton c = Stream next False 1
@@ -131,6 +135,35 @@ unstreamList (Stream next s0 _len) = unfold s0
 
 {-# RULES "STREAM streamList/unstreamList fusion" forall s. streamList (unstreamList s) = s #-}
 
+-- | Stream the UTF-8-like packed encoding used by GHC to represent
+-- constant strings in generated code.
+--
+-- This encoding uses the byte sequence "\xc0\x80" to represent NUL,
+-- and the string is NUL-terminated.
+streamCString# :: Addr# -> Stream Char
+streamCString# addr = Stream step 0 unknownSize
+  where
+    step !i
+        | b == 0    = Done
+        | b <= 0x7f = Yield (C# b#) (i+1)
+        | b <= 0xdf = let !c = chr $ ((b-0xc0) `shiftL` 6) + next 1
+                      in Yield c (i+2)
+        | b <= 0xef = let !c = chr $ ((b-0xe0) `shiftL` 12) +
+                                      (next 1  `shiftL` 6) +
+                                       next 2
+                      in Yield c (i+3)
+        | otherwise = let !c = chr $ ((b-0xf0) `shiftL` 18) +
+                                      (next 1  `shiftL` 12) +
+                                      (next 2  `shiftL` 6) +
+                                       next 3
+                      in Yield c (i+4)
+      where b      = I# (ord# b#)
+            next n = I# (ord# (at# (i+n))) - 0x80
+            !b#    = at# i
+    at# (I# i#) = indexCharOffAddr# addr i#
+    chr (I# i#) = C# (chr# i#)
+{-# INLINE [0] streamCString# #-}
+
 -- ----------------------------------------------------------------------------
 -- * Basic stream functions
 
@@ -139,7 +172,7 @@ data C s = C0 !s
 
 -- | /O(n)/ Adds a character to the front of a Stream Char.
 cons :: Char -> Stream Char -> Stream Char
-cons w (Stream next0 s0 len) = Stream next (C1 s0) (len+1)
+cons !w (Stream next0 s0 len) = Stream next (C1 s0) (len+1)
     where
       next (C1 s) = Yield w (C0 s)
       next (C0 s) = case next0 s of
@@ -185,9 +218,13 @@ head (Stream next s0 _len) = loop_head s0
       loop_head !s = case next s of
                       Yield x _ -> x
                       Skip s'   -> loop_head s'
-                      Done      -> streamError "head" "Empty stream"
+                      Done      -> head_empty
 {-# INLINE [0] head #-}
 
+head_empty :: a
+head_empty = streamError "head" "Empty stream"
+{-# NOINLINE head_empty #-}
+
 -- | /O(1)/ Returns the first character and remainder of a 'Stream
 -- Char', or 'Nothing' if empty.  Subject to array fusion.
 uncons :: Stream Char -> Maybe (Char, Stream Char)
@@ -274,7 +311,7 @@ lengthI (Stream next s0 _len) = loop_length 0 s0
 -- of 'lengthI', but can short circuit if the count of characters is
 -- greater than the number, and hence be more efficient.
 compareLengthI :: Integral a => Stream Char -> a -> Ordering
-compareLengthI (Stream next s0 len) n = 
+compareLengthI (Stream next s0 len) n =
     case exactly len of
       Nothing -> loop_cmp 0 s0
       Just i  -> compare (fromIntegral i) n
@@ -743,7 +780,6 @@ isPrefixOf (Stream next1 s1 _) (Stream next2 s2 _) = loop (next1 s1) (next2 s2)
       loop (Yield x1 s1') (Yield x2 s2') = x1 == x2 &&
                                            loop (next1 s1') (next2 s2')
 {-# INLINE [0] isPrefixOf #-}
-{-# SPECIALISE isPrefixOf :: Stream Char -> Stream Char -> Bool #-}
 
 -- ----------------------------------------------------------------------------
 -- * Searching
diff --git a/Data/Text/Fusion/Internal.hs b/Data/Text/Fusion/Internal.hs
index e28d7a82ae302d9d82ad64e5785d1abf0d549ccf..dfb105a0265f800cc1695da67e14a1a236cd1492 100644
--- a/Data/Text/Fusion/Internal.hs
+++ b/Data/Text/Fusion/Internal.hs
@@ -101,7 +101,6 @@ eq (Stream next1 s1 _) (Stream next2 s2 _) = loop (next1 s1) (next2 s2)
       loop (Yield x1 s1') (Yield x2 s2') = x1 == x2 &&
                                            loop (next1 s1') (next2 s2')
 {-# INLINE [0] eq #-}
-{-# SPECIALISE eq :: Stream Char -> Stream Char -> Bool #-}
 
 cmp :: (Ord a) => Stream a -> Stream a -> Ordering
 cmp (Stream next1 s1 _) (Stream next2 s2 _) = loop (next1 s1) (next2 s2)
@@ -117,7 +116,6 @@ cmp (Stream next1 s1 _) (Stream next2 s2 _) = loop (next1 s1) (next2 s2)
             EQ    -> loop (next1 s1') (next2 s2')
             other -> other
 {-# INLINE [0] cmp #-}
-{-# SPECIALISE cmp :: Stream Char -> Stream Char -> Ordering #-}
 
 -- | The empty stream.
 empty :: Stream a
diff --git a/Data/Text/Fusion/Size.hs b/Data/Text/Fusion/Size.hs
index 37e4a1694ddfc610f7fff42d98325cb22c71dcd3..0f9d79b2d824ad7be2f4be6671e3d2cb333079c8 100644
--- a/Data/Text/Fusion/Size.hs
+++ b/Data/Text/Fusion/Size.hs
@@ -93,8 +93,8 @@ subtractSize _         _           = Unknown
 
 mul :: Int -> Int -> Int
 mul m n
-    | m <= maxBound `div` n = m * n
-    | otherwise             = overflowError
+    | m <= maxBound `quot` n = m * n
+    | otherwise              = overflowError
 {-# INLINE mul #-}
 
 mulSize :: Size -> Size -> Size
diff --git a/Data/Text/IO.hs b/Data/Text/IO.hs
index 4cd4d710c377d4c92f58df56808afe2bed580948..815d4c47608e7f736e6a672ee342f053f364577c 100644
--- a/Data/Text/IO.hs
+++ b/Data/Text/IO.hs
@@ -1,4 +1,7 @@
 {-# LANGUAGE BangPatterns, CPP, RecordWildCards, ScopedTypeVariables #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 -- |
 -- Module      : Data.Text.IO
 -- Copyright   : (c) 2009, 2010 Bryan O'Sullivan,
@@ -16,7 +19,7 @@
 module Data.Text.IO
     (
     -- * Performance
-    -- $performance 
+    -- $performance
 
     -- * Locale support
     -- $locale
@@ -26,6 +29,7 @@ module Data.Text.IO
     , appendFile
     -- * Operations on handles
     , hGetContents
+    , hGetChunk
     , hGetLine
     , hPutStr
     , hPutStrLn
@@ -38,15 +42,11 @@ module Data.Text.IO
     ) where
 
 import Data.Text (Text)
-import Prelude hiding (appendFile, catch, getContents, getLine, interact,
+import Prelude hiding (appendFile, getContents, getLine, interact,
                        putStr, putStrLn, readFile, writeFile)
 import System.IO (Handle, IOMode(..), hPutChar, openFile, stdin, stdout,
                   withFile)
-#if __GLASGOW_HASKELL__ <= 610
-import qualified Data.ByteString.Char8 as B
-import Data.Text.Encoding (decodeUtf8, encodeUtf8)
-#else
-import Control.Exception (catch, throwIO)
+import qualified Control.Exception as E
 import Control.Monad (liftM2, when)
 import Data.IORef (readIORef, writeIORef)
 import qualified Data.Text as T
@@ -64,7 +64,6 @@ import GHC.IO.Handle.Types (BufferList(..), BufferMode(..), Handle__(..),
                             HandleType(..), Newline(..))
 import System.IO (hGetBuffering, hFileSize, hSetBuffering, hTell)
 import System.IO.Error (isEOFError)
-#endif
 
 -- $performance
 -- #performance#
@@ -97,6 +96,30 @@ writeFile p = withFile p WriteMode . flip hPutStr
 appendFile :: FilePath -> Text -> IO ()
 appendFile p = withFile p AppendMode . flip hPutStr
 
+catchError :: String -> Handle -> Handle__ -> IOError -> IO Text
+catchError caller h Handle__{..} err
+    | isEOFError err = do
+        buf <- readIORef haCharBuffer
+        return $ if isEmptyBuffer buf
+                 then T.empty
+                 else T.singleton '\r'
+    | otherwise = E.throwIO (augmentIOError err caller h)
+
+-- | /Experimental./ Read a single chunk of strict text from a
+-- 'Handle'. The size of the chunk depends on the amount of input
+-- currently buffered.
+--
+-- This function blocks only if there is no data available, and EOF
+-- has not yet been reached. Once EOF is reached, this function
+-- returns an empty string instead of throwing an exception.
+hGetChunk :: Handle -> IO Text
+hGetChunk h = wantReadableHandle "hGetChunk" h readSingleChunk
+ where
+  readSingleChunk hh@Handle__{..} = do
+    buf <- readIORef haCharBuffer
+    t <- readChunk hh buf `E.catch` catchError "hGetChunk" h hh
+    return (hh, t)
+
 -- | Read the remaining contents of a 'Handle' as a string.  The
 -- 'Handle' is closed once the contents have been read, or if an
 -- exception is thrown.
@@ -109,31 +132,21 @@ appendFile p = withFile p AppendMode . flip hPutStr
 -- result to construct its result.  For files more than a half of
 -- available RAM in size, this may result in memory exhaustion.
 hGetContents :: Handle -> IO Text
-#if __GLASGOW_HASKELL__ <= 610
-hGetContents = fmap decodeUtf8 . B.hGetContents
-#else
 hGetContents h = do
   chooseGoodBuffering h
   wantReadableHandle "hGetContents" h readAll
  where
   readAll hh@Handle__{..} = do
-    let catchError e
-          | isEOFError e = do
-              buf <- readIORef haCharBuffer
-              return $ if isEmptyBuffer buf
-                       then T.empty
-                       else T.singleton '\r'
-          | otherwise = throwIO (augmentIOError e "hGetContents" h)
-        readChunks = do
+    let readChunks = do
           buf <- readIORef haCharBuffer
-          t <- readChunk hh buf `catch` catchError
+          t <- readChunk hh buf `E.catch` catchError "hGetContents" h hh
           if T.null t
             then return [t]
             else (t:) `fmap` readChunks
     ts <- readChunks
     (hh', _) <- hClose_help hh
     return (hh'{haType=ClosedHandle}, T.concat ts)
-  
+
 -- | Use a more efficient buffer size if we're reading in
 -- block-buffered mode with the default buffer size.  When we can
 -- determine the size of the handle we're reading, set the buffer size
@@ -144,30 +157,22 @@ chooseGoodBuffering h = do
   bufMode <- hGetBuffering h
   case bufMode of
     BlockBuffering Nothing -> do
-      d <- catch (liftM2 (-) (hFileSize h) (hTell h)) $ \(e::IOException) ->
+      d <- E.catch (liftM2 (-) (hFileSize h) (hTell h)) $ \(e::IOException) ->
            if ioe_type e == InappropriateType
            then return 16384 -- faster than the 2KB default
-           else throwIO e
+           else E.throwIO e
       when (d > 0) . hSetBuffering h . BlockBuffering . Just . fromIntegral $ d
     _ -> return ()
-#endif
 
 -- | Read a single line from a handle.
 hGetLine :: Handle -> IO Text
-#if __GLASGOW_HASKELL__ <= 610
-hGetLine = fmap decodeUtf8 . B.hGetLine
-#else
 hGetLine = hGetLineWith T.concat
-#endif
 
 -- | Write a string to a handle.
 hPutStr :: Handle -> Text -> IO ()
-#if __GLASGOW_HASKELL__ <= 610
-hPutStr h = B.hPutStr h . encodeUtf8
-#else
 -- This function is lifted almost verbatim from GHC.IO.Handle.Text.
 hPutStr h t = do
-  (buffer_mode, nl) <- 
+  (buffer_mode, nl) <-
        wantWritableHandle "hPutStr" h $ \h_ -> do
                      bmode <- getSpareBuffer h_
                      return (bmode, haOutputNL h_)
@@ -249,7 +254,7 @@ writeBlocksRaw h buf0 (Stream next0 s0 _len) = outer s0 buf0
 
 -- This function is completely lifted from GHC.IO.Handle.Text.
 getSpareBuffer :: Handle__ -> IO (BufferMode, CharBuffer)
-getSpareBuffer Handle__{haCharBuffer=ref, 
+getSpareBuffer Handle__{haCharBuffer=ref,
                         haBuffers=spare_ref,
                         haBufferMode=mode}
  = do
@@ -270,11 +275,10 @@ getSpareBuffer Handle__{haCharBuffer=ref,
 -- This function is completely lifted from GHC.IO.Handle.Text.
 commitBuffer :: Handle -> RawCharBuffer -> Int -> Int -> Bool -> Bool
              -> IO CharBuffer
-commitBuffer hdl !raw !sz !count flush release = 
+commitBuffer hdl !raw !sz !count flush release =
   wantWritableHandle "commitAndReleaseBuffer" hdl $
      commitBuffer' raw sz count flush release
 {-# INLINE commitBuffer #-}
-#endif
 
 -- | Write a string to a handle, followed by a newline.
 hPutStrLn :: Handle -> Text -> IO ()
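
A usage sketch for the new 'hGetChunk' (illustrative only, not part of the patch; the helper name hGetChunks is an assumption). It reads until the empty chunk that signals EOF, as described in the haddock above:

import System.IO (Handle)
import qualified Data.Text as T
import qualified Data.Text.IO as TIO

-- Collect all chunks from a handle; each chunk's size depends on buffering.
hGetChunks :: Handle -> IO [T.Text]
hGetChunks h = go
  where
    go = do
      c <- TIO.hGetChunk h
      if T.null c
        then return []          -- EOF: hGetChunk returns empty instead of throwing
        else fmap (c :) go
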
diff --git a/Data/Text/IO/Internal.hs b/Data/Text/IO/Internal.hs
index bc4e74b2fa4a17573abe1d9d7ca862a332592424..0f40368f12c8c03d21ca90f692f010857c4c54bb 100644
--- a/Data/Text/IO/Internal.hs
+++ b/Data/Text/IO/Internal.hs
@@ -12,14 +12,11 @@
 
 module Data.Text.IO.Internal
     (
-#if __GLASGOW_HASKELL__ >= 612
       hGetLineWith
     , readChunk
-#endif
     ) where
 
-#if __GLASGOW_HASKELL__ >= 612
-import Control.Exception (catch)
+import qualified Control.Exception as E
 import Data.IORef (readIORef, writeIORef)
 import Data.Text (Text)
 import Data.Text.Fusion (unstream)
@@ -32,7 +29,6 @@ import GHC.IO.Buffer (Buffer(..), CharBuffer, RawCharBuffer, bufferAdjustL,
                       withRawBuffer, writeCharBuf)
 import GHC.IO.Handle.Internals (ioe_EOF, readTextDevice, wantReadableHandle_)
 import GHC.IO.Handle.Types (Handle__(..), Newline(..))
-import Prelude hiding (catch)
 import System.IO (Handle)
 import System.IO.Error (isEOFError)
 import qualified Data.Text as T
@@ -84,9 +80,9 @@ hGetLineLoop hh@Handle__{..} = go where
 -- This function is lifted almost verbatim from GHC.IO.Handle.Text.
 maybeFillReadBuffer :: Handle__ -> CharBuffer -> IO (Maybe CharBuffer)
 maybeFillReadBuffer handle_ buf
-  = catch (Just `fmap` getSomeCharacters handle_ buf) $ \e ->
-      if isEOFError e 
-      then return Nothing 
+  = E.catch (Just `fmap` getSomeCharacters handle_ buf) $ \e ->
+      if isEOFError e
+      then return Nothing
       else ioError e
 
 unpack :: RawCharBuffer -> Int -> Int -> IO Text
@@ -164,4 +160,3 @@ readChunk hh@Handle__{..} buf = do
 
 sizeError :: String -> a
 sizeError loc = error $ "Data.Text.IO." ++ loc ++ ": bad internal buffer size"
-#endif
diff --git a/Data/Text/Internal.hs b/Data/Text/Internal.hs
index 474b0d2ba443610c0f470fcf5f586958edef2886..221d1e7ddabb643f5f11d69bfe64385cc480affb 100644
--- a/Data/Text/Internal.hs
+++ b/Data/Text/Internal.hs
@@ -23,6 +23,7 @@
 module Data.Text.Internal
     (
     -- * Types
+    -- $internals
       Text(..)
     -- * Construction
     , text
@@ -47,9 +48,9 @@ import Data.Typeable (Typeable)
 
 -- | A space efficient, packed, unboxed Unicode text type.
 data Text = Text
-    {-# UNPACK #-} !A.Array          -- payload
-    {-# UNPACK #-} !Int              -- offset
-    {-# UNPACK #-} !Int              -- length
+    {-# UNPACK #-} !A.Array          -- payload (Word16 elements)
+    {-# UNPACK #-} !Int              -- offset (units of Word16, not Char)
+    {-# UNPACK #-} !Int              -- length (units of Word16, not Char)
     deriving (Typeable)
 
 -- | Smart constructor.
@@ -90,7 +91,8 @@ showText (Text arr off len) =
 -- scalar values, but are unfortunately admitted as valid 'Char'
 -- values by Haskell.  They cannot be represented in a 'Text'.  This
 -- function remaps those code points to the Unicode replacement
--- character \"&#xfffd;\", and leaves other code points unchanged.
+-- character (U+FFFD, \'&#xfffd;\'), and leaves other code points
+-- unchanged.
 safe :: Char -> Char
 safe c
     | ord c .&. 0x1ff800 /= 0xd800 = c
@@ -101,3 +103,22 @@ safe c
 firstf :: (a -> c) -> Maybe (a,b) -> Maybe (c,b)
 firstf f (Just (a, b)) = Just (f a, b)
 firstf _  Nothing      = Nothing
+
+-- $internals
+--
+-- Internally, the 'Text' type is represented as an array of 'Word16'
+-- UTF-16 code units. The offset and length fields in the constructor
+-- are in these units, /not/ units of 'Char'.
+--
+-- Invariants that all functions must maintain:
+--
+-- * Since the 'Text' type uses UTF-16 internally, it cannot represent
+--   characters in the reserved surrogate code point range U+D800 to
+--   U+DFFF. To maintain this invariant, the 'safe' function maps
+--   'Char' values in this range to the replacement character (U+FFFD,
+--   \'&#xfffd;\').
+--
+-- * A leading (or \"high\") surrogate code unit (0xD800–0xDBFF) must
+--   always be followed by a trailing (or \"low\") surrogate code unit
+--   (0xDC00–0xDFFF). A trailing surrogate code unit must always be
+--   preceded by a leading surrogate code unit.
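
A tiny sketch of the surrogate invariant stated above, observable through the public API (illustrative only, not part of the patch): a lone surrogate 'Char' is remapped by 'safe' to the replacement character.

import qualified Data.Text as T

-- 'pack' applies 'safe', so a lone surrogate becomes U+FFFD.
surrogateIsReplaced :: Bool
surrogateIsReplaced = T.pack ['\xD800'] == T.singleton '\xFFFD'
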
diff --git a/Data/Text/Lazy.hs b/Data/Text/Lazy.hs
index a4b432e0fd5fb2e30035caf5c94c88b7e890cfc5..5a976cb9ee942609e25dc672cdcad94ca33fc9ec 100644
--- a/Data/Text/Lazy.hs
+++ b/Data/Text/Lazy.hs
@@ -1,8 +1,11 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
-{-# LANGUAGE BangPatterns, CPP #-}
+{-# LANGUAGE BangPatterns, MagicHash, CPP #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 -- |
 -- Module      : Data.Text.Lazy
--- Copyright   : (c) 2009, 2010 Bryan O'Sullivan
+-- Copyright   : (c) 2009, 2010, 2012 Bryan O'Sullivan
 --
 -- License     : BSD-style
 -- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
@@ -174,7 +177,7 @@ module Data.Text.Lazy
     , partition
 
     -- , findSubstring
-    
+
     -- * Indexing
     , index
     , count
@@ -190,7 +193,7 @@ module Data.Text.Lazy
 import Prelude (Char, Bool(..), Maybe(..), String,
                 Eq(..), Ord(..), Ordering(..), Read(..), Show(..),
                 (&&), (||), (+), (-), (.), ($), (++),
-                div, error, flip, fmap, fromIntegral, not, otherwise)
+                error, flip, fmap, fromIntegral, not, otherwise, quot)
 import qualified Prelude as P
 #if defined(HAVE_DEEPSEQ)
 import Control.DeepSeq (NFData(..))
@@ -199,11 +202,7 @@ import Data.Int (Int64)
 import qualified Data.List as L
 import Data.Char (isSpace)
 import Data.Data (Data(gfoldl, toConstr, gunfold, dataTypeOf))
-#if __GLASGOW_HASKELL__ >= 612
 import Data.Data (mkNoRepType)
-#else
-import Data.Data (mkNorepType)
-#endif
 import Data.Monoid (Monoid(..))
 import Data.String (IsString(..))
 import qualified Data.Text as T
@@ -217,6 +216,12 @@ import Data.Text.Lazy.Internal (Text(..), chunk, empty, foldlChunks, foldrChunks
 import Data.Text.Internal (firstf, safe, textP)
 import qualified Data.Text.Util as U
 import Data.Text.Lazy.Search (indices)
+#if __GLASGOW_HASKELL__ >= 702
+import qualified GHC.CString as GHC
+#else
+import qualified GHC.Base as GHC
+#endif
+import GHC.Prim (Addr#)
 
 -- $fusion
 --
@@ -332,11 +337,7 @@ instance Data Text where
   gfoldl f z txt = z pack `f` (unpack txt)
   toConstr _     = error "Data.Text.Lazy.Text.toConstr"
   gunfold _ _    = error "Data.Text.Lazy.Text.gunfold"
-#if __GLASGOW_HASKELL__ >= 612
   dataTypeOf _   = mkNoRepType "Data.Text.Lazy.Text"
-#else
-  dataTypeOf _   = mkNorepType "Data.Text.Lazy.Text"
-#endif
 
 -- | /O(n)/ Convert a 'String' into a 'Text'.
 --
@@ -351,6 +352,19 @@ unpack :: Text -> String
 unpack t = S.unstreamList (stream t)
 {-# INLINE [1] unpack #-}
 
+-- | /O(n)/ Convert a literal string into a Text.
+unpackCString# :: Addr# -> Text
+unpackCString# addr# = unstream (S.streamCString# addr#)
+{-# NOINLINE unpackCString# #-}
+
+{-# RULES "TEXT literal" forall a.
+    unstream (S.streamList (L.map safe (GHC.unpackCString# a)))
+      = unpackCString# a #-}
+
+{-# RULES "TEXT literal UTF8" forall a.
+    unstream (S.streamList (L.map safe (GHC.unpackCStringUtf8# a)))
+      = unpackCString# a #-}
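+
+-- An illustrative use of the rules above (assuming OverloadedStrings;
+-- the name is hypothetical, not part of this patch):
+--
+-- > {-# LANGUAGE OverloadedStrings #-}
+-- > greeting :: Text
+-- > greeting = "hello"
+--
+-- Here the literal elaborates through the 'IsString' instance
+-- ('pack'), and the rules rewrite that pipeline into a call to
+-- 'unpackCString#'.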
+
 -- | /O(1)/ Convert a character into a Text.  Subject to fusion.
 -- Performs replacement on invalid scalar values.
 singleton :: Char -> Text
@@ -606,7 +620,7 @@ center k c t
     | otherwise = replicateChar l c `append` t `append` replicateChar r c
   where len = length t
         d   = k - len
-        r   = d `div` 2
+        r   = d `quot` 2
         l   = d - r
 {-# INLINE center #-}
 
@@ -902,7 +916,7 @@ drop i t0
     | otherwise = drop' i t0
   where drop' 0 ts           = ts
         drop' _ Empty        = Empty
-        drop' n (Chunk t ts) 
+        drop' n (Chunk t ts)
             | n < len   = Chunk (T.drop (fromIntegral n) t) ts
             | otherwise = drop' (n - len) ts
             where len   = fromIntegral (T.length t)
@@ -1190,7 +1204,7 @@ tails ts@(Chunk t ts')
 -- > splitOn "\r\n" "a\r\nb\r\nd\r\ne" == ["a","b","d","e"]
 -- > splitOn "aaa"  "aaaXaaaXaaaXaaa"  == ["","X","X","X",""]
 -- > splitOn "x"    "x"                == ["",""]
--- 
+--
 -- and
 --
 -- > intercalate s . splitOn s         == id
diff --git a/Data/Text/Lazy/Builder.hs b/Data/Text/Lazy/Builder.hs
index ddf05546b7b23abdcf3a060feec9097006833c37..fc6eb6a51700d75d745e14bb2ed5bf88050d9e9c 100644
--- a/Data/Text/Lazy/Builder.hs
+++ b/Data/Text/Lazy/Builder.hs
@@ -1,11 +1,15 @@
 {-# LANGUAGE BangPatterns, CPP, Rank2Types #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 
 -----------------------------------------------------------------------------
 -- |
 -- Module      : Data.Text.Lazy.Builder
--- Copyright   : (c) 2010 Johan Tibell
+-- Copyright   : (c) 2013 Bryan O'Sullivan
+--               (c) 2010 Johan Tibell
 -- License     : BSD3-style (see LICENSE)
--- 
+--
 -- Maintainer  : Johan Tibell <johan.tibell@gmail.com>
 -- Stability   : experimental
 -- Portability : portable to Hugs and GHC
@@ -15,7 +19,9 @@
 -- @fromLazyText@, which construct new builders, and 'mappend', which
 -- concatenates two builders.
 --
--- To get maximum performance when building lazy @Text@ values using a builder, associate @mappend@ calls to the right.  For example, prefer
+-- To get maximum performance when building lazy @Text@ values using a
+-- builder, associate @mappend@ calls to the right.  For example,
+-- prefer
 --
 -- > singleton 'a' `mappend` (singleton 'b' `mappend` singleton 'c')
 --
@@ -23,8 +29,14 @@
 --
 -- > singleton 'a' `mappend` singleton 'b' `mappend` singleton 'c'
 --
--- as the latter associates @mappend@ to the left.
+-- as the latter associates @mappend@ to the left. Or, equivalently,
+-- prefer
+--
+-- > singleton 'a' <> singleton 'b' <> singleton 'c'
 --
+-- since the '<>' from recent versions of 'Data.Monoid' associates
+-- to the right.
+
 -----------------------------------------------------------------------------
 
 module Data.Text.Lazy.Builder
@@ -43,270 +55,4 @@ module Data.Text.Lazy.Builder
    , flush
    ) where
 
-import Control.Monad.ST (ST, runST)
-import Data.Bits ((.&.))
-import Data.Monoid (Monoid(..))
-import Data.Text.Internal (Text(..))
-import Data.Text.Lazy.Internal (smallChunkSize)
-import Data.Text.Unsafe (inlineInterleaveST)
-import Data.Text.UnsafeChar (ord, unsafeWrite)
-import Data.Text.UnsafeShift (shiftR)
-import Prelude hiding (map, putChar)
-
-import qualified Data.String as String
-import qualified Data.Text as S
-import qualified Data.Text.Array as A
-import qualified Data.Text.Lazy as L
-
-------------------------------------------------------------------------
-
--- | A @Builder@ is an efficient way to build lazy @Text@ values.
--- There are several functions for constructing builders, but only one
--- to inspect them: to extract any data, you have to turn them into
--- lazy @Text@ values using @toLazyText@.
---
--- Internally, a builder constructs a lazy @Text@ by filling arrays
--- piece by piece.  As each buffer is filled, it is \'popped\' off, to
--- become a new chunk of the resulting lazy @Text@.  All this is
--- hidden from the user of the @Builder@.
-newtype Builder = Builder {
-     -- Invariant (from Data.Text.Lazy):
-     --      The lists include no null Texts.
-     runBuilder :: forall s. (Buffer s -> ST s [S.Text])
-                -> Buffer s
-                -> ST s [S.Text]
-   }
-
-instance Monoid Builder where
-   mempty  = empty
-   {-# INLINE mempty #-}
-   mappend = append
-   {-# INLINE mappend #-}
-   mconcat = foldr mappend mempty
-   {-# INLINE mconcat #-}
-
-instance String.IsString Builder where
-    fromString = fromString
-    {-# INLINE fromString #-}
-
-instance Show Builder where
-    show = show . toLazyText
-
-instance Eq Builder where
-    a == b = toLazyText a == toLazyText b
-
-instance Ord Builder where
-    a <= b = toLazyText a <= toLazyText b
-
-------------------------------------------------------------------------
-
--- | /O(1)./ The empty @Builder@, satisfying
---
---  * @'toLazyText' 'empty' = 'L.empty'@
---
-empty :: Builder
-empty = Builder (\ k buf -> k buf)
-{-# INLINE empty #-}
-
--- | /O(1)./ A @Builder@ taking a single character, satisfying
---
---  * @'toLazyText' ('singleton' c) = 'L.singleton' c@
---
-singleton :: Char -> Builder
-singleton c = writeAtMost 2 $ \ marr o ->
-    if n < 0x10000
-    then A.unsafeWrite marr o (fromIntegral n) >> return 1
-    else do
-        A.unsafeWrite marr o lo
-        A.unsafeWrite marr (o+1) hi
-        return 2
-  where n = ord c
-        m = n - 0x10000
-        lo = fromIntegral $ (m `shiftR` 10) + 0xD800
-        hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
-{-# INLINE singleton #-}
-
-------------------------------------------------------------------------
-
--- | /O(1)./ The concatenation of two builders, an associative
--- operation with identity 'empty', satisfying
---
---  * @'toLazyText' ('append' x y) = 'L.append' ('toLazyText' x) ('toLazyText' y)@
---
-append :: Builder -> Builder -> Builder
-append (Builder f) (Builder g) = Builder (f . g)
-{-# INLINE [0] append #-}
-
--- TODO: Experiment to find the right threshold.
-copyLimit :: Int
-copyLimit = 128                                 
-
--- This function attempts to merge small @Text@ values instead of
--- treating each value as its own chunk.  We may not always want this.
-
--- | /O(1)./ A @Builder@ taking a 'S.Text', satisfying
---
---  * @'toLazyText' ('fromText' t) = 'L.fromChunks' [t]@
---
-fromText :: S.Text -> Builder
-fromText t@(Text arr off l)
-    | S.null t       = empty
-    | l <= copyLimit = writeN l $ \marr o -> A.copyI marr o arr off (l+o)
-    | otherwise      = flush `append` mapBuilder (t :)
-{-# INLINE [1] fromText #-}
-
-{-# RULES
-"fromText/pack" forall s .
-        fromText (S.pack s) = fromString s
- #-}
-
--- | /O(1)./ A Builder taking a @String@, satisfying
---
---  * @'toLazyText' ('fromString' s) = 'L.fromChunks' [S.pack s]@
---
-fromString :: String -> Builder
-fromString str = Builder $ \k (Buffer p0 o0 u0 l0) ->
-    let loop !marr !o !u !l [] = k (Buffer marr o u l)
-        loop marr o u l s@(c:cs)
-            | l <= 1 = do
-                arr <- A.unsafeFreeze marr
-                let !t = Text arr o u
-                marr' <- A.new chunkSize
-                ts <- inlineInterleaveST (loop marr' 0 0 chunkSize s)
-                return $ t : ts
-            | otherwise = do
-                n <- unsafeWrite marr (o+u) c
-                loop marr o (u+n) (l-n) cs
-    in loop p0 o0 u0 l0 str
-  where
-    chunkSize = smallChunkSize
-{-# INLINE fromString #-}
-
--- | /O(1)./ A @Builder@ taking a lazy @Text@, satisfying
---
---  * @'toLazyText' ('fromLazyText' t) = t@
---
-fromLazyText :: L.Text -> Builder
-fromLazyText ts = flush `append` mapBuilder (L.toChunks ts ++)
-{-# INLINE fromLazyText #-}
-
-------------------------------------------------------------------------
-
--- Our internal buffer type
-data Buffer s = Buffer {-# UNPACK #-} !(A.MArray s)
-                       {-# UNPACK #-} !Int  -- offset
-                       {-# UNPACK #-} !Int  -- used units
-                       {-# UNPACK #-} !Int  -- length left
-
-------------------------------------------------------------------------
-
--- | /O(n)./ Extract a lazy @Text@ from a @Builder@ with a default
--- buffer size.  The construction work takes place if and when the
--- relevant part of the lazy @Text@ is demanded.
-toLazyText :: Builder -> L.Text
-toLazyText = toLazyTextWith smallChunkSize
-
--- | /O(n)./ Extract a lazy @Text@ from a @Builder@, using the given
--- size for the initial buffer.  The construction work takes place if
--- and when the relevant part of the lazy @Text@ is demanded.
---
--- If the initial buffer is too small to hold all data, subsequent
--- buffers will be the default buffer size.
-toLazyTextWith :: Int -> Builder -> L.Text
-toLazyTextWith chunkSize m = L.fromChunks (runST $
-  newBuffer chunkSize >>= runBuilder (m `append` flush) (const (return [])))
-
--- | /O(1)./ Pop the strict @Text@ we have constructed so far, if any,
--- yielding a new chunk in the result lazy @Text@.
-flush :: Builder
-flush = Builder $ \ k buf@(Buffer p o u l) ->
-    if u == 0
-    then k buf
-    else do arr <- A.unsafeFreeze p
-            let !b = Buffer p (o+u) 0 l
-                !t = Text arr o u
-            ts <- inlineInterleaveST (k b)
-            return $! t : ts
-
-------------------------------------------------------------------------
-
--- | Sequence an ST operation on the buffer
-withBuffer :: (forall s. Buffer s -> ST s (Buffer s)) -> Builder
-withBuffer f = Builder $ \k buf -> f buf >>= k
-{-# INLINE withBuffer #-}
-
--- | Get the size of the buffer
-withSize :: (Int -> Builder) -> Builder
-withSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
-    runBuilder (f l) k buf
-{-# INLINE withSize #-}
-
--- | Map the resulting list of texts.
-mapBuilder :: ([S.Text] -> [S.Text]) -> Builder
-mapBuilder f = Builder (fmap f .)
-
-------------------------------------------------------------------------
-
--- | Ensure that there are at least @n@ many elements available.
-ensureFree :: Int -> Builder
-ensureFree !n = withSize $ \ l ->
-    if n <= l
-    then empty
-    else flush `append'` withBuffer (const (newBuffer (max n smallChunkSize)))
-{-# INLINE [0] ensureFree #-}
-
-writeAtMost :: Int -> (forall s. A.MArray s -> Int -> ST s Int) -> Builder
-writeAtMost n f = ensureFree n `append'` withBuffer (writeBuffer f)
-{-# INLINE [0] writeAtMost #-}
-
--- | Ensure that @n@ many elements are available, and then use @f@ to
--- write some elements into the memory.
-writeN :: Int -> (forall s. A.MArray s -> Int -> ST s ()) -> Builder
-writeN n f = writeAtMost n (\ p o -> f p o >> return n)
-{-# INLINE writeN #-}
-
-writeBuffer :: (A.MArray s -> Int -> ST s Int) -> Buffer s -> ST s (Buffer s)
-writeBuffer f (Buffer p o u l) = do
-    n <- f p (o+u)
-    return $! Buffer p o (u+n) (l-n)
-{-# INLINE writeBuffer #-}
-
-newBuffer :: Int -> ST s (Buffer s)
-newBuffer size = do
-    arr <- A.new size
-    return $! Buffer arr 0 0 size
-{-# INLINE newBuffer #-}
-
-------------------------------------------------------------------------
--- Some nice rules for Builder
-
--- This function makes GHC understand that 'writeN' and 'ensureFree'
--- are *not* recursive in the precense of the rewrite rules below.
--- This is not needed with GHC 7+.
-append' :: Builder -> Builder -> Builder
-append' (Builder f) (Builder g) = Builder (f . g)
-{-# INLINE append' #-}
-
-{-# RULES
-
-"append/writeAtMost" forall a b (f::forall s. A.MArray s -> Int -> ST s Int)
-                           (g::forall s. A.MArray s -> Int -> ST s Int) ws.
-    append (writeAtMost a f) (append (writeAtMost b g) ws) =
-        append (writeAtMost (a+b) (\marr o -> f marr o >>= \ n -> 
-                                    g marr (o+n) >>= \ m ->
-                                    let s = n+m in s `seq` return s)) ws
-
-"writeAtMost/writeAtMost" forall a b (f::forall s. A.MArray s -> Int -> ST s Int)
-                           (g::forall s. A.MArray s -> Int -> ST s Int).
-    append (writeAtMost a f) (writeAtMost b g) =
-        writeAtMost (a+b) (\marr o -> f marr o >>= \ n -> 
-                            g marr (o+n) >>= \ m ->
-                            let s = n+m in s `seq` return s)
-
-"ensureFree/ensureFree" forall a b .
-    append (ensureFree a) (ensureFree b) = ensureFree (max a b)
-
-"flush/flush"
-    append flush flush = flush
-
- #-}
+import Data.Text.Lazy.Builder.Internal
diff --git a/Data/Text/Lazy/Builder/Int.hs b/Data/Text/Lazy/Builder/Int.hs
index 9ec4c836853be2bfe8ae2ae6c45394b92ee3bfe2..c0f405c9be0efaadbbf185b6a2ac18b1bce33176 100644
--- a/Data/Text/Lazy/Builder/Int.hs
+++ b/Data/Text/Lazy/Builder/Int.hs
@@ -1,7 +1,11 @@
-{-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples #-}
+{-# LANGUAGE BangPatterns, CPP, MagicHash, RankNTypes, UnboxedTuples #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 
 -- Module:      Data.Text.Lazy.Builder.Int
--- Copyright:   (c) 2011 MailRank, Inc.
+-- Copyright:   (c) 2013 Bryan O'Sullivan
+--              (c) 2011 MailRank, Inc.
 -- License:     BSD3
 -- Maintainer:  Bryan O'Sullivan <bos@serpentine.com>
 -- Stability:   experimental
@@ -17,45 +21,134 @@ module Data.Text.Lazy.Builder.Int
 
 import Data.Int (Int8, Int16, Int32, Int64)
 import Data.Monoid (mempty)
+import qualified Data.ByteString.Unsafe as B
 import Data.Text.Lazy.Builder.Functions ((<>), i2d)
-import Data.Text.Lazy.Builder
+import Data.Text.Lazy.Builder.Internal
+import Data.Text.Lazy.Builder.Int.Digits (digits)
+import Data.Text.Array
 import Data.Word (Word, Word8, Word16, Word32, Word64)
 import GHC.Base (quotInt, remInt)
 import GHC.Num (quotRemInteger)
 import GHC.Types (Int(..))
+import Control.Monad.ST
 
 #ifdef  __GLASGOW_HASKELL__
-# if __GLASGOW_HASKELL__ < 611
-import GHC.Integer.Internals
-# else
+# if defined(INTEGER_GMP)
 import GHC.Integer.GMP.Internals
+# elif defined(INTEGER_SIMPLE)
+import GHC.Integer
+# else
+# error "You need to use either GMP or integer-simple."
 # endif
 #endif
 
-#ifdef INTEGER_GMP
+#if defined(INTEGER_GMP) || defined(INTEGER_SIMPLE)
 # define PAIR(a,b) (# a,b #)
 #else
 # define PAIR(a,b) (a,b)
 #endif
 
 decimal :: Integral a => a -> Builder
-{-# SPECIALIZE decimal :: Int -> Builder #-}
 {-# SPECIALIZE decimal :: Int8 -> Builder #-}
-{-# SPECIALIZE decimal :: Int16 -> Builder #-}
-{-# SPECIALIZE decimal :: Int32 -> Builder #-}
-{-# SPECIALIZE decimal :: Int64 -> Builder #-}
-{-# SPECIALIZE decimal :: Word -> Builder #-}
-{-# SPECIALIZE decimal :: Word8 -> Builder #-}
-{-# SPECIALIZE decimal :: Word16 -> Builder #-}
-{-# SPECIALIZE decimal :: Word32 -> Builder #-}
-{-# SPECIALIZE decimal :: Word64 -> Builder #-}
+{-# RULES "decimal/Int" decimal = boundedDecimal :: Int -> Builder #-}
+{-# RULES "decimal/Int16" decimal = boundedDecimal :: Int16 -> Builder #-}
+{-# RULES "decimal/Int32" decimal = boundedDecimal :: Int32 -> Builder #-}
+{-# RULES "decimal/Int64" decimal = boundedDecimal :: Int64 -> Builder #-}
+{-# RULES "decimal/Word" decimal = positive :: Word -> Builder #-}
+{-# RULES "decimal/Word8" decimal = positive :: Word8 -> Builder #-}
+{-# RULES "decimal/Word16" decimal = positive :: Word16 -> Builder #-}
+{-# RULES "decimal/Word32" decimal = positive :: Word32 -> Builder #-}
+{-# RULES "decimal/Word64" decimal = positive :: Word64 -> Builder #-}
 {-# RULES "decimal/Integer" decimal = integer 10 :: Integer -> Builder #-}
-decimal i
-    | i < 0     = singleton '-' <> go (-i)
-    | otherwise = go i
-  where
-    go n | n < 10    = digit n
-         | otherwise = go (n `quot` 10) <> digit (n `rem` 10)
+decimal i = decimal' (<= -128) i
+
+boundedDecimal :: (Integral a, Bounded a) => a -> Builder
+{-# SPECIALIZE boundedDecimal :: Int -> Builder #-}
+{-# SPECIALIZE boundedDecimal :: Int8 -> Builder #-}
+{-# SPECIALIZE boundedDecimal :: Int16 -> Builder #-}
+{-# SPECIALIZE boundedDecimal :: Int32 -> Builder #-}
+{-# SPECIALIZE boundedDecimal :: Int64 -> Builder #-}
+boundedDecimal i = decimal' (== minBound) i
+
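+-- Negating the 'minBound' of a fixed-width type overflows, so
+-- 'decimal'' takes a predicate identifying values that may be a
+-- 'minBound' and, when it holds, peels off the last digit with
+-- 'quotRem' before negating the rest.  The @(<= -128)@ guard used by
+-- the generic 'decimal' above is conservative: -128 is the
+-- smallest-magnitude 'minBound' among the fixed-width signed types
+-- ('Int8').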
+decimal' :: (Integral a) => (a -> Bool) -> a -> Builder
+{-# INLINE decimal' #-}
+decimal' p i
+    | i < 0 = if p i
+              then let (q, r) = i `quotRem` 10
+                       qq = -q
+                       !n = countDigits qq
+                   in writeN (n + 2) $ \marr off -> do
+                       unsafeWrite marr off minus
+                       posDecimal marr (off+1) n qq
+                       unsafeWrite marr (off+n+1) (i2w (-r))
+              else let j = -i
+                       !n = countDigits j
+                   in writeN (n + 1) $ \marr off ->
+                       unsafeWrite marr off minus >> posDecimal marr (off+1) n j
+    | otherwise = positive i
+
+positive :: (Integral a) => a -> Builder
+{-# SPECIALIZE positive :: Int -> Builder #-}
+{-# SPECIALIZE positive :: Int8 -> Builder #-}
+{-# SPECIALIZE positive :: Int16 -> Builder #-}
+{-# SPECIALIZE positive :: Int32 -> Builder #-}
+{-# SPECIALIZE positive :: Int64 -> Builder #-}
+{-# SPECIALIZE positive :: Word -> Builder #-}
+{-# SPECIALIZE positive :: Word8 -> Builder #-}
+{-# SPECIALIZE positive :: Word16 -> Builder #-}
+{-# SPECIALIZE positive :: Word32 -> Builder #-}
+{-# SPECIALIZE positive :: Word64 -> Builder #-}
+positive i
+    | i < 10    = writeN 1 $ \marr off -> unsafeWrite marr off (i2w i)
+    | otherwise = let !n = countDigits i
+                  in writeN n $ \marr off -> posDecimal marr off n i
+
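+-- Write the decimal digits of a non-negative value into the array, two
+-- digits at a time, using the precomputed "digits" pair table.  A
+-- worked example (illustrative): for the two-digit group 37, write2
+-- reads offsets 74 and 75 of the "000102...9899" table and stores '7'
+-- at off and '3' at off-1.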
+posDecimal :: (Integral a) =>
+              forall s. MArray s -> Int -> Int -> a -> ST s ()
+{-# INLINE posDecimal #-}
+posDecimal marr off0 ds v0 = go (off0 + ds - 1) v0
+  where go off v
+           | v >= 100 = do
+               let (q, r) = v `quotRem` 100
+               write2 off r
+               go (off - 2) q
+           | v < 10    = unsafeWrite marr off (i2w v)
+           | otherwise = write2 off v
+        write2 off i0 = do
+          let i = fromIntegral i0; j = i + i
+          unsafeWrite marr off $ get (j + 1)
+          unsafeWrite marr (off - 1) $ get j
+        get = fromIntegral . B.unsafeIndex digits
+
+minus, zero :: Word16
+{-# INLINE minus #-}
+{-# INLINE zero #-}
+minus = 45
+zero = 48
+
+i2w :: (Integral a) => a -> Word16
+{-# INLINE i2w #-}
+i2w v = zero + fromIntegral v
+
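+-- Count the decimal digits of a non-negative value.  For example
+-- (illustrative), countDigits 999 == 3 and countDigits 1000 == 4.
+-- Values wider than twelve digits are cut down twelve digits at a time
+-- before the final comparison chain runs.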
+countDigits :: (Integral a) => a -> Int
+{-# INLINE countDigits #-}
+countDigits v0 = go 1 (fromIntegral v0 :: Word64)
+  where go !k v
+           | v < 10    = k
+           | v < 100   = k + 1
+           | v < 1000  = k + 2
+           | v < 1000000000000 =
+               k + if v < 100000000
+                   then if v < 1000000
+                        then if v < 10000
+                             then 3
+                             else 4 + fin v 100000
+                        else 6 + fin v 10000000
+                   else if v < 10000000000
+                        then 8 + fin v 1000000000
+                        else 10 + fin v 100000000000
+           | otherwise = go (k + 12) (v `quot` 1000000000000)
+        fin v n = if v >= n then 1 else 0
 
 hexadecimal :: Integral a => a -> Builder
 {-# SPECIALIZE hexadecimal :: Int -> Builder #-}
@@ -68,17 +161,22 @@ hexadecimal :: Integral a => a -> Builder
 {-# SPECIALIZE hexadecimal :: Word16 -> Builder #-}
 {-# SPECIALIZE hexadecimal :: Word32 -> Builder #-}
 {-# SPECIALIZE hexadecimal :: Word64 -> Builder #-}
-{-# RULES "hexadecimal/Integer" hexadecimal = integer 16 :: Integer -> Builder #-}
+{-# RULES "hexadecimal/Integer"
+    hexadecimal = hexInteger :: Integer -> Builder #-}
 hexadecimal i
-    | i < 0     = singleton '-' <> go (-i)
+    | i < 0     = error hexErrMsg
     | otherwise = go i
   where
     go n | n < 16    = hexDigit n
          | otherwise = go (n `quot` 16) <> hexDigit (n `rem` 16)
 
-digit :: Integral a => a -> Builder
-digit n = singleton $! i2d (fromIntegral n)
-{-# INLINE digit #-}
+hexInteger :: Integer -> Builder
+hexInteger i
+    | i < 0     = error hexErrMsg
+    | otherwise = integer 16 i
+
+hexErrMsg :: String
+hexErrMsg = "Data.Text.Lazy.Builder.Int.hexadecimal: applied to negative number"
 
 hexDigit :: Integral a => a -> Builder
 hexDigit n
@@ -93,8 +191,13 @@ int = decimal
 data T = T !Integer !Int
 
 integer :: Int -> Integer -> Builder
+#ifdef INTEGER_GMP
 integer 10 (S# i#) = decimal (I# i#)
 integer 16 (S# i#) = hexadecimal (I# i#)
+#else
+integer 10 i = decimal i
+integer 16 i = hexadecimal i
+#endif
 integer base i
     | i < 0     = singleton '-' <> go (-i)
     | otherwise = go i
@@ -146,7 +249,7 @@ integer base i
     pblock = loop maxDigits
       where
         loop !d !n
-            | d == 1    = digit n
-            | otherwise = loop (d-1) q <> digit r
+            | d == 1    = hexDigit n
+            | otherwise = loop (d-1) q <> hexDigit r
             where q = n `quotInt` base
                   r = n `remInt` base
diff --git a/Data/Text/Lazy/Builder/Int/Digits.hs b/Data/Text/Lazy/Builder/Int/Digits.hs
new file mode 100644
index 0000000000000000000000000000000000000000..69fe26e8fd73a3af6cabbfa2c4436ab0c87104fa
--- /dev/null
+++ b/Data/Text/Lazy/Builder/Int/Digits.hs
@@ -0,0 +1,22 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+-- Module:      Data.Text.Lazy.Builder.Int.Digits
+-- Copyright:   (c) 2013 Bryan O'Sullivan
+-- License:     BSD3
+-- Maintainer:  Bryan O'Sullivan <bos@serpentine.com>
+-- Stability:   experimental
+-- Portability: portable
+--
+-- This module exists because the C preprocessor does things that we
+-- shall not speak of when confronted with Haskell multiline strings.
+
+module Data.Text.Lazy.Builder.Int.Digits (digits) where
+
+import Data.ByteString.Char8 (ByteString)
+
+digits :: ByteString
+digits = "0001020304050607080910111213141516171819\
+         \2021222324252627282930313233343536373839\
+         \4041424344454647484950515253545556575859\
+         \6061626364656667686970717273747576777879\
+         \8081828384858687888990919293949596979899"
diff --git a/Data/Text/Lazy/Builder/Internal.hs b/Data/Text/Lazy/Builder/Internal.hs
new file mode 100644
index 0000000000000000000000000000000000000000..14af638165737fa5ecb5f9c0310788f3386a9ce3
--- /dev/null
+++ b/Data/Text/Lazy/Builder/Internal.hs
@@ -0,0 +1,321 @@
+{-# LANGUAGE BangPatterns, CPP, Rank2Types #-}
+
+-----------------------------------------------------------------------------
+-- |
+-- Module      : Data.Text.Lazy.Builder.Internal
+-- Copyright   : (c) 2013 Bryan O'Sullivan
+--               (c) 2010 Johan Tibell
+-- License     : BSD3-style (see LICENSE)
+--
+-- Maintainer  : Johan Tibell <johan.tibell@gmail.com>
+-- Stability   : experimental
+-- Portability : portable to Hugs and GHC
+--
+-- Efficient construction of lazy @Text@ values.  The principal
+-- operations on a @Builder@ are @singleton@, @fromText@, and
+-- @fromLazyText@, which construct new builders, and 'mappend', which
+-- concatenates two builders.
+--
+-- To get maximum performance when building lazy @Text@ values using a
+-- builder, associate @mappend@ calls to the right.  For example,
+-- prefer
+--
+-- > singleton 'a' `mappend` (singleton 'b' `mappend` singleton 'c')
+--
+-- to
+--
+-- > singleton 'a' `mappend` singleton 'b' `mappend` singleton 'c'
+--
+-- as the latter associates @mappend@ to the left.
+--
+-----------------------------------------------------------------------------
+
+module Data.Text.Lazy.Builder.Internal
+   ( -- * Public API
+     -- ** The Builder type
+     Builder
+   , toLazyText
+   , toLazyTextWith
+
+     -- ** Constructing Builders
+   , singleton
+   , fromText
+   , fromLazyText
+   , fromString
+
+     -- ** Flushing the buffer state
+   , flush
+
+     -- * Internal functions
+   , append'
+   , ensureFree
+   , writeN
+   ) where
+
+import Control.Monad.ST (ST, runST)
+import Data.Bits ((.&.))
+import Data.Monoid (Monoid(..))
+import Data.Text.Internal (Text(..))
+import Data.Text.Lazy.Internal (smallChunkSize)
+import Data.Text.Unsafe (inlineInterleaveST)
+import Data.Text.UnsafeChar (ord, unsafeWrite)
+import Data.Text.UnsafeShift (shiftR)
+import Prelude hiding (map, putChar)
+
+import qualified Data.String as String
+import qualified Data.Text as S
+import qualified Data.Text.Array as A
+import qualified Data.Text.Lazy as L
+
+------------------------------------------------------------------------
+
+-- | A @Builder@ is an efficient way to build lazy @Text@ values.
+-- There are several functions for constructing builders, but only one
+-- to inspect them: to extract any data, you have to turn them into
+-- lazy @Text@ values using @toLazyText@.
+--
+-- Internally, a builder constructs a lazy @Text@ by filling arrays
+-- piece by piece.  As each buffer is filled, it is \'popped\' off, to
+-- become a new chunk of the resulting lazy @Text@.  All this is
+-- hidden from the user of the @Builder@.
+newtype Builder = Builder {
+     -- Invariant (from Data.Text.Lazy):
+     --      The lists include no null Texts.
+     runBuilder :: forall s. (Buffer s -> ST s [S.Text])
+                -> Buffer s
+                -> ST s [S.Text]
+   }
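+
+-- For illustration: 'runBuilder' takes the rest of the pipeline as a
+-- continuation together with the current buffer, so 'append' below is
+-- simply function composition of builders.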
+
+instance Monoid Builder where
+   mempty  = empty
+   {-# INLINE mempty #-}
+   mappend = append
+   {-# INLINE mappend #-}
+   mconcat = foldr mappend mempty
+   {-# INLINE mconcat #-}
+
+instance String.IsString Builder where
+    fromString = fromString
+    {-# INLINE fromString #-}
+
+instance Show Builder where
+    show = show . toLazyText
+
+instance Eq Builder where
+    a == b = toLazyText a == toLazyText b
+
+instance Ord Builder where
+    a <= b = toLazyText a <= toLazyText b
+
+------------------------------------------------------------------------
+
+-- | /O(1)./ The empty @Builder@, satisfying
+--
+--  * @'toLazyText' 'empty' = 'L.empty'@
+--
+empty :: Builder
+empty = Builder (\ k buf -> k buf)
+{-# INLINE empty #-}
+
+-- | /O(1)./ A @Builder@ taking a single character, satisfying
+--
+--  * @'toLazyText' ('singleton' c) = 'L.singleton' c@
+--
+singleton :: Char -> Builder
+singleton c = writeAtMost 2 $ \ marr o ->
+    if n < 0x10000
+    then A.unsafeWrite marr o (fromIntegral n) >> return 1
+    else do
+        A.unsafeWrite marr o lo
+        A.unsafeWrite marr (o+1) hi
+        return 2
+  where n = ord c
+        m = n - 0x10000
+        lo = fromIntegral $ (m `shiftR` 10) + 0xD800
+        hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
+{-# INLINE singleton #-}
+
+------------------------------------------------------------------------
+
+-- | /O(1)./ The concatenation of two builders, an associative
+-- operation with identity 'empty', satisfying
+--
+--  * @'toLazyText' ('append' x y) = 'L.append' ('toLazyText' x) ('toLazyText' y)@
+--
+append :: Builder -> Builder -> Builder
+append (Builder f) (Builder g) = Builder (f . g)
+{-# INLINE [0] append #-}
+
+-- TODO: Experiment to find the right threshold.
+copyLimit :: Int
+copyLimit = 128
+
+-- This function attempts to merge small @Text@ values instead of
+-- treating each value as its own chunk.  We may not always want this.
+
+-- | /O(1)./ A @Builder@ taking a 'S.Text', satisfying
+--
+--  * @'toLazyText' ('fromText' t) = 'L.fromChunks' [t]@
+--
+fromText :: S.Text -> Builder
+fromText t@(Text arr off l)
+    | S.null t       = empty
+    | l <= copyLimit = writeN l $ \marr o -> A.copyI marr o arr off (l+o)
+    | otherwise      = flush `append` mapBuilder (t :)
+{-# INLINE [1] fromText #-}
+
+{-# RULES
+"fromText/pack" forall s .
+        fromText (S.pack s) = fromString s
+ #-}
+
+-- | /O(1)./ A Builder taking a @String@, satisfying
+--
+--  * @'toLazyText' ('fromString' s) = 'L.fromChunks' [S.pack s]@
+--
+fromString :: String -> Builder
+fromString str = Builder $ \k (Buffer p0 o0 u0 l0) ->
+    let loop !marr !o !u !l [] = k (Buffer marr o u l)
+        loop marr o u l s@(c:cs)
+            | l <= 1 = do
+                arr <- A.unsafeFreeze marr
+                let !t = Text arr o u
+                marr' <- A.new chunkSize
+                ts <- inlineInterleaveST (loop marr' 0 0 chunkSize s)
+                return $ t : ts
+            | otherwise = do
+                n <- unsafeWrite marr (o+u) c
+                loop marr o (u+n) (l-n) cs
+    in loop p0 o0 u0 l0 str
+  where
+    chunkSize = smallChunkSize
+{-# INLINE fromString #-}
+
+-- | /O(1)./ A @Builder@ taking a lazy @Text@, satisfying
+--
+--  * @'toLazyText' ('fromLazyText' t) = t@
+--
+fromLazyText :: L.Text -> Builder
+fromLazyText ts = flush `append` mapBuilder (L.toChunks ts ++)
+{-# INLINE fromLazyText #-}
+
+------------------------------------------------------------------------
+
+-- Our internal buffer type
+data Buffer s = Buffer {-# UNPACK #-} !(A.MArray s)
+                       {-# UNPACK #-} !Int  -- offset
+                       {-# UNPACK #-} !Int  -- used units
+                       {-# UNPACK #-} !Int  -- length left
+
+------------------------------------------------------------------------
+
+-- | /O(n)./ Extract a lazy @Text@ from a @Builder@ with a default
+-- buffer size.  The construction work takes place if and when the
+-- relevant part of the lazy @Text@ is demanded.
+toLazyText :: Builder -> L.Text
+toLazyText = toLazyTextWith smallChunkSize
+
+-- | /O(n)./ Extract a lazy @Text@ from a @Builder@, using the given
+-- size for the initial buffer.  The construction work takes place if
+-- and when the relevant part of the lazy @Text@ is demanded.
+--
+-- If the initial buffer is too small to hold all data, subsequent
+-- buffers will be the default buffer size.
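+--
+-- An illustrative use (not part of this patch):
+--
+-- > toLazyTextWith 4096 (fromString "hello" `mappend` singleton '!')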
+toLazyTextWith :: Int -> Builder -> L.Text
+toLazyTextWith chunkSize m = L.fromChunks (runST $
+  newBuffer chunkSize >>= runBuilder (m `append` flush) (const (return [])))
+
+-- | /O(1)./ Pop the strict @Text@ we have constructed so far, if any,
+-- yielding a new chunk in the result lazy @Text@.
+flush :: Builder
+flush = Builder $ \ k buf@(Buffer p o u l) ->
+    if u == 0
+    then k buf
+    else do arr <- A.unsafeFreeze p
+            let !b = Buffer p (o+u) 0 l
+                !t = Text arr o u
+            ts <- inlineInterleaveST (k b)
+            return $! t : ts
+
+------------------------------------------------------------------------
+
+-- | Sequence an ST operation on the buffer
+withBuffer :: (forall s. Buffer s -> ST s (Buffer s)) -> Builder
+withBuffer f = Builder $ \k buf -> f buf >>= k
+{-# INLINE withBuffer #-}
+
+-- | Get the size of the buffer
+withSize :: (Int -> Builder) -> Builder
+withSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
+    runBuilder (f l) k buf
+{-# INLINE withSize #-}
+
+-- | Map the resulting list of texts.
+mapBuilder :: ([S.Text] -> [S.Text]) -> Builder
+mapBuilder f = Builder (fmap f .)
+
+------------------------------------------------------------------------
+
+-- | Ensure that there are at least @n@ many elements available.
+ensureFree :: Int -> Builder
+ensureFree !n = withSize $ \ l ->
+    if n <= l
+    then empty
+    else flush `append'` withBuffer (const (newBuffer (max n smallChunkSize)))
+{-# INLINE [0] ensureFree #-}
+
+writeAtMost :: Int -> (forall s. A.MArray s -> Int -> ST s Int) -> Builder
+writeAtMost n f = ensureFree n `append'` withBuffer (writeBuffer f)
+{-# INLINE [0] writeAtMost #-}
+
+-- | Ensure that @n@ many elements are available, and then use @f@ to
+-- write some elements into the memory.
+writeN :: Int -> (forall s. A.MArray s -> Int -> ST s ()) -> Builder
+writeN n f = writeAtMost n (\ p o -> f p o >> return n)
+{-# INLINE writeN #-}
+
+writeBuffer :: (A.MArray s -> Int -> ST s Int) -> Buffer s -> ST s (Buffer s)
+writeBuffer f (Buffer p o u l) = do
+    n <- f p (o+u)
+    return $! Buffer p o (u+n) (l-n)
+{-# INLINE writeBuffer #-}
+
+newBuffer :: Int -> ST s (Buffer s)
+newBuffer size = do
+    arr <- A.new size
+    return $! Buffer arr 0 0 size
+{-# INLINE newBuffer #-}
+
+------------------------------------------------------------------------
+-- Some nice rules for Builder
+
+-- This function makes GHC understand that 'writeN' and 'ensureFree'
+-- are *not* recursive in the presence of the rewrite rules below.
+-- This is not needed with GHC 7+.
+append' :: Builder -> Builder -> Builder
+append' (Builder f) (Builder g) = Builder (f . g)
+{-# INLINE append' #-}
+
+{-# RULES
+
+"append/writeAtMost" forall a b (f::forall s. A.MArray s -> Int -> ST s Int)
+                           (g::forall s. A.MArray s -> Int -> ST s Int) ws.
+    append (writeAtMost a f) (append (writeAtMost b g) ws) =
+        append (writeAtMost (a+b) (\marr o -> f marr o >>= \ n ->
+                                    g marr (o+n) >>= \ m ->
+                                    let s = n+m in s `seq` return s)) ws
+
+"writeAtMost/writeAtMost" forall a b (f::forall s. A.MArray s -> Int -> ST s Int)
+                           (g::forall s. A.MArray s -> Int -> ST s Int).
+    append (writeAtMost a f) (writeAtMost b g) =
+        writeAtMost (a+b) (\marr o -> f marr o >>= \ n ->
+                            g marr (o+n) >>= \ m ->
+                            let s = n+m in s `seq` return s)
+
+"ensureFree/ensureFree" forall a b .
+    append (ensureFree a) (ensureFree b) = ensureFree (max a b)
+
+"flush/flush"
+    append flush flush = flush
+
+ #-}
diff --git a/Data/Text/Lazy/Builder/RealFloat.hs b/Data/Text/Lazy/Builder/RealFloat.hs
index fcb9e19d04b37a4add0b7ca775e90b1c4d13a518..169e6f453362b3efb7cbf9944419fee004cf39ce 100644
--- a/Data/Text/Lazy/Builder/RealFloat.hs
+++ b/Data/Text/Lazy/Builder/RealFloat.hs
@@ -1,4 +1,7 @@
-{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE CPP, OverloadedStrings #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 
 -- |
 -- Module:    Data.Text.Lazy.Builder.RealFloat
diff --git a/Data/Text/Lazy/Encoding.hs b/Data/Text/Lazy/Encoding.hs
index 2b361acc3d3668b5fbe41bda1e0681affe422adb..1498f8d962b9dd34fb65e79476fa3ed2e685a8ed 100644
--- a/Data/Text/Lazy/Encoding.hs
+++ b/Data/Text/Lazy/Encoding.hs
@@ -1,4 +1,7 @@
-{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE BangPatterns,CPP #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 -- |
 -- Module      : Data.Text.Lazy.Encoding
 -- Copyright   : (c) 2009, 2010 Bryan O'Sullivan
@@ -20,6 +23,7 @@ module Data.Text.Lazy.Encoding
     -- * Decoding ByteStrings to Text
     -- $strict
       decodeASCII
+    , decodeLatin1
     , decodeUtf8
     , decodeUtf16LE
     , decodeUtf16BE
@@ -46,17 +50,20 @@ module Data.Text.Lazy.Encoding
 
 import Control.Exception (evaluate, try)
 import Data.Bits ((.&.))
+import Data.Monoid (mempty, (<>))
 import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode)
 import Data.Text.Lazy.Internal (Text(..), chunk, empty, foldrChunks)
-import System.IO.Unsafe (unsafePerformIO)
 import qualified Data.ByteString as S
 import qualified Data.ByteString.Lazy as B
 import qualified Data.ByteString.Lazy.Internal as B
 import qualified Data.ByteString.Unsafe as S
+import qualified Data.ByteString.Builder               as B
+import qualified Data.ByteString.Builder.Prim          as BP
 import qualified Data.Text as T
 import qualified Data.Text.Encoding as TE
 import qualified Data.Text.Lazy.Encoding.Fusion as E
 import qualified Data.Text.Lazy.Fusion as F
+import Data.Text.Unsafe (unsafeDupablePerformIO)
 
 -- $strict
 --
@@ -74,11 +81,15 @@ import qualified Data.Text.Lazy.Fusion as F
 -- | /Deprecated/.  Decode a 'ByteString' containing 7-bit ASCII
 -- encoded text.
 --
--- This function is deprecated.  Use 'decodeUtf8' instead.
+-- This function is deprecated.  Use 'decodeLatin1' instead.
 decodeASCII :: B.ByteString -> Text
 decodeASCII = decodeUtf8
 {-# DEPRECATED decodeASCII "Use decodeUtf8 instead" #-}
 
+-- | Decode a 'ByteString' containing Latin-1 (aka ISO-8859-1) encoded text.
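+--
+-- Unlike UTF-8 decoding, this never fails: every Latin-1 byte maps
+-- directly to the Unicode code point of the same value.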
+decodeLatin1 :: B.ByteString -> Text
+decodeLatin1 = foldr (chunk . TE.decodeLatin1) empty . B.toChunks
+
 -- | Decode a 'ByteString' containing UTF-8 encoded text.
 decodeUtf8With :: OnDecodeError -> B.ByteString -> Text
 decodeUtf8With onErr bs0 = fast bs0
@@ -141,7 +152,7 @@ decodeUtf8 = decodeUtf8With strictDecode
 -- input before it can return a result.  If you need lazy (streaming)
 -- decoding, use 'decodeUtf8With' in lenient mode.
 decodeUtf8' :: B.ByteString -> Either UnicodeException Text
-decodeUtf8' bs = unsafePerformIO $ do
+decodeUtf8' bs = unsafeDupablePerformIO $ do
                    let t = decodeUtf8 bs
                    try (evaluate (rnf t `seq` t))
   where
@@ -150,8 +161,13 @@ decodeUtf8' bs = unsafePerformIO $ do
 {-# INLINE decodeUtf8' #-}
 
 encodeUtf8 :: Text -> B.ByteString
-encodeUtf8 (Chunk c cs) = B.Chunk (TE.encodeUtf8 c) (encodeUtf8 cs)
-encodeUtf8 Empty        = B.Empty
+encodeUtf8 =
+    B.toLazyByteString . go
+  where
+    go Empty        = mempty
+    go (Chunk c cs) =
+        TE.encodeUtf8Escaped (BP.liftFixedToBounded BP.word8) c <> go cs
+
 
 -- | Decode text from little endian UTF-16 encoding.
 decodeUtf16LEWith :: OnDecodeError -> B.ByteString -> Text
diff --git a/Data/Text/Lazy/Encoding/Fusion.hs b/Data/Text/Lazy/Encoding/Fusion.hs
index 28c2d48b17c1a040e3399a174b7c4f0fefd1acbd..0d0c724bed1a718529f67f34aeb69c67760f1ec4 100644
--- a/Data/Text/Lazy/Encoding/Fusion.hs
+++ b/Data/Text/Lazy/Encoding/Fusion.hs
@@ -5,7 +5,7 @@
 -- Copyright   : (c) 2009, 2010 Bryan O'Sullivan
 --
 -- License     : BSD-style
--- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com, 
+-- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
 --               duncan@haskell.org
 -- Stability   : experimental
 -- Portability : portable
@@ -42,7 +42,7 @@ import Data.Word (Word8, Word16, Word32)
 import qualified Data.Text.Encoding.Utf8 as U8
 import qualified Data.Text.Encoding.Utf16 as U16
 import qualified Data.Text.Encoding.Utf32 as U32
-import System.IO.Unsafe (unsafePerformIO)
+import Data.Text.Unsafe (unsafeDupablePerformIO)
 import Foreign.ForeignPtr (withForeignPtr, ForeignPtr)
 import Foreign.Storable (pokeByteOff)
 import Data.ByteString.Internal (mallocByteString, memcpy)
@@ -272,7 +272,7 @@ streamUtf32LE onErr bs0 = Stream next (T bs0 S0 0) unknownSize
 -- | /O(n)/ Convert a 'Stream' 'Word8' to a lazy 'ByteString'.
 unstreamChunks :: Int -> Stream Word8 -> ByteString
 unstreamChunks chunkSize (Stream next s0 len0) = chunk s0 (upperBound 4 len0)
-  where chunk s1 len1 = unsafePerformIO $ do
+  where chunk s1 len1 = unsafeDupablePerformIO $ do
           let len = max 4 (min len1 chunkSize)
           mallocByteString len >>= loop len 0 s1
           where
diff --git a/Data/Text/Lazy/Fusion.hs b/Data/Text/Lazy/Fusion.hs
index 6a2291dea9febbdce7aa76081743880b9f414cd1..d96fb183771fa5f18ff35fc4ab9338c819abe468 100644
--- a/Data/Text/Lazy/Fusion.hs
+++ b/Data/Text/Lazy/Fusion.hs
@@ -24,6 +24,7 @@ module Data.Text.Lazy.Fusion
 
 import Prelude hiding (length)
 import qualified Data.Text.Fusion.Common as S
+import Control.Monad.ST (runST)
 import Data.Text.Fusion.Internal
 import Data.Text.Fusion.Size (isEmpty, unknownSize)
 import Data.Text.Lazy.Internal
@@ -50,33 +51,36 @@ stream text = Stream next (text :*: 0) unknownSize
 -- | /O(n)/ Convert a 'Stream Char' into a 'Text', using the given
 -- chunk size.
 unstreamChunks :: Int -> Stream Char -> Text
-unstreamChunks chunkSize (Stream next s0 len0)
+unstreamChunks !chunkSize (Stream next s0 len0)
   | isEmpty len0 = Empty
   | otherwise    = outer s0
   where
-    outer s = {-# SCC "unstreamChunks/outer" #-}
-              case next s of
+    outer so = {-# SCC "unstreamChunks/outer" #-}
+              case next so of
                 Done       -> Empty
                 Skip s'    -> outer s'
-                Yield x s' -> I.Text arr 0 len `chunk` outer s''
-                  where (arr,(s'',len)) = A.run2 fill
-                        fill = do a <- A.new unknownLength
-                                  unsafeWrite a 0 x >>= inner a unknownLength s'
-                        unknownLength = 4
-    inner marr len s !i
-        | i + 1 >= chunkSize = return (marr, (s,i))
-        | i + 1 >= len       = {-# SCC "unstreamChunks/resize" #-} do
-            let newLen = min (len `shiftL` 1) chunkSize
-            marr' <- A.new newLen
-            A.copyM marr' 0 marr 0 len
-            inner marr' newLen s i
-        | otherwise =
-            {-# SCC "unstreamChunks/inner" #-}
-            case next s of
-              Done        -> return (marr,(s,i))
-              Skip s'     -> inner marr len s' i
-              Yield x s'  -> do d <- unsafeWrite marr i x
-                                inner marr len s' (i+d)
+                Yield x s' -> runST $ do
+                                a <- A.new unknownLength
+                                unsafeWrite a 0 x >>= inner a unknownLength s'
+                    where unknownLength = 4
+      where
+        inner marr !len s !i
+            | i + 1 >= chunkSize = finish marr i s
+            | i + 1 >= len       = {-# SCC "unstreamChunks/resize" #-} do
+                let newLen = min (len `shiftL` 1) chunkSize
+                marr' <- A.new newLen
+                A.copyM marr' 0 marr 0 len
+                inner marr' newLen s i
+            | otherwise =
+                {-# SCC "unstreamChunks/inner" #-}
+                case next s of
+                  Done        -> finish marr i s
+                  Skip s'     -> inner marr len s' i
+                  Yield x s'  -> do d <- unsafeWrite marr i x
+                                    inner marr len s' (i+d)
+        finish marr len s' = do
+          arr <- A.unsafeFreeze marr
+          return (I.Text arr 0 len `Chunk` outer s')
 {-# INLINE [0] unstreamChunks #-}
 
 -- | /O(n)/ Convert a 'Stream Char' into a 'Text', using
diff --git a/Data/Text/Lazy/IO.hs b/Data/Text/Lazy/IO.hs
index 366107850c5472cb69bb9f6dae6795eb406d7cea..318bc60e7ca301022e25cdab567d7d7a044036c2 100644
--- a/Data/Text/Lazy/IO.hs
+++ b/Data/Text/Lazy/IO.hs
@@ -1,4 +1,7 @@
 {-# LANGUAGE BangPatterns, CPP, RecordWildCards #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 -- |
 -- Module      : Data.Text.Lazy.IO
 -- Copyright   : (c) 2009, 2010 Bryan O'Sullivan,
@@ -16,7 +19,7 @@
 module Data.Text.Lazy.IO
     (
     -- * Performance
-    -- $performance 
+    -- $performance
 
     -- * Locale support
     -- $locale
@@ -38,18 +41,13 @@ module Data.Text.Lazy.IO
     ) where
 
 import Data.Text.Lazy (Text)
-import Prelude hiding (appendFile, catch, getContents, getLine, interact,
+import Prelude hiding (appendFile, getContents, getLine, interact,
                        putStr, putStrLn, readFile, writeFile)
 import System.IO (Handle, IOMode(..), hPutChar, openFile, stdin, stdout,
                   withFile)
 import qualified Data.Text.IO as T
 import qualified Data.Text.Lazy as L
-#if __GLASGOW_HASKELL__ <= 610
-import Data.Text.Lazy.Encoding (decodeUtf8)
-import qualified Data.ByteString.Char8 as S8
-import qualified Data.ByteString.Lazy.Char8 as L8
-#else
-import Control.Exception (catch, throwIO)
+import qualified Control.Exception as E
 import Control.Monad (when)
 import Data.IORef (readIORef)
 import Data.Text.IO.Internal (hGetLineWith, readChunk)
@@ -62,7 +60,6 @@ import GHC.IO.Handle.Types (Handle__(..), HandleType(..))
 import System.IO (BufferMode(..), hGetBuffering, hSetBuffering)
 import System.IO.Error (isEOFError)
 import System.IO.Unsafe (unsafeInterleaveIO)
-#endif
 
 -- $performance
 --
@@ -96,9 +93,6 @@ appendFile p = withFile p AppendMode . flip hPutStr
 -- | Lazily read the remaining contents of a 'Handle'.  The 'Handle'
 -- will be closed after the read completes, or on error.
 hGetContents :: Handle -> IO Text
-#if __GLASGOW_HASKELL__ <= 610
-hGetContents = fmap decodeUtf8 . L8.hGetContents
-#else
 hGetContents h = do
   chooseGoodBuffering h
   wantReadableHandle "hGetContents" h $ \hh -> do
@@ -119,7 +113,7 @@ lazyRead h = unsafeInterleaveIO $
     case haType hh of
       ClosedHandle     -> return (hh, L.empty)
       SemiClosedHandle -> lazyReadBuffered h hh
-      _                -> ioException 
+      _                -> ioException
                           (IOError (Just h) IllegalOperation "hGetContents"
                            "illegal handle type" Nothing Nothing)
 
@@ -128,22 +122,17 @@ lazyReadBuffered h hh@Handle__{..} = do
    buf <- readIORef haCharBuffer
    (do t <- readChunk hh buf
        ts <- lazyRead h
-       return (hh, chunk t ts)) `catch` \e -> do
+       return (hh, chunk t ts)) `E.catch` \e -> do
          (hh', _) <- hClose_help hh
          if isEOFError e
            then return $ if isEmptyBuffer buf
                          then (hh', empty)
                          else (hh', L.singleton '\r')
-           else throwIO (augmentIOError e "hGetContents" h)
-#endif
+           else E.throwIO (augmentIOError e "hGetContents" h)
 
 -- | Read a single line from a handle.
 hGetLine :: Handle -> IO Text
-#if __GLASGOW_HASKELL__ <= 610
-hGetLine = fmap (decodeUtf8 . L8.fromChunks . (:[])) . S8.hGetLine
-#else
 hGetLine = hGetLineWith L.fromChunks
-#endif
 
 -- | Write a string to a handle.
 hPutStr :: Handle -> Text -> IO ()
diff --git a/Data/Text/Lazy/Internal.hs b/Data/Text/Lazy/Internal.hs
index ae66530e164f0a86827a05752cbc237730105d45..8d75b3d98359c6d1dc4778afe403456011939f97 100644
--- a/Data/Text/Lazy/Internal.hs
+++ b/Data/Text/Lazy/Internal.hs
@@ -8,7 +8,7 @@
 --               duncan@haskell.org
 -- Stability   : experimental
 -- Portability : GHC
--- 
+--
 -- A module containing private 'Text' internals. This exposes the
 -- 'Text' representation and low level construction functions.
 -- Modules which extend the 'Text' system may need to use this module.
diff --git a/Data/Text/Lazy/Read.hs b/Data/Text/Lazy/Read.hs
index 153741993e02de830aadd431b63a7ddacd8e6381..85673384e12f5a330295e00e8dd0cb36afa0d9fe 100644
--- a/Data/Text/Lazy/Read.hs
+++ b/Data/Text/Lazy/Read.hs
@@ -1,4 +1,7 @@
-{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE OverloadedStrings, CPP #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 
 -- |
 -- Module      : Data.Text.Lazy.Read
diff --git a/Data/Text/Private.hs b/Data/Text/Private.hs
index 313afde5ab68e6b0abd130851171eb72a3b5a85a..5c70bf28842da9be8b6f2a64105f719105c86541 100644
--- a/Data/Text/Private.hs
+++ b/Data/Text/Private.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE BangPatterns, UnboxedTuples #-}
+{-# LANGUAGE BangPatterns, Rank2Types, UnboxedTuples #-}
 
 -- |
 -- Module      : Data.Text.Private
@@ -11,11 +11,14 @@
 
 module Data.Text.Private
     (
-      span_
+      runText
+    , span_
     ) where
 
+import Control.Monad.ST (ST, runST)
 import Data.Text.Internal (Text(..), textP)
 import Data.Text.Unsafe (Iter(..), iter)
+import qualified Data.Text.Array as A
 
 span_ :: (Char -> Bool) -> Text -> (# Text, Text #)
 span_ p t@(Text arr off len) = (# hd,tl #)
@@ -26,3 +29,9 @@ span_ p t@(Text arr off len) = (# hd,tl #)
                 | otherwise      = i
             where Iter c d       = iter t i
 {-# INLINE span_ #-}
+
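+-- Run an ST action that builds a 'Text', passing it a continuation
+-- that freezes the mutable array at its final length.  An illustrative
+-- use (the names here are hypothetical, not part of this patch):
+--
+-- > oneChar :: Text
+-- > oneChar = runText $ \done -> do
+-- >             marr <- A.new 1
+-- >             A.unsafeWrite marr 0 0x68   -- UTF-16 code unit for 'h'
+-- >             done marr 1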
+runText :: (forall s. (A.MArray s -> Int -> ST s Text) -> ST s Text) -> Text
+runText act = runST (act $ \ !marr !len -> do
+                             arr <- A.unsafeFreeze marr
+                             return $! textP arr 0 len)
+{-# INLINE runText #-}
diff --git a/Data/Text/Read.hs b/Data/Text/Read.hs
index 709e8c5d948ab5175c85f30e0f8d1532682186f9..f7d721cbf2729a737b61c79bdaa8ad5082822965 100644
--- a/Data/Text/Read.hs
+++ b/Data/Text/Read.hs
@@ -1,4 +1,7 @@
-{-# LANGUAGE OverloadedStrings, UnboxedTuples #-}
+{-# LANGUAGE OverloadedStrings, UnboxedTuples, CPP #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 
 -- |
 -- Module      : Data.Text.Read
diff --git a/Data/Text/Search.hs b/Data/Text/Search.hs
index 85ce550b1d59ffd137ea3ce145795ed5fd920772..875227678badcd86a2d9ef2b68d81c01c79e8144 100644
--- a/Data/Text/Search.hs
+++ b/Data/Text/Search.hs
@@ -14,7 +14,7 @@
 -- Horspool, Sunday, and Lundh.
 --
 -- References:
--- 
+--
 -- * R. S. Boyer, J. S. Moore: A Fast String Searching Algorithm.
 --   Communications of the ACM, 20, 10, 762-772 (1977)
 --
@@ -35,10 +35,11 @@ module Data.Text.Search
 import qualified Data.Text.Array as A
 import Data.Word (Word64)
 import Data.Text.Internal (Text(..))
-import Data.Text.Fusion.Internal (PairS(..))
 import Data.Bits ((.|.), (.&.))
 import Data.Text.UnsafeShift (shiftL)
 
+data T = {-# UNPACK #-} !Word64 :* {-# UNPACK #-} !Int
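+-- (A monomorphic, unpacked mask-and-skip pair for 'indices' below,
+-- presumably replacing PairS so that both fields can be unboxed.)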
+
 -- | /O(n+m)/ Find the offsets of all non-overlapping indices of
 -- @needle@ within @haystack@.  The offsets returned represent
 -- locations in the low-level array.
@@ -60,9 +61,8 @@ indices _needle@(Text narr noff nlen) _haystack@(Text harr hoff hlen)
     hindex k = A.unsafeIndex harr (hoff+k)
     hindex' k | k == hlen  = 0
               | otherwise = A.unsafeIndex harr (hoff+k)
-    (mask :: Word64) :*: skip  = buildTable 0 0 (nlen-2)
     buildTable !i !msk !skp
-        | i >= nlast           = (msk .|. swizzle z) :*: skp
+        | i >= nlast           = (msk .|. swizzle z) :* skp
         | otherwise            = buildTable (i+1) (msk .|. swizzle c) skp'
         where c                = nindex i
               skp' | c == z    = nlen - i - 2
@@ -80,7 +80,8 @@ indices _needle@(Text narr noff nlen) _haystack@(Text harr hoff hlen)
               delta | nextInPattern = nlen + 1
                     | c == z        = skip + 1
                     | otherwise     = 1
-              nextInPattern         = mask .&. swizzle (hindex' (i+nlen)) == 0
+                where nextInPattern = mask .&. swizzle (hindex' (i+nlen)) == 0
+              !(mask :* skip)       = buildTable 0 0 (nlen-2)
     scanOne c = loop 0
         where loop !i | i >= hlen     = []
                       | hindex i == c = i : loop (i+1)
diff --git a/Data/Text/Unsafe.hs b/Data/Text/Unsafe.hs
index 754fd4f848d0a75c9d040fc97062472c3ae084e3..e0b66d06bd659e1fe0273808502fe59bc518a96d 100644
--- a/Data/Text/Unsafe.hs
+++ b/Data/Text/Unsafe.hs
@@ -14,6 +14,7 @@ module Data.Text.Unsafe
     (
       inlineInterleaveST
     , inlinePerformIO
+    , unsafeDupablePerformIO
     , Iter(..)
     , iter
     , iter_
@@ -24,7 +25,7 @@ module Data.Text.Unsafe
     , takeWord16
     , dropWord16
     ) where
-     
+
 #if defined(ASSERTS)
 import Control.Exception (assert)
 #endif
@@ -33,6 +34,7 @@ import Data.Text.Internal (Text(..))
 import Data.Text.Unsafe.Base (inlineInterleaveST, inlinePerformIO)
 import Data.Text.UnsafeChar (unsafeChr)
 import qualified Data.Text.Array as A
+import GHC.IO (unsafeDupablePerformIO)
 
 -- | /O(1)/ A variant of 'head' for non-empty 'Text'. 'unsafeHead'
 -- omits the check for the empty case, so there is an obligation on
@@ -45,7 +47,7 @@ unsafeHead (Text arr off _len)
           n = A.unsafeIndex arr (off+1)
 {-# INLINE unsafeHead #-}
 
--- | /O(1)/ A variant of 'tail' for non-empty 'Text'. 'unsafeHead'
+-- | /O(1)/ A variant of 'tail' for non-empty 'Text'. 'unsafeTail'
 -- omits the check for the empty case, so there is an obligation on
 -- the programmer to provide a proof that the 'Text' is non-empty.
 unsafeTail :: Text -> Text
diff --git a/Data/Text/Unsafe/Base.hs b/Data/Text/Unsafe/Base.hs
index e34c83af5da8a7f1488f5a2126e4be2000af90ab..7907baf0425dec73c9ea2f50a2ecac52745eaf0d 100644
--- a/Data/Text/Unsafe/Base.hs
+++ b/Data/Text/Unsafe/Base.hs
@@ -15,14 +15,10 @@ module Data.Text.Unsafe.Base
       inlineInterleaveST
     , inlinePerformIO
     ) where
-     
+
 import GHC.ST (ST(..))
 #if defined(__GLASGOW_HASKELL__)
-# if __GLASGOW_HASKELL__ >= 611
 import GHC.IO (IO(IO))
-# else
-import GHC.IOBase (IO(IO))
-# endif
 import GHC.Base (realWorld#)
 #endif
 
diff --git a/tests/benchmarks/.gitignore b/benchmarks/.gitignore
similarity index 100%
rename from tests/benchmarks/.gitignore
rename to benchmarks/.gitignore
diff --git a/tests/benchmarks/Setup.hs b/benchmarks/Setup.hs
similarity index 100%
rename from tests/benchmarks/Setup.hs
rename to benchmarks/Setup.hs
diff --git a/tests/benchmarks/cbits/time_iconv.c b/benchmarks/cbits/time_iconv.c
similarity index 100%
rename from tests/benchmarks/cbits/time_iconv.c
rename to benchmarks/cbits/time_iconv.c
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks.hs b/benchmarks/haskell/Benchmarks.hs
similarity index 59%
rename from tests/benchmarks/src/Data/Text/Benchmarks.hs
rename to benchmarks/haskell/Benchmarks.hs
index b9911bf8969c7e4f3768a5427a8f83119a0836eb..cbf1fa72b496b3388ebec01099d31a99808596c8 100644
--- a/tests/benchmarks/src/Data/Text/Benchmarks.hs
+++ b/benchmarks/haskell/Benchmarks.hs
@@ -9,25 +9,25 @@ import Criterion.Main (Benchmark, defaultMain, bgroup)
 import System.FilePath ((</>))
 import System.IO (IOMode (WriteMode), openFile, hSetEncoding, utf8)
 
-import qualified Data.Text.Benchmarks.Builder as Builder
-import qualified Data.Text.Benchmarks.DecodeUtf8 as DecodeUtf8
-import qualified Data.Text.Benchmarks.EncodeUtf8 as EncodeUtf8
-import qualified Data.Text.Benchmarks.Equality as Equality
-import qualified Data.Text.Benchmarks.FileRead as FileRead
-import qualified Data.Text.Benchmarks.FoldLines as FoldLines
-import qualified Data.Text.Benchmarks.Pure as Pure
-import qualified Data.Text.Benchmarks.ReadNumbers as ReadNumbers
-import qualified Data.Text.Benchmarks.Replace as Replace
-import qualified Data.Text.Benchmarks.Search as Search
-import qualified Data.Text.Benchmarks.Stream as Stream
-import qualified Data.Text.Benchmarks.WordFrequencies as WordFrequencies
+import qualified Benchmarks.Builder as Builder
+import qualified Benchmarks.DecodeUtf8 as DecodeUtf8
+import qualified Benchmarks.EncodeUtf8 as EncodeUtf8
+import qualified Benchmarks.Equality as Equality
+import qualified Benchmarks.FileRead as FileRead
+import qualified Benchmarks.FoldLines as FoldLines
+import qualified Benchmarks.Pure as Pure
+import qualified Benchmarks.ReadNumbers as ReadNumbers
+import qualified Benchmarks.Replace as Replace
+import qualified Benchmarks.Search as Search
+import qualified Benchmarks.Stream as Stream
+import qualified Benchmarks.WordFrequencies as WordFrequencies
 
-import qualified Data.Text.Benchmarks.Programs.BigTable as Programs.BigTable
-import qualified Data.Text.Benchmarks.Programs.Cut as Programs.Cut
-import qualified Data.Text.Benchmarks.Programs.Fold as Programs.Fold
-import qualified Data.Text.Benchmarks.Programs.Sort as Programs.Sort
-import qualified Data.Text.Benchmarks.Programs.StripTags as Programs.StripTags
-import qualified Data.Text.Benchmarks.Programs.Throughput as Programs.Throughput
+import qualified Benchmarks.Programs.BigTable as Programs.BigTable
+import qualified Benchmarks.Programs.Cut as Programs.Cut
+import qualified Benchmarks.Programs.Fold as Programs.Fold
+import qualified Benchmarks.Programs.Sort as Programs.Sort
+import qualified Benchmarks.Programs.StripTags as Programs.StripTags
+import qualified Benchmarks.Programs.Throughput as Programs.Throughput
 
 main :: IO ()
 main = benchmarks >>= defaultMain
@@ -49,7 +49,8 @@ benchmarks = do
         , Equality.benchmark (tf "japanese.txt")
         , FileRead.benchmark (tf "russian.txt")
         , FoldLines.benchmark (tf "russian.txt")
-        , Pure.benchmark (tf "japanese.txt")
+        , Pure.benchmark "tiny "(tf "tiny.txt")
+        , Pure.benchmark "japanese" (tf "japanese.txt")
         , ReadNumbers.benchmark (tf "numbers.txt")
         , Replace.benchmark (tf "russian.txt") "принимая" "своем"
         , Search.benchmark (tf "russian.txt") "принимая"
@@ -70,4 +71,4 @@ benchmarks = do
     return $ bs ++ [ps]
   where
     -- Location of a test file
-    tf = ("../text-test-data" </>)
+    tf = ("../tests/text-test-data" </>)
diff --git a/benchmarks/haskell/Benchmarks/Builder.hs b/benchmarks/haskell/Benchmarks/Builder.hs
new file mode 100644
index 0000000000000000000000000000000000000000..b6e7bd70e92011f1484e026f4e2f5ca08df1fecd
--- /dev/null
+++ b/benchmarks/haskell/Benchmarks/Builder.hs
@@ -0,0 +1,75 @@
+-- | Testing the internal builder monoid
+--
+-- Tested in this benchmark:
+--
+-- * Concatenating many small strings using a builder
+--
+{-# LANGUAGE OverloadedStrings #-}
+module Benchmarks.Builder
+    ( benchmark
+    ) where
+
+import Criterion (Benchmark, bgroup, bench, nf)
+import Data.Binary.Builder as B
+import Data.ByteString.Char8 ()
+import Data.Monoid (mconcat, mempty)
+import qualified Blaze.ByteString.Builder as Blaze
+import qualified Blaze.ByteString.Builder.Char.Utf8 as Blaze
+import qualified Data.ByteString as SB
+import qualified Data.ByteString.Lazy as LB
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as LT
+import qualified Data.Text.Lazy.Builder as LTB
+import qualified Data.Text.Lazy.Builder.Int as Int
+import Data.Int (Int64)
+
+benchmark :: IO Benchmark
+benchmark = return $ bgroup "Builder"
+    [ bgroup "Comparison"
+      [ bench "LazyText" $ nf
+          (LT.length . LTB.toLazyText . mconcat . map LTB.fromText) texts
+      , bench "Binary" $ nf
+          (LB.length . B.toLazyByteString . mconcat . map B.fromByteString)
+          byteStrings
+      , bench "Blaze" $ nf
+          (LB.length . Blaze.toLazyByteString . mconcat . map Blaze.fromString)
+          strings
+      ]
+    , bgroup "Int"
+      [ bgroup "Decimal"
+        [ bgroup "Positive" .
+          flip map numbers $ \n ->
+          (bench (show (length (show n))) $ nf (LTB.toLazyText . Int.decimal) n)
+        , bgroup "Negative" .
+          flip map numbers $ \m ->
+          let n = negate m in
+          (bench (show (length (show n))) $ nf (LTB.toLazyText . Int.decimal) n)
+        , bench "Empty" $ nf LTB.toLazyText mempty
+        , bgroup "Show" .
+          flip map numbers $ \n ->
+          (bench (show (length (show n))) $ nf show n)
+        ]
+      ]
+    ]
+  where
+    numbers :: [Int64]
+    numbers = [
+      6, 14, 500, 9688, 10654, 620735, 5608880, 37010612,
+      731223504, 5061580596, 24596952933, 711732309084, 2845910093839,
+      54601756118340, 735159434806159, 3619097625502435, 95777227510267124,
+      414944309510675693, 8986407456998704019
+     ]
+
+texts :: [T.Text]
+texts = take 200000 $ cycle ["foo", "λx", "由の"]
+{-# NOINLINE texts #-}
+
+-- Note that the non-ascii characters will be chopped
+byteStrings :: [SB.ByteString]
+byteStrings = take 200000 $ cycle ["foo", "λx", "由の"]
+{-# NOINLINE byteStrings #-}
+
+-- Note that the non-ascii characters will be chopped
+strings :: [String]
+strings = take 200000 $ cycle ["foo", "λx", "由の"]
+{-# NOINLINE strings #-}
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/DecodeUtf8.hs b/benchmarks/haskell/Benchmarks/DecodeUtf8.hs
similarity index 96%
rename from tests/benchmarks/src/Data/Text/Benchmarks/DecodeUtf8.hs
rename to benchmarks/haskell/Benchmarks/DecodeUtf8.hs
index 474b75e17984a593c5b086b5015dfcc0ba4aa948..14d084a92b43c3a3cb726bfc95cc697317730ed0 100644
--- a/tests/benchmarks/src/Data/Text/Benchmarks/DecodeUtf8.hs
+++ b/benchmarks/haskell/Benchmarks/DecodeUtf8.hs
@@ -14,11 +14,11 @@
 --
 -- The latter are used for testing stream fusion.
 --
-module Data.Text.Benchmarks.DecodeUtf8
+module Benchmarks.DecodeUtf8
     ( benchmark
     ) where
 
-import Foreign.C.Types (CInt, CSize)
+import Foreign.C.Types
 import Data.ByteString.Internal (ByteString(..))
 import Foreign.Ptr (Ptr, plusPtr)
 import Foreign.ForeignPtr (withForeignPtr)
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/EncodeUtf8.hs b/benchmarks/haskell/Benchmarks/EncodeUtf8.hs
similarity index 95%
rename from tests/benchmarks/src/Data/Text/Benchmarks/EncodeUtf8.hs
rename to benchmarks/haskell/Benchmarks/EncodeUtf8.hs
index 39804615225724f1a33f54a8e4c29508c0cba2a0..758e095f58cf6c5b0a906eb7b8d7c9d4d0fec296 100644
--- a/tests/benchmarks/src/Data/Text/Benchmarks/EncodeUtf8.hs
+++ b/benchmarks/haskell/Benchmarks/EncodeUtf8.hs
@@ -6,7 +6,7 @@
 --
 -- * UTF-8 encoding it
 --
-module Data.Text.Benchmarks.EncodeUtf8
+module Benchmarks.EncodeUtf8
     ( benchmark
     ) where
 
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/Equality.hs b/benchmarks/haskell/Benchmarks/Equality.hs
similarity index 97%
rename from tests/benchmarks/src/Data/Text/Benchmarks/Equality.hs
rename to benchmarks/haskell/Benchmarks/Equality.hs
index 210411336428e81d3d1f4bb7c832b123add3a194..33964a23105a5ab76c02b6833deb4da0c686e020 100644
--- a/tests/benchmarks/src/Data/Text/Benchmarks/Equality.hs
+++ b/benchmarks/haskell/Benchmarks/Equality.hs
@@ -5,7 +5,7 @@
 --
 -- * Comparison of strings (Eq instance)
 --
-module Data.Text.Benchmarks.Equality
+module Benchmarks.Equality
     ( benchmark
     ) where
 
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/FileRead.hs b/benchmarks/haskell/Benchmarks/FileRead.hs
similarity index 96%
rename from tests/benchmarks/src/Data/Text/Benchmarks/FileRead.hs
rename to benchmarks/haskell/Benchmarks/FileRead.hs
index af07996e0182188f49ca7e3d7aed8ecd1d231864..65a79b49d2dfa7943364118d0bc78010887721c1 100644
--- a/tests/benchmarks/src/Data/Text/Benchmarks/FileRead.hs
+++ b/benchmarks/haskell/Benchmarks/FileRead.hs
@@ -4,7 +4,7 @@
 --
 -- * Reading a file from the disk
 --
-module Data.Text.Benchmarks.FileRead
+module Benchmarks.FileRead
     ( benchmark
     ) where
 
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/FoldLines.hs b/benchmarks/haskell/Benchmarks/FoldLines.hs
similarity index 97%
rename from tests/benchmarks/src/Data/Text/Benchmarks/FoldLines.hs
rename to benchmarks/haskell/Benchmarks/FoldLines.hs
index e2a4b2932efe64b3b2f768f3072a90290cac3f80..f08e3b2238dfaf84aaa6afb0e710fff1ae1ca4df 100644
--- a/tests/benchmarks/src/Data/Text/Benchmarks/FoldLines.hs
+++ b/benchmarks/haskell/Benchmarks/FoldLines.hs
@@ -6,7 +6,7 @@
 -- * Buffered, line-based IO
 --
 {-# LANGUAGE BangPatterns #-}
-module Data.Text.Benchmarks.FoldLines
+module Benchmarks.FoldLines
     ( benchmark
     ) where
 
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/Programs/BigTable.hs b/benchmarks/haskell/Benchmarks/Programs/BigTable.hs
similarity index 96%
rename from tests/benchmarks/src/Data/Text/Benchmarks/Programs/BigTable.hs
rename to benchmarks/haskell/Benchmarks/Programs/BigTable.hs
index e057e871e1a5ad771fa479de160140cd2c13469a..0d3402b4720aca7f9f503fecea6361db29748678 100644
--- a/tests/benchmarks/src/Data/Text/Benchmarks/Programs/BigTable.hs
+++ b/benchmarks/haskell/Benchmarks/Programs/BigTable.hs
@@ -7,7 +7,7 @@
 -- * Writing to a handle
 --
 {-# LANGUAGE OverloadedStrings #-}
-module Data.Text.Benchmarks.Programs.BigTable
+module Benchmarks.Programs.BigTable
     ( benchmark
     ) where
 
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/Programs/Cut.hs b/benchmarks/haskell/Benchmarks/Programs/Cut.hs
similarity index 98%
rename from tests/benchmarks/src/Data/Text/Benchmarks/Programs/Cut.hs
rename to benchmarks/haskell/Benchmarks/Programs/Cut.hs
index 3e918fff8c23372f2c556558718eda48bea15fdd..3bb44696687e9586aaa7bb07f0bb60dd47d729cf 100644
--- a/tests/benchmarks/src/Data/Text/Benchmarks/Programs/Cut.hs
+++ b/benchmarks/haskell/Benchmarks/Programs/Cut.hs
@@ -12,7 +12,7 @@
 --
 -- * Writing back to a handle
 --
-module Data.Text.Benchmarks.Programs.Cut
+module Benchmarks.Programs.Cut
     ( benchmark
     ) where
 
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/Programs/Fold.hs b/benchmarks/haskell/Benchmarks/Programs/Fold.hs
similarity index 97%
rename from tests/benchmarks/src/Data/Text/Benchmarks/Programs/Fold.hs
rename to benchmarks/haskell/Benchmarks/Programs/Fold.hs
index f42a71b05f7be55d3093de836929cec772883c69..722487a89f6bb488471b70a194102a4e4cd280af 100644
--- a/tests/benchmarks/src/Data/Text/Benchmarks/Programs/Fold.hs
+++ b/benchmarks/haskell/Benchmarks/Programs/Fold.hs
@@ -13,7 +13,7 @@
 -- * Writing back to a handle
 --
 {-# LANGUAGE OverloadedStrings #-}
-module Data.Text.Benchmarks.Programs.Fold
+module Benchmarks.Programs.Fold
     ( benchmark
     ) where
 
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/Programs/Sort.hs b/benchmarks/haskell/Benchmarks/Programs/Sort.hs
similarity index 98%
rename from tests/benchmarks/src/Data/Text/Benchmarks/Programs/Sort.hs
rename to benchmarks/haskell/Benchmarks/Programs/Sort.hs
index 4b4aee0686df600dab05ac2154df7f849a4b814b..808f71056c0e8361541929e6bb766eb40d7b2364 100644
--- a/tests/benchmarks/src/Data/Text/Benchmarks/Programs/Sort.hs
+++ b/benchmarks/haskell/Benchmarks/Programs/Sort.hs
@@ -13,7 +13,7 @@
 -- * Writing back to a handle
 --
 {-# LANGUAGE OverloadedStrings #-}
-module Data.Text.Benchmarks.Programs.Sort
+module Benchmarks.Programs.Sort
     ( benchmark
     ) where
 
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/Programs/StripTags.hs b/benchmarks/haskell/Benchmarks/Programs/StripTags.hs
similarity index 96%
rename from tests/benchmarks/src/Data/Text/Benchmarks/Programs/StripTags.hs
rename to benchmarks/haskell/Benchmarks/Programs/StripTags.hs
index 41f18d5194af19d22c2d23f041a6ea9c72f1e131..1d94cf688a12dca39a81c5e18f5558221c146b79 100644
--- a/tests/benchmarks/src/Data/Text/Benchmarks/Programs/StripTags.hs
+++ b/benchmarks/haskell/Benchmarks/Programs/StripTags.hs
@@ -11,10 +11,10 @@
 -- * Writing back to a handle
 --
 {-# OPTIONS_GHC -fspec-constr-count=5 #-}
-module Data.Text.Benchmarks.Programs.StripTags
+module Benchmarks.Programs.StripTags
     ( benchmark
     ) where
-     
+
 import Criterion (Benchmark, bgroup, bench)
 import Data.List (mapAccumL)
 import System.IO (Handle, hPutStr)
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/Programs/Throughput.hs b/benchmarks/haskell/Benchmarks/Programs/Throughput.hs
similarity index 96%
rename from tests/benchmarks/src/Data/Text/Benchmarks/Programs/Throughput.hs
rename to benchmarks/haskell/Benchmarks/Programs/Throughput.hs
index ecb7021c13220b390a2c2a7716f8b509908644da..87932ead6ca65ad4e1a5beb643f20e34733fe002 100644
--- a/tests/benchmarks/src/Data/Text/Benchmarks/Programs/Throughput.hs
+++ b/benchmarks/haskell/Benchmarks/Programs/Throughput.hs
@@ -14,7 +14,7 @@
 --
 -- * Writing back to a handle
 --
-module Data.Text.Benchmarks.Programs.Throughput
+module Benchmarks.Programs.Throughput
     ( benchmark
     ) where
 
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/Pure.hs b/benchmarks/haskell/Benchmarks/Pure.hs
similarity index 97%
rename from tests/benchmarks/src/Data/Text/Benchmarks/Pure.hs
rename to benchmarks/haskell/Benchmarks/Pure.hs
index 708e2db4444c9c626ef057c407154a75e22b2eba..4860ab309f549359ef68493671f983795b66e9ad 100644
--- a/tests/benchmarks/src/Data/Text/Benchmarks/Pure.hs
+++ b/benchmarks/haskell/Benchmarks/Pure.hs
@@ -4,9 +4,9 @@
 --
 -- * Most pure functions defined the string types
 --
-{-# LANGUAGE BangPatterns, GADTs, MagicHash #-}
+{-# LANGUAGE BangPatterns, CPP, GADTs, MagicHash #-}
 {-# OPTIONS_GHC -fno-warn-orphans #-}
-module Data.Text.Benchmarks.Pure
+module Benchmarks.Pure
     ( benchmark
     ) where
 
@@ -27,8 +27,8 @@ import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Lazy.Builder as TB
 import qualified Data.Text.Lazy.Encoding as TL
 
-benchmark :: FilePath -> IO Benchmark
-benchmark fp = do
+benchmark :: String -> FilePath -> IO Benchmark
+benchmark kind fp = do
     -- Evaluate stuff before actually running the benchmark, we don't want to
     -- count it here.
 
@@ -100,6 +100,10 @@ benchmark fp = do
             , benchBSL $ nf BL.unpack bla
             , benchS   $ nf UTF8.toString bsa
             ]
+        , bgroup "decode'"
+            [ benchT   $ nf T.decodeUtf8' bsa
+            , benchTL  $ nf TL.decodeUtf8' bla
+            ]
         , bgroup "drop"
             [ benchT   $ nf (T.drop (ta_len `div` 3)) ta
             , benchTL  $ nf (TL.drop (tla_len `div` 3)) tla
@@ -405,11 +409,11 @@ benchmark fp = do
             ]
         ]
   where
-    benchS   = bench "String"
-    benchT   = bench "Text"
-    benchTL  = bench "LazyText"
-    benchBS  = bench "ByteString"
-    benchBSL = bench "LazyByteString"
+    benchS   = bench ("String+" ++ kind)
+    benchT   = bench ("Text+" ++ kind)
+    benchTL  = bench ("LazyText+" ++ kind)
+    benchBS  = bench ("ByteString+" ++ kind)
+    benchBSL = bench ("LazyByteString+" ++ kind)
 
     c  = 'й'
     p0 = (== c)
@@ -425,11 +429,13 @@ benchmark fp = do
     replicat n = concat . L.replicate n
     short = T.pack "short"
 
+#if !MIN_VERSION_bytestring(0,10,0)
 instance NFData BS.ByteString
 
 instance NFData BL.ByteString where
     rnf BL.Empty        = ()
     rnf (BL.Chunk _ ts) = rnf ts
+#endif
 
 data B where
     B :: NFData a => a -> B
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/ReadNumbers.hs b/benchmarks/haskell/Benchmarks/ReadNumbers.hs
similarity index 98%
rename from tests/benchmarks/src/Data/Text/Benchmarks/ReadNumbers.hs
rename to benchmarks/haskell/Benchmarks/ReadNumbers.hs
index 04d50bb34ec1b6b5a26b68d5742b71944f419b31..b907f8a8bc05bc53450bda344134533f2d78a2ff 100644
--- a/tests/benchmarks/src/Data/Text/Benchmarks/ReadNumbers.hs
+++ b/benchmarks/haskell/Benchmarks/ReadNumbers.hs
@@ -16,7 +16,7 @@
 --
 -- * Lexing/parsing of different numerical types
 --
-module Data.Text.Benchmarks.ReadNumbers
+module Benchmarks.ReadNumbers
     ( benchmark
     ) where
 
@@ -88,7 +88,7 @@ text reader = foldl' go 1000000
   where
     go z t = case reader t of Left _       -> z
                               Right (n, _) -> min n z
-    
+
 byteString :: (Ord a, Num a) => (t -> Maybe (a,t)) -> [t] -> a
 byteString reader = foldl' go 1000000
   where
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/Replace.hs b/benchmarks/haskell/Benchmarks/Replace.hs
similarity index 96%
rename from tests/benchmarks/src/Data/Text/Benchmarks/Replace.hs
rename to benchmarks/haskell/Benchmarks/Replace.hs
index 42a7b59125486d72ed40fe56b4dc113b25057c25..d56d52dfe13193497e9e088c3c6ef0a1d13be99e 100644
--- a/tests/benchmarks/src/Data/Text/Benchmarks/Replace.hs
+++ b/benchmarks/haskell/Benchmarks/Replace.hs
@@ -4,7 +4,7 @@
 --
 -- * Search and replace of a pattern in a text
 --
-module Data.Text.Benchmarks.Replace
+module Benchmarks.Replace
     ( benchmark
     ) where
 
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/Search.hs b/benchmarks/haskell/Benchmarks/Search.hs
similarity index 97%
rename from tests/benchmarks/src/Data/Text/Benchmarks/Search.hs
rename to benchmarks/haskell/Benchmarks/Search.hs
index b4473b746edd20d011359442b1ce9893e2180c1c..93f1e05e1e8179dac914494bd219eaf5b05c167e 100644
--- a/tests/benchmarks/src/Data/Text/Benchmarks/Search.hs
+++ b/benchmarks/haskell/Benchmarks/Search.hs
@@ -4,7 +4,7 @@
 --
 -- * Searching all occurences of a pattern using library routines
 --
-module Data.Text.Benchmarks.Search
+module Benchmarks.Search
     ( benchmark
     ) where
 
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/Stream.hs b/benchmarks/haskell/Benchmarks/Stream.hs
similarity index 97%
rename from tests/benchmarks/src/Data/Text/Benchmarks/Stream.hs
rename to benchmarks/haskell/Benchmarks/Stream.hs
index d5e6b843627546a091dbe3dd4a82cca3e2264d3a..9960bef0bbff1794db32c6202e5cecbaeede423c 100644
--- a/tests/benchmarks/src/Data/Text/Benchmarks/Stream.hs
+++ b/benchmarks/haskell/Benchmarks/Stream.hs
@@ -7,7 +7,7 @@
 --
 {-# LANGUAGE BangPatterns #-}
 {-# OPTIONS_GHC -fno-warn-orphans #-}
-module Data.Text.Benchmarks.Stream
+module Benchmarks.Stream
     ( benchmark
     ) where
 
@@ -86,7 +86,6 @@ benchmark fp = do
             ]
 
         -- Encoding.Fusion.Common
-        , bench "restreamUtf8"    $ nf F.restreamUtf8 s
         , bench "restreamUtf16LE" $ nf F.restreamUtf16LE s
         , bench "restreamUtf16BE" $ nf F.restreamUtf16BE s
         , bench "restreamUtf32LE" $ nf F.restreamUtf32LE s
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/WordFrequencies.hs b/benchmarks/haskell/Benchmarks/WordFrequencies.hs
similarity index 95%
rename from tests/benchmarks/src/Data/Text/Benchmarks/WordFrequencies.hs
rename to benchmarks/haskell/Benchmarks/WordFrequencies.hs
index 559470f6d304cad98dd4fb9da046f4b7b0f0050b..698103debaee2c9164c80083c2104f5c0f8d5ffe 100644
--- a/tests/benchmarks/src/Data/Text/Benchmarks/WordFrequencies.hs
+++ b/benchmarks/haskell/Benchmarks/WordFrequencies.hs
@@ -8,7 +8,7 @@
 --
 -- * Comparing: Eq/Ord instances
 --
-module Data.Text.Benchmarks.WordFrequencies
+module Benchmarks.WordFrequencies
     ( benchmark
     ) where
 
diff --git a/benchmarks/haskell/Multilang.hs b/benchmarks/haskell/Multilang.hs
new file mode 100644
index 0000000000000000000000000000000000000000..a861afad04661c5029404f1f6da57ac42db8f3dc
--- /dev/null
+++ b/benchmarks/haskell/Multilang.hs
@@ -0,0 +1,32 @@
+{-# LANGUAGE BangPatterns, OverloadedStrings, RankNTypes #-}
+
+module Main (
+  main
+  ) where
+
+import Control.Monad (forM_)
+import qualified Data.ByteString as B
+import qualified Data.Text as Text
+import Data.Text.Encoding (decodeUtf8)
+import Data.Text (Text)
+import System.IO (hFlush, stdout)
+import Timer (timer)
+
+type BM = Text -> ()
+
+bm :: forall a. (Text -> a) -> BM
+bm f t = f t `seq` ()
+
+benchmarks :: [(String, Text.Text -> ())]
+benchmarks = [
+    ("find_first", bm $ Text.isInfixOf "en:Benin")
+  , ("find_index", bm $ Text.findIndex (=='c'))
+  ]
+
+main :: IO ()
+main = do
+  !contents <- decodeUtf8 `fmap` B.readFile "../tests/text-test-data/yiwiki.xml"
+  forM_ benchmarks $ \(name, bmark) -> do
+    putStr $ name ++ " "
+    hFlush stdout
+    putStrLn =<< (timer 100 contents bmark)
diff --git a/benchmarks/haskell/Timer.hs b/benchmarks/haskell/Timer.hs
new file mode 100644
index 0000000000000000000000000000000000000000..ac09616d4b9f5a0dfd2a08e8d07708a1b8ee9459
--- /dev/null
+++ b/benchmarks/haskell/Timer.hs
@@ -0,0 +1,30 @@
+{-# LANGUAGE BangPatterns #-}
+
+module Timer (timer) where
+
+import Control.Exception (evaluate)
+import Data.Time.Clock.POSIX (getPOSIXTime)
+import GHC.Float (FFFormat(..), formatRealFloat)
+
+ickyRound :: Int -> Double -> String
+ickyRound k = formatRealFloat FFFixed (Just k)
+
+timer :: Int -> a -> (a -> b) -> IO String
+timer count a0 f = do
+  let loop !k !fastest
+        | k <= 0 = return fastest
+        | otherwise = do
+        start <- getPOSIXTime
+        let inner a i
+              | i <= 0    = return ()
+              | otherwise = evaluate (f a) >> inner a (i-1)
+        inner a0 count
+        end <- getPOSIXTime
+        let elapsed = end - start
+        loop (k-1) (min fastest (elapsed / fromIntegral count))
+  t <- loop (3::Int) 1e300
+  let log10 x = log x / log 10
+      ft = realToFrac t
+      prec = round (log10 (fromIntegral count) - log10 ft)
+  return $! ickyRound prec ft
+{-# NOINLINE timer #-}
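[editor's note] `Timer.timer` runs the measured function `count` times per pass, keeps the fastest of three passes, and formats the per-call time with a precision scaled to the measurement. A hedged usage sketch, mirroring how Multilang.hs above drives it (the input and measured function here are illustrative):

    -- Sketch only: calling Timer.timer the same way Multilang.hs does.
    import qualified Data.Text as Text
    import Timer (timer)

    main :: IO ()
    main = do
      let input = Text.pack "hello world"
      -- time 100 evaluations of Text.length on the same input;
      -- timer returns the fastest per-call time as a formatted string
      putStrLn =<< timer 100 input Text.length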
diff --git a/tests/benchmarks/python/.gitignore b/benchmarks/python/.gitignore
similarity index 100%
rename from tests/benchmarks/python/.gitignore
rename to benchmarks/python/.gitignore
diff --git a/tests/benchmarks/python/cut.py b/benchmarks/python/cut.py
similarity index 100%
rename from tests/benchmarks/python/cut.py
rename to benchmarks/python/cut.py
diff --git a/benchmarks/python/multilang.py b/benchmarks/python/multilang.py
new file mode 100755
index 0000000000000000000000000000000000000000..f2868545924ada4f877f9220f93e13bf23f89010
--- /dev/null
+++ b/benchmarks/python/multilang.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+import math
+import sys
+import time
+
+def find_first():
+    cf = contents.find
+    return timer(lambda: cf("en:Benin"))
+
+def timer(f, count=100):
+    a = 1e300
+    def g():
+        return
+    for i in xrange(3):
+        start = time.time()
+        for j in xrange(count):
+            g()
+        a = min(a, (time.time() - start) / count)
+
+    b = 1e300
+    for i in xrange(3):
+        start = time.time()
+        for j in xrange(count):
+            f()
+        b = min(b, (time.time() - start) / count)
+
+    return round(b - a, int(round(math.log(count, 10) - math.log(b - a, 10))))
+
+contents = open('../../tests/text-test-data/yiwiki.xml', 'r').read()
+contents = contents.decode('utf-8')
+
+benchmarks = (
+    find_first,
+    )
+
+to_run = sys.argv[1:]
+bms = []
+if to_run:
+    for r in to_run:
+        for b in benchmarks:
+            if b.__name__.startswith(r):
+                bms.append(b)
+else:
+    bms = benchmarks
+
+for b in bms:
+    sys.stdout.write(b.__name__ + ' ')
+    sys.stdout.flush()
+    print b()
diff --git a/tests/benchmarks/python/sort.py b/benchmarks/python/sort.py
similarity index 100%
rename from tests/benchmarks/python/sort.py
rename to benchmarks/python/sort.py
diff --git a/tests/benchmarks/python/strip_tags.py b/benchmarks/python/strip_tags.py
similarity index 100%
rename from tests/benchmarks/python/strip_tags.py
rename to benchmarks/python/strip_tags.py
diff --git a/tests/benchmarks/python/utils.py b/benchmarks/python/utils.py
similarity index 100%
rename from tests/benchmarks/python/utils.py
rename to benchmarks/python/utils.py
diff --git a/tests/benchmarks/ruby/cut.rb b/benchmarks/ruby/cut.rb
similarity index 100%
rename from tests/benchmarks/ruby/cut.rb
rename to benchmarks/ruby/cut.rb
diff --git a/tests/benchmarks/ruby/fold.rb b/benchmarks/ruby/fold.rb
similarity index 100%
rename from tests/benchmarks/ruby/fold.rb
rename to benchmarks/ruby/fold.rb
diff --git a/tests/benchmarks/ruby/sort.rb b/benchmarks/ruby/sort.rb
similarity index 100%
rename from tests/benchmarks/ruby/sort.rb
rename to benchmarks/ruby/sort.rb
diff --git a/tests/benchmarks/ruby/strip_tags.rb b/benchmarks/ruby/strip_tags.rb
similarity index 100%
rename from tests/benchmarks/ruby/strip_tags.rb
rename to benchmarks/ruby/strip_tags.rb
diff --git a/tests/benchmarks/ruby/utils.rb b/benchmarks/ruby/utils.rb
similarity index 100%
rename from tests/benchmarks/ruby/utils.rb
rename to benchmarks/ruby/utils.rb
diff --git a/benchmarks/text-benchmarks.cabal b/benchmarks/text-benchmarks.cabal
new file mode 100644
index 0000000000000000000000000000000000000000..ce36dd599773254b604637bae0cfd4f24a73246c
--- /dev/null
+++ b/benchmarks/text-benchmarks.cabal
@@ -0,0 +1,54 @@
+name:                text-benchmarks
+version:             0.0.0.0
+synopsis:            Benchmarks for the text package
+description:         Benchmarks for the text package
+homepage:            https://bitbucket.org/bos/text
+license:             BSD3
+license-file:        ../LICENSE
+author:              Jasper Van der Jeugt <jaspervdj@gmail.com>,
+                     Bryan O'Sullivan <bos@serpentine.com>,
+                     Tom Harper <rtomharper@googlemail.com>,
+                     Duncan Coutts <duncan@haskell.org>
+maintainer:          jaspervdj@gmail.com
+category:            Text
+build-type:          Simple
+
+cabal-version:       >=1.2
+
+flag llvm
+  description: use LLVM
+  default: False
+
+executable text-benchmarks
+  hs-source-dirs: haskell ..
+  c-sources:      ../cbits/cbits.c
+                  cbits/time_iconv.c
+  include-dirs:   ../include
+  main-is:        Benchmarks.hs
+  ghc-options:    -Wall -O2
+  if flag(llvm)
+    ghc-options:  -fllvm
+  cpp-options:    -DHAVE_DEEPSEQ -DINTEGER_GMP
+  build-depends:  base == 4.*,
+                  binary,
+                  blaze-builder,
+                  bytestring >= 0.10.4.0,
+                  bytestring-lexing,
+                  containers,
+                  criterion >= 0.6.0.1,
+                  deepseq,
+                  directory,
+                  filepath,
+                  ghc-prim,
+                  integer-gmp,
+                  stringsearch,
+                  utf8-string
+
+executable text-multilang
+  hs-source-dirs: haskell
+  main-is:        Multilang.hs
+  ghc-options:    -Wall -O2
+  build-depends:  base == 4.*,
+                  bytestring,
+                  text,
+                  time
diff --git a/cbits/cbits.c b/cbits/cbits.c
index 7b367458fa677c3faa4d2a621aec933adc787ba6..6fa8bc123fc75675b75b02a4924fb731461a067b 100644
--- a/cbits/cbits.c
+++ b/cbits/cbits.c
@@ -9,6 +9,7 @@
 #include <string.h>
 #include <stdint.h>
 #include <stdio.h>
+#include "text_cbits.h"
 
 void _hs_text_memcpy(void *dest, size_t doff, const void *src, size_t soff,
 		     size_t n)
@@ -47,7 +48,7 @@ static const uint8_t utf8d[] = {
   12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
   12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
   12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
-  12,36,12,12,12,12,12,12,12,12,12,12, 
+  12,36,12,12,12,12,12,12,12,12,12,12,
 };
 
 static inline uint32_t
@@ -61,24 +62,83 @@ decode(uint32_t *state, uint32_t* codep, uint32_t byte) {
   return *state = utf8d[256 + *state + type];
 }
 
+/*
+ * The ISO 8859-1 (aka latin-1) code points correspond exactly to the
+ * first 256 Unicode code points, so we can trivially convert from a
+ * latin-1 encoded bytestring to a UTF-16 array.
+ */
+void
+_hs_text_decode_latin1(uint16_t *dest, const uint8_t const *src,
+                       const uint8_t const *srcend)
+{
+  const uint8_t *p = src;
+
+#if defined(__i386__) || defined(__x86_64__)
+  /* This optimization works on little-endian systems by using
+     (aligned) 32-bit loads instead of 8-bit loads
+   */
+
+  /* consume unaligned prefix */
+  while (p != srcend && (uintptr_t)p & 0x3)
+    *dest++ = *p++;
+
+  /* iterate over 32-bit aligned loads */
+  while (p < srcend - 3) {
+    const uint32_t w = *((const uint32_t *)p);
+
+    *dest++ =  w        & 0xff;
+    *dest++ = (w >> 8)  & 0xff;
+    *dest++ = (w >> 16) & 0xff;
+    *dest++ = (w >> 24) & 0xff;
+
+    p += 4;
+  }
+#endif
+
+  /* handle unaligned suffix */
+  while (p != srcend)
+    *dest++ = *p++;
+}
+
 /*
  * A best-effort decoder. Runs until it hits either end of input or
  * the start of an invalid byte sequence.
  *
- * At exit, updates *destoff with the next offset to write to, and
- * returns the next source offset to read from.
+ * At exit, we update *destoff with the next offset to write to, *src
+ * with the next source location past the last one successfully
+ * decoded, and return the next source location to read from.
+ *
+ * Moreover, we expose the internal decoder state (state0 and
+ * codepoint0), allowing one to restart the decoder after it
+ * terminates (say, due to a partial codepoint).
+ *
+ * In particular, there are a few possible outcomes,
+ *
+ *   1) We decoded the buffer entirely:
+ *      In this case we return srcend
+ *      state0 == UTF8_ACCEPT
+ *
+ *   2) We met an invalid encoding
+ *      In this case we return the address of the first invalid byte
+ *      state0 == UTF8_REJECT
+ *
+ *   3) We reached the end of the buffer while decoding a codepoint
+ *      In this case we return a pointer to the first byte of the partial codepoint
+ *      state0 != UTF8_ACCEPT, UTF8_REJECT
+ *
  */
-uint8_t const *
-_hs_text_decode_utf8(uint16_t *dest, size_t *destoff,
-		     const uint8_t const *src, const uint8_t const *srcend)
+const uint8_t *
+_hs_text_decode_utf8_state(uint16_t *const dest, size_t *destoff,
+                           const uint8_t **const src,
+                           const uint8_t *const srcend,
+                           uint32_t *codepoint0, uint32_t *state0)
 {
   uint16_t *d = dest + *destoff;
-  const uint8_t const *s = src;
-  uint32_t state = UTF8_ACCEPT;
+  const uint8_t *s = *src, *last = *src;
+  uint32_t state = *state0;
+  uint32_t codepoint = *codepoint0;
 
   while (s < srcend) {
-    uint32_t codepoint;
-
 #if defined(__i386__) || defined(__x86_64__)
     /*
      * This code will only work on a little-endian system that
@@ -106,6 +166,7 @@ _hs_text_decode_utf8(uint16_t *dest, size_t *destoff,
 	*d++ = (uint16_t) ((codepoint >> 16) & 0xff);
 	*d++ = (uint16_t) ((codepoint >> 24) & 0xff);
       }
+      last = s;
     }
 #endif
 
@@ -121,13 +182,29 @@ _hs_text_decode_utf8(uint16_t *dest, size_t *destoff,
       *d++ = (uint16_t) (0xD7C0 + (codepoint >> 10));
       *d++ = (uint16_t) (0xDC00 + (codepoint & 0x3FF));
     }
+    last = s;
   }
 
-  /* Error recovery - if we're not in a valid finishing state, back up. */
-  if (state != UTF8_ACCEPT)
+  /* Invalid encoding, back up to the errant character */
+  if (state == UTF8_REJECT)
     s -= 1;
 
   *destoff = d - dest;
+  *codepoint0 = codepoint;
+  *state0 = state;
+  *src = last;
 
   return s;
 }
+
+/*
+ * Helper to decode buffer and discard final decoder state
+ */
+const uint8_t *
+_hs_text_decode_utf8(uint16_t *const dest, size_t *destoff,
+                     const uint8_t *src, const uint8_t *const srcend)
+{
+  uint32_t codepoint;
+  uint32_t state = UTF8_ACCEPT;
+  return _hs_text_decode_utf8_state(dest, destoff, &src, srcend, &codepoint, &state);
+}
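[editor's note] The resumable C decoder above is what the Haskell-side streaming interface builds on: callers feed chunks and keep the continuation that carries the saved `state0`/`codepoint0` across chunk boundaries. A sketch of how that interface is consumed, assuming the `streamDecodeUtf8`/`Some` API exercised by the `t_utf8_incr` property later in this patch (`decodeChunks` is an illustrative name):

    -- Sketch only: incremental UTF-8 decoding over a list of chunks.
    import qualified Data.ByteString as B
    import qualified Data.Text as T
    import qualified Data.Text.Encoding as E

    decodeChunks :: [B.ByteString] -> T.Text
    decodeChunks = T.concat . go E.streamDecodeUtf8
      where
        go _ []       = []
        go f (c : cs) = let E.Some t _ f' = f c   -- t: decoded text, f': resume
                        in t : go f' cs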
diff --git a/include/text_cbits.h b/include/text_cbits.h
new file mode 100644
index 0000000000000000000000000000000000000000..3523efea5f1662ec3041550e784155e6be8107f8
--- /dev/null
+++ b/include/text_cbits.h
@@ -0,0 +1,11 @@
+/*
+ * Copyright (c) 2013 Bryan O'Sullivan <bos@serpentine.com>.
+ */
+
+#ifndef _text_cbits_h
+#define _text_cbits_h
+
+#define UTF8_ACCEPT 0
+#define UTF8_REJECT 12
+
+#endif
diff --git a/scripts/Arsec.hs b/scripts/Arsec.hs
index 818bfc022124a27c2401e89a03de96aca7a2a47f..653c8e4f581c6b22c14ba8fa27d9593c01c0e89c 100644
--- a/scripts/Arsec.hs
+++ b/scripts/Arsec.hs
@@ -26,7 +26,7 @@ import Text.ParserCombinators.Parsec.Prim hiding ((<|>), many)
 instance Applicative (GenParser s a) where
     pure = return
     (<*>) = ap
-    
+
 instance Alternative (GenParser s a) where
     empty = mzero
     (<|>) = mplus
diff --git a/tests-and-benchmarks.markdown b/tests-and-benchmarks.markdown
new file mode 100644
index 0000000000000000000000000000000000000000..71e42684baed61dcb6f3b83433408ef90f5c7cec
--- /dev/null
+++ b/tests-and-benchmarks.markdown
@@ -0,0 +1,63 @@
+Tests and benchmarks
+====================
+
+Prerequisites
+-------------
+
+To run the tests and benchmarks, you will need the test data, which
+you can clone from one of the following locations:
+
+* Mercurial master repository:
+  [bitbucket.org/bos/text-test-data](https://bitbucket.org/bos/text-test-data)
+
+* Git mirror repository:
+  [github.com/bos/text-test-data](https://github.com/bos/text-test-data)
+
+You should clone that repository into the `tests` subdirectory (your
+clone must be named `text-test-data` locally), then run `make -C
+tests/text-test-data` to uncompress the test files.  Many tests and
+benchmarks will fail if the test files are missing.
+
+Functional tests
+----------------
+
+The functional tests are located in the `tests` subdirectory. An overview of
+what's in that directory:
+
+    Makefile          Has targets for common tasks
+    Tests             Source files of the testing code
+    scripts           Various utility scripts
+    text-tests.cabal  Cabal file that compiles all tests
+
+The `text-tests.cabal` builds:
+
+- A copy of the text library, sharing the source code, but exposing all internal
+  modules, for testing purposes
+- The different test suites
+
+To compile, run all tests, and generate a coverage report, simply use `make`.
+
+Benchmarks
+----------
+
+The benchmarks are located in the `benchmarks` subdirectory. An overview of
+what's in that directory:
+
+    Makefile               Has targets for common tasks
+    haskell                Source files of the Haskell benchmarks
+    python                 Python implementations of some benchmarks
+    ruby                   Ruby implementations of some benchmarks
+    text-benchmarks.cabal  Cabal file which compiles all benchmarks
+
+To compile the benchmarks, navigate to the `benchmarks` subdirectory and run
+`cabal configure && cabal build`. Then, you can run the benchmarks using:
+
+    ./dist/build/text-benchmarks/text-benchmarks
+
+However, since there are quite a lot of benchmarks, you usually don't want to
+run them all. Instead, use the `-l` flag to get a list of benchmarks:
+
+    ./dist/build/text-benchmarks/text-benchmarks -l
+
+And run the ones you want to inspect. If you want to configure the benchmarks
+further, the exact parameters can be changed in `Benchmarks.hs`.
diff --git a/tests/tests/.ghci b/tests/.ghci
similarity index 100%
rename from tests/tests/.ghci
rename to tests/.ghci
diff --git a/tests/tests/Makefile b/tests/Makefile
similarity index 78%
rename from tests/tests/Makefile
rename to tests/Makefile
index f2c9d0e7f922e7ab1af18d79a3479b1e9300cf1c..11468b9201f8ea2ffa5e6fed97688e9b2ec60acb 100644
--- a/tests/tests/Makefile
+++ b/tests/Makefile
@@ -1,12 +1,18 @@
+count = 1000
+
 coverage: build coverage/hpc_index.html
 
-build:
+build: text-test-data
 	cabal configure -fhpc
 	cabal build
 
+text-test-data:
+	hg clone https://bitbucket.org/bos/text-test-data
+	$(MAKE) -C text-test-data
+
 coverage/text-tests.tix:
 	-mkdir -p coverage
-	./dist/build/text-tests/text-tests
+	./dist/build/text-tests/text-tests -a $(count)
 	mv text-tests.tix $@
 
 coverage/text-tests-stdio.tix:
diff --git a/tests/README.markdown b/tests/README.markdown
deleted file mode 100644
index 66d33ac3981d4e3cd370fbfa1ccd9de32f286e41..0000000000000000000000000000000000000000
--- a/tests/README.markdown
+++ /dev/null
@@ -1,63 +0,0 @@
-Tests
-=====
-
-This directory contains the tests for the Text library. To run these
-tests, you will need the test data from one of the following
-locations:
-
-* Mercurial master repository:
-  [bitbucket.org/bos/text-test-data](https://bitbucket.org/bos/text-test-data)
-
-* Git mirror repository:
-  [github.com/bos/text-test-data](https://github.com/bos/text-test-data)
-
-You should clone that repository to the same directory as this `README`
-file, then run `make` to uncompress the test files.  Many tests will
-fail if the test files are missing.
-
-There are two categories of tests: functional tests (including QuickCheck
-properties), and benchmarks.
-
-Functional tests
-----------------
-
-The functional tests are located in the `tests` subdirectory. An overview of
-what's in that directory:
-
-    scripts           Various utility scripts
-    src               Source files of the testing code
-    text-tests.cabal  Cabal file which compiles all benchmarks
-    Makefile          Has targets for common tasks
-
-The `text-tests.cabal` builds:
-
-- A copy of the text library, sharing the source code, but exposing all internal
-  modules, for testing purposes
-- The different test suites
-
-To compile, run all tests, and generate a coverage report, simply use `make`.
-
-Benchmarks
-----------
-
-The benchmarks are located in the `benchmarks` subdirectory. An overview of
-what's in that directory:
-
-    python            Python implementations of some benchmarks
-    ruby              Ruby implementations of some benchmarks
-    src               Source files of the haskell benchmarks
-    benchmarks.cabal  Cabal file which compiles all benchmarks
-    Makefile          Has targets for common tasks
-
-To compile the benchmarks, navigate to the `benchmarks` subdirectory and run
-`cabal configure && cabal build`. Then, you can run the benchmarks using:
-
-    ./dist/build/benchmarks/benchmarks
-
-However, since there quite a lot of benchmarks, you usually don't want to run
-them all. Instead, use the `-l` flag to get a list of benchmarks:
-
-    ./dist/build/benchmarks/benchmarks
-
-And run the ones you want to inspect. If you want to configure the benchmarks
-further, the exact parameters can be changed in `src/Data/Text/Benchmarks.hs`.
diff --git a/tests/tests/src/Data/Text/Tests.hs b/tests/Tests.hs
similarity index 64%
rename from tests/tests/src/Data/Text/Tests.hs
rename to tests/Tests.hs
index 3bb1a51c0b51ea260857649537addb2b84162db1..fb97ff4adc08eac99f5682de9c191a14481796e9 100644
--- a/tests/tests/src/Data/Text/Tests.hs
+++ b/tests/Tests.hs
@@ -6,8 +6,8 @@ module Main
 
 import Test.Framework (defaultMain)
 
-import qualified Data.Text.Tests.Properties as Properties
-import qualified Data.Text.Tests.Regressions as Regressions
+import qualified Tests.Properties as Properties
+import qualified Tests.Regressions as Regressions
 
 main :: IO ()
 main = defaultMain [Properties.tests, Regressions.tests]
diff --git a/tests/tests/src/Data/Text/Tests/IO.hs b/tests/Tests/IO.hs
similarity index 100%
rename from tests/tests/src/Data/Text/Tests/IO.hs
rename to tests/Tests/IO.hs
diff --git a/tests/tests/src/Data/Text/Tests/Properties.hs b/tests/Tests/Properties.hs
similarity index 90%
rename from tests/tests/src/Data/Text/Tests/Properties.hs
rename to tests/Tests/Properties.hs
index ef6cb4a876b96bceb2cafc8bc4c34a33dc14c1d7..55974b640d74759dae1fe1e4b8491e0363c47b2b 100644
--- a/tests/tests/src/Data/Text/Tests/Properties.hs
+++ b/tests/Tests/Properties.hs
@@ -1,10 +1,10 @@
 -- | General quicktest properties for the text library
 --
-{-# LANGUAGE BangPatterns, FlexibleInstances, OverloadedStrings,
-             ScopedTypeVariables, TypeSynonymInstances, CPP #-}
+{-# LANGUAGE BangPatterns, CPP, FlexibleInstances, OverloadedStrings,
+             ScopedTypeVariables, TypeSynonymInstances #-}
 {-# OPTIONS_GHC -fno-enable-rewrite-rules #-}
 {-# OPTIONS_GHC -fno-warn-missing-signatures #-}
-module Data.Text.Tests.Properties
+module Tests.Properties
     (
       tests
     ) where
@@ -16,6 +16,7 @@ import Text.Show.Functions ()
 import Control.Arrow ((***), second)
 import Control.Exception (catch)
 import Data.Char (chr, isDigit, isHexDigit, isLower, isSpace, isUpper, ord)
+import Data.Int (Int8, Int16, Int32, Int64)
 import Data.Monoid (Monoid(..))
 import Data.String (fromString)
 import Data.Text.Encoding.Error
@@ -24,13 +25,13 @@ import Data.Text.Fusion.Size
 import Data.Text.Lazy.Read as TL
 import Data.Text.Read as T
 import Data.Text.Search (indices)
-import Data.Word (Word8, Word16, Word32)
+import Data.Word (Word, Word8, Word16, Word32, Word64)
 import Numeric (showHex)
-import Prelude hiding (catch, replicate)
 import Test.Framework (Test, testGroup)
 import Test.Framework.Providers.QuickCheck2 (testProperty)
 import qualified Data.Bits as Bits (shiftL, shiftR)
 import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
 import qualified Data.List as L
 import qualified Data.Text as T
 import qualified Data.Text.Encoding as E
@@ -39,6 +40,8 @@ import qualified Data.Text.Fusion.Common as S
 import qualified Data.Text.IO as T
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Lazy.Builder as TB
+import qualified Data.Text.Lazy.Builder.Int as TB
+import qualified Data.Text.Lazy.Builder.RealFloat as TB
 import qualified Data.Text.Lazy.Encoding as EL
 import qualified Data.Text.Lazy.Fusion as SL
 import qualified Data.Text.Lazy.IO as TL
@@ -46,9 +49,15 @@ import qualified Data.Text.Lazy.Search as S (indices)
 import qualified Data.Text.UnsafeShift as U
 import qualified System.IO as IO
 
-import Data.Text.Tests.QuickCheckUtils
-import Data.Text.Tests.Utils
-import qualified Data.Text.Tests.SlowFunctions as Slow
+import Tests.QuickCheckUtils
+import Tests.Utils
+import qualified Tests.SlowFunctions as Slow
+
+#if MIN_VERSION_base(4,6,0)
+import Prelude hiding (replicate)
+#else
+import Prelude hiding (catch, replicate)
+#endif
 
 t_pack_unpack       = (T.unpack . T.pack) `eq` id
 tl_pack_unpack      = (TL.unpack . TL.pack) `eq` id
@@ -62,10 +71,20 @@ tl_unstreamChunks x = f 11 x == f 1000 x
 tl_chunk_unchunk    = (TL.fromChunks . TL.toChunks) `eq` id
 tl_from_to_strict   = (TL.fromStrict . TL.toStrict) `eq` id
 
+-- Note: this silently truncates code-points > 255 to 8-bit due to 'B.pack'
+encodeL1 :: T.Text -> B.ByteString
+encodeL1 = B.pack . map (fromIntegral . fromEnum) . T.unpack
+encodeLazyL1 :: TL.Text -> BL.ByteString
+encodeLazyL1 = BL.fromChunks . map encodeL1 . TL.toChunks
+
 t_ascii t    = E.decodeASCII (E.encodeUtf8 a) == a
     where a  = T.map (\c -> chr (ord c `mod` 128)) t
 tl_ascii t   = EL.decodeASCII (EL.encodeUtf8 a) == a
     where a  = TL.map (\c -> chr (ord c `mod` 128)) t
+t_latin1 t   = E.decodeLatin1 (encodeL1 a) == a
+    where a  = T.map (\c -> chr (ord c `mod` 256)) t
+tl_latin1 t  = EL.decodeLatin1 (encodeLazyL1 a) == a
+    where a  = TL.map (\c -> chr (ord c `mod` 256)) t
 t_utf8       = forAll genUnicode $ (E.decodeUtf8 . E.encodeUtf8) `eq` id
 t_utf8'      = forAll genUnicode $ (E.decodeUtf8' . E.encodeUtf8) `eq` (id . Right)
 tl_utf8      = forAll genUnicode $ (EL.decodeUtf8 . EL.encodeUtf8) `eq` id
@@ -79,6 +98,18 @@ tl_utf32LE   = forAll genUnicode $ (EL.decodeUtf32LE . EL.encodeUtf32LE) `eq` id
 t_utf32BE    = forAll genUnicode $ (E.decodeUtf32BE . E.encodeUtf32BE) `eq` id
 tl_utf32BE   = forAll genUnicode $ (EL.decodeUtf32BE . EL.encodeUtf32BE) `eq` id
 
+t_utf8_incr  = do
+        Positive n <- arbitrary
+        forAll genUnicode $ recode n `eq` id
+    where recode n = T.concat . feedChunksOf n E.streamDecodeUtf8 . E.encodeUtf8
+          feedChunksOf :: Int -> (B.ByteString -> E.Decoding) -> B.ByteString
+                       -> [T.Text]
+          feedChunksOf n f bs
+            | B.null bs  = []
+            | otherwise  = let (a,b) = B.splitAt n bs
+                               E.Some t _ f' = f a
+                           in t : feedChunksOf n f' b
+
 -- This is a poor attempt to ensure that the error handling paths on
 -- decode are exercised in some way.  Proper testing would be rather
 -- more involved.
@@ -610,17 +641,55 @@ shiftR_Word32 = shiftR :: Word32 -> Property
 
 -- Builder.
 
-t_builderSingleton = id `eqP`
-                     (unpackS . TB.toLazyText . mconcat . map TB.singleton)
-t_builderFromText = L.concat `eq` (unpackS . TB.toLazyText . mconcat .
+tb_singleton = id `eqP`
+               (unpackS . TB.toLazyText . mconcat . map TB.singleton)
+tb_fromText = L.concat `eq` (unpackS . TB.toLazyText . mconcat .
                                    map (TB.fromText . packS))
-t_builderAssociative s1 s2 s3 =
+tb_associative s1 s2 s3 =
     TB.toLazyText (b1 `mappend` (b2 `mappend` b3)) ==
     TB.toLazyText ((b1 `mappend` b2) `mappend` b3)
   where b1 = TB.fromText (packS s1)
         b2 = TB.fromText (packS s2)
         b3 = TB.fromText (packS s3)
 
+-- Numeric builder stuff.
+
+tb_decimal :: (Integral a, Show a) => a -> Bool
+tb_decimal = (TB.toLazyText . TB.decimal) `eq` (TL.pack . show)
+
+tb_decimal_integer (a::Integer) = tb_decimal a
+tb_decimal_int (a::Int) = tb_decimal a
+tb_decimal_int8 (a::Int8) = tb_decimal a
+tb_decimal_int16 (a::Int16) = tb_decimal a
+tb_decimal_int32 (a::Int32) = tb_decimal a
+tb_decimal_int64 (a::Int64) = tb_decimal a
+tb_decimal_word (a::Word) = tb_decimal a
+tb_decimal_word8 (a::Word8) = tb_decimal a
+tb_decimal_word16 (a::Word16) = tb_decimal a
+tb_decimal_word32 (a::Word32) = tb_decimal a
+tb_decimal_word64 (a::Word64) = tb_decimal a
+
+tb_hex :: (Integral a, Show a) => a -> Bool
+tb_hex = (TB.toLazyText . TB.hexadecimal) `eq` (TL.pack . flip showHex "")
+
+tb_hexadecimal_integer (a::Integer) = tb_hex a
+tb_hexadecimal_int (a::Int) = tb_hex a
+tb_hexadecimal_int8 (a::Int8) = tb_hex a
+tb_hexadecimal_int16 (a::Int16) = tb_hex a
+tb_hexadecimal_int32 (a::Int32) = tb_hex a
+tb_hexadecimal_int64 (a::Int64) = tb_hex a
+tb_hexadecimal_word (a::Word) = tb_hex a
+tb_hexadecimal_word8 (a::Word8) = tb_hex a
+tb_hexadecimal_word16 (a::Word16) = tb_hex a
+tb_hexadecimal_word32 (a::Word32) = tb_hex a
+tb_hexadecimal_word64 (a::Word64) = tb_hex a
+
+tb_realfloat :: (RealFloat a, Show a) => a -> Bool
+tb_realfloat = (TB.toLazyText . TB.realFloat) `eq` (TL.pack . show)
+
+tb_realfloat_float (a::Float) = tb_realfloat a
+tb_realfloat_double (a::Double) = tb_realfloat a
+
 -- Reading.
 
 t_decimal (n::Int) s =
@@ -681,6 +750,8 @@ t_take_drop_16 m t = T.append (takeWord16 n t) (dropWord16 n t) == t
   where n = small m
 t_use_from t = monadicIO $ assert . (==t) =<< run (useAsPtr t fromPtr)
 
+t_copy t = T.copy t == t
+
 -- Regression tests.
 s_filter_eq s = S.filter p t == S.streamList (filter p s)
     where p = (/= S.last t)
@@ -691,7 +762,7 @@ s_filter_eq s = S.filter p t == S.streamList (filter p s)
 -- themselves.
 shorten :: Int -> S.Stream a -> S.Stream a
 shorten n t@(S.Stream arr off len)
-    | n > 0     = S.Stream arr off (smaller (exactSize n) len) 
+    | n > 0     = S.Stream arr off (smaller (exactSize n) len)
     | otherwise = t
 
 tests :: Test
@@ -713,8 +784,11 @@ tests =
     testGroup "transcoding" [
       testProperty "t_ascii" t_ascii,
       testProperty "tl_ascii" tl_ascii,
+      testProperty "t_latin1" t_latin1,
+      testProperty "tl_latin1" tl_latin1,
       testProperty "t_utf8" t_utf8,
       testProperty "t_utf8'" t_utf8',
+      testProperty "t_utf8_incr" t_utf8_incr,
       testProperty "tl_utf8" tl_utf8,
       testProperty "tl_utf8'" tl_utf8',
       testProperty "t_utf16LE" t_utf16LE,
@@ -1080,9 +1154,39 @@ tests =
     ],
 
     testGroup "builder" [
-      testProperty "t_builderSingleton" t_builderSingleton,
-      testProperty "t_builderFromText" t_builderFromText,
-      testProperty "t_builderAssociative" t_builderAssociative
+      testProperty "tb_associative" tb_associative,
+      testGroup "decimal" [
+        testProperty "tb_decimal_int" tb_decimal_int,
+        testProperty "tb_decimal_int8" tb_decimal_int8,
+        testProperty "tb_decimal_int16" tb_decimal_int16,
+        testProperty "tb_decimal_int32" tb_decimal_int32,
+        testProperty "tb_decimal_int64" tb_decimal_int64,
+        testProperty "tb_decimal_integer" tb_decimal_integer,
+        testProperty "tb_decimal_word" tb_decimal_word,
+        testProperty "tb_decimal_word8" tb_decimal_word8,
+        testProperty "tb_decimal_word16" tb_decimal_word16,
+        testProperty "tb_decimal_word32" tb_decimal_word32,
+        testProperty "tb_decimal_word64" tb_decimal_word64
+      ],
+      testGroup "hexadecimal" [
+        testProperty "tb_hexadecimal_int" tb_hexadecimal_int,
+        testProperty "tb_hexadecimal_int8" tb_hexadecimal_int8,
+        testProperty "tb_hexadecimal_int16" tb_hexadecimal_int16,
+        testProperty "tb_hexadecimal_int32" tb_hexadecimal_int32,
+        testProperty "tb_hexadecimal_int64" tb_hexadecimal_int64,
+        testProperty "tb_hexadecimal_integer" tb_hexadecimal_integer,
+        testProperty "tb_hexadecimal_word" tb_hexadecimal_word,
+        testProperty "tb_hexadecimal_word8" tb_hexadecimal_word8,
+        testProperty "tb_hexadecimal_word16" tb_hexadecimal_word16,
+        testProperty "tb_hexadecimal_word32" tb_hexadecimal_word32,
+        testProperty "tb_hexadecimal_word64" tb_hexadecimal_word64
+      ],
+      testGroup "realfloat" [
+        testProperty "tb_realfloat_double" tb_realfloat_double,
+        testProperty "tb_realfloat_float" tb_realfloat_float
+      ],
+      testProperty "tb_fromText" tb_fromText,
+      testProperty "tb_singleton" tb_singleton
     ],
 
     testGroup "read" [
@@ -1113,6 +1217,7 @@ tests =
       testProperty "t_dropWord16" t_dropWord16,
       testProperty "t_takeWord16" t_takeWord16,
       testProperty "t_take_drop_16" t_take_drop_16,
-      testProperty "t_use_from" t_use_from
+      testProperty "t_use_from" t_use_from,
+      testProperty "t_copy" t_copy
     ]
   ]
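[editor's note] The new `tb_decimal`, `tb_hexadecimal`, and `tb_realfloat` properties above pin the numeric builders against `show`/`showHex`. As a small sketch of the builder API they cover (module names as imported in the diff above; `renderInt`, `renderHex`, and `renderDouble` are illustrative helpers):

    -- Sketch only: rendering numbers through the lazy-text builders.
    import qualified Data.Text.Lazy as TL
    import qualified Data.Text.Lazy.Builder as TB
    import qualified Data.Text.Lazy.Builder.Int as TB
    import qualified Data.Text.Lazy.Builder.RealFloat as TB

    renderInt :: Int -> TL.Text
    renderInt = TB.toLazyText . TB.decimal

    renderHex :: Int -> TL.Text
    renderHex = TB.toLazyText . TB.hexadecimal

    renderDouble :: Double -> TL.Text
    renderDouble = TB.toLazyText . TB.realFloat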
diff --git a/tests/tests/src/Data/Text/Tests/QuickCheckUtils.hs b/tests/Tests/QuickCheckUtils.hs
similarity index 98%
rename from tests/tests/src/Data/Text/Tests/QuickCheckUtils.hs
rename to tests/Tests/QuickCheckUtils.hs
index e6449ffaed5516874ad8f2e96ed88f274dec7e87..00d4d3fa2d2992e4602bed95e3c35138ddf38b2a 100644
--- a/tests/tests/src/Data/Text/Tests/QuickCheckUtils.hs
+++ b/tests/Tests/QuickCheckUtils.hs
@@ -1,10 +1,10 @@
 -- | This module provides quickcheck utilities, e.g. arbitrary and show
 -- instances, and comparison functions, so we can focus on the actual properties
--- in the 'Data.Text.Tests.Properties' module.
+-- in the 'Tests.Properties' module.
 --
 {-# LANGUAGE CPP, FlexibleInstances, TypeSynonymInstances #-}
 {-# OPTIONS_GHC -fno-warn-orphans #-}
-module Data.Text.Tests.QuickCheckUtils
+module Tests.QuickCheckUtils
     (
       genUnicode
     , unsquare
@@ -24,7 +24,7 @@ module Data.Text.Tests.QuickCheckUtils
     , eqP
 
     , Encoding (..)
-    
+
     , write_read
     ) where
 
@@ -50,7 +50,7 @@ import qualified Data.Text.Lazy.Fusion as TLF
 import qualified Data.Text.Lazy.Internal as TL
 import qualified System.IO as IO
 
-import Data.Text.Tests.Utils
+import Tests.Utils
 
 instance Random I16 where
     randomR = integralRandomR
@@ -67,7 +67,7 @@ genUnicode = fmap fromString string where
     string = sized $ \n ->
         do k <- choose (0,n)
            sequence [ char | _ <- [1..k] ]
-    
+
     excluding :: [a -> Bool] -> Gen a -> Gen a
     excluding bad gen = loop
       where
@@ -76,14 +76,14 @@ genUnicode = fmap fromString string where
           if or (map ($ x) bad)
             then loop
             else return x
-    
+
     reserved = [lowSurrogate, highSurrogate, noncharacter]
     lowSurrogate c = c >= 0xDC00 && c <= 0xDFFF
     highSurrogate c = c >= 0xD800 && c <= 0xDBFF
     noncharacter c = masked == 0xFFFE || masked == 0xFFFF
       where
-        masked = c .&. 0xFFFF 
-    
+        masked = c .&. 0xFFFF
+
     ascii = choose (0,0x7F)
     plane0 = choose (0xF0, 0xFFFF)
     plane1 = oneof [ choose (0x10000, 0x10FFF)
@@ -109,7 +109,7 @@ genUnicode = fmap fromString string where
                    ]
     plane14 = choose (0xE0000, 0xE0FFF)
     planes = [ascii, plane0, plane1, plane2, plane14]
-    
+
     char = chr `fmap` excluding reserved (oneof planes)
 
 -- For tests that have O(n^2) running times or input sizes, resize
diff --git a/tests/tests/src/Data/Text/Tests/Regressions.hs b/tests/Tests/Regressions.hs
similarity index 62%
rename from tests/tests/src/Data/Text/Tests/Regressions.hs
rename to tests/Tests/Regressions.hs
index a80c616afafbcdfc27e6cad21a0bfb0317f091e8..5d2db97626db2b654f533047ddbae171cdb60408 100644
--- a/tests/tests/src/Data/Text/Tests/Regressions.hs
+++ b/tests/Tests/Regressions.hs
@@ -1,24 +1,27 @@
 -- | Regression tests for specific bugs.
 --
 {-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-}
-module Data.Text.Tests.Regressions
+module Tests.Regressions
     (
       tests
     ) where
 
 import Control.Exception (SomeException, handle)
 import System.IO
-import Test.HUnit (assertFailure)
+import Test.HUnit (assertBool, assertEqual, assertFailure)
 import qualified Data.ByteString as B
+import Data.ByteString.Char8 ()
 import qualified Data.ByteString.Lazy as LB
 import qualified Data.Text as T
+import qualified Data.Text.Encoding as TE
 import qualified Data.Text.IO as T
 import qualified Data.Text.Lazy as LT
 import qualified Data.Text.Lazy.Encoding as LE
+import qualified Data.Text.Unsafe as T
 import qualified Test.Framework as F
 import qualified Test.Framework.Providers.HUnit as F
 
-import Data.Text.Tests.Utils (withTempFile)
+import Tests.Utils (withTempFile)
 
 -- Reported by Michael Snoyman: UTF-8 encoding a large lazy bytestring
 -- caused either a segfault or attempt to allocate a negative number
@@ -49,9 +52,31 @@ replicate_crash = handle (\(_::SomeException) -> return ()) $
     power | maxBound == (2147483647::Int) = 28
           | otherwise                     = 60 :: Int
 
+-- Reported by John Millikin: a UTF-8 decode error handler could
+-- return a bogus substitution character, which we would write without
+-- checking.
+utf8_decode_unsafe :: IO ()
+utf8_decode_unsafe = do
+  let t = TE.decodeUtf8With (\_ _ -> Just '\xdc00') "\x80"
+  assertBool "broken error recovery shouldn't break us" (t == "\xfffd")
+
+-- Reported by Eric Seidel: we mishandled mapping Chars that fit in a
+-- single Word16 to Chars that require two.
+mapAccumL_resize :: IO ()
+mapAccumL_resize = do
+  let f a _ = (a, '\65536')
+      count = 5
+      val   = T.mapAccumL f (0::Int) (T.replicate count "a")
+  assertEqual "mapAccumL should correctly fill buffers for two-word results"
+             (0, T.replicate count "\65536") val
+  assertEqual "mapAccumL should correctly size buffers for two-word results"
+             (count * 2) (T.lengthWord16 (snd val))
+
 tests :: F.Test
 tests = F.testGroup "Regressions"
     [ F.testCase "hGetContents_crash" hGetContents_crash
     , F.testCase "lazy_encode_crash" lazy_encode_crash
+    , F.testCase "mapAccumL_resize" mapAccumL_resize
     , F.testCase "replicate_crash" replicate_crash
+    , F.testCase "utf8_decode_unsafe" utf8_decode_unsafe
     ]
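[editor's note] The `utf8_decode_unsafe` regression above guards against decode-error handlers that return unpaired surrogates. For contrast, a minimal sketch of the supported way to decode possibly-invalid UTF-8 with the stock lenient handler (the input bytes are illustrative):

    -- Sketch only: lenient decoding substitutes U+FFFD for invalid bytes.
    import qualified Data.ByteString.Char8 as B8
    import Data.Text.Encoding (decodeUtf8With)
    import Data.Text.Encoding.Error (lenientDecode)
    import qualified Data.Text.IO as T

    main :: IO ()
    main = T.putStrLn (decodeUtf8With lenientDecode (B8.pack "\x80"))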
diff --git a/tests/tests/src/Data/Text/Tests/SlowFunctions.hs b/tests/Tests/SlowFunctions.hs
similarity index 97%
rename from tests/tests/src/Data/Text/Tests/SlowFunctions.hs
rename to tests/Tests/SlowFunctions.hs
index f6d21a156339021db828df24d86240757c1552b6..2d0b9a3cdaf4c3eb707b78632e829887c4abb29e 100644
--- a/tests/tests/src/Data/Text/Tests/SlowFunctions.hs
+++ b/tests/Tests/SlowFunctions.hs
@@ -1,5 +1,5 @@
 {-# LANGUAGE BangPatterns #-}
-module Data.Text.Tests.SlowFunctions
+module Tests.SlowFunctions
     (
       indices
     , splitOn
diff --git a/tests/tests/src/Data/Text/Tests/Utils.hs b/tests/Tests/Utils.hs
similarity index 98%
rename from tests/tests/src/Data/Text/Tests/Utils.hs
rename to tests/Tests/Utils.hs
index ed828b5470688fd0fb87f875acecf4dd199a1e4e..34f9acca1fc13cde4010b84ffc02b93fcd960fac 100644
--- a/tests/tests/src/Data/Text/Tests/Utils.hs
+++ b/tests/Tests/Utils.hs
@@ -1,7 +1,7 @@
 -- | Miscellaneous testing utilities
 --
 {-# LANGUAGE ScopedTypeVariables #-}
-module Data.Text.Tests.Utils
+module Tests.Utils
     (
       (=^=)
     , withRedirect
diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/Builder.hs b/tests/benchmarks/src/Data/Text/Benchmarks/Builder.hs
deleted file mode 100644
index 5bb83fed0a2742ec22e76c035933f28ec71cefbc..0000000000000000000000000000000000000000
--- a/tests/benchmarks/src/Data/Text/Benchmarks/Builder.hs
+++ /dev/null
@@ -1,48 +0,0 @@
--- | Testing the internal builder monoid
---
--- Tested in this benchmark:
---
--- * Concatenating many small strings using a builder
---
-{-# LANGUAGE OverloadedStrings #-}
-module Data.Text.Benchmarks.Builder
-    ( benchmark
-    ) where
-
-import Criterion (Benchmark, bgroup, bench, nf)
-import Data.Binary.Builder as B
-import Data.ByteString.Char8 ()
-import Data.Monoid (mconcat)
-import qualified Blaze.ByteString.Builder as Blaze
-import qualified Blaze.ByteString.Builder.Char.Utf8 as Blaze
-import qualified Data.ByteString as SB
-import qualified Data.ByteString.Lazy as LB
-import qualified Data.Text as T
-import qualified Data.Text.Lazy as LT
-import qualified Data.Text.Lazy.Builder as LTB
-
-benchmark :: IO Benchmark
-benchmark = return $ bgroup "Builder"
-    [ bench "LazyText" $ nf
-        (LT.length . LTB.toLazyText . mconcat . map LTB.fromText) texts
-    , bench "Binary" $ nf
-        (LB.length . B.toLazyByteString . mconcat . map B.fromByteString)
-        byteStrings
-    , bench "Blaze" $ nf
-        (LB.length . Blaze.toLazyByteString . mconcat . map Blaze.fromString)
-        strings
-    ]
-
-texts :: [T.Text]
-texts = take 200000 $ cycle ["foo", "λx", "由の"]
-{-# NOINLINE texts #-}
-
--- Note that the non-ascii characters will be chopped
-byteStrings :: [SB.ByteString]
-byteStrings = take 200000 $ cycle ["foo", "λx", "由の"]
-{-# NOINLINE byteStrings #-}
-
--- Note that the non-ascii characters will be chopped
-strings :: [String]
-strings = take 200000 $ cycle ["foo", "λx", "由の"]
-{-# NOINLINE strings #-}
diff --git a/tests/benchmarks/text-benchmarks.cabal b/tests/benchmarks/text-benchmarks.cabal
deleted file mode 100644
index 72eff1453fe74ff82b7b7f0edce3254be55e5134..0000000000000000000000000000000000000000
--- a/tests/benchmarks/text-benchmarks.cabal
+++ /dev/null
@@ -1,36 +0,0 @@
-name:                text-benchmarks
-version:             0.0.0.0
-synopsis:            Benchmarks for the text package
-description:         Benchmarks for the text package
-homepage:            https://bitbucket.org/bos/text
-license:             BSD3
-license-file:        ../../LICENSE
-author:              Jasper Van der Jeugt <jaspervdj@gmail.com>,
-                     Bryan O'Sullivan <bos@serpentine.com>,
-                     Tom Harper <rtomharper@googlemail.com>,
-                     Duncan Coutts <duncan@haskell.org>
-maintainer:          jaspervdj@gmail.com
-category:            Text
-build-type:          Simple
-
-cabal-version:       >=1.2
-
-executable text-benchmarks
-  hs-source-dirs: src ../..
-  c-sources:      ../../cbits/cbits.c
-                  cbits/time_iconv.c
-  main-is:        Data/Text/Benchmarks.hs
-  ghc-options:    -Wall -O2
-  cpp-options:    -DHAVE_DEEPSEQ
-  build-depends:  base              >= 4   && < 5,
-                  criterion         >= 0.5 && < 0.7,
-                  bytestring        >= 0.9 && < 0.10,
-                  deepseq           >= 1.1 && < 1.2,
-                  filepath          >= 1.1 && < 1.3,
-                  directory         >= 1.1 && < 1.2,
-                  containers        >= 0.3 && < 0.5,
-                  binary            >= 0.5 && < 0.6,
-                  utf8-string       >= 0.3 && < 0.4,
-                  blaze-builder     >= 0.3 && < 0.4,
-                  bytestring-lexing >= 0.2 && < 0.3,
-                  stringsearch      >= 0.3 && < 0.4
diff --git a/tests/tests/scripts/cover-stdio.sh b/tests/scripts/cover-stdio.sh
similarity index 100%
rename from tests/tests/scripts/cover-stdio.sh
rename to tests/scripts/cover-stdio.sh
diff --git a/tests/tests/text-tests.cabal b/tests/text-tests.cabal
similarity index 72%
rename from tests/tests/text-tests.cabal
rename to tests/text-tests.cabal
index f90640e520098a5219cdcaf24aee6dd9b9df8723..96cb79f831de6b7f776749d8fe2e4ada36c7befe 100644
--- a/tests/tests/text-tests.cabal
+++ b/tests/text-tests.cabal
@@ -4,7 +4,7 @@ synopsis:      Functional tests for the text package
 description:   Functional tests for the text package
 homepage:      https://bitbucket.org/bos/text
 license:       BSD3
-license-file:  ../../LICENSE
+license-file:  ../LICENSE
 author:        Jasper Van der Jeugt <jaspervdj@gmail.com>,
                Bryan O'Sullivan <bos@serpentine.com>,
                Tom Harper <rtomharper@googlemail.com>,
@@ -20,8 +20,7 @@ flag hpc
   default:     False
 
 executable text-tests
-  hs-source-dirs: src
-  main-is:        Data/Text/Tests.hs
+  main-is: Tests.hs
 
   ghc-options:
     -Wall -threaded -O0 -rtsopts
@@ -35,21 +34,20 @@ executable text-tests
     -DHAVE_DEEPSEQ
 
   build-depends:
-    text-tests,
-    base                       >= 4   && < 5,
-    bytestring                 >= 0.9 && < 0.10,
-    deepseq                    >= 1.1,
-    directory                  >= 1.1 && < 1.2,
-    random                     >= 1.0 && < 1.1,
-    QuickCheck                 >= 2.4 && < 2.5,
-    HUnit                      >= 1.2 && < 1.3,
-    test-framework             >= 0.4 && < 0.5,
-    test-framework-quickcheck2 >= 0.2 && < 0.3,
-    test-framework-hunit       >= 0.2 && < 0.3
+    HUnit >= 1.2,
+    QuickCheck >= 2.4,
+    base == 4.*,
+    bytestring,
+    deepseq,
+    directory,
+    random,
+    test-framework >= 0.4,
+    test-framework-hunit >= 0.2,
+    test-framework-quickcheck2 >= 0.2,
+    text-tests
 
 executable text-tests-stdio
-  hs-source-dirs: src
-  main-is:        Data/Text/Tests/IO.hs
+  main-is:        Tests/IO.hs
 
   ghc-options:
     -Wall -threaded -rtsopts
@@ -64,41 +62,45 @@ executable text-tests-stdio
     base >= 4 && < 5
 
 library
-  hs-source-dirs: ../..
-  c-sources: ../../cbits/cbits.c
+  hs-source-dirs: ..
+  c-sources: ../cbits/cbits.c
+  include-dirs: ../include
   exposed-modules:
     Data.Text
     Data.Text.Array
     Data.Text.Encoding
     Data.Text.Encoding.Error
-    Data.Text.Foreign
-    Data.Text.IO
-    Data.Text.Internal
-    Data.Text.Lazy
-    Data.Text.Lazy.Builder
-    Data.Text.Lazy.Builder.Int
-    Data.Text.Lazy.Builder.RealFloat
-    Data.Text.Lazy.Encoding
-    Data.Text.Lazy.IO
-    Data.Text.Lazy.Internal
-    Data.Text.Lazy.Read
-    Data.Text.Read
     Data.Text.Encoding.Fusion
     Data.Text.Encoding.Fusion.Common
     Data.Text.Encoding.Utf16
     Data.Text.Encoding.Utf32
     Data.Text.Encoding.Utf8
+    Data.Text.Foreign
     Data.Text.Fusion
     Data.Text.Fusion.CaseMapping
     Data.Text.Fusion.Common
     Data.Text.Fusion.Internal
     Data.Text.Fusion.Size
+    Data.Text.IO
     Data.Text.IO.Internal
+    Data.Text.Internal
+    Data.Text.Lazy
+    Data.Text.Lazy.Builder
     Data.Text.Lazy.Builder.Functions
+    Data.Text.Lazy.Builder.Int
+    Data.Text.Lazy.Builder.Int.Digits
+    Data.Text.Lazy.Builder.Internal
+    Data.Text.Lazy.Builder.RealFloat
     Data.Text.Lazy.Builder.RealFloat.Functions
+    Data.Text.Lazy.Encoding
     Data.Text.Lazy.Encoding.Fusion
     Data.Text.Lazy.Fusion
+    Data.Text.Lazy.IO
+    Data.Text.Lazy.Internal
+    Data.Text.Lazy.Read
     Data.Text.Lazy.Search
+    Data.Text.Private
+    Data.Text.Read
     Data.Text.Search
     Data.Text.Unsafe
     Data.Text.Unsafe.Base
@@ -117,8 +119,8 @@ library
 
   build-depends:
     array,
-    base        >= 4   && < 5,
-    bytestring  >= 0.9 && < 1.0,
-    deepseq     >= 1.1,
-    integer-gmp >= 0.2 && < 0.3,
-    ghc-prim    >= 0.2 && < 0.3
+    base == 4.*,
+    bytestring,
+    deepseq,
+    ghc-prim,
+    integer-gmp
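
Both cabal files now point their test drivers at a top-level Tests.hs and depend on test-framework, test-framework-hunit, test-framework-quickcheck2, HUnit and QuickCheck. A minimal, hypothetical entry point in that style, with illustrative groups only (not the package's actual test modules), would be:

module Main (main) where

import Test.Framework (defaultMain, testGroup)
import Test.Framework.Providers.HUnit (testCase)
import Test.Framework.Providers.QuickCheck2 (testProperty)
import Test.HUnit ((@?=))
import qualified Data.Text as T

-- defaultMain runs the groups and sets the process exit code,
-- which is what an exitcode-stdio-1.0 test-suite stanza expects.
main :: IO ()
main = defaultMain
  [ testGroup "examples"
    [ testProperty "pack preserves length" $
        \s -> T.length (T.pack s) == length s
    , testCase "toUpper on ASCII" $
        T.toUpper (T.pack "text") @?= T.pack "TEXT"
    ]
  ]
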
diff --git a/text.cabal b/text.cabal
index 7f66e8c8746405f9a14cc2f11a24e30807644de0..01381ab4ad6b4e716a690a9cb495acde186e2678 100644
--- a/text.cabal
+++ b/text.cabal
@@ -1,9 +1,9 @@
 name:           text
-version:        0.11.2.0
+version:        0.11.5.0
 homepage:       https://github.com/bos/text
 bug-reports:    https://github.com/bos/text/issues
 synopsis:       An efficient packed Unicode text type.
-description:    
+description:
     .
     An efficient packed, immutable Unicode text type (both strict and
     lazy), with a powerful loop fusion optimization framework.
@@ -31,6 +31,15 @@ description:
     the @text-icu@ package:
     <http://hackage.haskell.org/package/text-icu>
     .
+    &#8212;&#8212; RELEASE NOTES &#8212;&#8212;
+    .
+    Changes in 0.11.2.0:
+    .
+    * String literals are now converted directly from the packed form
+      in which GHC stores them into 'Text', without an intermediate
+      'String' and without inlining the conversion code at every site
+      where a literal appears.
+    .
 license:        BSD3
 license-file:   LICENSE
 author:         Bryan O'Sullivan <bos@serpentine.com>
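
For the 0.11.2.0 note above, a hedged sketch of the mechanism rather than the library's actual rules: GHC stores a string literal as a packed Addr# and normally applies unpackCString# to rebuild a [Char]; a rewrite rule can intercept that application and hand the address to a dedicated decoder. fromLiteral and packLiteral# below are hypothetical names used only for illustration, and the placeholder body of packLiteral# still round-trips through [Char], which the real conversion avoids.

{-# LANGUAGE MagicHash #-}
module LiteralSketch (fromLiteral) where

import Data.Text (Text)
import qualified Data.Text as T
import GHC.CString (unpackCString#)
import GHC.Prim (Addr#)

-- What an IsString-style conversion might call on the unpacked literal.
fromLiteral :: String -> Text
fromLiteral = T.pack
{-# INLINE [0] fromLiteral #-}

-- Direct decoder for a NUL-terminated literal at the given address.
-- (Placeholder body: a real decoder would read the bytes directly.)
packLiteral# :: Addr# -> Text
packLiteral# a = T.pack (unpackCString# a)
{-# NOINLINE packLiteral# #-}

-- Rewrite the literal pipeline so the intermediate [Char] never exists.
{-# RULES "TEXT literal (sketch)" forall a.
      fromLiteral (unpackCString# a) = packLiteral# a
  #-}

The INLINE [0] phase restriction keeps fromLiteral visible during the early simplifier phases in which rewrite rules fire; the NOINLINE on packLiteral# keeps the decoder out-of-line, so the conversion code is not duplicated at every literal site.
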
@@ -40,31 +49,36 @@ category:       Data, Text
 build-type:     Simple
 cabal-version:  >= 1.8
 extra-source-files:
-    README.markdown
     -- scripts/CaseFolding.txt
     -- scripts/SpecialCasing.txt
+    README.markdown
+    benchmarks/Setup.hs
+    benchmarks/cbits/*.c
+    benchmarks/haskell/*.hs
+    benchmarks/haskell/Benchmarks/*.hs
+    benchmarks/python/*.py
+    benchmarks/ruby/*.rb
+    benchmarks/text-benchmarks.cabal
     scripts/*.hs
-    tests/README.markdown
-    tests/benchmarks/Setup.hs
-    tests/benchmarks/cbits/*.c
-    tests/benchmarks/python/*.py
-    tests/benchmarks/ruby/*.rb
-    tests/benchmarks/src/Data/Text/*.hs
-    tests/benchmarks/src/Data/Text/Benchmarks/*.hs
-    tests/benchmarks/text-benchmarks.cabal
-    tests/tests/.ghci
-    tests/tests/Makefile
-    tests/tests/scripts/*.sh
-    tests/tests/src/Data/Text/*.hs
-    tests/tests/src/Data/Text/Tests/*.hs
-    tests/tests/text-tests.cabal
+    tests-and-benchmarks.markdown
+    tests/*.hs
+    tests/.ghci
+    tests/Makefile
+    tests/Tests/*.hs
+    tests/scripts/*.sh
+    tests/text-tests.cabal
 
 flag developer
   description: operate in developer mode
   default: False
 
+flag integer-simple
+  description: Use the simple integer library instead of GMP
+  default: False
+
 library
-  c-sources: cbits/cbits.c
+  c-sources:    cbits/cbits.c
+  include-dirs: include
 
   exposed-modules:
     Data.Text
@@ -83,6 +97,7 @@ library
     Data.Text.Lazy.Internal
     Data.Text.Lazy.Read
     Data.Text.Read
+    Data.Text.Unsafe
   other-modules:
     Data.Text.Encoding.Fusion
     Data.Text.Encoding.Fusion.Common
@@ -96,51 +111,46 @@ library
     Data.Text.Fusion.Size
     Data.Text.IO.Internal
     Data.Text.Lazy.Builder.Functions
+    Data.Text.Lazy.Builder.Int.Digits
+    Data.Text.Lazy.Builder.Internal
     Data.Text.Lazy.Builder.RealFloat.Functions
     Data.Text.Lazy.Encoding.Fusion
     Data.Text.Lazy.Fusion
     Data.Text.Lazy.Search
     Data.Text.Private
     Data.Text.Search
-    Data.Text.Unsafe
     Data.Text.Unsafe.Base
     Data.Text.UnsafeChar
     Data.Text.UnsafeShift
     Data.Text.Util
 
   build-depends:
-    array,
-    base       < 5,
-    bytestring >= 0.10 && < 1.0
-  if impl(ghc >= 6.10)
-    build-depends:
-      ghc-prim, base >= 4, deepseq >= 1.1.0.0
-    cpp-options: -DHAVE_DEEPSEQ
-  else
-    build-depends: extensible-exceptions
-    extensions: PatternSignatures
+    array      >= 0.3,
+    base       >= 4.2 && < 5,
+    bytestring >= 0.10.4.0,
+    deepseq    >= 1.1.0.0,
+    ghc-prim   >= 0.2
 
-  ghc-options: -Wall -funbox-strict-fields -O2
-  if impl(ghc >= 6.8)
-    ghc-options: -fwarn-tabs
+  cpp-options: -DHAVE_DEEPSEQ
+  ghc-options: -Wall -fwarn-tabs -funbox-strict-fields -O2
   if flag(developer)
     ghc-prof-options: -auto-all
     ghc-options: -Werror
     cpp-options: -DASSERTS
 
-  if impl(ghc >= 6.11)
-    cpp-options: -DINTEGER_GMP
-    build-depends: integer-gmp >= 0.2 && < 0.5
-
-  if impl(ghc >= 6.9) && impl(ghc < 6.11)
+  if flag(integer-simple)
+    cpp-options: -DINTEGER_SIMPLE
+    build-depends: integer-simple >= 0.1 && < 0.5
+  else
     cpp-options: -DINTEGER_GMP
-    build-depends: integer >= 0.1 && < 0.2
+    build-depends: integer-gmp >= 0.2
 
 test-suite tests
   type:           exitcode-stdio-1.0
-  hs-source-dirs: . tests/tests/src
-  main-is:        Data/Text/Tests.hs
+  hs-source-dirs: tests .
+  main-is:        Tests.hs
   c-sources:      cbits/cbits.c
+  include-dirs:   include
 
   ghc-options:
     -Wall -threaded -O0 -rtsopts
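
The new integer-simple flag above only selects which integer backend is linked and defines either INTEGER_SIMPLE or INTEGER_GMP through cpp-options. A minimal sketch of how a module can branch on those macros (hypothetical module, not taken from the package):

{-# LANGUAGE CPP #-}
-- Hypothetical module reporting which integer backend the package was
-- configured against, based on the macros set by the cabal flag.
module IntegerBackend (backendName) where

backendName :: String
#if defined(INTEGER_SIMPLE)
backendName = "integer-simple"
#elif defined(INTEGER_GMP)
backendName = "integer-gmp"
#else
backendName = "unknown"
#endif
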
@@ -149,16 +159,25 @@ test-suite tests
     -DASSERTS -DHAVE_DEEPSEQ
 
   build-depends:
-    base                       >= 4    && < 5,
-    bytestring                 >= 0.10 && < 1.0,
-    deepseq                    >= 1.1,
-    directory                  >= 1.0  && < 1.2,
-    random                     >= 1.0  && < 1.1,
-    QuickCheck                 >= 2.4  && < 2.5,
-    HUnit                      >= 1.2  && < 1.3,
-    test-framework             >= 0.4  && < 0.5,
-    test-framework-quickcheck2 >= 0.2  && < 0.3,
-    test-framework-hunit       >= 0.2  && < 0.3
+    HUnit >= 1.2,
+    QuickCheck >= 2.4,
+    array,
+    base,
+    bytestring,
+    deepseq,
+    directory,
+    ghc-prim,
+    random,
+    test-framework >= 0.4,
+    test-framework-hunit >= 0.2,
+    test-framework-quickcheck2 >= 0.2
+
+  if flag(integer-simple)
+    cpp-options: -DINTEGER_SIMPLE
+    build-depends: integer-simple >= 0.1 && < 0.5
+  else
+    cpp-options: -DINTEGER_GMP
+    build-depends: integer-gmp >= 0.2
 
 source-repository head
   type:     git