@@ -34,7 +34,8 @@ module Data.Attoparsec.Text.Buffer
34
34
, iter
35
35
, iter_
36
36
, substring
37
- , dropWord16
37
+ , lengthCodeUnits
38
+ , dropCodeUnits
38
39
) where
39
40
40
41
import Control.Exception (assert )
@@ -44,8 +45,14 @@ import Data.Monoid as Mon (Monoid(..))
44
45
import Data.Semigroup (Semigroup (.. ))
45
46
import Data.Text ()
46
47
import Data.Text.Internal (Text (.. ))
48
+ #if MIN_VERSION_text(2,0,0)
49
+ import Data.Text.Internal.Encoding.Utf8 (utf8LengthByLeader )
50
+ import Data.Text.Unsafe (iterArray , lengthWord8 )
51
+ #else
47
52
import Data.Text.Internal.Encoding.Utf16 (chr2 )
48
53
import Data.Text.Internal.Unsafe.Char (unsafeChr )
54
+ import Data.Text.Unsafe (lengthWord16 )
55
+ #endif
49
56
import Data.Text.Unsafe (Iter (.. ))
50
57
import Foreign.Storable (sizeOf )
51
58
import GHC.Exts (Int (.. ), indexIntArray #, unsafeCoerce #, writeIntArray #)
@@ -108,16 +115,25 @@ append (Buf arr0 off0 len0 cap0 gen0) !arr1 !off1 !len1 = runST $ do
108
115
let newgen = gen + 1
109
116
marr <- unsafeThaw arr0
110
117
writeGen marr newgen
+ -- Append in place. With text >= 2, 'A.copyI' takes the number of code
+ -- units to copy as its FIRST argument; older versions take the first
+ -- destination offset /not/ to copy as their LAST argument. Only the
+ -- (newlen - len0) units coming from arr1 must be copied here: passing
+ -- newlen would write past the buffer's capacity and read past arr1.
118
+ #if MIN_VERSION_text(2,0,0)
119
+ A. copyI (newlen - len0) marr (off0+ len0) arr1 off1
120
+ #else
111
121
A. copyI marr (off0+ len0) arr1 off1 (off0+ newlen)
122
+ #endif
112
123
arr2 <- A. unsafeFreeze marr
113
124
return (Buf arr2 off0 newlen cap0 newgen)
114
125
else do
115
126
let newcap = newlen * 2
116
127
newgen = 1
117
128
marr <- A. new (newcap + woff)
118
129
writeGen marr newgen
+ -- Fresh array: first the len0 existing units, then the (newlen - len0)
+ -- appended units. Both counts mirror the spans of the pre-2.0 calls
+ -- below (woff..woff+len0 and woff+len0..woff+newlen).
130
+ #if MIN_VERSION_text(2,0,0)
131
+ A. copyI len0 marr woff arr0 off0
132
+ A. copyI (newlen - len0) marr (woff+ len0) arr1 off1
133
+ #else
119
134
A. copyI marr woff arr0 off0 (woff+ len0)
120
135
A. copyI marr (woff+ len0) arr1 off1 (woff+ newlen)
136
+ #endif
121
137
arr2 <- A. unsafeFreeze marr
122
138
return (Buf arr2 woff newlen newcap newgen)
123
139
@@ -132,11 +148,52 @@ substring s l (Buf arr off len _ _) =
132
148
Text arr (off+ s) l
133
149
{-# INLINE substring #-}
134
150
135
- dropWord16 :: Int -> Buffer -> Text
136
- dropWord16 s (Buf arr off len _ _) =
151
+ #if MIN_VERSION_text(2,0,0)
152
+
+ -- | Length of a 'Text' measured in code units. On this side of the CPP
+ -- switch (@text >= 2@) that is whatever 'lengthWord8' reports.
153
+ lengthCodeUnits :: Text -> Int
154
+ lengthCodeUnits txt = lengthWord8 txt
155
+
+ -- | Skip the first @n@ code units of the buffer and return the remainder
+ -- as a 'Text' that reuses the buffer's array. The only bounds check
+ -- (@0 <= n <= len@) is the 'assert'.
156
+ dropCodeUnits :: Int -> Buffer -> Text
157
+ dropCodeUnits n (Buf arr off len _ _) =
158
+   assert (0 <= n && n <= len) rest
159
+   where rest = Text arr (off + n) (len - n)
160
+ {-# INLINE dropCodeUnits #-}
161
+
162
+ -- | /O(1)/ Take one (unsafe) step forwards through the buffer's UTF-8
163
+ -- array, starting @i@ code units after the buffer's own offset. The
164
+ -- 'Iter' holds the current character and the delta to the next offset.
165
+ iter :: Buffer -> Int -> Iter
166
+ iter (Buf arr base _ _ _) i = let pos = base + i in iterArray arr pos
167
+ {-# INLINE iter #-}
168
+
169
+ -- | /O(1)/ Step once through the buffer's UTF-8 array without decoding:
170
+ -- the delta to the next offset is 'utf8LengthByLeader' of the unit at @i@.
171
+ iter_ :: Buffer -> Int -> Int
172
+ iter_ (Buf arr base _ _ _) i = utf8LengthByLeader leader
+   where leader = A.unsafeIndex arr (base + i)
173
+ {-# INLINE iter_ #-}
174
+
+ -- | Reinterpret an immutable array as a mutable one without copying.
+ -- The underlying byte array is passed through 'unsafeCoerce#', so any
+ -- later write is visible through the original immutable handle as well.
175
+ unsafeThaw :: A.Array -> ST s (A.MArray s)
176
+ unsafeThaw (A.ByteArray frozen#) =
177
+   ST (\st# -> (# st#, A.MutableByteArray (unsafeCoerce# frozen#) #))
178
+
+ -- | Read the 'Int' stored in slot 0 of the array (the same slot that
+ -- 'writeGen' writes).
179
+ readGen :: A.Array -> Int
180
+ readGen (A.ByteArray ba#) = I# (indexIntArray# ba# 0#)
181
+
+ -- | Store the given 'Int' in slot 0 of the mutable array (the slot that
+ -- 'readGen' reads back).
182
+ writeGen :: A.MArray s -> Int -> ST s ()
183
+ writeGen (A.MutableByteArray mba#) (I# g#) =
184
+   ST $ \st# ->
185
+     (# writeIntArray# mba# 0# g# st#, () #)
186
+
187
+ #else
188
+
+ -- | Length of a 'Text' measured in code units. On this side of the CPP
+ -- switch (@text < 2@) that is whatever 'lengthWord16' reports.
189
+ lengthCodeUnits :: Text -> Int
190
+ lengthCodeUnits txt = lengthWord16 txt
191
+
-- | Drop the first @s@ code units of the buffer and return the rest as a
-- 'Text' built on the same array. The bounds (@0 <= s <= len@) are
-- checked only by the 'assert'.
192
+ dropCodeUnits :: Int -> Buffer -> Text
193
+ dropCodeUnits s (Buf arr off len _ _) =
137
194
assert (s >= 0 && s <= len) $
138
195
Text arr (off+ s) (len- s)
139
- {-# INLINE dropWord16 #-}
196
+ {-# INLINE dropCodeUnits #-}
140
197
141
198
-- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16
142
199
-- array, returning the current character and the delta to add to give
@@ -170,3 +227,5 @@ writeGen :: A.MArray s -> Int -> ST s ()
170
227
writeGen a (I # gen# ) = ST $ \ s0# ->
171
228
case writeIntArray# (A. maBA a) 0 # gen# s0# of
172
229
s1# -> (# s1# , () # )
230
+
231
+ #endif
0 commit comments