Skip to content

Commit ca715d3

Browse files
committed
Fixed bug with empty "". Added dynamic settings of resultLimit.
1 parent 792c175 commit ca715d3

File tree

6 files changed

+30
-17
lines changed

6 files changed

+30
-17
lines changed

Data/JsonStream/CLexType.hsc

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,6 @@ newtype LexResultType = LexResultType CInt deriving (Show, Eq, Storable)
88

99
#include "lexer.h"
1010

11-
resultLimit :: Int
12-
resultLimit = #const RESULT_COUNT
13-
1411
#{enum LexResultType, LexResultType
1512
, resNumber = RES_NUMBER
1613
, resString = RES_STRING

Data/JsonStream/CLexer.hs

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,14 @@ data Header = Header {
4343
, hdrPosition :: !CInt
4444
, hdrLength :: !CInt
4545
, hdrResultNum :: !CInt
46+
, hdrResultLimit :: !CInt
4647
} deriving (Show)
4748

49+
-- | Baseline lexer header with every field set to zero.
-- Intended as a starting point for record updates, e.g.
-- @defHeader{hdrLength = ..., hdrResultLimit = ...}@.
defHeader :: Header
50+
defHeader = Header 0 0 0 0 0 0 0
51+
4852
instance Storable Header where
49-
sizeOf _ = 7 * sizeOf (undefined :: CInt)
53+
sizeOf _ = 8 * sizeOf (undefined :: CInt)
5054
alignment _ = sizeOf (undefined :: CInt)
5155
peek ptr = do
5256
state <- peekByteOff ptr 0
@@ -55,8 +59,8 @@ instance Storable Header where
5559
position <- peekByteOff ptr (3 * sizeOf state)
5660
slength <- peekByteOff ptr (4 * sizeOf state)
5761
sresultnum <- peekByteOff ptr (5 * sizeOf state)
58-
return $ Header state sdata1 sdata2 position slength sresultnum
59-
-- return $ Header state sdata1 sdata2 position slength sresultnum
62+
sresultlimit <- peekByteOff ptr (6 * sizeOf state)
63+
return $ Header state sdata1 sdata2 position slength sresultnum sresultlimit
6064

6165
poke ptr (Header {..}) = do
6266
pokeByteOff ptr 0 hdrCurrentState
@@ -65,6 +69,7 @@ instance Storable Header where
6569
pokeByteOff ptr (3 * sizeOf hdrCurrentState) hdrPosition
6670
pokeByteOff ptr (4 * sizeOf hdrCurrentState) hdrLength
6771
pokeByteOff ptr (5 * sizeOf hdrCurrentState) hdrResultNum
72+
pokeByteOff ptr (6 * sizeOf hdrCurrentState) hdrResultLimit
6873

6974
peekResultField :: Int -> Int -> ResultPtr -> Int
7075
peekResultField n fieldno fptr = inlinePerformIO $ -- !! Using inlinePerformIO should be safe - we are just reading bytes from memory
@@ -91,7 +96,7 @@ callLex bs hdr = unsafeDupablePerformIO $ -- Using Dupable PerformIO should be s
9196
poke hdrptr (hdr{hdrResultNum=0, hdrLength=fromIntegral $ BS.length bs})
9297

9398
bsptr <- unsafeUseAsCString bs return
94-
resptr <- mallocForeignPtrBytes (resultLimit * sizeOf (undefined :: CInt) * 4)
99+
resptr <- mallocForeignPtrBytes (fromIntegral (hdrResultLimit hdr) * sizeOf (undefined :: CInt) * 4)
95100
res <- withForeignPtr resptr $ \resptr' ->
96101
lexJson bsptr hdrptr resptr'
97102

@@ -212,18 +217,22 @@ parseResults (TempData {tmpNumbers=tmpNumbers, tmpBuffer=bs}) (err, hdr, rescoun
212217
PartialResult (StringContent (encodeUtf8 $ T.singleton $ toEnum resAddData)) next
213218
-- -- Partial string, not the end
214219
| resType == resStringPartial ->
215-
if resLength == 0
220+
if resLength == -1
216221
then PartialResult (StringContent (BSW.singleton $ fromIntegral resAddData)) next -- \n\r..
217222
else PartialResult (StringContent textSection) next -- normal string section
218223
| otherwise -> error "Unsupported"
219224

225+
-- | Estimate the number of lexer results to reserve for a chunk.
--
-- The estimate is one result per five input bytes, plus one, so it is
-- always at least 1 (even for an empty chunk). Note that @`div`@ binds
-- tighter than @+@, i.e. this is @1 + (length `div` 5)@.
--
-- The value is stored in 'hdrResultLimit', which 'callLex' uses to size
-- the result buffer it allocates.
226+
estResultLimit :: BS.ByteString -> CInt
227+
estResultLimit dta = fromIntegral $ 1 + BS.length dta `div` 5
228+
220229
-- | Produce the next 'TokenResult' from the current lexer state.
--
-- * If the error flag ('tmpError') is set, the result is 'TokFailed'.
-- * If the header position is still below the header length, the current
--   buffer is lexed again with the current header.
-- * Otherwise the result is 'TokMoreData' carrying a continuation that
--   lexes the next chunk from position 0, with the header length set to
--   that chunk's size and the result limit re-estimated for it.
getNextResult :: TempData -> TokenResult
221230
getNextResult tmp@(TempData {..})
222231
| tmpError = TokFailed
223232
| hdrPosition tmpHeader < hdrLength tmpHeader = parseResults tmp (callLex tmpBuffer tmpHeader)
224233
| otherwise = TokMoreData newdata
225234
where
226-
newdata dta = parseResults newtmp (callLex dta newhdr)
235+
newdata dta = parseResults newtmp (callLex dta newhdr{hdrResultLimit=estResultLimit dta})
227236
where
228237
newtmp = tmp{tmpBuffer=dta}
229238
newhdr = tmpHeader{hdrPosition=0, hdrLength=fromIntegral $ BS.length dta}
@@ -232,4 +241,4 @@ getNextResult tmp@(TempData {..})
232241
-- | Entry point of the tokenizer: start lexing the first chunk of input.
--
-- Builds the initial header from 'defHeader' with the length of the chunk
-- and a result limit estimated by 'estResultLimit', wraps it in a fresh
-- 'TempData' and asks 'getNextResult' for the first result.
-- NOTE(review): the positional 'TempData' arguments @False@ and @[]@ are
-- presumably the error flag and the pending-numbers list; confirm against
-- the 'TempData' declaration.
tokenParser :: BS.ByteString -> TokenResult
233242
tokenParser dta = getNextResult (TempData dta newhdr False [])
234243
where
235-
newhdr = Header 0 0 0 0 (fromIntegral $ BS.length dta) 0
244+
newhdr = defHeader{hdrLength=fromIntegral (BS.length dta), hdrResultLimit=(estResultLimit dta)}

Data/JsonStream/Parser.hs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ import qualified Data.Text.Lazy as TL
8282
import Data.Text.Lazy.Encoding (decodeUtf8')
8383
import qualified Data.Vector as Vec
8484

85-
import Data.Bits (clearBit, setBit)
8685
import Data.JsonStream.CLexer (tokenParser)
8786
import Data.JsonStream.TokenParser
8887

@@ -463,6 +462,7 @@ ignoreVal' stval = Parser $ moreData (handleTok stval)
463462
handleLongString level _ (StringContent _) ntok = moreData (handleLongString level) ntok
464463
handleLongString 0 _ StringEnd ntok = Done "" ntok
465464
handleLongString level _ StringEnd ntok = moreData (handleTok level) ntok
465+
handleLongString _ _ el _ = Failed $ "Unexpected element in handleLongStr: " ++ (show el)
466466

467467
handleTok :: Int -> TokenResult -> Element -> TokenResult -> ParseResult a
468468
handleTok 0 _ (JValue _) ntok = Done "" ntok
@@ -480,6 +480,7 @@ ignoreVal' stval = Parser $ moreData (handleTok stval)
480480
ObjectEnd _ -> moreData (handleTok (level - 1)) ntok
481481
ArrayBegin -> moreData (handleTok (level + 1)) ntok
482482
ObjectBegin -> moreData (handleTok (level + 1)) ntok
483+
StringEnd -> Failed "Internal error - out of order StringEnd"
483484

484485
-- | Gather matches and return them as list.
485486
--

c_lib/lexer.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,7 @@ int handle_string(const char *input, struct lexer *lexer)
190190
// Emit partial string
191191
res->restype = RES_STRING_PARTIAL;
192192
res->adddata = 0;
193-
if (res->length != 0) // Do not add new result, if length == 0
194-
lexer->result_num++;
193+
lexer->result_num++;
195194

196195
// If we stopped because of backslash, change state, move one forward
197196
if (lexer->position < lexer->length) {
@@ -251,7 +250,7 @@ static inline void emitchar(char ch, struct lexer *lexer)
251250

252251
res->restype = RES_STRING_PARTIAL;
253252
res->startpos = lexer->position;
254-
res->length = 0;
253+
res->length = -1; // Special value indicating that this is special character
255254
res->adddata = ch;
256255

257256
lexer->result_num++;
@@ -294,7 +293,7 @@ int lex_json(const char *input, struct lexer *lexer, struct lexer_result *result
294293
&&state_string_uni
295294
};
296295
#define DISPATCH() { \
297-
if (!(lexer->position < lexer->length && lexer->result_num < RESULT_COUNT && res == 0)) \
296+
if (!(lexer->position < lexer->length && lexer->result_num < lexer->result_limit && res == 0)) \
298297
return res; \
299298
goto *dispatch_table[lexer->current_state];\
300299
}

c_lib/lexer.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717
#define RES_STRING_UNI 11
1818
#define RES_NUMBER_SMALL 12
1919

20-
#define RESULT_COUNT 6000
21-
2220
enum states {
2321
STATE_BASE = 0,
2422
STATE_STRING,
@@ -47,6 +45,7 @@ struct lexer {
4745
int length;
4846

4947
int result_num;
48+
int result_limit;
5049
struct lexer_result *result;
5150
};
5251

test/ParserSpec.hs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,14 @@ errTests = describe "Tests of previous errors" $ do
262262
res = parseLazyByteString parser onechar :: [Int]
263263
res `shouldBe` [123]
264264

265+
-- Regression test: empty strings must be handled when the input arrives
-- split into one-byte chunks, so the sliced lazy input ('onechar') is what
-- gets parsed, not the contiguous 'test1'.
it "Parses correctly handles empty strings when sliced:" $ do
266+
let test1 = "[\"\", \"\", true]"
267+
onechar = BL.fromChunks $ map BS.singleton $ BS.unpack test1
268+
parser = arrayOf bool
269+
res = parseLazyByteString parser onechar :: [Bool]
270+
res `shouldBe` [True]
271+
272+
265273
-- testLexer (start:rest) = iter rest (tokenParser start)
266274
-- where
267275
-- iter [] (TokMoreData cont) = print "done"

0 commit comments

Comments
 (0)