Skip to content

Commit 0971ad5

Browse files
authored
Fix infinite loop in isSubmapOf[By] / isSubsetOf on 32-bit platforms (#501)
The internal `submapBitmapIndexed` function used by these functions could enter an infinite loop while comparing two nodes `m1` and `m2` where `m2` contained a sub-node associated with the partial hash `0b11111`. In this case the high bit of the combined bitmap of `m1` and `m2`, `b1Orb2`, was `1`. After checking the submap condition at the high bit, the 32-bit `m` variable used to iterate over `b1Orb2` would be left-shifted and overflow to `0`, resulting in the infinite loop. https://github.com/haskell-unordered-containers/unordered-containers/blob/37eee2290cab287c0947d1de20235a37ced63c94/Data/HashMap/Internal.hs#L1551-L1572 To fix this bug and any similar issues related to a branching factor of 32, we return to a branching factor of 16 for 32-bit platforms. On 64-bit platforms the branching factors stays at 32. Fixes #491.
1 parent 8e380a6 commit 0971ad5

File tree

11 files changed

+105
-38
lines changed

11 files changed

+105
-38
lines changed

.github/workflows/32bit-ci.yml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# This config is mostly copied from
2+
# https://github.com/haskell/bytestring/blob/master/.github/workflows/ci.yml
3+
4+
name: 32bit-ci
5+
on:
6+
push:
7+
branches:
8+
- master
9+
pull_request: {} # Validate all PRs
10+
11+
defaults:
12+
run:
13+
shell: bash
14+
15+
jobs:
16+
i386:
17+
runs-on: ubuntu-latest
18+
container:
19+
image: i386/ubuntu:bionic
20+
steps:
21+
- name: Install
22+
run: |
23+
apt-get update -y
24+
apt-get install -y autoconf build-essential zlib1g-dev libgmp-dev curl libncurses5 libtinfo5 libncurses5-dev libtinfo-dev
25+
curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 sh
26+
- uses: actions/checkout@v1 #This version must stay old enough to remain compatible with the container image
27+
- name: Test
28+
run: |
29+
source ~/.ghcup/env
30+
cabal update
31+
cabal test
32+
# TODO: Consider testing with -fdebug

Data/HashMap/Internal.hs

Lines changed: 33 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
{-# OPTIONS_GHC -fno-full-laziness -funbox-strict-fields #-}
1616
{-# OPTIONS_HADDOCK not-home #-}
1717

18+
#include "MachDeps.h"
19+
1820
-- | = WARNING
1921
--
2022
-- This module is considered __internal__.
@@ -125,9 +127,9 @@ module Data.HashMap.Internal
125127
, sparseIndex
126128
, two
127129
, unionArrayBy
128-
, update32
129-
, update32M
130-
, update32With'
130+
, updateFullArray
131+
, updateFullArrayM
132+
, updateFullArrayWith'
131133
, updateOrConcatWithKey
132134
, filterMapAux
133135
, equalKeys
@@ -830,7 +832,7 @@ insert' h0 k0 v0 m0 = go h0 k0 v0 0 m0
830832
!st' = go h k x (nextShift s) st
831833
in if st' `ptrEq` st
832834
then t
833-
else Full (update32 ary i st')
835+
else Full (updateFullArray ary i st')
834836
where i = index h s
835837
go h k x s t@(Collision hy v)
836838
| h == hy = Collision h (updateOrSnocWith (\a _ -> (# a #)) k x v)
@@ -864,7 +866,7 @@ insertNewKey !h0 !k0 x0 !m0 = go h0 k0 x0 0 m0
864866
go h k x s (Full ary) =
865867
let !st = A.index ary i
866868
!st' = go h k x (nextShift s) st
867-
in Full (update32 ary i st')
869+
in Full (updateFullArray ary i st')
868870
where i = index h s
869871
go h k x s t@(Collision hy v)
870872
| h == hy = Collision h (A.snoc v (L k x))
@@ -893,7 +895,7 @@ insertKeyExists !collPos0 !h0 !k0 x0 !m0 = go collPos0 h0 k0 x0 m0
893895
go collPos shiftedHash k x (Full ary) =
894896
let !st = A.index ary i
895897
!st' = go collPos (shiftHash shiftedHash) k x st
896-
in Full (update32 ary i st')
898+
in Full (updateFullArray ary i st')
897899
where i = index' shiftedHash
898900
go collPos _shiftedHash k x (Collision h v)
899901
| collPos >= 0 = Collision h (setAtPosition collPos k x v)
@@ -1041,7 +1043,7 @@ insertModifying x f k0 m0 = go h0 k0 0 m0
10411043
go h k s t@(Full ary) =
10421044
let !st = A.index ary i
10431045
!st' = go h k (nextShift s) st
1044-
ary' = update32 ary i $! st'
1046+
ary' = updateFullArray ary i $! st'
10451047
in if ptrEq st st'
10461048
then t
10471049
else Full ary'
@@ -1270,7 +1272,7 @@ adjust# f k0 m0 = go h0 k0 0 m0
12701272
let i = index h s
12711273
!st = A.index ary i
12721274
!st' = go h k (nextShift s) st
1273-
ary' = update32 ary i $! st'
1275+
ary' = updateFullArray ary i $! st'
12741276
in if ptrEq st st'
12751277
then t
12761278
else Full ary'
@@ -1554,6 +1556,9 @@ submapBitmapIndexed comp !b1 !ary1 !b2 !ary2 = subsetBitmaps && go 0 0 (b1Orb2 .
15541556
where
15551557
go :: Int -> Int -> Bitmap -> Bool
15561558
go !i !j !m
1559+
1560+
-- Note: m can overflow to 0 when maxChildren == WORD_SIZE_IN_BITS. See
1561+
-- #491. In that case there needs to be a check '| m == 0 = True'
15571562
| m > b1Orb2 = True
15581563

15591564
-- In case a key is both in ary1 and ary2, check ary1[i] <= ary2[j] and
@@ -1660,12 +1665,12 @@ unionWithKey f = go 0
16601665
go s (Full ary1) t2 =
16611666
let h2 = leafHashCode t2
16621667
i = index h2 s
1663-
ary' = update32With' ary1 i $ \st1 -> go (nextShift s) st1 t2
1668+
ary' = updateFullArrayWith' ary1 i $ \st1 -> go (nextShift s) st1 t2
16641669
in Full ary'
16651670
go s t1 (Full ary2) =
16661671
let h1 = leafHashCode t1
16671672
i = index h1 s
1668-
ary' = update32With' ary2 i $ \st2 -> go (nextShift s) t1 st2
1673+
ary' = updateFullArrayWith' ary2 i $ \st2 -> go (nextShift s) t1 st2
16691674
in Full ary'
16701675

16711676
leafHashCode (Leaf h _) = h
@@ -2406,24 +2411,24 @@ subsetArray cmpV ary1 ary2 = A.length ary1 <= A.length ary2 && A.all inAry2 ary1
24062411
-- Manually unrolled loops
24072412

24082413
-- | \(O(n)\) Update the element at the given position in this array.
2409-
update32 :: A.Array e -> Int -> e -> A.Array e
2410-
update32 ary idx b = runST (update32M ary idx b)
2411-
{-# INLINE update32 #-}
2414+
updateFullArray :: A.Array e -> Int -> e -> A.Array e
2415+
updateFullArray ary idx b = runST (updateFullArrayM ary idx b)
2416+
{-# INLINE updateFullArray #-}
24122417

24132418
-- | \(O(n)\) Update the element at the given position in this array.
2414-
update32M :: A.Array e -> Int -> e -> ST s (A.Array e)
2415-
update32M ary idx b = do
2419+
updateFullArrayM :: A.Array e -> Int -> e -> ST s (A.Array e)
2420+
updateFullArrayM ary idx b = do
24162421
mary <- clone ary
24172422
A.write mary idx b
24182423
A.unsafeFreeze mary
2419-
{-# INLINE update32M #-}
2424+
{-# INLINE updateFullArrayM #-}
24202425

24212426
-- | \(O(n)\) Update the element at the given position in this array, by applying a function to it.
2422-
update32With' :: A.Array e -> Int -> (e -> e) -> A.Array e
2423-
update32With' ary idx f
2427+
updateFullArrayWith' :: A.Array e -> Int -> (e -> e) -> A.Array e
2428+
updateFullArrayWith' ary idx f
24242429
| (# x #) <- A.index# ary idx
2425-
= update32 ary idx $! f x
2426-
{-# INLINE update32With' #-}
2430+
= updateFullArray ary idx $! f x
2431+
{-# INLINE updateFullArrayWith' #-}
24272432

24282433
-- | Unsafely clone an array of (2^bitsPerSubkey) elements. The length of the input
24292434
-- array is not checked.
@@ -2440,8 +2445,16 @@ clone ary =
24402445
-- | Number of bits that are inspected at each level of the hash tree.
24412446
--
24422447
-- This constant is named /t/ in the original /Ideal Hash Trees/ paper.
2448+
--
2449+
-- Note that this constant is platform-dependent. On 32-bit platforms we use
2450+
-- '4', because bitmaps using '2^5' bits turned out to be prone to integer
2451+
-- overflow bugs. See #491 for instance.
24432452
bitsPerSubkey :: Int
2453+
#if WORD_SIZE_IN_BITS < 64
2454+
bitsPerSubkey = 4
2455+
#else
24442456
bitsPerSubkey = 5
2457+
#endif
24452458

24462459
-- | The size of a 'Full' node, i.e. @2 ^ 'bitsPerSubkey'@.
24472460
maxChildren :: Int

Data/HashMap/Internal/Strict.hs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
-- strings.
4040
--
4141
-- Many operations have a average-case complexity of \(O(\log n)\). The
42-
-- implementation uses a large base (i.e. 32) so in practice these
42+
-- implementation uses a large base (i.e. 16 or 32) so in practice these
4343
-- operations are constant time.
4444
module Data.HashMap.Internal.Strict
4545
(
@@ -211,7 +211,7 @@ insertWith f k0 v0 m0 = go h0 k0 v0 0 m0
211211
go h k x s (Full ary) =
212212
let st = A.index ary i
213213
st' = go h k x (nextShift s) st
214-
ary' = HM.update32 ary i $! st'
214+
ary' = HM.updateFullArray ary i $! st'
215215
in Full ary'
216216
where i = index h s
217217
go h k x s t@(Collision hy v)
@@ -282,7 +282,7 @@ adjust f k0 m0 = go h0 k0 0 m0
282282
let i = index h s
283283
st = A.index ary i
284284
st' = go h k (nextShift s) st
285-
ary' = HM.update32 ary i $! st'
285+
ary' = HM.updateFullArray ary i $! st'
286286
in Full ary'
287287
go h k _ t@(Collision hy v)
288288
| h == hy = Collision h (updateWith f k v)
@@ -516,12 +516,12 @@ unionWithKey f = go 0
516516
go s (Full ary1) t2 =
517517
let h2 = leafHashCode t2
518518
i = index h2 s
519-
ary' = HM.update32With' ary1 i $ \st1 -> go (nextShift s) st1 t2
519+
ary' = HM.updateFullArrayWith' ary1 i $ \st1 -> go (nextShift s) st1 t2
520520
in Full ary'
521521
go s t1 (Full ary2) =
522522
let h1 = leafHashCode t1
523523
i = index h1 s
524-
ary' = HM.update32With' ary2 i $ \st2 -> go (nextShift s) t1 st2
524+
ary' = HM.updateFullArrayWith' ary2 i $ \st2 -> go (nextShift s) t1 st2
525525
in Full ary'
526526

527527
leafHashCode (Leaf h _) = h

Data/HashMap/Lazy.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
-- strings.
2121
--
2222
-- Many operations have a average-case complexity of \(O(\log n)\). The
23-
-- implementation uses a large base (i.e. 32) so in practice these
23+
-- implementation uses a large base (i.e. 16 or 32) so in practice these
2424
-- operations are constant time.
2525
module Data.HashMap.Lazy
2626
(

Data/HashMap/Strict.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
-- strings.
2020
--
2121
-- Many operations have a average-case complexity of \(O(\log n)\). The
22-
-- implementation uses a large base (i.e. 32) so in practice these
22+
-- implementation uses a large base (i.e. 16 or 32) so in practice these
2323
-- operations are constant time.
2424
module Data.HashMap.Strict
2525
(

Data/HashSet.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ especially when value comparisons are expensive, as in the case of
8787
strings.
8888
8989
Many operations have a average-case complexity of \(O(\log n)\). The
90-
implementation uses a large base (i.e. 16) so in practice these
90+
implementation uses a large base (i.e. 16 or 32) so in practice these
9191
operations are constant time.
9292
-}
9393

Data/HashSet/Internal.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
-- strings.
3838
--
3939
-- Many operations have a average-case complexity of \(O(\log n)\). The
40-
-- implementation uses a large base (i.e. 32) so in practice these
40+
-- implementation uses a large base (i.e. 16 or 32) so in practice these
4141
-- operations are constant time.
4242

4343
module Data.HashSet.Internal

docs/developer-guide.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,8 @@ Here's a quick overview in order of simplicity:
103103
it contains *2^B* elements.
104104

105105
The number of bits of the hash value to use at each level of the tree, *B*, is a
106-
compile time constant, currently 5. In general a larger *B* improves lookup
106+
compile time constant, currently 5 on 64-bit platforms, and 4 on platforms with
107+
a smaller word size. In general a larger *B* improves lookup
107108
performance (shallower tree) but hurts modification (large nodes to copy when
108109
updating the spine of the tree).
109110

tests/Main.hs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
module Main (main) where
22

3+
import GHC.IO.Encoding (setLocaleEncoding, utf8)
34
import Test.Tasty (defaultMain, testGroup)
45

56
import qualified Properties
67
import qualified Regressions
78
import qualified Strictness
89

910
main :: IO ()
10-
main = defaultMain $ testGroup "All"
11-
[ Properties.tests
12-
, Regressions.tests
13-
, Strictness.tests
14-
]
11+
main = do
12+
setLocaleEncoding utf8
13+
defaultMain $ testGroup "All"
14+
[ Properties.tests
15+
, Regressions.tests
16+
, Strictness.tests
17+
]

tests/Regressions.hs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
{-# LANGUAGE BinaryLiterals #-}
12
{-# LANGUAGE CPP #-}
23
{-# LANGUAGE MagicHash #-}
34
{-# LANGUAGE ScopedTypeVariables #-}
@@ -26,6 +27,7 @@ import Test.Tasty.QuickCheck (testProperty)
2627
import qualified Data.HashMap.Lazy as HML
2728
import qualified Data.HashMap.Strict as HMS
2829
import qualified Data.HashSet as HS
30+
import qualified Test.Tasty as Tasty
2931

3032
#if MIN_VERSION_base(4,12,0)
3133
-- nothunks requires base >= 4.12
@@ -262,6 +264,17 @@ issue420 = do
262264
assert $ k1 `HS.member` s1
263265
assert $ k2 `HS.member` s1
264266

267+
------------------------------------------------------------------------
268+
-- Issue 491
269+
270+
issue491 :: TestTree
271+
issue491 = Tasty.localOption (Tasty.mkTimeout 1000000) $ testGroup "issue491" $
272+
[ testCase "1" $ assert $ m [0, -1] `HML.isSubmapOf` m [0, -1]
273+
, testCase "2" $ assert $ m [1, 0b11111] `HML.isSubmapOf` m [1, 0b11111]
274+
, testCase "3" $ assert $ m [0, 1] `HML.isSubmapOf` m [0, 1, 0b11111]
275+
]
276+
where m = HS.toMap . HS.fromList @Int
277+
265278
------------------------------------------------------------------------
266279
-- * Test list
267280

@@ -292,4 +305,5 @@ tests = testGroup "Regression tests"
292305
, testCase "issue383" issue383
293306
#endif
294307
, testCase "issue420" issue420
308+
, issue491
295309
]

0 commit comments

Comments
 (0)