3
3
import pytest
4
4
from numpy .testing import assert_array_equal as aae
5
5
from pandas .testing import assert_frame_equal as afe
6
+ from pandas .testing import assert_series_equal as ase
6
7
7
8
from estimagic .bootstrap_samples import (
9
+ _calculate_bootstrap_indices_weights ,
8
10
_convert_cluster_ids_to_indices ,
9
11
_get_bootstrap_samples_from_indices ,
10
12
get_bootstrap_indices ,
@@ -18,6 +20,7 @@ def data():
18
20
df = pd .DataFrame ()
19
21
df ["id" ] = np .arange (900 )
20
22
df ["hh" ] = [3 , 1 , 2 , 0 , 0 , 2 , 5 , 4 , 5 ] * 100
23
+ df ["weights" ] = np .ones (900 )
21
24
return df
22
25
23
26
@@ -33,6 +36,37 @@ def test_get_bootstrap_indices_radomization_works_with_clustering(data):
33
36
assert set (res [0 ]) != set (res [1 ])
34
37
35
38
39
def test_get_bootstrap_indices_randomization_works_with_weights(data):
    """Two weighted draws from the same rng must not cover the same indices."""
    rng = get_rng(seed=12345)
    res = get_bootstrap_indices(data, weight_by="weights", n_draws=2, rng=rng)
    # Compared as sets: the draws have to differ in which indices occur at all,
    # not merely in their order.
    assert set(res[0]) != set(res[1])
43
+
44
+
45
def test_get_bootstrap_indices_randomization_works_with_weights_and_clustering(data):
    """Two weighted, clustered draws from the same rng must not coincide."""
    rng = get_rng(seed=12345)
    res = get_bootstrap_indices(
        data, weight_by="weights", cluster_by="hh", n_draws=2, rng=rng
    )
    # Compared as sets: the draws have to differ in which indices occur at all.
    assert set(res[0]) != set(res[1])
51
+
52
+
53
def test_get_bootstrap_indices_randomization_works_with_and_without_weights(data):
    """Identically seeded rngs give different draws with and without weights."""
    # Two separate generators with the same seed, so that the only difference
    # between the calls is the ``weight_by`` argument.
    rng1 = get_rng(seed=12345)
    rng2 = get_rng(seed=12345)
    res1 = get_bootstrap_indices(data, n_draws=1, rng=rng1)
    res2 = get_bootstrap_indices(data, weight_by="weights", n_draws=1, rng=rng2)
    assert not np.array_equal(res1, res2)
59
+
60
+
61
def test_get_boostrap_indices_randomization_works_with_extreme_case(data):
    """With all weight on one row, a draw contains a single distinct index."""
    rng = get_rng(seed=12345)
    # Degenerate weighting: only the first row has non-zero weight.
    weights = np.zeros(900)
    weights[0] = 1.0
    data["weights"] = weights
    res = get_bootstrap_indices(data, weight_by="weights", n_draws=1, rng=rng)
    assert len(np.unique(res)) == 1
68
+
69
+
36
70
def test_clustering_leaves_households_intact (data ):
37
71
rng = get_rng (seed = 12345 )
38
72
indices = get_bootstrap_indices (data , cluster_by = "hh" , n_draws = 1 , rng = rng )[0 ]
@@ -63,3 +97,53 @@ def test_get_bootstrap_samples_from_indices():
63
97
def test_get_bootstrap_samples_runs(data):
    """Smoke test: ``get_bootstrap_samples`` completes without raising."""
    rng = get_rng(seed=12345)
    # No assertion on the result; the call itself is the check.
    get_bootstrap_samples(data, n_draws=2, rng=rng)
100
+
101
+
102
@pytest.fixture
def sample_data():
    """Return a four-row frame with a ``weight`` and a ``cluster`` column.

    Weights are 1..4 and the rows are split evenly between clusters "A" and "B".
    """
    return pd.DataFrame({"weight": [1, 2, 3, 4], "cluster": ["A", "A", "B", "B"]})
105
+
106
+
107
def test_no_weights_no_clusters(sample_data):
    """Without a weight column the helper returns ``None``."""
    result = _calculate_bootstrap_indices_weights(sample_data, None, None)
    assert result is None
110
+
111
+
112
def test_weights_no_clusters(sample_data):
    """Row weights 1..4 are expected back scaled so that they sum to one."""
    result = _calculate_bootstrap_indices_weights(sample_data, "weight", None)
    expected = pd.Series([0.1, 0.2, 0.3, 0.4], index=sample_data.index, name="weight")
    # Use the module-level ``ase`` alias like the other series comparisons here.
    ase(result, expected)
116
+
117
+
118
def test_weights_and_clusters(sample_data):
    """Per-cluster weights are expected as shares of the total weight.

    Cluster "A" holds weights 1 and 2, cluster "B" holds 3 and 4, so the
    expected shares are 0.3 and 0.7, indexed by cluster label.
    """
    result = _calculate_bootstrap_indices_weights(sample_data, "weight", "cluster")
    expected = pd.Series(
        [0.3, 0.7], index=pd.Index(["A", "B"], name="cluster"), name="weight"
    )
    ase(result, expected)
124
+
125
+
126
def test_invalid_weight_column():
    """Naming a weight column that is not in the frame raises ``KeyError``."""
    data = pd.DataFrame({"x": [1, 2, 3]})
    with pytest.raises(KeyError):
        _calculate_bootstrap_indices_weights(data, "weight", None)
130
+
131
+
132
def test_invalid_cluster_column(sample_data):
    """Naming a cluster column that is not in the frame raises ``KeyError``."""
    with pytest.raises(KeyError):
        _calculate_bootstrap_indices_weights(sample_data, "weight", "invalid_cluster")
135
+
136
+
137
def test_empty_dataframe():
    """An empty frame with no weight or cluster column yields ``None``."""
    empty_df = pd.DataFrame()
    result = _calculate_bootstrap_indices_weights(empty_df, None, None)
    assert result is None
141
+
142
+
143
def test_some_zero_weights_with_clusters():
    """Zero-weight rows add nothing to their cluster's expected share.

    Cluster "A" has weights 0 and 1, cluster "B" has 0 and 2, so the expected
    shares are 1/3 and 2/3, indexed by cluster label.
    """
    data = pd.DataFrame({"weight": [0, 1, 0, 2], "cluster": ["A", "A", "B", "B"]})
    result = _calculate_bootstrap_indices_weights(data, "weight", "cluster")
    expected = pd.Series(
        [1 / 3, 2 / 3], index=pd.Index(["A", "B"], name="cluster"), name="weight"
    )
    ase(result, expected)
0 commit comments