Skip to content

Commit aceeb02

Browse files
committed
Added permutation pvalue adjustment option in load
Added permutation pvalue adjustment option in load
1 parent bf2cbce commit aceeb02

File tree

10 files changed

+235
-14
lines changed

10 files changed

+235
-14
lines changed

dabest/_api.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ def load(
2222
experiment_label=None,
2323
x1_level=None,
2424
mini_meta=False,
25+
ps_adjust=False,
2526
):
2627
"""
2728
Loads data in preparation for estimation statistics.
@@ -82,6 +83,9 @@ def load(
8283
is True; otherwise it can only be a string.
8384
mini_meta : boolean, default False
8485
Indicator of weighted delta calculation.
86+
ps_adjust : boolean, default False
87+
Indicator of whether to adjust calculated p-value according to Phipson & Smyth (2010)
88+
Reference: https://doi.org/10.2202/1544-6115.1585
8589
8690
Returns
8791
-------
@@ -105,6 +109,7 @@ def load(
105109
experiment_label,
106110
x1_level,
107111
mini_meta,
112+
ps_adjust,
108113
)
109114

110115
# %% ../nbs/API/load.ipynb 5

dabest/_dabest_object.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def __init__(
3838
experiment_label,
3939
x1_level,
4040
mini_meta,
41+
ps_adjust,
4142
):
4243
"""
4344
Parses and stores pandas DataFrames in preparation for estimation
@@ -56,6 +57,7 @@ def __init__(
5657
self.__random_seed = random_seed
5758
self.__is_proportional = proportional
5859
self.__is_mini_meta = mini_meta
60+
self.__ps_adjust = ps_adjust
5961

6062
# after this call the attributes self.__experiment_label and self.__x1_level are updated
6163
self._check_errors(x, y, idx, experiment, experiment_label, x1_level)
@@ -695,6 +697,7 @@ def _compute_effectsize_dfs(self):
695697
x1_level=self.__x1_level,
696698
x2=self.__x2,
697699
mini_meta=self.__is_mini_meta,
700+
ps_adjust=self.__ps_adjust,
698701
)
699702

700703
self.__mean_diff = EffectSizeDataFrame(

dabest/_effsize_objects.py

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
import lqrt
1111
from scipy.stats import norm
1212
import numpy as np
13+
from scipy.special import binom as binomcoeff # devMJBL
14+
from scipy.stats import binom # devMJBL
15+
from scipy.integrate import fixed_quad # devMJBL
16+
from numpy import arange, mean # devMJBL
1317
from numpy import array, isnan, isinf, repeat, random, isin, abs, var
1418
from numpy import sort as npsort
1519
from numpy import nan as npnan
@@ -50,6 +54,10 @@ class TwoGroupsEffectSize(object):
5054
`random_seed` is used to seed the random number generator during
5155
bootstrap resampling. This ensures that the confidence intervals
5256
reported are replicable.
57+
ps_adjust : boolean, default False.
58+
If True, adjust calculated p-value according to Phipson & Smyth (2010)
59+
Reference: https://doi.org/10.2202/1544-6115.1585
60+
5361
5462
Returns
5563
-------
@@ -87,6 +95,7 @@ def __init__(
8795
resamples=5000,
8896
permutation_count=5000,
8997
random_seed=12345,
98+
ps_adjust=False,
9099
):
91100
from ._stats_tools import confint_2group_diff as ci2g
92101
from ._stats_tools import effsize as es
@@ -99,13 +108,14 @@ def __init__(
99108
"hedges_g": "Hedges' g",
100109
"cliffs_delta": "Cliff's delta",
101110
}
102-
111+
103112
self.__is_paired = is_paired
104113
self.__resamples = resamples
105114
self.__effect_size = effect_size
106115
self.__random_seed = random_seed
107116
self.__ci = ci
108117
self.__is_proportional = proportional
118+
self.__ps_adjust = ps_adjust
109119
self._check_errors(control, test)
110120

111121
# Convert to numpy arrays for speed.
@@ -329,6 +339,7 @@ def _perform_statistical_test(self):
329339
self.__effect_size,
330340
self.__is_paired,
331341
self.__permutation_count,
342+
ps_adjust = self.__ps_adjust,
332343
)
333344

334345
if self.__is_paired and not self.__is_proportional:
@@ -827,6 +838,7 @@ def __init__(
827838
delta2=False,
828839
experiment_label=None,
829840
mini_meta=False,
841+
ps_adjust=False,
830842
):
831843
"""
832844
Parses the data from a Dabest object, enabling plotting and printing
@@ -846,6 +858,7 @@ def __init__(
846858
self.__x2 = x2
847859
self.__delta2 = delta2
848860
self.__is_mini_meta = mini_meta
861+
self.__ps_adjust = ps_adjust
849862

850863
def __pre_calc(self):
851864
from .misc_tools import print_greeting, get_varname
@@ -896,7 +909,6 @@ def __pre_calc(self):
896909
cname = current_tuple[ix]
897910
control = grouped_data[cname]
898911
test = grouped_data[tname]
899-
900912
result = TwoGroupsEffectSize(
901913
control,
902914
test,
@@ -907,6 +919,7 @@ def __pre_calc(self):
907919
self.__resamples,
908920
self.__permutation_count,
909921
self.__random_seed,
922+
self.__ps_adjust
910923
)
911924
r_dict = result.to_dict()
912925
r_dict["control"] = cname
@@ -1633,6 +1646,10 @@ class PermutationTest:
16331646
`random_seed` is used to seed the random number generator during
16341647
bootstrap resampling. This ensures that the generated permutations
16351648
are replicable.
1649+
ps_adjust : bool, default False
1650+
If True, the p-value is adjusted according to Phipson & Smyth (2010).
1651+
Reference: https://doi.org/10.2202/1544-6115.1585
1652+
16361653
16371654
Returns
16381655
-------
@@ -1651,6 +1668,7 @@ def __init__(self, control: array,
16511668
is_paired:str=None,
16521669
permutation_count:int=5000, # The number of permutations (reshuffles) to perform.
16531670
random_seed:int=12345,#`random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the generated permutations are replicable.
1671+
ps_adjust:bool=False,
16541672
**kwargs):
16551673
from ._stats_tools.effsize import two_group_difference
16561674
from ._stats_tools.confint_2group_diff import calculate_group_var
@@ -1675,6 +1693,7 @@ def __init__(self, control: array,
16751693

16761694
BAG = array([*control, *test])
16771695
CONTROL_LEN = int(len(control))
1696+
TEST_LEN = int(len(test)) # devMJBL
16781697
EXTREME_COUNT = 0.
16791698
THRESHOLD = abs(two_group_difference(control, test,
16801699
is_paired, effect_size))
@@ -1714,13 +1733,43 @@ def __init__(self, control: array,
17141733

17151734
if abs(es) > THRESHOLD:
17161735
EXTREME_COUNT += 1.
1736+
1737+
if ps_adjust:
1738+
# devMJBL
1739+
# adjust calculated p-value according to Phipson & Smyth (2010)
1740+
# https://doi.org/10.2202/1544-6115.1585
1741+
# as per R code in statmod::permp
1742+
# https://rdrr.io/cran/statmod/src/R/permp.R
1743+
# (assumes two-sided test)
1744+
1745+
if CONTROL_LEN == TEST_LEN:
1746+
totalPermutations = binomcoeff(CONTROL_LEN + TEST_LEN, TEST_LEN)/2
1747+
else:
1748+
totalPermutations = binomcoeff(CONTROL_LEN + TEST_LEN, TEST_LEN)
1749+
1750+
if totalPermutations <= 10e3:
1751+
# exact calculation: average the binomial CDF over every attainable p-value (1..totalPermutations)/totalPermutations
1752+
p = arange(1, totalPermutations + 1)/totalPermutations
1753+
x2 = repeat(EXTREME_COUNT, repeats=totalPermutations)
1754+
Y = binom.cdf(k=x2, n=permutation_count, p=p)
1755+
self.pvalue = mean(Y)
1756+
else:
1757+
# integral approximation for large permutation spaces (128-point fixed-order Gaussian quadrature)
1758+
def binomcdf(p, k, n):
1759+
return binom.cdf(k, n, p)
1760+
1761+
integrationVal, _ = fixed_quad(binomcdf,
1762+
a=0, b=0.5/totalPermutations,
1763+
args=(EXTREME_COUNT, permutation_count),
1764+
n=128)
17171765

1766+
self.pvalue = (EXTREME_COUNT + 1)/(permutation_count + 1) - integrationVal
1767+
else:
1768+
self.pvalue = EXTREME_COUNT / self.__permutation_count
1769+
17181770
self.__permutations = array(self.__permutations)
17191771
self.__permutations_var = array(self.__permutations_var)
17201772

1721-
self.pvalue = EXTREME_COUNT / self.__permutation_count
1722-
1723-
17241773
def __repr__(self):
17251774
return("{} permutations were taken. The p-value is {}.".format(self.__permutation_count,
17261775
self.pvalue))

nbs/API/dabest_object.ipynb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@
143143
" experiment_label,\n",
144144
" x1_level,\n",
145145
" mini_meta,\n",
146+
" ps_adjust,\n",
146147
" ):\n",
147148
" \"\"\"\n",
148149
" Parses and stores pandas DataFrames in preparation for estimation\n",
@@ -161,6 +162,7 @@
161162
" self.__random_seed = random_seed\n",
162163
" self.__is_proportional = proportional\n",
163164
" self.__is_mini_meta = mini_meta\n",
165+
" self.__ps_adjust = ps_adjust\n",
164166
"\n",
165167
" # after this call the attributes self.__experiment_label and self.__x1_level are updated\n",
166168
" self._check_errors(x, y, idx, experiment, experiment_label, x1_level)\n",
@@ -800,6 +802,7 @@
800802
" x1_level=self.__x1_level,\n",
801803
" x2=self.__x2,\n",
802804
" mini_meta=self.__is_mini_meta,\n",
805+
" ps_adjust=self.__ps_adjust,\n",
803806
" )\n",
804807
"\n",
805808
" self.__mean_diff = EffectSizeDataFrame(\n",

nbs/API/effsize_objects.ipynb

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,10 @@
9292
"import lqrt\n",
9393
"from scipy.stats import norm\n",
9494
"import numpy as np\n",
95+
"from scipy.special import binom as binomcoeff # devMJBL\n",
96+
"from scipy.stats import binom # devMJBL\n",
97+
"from scipy.integrate import fixed_quad # devMJBL\n",
98+
"from numpy import arange, mean # devMJBL\n",
9599
"from numpy import array, isnan, isinf, repeat, random, isin, abs, var\n",
96100
"from numpy import sort as npsort\n",
97101
"from numpy import nan as npnan\n",
@@ -139,6 +143,10 @@
139143
" `random_seed` is used to seed the random number generator during\n",
140144
" bootstrap resampling. This ensures that the confidence intervals\n",
141145
" reported are replicable.\n",
146+
" ps_adjust : boolean, default False.\n",
147+
" If True, adjust calculated p-value according to Phipson & Smyth (2010)\n",
148+
" # https://doi.org/10.2202/1544-6115.1585\n",
149+
" \n",
142150
"\n",
143151
" Returns\n",
144152
" -------\n",
@@ -176,6 +184,7 @@
176184
" resamples=5000,\n",
177185
" permutation_count=5000,\n",
178186
" random_seed=12345,\n",
187+
" ps_adjust=False,\n",
179188
" ):\n",
180189
" from ._stats_tools import confint_2group_diff as ci2g\n",
181190
" from ._stats_tools import effsize as es\n",
@@ -188,13 +197,14 @@
188197
" \"hedges_g\": \"Hedges' g\",\n",
189198
" \"cliffs_delta\": \"Cliff's delta\",\n",
190199
" }\n",
191-
"\n",
200+
" \n",
192201
" self.__is_paired = is_paired\n",
193202
" self.__resamples = resamples\n",
194203
" self.__effect_size = effect_size\n",
195204
" self.__random_seed = random_seed\n",
196205
" self.__ci = ci\n",
197206
" self.__is_proportional = proportional\n",
207+
" self.__ps_adjust = ps_adjust\n",
198208
" self._check_errors(control, test)\n",
199209
"\n",
200210
" # Convert to numpy arrays for speed.\n",
@@ -418,6 +428,7 @@
418428
" self.__effect_size,\n",
419429
" self.__is_paired,\n",
420430
" self.__permutation_count,\n",
431+
" ps_adjust = self.__ps_adjust,\n",
421432
" )\n",
422433
"\n",
423434
" if self.__is_paired and not self.__is_proportional:\n",
@@ -1027,6 +1038,7 @@
10271038
" delta2=False,\n",
10281039
" experiment_label=None,\n",
10291040
" mini_meta=False,\n",
1041+
" ps_adjust=False,\n",
10301042
" ):\n",
10311043
" \"\"\"\n",
10321044
" Parses the data from a Dabest object, enabling plotting and printing\n",
@@ -1046,6 +1058,7 @@
10461058
" self.__x2 = x2\n",
10471059
" self.__delta2 = delta2\n",
10481060
" self.__is_mini_meta = mini_meta\n",
1061+
" self.__ps_adjust = ps_adjust\n",
10491062
"\n",
10501063
" def __pre_calc(self):\n",
10511064
" from .misc_tools import print_greeting, get_varname\n",
@@ -1096,7 +1109,6 @@
10961109
" cname = current_tuple[ix]\n",
10971110
" control = grouped_data[cname]\n",
10981111
" test = grouped_data[tname]\n",
1099-
"\n",
11001112
" result = TwoGroupsEffectSize(\n",
11011113
" control,\n",
11021114
" test,\n",
@@ -1107,6 +1119,7 @@
11071119
" self.__resamples,\n",
11081120
" self.__permutation_count,\n",
11091121
" self.__random_seed,\n",
1122+
" self.__ps_adjust\n",
11101123
" )\n",
11111124
" r_dict = result.to_dict()\n",
11121125
" r_dict[\"control\"] = cname\n",
@@ -2138,6 +2151,10 @@
21382151
" `random_seed` is used to seed the random number generator during\n",
21392152
" bootstrap resampling. This ensures that the generated permutations\n",
21402153
" are replicable.\n",
2154+
" ps_adjust : bool, default False\n",
2155+
" If True, the p-value is adjusted according to Phipson & Smyth (2010).\n",
2156+
" # https://doi.org/10.2202/1544-6115.1585\n",
2157+
"\n",
21412158
" \n",
21422159
" Returns\n",
21432160
" -------\n",
@@ -2156,6 +2173,7 @@
21562173
" is_paired:str=None,\n",
21572174
" permutation_count:int=5000, # The number of permutations (reshuffles) to perform.\n",
21582175
" random_seed:int=12345,#`random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the generated permutations are replicable.\n",
2176+
" ps_adjust:bool=False,\n",
21592177
" **kwargs):\n",
21602178
" from ._stats_tools.effsize import two_group_difference\n",
21612179
" from ._stats_tools.confint_2group_diff import calculate_group_var\n",
@@ -2180,6 +2198,7 @@
21802198
"\n",
21812199
" BAG = array([*control, *test])\n",
21822200
" CONTROL_LEN = int(len(control))\n",
2201+
" TEST_LEN = int(len(test)) # devMJBL\n",
21832202
" EXTREME_COUNT = 0.\n",
21842203
" THRESHOLD = abs(two_group_difference(control, test, \n",
21852204
" is_paired, effect_size))\n",
@@ -2219,13 +2238,43 @@
22192238
"\n",
22202239
" if abs(es) > THRESHOLD:\n",
22212240
" EXTREME_COUNT += 1.\n",
2241+
" \n",
2242+
" if ps_adjust:\n",
2243+
" # devMJBL\n",
2244+
" # adjust calculated p-value according to Phipson & Smyth (2010)\n",
2245+
" # https://doi.org/10.2202/1544-6115.1585\n",
2246+
" # as per R code in statmod::permp\n",
2247+
" # https://rdrr.io/cran/statmod/src/R/permp.R\n",
2248+
" # (assumes two-sided test)\n",
2249+
"\n",
2250+
" if CONTROL_LEN == TEST_LEN:\n",
2251+
" totalPermutations = binomcoeff(CONTROL_LEN + TEST_LEN, TEST_LEN)/2\n",
2252+
" else:\n",
2253+
" totalPermutations = binomcoeff(CONTROL_LEN + TEST_LEN, TEST_LEN)\n",
2254+
"\n",
2255+
" if totalPermutations <= 10e3:\n",
2256+
" # use exact calculation\n",
2257+
" p = arange(1, totalPermutations + 1)/totalPermutations\n",
2258+
" x2 = repeat(EXTREME_COUNT, repeats=totalPermutations)\n",
2259+
" Y = binom.cdf(k=x2, n=permutation_count, p=p)\n",
2260+
" self.pvalue = mean(Y)\n",
2261+
" else:\n",
2262+
" # use integral approximation\n",
2263+
" def binomcdf(p, k, n):\n",
2264+
" return binom.cdf(k, n, p)\n",
2265+
"\n",
2266+
" integrationVal, _ = fixed_quad(binomcdf,\n",
2267+
" a=0, b=0.5/totalPermutations,\n",
2268+
" args=(EXTREME_COUNT, permutation_count),\n",
2269+
" n=128)\n",
22222270
"\n",
2271+
" self.pvalue = (EXTREME_COUNT + 1)/(permutation_count + 1) - integrationVal\n",
2272+
" else:\n",
2273+
" self.pvalue = EXTREME_COUNT / self.__permutation_count\n",
2274+
" \n",
22232275
" self.__permutations = array(self.__permutations)\n",
22242276
" self.__permutations_var = array(self.__permutations_var)\n",
22252277
"\n",
2226-
" self.pvalue = EXTREME_COUNT / self.__permutation_count\n",
2227-
"\n",
2228-
"\n",
22292278
" def __repr__(self):\n",
22302279
" return(\"{} permutations were taken. The p-value is {}.\".format(self.__permutation_count, \n",
22312280
" self.pvalue))\n",

0 commit comments

Comments
 (0)