|
92 | 92 | "import lqrt\n", |
93 | 93 | "from scipy.stats import norm\n", |
94 | 94 | "import numpy as np\n", |
| 95 | + "from scipy.special import binom as binomcoeff # devMJBL\n", |
| 96 | + "from scipy.stats import binom # devMJBL\n", |
| 97 | + "from scipy.integrate import fixed_quad # devMJBL\n", |
| 98 | + "from numpy import arange, mean # devMJBL\n", |
95 | 99 | "from numpy import array, isnan, isinf, repeat, random, isin, abs, var\n", |
96 | 100 | "from numpy import sort as npsort\n", |
97 | 101 | "from numpy import nan as npnan\n", |
|
139 | 143 | " `random_seed` is used to seed the random number generator during\n", |
140 | 144 | " bootstrap resampling. This ensures that the confidence intervals\n", |
141 | 145 | " reported are replicable.\n", |
| 146 | + " ps_adjust : boolean, default False.\n", |
| 147 | + " If True, adjust calculated p-value according to Phipson & Smyth (2010)\n", |
| 148 | + " # https://doi.org/10.2202/1544-6115.1585\n", |
| 149 | + " \n", |
142 | 150 | "\n", |
143 | 151 | " Returns\n", |
144 | 152 | " -------\n", |
|
176 | 184 | " resamples=5000,\n", |
177 | 185 | " permutation_count=5000,\n", |
178 | 186 | " random_seed=12345,\n", |
| 187 | + " ps_adjust=False,\n", |
179 | 188 | " ):\n", |
180 | 189 | " from ._stats_tools import confint_2group_diff as ci2g\n", |
181 | 190 | " from ._stats_tools import effsize as es\n", |
|
188 | 197 | " \"hedges_g\": \"Hedges' g\",\n", |
189 | 198 | " \"cliffs_delta\": \"Cliff's delta\",\n", |
190 | 199 | " }\n", |
191 | | - "\n", |
| 200 | + " \n", |
192 | 201 | " self.__is_paired = is_paired\n", |
193 | 202 | " self.__resamples = resamples\n", |
194 | 203 | " self.__effect_size = effect_size\n", |
195 | 204 | " self.__random_seed = random_seed\n", |
196 | 205 | " self.__ci = ci\n", |
197 | 206 | " self.__is_proportional = proportional\n", |
| 207 | + " self.__ps_adjust = ps_adjust\n", |
198 | 208 | " self._check_errors(control, test)\n", |
199 | 209 | "\n", |
200 | 210 | " # Convert to numpy arrays for speed.\n", |
|
418 | 428 | " self.__effect_size,\n", |
419 | 429 | " self.__is_paired,\n", |
420 | 430 | " self.__permutation_count,\n", |
| 431 | + " ps_adjust = self.__ps_adjust,\n", |
421 | 432 | " )\n", |
422 | 433 | "\n", |
423 | 434 | " if self.__is_paired and not self.__is_proportional:\n", |
|
1027 | 1038 | " delta2=False,\n", |
1028 | 1039 | " experiment_label=None,\n", |
1029 | 1040 | " mini_meta=False,\n", |
| 1041 | + " ps_adjust=False,\n", |
1030 | 1042 | " ):\n", |
1031 | 1043 | " \"\"\"\n", |
1032 | 1044 | " Parses the data from a Dabest object, enabling plotting and printing\n", |
|
1046 | 1058 | " self.__x2 = x2\n", |
1047 | 1059 | " self.__delta2 = delta2\n", |
1048 | 1060 | " self.__is_mini_meta = mini_meta\n", |
| 1061 | + " self.__ps_adjust = ps_adjust\n", |
1049 | 1062 | "\n", |
1050 | 1063 | " def __pre_calc(self):\n", |
1051 | 1064 | " from .misc_tools import print_greeting, get_varname\n", |
|
1096 | 1109 | " cname = current_tuple[ix]\n", |
1097 | 1110 | " control = grouped_data[cname]\n", |
1098 | 1111 | " test = grouped_data[tname]\n", |
1099 | | - "\n", |
1100 | 1112 | " result = TwoGroupsEffectSize(\n", |
1101 | 1113 | " control,\n", |
1102 | 1114 | " test,\n", |
|
1107 | 1119 | " self.__resamples,\n", |
1108 | 1120 | " self.__permutation_count,\n", |
1109 | 1121 | " self.__random_seed,\n", |
| 1122 | + " self.__ps_adjust\n", |
1110 | 1123 | " )\n", |
1111 | 1124 | " r_dict = result.to_dict()\n", |
1112 | 1125 | " r_dict[\"control\"] = cname\n", |
|
2138 | 2151 | " `random_seed` is used to seed the random number generator during\n", |
2139 | 2152 | " bootstrap resampling. This ensures that the generated permutations\n", |
2140 | 2153 | " are replicable.\n", |
| 2154 | + " ps_adjust : bool, default False\n", |
| 2155 | + " If True, the p-value is adjusted according to Phipson & Smyth (2010).\n", |
| 2156 | + " # https://doi.org/10.2202/1544-6115.1585\n", |
| 2157 | + "\n", |
2141 | 2158 | " \n", |
2142 | 2159 | " Returns\n", |
2143 | 2160 | " -------\n", |
|
2156 | 2173 | " is_paired:str=None,\n", |
2157 | 2174 | " permutation_count:int=5000, # The number of permutations (reshuffles) to perform.\n", |
2158 | 2175 | " random_seed:int=12345,#`random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the generated permutations are replicable.\n", |
| 2176 | + " ps_adjust:bool=False,\n", |
2159 | 2177 | " **kwargs):\n", |
2160 | 2178 | " from ._stats_tools.effsize import two_group_difference\n", |
2161 | 2179 | " from ._stats_tools.confint_2group_diff import calculate_group_var\n", |
|
2180 | 2198 | "\n", |
2181 | 2199 | " BAG = array([*control, *test])\n", |
2182 | 2200 | " CONTROL_LEN = int(len(control))\n", |
| 2201 | + " TEST_LEN = int(len(test)) # devMJBL\n", |
2183 | 2202 | " EXTREME_COUNT = 0.\n", |
2184 | 2203 | " THRESHOLD = abs(two_group_difference(control, test, \n", |
2185 | 2204 | " is_paired, effect_size))\n", |
|
2219 | 2238 | "\n", |
2220 | 2239 | " if abs(es) > THRESHOLD:\n", |
2221 | 2240 | " EXTREME_COUNT += 1.\n", |
| 2241 | + " \n", |
| 2242 | + " if ps_adjust:\n", |
| 2243 | + " # devMJBL\n", |
| 2244 | + " # adjust calculated p-value according to Phipson & Smyth (2010)\n", |
| 2245 | + " # https://doi.org/10.2202/1544-6115.1585\n", |
| 2246 | + " # as per R code in statmod::permp\n", |
| 2247 | + " # https://rdrr.io/cran/statmod/src/R/permp.R\n", |
| 2248 | + " # (assumes two-sided test)\n", |
| 2249 | + "\n", |
| 2250 | + " if CONTROL_LEN == TEST_LEN:\n", |
| 2251 | + " totalPermutations = binomcoeff(CONTROL_LEN + TEST_LEN, TEST_LEN)/2\n", |
| 2252 | + " else:\n", |
| 2253 | + " totalPermutations = binomcoeff(CONTROL_LEN + TEST_LEN, TEST_LEN)\n", |
| 2254 | + "\n", |
| 2255 | + " if totalPermutations <= 10e3:\n", |
| 2256 | + " # use exact calculation\n", |
| 2257 | + " p = arange(1, totalPermutations + 1)/totalPermutations\n", |
| 2258 | + " x2 = repeat(EXTREME_COUNT, repeats=totalPermutations)\n", |
| 2259 | + " Y = binom.cdf(k=x2, n=permutation_count, p=p)\n", |
| 2260 | + " self.pvalue = mean(Y)\n", |
| 2261 | + " else:\n", |
| 2262 | + " # use integral approximation\n", |
| 2263 | + " def binomcdf(p, k, n):\n", |
| 2264 | + " return binom.cdf(k, n, p)\n", |
| 2265 | + "\n", |
| 2266 | + " integrationVal, _ = fixed_quad(binomcdf,\n", |
| 2267 | + " a=0, b=0.5/totalPermutations,\n", |
| 2268 | + " args=(EXTREME_COUNT, permutation_count),\n", |
| 2269 | + " n=128)\n", |
2222 | 2270 | "\n", |
| 2271 | + " self.pvalue = (EXTREME_COUNT + 1)/(permutation_count + 1) - integrationVal\n", |
| 2272 | + " else:\n", |
| 2273 | + " self.pvalue = EXTREME_COUNT / self.__permutation_count\n", |
| 2274 | + " \n", |
2223 | 2275 | " self.__permutations = array(self.__permutations)\n", |
2224 | 2276 | " self.__permutations_var = array(self.__permutations_var)\n", |
2225 | 2277 | "\n", |
2226 | | - " self.pvalue = EXTREME_COUNT / self.__permutation_count\n", |
2227 | | - "\n", |
2228 | | - "\n", |
2229 | 2278 | " def __repr__(self):\n", |
2230 | 2279 | " return(\"{} permutations were taken. The p-value is {}.\".format(self.__permutation_count, \n", |
2231 | 2280 | " self.pvalue))\n", |
|
0 commit comments