ACCLAB
diff --git a/‎dabest/_api.py‎
Lines changed: 5 additions & 0 deletions b/‎dabest/_api.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎dabest/_dabest_object.py‎
Lines changed: 3 additions & 0 deletions b/‎dabest/_dabest_object.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎dabest/_effsize_objects.py‎
Lines changed: 54 additions & 5 deletions b/‎dabest/_effsize_objects.py‎
Lines changed: 54 additions & 5 deletions
diff --git a/‎nbs/API/dabest_object.ipynb‎
Lines changed: 3 additions & 0 deletions b/‎nbs/API/dabest_object.ipynb‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎nbs/API/effsize_objects.ipynb‎
Lines changed: 54 additions & 5 deletions b/‎nbs/API/effsize_objects.ipynb‎
Lines changed: 54 additions & 5 deletions
@@ -22,6 +22,7 @@ def load(
     experiment_label=None,
     x1_level=None,
     mini_meta=False,
+    ps_adjust=False,
 ):
     """
     Loads data in preparation for estimation statistics.
@@ -82,6 +83,9 @@ def load(
         is True; otherwise it can only be a string.
     mini_meta : boolean, default False
         Indicator of weighted delta calculation.
+    ps_adjust : boolean, default False
+        If True, adjust the calculated p-value according to Phipson & Smyth (2010),
+        https://doi.org/10.2202/1544-6115.1585
 
     Returns
     -------
@@ -105,6 +109,7 @@ def load(
         experiment_label,
         x1_level,
         mini_meta,
+        ps_adjust,
     )
 
 # %% ../nbs/API/load.ipynb 5
 
@@ -38,6 +38,7 @@ def __init__(
         experiment_label,
         x1_level,
         mini_meta,
+        ps_adjust,
     ):
         """
         Parses and stores pandas DataFrames in preparation for estimation
@@ -56,6 +57,7 @@ def __init__(
         self.__random_seed = random_seed
         self.__is_proportional = proportional
         self.__is_mini_meta = mini_meta
+        self.__ps_adjust = ps_adjust
 
         # after this call the attributes self.__experiment_label and self.__x1_level are updated
         self._check_errors(x, y, idx, experiment, experiment_label, x1_level)
@@ -695,6 +697,7 @@ def _compute_effectsize_dfs(self):
             x1_level=self.__x1_level,
             x2=self.__x2,
             mini_meta=self.__is_mini_meta,
+            ps_adjust=self.__ps_adjust,
         )
 
         self.__mean_diff = EffectSizeDataFrame(
 
@@ -10,6 +10,10 @@
 import lqrt
 from scipy.stats import norm
 import numpy as np
+from scipy.special import binom as binomcoeff  # devMJBL
+from scipy.stats import binom  # devMJBL
+from scipy.integrate import fixed_quad  # devMJBL
+from numpy import arange, mean  # devMJBL
 from numpy import array, isnan, isinf, repeat, random, isin, abs, var
 from numpy import sort as npsort
 from numpy import nan as npnan
@@ -50,6 +54,10 @@ class TwoGroupsEffectSize(object):
             `random_seed` is used to seed the random number generator during
             bootstrap resampling. This ensures that the confidence intervals
             reported are replicable.
+        ps_adjust : boolean, default False.
+            If True, adjust the calculated p-value according to Phipson & Smyth (2010),
+            https://doi.org/10.2202/1544-6115.1585
+            
 
         Returns
         -------
@@ -87,6 +95,7 @@ def __init__(
         resamples=5000,
         permutation_count=5000,
         random_seed=12345,
+        ps_adjust=False,
     ):
         from ._stats_tools import confint_2group_diff as ci2g
         from ._stats_tools import effsize as es
@@ -99,13 +108,14 @@ def __init__(
             "hedges_g": "Hedges' g",
             "cliffs_delta": "Cliff's delta",
         }
-
+  
         self.__is_paired = is_paired
         self.__resamples = resamples
         self.__effect_size = effect_size
         self.__random_seed = random_seed
         self.__ci = ci
         self.__is_proportional = proportional
+        self.__ps_adjust = ps_adjust
         self._check_errors(control, test)
 
         # Convert to numpy arrays for speed.
@@ -329,6 +339,7 @@ def _perform_statistical_test(self):
             self.__effect_size,
             self.__is_paired,
             self.__permutation_count,
+            ps_adjust = self.__ps_adjust,
         )
 
         if self.__is_paired and not self.__is_proportional:
@@ -827,6 +838,7 @@ def __init__(
         delta2=False,
         experiment_label=None,
         mini_meta=False,
+        ps_adjust=False,
     ):
         """
         Parses the data from a Dabest object, enabling plotting and printing
@@ -846,6 +858,7 @@ def __init__(
         self.__x2 = x2
         self.__delta2 = delta2
         self.__is_mini_meta = mini_meta
+        self.__ps_adjust = ps_adjust
 
     def __pre_calc(self):
         from .misc_tools import print_greeting, get_varname
@@ -896,7 +909,6 @@ def __pre_calc(self):
                     cname = current_tuple[ix]
                     control = grouped_data[cname]
                 test = grouped_data[tname]
-
                 result = TwoGroupsEffectSize(
                     control,
                     test,
@@ -907,6 +919,7 @@ def __pre_calc(self):
                     self.__resamples,
                     self.__permutation_count,
                     self.__random_seed,
+                    self.__ps_adjust
                 )
                 r_dict = result.to_dict()
                 r_dict["control"] = cname
@@ -1633,6 +1646,10 @@ class PermutationTest:
         `random_seed` is used to seed the random number generator during
         bootstrap resampling. This ensures that the generated permutations
         are replicable.
+    ps_adjust : bool, default False
+        If True, the p-value is adjusted according to Phipson & Smyth (2010),
+        https://doi.org/10.2202/1544-6115.1585
+
         
     Returns
     -------
@@ -1651,6 +1668,7 @@ def __init__(self, control: array,
                  is_paired:str=None,
                  permutation_count:int=5000, # The number of permutations (reshuffles) to perform.
                  random_seed:int=12345,#`random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the generated permutations are replicable.
+                 ps_adjust:bool=False,
                  **kwargs):
         from ._stats_tools.effsize import two_group_difference
         from ._stats_tools.confint_2group_diff import calculate_group_var
@@ -1675,6 +1693,7 @@ def __init__(self, control: array,
 
         BAG = array([*control, *test])
         CONTROL_LEN = int(len(control))
+        TEST_LEN = int(len(test)) # devMJBL
         EXTREME_COUNT = 0.
         THRESHOLD = abs(two_group_difference(control, test, 
                                                 is_paired, effect_size))
@@ -1714,13 +1733,43 @@ def __init__(self, control: array,
 
             if abs(es) > THRESHOLD:
                 EXTREME_COUNT += 1.
+                
+        if ps_adjust:
+            # devMJBL
+            # adjust calculated p-value according to Phipson & Smyth (2010)
+            # https://doi.org/10.2202/1544-6115.1585
+            # as per R code in statmod::permp
+            # https://rdrr.io/cran/statmod/src/R/permp.R
+            # (assumes two-sided test)
+            # equal group sizes: halve the count, as statmod::permp does for two-sided tests
+            if CONTROL_LEN == TEST_LEN:
+                totalPermutations = binomcoeff(CONTROL_LEN + TEST_LEN, TEST_LEN)/2
+            else:
+                totalPermutations = binomcoeff(CONTROL_LEN + TEST_LEN, TEST_LEN)
+
+            if totalPermutations <= 10000:
+                # use exact calculation (binomcoeff returns a float; cast to int for array sizes)
+                nPerm = int(round(totalPermutations))
+                p = arange(1, nPerm + 1)/nPerm
+                Y = binom.cdf(k=EXTREME_COUNT, n=permutation_count, p=p)  # scalar k broadcasts over p
+                self.pvalue = mean(Y)
+            else:
+                # use integral approximation (128-point Gauss-Legendre over [0, 0.5/totalPermutations])
+                def binomcdf(p, k, n):
+                    return binom.cdf(k, n, p)
+
+                integrationVal, _ = fixed_quad(binomcdf,
+                                            a=0, b=0.5/totalPermutations,
+                                            args=(EXTREME_COUNT, permutation_count),
+                                            n=128)
 
+                self.pvalue = (EXTREME_COUNT + 1)/(permutation_count + 1) - integrationVal
+        else:
+            self.pvalue = EXTREME_COUNT / self.__permutation_count
+            
         self.__permutations = array(self.__permutations)
         self.__permutations_var = array(self.__permutations_var)
 
-        self.pvalue = EXTREME_COUNT / self.__permutation_count
-
-
     def __repr__(self):
         return("{} permutations were taken. The p-value is {}.".format(self.__permutation_count, 
                                                                       self.pvalue))
 
@@ -143,6 +143,7 @@
     "        experiment_label,\n",
     "        x1_level,\n",
     "        mini_meta,\n",
+    "        ps_adjust,\n",
     "    ):\n",
     "        \"\"\"\n",
     "        Parses and stores pandas DataFrames in preparation for estimation\n",
@@ -161,6 +162,7 @@
     "        self.__random_seed = random_seed\n",
     "        self.__is_proportional = proportional\n",
     "        self.__is_mini_meta = mini_meta\n",
+    "        self.__ps_adjust = ps_adjust\n",
     "\n",
     "        # after this call the attributes self.__experiment_label and self.__x1_level are updated\n",
     "        self._check_errors(x, y, idx, experiment, experiment_label, x1_level)\n",
@@ -800,6 +802,7 @@
     "            x1_level=self.__x1_level,\n",
     "            x2=self.__x2,\n",
     "            mini_meta=self.__is_mini_meta,\n",
+    "            ps_adjust=self.__ps_adjust,\n",
     "        )\n",
     "\n",
     "        self.__mean_diff = EffectSizeDataFrame(\n",
 
@@ -92,6 +92,10 @@
     "import lqrt\n",
     "from scipy.stats import norm\n",
     "import numpy as np\n",
+    "from scipy.special import binom as binomcoeff  # devMJBL\n",
+    "from scipy.stats import binom  # devMJBL\n",
+    "from scipy.integrate import fixed_quad  # devMJBL\n",
+    "from numpy import arange, mean  # devMJBL\n",
     "from numpy import array, isnan, isinf, repeat, random, isin, abs, var\n",
     "from numpy import sort as npsort\n",
     "from numpy import nan as npnan\n",
@@ -139,6 +143,10 @@
     "            `random_seed` is used to seed the random number generator during\n",
     "            bootstrap resampling. This ensures that the confidence intervals\n",
     "            reported are replicable.\n",
+    "        ps_adjust : boolean, default False.\n",
+    "            If True, adjust the calculated p-value according to Phipson & Smyth (2010),\n",
+    "            https://doi.org/10.2202/1544-6115.1585\n",
+    "            \n",
     "\n",
     "        Returns\n",
     "        -------\n",
@@ -176,6 +184,7 @@
     "        resamples=5000,\n",
     "        permutation_count=5000,\n",
     "        random_seed=12345,\n",
+    "        ps_adjust=False,\n",
     "    ):\n",
     "        from ._stats_tools import confint_2group_diff as ci2g\n",
     "        from ._stats_tools import effsize as es\n",
@@ -188,13 +197,14 @@
     "            \"hedges_g\": \"Hedges' g\",\n",
     "            \"cliffs_delta\": \"Cliff's delta\",\n",
     "        }\n",
-    "\n",
+    "  \n",
     "        self.__is_paired = is_paired\n",
     "        self.__resamples = resamples\n",
     "        self.__effect_size = effect_size\n",
     "        self.__random_seed = random_seed\n",
     "        self.__ci = ci\n",
     "        self.__is_proportional = proportional\n",
+    "        self.__ps_adjust = ps_adjust\n",
     "        self._check_errors(control, test)\n",
     "\n",
     "        # Convert to numpy arrays for speed.\n",
@@ -418,6 +428,7 @@
     "            self.__effect_size,\n",
     "            self.__is_paired,\n",
     "            self.__permutation_count,\n",
+    "            ps_adjust = self.__ps_adjust,\n",
     "        )\n",
     "\n",
     "        if self.__is_paired and not self.__is_proportional:\n",
@@ -1027,6 +1038,7 @@
     "        delta2=False,\n",
     "        experiment_label=None,\n",
     "        mini_meta=False,\n",
+    "        ps_adjust=False,\n",
     "    ):\n",
     "        \"\"\"\n",
     "        Parses the data from a Dabest object, enabling plotting and printing\n",
@@ -1046,6 +1058,7 @@
     "        self.__x2 = x2\n",
     "        self.__delta2 = delta2\n",
     "        self.__is_mini_meta = mini_meta\n",
+    "        self.__ps_adjust = ps_adjust\n",
     "\n",
     "    def __pre_calc(self):\n",
     "        from .misc_tools import print_greeting, get_varname\n",
@@ -1096,7 +1109,6 @@
     "                    cname = current_tuple[ix]\n",
     "                    control = grouped_data[cname]\n",
     "                test = grouped_data[tname]\n",
-    "\n",
     "                result = TwoGroupsEffectSize(\n",
     "                    control,\n",
     "                    test,\n",
@@ -1107,6 +1119,7 @@
     "                    self.__resamples,\n",
     "                    self.__permutation_count,\n",
     "                    self.__random_seed,\n",
+    "                    self.__ps_adjust\n",
     "                )\n",
     "                r_dict = result.to_dict()\n",
     "                r_dict[\"control\"] = cname\n",
@@ -2138,6 +2151,10 @@
     "        `random_seed` is used to seed the random number generator during\n",
     "        bootstrap resampling. This ensures that the generated permutations\n",
     "        are replicable.\n",
+    "    ps_adjust : bool, default False\n",
+    "        If True, the p-value is adjusted according to Phipson & Smyth (2010),\n",
+    "        https://doi.org/10.2202/1544-6115.1585\n",
+    "\n",
     "        \n",
     "    Returns\n",
     "    -------\n",
@@ -2156,6 +2173,7 @@
     "                 is_paired:str=None,\n",
     "                 permutation_count:int=5000, # The number of permutations (reshuffles) to perform.\n",
     "                 random_seed:int=12345,#`random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the generated permutations are replicable.\n",
+    "                 ps_adjust:bool=False,\n",
     "                 **kwargs):\n",
     "        from ._stats_tools.effsize import two_group_difference\n",
     "        from ._stats_tools.confint_2group_diff import calculate_group_var\n",
@@ -2180,6 +2198,7 @@
     "\n",
     "        BAG = array([*control, *test])\n",
     "        CONTROL_LEN = int(len(control))\n",
+    "        TEST_LEN = int(len(test)) # devMJBL\n",
     "        EXTREME_COUNT = 0.\n",
     "        THRESHOLD = abs(two_group_difference(control, test, \n",
     "                                                is_paired, effect_size))\n",
@@ -2219,13 +2238,43 @@
     "\n",
     "            if abs(es) > THRESHOLD:\n",
     "                EXTREME_COUNT += 1.\n",
+    "                \n",
+    "        if ps_adjust:\n",
+    "            # devMJBL\n",
+    "            # adjust calculated p-value according to Phipson & Smyth (2010)\n",
+    "            # https://doi.org/10.2202/1544-6115.1585\n",
+    "            # as per R code in statmod::permp\n",
+    "            # https://rdrr.io/cran/statmod/src/R/permp.R\n",
+    "            # (assumes two-sided test)\n",
+    "            # equal group sizes: halve the count, as statmod::permp does for two-sided tests\n",
+    "            if CONTROL_LEN == TEST_LEN:\n",
+    "                totalPermutations = binomcoeff(CONTROL_LEN + TEST_LEN, TEST_LEN)/2\n",
+    "            else:\n",
+    "                totalPermutations = binomcoeff(CONTROL_LEN + TEST_LEN, TEST_LEN)\n",
+    "\n",
+    "            if totalPermutations <= 10000:\n",
+    "                # use exact calculation (binomcoeff returns a float; cast to int for array sizes)\n",
+    "                nPerm = int(round(totalPermutations))\n",
+    "                p = arange(1, nPerm + 1)/nPerm\n",
+    "                Y = binom.cdf(k=EXTREME_COUNT, n=permutation_count, p=p)  # scalar k broadcasts over p\n",
+    "                self.pvalue = mean(Y)\n",
+    "            else:\n",
+    "                # use integral approximation (128-point Gauss-Legendre over [0, 0.5/totalPermutations])\n",
+    "                def binomcdf(p, k, n):\n",
+    "                    return binom.cdf(k, n, p)\n",
+    "\n",
+    "                integrationVal, _ = fixed_quad(binomcdf,\n",
+    "                                            a=0, b=0.5/totalPermutations,\n",
+    "                                            args=(EXTREME_COUNT, permutation_count),\n",
+    "                                            n=128)\n",
     "\n",
+    "                self.pvalue = (EXTREME_COUNT + 1)/(permutation_count + 1) - integrationVal\n",
+    "        else:\n",
+    "            self.pvalue = EXTREME_COUNT / self.__permutation_count\n",
+    "            \n",
     "        self.__permutations = array(self.__permutations)\n",
     "        self.__permutations_var = array(self.__permutations_var)\n",
     "\n",
-    "        self.pvalue = EXTREME_COUNT / self.__permutation_count\n",
-    "\n",
-    "\n",
     "    def __repr__(self):\n",
     "        return(\"{} permutations were taken. The p-value is {}.\".format(self.__permutation_count, \n",
     "                                                                      self.pvalue))\n",