From 5e3503467caf65ac7a1fe50505475cb36f98dbc9 Mon Sep 17 00:00:00 2001 From: Matthew Middlehurst Date: Wed, 13 Nov 2024 09:48:09 +0000 Subject: [PATCH 1/6] release --- MANIFEST.in | 3 --- README.md | 5 +++-- pyproject.toml | 4 ++-- tsml/__init__.py | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index b459cfe..28ac35a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,3 @@ -recursive-include docs * recursive-include tsml *.py recursive-include tsml/datasets *.ts include .coveragerc @@ -13,5 +12,3 @@ exclude .codecov.yml exclude .gitattributes exclude .gitignore exclude .pre-commit-config.yaml -exclude .readthedocs.yml -exclude sweep.yaml diff --git a/README.md b/README.md index fbd08e9..7609c25 100644 --- a/README.md +++ b/README.md @@ -8,14 +8,15 @@ # tsml-py -A toolkit for in-development time series machine learning algorithms. +A repository for in-development time series machine learning algorithms and other odd +bits by Matthew Middlehurst. Please see [`tsml_eval`](https://github.com/time-series-machine-learning/tsml-eval) and [`aeon`](https://github.com/aeon-toolkit/aeon) for more developed and stable packages. This package is more of a sandbox for testing out new ideas and algorithms. It may contain some algorithms and implementations that are not available in the other toolkits. -The current release of `tsml` is v0.4.0. +The current release of `tsml` is v0.5.0. ## Installation diff --git a/pyproject.toml b/pyproject.toml index 7679260..3cb3db7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta" [project] name = "tsml" -version = "0.4.0" -description = "A toolkit for time series machine learning algorithms." +version = "0.5.0" +description = "A development sandbox for time series machine learning algorithms." 
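[Editor's note: this release patch bumps the version string in two places, `pyproject.toml` and `tsml/__init__.py`, which must stay in sync. A minimal sketch of a consistency check, assuming it is run from the repository root on Python 3.11+ (where `tomllib` is standard library):]

```python
# A sketch of a release sanity check: the version declared in pyproject.toml
# should match the __version__ attribute set in tsml/__init__.py.
import tomllib

import tsml

with open("pyproject.toml", "rb") as f:
    pyproject_version = tomllib.load(f)["project"]["version"]

assert tsml.__version__ == pyproject_version, (
    f"version mismatch: {tsml.__version__} != {pyproject_version}"
)
```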
authors = [ {name = "Matthew Middlehurst", email = "m.b.middlehurst@soton.ac.uk"}, ] diff --git a/tsml/__init__.py b/tsml/__init__.py index da155f0..90b5545 100644 --- a/tsml/__init__.py +++ b/tsml/__init__.py @@ -1,3 +1,3 @@ """tsml.""" -__version__ = "0.4.0" +__version__ = "0.5.0" From 697ab0e9a6953c6b6d82c04b471c81a5b73accc9 Mon Sep 17 00:00:00 2001 From: Matthew Middlehurst Date: Wed, 13 Nov 2024 10:48:41 +0000 Subject: [PATCH 2/6] seql --- .github/workflows/release.yml | 4 +- pyproject.toml | 3 +- tsml/dictionary_based/__init__.py | 9 + tsml/dictionary_based/_mrseql.py | 185 ++++++++++++++++++ .../_mrsqm.py | 11 -- tsml/shapelet_based/__init__.py | 2 - 6 files changed, 198 insertions(+), 16 deletions(-) create mode 100644 tsml/dictionary_based/__init__.py create mode 100644 tsml/dictionary_based/_mrseql.py rename tsml/{shapelet_based => dictionary_based}/_mrsqm.py (94%) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 53dbf44..01621ad 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -79,14 +79,14 @@ jobs: with: timeout_minutes: 30 max_attempts: 3 - command: python -m pip install "${env:WHEELNAME}[dev,all_extras,unstable_extras]" + command: python -m pip install "${env:WHEELNAME}[dev,all_extras]" - if: matrix.os != 'windows-2022' name: Unix install uses: nick-fields/retry@v3 with: timeout_minutes: 30 max_attempts: 3 - command: python -m pip install "${{ env.WHEELNAME }}[dev,all_extras,unstable_extras]" + command: python -m pip install "${{ env.WHEELNAME }}[dev,all_extras]" - name: Tests run: python -m pytest -n logical diff --git a/pyproject.toml b/pyproject.toml index 3cb3db7..6a52ed1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,7 +57,8 @@ all_extras = [ unstable_extras = [ "pycatch22", "pyfftw>=0.12.0; python_version < '3.12'", # requires fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html) - "mrsqm>=0.0.7; platform_system != 'Windows' and python_version < '3.12'", # requires gcc and fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html) + "mrsqm>=0.0.7; platform_system == 'Linux' and python_version < '3.12'", # requires gcc and fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html) + "mrseql>=0.0.4,<0.1.0; platform_system == 'Linux' and python_version < '3.12'", # requires gcc and fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html) ] dev = [ "pre-commit", diff --git a/tsml/dictionary_based/__init__.py b/tsml/dictionary_based/__init__.py new file mode 100644 index 0000000..0c2ceaf --- /dev/null +++ b/tsml/dictionary_based/__init__.py @@ -0,0 +1,9 @@ +"""Dictionary-based estimators.""" + +__all__ = [ + "MrSEQLClassifier", + "MrSQMClassifier", +] + +from tsml.dictionary_based._mrseql import MrSEQLClassifier +from tsml.dictionary_based._mrsqm import MrSQMClassifier diff --git a/tsml/dictionary_based/_mrseql.py b/tsml/dictionary_based/_mrseql.py new file mode 100644 index 0000000..9197b23 --- /dev/null +++ b/tsml/dictionary_based/_mrseql.py @@ -0,0 +1,185 @@ +"""Multiple Representations Sequence Learning (MrSEQL) Classifier.""" + +from typing import List, Union + +import numpy as np +import pandas as pd +from sklearn.base import ClassifierMixin +from sklearn.utils.multiclass import check_classification_targets +from sklearn.utils.validation import check_is_fitted + +from tsml.base import BaseTimeSeriesEstimator +from tsml.utils.validation import _check_optional_dependency + + 
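[Editor's note: the new `_mrseql.py` module soft-imports its backing package through `_check_optional_dependency`, called in the constructor below. The real helper lives in `tsml.utils.validation` and its exact behaviour is not shown in this patch; the following is only a hypothetical sketch of the general guard pattern:]

```python
# Hypothetical stand-in for tsml's _check_optional_dependency: attempt the
# import and fail with an actionable message naming the estimator and package.
from importlib import import_module


def check_optional_dependency(module_name: str, package_name: str, caller) -> None:
    """Raise an informative error if an optional dependency is missing."""
    try:
        import_module(module_name)
    except ImportError as e:
        raise ModuleNotFoundError(
            f"{type(caller).__name__} requires the optional dependency "
            f"'{package_name}'. Install it with: pip install {package_name}"
        ) from e
```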
+class MrSEQLClassifier(ClassifierMixin, BaseTimeSeriesEstimator): + """ + Multiple Representations Sequence Learning (MrSEQL) Classifier. + + This is a wrapper for the MrSEQLClassifier algorithm from the `mrseql` package. + MrSEQL is not included in ``all_extras`` as it requires gcc and fftw + (http://www.fftw.org/index.html) to be installed on Windows and some Linux distributions. + + Overview: MrSEQL extends the symbolic sequence classifier (SEQL) to work with + multiple symbolic representations of time series, using features extracted from the + SAX and SFA transformations. + + Parameters + ---------- + seql_mode : "clf" or "fs", default="fs" + If "fs", trains a logistic regression model with features extracted by SEQL. + If "clf", builds an ensemble of SEQL models. + symrep : "sax" or "sfa", or ["sax", "sfa"], default="sax" + The symbolic features to extract from the time series. + custom_config : dict, default=None + Additional configuration for the symbolic transformations. See the original + package for details. If used, ``symrep`` is ignored. + + References + ---------- + .. [1] Le Nguyen, Thach, et al. "Interpretable time series classification using + linear models and multi-resolution multi-domain symbolic representations." + Data Mining and Knowledge Discovery 33 (2019): 1183-1222. + """ + + def __init__(self, seql_mode="fs", symrep="sax", custom_config=None) -> None: + self.seql_mode = seql_mode + self.symrep = symrep + self.custom_config = custom_config + + _check_optional_dependency("mrseql", "mrseql", self) + + super().__init__() + + def fit(self, X: Union[np.ndarray, List[np.ndarray]], y: np.ndarray) -> object: + """Fit the estimator to training data. + + Parameters + ---------- + X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints) + The training data. + y : 1D np.ndarray of shape (n_instances) + The class labels for fitting, indices correspond to instance indices in X. + + Returns + ------- + self : + Reference to self. + """ + X, y = self._validate_data(X=X, y=y, ensure_min_samples=2) + X = self._convert_X(X) + + check_classification_targets(y) + + self.n_instances_, self.n_dims_, self.series_length_ = ( + X.shape if X.ndim == 3 else (X.shape[0], 1, X.shape[1]) + ) + self.classes_ = np.unique(y) + self.n_classes_ = self.classes_.shape[0] + self.class_dictionary_ = {} + for index, class_val in enumerate(self.classes_): + self.class_dictionary_[class_val] = index + + if self.n_classes_ == 1: + return self + + from mrseql import MrSEQLClassifier + + _X = _convert_data(X) + + self.clf_ = MrSEQLClassifier( + seql_mode=self.seql_mode, + symrep=self.symrep, + custom_config=self.custom_config, + ) + self.clf_.fit(_X, y) + + return self + + def predict(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray: + """Predicts labels for sequences in X. + + Parameters + ---------- + X : 3D np.array of shape (n_instances, n_channels, n_timepoints) + The testing data. + + Returns + ------- + y : array-like of shape (n_instances) + Predicted class labels. + """ + check_is_fitted(self) + + # treat case of single class seen in fit + if self.n_classes_ == 1: + return np.repeat(list(self.class_dictionary_.keys()), X.shape[0], axis=0) + + X = self._validate_data(X=X, reset=False) + X = self._convert_X(X) + + return self.clf_.predict(_convert_data(X)) + + def predict_proba(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray: + """Predicts label probabilities for sequences in X.
+ + Parameters + ---------- + X : 3D np.array of shape (n_instances, n_channels, n_timepoints) + The testing data. + + Returns + ------- + y : array-like of shape (n_instances, n_classes_) + Predicted probabilities using the ordering in classes_. + """ + check_is_fitted(self) + + # treat case of single class seen in fit + if self.n_classes_ == 1: + return np.repeat([[1]], X.shape[0], axis=0) + + X = self._validate_data(X=X, reset=False) + X = self._convert_X(X) + + return self.clf_.predict_proba(_convert_data(X)) + + def _more_tags(self) -> dict: + return { + "non_deterministic": True, + "_xfail_checks": {"check_estimators_pickle": "External failure to pickle."}, + "optional_dependency": True, + } + + @classmethod + def get_test_params( + cls, parameter_set: Union[str, None] = None + ) -> Union[dict, List[dict]]: + """Return unit test parameter settings for the estimator. + + Parameters + ---------- + parameter_set : None or str, default=None + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict or list of dict + Parameters to create testing instances of the class. + """ + return {} + + +def _convert_data(X): + column_list = [] + for i in range(X.shape[1]): + nested_column = ( + pd.DataFrame(X[:, i, :]) + .apply(lambda x: [pd.Series(x, dtype=X.dtype)], axis=1) + .str[0] + .rename(str(i)) + ) + column_list.append(nested_column) + df = pd.concat(column_list, axis=1) + return df diff --git a/tsml/shapelet_based/_mrsqm.py b/tsml/dictionary_based/_mrsqm.py similarity index 94% rename from tsml/shapelet_based/_mrsqm.py rename to tsml/dictionary_based/_mrsqm.py index b1367ba..87fe4e4 100644 --- a/tsml/shapelet_based/_mrsqm.py +++ b/tsml/dictionary_based/_mrsqm.py @@ -59,17 +59,6 @@ class MrSQMClassifier(ClassifierMixin, BaseTimeSeriesEstimator): .. [2] Nguyen, Thach Le, and Georgiana Ifrim. "MrSQM: Fast time series classification with symbolic representations." arXiv preprint arXiv:2109.01036 (2021). - - Examples - -------- - >>> from tsml.shapelet_based import MrSQMClassifier - >>> from tsml.utils.testing import generate_3d_test_data - >>> X, y = generate_3d_test_data(n_samples=8, series_length=10, random_state=0) - >>> clf = MrSQMClassifier(random_state=0) # doctest: +SKIP - >>> clf.fit(X, y) # doctest: +SKIP - MrSQMClassifier(...) 
- >>> clf.predict(X) # doctest: +SKIP - array([0, 1, 1, 0, 0, 1, 0, 1]) """ def __init__( diff --git a/tsml/shapelet_based/__init__.py b/tsml/shapelet_based/__init__.py index dba6f01..b36f609 100644 --- a/tsml/shapelet_based/__init__.py +++ b/tsml/shapelet_based/__init__.py @@ -1,12 +1,10 @@ """Shapelet-based estimators.""" __all__ = [ - "MrSQMClassifier", "RandomShapeletForestClassifier", "RandomShapeletForestRegressor", ] -from tsml.shapelet_based._mrsqm import MrSQMClassifier from tsml.shapelet_based._rsf import ( RandomShapeletForestClassifier, RandomShapeletForestRegressor, From 9b7bd1bde8bf3d6659f2a80f0205d1ac0fcd13c9 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Sat, 11 Jan 2025 15:31:10 +0000 Subject: [PATCH 3/6] remove stuff in aeon and eval --- pyproject.toml | 16 +- tsml/distance_based/__init__.py | 2 - tsml/distance_based/_mpdist.py | 168 -- tsml/distances/__init__.py | 7 - tsml/distances/_manhattan.py | 66 - tsml/interval_based/__init__.py | 21 - tsml/interval_based/_base.py | 1086 ------------ tsml/interval_based/_interval_forest.py | 454 ----- tsml/interval_based/_interval_pipelines.py | 820 --------- tsml/interval_based/tests/__init__.py | 1 - .../tests/test_interval_forest.py | 206 --- .../tests/test_interval_pipelines.py | 38 - tsml/transformations/__init__.py | 16 - tsml/transformations/_acf.py | 140 -- tsml/transformations/_ar_coefficient.py | 119 -- tsml/transformations/_catch22.py | 335 ---- tsml/transformations/_interval_extraction.py | 1572 ----------------- tsml/transformations/_periodogram.py | 121 -- tsml/transformations/_quantile.py | 57 - tsml/vector/__init__.py | 13 - tsml/vector/_cit.py | 487 ----- tsml/vector/_rotation_forest.py | 792 --------- tsml/vector/tests/__init__.py | 1 - tsml/vector/tests/test_rotation_forest.py | 29 - 24 files changed, 6 insertions(+), 6561 deletions(-) delete mode 100644 tsml/distance_based/_mpdist.py delete mode 100644 tsml/distances/__init__.py delete mode 100644 tsml/distances/_manhattan.py delete mode 100644 tsml/interval_based/__init__.py delete mode 100644 tsml/interval_based/_base.py delete mode 100644 tsml/interval_based/_interval_forest.py delete mode 100644 tsml/interval_based/_interval_pipelines.py delete mode 100644 tsml/interval_based/tests/__init__.py delete mode 100644 tsml/interval_based/tests/test_interval_forest.py delete mode 100644 tsml/interval_based/tests/test_interval_pipelines.py delete mode 100644 tsml/transformations/_acf.py delete mode 100644 tsml/transformations/_ar_coefficient.py delete mode 100644 tsml/transformations/_catch22.py delete mode 100644 tsml/transformations/_interval_extraction.py delete mode 100644 tsml/transformations/_periodogram.py delete mode 100644 tsml/transformations/_quantile.py delete mode 100644 tsml/vector/__init__.py delete mode 100644 tsml/vector/_cit.py delete mode 100644 tsml/vector/_rotation_forest.py delete mode 100644 tsml/vector/tests/__init__.py delete mode 100644 tsml/vector/tests/test_rotation_forest.py diff --git a/pyproject.toml b/pyproject.toml index 6a52ed1..9ade4e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "tsml" -version = "0.5.0" +version = "0.6.0" description = "A development sandbox for time series machine learning algorithms." 
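[Editor's note: patch 2 adds the MrSEQL wrapper but, unlike the MrSQM doctest it removes above, ships no usage example. A hedged sketch of how the new classifier would be used, assuming the optional `mrseql` package is installed (Linux, Python < 3.12 per the environment markers above); outputs are illustrative, not verified:]

```python
# Usage sketch for the MrSEQLClassifier wrapper added in patch 2, mirroring
# the doctest removed from MrSQMClassifier. Requires the optional mrseql
# package, so this is not run as a doctest.
from tsml.dictionary_based import MrSEQLClassifier
from tsml.utils.testing import generate_3d_test_data

X, y = generate_3d_test_data(n_samples=8, series_length=10, random_state=0)

clf = MrSEQLClassifier(seql_mode="fs", symrep="sax")
clf.fit(X, y)
labels = clf.predict(X)        # class labels, shape (8,)
probs = clf.predict_proba(X)   # probabilities, shape (8, n_classes)
```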
authors = [ {name = "Matthew Middlehurst", email = "m.b.middlehurst@soton.ac.uk"}, @@ -38,11 +38,11 @@ classifiers = [ "Programming Language :: Python :: 3.12", ] dependencies = [ - "numba>=0.55.0,<0.61.0", - "numpy>=1.21.0,<2.2.0", - "scipy>=1.9.0,<1.14.0", - "pandas>=1.5.3,<2.3.0", - "scikit-learn>=1.0.0,<1.4.0", + "numba>=0.55.0", + "numpy>=1.21.0", + "scipy>=1.9.0", + "pandas>=1.5.3", + "scikit-learn>=1.0.0", "packaging>=20.0", ] @@ -50,13 +50,9 @@ dependencies = [ all_extras = [ "grailts", "scikit-fda>=0.7.0", - "statsmodels>=0.12.1", - "stumpy>=1.6.0", "wildboar", ] unstable_extras = [ - "pycatch22", - "pyfftw>=0.12.0; python_version < '3.12'", # requires fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html) "mrsqm>=0.0.7; platform_system == 'Linux' and python_version < '3.12'", # requires gcc and fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html) "mrseql>=0.0.4,<0.1.0; platform_system == 'Linux' and python_version < '3.12'", # requires gcc and fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html) ] diff --git a/tsml/distance_based/__init__.py b/tsml/distance_based/__init__.py index be9fde8..1e8b6f7 100644 --- a/tsml/distance_based/__init__.py +++ b/tsml/distance_based/__init__.py @@ -2,8 +2,6 @@ __all__ = [ "GRAILClassifier", - "MPDistClassifier", ] from tsml.distance_based._grail import GRAILClassifier -from tsml.distance_based._mpdist import MPDistClassifier diff --git a/tsml/distance_based/_mpdist.py b/tsml/distance_based/_mpdist.py deleted file mode 100644 index b931414..0000000 --- a/tsml/distance_based/_mpdist.py +++ /dev/null @@ -1,168 +0,0 @@ -"""Matrix Profile Distance 1-NN Classifier.""" - -__author__ = ["TonyBagnall", "patrickzib", "MatthewMiddlehurst"] -__all__ = ["MPDistClassifier"] - -from typing import List, Union - -import numpy as np -import stumpy -from sklearn.base import ClassifierMixin -from sklearn.metrics import pairwise -from sklearn.utils.multiclass import check_classification_targets -from sklearn.utils.validation import check_is_fitted - -from tsml.base import BaseTimeSeriesEstimator -from tsml.utils.validation import check_n_jobs - - -class MPDistClassifier(ClassifierMixin, BaseTimeSeriesEstimator): - """Matrix Profile Distance 1-NN Classifier. - - Calculates the matrix profile distance to the training data for each case and - returns the label of the nearest neighbour. - - Parameters - ---------- - window : int or float, default=10 - Window size for the matrix profile. If float, will use a proportion of the - series length. - n_jobs : int, default=1 - The number of jobs to run in parallel for both `fit` and `predict`. - ``-1`` means using all processors. - - Attributes - ---------- - n_instances_ : int - The number of train cases in the training set. - n_timepoints_ : int - The length of each series in the training set. - n_classes_ : int - Number of classes. Extracted from the data. - classes_ : ndarray of shape (n_classes_) - Holds the label for each class. - class_dictionary_ : dict - A dictionary mapping class labels to class indices in classes_. - - References - ---------- - .. [1] Gharghabi, Shaghayegh, et al. "Matrix profile xii: Mpdist: a novel time - series distance measure to allow data mining in more challenging scenarios." - 2018 IEEE International Conference on Data Mining (ICDM). IEEE, 2018. 
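[Editor's note: the `MPDistClassifier` deleted above is a 1-NN classifier over a custom pairwise distance, built on `sklearn.metrics.pairwise_distances` with a callable metric. A minimal sketch of that pattern, using a simple sum-of-absolute-differences metric as a stand-in for the removed `stumpy.mpdist` dependency:]

```python
# 1-NN with a custom callable metric, the core pattern of the removed
# MPDistClassifier. Any callable taking two 1D arrays and returning a float
# can be plugged in as the metric.
import numpy as np
from sklearn.metrics import pairwise_distances

rng = np.random.default_rng(0)
X_train = rng.random((8, 10))  # (n_instances, n_timepoints)
y_train = np.array([0, 1, 1, 0, 0, 1, 0, 1])
X_test = rng.random((4, 10))

dist = pairwise_distances(
    X_test, X_train, metric=lambda a, b: np.sum(np.abs(a - b))
)
y_pred = y_train[np.argmin(dist, axis=1)]  # label of each nearest neighbour
```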
- - Examples - -------- - >>> from tsml.distance_based import MPDistClassifier - >>> from tsml.utils.testing import generate_3d_test_data - >>> X, y = generate_3d_test_data(n_samples=8, series_length=10, random_state=0) - >>> clf = MPDistClassifier() - >>> clf.fit(X, y) - MPDistClassifier(...) - >>> clf.predict(X) - array([0, 1, 1, 0, 0, 1, 0, 1]) - """ - - def __init__(self, window=10, n_jobs=1): - self.window = window - self.n_jobs = n_jobs - - super().__init__() - - def fit(self, X: Union[np.ndarray, List[np.ndarray]], y: np.ndarray) -> object: - """Fit the estimator to training data. - - Parameters - ---------- - X : 2D np.ndarray of shape (n_instances, n_timepoints) - The training data. - y : 1D np.ndarray of shape (n_instances) - The class labels for fitting, indices correspond to instance indices in X - - Returns - ------- - self : - Reference to self. - """ - X, y = self._validate_data(X=X, y=y, ensure_min_samples=2) - X = self._convert_X(X) - - check_classification_targets(y) - - self.n_instances_, self.n_timepoints_ = X.shape - self.classes_ = np.unique(y) - self.n_classes_ = self.classes_.shape[0] - self.class_dictionary_ = {} - for index, class_val in enumerate(self.classes_): - self.class_dictionary_[class_val] = index - - if self.n_classes_ == 1: - return self - - self._n_jobs = check_n_jobs(self.n_jobs) - - self._X_train = X.astype(np.float64) - self._y_train = y - - return self - - def predict(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray: - """Predicts labels for sequences in X. - - Parameters - ---------- - X : 2D np.array of shape (n_instances, n_timepoints) - The testing data. - - Returns - ------- - y : array-like of shape (n_instances) - Predicted class labels. - """ - check_is_fitted(self) - - # treat case of single class seen in fit - if self.n_classes_ == 1: - return np.repeat(list(self.class_dictionary_.keys()), X.shape[0], axis=0) - - X = self._validate_data(X=X, reset=False) - X = self._convert_X(X) - - window = ( - self.window if self.window >= 1 else int(self.window * self.n_timepoints_) - ) - - distance_matrix = pairwise.pairwise_distances( - X.astype(np.float64), - self._X_train, - metric=(lambda x, y: stumpy.mpdist(x, y, window)), - n_jobs=self._n_jobs, - ) - - return self._y_train[np.argmin(distance_matrix, axis=1)] - - def _more_tags(self) -> dict: - return { - "X_types": ["2darray"], - "optional_dependency": True, - } - - @classmethod - def get_test_params( - cls, parameter_set: Union[str, None] = None - ) -> Union[dict, List[dict]]: - """Return unit test parameter settings for the estimator. - - Parameters - ---------- - parameter_set : None or str, default=None - Name of the set of test parameters to return, for use in tests. If no - special parameters are defined for a value, will return `"default"` set. - - Returns - ------- - params : dict or list of dict - Parameters to create testing instances of the class. 
- """ - return { - "window": 0.8, - } diff --git a/tsml/distances/__init__.py b/tsml/distances/__init__.py deleted file mode 100644 index 7aeafe0..0000000 --- a/tsml/distances/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -"""Distance functions.""" - -__all__ = [ - "manhattan_distance", -] - -from tsml.distances._manhattan import manhattan_distance diff --git a/tsml/distances/_manhattan.py b/tsml/distances/_manhattan.py deleted file mode 100644 index 125ae8e..0000000 --- a/tsml/distances/_manhattan.py +++ /dev/null @@ -1,66 +0,0 @@ -"""Manhattan distance.""" - -__author__ = ["chrisholder", "TonyBagnall", "baraline"] - -import numpy as np -from numba import njit - - -@njit(cache=True, fastmath=True) -def manhattan_distance(x: np.ndarray, y: np.ndarray) -> float: - r"""Compute the manhattan distance between two time series. - - The manhattan distance between two time series is defined as: - .. math:: - manhattan(x, y) = \sum_{i=1}^{n} |x_i - y_i| - - Parameters - ---------- - x: np.ndarray, of shape (n_channels, n_timepoints) or (n_timepoints) - First time series. - y: np.ndarray, of shape (m_channels, m_timepoints) or (m_timepoints) - Second time series. - - Returns - ------- - float : - manhattan distance between x and y. - - Raises - ------ - ValueError - If x and y are not 1D or 2D arrays. - - Examples - -------- - >>> import numpy as np - >>> from tsml.distances import manhattan_distance - >>> x = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]) - >>> y = np.array([[11, 12, 13, 14, 15, 16, 17, 18, 19, 20]]) - >>> manhattan_distance(x, y) - 100.0 - """ - if x.ndim == 1 and y.ndim == 1: - return _univariate_manhattan_distance(x, y) - if x.ndim == 2 and y.ndim == 2: - return _manhattan_distance(x, y) - raise ValueError("x and y must be 1D or 2D") - - -@njit(cache=True, fastmath=True) -def _manhattan_distance(x: np.ndarray, y: np.ndarray) -> float: - distance = 0.0 - min_val = min(x.shape[0], y.shape[0]) - for i in range(min_val): - distance += _univariate_manhattan_distance(x[i], y[i]) - return distance - - -@njit(cache=True, fastmath=True) -def _univariate_manhattan_distance(x: np.ndarray, y: np.ndarray) -> float: - distance = 0.0 - min_length = min(x.shape[0], y.shape[0]) - for i in range(min_length): - difference = x[i] - y[i] - distance += abs(difference) - return distance diff --git a/tsml/interval_based/__init__.py b/tsml/interval_based/__init__.py deleted file mode 100644 index 3633a1f..0000000 --- a/tsml/interval_based/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -"""Interval-based estimators.""" - -__all__ = [ - "BaseIntervalForest", - "IntervalForestClassifier", - "IntervalForestRegressor", - "RandomIntervalClassifier", - "RandomIntervalRegressor", - "SupervisedIntervalClassifier", -] - -from tsml.interval_based._base import BaseIntervalForest -from tsml.interval_based._interval_forest import ( - IntervalForestClassifier, - IntervalForestRegressor, -) -from tsml.interval_based._interval_pipelines import ( - RandomIntervalClassifier, - RandomIntervalRegressor, - SupervisedIntervalClassifier, -) diff --git a/tsml/interval_based/_base.py b/tsml/interval_based/_base.py deleted file mode 100644 index 1af0f90..0000000 --- a/tsml/interval_based/_base.py +++ /dev/null @@ -1,1086 +0,0 @@ -"""A base class for interval extracting forest estimators.""" - -__author__ = ["MatthewMiddlehurst"] -__all__ = ["BaseIntervalForest"] - -import inspect -import time -import warnings -from abc import ABCMeta, abstractmethod -from typing import List, Union - -import numpy as np -from joblib import Parallel -from 
sklearn.base import BaseEstimator, is_classifier, is_regressor -from sklearn.tree import BaseDecisionTree, DecisionTreeClassifier, DecisionTreeRegressor -from sklearn.utils import check_random_state -from sklearn.utils.multiclass import check_classification_targets -from sklearn.utils.parallel import delayed -from sklearn.utils.validation import check_is_fitted - -from tsml.base import BaseTimeSeriesEstimator, _clone_estimator -from tsml.transformations._interval_extraction import ( - RandomIntervalTransformer, - SupervisedIntervalTransformer, -) -from tsml.utils.numba_functions.stats import row_mean, row_slope, row_std -from tsml.utils.validation import check_n_jobs, is_transformer -from tsml.vector import CITClassifier - - -class BaseIntervalForest(BaseTimeSeriesEstimator, metaclass=ABCMeta): - """A base class for interval extracting forest estimators. - - Allows the implementation of classifiers and regressors along the lines of [1][2][3] - which extract intervals and create an ensemble from the subsequent features. - - Parameters - ---------- - base_estimator : BaseEstimator or None, default=None - scikit-learn BaseEstimator used to build the interval ensemble. If None, use a - simple decision tree. - n_estimators : int, default=200 - Number of estimators to build for the ensemble. - interval_selection_method : "random", "supervised" or "random-supervised", - default="random" - The interval selection transformer to use. - - "random" uses a RandomIntervalTransformer. - - "supervised" uses a SupervisedIntervalTransformer. - - "random-supervised" uses a SupervisedIntervalTransformer with - randomised elements. - - Supervised methods can only be used for classification tasks, and require - function inputs for interval_features rather than transformers. - n_intervals : int, str, list or tuple, default="sqrt" - Number of intervals to extract per tree for each series_transformers series. - - An int input will extract that number of intervals from the series, while a str - input will return a function of the series length (may differ per - series_transformers output) to extract that number of intervals. - Valid str inputs are: - - "sqrt": square root of the series length. - - "sqrt-div": sqrt of series length divided by the number - of series_transformers. - - A list or tuple of ints and/or strs will extract the number of intervals using - the above rules and sum the results for the final n_intervals. i.e. [4, "sqrt"] - will extract sqrt(n_timepoints) + 4 intervals. - - Different number of intervals for each series_transformers series can be - specified using a nested list or tuple. Any list or tuple input containing - another list or tuple must be the same length as the number of - series_transformers. - - While random interval extraction will extract the n_intervals intervals total - (removing duplicates), supervised intervals will run the supervised extraction - process n_intervals times, returning more intervals than specified. - min_interval_length : int, float, list, or tuple, default=3 - Minimum length of intervals to extract from series. float inputs take a - proportion of the series length to use as the minimum interval length. - - Different minimum interval lengths for each series_transformers series can be - specified using a list or tuple. Any list or tuple input must be the same length - as the number of series_transformers. - max_interval_length : int, float, list, or tuple, default=np.inf - Maximum length of intervals to extract from series. 
float inputs take a - proportion of the series length to use as the maximum interval length. - - Different maximum interval lengths for each series_transformers series can be - specified using a list or tuple. Any list or tuple input must be the same length - as the number of series_transformers. - - Ignored for supervised interval_selection_method inputs. - interval_features : TransformerMixin, callable, list, tuple, or None, default=None - The features to extract from the intervals using transformers or callable - functions. If None, use the mean, standard deviation, and slope of the series. - - Both transformers and functions should be able to take a 2D np.ndarray input. - Functions should output a 1d array (the feature for each series), and - transformers should output a 2d array where rows are the features for each - series. A list or tuple of transformers and/or functions will extract all - features and concatenate the output. - - Different features for each series_transformers series can be specified using a - nested list or tuple. Any list or tuple input containing another list or tuple - must be the same length as the number of series_transformers. - series_transformers : TransformerMixin, list, tuple, or None, default=None - The transformers to apply to the series before extracting intervals. If None, - use the series as is. - - A list or tuple of transformers will extract intervals from - all transformations concatenate the output. Including None in the list or tuple - will use the series as is for interval extraction. - att_subsample_size : int, float, list, tuple or None, default=None - The number of attributes to subsample for each estimator. If None, use all - - If int, use that number of attributes for all estimators. If float, use that - proportion of attributes for all estimators. - - Different subsample sizes for each series_transformers series can be specified - using a list or tuple. Any list or tuple input must be the same length as the - number of series_transformers. - replace_nan : "nan", int, float or None, default=None - The value to replace NaNs and infinite values with before fitting the base - estimator. int or float input will replace with the specified value, while - "nan" will replace infinite values with NaNs. If None, do not replace NaNs. - time_limit_in_minutes : int, default=0 - Time contract to limit build time in minutes, overriding n_estimators. - Default of 0 means n_estimators are used. - contract_max_n_estimators : int, default=500 - Max number of estimators when time_limit_in_minutes is set. - save_transformed_data : bool, default=False - Save the data transformed in fit. - random_state : int, RandomState instance or None, default=None - If `int`, random_state is the seed used by the random number generator; - If `RandomState` instance, random_state is the random number generator; - If `None`, the random number generator is the `RandomState` instance used - by `np.random`. - n_jobs : int, default=1 - The number of jobs to run in parallel for both `fit` and `predict`. - ``-1`` means using all processors. - parallel_backend : str, ParallelBackendBase instance or None, default=None - Specify the parallelisation backend implementation in joblib, if None a 'prefer' - value of "threads" is used by default. - Valid options are "loky", "multiprocessing", "threading" or a custom backend. - See the joblib Parallel documentation for more details. - - Attributes - ---------- - n_instances_ : int - The number of train cases. 
- n_channels_ : int - The number of channels per case. - n_timepoints_ : int - The length of each series. - total_intervals_ : int - Total number of intervals per tree from all representations. - estimators_ : list of shape (n_estimators) of BaseEstimator - The collections of estimators trained in fit. - intervals_ : list of shape (n_estimators) of TransformerMixin - Stores the interval extraction transformer for all estimators. - transformed_data_ : list of shape (n_estimators) of ndarray with shape - (n_instances_ ,total_intervals * att_subsample_size) - The transformed dataset for all estimators. Only saved when - save_transformed_data is true. - - References - ---------- - .. [1] H.Deng, G.Runger, E.Tuv and M.Vladimir, "A time series forest for - classification and feature extraction", Information Sciences, 239, 2013 - .. [2] Matthew Middlehurst and James Large and Anthony Bagnall. "The Canonical - Interval Forest (CIF) Classifier for Time Series Classification." - IEEE International Conference on Big Data 2020 - .. [3] Cabello, Nestor, et al. "Fast and Accurate Time Series Classification - Through Supervised Interval Search." IEEE ICDM 2020 - """ - - @abstractmethod - def __init__( - self, - base_estimator=None, - n_estimators=200, - interval_selection_method="random", - n_intervals="sqrt", - min_interval_length=3, - max_interval_length=np.inf, - interval_features=None, - series_transformers=None, - att_subsample_size=None, - replace_nan=None, - time_limit_in_minutes=None, - contract_max_n_estimators=500, - save_transformed_data=False, - random_state=None, - n_jobs=1, - parallel_backend=None, - ): - self.base_estimator = base_estimator - self.n_estimators = n_estimators - self.interval_selection_method = interval_selection_method - self.n_intervals = n_intervals - self.min_interval_length = min_interval_length - self.max_interval_length = max_interval_length - self.interval_features = interval_features - self.series_transformers = series_transformers - self.att_subsample_size = att_subsample_size - self.replace_nan = replace_nan - self.time_limit_in_minutes = time_limit_in_minutes - self.contract_max_n_estimators = contract_max_n_estimators - self.save_transformed_data = save_transformed_data - self.random_state = random_state - self.n_jobs = n_jobs - self.parallel_backend = parallel_backend - - super().__init__() - - # if subsampling attributes, an interval_features transformer must contain a - # parameter name from transformer_feature_selection and an attribute name - # (or property) from transformer_feature_names to allow features to be subsampled - transformer_feature_selection = ["features"] - transformer_feature_names = [ - "features_arguments_", - "_features_arguments", - "get_features_arguments", - "_get_features_arguments", - ] - # an interval_features transformer must contain one of these attribute names to - # be able to skip transforming features in predict - transformer_feature_skip = ["transform_features_", "_transform_features"] - - def fit(self, X: Union[np.ndarray, List[np.ndarray]], y: np.ndarray) -> object: - """Fit the estimator to training data. - - Parameters - ---------- - X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints) - The training data. - y : 1D np.ndarray of shape (n_instances) - The target labels for fitting, indices correspond to instance indices in X - - Returns - ------- - self : - Reference to self. 
- """ - X, y = self._validate_data(X=X, y=y, ensure_min_samples=2) - X = self._convert_X(X) - - rng = check_random_state(self.random_state) - - self.n_instances_, self.n_channels_, self.n_timepoints_ = X.shape - if is_classifier(self): - check_classification_targets(y) - - self.classes_ = np.unique(y) - self.n_classes_ = self.classes_.shape[0] - self.class_dictionary_ = {} - for index, class_val in enumerate(self.classes_): - self.class_dictionary_[class_val] = index - - if self.n_classes_ == 1: - return self - - self._base_estimator = self.base_estimator - if self.base_estimator is None: - if is_classifier(self): - self._base_estimator = DecisionTreeClassifier(criterion="entropy") - elif is_regressor(self): - self._base_estimator = DecisionTreeRegressor(criterion="absolute_error") - else: - raise ValueError( - f"{self} must be a scikit-learn compatible classifier or " - "regressor." - ) - # base_estimator must be an sklearn estimator - elif not isinstance(self.base_estimator, BaseEstimator): - raise ValueError( - "base_estimator must be a scikit-learn BaseEstimator or None. " - f"Found: {self.base_estimator}" - ) - - # use the base series if series_transformers is None - if self.series_transformers is None or self.series_transformers == []: - Xt = [X] - self._series_transformers = [None] - # clone series_transformers if it is a transformer and transform the input data - elif is_transformer(self.series_transformers): - t = _clone_estimator(self.series_transformers, random_state=rng) - Xt = [t.fit_transform(X, y)] - self._series_transformers = [t] - # clone each series_transformers transformer and include the base series if None - # is in the list - elif isinstance(self.series_transformers, (list, tuple)): - Xt = [] - self._series_transformers = [] - - for transformer in self.series_transformers: - if transformer is None: - Xt.append(X) - self._series_transformers.append(None) - elif is_transformer(transformer): - t = _clone_estimator(transformer, random_state=rng) - Xt.append(t.fit_transform(X, y)) - self._series_transformers.append(t) - else: - raise ValueError( - f"Invalid series_transformers list input. Found {transformer}" - ) - # other inputs are invalid - else: - raise ValueError( - f"Invalid series_transformers input. Found {self.series_transformers}" - ) - - # if only a single n_intervals value is passed it must be an int or str - if isinstance(self.n_intervals, (int, str)): - n_intervals = [[self.n_intervals]] * len(Xt) - elif isinstance(self.n_intervals, (list, tuple)): - # if input is a list and only contains ints or strs, use the list for all - # series in Xt - if all(isinstance(item, (int, str)) for item in self.n_intervals): - n_intervals = [self.n_intervals] * len(Xt) - # other lists must be the same length as Xt - elif len(self.n_intervals) != len(Xt): - raise ValueError( - "n_intervals as a list or tuple containing other lists or tuples " - "must be the same length as series_transformers." - ) - # list items can be a list of items or a single item for each - # series_transformer, but each individual item must be an int or str - else: - n_intervals = [] - for items in self.n_intervals: - if isinstance(items, (list, tuple)): - if not all(isinstance(item, (int, str)) for item in items): - raise ValueError( - "Individual items in a n_intervals list or tuple must " - f"be an int or str. 
Input {items} does not contain " - "only ints or strs" - ) - n_intervals.append(items) - elif isinstance(items, (int, str)): - n_intervals.append([items]) - else: - raise ValueError( - "Individual items in a n_intervals list or tuple must be " - f"an int or str. Found: {items}" - ) - # other inputs are invalid - else: - raise ValueError(f"Invalid n_intervals input. Found {self.n_intervals}") - - # add together the number of intervals for each series_transformer - # str input must be one of a set valid options - self._n_intervals = [0] * len(Xt) - for i, series in enumerate(Xt): - for method in n_intervals[i]: - if isinstance(method, int): - self._n_intervals[i] += method - elif isinstance(method, str): - # sqrt of series length - if method.lower() == "sqrt": - self._n_intervals[i] += int( - np.sqrt(series.shape[2]) * np.sqrt(series.shape[1]) - ) - # sqrt of series length divided by the number of series_transformers - elif method.lower() == "sqrt-div": - self._n_intervals[i] += int( - (np.sqrt(series.shape[2]) * np.sqrt(series.shape[1])) - / len(Xt) - ) - else: - raise ValueError( - "Invalid str input for n_intervals. Must be " - f'("sqrt","sqrt-div"). Found {method}' - ) - - # each series_transformer must have at least 1 interval extracted - for i, n in enumerate(self._n_intervals): - if n <= 0: - self._n_intervals[i] = 1 - - self.total_intervals_ = sum(self._n_intervals) - - # minimum interval length - if isinstance(self.min_interval_length, int): - self._min_interval_length = [self.min_interval_length] * len(Xt) - # min_interval_length must be at less than one if it is a float (proportion of - # of the series length) - elif ( - isinstance(self.min_interval_length, float) - and self.min_interval_length <= 1 - ): - self._min_interval_length = [ - int(self.min_interval_length * t.shape[2]) for t in Xt - ] - # if the input is a list, it must be the same length as the number of - # series_transformers - # list values must be ints or floats. The same checks as above are performed - elif isinstance(self.min_interval_length, (list, tuple)): - if len(self.min_interval_length) != len(Xt): - raise ValueError( - "min_interval_length as a list or tuple must be the same length " - "as series_transformers." - ) - - self._min_interval_length = [] - for i, length in enumerate(self.min_interval_length): - if isinstance(length, float) and length <= 1: - self._min_interval_length.append(int(length * Xt[i].shape[2])) - elif isinstance(length, int): - self._min_interval_length.append(length) - else: - raise ValueError( - "min_interval_length list items must be int or floats. " - f"Found {length}" - ) - # other inputs are invalid - else: - raise ValueError( - f"Invalid min_interval_length input. 
Found {self.min_interval_length}" - ) - - # min_interval_length cannot be less than 3 or greater than the series length - for i, n in enumerate(self._min_interval_length): - if n > Xt[i].shape[2]: - self._min_interval_length[i] = Xt[i].shape[2] - elif n < 3: - self._min_interval_length[i] = 3 - - # maximum interval length - if ( - isinstance(self.max_interval_length, int) - or self.max_interval_length == np.inf - ): - self._max_interval_length = [self.max_interval_length] * len(Xt) - # max_interval_length must be at less than one if it is a float (proportion of - # of the series length) - elif ( - isinstance(self.max_interval_length, float) - and self.max_interval_length <= 1 - ): - self._max_interval_length = [ - int(self.max_interval_length * t.shape[2]) for t in Xt - ] - # if the input is a list, it must be the same length as the number of - # series_transformers - # list values must be ints or floats. The same checks as above are performed - elif isinstance(self.max_interval_length, (list, tuple)): - if len(self.max_interval_length) != len(Xt): - raise ValueError( - "max_interval_length as a list or tuple must be the same length " - "as series_transformers." - ) - - self._max_interval_length = [] - for i, length in enumerate(self.max_interval_length): - if isinstance(length, float) and length <= 1: - self._max_interval_length.append(int(length * Xt[i].shape[2])) - elif isinstance(length, int): - self._max_interval_length.append(length) - else: - raise ValueError( - "max_interval_length list items must be int or floats. " - f"Found {length}" - ) - # other inputs are invalid - else: - raise ValueError( - f"Invalid max_interval_length input. Found {self.max_interval_length}" - ) - - # max_interval_length cannot be less than min_interval_length or greater than - # the series length - for i, n in enumerate(self._max_interval_length): - if n < self._min_interval_length[i]: - self._max_interval_length[i] = self._min_interval_length[i] - elif n > Xt[i].shape[2]: - self._max_interval_length[i] = Xt[i].shape[2] - - # we store whether each series_transformer contains a transformer and/or - # function in its interval_features - self._interval_transformer = [False] * len(Xt) - self._interval_function = [False] * len(Xt) - # single transformer or function for all series_transformers - if is_transformer(self.interval_features): - self._interval_transformer = [True] * len(Xt) - transformer = _clone_estimator(self.interval_features, random_state=rng) - self._interval_features = [[transformer]] * len(Xt) - elif callable(self.interval_features): - self._interval_function = [True] * len(Xt) - self._interval_features = [[self.interval_features]] * len(Xt) - elif isinstance(self.interval_features, (list, tuple)): - # if input is a list and only contains transformers or functions, use the - # list for all series in Xt - if all( - is_transformer(item) or callable(item) - for item in self.interval_features - ): - for feature in self.interval_features: - if is_transformer(feature): - self._interval_transformer[0] = True - elif callable(feature): - self._interval_function[0] = True - self._interval_features = [self.interval_features] * len(Xt) - # other lists must be the same length as Xt - elif len(self.interval_features) != len(Xt): - raise ValueError( - "interval_features as a list or tuple containing other lists or " - "tuples must be the same length as series_transformers." 
- ) - # list items can be a list of items or a single item for each - # series_transformer, but each individual item must be a transformer - # or function - else: - self._interval_features = [] - for i, feature in enumerate(self.interval_features): - if isinstance(feature, (list, tuple)): - for method in feature: - if is_transformer(method): - self._interval_transformer[i] = True - feature = _clone_estimator(feature, random_state=rng) - elif callable(method): - self._interval_function[i] = True - else: - raise ValueError( - "Individual items in a interval_features list or " - "tuple must be a transformer or function. Input " - f"{feature} does not contain only transformers and " - f"functions." - ) - self._interval_features.append(feature) - elif is_transformer(feature): - self._interval_transformer[i] = True - feature = _clone_estimator(feature, random_state=rng) - self._interval_features.append([feature]) - elif callable(feature): - self._interval_function[i] = True - self._interval_features.append([feature]) - else: - raise ValueError( - "Individual items in a interval_features list or tuple " - f"must be a transformer or function. Found {feature}" - ) - # use basic summary stats by default if None - elif self.interval_features is None: - self._interval_function = [True] * len(Xt) - self._interval_features = [[row_mean, row_std, row_slope]] * len(Xt) - # other inputs are invalid - else: - raise ValueError( - f"Invalid interval_features input. Found {self.interval_features}" - ) - - # att_subsample_size must be at least one if it is an int - if isinstance(self.att_subsample_size, int): - if self.att_subsample_size < 1: - raise ValueError( - "att_subsample_size must be at least one if it is an int." - ) - - self._att_subsample_size = [self.att_subsample_size] * len(Xt) - # att_subsample_size must be at less than one if it is a float (proportion of - # total attributed to subsample) - elif isinstance(self.att_subsample_size, float): - if self.att_subsample_size > 1 or self.att_subsample_size <= 0: - raise ValueError( - "att_subsample_size must be between 0 and 1 if it is a float." - ) - - self._att_subsample_size = [self.att_subsample_size] * len(Xt) - # default is no attribute subsampling with None - elif self.att_subsample_size is None: - self._att_subsample_size = [self.att_subsample_size] * len(Xt) - # if the input is a list, it must be the same length as the number of - # series_transformers - # list values must be ints, floats or None. The same checks as above are - # performed - elif isinstance(self.att_subsample_size, (list, tuple)): - if len(self.att_subsample_size) != len(Xt): - raise ValueError( - "att_subsample_size as a list or tuple must be the same length as " - "series_transformers." - ) - - self._att_subsample_size = [] - for ssize in self.att_subsample_size: - if isinstance(ssize, int): - if ssize < 1: - raise ValueError( - "att_subsample_size in list must be at least one if it is " - "an int." - ) - - self._att_subsample_size.append(ssize) - elif isinstance(ssize, float): - if ssize > 1: - raise ValueError( - "att_subsample_size in list must be between 0 and 1 if it " - "is a " - "float." - ) - - self._att_subsample_size.append(ssize) - elif ssize is None: - self._att_subsample_size.append(ssize) - else: - raise ValueError( - "Invalid interval_features input in list. Found " - f"{self.att_subsample_size}" - ) - # other inputs are invalid - else: - raise ValueError( - f"Invalid interval_features input. 
Found {self.att_subsample_size}" - ) - - # if we are subsampling attributes for a series_transformer and it uses a - # BaseTransformer, we must ensure it has the required parameters and - # attributes to do so - self._transformer_feature_selection = [[]] * len(Xt) - self._transformer_feature_names = [[]] * len(Xt) - for r, att_subsample in enumerate(self._att_subsample_size): - if att_subsample is not None: - for transformer in self._interval_features[r]: - if is_transformer(transformer): - params = inspect.signature(transformer.__init__).parameters - - # the transformer must have a parameter with one of the - # names listed in transformer_feature_selection as a way to - # select which features the transformer should transform - has_params = False - for n in self.transformer_feature_selection: - if params.get(n, None) is not None: - has_params = True - self._transformer_feature_selection[r].append(n) - break - - if not has_params: - raise ValueError( - "All transformers in interval_features must have a " - "parameter named in transformer_feature_selection to " - "be used in attribute subsampling." - ) - - # the transformer must have an attribute with one of the - # names listed in transformer_feature_names as a list or tuple - # of valid options for the previous parameter - has_feature_names = False - for n in self.transformer_feature_names: - if hasattr(transformer, n) and isinstance( - getattr(transformer, n), (list, tuple) - ): - has_feature_names = True - self._transformer_feature_names[r].append(n) - break - - if not has_feature_names: - raise ValueError( - "All transformers in interval_features must have an " - "attribute or propertynamed in " - "transformer_feature_names to be used in attribute " - "subsampling." - ) - - # verify the interval_selection_method is a valid string - if isinstance(self.interval_selection_method, str): - # SupervisedIntervals cannot currently handle transformers or regression - if ( - self.interval_selection_method.lower() == "supervised" - or self.interval_selection_method.lower() == "random-supervised" - ): - if any(self._interval_transformer): - raise ValueError( - "Supervised interval_selection_method must only have function " - "inputs for interval_features." - ) - - if is_regressor(self): - raise ValueError( - "Supervised interval_selection_method cannot be used for " - "regression." - ) - # RandomIntervals - elif not self.interval_selection_method.lower() == "random": - raise ValueError( - 'Unknown interval_selection_method, must be one of ("random",' - '"supervised","random-supervised"). ' - f"Found: {self.interval_selection_method}" - ) - # other inputs are invalid - else: - raise ValueError( - 'Unknown interval_selection_method, must be one of ("random",' - '"supervised","random-supervised"). ' - f"Found: {self.interval_selection_method}" - ) - - # verify replace_nan is a valid string, number or None - if ( - (not isinstance(self.replace_nan, str) or self.replace_nan.lower() != "nan") - and not isinstance(self.replace_nan, (int, float)) - and self.replace_nan is not None - ): - raise ValueError(f"Invalid replace_nan input. 
Found {self.replace_nan}") - - self._n_jobs = check_n_jobs(self.n_jobs) - - if self.time_limit_in_minutes is not None and self.time_limit_in_minutes > 0: - time_limit = self.time_limit_in_minutes * 60 - start_time = time.time() - train_time = 0 - - self._n_estimators = 0 - self.estimators_ = [] - self.intervals_ = [] - self.transformed_data_ = [] - - while ( - train_time < time_limit - and self._n_estimators < self.contract_max_n_estimators - ): - fit = Parallel( - n_jobs=self._n_jobs, - backend=self.parallel_backend, - prefer="threads", - )( - delayed(self._fit_estimator)( - Xt, - y, - rng.randint(np.iinfo(np.int32).max), - ) - for _ in range(self._n_jobs) - ) - - ( - estimators, - intervals, - transformed_data, - ) = zip(*fit) - - self.estimators_ += estimators - self.intervals_ += intervals - self.transformed_data_ += transformed_data - - self._n_estimators += self._n_jobs - train_time = time.time() - start_time - else: - self._n_estimators = self.n_estimators - - fit = Parallel( - n_jobs=self._n_jobs, - backend=self.parallel_backend, - prefer="threads", - )( - delayed(self._fit_estimator)( - Xt, - y, - rng.randint(np.iinfo(np.int32).max), - ) - for _ in range(self._n_estimators) - ) - - ( - self.estimators_, - self.intervals_, - self.transformed_data_, - ) = zip(*fit) - - return self - - def predict(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray: - """Predicts labels for sequences in X. - - Parameters - ---------- - X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints) - The testing data. - - Returns - ------- - y : array-like of shape (n_instances) - Predicted target labels. - """ - if is_regressor(self): - check_is_fitted(self) - - Xt = self._predict_setup(X) - - y_preds = Parallel( - n_jobs=self._n_jobs, - backend=self.parallel_backend, - prefer="threads", - )( - delayed(self._predict_for_estimator)( - Xt, - self.estimators_[i], - self.intervals_[i], - predict_proba=False, - ) - for i in range(self._n_estimators) - ) - - return np.mean(y_preds, axis=0) - else: - check_is_fitted(self) - - # treat case of single class seen in fit - if self.n_classes_ == 1: - return np.repeat( - list(self.class_dictionary_.keys()), X.shape[0], axis=0 - ) - - return np.array( - [self.classes_[int(np.argmax(prob))] for prob in self._predict_proba(X)] - ) - - def _predict_proba(self, X): - check_is_fitted(self) - - # treat case of single class seen in fit - if self.n_classes_ == 1: - return np.repeat([[1]], X.shape[0], axis=0) - - Xt = self._predict_setup(X) - - y_probas = Parallel( - n_jobs=self._n_jobs, backend=self.parallel_backend, prefer="threads" - )( - delayed(self._predict_for_estimator)( - Xt, - self.estimators_[i], - self.intervals_[i], - predict_proba=True, - ) - for i in range(self._n_estimators) - ) - - output = np.sum(y_probas, axis=0) / ( - np.ones(self.n_classes_) * self._n_estimators - ) - return output - - def _fit_estimator(self, Xt, y, seed): - # random state for this estimator - rng = check_random_state(seed) - - intervals = [] - transform_data_lengths = [] - interval_features = np.empty((self.n_instances_, 0)) - - # for each transformed series - for r in range(len(Xt)): - # subsample attributes if enabled - if self._att_subsample_size[r] is not None: - # separate transformers and functions in separate lists - # add the feature names of transformers to a list to subsample from - # and calculate the total number of features - all_transformers = [] - all_transformer_features = [] - all_function_features = [] - for feature in self._interval_features[r]: - if 
is_transformer(feature): - all_transformer_features += getattr( - feature, - self._transformer_feature_names[r][len(all_transformers)], - ) - all_transformers.append(feature) - else: - all_function_features.append(feature) - - # handle float subsample size - num_features = len(all_transformer_features) + len( - all_function_features - ) - att_subsample_size = self._att_subsample_size[r] - if isinstance(self._att_subsample_size[r], float): - att_subsample_size = int(att_subsample_size * num_features) - - # if the att_subsample_size is greater than the number of features - # give a warning and add all features - features = [] - if att_subsample_size < num_features: - # subsample the transformer and function features by index - atts = rng.choice( - num_features, - att_subsample_size, - replace=False, - ) - atts.sort() - - # subsample the feature transformers using the - # transformer_feature_names and transformer_feature_selection - # attributes. - # the presence of valid attributes is verified in fit. - count = 0 - length = 0 - for n, transformer in enumerate(all_transformers): - this_len = len( - getattr(transformer, self._transformer_feature_names[r][n]) - ) - length += this_len - - # subsample feature names from this transformer - t_features = [] - while count < len(atts) and atts[count] < length: - t_features.append( - getattr( - transformer, - self._transformer_feature_names[r][n], - )[atts[count] + this_len - length] - ) - count += 1 - - # tell this transformer to only transform the selected features - if len(t_features) > 0: - new_transformer = _clone_estimator(transformer, seed) - setattr( - new_transformer, - self._transformer_feature_selection[r][n], - t_features, - ) - features.append(new_transformer) - - # subsample the remaining function features - for i in range(att_subsample_size - count): - features.append(all_function_features[atts[count + i] - length]) - else: - warnings.warn( - f"Attribute subsample size {att_subsample_size} is larger than " - f"or equal to the number of attributes {num_features} for " - f"series {self._series_transformers[r]}", - stacklevel=2, - ) - for feature in self._interval_features[r]: - if is_transformer(feature): - features.append(_clone_estimator(feature, seed)) - else: - features.append(feature) - # add all features while cloning estimators if not subsampling - else: - features = [] - for feature in self._interval_features[r]: - if is_transformer(feature): - features.append(_clone_estimator(feature, seed)) - else: - features.append(feature) - - # create the selected interval selector and set its parameters - if self.interval_selection_method == "random": - selector = RandomIntervalTransformer( - n_intervals=self._n_intervals[r], - min_interval_length=self._min_interval_length[r], - max_interval_length=self._max_interval_length[r], - features=features, - random_state=seed, - ) - elif self.interval_selection_method == "supervised": - selector = SupervisedIntervalTransformer( - n_intervals=self._n_intervals[r], - min_interval_length=self._min_interval_length[r], - features=features, - randomised_split_point=False, - random_state=seed, - ) - elif self.interval_selection_method == "random-supervised": - selector = SupervisedIntervalTransformer( - n_intervals=self._n_intervals[r], - min_interval_length=self._min_interval_length[r], - features=features, - randomised_split_point=True, - random_state=seed, - ) - - # fit the interval selector, transform the current series using it and save - # the transformer - intervals.append(selector) - f = 
intervals[r].fit_transform(Xt[r], y) - - # concatenate the data and save this transforms number of attributes - transform_data_lengths.append(f.shape[1]) - interval_features = np.hstack((interval_features, f)) - - if isinstance(self.replace_nan, str) and self.replace_nan.lower() == "nan": - interval_features = np.nan_to_num( - interval_features, False, np.nan, np.nan, np.nan - ) - elif isinstance(self.replace_nan, (int, float)): - interval_features = np.nan_to_num( - interval_features, - False, - self.replace_nan, - self.replace_nan, - self.replace_nan, - ) - - # clone and fit the base estimator using the transformed data - tree = _clone_estimator(self._base_estimator, random_state=seed) - tree.fit(interval_features, y) - - # find the features used in the tree and inform the interval selectors to not - # transform these features if possible - self._efficient_predictions = True - relevant_features = None - if isinstance(tree, BaseDecisionTree): - relevant_features = np.unique(tree.tree_.feature[tree.tree_.feature >= 0]) - elif isinstance(tree, CITClassifier): - relevant_features, _ = tree.tree_node_splits_and_gain() - - if relevant_features is not None: - features_to_transform = [False] * interval_features.shape[1] - for i in relevant_features: - features_to_transform[i] = True - - count = 0 - for r in range(len(Xt)): - intervals[r].transformer_feature_skip = self.transformer_feature_skip - - # if the transformers don't have valid attributes to skip False is - # returned - completed = intervals[r].set_features_to_transform( - features_to_transform[count : count + transform_data_lengths[r]], - raise_error=False, - ) - count += transform_data_lengths[r] - - if not completed: - self._efficient_predictions = False - else: - self._efficient_predictions = False - - return [ - tree, - intervals, - interval_features if self.save_transformed_data else None, - ] - - def _predict_setup(self, X): - X = self._validate_data(X=X, reset=False) - X = self._convert_X(X) - - n_instances, n_channels, n_timepoints = X.shape - - if n_channels != self.n_channels_: - raise ValueError( - "The number of channels in the train data does not match the number " - "of channels in the test data" - ) - if n_timepoints != self.n_timepoints_: - raise ValueError( - "The series length of the train data does not match the series length " - "of the test data" - ) - - Xt = [] - for transformer in self._series_transformers: - if transformer is None: - Xt.append(X) - elif is_transformer(transformer): - Xt.append(transformer.transform(X)) - - return Xt - - def _predict_for_estimator(self, Xt, estimator, intervals, predict_proba=False): - interval_features = np.empty((Xt[0].shape[0], 0)) - - for r in range(len(Xt)): - f = intervals[r].transform(Xt[r]) - interval_features = np.hstack((interval_features, f)) - - if isinstance(self.replace_nan, str) and self.replace_nan.lower() == "nan": - interval_features = np.nan_to_num( - interval_features, False, np.nan, np.nan, np.nan - ) - elif isinstance(self.replace_nan, (int, float)): - interval_features = np.nan_to_num( - interval_features, - False, - self.replace_nan, - self.replace_nan, - self.replace_nan, - ) - - if predict_proba: - return estimator.predict_proba(interval_features) - else: - return estimator.predict(interval_features) diff --git a/tsml/interval_based/_interval_forest.py b/tsml/interval_based/_interval_forest.py deleted file mode 100644 index 32d3c7f..0000000 --- a/tsml/interval_based/_interval_forest.py +++ /dev/null @@ -1,454 +0,0 @@ -"""Configurable interval forest 
estimators.""" - -__author__ = ["MatthewMiddlehurst"] -__all__ = ["IntervalForestClassifier", "IntervalForestRegressor"] - -from typing import List, Union - -import numpy as np -from sklearn.base import ClassifierMixin, RegressorMixin - -from tsml.interval_based._base import BaseIntervalForest - - -class IntervalForestClassifier(ClassifierMixin, BaseIntervalForest): - """Configurable interval extracting forest classifier. - - Extracts multiple phase-dependent intervals from time series data and builds a - base classifier on summary statistic extracted from each interval. Forms and - ensemble of these classifiers. - - Allows the implementation of classifiers along the lines of [1][2][3] - which extract intervals and create an ensemble from the subsequent features. - - By default, uses a configuration similar to TimeSeriesFroest [1]. - - Parameters - ---------- - base_estimator : BaseEstimator or None, default=None - scikit-learn BaseEstimator used to build the interval ensemble. If None, use a - simple decision tree. - n_estimators : int, default=200 - Number of estimators to build for the ensemble. - interval_selection_method : "random", "supervised" or "random-supervised", - default="random" - The interval selection transformer to use. - - "random" uses a RandomIntervalTransformer. - - "supervised" uses a SupervisedIntervalTransformer. - - "random-supervised" uses a SupervisedIntervalTransformer with - randomised elements. - n_intervals : int, str, list or tuple, default="sqrt" - Number of intervals to extract per tree for each series_transformers series. - - An int input will extract that number of intervals from the series, while a str - input will return a function of the series length (may differ per - series_transformers output) to extract that number of intervals. - Valid str inputs are: - - "sqrt": square root of the series length. - - "sqrt-div": sqrt of series length divided by the number - of series_transformers. - - A list or tuple of ints and/or strs will extract the number of intervals using - the above rules and sum the results for the final n_intervals. i.e. [4, "sqrt"] - will extract sqrt(n_timepoints) + 4 intervals. - - Different number of intervals for each series_transformers series can be - specified using a nested list or tuple. Any list or tuple input containing - another list or tuple must be the same length as the number of - series_transformers. - - While random interval extraction will extract the n_intervals intervals total - (removing duplicates), supervised intervals will run the supervised extraction - process n_intervals times, returning more intervals than specified. - min_interval_length : int, float, list, or tuple, default=3 - Minimum length of intervals to extract from series. float inputs take a - proportion of the series length to use as the minimum interval length. - - Different minimum interval lengths for each series_transformers series can be - specified using a list or tuple. Any list or tuple input must be the same length - as the number of series_transformers. - max_interval_length : int, float, list, or tuple, default=np.inf - Maximum length of intervals to extract from series. float inputs take a - proportion of the series length to use as the maximum interval length. - - Different maximum interval lengths for each series_transformers series can be - specified using a list or tuple. Any list or tuple input must be the same length - as the number of series_transformers. - - Ignored for supervised interval_selection_method inputs. 
- interval_features : TransformerMixin, callable, list, tuple, or None, default=None - The features to extract from the intervals using transformers or callable - functions. If None, use the mean, standard deviation, and slope of the series. - - Both transformers and functions should be able to take a 2D np.ndarray input. - Functions should output a 1d array (the feature for each series), and - transformers should output a 2d array where rows are the features for each - series. A list or tuple of transformers and/or functions will extract all - features and concatenate the output. - - Different features for each series_transformers series can be specified using a - nested list or tuple. Any list or tuple input containing another list or tuple - must be the same length as the number of series_transformers. - series_transformers : TransformerMixin, list, tuple, or None, default=None - The transformers to apply to the series before extracting intervals. If None, - use the series as is. - - A list or tuple of transformers will extract intervals from - all transformations and concatenate the output. Including None in the list or tuple - will use the series as is for interval extraction. - att_subsample_size : int, float, list, tuple or None, default=None - The number of attributes to subsample for each estimator. If None, use all. - - If int, use that number of attributes for all estimators. If float, use that - proportion of attributes for all estimators. - - Different subsample sizes for each series_transformers series can be specified - using a list or tuple. Any list or tuple input must be the same length as the - number of series_transformers. - replace_nan : "nan", int, float or None, default=None - The value to replace NaNs and infinite values with before fitting the base - estimator. int or float input will replace with the specified value, while - "nan" will replace infinite values with NaNs. If None, do not replace NaNs. - time_limit_in_minutes : int or None, default=None - Time contract to limit build time in minutes, overriding n_estimators. - Default of None means n_estimators is used. - contract_max_n_estimators : int, default=500 - Max number of estimators when time_limit_in_minutes is set. - save_transformed_data : bool, default=False - Save the data transformed in fit. - random_state : int, RandomState instance or None, default=None - If `int`, random_state is the seed used by the random number generator; - If `RandomState` instance, random_state is the random number generator; - If `None`, the random number generator is the `RandomState` instance used - by `np.random`. - n_jobs : int, default=1 - The number of jobs to run in parallel for both `fit` and `predict`. - ``-1`` means using all processors. - parallel_backend : str, ParallelBackendBase instance or None, default=None - Specify the parallelisation backend implementation in joblib, if None a 'prefer' - value of "threads" is used by default. - Valid options are "loky", "multiprocessing", "threading" or a custom backend. - See the joblib Parallel documentation for more details. - - Attributes - ---------- - n_instances_ : int - The number of train cases in the training set. - n_channels_ : int - The number of dimensions per case in the training set. - n_timepoints_ : int - The length of each series in the training set. - n_classes_ : int - Number of classes. Extracted from the data. - classes_ : ndarray of shape (n_classes_) - Holds the label for each class.
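The contracting parameters documented above trade a fixed ensemble size for a build-time budget; a hedged usage sketch (dataset shapes are illustrative only):

from tsml.interval_based import IntervalForestClassifier
from tsml.utils.testing import generate_3d_test_data

X, y = generate_3d_test_data(n_samples=10, series_length=12, random_state=0)

# Build for roughly one minute rather than a fixed n_estimators, stopping
# early if contract_max_n_estimators members have been added.
clf = IntervalForestClassifier(
    time_limit_in_minutes=1,
    contract_max_n_estimators=100,
    random_state=0,
).fit(X, y)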
- class_dictionary_ : dict - A dictionary mapping class labels to class indices in classes_. - total_intervals_ : int - Total number of intervals per tree from all representations. - estimators_ : list of shape (n_estimators) of BaseEstimator - The collections of estimators trained in fit. - intervals_ : list of shape (n_estimators) of TransformerMixin - Stores the interval extraction transformer for all estimators. - transformed_data_ : list of shape (n_estimators) of ndarray with shape - (n_instances_ ,total_intervals * att_subsample_size) - The transformed dataset for all estimators. Only saved when - save_transformed_data is true. - - References - ---------- - .. [1] H.Deng, G.Runger, E.Tuv and M.Vladimir, "A time series forest for - classification and feature extraction", Information Sciences, 239, 2013 - .. [2] Matthew Middlehurst and James Large and Anthony Bagnall. "The Canonical - Interval Forest (CIF) Classifier for Time Series Classification." - IEEE International Conference on Big Data 2020 - .. [3] Cabello, Nestor, et al. "Fast and Accurate Time Series Classification - Through Supervised Interval Search." IEEE ICDM 2020 - - Examples - -------- - >>> from tsml.interval_based import IntervalForestClassifier - >>> from tsml.utils.testing import generate_3d_test_data - >>> X, y = generate_3d_test_data(n_samples=10, series_length=12, random_state=0) - >>> clf = IntervalForestClassifier(n_estimators=10, random_state=0) - >>> clf.fit(X, y) - IntervalForestClassifier(...) - >>> clf.predict(X) - array([0, 1, 0, 1, 0, 0, 1, 1, 1, 0]) - """ - - def __init__( - self, - base_estimator=None, - n_estimators=200, - interval_selection_method="random", - n_intervals="sqrt", - min_interval_length=3, - max_interval_length=np.inf, - interval_features=None, - series_transformers=None, - att_subsample_size=None, - replace_nan=None, - time_limit_in_minutes=None, - contract_max_n_estimators=500, - save_transformed_data=False, - random_state=None, - n_jobs=1, - parallel_backend=None, - ): - super().__init__( - base_estimator=base_estimator, - n_estimators=n_estimators, - interval_selection_method=interval_selection_method, - n_intervals=n_intervals, - min_interval_length=min_interval_length, - max_interval_length=max_interval_length, - interval_features=interval_features, - series_transformers=series_transformers, - att_subsample_size=att_subsample_size, - replace_nan=replace_nan, - time_limit_in_minutes=time_limit_in_minutes, - contract_max_n_estimators=contract_max_n_estimators, - save_transformed_data=save_transformed_data, - random_state=random_state, - n_jobs=n_jobs, - parallel_backend=parallel_backend, - ) - - def predict_proba(self, X): - return self._predict_proba(X) - - @classmethod - def get_test_params( - cls, parameter_set: Union[str, None] = None - ) -> Union[dict, List[dict]]: - """Return unit test parameter settings for the estimator. - - Parameters - ---------- - parameter_set : None or str, default=None - Name of the set of test parameters to return, for use in tests. If no - special parameters are defined for a value, will return `"default"` set. - - Returns - ------- - params : dict or list of dict - Parameters to create testing instances of the class. - """ - return { - "n_estimators": 2, - "n_intervals": 2, - } - - -class IntervalForestRegressor(RegressorMixin, BaseIntervalForest): - """Configurable interval extracting forest regressor. 
- - Extracts multiple phase-dependent intervals from time series data and builds a - base regressor on summary statistics extracted from each interval. Forms an - ensemble of these regressors. - - Allows the implementation of regressors along the lines of [1][2][3] - which extract intervals and create an ensemble from the subsequent features. - - By default, uses a configuration similar to TimeSeriesForest [1]. - - Parameters - ---------- - base_estimator : BaseEstimator or None, default=None - scikit-learn BaseEstimator used to build the interval ensemble. If None, use a - simple decision tree. - n_estimators : int, default=200 - Number of estimators to build for the ensemble. - interval_selection_method : "random", default="random" - The interval selection transformer to use. - - "random" uses a RandomIntervalTransformer. - n_intervals : int, str, list or tuple, default="sqrt" - Number of intervals to extract per tree for each series_transformers series. - - An int input will extract that number of intervals from the series, while a str - input will return a function of the series length (may differ per - series_transformers output) to extract that number of intervals. - Valid str inputs are: - - "sqrt": square root of the series length. - - "sqrt-div": sqrt of series length divided by the number - of series_transformers. - - A list or tuple of ints and/or strs will extract the number of intervals using - the above rules and sum the results for the final n_intervals, e.g. [4, "sqrt"] - will extract sqrt(n_timepoints) + 4 intervals. - - Different numbers of intervals for each series_transformers series can be - specified using a nested list or tuple. Any list or tuple input containing - another list or tuple must be the same length as the number of - series_transformers. - min_interval_length : int, float, list, or tuple, default=3 - Minimum length of intervals to extract from series. float inputs take a - proportion of the series length to use as the minimum interval length. - - Different minimum interval lengths for each series_transformers series can be - specified using a list or tuple. Any list or tuple input must be the same length - as the number of series_transformers. - max_interval_length : int, float, list, or tuple, default=np.inf - Maximum length of intervals to extract from series. float inputs take a - proportion of the series length to use as the maximum interval length. - - Different maximum interval lengths for each series_transformers series can be - specified using a list or tuple. Any list or tuple input must be the same length - as the number of series_transformers. - interval_features : TransformerMixin, callable, list, tuple, or None, default=None - The features to extract from the intervals using transformers or callable - functions. If None, use the mean, standard deviation, and slope of the series. - - Both transformers and functions should be able to take a 2D np.ndarray input. - Functions should output a 1d array (the feature for each series), and - transformers should output a 2d array where rows are the features for each - series. A list or tuple of transformers and/or functions will extract all - features and concatenate the output. - - Different features for each series_transformers series can be specified using a - nested list or tuple. Any list or tuple input containing another list or tuple - must be the same length as the number of series_transformers.
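As a concrete instance of the interval_features contract just described, a minimal sketch of a custom feature function (the function below is illustrative, not part of the removed module):

import numpy as np

def interval_range(X):
    # X is a 2D array of shape (n_instances, interval_length); a 1d array
    # with one feature value per series is returned, as documented above.
    return np.max(X, axis=1) - np.min(X, axis=1)

# e.g. IntervalForestRegressor(interval_features=[interval_range])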
- series_transformers : TransformerMixin, list, tuple, or None, default=None - The transformers to apply to the series before extracting intervals. If None, - use the series as is. - - A list or tuple of transformers will extract intervals from - all transformations and concatenate the output. Including None in the list or tuple - will use the series as is for interval extraction. - att_subsample_size : int, float, list, tuple or None, default=None - The number of attributes to subsample for each estimator. If None, use all. - - If int, use that number of attributes for all estimators. If float, use that - proportion of attributes for all estimators. - - Different subsample sizes for each series_transformers series can be specified - using a list or tuple. Any list or tuple input must be the same length as the - number of series_transformers. - replace_nan : "nan", int, float or None, default=None - The value to replace NaNs and infinite values with before fitting the base - estimator. int or float input will replace with the specified value, while - "nan" will replace infinite values with NaNs. If None, do not replace NaNs. - time_limit_in_minutes : int or None, default=None - Time contract to limit build time in minutes, overriding n_estimators. - Default of None means n_estimators is used. - contract_max_n_estimators : int, default=500 - Max number of estimators when time_limit_in_minutes is set. - save_transformed_data : bool, default=False - Save the data transformed in fit. - random_state : int, RandomState instance or None, default=None - If `int`, random_state is the seed used by the random number generator; - If `RandomState` instance, random_state is the random number generator; - If `None`, the random number generator is the `RandomState` instance used - by `np.random`. - n_jobs : int, default=1 - The number of jobs to run in parallel for both `fit` and `predict`. - ``-1`` means using all processors. - parallel_backend : str, ParallelBackendBase instance or None, default=None - Specify the parallelisation backend implementation in joblib, if None a 'prefer' - value of "threads" is used by default. - Valid options are "loky", "multiprocessing", "threading" or a custom backend. - See the joblib Parallel documentation for more details. - - Attributes - ---------- - n_instances_ : int - The number of train cases in the training set. - n_channels_ : int - The number of dimensions per case in the training set. - n_timepoints_ : int - The length of each series in the training set. - total_intervals_ : int - Total number of intervals per tree from all representations. - estimators_ : list of shape (n_estimators) of BaseEstimator - The collections of estimators trained in fit. - intervals_ : list of shape (n_estimators) of TransformerMixin - Stores the interval extraction transformer for all estimators. - transformed_data_ : list of shape (n_estimators) of ndarray with shape - (n_instances_ ,total_intervals * att_subsample_size) - The transformed dataset for all estimators. Only saved when - save_transformed_data is true. - - References - ---------- - .. [1] H.Deng, G.Runger, E.Tuv and M.Vladimir, "A time series forest for - classification and feature extraction", Information Sciences, 239, 2013 - .. [2] Matthew Middlehurst and James Large and Anthony Bagnall. "The Canonical - Interval Forest (CIF) Classifier for Time Series Classification." - IEEE International Conference on Big Data 2020 - .. [3] Cabello, Nestor, et al. "Fast and Accurate Time Series Classification - Through Supervised Interval Search."
IEEE ICDM 2020 - - Examples - -------- - >>> from tsml.interval_based import IntervalForestRegressor - >>> from tsml.utils.testing import generate_3d_test_data - >>> X, y = generate_3d_test_data(n_samples=10, series_length=12, - ... regression_target=True, random_state=0) - >>> reg = IntervalForestRegressor(n_estimators=10, random_state=0) - >>> reg.fit(X, y) - IntervalForestRegressor(...) - >>> reg.predict(X) - array([0.7252543 , 1.50132442, 0.95608366, 1.64399016, 0.42385504, - 0.60639322, 1.01919317, 1.30157483, 1.66017354, 0.2900776 ]) - """ - - def __init__( - self, - base_estimator=None, - n_estimators=200, - interval_selection_method="random", - n_intervals="sqrt", - min_interval_length=3, - max_interval_length=np.inf, - interval_features=None, - series_transformers=None, - att_subsample_size=None, - replace_nan=None, - time_limit_in_minutes=None, - contract_max_n_estimators=500, - save_transformed_data=False, - random_state=None, - n_jobs=1, - parallel_backend=None, - ): - super().__init__( - base_estimator=base_estimator, - n_estimators=n_estimators, - interval_selection_method=interval_selection_method, - n_intervals=n_intervals, - min_interval_length=min_interval_length, - max_interval_length=max_interval_length, - interval_features=interval_features, - series_transformers=series_transformers, - att_subsample_size=att_subsample_size, - replace_nan=replace_nan, - time_limit_in_minutes=time_limit_in_minutes, - contract_max_n_estimators=contract_max_n_estimators, - save_transformed_data=save_transformed_data, - random_state=random_state, - n_jobs=n_jobs, - parallel_backend=parallel_backend, - ) - - @classmethod - def get_test_params( - cls, parameter_set: Union[str, None] = None - ) -> Union[dict, List[dict]]: - """Return unit test parameter settings for the estimator. - - Parameters - ---------- - parameter_set : None or str, default=None - Name of the set of test parameters to return, for use in tests. If no - special parameters are defined for a value, will return `"default"` set. - - Returns - ------- - params : dict or list of dict - Parameters to create testing instances of the class. - """ - return { - "n_estimators": 2, - "n_intervals": 2, - } diff --git a/tsml/interval_based/_interval_pipelines.py b/tsml/interval_based/_interval_pipelines.py deleted file mode 100644 index 3606758..0000000 --- a/tsml/interval_based/_interval_pipelines.py +++ /dev/null @@ -1,820 +0,0 @@ -"""Interval Extraction Pipeline Estimators. - -Pipeline estimators using summary statistics extracted from random or supervised - intervals and an estimator. -""" - -__author__ = ["MatthewMiddlehurst"] -__all__ = [ - "RandomIntervalClassifier", - "RandomIntervalRegressor", - "SupervisedIntervalClassifier", -] - -from typing import List, Union - -import numpy as np -from sklearn.base import ClassifierMixin, RegressorMixin -from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor -from sklearn.ensemble._base import _set_random_states -from sklearn.utils.validation import check_is_fitted, check_random_state - -from tsml.base import BaseTimeSeriesEstimator, _clone_estimator -from tsml.transformations._interval_extraction import ( - RandomIntervalTransformer, - SupervisedIntervalTransformer, -) -from tsml.utils.validation import check_n_jobs - - -class RandomIntervalClassifier(ClassifierMixin, BaseTimeSeriesEstimator): - """Random Interval Classifier. - - Extracts multiple intervals with random length, position and dimension from series - and concatenates them into a feature vector.
Builds an estimator on the - transformed data. - - Parameters - ---------- - n_intervals : int or callable, default=100 - The number of intervals of random length, position and dimension to be - extracted. Input should be an int or a function that takes a 3D np.ndarray - input and returns an int. - min_interval_length : int, default=3 - The minimum length of extracted intervals. Minimum value of 3. - max_interval_length : int, default=np.inf - The maximum length of extracted intervals. Minimum value of min_interval_length. - features : TransformerMixin, a function taking a 2d numpy array parameter, or list - of said transformers and functions, default=None - Transformers and functions used to extract features from selected intervals. - If None, defaults to [mean, median, min, max, std, 25% quantile, 75% quantile]. - series_transformers : TransformerMixin, list, tuple, or None, default=None - The transformers to apply to the series before extracting intervals. If None, - use the series as is. - - A list or tuple of transformers will extract intervals from - all transformations and concatenate the output. Including None in the list or tuple - will use the series as is for interval extraction. - dilation : int, list or None, default=None - Add dilation to extracted intervals. No dilation is added if None or 1. If a - list of ints, a random dilation value is selected from the list for each - interval. - - The dilation value is selected after the interval start and end points. If the - number of values in the dilated interval is less than the min_interval_length, - the amount of dilation applied is reduced. - estimator : sklearn classifier, optional, default=None - An sklearn estimator to be built using the transformed data. - Defaults to sklearn RandomForestClassifier(n_estimators=200) - random_state : None, int or instance of RandomState, default=None - Seed or RandomState object used for random number generation. - If random_state is None, use the RandomState singleton used by np.random. - If random_state is an int, use a new RandomState instance seeded with seed. - n_jobs : int, default=1 - The number of jobs to run in parallel for both `fit` and `transform` functions. - `-1` means using all processors. - parallel_backend : str, ParallelBackendBase instance or None, default=None - Specify the parallelisation backend implementation in joblib, if None a 'prefer' - value of "threads" is used by default. - Valid options are "loky", "multiprocessing", "threading" or a custom backend. - See the joblib Parallel documentation for more details. - - Attributes - ---------- - n_instances_ : int - The number of train cases in the training set. - n_channels_ : int - The number of dimensions per case in the training set. - n_timepoints_ : int - The length of each series in the training set. - n_classes_ : int - Number of classes. Extracted from the data. - classes_ : ndarray of shape (n_classes_) - Holds the label for each class. - class_dictionary_ : dict - A dictionary mapping class labels to class indices in classes_. - - See Also - -------- - RandomIntervalTransformer - RandomIntervalRegressor - SupervisedIntervalClassifier - - Examples - -------- - >>> from tsml.interval_based import RandomIntervalClassifier - >>> from tsml.utils.testing import generate_3d_test_data - >>> X, y = generate_3d_test_data(n_samples=8, series_length=10, random_state=0) - >>> clf = RandomIntervalClassifier(random_state=0) - >>> clf.fit(X, y) - RandomIntervalClassifier(...)
- >>> clf.predict(X) - array([0, 1, 1, 0, 0, 1, 0, 1]) - """ - - def __init__( - self, - n_intervals=100, - min_interval_length=3, - max_interval_length=np.inf, - features=None, - series_transformers=None, - dilation=None, - estimator=None, - n_jobs=1, - random_state=None, - parallel_backend=None, - ): - self.n_intervals = n_intervals - self.min_interval_length = min_interval_length - self.max_interval_length = max_interval_length - self.features = features - self.series_transformers = series_transformers - self.dilation = dilation - self.estimator = estimator - self.random_state = random_state - self.n_jobs = n_jobs - self.parallel_backend = parallel_backend - - super().__init__() - - def fit(self, X: Union[np.ndarray, List[np.ndarray]], y: np.ndarray) -> object: - """Fit the estimator to training data. - - Parameters - ---------- - X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints) - The training data. - y : 1D np.ndarray of shape (n_instances) - The class labels for fitting, indices correspond to instance indices in X - - Returns - ------- - self : - Reference to self. - """ - X, y = self._validate_data( - X=X, y=y, ensure_min_samples=2, ensure_min_series_length=3 - ) - X = self._convert_X(X) - - self.n_instances_, self.n_channels_, self.n_timepoints_ = X.shape - self.classes_ = np.unique(y) - self.n_classes_ = self.classes_.shape[0] - self.class_dictionary_ = {} - for index, class_val in enumerate(self.classes_): - self.class_dictionary_[class_val] = index - - if self.n_classes_ == 1: - return self - - self._n_jobs = check_n_jobs(self.n_jobs) - rng = check_random_state(self.random_state) - - if isinstance(self.series_transformers, (list, tuple)): - self._series_transformers = [ - None if st is None else _clone_estimator(st, random_state=rng) - for st in self.series_transformers - ] - else: - self._series_transformers = [ - ( - None - if self.series_transformers is None - else _clone_estimator(self.series_transformers, random_state=rng) - ) - ] - - X_t = np.empty((X.shape[0], 0)) - self._transformers = [] - for st in self._series_transformers: - if st is not None: - s = st.fit_transform(X, y) - else: - s = X - - ct = RandomIntervalTransformer( - n_intervals=self.n_intervals, - min_interval_length=self.min_interval_length, - max_interval_length=self.max_interval_length, - features=self.features, - dilation=self.dilation, - n_jobs=self._n_jobs, - parallel_backend=self.parallel_backend, - ) - _set_random_states(ct, rng) - self._transformers.append(ct) - t = ct.fit_transform(s, y) - - X_t = np.hstack((X_t, t)) - - self._estimator = _clone_estimator( - ( - RandomForestClassifier(n_estimators=200) - if self.estimator is None - else self.estimator - ), - self.random_state, - ) - - m = getattr(self._estimator, "n_jobs", None) - if m is not None: - self._estimator.n_jobs = self._n_jobs - - self._estimator.fit(X_t, y) - - return self - - def predict(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray: - """Predicts labels for sequences in X. - - Parameters - ---------- - X : 3D np.array of shape (n_instances, n_channels, n_timepoints) - The testing data. - - Returns - ------- - y : array-like of shape (n_instances) - Predicted class labels. 
- """ - check_is_fitted(self) - - # treat case of single class seen in fit - if self.n_classes_ == 1: - return np.repeat(list(self.class_dictionary_.keys()), X.shape[0], axis=0) - - X = self._validate_data(X=X, reset=False, ensure_min_series_length=3) - X = self._convert_X(X) - - X_t = np.empty((X.shape[0], 0)) - for i, st in enumerate(self._series_transformers): - if st is not None: - s = st.transform(X) - else: - s = X - - t = self._transformers[i].transform(s) - X_t = np.hstack((X_t, t)) - - return self._estimator.predict(X_t) - - def predict_proba(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray: - """Predicts labels probabilities for sequences in X. - - Parameters - ---------- - X : 3D np.array of shape (n_instances, n_channels, n_timepoints) - The testing data. - - Returns - ------- - y : array-like of shape (n_instances, n_classes_) - Predicted probabilities using the ordering in classes_. - """ - check_is_fitted(self) - - # treat case of single class seen in fit - if self.n_classes_ == 1: - return np.repeat([[1]], X.shape[0], axis=0) - - X = self._validate_data(X=X, reset=False, ensure_min_series_length=3) - X = self._convert_X(X) - - X_t = np.empty((X.shape[0], 0)) - for i, st in enumerate(self._series_transformers): - if st is not None: - s = st.transform(X) - else: - s = X - - t = self._transformers[i].transform(s) - X_t = np.hstack((X_t, t)) - - m = getattr(self._estimator, "predict_proba", None) - if callable(m): - return self._estimator.predict_proba(X_t) - else: - dists = np.zeros((X.shape[0], self.n_classes_)) - preds = self._estimator.predict(X_t) - for i in range(0, X.shape[0]): - dists[i, self.class_dictionary_[preds[i]]] = 1 - return dists - - @classmethod - def get_test_params( - cls, parameter_set: Union[str, None] = None - ) -> Union[dict, List[dict]]: - """Return unit test parameter settings for the estimator. - - Parameters - ---------- - parameter_set : None or str, default=None - Name of the set of test parameters to return, for use in tests. If no - special parameters are defined for a value, will return `"default"` set. - - Returns - ------- - params : dict or list of dict - Parameters to create testing instances of the class. - """ - from tsml.utils.numba_functions.stats import row_mean, row_numba_min - - return { - "n_intervals": 2, - "estimator": RandomForestClassifier(n_estimators=2), - "features": [row_mean, row_numba_min], - } - - -class RandomIntervalRegressor(RegressorMixin, BaseTimeSeriesEstimator): - """Random Interval Regressor. - - Extracts multiple intervals with random length, position and dimension from series - and concatenates them into a feature vector. Builds an estimator on the - transformed data. - - Parameters - ---------- - n_intervals : int or callable, default=100, - The number of intervals of random length, position and dimension to be - extracted. Input should be an int or a function that takes a 3D np.ndarray - input and returns an int. - min_interval_length : int, default=3 - The minimum length of extracted intervals. Minimum value of 3. - max_interval_length : int, default=3 - The maximum length of extracted intervals. Minimum value of min_interval_length. - features : TransformerMixin, a function taking a 2d numpy array parameter, or list - of said transformers and functions, default=None - Transformers and functions used to extract features from selected intervals. 
- If None, defaults to [mean, median, min, max, std, 25% quantile, 75% quantile]. - series_transformers : TransformerMixin, list, tuple, or None, default=None - The transformers to apply to the series before extracting intervals. If None, - use the series as is. - - A list or tuple of transformers will extract intervals from - all transformations and concatenate the output. Including None in the list or tuple - will use the series as is for interval extraction. - dilation : int, list or None, default=None - Add dilation to extracted intervals. No dilation is added if None or 1. If a - list of ints, a random dilation value is selected from the list for each - interval. - - The dilation value is selected after the interval start and end points. If the - number of values in the dilated interval is less than the min_interval_length, - the amount of dilation applied is reduced. - estimator : sklearn regressor, optional, default=None - An sklearn estimator to be built using the transformed data. - Defaults to sklearn RandomForestRegressor(n_estimators=200) - random_state : None, int or instance of RandomState, default=None - Seed or RandomState object used for random number generation. - If random_state is None, use the RandomState singleton used by np.random. - If random_state is an int, use a new RandomState instance seeded with seed. - n_jobs : int, default=1 - The number of jobs to run in parallel for both `fit` and `transform` functions. - `-1` means using all processors. - parallel_backend : str, ParallelBackendBase instance or None, default=None - Specify the parallelisation backend implementation in joblib, if None a 'prefer' - value of "threads" is used by default. - Valid options are "loky", "multiprocessing", "threading" or a custom backend. - See the joblib Parallel documentation for more details. - - Attributes - ---------- - n_instances_ : int - The number of train cases in the training set. - n_channels_ : int - The number of dimensions per case in the training set. - n_timepoints_ : int - The length of each series in the training set. - - See Also - -------- - RandomIntervalTransformer - RandomIntervalClassifier - - Examples - -------- - >>> from tsml.interval_based import RandomIntervalRegressor - >>> from tsml.utils.testing import generate_3d_test_data - >>> X, y = generate_3d_test_data(n_samples=8, series_length=10, - ... regression_target=True, random_state=0) - >>> reg = RandomIntervalRegressor(random_state=0) - >>> reg.fit(X, y) - RandomIntervalRegressor(...) - >>> reg.predict(X) - array([0.44924979, 1.31424037, 1.11951504, 0.63780969, 0.58123516, - 1.17135463, 0.56450198, 1.10128837]) - """ - - def __init__( - self, - n_intervals=100, - min_interval_length=3, - max_interval_length=np.inf, - features=None, - series_transformers=None, - dilation=None, - estimator=None, - n_jobs=1, - random_state=None, - parallel_backend=None, - ): - self.n_intervals = n_intervals - self.min_interval_length = min_interval_length - self.max_interval_length = max_interval_length - self.features = features - self.series_transformers = series_transformers - self.dilation = dilation - self.estimator = estimator - self.random_state = random_state - self.n_jobs = n_jobs - self.parallel_backend = parallel_backend - - super().__init__() - - def fit(self, X: Union[np.ndarray, List[np.ndarray]], y: np.ndarray) -> object: - """Fit the estimator to training data. - - Parameters - ---------- - X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints) - The training data.
- y : 1D np.ndarray of shape (n_instances) - The target labels for fitting, indices correspond to instance indices in X - - Returns - ------- - self : - Reference to self. - """ - X, y = self._validate_data( - X=X, y=y, ensure_min_samples=2, ensure_min_series_length=3 - ) - X = self._convert_X(X) - - self.n_instances_, self.n_channels_, self.n_timepoints_ = X.shape - - self._n_jobs = check_n_jobs(self.n_jobs) - rng = check_random_state(self.random_state) - - if isinstance(self.series_transformers, (list, tuple)): - self._series_transformers = [ - None if st is None else _clone_estimator(st, random_state=rng) - for st in self.series_transformers - ] - else: - self._series_transformers = [ - ( - None - if self.series_transformers is None - else _clone_estimator(self.series_transformers, random_state=rng) - ) - ] - - X_t = np.empty((X.shape[0], 0)) - self._transformers = [] - for st in self._series_transformers: - if st is not None: - s = st.fit_transform(X, y) - else: - s = X - - ct = RandomIntervalTransformer( - n_intervals=self.n_intervals, - min_interval_length=self.min_interval_length, - max_interval_length=self.max_interval_length, - features=self.features, - dilation=self.dilation, - n_jobs=self._n_jobs, - parallel_backend=self.parallel_backend, - ) - _set_random_states(ct, rng) - self._transformers.append(ct) - t = ct.fit_transform(s, y) - - X_t = np.hstack((X_t, t)) - - self._estimator = _clone_estimator( - ( - RandomForestRegressor(n_estimators=200) - if self.estimator is None - else self.estimator - ), - self.random_state, - ) - - m = getattr(self._estimator, "n_jobs", None) - if m is not None: - self._estimator.n_jobs = self._n_jobs - - self._estimator.fit(X_t, y) - - return self - - def predict(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray: - """Predicts labels for sequences in X. - - Parameters - ---------- - X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints) - The testing data. - - Returns - ------- - y : array-like of shape (n_instances) - Predicted target labels. - """ - check_is_fitted(self) - - X = self._validate_data(X=X, reset=False, ensure_min_series_length=3) - X = self._convert_X(X) - - X_t = np.empty((X.shape[0], 0)) - for i, st in enumerate(self._series_transformers): - if st is not None: - s = st.transform(X) - else: - s = X - - t = self._transformers[i].transform(s) - X_t = np.hstack((X_t, t)) - - return self._estimator.predict(X_t) - - @classmethod - def get_test_params( - cls, parameter_set: Union[str, None] = None - ) -> Union[dict, List[dict]]: - """Return unit test parameter settings for the estimator. - - Parameters - ---------- - parameter_set : None or str, default=None - Name of the set of test parameters to return, for use in tests. If no - special parameters are defined for a value, will return `"default"` set. - - Returns - ------- - params : dict or list of dict - Parameters to create testing instances of the class. - """ - from tsml.utils.numba_functions.stats import row_mean, row_numba_min - - return { - "n_intervals": 3, - "estimator": RandomForestRegressor(n_estimators=2), - "features": [row_mean, row_numba_min], - } - - -class SupervisedIntervalClassifier(ClassifierMixin, BaseTimeSeriesEstimator): - """Supervised Interval Classifier. - - Extracts multiple intervals from series using a supervised process - and concatenates them into a feature vector. Builds an estimator on the - transformed data.
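The pipeline pattern shared by these classes, flattening interval summary statistics into one feature matrix and fitting a vector estimator on it, can be sketched as follows (a simplification using fixed intervals; the real transformers select intervals randomly or via the supervised search described below):

import numpy as np
from sklearn.ensemble import RandomForestClassifier

def toy_interval_features(X, intervals):
    # X: (n_instances, n_channels, n_timepoints). For each (channel, start, end)
    # triple, compute the mean and std of the slice and concatenate the
    # results into one feature vector per instance.
    feats = []
    for c, s, e in intervals:
        segment = X[:, c, s:e]
        feats.append(segment.mean(axis=1))
        feats.append(segment.std(axis=1))
    return np.stack(feats, axis=1)

rng = np.random.default_rng(0)
X = rng.normal(size=(20, 1, 30))
y = rng.integers(0, 2, size=20)

X_t = toy_interval_features(X, [(0, 0, 10), (0, 10, 25)])
clf = RandomForestClassifier(n_estimators=50, random_state=0).fit(X_t, y)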
- - Parameters - ---------- - n_intervals : int, default=50 - The number of times the supervised interval selection process is run. This - process will extract more than one interval per run. - Each supervised extraction will output a varying number of features based on - series length, number of dimensions and the number of features. - min_interval_length : int, default=3 - The minimum length of extracted intervals. Minimum value of 3. - features : callable, list of callables, default=None - Functions used to extract features from selected intervals. Must take a 2d - array of shape (n_instances, interval_length) and return a 1d array of shape - (n_instances) containing the features. - If None, defaults to the following statistics used in [2]: - [mean, median, std, slope, min, max, iqr, count_mean_crossing, - count_above_mean]. - metric : ["fisher"] or callable, default="fisher" - The metric used to evaluate the usefulness of a feature extracted on an - interval. If "fisher", the Fisher score is used. If a callable, it must take - a 1d array of shape (n_instances) and return a 1d array of scores of shape - (n_instances). - randomised_split_point : bool, default=True - If True, the split point for interval extraction is randomised as is done in [2] - rather than split in half. - normalise_for_search : bool, default=True - If True, the data is normalised for the supervised interval search process. - Features extracted for the transform output will not use normalised data. - estimator : sklearn classifier, optional, default=None - An sklearn estimator to be built using the transformed data. - Defaults to sklearn RandomForestClassifier(n_estimators=200) - random_state : None, int or instance of RandomState, default=None - Seed or RandomState object used for random number generation. - If random_state is None, use the RandomState singleton used by np.random. - If random_state is an int, use a new RandomState instance seeded with seed. - n_jobs : int, default=1 - The number of jobs to run in parallel for both `fit` and `transform` functions. - `-1` means using all processors. - parallel_backend : str, ParallelBackendBase instance or None, default=None - Specify the parallelisation backend implementation in joblib, if None a 'prefer' - value of "threads" is used by default. - Valid options are "loky", "multiprocessing", "threading" or a custom backend. - See the joblib Parallel documentation for more details. - - Attributes - ---------- - n_instances_ : int - The number of train cases in the training set. - n_channels_ : int - The number of dimensions per case in the training set. - n_timepoints_ : int - The length of each series in the training set. - n_classes_ : int - Number of classes. Extracted from the data. - classes_ : ndarray of shape (n_classes_) - Holds the label for each class. - class_dictionary_ : dict - A dictionary mapping class labels to class indices in classes_. - - See Also - -------- - SupervisedIntervalTransformer - RandomIntervalClassifier - - Examples - -------- - >>> from tsml.interval_based import SupervisedIntervalClassifier - >>> from tsml.utils.testing import generate_3d_test_data - >>> X, y = generate_3d_test_data(n_samples=8, series_length=10, random_state=0) - >>> clf = SupervisedIntervalClassifier(random_state=0) - >>> clf.fit(X, y) - SupervisedIntervalClassifier(...)
- >>> clf.predict(X) - array([0, 1, 1, 0, 0, 1, 0, 1]) - """ - - def __init__( - self, - n_intervals=50, - min_interval_length=3, - features=None, - metric="fisher", - randomised_split_point=True, - normalise_for_search=True, - estimator=None, - random_state=None, - n_jobs=1, - parallel_backend=None, - ): - self.n_intervals = n_intervals - self.min_interval_length = min_interval_length - self.features = features - self.metric = metric - self.randomised_split_point = randomised_split_point - self.normalise_for_search = normalise_for_search - self.estimator = estimator - self.random_state = random_state - self.n_jobs = n_jobs - self.parallel_backend = parallel_backend - - super().__init__() - - def fit(self, X: Union[np.ndarray, List[np.ndarray]], y: np.ndarray) -> object: - """Fit the estimator to training data. - - Parameters - ---------- - X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints) - The training data. - y : 1D np.ndarray of shape (n_instances) - The class labels for fitting, indices correspond to instance indices in X - - Returns - ------- - self : - Reference to self. - """ - X, y = self._validate_data( - X=X, y=y, ensure_min_samples=2, ensure_min_series_length=7 - ) - X = self._convert_X(X) - - self.n_instances_, self.n_channels_, self.n_timepoints_ = X.shape - self.classes_ = np.unique(y) - self.n_classes_ = self.classes_.shape[0] - self.class_dictionary_ = {} - for index, class_val in enumerate(self.classes_): - self.class_dictionary_[class_val] = index - - if self.n_classes_ == 1: - return self - - self._n_jobs = check_n_jobs(self.n_jobs) - - self._transformer = SupervisedIntervalTransformer( - n_intervals=self.n_intervals, - min_interval_length=self.min_interval_length, - features=self.features, - metric=self.metric, - randomised_split_point=self.randomised_split_point, - normalise_for_search=self.normalise_for_search, - random_state=self.random_state, - n_jobs=self.n_jobs, - parallel_backend=self.parallel_backend, - ) - - self._estimator = _clone_estimator( - ( - RandomForestClassifier(n_estimators=200) - if self.estimator is None - else self.estimator - ), - self.random_state, - ) - - m = getattr(self._estimator, "n_jobs", None) - if m is not None: - self._estimator.n_jobs = self._n_jobs - - X_t = self._transformer.fit_transform(X, y) - self._estimator.fit(X_t, y) - - return self - - def predict(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray: - """Predicts labels for sequences in X. - - Parameters - ---------- - X : 3D np.array of shape (n_instances, n_channels, n_timepoints) - The testing data. - - Returns - ------- - y : array-like of shape (n_instances) - Predicted class labels. - """ - check_is_fitted(self) - - # treat case of single class seen in fit - if self.n_classes_ == 1: - return np.repeat(list(self.class_dictionary_.keys()), X.shape[0], axis=0) - - X = self._validate_data(X=X, reset=False, ensure_min_series_length=7) - X = self._convert_X(X) - - return self._estimator.predict(self._transformer.transform(X)) - - def predict_proba(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray: - """Predicts label probabilities for sequences in X. - - Parameters - ---------- - X : 3D np.array of shape (n_instances, n_channels, n_timepoints) - The testing data. - - Returns - ------- - y : array-like of shape (n_instances, n_classes_) - Predicted probabilities using the ordering in classes_.
- """ - check_is_fitted(self) - - # treat case of single class seen in fit - if self.n_classes_ == 1: - return np.repeat([[1]], X.shape[0], axis=0) - - X = self._validate_data(X=X, reset=False, ensure_min_series_length=7) - X = self._convert_X(X) - - m = getattr(self._estimator, "predict_proba", None) - if callable(m): - return self._estimator.predict_proba(self._transformer.transform(X)) - else: - dists = np.zeros((X.shape[0], self.n_classes_)) - preds = self._estimator.predict(self._transformer.transform(X)) - for i in range(0, X.shape[0]): - dists[i, self.class_dictionary_[preds[i]]] = 1 - return dists - - @classmethod - def get_test_params( - cls, parameter_set: Union[str, None] = None - ) -> Union[dict, List[dict]]: - """Return unit test parameter settings for the estimator. - - Parameters - ---------- - parameter_set : None or str, default=None - Name of the set of test parameters to return, for use in tests. If no - special parameters are defined for a value, will return `"default"` set. - - Returns - ------- - params : dict or list of dict - Parameters to create testing instances of the class. - """ - from tsml.utils.numba_functions.stats import row_mean, row_numba_min - - return { - "n_intervals": 1, - "estimator": RandomForestClassifier(n_estimators=2), - "features": [row_mean, row_numba_min], - } diff --git a/tsml/interval_based/tests/__init__.py b/tsml/interval_based/tests/__init__.py deleted file mode 100644 index e472d7d..0000000 --- a/tsml/interval_based/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Testing for interval-based base classes.""" diff --git a/tsml/interval_based/tests/test_interval_forest.py b/tsml/interval_based/tests/test_interval_forest.py deleted file mode 100644 index 3a92417..0000000 --- a/tsml/interval_based/tests/test_interval_forest.py +++ /dev/null @@ -1,206 +0,0 @@ -"""Tests for the BaseIntervalForest class.""" - -import numpy as np -import pytest -from sklearn.pipeline import make_pipeline -from sklearn.tree import DecisionTreeClassifier - -from tsml.base import _clone_estimator -from tsml.interval_based import IntervalForestClassifier -from tsml.transformations import ( - AutocorrelationFunctionTransformer, - Catch22Transformer, - FunctionTransformer, - SevenNumberSummaryTransformer, -) -from tsml.utils.numba_functions.stats import row_mean, row_numba_min -from tsml.utils.testing import generate_3d_test_data -from tsml.utils.validation import _check_optional_dependency -from tsml.vector import CITClassifier - - -@pytest.mark.parametrize( - "base_estimator", - [DecisionTreeClassifier(), CITClassifier()], -) -def test_interval_forest_feature_skipping(base_estimator): - """Test BaseIntervalForest feature skipping with different base estimators.""" - X, y = generate_3d_test_data() - rs = np.random.randint(np.iinfo(np.int32).max) - - est = IntervalForestClassifier( - base_estimator=base_estimator, - n_estimators=2, - n_intervals=2, - random_state=rs, - ) - est.fit(X, y) - preds = est.predict(X) - - assert est._efficient_predictions is True - - est = IntervalForestClassifier( - base_estimator=make_pipeline(base_estimator), - n_estimators=2, - n_intervals=2, - random_state=rs, - ) - est.fit(X, y) - - assert est._efficient_predictions is False - assert (preds == est.predict(X)).all() - - -def test_interval_forest_invalid_feature_skipping(): - """Test BaseIntervalForest with an invalid transformer for feature skipping.""" - X, y = generate_3d_test_data() - - est = IntervalForestClassifier( - n_estimators=2, - n_intervals=2, - 
interval_features=SevenNumberSummaryTransformer(), - ) - est.fit(X, y) - - assert est._efficient_predictions is False - - -@pytest.mark.parametrize( - "interval_selection_method", - ["random", "supervised", "random-supervised"], -) -def test_interval_forest_selection_methods(interval_selection_method): - """Test BaseIntervalForest with different interval selection methods.""" - X, y = generate_3d_test_data() - - est = IntervalForestClassifier( - n_estimators=2, - n_intervals=2, - interval_selection_method=interval_selection_method, - ) - est.fit(X, y) - - assert est.predict_proba(X).shape == (10, 2) - - -@pytest.mark.parametrize( - "n_intervals,n_intervals_len", - [ - ("sqrt", 24), - ("sqrt-div", 12), - (["sqrt-div", 2], 24), - ([[1, 2], "sqrt-div"], 15), - ], -) -def test_interval_forest_n_intervals(n_intervals, n_intervals_len): - """Test BaseIntervalForest n_interval options.""" - X, y = generate_3d_test_data(series_length=20) - - est = IntervalForestClassifier( - n_estimators=2, - n_intervals=n_intervals, - series_transformers=[None, FunctionTransformer(np.log1p)], - save_transformed_data=True, - random_state=0, - ) - est.fit(X, y) - est.predict_proba(X) - - data = est.transformed_data_ - assert data[0].shape[1] == n_intervals_len - - -if _check_optional_dependency("pycatch22", "pycatch22", None, raise_error=False): - att_subsample_c22 = Catch22Transformer( - features=[ - "DN_HistogramMode_5", - "DN_HistogramMode_10", - "SB_BinaryStats_diff_longstretch0", - ] - ) -else: - att_subsample_c22 = SevenNumberSummaryTransformer() - - -@pytest.mark.skipif( - not _check_optional_dependency("pycatch22", "pycatch22", None, raise_error=False), - reason="pycatch22 not installed", -) -@pytest.mark.parametrize( - "features,output_len", - [ - (None, 3), - (_clone_estimator(att_subsample_c22), 3), - ([_clone_estimator(att_subsample_c22), _clone_estimator(att_subsample_c22)], 6), - ( - [ - row_mean, - _clone_estimator(att_subsample_c22), - row_numba_min, - ], - 4, - ), - ], -) -def test_interval_forest_attribute_subsample(features, output_len): - """Test BaseIntervalForest subsampling with different interval features.""" - X, y = generate_3d_test_data() - - est = IntervalForestClassifier( - n_estimators=2, - n_intervals=2, - att_subsample_size=0.5, - interval_features=features, - replace_nan=0, - save_transformed_data=True, - random_state=0, - ) - est.fit(X, y) - est.predict_proba(X) - - data = est.transformed_data_ - assert data[0].shape[1] == int(output_len * 0.5) * 2 - - -def test_interval_forest_invalid_attribute_subsample(): - """Test BaseIntervalForest with an invalid transformer for subsampling.""" - X, y = generate_3d_test_data() - - est = IntervalForestClassifier( - n_estimators=2, - n_intervals=2, - att_subsample_size=2, - interval_features=SevenNumberSummaryTransformer(), - ) - - with pytest.raises(ValueError): - est.fit(X, y) - - -@pytest.mark.parametrize( - "series_transformer", - [ - FunctionTransformer(np.log1p), - [None, FunctionTransformer(np.log1p)], - [FunctionTransformer(np.log1p), AutocorrelationFunctionTransformer(n_lags=6)], - ], -) -def test_interval_forest_series_transformer(series_transformer): - """Test BaseIntervalForest with different series transformers.""" - X, y = generate_3d_test_data() - - est = IntervalForestClassifier( - n_estimators=2, - n_intervals=2, - series_transformers=series_transformer, - save_transformed_data=True, - random_state=0, - ) - est.fit(X, y) - est.predict_proba(X) - - data = est.transformed_data_ - expected = ( - len(series_transformer) * 6 if 
isinstance(series_transformer, list) else 6 - ) - assert data[0].shape[1] == expected diff --git a/tsml/interval_based/tests/test_interval_pipelines.py b/tsml/interval_based/tests/test_interval_pipelines.py deleted file mode 100644 index e9bdb9c..0000000 --- a/tsml/interval_based/tests/test_interval_pipelines.py +++ /dev/null @@ -1,38 +0,0 @@ -"""Tests for the interval pipeline classes.""" - -from tsml.interval_based import RandomIntervalClassifier -from tsml.transformations import FunctionTransformer -from tsml.utils.numba_functions.general import first_order_differences_3d -from tsml.utils.testing import generate_3d_test_data - - -def test_random_interval_callable(): - """Test RandomIntervalClassifier with a callable n_intervals.""" - X, y = generate_3d_test_data() - - def interval_func(X): - return int(X.shape[2] / 5) - - est = RandomIntervalClassifier( - n_intervals=interval_func, - ) - est.fit(X, y) - - assert est._transformers[0]._n_intervals == 2 - - -def test_random_interval_series_transform_callable(): - """Test RandomIntervalClassifier with a series transformer.""" - X, y = generate_3d_test_data() - - est = RandomIntervalClassifier( - n_intervals=2, - series_transformers=[ - None, - FunctionTransformer(func=first_order_differences_3d, validate=False), - ], - ) - est.fit(X, y) - est.predict_proba(X) - - assert len(est._transformers) == 2 diff --git a/tsml/transformations/__init__.py b/tsml/transformations/__init__.py index 5186259..0944e8a 100644 --- a/tsml/transformations/__init__.py +++ b/tsml/transformations/__init__.py @@ -1,29 +1,13 @@ """tsml transformations.""" __all__ = [ - "AutocorrelationFunctionTransformer", - "ARCoefficientTransformer", - "Catch22Transformer", "FPCATransformer", "FunctionTransformer", - "RandomIntervalTransformer", - "SupervisedIntervalTransformer", - # "FixedIntervalTransformer", - "PeriodogramTransformer", - # "QuantileTransformer", "SevenNumberSummaryTransformer", "TransformerConcatenator", ] -from tsml.transformations._acf import AutocorrelationFunctionTransformer -from tsml.transformations._ar_coefficient import ARCoefficientTransformer -from tsml.transformations._catch22 import Catch22Transformer from tsml.transformations._fpca import FPCATransformer from tsml.transformations._function_transformer import FunctionTransformer -from tsml.transformations._interval_extraction import ( - RandomIntervalTransformer, - SupervisedIntervalTransformer, -) -from tsml.transformations._periodogram import PeriodogramTransformer from tsml.transformations._summary_features import SevenNumberSummaryTransformer from tsml.transformations._transform_concatenator import TransformerConcatenator diff --git a/tsml/transformations/_acf.py b/tsml/transformations/_acf.py deleted file mode 100644 index a553ac0..0000000 --- a/tsml/transformations/_acf.py +++ /dev/null @@ -1,140 +0,0 @@ -"""Autocorrelation function transformer.""" - -__author__ = ["MatthewMiddlehurst"] -__all__ = ["AutocorrelationFunctionTransformer"] - -from typing import List, Union - -import numpy as np -from numba import njit -from sklearn.base import TransformerMixin - -from tsml.base import BaseTimeSeriesEstimator - - -class AutocorrelationFunctionTransformer(TransformerMixin, BaseTimeSeriesEstimator): - """Autocorrelation function transformer. - - The autocorrelation function measures how correlated a timeseries is - with itself at different lags. The AutocorrelationFunctionTransformer returns - these values as a series for each lag up to the `n_lags` specified. 
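Per lag, the definition above amounts to the Pearson correlation between the series and a copy of itself shifted by that lag; a minimal numpy sketch (illustrative only, the removed implementation uses the Numba kernel shown below):

import numpy as np

def acf_1d(x, n_lags):
    # correlate x[:-lag] with x[lag:] for each lag in 1..n_lags
    return np.array(
        [np.corrcoef(x[:-lag], x[lag:])[0, 1] for lag in range(1, n_lags + 1)]
    )

x = np.sin(np.linspace(0, 6 * np.pi, 50))
acf_1d(x, 5)  # high positive values at small lags for a smooth sine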
- - Efficient implementation for collections using numba - - Parameters - ---------- - n_lags : int or callable, default=100 - The maximum number of autocorrelation terms to use. If callable, the - function should take a 3D numpy array of shape (n_instances, n_channels, - n_timepoints) and return an integer. - min_values : int, default=0 - Never use fewer than this number of terms to find a correlation unless the - series length is too short. This will reduce n_lags if needed. - - Examples - -------- - >>> from tsml.transformations import AutocorrelationFunctionTransformer - >>> from tsml.utils.testing import generate_3d_test_data - >>> X, _ = generate_3d_test_data(n_samples=4, n_channels=2, series_length=20, - ... random_state=0) - >>> tnf = AutocorrelationFunctionTransformer(n_lags=10) - >>> tnf.fit(X) - AutocorrelationFunctionTransformer(...) - >>> print(tnf.transform(X)[0]) - [[ 0.10642255 -0.04497476 -0.27607675 -0.24169331 0.04717655 0.07221666 - -0.36798515 -0.53768553 0.07550288 0.08557519] - [-0.21166957 0.24992846 -0.38036068 0.10243325 -0.18565336 0.05619381 - -0.19569665 0.28835692 -0.42359509 0.21378191]] - """ - - def __init__( - self, - n_lags=100, - min_values=0, - ): - self.n_lags = n_lags - self.min_values = min_values - - super().__init__() - - def fit(self, X, y=None): - self._validate_data(X=X) - return self - - def transform(self, X, y=None): - X = self._validate_data(X=X, reset=False) - X = self._convert_X(X) - - n_instances, n_channels, n_timepoints = X.shape - - lags = self.n_lags(X) if callable(self.n_lags) else self.n_lags - if lags > n_timepoints - self.min_values: - lags = n_timepoints - self.min_values - if lags < 0: - lags = 1 - - if lags > n_timepoints - 1: - raise ValueError( - f"lags ({lags}) must be smaller than n_timepoints - 1 " - f"({n_timepoints - 1})." - ) - - Xt = np.zeros((n_instances, n_channels, lags)) - for n in range(n_channels): - Xt[:, n, :] = self._acf_2d(X[:, n, :], lags) - - return Xt - - def _more_tags(self) -> dict: - return {"requires_fit": False} - - @classmethod - def get_test_params( - cls, parameter_set: Union[str, None] = None - ) -> Union[dict, List[dict]]: - """Return unit test parameter settings for the estimator. - - Parameters - ---------- - parameter_set : None or str, default=None - Name of the set of test parameters to return, for use in tests. If no - special parameters are defined for a value, will return `"default"` set. - - Returns - ------- - params : dict or list of dict - Parameters to create testing instances of the class. 
- """ - return { - "n_lags": 4, - } - - @staticmethod - @njit(cache=True, fastmath=True) - def _acf_2d(X, max_lag): - n_instances, length = X.shape - - X_t = np.zeros((n_instances, max_lag)) - for i, x in enumerate(X): - for lag in range(1, max_lag + 1): - lag_length = length - lag - x1, x2 = x[:-lag], x[lag:] - s1 = np.sum(x1) - s2 = np.sum(x2) - m1 = s1 / lag_length - m2 = s2 / lag_length - ss1 = np.sum(x1 * x1) - ss2 = np.sum(x2 * x2) - v1 = ss1 - s1 * m1 - v2 = ss2 - s2 * m2 - v1_is_zero, v2_is_zero = v1 <= 1e-9, v2 <= 1e-9 - if v1_is_zero and v2_is_zero: # Both zero variance, - # so must be 100% correlated - X_t[i][lag - 1] = 1 - elif v1_is_zero or v2_is_zero: # One zero variance - # the other not - X_t[i][lag - 1] = 0 - else: - X_t[i][lag - 1] = np.sum((x1 - m1) * (x2 - m2)) / np.sqrt(v1 * v2) - - return X_t diff --git a/tsml/transformations/_ar_coefficient.py b/tsml/transformations/_ar_coefficient.py deleted file mode 100644 index b798d3a..0000000 --- a/tsml/transformations/_ar_coefficient.py +++ /dev/null @@ -1,119 +0,0 @@ -"""AR coefficient feature transformer.""" - -__author__ = ["MatthewMiddlehurst"] -__all__ = ["ARCoefficientTransformer"] - -from typing import List, Union - -import numpy as np -from sklearn.base import TransformerMixin - -from tsml.base import BaseTimeSeriesEstimator -from tsml.utils.validation import _check_optional_dependency - - -class ARCoefficientTransformer(TransformerMixin, BaseTimeSeriesEstimator): - """Autoreggression coefficient feature transformer. - - Coefficients of an autoregressive model using Burg's method. The Burg method - fits a forward-backward autoregressive model to the data using least squares - regression. - - Parameters - ---------- - order : int or callable, default=100 - The order of the autoregression. If callable, the function should take a 3D - numpy array of shape (n_instances, n_channels, n_timepoints) and return an - integer. - min_values : int, default=0 - Always transform at least this many values unless the series length is too - short. This will reduce order if needed. - replace_nan : bool, default=False - If True, replace NaNs in output with 0s. - - Examples - -------- - >>> from tsml.transformations import ARCoefficientTransformer - >>> from tsml.utils.testing import generate_3d_test_data - >>> X, _ = generate_3d_test_data(n_samples=4, n_channels=2, series_length=20, - ... random_state=0) - >>> tnf = ARCoefficientTransformer(order=5) - >>> tnf.fit(X) - ARCoefficientTransformer(...) - >>> print(tnf.transform(X)[0]) - [[ 0.05445952 -0.02106654 -0.24989205 -0.19153596 0.08833235] - [-0.13034384 0.16255828 -0.27993791 -0.06842601 -0.01382752]] - """ - - def __init__( - self, - order=100, - min_values=0, - replace_nan=False, - ): - self.order = order - self.min_values = min_values - self.replace_nan = replace_nan - - _check_optional_dependency("statsmodels", "statsmodels", self) - - super().__init__() - - def fit(self, X, y=None): - self._validate_data(X=X) - return self - - def transform(self, X, y=None): - from statsmodels.regression.linear_model import burg - - X = self._validate_data(X=X, reset=False) - X = self._convert_X(X) - - n_instances, n_channels, n_timepoints = X.shape - - order = self.order(X) if callable(self.order) else self.order - if order > n_timepoints - self.min_values: - order = n_timepoints - self.min_values - if order < 0: - order = 1 - - if order > n_timepoints - 1: - raise ValueError( - f"order ({order}) must be smaller than n_timepoints - 1 " - f"({n_timepoints - 1})." 
- ) - - Xt = np.zeros((n_instances, n_channels, order)) - for i in range(n_instances): - for n in range(n_channels): - coefs, _ = burg(X[i, n], order=order) - Xt[i, n] = coefs - - if self.replace_nan: - Xt[np.isnan(Xt)] = 0 - - return Xt - - def _more_tags(self) -> dict: - return {"requires_fit": False, "optional_dependency": True} - - @classmethod - def get_test_params( - cls, parameter_set: Union[str, None] = None - ) -> Union[dict, List[dict]]: - """Return unit test parameter settings for the estimator. - - Parameters - ---------- - parameter_set : None or str, default=None - Name of the set of test parameters to return, for use in tests. If no - special parameters are defined for a value, will return `"default"` set. - - Returns - ------- - params : dict or list of dict - Parameters to create testing instances of the class. - """ - return { - "order": 4, - } diff --git a/tsml/transformations/_catch22.py b/tsml/transformations/_catch22.py deleted file mode 100644 index 932f4a8..0000000 --- a/tsml/transformations/_catch22.py +++ /dev/null @@ -1,335 +0,0 @@ -"""Catch22 features. - -A transformer for the Catch22 features. -""" - -__author__ = ["MatthewMiddlehurst"] -__all__ = ["Catch22Transformer"] - - -import numpy as np -from joblib import Parallel -from sklearn.base import TransformerMixin -from sklearn.utils.parallel import delayed - -from tsml.base import BaseTimeSeriesEstimator -from tsml.utils.numba_functions.general import z_normalise_series -from tsml.utils.validation import _check_optional_dependency, check_n_jobs - -feature_names = [ - "DN_HistogramMode_5", - "DN_HistogramMode_10", - "SB_BinaryStats_diff_longstretch0", - "DN_OutlierInclude_p_001_mdrmd", - "DN_OutlierInclude_n_001_mdrmd", - "CO_f1ecac", - "CO_FirstMin_ac", - "SP_Summaries_welch_rect_area_5_1", - "SP_Summaries_welch_rect_centroid", - "FC_LocalSimple_mean3_stderr", - "CO_trev_1_num", - "CO_HistogramAMI_even_2_5", - "IN_AutoMutualInfoStats_40_gaussian_fmmi", - "MD_hrv_classic_pnn40", - "SB_BinaryStats_mean_longstretch1", - "SB_MotifThree_quantile_hh", - "FC_LocalSimple_mean1_tauresrat", - "CO_Embed2_Dist_tau_d_expfit_meandiff", - "SC_FluctAnal_2_dfa_50_1_2_logi_prop_r1", - "SC_FluctAnal_2_rsrangefit_50_1_logi_prop_r1", - "SB_TransitionMatrix_3ac_sumdiagcov", - "PD_PeriodicityWang_th0_01", -] - - -class Catch22Transformer(TransformerMixin, BaseTimeSeriesEstimator): - """Canonical Time-series Characteristics (Catch22). - - Overview: Input n series with d dimensions of length m - Transforms series into the 22 Catch22 [1]_ features extracted from the hctsa [2]_ - toolbox. - - Parameters - ---------- - features : int/str or List of int/str, optional, default="all" - The Catch22 features to extract by feature index, feature name as a str or as a - list of names or indices for multiple features. If "all", all features are - extracted. 
- Valid features are as follows: - ["DN_HistogramMode_5", "DN_HistogramMode_10", - "SB_BinaryStats_diff_longstretch0", "DN_OutlierInclude_p_001_mdrmd", - "DN_OutlierInclude_n_001_mdrmd", "CO_f1ecac", "CO_FirstMin_ac", - "SP_Summaries_welch_rect_area_5_1", "SP_Summaries_welch_rect_centroid", - "FC_LocalSimple_mean3_stderr", "CO_trev_1_num", "CO_HistogramAMI_even_2_5", - "IN_AutoMutualInfoStats_40_gaussian_fmmi", "MD_hrv_classic_pnn40", - "SB_BinaryStats_mean_longstretch1", "SB_MotifThree_quantile_hh", - "FC_LocalSimple_mean1_tauresrat", "CO_Embed2_Dist_tau_d_expfit_meandiff", - "SC_FluctAnal_2_dfa_50_1_2_logi_prop_r1", - "SC_FluctAnal_2_rsrangefit_50_1_logi_prop_r1", - "SB_TransitionMatrix_3ac_sumdiagcov", "PD_PeriodicityWang_th0_01"] - catch24 : bool, optional, default=False - Extract the mean and standard deviation as well as the 22 Catch22 features if - true. If a List of specific features to extract is provided, "Mean" and/or - "StandardDeviation" must be added to the List to extract these features. - outlier_norm : bool, optional, default=False - Normalise each series during the two outlier Catch22 features, which can take a - while to process for large values. - replace_nans : bool, optional, default=False - Replace NaN or inf values from the Catch22 transform with 0. - use_pycatch22 : bool, optional, default=True - Wraps the C based pycatch22 implementation for tsml. - (https://github.com/DynamicsAndNeuralSystems/pycatch22). This requires the - ``pycatch22`` package to be installed if True. - n_jobs : int, optional, default=1 - The number of jobs to run in parallel for `transform`. Requires multiple input - cases. ``-1`` means using all processors. - parallel_backend : str, ParallelBackendBase instance or None, default=None - Specify the parallelisation backend implementation in joblib, if None a 'prefer' - value of "threads" is used by default. - Valid options are "loky", "multiprocessing", "threading" or a custom backend. - See the joblib Parallel documentation for more details. - - See Also - -------- - Catch22Classifier - - Notes - ----- - Original Catch22 package implementations: - https://github.com/DynamicsAndNeuralSystems/Catch22 - - For the Java version, see - https://github.com/uea-machine-learning/tsml/blob/master/src/main/java - /tsml/transformers/Catch22.java - - References - ---------- - .. [1] Lubba, C. H., Sethi, S. S., Knaute, P., Schultz, S. R., Fulcher, B. D., & - Jones, N. S. (2019). catch22: Canonical time-series characteristics. Data Mining - and Knowledge Discovery, 33(6), 1821-1852. - .. [2] Fulcher, B. D., Little, M. A., & Jones, N. S. (2013). Highly comparative - time-series analysis: the empirical structure of time series and their methods. - Journal of the Royal Society Interface, 10(83), 20130048. - """ - - def __init__( - self, - features="all", - catch24=False, - outlier_norm=False, - replace_nans=False, - n_jobs=1, - parallel_backend=None, - ): - self.features = features - self.catch24 = catch24 - self.outlier_norm = outlier_norm - self.replace_nans = replace_nans - self.n_jobs = n_jobs - self.parallel_backend = parallel_backend - - _check_optional_dependency("pycatch22", "pycatch22", self) - - super().__init__() - - def fit(self, X, y=None): - """Unused. Validates X.""" - self._validate_data(X=X) - return self - - def transform(self, X, y=None): - """Transform X into the catch22 features. 
- - Parameters - ---------- - X : 3D np.array (any number of channels, equal length series) - of shape (n_instances, n_channels, n_timepoints) - or list of numpy arrays (any number of channels, unequal length series) - of shape [n_instances], 2D np.array (n_channels, n_timepoints_i), where - n_timepoints_i is length of series i - - Returns - ------- - Xt : array-like, shape = [n_instances, num_features*n_channels] - The catch22 features for each dimension. - """ - X = self._validate_data(X=X, reset=False) - X = self._convert_X(X) - - n_instances = len(X) - - f_idx = _verify_features(self.features, self.catch24) - - threads_to_use = check_n_jobs(self.n_jobs) - - import pycatch22 - - features = [ - pycatch22.DN_HistogramMode_5, - pycatch22.DN_HistogramMode_10, - pycatch22.SB_BinaryStats_diff_longstretch0, - pycatch22.DN_OutlierInclude_p_001_mdrmd, - pycatch22.DN_OutlierInclude_n_001_mdrmd, - pycatch22.CO_f1ecac, - pycatch22.CO_FirstMin_ac, - pycatch22.SP_Summaries_welch_rect_area_5_1, - pycatch22.SP_Summaries_welch_rect_centroid, - pycatch22.FC_LocalSimple_mean3_stderr, - pycatch22.CO_trev_1_num, - pycatch22.CO_HistogramAMI_even_2_5, - pycatch22.IN_AutoMutualInfoStats_40_gaussian_fmmi, - pycatch22.MD_hrv_classic_pnn40, - pycatch22.SB_BinaryStats_mean_longstretch1, - pycatch22.SB_MotifThree_quantile_hh, - pycatch22.FC_LocalSimple_mean1_tauresrat, - pycatch22.CO_Embed2_Dist_tau_d_expfit_meandiff, - pycatch22.SC_FluctAnal_2_dfa_50_1_2_logi_prop_r1, - pycatch22.SC_FluctAnal_2_rsrangefit_50_1_logi_prop_r1, - pycatch22.SB_TransitionMatrix_3ac_sumdiagcov, - pycatch22.PD_PeriodicityWang_th0_01, - ] - - c22_list = Parallel( - n_jobs=threads_to_use, backend=self.parallel_backend, prefer="threads" - )( - delayed(self._transform_case_pycatch22)( - X[i], - f_idx, - features, - ) - for i in range(n_instances) - ) - - if self.replace_nans: - c22_list = np.nan_to_num(c22_list, False, 0, 0, 0) - - return np.array(c22_list) - - def _transform_case_pycatch22(self, X, f_idx, features): - c22 = np.zeros(len(f_idx) * len(X)) - - if hasattr(self, "_transform_features") and len( - self._transform_features - ) == len(c22): - transform_feature = self._transform_features - else: - transform_feature = [True] * len(c22) - - f_count = -1 - for i in range(len(X)): - dim = i * len(f_idx) - series = list(X[i]) - - if self.outlier_norm and (3 in f_idx or 4 in f_idx): - outlier_series = list(z_normalise_series(X[i])) - - for n, feature in enumerate(f_idx): - f_count += 1 - if not transform_feature[f_count]: - continue - - if self.outlier_norm and feature in [3, 4]: - c22[dim + n] = features[feature](outlier_series) - if feature == 22: - c22[dim + n] = np.mean(series) - elif feature == 23: - c22[dim + n] = np.std(series) - else: - c22[dim + n] = features[feature](series) - - return c22 - - @property - def get_features_arguments(self): - """Return feature names for the estimators features argument.""" - return ( - self.features - if self.features != "all" - else ( - feature_names + ["Mean", "StandardDeviation"] - if self.catch24 - else feature_names - ) - ) - - def _more_tags(self) -> dict: - return { - "X_types": ["np_list", "3darray"], - "requires_fit": False, - "optional_dependency": True, - } - - @classmethod - def get_test_params(cls, parameter_set="default"): - """Return testing parameter settings for the estimator. - - Parameters - ---------- - parameter_set : str, default="default" - Name of the set of test parameters to return, for use in tests. 
If no - special parameters are defined for a value, will return `"default"` set. - - Returns - ------- - params : dict or list of dict, default = {} - Parameters to create testing instances of the class - Each dict are parameters to construct an "interesting" test instance, i.e., - `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. - `create_test_instance` uses the first (or only) dictionary in `params` - """ - return {} - - -def _verify_features(features, catch24): - if isinstance(features, str): - if features == "all": - f_idx = [i for i in range(22)] - if catch24: - f_idx += [22, 23] - elif features in feature_names: - f_idx = [feature_names.index(features)] - elif catch24 and features == "Mean": - f_idx = [22] - elif catch24 and features == "StandardDeviation": - f_idx = [23] - else: - raise ValueError("Invalid feature selection.") - elif isinstance(features, int): - if features >= 0 and features < 22: - f_idx = [features] - elif catch24 and features == 22: - f_idx = [22] - elif catch24 and features == 23: - f_idx = [23] - else: - raise ValueError("Invalid feature selection.") - elif isinstance(features, (list, tuple)): - if len(features) > 0: - f_idx = [] - for f in features: - if isinstance(f, str): - if f in feature_names: - f_idx.append(feature_names.index(f)) - elif catch24 and f == "Mean": - f_idx.append(22) - elif catch24 and f == "StandardDeviation": - f_idx.append(23) - else: - raise ValueError("Invalid feature selection.") - elif isinstance(f, int): - if f >= 0 and f < 22: - f_idx.append(f) - elif catch24 and f == 22: - f_idx.append(22) - elif catch24 and f == 23: - f_idx.append(23) - else: - raise ValueError("Invalid feature selection.") - else: - raise ValueError("Invalid feature selection.") - else: - raise ValueError("Invalid feature selection.") - else: - raise ValueError("Invalid feature selection.") - - return f_idx diff --git a/tsml/transformations/_interval_extraction.py b/tsml/transformations/_interval_extraction.py deleted file mode 100644 index eb15711..0000000 --- a/tsml/transformations/_interval_extraction.py +++ /dev/null @@ -1,1572 +0,0 @@ -"""Interval extraction transformers.""" - -__author__ = ["MatthewMiddlehurst"] -__all__ = [ - "RandomIntervalTransformer", - "SupervisedIntervalTransformer", - # "FixedIntervalTransformer", -] - -import inspect -from typing import List, Union - -import numpy as np -from joblib import Parallel -from sklearn import preprocessing -from sklearn.base import TransformerMixin -from sklearn.utils import check_random_state -from sklearn.utils.parallel import delayed -from sklearn.utils.validation import check_is_fitted - -from tsml.base import BaseTimeSeriesEstimator, _clone_estimator -from tsml.utils._tags import _safe_tags -from tsml.utils.numba_functions.general import z_normalise_series_3d -from tsml.utils.numba_functions.stats import ( - fisher_score, - row_count_above_mean, - row_count_mean_crossing, - row_iqr, - row_mean, - row_median, - row_numba_max, - row_numba_min, - row_quantile25, - row_quantile75, - row_slope, - row_std, -) -from tsml.utils.validation import check_n_jobs, is_transformer - - -class RandomIntervalTransformer(TransformerMixin, BaseTimeSeriesEstimator): - """Random interval feature transformer. - - Extracts intervals with random length, position and dimension from series in fit. - Transforms each interval sub-series using the given transformer(s)/features and - concatenates them into a feature vector in transform. 
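-
-    As a rough sketch of the idea (``rng``, the interval bounds and the feature
-    list here are illustrative stand-ins, not the exact attributes used by this
-    class):
-
-    >>> import numpy as np  # doctest: +SKIP
-    >>> rng = np.random.RandomState(0)
-    >>> X = rng.random((4, 1, 12))  # (n_instances, n_channels, n_timepoints)
-    >>> start = rng.randint(0, 12 - 3)  # random interval position
-    >>> end = start + rng.randint(3, 12 - start + 1)  # random length >= 3
-    >>> feats = [np.mean, np.std]  # per-interval summary features
-    >>> Xt = np.hstack(
-    ...     [f(X[:, 0, start:end], axis=1).reshape(-1, 1) for f in feats]
-    ... )
-    >>> Xt.shape
-    (4, 2)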
-
-    Identical intervals are pruned at the end of fit, so the number of features may
-    be less than expected from n_intervals.
-
-    Parameters
-    ----------
-    n_intervals : int or callable, default=100,
-        The number of intervals of random length, position and dimension to be
-        extracted. Input should be an int or a function that takes a 3D np.ndarray
-        input and returns an int.
-    min_interval_length : int, default=3
-        The minimum length of extracted intervals. Minimum value of 3.
-    max_interval_length : int, default=np.inf
-        The maximum length of extracted intervals. Minimum value of min_interval_length.
-    features : TransformerMixin, a function taking a 2d numpy array parameter, or list
-    of said transformers and functions, default=None
-        Transformers and functions used to extract features from selected intervals.
-        If None, defaults to [mean, median, min, max, std, 25% quantile, 75% quantile]
-    dilation : int, list or None, default=None
-        Add dilation to extracted intervals. No dilation is added if None or 1. If a
-        list of ints, a random dilation value is selected from the list for each
-        interval.
-
-        The dilation value is selected after the interval start and end points. If the
-        number of values in the dilated interval is less than min_interval_length,
-        the amount of dilation applied is reduced.
-    random_state : None, int or instance of RandomState, default=None
-        Seed or RandomState object used for random number generation.
-        If random_state is None, use the RandomState singleton used by np.random.
-        If random_state is an int, use a new RandomState instance seeded with seed.
-    n_jobs : int, default=1
-        The number of jobs to run in parallel for both `fit` and `transform` functions.
-        `-1` means using all processors.
-    parallel_backend : str, ParallelBackendBase instance or None, default=None
-        Specify the parallelisation backend implementation in joblib. If None, a
-        'prefer' value of "threads" is used by default.
-        Valid options are "loky", "multiprocessing", "threading" or a custom backend.
-        See the joblib Parallel documentation for more details.
-
-    Attributes
-    ----------
-    n_instances_ : int
-        The number of train cases.
-    n_dims_ : int
-        The number of dimensions per case.
-    series_length_ : int
-        The length of each series.
-    n_intervals_ : int
-        The number of intervals extracted after pruning identical intervals.
-    intervals_ : list of tuples
-        Contains information for each feature extracted in fit. Each tuple contains the
-        interval start, interval end, interval dimension, the feature(s) extracted and
-        the dilation.
-        Length will be n_intervals*len(features).
-
-    See Also
-    --------
-    SupervisedIntervalTransformer
-    FixedIntervalTransformer
-
-    Examples
-    --------
-    >>> from tsml.transformations import RandomIntervalTransformer
-    >>> from tsml.utils.testing import generate_3d_test_data
-    >>> X, _ = generate_3d_test_data(n_samples=4, series_length=12, random_state=0)
-    >>> tnf = RandomIntervalTransformer(n_intervals=2, random_state=0)
-    >>> tnf.fit(X)
-    RandomIntervalTransformer(...)
- >>> print(tnf.transform(X)[0]) - [1.04753424 0.14925939 0.8473096 1.20552675 1.08976637 0.96853798 - 1.14764656 1.07628806 0.18170775 0.8473096 1.29178823 1.08976637 - 0.96853798 1.1907773 ] - """ - - def __init__( - self, - n_intervals=100, - min_interval_length=3, - max_interval_length=np.inf, - features=None, - dilation=None, - random_state=None, - n_jobs=1, - parallel_backend=None, - ): - self.n_intervals = n_intervals - self.min_interval_length = min_interval_length - self.max_interval_length = max_interval_length - self.features = features - self.dilation = dilation - self.random_state = random_state - self.n_jobs = n_jobs - self.parallel_backend = parallel_backend - - super().__init__() - - transformer_feature_skip = ["transform_features_", "_transform_features"] - - def fit_transform( - self, X: Union[np.ndarray, List[np.ndarray]], y: Union[np.ndarray, None] = None - ) -> np.ndarray: - """Fit the transformer to training data and return transformed data. - - Parameters - ---------- - X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints) - The training data. - y : 1D np.ndarray of shape (n_instances) - The class labels for fitting, indices correspond to instance indices in X - - Returns - ------- - X_t : 2D np.ndarray of shape (n_instances, n_features) - Transformed data. - """ - X, rng = self._fit_setup(X) - - fit = Parallel( - n_jobs=self._n_jobs, backend=self.parallel_backend, prefer="threads" - )( - delayed(self._generate_interval)( - X, - y, - rng.randint(np.iinfo(np.int32).max), - True, - ) - for _ in range(self._n_intervals) - ) - - ( - intervals, - transformed_intervals, - ) = zip(*fit) - - current = [] - removed_idx = [] - self.n_intervals_ = 0 - for i, interval in enumerate(intervals): - new_interval = ( - interval[0][0], - interval[0][1], - interval[0][2], - interval[0][4], - ) - if new_interval not in current: - current.append(new_interval) - self.intervals_.extend(interval) - self.n_intervals_ += 1 - else: - removed_idx.append(i) - - Xt = transformed_intervals[0] - for i in range(1, self._n_intervals): - if i not in removed_idx: - Xt = np.hstack((Xt, transformed_intervals[i])) - - return Xt - - def fit( - self, X: Union[np.ndarray, List[np.ndarray]], y: Union[np.ndarray, None] = None - ) -> object: - """Fit the transformer to training data. - - Parameters - ---------- - X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints) - The training data. - y : 1D np.ndarray of shape (n_instances) - The class labels for fitting, indices correspond to instance indices in X - - Returns - ------- - self : - Reference to self. - """ - X, rng = self._fit_setup(X) - - fit = Parallel( - n_jobs=self._n_jobs, backend=self.parallel_backend, prefer="threads" - )( - delayed(self._generate_interval)( - X, - y, - rng.randint(np.iinfo(np.int32).max), - False, - ) - for _ in range(self.n_intervals) - ) - - ( - intervals, - _, - ) = zip(*fit) - - current = [] - self.n_intervals_ = 0 - for i in intervals: - interval = (i[0][0], i[0][1], i[0][2], i[0][4]) - if interval not in current: - current.append(interval) - self.intervals_.extend(i) - self.n_intervals_ += 1 - - return self - - def transform( - self, X: Union[np.ndarray, List[np.ndarray]], y: Union[np.ndarray, None] = None - ) -> np.ndarray: - """Transform input cases in X. - - Parameters - ---------- - X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints) - The training data. 
- y : 1D np.ndarray of shape (n_instances) - The class labels for fitting, indices correspond to instance indices in X - - Returns - ------- - X_t : 2D np.ndarray of shape (n_instances, n_features) - Transformed data. - """ - check_is_fitted(self) - - X = self._validate_data(X=X, reset=False, ensure_min_series_length=3) - - if self._transform_features is None: - transform_features = [None] * len(self.intervals_) - else: - count = 0 - transform_features = [] - for _ in range(self.n_intervals_): - for feature in self._features: - if is_transformer(feature): - nf = feature.n_transformed_features - transform_features.append( - self._transform_features[count : count + nf] - ) - count += nf - else: - transform_features.append(self._transform_features[count]) - count += 1 - - transform = Parallel( - n_jobs=self._n_jobs, backend=self.parallel_backend, prefer="threads" - )( - delayed(self._transform_interval)( - X, - i, - transform_features[i], - ) - for i in range(len(self.intervals_)) - ) - - Xt = transform[0] - for i in range(1, len(self.intervals_)): - Xt = np.hstack((Xt, transform[i])) - - return Xt - - def _fit_setup(self, X): - X = self._validate_data(X=X, ensure_min_series_length=3) - X = self._convert_X(X) - - self.intervals_ = [] - self._transform_features = None - - self.n_instances_, self.n_dims_, self.series_length_ = X.shape - - if callable(self.n_intervals): - self._n_intervals = self.n_intervals(X) - else: - self._n_intervals = self.n_intervals - - self._min_interval_length = self.min_interval_length - if self.min_interval_length < 3: - self._min_interval_length = 3 - - self._max_interval_length = self.max_interval_length - if self.max_interval_length < self._min_interval_length: - self._max_interval_length = self._min_interval_length - elif self.max_interval_length > self.series_length_: - self._max_interval_length = self.series_length_ - - self._features = self.features - if self.features is None: - self._features = [ - row_mean, - row_std, - row_numba_min, - row_numba_max, - row_median, - row_quantile25, - row_quantile75, - ] - elif not isinstance(self.features, list): - self._features = [self.features] - - li = [] - for feature in self._features: - if is_transformer(feature): - li.append( - _clone_estimator( - feature, - self.random_state, - ) - ) - elif callable(feature): - li.append(feature) - else: - raise ValueError( - "Input features must be a list of callables or aeon transformers." 
- ) - self._features = li - - if self.dilation is None: - self._dilation = [1] - elif isinstance(self.dilation, list): - self._dilation = self.dilation - else: - self._dilation = [self.dilation] - - self._n_jobs = check_n_jobs(self.n_jobs) - - rng = check_random_state(self.random_state) - - return X, rng - - def _generate_interval(self, X, y, seed, transform): - rng = check_random_state(seed) - - dim = rng.randint(self.n_dims_) - - if rng.random() < 0.5: - interval_start = ( - rng.randint(0, self.series_length_ - self._min_interval_length) - if self.series_length_ > self._min_interval_length - else 0 - ) - len_range = min( - self.series_length_ - interval_start, - self._max_interval_length, - ) - length = ( - rng.randint(0, len_range - self._min_interval_length) - + self._min_interval_length - if len_range > self._min_interval_length - else self._min_interval_length - ) - interval_end = interval_start + length - else: - interval_end = ( - rng.randint(0, self.series_length_ - self._min_interval_length) - + self._min_interval_length - if self.series_length_ > self._min_interval_length - else self._min_interval_length - ) - len_range = min(interval_end, self._max_interval_length) - length = ( - rng.randint(0, len_range - self._min_interval_length) - + self._min_interval_length - if len_range > self._min_interval_length - else self._min_interval_length - ) - interval_start = interval_end - length - - interval_length = interval_end - interval_start - dilation = rng.choice(self._dilation) - while interval_length / dilation < self._min_interval_length: - dilation -= 1 - - Xt = np.empty((self.n_instances_, 0)) if transform else None - intervals = [] - - for feature in self._features: - if is_transformer(feature): - if transform: - feature = _clone_estimator( - feature, - seed, - ) - - t = feature.fit_transform( - np.expand_dims( - X[:, dim, interval_start:interval_end:dilation], axis=1 - ), - y, - ) - - if t.ndim == 3 and t.shape[1] == 1: - t = t.reshape((t.shape[0], t.shape[2])) - - Xt = np.hstack((Xt, t)) - else: - feature.fit( - np.expand_dims( - X[:, dim, interval_start:interval_end:dilation], axis=1 - ), - y, - ) - elif transform: - t = [ - [f] - for f in feature(X[:, dim, interval_start:interval_end:dilation]) - ] - Xt = np.hstack((Xt, t)) - - intervals.append((interval_start, interval_end, dim, feature, dilation)) - - return intervals, Xt - - def _transform_interval(self, X, idx, keep_transform): - interval_start, interval_end, dim, feature, dilation = self.intervals_[idx] - - if keep_transform is not None: - if is_transformer(feature): - for n in self.transformer_feature_skip: - if hasattr(feature, n): - setattr(feature, n, keep_transform) - break - elif not keep_transform: - return [[0] for _ in range(X.shape[0])] - - if is_transformer(feature): - Xt = feature.transform( - np.expand_dims(X[:, dim, interval_start:interval_end:dilation], axis=1) - ) - - if Xt.ndim == 3: - Xt = Xt.reshape((Xt.shape[0], Xt.shape[2])) - else: - Xt = [[f] for f in feature(X[:, dim, interval_start:interval_end:dilation])] - - return Xt - - def set_features_to_transform(self, arr, raise_error=True): - """Set transform_features to the given array. - - Each index in the list corresponds to the index of an interval, True intervals - are included in the transform, False intervals skipped and are set to 0. - - If any transformers are in features, they must also have a "transform_features" - or "_transform_features" attribute as well as a "n_transformed_features" - attribute. 
The input array should contain an item for each of the transformers
-        "n_transformed_features" output features.
-
-        Parameters
-        ----------
-        arr : list of bools
-            A list of intervals to skip.
-        raise_error : bool, default=True
-            Whether to raise an error or return None if input or transformers are
-            invalid.
-
-        Returns
-        -------
-        completed: bool
-            Whether the operation was successful.
-        """
-        length = 0
-        for feature in self._features:
-            if is_transformer(feature):
-                if not any(
-                    hasattr(feature, n) for n in self.transformer_feature_skip
-                ) or not hasattr(feature, "n_transformed_features"):
-                    if raise_error:
-                        raise ValueError(
-                            "Transformer must have one of "
-                            f"{self.transformer_feature_skip} as an attribute and "
-                            "a n_transformed_features attribute."
-                        )
-                    else:
-                        return False
-
-                length += feature.n_transformed_features
-            else:
-                length += 1
-
-        if len(arr) != length * self.n_intervals_ or not all(
-            isinstance(b, bool) for b in arr
-        ):
-            if raise_error:
-                raise ValueError(
-                    "Input must be a list of bools, matching the length of the "
-                    "transform output."
-                )
-            else:
-                return False
-
-        self._transform_features = arr
-
-        return True
-
-    @classmethod
-    def get_test_params(cls, parameter_set="default"):
-        """Return testing parameter settings for the estimator.
-
-        Parameters
-        ----------
-        parameter_set : str, default="default"
-            Name of the set of test parameters to return, for use in tests. If no
-            special parameters are defined for a value, will return `"default"` set.
-
-        Returns
-        -------
-        params : dict or list of dict, default = {}
-            Parameters to create testing instances of the class.
-            Each dict contains parameters to construct an "interesting" test instance,
-            i.e., `MyClass(**params)` or `MyClass(**params[i])` creates a valid test
-            instance.
-            `create_test_instance` uses the first (or only) dictionary in `params`.
-        """
-        return {"n_intervals": 2}
-
-
-class SupervisedIntervalTransformer(TransformerMixin, BaseTimeSeriesEstimator):
-    """Supervised interval feature transformer.
-
-    Extracts intervals in fit using the supervised process described in [1].
-    Interval subseries are extracted for each input feature, and the usefulness of that
-    feature extracted on an interval is evaluated using the Fisher score metric.
-    Intervals are continually split in half, with the better scoring half retained as a
-    feature for the transform.
-
-    Multivariate capability is added by running the supervised interval extraction
-    process on each dimension of the input data.
-
-    As the interval features are already extracted for the supervised
-    evaluation in fit, the fit_transform method is recommended if the transformed fit
-    data is required.
-
-    Parameters
-    ----------
-    n_intervals : int, default=50
-        The number of times the supervised interval selection process is run.
-        Each supervised extraction will output a varying number of features based on
-        series length, number of dimensions and the number of features.
-    min_interval_length : int, default=3
-        The minimum length of extracted intervals. Minimum value of 3.
-    features : callable, list of callables, default=None
-        Functions used to extract features from selected intervals. Must take a 2d
-        array of shape (n_instances, interval_length) and return a 1d array of shape
-        (n_instances) containing the features.
-        If None, defaults to the following statistics used in [2]:
-        [mean, median, std, slope, min, max, iqr, count_mean_crossing,
-        count_above_mean].
- metric : ["fisher"] or callable, default="fisher" - The metric used to evaluate the usefulness of a feature extracted on an - interval. If "fisher", the Fisher score is used. If a callable, it must take - a 1d array of shape (n_instances) and return a 1d array of scores of shape - (n_instances). - randomised_split_point : bool, default=True - If True, the split point for interval extraction is randomised as is done in [2] - rather than split in half. - normalise_for_search : bool, default=True - If True, the data is normalised for the supervised interval search process. - Features extracted for the transform output will not use normalised data. - random_state : None, int or instance of RandomState, default=None - Seed or RandomState object used for random number generation. - If random_state is None, use the RandomState singleton used by np.random. - If random_state is an int, use a new RandomState instance seeded with seed. - n_jobs : int, default=1 - The number of jobs to run in parallel for both `fit` and `transform` functions. - `-1` means using all processors. - parallel_backend : str, ParallelBackendBase instance or None, default=None - Specify the parallelisation backend implementation in joblib, if None a 'prefer' - value of "threads" is used by default. - Valid options are "loky", "multiprocessing", "threading" or a custom backend. - See the joblib Parallel documentation for more details. - - Attributes - ---------- - n_instances_ : int - The number of train cases. - n_dims_ : int - The number of dimensions per case. - series_length_ : int - The length of each series. - intervals_ : list of tuples - Contains information for each feature extracted in fit. Each tuple contains the - interval start, interval end, interval dimension and the feature extracted. - Length will be the same as the amount of transformed features. - - See Also - -------- - RandomIntervalTransformer - FixedIntervalTransformer - - Notes - ----- - Based on the authors (stevcabello) code: https://github.com/stevcabello/r-STSF/ - - References - ---------- - .. [1] Cabello, N., Naghizade, E., Qi, J. and Kulik, L., 2020, November. Fast and - accurate time series classification through supervised interval search. In 2020 - IEEE International Conference on Data Mining (ICDM) (pp. 948-953). IEEE. - .. [2] Cabello, N., Naghizade, E., Qi, J. and Kulik, L., 2021. Fast, accurate and - interpretable time series classification through randomization. arXiv preprint - arXiv:2105.14876. - - Examples - -------- - >>> from tsml.transformations import SupervisedIntervalTransformer - >>> from tsml.utils.testing import generate_3d_test_data - >>> X, y = generate_3d_test_data(n_samples=10, series_length=12, random_state=0) - >>> tnf = SupervisedIntervalTransformer(n_intervals=1, random_state=0) - >>> tnf.fit(X, y) - SupervisedIntervalTransformer(...) - >>> print(tnf.transform(X)[0]) - [1.4237989 1.20552675 0.45060352 0.13125638 0.10101093 0.76688304 - 1.92732552 0.54651945 3. 2. 
] - """ - - def __init__( - self, - n_intervals=50, - min_interval_length=3, - features=None, - metric="fisher", - randomised_split_point=True, - normalise_for_search=True, - random_state=None, - n_jobs=1, - parallel_backend=None, - ): - self.n_intervals = n_intervals - self.min_interval_length = min_interval_length - self.features = features - self.metric = metric - self.randomised_split_point = randomised_split_point - self.normalise_for_search = normalise_for_search - self.random_state = random_state - self.n_jobs = n_jobs - self.parallel_backend = parallel_backend - - super().__init__() - - # if features contains a transformer, it must contain a parameter name from - # transformer_feature_selection and an attribute name (or property) from - # transformer_feature_names to allow a single feature to be transformed at a time. - transformer_feature_selection = ["features"] - transformer_feature_names = [ - "features_arguments_", - "_features_arguments", - "get_features_arguments", - "_get_features_arguments", - ] - - def fit_transform( - self, X: Union[np.ndarray, List[np.ndarray]], y: np.ndarray - ) -> np.ndarray: - """Fit the transformer to training data and return transformed data. - - Parameters - ---------- - X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints) - The training data. - y : 1D np.ndarray of shape (n_instances) - The class labels for fitting, indices correspond to instance indices in X - - Returns - ------- - X_t : 2D np.ndarray of shape (n_instances, n_features) - Transformed data. - """ - X, y, rng = self._fit_setup(X, y) - - X_norm = z_normalise_series_3d(X) if self.normalise_for_search else X - - fit = Parallel( - n_jobs=self._n_jobs, backend=self.parallel_backend, prefer="threads" - )( - delayed(self._generate_intervals)( - X, - X_norm, - y, - rng.randint(np.iinfo(np.int32).max), - True, - ) - for _ in range(self.n_intervals) - ) - - ( - intervals, - transformed_intervals, - ) = zip(*fit) - - for i in intervals: - self.intervals_.extend(i) - - self._transform_features = [True] * len(self.intervals_) - - Xt = transformed_intervals[0] - for i in range(1, self.n_intervals): - Xt = np.hstack((Xt, transformed_intervals[i])) - - return Xt - - def fit(self, X: Union[np.ndarray, List[np.ndarray]], y: np.ndarray) -> object: - """Fit the transformer to training data. - - Parameters - ---------- - X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints) - The training data. - y : 1D np.ndarray of shape (n_instances) - The class labels for fitting, indices correspond to instance indices in X - - Returns - ------- - self : - Reference to self. - """ - X, y, rng = self._fit_setup(X, y) - - X_norm = z_normalise_series_3d(X) if self.normalise_for_search else X - - fit = Parallel( - n_jobs=self._n_jobs, backend=self.parallel_backend, prefer="threads" - )( - delayed(self._generate_intervals)( - X, - X_norm, - y, - rng.randint(np.iinfo(np.int32).max), - False, - ) - for _ in range(self.n_intervals) - ) - - ( - intervals, - _, - ) = zip(*fit) - - for i in intervals: - self.intervals_.extend(i) - - self._transform_features = [True] * len(self.intervals_) - - return self - - def transform( - self, X: Union[np.ndarray, List[np.ndarray]], y: Union[np.ndarray, None] = None - ) -> np.ndarray: - """Transform input cases in X. - - Parameters - ---------- - X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints) - The training data. 
- y : 1D np.ndarray of shape (n_instances) - The class labels for fitting, indices correspond to instance indices in X - - Returns - ------- - X_t : 2D np.ndarray of shape (n_instances, n_features) - Transformed data. - """ - check_is_fitted(self) - - X = self._validate_data(X=X, reset=False, ensure_min_series_length=7) - - transform = Parallel( - n_jobs=self._n_jobs, backend=self.parallel_backend, prefer="threads" - )( - delayed(self._transform_intervals)( - X, - i, - ) - for i in range(len(self.intervals_)) - ) - - Xt = np.zeros((X.shape[0], len(transform))) - for i, t in enumerate(transform): - Xt[:, i] = t - - return Xt - - def _fit_setup(self, X, y): - X, y = self._validate_data( - X=X, y=y, ensure_min_samples=2, ensure_min_series_length=5 - ) - X = self._convert_X(X) - - self.intervals_ = [] - - self.n_instances_, self.n_dims_, self.series_length_ = X.shape - - if self.n_instances_ <= 1: - raise ValueError( - "Supervised intervals requires more than 1 training time series." - ) - - self._min_interval_length = self.min_interval_length - if self.min_interval_length < 3: - self._min_interval_length = 3 - - if self._min_interval_length * 2 + 1 > self.series_length_: - raise ValueError( - "Minimum interval length must be less than half the series length." - ) - - self._features = self.features - if self.features is None: - self._features = [ - row_mean, - row_median, - row_std, - row_slope, - row_numba_min, - row_numba_max, - row_iqr, - row_count_mean_crossing, - row_count_above_mean, - ] - - if not isinstance(self._features, list): - self._features = [self._features] - - rng = check_random_state(self.random_state) - - msg = ( - "Transformers must have a parameter from 'transformer_feature_names' to " - "allow selecting single features, and a list of feature names in " - "'transformer_feature_names'. Transformers which require 'fit' are " - "currently unsupported." 
- ) - - li = [] - for f in self._features: - if callable(f): - li.append(f) - elif is_transformer(f): - if _safe_tags(f, key="requires_fit") is True: - raise ValueError(msg) - - params = inspect.signature(f.__init__).parameters - - att_name = None - for n in self.transformer_feature_selection: - if params.get(n, None) is not None: - att_name = n - break - - if att_name is None: - raise ValueError(msg) - - t_features = None - for n in self.transformer_feature_names: - if hasattr(f, n) and isinstance(getattr(f, n), (list, tuple)): - t_features = getattr(f, n) - break - - if t_features is None: - raise ValueError(msg) - - for t_f in t_features: - new_transformer = _clone_estimator(f, rng) - setattr( - new_transformer, - att_name, - t_f, - ) - li.append(new_transformer) - else: - raise ValueError() - self._features = li - - if callable(self.metric): - self._metric = self.metric - elif self.metric == "fisher": - self._metric = fisher_score - else: - raise ValueError("metric must be callable or 'fisher'") - - self._n_jobs = check_n_jobs(self.n_jobs) - - le = preprocessing.LabelEncoder() - return X, le.fit_transform(y), rng - - def _generate_intervals(self, X, X_norm, y, seed, keep_transform): - rng = check_random_state(seed) - - Xt = np.empty((self.n_instances_, 0)) if keep_transform else None - intervals = [] - - for i in range(self.n_dims_): - for feature in self._features: - random_cut_point = int(rng.randint(1, self.series_length_ - 1)) - while ( - self.series_length_ - random_cut_point - < self._min_interval_length * 2 - and self.series_length_ - (self.series_length_ - random_cut_point) - < self._min_interval_length * 2 - ): - random_cut_point = int(rng.randint(1, self.series_length_ - 1)) - - intervals_L, Xt_L = self._supervised_search( - X_norm[:, i, :random_cut_point], - y, - 0, - feature, - i, - X[:, i, :], - rng, - keep_transform, - is_transformer(feature), - ) - intervals.extend(intervals_L) - - if keep_transform: - Xt = np.hstack((Xt, Xt_L)) - - intervals_R, Xt_R = self._supervised_search( - X_norm[:, i, random_cut_point:], - y, - random_cut_point, - feature, - i, - X[:, i, :], - rng, - keep_transform, - is_transformer(feature), - ) - intervals.extend(intervals_R) - - if keep_transform: - Xt = np.hstack((Xt, Xt_R)) - - return intervals, Xt - - def _transform_intervals(self, X, idx): - if not self._transform_features[idx]: - return np.zeros(X.shape[0]) - - start, end, dim, feature = self.intervals_[idx] - - if is_transformer(feature): - return feature.transform(X[:, dim, start:end]).flatten() - else: - return feature(X[:, dim, start:end]) - - def _supervised_search( - self, - X, - y, - ini_idx, - feature, - dim, - X_ori, - rng, - keep_transform, - feature_is_transformer, - ): - intervals = [] - Xt = np.empty((X.shape[0], 0)) if keep_transform else None - - while X.shape[1] >= self._min_interval_length * 2: - if ( - self.randomised_split_point - and X.shape[1] != self._min_interval_length * 2 - ): - div_point = rng.randint( - self._min_interval_length, X.shape[1] - self._min_interval_length - ) - else: - div_point = int(X.shape[1] / 2) - - sub_interval_0 = X[:, :div_point] - sub_interval_1 = X[:, div_point:] - - if feature_is_transformer: - interval_feature_0 = feature.transform(sub_interval_0).flatten() - interval_feature_1 = feature.transform(sub_interval_1).flatten() - else: - interval_feature_0 = feature(sub_interval_0) - interval_feature_1 = feature(sub_interval_1) - - score_0 = self._metric(interval_feature_0, y) - score_1 = self._metric(interval_feature_1, y) - - if score_0 >= 
score_1 and score_0 != 0:
-                end = ini_idx + len(sub_interval_0[0])
-
-                intervals.append((ini_idx, end, dim, feature))
-                X = sub_interval_0
-
-                if keep_transform:
-                    if self.normalise_for_search:
-                        if feature_is_transformer:
-                            interval_feature_to_use = feature.transform(
-                                X_ori[:, ini_idx:end]
-                            ).flatten()
-                        else:
-                            interval_feature_to_use = feature(X_ori[:, ini_idx:end])
-                    else:
-                        interval_feature_to_use = interval_feature_0
-
-                    Xt = np.hstack(
-                        (
-                            Xt,
-                            np.reshape(
-                                interval_feature_to_use,
-                                (interval_feature_to_use.shape[0], 1),
-                            ),
-                        )
-                    )
-            elif score_1 > score_0:
-                ini_idx = ini_idx + div_point
-                end = ini_idx + len(sub_interval_1[0])
-
-                intervals.append((ini_idx, end, dim, feature))
-                X = sub_interval_1
-
-                if keep_transform:
-                    if self.normalise_for_search:
-                        if feature_is_transformer:
-                            interval_feature_to_use = feature.transform(
-                                X_ori[:, ini_idx:end]
-                            ).flatten()
-                        else:
-                            interval_feature_to_use = feature(X_ori[:, ini_idx:end])
-                    else:
-                        interval_feature_to_use = interval_feature_1
-
-                    Xt = np.hstack(
-                        (
-                            Xt,
-                            np.reshape(
-                                interval_feature_to_use,
-                                (interval_feature_to_use.shape[0], 1),
-                            ),
-                        )
-                    )
-            else:
-                break
-
-        return intervals, Xt
-
-    def set_features_to_transform(self, arr, raise_error=True):
-        """Set transform_features to the given array.
-
-        Each index in the list corresponds to the index of an interval, True intervals
-        are included in the transform, False intervals are skipped and set to 0.
-
-        Parameters
-        ----------
-        arr : list of bools
-            A list of intervals to skip.
-        raise_error : bool, default=True
-            Whether to raise an error or return None if input is invalid.
-
-        Returns
-        -------
-        completed: bool
-            Whether the operation was successful.
-        """
-        if len(arr) != len(self.intervals_) or not all(
-            isinstance(b, bool) for b in arr
-        ):
-            if raise_error:
-                raise ValueError(
-                    "Input must be a list of bools of length len(intervals_)."
-                )
-            else:
-                return False
-
-        self._transform_features = arr
-
-        return True
-
-    def _more_tags(self) -> dict:
-        return {"requires_y": True}
-
-    @classmethod
-    def get_test_params(cls, parameter_set="default"):
-        """Return testing parameter settings for the estimator.
-
-        Parameters
-        ----------
-        parameter_set : str, default="default"
-            Name of the set of test parameters to return, for use in tests. If no
-            special parameters are defined for a value, will return `"default"` set.
-
-        Returns
-        -------
-        params : dict or list of dict, default = {}
-            Parameters to create testing instances of the class.
-            Each dict contains parameters to construct an "interesting" test instance,
-            i.e., `MyClass(**params)` or `MyClass(**params[i])` creates a valid test
-            instance.
-            `create_test_instance` uses the first (or only) dictionary in `params`.
-        """
-        return {
-            "n_intervals": 1,
-            "randomised_split_point": False,
-        }
-
-
-# class FixedIntervalTransformer(TransformerMixin, BaseTimeSeriesEstimator):
-#     """Fixed interval feature transformer.
-#
-#     Extracts features using a fixed set of intervals, continually halving the interval
-#     length until the given depth is reached.
-#     Transforms each interval sub-series using the given transformer(s)/features and
-#     concatenates them into a feature vector in transform.
-#
-#     Parameters
-#     ----------
-#     n_intervals : int or callable, default=4,
-#         The depth to extract intervals from, with the total number of intervals
-#         extracted increasing exponentially with depth. i.e.
if n_intervals=3, 1 interval -# will be extracted from the whole series, 2 from both halves and 4 from the -# four quartiles of the series for 7 total intervals. As the number of intervals -# extracted doubles per layer, the length of each interval extracted halves. -# shifted_intervals : bool, default=True -# Whether to include additional intervals per layer by shifting the layer -# intervals to the right by 1/2 the interval length for each depth past 1. -# This effectively doubles the number of intervals extracted per layer (minus 1) -# by including overlapping intervals. -# min_interval_length : int, default=2 -# The minimum length of extracted intervals. Minimum value of 2. -# features : TransformerMixin, a function taking a 2d numpy array parameter, or list -# of said transformers and functions, default=None -# Transformers and functions used to extract features from selected intervals. -# If None, defaults to [QuantileTransformer, -# QuantileTransformer(subtract_mean=True)]. -# random_state : None, int or instance of RandomState, default=None -# Seed or RandomState object used for random number generation. -# If random_state is None, use the RandomState singleton used by np.random. -# If random_state is an int, use a new RandomState instance seeded with seed. -# n_jobs : int, default=1 -# The number of jobs to run in parallel for both `fit` and `transform` functions. -# `-1` means using all processors. -# parallel_backend : str, ParallelBackendBase instance or None, default=None -# Specify the parallelisation backend implementation in joblib, if None a 'prefer' -# value of "threads" is used by default. -# Valid options are "loky", "multiprocessing", "threading" or a custom backend. -# See the joblib Parallel documentation for more details. -# -# Attributes -# ---------- -# n_instances_ : int -# The number of train cases. -# n_dims_ : int -# The number of dimensions per case. -# series_length_ : int -# The length of each series. -# n_intervals_ : int -# The number of intervals extracted after pruning identical intervals. -# intervals_ : list of tuples -# Contains information for each feature extracted in fit. Each tuple contains the -# interval start, interval end, interval dimension, the feature(s) extracted and -# the dilation. -# Length will be n_intervals*len(features). -# -# See Also -# -------- -# RandomIntervalTransformer -# SupervisedIntervalTransformer -# -# Examples -# -------- -# >>> from tsml.transformations import FixedIntervalTransformer -# >>> from tsml.utils.testing import generate_3d_test_data -# >>> X, _ = generate_3d_test_data(n_samples=4, series_length=12, random_state=0) -# >>> tnf = FixedIntervalTransformer(n_intervals=2, random_state=0) -# >>> tnf.fit(X) -# FixedIntervalTransformer(...) 
-# >>> print(tnf.transform(X)[0]) -# [1.04753424 0.14925939 0.8473096 1.20552675 1.08976637 0.96853798 -# 1.14764656 1.07628806 0.18170775 0.8473096 1.29178823 1.08976637 -# 0.96853798 1.1907773 ] -# """ -# -# def __init__( -# self, -# n_intervals=4, -# shifted_intervals=True, -# min_interval_length=2, -# features=None, -# random_state=None, -# n_jobs=1, -# parallel_backend=None, -# ): -# self.n_intervals = n_intervals -# self.shifted_intervals = shifted_intervals -# self.min_interval_length = min_interval_length -# self.features = features -# self.random_state = random_state -# self.n_jobs = n_jobs -# self.parallel_backend = parallel_backend -# -# super(FixedIntervalTransformer, self).__init__() -# -# transformer_feature_skip = ["transform_features_", "_transform_features"] -# -# def fit_transform( -# self, X: Union[np.ndarray, List[np.ndarray]], y: np.ndarray -# ) -> np.ndarray: -# """Fit the transformer to training data and return transformed data. -# -# Parameters -# ---------- -# X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints) -# The training data. -# y : 1D np.ndarray of shape (n_instances) -# The class labels for fitting, indices correspond to instance indices in X -# -# Returns -# ------- -# X_t : 2D np.ndarray of shape (n_instances, n_features) -# Transformed data. -# """ -# X = self._fit_setup(X) -# -# fit = Parallel( -# n_jobs=self._n_jobs, backend=self.parallel_backend, prefer="threads" -# )( -# delayed(self._generate_intervals)( -# X, -# y, -# i, -# True, -# ) -# for i in range(self._n_intervals) -# ) -# -# ( -# self.intervals_, -# Xt, -# ) = zip(*fit) -# -# self.n_intervals_ = len(self.intervals_) -# -# return Xt -# -# def fit(self, X: Union[np.ndarray, List[np.ndarray]], y: np.ndarray) -> object: -# """Fit the transformer to training data. -# -# Parameters -# ---------- -# X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints) -# The training data. -# y : 1D np.ndarray of shape (n_instances) -# The class labels for fitting, indices correspond to instance indices in X -# -# Returns -# ------- -# self : -# Reference to self. -# """ -# X = self._fit_setup(X) -# -# fit = Parallel( -# n_jobs=self._n_jobs, backend=self.parallel_backend, prefer="threads" -# )( -# delayed(self._generate_intervals)( -# X, -# y, -# i, -# False, -# ) -# for i in range(self.n_intervals) -# ) -# -# ( -# self.intervals_, -# _, -# ) = zip(*fit) -# -# self.n_intervals_ = len(self.intervals_) -# -# return self -# -# def transform( -# self, X: Union[np.ndarray, List[np.ndarray]], y: np.ndarray -# ) -> np.ndarray: -# """Transform input cases in X. -# -# Parameters -# ---------- -# X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints) -# The training data. -# y : 1D np.ndarray of shape (n_instances) -# The class labels for fitting, indices correspond to instance indices in X -# -# Returns -# ------- -# X_t : 2D np.ndarray of shape (n_instances, n_features) -# Transformed data. 
-# """ -# check_is_fitted(self) -# -# X = self._validate_data(X=X, reset=False, ensure_min_series_length=2) -# -# if self._transform_features is None: -# transform_features = [None] * self.n_intervals_ -# else: -# count = 0 -# transform_features = [] -# for _ in range(self.n_intervals_): -# for feature in self._features: -# if is_transformer(feature): -# nf = feature.n_transformed_features -# transform_features.append( -# self._transform_features[count : count + nf] -# ) -# count += nf -# else: -# transform_features.append(self._transform_features[count]) -# count += 1 -# -# transform = Parallel( -# n_jobs=self._n_jobs, backend=self.parallel_backend, prefer="threads" -# )( -# delayed(self._transform_interval)( -# X, -# i, -# transform_features[i], -# ) -# for i in range(self.n_intervals_) -# ) -# -# Xt = transform[0] -# for i in range(1, self.n_intervals_): -# Xt = np.hstack((Xt, transform[i])) -# -# return Xt -# -# def _fit_setup(self, X): -# X = self._validate_data(X=X, ensure_min_series_length=2) -# X = self._convert_X(X) -# -# self.intervals_ = [] -# self._transform_features = None -# -# self.n_instances_, self.n_dims_, self.series_length_ = X.shape -# -# if callable(self.n_intervals): -# self._n_intervals = self.n_intervals(X) -# else: -# self._n_intervals = self.n_intervals -# -# self._min_interval_length = self.min_interval_length -# if self.min_interval_length < 2: -# self._min_interval_length = 2 -# -# self._features = self.features -# if self.features is None: -# self._features = [ -# QuantileTransformer(), -# QuantileTransformer(subtract_mean=True), -# ] -# elif not isinstance(self.features, list): -# self._features = [self.features] -# -# li = [] -# for feature in self._features: -# if is_transformer(feature): -# li.append( -# _clone_estimator( -# feature, -# self.random_state, -# ) -# ) -# elif callable(feature): -# li.append(feature) -# else: -# raise ValueError( -# "Input features must be a list of callables or aeon transformers." 
-#                 )
-#         self._features = li
-#
-#         self._n_jobs = check_n_jobs(self.n_jobs)
-#
-#         return X
-#
-#     def _generate_intervals(self, X, y, depth, transform):
-#         Xt = np.empty((self.n_instances_, 0)) if transform else None
-#         intervals = []
-#
-#         # interval points are not yet populated in this draft
-#         fixed_points = []
-#
-#         for dim in range(self.n_dims_):
-#             for points in fixed_points:
-#                 interval_start, interval_end = points
-#
-#                 for feature in self._features:
-#                     if is_transformer(feature):
-#                         if transform:
-#                             feature = _clone_estimator(
-#                                 feature,
-#                                 self.random_state
-#                             )
-#
-#                             t = feature.fit_transform(
-#                                 np.expand_dims(
-#                                     X[:, dim, interval_start:interval_end], axis=1
-#                                 ),
-#                                 y,
-#                             )
-#
-#                             if t.ndim == 3 and t.shape[1] == 1:
-#                                 t = t.reshape((t.shape[0], t.shape[2]))
-#
-#                             Xt = np.hstack((Xt, t))
-#                         else:
-#                             feature.fit(
-#                                 np.expand_dims(
-#                                     X[:, dim, interval_start:interval_end], axis=1
-#                                 ),
-#                                 y,
-#                             )
-#                     elif transform:
-#                         t = [
-#                             [f]
-#                             for f in feature(X[:, dim, interval_start:interval_end])
-#                         ]
-#                         Xt = np.hstack((Xt, t))
-#
-#                     intervals.append((interval_start, interval_end, dim, feature))
-#
-#         return intervals, Xt
-#
-#     def _transform_interval(self, X, idx, keep_transform):
-#         interval_start, interval_end, dim, feature = self.intervals_[idx]
-#
-#         if keep_transform is not None:
-#             if is_transformer(feature):
-#                 for n in self.transformer_feature_skip:
-#                     if hasattr(feature, n):
-#                         setattr(feature, n, keep_transform)
-#                         break
-#             elif not keep_transform:
-#                 return [[0] for _ in range(X.shape[0])]
-#
-#         if is_transformer(feature):
-#             Xt = feature.transform(
-#                 np.expand_dims(X[:, dim, interval_start:interval_end], axis=1)
-#             )
-#
-#             if Xt.ndim == 3:
-#                 Xt = Xt.reshape((Xt.shape[0], Xt.shape[2]))
-#         else:
-#             Xt = [[f] for f in feature(X[:, dim, interval_start:interval_end])]
-#
-#         return Xt
-#
-#     def set_features_to_transform(self, arr, raise_error=True):
-#         """Set transform_features to the given array.
-#
-#         Each index in the list corresponds to the index of an interval, True intervals
-#         are included in the transform, False intervals skipped and are set to 0.
-#
-#         If any transformers are in features, they must also have a "transform_features"
-#         or "_transform_features" attribute as well as a "n_transformed_features"
-#         attribute. The input array should contain an item for each of the transformers
-#         "n_transformed_features" output features.
-#
-#         Parameters
-#         ----------
-#         arr : list of bools
-#             A list of bools indicating which intervals to include in the transform.
-#         raise_error : bool, default=True
-#             Whether to raise an error or return False if the input or transformers
-#             are invalid.
-#
-#         Returns
-#         -------
-#         completed: bool
-#             Whether the operation was successful.
-#         """
-#         length = 0
-#         for feature in self._features:
-#             if is_transformer(feature):
-#                 if not any(
-#                     hasattr(feature, n) for n in self.transformer_feature_skip
-#                 ) or not hasattr(feature, "n_transformed_features"):
-#                     if raise_error:
-#                         raise ValueError(
-#                             "Transformer must have one of "
-#                             f"{self.transformer_feature_skip} as an attribute and "
-#                             "a n_transformed_features attribute."
-#                         )
-#                     else:
-#                         return False
-#
-#                 length += feature.n_transformed_features
-#             else:
-#                 length += 1
-#
-#         if len(arr) != length * self.n_intervals_ or not all(
-#             isinstance(b, bool) for b in arr
-#         ):
-#             if raise_error:
-#                 raise ValueError(
-#                     "Input must be a list of bools, matching the length of the "
-#                     "transform output."
-# ) -# else: -# return False -# -# self._transform_features = arr -# -# return True -# -# @classmethod -# def get_test_params(cls, parameter_set="default"): -# """Return testing parameter settings for the estimator. -# -# Parameters -# ---------- -# parameter_set : str, default="default" -# Name of the set of test parameters to return, for use in tests. If no -# special parameters are defined for a value, will return `"default"` set. -# -# Returns -# ------- -# params : dict or list of dict, default = {} -# Parameters to create testing instances of the class -# Each dict are parameters to construct an "interesting" test instance, i.e., -# `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. -# `create_test_instance` uses the first (or only) dictionary in `params` -# """ -# return {"n_intervals": 2} diff --git a/tsml/transformations/_periodogram.py b/tsml/transformations/_periodogram.py deleted file mode 100644 index c230180..0000000 --- a/tsml/transformations/_periodogram.py +++ /dev/null @@ -1,121 +0,0 @@ -"""Periodogram transformer.""" - -__author__ = ["MatthewMiddlehurst"] -__all__ = ["PeriodogramTransformer"] - -import math - -import numpy as np -from sklearn.base import TransformerMixin - -from tsml.base import BaseTimeSeriesEstimator -from tsml.utils.validation import _check_optional_dependency, check_n_jobs - - -class PeriodogramTransformer(TransformerMixin, BaseTimeSeriesEstimator): - """Periodogram transformer. - - This transformer converts a time series into its periodogram representation. - - Parameters - ---------- - pad_series : bool, default=True - Whether to pad the series to the next power of 2. If False, the series - will be used as is. - pad_with : str, default="constant" - The type of padding to use. see the numpy.pad documentation mode parameter for - options. By default, the series will be padded with zeros. - constant_value : int, default=0 - The value to use when padding with a constant value. - use_pyfftw : bool, default=False - Whether to use the pyfftw library for FFT calculations. Requires the pyfftw - package to be installed. - n_jobs : int, default=1 - The number of threads to use for FFT calculations. Only used if use_pyfftw is - True. - - Examples - -------- - >>> from tsml.transformations import PeriodogramTransformer - >>> from tsml.utils.testing import generate_3d_test_data - >>> X, _ = generate_3d_test_data(n_samples=4, n_channels=2, series_length=20, - ... random_state=0) - >>> tnf = PeriodogramTransformer() # doctest: +SKIP - >>> tnf.fit(X) # doctest: +SKIP - PeriodogramTransformer(...) 
- >>> print(tnf.transform(X)[0]) # doctest: +SKIP - [[22.16456597 11.08122685 3.69018936 2.17665255 5.27387039 3.10598557 - 6.311107 1.70468284 1.8269671 0.88838033 1.56747869 3.42037058 - 1.67988661 1.71142437 3.49821716 1.25120108] - [22.71382067 8.64933688 6.36412194 0.9298486 5.70358068 2.70669743 - 4.33906385 0.36544821 2.28769936 3.67702091 1.45018642 1.26838712 - 3.36395549 2.69146494 2.27041859 3.9023142 ]] - """ - - def __init__( - self, - pad_series=True, - pad_with="constant", - constant_value=0, - use_pyfftw=False, - n_jobs=1, - ): - self.use_pyfftw = use_pyfftw - self.pad_series = pad_series - self.pad_with = pad_with - self.constant_value = constant_value - self.n_jobs = n_jobs - - if use_pyfftw: - _check_optional_dependency("pyfftw", "pyfftw", self) - - super().__init__() - - def fit(self, X, y=None): - self._validate_data(X=X) - return self - - def transform(self, X, y=None): - X = self._validate_data(X=X, reset=False) - X = self._convert_X(X) - - threads_to_use = check_n_jobs(self.n_jobs) - - if self.pad_series: - kwargs = {"mode": self.pad_with} - if self.pad_with == "constant": - kwargs["constant_values"] = self.constant_value - - X = np.pad( - X, - ( - (0, 0), - (0, 0), - ( - 0, - int( - math.pow(2, math.ceil(math.log(X.shape[2], 2))) - X.shape[2] - ), - ), - ), - **kwargs, - ) - - if self.use_pyfftw: - import pyfftw - - old_threads = pyfftw.config.NUM_THREADS - pyfftw.config.NUM_THREADS = threads_to_use - - fft_object = pyfftw.builders.fft(X[:, :, :]) - Xt = np.abs(fft_object()) - Xt = Xt[:, :, : int(X.shape[2] / 2)] - - pyfftw.config.NUM_THREADS = old_threads - else: - Xt = np.abs(np.fft.fft(X)[:, :, : int(X.shape[2] / 2)]) - - return Xt - - def _more_tags(self) -> dict: - return {"requires_fit": False, "optional_dependency": True} diff --git a/tsml/transformations/_quantile.py b/tsml/transformations/_quantile.py deleted file mode 100644 index d718176..0000000 --- a/tsml/transformations/_quantile.py +++ /dev/null @@ -1,57 +0,0 @@ -# from typing import List, Union -# -# import numpy as np -# from sklearn.base import TransformerMixin -# -# from tsml.base import BaseTimeSeriesEstimator -# -# -# class QuantileTransformer(TransformerMixin, BaseTimeSeriesEstimator): -# """QuantileTransformer""" -# -# def __init__( -# self, -# divisor=4, -# subtract_mean=False, -# ): -# self.divisor = divisor -# self.subtract_mean = subtract_mean -# -# super(QuantileTransformer).__init__() -# -# def fit( -# self, X: Union[np.ndarray, List[np.ndarray]], y: Union[np.ndarray, None] = None -# ) -> object: -# """Unused. Validates X.""" -# self._validate_data(X=X) -# return self -# -# def transform( -# self, X: Union[np.ndarray, List[np.ndarray]], y: Union[np.ndarray, None] = None -# ) -> np.ndarray: -# """Transform input cases in X. -# -# Parameters -# ---------- -# X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints) -# The training data. -# y : 1D np.ndarray of shape (n_instances) -# The class labels for fitting, indices correspond to instance indices in X -# -# Returns -# ------- -# X_t : 2D np.ndarray of shape (n_instances, n_features) -# Transformed data. 
-#         """
-#         X = self._validate_data(X=X, reset=False)
-#         X = self._convert_X(X)
-#
-#         # evenly spaced quantiles over the time axis are used as features
-#         num_quantiles = 1 + (X.shape[2] - 1) // self.divisor
-#         if num_quantiles == 1:
-#             quantiles = np.quantile(X, [0.5], axis=-1)
-#         else:
-#             quantiles = np.quantile(X, np.linspace(0, 1, num_quantiles), axis=-1)
-#
-#         # (n_quantiles, n_instances, n_channels) ->
-#         # (n_instances, n_channels, n_quantiles)
-#         quantiles = np.moveaxis(quantiles, 0, -1)
-#
-#         if self.subtract_mean:
-#             # subtract the series mean from every second quantile
-#             quantiles[..., 1::2] = quantiles[..., 1::2] - X.mean(-1, keepdims=True)
-#
-#         return quantiles
diff --git a/tsml/vector/__init__.py b/tsml/vector/__init__.py
deleted file mode 100644
index f85cb6a..0000000
--- a/tsml/vector/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-"""sklearn-like vector estimators."""
-
-__all__ = [
-    "RotationForestClassifier",
-    "RotationForestRegressor",
-    "CITClassifier",
-]
-
-from tsml.vector._cit import CITClassifier
-from tsml.vector._rotation_forest import (
-    RotationForestClassifier,
-    RotationForestRegressor,
-)
diff --git a/tsml/vector/_cit.py b/tsml/vector/_cit.py
deleted file mode 100644
index 2ca1995..0000000
--- a/tsml/vector/_cit.py
+++ /dev/null
@@ -1,487 +0,0 @@
-"""Continuous interval tree (CIT) vector classifier (aka Time Series Tree).
-
-Continuous Interval Tree aka Time Series Tree, base classifier originally used
-in the time series forest interval-based classification algorithm. Fits sklearn
-conventions.
-"""
-
-__author__ = ["MatthewMiddlehurst"]
-__all__ = ["CITClassifier"]
-
-import math
-import sys
-from typing import Union
-
-import numpy as np
-import pandas as pd
-from numba import njit
-from sklearn import preprocessing
-from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.utils import check_random_state
-from sklearn.utils.multiclass import check_classification_targets
-from sklearn.utils.validation import check_is_fitted
-
-
-class CITClassifier(ClassifierMixin, BaseEstimator):
-    """Continuous interval tree (CIT) vector classifier (aka Time Series Tree).
-
-    The `Time Series Tree` described in the Time Series Forest (TSF) paper Deng et al
-    (2013) [1]. A simple information gain based tree for continuous attributes using a
-    bespoke margin gain metric for tie breaking.
-
-    Implemented as a base classifier for interval based time series classifiers such as
-    `CanonicalIntervalForest` and `DrCIF`.
-
-    Parameters
-    ----------
-    max_depth : int, default=sys.maxsize
-        Maximum depth for the tree.
-    thresholds : int, default=20
-        Number of thresholds to split continuous attributes on at tree nodes.
-    random_state : int, RandomState instance or None, default=None
-        If `int`, random_state is the seed used by the random number generator;
-        If `RandomState` instance, random_state is the random number generator;
-        If `None`, the random number generator is the `RandomState` instance used
-        by `np.random`.
-
-    Attributes
-    ----------
-    n_instances_ : int
-        The number of train cases in the training set.
-    n_atts_ : int
-        The number of attributes in the training set.
-    n_classes_ : int
-        Number of classes. Extracted from the data.
-    classes_ : ndarray of shape (n_classes_)
-        Holds the label for each class.
-    class_dictionary_ : dict
-        A dictionary mapping class labels to class indices in classes_.
-
-    Notes
-    -----
-    For the Java version, see
-    `tsml `_.
-
-    References
-    ----------
-    .. 
[1] H.Deng, G.Runger, E.Tuv and M.Vladimir, "A time series forest for - classification and feature extraction", Information Sciences, 239, 2013 - - Examples - -------- - >>> from tsml.vector import CITClassifier - >>> from tsml.utils.testing import generate_2d_test_data - >>> X, y = generate_2d_test_data(n_samples=8, random_state=0) - >>> clf = CITClassifier(random_state=0) - >>> clf.fit(X, y) - CITClassifier(...) - >>> clf.predict(X) - array([0, 1, 0, 0, 0, 0, 0, 1]) - """ - - def __init__( - self, - max_depth=sys.maxsize, - thresholds=20, - random_state=None, - ): - self.max_depth = max_depth - self.thresholds = thresholds - self.random_state = random_state - - super().__init__() - - def fit(self, X: Union[np.ndarray, pd.DataFrame], y: np.ndarray) -> object: - """Fit the estimator to training data. - - Parameters - ---------- - X : 2d ndarray or DataFrame of shape (n_instances, n_atts) - The training data. - y : 1D np.ndarray of shape (n_instances) - The class labels for fitting, indices correspond to instance indices in X - - Returns - ------- - self : - Reference to self. - """ - if isinstance(X, np.ndarray) and len(X.shape) == 3 and X.shape[1] == 1: - X = np.reshape(X, (X.shape[0], -1)) - - X, y = self._validate_data( - X=X, y=y, ensure_min_samples=2, force_all_finite="allow-nan" - ) - - check_classification_targets(y) - - self.n_instances_, self.n_atts_ = X.shape - self.classes_ = np.unique(y) - self.n_classes_ = self.classes_.shape[0] - self.class_dictionary_ = {} - for index, class_val in enumerate(self.classes_): - self.class_dictionary_[class_val] = index - - # escape if only one class seen - if self.n_classes_ == 1: - self._is_fitted = True - return self - - le = preprocessing.LabelEncoder() - y = le.fit_transform(y) - - rng = check_random_state(self.random_state) - self._root = _TreeNode(random_state=rng) - - thresholds = np.linspace(np.min(X, axis=0), np.max(X, axis=0), self.thresholds) - - distribution = np.zeros(self.n_classes_) - for i in range(len(y)): - distribution[y[i]] += 1 - - entropy = _entropy(distribution, distribution.sum()) - - self._root.build_tree( - X, - y, - thresholds, - entropy, - distribution, - 0, - self.max_depth, - self.n_classes_, - False, - ) - - self._is_fitted = True - return self - - def predict(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray: - """Predicts labels for sequences in X. - - Parameters - ---------- - X : 2d ndarray or DataFrame of shape (n_instances, n_atts) - The testing data. - - Returns - ------- - y : array-like of shape (n_instances) - Predicted class labels. - """ - return np.array( - [self.classes_[int(np.argmax(prob))] for prob in self.predict_proba(X)] - ) - - def predict_proba(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray: - """Predicts labels probabilities for sequences in X. - - Parameters - ---------- - X : 2d ndarray or DataFrame of shape (n_instances, n_atts) - The testing data. - - Returns - ------- - y : array-like of shape (n_instances, n_classes_) - Predicted probabilities using the ordering in classes_. 
- """ - check_is_fitted(self) - - # treat case of single class seen in fit - if self.n_classes_ == 1: - return np.repeat([[1]], X.shape[0], axis=0) - - if isinstance(X, np.ndarray) and len(X.shape) == 3 and X.shape[1] == 1: - X = np.reshape(X, (X.shape[0], -1)) - - X = self._validate_data(X=X, reset=False, force_all_finite="allow-nan") - - dists = np.zeros((X.shape[0], self.n_classes_)) - for i in range(X.shape[0]): - dists[i] = self._root.predict_proba(X[i], self.n_classes_) - return dists - - def tree_node_splits_and_gain(self): - """Recursively find the split and information gain for each tree node.""" - splits = [] - gains = [] - - if self._root.best_split > -1: - self._find_splits_gain(self._root, splits, gains) - - return splits, gains - - def _find_splits_gain(self, node, splits, gains): - """Recursively find the split and information gain for each tree node.""" - splits.append(node.best_split) - gains.append(node.best_gain) - - for next_node in node.children: - if next_node.best_split > -1: - self._find_splits_gain(next_node, splits, gains) - - def _more_tags(self) -> dict: - return {"allow_nan": True} - - -class _TreeNode: - """ContinuousIntervalTree tree node.""" - - def __init__( - self, - random_state=None, - ): - self.random_state = random_state - - self.best_split = -1 - self.best_threshold = 0 - self.best_gain = 0.000001 - self.best_margin = -1 - self.children = [] - self.leaf_distribution = [] - self.depth = -1 - - def build_tree( - self, - X, - y, - thresholds, - entropy, - distribution, - depth, - max_depth, - n_classes, - leaf, - ): - self.depth = depth - best_distributions = [] - best_entropies = [] - - if leaf is False and self.remaining_classes(distribution) and depth < max_depth: - for (_, att), threshold in np.ndenumerate(thresholds): - ( - info_gain, - distributions, - entropies, - ) = self.information_gain(X, y, att, threshold, entropy, n_classes) - - if info_gain > self.best_gain: - self.best_split = att - self.best_threshold = threshold - self.best_gain = info_gain - self.best_margin = -1 - best_distributions = distributions - best_entropies = entropies - elif info_gain == self.best_gain and info_gain > 0.000001: - margin = self.margin_gain(X, att, threshold) - if self.best_margin == -1: - self.best_margin = self.margin_gain( - X, self.best_split, self.best_threshold - ) - - if margin > self.best_margin or ( - margin == self.best_margin - and self.random_state.choice([True, False]) - ): - self.best_split = att - self.best_threshold = threshold - self.best_margin = margin - best_distributions = distributions - best_entropies = entropies - - if self.best_split > -1: - self.children = [None, None, None] - - left_idx, right_idx, missing_idx = self.split_data( - X, self.best_split, self.best_threshold - ) - - if len(left_idx) > 0: - self.children[0] = _TreeNode(random_state=self.random_state) - self.children[0].build_tree( - X[left_idx], - y[left_idx], - thresholds, - best_entropies[0], - best_distributions[0], - depth + 1, - max_depth, - n_classes, - False, - ) - else: - self.children[0] = _TreeNode(random_state=self.random_state) - self.children[0].build_tree( - X, - y, - thresholds, - entropy, - distribution, - depth + 1, - max_depth, - n_classes, - True, - ) - - if len(right_idx) > 0: - self.children[1] = _TreeNode(random_state=self.random_state) - self.children[1].build_tree( - X[right_idx], - y[right_idx], - thresholds, - best_entropies[1], - best_distributions[1], - depth + 1, - max_depth, - n_classes, - False, - ) - else: - self.children[1] = 
_TreeNode(random_state=self.random_state) - self.children[1].build_tree( - X, - y, - thresholds, - entropy, - distribution, - depth + 1, - max_depth, - n_classes, - True, - ) - - if len(missing_idx) > 0: - self.children[2] = _TreeNode(random_state=self.random_state) - self.children[2].build_tree( - X[missing_idx], - y[missing_idx], - thresholds, - best_entropies[2], - best_distributions[2], - depth + 1, - max_depth, - n_classes, - False, - ) - else: - self.children[2] = _TreeNode(random_state=self.random_state) - self.children[2].build_tree( - X, - y, - thresholds, - entropy, - distribution, - depth + 1, - max_depth, - n_classes, - True, - ) - else: - self.leaf_distribution = distribution / np.sum(distribution) - - return self - - def predict_proba(self, X, n_classes): - if self.best_split > -1: - if X[self.best_split] <= self.best_threshold: - return self.children[0].predict_proba(X, n_classes) - elif X[self.best_split] > self.best_threshold: - return self.children[1].predict_proba(X, n_classes) - else: - return self.children[2].predict_proba(X, n_classes) - else: - return self.leaf_distribution - - @staticmethod - @njit(fastmath=True, cache=True) - def information_gain(X, y, attribute, threshold, parent_entropy, n_classes): - dist_left = np.zeros(n_classes) - dist_right = np.zeros(n_classes) - dist_missing = np.zeros(n_classes) - for i, case in enumerate(X): - if case[attribute] <= threshold: - dist_left[y[i]] += 1 - elif case[attribute] > threshold: - dist_right[y[i]] += 1 - else: - dist_missing[y[i]] += 1 - - sum_missing = 0 - for v in dist_missing: - sum_missing += v - sum_left = 0 - for v in dist_left: - sum_left += v - sum_right = 0 - for v in dist_right: - sum_right += v - - entropy_left = _entropy(dist_left, sum_left) - entropy_right = _entropy(dist_right, sum_right) - entropy_missing = _entropy(dist_missing, sum_missing) - - num_cases = X.shape[0] - info_gain = ( - parent_entropy - - sum_left / num_cases * entropy_left - - sum_right / num_cases * entropy_right - - sum_missing / num_cases * entropy_missing - ) - - return ( - info_gain, - [dist_left, dist_right, dist_missing], - [entropy_left, entropy_right, entropy_missing], - ) - - @staticmethod - @njit(fastmath=True, cache=True) - def margin_gain(X, attribute, threshold): - margins = np.abs(X[:, attribute] - threshold) - return np.min(margins) - - @staticmethod - @njit(fastmath=True, cache=True) - def split_data(X, best_split, best_threshold): - left_idx = np.zeros(len(X), dtype=np.int_) - left_count = 0 - right_idx = np.zeros(len(X), dtype=np.int_) - right_count = 0 - missing_idx = np.zeros(len(X), dtype=np.int_) - missing_count = 0 - for i, case in enumerate(X): - if case[best_split] <= best_threshold: - left_idx[left_count] = i - left_count += 1 - elif case[best_split] > best_threshold: - right_idx[right_count] = i - right_count += 1 - else: - missing_idx[missing_count] = i - missing_count += 1 - - return ( - left_idx[:left_count], - right_idx[:right_count], - missing_idx[:missing_count], - ) - - @staticmethod - @njit(fastmath=True, cache=True) - def remaining_classes(distribution): - remaining_classes = 0 - for d in distribution: - if d > 0: - remaining_classes += 1 - return remaining_classes > 1 - - -@njit(fastmath=True, cache=True) -def _entropy(x, s): - e = 0 - for i in x: - p = i / s if s > 0 else 0 - e += -(p * math.log(p) / 0.6931471805599453) if p > 0 else 0 - return e diff --git a/tsml/vector/_rotation_forest.py b/tsml/vector/_rotation_forest.py deleted file mode 100644 index 639083e..0000000 --- 
a/tsml/vector/_rotation_forest.py
+++ /dev/null
@@ -1,792 +0,0 @@
-"""A rotation forest (RotF) vector classifier.
-
-A Rotation Forest tsml implementation for continuous values only. Fits sklearn
-conventions.
-"""
-
-__author__ = ["MatthewMiddlehurst"]
-__all__ = ["RotationForestClassifier", "RotationForestRegressor"]
-
-import time
-from typing import List, Union
-
-import numpy as np
-import pandas as pd
-from joblib import Parallel
-from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
-from sklearn.decomposition import PCA
-from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
-from sklearn.utils import check_random_state
-from sklearn.utils.multiclass import check_classification_targets
-from sklearn.utils.parallel import delayed
-from sklearn.utils.validation import check_is_fitted
-
-from tsml.base import _clone_estimator
-from tsml.utils.validation import check_n_jobs
-
-
-class RotationForestClassifier(ClassifierMixin, BaseEstimator):
-    """A Rotation Forest (RotF) classifier.
-
-    Implementation of the Rotation Forest classifier described in Rodriguez et al
-    (2006) [1]. Builds a forest of trees built on random portions of the data
-    transformed using PCA.
-
-    Intended as a benchmark for time series data and a base classifier for
-    transformation-based approaches such as ShapeletTransformClassifier, this tsml
-    implementation only works with continuous attributes.
-
-    Parameters
-    ----------
-    n_estimators : int, default=200
-        Number of estimators to build for the ensemble.
-    min_group : int, default=3
-        The minimum size of an attribute subsample group.
-    max_group : int, default=3
-        The maximum size of an attribute subsample group.
-    remove_proportion : float, default=0.5
-        The proportion of cases to be removed per group.
-    base_estimator : BaseEstimator or None, default=None
-        Base estimator for the ensemble. By default, uses the sklearn
-        `DecisionTreeClassifier` using entropy as a splitting measure.
-    time_limit_in_minutes : int, default=0
-        Time contract to limit build time in minutes, overriding ``n_estimators``.
-        Default of `0` means ``n_estimators`` is used.
-    contract_max_n_estimators : int, default=500
-        Max number of estimators to build when ``time_limit_in_minutes`` is set.
-    save_transformed_data : bool, default=False
-        Save the data transformed in fit in ``transformed_data_``.
-    n_jobs : int, default=1
-        The number of jobs to run in parallel for both ``fit`` and ``predict``.
-        `-1` means using all processors.
-    random_state : int, RandomState instance or None, default=None
-        If `int`, random_state is the seed used by the random number generator;
-        If `RandomState` instance, random_state is the random number generator;
-        If `None`, the random number generator is the `RandomState` instance used
-        by `np.random`.
-
-    Attributes
-    ----------
-    n_instances_ : int
-        The number of train cases in the training set.
-    n_atts_ : int
-        The number of attributes in the training set.
-    n_classes_ : int
-        Number of classes. Extracted from the data.
-    classes_ : ndarray of shape (n_classes_)
-        Holds the label for each class.
-    class_dictionary_ : dict
-        A dictionary mapping class labels to class indices in classes_.
-    transformed_data_ : list of shape (n_estimators) of ndarray
-        The transformed training dataset for all classifiers. Only saved when
-        ``save_transformed_data`` is `True`.
-    estimators_ : list of shape (n_estimators) of BaseEstimator
-        The collections of estimators trained in fit.
- - Notes - ----- - For the Java version, see - `tsml `_. - - References - ---------- - .. [1] Rodriguez, Juan José, Ludmila I. Kuncheva, and Carlos J. Alonso. "Rotation - forest: A new classifier ensemble method." IEEE transactions on pattern analysis - and machine intelligence 28.10 (2006). - - .. [2] Bagnall, A., et al. "Is rotation forest the best classifier for problems - with continuous features?." arXiv preprint arXiv:1809.06705 (2018). - - Examples - -------- - >>> from tsml.vector import RotationForestClassifier - >>> from tsml.utils.testing import generate_2d_test_data - >>> X, y = generate_2d_test_data(n_samples=8, random_state=0) - >>> clf = RotationForestClassifier(random_state=0) - >>> clf.fit(X, y) - RotationForestClassifier(...) - >>> clf.predict(X) - array([0, 1, 0, 0, 0, 0, 0, 1]) - """ - - def __init__( - self, - n_estimators=200, - min_group=3, - max_group=3, - remove_proportion=0.5, - base_estimator=None, - time_limit_in_minutes=0.0, - contract_max_n_estimators=500, - save_transformed_data=False, - n_jobs=1, - random_state=None, - ): - self.n_estimators = n_estimators - self.min_group = min_group - self.max_group = max_group - self.remove_proportion = remove_proportion - self.base_estimator = base_estimator - self.time_limit_in_minutes = time_limit_in_minutes - self.contract_max_n_estimators = contract_max_n_estimators - self.save_transformed_data = save_transformed_data - self.n_jobs = n_jobs - self.random_state = random_state - - super().__init__() - - def fit(self, X: Union[np.ndarray, pd.DataFrame], y: np.ndarray) -> object: - """Fit the estimator to training data. - - Parameters - ---------- - X : 2d ndarray or DataFrame of shape (n_instances, n_atts) - The training data. - y : 1D np.ndarray of shape (n_instances) - The class labels for fitting, indices correspond to instance indices in X - - Returns - ------- - self : - Reference to self. 
- """ - if isinstance(X, np.ndarray) and len(X.shape) == 3 and X.shape[1] == 1: - X = np.reshape(X, (X.shape[0], -1)) - - X, y = self._validate_data(X=X, y=y, ensure_min_samples=2, dtype=np.float32) - - check_classification_targets(y) - - self._n_jobs = check_n_jobs(self.n_jobs) - - self.n_instances_, self.n_atts_ = X.shape - self.classes_ = np.unique(y) - self.n_classes_ = self.classes_.shape[0] - self.class_dictionary_ = {} - for index, class_val in enumerate(self.classes_): - self.class_dictionary_[class_val] = index - - # escape if only one class seen - if self.n_classes_ == 1: - self._is_fitted = True - return self - - time_limit = self.time_limit_in_minutes * 60 - start_time = time.time() - train_time = 0 - - if self.base_estimator is None: - self._base_estimator = DecisionTreeClassifier(criterion="entropy") - - # remove useless attributes - self._useful_atts = ~np.all(X[1:] == X[:-1], axis=0) - X = X[:, self._useful_atts] - - self._n_atts = X.shape[1] - - # normalise attributes - self._min = X.min(axis=0) - self._ptp = X.max(axis=0) - self._min - X = (X - self._min) / self._ptp - - X_cls_split = [X[np.where(y == i)] for i in self.classes_] - - if time_limit > 0: - self._n_estimators = 0 - self.estimators_ = [] - self._pcas = [] - self._groups = [] - self.transformed_data_ = [] - - while ( - train_time < time_limit - and self._n_estimators < self.contract_max_n_estimators - ): - fit = Parallel(n_jobs=self._n_jobs)( - delayed(self._fit_estimator)( - X, - X_cls_split, - y, - i, - ) - for i in range(self._n_jobs) - ) - - estimators, pcas, groups, transformed_data = zip(*fit) - - self.estimators_ += estimators - self._pcas += pcas - self._groups += groups - self.transformed_data_ += transformed_data - - self._n_estimators += self._n_jobs - train_time = time.time() - start_time - else: - self._n_estimators = self.n_estimators - - fit = Parallel(n_jobs=self._n_jobs)( - delayed(self._fit_estimator)( - X, - X_cls_split, - y, - i, - ) - for i in range(self._n_estimators) - ) - - self.estimators_, self._pcas, self._groups, self.transformed_data_ = zip( - *fit - ) - - return self - - def predict(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray: - """Predicts labels for sequences in X. - - Parameters - ---------- - X : 2d ndarray or DataFrame of shape (n_instances, n_atts) - The testing data. - - Returns - ------- - y : array-like of shape (n_instances) - Predicted class labels. - """ - return np.array( - [self.classes_[int(np.argmax(prob))] for prob in self.predict_proba(X)] - ) - - def predict_proba(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray: - """Predicts labels probabilities for sequences in X. - - Parameters - ---------- - X : 2d ndarray or DataFrame of shape (n_instances, n_atts) - The testing data. - - Returns - ------- - y : array-like of shape (n_instances, n_classes_) - Predicted probabilities using the ordering in classes_. - """ - check_is_fitted(self) - - # treat case of single class seen in fit - if self.n_classes_ == 1: - return np.repeat([[1]], X.shape[0], axis=0) - - if isinstance(X, np.ndarray) and len(X.shape) == 3 and X.shape[1] == 1: - X = np.reshape(X, (X.shape[0], -1)) - - X = self._validate_data(X=X, reset=False, dtype=np.float32) - - # replace missing values with 0 and remove useless attributes - X = X[:, self._useful_atts] - - # normalise the data. 
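-        # (min-max scaling with the per-attribute minimum and range learned in fit)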
- X = (X - self._min) / self._ptp - - y_probas = Parallel(n_jobs=self._n_jobs)( - delayed(self._predict_proba_for_estimator)( - X, - self.estimators_[i], - self._pcas[i], - self._groups[i], - ) - for i in range(self._n_estimators) - ) - - output = np.sum(y_probas, axis=0) / ( - np.ones(self.n_classes_) * self._n_estimators - ) - return output - - def _fit_estimator(self, X, X_cls_split, y, idx): - rs = 255 if self.random_state == 0 else self.random_state - rs = ( - None - if self.random_state is None - else (rs * 37 * (idx + 1)) % np.iinfo(np.int32).max - ) - rng = check_random_state(rs) - - groups = _generate_groups(rng, self._n_atts, self.min_group, self.max_group) - pcas = [] - - # construct the slices to fit the PCAs too. - for group in groups: - classes = rng.choice( - range(self.n_classes_), - size=rng.randint(1, self.n_classes_ + 1), - replace=False, - ) - - # randomly add the classes with the randomly selected attributes. - X_t = np.zeros((0, len(group))) - for cls_idx in classes: - c = X_cls_split[cls_idx] - X_t = np.concatenate((X_t, c[:, group]), axis=0) - - sample_ind = rng.choice( - X_t.shape[0], - max(1, int(X_t.shape[0] * self.remove_proportion)), - replace=False, - ) - X_t = X_t[sample_ind] - - # try to fit the PCA if it fails, remake it, and add 10 random data - # instances. - while True: - # ignore err state on PCA because we account if it fails. - with np.errstate(divide="ignore", invalid="ignore"): - # differences between os occasionally. seems to happen when there - # are low amounts of cases in the fit - pca = PCA(random_state=rs).fit(X_t) - - if not np.isnan(pca.explained_variance_ratio_).all(): - break - X_t = np.concatenate( - (X_t, rng.random_sample((10, X_t.shape[1]))), axis=0 - ) - - pcas.append(pca) - - # merge all the pca_transformed data into one instance and build a classifier - # on it. 
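-        # each PCA was fit on a subsample of a single attribute group; applying
-        # them to the full data and stacking the outputs gives the rotated space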
-        X_t = np.concatenate(
-            [pcas[i].transform(X[:, group]) for i, group in enumerate(groups)], axis=1
-        )
-        X_t = X_t.astype(np.float32)
-        X_t = np.nan_to_num(
-            X_t, False, 0, np.finfo(np.float32).max, np.finfo(np.float32).min
-        )
-
-        tree = _clone_estimator(self._base_estimator, random_state=rs)
-        tree.fit(X_t, y)
-
-        return tree, pcas, groups, X_t if self.save_transformed_data else None
-
-    def _predict_proba_for_estimator(self, X, clf, pcas, groups):
-        X_t = np.concatenate(
-            [pcas[i].transform(X[:, group]) for i, group in enumerate(groups)], axis=1
-        )
-        X_t = X_t.astype(np.float32)
-        X_t = np.nan_to_num(
-            X_t, False, 0, np.finfo(np.float32).max, np.finfo(np.float32).min
-        )
-
-        probas = clf.predict_proba(X_t)
-
-        if probas.shape[1] != self.n_classes_:
-            new_probas = np.zeros((probas.shape[0], self.n_classes_))
-            for i, cls in enumerate(clf.classes_):
-                cls_idx = self.class_dictionary_[cls]
-                new_probas[:, cls_idx] = probas[:, i]
-            probas = new_probas
-
-        return probas
-
-    def _train_probas_for_estimator(self, y, idx):
-        rs = 255 if self.random_state == 0 else self.random_state
-        rs = (
-            None
-            if self.random_state is None
-            else (rs * 37 * (idx + 1)) % np.iinfo(np.int32).max
-        )
-        rng = check_random_state(rs)
-
-        indices = range(self.n_instances_)
-        subsample = rng.choice(self.n_instances_, size=self.n_instances_)
-        oob = [n for n in indices if n not in subsample]
-
-        results = np.zeros((self.n_instances_, self.n_classes_))
-        if len(oob) == 0:
-            return [results, oob]
-
-        clf = _clone_estimator(self._base_estimator, rs)
-        clf.fit(self.transformed_data_[idx][subsample], y[subsample])
-        probas = clf.predict_proba(self.transformed_data_[idx][oob])
-
-        if probas.shape[1] != self.n_classes_:
-            new_probas = np.zeros((probas.shape[0], self.n_classes_))
-            for i, cls in enumerate(clf.classes_):
-                cls_idx = self.class_dictionary_[cls]
-                new_probas[:, cls_idx] = probas[:, i]
-            probas = new_probas
-
-        for n, proba in enumerate(probas):
-            results[oob[n]] += proba
-
-        return [results, oob]
-
-    @classmethod
-    def get_test_params(
-        cls, parameter_set: Union[str, None] = None
-    ) -> Union[dict, List[dict]]:
-        """Return unit test parameter settings for the estimator.
-
-        Parameters
-        ----------
-        parameter_set : None or str, default=None
-            Name of the set of test parameters to return, for use in tests. If no
-            special parameters are defined for a value, will return `"default"` set.
-
-        Returns
-        -------
-        params : dict or list of dict
-            Parameters to create testing instances of the class.
-        """
-        return {"n_estimators": 2}
-
-
-class RotationForestRegressor(RegressorMixin, BaseEstimator):
-    """A Rotation Forest (RotF) regressor.
-
-    Implementation of the Rotation Forest regressor based on the classifier described
-    in Rodriguez et al (2006) [1]. Builds a forest of trees built on random portions
-    of the data transformed using PCA.
-
-    Intended as a benchmark for time series data and a base regressor for
-    transformation-based approaches, this tsml implementation only works with
-    continuous attributes. Compared to the classification version, the only
-    alterations are the base tree used and the removal of class subsampling.
-
-    Parameters
-    ----------
-    n_estimators : int, default=200
-        Number of estimators to build for the ensemble.
-    min_group : int, default=3
-        The minimum size of an attribute subsample group.
-    max_group : int, default=3
-        The maximum size of an attribute subsample group.
-    remove_proportion : float, default=0.5
-        The proportion of cases to be removed per group.
- base_estimator : BaseEstimator or None, default="None" - Base estimator for the ensemble. By default, uses the sklearn - `DecisionTreeRegressor` using squared error as a splitting measure. - time_limit_in_minutes : int, default=0 - Time contract to limit build time in minutes, overriding ``n_estimators``. - Default of `0` means ``n_estimators`` is used. - contract_max_n_estimators : int, default=500 - Max number of estimators to build when ``time_limit_in_minutes`` is set. - save_transformed_data : bool, default=False - Save the data transformed in fit in ``transformed_data_``. - n_jobs : int, default=1 - The number of jobs to run in parallel for both ``fit`` and ``predict``. - `-1` means using all processors. - random_state : int, RandomState instance or None, default=None - If `int`, random_state is the seed used by the random number generator; - If `RandomState` instance, random_state is the random number generator; - If `None`, the random number generator is the `RandomState` instance used - by `np.random`. - - Attributes - ---------- - n_instances_ : int - The number of train cases in the training set. - n_atts_ : int - The number of attributes in the training set. - transformed_data_ : list of shape (n_estimators) of ndarray - The transformed training dataset for all classifiers. Only saved when - ``save_transformed_data`` is `True`. - estimators_ : list of shape (n_estimators) of BaseEstimator - The collections of estimators trained in fit. - - References - ---------- - .. [1] Rodriguez, Juan José, Ludmila I. Kuncheva, and Carlos J. Alonso. "Rotation - forest: A new classifier ensemble method." IEEE transactions on pattern analysis - and machine intelligence 28.10 (2006). - - .. [2] Bagnall, A., et al. "Is rotation forest the best classifier for problems - with continuous features?." arXiv preprint arXiv:1809.06705 (2018). - - Examples - -------- - >>> from tsml.vector import RotationForestRegressor - >>> from tsml.utils.testing import generate_2d_test_data - >>> X, y = generate_2d_test_data(n_samples=8, regression_target=True, - ... random_state=0) - >>> reg = RotationForestRegressor(random_state=0) - >>> reg.fit(X, y) - RotationForestRegressor(...) - >>> reg.predict(X) - array([0.19658236, 1.36872518, 0.82099324, 0.09710128, 0.83794492, - 0.09609841, 0.97645944, 1.46865118]) - """ - - def __init__( - self, - n_estimators=200, - min_group=3, - max_group=3, - remove_proportion=0.5, - base_estimator=None, - time_limit_in_minutes=0.0, - contract_max_n_estimators=500, - save_transformed_data=False, - n_jobs=1, - random_state=None, - ): - self.n_estimators = n_estimators - self.min_group = min_group - self.max_group = max_group - self.remove_proportion = remove_proportion - self.base_estimator = base_estimator - self.time_limit_in_minutes = time_limit_in_minutes - self.contract_max_n_estimators = contract_max_n_estimators - self.save_transformed_data = save_transformed_data - self.n_jobs = n_jobs - self.random_state = random_state - - super().__init__() - - def fit(self, X: Union[np.ndarray, pd.DataFrame], y: np.ndarray) -> object: - """Fit the estimator to training data. - - Parameters - ---------- - X : 2d ndarray or DataFrame of shape (n_instances, n_atts) - The training data. - y : 1D np.ndarray of shape (n_instances) - The target labels for fitting, indices correspond to instance indices in X - - Returns - ------- - self : - Reference to self. 
- """ - if isinstance(X, np.ndarray) and len(X.shape) == 3 and X.shape[1] == 1: - X = np.reshape(X, (X.shape[0], -1)) - - X, y = self._validate_data( - X=X, y=y, ensure_min_samples=2, dtype=np.float32, y_numeric=True - ) - - self._n_jobs = check_n_jobs(self.n_jobs) - - self.n_instances_, self.n_atts_ = X.shape - - time_limit = self.time_limit_in_minutes * 60 - start_time = time.time() - train_time = 0 - - if self.base_estimator is None: - self._base_estimator = DecisionTreeRegressor(criterion="squared_error") - - # remove useless attributes - self._useful_atts = ~np.all(X[1:] == X[:-1], axis=0) - X = X[:, self._useful_atts] - - self._n_atts = X.shape[1] - - # normalise attributes - self._min = X.min(axis=0) - self._ptp = X.max(axis=0) - self._min - X = (X - self._min) / self._ptp - - if time_limit > 0: - self._n_estimators = 0 - self.estimators_ = [] - self._pcas = [] - self._groups = [] - self.transformed_data_ = [] - - while ( - train_time < time_limit - and self._n_estimators < self.contract_max_n_estimators - ): - fit = Parallel(n_jobs=self._n_jobs)( - delayed(self._fit_estimator)( - X, - y, - i, - ) - for i in range(self._n_jobs) - ) - - estimators, pcas, groups, transformed_data = zip(*fit) - - self.estimators_ += estimators - self._pcas += pcas - self._groups += groups - self.transformed_data_ += transformed_data - - self._n_estimators += self._n_jobs - train_time = time.time() - start_time - else: - self._n_estimators = self.n_estimators - - fit = Parallel(n_jobs=self._n_jobs)( - delayed(self._fit_estimator)( - X, - y, - i, - ) - for i in range(self._n_estimators) - ) - - self.estimators_, self._pcas, self._groups, self.transformed_data_ = zip( - *fit - ) - - return self - - def predict(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray: - """Predicts labels for sequences in X. - - Parameters - ---------- - X : 2d ndarray or DataFrame of shape (n_instances, n_atts) - The testing data. - - Returns - ------- - y : array-like of shape (n_instances) - Predicted target labels. - """ - check_is_fitted(self) - - if isinstance(X, np.ndarray) and len(X.shape) == 3 and X.shape[1] == 1: - X = np.reshape(X, (X.shape[0], -1)) - - X = self._validate_data(X=X, reset=False, dtype=np.float32) - - # replace missing values with 0 and remove useless attributes - X = X[:, self._useful_atts] - - # normalise the data. - X = (X - self._min) / self._ptp - - y_preds = Parallel(n_jobs=self._n_jobs)( - delayed(self._predict_for_estimator)( - X, - self.estimators_[i], - self._pcas[i], - self._groups[i], - ) - for i in range(self._n_estimators) - ) - - output = np.sum(y_preds, axis=0) / self._n_estimators - - return output - - def _fit_estimator(self, X, y, idx): - rs = 255 if self.random_state == 0 else self.random_state - rs = ( - None - if self.random_state is None - else (rs * 37 * (idx + 1)) % np.iinfo(np.int32).max - ) - rng = check_random_state(rs) - - groups = _generate_groups(rng, self._n_atts, self.min_group, self.max_group) - pcas = [] - - # construct the slices to fit the PCAs too. - for group in groups: - sample_ind = rng.choice( - X.shape[0], - max(1, int(X.shape[0] * self.remove_proportion)), - replace=False, - ) - X_t = X[sample_ind] - X_t = X_t[:, group] - - # try to fit the PCA if it fails, remake it, and add 10 random data - # instances. - while True: - # ignore err state on PCA because we account if it fails. - with np.errstate(divide="ignore", invalid="ignore"): - # differences between os occasionally. 
seems to happen when there - # are low amounts of cases in the fit - pca = PCA(random_state=rs).fit(X_t) - - if not np.isnan(pca.explained_variance_ratio_).all(): - break - X_t = np.concatenate( - (X_t, rng.random_sample((10, X_t.shape[1]))), axis=0 - ) - - pcas.append(pca) - - # merge all the pca_transformed data into one instance and build a classifier - # on it. - X_t = np.concatenate( - [pcas[i].transform(X[:, group]) for i, group in enumerate(groups)], axis=1 - ) - X_t = X_t.astype(np.float32) - X_t = np.nan_to_num( - X_t, False, 0, np.finfo(np.float32).max, np.finfo(np.float32).min - ) - - tree = _clone_estimator(self._base_estimator, random_state=rs) - tree.fit(X_t, y) - - return tree, pcas, groups, X_t if self.save_transformed_data else None - - def _predict_for_estimator(self, X, clf, pcas, groups): - X_t = np.concatenate( - [pcas[i].transform(X[:, group]) for i, group in enumerate(groups)], axis=1 - ) - X_t = X_t.astype(np.float32) - X_t = np.nan_to_num( - X_t, False, 0, np.finfo(np.float32).max, np.finfo(np.float32).min - ) - - return clf.predict(X_t) - - @classmethod - def get_test_params( - cls, parameter_set: Union[str, None] = None - ) -> Union[dict, List[dict]]: - """Return unit test parameter settings for the estimator. - - Parameters - ---------- - parameter_set : None or str, default=None - Name of the set of test parameters to return, for use in tests. If no - special parameters are defined for a value, will return `"default"` set. - - Returns - ------- - params : dict or list of dict - Parameters to create testing instances of the class. - """ - return {"n_estimators": 2} - - -def _generate_groups(rng, n_atts, min_group, max_group): - permutation = rng.permutation(np.arange(0, n_atts)) - - # select the size of each group. - group_size_count = np.zeros(max_group - min_group + 1) - n_attributes = 0 - n_groups = 0 - while n_attributes < n_atts: - n = rng.randint(group_size_count.shape[0]) - group_size_count[n] += 1 - n_attributes += min_group + n - n_groups += 1 - - groups = [] - current_attribute = 0 - current_size = 0 - for i in range(0, n_groups): - while group_size_count[current_size] == 0: - current_size += 1 - group_size_count[current_size] -= 1 - - n = min_group + current_size - groups.append(np.zeros(n, dtype=int)) - for k in range(0, n): - if current_attribute < permutation.shape[0]: - groups[i][k] = permutation[current_attribute] - else: - groups[i][k] = permutation[rng.randint(permutation.shape[0])] - current_attribute += 1 - - return groups diff --git a/tsml/vector/tests/__init__.py b/tsml/vector/tests/__init__.py deleted file mode 100644 index 8a187d0..0000000 --- a/tsml/vector/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""sklearn-like estimator test code.""" diff --git a/tsml/vector/tests/test_rotation_forest.py b/tsml/vector/tests/test_rotation_forest.py deleted file mode 100644 index 02316b0..0000000 --- a/tsml/vector/tests/test_rotation_forest.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Rotation Forest test code.""" - -__author__ = ["MatthewMiddlehurst"] - -import numpy as np - -from tsml.datasets import load_minimal_chinatown -from tsml.vector import RotationForestClassifier - - -def test_contracted_rotf(): - """Test of RotF contracting and train estimate on unit test data.""" - # load unit test data - X, y = load_minimal_chinatown(split="train") - X = np.reshape(X, (X.shape[0], -1)) - - rotf = RotationForestClassifier( - contract_max_n_estimators=5, - time_limit_in_minutes=0.25, - random_state=0, - ) - rotf.fit(X, y) - - assert len(rotf.estimators_) > 1 - 
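-    # with a 15 second contract and a cap of 5 estimators, the final ensemble
-    # size depends on hardware speed, so only a lower bound is asserted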
- # test train estimate - proba = rotf.predict_proba(X) - assert isinstance(proba, np.ndarray) - assert proba.shape == (len(X), 2) From cdb8d2cfc5b1a71f0d03a7fada206c71706dbb58 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Sat, 11 Jan 2025 17:46:39 +0000 Subject: [PATCH 4/6] fixes --- tsml/compose/_channel_ensemble.py | 26 ++++---- tsml/compose/tests/test_channel_ensemble.py | 26 +++----- .../tests/test_interval_extraction.py | 66 ------------------- .../transformations/tests/test_periodogram.py | 20 ------ tsml/utils/testing.py | 11 ---- 5 files changed, 22 insertions(+), 127 deletions(-) delete mode 100644 tsml/transformations/tests/test_interval_extraction.py delete mode 100644 tsml/transformations/tests/test_periodogram.py diff --git a/tsml/compose/_channel_ensemble.py b/tsml/compose/_channel_ensemble.py index 1c2bfb7..847c2d3 100644 --- a/tsml/compose/_channel_ensemble.py +++ b/tsml/compose/_channel_ensemble.py @@ -204,17 +204,17 @@ class ChannelEnsembleClassifier(ClassifierMixin, _BaseChannelEnsemble): Examples -------- >>> from tsml.compose import ChannelEnsembleClassifier - >>> from tsml.interval_based import IntervalForestClassifier + >>> from tsml.dummy import DummyClassifier >>> from tsml.utils.testing import generate_3d_test_data >>> X, y = generate_3d_test_data(n_samples=8, series_length=10, random_state=0) >>> reg = ChannelEnsembleClassifier( - ... estimators=("tsf", IntervalForestClassifier(n_estimators=2), "all-split"), + ... estimators=("d", DummyClassifier(), "all-split"), ... random_state=0, ... ) >>> reg.fit(X, y) ChannelEnsembleClassifier(...) >>> reg.predict(X) - array([0, 1, 1, 0, 0, 1, 0, 1]) + array([0, 0, 0, 0, 0, 0, 0, 0]) """ def __init__(self, estimators, remainder="drop", random_state=None): @@ -349,12 +349,12 @@ def get_test_params( params : dict or list of dict Parameters to create testing instances of the class. """ - from tsml.interval_based import IntervalForestClassifier + from tsml.dummy import DummyClassifier return { "estimators": [ - ("tsf1", IntervalForestClassifier(n_estimators=2), 0), - ("tsf2", IntervalForestClassifier(n_estimators=2), 0), + ("d1", DummyClassifier(), 0), + ("d2", DummyClassifier(), 0), ] } @@ -411,19 +411,19 @@ class ChannelEnsembleRegressor(RegressorMixin, _BaseChannelEnsemble): Examples -------- >>> from tsml.compose import ChannelEnsembleRegressor - >>> from tsml.interval_based import IntervalForestRegressor + >>> from tsml.dummy import DummyRegressor >>> from tsml.utils.testing import generate_3d_test_data >>> X, y = generate_3d_test_data(n_samples=8, series_length=10, ... regression_target=True, random_state=0) >>> reg = ChannelEnsembleRegressor( - ... estimators=("tsf", IntervalForestRegressor(n_estimators=2), "all-split"), + ... estimators=("d", DummyRegressor(), "all-split"), ... random_state=0, ... ) >>> reg.fit(X, y) ChannelEnsembleRegressor(...) >>> reg.predict(X) - array([0.31798318, 1.41426301, 1.06414747, 0.6924721 , 0.56660146, - 1.26538944, 0.52324808, 1.0939405 ]) + array([0.8672557, 0.8672557, 0.8672557, 0.8672557, 0.8672557, 0.8672557, + 0.8672557, 0.8672557], dtype=float32) """ def __init__(self, estimators, remainder="drop", random_state=None): @@ -518,12 +518,12 @@ def get_test_params( params : dict or list of dict Parameters to create testing instances of the class. 
""" - from tsml.interval_based import IntervalForestRegressor + from tsml.dummy import DummyRegressor return { "estimators": [ - ("tsf1", IntervalForestRegressor(n_estimators=2), 0), - ("tsf2", IntervalForestRegressor(n_estimators=2), 0), + ("d1", DummyRegressor(), 0), + ("d2", DummyRegressor(), 0), ] } diff --git a/tsml/compose/tests/test_channel_ensemble.py b/tsml/compose/tests/test_channel_ensemble.py index 2232c75..6f082cb 100644 --- a/tsml/compose/tests/test_channel_ensemble.py +++ b/tsml/compose/tests/test_channel_ensemble.py @@ -9,7 +9,7 @@ _check_key_type, _get_channel, ) -from tsml.interval_based import IntervalForestClassifier, IntervalForestRegressor +from tsml.dummy import DummyClassifier, DummyRegressor from tsml.utils.testing import generate_3d_test_data, generate_unequal_test_data @@ -17,17 +17,13 @@ def test_single_estimator(): """Test that a single estimator is correctly applied to all channels.""" X, y = generate_3d_test_data(n_channels=3) - ens = ChannelEnsembleClassifier( - estimators=[("tsf", IntervalForestClassifier(n_estimators=2), "all")] - ) + ens = ChannelEnsembleClassifier(estimators=[("d", DummyClassifier(), "all")]) ens.fit(X, y) assert len(ens.estimators_[0][2]) == 3 assert ens.predict(X).shape == (X.shape[0],) - ens = ChannelEnsembleRegressor( - estimators=[("tsf", IntervalForestRegressor(n_estimators=2), "all")] - ) + ens = ChannelEnsembleRegressor(estimators=[("d", DummyRegressor(), "all")]) ens.fit(X, y) assert len(ens.estimators_[0][2]) == 3 @@ -38,18 +34,14 @@ def test_single_estimator_split(): """Test that a single split estimator correctly creates an estimator per channel.""" X, y = generate_3d_test_data(n_channels=3) - ens = ChannelEnsembleClassifier( - estimators=("tsf", IntervalForestClassifier(n_estimators=2), "all-split") - ) + ens = ChannelEnsembleClassifier(estimators=("d", DummyClassifier(), "all-split")) ens.fit(X, y) assert len(ens.estimators_) == 3 assert isinstance(ens.estimators_[0][2], int) assert ens.predict(X).shape == (X.shape[0],) - ens = ChannelEnsembleRegressor( - estimators=("tsf", IntervalForestRegressor(n_estimators=2), "all-split") - ) + ens = ChannelEnsembleRegressor(estimators=("d", DummyRegressor(), "all-split")) ens.fit(X, y) assert len(ens.estimators_) == 3 @@ -62,8 +54,8 @@ def test_remainder(): X, y = generate_3d_test_data(n_channels=3) ens = ChannelEnsembleClassifier( - estimators=[("tsf", IntervalForestClassifier(n_estimators=2), 0)], - remainder=IntervalForestClassifier(n_estimators=2), + estimators=[("d", DummyClassifier(), 0)], + remainder=DummyClassifier(), ) ens.fit(X, y) @@ -71,8 +63,8 @@ def test_remainder(): assert ens.predict(X).shape == (X.shape[0],) ens = ChannelEnsembleRegressor( - estimators=[("tsf", IntervalForestRegressor(n_estimators=2), 0)], - remainder=IntervalForestRegressor(n_estimators=2), + estimators=[("d", DummyRegressor(), 0)], + remainder=DummyRegressor(), ) ens.fit(X, y) diff --git a/tsml/transformations/tests/test_interval_extraction.py b/tsml/transformations/tests/test_interval_extraction.py deleted file mode 100644 index 147d4df..0000000 --- a/tsml/transformations/tests/test_interval_extraction.py +++ /dev/null @@ -1,66 +0,0 @@ -"""Interval extraction test code.""" - -import pytest - -from tsml.transformations import ( - Catch22Transformer, - RandomIntervalTransformer, - SevenNumberSummaryTransformer, - SupervisedIntervalTransformer, -) -from tsml.utils.numba_functions.stats import row_mean, row_median -from tsml.utils.testing import generate_3d_test_data -from tsml.utils.validation import 
_check_optional_dependency - - -def test_interval_prune(): - """Test RandomIntervalTransformer duplicate pruning.""" - X, y = generate_3d_test_data(random_state=0, n_channels=2, series_length=10) - - rit = RandomIntervalTransformer( - features=[row_mean, row_median], - n_intervals=10, - random_state=0, - ) - X_t = rit.fit_transform(X, y) - - assert X_t.shape == (10, 16) - assert rit.transform(X).shape == (10, 16) - - -def test_random_interval_transformer(): - """Test RandomIntervalTransformer.""" - X, y = generate_3d_test_data(random_state=0, n_channels=2, series_length=10) - - rit = RandomIntervalTransformer( - features=SevenNumberSummaryTransformer(), - n_intervals=5, - random_state=2, - ) - X_t = rit.fit_transform(X, y) - - assert X_t.shape == (10, 35) - assert rit.transform(X).shape == (10, 35) - - -@pytest.mark.skipif( - not _check_optional_dependency("pycatch22", "pycatch22", None, raise_error=False), - reason="pycatch22 not installed", -) -def test_supervised_transformers(): - """Test SupervisedIntervalTransformer.""" - X, y = generate_3d_test_data(random_state=0) - - sit = SupervisedIntervalTransformer( - features=[ - Catch22Transformer( - features=["DN_HistogramMode_5", "SB_BinaryStats_mean_longstretch1"] - ), - row_mean, - ], - n_intervals=2, - random_state=0, - ) - X_t = sit.fit_transform(X, y) - - assert X_t.shape == (X.shape[0], 8) diff --git a/tsml/transformations/tests/test_periodogram.py b/tsml/transformations/tests/test_periodogram.py deleted file mode 100644 index 1316cde..0000000 --- a/tsml/transformations/tests/test_periodogram.py +++ /dev/null @@ -1,20 +0,0 @@ -import pytest -from numpy.testing import assert_array_almost_equal - -from tsml.transformations import PeriodogramTransformer -from tsml.utils.testing import generate_3d_test_data -from tsml.utils.validation import _check_optional_dependency - - -@pytest.mark.skipif( - not _check_optional_dependency("pyfftw", "pyfftw", "PeriodogramTransformer", False), - reason="Only run on overnights because of intermittent fail for read/write.", -) -def test_periodogram_same_output(): - """Test that the output is the same using pyfftw and not.""" - X, y = generate_3d_test_data() - - p1 = PeriodogramTransformer() - p2 = PeriodogramTransformer(use_pyfftw=False) - - assert_array_almost_equal(p1.fit_transform(X), p2.fit_transform(X)) diff --git a/tsml/utils/testing.py b/tsml/utils/testing.py index 7e0c98e..4705b11 100644 --- a/tsml/utils/testing.py +++ b/tsml/utils/testing.py @@ -96,17 +96,6 @@ def parametrize_with_checks(estimators: List[BaseEstimator]) -> Callable: See Also -------- check_estimator : Check if estimator adheres to tsml or scikit-learn conventions. - - Examples - -------- - >>> from tsml.utils.testing import parametrize_with_checks - >>> from tsml.interval_based import IntervalForestRegressor - >>> from tsml.vector import RotationForestClassifier - >>> @parametrize_with_checks( - ... [IntervalForestRegressor(), RotationForestClassifier()] - ... ) - ... def test_tsml_compatible_estimator(estimator, check): - ... 
check(estimator) """ import pytest From e360b5f449f7622dc47c5f0fda0ab63571b61c7b Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Sat, 11 Jan 2025 18:11:47 +0000 Subject: [PATCH 5/6] temp comment scikit-fda --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9ade4e7..da87e90 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ dependencies = [ [project.optional-dependencies] all_extras = [ "grailts", - "scikit-fda>=0.7.0", + # "scikit-fda>=0.7.0", "wildboar", ] unstable_extras = [ From 461928430b658e5d398b95e0e3c7ce0f3438ca9a Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Sat, 11 Jan 2025 18:21:37 +0000 Subject: [PATCH 6/6] version --- README.md | 2 +- tsml/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7609c25..f5baf03 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Please see [`tsml_eval`](https://github.com/time-series-machine-learning/tsml-ev is more of a sandbox for testing out new ideas and algorithms. It may contain some algorithms and implementations that are not available in the other toolkits. -The current release of `tsml` is v0.5.0. +The current release of `tsml` is v0.6.0. ## Installation diff --git a/tsml/__init__.py b/tsml/__init__.py index 90b5545..af5858d 100644 --- a/tsml/__init__.py +++ b/tsml/__init__.py @@ -1,3 +1,3 @@ """tsml.""" -__version__ = "0.5.0" +__version__ = "0.6.0"
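
For reference, the rotation step at the heart of the `RotationForestClassifier`/`RotationForestRegressor` estimators removed above can be sketched in a few lines of NumPy and scikit-learn. This is a simplified, hypothetical illustration (fixed equal-sized groups, no class or instance subsampling, no contracting), not the removed implementation:

```python
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
X = rng.normal(size=(20, 9))  # 20 cases, 9 continuous attributes

# split a random permutation of the attributes into three groups of three,
# mirroring the min_group=3 / max_group=3 defaults
groups = np.split(rng.permutation(9), 3)

# fit one PCA per attribute group, then rotate the full data group by group
pcas = [PCA().fit(X[:, g]) for g in groups]
X_rot = np.concatenate(
    [p.transform(X[:, g]) for p, g in zip(pcas, groups)], axis=1
)

assert X_rot.shape == X.shape  # the rotation preserves dimensionality
# a decision tree fit on X_rot would form one ensemble member
```

Fitting a tree on `X_rot` and repeating with fresh random groups (and, in the removed code, subsampled cases) per member is what gives the ensemble its diversity.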