From 38bf1361e6e27e7cd3a597e36c361783e1bd57ae Mon Sep 17 00:00:00 2001
From: Talley Lambert <talley.lambert@gmail.com>
Date: Sun, 13 Jun 2021 13:09:54 -0400
Subject: [PATCH 01/12] initial working

---
 pyclesperanto_prototype/__init__.py       |   1 +
 pyclesperanto_prototype/_fft/__init__.py  |   2 +
 pyclesperanto_prototype/_fft/_fft.py      | 112 ++++++++++++++++++++++
 pyclesperanto_prototype/_fft/_fftshift.py |  16 ++++
 tests/test_fft.py                         |  36 +++++++
 5 files changed, 167 insertions(+)
 create mode 100644 pyclesperanto_prototype/_fft/__init__.py
 create mode 100644 pyclesperanto_prototype/_fft/_fft.py
 create mode 100644 pyclesperanto_prototype/_fft/_fftshift.py
 create mode 100644 tests/test_fft.py

diff --git a/pyclesperanto_prototype/__init__.py b/pyclesperanto_prototype/__init__.py
index cdd2fc5e..bb79a2f0 100644
--- a/pyclesperanto_prototype/__init__.py
+++ b/pyclesperanto_prototype/__init__.py
@@ -6,5 +6,6 @@
 from ._tier5 import *
 from ._tier8 import *
 from ._tier9 import *
+from ._fft import *
 
 __version__ = "0.9.1"
diff --git a/pyclesperanto_prototype/_fft/__init__.py b/pyclesperanto_prototype/_fft/__init__.py
new file mode 100644
index 00000000..4510838e
--- /dev/null
+++ b/pyclesperanto_prototype/_fft/__init__.py
@@ -0,0 +1,2 @@
+from ._fft import fftn, ifftn
+from ._fftshift import fftshift
diff --git a/pyclesperanto_prototype/_fft/_fft.py b/pyclesperanto_prototype/_fft/_fft.py
new file mode 100644
index 00000000..50c5bae6
--- /dev/null
+++ b/pyclesperanto_prototype/_fft/_fft.py
@@ -0,0 +1,112 @@
+import numpy as np
+import reikna.cluda as cluda
+from reikna.core import Annotation, Parameter, Transformation, Type
+from reikna.fft import FFT
+
+from .. import create, get_device, push
+from .._tier0._pycl import OCLArray
+
+
+def _convert_axes_to_absolute(dshape, axes):
+    """axes = (-2,-1) does not work in reikna, so we have to convert that"""
+    if axes is None:
+        return None
+    elif isinstance(axes, (tuple, list)):
+        return tuple(np.arange(len(dshape))[list(axes)])
+    else:
+        raise NotImplementedError(f"axes {axes} is of unsupported type {type(axes)}")
+
+
+def _get_complex_trf(arr):
+    """On-gpu transformation that transforms a real array to a complex one."""
+    complex_dtype = cluda.dtypes.complex_for(arr.dtype)
+    return Transformation(
+        [
+            Parameter("output", Annotation(Type(complex_dtype, arr.shape), "o")),
+            Parameter("input", Annotation(arr, "i")),
+        ],
+        """
+        ${output.store_same}(
+            COMPLEX_CTR(${output.ctype})(
+                ${input.load_same},
+                0));
+        """,
+    )
+
+
+_PLANS = {}
+
+
+def get_fft_plan(arr, axes=None, fast_math=True):
+    """returns an reikna plan/FFT for `arr`.
+
+    Will cache the compiled plan and reuse.
+    """
+
+    axes = _convert_axes_to_absolute(arr.shape, axes)
+    key = (arr.shape, arr.dtype, axes, fast_math)
+    if key not in _PLANS:
+        if np.iscomplexobj(arr):
+            fft = FFT(arr, axes=axes)
+        else:
+            trf = _get_complex_trf(arr)
+            fft = FFT(trf.output, axes=axes)
+            fft.parameter.input.connect(trf, trf.output, new_input=trf.input)
+
+        thread = cluda.ocl_api().Thread(get_device().queue)
+        _PLANS[key] = fft.compile(thread, fast_math=fast_math)
+    return _PLANS[key]
+
+
+def fftn(
+    input_arr,
+    output_arr=None,
+    axes=None,
+    inplace=False,
+    fast_math=True,
+    inverse=False,
+) -> OCLArray:
+    _isnumpy = isinstance(input_arr, np.ndarray)
+    if isinstance(input_arr, OCLArray):
+        if input_arr.dtype != np.complex64:
+            raise TypeError("OCLArray input_arr has to be of complex64 type")
+    elif _isnumpy:
+        if not np.issubdtype(input_arr.dtype, np.floating):
+            input_arr = input_arr.astype(np.float32)
+    else:
+        raise TypeError("input_arr must be either OCLArray or np.ndarray")
+    if output_arr is not None and inplace:
+        raise ValueError("`output_arr` cannot be provided if `inplace` is True")
+
+    plan = get_fft_plan(input_arr, axes=axes, fast_math=fast_math)
+
+    if isinstance(input_arr, np.ndarray):
+        arr_dev = push(input_arr)
+        res_dev = create(arr_dev, np.complex64)
+    else:
+        arr_dev = input_arr
+        if inplace:
+            res_dev = arr_dev
+        else:
+            res_dev = (
+                create(arr_dev, np.complex64) if output_arr is None else output_arr
+            )
+
+    plan(res_dev, arr_dev, inverse=inverse)
+
+    if _isnumpy:
+        if inplace:
+            input_arr[:] = res_dev.get()
+        elif output_arr:
+            output_arr[:] = res_dev.get()
+    return res_dev
+
+
+def ifftn(
+    input_arr,
+    output_arr=None,
+    axes=None,
+    inplace=False,
+    fast_math=True,
+):
+    return fftn(input_arr, output_arr, axes, inplace, fast_math, True)
diff --git a/pyclesperanto_prototype/_fft/_fftshift.py b/pyclesperanto_prototype/_fft/_fftshift.py
new file mode 100644
index 00000000..3c3d7462
--- /dev/null
+++ b/pyclesperanto_prototype/_fft/_fftshift.py
@@ -0,0 +1,16 @@
+import numpy as np
+import reikna.cluda as cluda
+from reikna.fft import FFTShift
+
+from .. import create, get_device, push
+
+
+def fftshift(arr, axes=None, output_arr=None):
+    shift = FFTShift(arr, axes=axes)
+    thread = cluda.ocl_api().Thread(get_device().queue)
+    shiftc = shift.compile(thread)
+
+    arr_dev = push(arr) if isinstance(arr, np.ndarray) else arr
+    res_dev = create(arr_dev) if output_arr is None else output_arr
+    shiftc(res_dev, arr_dev)
+    return res_dev
diff --git a/tests/test_fft.py b/tests/test_fft.py
new file mode 100644
index 00000000..e894caf3
--- /dev/null
+++ b/tests/test_fft.py
@@ -0,0 +1,36 @@
+from skimage.data import grass
+from scipy.fftpack import fftn, ifftn, fftshift
+import numpy as np
+import numpy.testing as npt
+import pyclesperanto_prototype as cle
+
+GRASS = grass().astype("float32")
+
+scipy_fgrass = fftn(GRASS)
+scipy_ifgrass = ifftn(scipy_fgrass)
+
+
+def test_scipy_fft():
+    assert not np.allclose(scipy_fgrass, GRASS, atol=100)
+    assert scipy_fgrass.dtype == np.complex64
+    assert scipy_fgrass.shape == (512, 512)
+    # inverse
+    npt.assert_allclose(scipy_ifgrass.real, GRASS, atol=1e-4)
+
+
+def test_cle_fft():
+    gpu_fgrass = cle.fftn(GRASS)
+    fgrass = cle.pull(gpu_fgrass)
+
+    assert not np.allclose(fgrass, GRASS, atol=100)
+    npt.assert_allclose(fgrass, scipy_fgrass, atol=2e-1)
+
+    gpu_ifgrss = cle.ifftn(gpu_fgrass)
+    ifgrss = cle.pull(gpu_ifgrss)
+    npt.assert_allclose(ifgrss.real, GRASS, atol=1e-4)
+
+
+def test_cle_fftshift():
+    scp_shift = fftshift(GRASS)
+    cle_shift = cle.fftshift(GRASS).get()
+    npt.assert_allclose(scp_shift, cle_shift)

From be3d6b8413832b8bee47289dd9f2639380715f8a Mon Sep 17 00:00:00 2001
From: Talley Lambert <talley.lambert@gmail.com>
Date: Sun, 13 Jun 2021 16:41:31 -0400
Subject: [PATCH 02/12] more tests

---
 .github/workflows/test_and_deploy.yml   |   4 +-
 pyclesperanto_prototype/_fft/_fft.py    | 168 ++++++++++++++++--------
 pyclesperanto_prototype/_tier0/_push.py |  18 +--
 setup.py                                |  12 +-
 tests/test_fft.py                       |  47 ++++++-
 5 files changed, 177 insertions(+), 72 deletions(-)

diff --git a/.github/workflows/test_and_deploy.yml b/.github/workflows/test_and_deploy.yml
index b1904089..39ff9802 100644
--- a/.github/workflows/test_and_deploy.yml
+++ b/.github/workflows/test_and_deploy.yml
@@ -37,8 +37,8 @@ jobs:
           python --version
           conda install -y pyopencl
           python -m pip install --upgrade pip
-          pip install setuptools wheel pytest pytest-cov pytest-benchmark
-          pip install -e .
+          pip install setuptools wheel pytest pytest-cov pytest-benchmark scipy
+          pip install -e .[fft]
       - name: Test
         shell: bash -l {0}
         run: pytest -v --cov=./ --cov-report=xml
diff --git a/pyclesperanto_prototype/_fft/_fft.py b/pyclesperanto_prototype/_fft/_fft.py
index 50c5bae6..c8aef774 100644
--- a/pyclesperanto_prototype/_fft/_fft.py
+++ b/pyclesperanto_prototype/_fft/_fft.py
@@ -1,3 +1,4 @@
+from typing import Optional, Tuple, Union
 import numpy as np
 import reikna.cluda as cluda
 from reikna.core import Annotation, Parameter, Transformation, Type
@@ -6,46 +7,16 @@
 from .. import create, get_device, push
 from .._tier0._pycl import OCLArray
 
-
-def _convert_axes_to_absolute(dshape, axes):
-    """axes = (-2,-1) does not work in reikna, so we have to convert that"""
-    if axes is None:
-        return None
-    elif isinstance(axes, (tuple, list)):
-        return tuple(np.arange(len(dshape))[list(axes)])
-    else:
-        raise NotImplementedError(f"axes {axes} is of unsupported type {type(axes)}")
-
-
-def _get_complex_trf(arr):
-    """On-gpu transformation that transforms a real array to a complex one."""
-    complex_dtype = cluda.dtypes.complex_for(arr.dtype)
-    return Transformation(
-        [
-            Parameter("output", Annotation(Type(complex_dtype, arr.shape), "o")),
-            Parameter("input", Annotation(arr, "i")),
-        ],
-        """
-        ${output.store_same}(
-            COMPLEX_CTR(${output.ctype})(
-                ${input.load_same},
-                0));
-        """,
-    )
+# FFT plan cache
+_PLAN_CACHE = {}
 
 
-_PLANS = {}
+def _get_fft_plan(arr, axes=None, fast_math=True):
+    """Cache and return a reikna FFT plan suitable for `arr` type and shape."""
+    axes = _normalize_axes(arr.shape, axes)
+    plan_key = (arr.shape, arr.dtype, axes, fast_math)
 
-
-def get_fft_plan(arr, axes=None, fast_math=True):
-    """returns an reikna plan/FFT for `arr`.
-
-    Will cache the compiled plan and reuse.
-    """
-
-    axes = _convert_axes_to_absolute(arr.shape, axes)
-    key = (arr.shape, arr.dtype, axes, fast_math)
-    if key not in _PLANS:
+    if plan_key not in _PLAN_CACHE:
         if np.iscomplexobj(arr):
             fft = FFT(arr, axes=axes)
         else:
@@ -54,31 +25,69 @@ def get_fft_plan(arr, axes=None, fast_math=True):
             fft.parameter.input.connect(trf, trf.output, new_input=trf.input)
 
         thread = cluda.ocl_api().Thread(get_device().queue)
-        _PLANS[key] = fft.compile(thread, fast_math=fast_math)
-    return _PLANS[key]
+        _PLAN_CACHE[plan_key] = fft.compile(thread, fast_math=fast_math)
+    return _PLAN_CACHE[plan_key]
 
 
 def fftn(
-    input_arr,
-    output_arr=None,
-    axes=None,
-    inplace=False,
-    fast_math=True,
-    inverse=False,
+    input_arr: Union[np.ndarray, OCLArray],
+    output_arr: Union[np.ndarray, OCLArray] = None,
+    axes: Optional[Tuple[int, ...]] = None,
+    inplace: bool = False,
+    fast_math: bool = True,
+    _inverse: bool = False,
 ) -> OCLArray:
+    """Perform fast Fourier transformation on `input_array`.
+
+    Parameters
+    ----------
+    input_arr : numpy or OCL array
+        A numpy or OCL array to transform.  If an OCL array is provided, it must already
+        be of type `complex64`.  If a numpy array is provided, it will be converted
+        to `float32` before the transformation is performed.
+    output_arr : numpy or OCL array, optional
+        An optional array/buffer to use for output, by default None
+    axes : tuple of int, optional
+        T tuple with axes over which to perform the transform.
+        If not given, the transform is performed over all the axes., by default None
+    inplace : bool, optional
+        Whether to place output data in the `input_arr` buffer, by default False
+    fast_math : bool, optional
+        Whether to enable fast (less precise) mathematical operations during
+        compilation, by default True
+    _inverse : bool, optional
+        Perform inverse FFT, by default False.  (prefer using `ifftn`)
+
+    Returns
+    -------
+    OCLArray
+        result of transformation (still on GPU). Use `.get()` or `cle.pull`
+        to retrieve from GPU.
+        If `inplace` or  `output_arr` where used, data will also be placed in
+        the corresponding buffer as a side effect.
+
+    Raises
+    ------
+    TypeError
+        If OCL array is provided that is not of type complex64.  Or if an unrecognized
+        array is provided.
+    ValueError
+        If inplace is used for numpy array, or both `output_arr` and `inplace` are used.
+    """
     _isnumpy = isinstance(input_arr, np.ndarray)
     if isinstance(input_arr, OCLArray):
         if input_arr.dtype != np.complex64:
             raise TypeError("OCLArray input_arr has to be of complex64 type")
     elif _isnumpy:
-        if not np.issubdtype(input_arr.dtype, np.floating):
-            input_arr = input_arr.astype(np.float32)
+        if inplace:
+            raise ValueError("inplace FFT not supported for numpy arrays")
+        input_arr = input_arr.astype(np.float32, copy=False)
     else:
         raise TypeError("input_arr must be either OCLArray or np.ndarray")
     if output_arr is not None and inplace:
         raise ValueError("`output_arr` cannot be provided if `inplace` is True")
 
-    plan = get_fft_plan(input_arr, axes=axes, fast_math=fast_math)
+    plan = _get_fft_plan(input_arr, axes=axes, fast_math=fast_math)
 
     if isinstance(input_arr, np.ndarray):
         arr_dev = push(input_arr)
@@ -92,16 +101,40 @@ def fftn(
                 create(arr_dev, np.complex64) if output_arr is None else output_arr
             )
 
-    plan(res_dev, arr_dev, inverse=inverse)
+    plan(res_dev, arr_dev, inverse=_inverse)
 
-    if _isnumpy:
-        if inplace:
-            input_arr[:] = res_dev.get()
-        elif output_arr:
-            output_arr[:] = res_dev.get()
+    if _isnumpy and output_arr is not None:
+        output_arr[:] = res_dev.get()
     return res_dev
 
 
+def _normalize_axes(dshape, axes):
+    """Convert possibly negative axes to positive axes."""
+    if axes is None:
+        return None
+    try:
+        return tuple(np.arange(len(dshape))[list(axes)])
+    except Exception as e:
+        raise TypeError(f"Cannot normalize axes {axes}: {e}")
+
+
+def _get_complex_trf(arr):
+    """On-GPU transformation that transforms a real array to a complex one."""
+    complex_dtype = cluda.dtypes.complex_for(arr.dtype)
+    return Transformation(
+        [
+            Parameter("output", Annotation(Type(complex_dtype, arr.shape), "o")),
+            Parameter("input", Annotation(arr, "i")),
+        ],
+        """
+        ${output.store_same}(
+            COMPLEX_CTR(${output.ctype})(
+                ${input.load_same},
+                0));
+        """,
+    )
+
+
 def ifftn(
     input_arr,
     output_arr=None,
@@ -109,4 +142,31 @@ def ifftn(
     inplace=False,
     fast_math=True,
 ):
+    """Perform inverse Fourier transformation on `input_array`.
+
+    Parameters
+    ----------
+    input_arr : numpy or OCL array
+        A numpy or OCL array to transform.  If an OCL array is provided, it must already
+        be of type `complex64`.  If a numpy array is provided, it will be converted
+        to `float32` before the transformation is performed.
+    output_arr : numpy or OCL array, optional
+        An optional array/buffer to use for output, by default None.
+    axes : tuple of int, optional
+        T tuple with axes over which to perform the transform.
+        If not given, the transform is performed over all the axes., by default None.
+    inplace : bool, optional
+        Whether to place output data in the `input_arr` buffer, by default False.
+    fast_math : bool, optional
+        Whether to enable fast (less precise) mathematical operations during
+        compilation, by default True.
+
+    Returns
+    -------
+    OCLArray
+        result of transformation (still on GPU). Use `.get()` or `cle.pull`
+        to retrieve from GPU.
+        If `inplace` or  `output_arr` where used, data will also be placed in
+        the corresponding buffer as a side effect.
+    """
     return fftn(input_arr, output_arr, axes, inplace, fast_math, True)
diff --git a/pyclesperanto_prototype/_tier0/_push.py b/pyclesperanto_prototype/_tier0/_push.py
index 3c587d71..a6ffbf8f 100644
--- a/pyclesperanto_prototype/_tier0/_push.py
+++ b/pyclesperanto_prototype/_tier0/_push.py
@@ -2,13 +2,13 @@
 from ._pycl import OCLArray
 
 
-def push(any_array):
+def push(any_array, dtype=None):
     """Copies an image to GPU memory and returns its handle
 
     .. deprecated:: 0.6.0
         `push` behaviour will be changed pyclesperanto_prototype 0.7.0 to do the same as
         `push_zyx` because it's faster and having both doing different things is confusing.
-    
+
     Parameters
     ----------
     image : numpy array
@@ -16,30 +16,30 @@ def push(any_array):
     Returns
     -------
     OCLArray
-    
+
     Examples
     --------
     >>> import pyclesperanto_prototype as cle
     >>> cle.push(image)
-    
+
     References
     ----------
     .. [1] https://clij.github.io/clij2-docs/reference_push
     """
     if isinstance(any_array, OCLArray):
         return any_array
-
-    if isinstance(any_array, list) or isinstance(any_array, tuple):
+    if isinstance(any_array, (list, tuple)):
         any_array = np.asarray(any_array)
 
-    float_arr = any_array.astype(np.float32)
-    return OCLArray.from_array(float_arr)
+    _arr = any_array.astype(np.float32 if dtype is None else dtype)
+    return OCLArray.from_array(_arr)
 
 
 def push_zyx(any_array):
     import warnings
+
     warnings.warn(
         "Deprecated: `push_zyx()` is now deprecated as it does the same as `push()`.",
-        DeprecationWarning
+        DeprecationWarning,
     )
     return push(any_array)
\ No newline at end of file
diff --git a/setup.py b/setup.py
index b8800aa1..5163448b 100644
--- a/setup.py
+++ b/setup.py
@@ -14,8 +14,16 @@
     url="https://github.com/clEsperanto/pyclesperanto_prototype",
     packages=setuptools.find_packages(),
     include_package_data=True,
-    install_requires=["numpy!=1.19.4", "pyopencl", "toolz", "scikit-image>=0.18.0", "matplotlib", "transforms3d"],
-    python_requires='>=3.6',
+    install_requires=[
+        "numpy!=1.19.4",
+        "pyopencl",
+        "toolz",
+        "scikit-image>=0.18.0",
+        "matplotlib",
+        "transforms3d",
+    ],
+    python_requires=">=3.6",
+    extras_require={"fft": ["reikna"]},
     classifiers=[
         "Programming Language :: Python :: 3",
         "License :: OSI Approved :: BSD License",
diff --git a/tests/test_fft.py b/tests/test_fft.py
index e894caf3..40f85e71 100644
--- a/tests/test_fft.py
+++ b/tests/test_fft.py
@@ -3,17 +3,20 @@
 import numpy as np
 import numpy.testing as npt
 import pyclesperanto_prototype as cle
+import pytest
 
-GRASS = grass().astype("float32")
+_ = pytest.importorskip("reikna")
 
-scipy_fgrass = fftn(GRASS)
-scipy_ifgrass = ifftn(scipy_fgrass)
+GRASS = grass().astype("float32")
 
 
 def test_scipy_fft():
+    scipy_fgrass = fftn(GRASS)
+    scipy_ifgrass = ifftn(scipy_fgrass)
+
     assert not np.allclose(scipy_fgrass, GRASS, atol=100)
     assert scipy_fgrass.dtype == np.complex64
-    assert scipy_fgrass.shape == (512, 512)
+    assert scipy_fgrass.shape == GRASS.shape
     # inverse
     npt.assert_allclose(scipy_ifgrass.real, GRASS, atol=1e-4)
 
@@ -23,13 +26,47 @@ def test_cle_fft():
     fgrass = cle.pull(gpu_fgrass)
 
     assert not np.allclose(fgrass, GRASS, atol=100)
-    npt.assert_allclose(fgrass, scipy_fgrass, atol=2e-1)
+    npt.assert_allclose(fgrass, fftn(GRASS), atol=2e-1)
 
     gpu_ifgrss = cle.ifftn(gpu_fgrass)
     ifgrss = cle.pull(gpu_ifgrss)
     npt.assert_allclose(ifgrss.real, GRASS, atol=1e-4)
 
 
+def test_cle_fft_output_array():
+    input = cle.push(GRASS, np.complex64)
+    out = cle.create(input, np.complex64)
+    cle.fftn(input, out)
+    npt.assert_allclose(cle.pull(out), fftn(GRASS), atol=2e-1)
+
+
+def test_cle_fft_inplace():
+    input = cle.push(GRASS, np.complex64)
+    cle.fftn(input, inplace=True)
+    npt.assert_allclose(cle.pull(input), fftn(GRASS), atol=2e-1)
+
+
+def test_cle_fft_errors():
+
+    with pytest.raises(TypeError):
+        # existing OCLArray must be of complex type
+        cle.fftn(cle.push(GRASS))
+
+    cle.fftn(cle.push(GRASS, np.complex64))
+    cle.fftn(GRASS)
+
+    input = cle.push(GRASS, np.complex64)
+    out = cle.create(input, np.complex64)
+
+    with pytest.raises(ValueError):
+        # cannot provide both output and inplace
+        cle.fftn(input, out, inplace=True)
+
+    with pytest.raises(ValueError):
+        # cannot use inplace with numpy array
+        cle.fftn(GRASS, inplace=True)
+
+
 def test_cle_fftshift():
     scp_shift = fftshift(GRASS)
     cle_shift = cle.fftshift(GRASS).get()

From c976a7c9eaf6a8bacbb9e9d0d488beb30d435328 Mon Sep 17 00:00:00 2001
From: Talley Lambert <talley.lambert@gmail.com>
Date: Sun, 13 Jun 2021 16:44:32 -0400
Subject: [PATCH 03/12] add doc

---
 pyclesperanto_prototype/_tier0/_push.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pyclesperanto_prototype/_tier0/_push.py b/pyclesperanto_prototype/_tier0/_push.py
index a6ffbf8f..312226b1 100644
--- a/pyclesperanto_prototype/_tier0/_push.py
+++ b/pyclesperanto_prototype/_tier0/_push.py
@@ -12,6 +12,7 @@ def push(any_array, dtype=None):
     Parameters
     ----------
     image : numpy array
+    dtype : np.dtype, optional
 
     Returns
     -------
@@ -28,6 +29,7 @@ def push(any_array, dtype=None):
     """
     if isinstance(any_array, OCLArray):
         return any_array
+
     if isinstance(any_array, (list, tuple)):
         any_array = np.asarray(any_array)
 
@@ -42,4 +44,4 @@ def push_zyx(any_array):
         "Deprecated: `push_zyx()` is now deprecated as it does the same as `push()`.",
         DeprecationWarning,
     )
-    return push(any_array)
\ No newline at end of file
+    return push(any_array)

From 21e14fdc5bbbcfea7ee0a97dd44657cd058dbeae Mon Sep 17 00:00:00 2001
From: Talley Lambert <talley.lambert@gmail.com>
Date: Sun, 13 Jun 2021 22:39:42 -0400
Subject: [PATCH 04/12] change skip order

---
 tests/test_fft.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/test_fft.py b/tests/test_fft.py
index 40f85e71..63a38641 100644
--- a/tests/test_fft.py
+++ b/tests/test_fft.py
@@ -1,11 +1,11 @@
-from skimage.data import grass
-from scipy.fftpack import fftn, ifftn, fftshift
-import numpy as np
-import numpy.testing as npt
-import pyclesperanto_prototype as cle
 import pytest
 
 _ = pytest.importorskip("reikna")
+import numpy as np
+import numpy.testing as npt
+import pyclesperanto_prototype as cle
+from scipy.fftpack import fftn, fftshift, ifftn
+from skimage.data import grass
 
 GRASS = grass().astype("float32")
 

From 2038c999308a33d378fc8dce71b09dae79f76c64 Mon Sep 17 00:00:00 2001
From: Talley Lambert <talley.lambert@gmail.com>
Date: Mon, 14 Jun 2021 00:02:36 -0400
Subject: [PATCH 05/12] add fftconvolve

---
 pyclesperanto_prototype/_fft/__init__.py     |  1 +
 pyclesperanto_prototype/_fft/_fftconvolve.py | 57 ++++++++++++++++++++
 pyclesperanto_prototype/_fft/_fftshift.py    |  7 ++-
 tests/test_fft.py                            |  5 ++
 4 files changed, 68 insertions(+), 2 deletions(-)
 create mode 100644 pyclesperanto_prototype/_fft/_fftconvolve.py

diff --git a/pyclesperanto_prototype/_fft/__init__.py b/pyclesperanto_prototype/_fft/__init__.py
index 4510838e..3fd53d09 100644
--- a/pyclesperanto_prototype/_fft/__init__.py
+++ b/pyclesperanto_prototype/_fft/__init__.py
@@ -1,2 +1,3 @@
 from ._fft import fftn, ifftn
 from ._fftshift import fftshift
+from ._fftconvolve import fftconvolve
diff --git a/pyclesperanto_prototype/_fft/_fftconvolve.py b/pyclesperanto_prototype/_fft/_fftconvolve.py
new file mode 100644
index 00000000..954f4884
--- /dev/null
+++ b/pyclesperanto_prototype/_fft/_fftconvolve.py
@@ -0,0 +1,57 @@
+from pyopencl.elementwise import ElementwiseKernel
+from .. import get_device
+from ._fft import fftn, ifftn
+from .._tier0._pycl import OCLArray
+import numpy as np
+
+
+_mult_complex = ElementwiseKernel(
+    get_device().context,
+    "cfloat_t *a, cfloat_t * b",
+    "a[i] = cfloat_mul(a[i], b[i])",
+    "mult",
+)
+
+
+def fftconvolve(
+    data,
+    kernel,
+    mode="full",
+    axes=None,
+    output_arr=None,
+    inplace=False,
+    kernel_is_fft=False,
+):
+
+    if data.shape != kernel.shape:
+        raise ValueError("in1 and in2 should have the same shape")
+
+    if isinstance(data, np.ndarray):
+        data_g = OCLArray.from_array(data.astype(np.complex64))
+    else:
+        data_g = data
+
+    if isinstance(kernel, np.ndarray):
+        kernel_g = OCLArray.from_array(kernel.astype(np.complex64))
+    else:
+        kernel_g = kernel
+
+    if inplace:
+        output_arr = data_g
+    else:
+        if output_arr is None:
+            output_arr = OCLArray.empty(data_g.shape, data_g.dtype)
+        output_arr.copy_buffer(data_g)
+
+    if not kernel_is_fft:
+        kern_g = OCLArray.empty(kernel_g.shape, kernel_g.dtype)
+        kern_g.copy_buffer(kernel_g)
+        fftn(kern_g, inplace=True)
+    else:
+        kern_g = kernel_g
+
+    fftn(output_arr, inplace=True, axes=axes)
+    _mult_complex(output_arr, kern_g)
+
+    ifftn(output_arr, inplace=True, axes=axes)
+    return output_arr
diff --git a/pyclesperanto_prototype/_fft/_fftshift.py b/pyclesperanto_prototype/_fft/_fftshift.py
index 3c3d7462..d484a4b5 100644
--- a/pyclesperanto_prototype/_fft/_fftshift.py
+++ b/pyclesperanto_prototype/_fft/_fftshift.py
@@ -5,12 +5,15 @@
 from .. import create, get_device, push
 
 
-def fftshift(arr, axes=None, output_arr=None):
+def fftshift(arr, axes=None, output_arr=None, inplace=False):
     shift = FFTShift(arr, axes=axes)
     thread = cluda.ocl_api().Thread(get_device().queue)
     shiftc = shift.compile(thread)
 
     arr_dev = push(arr) if isinstance(arr, np.ndarray) else arr
-    res_dev = create(arr_dev) if output_arr is None else output_arr
+    if inplace:
+        res_dev = arr_dev
+    else:
+        res_dev = create(arr_dev) if output_arr is None else output_arr
     shiftc(res_dev, arr_dev)
     return res_dev
diff --git a/tests/test_fft.py b/tests/test_fft.py
index 63a38641..16779661 100644
--- a/tests/test_fft.py
+++ b/tests/test_fft.py
@@ -71,3 +71,8 @@ def test_cle_fftshift():
     scp_shift = fftshift(GRASS)
     cle_shift = cle.fftshift(GRASS).get()
     npt.assert_allclose(scp_shift, cle_shift)
+
+
+def test_cle_fftconvolve():
+    out = cle.fftconvolve(GRASS, GRASS).get()
+    print(out.max(), out.mean(), GRASS.mean())
\ No newline at end of file

From d7be52c4887d5a1d3df8b8ad0d591e0b423383ad Mon Sep 17 00:00:00 2001
From: Talley Lambert <talley.lambert@gmail.com>
Date: Mon, 14 Jun 2021 11:36:01 -0400
Subject: [PATCH 06/12] add convolve modes

---
 pyclesperanto_prototype/_fft/_fft.py         | 50 ++++++--------
 pyclesperanto_prototype/_fft/_fftconvolve.py | 70 ++++++++++++++++----
 tests/test_fft.py                            | 52 ++++++++++++---
 3 files changed, 118 insertions(+), 54 deletions(-)

diff --git a/pyclesperanto_prototype/_fft/_fft.py b/pyclesperanto_prototype/_fft/_fft.py
index c8aef774..b940b801 100644
--- a/pyclesperanto_prototype/_fft/_fft.py
+++ b/pyclesperanto_prototype/_fft/_fft.py
@@ -1,11 +1,13 @@
 from typing import Optional, Tuple, Union
+
 import numpy as np
 import reikna.cluda as cluda
-from reikna.core import Annotation, Parameter, Transformation, Type
+from pyopencl.array import Array
+from reikna.core import Type
 from reikna.fft import FFT
+from reikna.transformations import broadcast_const, combine_complex
 
 from .. import create, get_device, push
-from .._tier0._pycl import OCLArray
 
 # FFT plan cache
 _PLAN_CACHE = {}
@@ -18,25 +20,32 @@ def _get_fft_plan(arr, axes=None, fast_math=True):
 
     if plan_key not in _PLAN_CACHE:
         if np.iscomplexobj(arr):
-            fft = FFT(arr, axes=axes)
+            plan = FFT(arr, axes=axes)
         else:
-            trf = _get_complex_trf(arr)
-            fft = FFT(trf.output, axes=axes)
-            fft.parameter.input.connect(trf, trf.output, new_input=trf.input)
+            plan = FFT(Type(cluda.dtypes.complex_for(arr.dtype), arr.shape), axes=axes)
+            # joins two real inputs into complex output
+            cc = combine_complex(plan.parameter.input)
+            # broadcasts 0 to the imaginary component
+            bc = broadcast_const(cc.imag, 0)
+            plan.parameter.input.connect(
+                cc, cc.output, real_input=cc.real, imag_input=cc.imag
+            )
+            plan.parameter.imag_input.connect(bc, bc.output)
 
         thread = cluda.ocl_api().Thread(get_device().queue)
-        _PLAN_CACHE[plan_key] = fft.compile(thread, fast_math=fast_math)
+        _PLAN_CACHE[plan_key] = plan.compile(thread, fast_math=fast_math)
+
     return _PLAN_CACHE[plan_key]
 
 
 def fftn(
-    input_arr: Union[np.ndarray, OCLArray],
-    output_arr: Union[np.ndarray, OCLArray] = None,
+    input_arr: Union[np.ndarray, Array],
+    output_arr: Union[np.ndarray, Array] = None,
     axes: Optional[Tuple[int, ...]] = None,
     inplace: bool = False,
     fast_math: bool = True,
     _inverse: bool = False,
-) -> OCLArray:
+) -> Array:
     """Perform fast Fourier transformation on `input_array`.
 
     Parameters
@@ -75,7 +84,7 @@ def fftn(
         If inplace is used for numpy array, or both `output_arr` and `inplace` are used.
     """
     _isnumpy = isinstance(input_arr, np.ndarray)
-    if isinstance(input_arr, OCLArray):
+    if isinstance(input_arr, Array):
         if input_arr.dtype != np.complex64:
             raise TypeError("OCLArray input_arr has to be of complex64 type")
     elif _isnumpy:
@@ -89,7 +98,7 @@ def fftn(
 
     plan = _get_fft_plan(input_arr, axes=axes, fast_math=fast_math)
 
-    if isinstance(input_arr, np.ndarray):
+    if _isnumpy:
         arr_dev = push(input_arr)
         res_dev = create(arr_dev, np.complex64)
     else:
@@ -118,23 +127,6 @@ def _normalize_axes(dshape, axes):
         raise TypeError(f"Cannot normalize axes {axes}: {e}")
 
 
-def _get_complex_trf(arr):
-    """On-GPU transformation that transforms a real array to a complex one."""
-    complex_dtype = cluda.dtypes.complex_for(arr.dtype)
-    return Transformation(
-        [
-            Parameter("output", Annotation(Type(complex_dtype, arr.shape), "o")),
-            Parameter("input", Annotation(arr, "i")),
-        ],
-        """
-        ${output.store_same}(
-            COMPLEX_CTR(${output.ctype})(
-                ${input.load_same},
-                0));
-        """,
-    )
-
-
 def ifftn(
     input_arr,
     output_arr=None,
diff --git a/pyclesperanto_prototype/_fft/_fftconvolve.py b/pyclesperanto_prototype/_fft/_fftconvolve.py
index 954f4884..3d7287ee 100644
--- a/pyclesperanto_prototype/_fft/_fftconvolve.py
+++ b/pyclesperanto_prototype/_fft/_fftconvolve.py
@@ -3,7 +3,7 @@
 from ._fft import fftn, ifftn
 from .._tier0._pycl import OCLArray
 import numpy as np
-
+from .. import push, crop
 
 _mult_complex = ElementwiseKernel(
     get_device().context,
@@ -13,28 +13,44 @@
 )
 
 
+def _fix_shape(arr, shape):
+    if arr.shape == shape:
+        return arr
+    result = np.zeros(shape)
+    result[: arr.shape[0], : arr.shape[1]] = arr
+    return result
+
+
 def fftconvolve(
     data,
     kernel,
-    mode="full",
+    mode="same",
     axes=None,
     output_arr=None,
     inplace=False,
     kernel_is_fft=False,
 ):
+    if mode not in {"valid", "same", "full"}:
+        raise ValueError("acceptable mode flags are 'valid', 'same', or 'full'")
+    if data.ndim != kernel.ndim:
+        raise ValueError("data and kernel should have the same dimensionality")
 
-    if data.shape != kernel.shape:
-        raise ValueError("in1 and in2 should have the same shape")
+    # expand arrays
+    s1 = data.shape
+    s2 = kernel.shape
+    axes = tuple(range(len(s1))) if axes is None else tuple(axes)
+    shape = [
+        max((s1[i], s2[i])) if i not in axes else s1[i] + s2[i] - 1
+        for i in range(data.ndim)
+    ]
+    data = _fix_shape(data, shape)
+    kernel = _fix_shape(kernel, shape)
 
-    if isinstance(data, np.ndarray):
-        data_g = OCLArray.from_array(data.astype(np.complex64))
-    else:
-        data_g = data
+    if data.shape != kernel.shape:
+        raise ValueError("in1 and in2 must have the same shape")
 
-    if isinstance(kernel, np.ndarray):
-        kernel_g = OCLArray.from_array(kernel.astype(np.complex64))
-    else:
-        kernel_g = kernel
+    data_g = push(data, np.complex64)
+    kernel_g = push(kernel, np.complex64)
 
     if inplace:
         output_arr = data_g
@@ -52,6 +68,32 @@ def fftconvolve(
 
     fftn(output_arr, inplace=True, axes=axes)
     _mult_complex(output_arr, kern_g)
-
     ifftn(output_arr, inplace=True, axes=axes)
-    return output_arr
+
+    _out = output_arr.real if np.isrealobj(data) else output_arr
+
+    if mode == "full":
+        return _out
+    elif mode == "same":
+        return _crop_centered(_out, s1)
+    else:
+        shape_valid = [
+            _out.shape[a] if a not in axes else s1[a] - s2[a] + 1
+            for a in range(_out.ndim)
+        ]
+        return _crop_centered(_out, shape_valid)
+    return _out
+
+
+def _crop_centered(arr, newshape):
+    # Return the center newshape portion of the array.
+    newshape = np.asarray(newshape)
+    currshape = np.array(arr.shape)
+    startind = (currshape - newshape) // 2
+    return crop(
+        arr,
+        start_y=startind[0],
+        start_x=startind[1],
+        height=newshape[0],
+        width=newshape[1],
+    )
diff --git a/tests/test_fft.py b/tests/test_fft.py
index 16779661..a2108adb 100644
--- a/tests/test_fft.py
+++ b/tests/test_fft.py
@@ -1,18 +1,20 @@
 import pytest
 
-_ = pytest.importorskip("reikna")
+pytest.importorskip("reikna")  # isort: skip
+
 import numpy as np
 import numpy.testing as npt
 import pyclesperanto_prototype as cle
-from scipy.fftpack import fftn, fftshift, ifftn
+from scipy import fftpack, misc, signal
 from skimage.data import grass
 
+
 GRASS = grass().astype("float32")
 
 
 def test_scipy_fft():
-    scipy_fgrass = fftn(GRASS)
-    scipy_ifgrass = ifftn(scipy_fgrass)
+    scipy_fgrass = fftpack.fftn(GRASS)
+    scipy_ifgrass = fftpack.ifftn(scipy_fgrass)
 
     assert not np.allclose(scipy_fgrass, GRASS, atol=100)
     assert scipy_fgrass.dtype == np.complex64
@@ -26,7 +28,7 @@ def test_cle_fft():
     fgrass = cle.pull(gpu_fgrass)
 
     assert not np.allclose(fgrass, GRASS, atol=100)
-    npt.assert_allclose(fgrass, fftn(GRASS), atol=2e-1)
+    npt.assert_allclose(fgrass, fftpack.fftn(GRASS), atol=2e-1)
 
     gpu_ifgrss = cle.ifftn(gpu_fgrass)
     ifgrss = cle.pull(gpu_ifgrss)
@@ -37,13 +39,13 @@ def test_cle_fft_output_array():
     input = cle.push(GRASS, np.complex64)
     out = cle.create(input, np.complex64)
     cle.fftn(input, out)
-    npt.assert_allclose(cle.pull(out), fftn(GRASS), atol=2e-1)
+    npt.assert_allclose(cle.pull(out), fftpack.fftn(GRASS), atol=2e-1)
 
 
 def test_cle_fft_inplace():
     input = cle.push(GRASS, np.complex64)
     cle.fftn(input, inplace=True)
-    npt.assert_allclose(cle.pull(input), fftn(GRASS), atol=2e-1)
+    npt.assert_allclose(cle.pull(input), fftpack.fftn(GRASS), atol=2e-1)
 
 
 def test_cle_fft_errors():
@@ -68,11 +70,39 @@ def test_cle_fft_errors():
 
 
 def test_cle_fftshift():
-    scp_shift = fftshift(GRASS)
+    scp_shift = fftpack.fftshift(GRASS)
     cle_shift = cle.fftshift(GRASS).get()
     npt.assert_allclose(scp_shift, cle_shift)
 
 
-def test_cle_fftconvolve():
-    out = cle.fftconvolve(GRASS, GRASS).get()
-    print(out.max(), out.mean(), GRASS.mean())
\ No newline at end of file
+FACE = misc.face(gray=True).astype("float32")
+KERNEL = np.outer(
+    signal.windows.gaussian(70, 8), signal.windows.gaussian(70, 8)
+).astype("float32")
+
+
+def test_cle_fftconvolve_same():
+    cle_out = cle.fftconvolve(FACE, KERNEL, mode="same").get()
+    scp_out = signal.fftconvolve(FACE, KERNEL, mode="same")
+    assert cle_out.shape == scp_out.shape
+    assert cle_out.dtype == scp_out.dtype
+    npt.assert_allclose(cle_out, scp_out, atol=0.2)
+
+    # cle_out2 = cle.fftconvolve(cle.push(FACE), KERNEL, mode="same").get()
+    # npt.assert_allclose(cle_out, cle_out2, atol=0.2)
+
+
+def test_cle_fftconvolve_full():
+    cle_out = cle.fftconvolve(FACE, KERNEL, mode="full").get()
+    scp_out = signal.fftconvolve(FACE, KERNEL, mode="full")
+    assert cle_out.shape == scp_out.shape
+    assert cle_out.dtype == scp_out.dtype
+    npt.assert_allclose(cle_out, scp_out, atol=0.2)
+
+
+def test_cle_fftconvolve_valid():
+    cle_out = cle.fftconvolve(FACE, KERNEL, mode="valid").get()
+    scp_out = signal.fftconvolve(FACE, KERNEL, mode="valid")
+    assert cle_out.shape == scp_out.shape
+    assert cle_out.dtype == scp_out.dtype
+    npt.assert_allclose(cle_out, scp_out, atol=0.2)

From 4b8263e3c8709f5fad5617d0b766d6c517d7394b Mon Sep 17 00:00:00 2001
From: Talley Lambert <talley.lambert@gmail.com>
Date: Mon, 14 Jun 2021 11:46:22 -0400
Subject: [PATCH 07/12] update tests

---
 tests/test_fft.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/tests/test_fft.py b/tests/test_fft.py
index a2108adb..7c074f45 100644
--- a/tests/test_fft.py
+++ b/tests/test_fft.py
@@ -35,6 +35,19 @@ def test_cle_fft():
     npt.assert_allclose(ifgrss.real, GRASS, atol=1e-4)
 
 
+def test_cle_fft3d():
+    img = np.random.rand(128, 128, 128)
+    gpu_fimg = cle.fftn(img)
+    fimg = cle.pull(gpu_fimg)
+
+    assert not np.allclose(fimg, img, atol=100)
+    npt.assert_allclose(fimg, fftpack.fftn(img), atol=2e-1)
+
+    gpu_ifgrss = cle.ifftn(gpu_fimg)
+    ifgrss = cle.pull(gpu_ifgrss)
+    npt.assert_allclose(ifgrss.real, img, atol=1e-4)
+
+
 def test_cle_fft_output_array():
     input = cle.push(GRASS, np.complex64)
     out = cle.create(input, np.complex64)
@@ -88,8 +101,12 @@ def test_cle_fftconvolve_same():
     assert cle_out.dtype == scp_out.dtype
     npt.assert_allclose(cle_out, scp_out, atol=0.2)
 
-    # cle_out2 = cle.fftconvolve(cle.push(FACE), KERNEL, mode="same").get()
-    # npt.assert_allclose(cle_out, cle_out2, atol=0.2)
+
+@pytest.mark.xfail
+def test_cle_fftconvolve_from_oclarray():
+    cle_out = cle.fftconvolve(FACE, KERNEL, mode="same").get()
+    cle_out2 = cle.fftconvolve(cle.push(FACE), KERNEL, mode="same").get()
+    npt.assert_allclose(cle_out, cle_out2, atol=0.2)
 
 
 def test_cle_fftconvolve_full():

From 93a1a7d3dbb6618a2cf8a8a42ab8f6162af53548 Mon Sep 17 00:00:00 2001
From: Talley Lambert <talley.lambert@gmail.com>
Date: Mon, 14 Jun 2021 11:49:06 -0400
Subject: [PATCH 08/12] add aliases

---
 pyclesperanto_prototype/_fft/__init__.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/pyclesperanto_prototype/_fft/__init__.py b/pyclesperanto_prototype/_fft/__init__.py
index 3fd53d09..33e202c9 100644
--- a/pyclesperanto_prototype/_fft/__init__.py
+++ b/pyclesperanto_prototype/_fft/__init__.py
@@ -1,3 +1,19 @@
 from ._fft import fftn, ifftn
 from ._fftshift import fftshift
 from ._fftconvolve import fftconvolve
+
+# clij2 aliases
+convolve_fft = fftconvolve
+inversere_fft = ifftn
+forward_fft = fftn
+fft = fftn
+
+__all__ = [
+    "convolve_fft",
+    "fft",
+    "fftn",
+    "fftshift",
+    "forward_fft",
+    "ifftn",
+    "inversere_fft",
+]

From 23e011afa908551107fe05aacc9559639b3f66e3 Mon Sep 17 00:00:00 2001
From: Talley Lambert <talley.lambert@gmail.com>
Date: Mon, 14 Jun 2021 11:52:27 -0400
Subject: [PATCH 09/12] add reikna dep

---
 .github/workflows/test_and_deploy.yml | 2 +-
 setup.py                              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test_and_deploy.yml b/.github/workflows/test_and_deploy.yml
index 39ff9802..13baedaa 100644
--- a/.github/workflows/test_and_deploy.yml
+++ b/.github/workflows/test_and_deploy.yml
@@ -38,7 +38,7 @@ jobs:
           conda install -y pyopencl
           python -m pip install --upgrade pip
           pip install setuptools wheel pytest pytest-cov pytest-benchmark scipy
-          pip install -e .[fft]
+          pip install -e .
       - name: Test
         shell: bash -l {0}
         run: pytest -v --cov=./ --cov-report=xml
diff --git a/setup.py b/setup.py
index 5163448b..4ae55707 100644
--- a/setup.py
+++ b/setup.py
@@ -21,9 +21,9 @@
         "scikit-image>=0.18.0",
         "matplotlib",
         "transforms3d",
+        "reikna",
     ],
     python_requires=">=3.6",
-    extras_require={"fft": ["reikna"]},
     classifiers=[
         "Programming Language :: Python :: 3",
         "License :: OSI Approved :: BSD License",

From f899171774adb9422c3b644100d2035866529754 Mon Sep 17 00:00:00 2001
From: Talley Lambert <talley.lambert@gmail.com>
Date: Mon, 14 Jun 2021 11:55:22 -0400
Subject: [PATCH 10/12] fix typo

---
 pyclesperanto_prototype/_fft/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyclesperanto_prototype/_fft/__init__.py b/pyclesperanto_prototype/_fft/__init__.py
index 33e202c9..4f765fad 100644
--- a/pyclesperanto_prototype/_fft/__init__.py
+++ b/pyclesperanto_prototype/_fft/__init__.py
@@ -4,7 +4,7 @@
 
 # clij2 aliases
 convolve_fft = fftconvolve
-inversere_fft = ifftn
+inverse_fft = ifftn
 forward_fft = fftn
 fft = fftn
 
@@ -15,5 +15,5 @@
     "fftshift",
     "forward_fft",
     "ifftn",
-    "inversere_fft",
+    "inverse_fft",
 ]

From 99296338548edf846bca3b552d1a72b994dc9bc4 Mon Sep 17 00:00:00 2001
From: Talley Lambert <talley.lambert@gmail.com>
Date: Mon, 14 Jun 2021 12:00:49 -0400
Subject: [PATCH 11/12] fix export

---
 pyclesperanto_prototype/_fft/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyclesperanto_prototype/_fft/__init__.py b/pyclesperanto_prototype/_fft/__init__.py
index 4f765fad..df198a1f 100644
--- a/pyclesperanto_prototype/_fft/__init__.py
+++ b/pyclesperanto_prototype/_fft/__init__.py
@@ -11,6 +11,7 @@
 __all__ = [
     "convolve_fft",
     "fft",
+    "fftconvolve",
     "fftn",
     "fftshift",
     "forward_fft",

From 5d0b560c52b92040b07b9fbd2194ff8064cfb604 Mon Sep 17 00:00:00 2001
From: Talley Lambert <talley.lambert@gmail.com>
Date: Tue, 15 Jun 2021 13:31:45 -0500
Subject: [PATCH 12/12] add not implemented error for now

---
 pyclesperanto_prototype/_fft/_fftconvolve.py | 11 +++++++++--
 tests/test_fft.py                            |  2 +-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/pyclesperanto_prototype/_fft/_fftconvolve.py b/pyclesperanto_prototype/_fft/_fftconvolve.py
index 3d7287ee..e443d59a 100644
--- a/pyclesperanto_prototype/_fft/_fftconvolve.py
+++ b/pyclesperanto_prototype/_fft/_fftconvolve.py
@@ -16,8 +16,15 @@
 def _fix_shape(arr, shape):
     if arr.shape == shape:
         return arr
-    result = np.zeros(shape)
-    result[: arr.shape[0], : arr.shape[1]] = arr
+    if isinstance(arr, OCLArray):
+        # TODO
+        raise NotImplementedError("Cannot not resize/convert array to complex type..")
+        # result = OCLArray.empty(shape, arr.dtype)
+        # set(result, 0)
+        # paste(arr, result)
+    if isinstance(arr, np.ndarray):
+        result = np.zeros(shape, dtype=arr.dtype)
+        result[tuple(slice(i) for i in arr.shape)] = arr
     return result
 
 
diff --git a/tests/test_fft.py b/tests/test_fft.py
index 7c074f45..0d7acf38 100644
--- a/tests/test_fft.py
+++ b/tests/test_fft.py
@@ -105,7 +105,7 @@ def test_cle_fftconvolve_same():
 @pytest.mark.xfail
 def test_cle_fftconvolve_from_oclarray():
     cle_out = cle.fftconvolve(FACE, KERNEL, mode="same").get()
-    cle_out2 = cle.fftconvolve(cle.push(FACE), KERNEL, mode="same").get()
+    cle_out2 = cle.fftconvolve(cle.push(FACE.astype("complex64")), KERNEL, mode="same").get()
     npt.assert_allclose(cle_out, cle_out2, atol=0.2)