merge with master

alejoe91 · alejoe91 · commit 50390ed84465 · 2023-04-04T11:26:14.000+02:00
diff --git a/doc/api.rst b/doc/api.rst
@@ -149,6 +149,7 @@ spikeinterface.preprocessing
     .. autofunction:: correct_lsb
     .. autofunction:: detect_bad_channels
     .. autofunction:: filter
+    .. autofunction:: highpass_filter
     .. autofunction:: highpass_spatial_filter
     .. autofunction:: interpolate_bad_channels
     .. autofunction:: normalize_by_quantile
@@ -159,6 +160,7 @@ spikeinterface.preprocessing
     .. autofunction:: scale
     .. autofunction:: whiten
     .. autofunction:: zero_channel_pad
+    .. autofunction:: zscore
 
 
 spikeinterface.postprocessing
diff --git a/doc/modules/core.rst b/doc/modules/core.rst
@@ -591,8 +591,8 @@ same sampling frequency, number of segments, and number of samples:
 
 .. code-block:: python
     
-    recA_4_chans = read_binray('fileA.raw')
-    recB_4_chans = read_binray('fileB.raw')
+    recA_4_chans = read_binary('fileA.raw')
+    recB_4_chans = read_binary('fileB.raw')
     rec_8_chans = aggregate_channels([recA_4_chans, recB_4_chans])
 
 We can also aggregate (or stack) multiple sortings on the unit axis using the 
diff --git a/spikeinterface/preprocessing/common_reference.py b/spikeinterface/preprocessing/common_reference.py
@@ -5,6 +5,8 @@
 from .basepreprocessor import BasePreprocessor, BasePreprocessorSegment
 from ..core import get_closest_channels
 
+from .filter import fix_dtype
+
 
 class CommonReferenceRecording(BasePreprocessor):
     """
@@ -35,6 +37,8 @@ class CommonReferenceRecording(BasePreprocessor):
         Use in the local CAR implementation as the selecting annulus (exclude radius, include radius)
     verbose: bool
         If True, output is verbose
+    dtype: None or dtype
+        If None the parent dtype is kept.
 
     Returns
     -------
@@ -45,7 +49,7 @@ class CommonReferenceRecording(BasePreprocessor):
     name = 'common_reference'
 
     def __init__(self, recording, reference='global', operator='median', groups=None, ref_channel_ids=None,
-                 local_radius=(30, 55), verbose=False):
+                 local_radius=(30, 55), verbose=False, dtype=None):
 
         num_chans = recording.get_num_channels()
         neighbors = None
@@ -79,7 +83,8 @@ def __init__(self, recording, reference='global', operator='median', groups=None
                 neighbors[i] = closest_inds[i, mask]
                 assert len(neighbors[i]) > 0, "No reference channels available in the local annulus for selection."
 
-        BasePreprocessor.__init__(self, recording)
+        dtype_ = fix_dtype(recording, dtype)
+        BasePreprocessor.__init__(self, recording, dtype=dtype_)
 
         # tranforms groups (ids) to groups (indices)
         if groups is not None:
@@ -92,15 +97,16 @@ def __init__(self, recording, reference='global', operator='median', groups=None
         for parent_segment in recording._recording_segments:
             rec_segment = CommonReferenceRecordingSegment(parent_segment,
                                                           reference, operator, groups, ref_channel_inds, local_radius,
-                                                          neighbors)
+                                                          neighbors, dtype_)
             self.add_recording_segment(rec_segment)
 
         self._kwargs = dict(recording=recording, reference=reference, groups=groups, operator=operator,
-                            ref_channel_ids=ref_channel_ids, local_radius=local_radius)
+                            ref_channel_ids=ref_channel_ids, local_radius=local_radius, dtype=dtype_.str)
 
 
 class CommonReferenceRecordingSegment(BasePreprocessorSegment):
-    def __init__(self, parent_recording_segment, reference, operator, groups, ref_channel_inds, local_radius, neighbors):
+    def __init__(self, parent_recording_segment, reference, operator, groups, ref_channel_inds, local_radius,
+                 neighbors, dtype):
         BasePreprocessorSegment.__init__(self, parent_recording_segment)
 
         self.reference = reference
@@ -110,6 +116,7 @@ def __init__(self, parent_recording_segment, reference, operator, groups, ref_ch
         self.local_radius = local_radius
         self.neighbors = neighbors
         self.temp = None
+        self.dtype = dtype
 
         if self.operator == 'median':
             self.operator_func = lambda x: np.median(x, axis=1, out=self.temp)[:, None]
@@ -119,31 +126,32 @@ def __init__(self, parent_recording_segment, reference, operator, groups, ref_ch
     def get_traces(self, start_frame, end_frame, channel_indices):
         # need input trace
         all_traces = self.parent_recording_segment.get_traces(start_frame, end_frame, slice(None))
+        all_traces = all_traces.astype(self.dtype)
         self.temp = np.zeros((all_traces.shape[0],),dtype=all_traces.dtype)
         _channel_indices = np.arange(all_traces.shape[1])
         if channel_indices is not None:
             _channel_indices = _channel_indices[channel_indices]
 
         
         if self.reference == 'global':
-            out_traces = np.zeros((all_traces.shape[0], _channel_indices.size), dtype=all_traces.dtype)
+            out_traces = np.zeros((all_traces.shape[0], _channel_indices.size), dtype=self.dtype)
             for chan_inds, chan_group_inds in self._groups(_channel_indices):
                 out_inds = np.array([np.where(_channel_indices == i)[0][0] for i in chan_inds])
                 out_traces[:, out_inds] = all_traces[:, chan_inds] \
                     - self.operator_func(all_traces[:, chan_group_inds])
 
         elif self.reference == 'single':
-            out_traces = np.zeros((all_traces.shape[0], _channel_indices.size), dtype=all_traces.dtype)
+            out_traces = np.zeros((all_traces.shape[0], _channel_indices.size), dtype=self.dtype)
             for i, (chan_inds, _) in enumerate(self._groups(_channel_indices)):
                 out_inds = np.array([np.where(_channel_indices == i)[0][0] for i in chan_inds])
                 out_traces[:, out_inds] = all_traces[:, chan_inds] \
                     - self.operator_func(all_traces[:, [self.ref_channel_inds[i]]])
         
         elif self.reference == 'local':
-            out_traces = np.hstack([
-                all_traces[:, [chan_ind]] - self.operator_func(all_traces[:, self.neighbors[chan_ind]])
-                for chan_ind in _channel_indices])
-
+            out_traces = np.zeros((all_traces.shape[0], _channel_indices.size), dtype=self.dtype)
+            for i, chan_ind in enumerate(_channel_indices):
+                out_traces[:, [i]] = all_traces[:, [chan_ind]] - \
+                                            self.operator_func(all_traces[:, self.neighbors[chan_ind]])
         return out_traces
 
     def _groups(self, channel_indices):
diff --git a/spikeinterface/preprocessing/normalize_scale.py b/spikeinterface/preprocessing/normalize_scale.py
@@ -4,6 +4,8 @@
 
 from .basepreprocessor import BasePreprocessor, BasePreprocessorSegment
 
+from .filter import fix_dtype
+
 from ..core import get_random_data_chunks
 
 
@@ -50,7 +52,7 @@ class NormalizeByQuantileRecording(BasePreprocessor):
         Random seed for reproducibility
     dtype: str or np.dtype
         The dtype of the output traces. Default "float32"
-    **random_chunk_kwargs: keyword arguments for `get_random_data_chunks()` function
+    **random_chunk_kwargs: Keyword arguments for `spikeinterface.core.get_random_data_chunks()` function
 
     Returns
     -------
@@ -196,7 +198,7 @@ class CenterRecording(BasePreprocessor):
         'median' (default) | 'mean'
     dtype: str or np.dtype
         The dtype of the output traces. Default "float32"
-    **random_chunk_kwargs: keyword arguments for `get_random_data_chunks()` function
+    **random_chunk_kwargs: Keyword arguments for `spikeinterface.core.get_random_data_chunks()` function
 
     Returns
     -------
@@ -247,35 +249,63 @@ class ZScoreRecording(BasePreprocessor):
         The recording extractor to be centered
     mode: str
         "median+mad" (default) or "mean+std"
-    dtype: str or np.dtype
-        The dtype of the output traces. Default "float32"
-    **random_chunk_kwargs: keyword arguments for `get_random_data_chunks()` function
+    dtype: None or dtype
+        If None, the parent dtype is kept.
+        For an integer dtype, int_scale must also be given.
+    gain : None or np.array
+        Pre-computed gain.
+    offset : None or np.array
+        Pre-computed offset
+    int_scale : None or float
+        Apply a scaling factor to fit the integer range.
+        This is used when the dtype is an integer, so that the output is scaled. 
+        For example, a value of `int_scale=200` will scale the zscore value to a standard deviation of 200.
+    **random_chunk_kwargs: Keyword arguments for `spikeinterface.core.get_random_data_chunks()` function
 
     Returns
     -------
     centered_traces: ScaleRecording
         The centered traces recording extractor object
     """
-
     name = "zscore"
 
     def __init__(
         self,
         recording,
         mode="median+mad",
+        gain=None,
+        offset=None,
+        int_scale=None,
         dtype="float32",
         **random_chunk_kwargs
     ):
 
         assert mode in ("median+mad", "mean+std")
 
+        # fix dtype
+        dtype_ = fix_dtype(recording, dtype)
+
+        if dtype_.kind == 'i':
+            assert int_scale is not None, 'For recording with dtype=int you must set dtype=float32 OR set a scale' 
+
         random_data = get_random_data_chunks(recording, **random_chunk_kwargs)
 
-        if mode == "median+mad":
+        if gain is not None:
+            assert offset is not None
+            gain = np.asarray(gain)
+            offset = np.asarray(offset)
+            n = recording.get_num_channels()
+            if gain.ndim == 1:
+                gain = gain[None, :]
+            assert gain.shape[1] == n
+            if offset.ndim == 1:
+                offset = offset[None, :]
+            assert offset.shape[1] == n
+        elif mode == "median+mad":
             medians = np.median(random_data, axis=0)
             medians = medians[None, :]
-            mads = np.median(np.abs(random_data - medians), axis=0) / 0.6745
-            mads = mads[None, :]
+            mads = np.median(np.abs(random_data - medians), axis=0) / 0.6744897501960817
+            mads = mads[None, :] 
             gain = 1 / mads
             offset = -medians / mads
         else:
@@ -285,6 +315,14 @@ def __init__(
             stds = stds[None, :]
             gain = 1 / stds
             offset = -means / stds
+        
+        if int_scale is not None:
+            gain *= int_scale
+            offset *= int_scale
+
+        # convenient to have them here
+        self.gain = gain
+        self.offset = offset
 
         BasePreprocessor.__init__(self, recording, dtype=dtype)
 
@@ -298,6 +336,8 @@ def __init__(
             recording=recording,
             dtype=np.dtype(self._dtype).str,
             mode=mode,
+            gain=gain.tolist(),
+            offset=offset.tolist()
         )
         self._kwargs.update(random_chunk_kwargs)
 
diff --git a/spikeinterface/preprocessing/preprocessinglist.py b/spikeinterface/preprocessing/preprocessinglist.py
@@ -10,7 +10,7 @@
     ScaleRecording, scale,
     ZScoreRecording, zscore,
     CenterRecording, center)
-from .whiten import WhitenRecording, whiten
+from .whiten import WhitenRecording, whiten, compute_whitening_matrix
 from .rectify import RectifyRecording, rectify
 from .clip import (
     BlankSaturationRecording, blank_staturation,
diff --git a/spikeinterface/preprocessing/tests/test_normalize_scale.py b/spikeinterface/preprocessing/tests/test_normalize_scale.py
@@ -62,22 +62,30 @@ def test_center():
 
 def test_zscore():
     rec = generate_recording()
-    # print("original")
     tr = rec.get_traces(segment_index=0)
-    # print("medians", np.median(tr, axis=0))
-    # print("stds", np.std(tr, axis=0))
 
-    # print("median+mad")
     rec2 = zscore(rec)
     tr = rec2.get_traces(segment_index=0)
-    # print("medians", np.median(tr, axis=0))
-    # print("stds", np.std(tr, axis=0))
+    meds = np.median(tr, axis=0)
+    mads = np.median(np.abs(tr - meds), axis=0) / 0.6744897501960817
+    assert np.all(np.abs(meds) < 0.01)
+    assert np.all(np.abs(mads - 1) < 0.01)
+    assert 'gain' in rec2._kwargs
     
-    # print("mean+std")
     rec3 = zscore(rec, mode="mean+std")
     tr = rec3.get_traces(segment_index=0)
-    # print("medians", np.median(tr, axis=0))
-    # print("stds", np.std(tr, axis=0))
+    assert np.all(np.abs(np.mean(tr, axis=0)) < 0.01)
+    assert np.all(np.abs(np.std(tr, axis=0) - 1) < 0.01)
+
+    rec_int = scale(rec, dtype="int16", gain=100)
+    with pytest.raises(AssertionError):
+        rec4 = zscore(rec_int, dtype=None)
+    rec4 = zscore(rec_int, dtype='float32', mode="mean+std")
+    rec4 = zscore(rec_int, dtype='int16', int_scale=256, mode="mean+std")
+    tr = rec4.get_traces(segment_index=0)
+    assert np.all(np.abs(np.mean(tr, axis=0)) < 1)
+    assert np.all(np.abs(np.std(tr, axis=0) - 256) < 1)
+
 
 
 if __name__ == '__main__':
diff --git a/spikeinterface/preprocessing/tests/test_whiten.py b/spikeinterface/preprocessing/tests/test_whiten.py
@@ -5,7 +5,7 @@
 from spikeinterface import set_global_tmp_folder
 from spikeinterface.core import generate_recording
 
-from spikeinterface.preprocessing import whiten, scale
+from spikeinterface.preprocessing import whiten, scale, compute_whitening_matrix
 
 if hasattr(pytest, "global_test_folder"):
     cache_folder = pytest.global_test_folder / "preprocessing"
@@ -16,7 +16,25 @@
 
 
 def test_whiten():
-    rec = generate_recording()
+    rec = generate_recording(num_channels=4)
+
+    print(rec.get_channel_locations())
+    random_chunk_kwargs={}
+    W, M = compute_whitening_matrix(rec, 'global', random_chunk_kwargs, apply_mean=False,
+                                    radius_um=None, eps=1e-8)
+    print(W)
+    print(M)
+
+    with pytest.raises(AssertionError):
+        W, M = compute_whitening_matrix(rec, 'local', random_chunk_kwargs, apply_mean=False,
+                                    radius_um=None, eps=1e-8)
+    W, M = compute_whitening_matrix(rec, 'local', random_chunk_kwargs, apply_mean=False,
+                                radius_um=25, eps=1e-8)
+    # W must be sparse
+    np.sum(W==0) == 6
+
+
+
 
     rec2 = whiten(rec)
     rec2.save(verbose=False)
@@ -32,6 +50,13 @@ def test_whiten():
     np.testing.assert_array_equal(rec3.get_traces(segment_index=0),
                                   rec_par.get_traces(segment_index=0))
 
+    with pytest.raises(AssertionError):
+        rec4 = whiten(rec_int, dtype=None)
+    rec4 = whiten(rec_int, dtype=None, int_scale=256)
+    assert rec4.get_dtype() == "int16"
+    assert rec4._kwargs['M'] is None
+
+
 
 if __name__ == '__main__':
     test_whiten()
diff --git a/spikeinterface/preprocessing/whiten.py b/spikeinterface/preprocessing/whiten.py
diff --git a/spikeinterface/widgets/matplotlib/timeseries.py b/spikeinterface/widgets/matplotlib/timeseries.py