Skip to content

Commit c163acc

Browse files
authored
Merge branch 'master' into merge_ap_lfp_neuropix
2 parents e62cc32 + 8a50476 commit c163acc

File tree

12 files changed

+274
-54
lines changed

12 files changed

+274
-54
lines changed

spikeinterface/core/baserecording.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from .baserecordingsnippets import BaseRecordingSnippets
1111
from .core_tools import write_binary_recording, write_memory_recording, write_traces_to_zarr, check_json
1212
from .job_tools import split_job_kwargs, fix_job_kwargs
13+
from .core_tools import convert_bytes_to_str
1314

1415
from warnings import warn
1516

@@ -42,12 +43,17 @@ def __repr__(self):
4243
nchan = self.get_num_channels()
4344
sf_khz = self.get_sampling_frequency() / 1000.
4445
duration = self.get_total_duration()
45-
txt = f'{clsname}: {nchan} channels - {nseg} segments - {sf_khz:0.1f}kHz - {duration:0.3f}s'
46+
memory_size = self.get_memory_size()
47+
txt = f"{clsname}: {nchan} channels - {nseg} segments - {sf_khz:0.1f}kHz - {duration:0.3f}s - {memory_size}"
4648
if 'file_paths' in self._kwargs:
4749
txt += '\n file_paths: {}'.format(self._kwargs['file_paths'])
4850
if 'file_path' in self._kwargs:
4951
txt += '\n file_path: {}'.format(self._kwargs['file_path'])
5052
return txt
53+
54+
def get_memory_size(self):
    """Return the in-memory size of the traces as a human-readable string.

    The size is computed as total_samples * num_channels * bytes-per-sample
    and formatted via `convert_bytes_to_str` (e.g. "512.00 MiB").

    Returns
    -------
    str
        The total traces size with a binary unit suffix (B, KiB, MiB, GiB, TiB).
    """
    # Renamed from `bytes` to avoid shadowing the builtin of the same name.
    memory_bytes = self.get_total_samples() * self.get_num_channels() * self.get_dtype().itemsize
    return convert_bytes_to_str(memory_bytes)
5157

5258
def get_num_segments(self):
5359
"""Returns the number of segments.

spikeinterface/core/core_tools.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -811,3 +811,21 @@ def recursive_key_finder(d, key):
811811
else:
812812
if k == key:
813813
yield v
814+
815+
816+
def convert_bytes_to_str(byte_value: int) -> str:
    """
    Convert a number of bytes into a human-readable string.

    Parameters
    ----------
    byte_value: int
        The number of bytes to convert.

    Returns
    -------
    str
        The value rendered with two decimals and the appropriate binary
        unit: B, KiB, MiB, GiB, or TiB (e.g. "512.00 MiB").
    """
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB']
    i = 0
    # Repeatedly scale by 1024 until the value fits the current unit,
    # capping at the largest available suffix (TiB).
    while byte_value >= 1024 and i < len(suffixes) - 1:
        byte_value /= 1024
        i += 1
    return f"{byte_value:.2f} {suffixes[i]}"

spikeinterface/core/frameslicerecording.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,18 @@ class FrameSliceRecording(BaseRecording):
1010
1111
Do not use this class directly but use `recording.frame_slice(...)`
1212
13+
Parameters
14+
----------
15+
parent_recording: BaseRecording
16+
start_frame: None or int
17+
Earliest included frame in the parent recording.
18+
Times are re-referenced to start_frame in the
19+
sliced object. Set to 0 by default.
20+
end_frame: None or int
21+
Latest frame in the parent recording. As for usual
22+
python slicing, the end frame is excluded.
23+
Set to the recording's total number of samples by
24+
default
1325
"""
1426

1527
def __init__(self, parent_recording, start_frame=None, end_frame=None):

spikeinterface/core/frameslicesorting.py

Lines changed: 50 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import warnings
33

44
from .basesorting import BaseSorting, BaseSortingSegment
5+
from .waveform_tools import has_exceeding_spikes
56

67

78
class FrameSliceSorting(BaseSorting):
@@ -11,27 +12,67 @@ class FrameSliceSorting(BaseSorting):
1112
1213
Do not use this class directly but use `sorting.frame_slice(...)`
1314
15+
When a recording is registered for the parent sorting,
16+
a corresponding sliced recording is registered to the sliced sorting.
17+
18+
Note that the returned sliced sorting may be empty.
19+
20+
Parameters
21+
----------
22+
parent_sorting: BaseSorting
23+
start_frame: None or int
24+
Earliest included frame in the parent sorting(/recording).
25+
Spike times(/traces) are re-referenced to start_frame in the
26+
sliced objects. Set to 0 by default.
27+
end_frame: None or int
28+
Latest frame in the parent sorting(/recording). As for usual
29+
python slicing, the end frame is excluded (such that the max
30+
spike frame in the sliced sorting is `end_frame - start_frame - 1`)
31+
If None (default), the end_frame is either:
32+
- The total number of samples, if a recording is assigned
33+
- The maximum spike frame + 1, if no recording is assigned
1434
"""
1535

1636
def __init__(self, parent_sorting, start_frame=None, end_frame=None):
1737
unit_ids = parent_sorting.get_unit_ids()
1838

1939
assert parent_sorting.get_num_segments() == 1, 'FrameSliceSorting work only with one segment'
2040

21-
if start_frame is not None or end_frame is None:
22-
parent_size = 0
23-
for u in parent_sorting.get_unit_ids():
24-
parent_size = np.max([parent_size, np.max(parent_sorting.get_unit_spike_train(u))])
2541

2642
if start_frame is None:
2743
start_frame = 0
28-
else:
29-
assert 0 <= start_frame < parent_size
44+
assert 0 <= start_frame, "Invalid value for start_frame: expected positive integer."
3045

31-
if end_frame is None:
32-
end_frame = parent_size + 1
46+
if parent_sorting.has_recording():
47+
# Pull default end_frame from recording
48+
parent_n_samples = parent_sorting._recording.get_total_samples()
49+
if end_frame is None:
50+
end_frame = parent_n_samples
51+
assert end_frame <= parent_n_samples, (
52+
"`end_frame` should be smaller than the sortings total number of samples."
53+
)
54+
assert start_frame <= parent_n_samples, (
55+
"`start_frame` should be smaller than the sortings total number of samples."
56+
)
57+
if has_exceeding_spikes(parent_sorting._recording, parent_sorting):
58+
raise ValueError(
59+
"The sorting object has spikes exceeding the recording duration. You have to remove those spikes "
60+
"with the `spikeinterface.curation.remove_excess_spikes()` function"
61+
)
3362
else:
34-
assert end_frame > start_frame, "'start_frame' must be smaller than 'end_frame'!"
63+
# Pull default end_frame from spikes
64+
if end_frame is None:
65+
max_spike_time = 0
66+
for u in parent_sorting.get_unit_ids():
67+
max_spike_time = np.max([max_spike_time, np.max(parent_sorting.get_unit_spike_train(u))])
68+
end_frame = max_spike_time + 1
69+
70+
assert start_frame < end_frame, (
71+
"`start_frame` should be smaller than `end_frame`. "
72+
"This may be due to start_frame >= max_spike_time, if the end frame "
73+
"was not specified explicitly."
74+
)
75+
3576

3677
BaseSorting.__init__(self,
3778
sampling_frequency=parent_sorting.get_sampling_frequency(),

spikeinterface/core/generate.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -516,7 +516,7 @@ def generate_lazy_recording(full_traces_size_GiB: float, seed=None) -> Generator
516516
num_channels = 1024
517517

518518
GiB_to_bytes = 1024** 3
519-
full_traces_size_bytes = full_traces_size_GiB * GiB_to_bytes
519+
full_traces_size_bytes = int(full_traces_size_GiB * GiB_to_bytes)
520520
num_samples = int(full_traces_size_bytes / (num_channels * dtype.itemsize))
521521
durations = [num_samples / sampling_frequency]
522522

@@ -525,6 +525,7 @@ def generate_lazy_recording(full_traces_size_GiB: float, seed=None) -> Generator
525525

526526
return recording
527527

528+
528529
if __name__ == '__main__':
529530
print(generate_recording())
530531
print(generate_sorting())

spikeinterface/core/job_tools.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,3 +397,5 @@ def function_wrapper(args):
397397
else:
398398
with threadpool_limits(limits=max_threads_per_process):
399399
return _func(segment_index, start_frame, end_frame, _worker_ctx)
400+
401+
Lines changed: 99 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,111 @@
1-
from spikeinterface.core import generate_sorting
1+
import warnings
2+
3+
import numpy as np
4+
from numpy.testing import assert_raises
5+
6+
from spikeinterface.core import NumpyRecording, NumpySorting
27

38

49
def test_FrameSliceSorting():
    """Check frame_slice on a single-segment sorting, with and without an
    attached recording (the default end_frame comes from the recording when
    one is registered, otherwise from the last spike)."""
    sf = 10
    nsamp = 1000
    max_spike_time = 900
    min_spike_time = 100
    unit_0_train = np.arange(min_spike_time + 10, max_spike_time - 10)
    spike_times = {
        "0": unit_0_train,
        "1": np.arange(min_spike_time, max_spike_time),
    }
    # Sorting with attached rec
    sorting = NumpySorting.from_dict([spike_times], sf)
    rec = NumpyRecording([np.zeros((nsamp, 5))], sampling_frequency=sf)
    sorting.register_recording(rec)
    # Sorting without attached rec
    sorting_norec = NumpySorting.from_dict([spike_times], sf)
    # Sorting with attached rec and exceeding spikes
    sorting_exceeding = NumpySorting.from_dict([spike_times], sf)
    rec_exceeding = NumpyRecording([np.zeros((max_spike_time - 1, 5))], sampling_frequency=sf)
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        sorting_exceeding.register_recording(rec_exceeding)

    mid_frame = nsamp // 2

    # duration of all slices is mid_frame. Spike trains are re-referenced to the start_time
    # Vary start_frame/end_frame combination
    start_frame, end_frame = None, None
    sub_sorting = sorting.frame_slice(start_frame, end_frame)
    assert np.array_equal(sub_sorting.get_unit_spike_train("0"), unit_0_train)
    assert sub_sorting._recording.get_total_samples() == nsamp
    # BUGFIX: slice the recording-less sorting (was slicing `sorting` again,
    # leaving the no-recording code path untested).
    sub_sorting_norec = sorting_norec.frame_slice(start_frame, end_frame)
    assert np.array_equal(sub_sorting_norec.get_unit_spike_train("0"), unit_0_train)

    start_frame, end_frame = None, mid_frame
    sub_sorting = sorting.frame_slice(start_frame, end_frame)
    assert np.array_equal(
        sub_sorting.get_unit_spike_train("0"),
        [t for t in unit_0_train if t < mid_frame]
    )
    assert sub_sorting._recording.get_total_samples() == mid_frame
    sub_sorting_norec = sorting_norec.frame_slice(start_frame, end_frame)
    assert np.array_equal(
        sub_sorting_norec.get_unit_spike_train("0"),
        sub_sorting.get_unit_spike_train("0")
    )

    start_frame, end_frame = mid_frame, None
    sub_sorting = sorting.frame_slice(start_frame, end_frame)
    assert np.array_equal(
        sub_sorting.get_unit_spike_train("0"),
        [t - mid_frame for t in unit_0_train if t >= mid_frame]
    )
    assert sub_sorting._recording.get_total_samples() == nsamp - mid_frame
    sub_sorting_norec = sorting_norec.frame_slice(start_frame, end_frame)
    assert np.array_equal(
        sub_sorting_norec.get_unit_spike_train("0"),
        sub_sorting.get_unit_spike_train("0")
    )

    start_frame, end_frame = mid_frame - 10, mid_frame + 10
    sub_sorting = sorting.frame_slice(start_frame, end_frame)
    assert np.array_equal(
        sub_sorting.get_unit_spike_train("0"),
        [t - start_frame for t in unit_0_train if start_frame <= t < end_frame]
    )
    assert sub_sorting._recording.get_total_samples() == 20
    sub_sorting_norec = sorting_norec.frame_slice(start_frame, end_frame)
    assert np.array_equal(
        sub_sorting_norec.get_unit_spike_train("0"),
        sub_sorting.get_unit_spike_train("0")
    )

    # Edge cases: start_frame > end_frame
    assert_raises(Exception, sorting.frame_slice, 100, 90)

    # Edge case: start_frame > max_spike_time
    # Fails without rec (since end_frame is last spike)
    assert_raises(Exception, sorting_norec.frame_slice, max_spike_time + 1, None)
    # Empty sorting with rec
    sub_sorting = sorting.frame_slice(max_spike_time + 1, None)
    assert np.array_equal(
        sub_sorting.get_unit_spike_train("1"),
        []
    )

    # Edge case: end_frame <= min_spike_time
    # Empty sorting
    sub_sorting = sorting.frame_slice(None, min_spike_time)
    assert np.array_equal(sub_sorting.get_unit_spike_train("1"), [])

    # Edge case: start_frame = end_frame
    assert_raises(Exception, sorting.frame_slice, max_spike_time, max_spike_time)

    # Sorting with exceeding spikes
    assert_raises(Exception, sorting_exceeding.frame_slice, None, None)
30109

31110
if __name__ == '__main__':
32111
test_FrameSliceSorting()

spikeinterface/core/tests/test_generate.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,9 @@ def test_generate_lazy_recording():
9898
print(f"Difference between the last two {(memory_after_traces_MiB - traces_size_MiB)} MiB")
9999

100100
(memory_after_instanciation_MiB + traces_size_MiB) == pytest.approx(memory_after_traces_MiB, rel=relative_tolerance)
101+
102+
103+
def test_generate_lazy_recording_under_giga():
    """Half a GiB of lazily-generated traces should report exactly 512 MiB."""
    half_gib_recording = generate_lazy_recording(full_traces_size_GiB=0.5)
    assert half_gib_recording.get_memory_size() == "512.00 MiB"

spikeinterface/extractors/neoextractors/openephys.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,15 +81,15 @@ class OpenEphysBinaryRecordingExtractor(NeoBaseRecordingExtractor):
8181
Parameters
8282
----------
8383
folder_path: str
84-
The folder path to load the recordings from.
84+
The folder path to the root folder (containing the record node folders).
8585
load_sync_channel : bool
8686
If False (default) and a SYNC channel is present (e.g. Neuropixels), this is not loaded.
8787
If True, the SYNC channel is loaded and can be accessed in the analog signals.
88-
load_sync_channel : bool
88+
load_sync_timestamps : bool
8989
If True, the synchronized_timestamps are loaded and set as times to the recording.
9090
If False (default), only the t_start and sampling rate are set, and timestamps are assumed
9191
to be uniform and linearly increasing.
92-
experiment_name: str, list, or None
92+
experiment_names: str, list, or None
9393
If multiple experiments are available, this argument allows users to select one
9494
or more experiments. If None, all experiments are loaded as blocks.
9595
E.g. 'experiment_names="experiment2"', 'experiment_names=["experiment1", "experiment2"]'

spikeinterface/extractors/phykilosortextractors.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,16 +35,19 @@ def __init__(self, folder_path, exclude_cluster_groups=None, keep_good_only=Fals
3535

3636
phy_folder = Path(folder_path)
3737

38-
spike_times = np.load(phy_folder / 'spike_times.npy')
38+
spike_times = np.load(phy_folder / 'spike_times.npy').astype(int)
3939

4040
if (phy_folder / 'spike_clusters.npy').is_file():
4141
spike_clusters = np.load(phy_folder / 'spike_clusters.npy')
4242
else:
4343
spike_clusters = np.load(phy_folder / 'spike_templates.npy')
4444

45+
# spike_times and spike_clusters can be 2d sometimes --> convert to 1d.
46+
spike_times = np.atleast_1d(spike_times.squeeze())
47+
spike_clusters = np.atleast_1d(spike_clusters.squeeze())
48+
4549
clust_id = np.unique(spike_clusters)
4650
unit_ids = list(clust_id)
47-
spike_times = spike_times.astype(int)
4851
params = read_python(str(phy_folder / 'params.py'))
4952
sampling_frequency = params['sample_rate']
5053

0 commit comments

Comments
 (0)