
Commit 1e41c25

Merge pull request #1357 from samuelgarcia/pipeline_to_file
Add gather_func concept to ChunkRecordingExecutor.
2 parents: 5d3a509 + 2170b50 · commit 1e41c25

6 files changed (+358, -83 lines)


spikeinterface/core/job_tools.py

Lines changed: 13 additions & 7 deletions
@@ -258,6 +258,9 @@ class ChunkRecordingExecutor:
         If True, a progress bar is printed to monitor the progress of the process
     handle_returns: bool
         If True, the function can return values
+    gather_func: None or callable
+        Optional function that is called in the main thread and retrieves the results of each worker.
+        This function can be used instead of `handle_returns` to implement custom storage on-the-fly.
     n_jobs: int
         Number of jobs to be used (default 1). Use -1 to use as many jobs as number of cores
     total_memory: str
@@ -277,15 +280,16 @@ class ChunkRecordingExecutor:
         Limit the number of thread per process using threadpoolctl modules.
         This used only when n_jobs>1
         If None, no limits.
-
+
+
     Returns
     -------
     res: list
         If 'handle_returns' is True, the results for each chunk process
     """

     def __init__(self, recording, func, init_func, init_args, verbose=False, progress_bar=False, handle_returns=False,
-                 n_jobs=1, total_memory=None, chunk_size=None, chunk_memory=None, chunk_duration=None,
+                 gather_func=None, n_jobs=1, total_memory=None, chunk_size=None, chunk_memory=None, chunk_duration=None,
                  mp_context=None, job_name='', max_threads_per_process=1):
         self.recording = recording
         self.func = func
@@ -303,6 +307,7 @@ def __init__(self, recording, func, init_func, init_args, verbose=False, progres
         self.progress_bar = progress_bar

         self.handle_returns = handle_returns
+        self.gather_func = gather_func

         self.n_jobs = ensure_n_jobs(recording, n_jobs=n_jobs)
         self.chunk_size = ensure_chunk_size(recording,
@@ -339,6 +344,8 @@ def run(self):
                 res = self.func(segment_index, frame_start, frame_stop, worker_ctx)
                 if self.handle_returns:
                     returns.append(res)
+                if self.gather_func is not None:
+                    self.gather_func(res)
         else:
             n_jobs = min(self.n_jobs, len(all_chunks))
             ######## Do you want to limit the number of threads per process?
@@ -357,12 +364,11 @@ def run(self):
                 if self.progress_bar:
                     results = tqdm(results, desc=self.job_name, total=len(all_chunks))

-                if self.handle_returns:
-                    for res in results:
+                for res in results:
+                    if self.handle_returns:
                         returns.append(res)
-                else:
-                    for res in results:
-                        pass
+                    if self.gather_func is not None:
+                        self.gather_func(res)

         return returns

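For readers of this diff, here is a hedged usage sketch (not part of the commit) of the new gather_func hook: per-chunk results are streamed back in the main thread instead of being accumulated via handle_returns. The worker functions init_count and count_nonzero_chunk are hypothetical stand-ins for a real per-chunk job:

import numpy as np
from spikeinterface.core import generate_recording
from spikeinterface.core.job_tools import ChunkRecordingExecutor

def init_count(recording):
    # build the worker context once per worker
    return dict(recording=recording)

def count_nonzero_chunk(segment_index, start_frame, end_frame, worker_ctx):
    # per-chunk job: count the non-zero samples in this chunk
    traces = worker_ctx['recording'].get_traces(segment_index=segment_index,
                                                start_frame=start_frame, end_frame=end_frame)
    return int(np.count_nonzero(traces))

recording = generate_recording(durations=[2.])
counts = []
processor = ChunkRecordingExecutor(recording, count_nonzero_chunk, init_count, (recording,),
                                   gather_func=counts.append,  # called in the main thread for every chunk result
                                   n_jobs=1, chunk_duration='200ms')
processor.run()
total_nonzero = sum(counts)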
spikeinterface/core/tests/test_job_tools.py

Lines changed: 29 additions & 10 deletions
@@ -4,7 +4,7 @@
 from spikeinterface.core import generate_recording

 from spikeinterface.core.job_tools import divide_segment_into_chunks, ensure_n_jobs, ensure_chunk_size, \
-    ChunkRecordingExecutor, fix_job_kwargs, split_job_kwargs
+    ChunkRecordingExecutor, fix_job_kwargs, split_job_kwargs, divide_recording_into_chunks


 def test_divide_segment_into_chunks():
@@ -95,18 +95,37 @@ def test_ChunkRecordingExecutor():
                                        n_jobs=1, chunk_size=None)
     processor.run()

-    # chunk + loop
+    # simple gathering function
+    def gathering_result(res):
+        # print(res)
+        pass
+
+    # chunk + loop + gather_func
     processor = ChunkRecordingExecutor(recording, func, init_func, init_args,
-                                       verbose=True, progress_bar=False,
+                                       verbose=True, progress_bar=False, gather_func=gathering_result,
                                        n_jobs=1, chunk_memory="500k")
     processor.run()

-    # chunk + parallel
+    # more adavnce trick : gathering using class with callable
+    class GatherClass:
+        def __init__(self):
+            self.pos = 0
+
+        def __call__(self, res):
+            self.pos += 1
+            # print(self.pos, res)
+            pass
+    gathering_func2 = GatherClass()
+
+    # chunk + parallel + gather_func
     processor = ChunkRecordingExecutor(recording, func, init_func, init_args,
-                                       verbose=True, progress_bar=True,
+                                       verbose=True, progress_bar=True, gather_func=gathering_func2,
                                        n_jobs=2, chunk_duration="200ms",
                                        job_name='job_name')
     processor.run()
+    num_chunks = len(divide_recording_into_chunks(recording, processor.chunk_size))
+
+    assert gathering_func2.pos == num_chunks

     # chunk + parallel + spawn
     processor = ChunkRecordingExecutor(recording, func, init_func, init_args,
@@ -153,9 +172,9 @@ def test_split_job_kwargs():


 if __name__ == '__main__':
-    test_divide_segment_into_chunks()
-    test_ensure_n_jobs()
-    test_ensure_chunk_size()
+    # test_divide_segment_into_chunks()
+    # test_ensure_n_jobs()
+    # test_ensure_chunk_size()
     test_ChunkRecordingExecutor()
-    test_fix_job_kwargs()
-    test_split_job_kwargs()
+    # test_fix_job_kwargs()
+    # test_split_job_kwargs()

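The GatherClass in the test above only counts chunks; the same callable-class idea can cover the "custom storage on-the-fly" use case mentioned in the new docstring. A minimal sketch (not from this commit), assuming each chunk result is a 1-d numpy array:

import numpy as np

class GatherToBinaryFile:
    # hypothetical gatherer: append every chunk result to one binary file
    def __init__(self, path):
        self.file = open(path, 'wb')
        self.num_samples = 0

    def __call__(self, res):
        # res is whatever `func` returned for one chunk
        np.asarray(res, dtype='float32').tofile(self.file)
        self.num_samples += np.asarray(res).size

    def close(self):
        self.file.close()

# would be passed as gather_func=GatherToBinaryFile('chunk_results.raw') to ChunkRecordingExecutor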
spikeinterface/sortingcomponents/peak_detection.py

Lines changed: 33 additions & 15 deletions
@@ -8,7 +8,7 @@

 from ..core import get_chunk_with_margin

-from .peak_pipeline import PipelineNode, check_graph, run_nodes
+from .peak_pipeline import PipelineNode, check_graph, run_nodes, GatherToMemory, GatherToNpy
 from .tools import make_multi_method_doc

 try:
@@ -28,7 +28,9 @@
                    ('amplitude', 'float64'), ('segment_ind', 'int64')]


-def detect_peaks(recording, method='by_channel', pipeline_nodes=None, **kwargs):
+def detect_peaks(recording, method='by_channel', pipeline_nodes=None,
+                 gather_mode='memory', folder=None, names=None,
+                 **kwargs):
     """Peak detection based on threshold crossing in term of k x MAD.

     In 'by_channel' : peak are detected in each channel independently
@@ -42,6 +44,18 @@ def detect_peaks(recording, method='by_channel', pipeline_nodes=None, **kwargs):
     pipeline_nodes: None or list[PipelineNode]
         Optional additional PipelineNode need to computed just after detection time.
         This avoid reading the recording multiple times.
+    gather_mode: str
+        How to gather the results:
+
+        * "memory": results are returned as in-memory numpy arrays
+
+        * "npy": results are stored to .npy files in `folder`
+
+    folder: str or Path
+        If gather_mode is "npy", the folder where the files are created.
+    names: list
+        List of strings with file stems associated with returns.
+
     {method_doc}
     {job_doc}

@@ -66,27 +80,31 @@ def detect_peaks(recording, method='by_channel', pipeline_nodes=None, **kwargs):
     method_args = method_class.check_params(recording, **method_kwargs)

     extra_margin = 0
-    if pipeline_nodes is not None:
+    if pipeline_nodes is None:
+        squeeze_output = True
+    else:
         check_graph(pipeline_nodes)
         extra_margin = max(node.get_trace_margin() for node in pipeline_nodes)
-
+        squeeze_output = False
+
+    if gather_mode == 'memory':
+        gather_func = GatherToMemory()
+    elif gather_mode == 'npy':
+        gather_func = GatherToNpy(folder, names)
+    else:
+        raise ValueError(f"Wrong gather_mode : {gather_mode}. Available gather modes: 'memory' | 'npy'")
+
     func = _detect_peaks_chunk
     init_func = _init_worker_detect_peaks
     init_args = (recording, method, method_args, extra_margin, pipeline_nodes)
     processor = ChunkRecordingExecutor(recording, func, init_func, init_args,
-                                       handle_returns=True, job_name='detect peaks',
+                                       gather_func=gather_func, job_name='detect peaks',
                                        mp_context=mp_context, **job_kwargs)
-    outputs = processor.run()
-
-    if pipeline_nodes is None:
-        peaks = np.concatenate(outputs)
-        return peaks
-    else:
-        outs_concat = ()
-        for output_node in zip(*outputs):
-            outs_concat += (np.concatenate(output_node, axis=0), )
-        return outs_concat
+    processor.run()

+    outs = gather_func.finalize_buffers(squeeze_output=squeeze_output)
+    return outs
+

 def _init_worker_detect_peaks(recording, method, method_args, extra_margin, pipeline_nodes):
     """Initialize a worker for detecting peaks."""

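As an illustration of the reworked detect_peaks API, a hedged usage sketch (not part of the commit; detect_threshold and the job keyword arguments are assumed to be forwarded through **kwargs as before, and recording is an existing RecordingExtractor):

from spikeinterface.sortingcomponents.peak_detection import detect_peaks

# default in-memory gathering: peaks come back as a numpy structured array
peaks = detect_peaks(recording, method='by_channel', gather_mode='memory',
                     detect_threshold=5, n_jobs=2, chunk_duration='100ms', progress_bar=True)

# on-disk gathering: each output buffer is written to <folder>/<name>.npy
peaks = detect_peaks(recording, method='by_channel', gather_mode='npy',
                     folder='peak_detection_output', names=['peaks'],
                     detect_threshold=5, n_jobs=2, chunk_duration='100ms', progress_bar=True)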