From 6a5a060be3da344d6ccc64510abc39d9c06ee867 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Fri, 11 Oct 2024 09:32:45 +0100 Subject: [PATCH 1/3] Update infrastructure. --- .gitignore | 1 + doc/how_to/index.rst | 2 +- examples/how_to_new/combine_recordings.rst | 125 +++++++++++++++++++++ 3 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 examples/how_to_new/combine_recordings.rst diff --git a/.gitignore b/.gitignore index 9c4dda937c..40f7aa1c75 100644 --- a/.gitignore +++ b/.gitignore @@ -117,6 +117,7 @@ examples/tutorials/*.svg doc/_build/* doc/tutorials/* doc/sources/* +doc/how_to_new/* *sg_execution_times.rst examples/getting_started/tmp_* diff --git a/doc/how_to/index.rst b/doc/how_to/index.rst index 5d7eae9003..1cd457a321 100644 --- a/doc/how_to/index.rst +++ b/doc/how_to/index.rst @@ -10,8 +10,8 @@ Guides on how to solve specific, short problems in SpikeInterface. Learn how to. handle_drift analyze_neuropixels load_matlab_data - combine_recordings process_by_channel_group load_your_data_into_sorting benchmark_with_hybrid_recordings drift_with_lfp + ../how_to_new/combine_recordings diff --git a/examples/how_to_new/combine_recordings.rst b/examples/how_to_new/combine_recordings.rst new file mode 100644 index 0000000000..4a088f01b1 --- /dev/null +++ b/examples/how_to_new/combine_recordings.rst @@ -0,0 +1,125 @@ +Combine recordings in SpikeInterface +==================================== + +In this tutorial we will walk through combining multiple recording objects. Sometimes this occurs due to hardware +settings (e.g. Intan software has a default setting of new files every 1 minute) or the experimenter decides to +split their recording into multiple files for different experimental conditions. If the probe has not been moved, +however, then during sorting it would likely make sense to combine these individual reocrding objects into one +recording object. 
+ +**Why Combine?** + +Combining your data into a single recording allows you to have consistent labels (:code:`unit_ids`) across the whole recording. + +Spike sorters seek to sort spikes within a recording into groups of units. Thus if multiple :code:`Recording` objects have the +exact same probe location within some tissue and are occurring continuously in time the units between the :code:`Recordings` will +be the same. But if we sort each recording separately the unit ids given by the sorter will not be the same between each +:code:`Sorting` and so we will need extensive post-processing to try to figure out which units are actually the same between +each :code:`Sorting`. By combining everything into one :code:`Recording` all spikes will be sorted into the same pool of units. + +Combining recordings continuous in time +--------------------------------------- + +Some file formats (e.g. Intan) automatically create new files every minute or few minutes (with a setting that can be user +controlled). Other times an experimenter separates their recording for experimental reasons. SpikeInterface provides two +tools for bringing together these files into one :code:`Recording` object. + +Concatenating Recordings +^^^^^^^^^^^^^^^^^^^^^^^^ + +First let's cover concatenating recordings together. This will generate a mono-segment recording object. Let's load a set of +Intan files. + +.. 
code-block:: python + + import spikeinterface as si # this is only core + import spikeinterface.extractors as se + + intan_rec_one = se.read_intan('./intan_example_01.rhd', stream_id='0') # 0 is the amplifier data for Intan + intan_rec_two = se.read_intan('./intan_example_02.rhd', stream_id='0') + + print(intan_rec_one) + + """ + IntanRecordingExtractor: 64 Channels - 30.0kHz - 1 segments - 1,800,000 samples + 60.00s (1.00 minutes) uint16 dtype - 219.73 MiB + """ + + print(intan_rec_two) + + """ + IntanRecordingExtractor: 64 Channels - 30.0Khz - 1 segments - 1,800,000 samples + 60.00s (1.00 minutes) - uin16 dtype - 219.73 MiB + """ + + concatenated_recording = si.concatenate_recordings([intan_rec_one, intan_rec_two]) + + print(concatenated_recording) + + """ + ConcatenateSegmentRecording: 64 Channels - 30.0kHz - 1 segments - 3,600,000 samples + 120.00s (2.00 minutes) - uint16 dtype - 429.47 MiB + """ + +As we can see if we take the sample number (1,800,000) or time (60.00s) of each recording and add them together +we get the concatenated sample number (3,600,000) and time (120.00s). + +If we know that we will deal with a lot of files we can actually work our way through a series of them relatively quickly by doing +the following: + +.. code-block:: python + + + # make sure to use the appropriate paths for adapting to your own pipeline + # adapt the extractor for your desired file format as well + list_of_files = ['file1.rhd', 'file2.rhd', 'file3.rhd', 'file4.rhd'] + list_of_recordings = [] + for file in list_of_files: + list_of_recordings.append(se.read_intan(file, stream_id='0')) + recording = si.concatenate_recordings(list_of_recordings) + + +Append Recordings +^^^^^^^^^^^^^^^^^ + +If you wish to keep each recording as a separate segment identity (e.g. if doing baseline, stim, poststim) you can use +:code:`append` instead of :code:`concatenate`. 
This has the benefit of allowing you to keep different parts of data +separate, but it is important to note that not all sorters can handle multi-segment objects. + +If we use the same Intan files as above (:code:`intan_rec_one` and :code:`intan_rec_two`) we can see what happens if we +append them instead of concatenate them. + +.. code-block:: python + + recording = si.append_recordings([intan_rec_one, intan_rec_two]) + + print(recording) + + """ + AppendSegmentRecording: 64 Channels - 30.0khz - 2 segments - 3,600,000 samples + 120.00s (2.00 minutes) - uint16 dtype - 439.47 MiB + Segments: + Samples: 1,800,000 | 1,800,000 + Durations: 60.00s (1.00 minutes) | 60.00s (1.00 minutes) + Memory: 219.17 MiB | 219.17 MiB + """ + +In this case we see that our recording has two segments instead of one segment. The total sample number (3,600,00) +and the total time (120.00s), however are still the same as our example above. We can see that each segment is +exactly equivalent to one of the :code:`IntanRecordingExtractor`'s above. + + +Pitfalls +-------- + +It's important to remember that these operations are directional. So, + +.. code-block:: python + + recording_forward = si.concantenate_recordings([intan_rec_one, intan_rec_two]) + recording_backward = si.concantenate_recordings([intan_rec_two, intan_rec_one]) + + recording_forward != recording_backward + + +This is important because your spike times will be relative to the start of your recording. From 47422f5123bb94cee6a543e6574dc5d45df81c87 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Fri, 11 Oct 2024 10:57:41 +0100 Subject: [PATCH 2/3] Convert .rst to .py file and fix everything to let it build. 
--- doc/conf.py | 4 +- doc/how_to/index.rst | 3 +- examples/how_to_new/README.rst | 6 + examples/how_to_new/combine_recordings.rst | 125 ------------------ .../how_to_new/plot_combine_recordings.py | 92 +++++++++++++ 5 files changed, 102 insertions(+), 128 deletions(-) create mode 100644 examples/how_to_new/README.rst delete mode 100644 examples/how_to_new/combine_recordings.rst create mode 100644 examples/how_to_new/plot_combine_recordings.py diff --git a/doc/conf.py b/doc/conf.py index e3d58ca8f2..9c60675db1 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -120,8 +120,8 @@ # for sphinx gallery plugin sphinx_gallery_conf = { 'only_warn_on_example_error': True, - 'examples_dirs': ['../examples/tutorials'], - 'gallery_dirs': ['tutorials' ], # path where to save gallery generated examples + 'examples_dirs': ['../examples/tutorials', '../examples/how_to_new'], + 'gallery_dirs': ['tutorials', 'how_to_new'], # path where to save gallery generated examples 'subsection_order': ExplicitOrder([ '../examples/tutorials/core', '../examples/tutorials/extractors', diff --git a/doc/how_to/index.rst b/doc/how_to/index.rst index 1cd457a321..b0584dc455 100644 --- a/doc/how_to/index.rst +++ b/doc/how_to/index.rst @@ -14,4 +14,5 @@ Guides on how to solve specific, short problems in SpikeInterface. Learn how to. load_your_data_into_sorting benchmark_with_hybrid_recordings drift_with_lfp - ../how_to_new/combine_recordings + combine_recordings + ../how_to_new/plot_combine_recordings diff --git a/examples/how_to_new/README.rst b/examples/how_to_new/README.rst new file mode 100644 index 0000000000..e74c7b416d --- /dev/null +++ b/examples/how_to_new/README.rst @@ -0,0 +1,6 @@ +Unused +====== + +This file is required by sphinx to build sphinx pages. But, we do not use the gallery +page features for the How To and so do not need it. Instead, we have a custom .rst file +in the How To folder that points to the sphinx-gallery outputs. 
diff --git a/examples/how_to_new/combine_recordings.rst b/examples/how_to_new/combine_recordings.rst deleted file mode 100644 index 4a088f01b1..0000000000 --- a/examples/how_to_new/combine_recordings.rst +++ /dev/null @@ -1,125 +0,0 @@ -Combine recordings in SpikeInterface -==================================== - -In this tutorial we will walk through combining multiple recording objects. Sometimes this occurs due to hardware -settings (e.g. Intan software has a default setting of new files every 1 minute) or the experimenter decides to -split their recording into multiple files for different experimental conditions. If the probe has not been moved, -however, then during sorting it would likely make sense to combine these individual reocrding objects into one -recording object. - -**Why Combine?** - -Combining your data into a single recording allows you to have consistent labels (:code:`unit_ids`) across the whole recording. - -Spike sorters seek to sort spikes within a recording into groups of units. Thus if multiple :code:`Recording` objects have the -exact same probe location within some tissue and are occurring continuously in time the units between the :code:`Recordings` will -be the same. But if we sort each recording separately the unit ids given by the sorter will not be the same between each -:code:`Sorting` and so we will need extensive post-processing to try to figure out which units are actually the same between -each :code:`Sorting`. By combining everything into one :code:`Recording` all spikes will be sorted into the same pool of units. - -Combining recordings continuous in time ---------------------------------------- - -Some file formats (e.g. Intan) automatically create new files every minute or few minutes (with a setting that can be user -controlled). Other times an experimenter separates their recording for experimental reasons. SpikeInterface provides two -tools for bringing together these files into one :code:`Recording` object. 
- -Concatenating Recordings -^^^^^^^^^^^^^^^^^^^^^^^^ - -First let's cover concatenating recordings together. This will generate a mono-segment recording object. Let's load a set of -Intan files. - -.. code-block:: python - - import spikeinterface as si # this is only core - import spikeinterface.extractors as se - - intan_rec_one = se.read_intan('./intan_example_01.rhd', stream_id='0') # 0 is the amplifier data for Intan - intan_rec_two = se.read_intan('./intan_example_02.rhd', stream_id='0') - - print(intan_rec_one) - - """ - IntanRecordingExtractor: 64 Channels - 30.0kHz - 1 segments - 1,800,000 samples - 60.00s (1.00 minutes) uint16 dtype - 219.73 MiB - """ - - print(intan_rec_two) - - """ - IntanRecordingExtractor: 64 Channels - 30.0Khz - 1 segments - 1,800,000 samples - 60.00s (1.00 minutes) - uin16 dtype - 219.73 MiB - """ - - concatenated_recording = si.concatenate_recordings([intan_rec_one, intan_rec_two]) - - print(concatenated_recording) - - """ - ConcatenateSegmentRecording: 64 Channels - 30.0kHz - 1 segments - 3,600,000 samples - 120.00s (2.00 minutes) - uint16 dtype - 429.47 MiB - """ - -As we can see if we take the sample number (1,800,000) or time (60.00s) of each recording and add them together -we get the concatenated sample number (3,600,000) and time (120.00s). - -If we know that we will deal with a lot of files we can actually work our way through a series of them relatively quickly by doing -the following: - -.. code-block:: python - - - # make sure to use the appropriate paths for adapting to your own pipeline - # adapt the extractor for your desired file format as well - list_of_files = ['file1.rhd', 'file2.rhd', 'file3.rhd', 'file4.rhd'] - list_of_recordings = [] - for file in list_of_files: - list_of_recordings.append(se.read_intan(file, stream_id='0')) - recording = si.concatenate_recordings(list_of_recordings) - - -Append Recordings -^^^^^^^^^^^^^^^^^ - -If you wish to keep each recording as a separate segment identity (e.g. 
if doing baseline, stim, poststim) you can use -:code:`append` instead of :code:`concatenate`. This has the benefit of allowing you to keep different parts of data -separate, but it is important to note that not all sorters can handle multi-segment objects. - -If we use the same Intan files as above (:code:`intan_rec_one` and :code:`intan_rec_two`) we can see what happens if we -append them instead of concatenate them. - -.. code-block:: python - - recording = si.append_recordings([intan_rec_one, intan_rec_two]) - - print(recording) - - """ - AppendSegmentRecording: 64 Channels - 30.0khz - 2 segments - 3,600,000 samples - 120.00s (2.00 minutes) - uint16 dtype - 439.47 MiB - Segments: - Samples: 1,800,000 | 1,800,000 - Durations: 60.00s (1.00 minutes) | 60.00s (1.00 minutes) - Memory: 219.17 MiB | 219.17 MiB - """ - -In this case we see that our recording has two segments instead of one segment. The total sample number (3,600,00) -and the total time (120.00s), however are still the same as our example above. We can see that each segment is -exactly equivalent to one of the :code:`IntanRecordingExtractor`'s above. - - -Pitfalls --------- - -It's important to remember that these operations are directional. So, - -.. code-block:: python - - recording_forward = si.concantenate_recordings([intan_rec_one, intan_rec_two]) - recording_backward = si.concantenate_recordings([intan_rec_two, intan_rec_one]) - - recording_forward != recording_backward - - -This is important because your spike times will be relative to the start of your recording. 
diff --git a/examples/how_to_new/plot_combine_recordings.py b/examples/how_to_new/plot_combine_recordings.py new file mode 100644 index 0000000000..e61880e769 --- /dev/null +++ b/examples/how_to_new/plot_combine_recordings.py @@ -0,0 +1,92 @@ +""" +==================================== +Combine recordings in SpikeInterface +==================================== + +In this tutorial, we will walk through combining multiple recording objects. Sometimes this occurs due to hardware +settings (e.g., Intan software has a default setting of new files every 1 minute) or the experimenter decides to +split their recording into multiple files for different experimental conditions. If the probe has not been moved, +however, then during sorting it would likely make sense to combine these individual recording objects into one +recording object. + +------------ +Why Combine? +------------ + +Combining your data into a single recording allows you to have consistent labels (`unit_ids`) across the whole recording. + +Spike sorters seek to sort spikes within a recording into groups of units. Thus if multiple `Recording` objects have the +exact same probe location within some tissue and are occurring continuously in time, the units between the `Recordings` will +be the same. But if we sort each recording separately, the unit ids given by the sorter will not be the same between each +`Sorting`, and so we will need extensive post-processing to try to figure out which units are actually the same between +each `Sorting`. By combining everything into one `Recording`, all spikes will be sorted into the same pool of units. + +--------------------------------------- +Combining recordings continuous in time +--------------------------------------- + +Some file formats (e.g., Intan) automatically create new files every minute or few minutes (with a setting that can be user +controlled). Other times an experimenter separates their recording for experimental reasons. 
SpikeInterface provides two +tools for bringing together these files into one `Recording` object. +""" + +# %% +# ------------------------ +# Concatenating Recordings +# ------------------------ + +# First, let's cover concatenating recordings together. This will generate a mono-segment +# recording object. Let's generate two example recordings to work with. + +import spikeinterface as si # This is only core +import spikeinterface.extractors as se + +recording_one, _ = si.generate_ground_truth_recording(durations=[25]) +recording_two, _ = si.generate_ground_truth_recording(durations=[25]) + +print(recording_one) + +print(recording_two) + +# %% +# Next, we will concatenate these recordings together. + +concatenated_recording = si.concatenate_recordings([recording_one, recording_two]) + +print(concatenated_recording) + +# %% +# If we know that we will deal with a lot of files, we can actually work our +# way through a series of them relatively quickly by doing + +list_of_recs = [si.generate_ground_truth_recording(durations=[25])[0] for _ in range(4)] +list_of_recordings = [] +for rec in list_of_recs: + list_of_recordings.append(rec) +recording = si.concatenate_recordings(list_of_recordings) + +# %% +# ----------------- +# Append Recordings +# ----------------- +# +# If you wish to keep each recording as a separate segment identity (e.g. if doing baseline, stim, poststim) you can use +# `append` instead of `concatenate`. This has the benefit of allowing you to keep different parts of data +# separate, but it is important to note that not all sorters can handle multi-segment objects. + +recording = si.append_recordings([recording_one, recording_two]) + +print(recording) + +# %% +# -------- +# Pitfalls +# -------- +# +# It's important to remember that these operations are directional. 
So: + +recording_forward = si.concatenate_recordings([recording_one, recording_two]) +recording_backward = si.concatenate_recordings([recording_two, recording_one]) + +# %% +# This is important because your spike times will be relative to the start of your recording. From 338bb538c307af04705b4b727a59bcb261dd0085 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Fri, 11 Oct 2024 11:15:28 +0100 Subject: [PATCH 3/3] Remove original version. --- doc/how_to/combine_recordings.rst | 125 ------------------------------ doc/how_to/index.rst | 1 - 2 files changed, 126 deletions(-) delete mode 100644 doc/how_to/combine_recordings.rst diff --git a/doc/how_to/combine_recordings.rst b/doc/how_to/combine_recordings.rst deleted file mode 100644 index 4a088f01b1..0000000000 --- a/doc/how_to/combine_recordings.rst +++ /dev/null @@ -1,125 +0,0 @@ -Combine recordings in SpikeInterface -==================================== - -In this tutorial we will walk through combining multiple recording objects. Sometimes this occurs due to hardware -settings (e.g. Intan software has a default setting of new files every 1 minute) or the experimenter decides to -split their recording into multiple files for different experimental conditions. If the probe has not been moved, -however, then during sorting it would likely make sense to combine these individual reocrding objects into one -recording object. - -**Why Combine?** - -Combining your data into a single recording allows you to have consistent labels (:code:`unit_ids`) across the whole recording. - -Spike sorters seek to sort spikes within a recording into groups of units. Thus if multiple :code:`Recording` objects have the -exact same probe location within some tissue and are occurring continuously in time the units between the :code:`Recordings` will -be the same. 
But if we sort each recording separately the unit ids given by the sorter will not be the same between each -:code:`Sorting` and so we will need extensive post-processing to try to figure out which units are actually the same between -each :code:`Sorting`. By combining everything into one :code:`Recording` all spikes will be sorted into the same pool of units. - -Combining recordings continuous in time ---------------------------------------- - -Some file formats (e.g. Intan) automatically create new files every minute or few minutes (with a setting that can be user -controlled). Other times an experimenter separates their recording for experimental reasons. SpikeInterface provides two -tools for bringing together these files into one :code:`Recording` object. - -Concatenating Recordings -^^^^^^^^^^^^^^^^^^^^^^^^ - -First let's cover concatenating recordings together. This will generate a mono-segment recording object. Let's load a set of -Intan files. - -.. code-block:: python - - import spikeinterface as si # this is only core - import spikeinterface.extractors as se - - intan_rec_one = se.read_intan('./intan_example_01.rhd', stream_id='0') # 0 is the amplifier data for Intan - intan_rec_two = se.read_intan('./intan_example_02.rhd', stream_id='0') - - print(intan_rec_one) - - """ - IntanRecordingExtractor: 64 Channels - 30.0kHz - 1 segments - 1,800,000 samples - 60.00s (1.00 minutes) uint16 dtype - 219.73 MiB - """ - - print(intan_rec_two) - - """ - IntanRecordingExtractor: 64 Channels - 30.0Khz - 1 segments - 1,800,000 samples - 60.00s (1.00 minutes) - uin16 dtype - 219.73 MiB - """ - - concatenated_recording = si.concatenate_recordings([intan_rec_one, intan_rec_two]) - - print(concatenated_recording) - - """ - ConcatenateSegmentRecording: 64 Channels - 30.0kHz - 1 segments - 3,600,000 samples - 120.00s (2.00 minutes) - uint16 dtype - 429.47 MiB - """ - -As we can see if we take the sample number (1,800,000) or time (60.00s) of each recording and add them 
together -we get the concatenated sample number (3,600,000) and time (120.00s). - -If we know that we will deal with a lot of files we can actually work our way through a series of them relatively quickly by doing -the following: - -.. code-block:: python - - - # make sure to use the appropriate paths for adapting to your own pipeline - # adapt the extractor for your desired file format as well - list_of_files = ['file1.rhd', 'file2.rhd', 'file3.rhd', 'file4.rhd'] - list_of_recordings = [] - for file in list_of_files: - list_of_recordings.append(se.read_intan(file, stream_id='0')) - recording = si.concatenate_recordings(list_of_recordings) - - -Append Recordings -^^^^^^^^^^^^^^^^^ - -If you wish to keep each recording as a separate segment identity (e.g. if doing baseline, stim, poststim) you can use -:code:`append` instead of :code:`concatenate`. This has the benefit of allowing you to keep different parts of data -separate, but it is important to note that not all sorters can handle multi-segment objects. - -If we use the same Intan files as above (:code:`intan_rec_one` and :code:`intan_rec_two`) we can see what happens if we -append them instead of concatenate them. - -.. code-block:: python - - recording = si.append_recordings([intan_rec_one, intan_rec_two]) - - print(recording) - - """ - AppendSegmentRecording: 64 Channels - 30.0khz - 2 segments - 3,600,000 samples - 120.00s (2.00 minutes) - uint16 dtype - 439.47 MiB - Segments: - Samples: 1,800,000 | 1,800,000 - Durations: 60.00s (1.00 minutes) | 60.00s (1.00 minutes) - Memory: 219.17 MiB | 219.17 MiB - """ - -In this case we see that our recording has two segments instead of one segment. The total sample number (3,600,00) -and the total time (120.00s), however are still the same as our example above. We can see that each segment is -exactly equivalent to one of the :code:`IntanRecordingExtractor`'s above. - - -Pitfalls --------- - -It's important to remember that these operations are directional. So, - -.. 
code-block:: python - - recording_forward = si.concantenate_recordings([intan_rec_one, intan_rec_two]) - recording_backward = si.concantenate_recordings([intan_rec_two, intan_rec_one]) - - recording_forward != recording_backward - - -This is important because your spike times will be relative to the start of your recording. diff --git a/doc/how_to/index.rst b/doc/how_to/index.rst index b0584dc455..586a22e980 100644 --- a/doc/how_to/index.rst +++ b/doc/how_to/index.rst @@ -14,5 +14,4 @@ Guides on how to solve specific, short problems in SpikeInterface. Learn how to. load_your_data_into_sorting benchmark_with_hybrid_recordings drift_with_lfp - combine_recordings ../how_to_new/plot_combine_recordings