@@ -119,54 +119,27 @@ def _get_locations_from_pc_features(params):
     Notes
     -----
-    Location of of each individual spike is computed from its low-dimensional projection.
-    During sorting, kilosort computes the '
-    `pc_features` (num_spikes, num_PC, num_channels) holds the PC values for each spike.
-    Taking the first component, the subset of 32 channels associated with this
-    spike are indexed to get the actual channel locations (in um). Then, the channel
-    locations are weighted by their PC values.
+    My understanding so far, from the KS1 paper: the individual spike waveforms
+    are decomposed into 'private PCs'. Let the waveform matrix W be time (t) x channel (c).
+    PCA decomposition is performed to compute c basis waveforms. Scores for each
+    channel onto the top three PCs are stored (these recover the waveform well).
 
     This function is based on code in Nick Steinmetz's `spikes` repository,
     https://github.com/cortex-lab/spikes
     """
-    # Compute spike depths
-
-    # for each spike, a PCA is computed just on that spike (n samples x n channels).
-    # the components are all different between spikes, so are not saved.
-    # This gives a (n pc = 3, num channels) set of scores.
-    # but then how it is possible for some spikes to have zero score onto the principal channel?
-
-    breakpoint()
-    pc_features = params["pc_features"][:, 0, :]
+    pc_features = params["pc_features"][:, 0, :].copy()
 
     pc_features[pc_features < 0] = 0
 
-    # Some spikes do not load at all onto the first PC. To avoid biasing the
-    # dataset by removing these, we repeat the above for the next PC,
-    # to compute distances for neurons that do not load onto the 1st PC.
-    # This is not ideal at all, it would be much better to a) find the
-    # max value for each channel on each of the PCs (i.e. basis vectors).
-    # Then recompute the estimated waveform peak on each channel by
-    # summing the PCs by their respective weights. However, the PC basis
-    # vectors themselves do not appear to be output by KS.
-
-    # We include the (n_channels i.e. features) from the second PC
-    # into the `pc_features` mostly containing the first PC. As all
-    # operations are per-spike (i.e. row-wise)
-    no_pc1_signal_spikes = np.where(np.sum(pc_features, axis=1) == 0)
-
-    pc_features_2 = params["pc_features"][:, 1, :]
-    pc_features_2[pc_features_2 < 0] = 0
-
-    pc_features[no_pc1_signal_spikes] = pc_features_2[no_pc1_signal_spikes]
-
     if np.any(np.sum(pc_features, axis=1) == 0):
+        # TODO: 1) handle this case for pc_features
+        #       2) instead use the template_features for all other versions.
         raise RuntimeError(
             "Some spikes do not load at all onto the first "
             "or second principal component. It is necessary "
             "to extend this code section to handle more components."
         )
 
-    # Get the channel indices corresponding to the 32 channels from the PC.
+    # Get the channel indices corresponding to the channels from the PC.
     spike_features_indices = params["pc_features_indices"][params["spike_templates"], :]
 
     # Compute the spike locations as the center of mass of the PC scores
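
A note on the 'private PC' reading in the new docstring: the shapes can be checked with a small NumPy sketch. This is a minimal illustration of that reading of the KS1 paper, not Kilosort's implementation; the toy waveform and all names are made up.

```python
import numpy as np

# Toy waveform for one spike: time (t = 82 samples) x channel (c = 32).
rng = np.random.default_rng(0)
W = rng.standard_normal((82, 32))

# PCA with channels as observations: centre over channels, then SVD.
W_centered = W - W.mean(axis=1, keepdims=True)
U, S, Vt = np.linalg.svd(W_centered, full_matrices=False)

basis = U[:, :3]               # top-3 temporal basis waveforms, (t, 3)
scores = basis.T @ W_centered  # per-channel scores, (3, c) -- cf. pc_features

# The scores alone reconstruct the waveform well:
W_approx = basis @ scores      # rank-3 approximation of W_centered
```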
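The hunk's last line computes spike locations as the center of mass of the PC scores. A minimal sketch of that computation, following the cortex-lab/spikes convention of weighting channel positions by squared PC-1 loadings (the function and argument names here are assumptions, not this module's API):

```python
import numpy as np

def center_of_mass_locations(pc_features_pc1, spike_features_indices, channel_positions):
    """Estimate per-spike (x, y) as the PC-score-weighted mean of channel positions.

    pc_features_pc1 : (num_spikes, num_feature_channels) non-negative PC-1 scores
    spike_features_indices : (num_spikes, num_feature_channels) channel indices
    channel_positions : (num_channels, 2) channel (x, y) positions in um
    """
    # (num_spikes, num_feature_channels, 2): positions of each spike's channels.
    positions = channel_positions[spike_features_indices]

    # Squared scores as weights, as in cortex-lab/spikes.
    weights = pc_features_pc1 ** 2
    return (positions * weights[:, :, np.newaxis]).sum(axis=1) / weights.sum(
        axis=1, keepdims=True
    )
```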
@@ -343,8 +316,15 @@ def load_ks_dir(sorter_output: Path, exclude_noise: bool = True, load_pcs: bool
     if load_pcs:
         pc_features = np.load(sorter_output / "pc_features.npy")
         pc_features_indices = np.load(sorter_output / "pc_feature_ind.npy")
+
+        if (sorter_output / "template_features.npy").is_file():
+            template_features = np.load(sorter_output / "template_features.npy")
+            template_features_indices = np.load(sorter_output / "templates_ind.npy")
+        else:
+            template_features = template_features_indices = None
     else:
         pc_features = pc_features_indices = None
+        template_features = template_features_indices = None
 
     # This makes the assumption that there will never be different .csv and .tsv files
     # in the same sorter output (this should never happen, there will never even be two).
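
The TODO in the first hunk suggests using `template_features` where a given sorter version does not write `pc_features`. A hedged sketch of what that selection might look like, using the keys this diff adds to the params dict (the helper itself is hypothetical, not part of the repository):

```python
def _get_feature_arrays(params):
    # Hypothetical helper: prefer per-spike PC features, fall back to
    # template features when the sorter did not write pc_features.npy.
    if params["pc_features"] is not None:
        return params["pc_features"], params["pc_features_indices"]
    if params["template_features"] is not None:
        return params["template_features"], params["template_features_indices"]
    raise ValueError("Neither pc_features nor template_features are available.")
```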
@@ -364,6 +344,8 @@ def load_ks_dir(sorter_output: Path, exclude_noise: bool = True, load_pcs: bool
 
     if load_pcs:
         pc_features = pc_features[not_noise_clusters_by_spike, :, :]
+        if template_features is not None:
+            template_features = template_features[not_noise_clusters_by_spike, :, :]
 
     spike_clusters = spike_clusters[not_noise_clusters_by_spike]
     cluster_ids = cluster_ids[cluster_groups != 0]
@@ -378,6 +360,8 @@ def load_ks_dir(sorter_output: Path, exclude_noise: bool = True, load_pcs: bool
         "spike_clusters": spike_clusters.squeeze(),
         "pc_features": pc_features,
         "pc_features_indices": pc_features_indices,
+        "template_features": template_features,
+        "template_features_indices": template_features_indices,
         "temp_scaling_amplitudes": temp_scaling_amplitudes.squeeze(),
         "cluster_ids": cluster_ids,
         "cluster_groups": cluster_groups,