cortex-lab
diff --git a/‎eMouse/benchmark_simulation.m
+57 b/‎eMouse/benchmark_simulation.m
+57
diff --git a/‎eMouse/compareClustering2.m
+141 b/‎eMouse/compareClustering2.m
+141
diff --git a/‎eMouse/config_eMouse.m
+67 b/‎eMouse/config_eMouse.m
+67
diff --git a/‎eMouse/make_eMouseChannelMap.m
+44 b/‎eMouse/make_eMouseChannelMap.m
+44
@@ -0,0 +1,57 @@
+function benchmark_simulation(rez, GTfilepath)
+% BENCHMARK_SIMULATION  Compare a sorting result with ground truth and plot the errors.
+%
+%   benchmark_simulation(rez, GTfilepath)
+%
+%   rez        - struct with a matrix field st3. Column 1 is used as the
+%                spike times. If st3 has a 5th column it is used (plus 1)
+%                as the cluster IDs, i.e. the result after the automatic
+%                merges; otherwise column 2 is used.
+%   GTfilepath - MAT-file that contains the variables gtClu and gtRes,
+%                which are passed to compareClustering2 as the ground
+%                truth cluster IDs and spike times.
+%
+%   Opens a new figure with the sorted false positive and miss rates per
+%   ground truth cluster (first match and best merged match) and prints
+%   summary counts to the command window.
+
+% Load into a struct so that variables stored in the file cannot silently
+% overwrite locals of this function (e.g. rez).
+gt = load(GTfilepath);
+if ~isfield(gt, 'gtClu') || ~isfield(gt, 'gtRes')
+    error('benchmark_simulation:missingGroundTruth', ...
+        'File "%s" must contain the variables gtClu and gtRes.', GTfilepath);
+end
+
+% Column 5 of st3 is only there if the auto merges were performed. Test
+% for it explicitly instead of relying on try/catch, which would also
+% hide unrelated errors.
+flag = size(rez.st3, 2) >= 5;
+if flag
+    testClu = 1 + rez.st3(:,5); % shift by one, as in the original code
+else
+    testClu = rez.st3(:,2);     % no attempt to merge clusters
+end
+
+testRes = rez.st3(:,1);
+
+[allScores, allFPrates, allMissRates, allMerges] = ...
+    compareClustering2(gt.gtClu, gt.gtRes, testClu, testRes, []);
+
+% first element of each cell = initial best match, last = after merges
+firstOf = @(x) x(1);
+lastOf  = @(x) x(end);
+
+figure
+
+plot(sort(cellfun(firstOf, allFPrates)), '-*b', 'Linewidth', 2)
+hold all
+plot(sort(cellfun(firstOf, allMissRates)), '-*r', 'Linewidth', 2)
+plot(sort(cellfun(lastOf, allFPrates)), 'b', 'Linewidth', 2)
+plot(sort(cellfun(lastOf, allMissRates)), 'r', 'Linewidth', 2)
+
+box off
+
+initialScores = cellfun(firstOf, allScores);
+finalScores = cellfun(lastOf, allScores);
+fprintf('%d / %d good cells, score > 0.8 (pre-merge) \n', sum(initialScores>.8), numel(allScores))
+fprintf('%d / %d good cells, score > 0.8 (post-merge) \n', sum(finalScores>.8), numel(allScores))
+
+% one entry in allMerges per test cluster used, so merges = entries - 1
+nMerges = cellfun(@(x) numel(x)-1, allMerges);
+fprintf('Mean merges per good cell %2.2f \n', mean(nMerges(finalScores>.8)))
+
+xlabel('ground truth cluster')
+ylabel('fractional error')
+
+legend('false positives (initial)', 'miss rates (initial)', 'false positives (best)', 'miss rates (best)')
+legend boxoff
+set(gca, 'Fontsize', 20)
+set(gcf, 'Color', 'w')
+
+if flag
+    title('After Kilosort AUTO merges')
+else
+    title('Before Kilosort AUTO merges')
+end
@@ -0,0 +1,141 @@
+
+
+function [allScores, allFPs, allMisses, allMerges] = compareClustering2(cluGT, resGT, cluTest, resTest, datFilename)
+% COMPARECLUSTERING2  Score a clustering against ground truth, with greedy merges.
+%
+%   [allScores, allFPs, allMisses, allMerges] = ...
+%       compareClustering2(cluGT, resGT, cluTest, resTest[, datFilename])
+%
+%   - clu and res variables are length nSpikes, for ground truth (GT) and
+%     for the clustering to be evaluated (Test). clu holds the cluster ID of
+%     each spike, res its time.
+%   - cluTest values are used directly as column indices, so they must be
+%     positive integers.
+%   - The matching below is a single forward pass over both time vectors;
+%     it is not checked here that resTest and the GT times are ascending.
+%     NOTE(review): callers presumably pass sorted spike times - confirm.
+%   - datFilename is accepted for compatibility but not used.
+%
+%   For every GT cluster the test cluster with the highest score
+%   (1 - false positive rate - miss rate) is picked, then further test
+%   clusters are merged in greedily while each merge raises the score by
+%   more than 0.01 and the score is still <= 0.99.
+%
+%   Each output is a 1-by-nGT cell array; cell k holds one value per step
+%   (initial match first, then one per accepted merge) for GT cluster k:
+%     allScores - scores,  allFPs - false positive rates,
+%     allMisses - miss rates,  allMerges - test cluster IDs chosen.
+%   A short summary is printed to the command window.
+
+if nargin<5
+    datFilename = []; % unused, kept for interface compatibility
+end
+
+GTcluIDs = unique(cluGT);
+testCluIDs = unique(cluTest);
+jitter = 12;     % a test spike matches a GT spike if gt-jitter < t <= gt+jitter
+minGain = 0.01;  % a merge must raise the score by more than this
+maxScore = 0.99; % stop merging once the score exceeds this
+
+nGT = length(GTcluIDs);
+nTestClu = max(testCluIDs);
+
+% Preallocate the outputs so they are defined even if there is no ground
+% truth cluster at all (otherwise MATLAB errors on unassigned outputs).
+allScores = cell(1, nGT);
+allFPs = cell(1, nGT);
+allMisses = cell(1, nGT);
+allMerges = cell(1, nGT);
+
+% spike count per test cluster ID; at least 1 to avoid dividing by zero
+nSp0 = zeros(nTestClu, 1);
+for j = 1:nTestClu
+    nSp0(j) = max(1, sum(cluTest==j));
+end
+
+for cGT = 1:nGT
+    rGT = int32(resGT(cluGT==GTcluIDs(cGT)));
+    nrGT = numel(rGT);
+
+    % S(i, c) = 1 if GT spike i has a matching spike in test cluster c
+    S = spalloc(nrGT, nTestClu, nrGT * 10);
+
+    igt = 1;
+    flag = false;
+    for j = 1:numel(cluTest)
+        while (resTest(j) > rGT(igt) + jitter)
+            % the current spike is now too large compared to GT, advance the GT
+            igt = igt + 1;
+            if igt>nrGT
+               flag = true;
+               break;
+            end
+        end
+        if flag
+            break;
+        end
+
+        if resTest(j)>rGT(igt)-jitter
+            % we found a match, add a tick to the right cluster
+            S(igt, cluTest(j)) = 1;
+        end
+    end
+
+    mergeIDs = [];
+    scores = [];
+    falsePos = [];
+    missRate = [];
+
+    % find the initial best match
+    nSp = nSp0;
+    [best, sc, fp, miss] = pickBestCluster(S, nSp, nrGT);
+    mergeIDs(end+1) = best;
+    scores(end+1) = sc;
+    falsePos(end+1) = fp;
+    missRate(end+1) = miss;
+
+    S0 = S(:, best);
+    nSp = nSp + nSp0(best);
+    while scores(end)>0 && (length(scores)==1 || ( scores(end)>(scores(end-1) + minGain) && scores(end)<=maxScore ))
+        % treat every test cluster as if merged with the clusters picked
+        % so far, then find the best match again
+        S = bsxfun(@max, S, S0);
+
+        [best, sc, fp, miss] = pickBestCluster(S, nSp, nrGT);
+        mergeIDs(end+1) = best;
+        scores(end+1) = sc;
+        falsePos(end+1) = fp;
+        missRate(end+1) = miss;
+
+        S0 = S(:, best);
+        nSp = nSp + nSp0(best);
+    end
+
+    if length(scores)==1 || scores(end)>(scores(end-1)+minGain)
+        % the last merge did help, so include it
+        nKeep = length(scores);
+    else
+        % the last merge actually didn't help (or didn't help enough), so
+        % exclude it
+        nKeep = length(scores) - 1;
+    end
+    allMerges{cGT} = mergeIDs(1:nKeep);
+    allScores{cGT} = scores(1:nKeep);
+    allFPs{cGT} = falsePos(1:nKeep);
+    allMisses{cGT} = missRate(1:nKeep);
+
+end
+
+initScore = zeros(1, nGT);
+finalScore = zeros(1, nGT);
+numMerges = zeros(1, nGT);
+fprintf(1, '\n\n--Results Summary--\n')
+for cGT = 1:nGT
+     initScore(cGT) = allScores{cGT}(1);
+     finalScore(cGT) = allScores{cGT}(end);
+     numMerges(cGT) = length(allScores{cGT})-1;
+end
+
+fprintf(1, 'median initial score: %.2f; median best score: %.2f\n', median(initScore), median(finalScore));
+fprintf(1, 'total merges required: %d\n', sum(numMerges));
+
+
+function [best, score, fpRate, missRate] = pickBestCluster(S, nSp, nrGT)
+% Score every test cluster (column of S) and return the first best one.
+%   S    - match matrix, GT spikes by test clusters
+%   nSp  - number of test spikes attributed to each column
+%   nrGT - number of GT spikes
+numMatch = sum(S,1)';
+misses = (nrGT-numMatch)/nrGT; % missed these spikes, as a proportion of the total true spikes
+fps = (nSp-numMatch)./nSp; % number of comparison spikes not near a GT spike, as a proportion of the number of guesses
+sc = 1-(fps+misses);
+best = find(sc==max(sc),1);
+score = sc(best);
+fpRate = fps(best);
+missRate = misses(best);
@@ -0,0 +1,67 @@
+% Kilosort options for the eMouse simulation, collected in the struct ops.
+% This is a script, not a function: it reads useGPU and fpath from the
+% calling workspace, so both must be defined before it is run.
+% NOTE(review): the values in parentheses at the end of many comments look
+% like the suggested defaults - confirm against the Kilosort documentation.
+ops.GPU                 = useGPU; % whether to run this code on an Nvidia GPU (much faster, mexGPUall first)		
+ops.parfor              = 0; % whether to use parfor to accelerate some parts of the algorithm		
+ops.verbose             = 1; % whether to print command line progress		
+ops.showfigures         = 1; % whether to plot figures during optimization		
+		
+ops.datatype            = 'dat';  % binary ('dat', 'bin') or 'openEphys'		
+ops.fbinary             = fullfile(fpath, 'sim_binary.dat'); % will be created for 'openEphys'		
+ops.fproc               = fullfile(fpath, 'temp_wh.dat'); % residual from RAM of preprocessed data		
+ops.root                = fpath; % 'openEphys' only: where raw files are		
+% define the channel map as a filename (string) or simply an array		
+ops.chanMap             = fullfile(fpath, 'chanMap.mat'); % make this file using createChannelMapFile.m		
+% ops.chanMap = 1:ops.Nchan; % treated as linear probe if unavailable chanMap file		
+
+ops.fs                  = 25000;        % sampling rate		
+ops.NchanTOT            = 34;           % total number of channels		
+ops.Nchan               = 32;           % number of active channels 		
+ops.Nfilt               = 64;           % number of filters to use (2-4 times more than Nchan, should be a multiple of 32)     		
+ops.nNeighPC            = 12; % visualization only (Phy): number of channels to mask the PCs, leave empty to skip (12)		
+ops.nNeigh              = 16; % visualization only (Phy): number of neighboring templates to retain projections of (16)		
+		
+% options for channel whitening		
+ops.whitening           = 'full'; % type of whitening (default 'full', for 'noSpikes' set options for spike detection below)		
+ops.nSkipCov            = 1; % compute whitening matrix from every N-th batch (1)		
+ops.whiteningRange      = 32; % how many channels to whiten together (Inf for whole probe whitening, should be fine if Nchan<=32)		
+		
+ops.criterionNoiseChannels = 0.2; % fraction of "noise" templates allowed to span all channel groups (see createChannelMapFile for more info). 		
+
+% other options for controlling the model and optimization		
+ops.Nrank               = 3;    % matrix rank of spike template model (3)		
+ops.nfullpasses         = 6;    % number of complete passes through data during optimization (6)		
+ops.maxFR               = 20000;  % maximum number of spikes to extract per batch (20000)		
+ops.fshigh              = 200;   % frequency for high pass filtering		
+% ops.fslow             = 2000;   % frequency for low pass filtering (optional)
+ops.ntbuff              = 64;    % samples of symmetrical buffer for whitening and spike detection		
+ops.scaleproc           = 200;   % int16 scaling of whitened data		
+ops.NT                  = 128*1024+ ops.ntbuff;% this is the batch size (try decreasing if out of memory) 		
+% for GPU should be multiple of 32 + ntbuff		
+		
+% the following options can improve/deteriorate results. 		
+% when multiple values are provided for an option, the first two are beginning and ending anneal values, 		
+% the third is the value used in the final pass. 		
+ops.Th               = [4 10 10];    % threshold for detecting spikes on template-filtered data ([6 12 12])		
+ops.lam              = [5 5 5];   % large means amplitudes are forced around the mean ([10 30 30])		
+ops.nannealpasses    = 4;            % should be less than nfullpasses (4)		
+ops.momentum         = 1./[20 400];  % start with high momentum and anneal (1./[20 1000])		
+ops.shuffle_clusters = 1;            % allow merges and splits during optimization (1)		
+ops.mergeT           = .1;           % upper threshold for merging (.1)		
+ops.splitT           = .1;           % lower threshold for splitting (.1)		
+		
+% options for initializing spikes from data		
+ops.initialize      = 'no';    %'fromData' or 'no'		
+ops.spkTh           = -6;      % spike threshold in standard deviations (4)		
+ops.loc_range       = [3  1];  % ranges to detect peaks; plus/minus in time and channel ([3 1])		
+ops.long_range      = [30  6]; % ranges to detect isolated peaks ([30 6])		
+ops.maskMaxChannels = 5;       % how many channels to mask up/down ([5])		
+ops.crit            = .65;     % upper criterion for discarding spike repeats (0.65)		
+ops.nFiltMax        = 10000;   % maximum "unique" spikes to consider (10000)		
+		
+% load predefined principal components (visualization only (Phy): used for features)		
+% PCspikes2.mat is referenced by bare name, so it has to be on the MATLAB
+% path or in the current folder when this script runs.
+dd                  = load('PCspikes2.mat'); % you might want to recompute this from your own data		
+ops.wPCA            = dd.Wi(:,1:7);   % PCs: the first 7 columns of Wi		
+		
+% options for posthoc merges (under construction)		
+ops.fracse  = 0.1; % binning step along discriminant axis for posthoc merges (in units of sd)		
+ops.epu     = Inf;		
+		
+ops.ForceMaxRAMforDat   = 20e9; % maximum RAM the algorithm will try to use; on Windows it will autodetect.
@@ -0,0 +1,44 @@
+function make_eMouseChannelMap(fpath)
+% Write the channel map of the simulated (eMouse) probe to
+% fullfile(fpath, 'chanMap.mat').
+%
+% The file holds five variables, each with one entry per recorded channel
+% (34 in total): chanMap, connected, xcoords, ycoords and kcoords.
+
+nChanTotal = 34; % rows in the raw binary file
+nDead      = 2;  % the first two mapped channels carry no ephys data
+
+% The channel order is known a priori, so it is simply written down.
+% chanMap(1) is the row in the raw binary file for the first channel.
+% Rows 33 and 34 come first; they are the dead channels here. After them
+% come the even rows 8..32, the odd rows 7..31 and finally rows 1..6.
+chanMap = [33 34, 8:2:32, 7:2:31, 1:6];
+
+% Kilosort starts by reordering the data with data = data(chanMap, :).
+% In that new order, mark which channels are "connected", i.e. neither
+% dead nor used for non-ephys signals.
+connected = [false(nDead, 1); true(nChanTotal - nDead, 1)];
+
+% Horizontal (x) and vertical (y) position of each of the 34 channels,
+% taken from the probe specification. Entries of dead or non-ephys
+% channels do not matter and are NaN. The unit is um, although the
+% absolute scale is not really important to the algorithm.
+pitch = 20;
+xSite = [NaN NaN ...
+    1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 ...
+    1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0];
+ySite = [NaN NaN ...
+    7 8 9 9 10 10 11 11 12 12 13 13 14 14 15 15 ...
+    16 17 17 18 18 19 19 20 20 21 21 22 22 23 23 24];
+xcoords = pitch * xSite;
+ycoords = pitch * ySite;
+
+% Multi-shank probes or tetrodes are often organized into groups of
+% channels that cannot share spikes with the rest of the probe; kcoords
+% says which group each channel belongs to, which helps the algorithm
+% discard noisy templates shared across groups. Here every live channel
+% sits on the same shank, so they all get group 1.
+kcoords = [NaN NaN ones(1, nChanTotal - nDead)];
+
+% Later Kilosort does data = data(connected, :) and indexes xcoords,
+% ycoords and kcoords with connected in the same way. No further channel
+% map information is needed (in particular no "adjacency graphs" like in
+% KlustaKwik), so the map can be saved now.
+outFile = fullfile(fpath, 'chanMap.mat');
+save(outFile, 'chanMap', 'connected', 'xcoords', 'ycoords', 'kcoords')