diff --git a/probe_construction/MERFISHProbeDesign.m b/probe_construction/MERFISHProbeDesign.m index d79e094..dafc7a1 100644 --- a/probe_construction/MERFISHProbeDesign.m +++ b/probe_construction/MERFISHProbeDesign.m @@ -660,13 +660,15 @@ function MERFISHProbeDesign(varargin) fprintf(logFID, '%s - Building transcriptome object.\n', datestr(datetime)); - % Build transcriptome using existing abundance data + %Build transcriptome using existing abundance data transcriptome = Transcriptome(rawTranscriptomeFasta, ... 'abundPath', fpkmPath, ... 'verbose', true, ... 'headerType', transcriptomeHeaderType, ... 'IDType', transcriptomeIDType); - + %transcriptome = Transcriptome(rawTranscriptomeFasta, ... + % 'abundPath', fpkmPath, ... + % 'verbose', true); transcriptome.Save(transcriptomePath); fprintf(logFID, '%s - Transcriptome object saved to %s\n', datestr(datetime), transcriptomePath); @@ -718,7 +720,7 @@ function MERFISHProbeDesign(varargin) % Generate a OTTable for isoforms for the given gene isoSpecificityTables(i) = OTTable(localTranscriptome, ... isoSpecificityTable_lengthOfExactHomology, ... % lengthOfExactHomology is the length of exact homology used to calculate penalties - 'verbose', false, ... + 'verbose', false, ... 'transferAbund', false); else @@ -798,7 +800,7 @@ function MERFISHProbeDesign(varargin) %% Create parallel pool... speeds up the construction of the TRDesigner and the construction of libraries if isempty(gcp('nocreate')) fprintf(logFID, '%s - Start parallel processing pool\n', datestr(datetime)); - p = parpool(5); % Insert a number here appropriate to the used computational resources + p = parpool(4); % Insert a number here appropriate to the used computational resources else p = gcp; end @@ -915,6 +917,11 @@ function MERFISHProbeDesign(varargin) fprintf(logFID, '%s - Target regions loaded from %s\n', datestr(datetime), trRegionsPath); end + %write the genename + genenames={targetRegions.geneName}'; + geneids={targetRegions.id}'; + numRegions={targetRegions.numRegions}'; + writetable(table(genenames,geneids,numRegions),[analysisSavePath 'targetRegions.csv'],'WriteRowNames',false) %% ------------------------------------------------------------------------ % Step 2: Compile the library % The target regions designed above will be compiled into template @@ -956,13 +963,20 @@ function MERFISHProbeDesign(varargin) % string miss-match. Replacing '_' with ',' in original codebook solves % this issue. - if ~strcmp(transcriptomeIDType, 'NCBI') - finalIds = strrep(finalIds, '_', ','); - end + %if ~strcmp(transcriptomeIDType, 'NCBI') + % finalIds = strrep(finalIds, '_', ','); + %end finalGenes = {codebook.name}; % Extract gene common names from codebook barcodes = char({codebook.barcode}) == '1'; % Extract string barcodes and convert to logical matrix + %check if gene and ids match the reference + genes_from_ids=targetRegions(ismember({targetRegions.id}, finalIds)); + genenames=targetRegions(ismember({targetRegions.geneName}, finalGenes)); + if length(setdiff({genenames.geneName},{genes_from_ids.geneName}))>0 + warning([char(setdiff({genenames.geneName},{genes_from_ids.geneName})),' ',char(setdiff({genes_from_ids.id},{genenames.id})),' is different from the reference']); + fprintf(logFID, ['WARNING! ', char(setdiff({genenames.geneName},{genes_from_ids.geneName})),' ',char(setdiff({genes_from_ids.id},{genenames.id})),' is different from the reference\n']); + end if versionMatch finalTargetRegions = targetRegions(ismember({targetRegions.id}, finalIds)); % Extract only the desired target regions @@ -1035,13 +1049,16 @@ function MERFISHProbeDesign(varargin) if keepGoingFlag oligos = []; + allOligos = []; lastGene = ''; - - if keepAllPossibleProbes - allHeaders = cell(sum(vertcat(finalTargetRegions.numRegions)), 1); - allSeqs = cell(sum(vertcat(finalTargetRegions.numRegions)), 1); - seqCount = 1; - end + + Genes={}; + ProbeNumbers={}; + %if keepAllPossibleProbes + % allHeaders = cell(sum(vertcat(finalTargetRegions.numRegions)), 1); + % allSeqs = cell(sum(vertcat(finalTargetRegions.numRegions)), 1); + % seqCount = 1; + %end for i=1:length(finalIds) % Save local gene @@ -1176,15 +1193,19 @@ function MERFISHProbeDesign(varargin) display(['... ' headers{indsToRemove(r)}]); end - % display(indsToKeep) - indsToKeepForReal = [indsToKeepForReal, indsToKeep]; + % display(indsToKeep) + %indsToKeepForReal = [indsToKeepForReal, indsToKeep]; + indsToKeepForReal = indsToKeep; end - + indsToKeepForAll = indsToKeepForReal(1:length(indsToKeepForReal)); indsToKeepForReal = indsToKeepForReal(randperm(length(indsToKeepForReal), min([length(indsToKeepForReal) numProbesPerGene]))); display(['... keeping ' num2str(length(indsToKeepForReal)) ' probes']); fprintf(logFID, '%s - Retaining %d probes\n', datestr(datetime), length(indsToKeepForReal)); + %write the genename + Genes=[Genes,localGeneName]; + ProbeNumbers=[ProbeNumbers,length(indsToKeepForReal)]; % Check on number if length(indsToKeepForReal) < numProbesPerGene @@ -1253,14 +1274,18 @@ function MERFISHProbeDesign(varargin) oligos(end+1).Header = headers{indsToKeepForReal(s)}; oligos(end).Sequence = seqs{indsToKeepForReal(s)}; end - - if keepAllPossibleProbes - % Append all headers + seqs to cell - allHeaders(seqCount:(seqCount + length(headers) - 1)) = headers; - allSeqs(seqCount:(seqCount + length(seqs) - 1)) = seqs; - seqCount = seqCount + length(seqs); + for s=1:length(indsToKeepForAll) + allOligos(end+1).Header = headers{indsToKeepForAll(s)}; + allOligos(end).Sequence = seqs{indsToKeepForAll(s)}; end + %if keepAllPossibleProbes + % Append all headers + seqs to cell + % allHeaders(seqCount:(seqCount + length(headers) - 1)) = headers; + % allSeqs(seqCount:(seqCount + length(seqs) - 1)) = seqs; + % seqCount = seqCount + length(seqs); + %end + else fprintf(1, 'Empty!\n'); @@ -1281,10 +1306,13 @@ function MERFISHProbeDesign(varargin) fastawrite(possibleOligosPath, oligos); display(['... completed in ' num2str(toc(writeTimer))]); fprintf(logFID, '%s - Completed in %d s\n', datestr(datetime), toc(writeTimer)); - + %%%%%Write out how many probes per gene as a table + Genes=Genes' + ProbeNumbers=ProbeNumbers' + writetable(table(Genes,ProbeNumbers),[analysisSavePath 'probesPerGene.csv'],'WriteRowNames',false) if keepAllPossibleProbes - allOligos = cell2struct([allHeaders, allSeqs], {'Header', 'Sequence'}, 2); + %allOligos = cell2struct([allHeaders, allSeqs], {'Header', 'Sequence'}, 2); PageBreak(); fprintf(1, 'Writing: %s\n', allOligosPath);