From ae60d7eea27d2079163e84b19903d04d64f7b6b4 Mon Sep 17 00:00:00 2001 From: Rong Li Lab Date: Tue, 30 May 2017 22:12:57 -0400 Subject: [PATCH] checked files:before append appendAdapters --- .gitignore | 2 + Db/Mouse.parameters.xml | 22 ++++------ Db/Mouse.transcriptList.txt | 2 +- abundantrnaParse.m | 31 +++++--------- cdnaParse.m | 35 +++++++-------- main.m | 2 +- ncrnaParse.m | 38 ++++++++--------- oligosParse.m | 66 ++++++++--------------------- readParameters.m | 27 +++++------- runOligoArray.m | 6 ++- transcriptListParse.m | 18 ++++---- utilities/blastAbundantRNASimple.m | 16 +++---- utilities/blastFileSplit.m | 21 +++------ utilities/checkSpecialTranscripts.m | 8 +--- utilities/pickExpressedSeq.m | 12 ++---- utilities/rearrangeOligos.m | 6 +-- 16 files changed, 117 insertions(+), 195 deletions(-) diff --git a/.gitignore b/.gitignore index 8067929..ec3c961 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ *.nsq *.log +# MatLab temp files +*.asv \ No newline at end of file diff --git a/Db/Mouse.parameters.xml b/Db/Mouse.parameters.xml index 20965c4..b422820 100644 --- a/Db/Mouse.parameters.xml +++ b/Db/Mouse.parameters.xml @@ -2,6 +2,10 @@ 1 + cdna + ncrna + gene:\S* + gene_biotype:\S* @@ -20,8 +24,6 @@ C:\FISHerMan\Db\Mouse38.cdna.fa - ENS\w*T\d* - ENS\w*G\d* @@ -30,20 +32,16 @@ C:\FISHerMan\Db\Mouse38.ncrna.fa 1 C:\FISHerMan\Db\Mouse.trna.fas - ENS\w*T\d* - ENS\w*G\d* - gene_biotype:\S* 0.001 - ENS\w*T\d* - :rRNA - :Mt_rRNA - :tRNA - :Mt_tRNA + :rRNA + :Mt_rRNA + :tRNA + :Mt_tRNA @@ -78,8 +76,6 @@ - ENS\w*T\d* - ENS\w*G\d* 48 1000 30 @@ -87,7 +83,7 @@ 200000 -S 2 0 - C:\FISHerMan\Db\Mouse.STList.fas + C:\FISHerMan\Db\Mouse.STList.fas diff --git a/Db/Mouse.transcriptList.txt b/Db/Mouse.transcriptList.txt index dcbf6d9..c75541a 100644 --- a/Db/Mouse.transcriptList.txt +++ b/Db/Mouse.transcriptList.txt @@ -4,6 +4,6 @@ >ENSMUST00000015612 ->ENSMUST00000183557 +>ENSMUST00000180842 >ENSMUST00000084289 diff --git a/abundantrnaParse.m b/abundantrnaParse.m index 0ba3e2c..c554e54 100644 --- a/abundantrnaParse.m +++ b/abundantrnaParse.m @@ -1,21 +1,9 @@ function [Header,Sequence]... =abundantrnaParse(cdnaHeader,cdnaSequence,ncrnaHeader,ncrnaSequence,seqData,params) -% switch length(varargin) -% case 0 -% seqData = []; -% params = struct('species','Mouse','verbose',1,... -% 'percent',0.001,... -% 'keys',{'ENS\w*T\d*',':rRNA',':Mt_rRNA',':tRNA',':Mt_tRNA'}); -% case 1 -% seqData = varargin{1}; -% params = struct('species','Mouse','verbose',1,... -% 'percent',0.001,... -% 'keys',{'ENS\w*T\d*',':rRNA',':Mt_rRNA',':tRNA',':Mt_tRNA'}); -% otherwise -% seqData = varargin{1}; -% params = varargin{2}; -% end +% params = struct('species','Mouse','verbose',1,... +% 'percent',0.001,... +% 'keys',{'ENS\w*T\d*',':rRNA',':Mt_rRNA',':tRNA',':Mt_tRNA'}); if params(1).verbose disp('generating abundant rna database files for Blast'); @@ -35,16 +23,17 @@ end for n = 1:length(ncrnaHeader) - if ~(isempty(pos{2,1}{n,1}) && isempty(pos{3,1}{n,1})... - && isempty(pos{4,1}{n,1}) && isempty(pos{5,1}{n,1})) - transcriptID{end+1,1} = ncrnaHeader{n,1}(1:pos{1,1}{n,1}); + if ~(isempty(pos{1,1}{n,1}) && isempty(pos{2,1}{n,1})... + && isempty(pos{3,1}{n,1}) && isempty(pos{4,1}{n,1})) + temp=regexp(ncrnaHeader{n,1}, ':'); + transcriptID{end+1,1} = ncrnaHeader{n,1}(1:temp(1)-1); end end transcriptID = unique(transcriptID); Header = vertcat(cdnaHeader,ncrnaHeader); Sequence = vertcat(cdnaSequence,ncrnaSequence); -[Header, Sequence] = pickExpressedSeq(transcriptID, Header, Sequence, params); +[Header, Sequence] = pickExpressedSeq(transcriptID, Header, Sequence); abundantrna = [params(1).species '.abundantrna.fas']; if exist(abundantrna, 'file') @@ -56,8 +45,8 @@ % MatLab's use of blastlocal requires short entry name simpleHeader = Header; for n = 1:length(Header) - pos = regexp(Header{n,1}, params(1).keys, 'end'); - simpleHeader{n,1} = Header{n,1}(1:pos); + pos = regexp(Header{n,1}, ':'); + simpleHeader{n,1} = Header{n,1}(1:pos(1)-1); end if exist(abundantrnaDb, 'file') delete([abundantrnaDb '*']); diff --git a/cdnaParse.m b/cdnaParse.m index 7f009b2..f16be78 100644 --- a/cdnaParse.m +++ b/cdnaParse.m @@ -1,20 +1,8 @@ function [Header,Sequence]=cdnaParse(cdna,seqData,params) -% cdna = 'C:\OligoArray\Mouse38.cdna.fa'; - -% switch length(varargin) -% case 0 -% seqData = []; -% params = struct('species','Mouse','verbose',1,... -% 'keys',{'ENS\w*T\d*','ENS\w*G\d*'}); -% case 1 -% seqData = varargin{1}; -% params = struct('species','Mouse','verbose',1,... -% 'keys',{'ENS\w*T\d*','ENS\w*G\d*'}); -% otherwise -% seqData = varargin{1}; -% params = varargin{2}; -% end +% params = struct('species','Mouse','verbose',1,... +% 'dir1','C:\FISHerMan\Db\Mouse38.cdna.fa',... +% 'keys',{'cdna','gene:\S*'}); if params(1).verbose disp('reading the cdna data file'); @@ -34,15 +22,22 @@ for n = 1:length(Header) temp = Header{n,1}; - temp1 = temp(pos1{1,1}{n,1}:pos2{1,1}{n,1}); - temp2 = temp(pos1{2,1}{n,1}:pos2{2,1}{n,1}); + temp1 = temp(1:pos1{1,1}{n,1}-2); + temp2 = temp(pos1{2,1}{n,1}+5:pos2{2,1}{n,1}); if isempty(temp1) disp('missing transcript ID'); + elseif strfind(temp1,'.') + temp1pos=strfind(temp1,'.'); + temp1=temp1(1:temp1pos(1)-1); end if isempty(temp2) disp('missing gene ID'); + elseif strfind(temp2,'.') + temp2pos=strfind(temp2,'.'); + temp2=temp2(1:temp2pos(1)-1); end + Header{n,1} = strcat(temp1, ':', temp2); end @@ -50,7 +45,7 @@ if params(1).verbose disp(' picking expressed sequences according to RNA-seq data'); end - [Header, Sequence] = pickExpressedSeq(seqData, Header, Sequence, params); + [Header, Sequence] = pickExpressedSeq(seqData, Header, Sequence); end if params(1).verbose @@ -71,8 +66,8 @@ % MatLab's use of blastlocal requires short entry names simpleHeader = Header; for n = 1:length(Header) - pos = regexp(Header{n,1}, params(1).keys, 'end'); - simpleHeader{n,1} = Header{n,1}(1:pos); + pos = regexp(Header{n,1}, ':'); + simpleHeader{n,1} = Header{n,1}(1:pos(1)-1); end if exist(cdnaDb, 'file') delete([cdnaDb '*']); diff --git a/main.m b/main.m index 446efd5..e491353 100644 --- a/main.m +++ b/main.m @@ -63,7 +63,7 @@ [probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore]... =blast1stPCR(adapterList,probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore,params.onePCR); -%% Save the probes of each transcripts into individual files +%% Remove non-specific probes that will affect other synthesis steps [probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore]... =blastOtherSteps(adapterList,probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore,params.otherSteps); diff --git a/ncrnaParse.m b/ncrnaParse.m index 5b8aa32..810fd20 100644 --- a/ncrnaParse.m +++ b/ncrnaParse.m @@ -1,20 +1,9 @@ function [Header,Sequence]=ncrnaParse(ncrna,seqData,trna,params) -% ncrna = 'C:\OligoArray\Mouse38.ncrna.fa'; - -% switch length(varargin) -% case 0 -% seqData = []; -% params = struct('species','Mouse','verbose',1,... -% 'keys',{'ENS\w*T\d*','ENS\w*G\d*','gene_biotype:\S*'}); -% case 1 -% seqData = varargin{1}; -% params = struct('species','Mouse','verbose',1,... -% 'keys',{'ENS\w*T\d*','ENS\w*G\d*','gene_biotype:\S*'}); -% otherwise -% seqData = varargin{1}; -% params = varargin{2}; -% end +% params = struct('species','Mouse','verbose',1,... +% 'dir1','C:\FISHerMan\Db\Mouse38.ncrna.fa',... +% 'tRNA',1,'dirT','C:\FISHerMan\Db\Mouse.trna.fas',... +% 'keys',{'ncrna','gene:\S*','gene_biotype:\S*'}); if params(1).verbose disp('reading the ncrna data file'); @@ -34,18 +23,27 @@ for n = 1:length(Header) temp = Header{n,1}; - temp1 = temp(pos1{1,1}{n,1}:pos2{1,1}{n,1}); - temp2 = temp(pos1{2,1}{n,1}:pos2{2,1}{n,1}); + temp1 = temp(1:pos1{1,1}{n,1}-2); + temp2 = temp(pos1{2,1}{n,1}+5:pos2{2,1}{n,1}); temp3 = temp(pos1{3,1}{n,1}+13:pos2{3,1}{n,1}); if isempty(temp1) disp('missing transcript ID'); + elseif strfind(temp1,'.') + temp1pos=strfind(temp1,'.'); + temp1=temp1(1:temp1pos(1)-1); end if isempty(temp2) disp('missing gene ID'); + elseif strfind(temp2,'.') + temp2pos=strfind(temp2,'.'); + temp2=temp2(1:temp2pos(1)-1); end if isempty(temp3) disp('missing gene type'); + elseif strfind(temp3,'.') + temp3pos=strfind(temp3,'.'); + temp3=temp3(1:temp3pos(1)-1); end Header{n,1} = strcat(temp1, ':', temp2, ':', temp3); @@ -55,7 +53,7 @@ if params(1).verbose disp(' picking expressed sequences according to RNA-seq data'); end - [Header, Sequence] = pickExpressedSeq(seqData, Header, Sequence, params); + [Header, Sequence] = pickExpressedSeq(seqData, Header, Sequence); end if ~isempty(trna) @@ -89,8 +87,8 @@ % MatLab's use of blastlocal requires short entry names simpleHeader = Header; for n = 1:length(Header) - pos = regexp(Header{n,1}, params(1).keys, 'end'); - simpleHeader{n,1} = Header{n,1}(1:pos); + pos = regexp(Header{n,1}, ':'); + simpleHeader{n,1} = Header{n,1}(1:pos(1)-1); end if exist(ncrnaDb, 'file') delete([ncrnaDb '*']); diff --git a/oligosParse.m b/oligosParse.m index 81a8867..1d68927 100644 --- a/oligosParse.m +++ b/oligosParse.m @@ -1,26 +1,19 @@ function oligos = oligosParse(params) -% oligos = 'C:\OligoArray\oligos.txt'; - -% if length(varargin) >= 1 -% params = varargin{1}; -% else -% params = struct('species','Mouse','verbose',1,... -% 'keys',{'ENS\w*T\d*','ENS\w*G\d*'},'number',48,... -% 'thres',30,'querySize',30,'DbSize',2*10^5,'seqNum',1000,... -% 'blastArgs','-S 2','parallel', 0,... -% 'specialTranscripts','C:\FISHerMan\Db\Mouse.STList.fas'); -% end +% params = struct('species','Mouse','verbose',1,... +% 'number',48,'seqNum',1000,'thres',30,'querySize',30,... +% 'DbSize',2*10^5,'blastArgs','-S 2','parallel', 0,... +% 'specialTranscripts','C:\FISHerMan\Db\Mouse.STList.fas'); + +if params(1).verbose + disp('reading the result file from OligoArray'); +end oligos = [params(1).species '.tempoligos.txt']; if ~exist(oligos, 'file') warning('missing important files from OligoArray'); end -if params(1).verbose - disp('reading the result file from OligoArray'); -end - fid = fopen(oligos,'r'); fmt = '%s %f %f %f %f %f %f %s %s %s %*[^\n]'; temp = textscan(fid,fmt,'CollectOutput',true,'delimiter','\t','TreatAsEmpty','NA'); @@ -39,14 +32,16 @@ if params(1).verbose && mod(n, 1000) == 1 disp([' analyzing oligo entry no. ' num2str(n)]); end - [pos1, pos2] = regexp(nonspecificHits{n,1}, params(2).keys, 'start', 'end'); + + pos = regexp(geneNames{n,1}, ':'); + geneName=geneNames{n,1}(pos(1)+1:end); flag = 0; - for m = 1:length(pos1) - if ~strfind(geneNames{n,1}, nonspecificHits{n,1}(pos1(m):pos2(m))) - flag = 1; - end + if length(regexp(nonspecificHits{n,1}, geneName)) < ... + length(regexp(nonspecificHits{n,1}, ':')) + flag = 1; end + if flag == 1 index = [index n]; end @@ -66,33 +61,6 @@ specificHits{n,1} = seqrcomplement(specificHits{n,1}); end -%% Remove transcripts without enough oligos -% if params.verbose -% disp('removing transcripts without enough oligos'); -% end - -% pos = regexp(geneNames, params(1).keys, 'end'); -% trimNames = {}; -% for n = 1:length(geneNames) -% trimNames{end+1} = geneNames{n,1}(1:pos{n,1}); -% end -% trimNames = trimNames'; -% uniqueNames = unique(trimNames, 'stable'); -% -% indexTotal = zeros(length(trimNames),1); -% for n = 1:length(uniqueNames) -% index = ismember(trimNames, uniqueNames{n,1}); -% if sum(index) < params.number -% indexTotal = indexTotal+index; -% disp(['transcript ' uniqueNames{n,1} ' has less than ' num2str(params.number) ' probes']); -% end -% end -% -% indexTotal = logical(indexTotal); -% geneNames(indexTotal) = []; -% nonspecificHits(indexTotal) = []; -% specificHits(indexTotal) = []; - %% Blast oligos against abundant rna database and remove non-specific oligos [geneNames,specificHits,nonspecificHits]... =blastAbundantRNASimple(geneNames,specificHits,nonspecificHits,params); @@ -103,10 +71,10 @@ disp('removing transcripts without enough oligos'); end -pos = regexp(geneNames, params(1).keys, 'end'); +pos = regexp(geneNames, ':'); trimNames = {}; for n = 1:length(geneNames) - trimNames{end+1} = geneNames{n,1}(1:pos{n,1}); + trimNames{end+1} = geneNames{n,1}(1:pos{n,1}(1)-1); end trimNames = trimNames'; uniqueNames = unique(trimNames, 'stable'); diff --git a/readParameters.m b/readParameters.m index 109450f..6897974 100644 --- a/readParameters.m +++ b/readParameters.m @@ -29,6 +29,11 @@ %% Parse general parameters verbose=xmlParse(general, 'general', 'verbose'); +gkey1=xmlParse(general, 'general', 'key1'); +gkey2=xmlParse(general, 'general', 'key2'); +gkey3=xmlParse(general, 'general', 'key3'); +gkey4=xmlParse(general, 'general', 'key4'); +gkey5=xmlParse(general, 'general', 'key5'); %% Parse parameters for rnaSeq data=xmlParse(rnaSeq, 'rnaSeq', 'data'); @@ -58,13 +63,11 @@ %% Parse parameters for cdna dir1=xmlParse(cdna, 'cdna', 'dir1'); -key1=xmlParse(cdna, 'cdna', 'key1'); -key2=xmlParse(cdna, 'cdna', 'key2'); params.cdna = struct('species',species,... 'verbose',str2double(verbose.getFirstChild.getData),... 'dir1',char(dir1.getFirstChild.getData),... - 'keys',{char(key1.getFirstChild.getData),char(key2.getFirstChild.getData)}); + 'keys',{char(gkey1.getFirstChild.getData),char(gkey3.getFirstChild.getData)}); %% Parse parameters for ncrna dir1=xmlParse(ncrna, 'ncrna', 'dir1'); @@ -74,17 +77,14 @@ else dirT=tRNA; end -key1=xmlParse(ncrna, 'ncrna', 'key1'); -key2=xmlParse(ncrna, 'ncrna', 'key2'); -key3=xmlParse(ncrna, 'ncrna', 'key3'); params.ncrna = struct('species',species,... 'verbose',str2double(verbose.getFirstChild.getData),... 'dir1',char(dir1.getFirstChild.getData),... 'tRNA',str2double(tRNA.getFirstChild.getData),... 'dirT',char(dirT.getFirstChild.getData),... - 'keys',{char(key1.getFirstChild.getData),char(key2.getFirstChild.getData),... - char(key3.getFirstChild.getData)}); + 'keys',{char(gkey2.getFirstChild.getData),char(gkey3.getFirstChild.getData),... + char(gkey4.getFirstChild.getData)}); %% Parse parameters for abundantrna percent=xmlParse(abundantrna, 'abundantrna', 'percent'); @@ -92,14 +92,12 @@ key2=xmlParse(abundantrna, 'abundantrna', 'key2'); key3=xmlParse(abundantrna, 'abundantrna', 'key3'); key4=xmlParse(abundantrna, 'abundantrna', 'key4'); -key5=xmlParse(abundantrna, 'abundantrna', 'key5'); params.abundantrna = struct('species',species,... 'verbose',str2double(verbose.getFirstChild.getData),... 'percent',str2double(percent.getFirstChild.getData),... 'keys',{char(key1.getFirstChild.getData),char(key2.getFirstChild.getData),... - char(key3.getFirstChild.getData),char(key4.getFirstChild.getData),... - char(key5.getFirstChild.getData)}); + char(key3.getFirstChild.getData),char(key4.getFirstChild.getData)}); %% Parse parameters for transcriptList dir1=xmlParse(transcriptList, 'transcriptList', 'dir1'); @@ -116,8 +114,6 @@ %% Parse parameters for oligos -key1=xmlParse(oligos, 'oligos', 'key1'); -key2=xmlParse(oligos, 'oligos', 'key2'); num=xmlParse(oligos, 'oligos', 'number'); seqNum=xmlParse(oligos, 'oligos', 'seqNum'); thres=xmlParse(oligos, 'oligos', 'thres'); @@ -125,11 +121,10 @@ DbSize=xmlParse(oligos, 'oligos', 'DbSize'); blastArgs=xmlParse(oligos, 'oligos', 'blastArgs'); parallel=xmlParse(oligos, 'oligos', 'parallel'); -dir1=xmlParse(oligos, 'oligos', 'dir1'); +dirST=xmlParse(oligos, 'oligos', 'dirST'); params.oligos = struct('species',species,... 'verbose',str2double(verbose.getFirstChild.getData),... - 'keys',{char(key1.getFirstChild.getData),char(key2.getFirstChild.getData)},... 'number',str2double(num.getFirstChild.getData),... 'seqNum',str2double(seqNum.getFirstChild.getData),... 'thres',str2double(thres.getFirstChild.getData),... @@ -137,7 +132,7 @@ 'DbSize',str2double(DbSize.getFirstChild.getData),... 'blastArgs',char(blastArgs.getFirstChild.getData),... 'parallel',str2double(parallel.getFirstChild.getData),... - 'specialTranscripts',char(dir1.getFirstChild.getData)); + 'specialTranscripts',char(dirST.getFirstChild.getData)); %% Parse parameters for adapters dir1=xmlParse(adapters, 'adapters', 'dir1'); diff --git a/runOligoArray.m b/runOligoArray.m index 5b47f22..8794291 100644 --- a/runOligoArray.m +++ b/runOligoArray.m @@ -65,5 +65,9 @@ function runOligoArray(varargin) disp('done generating oligos'); end -copyfile([params.oligoArrayPath 'oligos.txt'], [params.species '.tempoligos.txt']); +tempOligos=[params.species '.tempoligos.txt']; +if exist(tempOligos,'file') + delete(tempOligos); +end +copyfile([params.oligoArrayPath 'oligos.txt'], tempOligos); diff --git a/transcriptListParse.m b/transcriptListParse.m index e1a59ae..7b72dd1 100644 --- a/transcriptListParse.m +++ b/transcriptListParse.m @@ -2,11 +2,9 @@ ncrnaHeader,ncrnaSequence,params) % transcriptList = 'C:\FISHerMan\transcriptList.fas'; -% if length(varargin) >= 1 -% params = varargin{1}; -% else -% params = struct('species','Mouse','verbose',1,'length',40,'number',24); -% end +% params = struct('species','Mouse','verbose',1,... +% 'dir1','C:\FISHerMan\Db\Mouse.transcriptList.fas',... +% 'length',40,'number',24); %% Pick transcript sequences that are longer than a certain threshold if params(1).verbose @@ -22,8 +20,8 @@ longSequence = {}; for n = 1:length(cdnaSequence) temp = cdnaHeader{n,1}; - pos = regexp(temp, 'ENS\w*T\d*', 'end'); - temp = temp(1:pos); + pos = regexp(temp, ':'); + temp = temp(1:pos(1)-1); if length(cdnaSequence{n,1}) >= (params(1).length*params(1).number) &&... sum(ismember(targetHeader, temp)) longHeader{end+1} = cdnaHeader{n,1}; @@ -32,11 +30,11 @@ end for n = 1:length(ncrnaSequence) temp = ncrnaHeader{n,1}; - pos = regexp(temp, 'ENS\w*T\d*', 'end'); - temp = temp(1:pos); + pos = regexp(temp, ':'); + temp = temp(1:pos(1)-1); if length(ncrnaSequence{n,1}) >= (params(1).length*params(1).number) &&... sum(ismember(targetHeader, temp)) - longHeader{end+1} = ncrnaHeader{n,1}; + longHeader{end+1} = ncrnaHeader{n,1}(1:pos(2)-1); longSequence{end+1} = ncrnaSequence{n,1}; end end diff --git a/utilities/blastAbundantRNASimple.m b/utilities/blastAbundantRNASimple.m index 61f922a..2649b34 100644 --- a/utilities/blastAbundantRNASimple.m +++ b/utilities/blastAbundantRNASimple.m @@ -9,7 +9,7 @@ % params = varargin{1}; % else % params = struct('species','Mouse','verbose',1,... -% 'thres',30,'querySize',30,'DbSize',2*10^5,'seqNum',1000,... +% 'seqNum',1000,'thres',30,'querySize',30,'DbSize',2*10^5,... % 'blastArgs','-S 2','parallel', 0); % end @@ -26,7 +26,7 @@ disp(' spliting fasta files for parallel computing'); end -filePathList = blastFileSplit(Header, Sequence, params(1).seqNum, params); +filePathList = blastFileSplit(Header, Sequence, params); fileNum = length(filePathList); %% Blast mouse oligos against abundant rna @@ -43,13 +43,13 @@ parfor k = 1:fileNum if verbose disp([' blasting temporary file no. ' num2str(k)]); - startTime(k) = tic; +% startTime(k) = tic; end blastData{k,1} = blastOp(filePathList{k}, DbPath, blastArgs); - if verbose - totalTime(k) = toc(startTime(k)); - disp([' elapsed time is ' num2str(totalTime(k)) ' seconds']); - end +% if verbose +% totalTime(k) = toc(startTime(k)); +% disp([' elapsed time is ' num2str(totalTime(k)) ' seconds']); +% end end delete(poolobj); else @@ -76,7 +76,7 @@ for n = 1:length(data) flag = 0; for m = 1:length(data(n).Hits) - if ~strfind(data(n).Query,data(n).Hits(m).Name) + if isempty(strfind(data(n).Query,data(n).Hits(m).Name)) flag = 1; end end diff --git a/utilities/blastFileSplit.m b/utilities/blastFileSplit.m index 97a43f2..1c5b78d 100644 --- a/utilities/blastFileSplit.m +++ b/utilities/blastFileSplit.m @@ -1,23 +1,14 @@ -function filePathList = blastFileSplit(Header, Sequence, seqNum, params) - -% switch length(varargin) -% case 0 -% seqNum = 48; -% params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*'); -% case 1 -% seqNum = varargin{1}; -% params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*'); -% otherwise -% seqNum = varargin{1}; -% params = varargin{2}; -% end +function filePathList = blastFileSplit(Header, Sequence, params) +% params = struct('species','Mouse','verbose',1,'seqNum',1000); + +seqNum = params(1).seqNum; filePathList = {}; % MatLab's use of blastlocal requires short entry names for n = 1:length(Header) - pos = regexp(Header{n,1}, params(1).keys, 'end'); - Header{n,1} = Header{n,1}(1:pos); + pos = regexp(Header{n,1}, ':'); + Header{n,1} = Header{n,1}(1:pos(1)-1); Header{n,1} = strcat(Header{n,1}, '=', num2str(n)); end diff --git a/utilities/checkSpecialTranscripts.m b/utilities/checkSpecialTranscripts.m index a0d7136..c535079 100644 --- a/utilities/checkSpecialTranscripts.m +++ b/utilities/checkSpecialTranscripts.m @@ -1,11 +1,7 @@ function notSpecialTranscript = checkSpecialTranscripts(transcriptName,params) -% if length(varargin) >= 1 -% params = varargin{1}; -% else -% params = struct('species','Mouse','verbose',1,... -% 'specialTranscripts','C:\FISHerMan\Db\Mouse.STList.fas'); -% end +% params = struct('species','Mouse','verbose',1,... +% 'specialTranscripts','C:\FISHerMan\Db\Mouse.STList.fas'); [specialTranscripts,~]=fastaread(params(1).specialTranscripts); specialTranscripts=specialTranscripts'; diff --git a/utilities/pickExpressedSeq.m b/utilities/pickExpressedSeq.m index bed6edf..cb4b517 100644 --- a/utilities/pickExpressedSeq.m +++ b/utilities/pickExpressedSeq.m @@ -1,17 +1,11 @@ -function [expressedHeader, expressedSequence] = pickExpressedSeq(seqData, Header, Sequence, params) - -% if length(varargin) >= 1 -% params = varargin{1}; -% else -% params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*'); -% end +function [expressedHeader, expressedSequence] = pickExpressedSeq(seqData, Header, Sequence) transcriptID = seqData(:,1); -pos = regexp(Header, params(1).keys, 'end'); +pos = regexp(Header, ':'); temp = Header; for n = 1:length(Header) - temp{n,1} = Header{n,1}(1:pos{n,1}); + temp{n,1} = Header{n,1}(1:pos{n,1}(1)-1); end [~,index,~] = intersect(temp, transcriptID, 'stable'); diff --git a/utilities/rearrangeOligos.m b/utilities/rearrangeOligos.m index 20511f1..af50f77 100644 --- a/utilities/rearrangeOligos.m +++ b/utilities/rearrangeOligos.m @@ -3,11 +3,7 @@ function [rearrangedHeader,rearrangedSequence,rearrangednonSequence]=... rearrangeOligos(Header,Sequence,nonSequence,params) -% if length(varargin) >= 1 -% params = varargin{1}; -% else -% params = struct('species','Mouse','verbose',1); -% end +% params = struct('species','Mouse','verbose',1); uniqueHeader = unique(Header, 'stable');