diff --git a/.gitignore b/.gitignore
index 8067929..ec3c961 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,5 @@
*.nsq
*.log
+# MatLab temp files
+*.asv
\ No newline at end of file
diff --git a/Db/Mouse.parameters.xml b/Db/Mouse.parameters.xml
index 20965c4..b422820 100644
--- a/Db/Mouse.parameters.xml
+++ b/Db/Mouse.parameters.xml
@@ -2,6 +2,10 @@
1
+ cdna
+ ncrna
+ gene:\S*
+ gene_biotype:\S*
@@ -20,8 +24,6 @@
C:\FISHerMan\Db\Mouse38.cdna.fa
- ENS\w*T\d*
- ENS\w*G\d*
@@ -30,20 +32,16 @@
C:\FISHerMan\Db\Mouse38.ncrna.fa
1
C:\FISHerMan\Db\Mouse.trna.fas
- ENS\w*T\d*
- ENS\w*G\d*
- gene_biotype:\S*
0.001
- ENS\w*T\d*
- :rRNA
- :Mt_rRNA
- :tRNA
- :Mt_tRNA
+ :rRNA
+ :Mt_rRNA
+ :tRNA
+ :Mt_tRNA
@@ -78,8 +76,6 @@
- ENS\w*T\d*
- ENS\w*G\d*
48
1000
30
@@ -87,7 +83,7 @@
200000
-S 2
0
- C:\FISHerMan\Db\Mouse.STList.fas
+ C:\FISHerMan\Db\Mouse.STList.fas
diff --git a/Db/Mouse.transcriptList.txt b/Db/Mouse.transcriptList.txt
index dcbf6d9..c75541a 100644
--- a/Db/Mouse.transcriptList.txt
+++ b/Db/Mouse.transcriptList.txt
@@ -4,6 +4,6 @@
>ENSMUST00000015612
->ENSMUST00000183557
+>ENSMUST00000180842
>ENSMUST00000084289
diff --git a/abundantrnaParse.m b/abundantrnaParse.m
index 0ba3e2c..c554e54 100644
--- a/abundantrnaParse.m
+++ b/abundantrnaParse.m
@@ -1,21 +1,9 @@
function [Header,Sequence]...
=abundantrnaParse(cdnaHeader,cdnaSequence,ncrnaHeader,ncrnaSequence,seqData,params)
-% switch length(varargin)
-% case 0
-% seqData = [];
-% params = struct('species','Mouse','verbose',1,...
-% 'percent',0.001,...
-% 'keys',{'ENS\w*T\d*',':rRNA',':Mt_rRNA',':tRNA',':Mt_tRNA'});
-% case 1
-% seqData = varargin{1};
-% params = struct('species','Mouse','verbose',1,...
-% 'percent',0.001,...
-% 'keys',{'ENS\w*T\d*',':rRNA',':Mt_rRNA',':tRNA',':Mt_tRNA'});
-% otherwise
-% seqData = varargin{1};
-% params = varargin{2};
-% end
+% params = struct('species','Mouse','verbose',1,...
+% 'percent',0.001,...
+% 'keys',{':rRNA',':Mt_rRNA',':tRNA',':Mt_tRNA'});
if params(1).verbose
disp('generating abundant rna database files for Blast');
@@ -35,16 +23,17 @@
end
for n = 1:length(ncrnaHeader)
- if ~(isempty(pos{2,1}{n,1}) && isempty(pos{3,1}{n,1})...
- && isempty(pos{4,1}{n,1}) && isempty(pos{5,1}{n,1}))
- transcriptID{end+1,1} = ncrnaHeader{n,1}(1:pos{1,1}{n,1});
+ if ~(isempty(pos{1,1}{n,1}) && isempty(pos{2,1}{n,1})...
+ && isempty(pos{3,1}{n,1}) && isempty(pos{4,1}{n,1}))
+ temp=regexp(ncrnaHeader{n,1}, ':');
+ transcriptID{end+1,1} = ncrnaHeader{n,1}(1:temp(1)-1);
end
end
transcriptID = unique(transcriptID);
Header = vertcat(cdnaHeader,ncrnaHeader);
Sequence = vertcat(cdnaSequence,ncrnaSequence);
-[Header, Sequence] = pickExpressedSeq(transcriptID, Header, Sequence, params);
+[Header, Sequence] = pickExpressedSeq(transcriptID, Header, Sequence);
abundantrna = [params(1).species '.abundantrna.fas'];
if exist(abundantrna, 'file')
@@ -56,8 +45,8 @@
% MatLab's use of blastlocal requires short entry name
simpleHeader = Header;
for n = 1:length(Header)
- pos = regexp(Header{n,1}, params(1).keys, 'end');
- simpleHeader{n,1} = Header{n,1}(1:pos);
+ pos = regexp(Header{n,1}, ':');
+ simpleHeader{n,1} = Header{n,1}(1:pos(1)-1);
end
if exist(abundantrnaDb, 'file')
delete([abundantrnaDb '*']);
diff --git a/cdnaParse.m b/cdnaParse.m
index 7f009b2..f16be78 100644
--- a/cdnaParse.m
+++ b/cdnaParse.m
@@ -1,20 +1,8 @@
function [Header,Sequence]=cdnaParse(cdna,seqData,params)
-% cdna = 'C:\OligoArray\Mouse38.cdna.fa';
-
-% switch length(varargin)
-% case 0
-% seqData = [];
-% params = struct('species','Mouse','verbose',1,...
-% 'keys',{'ENS\w*T\d*','ENS\w*G\d*'});
-% case 1
-% seqData = varargin{1};
-% params = struct('species','Mouse','verbose',1,...
-% 'keys',{'ENS\w*T\d*','ENS\w*G\d*'});
-% otherwise
-% seqData = varargin{1};
-% params = varargin{2};
-% end
+% params = struct('species','Mouse','verbose',1,...
+% 'dir1','C:\FISHerMan\Db\Mouse38.cdna.fa',...
+% 'keys',{'cdna','gene:\S*'});
if params(1).verbose
disp('reading the cdna data file');
@@ -34,15 +22,22 @@
for n = 1:length(Header)
temp = Header{n,1};
- temp1 = temp(pos1{1,1}{n,1}:pos2{1,1}{n,1});
- temp2 = temp(pos1{2,1}{n,1}:pos2{2,1}{n,1});
+ temp1 = temp(1:pos1{1,1}{n,1}-2);
+ temp2 = temp(pos1{2,1}{n,1}+5:pos2{2,1}{n,1});
if isempty(temp1)
disp('missing transcript ID');
+ elseif strfind(temp1,'.')
+ temp1pos=strfind(temp1,'.');
+ temp1=temp1(1:temp1pos(1)-1);
end
if isempty(temp2)
disp('missing gene ID');
+ elseif strfind(temp2,'.')
+ temp2pos=strfind(temp2,'.');
+ temp2=temp2(1:temp2pos(1)-1);
end
+
Header{n,1} = strcat(temp1, ':', temp2);
end
@@ -50,7 +45,7 @@
if params(1).verbose
disp(' picking expressed sequences according to RNA-seq data');
end
- [Header, Sequence] = pickExpressedSeq(seqData, Header, Sequence, params);
+ [Header, Sequence] = pickExpressedSeq(seqData, Header, Sequence);
end
if params(1).verbose
@@ -71,8 +66,8 @@
% MatLab's use of blastlocal requires short entry names
simpleHeader = Header;
for n = 1:length(Header)
- pos = regexp(Header{n,1}, params(1).keys, 'end');
- simpleHeader{n,1} = Header{n,1}(1:pos);
+ pos = regexp(Header{n,1}, ':');
+ simpleHeader{n,1} = Header{n,1}(1:pos(1)-1);
end
if exist(cdnaDb, 'file')
delete([cdnaDb '*']);
diff --git a/main.m b/main.m
index 446efd5..e491353 100644
--- a/main.m
+++ b/main.m
@@ -63,7 +63,7 @@
[probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore]...
=blast1stPCR(adapterList,probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore,params.onePCR);
-%% Save the probes of each transcripts into individual files
+%% Remove non-specific probes that will affect other synthesis steps
[probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore]...
=blastOtherSteps(adapterList,probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore,params.otherSteps);
diff --git a/ncrnaParse.m b/ncrnaParse.m
index 5b8aa32..810fd20 100644
--- a/ncrnaParse.m
+++ b/ncrnaParse.m
@@ -1,20 +1,9 @@
function [Header,Sequence]=ncrnaParse(ncrna,seqData,trna,params)
-% ncrna = 'C:\OligoArray\Mouse38.ncrna.fa';
-
-% switch length(varargin)
-% case 0
-% seqData = [];
-% params = struct('species','Mouse','verbose',1,...
-% 'keys',{'ENS\w*T\d*','ENS\w*G\d*','gene_biotype:\S*'});
-% case 1
-% seqData = varargin{1};
-% params = struct('species','Mouse','verbose',1,...
-% 'keys',{'ENS\w*T\d*','ENS\w*G\d*','gene_biotype:\S*'});
-% otherwise
-% seqData = varargin{1};
-% params = varargin{2};
-% end
+% params = struct('species','Mouse','verbose',1,...
+% 'dir1','C:\FISHerMan\Db\Mouse38.ncrna.fa',...
+% 'tRNA',1,'dirT','C:\FISHerMan\Db\Mouse.trna.fas',...
+% 'keys',{'ncrna','gene:\S*','gene_biotype:\S*'});
if params(1).verbose
disp('reading the ncrna data file');
@@ -34,18 +23,27 @@
for n = 1:length(Header)
temp = Header{n,1};
- temp1 = temp(pos1{1,1}{n,1}:pos2{1,1}{n,1});
- temp2 = temp(pos1{2,1}{n,1}:pos2{2,1}{n,1});
+ temp1 = temp(1:pos1{1,1}{n,1}-2);
+ temp2 = temp(pos1{2,1}{n,1}+5:pos2{2,1}{n,1});
temp3 = temp(pos1{3,1}{n,1}+13:pos2{3,1}{n,1});
if isempty(temp1)
disp('missing transcript ID');
+ elseif strfind(temp1,'.')
+ temp1pos=strfind(temp1,'.');
+ temp1=temp1(1:temp1pos(1)-1);
end
if isempty(temp2)
disp('missing gene ID');
+ elseif strfind(temp2,'.')
+ temp2pos=strfind(temp2,'.');
+ temp2=temp2(1:temp2pos(1)-1);
end
if isempty(temp3)
disp('missing gene type');
+ elseif strfind(temp3,'.')
+ temp3pos=strfind(temp3,'.');
+ temp3=temp3(1:temp3pos(1)-1);
end
Header{n,1} = strcat(temp1, ':', temp2, ':', temp3);
@@ -55,7 +53,7 @@
if params(1).verbose
disp(' picking expressed sequences according to RNA-seq data');
end
- [Header, Sequence] = pickExpressedSeq(seqData, Header, Sequence, params);
+ [Header, Sequence] = pickExpressedSeq(seqData, Header, Sequence);
end
if ~isempty(trna)
@@ -89,8 +87,8 @@
% MatLab's use of blastlocal requires short entry names
simpleHeader = Header;
for n = 1:length(Header)
- pos = regexp(Header{n,1}, params(1).keys, 'end');
- simpleHeader{n,1} = Header{n,1}(1:pos);
+ pos = regexp(Header{n,1}, ':');
+ simpleHeader{n,1} = Header{n,1}(1:pos(1)-1);
end
if exist(ncrnaDb, 'file')
delete([ncrnaDb '*']);
diff --git a/oligosParse.m b/oligosParse.m
index 81a8867..1d68927 100644
--- a/oligosParse.m
+++ b/oligosParse.m
@@ -1,26 +1,19 @@
function oligos = oligosParse(params)
-% oligos = 'C:\OligoArray\oligos.txt';
-
-% if length(varargin) >= 1
-% params = varargin{1};
-% else
-% params = struct('species','Mouse','verbose',1,...
-% 'keys',{'ENS\w*T\d*','ENS\w*G\d*'},'number',48,...
-% 'thres',30,'querySize',30,'DbSize',2*10^5,'seqNum',1000,...
-% 'blastArgs','-S 2','parallel', 0,...
-% 'specialTranscripts','C:\FISHerMan\Db\Mouse.STList.fas');
-% end
+% params = struct('species','Mouse','verbose',1,...
+% 'number',48,'seqNum',1000,'thres',30,'querySize',30,...
+% 'DbSize',2*10^5,'blastArgs','-S 2','parallel', 0,...
+% 'specialTranscripts','C:\FISHerMan\Db\Mouse.STList.fas');
+
+if params(1).verbose
+ disp('reading the result file from OligoArray');
+end
oligos = [params(1).species '.tempoligos.txt'];
if ~exist(oligos, 'file')
warning('missing important files from OligoArray');
end
-if params(1).verbose
- disp('reading the result file from OligoArray');
-end
-
fid = fopen(oligos,'r');
fmt = '%s %f %f %f %f %f %f %s %s %s %*[^\n]';
temp = textscan(fid,fmt,'CollectOutput',true,'delimiter','\t','TreatAsEmpty','NA');
@@ -39,14 +32,16 @@
if params(1).verbose && mod(n, 1000) == 1
disp([' analyzing oligo entry no. ' num2str(n)]);
end
- [pos1, pos2] = regexp(nonspecificHits{n,1}, params(2).keys, 'start', 'end');
+
+ pos = regexp(geneNames{n,1}, ':');
+ geneName=geneNames{n,1}(pos(1)+1:end);
flag = 0;
- for m = 1:length(pos1)
- if ~strfind(geneNames{n,1}, nonspecificHits{n,1}(pos1(m):pos2(m)))
- flag = 1;
- end
+ if length(regexp(nonspecificHits{n,1}, geneName)) < ...
+ length(regexp(nonspecificHits{n,1}, ':'))
+ flag = 1;
end
+
if flag == 1
index = [index n];
end
@@ -66,33 +61,6 @@
specificHits{n,1} = seqrcomplement(specificHits{n,1});
end
-%% Remove transcripts without enough oligos
-% if params.verbose
-% disp('removing transcripts without enough oligos');
-% end
-
-% pos = regexp(geneNames, params(1).keys, 'end');
-% trimNames = {};
-% for n = 1:length(geneNames)
-% trimNames{end+1} = geneNames{n,1}(1:pos{n,1});
-% end
-% trimNames = trimNames';
-% uniqueNames = unique(trimNames, 'stable');
-%
-% indexTotal = zeros(length(trimNames),1);
-% for n = 1:length(uniqueNames)
-% index = ismember(trimNames, uniqueNames{n,1});
-% if sum(index) < params.number
-% indexTotal = indexTotal+index;
-% disp(['transcript ' uniqueNames{n,1} ' has less than ' num2str(params.number) ' probes']);
-% end
-% end
-%
-% indexTotal = logical(indexTotal);
-% geneNames(indexTotal) = [];
-% nonspecificHits(indexTotal) = [];
-% specificHits(indexTotal) = [];
-
%% Blast oligos against abundant rna database and remove non-specific oligos
[geneNames,specificHits,nonspecificHits]...
=blastAbundantRNASimple(geneNames,specificHits,nonspecificHits,params);
@@ -103,10 +71,10 @@
disp('removing transcripts without enough oligos');
end
-pos = regexp(geneNames, params(1).keys, 'end');
+pos = regexp(geneNames, ':');
trimNames = {};
for n = 1:length(geneNames)
- trimNames{end+1} = geneNames{n,1}(1:pos{n,1});
+ trimNames{end+1} = geneNames{n,1}(1:pos{n,1}(1)-1);
end
trimNames = trimNames';
uniqueNames = unique(trimNames, 'stable');
diff --git a/readParameters.m b/readParameters.m
index 109450f..6897974 100644
--- a/readParameters.m
+++ b/readParameters.m
@@ -29,6 +29,11 @@
%% Parse general parameters
verbose=xmlParse(general, 'general', 'verbose');
+gkey1=xmlParse(general, 'general', 'key1');
+gkey2=xmlParse(general, 'general', 'key2');
+gkey3=xmlParse(general, 'general', 'key3');
+gkey4=xmlParse(general, 'general', 'key4');
+gkey5=xmlParse(general, 'general', 'key5');
%% Parse parameters for rnaSeq
data=xmlParse(rnaSeq, 'rnaSeq', 'data');
@@ -58,13 +63,11 @@
%% Parse parameters for cdna
dir1=xmlParse(cdna, 'cdna', 'dir1');
-key1=xmlParse(cdna, 'cdna', 'key1');
-key2=xmlParse(cdna, 'cdna', 'key2');
params.cdna = struct('species',species,...
'verbose',str2double(verbose.getFirstChild.getData),...
'dir1',char(dir1.getFirstChild.getData),...
- 'keys',{char(key1.getFirstChild.getData),char(key2.getFirstChild.getData)});
+ 'keys',{char(gkey1.getFirstChild.getData),char(gkey3.getFirstChild.getData)});
%% Parse parameters for ncrna
dir1=xmlParse(ncrna, 'ncrna', 'dir1');
@@ -74,17 +77,14 @@
else
dirT=tRNA;
end
-key1=xmlParse(ncrna, 'ncrna', 'key1');
-key2=xmlParse(ncrna, 'ncrna', 'key2');
-key3=xmlParse(ncrna, 'ncrna', 'key3');
params.ncrna = struct('species',species,...
'verbose',str2double(verbose.getFirstChild.getData),...
'dir1',char(dir1.getFirstChild.getData),...
'tRNA',str2double(tRNA.getFirstChild.getData),...
'dirT',char(dirT.getFirstChild.getData),...
- 'keys',{char(key1.getFirstChild.getData),char(key2.getFirstChild.getData),...
- char(key3.getFirstChild.getData)});
+ 'keys',{char(gkey2.getFirstChild.getData),char(gkey3.getFirstChild.getData),...
+ char(gkey4.getFirstChild.getData)});
%% Parse parameters for abundantrna
percent=xmlParse(abundantrna, 'abundantrna', 'percent');
@@ -92,14 +92,12 @@
key2=xmlParse(abundantrna, 'abundantrna', 'key2');
key3=xmlParse(abundantrna, 'abundantrna', 'key3');
key4=xmlParse(abundantrna, 'abundantrna', 'key4');
-key5=xmlParse(abundantrna, 'abundantrna', 'key5');
params.abundantrna = struct('species',species,...
'verbose',str2double(verbose.getFirstChild.getData),...
'percent',str2double(percent.getFirstChild.getData),...
'keys',{char(key1.getFirstChild.getData),char(key2.getFirstChild.getData),...
- char(key3.getFirstChild.getData),char(key4.getFirstChild.getData),...
- char(key5.getFirstChild.getData)});
+ char(key3.getFirstChild.getData),char(key4.getFirstChild.getData)});
%% Parse parameters for transcriptList
dir1=xmlParse(transcriptList, 'transcriptList', 'dir1');
@@ -116,8 +114,6 @@
%% Parse parameters for oligos
-key1=xmlParse(oligos, 'oligos', 'key1');
-key2=xmlParse(oligos, 'oligos', 'key2');
num=xmlParse(oligos, 'oligos', 'number');
seqNum=xmlParse(oligos, 'oligos', 'seqNum');
thres=xmlParse(oligos, 'oligos', 'thres');
@@ -125,11 +121,10 @@
DbSize=xmlParse(oligos, 'oligos', 'DbSize');
blastArgs=xmlParse(oligos, 'oligos', 'blastArgs');
parallel=xmlParse(oligos, 'oligos', 'parallel');
-dir1=xmlParse(oligos, 'oligos', 'dir1');
+dirST=xmlParse(oligos, 'oligos', 'dirST');
params.oligos = struct('species',species,...
'verbose',str2double(verbose.getFirstChild.getData),...
- 'keys',{char(key1.getFirstChild.getData),char(key2.getFirstChild.getData)},...
'number',str2double(num.getFirstChild.getData),...
'seqNum',str2double(seqNum.getFirstChild.getData),...
'thres',str2double(thres.getFirstChild.getData),...
@@ -137,7 +132,7 @@
'DbSize',str2double(DbSize.getFirstChild.getData),...
'blastArgs',char(blastArgs.getFirstChild.getData),...
'parallel',str2double(parallel.getFirstChild.getData),...
- 'specialTranscripts',char(dir1.getFirstChild.getData));
+ 'specialTranscripts',char(dirST.getFirstChild.getData));
%% Parse parameters for adapters
dir1=xmlParse(adapters, 'adapters', 'dir1');
diff --git a/runOligoArray.m b/runOligoArray.m
index 5b47f22..8794291 100644
--- a/runOligoArray.m
+++ b/runOligoArray.m
@@ -65,5 +65,9 @@ function runOligoArray(varargin)
disp('done generating oligos');
end
-copyfile([params.oligoArrayPath 'oligos.txt'], [params.species '.tempoligos.txt']);
+tempOligos=[params.species '.tempoligos.txt'];
+if exist(tempOligos,'file')
+ delete(tempOligos);
+end
+copyfile([params.oligoArrayPath 'oligos.txt'], tempOligos);
diff --git a/transcriptListParse.m b/transcriptListParse.m
index e1a59ae..7b72dd1 100644
--- a/transcriptListParse.m
+++ b/transcriptListParse.m
@@ -2,11 +2,9 @@
ncrnaHeader,ncrnaSequence,params)
% transcriptList = 'C:\FISHerMan\transcriptList.fas';
-% if length(varargin) >= 1
-% params = varargin{1};
-% else
-% params = struct('species','Mouse','verbose',1,'length',40,'number',24);
-% end
+% params = struct('species','Mouse','verbose',1,...
+% 'dir1','C:\FISHerMan\Db\Mouse.transcriptList.fas',...
+% 'length',40,'number',24);
%% Pick transcript sequences that are longer than a certain threshold
if params(1).verbose
@@ -22,8 +20,8 @@
longSequence = {};
for n = 1:length(cdnaSequence)
temp = cdnaHeader{n,1};
- pos = regexp(temp, 'ENS\w*T\d*', 'end');
- temp = temp(1:pos);
+ pos = regexp(temp, ':');
+ temp = temp(1:pos(1)-1);
if length(cdnaSequence{n,1}) >= (params(1).length*params(1).number) &&...
sum(ismember(targetHeader, temp))
longHeader{end+1} = cdnaHeader{n,1};
@@ -32,11 +30,11 @@
end
for n = 1:length(ncrnaSequence)
temp = ncrnaHeader{n,1};
- pos = regexp(temp, 'ENS\w*T\d*', 'end');
- temp = temp(1:pos);
+ pos = regexp(temp, ':');
+ temp = temp(1:pos(1)-1);
if length(ncrnaSequence{n,1}) >= (params(1).length*params(1).number) &&...
sum(ismember(targetHeader, temp))
- longHeader{end+1} = ncrnaHeader{n,1};
+ longHeader{end+1} = ncrnaHeader{n,1}(1:pos(2)-1);
longSequence{end+1} = ncrnaSequence{n,1};
end
end
diff --git a/utilities/blastAbundantRNASimple.m b/utilities/blastAbundantRNASimple.m
index 61f922a..2649b34 100644
--- a/utilities/blastAbundantRNASimple.m
+++ b/utilities/blastAbundantRNASimple.m
@@ -9,7 +9,7 @@
% params = varargin{1};
% else
% params = struct('species','Mouse','verbose',1,...
-% 'thres',30,'querySize',30,'DbSize',2*10^5,'seqNum',1000,...
+% 'seqNum',1000,'thres',30,'querySize',30,'DbSize',2*10^5,...
% 'blastArgs','-S 2','parallel', 0);
% end
@@ -26,7 +26,7 @@
disp(' spliting fasta files for parallel computing');
end
-filePathList = blastFileSplit(Header, Sequence, params(1).seqNum, params);
+filePathList = blastFileSplit(Header, Sequence, params);
fileNum = length(filePathList);
%% Blast mouse oligos against abundant rna
@@ -43,13 +43,13 @@
parfor k = 1:fileNum
if verbose
disp([' blasting temporary file no. ' num2str(k)]);
- startTime(k) = tic;
+% startTime(k) = tic;
end
blastData{k,1} = blastOp(filePathList{k}, DbPath, blastArgs);
- if verbose
- totalTime(k) = toc(startTime(k));
- disp([' elapsed time is ' num2str(totalTime(k)) ' seconds']);
- end
+% if verbose
+% totalTime(k) = toc(startTime(k));
+% disp([' elapsed time is ' num2str(totalTime(k)) ' seconds']);
+% end
end
delete(poolobj);
else
@@ -76,7 +76,7 @@
for n = 1:length(data)
flag = 0;
for m = 1:length(data(n).Hits)
- if ~strfind(data(n).Query,data(n).Hits(m).Name)
+ if isempty(strfind(data(n).Query,data(n).Hits(m).Name))
flag = 1;
end
end
diff --git a/utilities/blastFileSplit.m b/utilities/blastFileSplit.m
index 97a43f2..1c5b78d 100644
--- a/utilities/blastFileSplit.m
+++ b/utilities/blastFileSplit.m
@@ -1,23 +1,14 @@
-function filePathList = blastFileSplit(Header, Sequence, seqNum, params)
-
-% switch length(varargin)
-% case 0
-% seqNum = 48;
-% params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*');
-% case 1
-% seqNum = varargin{1};
-% params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*');
-% otherwise
-% seqNum = varargin{1};
-% params = varargin{2};
-% end
+function filePathList = blastFileSplit(Header, Sequence, params)
+% params = struct('species','Mouse','verbose',1,'seqNum',1000);
+
+seqNum = params(1).seqNum;
filePathList = {};
% MatLab's use of blastlocal requires short entry names
for n = 1:length(Header)
- pos = regexp(Header{n,1}, params(1).keys, 'end');
- Header{n,1} = Header{n,1}(1:pos);
+ pos = regexp(Header{n,1}, ':');
+ Header{n,1} = Header{n,1}(1:pos(1)-1);
Header{n,1} = strcat(Header{n,1}, '=', num2str(n));
end
diff --git a/utilities/checkSpecialTranscripts.m b/utilities/checkSpecialTranscripts.m
index a0d7136..c535079 100644
--- a/utilities/checkSpecialTranscripts.m
+++ b/utilities/checkSpecialTranscripts.m
@@ -1,11 +1,7 @@
function notSpecialTranscript = checkSpecialTranscripts(transcriptName,params)
-% if length(varargin) >= 1
-% params = varargin{1};
-% else
-% params = struct('species','Mouse','verbose',1,...
-% 'specialTranscripts','C:\FISHerMan\Db\Mouse.STList.fas');
-% end
+% params = struct('species','Mouse','verbose',1,...
+% 'specialTranscripts','C:\FISHerMan\Db\Mouse.STList.fas');
[specialTranscripts,~]=fastaread(params(1).specialTranscripts);
specialTranscripts=specialTranscripts';
diff --git a/utilities/pickExpressedSeq.m b/utilities/pickExpressedSeq.m
index bed6edf..cb4b517 100644
--- a/utilities/pickExpressedSeq.m
+++ b/utilities/pickExpressedSeq.m
@@ -1,17 +1,11 @@
-function [expressedHeader, expressedSequence] = pickExpressedSeq(seqData, Header, Sequence, params)
-
-% if length(varargin) >= 1
-% params = varargin{1};
-% else
-% params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*');
-% end
+function [expressedHeader, expressedSequence] = pickExpressedSeq(seqData, Header, Sequence)
transcriptID = seqData(:,1);
-pos = regexp(Header, params(1).keys, 'end');
+pos = regexp(Header, ':');
temp = Header;
for n = 1:length(Header)
- temp{n,1} = Header{n,1}(1:pos{n,1});
+ temp{n,1} = Header{n,1}(1:pos{n,1}(1)-1);
end
[~,index,~] = intersect(temp, transcriptID, 'stable');
diff --git a/utilities/rearrangeOligos.m b/utilities/rearrangeOligos.m
index 20511f1..af50f77 100644
--- a/utilities/rearrangeOligos.m
+++ b/utilities/rearrangeOligos.m
@@ -3,11 +3,7 @@
function [rearrangedHeader,rearrangedSequence,rearrangednonSequence]=...
rearrangeOligos(Header,Sequence,nonSequence,params)
-% if length(varargin) >= 1
-% params = varargin{1};
-% else
-% params = struct('species','Mouse','verbose',1);
-% end
+% params = struct('species','Mouse','verbose',1);
uniqueHeader = unique(Header, 'stable');