Skip to content

Commit

Permalink
done flexible parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
Rong Li Lab authored and Rong Li Lab committed May 26, 2017
1 parent 1bfe9a6 commit 9843c76
Show file tree
Hide file tree
Showing 11 changed files with 200 additions and 103 deletions.
36 changes: 36 additions & 0 deletions Db/Mouse.parameters.xml
Original file line number Diff line number Diff line change
Expand Up @@ -117,4 +117,40 @@
<grr>CCGCAACATCCAGCATCGTG</grr>
</onePCR>

<!-- otherSteps parameters -->
<!-- blastOtherSteps.m -->
<otherSteps>
<key1>ENS\w*T\d*</key1>
<thres>22</thres>
<querySize>50</querySize>
<blastArgs1>-S 2</blastArgs1>
<blastArgs2>-S 3</blastArgs2>
<grr>CCGCAACATCCAGCATCGTG</grr>
<T7r>CCCTATAGTGAGTCGTATTA</T7r>
<rRr>AGAGTGAGTAGTAGTGGAGT</rRr>
<rGr>GATGATGTAGTAGTAAGGGT</rGr>
<rBr>TGTGATGGAAGTTAGAGGGT</rBr>
<rIRr>GGAGTAGTTGGTTGTTAGGA</rIRr>
</otherSteps>

<!-- arna parameters -->
<!-- blastAbundantRNA.m -->
<arna>
<key1>ENS\w*T\d*</key1>
<seqNum>1000</seqNum>
<thres>30</thres>
<querySize>73</querySize>
<DbSize>200000</DbSize>
<blastArgs>-S 2</blastArgs>
<parallel>0</parallel>
</arna>

<!-- probeList parameters -->
<!-- generateProbeList.m -->
<probeList>
<key1>ENS\w*T\d*</key1>
<number>48</number>
<dir1>C:\FISHerMan\Db\Mouse.STList.fas</dir1>
</probeList>

</parameters>
41 changes: 21 additions & 20 deletions blastAbundantRNA.m
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
% homology of 15 nt or more.

function [Header,Sequence,nonSequence,nonSequence2]...
=blastAbundantRNA(adapterList,Header,Sequence,nonSequence,nonSequence2,varargin)
=blastAbundantRNA(adapterList,Header,Sequence,nonSequence,nonSequence2,params)

if length(varargin) >= 1
params = varargin{1};
else
params = struct('species','Mouse','verbose',1,...
'thres',30,'querySize',73,'DbSize',2*10^5,'seqNum',1000,...
'blastArgs','-S 2','parallel', 0);
end
% if length(varargin) >= 1
% params = varargin{1};
% else
% params = struct('species','Mouse','verbose',1,...
% 'seqNum',1000,'thres',30,'querySize',73,'DbSize',200000,...
% 'blastArgs','-S 2','parallel', 0);
% end

if isempty(nonSequence)
nonSequence = Sequence;
Expand All @@ -21,28 +21,29 @@
nonSequence2 = Sequence;
end

if params.verbose
if params(1).verbose
disp('removing non-specific probes against the abundant rna database');
end

%% Split one giant fasta file into smaller ones, so that parallel computing is possible
if params.verbose
if params(1).verbose
disp(' spliting fasta files for parallel computing');
end

filePathList = blastFileSplit(Header, Sequence, params.seqNum);
filePathList = blastFileSplit(Header, Sequence, params(1).seqNum, params);
fileNum = length(filePathList);

%% Blast oligos against the abundant RNA database of the selected species
eValue = bitScore2eValue(params.thres, params.querySize, params.DbSize);
DbPath = [params(1).species '.abundantrnaDb.fas'];
params(1).DbSize = getDbSize(DbPath);

DbPath = [params.species '.abundantrnaDb.fas'];
blastArgs = [params.blastArgs ' -e ' num2str(eValue)];
eValue = bitScore2eValue(params(1).thres, params(1).querySize, params(1).DbSize);
blastArgs = [params(1).blastArgs ' -e ' num2str(eValue)];

blastData = {};
if params.parallel
if params(1).parallel
poolobj = parpool;
verbose = params.verbose;
verbose = params(1).verbose;
parfor k = 1:fileNum
if verbose
disp([' blasting temporary file no. ' num2str(k)]);
Expand All @@ -57,12 +58,12 @@
delete(poolobj);
else
for k = 1:fileNum
if params.verbose
if params(1).verbose
disp([' blasting temporary file no. ' num2str(k)]);
startTime = tic;
end
blastData{k,1} = blastOp(filePathList{k}, DbPath, blastArgs);
if params.verbose
if params(1).verbose
totalTime = toc(startTime);
disp([' elapsed time is ' num2str(totalTime) ' seconds']);
end
Expand Down Expand Up @@ -95,8 +96,8 @@
nonSequence2(seqDelete)= [];

%% Check how many transcripts are left after this step of screening
[geneNumLeft,geneNumDelete]=checkTranscriptsLeft(adapterList,Header);
if params.verbose
[geneNumLeft,geneNumDelete]=checkTranscriptsLeft(adapterList,Header,params);
if params(1).verbose
disp([num2str(geneNumDelete) ' out of ' num2str(geneNumLeft+geneNumDelete)...
' FISH escaped FISHerMan''s net']);
end
Expand Down
37 changes: 19 additions & 18 deletions blastOtherSteps.m
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
function [probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore]...
=blastOtherSteps(adapterList,probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore,varargin)

if length(varargin) >= 1
params = varargin{1};
else
params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*',...
'thres',22,'querySize',50,'blastArgs','',...
'grr','CCGCAACATCCAGCATCGTG','T7r','CCCTATAGTGAGTCGTATTA',...
'rRr','AGAGTGAGTAGTAGTGGAGT','rGr','GATGATGTAGTAGTAAGGGT',...
'rBr','TGTGATGGAAGTTAGAGGGT','rIRr','GGAGTAGTTGGTTGTTAGGA');
end

if params.verbose
=blastOtherSteps(adapterList,probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore,params)

% if length(varargin) >= 1
% params = varargin{1};
% else
% params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*',...
% 'thres',22,'querySize',50,...
% 'blastArgs1','-S 2','blastArgs2','-S 3',...
% 'grr','CCGCAACATCCAGCATCGTG','T7r','CCCTATAGTGAGTCGTATTA',...
% 'rRr','AGAGTGAGTAGTAGTGGAGT','rGr','GATGATGTAGTAGTAAGGGT',...
% 'rBr','TGTGATGGAAGTTAGAGGGT','rIRr','GGAGTAGTTGGTTGTTAGGA');
% end

if params(1).verbose
disp('removing probes that non-specifically bind to 2nd PCR primers and other probes');
end

Expand All @@ -21,17 +22,17 @@

simpleHeader = probeHeader;
for n = 1:length(probeHeader)
pos = regexp(probeHeader{n,1}, params.keys, 'end');
pos = regexp(probeHeader{n,1}, params(1).keys, 'end');
simpleHeader{n,1} = probeHeader{n,1}(1:pos);
end

seqDelete = [];
for n = 1:length(adapterHeader)
if params.verbose% && mod(n, 1000) == 1
if params(1).verbose% && mod(n, 1000) == 1
disp([' working on trancript ' adapterHeader{n,1}]);
end
temp...
=blastOneTranscript(adapterHeader{n,1},adapterSequence{n,1},simpleHeader,probeSequenceCore);
=blastOneTranscript(adapterHeader{n,1},adapterSequence{n,1},simpleHeader,probeSequenceCore,params);
seqDelete = [seqDelete temp];
end

Expand All @@ -41,8 +42,8 @@
probeSequenceCore(seqDelete)= [];

%% Check how many transcripts are left after this step of screening
[geneNumLeft,geneNumDelete]=checkTranscriptsLeft(adapterList,probeHeader);
if params.verbose
[geneNumLeft,geneNumDelete]=checkTranscriptsLeft(adapterList,probeHeader,params);
if params(1).verbose
disp([num2str(geneNumDelete) ' out of ' num2str(geneNumLeft+geneNumDelete)...
' FISH escaped FISHerMan''s net']);
end
Expand Down
43 changes: 22 additions & 21 deletions generateProbeList.m
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
function probeList=generateProbeList(adapterList,probeHeader,probeSequence,varargin)
function probeList=generateProbeList(adapterList,probeHeader,probeSequence,params)

if length(varargin) >= 1
params = varargin{1};
else
params = struct('species','Mouse','verbose',1,'number',48,...
'keys','ENS\w*T\d*');
end
% if length(varargin) >= 1
% params = varargin{1};
% else
% params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*',...
% 'number',48,'specialTranscripts','C:\FISHerMan\Db\Mouse.STList.fas');
% end

%% Remove transcripts without enough probes
if params.verbose
if params(1).verbose
disp('generating the list of probes to order');
disp(' removing transcripts without enough probes');
end

pos = regexp(probeHeader, params.keys, 'end');
pos = regexp(probeHeader, params(1).keys, 'end');
trimHeader = {};
for n = 1:length(probeHeader)
trimHeader{end+1} = probeHeader{n,1}(1:pos{n,1});
Expand All @@ -24,11 +24,12 @@
indexTotal = zeros(length(trimHeader),1);
for n = 1:length(uniqueHeader)
index = ismember(trimHeader, uniqueHeader{n,1});
if sum(index) < params.number && checkSpecialTranscripts(uniqueHeader{n,1}) % for Bin's special sequences
if sum(index) < params(1).number &&...
checkSpecialTranscripts(uniqueHeader{n,1},params) % for Bin's special sequences
indexTotal = indexTotal+index;
if params.verbose
if params(1).verbose
disp([' transcript ' uniqueHeader{n,1} ...
' has less than ' num2str(params.number) ' probes']);
' has less than ' num2str(params(1).number) ' probes']);
end
end
end
Expand All @@ -43,29 +44,29 @@
indexTotal = [];
for n = 1:length(uniqueHeader)
index = ismember(trimHeader, uniqueHeader{n,1});
if sum(index) > params.number
if sum(index) > params(1).number
index=find(index>0);
index=index(params.number+1:end);
index=index(params(1).number+1:end);
indexTotal=[indexTotal;index];
end
end

probeHeader(indexTotal) = [];
probeSequence(indexTotal) = [];

disp(' randomizing and saving the list of probes');
indexTotal = randperm(length(probeHeader))';
probeHeader = probeHeader(indexTotal);
probeSequence = probeSequence(indexTotal);
probeList = [params.species '.probes.fas'];
% disp(' randomizing and saving the list of probes');
% indexTotal = randperm(length(probeHeader))';
% probeHeader = probeHeader(indexTotal);
% probeSequence = probeSequence(indexTotal);
probeList = [params(1).species '.probes.nr.fas'];
if exist(probeList, 'file')
delete(probeList);
end
fastawrite(probeList,probeHeader,probeSequence);

%% Check how many transcripts are left after this step of screening
[geneNumLeft,geneNumDelete]=checkTranscriptsLeft(adapterList,probeHeader);
if params.verbose
[geneNumLeft,geneNumDelete]=checkTranscriptsLeft(adapterList,probeHeader,params);
if params(1).verbose
disp([num2str(geneNumDelete) ' out of ' num2str(geneNumLeft+geneNumDelete)...
' FISH escaped FISHerMan''s net']);
end
6 changes: 3 additions & 3 deletions main.m
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,14 @@

%% Save the probes of each transcript into individual files
[probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore]...
=blastOtherSteps(adapterList,probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore);
=blastOtherSteps(adapterList,probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore,params.otherSteps);

%% Remove non-specific probes against the abundant rna database
[probeHeader,probeSequence3Seg,probeSequence,probeSequenceCore]...
=blastAbundantRNA(adapterList,probeHeader,probeSequence3Seg,probeSequence,probeSequenceCore);
=blastAbundantRNA(adapterList,probeHeader,probeSequence3Seg,probeSequence,probeSequenceCore,params.arna);

%% Generate the list of probes
probeList=generateProbeList(adapterList,probeHeader,probeSequence);
probeList=generateProbeList(adapterList,probeHeader,probeSequence,params.probeList);

disp('done designing FISH probes');
disp('FISHerMan is at rest');
Expand Down
2 changes: 1 addition & 1 deletion oligosParse.m
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
indexTotal = zeros(length(trimNames),1);
for n = 1:length(uniqueNames)
index = ismember(trimNames, uniqueNames{n,1});
if sum(index) < params(1).number &&...
if sum(index) < params(1).number &&...
checkSpecialTranscripts(uniqueNames{n,1},params) % for Bin's special sequences
indexTotal = indexTotal+index;
if params(1).verbose
Expand Down
60 changes: 60 additions & 0 deletions readParameters.m
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
oligos=xmlParse(parameters, 'parameters', 'oligos');
adapters=xmlParse(parameters, 'parameters', 'adapters');
onePCR=xmlParse(parameters, 'parameters', 'onePCR');
otherSteps=xmlParse(parameters, 'parameters', 'otherSteps');
arna=xmlParse(parameters, 'parameters', 'arna');
probeList=xmlParse(parameters, 'parameters', 'probeList');

%% Parse general parameters
verbose=xmlParse(general, 'general', 'verbose');
Expand Down Expand Up @@ -180,3 +183,60 @@
'gf',char(gf.getFirstChild.getData),...
'grr',char(grr.getFirstChild.getData));

%% Parse parameters for otherSteps
key1=xmlParse(otherSteps, 'otherSteps', 'key1');
thres=xmlParse(otherSteps, 'otherSteps', 'thres');
querySize=xmlParse(otherSteps, 'otherSteps', 'querySize');
blastArgs1=xmlParse(otherSteps, 'otherSteps', 'blastArgs1');
blastArgs2=xmlParse(otherSteps, 'otherSteps', 'blastArgs2');
grr=xmlParse(otherSteps, 'otherSteps', 'grr');
T7r=xmlParse(otherSteps, 'otherSteps', 'T7r');
rRr=xmlParse(otherSteps, 'otherSteps', 'rRr');
rGr=xmlParse(otherSteps, 'otherSteps', 'rGr');
rBr=xmlParse(otherSteps, 'otherSteps', 'rBr');
rIRr=xmlParse(otherSteps, 'otherSteps', 'rIRr');

params.otherSteps = struct('species',species,...
'verbose',str2double(verbose.getFirstChild.getData),...
'keys',char(key1.getFirstChild.getData),...
'thres',str2double(thres.getFirstChild.getData),...
'querySize',str2double(querySize.getFirstChild.getData),...
'blastArgs1',char(blastArgs1.getFirstChild.getData),...
'blastArgs2',char(blastArgs2.getFirstChild.getData),...
'grr',char(grr.getFirstChild.getData),...
'T7r',char(T7r.getFirstChild.getData),...
'rRr',char(rRr.getFirstChild.getData),...
'rGr',char(rGr.getFirstChild.getData),...
'rBr',char(rBr.getFirstChild.getData),...
'rIRr',char(rIRr.getFirstChild.getData));

%% Parse parameters for arna
key1=xmlParse(arna, 'arna', 'key1');
seqNum=xmlParse(arna, 'arna', 'seqNum');
thres=xmlParse(arna, 'arna', 'thres');
querySize=xmlParse(arna, 'arna', 'querySize');
DbSize=xmlParse(arna, 'arna', 'DbSize');
blastArgs=xmlParse(arna, 'arna', 'blastArgs');
parallel=xmlParse(arna, 'arna', 'parallel');

params.arna = struct('species',species,...
'verbose',str2double(verbose.getFirstChild.getData),...
'keys',char(key1.getFirstChild.getData),...
'seqNum',str2double(seqNum.getFirstChild.getData),...
'thres',str2double(thres.getFirstChild.getData),...
'querySize',str2double(querySize.getFirstChild.getData),...
'DbSize',str2double(DbSize.getFirstChild.getData),...
'blastArgs',char(blastArgs.getFirstChild.getData),...
'parallel',str2double(parallel.getFirstChild.getData));

%% Parse parameters for probeList
key1=xmlParse(probeList, 'probeList', 'key1');
num=xmlParse(probeList, 'probeList', 'number');
dir1=xmlParse(probeList, 'probeList', 'dir1');

params.probeList = struct('species',species,...
'verbose',str2double(verbose.getFirstChild.getData),...
'keys',char(key1.getFirstChild.getData),...
'number',str2double(num.getFirstChild.getData),...
'specialTranscripts',char(dir1.getFirstChild.getData));

Loading

0 comments on commit 9843c76

Please sign in to comment.