Skip to content

Commit 1bfe9a6

Browse files
Rong Li Lab authored and Rong Li Lab committed
flexible parameters:before 1stPCR
1 parent 4f1d3b0 commit 1bfe9a6

10 files changed

+143
-85
lines changed

Db/Mouse.parameters.xml

+28-1
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
</general>
66

77
<!-- rnaSeq parameters -->
8-
<!-- averageRNAseq.m and readRNASeq.m -->
8+
<!-- averageRNASeq.m and readRNASeq.m -->
99
<rnaSeq>
1010
<data>2</data>
1111
<dir1>C:\FISHerMan\Db\mouse_frontal_cortex_mRNASeq_ENCFF653BKJ.xlsx</dir1>
@@ -90,4 +90,31 @@
9090
<dir1>C:\FISHerMan\Db\Mouse.STList.fas</dir1>
9191
</oligos>
9292

93+
<!-- adapters parameters -->
94+
<!-- appendAdapters.m -->
95+
<adapters>
96+
<dir1>C:\FISHerMan\Db\Mouse.alladapters.fas</dir1>
97+
<key1>ENS\w*T\d*</key1>
98+
<gf>GGAATCGTTGCGGGTGTCCT</gf>
99+
<grr>CCGCAACATCCAGCATCGTG</grr>
100+
<T7r>CCCTATAGTGAGTCGTATTA</T7r>
101+
<rRr>AGAGTGAGTAGTAGTGGAGT</rRr>
102+
<rGr>GATGATGTAGTAGTAAGGGT</rGr>
103+
<rBr>TGTGATGGAAGTTAGAGGGT</rBr>
104+
<rIRr>GGAGTAGTTGGTTGTTAGGA</rIRr>
105+
</adapters>
106+
107+
<!-- 1stPCR parameters -->
108+
<!-- blast1stPCR.m -->
109+
<onePCR>
110+
<key1>ENS\w*T\d*</key1>
111+
<seqNum>1000</seqNum>
112+
<thres>22</thres>
113+
<querySize>20</querySize>
114+
<blastArgs>-S 3</blastArgs>
115+
<parallel>0</parallel>
116+
<gf>GGAATCGTTGCGGGTGTCCT</gf>
117+
<grr>CCGCAACATCCAGCATCGTG</grr>
118+
</onePCR>
119+
93120
</parameters>

appendAdapters.m

+17-17
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,25 @@
11
function [adapterList,probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore]...
2-
=appendAdapters(adapterList,oligos,varargin)
2+
=appendAdapters(adapterList,oligos,params)
33

4-
if length(varargin) >= 1
5-
params = varargin{1};
6-
else
7-
params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*',...
8-
'gf','GGAATCGTTGCGGGTGTCCT','grr','CCGCAACATCCAGCATCGTG',...
9-
'T7r','CCCTATAGTGAGTCGTATTA',...
10-
'rRr','AGAGTGAGTAGTAGTGGAGT','rGr','GATGATGTAGTAGTAAGGGT',...
11-
'rBr','TGTGATGGAAGTTAGAGGGT','rIRr','GGAGTAGTTGGTTGTTAGGA');
12-
end
4+
% if length(varargin) >= 1
5+
% params = varargin{1};
6+
% else
7+
% params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*',...
8+
% 'gf','GGAATCGTTGCGGGTGTCCT','grr','CCGCAACATCCAGCATCGTG',...
9+
% 'T7r','CCCTATAGTGAGTCGTATTA',...
10+
% 'rRr','AGAGTGAGTAGTAGTGGAGT','rGr','GATGATGTAGTAGTAAGGGT',...
11+
% 'rBr','TGTGATGGAAGTTAGAGGGT','rIRr','GGAGTAGTTGGTTGTTAGGA');
12+
% end
1313

14-
if params.verbose
14+
if params(1).verbose
1515
disp('concatenating oligos with adapters');
1616
end
1717

1818
[Header, Sequence] = fastaread(oligos);
1919
Header = Header';
2020
Sequence = Sequence';
2121

22-
pos = regexp(Header, params.keys, 'end');
22+
pos = regexp(Header, params(1).keys, 'end');
2323
trimmedHeader = Header;
2424
for n = 1:length(Header)
2525
trimmedHeader{n,1} = Header{n,1}(1:pos{n,1});
@@ -31,7 +31,7 @@
3131
adapterHeader = uniqueHeader;
3232
adapterSequence = adapterSequence(1:geneNum)';
3333

34-
adapterList = [params.species '.adapters.fas'];
34+
adapterList = [params(1).species '.adapters.fas'];
3535
if exist(adapterList, 'file')
3636
delete(adapterList);
3737
end
@@ -53,7 +53,7 @@
5353
probeSequence3Seg = {};
5454
probeSequenceCore = {};
5555
for n = 1:length(adapterHeader)
56-
if params.verbose && mod(n, 1000) == 1
56+
if params(1).verbose && mod(n, 1000) == 1
5757
disp([' concatenating oligos for transcript no. ' num2str(n)]);
5858
end
5959
index = ismember(trimmedHeader, adapterHeader{n,1});
@@ -66,9 +66,9 @@
6666
temp3Seg = probe3Seg{m,1};
6767
tempCore = probeCore{m,1};
6868

69-
temp = strcat(params.gf, adapterSequence{n,1}, temp, params.grr);
70-
temp3Seg = strcat(adapterSequence{n,1}, temp3Seg, params.grr, 'CCC');
71-
tempCore = strcat(adapterSequence{n,1}(11:20), tempCore, params.grr(1:10));
69+
temp = strcat(params(1).gf, adapterSequence{n,1}, temp, params(1).grr);
70+
temp3Seg = strcat(adapterSequence{n,1}, temp3Seg, params(1).grr, 'CCC');
71+
tempCore = strcat(adapterSequence{n,1}(11:20), tempCore, params(1).grr(1:10));
7272

7373
probe{m,1} = temp;
7474
probe3Seg{m,1} = temp3Seg;

blast1stPCR.m

+32-33
Original file line number | Diff line number | Diff line change
@@ -1,32 +1,32 @@
11
function [probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore]...
2-
=blast1stPCR(adapterList,probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore,varargin)
3-
4-
if length(varargin) >= 1
5-
params = varargin{1};
6-
else
7-
params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*',...
8-
'thres',22,'querySize',20,'seqNum',1000,...
9-
'blastArgs','','parallel', 0,...
10-
'gf','GGAATCGTTGCGGGTGTCCT','grr','CCGCAACATCCAGCATCGTG',...
11-
'T7r','CCCTATAGTGAGTCGTATTA',...
12-
'rRr','AGAGTGAGTAGTAGTGGAGT','rGr','GATGATGTAGTAGTAAGGGT',...
13-
'rBr','TGTGATGGAAGTTAGAGGGT','rIRr','GGAGTAGTTGGTTGTTAGGA');
14-
end
15-
16-
if params.verbose
2+
=blast1stPCR(adapterList,probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore,params)
3+
4+
% if length(varargin) >= 1
5+
% params = varargin{1};
6+
% else
7+
% params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*',...
8+
% 'thres',22,'querySize',20,'seqNum',1000,...
9+
% 'blastArgs','-S 3','parallel', 0,...
10+
% 'gf','GGAATCGTTGCGGGTGTCCT','grr','CCGCAACATCCAGCATCGTG',...
11+
% 'T7r','CCCTATAGTGAGTCGTATTA',...
12+
% 'rRr','AGAGTGAGTAGTAGTGGAGT','rGr','GATGATGTAGTAGTAAGGGT',...
13+
% 'rBr','TGTGATGGAAGTTAGAGGGT','rIRr','GGAGTAGTTGGTTGTTAGGA');
14+
% end
15+
16+
if params(1).verbose
1717
disp('removing probes that non-specifically bind to primers in the 1st PCR step');
1818
end
1919

2020
%% Generate probe database files for Blast
21-
if params.verbose
21+
if params(1).verbose
2222
disp(' generating probe database files for Blast');
2323
end
2424

25-
probesDb = [params.species '.probesDb.fas'];
25+
probesDb = [params(1).species '.probesDb.fas'];
2626
% MatLab's use of blastlocal requires short entry names
2727
simpleHeader = probeHeader;
2828
for n = 1:length(probeHeader)
29-
pos = regexp(probeHeader{n,1}, params.keys, 'end');
29+
pos = regexp(probeHeader{n,1}, params(1).keys, 'end');
3030
simpleHeader{n,1} = strcat(probeHeader{n,1}(1:pos),'=',num2str(n));
3131
end
3232
if exist(probesDb, 'file')
@@ -36,34 +36,33 @@
3636
blastformat('Inputdb', probesDb,...
3737
'FormatArgs', '-o T -p F');
3838

39-
params.DbSize = getDbSize(probesDb);
40-
4139
%% Split one giant fasta file into smaller ones, so that parallel computing is possible
42-
if params.verbose
40+
if params(1).verbose
4341
disp(' spliting fasta files for parallel computing');
4442
end
4543

4644
[adapterHeader, adapterSequence] = fastaread(adapterList);
4745
adapterHeader = adapterHeader';
4846
adapterSequence = adapterSequence';
4947
adapterHeader{end+1,1} = 'ENSPRIMERT00';
50-
adapterSequence{end+1,1} = params.gf;
48+
adapterSequence{end+1,1} = params(1).gf;
5149
adapterHeader{end+1,1} = 'ENSPRIMERT01';
52-
adapterSequence{end+1,1} = params.grr;
50+
adapterSequence{end+1,1} = params(1).grr;
5351

54-
filePathList = blastFileSplit(adapterHeader, adapterSequence, params.seqNum);
52+
filePathList = blastFileSplit(adapterHeader, adapterSequence, params(1).seqNum, params);
5553
fileNum = length(filePathList);
5654

5755
%% Blast primers against probes
58-
eValue = bitScore2eValue(params.thres, params.querySize, params.DbSize);
59-
6056
DbPath = probesDb;
61-
blastArgs = [params.blastArgs ' -e ' num2str(eValue) ' -b ' num2str(length(probeHeader))];
57+
params(1).DbSize = getDbSize(DbPath);
58+
59+
eValue = bitScore2eValue(params(1).thres, params(1).querySize, params(1).DbSize);
60+
blastArgs = [params(1).blastArgs ' -e ' num2str(eValue) ' -b ' num2str(length(probeHeader))];
6261

6362
blastData = {};
64-
if params.parallel
63+
if params(1).parallel
6564
poolobj = parpool;
66-
verbose = params.verbose;
65+
verbose = params(1).verbose;
6766
parfor k = 1:fileNum
6867
if verbose
6968
disp([' blasting temporary file no. ' num2str(k)]);
@@ -78,12 +77,12 @@
7877
delete(poolobj);
7978
else
8079
for k = 1:fileNum
81-
if params.verbose
80+
if params(1).verbose
8281
disp([' blasting temporary file no. ' num2str(k)]);
8382
startTime = tic;
8483
end
8584
blastData{k,1} = blastOp(filePathList{k}, DbPath, blastArgs);
86-
if params.verbose
85+
if params(1).verbose
8786
totalTime = toc(startTime);
8887
disp([' elapsed time is ' num2str(totalTime) ' seconds']);
8988
end
@@ -114,8 +113,8 @@
114113
probeSequenceCore(seqDelete)= [];
115114

116115
%% Check how many transcripts are left after this step of screening
117-
[geneNumLeft,geneNumDelete]=checkTranscriptsLeft(adapterList,probeHeader);
118-
if params.verbose
116+
[geneNumLeft,geneNumDelete]=checkTranscriptsLeft(adapterList,probeHeader,params);
117+
if params(1).verbose
119118
disp([num2str(geneNumDelete) ' out of ' num2str(geneNumLeft+geneNumDelete)...
120119
' FISH escaped FISHerMan''s net']);
121120
end

main.m

+3-6
Original file line number | Diff line number | Diff line change
@@ -35,12 +35,11 @@
3535
% RNA seq data, and it will tell me which transcripts in the ncrna
3636
% database are expressed. But be sure to include rRNA and tRNA, for often
3737
% these two types of RNA are depleted in RNA seq.
38+
[cdnaHeader,cdnaSequence]=cdnaParse(params.cdna(1).dir1,seqData,params.cdna);
3839
if params.rnaSeq(1).mRNA
39-
[cdnaHeader,cdnaSequence]=cdnaParse(params.cdna(1).dir1,seqData,params.cdna);
4040
[ncrnaHeader,ncrnaSequence]...
4141
=ncrnaParse(params.ncrna(1).dir1,[],params.ncrna(1).dirT,params.ncrna);
4242
else
43-
[cdnaHeader,cdnaSequence]=cdnaParse(params.cdna(1).dir1,seqData,params.cdna);
4443
[ncrnaHeader,ncrnaSequence]...
4544
=ncrnaParse(params.ncrna(1).dir1,seqData,params.ncrna(1).dirT,params.ncrna);
4645
end
@@ -57,14 +56,12 @@
5756
oligoList=oligosParse(params.oligos);
5857

5958
%% Append pre-designed adapters to the raw list of oligos
60-
adapterList = input('input the directory where the list of adapters can be found: ');
61-
6259
[adapterList,probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore]...
63-
=appendAdapters(adapterList,oligoList);
60+
=appendAdapters(params.adapters(1).dir1,oligoList,params.adapters);
6461

6562
%% Remove probes that non-specifically bind to primers in the 1st PCR step
6663
[probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore]...
67-
=blast1stPCR(adapterList,probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore);
64+
=blast1stPCR(adapterList,probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore,params.onePCR);
6865

6966
%% Save the probes of each transcripts into individual files
7067
[probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore]...

readParameters.m

+46
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,8 @@
2121
transcriptList=xmlParse(parameters, 'parameters', 'transcriptList');
2222
OligoArray=xmlParse(parameters, 'parameters', 'OligoArray');
2323
oligos=xmlParse(parameters, 'parameters', 'oligos');
24+
adapters=xmlParse(parameters, 'parameters', 'adapters');
25+
onePCR=xmlParse(parameters, 'parameters', 'onePCR');
2426

2527
%% Parse general parameters
2628
verbose=xmlParse(general, 'general', 'verbose');
@@ -134,3 +136,47 @@
134136
'parallel',str2double(parallel.getFirstChild.getData),...
135137
'specialTranscripts',char(dir1.getFirstChild.getData));
136138

139+
%% Parse parameters for adapters
140+
dir1=xmlParse(adapters, 'adapters', 'dir1');
141+
key1=xmlParse(adapters, 'adapters', 'key1');
142+
gf=xmlParse(adapters, 'adapters', 'gf');
143+
grr=xmlParse(adapters, 'adapters', 'grr');
144+
T7r=xmlParse(adapters, 'adapters', 'T7r');
145+
rRr=xmlParse(adapters, 'adapters', 'rRr');
146+
rGr=xmlParse(adapters, 'adapters', 'rGr');
147+
rBr=xmlParse(adapters, 'adapters', 'rBr');
148+
rIRr=xmlParse(adapters, 'adapters', 'rIRr');
149+
150+
params.adapters = struct('species',species,...
151+
'verbose',str2double(verbose.getFirstChild.getData),...
152+
'dir1',char(dir1.getFirstChild.getData),...
153+
'keys',char(key1.getFirstChild.getData),...
154+
'gf',char(gf.getFirstChild.getData),...
155+
'grr',char(grr.getFirstChild.getData),...
156+
'T7r',char(T7r.getFirstChild.getData),...
157+
'rRr',char(rRr.getFirstChild.getData),...
158+
'rGr',char(rGr.getFirstChild.getData),...
159+
'rBr',char(rBr.getFirstChild.getData),...
160+
'rIRr',char(rIRr.getFirstChild.getData));
161+
162+
%% Parse parameters for 1stPCR
163+
key1=xmlParse(onePCR, 'onePCR', 'key1');
164+
seqNum=xmlParse(onePCR, 'onePCR', 'seqNum');
165+
thres=xmlParse(onePCR, 'onePCR', 'thres');
166+
querySize=xmlParse(onePCR, 'onePCR', 'querySize');
167+
blastArgs=xmlParse(onePCR, 'onePCR', 'blastArgs');
168+
parallel=xmlParse(onePCR, 'onePCR', 'parallel');
169+
gf=xmlParse(onePCR, 'onePCR', 'gf');
170+
grr=xmlParse(onePCR, 'onePCR', 'grr');
171+
172+
params.onePCR = struct('species',species,...
173+
'verbose',str2double(verbose.getFirstChild.getData),...
174+
'keys',char(key1.getFirstChild.getData),...
175+
'seqNum',str2double(seqNum.getFirstChild.getData),...
176+
'thres',str2double(thres.getFirstChild.getData),...
177+
'querySize',str2double(querySize.getFirstChild.getData),...
178+
'blastArgs',char(blastArgs.getFirstChild.getData),...
179+
'parallel',str2double(parallel.getFirstChild.getData),...
180+
'gf',char(gf.getFirstChild.getData),...
181+
'grr',char(grr.getFirstChild.getData));
182+

utilities/blastAbundantRNASimple.m

+1-1
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
disp(' spliting fasta files for parallel computing');
2727
end
2828

29-
filePathList = blastFileSplit(Header, Sequence, params(1).seqNum);
29+
filePathList = blastFileSplit(Header, Sequence, params(1).seqNum, params);
3030
fileNum = length(filePathList);
3131

3232
%% Blast mouse oligos against abundant rna

utilities/blastFileSplit.m

+14-14
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,22 @@
1-
function filePathList = blastFileSplit(Header, Sequence, varargin)
2-
3-
switch length(varargin)
4-
case 0
5-
seqNum = 48;
6-
params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*');
7-
case 1
8-
seqNum = varargin{1};
9-
params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*');
10-
otherwise
11-
seqNum = varargin{1};
12-
params = varargin{2};
13-
end
1+
function filePathList = blastFileSplit(Header, Sequence, seqNum, params)
2+
3+
% switch length(varargin)
4+
% case 0
5+
% seqNum = 48;
6+
% params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*');
7+
% case 1
8+
% seqNum = varargin{1};
9+
% params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*');
10+
% otherwise
11+
% seqNum = varargin{1};
12+
% params = varargin{2};
13+
% end
1414

1515
filePathList = {};
1616

1717
% MatLab's use of blastlocal requires short entry names
1818
for n = 1:length(Header)
19-
pos = regexp(Header{n,1}, params.keys, 'end');
19+
pos = regexp(Header{n,1}, params(1).keys, 'end');
2020
Header{n,1} = Header{n,1}(1:pos);
2121
Header{n,1} = strcat(Header{n,1}, '=', num2str(n));
2222
end

utilities/checkTranscriptsLeft.m

+1-1
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
geneNumLeft = length(uniqueHeader);
2121
geneNumDelete = geneNumTotal-geneNumLeft;
2222

23-
[adapterHeader,adapterSequence] = pickExpressedSeq(uniqueHeader,adapterHeader,adapterSequence);
23+
[adapterHeader,adapterSequence] = pickExpressedSeq(uniqueHeader,adapterHeader,adapterSequence,params);
2424
if exist(adapterList, 'file')
2525
delete(adapterList);
2626
end

utilities/getDbSize.m

+1-1
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
params = varargin{1};
77
else
88
params = struct('species','Mouse','verbose',1,...
9-
'thres',40,'querySize',20,'DbSize',5*10^7,'blastArgs','-S 1');
9+
'thres',40,'querySize',20,'DbSize',10^8,'blastArgs','-S 1');
1010
end
1111

1212
eValue = bitScore2eValue(params.thres, params.querySize, params.DbSize);

0 commit comments

Comments
 (0)