From bf5000599446048c2f89f8ce05cfaceedf77473c Mon Sep 17 00:00:00 2001 From: Rong Li Lab Date: Wed, 31 May 2017 21:55:56 -0400 Subject: [PATCH] checked FISHerMan --- Db/Mouse.parameters.xml | 10 - Mouse/Mouse.adapters.txt | 12 - Mouse/Mouse.probes.nr.txt | 560 ----------------------------- appendAdapters.m | 29 +- averageRNASeq.m | 14 +- blast1stPCR.m | 28 +- blastAbundantRNA.m | 21 +- blastOtherSteps.m | 22 +- generateProbeList.m | 14 +- readParameters.m | 22 +- readRNASeq.m | 12 +- utilities/blastAbundantRNASimple.m | 5 - utilities/blastFileSplit.m | 6 +- utilities/blastOneTranscript.m | 19 +- utilities/checkTranscriptsLeft.m | 14 +- utilities/findSeqDelete.m | 6 - utilities/pickExpressedSeq.m | 6 +- 17 files changed, 69 insertions(+), 731 deletions(-) delete mode 100644 Mouse/Mouse.adapters.txt delete mode 100644 Mouse/Mouse.probes.nr.txt diff --git a/Db/Mouse.parameters.xml b/Db/Mouse.parameters.xml index b422820..0e028fb 100644 --- a/Db/Mouse.parameters.xml +++ b/Db/Mouse.parameters.xml @@ -90,20 +90,13 @@ C:\FISHerMan\Db\Mouse.alladapters.fas - ENS\w*T\d* GGAATCGTTGCGGGTGTCCT CCGCAACATCCAGCATCGTG - CCCTATAGTGAGTCGTATTA - AGAGTGAGTAGTAGTGGAGT - GATGATGTAGTAGTAAGGGT - TGTGATGGAAGTTAGAGGGT - GGAGTAGTTGGTTGTTAGGA - ENS\w*T\d* 1000 22 20 @@ -116,7 +109,6 @@ - ENS\w*T\d* 22 50 -S 2 @@ -132,7 +124,6 @@ - ENS\w*T\d* 1000 30 73 @@ -144,7 +135,6 @@ - ENS\w*T\d* 48 C:\FISHerMan\Db\Mouse.STList.fas diff --git a/Mouse/Mouse.adapters.txt b/Mouse/Mouse.adapters.txt deleted file mode 100644 index 412d3b0..0000000 --- a/Mouse/Mouse.adapters.txt +++ /dev/null @@ -1,12 +0,0 @@ ->ENSMUST00000015612 -CCACCTTAACACGCGATGAT - ->ENSMUST00000084289 -GTCGAATATCCACCACCGTA - ->ENSMUST00000100497 -CTGTTGTAGCGCCTGTCTAG - ->ENSMUST00000118875 -ATCTGTATGCTCAGGCGATT - diff --git a/Mouse/Mouse.probes.nr.txt b/Mouse/Mouse.probes.nr.txt deleted file mode 100644 index daba8a8..0000000 --- a/Mouse/Mouse.probes.nr.txt +++ /dev/null @@ -1,560 +0,0 @@ ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATTGGGTTCATCTCACAGTCCAGCCCCGTGAA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATCAGAGGCAGAGATGAAAGGTGGAGTGTCCC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATCAGGTGCCCCCATTGAGACAGCTTCCAGGA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATAGCTTGCACTGGGGACCTTCCTGCCCCACA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATACTGGTAGGAACCCAAGGTGTTATGGCAGG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATAGGGTCCCGGCTCCAGGAAGCACTCGTTGA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATACAGGTGTGACCCTCGAACCCAGGTGGACA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATCTCCGTTGGCACAGGGGTTGGCTGAGCAGA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATCTGGCATTGCTCACCTGTCCACCCAGGCTC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATGCCTGAGGCCTGAACATAGCAGTGACCCCC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATAAAGAAGGTGGACAGAGCTCTTCCAGATGG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATAGAGGGGCAGGTGCAGGAGAAGTGAGGGGT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATCCCTGCAGGGGTCAGGAAACTGGCAAGTCT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATTCCAGGGGCACACTGGCAGATCCCTTGTCC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATCAGGGCTCTGGGGATCCTCCACACAGAAGC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATGGATGACAGGGAAATTGAGTGGCAAGAGCA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATTGGCCCGGATCTCCTCTCCCTCTTCAGGCA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATGCCACCTCCTCTGCCTCTACCGCCCTTCTT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATCTGCTGCACGCCTCAGTCTGAGCTGTTTCC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATCAGGTAGACCCGTGATGGCAGGGATTGGGC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATGCATAACTTGCCTTGGAATCCAGGAGGGCA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATTAGGAGGAGCCCGTGTCAATACAGAGGCCT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATACAGGCCAGAGATCTCTATGCCTTGGCTCA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATGGCAGGACAGTGGGAAGTCACAGAGAGCCC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATACAGGCACTGGAACGAGGGCCCACTCTGGA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATGCTTCCGGGCACACAAGTCTATCAGAGTCT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATCCCCTCGAGGTGTGTCTTGGCAGGTTGCCT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATGGGGCTGTCTGCACAGCTGGGGTTAGTCTT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATGGAAGCCAGTGGCACAGAGGCAGAAGAAAG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATGTTCACACAGGTACCCCCATTGAGGCACGA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATACGGCAGTCTGGCAGTGGCGACCTGTGTGA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATAGGTGCAGGAATAGCCCTCAGGACGGATGC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATACCATTGAGACAGGGCCCTGAGTGACAAGC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATTCACTACAGGTCAACCCCATGTAGCCTGCA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATATGGGCACAGGGTGTGGAGATGCAGCCACC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATGGCAATGGCCATGGTGACAGGGGCAGTTGT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATAACACAGCCAGGACTTCCGTCAGGGCAGAG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATCTGTGTTCCTGACCTTGGCATTGCTGTCCA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATTGTTGGGGGTGCACAAGGGAACCTCACAAA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATAGCTTTATTATCCATGAGTTTGGGGTTCTC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATGTGACTGGAGAACATGGCCTCATCCCTGCA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATTGTGGGGAGGCAGAGAGAGGGCAAGGAGTC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATTGGGCCACTGGTTACTTACTGTTCATCTAC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATGCACATCGTAGGTGCTTAATAAATAGTTGC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATTGGGCAGTAGATAGCAGAGGCTCCTTTTGG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATACAAGAAACATTTTACACCCAATTCGCCCA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATTTCCAGAGTTTAGGGATTCTCGAAATCAAC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000015612:ENSMUSG00000015468 -GGAATCGTTGCGGGTGTCCTCCACCTTAACACGCGATGATTGAAGCTGCCCTATTTTCCAGGAAAATCAC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTATAGCACAAGAATGTCAATTTATTCCCCTCC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAGACACATAAACACAAAATATCCCCACAGGT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTATCTTATCTCACATAGGGATTCAAACCCTTC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAAGTACCTCGTATCGCTGGAGAGGAACATCC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTATACAAGTCTAACCATACAGAAACCATACCG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAGAACAAACACAAACATTTCAGCCAGTAGAC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTACAGTAGGAACCAATAAGAAAATGGTTGAGC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAAAAAATGAAAGCAGAGCAGCCACCTTGCCA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAGAATCTCAGAGCACATCCGTGGAGGGCAAC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAAGTGCAGTACTGCTCAATGTGCCTTATGCA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTACTGACCGATCACAAAGCATCCTGGTTCCAG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAACTGAAATCTCATCTAGAAAACGACGTCAG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAAAGGGCCTTACAAATGCTTATTGCCAAAGT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAAAGATCGTTTAGATCTAGCATACCACACAG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTATATGGAAAACATGGGTACTTTTTACCACGG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAGTCCCTGTGACACACATCACATCTGCTTAT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTACACAAAAGCCAGTCAAGTAAGGGAACTGCC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTATGCAGCCAAATATAGTGCAATTGCTGGGCA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTACACAGTGCAGGCCCGAAGAGATCAGTAGAA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAAGCTGATGCTCCTTAAAGGCATTGACAGTG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAAAGTAAGAGGTCTATGGACCCACCCAATTA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTATGGAAATGATACAGGCCGTTTGGTTTCTCT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAGGAAATACTCCTCTTATAAGACTAGATGCG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTACAGCTAGCAAAAGATTCCATAGAAACCCTT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTATGGGAGTCTGTTAGAGGTTTTGTCTGCTCA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAAACCTCACACATGAACTCAATGATAGGTTG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTATGGCAGGTCTCACAGACTGATGAAAGCCAA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTACCCTCCCAGAGGGTGGTAATAACCTTCAGG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAGACCGGCCTACTGGAGTGTACCTCATAGAG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTATGTAATCACATCAAGTGCTTGTACTGAGTC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTATCATTCAGGTGTCCTGCTAAAGCTTCTAAA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAGGCTCACAACTGACACCCACTGCACAGACA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAGGAGGGTCACCTCCATATCAATCCTATCCC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTACAGTGGGTGTGCCGTGTACATGTTTCTTTT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTACCAGGCTGCCGATCTCCAAATATCTGCATC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAGCACCATAGTGTCTACCACCTCCCTGTTGA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTATTTAGGGATCTGAACCTGAAAATGATTGGC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTATGGTTTCCCAACGGTTCCAAGGCCAGGACG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTACCGGAGCCGCGTAACGCTTGCTCTCTGGAA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAGGACAGGGACCCGACTCTGTCACGGCTCTT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTACTTGGGTCCAGAATGTCTGTGCTACAGAGC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTACTTTCCCTGAACAGGTGTGAATAAAAGCGG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTATCAACATGTCTGGATTTGTATTATCCCAGG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTAGGAAGATCTCAGGGAACGGACGTCTGCTGT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTACTGTGTTGTGCTTCAGTCACAGGACTTGCT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTATCTGCAGAGATTAAAGATGTCTGCACCCAT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTATAGGACTAGACGGACATTATAAACTCTCTC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000084289:ENSMUSG00000042500 -GGAATCGTTGCGGGTGTCCTGTCGAATATCCACCACCGTACTCAGCAAACACCAGTGGCACTGCTGAGCG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGTGTGTTGGCATAGAGGTCTTTACGGATGTC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGCTGCAAAGAAGCTGTGCTCGCGGGTGGACG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGGATGGAATTGAATGTAGTTTCATGGATGCC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGGAAAAGAGCCTCAGGGCATCGGAACCGCTC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGGATGCGGCAGTGGCCATCTCCTGCTCGAAG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGCTTCTCTTTGATGTCACGCACGATTTCCCT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGGTGGTGAAGCTGTAGCCACGCTCGGTCAGG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGGCTGGGGTGTTGAAGGTCTCAAACATGATC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGGACGACCAGCGCAGCGATATCGTCATCCAT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGCGTCTCCGGAGTCCATCACAATGCCTGTGG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGACCCACATAGGAGTCCTTCTGACCCATTCC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGCAGTTGGTAACAATGCCATGTTCAATGGGG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGACGATGGAGGGGAATACAGCCCGGGGAGCA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGGATGACCTGGCCGTCAGGCAGCTCATAGCT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGGTGAGGGAGAGCATAGCCCTCGTAGATGGG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGGGGGCCACACGCAGCTCATTGTAGAAGGTG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGGGCATACAGGGACAGCACAGCCTGGATGGC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGTGTGTAAGGTAAGGTGTGCACTTTTATTGG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000100497:ENSMUSG00000029580 -GGAATCGTTGCGGGTGTCCTCTGTTGTAGCGCCTGTCTAGAGGCCTCAGACCTGGGCCATTCAGAAATTA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTCAGGGAGGGCTGCAGTCCGTATTTATAGGA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTGCCTCCCGCCCTGCTTATCCAGTCCTAGCT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTGAGGCCTGAGCTACGTGCACCCGTAAAGCC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTGAAGATGCGGCCGTCTCTGGAACAGGGAGG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTACACCGACCTTCACCATTTTGTCTACGGGA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTCAACAATCTCCACTTTGCCACTGCAAATGG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTCAGTGATGGCATGGACTGTGGTCATGAGCC -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTATCTTGAGTGAGTTGTCATATTTCTCGTGG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTGGTGAAGACACCAGTAGACTCCACGACATA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTTTTGATGTTAGTGGGGTCTCGCTCCTGGAA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTGTCAGATCCACGACGGACACATTGGGGGTA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTATACTGGAACATGTAGACCATGTAGTTGAG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTCAGTGAGCTTCCCGTTCAGCTCTGGGATGA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTTTCCCGTTGATGACAAGCTTCCCATTCTCG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTGGGCATCGGCAGAAGGGGCGGAGATGATGA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTCCACGGCCATCACGCCACAGCTTTCCAGAG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTCTTGGCAGCACCAGTGGATGCAGGGATGAT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTCCGAGTTGGGATAGGGCCTCTCTTGCTCAG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTGGGTGCAGCGAACTTTATTGATGGTATTCA -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTCTGTTATTATGGGGGTCTGGGATGGAAATT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTGGGGTGGGTGGTCCAGGGTTTCTTACTCCT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTTAGCCGTATTCATTGTCATACCAGGAAATG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTCCGGCATCGAAGGTGGAAGAGTGGGAGTTG -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTCTCAGATGCCTGCTTCACCACCTTCTTGAT -CCGCAACATCCAGCATCGTG - ->ENSMUST00000118875:ENSMUSG00000057666 -GGAATCGTTGCGGGTGTCCTATCTGTATGCTCAGGCGATTGAGACAACCTGGTCCTCAGTGTAGCCCAAG -CCGCAACATCCAGCATCGTG - diff --git a/appendAdapters.m b/appendAdapters.m index 985bdcf..c7f7071 100644 --- a/appendAdapters.m +++ b/appendAdapters.m @@ -1,15 +1,11 @@ function [adapterList,probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore]... =appendAdapters(adapterList,oligos,params) -% if length(varargin) >= 1 -% params = varargin{1}; -% else -% params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*',... -% 'gf','GGAATCGTTGCGGGTGTCCT','grr','CCGCAACATCCAGCATCGTG',... -% 'T7r','CCCTATAGTGAGTCGTATTA',... -% 'rRr','AGAGTGAGTAGTAGTGGAGT','rGr','GATGATGTAGTAGTAAGGGT',... -% 'rBr','TGTGATGGAAGTTAGAGGGT','rIRr','GGAGTAGTTGGTTGTTAGGA'); -% end +% params = struct('species','Mouse','verbose',1,... +% 'gf','GGAATCGTTGCGGGTGTCCT','grr','CCGCAACATCCAGCATCGTG',... +% 'T7r','CCCTATAGTGAGTCGTATTA',... +% 'rRr','AGAGTGAGTAGTAGTGGAGT','rGr','GATGATGTAGTAGTAAGGGT',... +% 'rBr','TGTGATGGAAGTTAGAGGGT','rIRr','GGAGTAGTTGGTTGTTAGGA'); if params(1).verbose disp('concatenating oligos with adapters'); @@ -19,10 +15,10 @@ Header = Header'; Sequence = Sequence'; -pos = regexp(Header, params(1).keys, 'end'); +pos = regexp(Header, ':'); trimmedHeader = Header; for n = 1:length(Header) - trimmedHeader{n,1} = Header{n,1}(1:pos{n,1}); + trimmedHeader{n,1} = Header{n,1}(1:pos{n,1}(1)-1); end uniqueHeader = unique(trimmedHeader, 'stable'); @@ -37,17 +33,6 @@ end fastawrite(adapterList, adapterHeader, adapterSequence); -% adapterRSequence = adapterSequence; -% adapterGSequence = adapterSequence; -% adapterBSequence = adapterSequence; -% adapterIRSequence = adapterSequence; -% for n = 1:length(adapterSequence) -% adapterRSequence{n,1} = strcat(params.rRr, adapterSequence{n,1}); -% adapterGSequence{n,1} = strcat(params.rGr, adapterSequence{n,1}); -% adapterBSequence{n,1} = strcat(params.rBr, adapterSequence{n,1}); -% adapterIRSequence{n,1} = strcat(params.rIRr, adapterSequence{n,1}); -% end - probeHeader = {}; probeSequence = {}; probeSequence3Seg = {}; diff --git a/averageRNASeq.m b/averageRNASeq.m index 7f0c9ee..b68c6da 100644 --- a/averageRNASeq.m +++ b/averageRNASeq.m @@ -1,14 +1,10 @@ function seqData = averageRNASeq(seqData1,seqData2,params) -% seqData1 = 'mouse_frontal_cortex_mRNASeq_ENCFF653BKJ.xlsx'; -% seqData2 = 'mouse_frontal_cortex_mRNASeq_ENCFF703SOK.xlsx'; - -% if length(varargin) >= 1 -% params = varargin{1}; -% else -% params = struct('species','Mouse','verbose',1,... -% 'keys',{'ENS\w*T\d*','ENS\w*G\d*'},'thres',0.1); -% end +% params = struct('species','Mouse','verbose',1,'data',2,... +% 'dir1','C:\FISHerMan\Db\mouse_frontal_cortex_mRNASeq_ENCFF653BKJ.xlsx',... +% 'dir2','C:\FISHerMan\Db\mouse_frontal_cortex_mRNASeq_ENCFF703SOK.xlsx',... +% 'mRNA',1,'keys',{'ENS\w*T\d*','ENS\w*G\d*'},... +% 'thres',0.1); seqData1 = readRNASeq(seqData1,params); seqData2 = readRNASeq(seqData2,params); diff --git a/blast1stPCR.m b/blast1stPCR.m index a8f8b6e..202117e 100644 --- a/blast1stPCR.m +++ b/blast1stPCR.m @@ -1,17 +1,10 @@ function [probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore]... =blast1stPCR(adapterList,probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore,params) -% if length(varargin) >= 1 -% params = varargin{1}; -% else -% params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*',... -% 'thres',22,'querySize',20,'seqNum',1000,... -% 'blastArgs','-S 3','parallel', 0,... -% 'gf','GGAATCGTTGCGGGTGTCCT','grr','CCGCAACATCCAGCATCGTG',... -% 'T7r','CCCTATAGTGAGTCGTATTA',... -% 'rRr','AGAGTGAGTAGTAGTGGAGT','rGr','GATGATGTAGTAGTAAGGGT',... -% 'rBr','TGTGATGGAAGTTAGAGGGT','rIRr','GGAGTAGTTGGTTGTTAGGA'); -% end +% params = struct('species','Mouse','verbose',1,... +% 'thres',22,'querySize',20,'seqNum',1000,... +% 'blastArgs','-S 3','parallel', 0,... +% 'gf','GGAATCGTTGCGGGTGTCCT','grr','CCGCAACATCCAGCATCGTG'); if params(1).verbose disp('removing probes that non-specifically bind to primers in the 1st PCR step'); @@ -26,8 +19,8 @@ % MatLab's use of blastlocal requires short entry names simpleHeader = probeHeader; for n = 1:length(probeHeader) - pos = regexp(probeHeader{n,1}, params(1).keys, 'end'); - simpleHeader{n,1} = strcat(probeHeader{n,1}(1:pos),'=',num2str(n)); + pos = regexp(probeHeader{n,1}, ':'); + simpleHeader{n,1} = strcat(probeHeader{n,1}(1:pos(1)-1),'=',num2str(n)); end if exist(probesDb, 'file') delete([probesDb '*']); @@ -49,7 +42,7 @@ adapterHeader{end+1,1} = 'ENSPRIMERT01'; adapterSequence{end+1,1} = params(1).grr; -filePathList = blastFileSplit(adapterHeader, adapterSequence, params(1).seqNum, params); +filePathList = blastFileSplit(adapterHeader, adapterSequence, params); fileNum = length(filePathList); %% Blast primers against probes @@ -66,13 +59,8 @@ parfor k = 1:fileNum if verbose disp([' blasting temporary file no. ' num2str(k)]); - startTime(k) = tic; end blastData{k,1} = blastOp(filePathList{k}, DbPath, blastArgs); - if verbose - totalTime(k) = toc(startTime(k)); - disp([' elapsed time is ' num2str(totalTime(k)) ' seconds']); - end end delete(poolobj); else @@ -113,7 +101,7 @@ probeSequenceCore(seqDelete)= []; %% Check how many transcripts are left after this step of screening -[geneNumLeft,geneNumDelete]=checkTranscriptsLeft(adapterList,probeHeader,params); +[geneNumLeft,geneNumDelete]=checkTranscriptsLeft(adapterList,probeHeader); if params(1).verbose disp([num2str(geneNumDelete) ' out of ' num2str(geneNumLeft+geneNumDelete)... ' FISH escaped FISHerMan''s net']); diff --git a/blastAbundantRNA.m b/blastAbundantRNA.m index 245890c..f87ad30 100644 --- a/blastAbundantRNA.m +++ b/blastAbundantRNA.m @@ -5,13 +5,9 @@ function [Header,Sequence,nonSequence,nonSequence2]... =blastAbundantRNA(adapterList,Header,Sequence,nonSequence,nonSequence2,params) -% if length(varargin) >= 1 -% params = varargin{1}; -% else -% params = struct('species','Mouse','verbose',1,... -% 'seqNum',1000,'thres',30,'querySize',73,'DbSize',200000,... -% 'blastArgs','-S 2','parallel', 0); -% end +% params = struct('species','Mouse','verbose',1,... +% 'seqNum',1000,'thres',30,'querySize',73,'DbSize',200000,... +% 'blastArgs','-S 2','parallel', 0); if isempty(nonSequence) nonSequence = Sequence; @@ -30,7 +26,7 @@ disp(' spliting fasta files for parallel computing'); end -filePathList = blastFileSplit(Header, Sequence, params(1).seqNum, params); +filePathList = blastFileSplit(Header, Sequence, params); fileNum = length(filePathList); %% Blast mouse oligos against abundant rna @@ -47,13 +43,8 @@ parfor k = 1:fileNum if verbose disp([' blasting temporary file no. ' num2str(k)]); - startTime(k) = tic; end blastData{k,1} = blastOp(filePathList{k}, DbPath, blastArgs); - if verbose - totalTime(k) = toc(startTime(k)); - disp([' elapsed time is ' num2str(totalTime(k)) ' seconds']); - end end delete(poolobj); else @@ -80,7 +71,7 @@ for n = 1:length(data) flag = 0; for m = 1:length(data(n).Hits) - if ~strfind(data(n).Query,data(n).Hits(m).Name) + if isempty(strfind(data(n).Query,data(n).Hits(m).Name)) flag = 1; end end @@ -96,7 +87,7 @@ nonSequence2(seqDelete)= []; %% Check how many transcripts are left after this step of screening -[geneNumLeft,geneNumDelete]=checkTranscriptsLeft(adapterList,Header,params); +[geneNumLeft,geneNumDelete]=checkTranscriptsLeft(adapterList,Header); if params(1).verbose disp([num2str(geneNumDelete) ' out of ' num2str(geneNumLeft+geneNumDelete)... ' FISH escaped FISHerMan''s net']); diff --git a/blastOtherSteps.m b/blastOtherSteps.m index 259bcf3..bb07bbb 100644 --- a/blastOtherSteps.m +++ b/blastOtherSteps.m @@ -1,16 +1,12 @@ function [probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore]... =blastOtherSteps(adapterList,probeHeader,probeSequence,probeSequence3Seg,probeSequenceCore,params) -% if length(varargin) >= 1 -% params = varargin{1}; -% else -% params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*',... -% 'thres',22,'querySize',50,... -% 'blastArgs1','-S 2','blastArgs2','-S 3',... -% 'grr','CCGCAACATCCAGCATCGTG','T7r','CCCTATAGTGAGTCGTATTA',... -% 'rRr','AGAGTGAGTAGTAGTGGAGT','rGr','GATGATGTAGTAGTAAGGGT',... -% 'rBr','TGTGATGGAAGTTAGAGGGT','rIRr','GGAGTAGTTGGTTGTTAGGA'); -% end +% params = struct('species','Mouse','verbose',1,'keys',... +% 'thres',22,'querySize',50,... +% 'blastArgs1','-S 2','blastArgs2','-S 3',... +% 'grr','CCGCAACATCCAGCATCGTG','T7r','CCCTATAGTGAGTCGTATTA',... +% 'rRr','AGAGTGAGTAGTAGTGGAGT','rGr','GATGATGTAGTAGTAAGGGT',... +% 'rBr','TGTGATGGAAGTTAGAGGGT','rIRr','GGAGTAGTTGGTTGTTAGGA'); if params(1).verbose disp('removing probes that non-specifically bind to 2nd PCR primers and other probes'); @@ -22,8 +18,8 @@ simpleHeader = probeHeader; for n = 1:length(probeHeader) - pos = regexp(probeHeader{n,1}, params(1).keys, 'end'); - simpleHeader{n,1} = probeHeader{n,1}(1:pos); + pos = regexp(probeHeader{n,1}, ':'); + simpleHeader{n,1} = probeHeader{n,1}(1:pos(1)-1); end seqDelete = []; @@ -42,7 +38,7 @@ probeSequenceCore(seqDelete)= []; %% Check how many transcripts are left after this step of screening -[geneNumLeft,geneNumDelete]=checkTranscriptsLeft(adapterList,probeHeader,params); +[geneNumLeft,geneNumDelete]=checkTranscriptsLeft(adapterList,probeHeader); if params(1).verbose disp([num2str(geneNumDelete) ' out of ' num2str(geneNumLeft+geneNumDelete)... ' FISH escaped FISHerMan''s net']); diff --git a/generateProbeList.m b/generateProbeList.m index ad4b569..d3fa303 100644 --- a/generateProbeList.m +++ b/generateProbeList.m @@ -1,11 +1,7 @@ function probeList=generateProbeList(adapterList,probeHeader,probeSequence,params) -% if length(varargin) >= 1 -% params = varargin{1}; -% else -% params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*',... -% 'number',48,'specialTranscripts','C:\FISHerMan\Db\Mouse.STList.fas'); -% end +% params = struct('species','Mouse','verbose',1,'number',48,... +% 'specialTranscripts','C:\FISHerMan\Db\Mouse.STList.fas'); %% Remove transcripts without enough probes if params(1).verbose @@ -13,10 +9,10 @@ disp(' removing transcripts without enough probes'); end -pos = regexp(probeHeader, params(1).keys, 'end'); +pos = regexp(probeHeader, ':'); trimHeader = {}; for n = 1:length(probeHeader) - trimHeader{end+1} = probeHeader{n,1}(1:pos{n,1}); + trimHeader{end+1} = probeHeader{n,1}(1:pos{n,1}(1)-1); end trimHeader = trimHeader'; uniqueHeader = unique(trimHeader, 'stable'); @@ -65,7 +61,7 @@ fastawrite(probeList,probeHeader,probeSequence); %% Check how many transcripts are left after this step of screening -[geneNumLeft,geneNumDelete]=checkTranscriptsLeft(adapterList,probeHeader,params); +[geneNumLeft,geneNumDelete]=checkTranscriptsLeft(adapterList,probeHeader); if params(1).verbose disp([num2str(geneNumDelete) ' out of ' num2str(geneNumLeft+geneNumDelete)... ' FISH escaped FISHerMan''s net']); diff --git a/readParameters.m b/readParameters.m index 6897974..75801a6 100644 --- a/readParameters.m +++ b/readParameters.m @@ -136,29 +136,16 @@ %% Parse parameters for adapters dir1=xmlParse(adapters, 'adapters', 'dir1'); -key1=xmlParse(adapters, 'adapters', 'key1'); gf=xmlParse(adapters, 'adapters', 'gf'); grr=xmlParse(adapters, 'adapters', 'grr'); -T7r=xmlParse(adapters, 'adapters', 'T7r'); -rRr=xmlParse(adapters, 'adapters', 'rRr'); -rGr=xmlParse(adapters, 'adapters', 'rGr'); -rBr=xmlParse(adapters, 'adapters', 'rBr'); -rIRr=xmlParse(adapters, 'adapters', 'rIRr'); params.adapters = struct('species',species,... 'verbose',str2double(verbose.getFirstChild.getData),... 'dir1',char(dir1.getFirstChild.getData),... - 'keys',char(key1.getFirstChild.getData),... 'gf',char(gf.getFirstChild.getData),... - 'grr',char(grr.getFirstChild.getData),... - 'T7r',char(T7r.getFirstChild.getData),... - 'rRr',char(rRr.getFirstChild.getData),... - 'rGr',char(rGr.getFirstChild.getData),... - 'rBr',char(rBr.getFirstChild.getData),... - 'rIRr',char(rIRr.getFirstChild.getData)); + 'grr',char(grr.getFirstChild.getData)); %% Parse parameters for 1stPCR -key1=xmlParse(onePCR, 'onePCR', 'key1'); seqNum=xmlParse(onePCR, 'onePCR', 'seqNum'); thres=xmlParse(onePCR, 'onePCR', 'thres'); querySize=xmlParse(onePCR, 'onePCR', 'querySize'); @@ -169,7 +156,6 @@ params.onePCR = struct('species',species,... 'verbose',str2double(verbose.getFirstChild.getData),... - 'keys',char(key1.getFirstChild.getData),... 'seqNum',str2double(seqNum.getFirstChild.getData),... 'thres',str2double(thres.getFirstChild.getData),... 'querySize',str2double(querySize.getFirstChild.getData),... @@ -179,7 +165,6 @@ 'grr',char(grr.getFirstChild.getData)); %% Parse parameters for otherSteps -key1=xmlParse(otherSteps, 'otherSteps', 'key1'); thres=xmlParse(otherSteps, 'otherSteps', 'thres'); querySize=xmlParse(otherSteps, 'otherSteps', 'querySize'); blastArgs1=xmlParse(otherSteps, 'otherSteps', 'blastArgs1'); @@ -193,7 +178,6 @@ params.otherSteps = struct('species',species,... 'verbose',str2double(verbose.getFirstChild.getData),... - 'keys',char(key1.getFirstChild.getData),... 'thres',str2double(thres.getFirstChild.getData),... 'querySize',str2double(querySize.getFirstChild.getData),... 'blastArgs1',char(blastArgs1.getFirstChild.getData),... @@ -206,7 +190,6 @@ 'rIRr',char(rIRr.getFirstChild.getData)); %% Parse parameters for arna -key1=xmlParse(arna, 'arna', 'key1'); seqNum=xmlParse(arna, 'arna', 'seqNum'); thres=xmlParse(arna, 'arna', 'thres'); querySize=xmlParse(arna, 'arna', 'querySize'); @@ -216,7 +199,6 @@ params.arna = struct('species',species,... 'verbose',str2double(verbose.getFirstChild.getData),... - 'keys',char(key1.getFirstChild.getData),... 'seqNum',str2double(seqNum.getFirstChild.getData),... 'thres',str2double(thres.getFirstChild.getData),... 'querySize',str2double(querySize.getFirstChild.getData),... @@ -225,13 +207,11 @@ 'parallel',str2double(parallel.getFirstChild.getData)); %% Parse parameters for probeList -key1=xmlParse(probeList, 'probeList', 'key1'); num=xmlParse(probeList, 'probeList', 'number'); dir1=xmlParse(probeList, 'probeList', 'dir1'); params.probeList = struct('species',species,... 'verbose',str2double(verbose.getFirstChild.getData),... - 'keys',char(key1.getFirstChild.getData),... 'number',str2double(num.getFirstChild.getData),... 'specialTranscripts',char(dir1.getFirstChild.getData)); diff --git a/readRNASeq.m b/readRNASeq.m index 7111820..4cd6dac 100644 --- a/readRNASeq.m +++ b/readRNASeq.m @@ -1,12 +1,10 @@ function data = readRNASeq(seqData, params) -% seqData = 'C:\FISHerMan\Mouse\mouse_frontal_cortex_mRNASeq_ENCFF703SOK.xlsx'; - -% if length(varargin) >= 1 -% params = varargin{1}; -% else -% params = struct('species','Mouse','verbose',1,'keys',{'ENS\w*T\d*','ENS\w*G\d*'}); -% end +% params = struct('species','Mouse','verbose',1,'data',2,... +% 'dir1','C:\FISHerMan\Db\mouse_frontal_cortex_mRNASeq_ENCFF653BKJ.xlsx',... +% 'dir2','C:\FISHerMan\Db\mouse_frontal_cortex_mRNASeq_ENCFF703SOK.xlsx',... +% 'mRNA',1,'keys',{'ENS\w*T\d*','ENS\w*G\d*'},... +% 'thres',0.1); if params(1).verbose disp('reading the RNA-seq data file'); diff --git a/utilities/blastAbundantRNASimple.m b/utilities/blastAbundantRNASimple.m index 2649b34..2c786fb 100644 --- a/utilities/blastAbundantRNASimple.m +++ b/utilities/blastAbundantRNASimple.m @@ -43,13 +43,8 @@ parfor k = 1:fileNum if verbose disp([' blasting temporary file no. ' num2str(k)]); -% startTime(k) = tic; end blastData{k,1} = blastOp(filePathList{k}, DbPath, blastArgs); -% if verbose -% totalTime(k) = toc(startTime(k)); -% disp([' elapsed time is ' num2str(totalTime(k)) ' seconds']); -% end end delete(poolobj); else diff --git a/utilities/blastFileSplit.m b/utilities/blastFileSplit.m index 1c5b78d..bf4e92b 100644 --- a/utilities/blastFileSplit.m +++ b/utilities/blastFileSplit.m @@ -8,7 +8,11 @@ % MatLab's use of blastlocal requires short entry names for n = 1:length(Header) pos = regexp(Header{n,1}, ':'); - Header{n,1} = Header{n,1}(1:pos(1)-1); + if ~isempty(pos) + Header{n,1} = Header{n,1}(1:pos(1)-1); + else + Header{n,1} = Header{n,1}; + end Header{n,1} = strcat(Header{n,1}, '=', num2str(n)); end diff --git a/utilities/blastOneTranscript.m b/utilities/blastOneTranscript.m index 2322eca..9d0f59e 100644 --- a/utilities/blastOneTranscript.m +++ b/utilities/blastOneTranscript.m @@ -1,16 +1,12 @@ function seqDelete... =blastOneTranscript(OTAdapterHeader,OTAdapterSequence,probeHeader,probeSequenceCore,params) -% if length(varargin) >= 1 -% params = varargin{1}; -% else -% params = struct('species','Mouse','verbose',1,... -% 'thres',22,'querySize',50,... -% 'blastArgs1','-S 2','blastArgs2','-S 3',... -% 'grr','CCGCAACATCCAGCATCGTG','T7r','CCCTATAGTGAGTCGTATTA',... -% 'rRr','AGAGTGAGTAGTAGTGGAGT','rGr','GATGATGTAGTAGTAAGGGT',... -% 'rBr','TGTGATGGAAGTTAGAGGGT','rIRr','GGAGTAGTTGGTTGTTAGGA'); -% end +% params = struct('species','Mouse','verbose',1,'keys',... +% 'thres',22,'querySize',50,... +% 'blastArgs1','-S 2','blastArgs2','-S 3',... +% 'grr','CCGCAACATCCAGCATCGTG','T7r','CCCTATAGTGAGTCGTATTA',... +% 'rRr','AGAGTGAGTAGTAGTGGAGT','rGr','GATGATGTAGTAGTAAGGGT',... +% 'rBr','TGTGATGGAAGTTAGAGGGT','rIRr','GGAGTAGTTGGTTGTTAGGA'); index = ismember(probeHeader, OTAdapterHeader); temp = 1:length(probeHeader); @@ -53,7 +49,8 @@ 'FormatArgs', '-o T -p F'); %% Generate a temporary probe list file for blast -filePathList = blastFileSplit(OTHeader, OTSequenceCore, length(OTHeader), params); +params(1).seqNum = length(OTHeader); +filePathList = blastFileSplit(OTHeader, OTSequenceCore, params); %% Blast probes against 2nd PCR primers and other probes DbPath1 = OTDb1; diff --git a/utilities/checkTranscriptsLeft.m b/utilities/checkTranscriptsLeft.m index c3acdb7..0e5e03d 100644 --- a/utilities/checkTranscriptsLeft.m +++ b/utilities/checkTranscriptsLeft.m @@ -1,10 +1,6 @@ -function [geneNumLeft,geneNumDelete]=checkTranscriptsLeft(adapterList,probeHeader,params) +function [geneNumLeft,geneNumDelete]=checkTranscriptsLeft(adapterList,probeHeader) -% if length(varargin) >= 1 -% params = varargin{1}; -% else -% params = struct('species','Mouse','verbose',1,'keys','ENS\w*T\d*'); -% end +% params = struct('species','Mouse','verbose',1); [adapterHeader, adapterSequence] = fastaread(adapterList); adapterHeader = adapterHeader'; @@ -13,14 +9,14 @@ simpleHeader = probeHeader; for n = 1:length(probeHeader) - pos = regexp(probeHeader{n,1}, params(1).keys, 'end'); - simpleHeader{n,1} = probeHeader{n,1}(1:pos); + pos = regexp(probeHeader{n,1}, ':'); + simpleHeader{n,1} = probeHeader{n,1}(1:pos(1)-1); end uniqueHeader = unique(simpleHeader,'stable'); geneNumLeft = length(uniqueHeader); geneNumDelete = geneNumTotal-geneNumLeft; -[adapterHeader,adapterSequence] = pickExpressedSeq(uniqueHeader,adapterHeader,adapterSequence,params); +[adapterHeader,adapterSequence] = pickExpressedSeq(uniqueHeader,adapterHeader,adapterSequence); if exist(adapterList, 'file') delete(adapterList); end diff --git a/utilities/findSeqDelete.m b/utilities/findSeqDelete.m index f4699be..38fe4b0 100644 --- a/utilities/findSeqDelete.m +++ b/utilities/findSeqDelete.m @@ -1,11 +1,5 @@ function seqDelete=findSeqDelete(data) -% if length(varargin) >= 1 -% params = varargin{1}; -% else -% params = struct('species','Mouse','verbose',1); -% end - %% Delete probes that non-specifically bind 2nd PCR primers seqDelete = []; for n = 1:length(data{2,1}) diff --git a/utilities/pickExpressedSeq.m b/utilities/pickExpressedSeq.m index cb4b517..c51268d 100644 --- a/utilities/pickExpressedSeq.m +++ b/utilities/pickExpressedSeq.m @@ -5,7 +5,11 @@ pos = regexp(Header, ':'); temp = Header; for n = 1:length(Header) - temp{n,1} = Header{n,1}(1:pos{n,1}(1)-1); + if ~isempty(pos{n,1}) + temp{n,1} = Header{n,1}(1:pos{n,1}(1)-1); + else + temp{n,1} = Header{n,1}; + end end [~,index,~] = intersect(temp, transcriptID, 'stable');