Skip to content

Commit

Permalink
change
Browse files Browse the repository at this point in the history
Former-commit-id: dab9662a50082e6ae76a8ec3391abb9b3295c0e9 [formerly 76e6c0c] [formerly e85355900940db170b5b0c5770372f402af9b189] [formerly ae7befcc01c68bcb87367cfe837281fea9f4a4e7 [formerly 3e5745879c0ded5db94d47bb908976aa55c916dc]] [formerly 8648bf4dabbe98db23bbf69e2e128fae2edeadbc [formerly 05da5ee]]
Former-commit-id: 7e7c3cc
Former-commit-id: 5618a1b8d8510d88d96aa548db608c31806cb644 [formerly 0f5da7a]
Former-commit-id: 3ae7ab3af1882cc013c256c185bcc8e9e2291b51
Former-commit-id: 5685054
  • Loading branch information
smortezah committed Dec 10, 2018
1 parent ab11621 commit 387d062
Show file tree
Hide file tree
Showing 14 changed files with 208 additions and 46 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ add_executable(smashpp src/main.cpp
)


# Helper executable built from the single source file src/invRep.cpp.
add_executable(smashpp-inv-rep
  src/invRep.cpp
)

#add_executable(excule_N src/excludeN.cpp)

#add_executable(bench src/bench.cpp)
Expand Down
1 change: 1 addition & 0 deletions bye
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
NGAT
1 change: 1 addition & 0 deletions hi
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
NGAT
1 change: 1 addition & 0 deletions mori
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
AAAATGGGCGTACAATTAGACTGTGAATTCTAAGGAAATAAGTGCTAGTTGTATTGTCGGGAAGGACAACTGGGTACCCCTTAGTCACTCCTGAACTACGGAATGACGGAGCAGCATGATTACATCAGTGTTGACTGATATATAGTTAGATTTAGGTGTCGGCTAAATGTAGACTTTGGCGGAAAATCAATGCCTCGCTTCCACGCTTGTTGTAGTTCTACGGACTTACCGTTTGGTTAAAGAACATGTAAGTCGAGCGAGTTTTAGTTCCTCCGTTATTATAGGAGGTGCGTGCACCCAAATGCCAGCTCAGATAGAGGCCTGCACTTTGCTGGGTACCCAACCTGTGAAAACCCAAGCAGCAGGTCAGGCCACCCTCGGTTTAAGGAATTTTCCCAGTGCACTGCACATGAACTGAACCCAATACCAGAACGTTACATTTCACCTCTGGGTGTAATCATCTTTTAGGTTTAAATCCATCTGTGTGTAACTGTTAACTGATCGTTCAGTCTAGACTTCCGAGACCTCGTGCACCGCCTTGGGTTCACAAGCGGGCATTTGAATACTATCGCGCAACGGGGTGCCTGCCGTGATTACGCGCACCGCTTTTATGCTCGAGTTAAAGAAACCAACACTCATAAGTAGTTGACTAGATGCCAACTCGACGCGGTGGAGTAAATATCTTCGCAATGTTAGGTGAAAGCACCCATTGAATAAATTTCTTCGGTTCATTCATAGCGGTGTATCAACCCGGTAGCGAAGCCGTGAGGGACAAGCATGGGCGAGCGCATGACGCAAGGCGGGTTTCCGGTACTGCGCGGGGGGCTGTTTTGGGGGAGGGGTCGCCGTTGACCCGATCGAACACGTAAGAGCTAGTGGATGGTAGCCGACATGAAGATCTCTCGAACAACCCCTGACCCTGGCCGCGTACGTGAAAAAGGTCGCCCTGGAATCTCCGTGTGGAACAGGGTCTGATCCGTCGCGAACGGCATCAAGGCGC
81 changes: 81 additions & 0 deletions out.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion refm.REMOVED.git-id
Original file line number Diff line number Diff line change
@@ -1 +1 @@
30e3373f721b154dabe25e71f964ad974245dd55
dea3db0aa7ff3e95ea470ef6fa89e6447aca9361
96 changes: 54 additions & 42 deletions script/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,60 +15,72 @@ fi

### Simulate synthetic dataset
if [[ $SYNTHETIC -eq 1 ]]; then
### Small sizes: ref:1000, tar:1000
./goose-fastqsimulation -eh -eo -es -edb -rn 2 -ri 5 -ra 15 -rm 0 -s 101 \
-ls 50 -n 5 -f 0.2,0.3,0.3,0.2,0.0 r_a
./goose-fastqsimulation -eh -eo -es -edb -rn 1 -ri 5 -ra 20 -rm 0 -s 68 \
-ls 50 -n 5 -f 0.25,0.25,0.25,0.25,0.0 r_b
./goose-fastqsimulation -eh -eo -es -edb -rn 3 -ri 5 -ra 10 -rm 0 -s 3 \
-ls 50 -n 5 -f 0.25,0.25,0.25,0.25,0.0 r_c
./goose-fastqsimulation -eh -eo -es -edb -rn 1 -ri 5 -ra 20 -rm 0 -s 1785 \
-ls 50 -n 5 -f 0.3,0.2,0.2,0.3,0.0 r_d
cat r_a r_b r_c r_d > refs
# ### Small sizes: ref:1000, tar:1000
# # ./goose-fastqsimulation -eh -eo -es -edb -rn 2 -ri 5 -ra 15 -rm 0 -s 101 \
# # -ls 50 -n 5 -f 0.2,0.3,0.3,0.2,0.0 r_a
# # ./goose-fastqsimulation -eh -eo -es -edb -rn 1 -ri 5 -ra 20 -rm 0 -s 68 \
# # -ls 50 -n 5 -f 0.25,0.25,0.25,0.25,0.0 r_b
# # ./goose-fastqsimulation -eh -eo -es -edb -rn 3 -ri 5 -ra 10 -rm 0 -s 3 \
# # -ls 50 -n 5 -f 0.25,0.25,0.25,0.25,0.0 r_c
# # ./goose-fastqsimulation -eh -eo -es -edb -rn 1 -ri 5 -ra 20 -rm 0 -s 1785 \
# # -ls 50 -n 5 -f 0.3,0.2,0.2,0.3,0.0 r_d
# ./goose-fastqsimulation -eh -eo -es -edb -rm 0 -s 201 -ls 50 -n 5 \
# -f 0.2,0.3,0.3,0.2,0.0 r_a
# ./goose-fastqsimulation -eh -eo -es -edb -rm 0 -s 58 -ls 50 -n 5 \
# -f 0.25,0.25,0.25,0.25,0.0 r_b
# ./goose-fastqsimulation -eh -eo -es -edb -rm 0 -s 3 -ls 50 -n 5 \
# -f 0.25,0.25,0.25,0.25,0.0 r_c
# ./goose-fastqsimulation -eh -eo -es -edb -rm 0 -s 1785 -ls 50 -n 5 \
# -f 0.3,0.2,0.2,0.3,0.0 r_d
# cat r_a r_b r_c r_d > refs

./goose-mutatedna -mr 0.00 < r_d > t_a
./goose-mutatedna -mr 0.01 < r_b > t_c
./goose-mutatedna -mr 0.02 < r_a > t_b
./goose-mutatedna -mr 0.05 < r_c > t_d
cat t_a t_b t_c t_d > tars
# cp r_a t_b
# cp r_b t_c
# cp r_c t_d
# cp r_d t_a
# # ./goose-mutatedna -mr 0.00 < r_d > t_a
# # ./goose-mutatedna -mr 0.01 < r_b > t_c
# # ./goose-mutatedna -mr 0.02 < r_a > t_b
# # ./goose-mutatedna -mr 0.05 < r_c > t_d
# cat t_a t_b t_c t_d > tars

### Medium sizes: ref:100000, tar:100000
./goose-fastqsimulation -eh -eo -es -edb -rn 2 -ri 50 -ra 500 -rm 0 -s 1001 \
-ls 100 -n 250 -f 0.25,0.25,0.25,0.25,0.0 r_a
./goose-fastqsimulation -eh -eo -es -edb -rn 1 -ri 50 -ra 500 -rm 0 -s 608 \
-ls 100 -n 250 -f 0.3,0.2,0.2,0.3,0.0 r_b
./goose-fastqsimulation -eh -eo -es -edb -rn 3 -ri 50 -ra 500 -rm 0 -s 30 \
-ls 100 -n 250 -f 0.2,0.3,0.3,0.2,0.0 r_c
./goose-fastqsimulation -eh -eo -es -edb -rn 1 -ri 50 -ra 500 -rm 0 -s 17805 \
-ls 100 -n 250 -f 0.25,0.25,0.25,0.25,0.0 r_d
./goose-fastqsimulation -eh -eo -es -edb -rm 0 -s 1001 -ls 100 -n 250 \
-f 0.25,0.25,0.25,0.25,0.0 r_a
./goose-fastqsimulation -eh -eo -es -edb -rm 0 -s 608 -ls 100 -n 250 \
-f 0.3,0.2,0.2,0.3,0.0 r_b
./goose-fastqsimulation -eh -eo -es -edb -rm 0 -s 30 -ls 100 -n 250 \
-f 0.2,0.3,0.3,0.2,0.0 r_c
./goose-fastqsimulation -eh -eo -es -edb -rm 0 -s 17805 -ls 100 -n 250 \
-f 0.25,0.25,0.25,0.25,0.0 r_d
cat r_a r_b r_c r_d > refm

./goose-mutatedna -mr 0.00 < r_d > t_a
./goose-mutatedna -mr 0.00 < r_a > t_d
./smashpp-inv-rep < r_b > t_c
./goose-mutatedna -mr 0.01 < r_c > t_b
./goose-mutatedna -mr 0.02 < r_b > t_c
./goose-mutatedna -mr 0.05 < r_a > t_d
./goose-mutatedna -mr 0.00 < r_d > t_a
cat t_a t_b t_c t_d > tarm

### Large sizes: ref:5000000, tar:5000000
./goose-fastqsimulation -eh -eo -es -edb -rn 2 -ri 500 -ra 1000 -rm 0 \
-ls 100 -n 12500 -f 0.3,0.2,0.3,0.2,0.0 -s 10101 r_a
./goose-fastqsimulation -eh -eo -es -edb -rn 1 -ri 500 -ra 1000 -rm 0 \
-ls 100 -n 12500 -f 0.3,0.2,0.2,0.3,0.0 -s 6018 r_b
./goose-fastqsimulation -eh -eo -es -edb -rn 3 -ri 500 -ra 1000 -rm 0 \
-ls 100 -n 12500 -f 0.25,0.25,0.25,0.25,0.0 -s 10 r_c
./goose-fastqsimulation -eh -eo -es -edb -rn 1 -ri 500 -ra 1000 -rm 0 \
-ls 100 -n 12500 -f 0.25,0.25,0.25,0.25,0.0 -s 7 r_d
cat r_a r_b r_c r_d > refl
# ### Large sizes: ref:5000000, tar:5000000
# ./goose-fastqsimulation -eh -eo -es -edb -rn 2 -ri 500 -ra 1000 -rm 0 \
# -ls 100 -n 12500 -f 0.3,0.2,0.3,0.2,0.0 -s 10101 r_a
# ./goose-fastqsimulation -eh -eo -es -edb -rn 1 -ri 500 -ra 1000 -rm 0 \
# -ls 100 -n 12500 -f 0.3,0.2,0.2,0.3,0.0 -s 6018 r_b
# ./goose-fastqsimulation -eh -eo -es -edb -rn 3 -ri 500 -ra 1000 -rm 0 \
# -ls 100 -n 12500 -f 0.25,0.25,0.25,0.25,0.0 -s 10 r_c
# ./goose-fastqsimulation -eh -eo -es -edb -rn 1 -ri 500 -ra 1000 -rm 0 \
# -ls 100 -n 12500 -f 0.25,0.25,0.25,0.25,0.0 -s 7 r_d
# cat r_a r_b r_c r_d > refl

./goose-mutatedna -mr 0.00 < r_c > t_a
./goose-mutatedna -mr 0.01 < r_b > t_b
./goose-mutatedna -mr 0.02 < r_a > t_c
./goose-mutatedna -mr 0.05 < r_d > t_d
cat t_a t_b t_c t_d > tarl
# ./goose-mutatedna -mr 0.00 < r_c > t_a
# ./goose-mutatedna -mr 0.01 < r_b > t_b
# ./goose-mutatedna -mr 0.02 < r_a > t_c
# ./goose-mutatedna -mr 0.05 < r_d > t_d
# cat t_a t_b t_c t_d > tarl

rm r_* t_*
fi

### Get real dataset
#if [[ $REAL -eq 1 ]]; then
#fi
#fi
Binary file added smashpp-inv-rep
Binary file not shown.
2 changes: 1 addition & 1 deletion smashpp.REMOVED.git-id
Original file line number Diff line number Diff line change
@@ -1 +1 @@
8e1251bf63b02eee2bfc1ff20993845a5f985318
60f94b22879cfff5a2a3e71ed9d4078a923567b7
16 changes: 16 additions & 0 deletions src/filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,22 @@ void Filter::aggregate_final_pos (const string& ref, const string& tar) const {
midf0.close(); remove(midf0Name.c_str());
finf.close();
}
else if (midf0IsEmpty && !midf1IsEmpty) {
ifstream midf1(midf1Name);
ofstream finf(gen_name(ref, tar, Format::POSITION));

finf << POS_HDR <<'\t'<< file_name(ref) <<'\t'<< to_string(file_size(ref))
<<'\t'<< file_name(tar) <<'\t'<< to_string(file_size(tar));
finf << '\n';

const u64 size = file_size(midf1Name);
vector<char> buffer(size, 0);
midf1.read (buffer.data(), size);
finf.write(buffer.data(), size);

midf1.close(); remove(midf1Name.c_str());
finf.close();
}
else {
ifstream midf0(midf0Name), midf1(midf1Name);
ofstream finf(gen_name(ref, tar, Format::POSITION));
Expand Down
48 changes: 48 additions & 0 deletions src/invRep.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <vector>
using namespace std;

// Complement lookup table indexed by ASCII code. Valid indices are 0..122
// (i.e. up to and including 'z'). Upper- and lower-case bases both map to the
// upper-case complement:
//   A/a -> T    C/c -> G    G/g -> C    T/t -> A    N/n -> N
// Every other entry is 0.
static constexpr char REV[123] {
  // 0 .. 63: no bases in this range
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 64 .. 79: 'A'(65) 'C'(67) 'G'(71) 'N'(78)
  0,'T', 0,'G', 0, 0, 0,'C', 0, 0, 0, 0, 0, 0,'N', 0,
  // 80 .. 95: 'T'(84)
  0, 0, 0, 0,'A', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 96 .. 111: 'a'(97) 'c'(99) 'g'(103) 'n'(110)
  0,'T', 0,'G', 0, 0, 0,'C', 0, 0, 0, 0, 0, 0,'N', 0,
  // 112 .. 122: 't'(116)
  0, 0, 0, 0,'A', 0, 0, 0, 0, 0, 0
};

int main (int argc, char* argv[]) {
const string inFileName = argv[1];
const string outFileName = argv[2];
size_t size;
{
ifstream inFile(inFileName, ifstream::ate | ifstream::binary);
size = inFile.tellg();
inFile.close();
}

ifstream inFile(inFileName);
vector<char> buffer(size, 0);
inFile.read(buffer.data(), size);
inFile.close();

// reverse(buffer.begin(), buffer.end());

// // for (auto& c : buffer)
// // c = REV[static_cast<unsigned char>(c)];

// for (auto it=buffer.data();it<buffer.data()+size;++it)
// *it = REV[static_cast<unsigned char>(*it)];



ofstream outFile(outFileName);
outFile.write(buffer.data(), size);
outFile.close();

return 0;
}
1 change: 1 addition & 0 deletions tarm

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion tarm.REMOVED.git-id

This file was deleted.

2 changes: 1 addition & 1 deletion tars
Original file line number Diff line number Diff line change
@@ -1 +1 @@
CATGTTCTTTAACCAAACGGTAAGTCCGTAGAACTACAACAAGCGTGGAAGCGAGGCATTGATTTTCCGCCAAAGTCTACATTTAGCCGACACCTAAATTTAACTATATATCAGTCAACACTGATGTAATCATGCTGCTCCGTCATTCAATAGTTCAGGAGTGACTAAGGGGTACCCAGTTGTCCTTCCCGACAATACATCTAGCACTTATTTCCTACACCTAAATTTAACATTGTACGCCCATTTTTCTTATTTGATGACGCATCGTAAGGACCAGGCGTCGGAGCAGCCAAGCTTTTAGAACACGAGCGGTAGTCCGACCTGCGGGGGCCGGGTTCTGGCCGGTGTACTTCGGTTCTCGGGTGAGCAGGCCCAGCAGCCCCAGCACGCTCTGAACGAAGGCAGTCCCAGCTCGCTTACCATGTCCGAACGACCACAAAGGGACGCAGCACGTGGGTGTTACGTGCTCAAGGTGGGGCCTGTGGGACCGACTGTGACGGAGAGATCCTATCATTAGGAGGAGTTGTCTTCTCGGCGTTCATTAGTCCCCAGGAAGGATTAGACTGATACTGGGTCTCCGCGCATGGCGCCTTCATAAGAAAACGTTTAAGAGCAGTCTAGGCCCATAATAATAGGAGGCGAGACGGAACCGCTCTCCACAGAAGAACATTACGGGGCTTAAGCTTGCCTGCCGGATGCTAGTAACAAACAGGAACGTGAACTTACATGCGTAGTCTTCACGTTGACATGACAGTTACACACAGATGGAATTAAGCCTAAAAGATGATTACACCGGCAACTGAAATGTTACGTTCTGGTATTGGGTTCATTTCATGTGCAGTGCGACCGCAAAATTCCTTAAACCGAGGGTGGCCTGACCTGCTGCTTGGGTTTAACCGAGTTGGGGACCTAGTAAAGTGCAGGCCTCTATCTGAGTTGGCGCAGGCGTAACGCACCTCCTATAATAACGGAGCAACTAAAACTCGGGTTCAGTTTGTAC
TACATGTTCTTTAACCAAACGGTAAGTCCGTAGAACTACAACAAGCGTGGAAGCGAGGCATTGATTTTCCGCCAAAGTCTACATTTAGCCGACACCTAAATCTAACTATATATCAGTCAACACTGATGTAATCATGCTGCTCCGTCATTCCGTAGTTCAGGAGTGACTAAGGGGTACCCAGTTGTCCTTCCCGACAATACAACTAGCACTTATTTCCTTAGAATTCACAGTCTAATTGTACGCCCATTTTGCGCCTTGATGCCGTTCGCGACGGATCAGACCCTGTTCCACACGGAGATTCCAGGGCGACCTTTTTCACGTACGCGGCCAGGGTCAGGGGTTGTTCGAGAGATCTTCATGTCGGCTACCATCCACTAGCTCTTACGTGTTCGATCGGGTCAACGGCGACCCCTCCCCCAAAACAGCCCCCCGCGCAGTACCGGAAACCCGCCTTGCGTCATGCGCTCGCCCATGCTTGTCCCTCACGGCTTCGCTACCGGGTTGATACACCGCTATGAATGAACCGAAGAAATTTATTCAATGGGTGCTTTCACCTAACATTGCGAAGATATTTACTCCACCGCGTCGAGTTGGCATCTAGTCAACTACTTATGAGTGTTGGTTTCTTTAACTCGAGCATAAAAGCGGTGCGCGTAATCACGGCAGGCACCCCGTTGCGCGATAGTATTCAAATGCCCGCTTGTGAACCCAAGGCGGTGCACGAGGTCTCGGAAGTCTAGACTGAACGATCAGTTAACAGTTACACACAGATGGATTTAAACCTAAAAGATGATTACACCCAGAGGTGAAATGTAACGTTCTGGTATTGGGTTCAGTTCATGTGCAGTGCACTGGGAAAATTCCTTAAACCGAGGGTGGCCTGACCTGCTGCTTGGGTTTTCACAGGTTGGGTACCCAGCAAAGTGCAGGCCTCTATCTGAGCTGGCATTTGGGTGCACGCACCTCCTATAATAACGGAGGAACTAAAACTCGCTCGACT

0 comments on commit 387d062

Please sign in to comment.