forked from bowhan/piPipes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpiPipes_insertBed_to_bed2.cpp
93 lines (87 loc) · 3.07 KB
/
piPipes_insertBed_to_bed2.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
/*
# piPipes, a set of pipelines for PIWI-interacting RNA (piRNA) and transposon analysis
# Copyright (C) 2014 Bo Han, Wei Wang, Zhiping Weng, Phillip Zamore
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <cstddef>
#include <deque>
#include <exception>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
using namespace std;
namespace {

// One line of the insert file, plus the number of BED lines that refer to it.
struct InsertRecord {
    std::string sequence;   // text before the first tab of the insert line
    int reads = 0;          // integer parsed from the text after the first tab
    int timesMapped = 0;    // how many BED lines carry this record's index in column 4
};

// The pieces of a BED line that are needed to emit the BED2 line.
struct BedFields {
    std::size_t prefixLength = 0;  // length of "col1\tcol2\tcol3\t", copied verbatim to the output
    std::size_t insertIndex = 0;   // column 4, validated as an index into the insert records
    char strand = '\0';            // first character of column 6
};

// Splits an insert line at its first tab into sequence and read count.
// Returns false (leaving `record` unspecified) when there is no tab or the
// count is not parseable by std::stoi.
bool parseInsertLine (const std::string& line, InsertRecord& record) {
    const std::size_t tab = line.find ('\t');
    if (tab == std::string::npos) {
        return false;
    }
    try {
        record.reads = std::stoi (line.substr (tab + 1));
    } catch (const std::exception&) {
        // std::stoi reports both "not a number" and "out of range" by throwing.
        return false;
    }
    record.sequence.assign (line, 0, tab);
    record.timesMapped = 0;
    return true;
}

// Locates the first five tabs of a BED line and extracts column 4 (as an
// index smaller than `insertCount`) and the first character of column 6.
// Returns false and fills `reason` when the line cannot be used; nothing is
// ever read beyond the end of `line`.
bool parseBedLine (const std::string& line, std::size_t insertCount,
                   BedFields& fields, std::string& reason) {
    std::size_t tabs[5];
    std::size_t from = 0;
    for (std::size_t& tab : tabs) {
        tab = line.find ('\t', from);
        if (tab == std::string::npos) {
            reason = "fewer than six tab-separated columns";
            return false;
        }
        from = tab + 1;
    }
    // `from` is now the position of the first character of column 6.
    if (from >= line.size ()) {
        reason = "column 6 is empty";
        return false;
    }
    int index = 0;
    try {
        index = std::stoi (line.substr (tabs[2] + 1, tabs[3] - tabs[2] - 1));
    } catch (const std::exception&) {
        reason = "column 4 is not an integer";
        return false;
    }
    if (index < 0 || static_cast<std::size_t> (index) >= insertCount) {
        reason = "column 4 does not refer to a line of the insert file";
        return false;
    }
    fields.prefixLength = tabs[2] + 1;
    fields.insertIndex = static_cast<std::size_t> (index);
    fields.strand = line[from];
    return true;
}

} // namespace

// Converts an insert file plus a BED file into BED2 on standard output.
//
// argv[1]: insert file, one "sequence<TAB>count" record per line.
// argv[2]: BED file whose 4th column is used as a zero-based index into the
//          records of the insert file. It is read twice, so it must be seekable.
//
// Each output line is: the first three BED columns, the record's read count,
// the number of BED lines that reference the record, the first character of
// BED column 6, and the record's sequence, all tab-separated.
//
// Returns 0 on success and 1 on a usage error, an unreadable input, or a
// malformed line (the offending line is reported on standard error). The BED
// file is fully validated during the first pass, before any output is written.
int main (int argc, char** argv) {
    const std::string usage = R"(
This program takes two inputs:
1. the insert file with two fields, first being the sequence and second being the number of times this seuqence been read;
2. a standard BED file produced by bowtie reading the 1st file as input.
and produces BED2 format, with 1-3 fields being name of chromosome, start and end, same as standard BED format.
But the 4th column is the number of times this sequence been read, 5th column being the number of times this sequence being
mapped. the 6th column is the sequence itself.
usage:
istBed2Bed2 input.insert mapped.bed > output.bed2
Please contact [email protected] for any questions or bugs.
)";
    if (argc != 3) {
        std::cerr << usage;
        return 1;
    }

    std::ifstream ist {argv[1]};
    if (!ist) {
        std::cerr << "Error: cannot open insert file " << argv[1] << '\n';
        return 1;
    }
    std::ifstream bed {argv[2]};
    if (!bed) {
        std::cerr << "Error: cannot open BED file " << argv[2] << '\n';
        return 1;
    }

    // Only C++ streams are used, so the C stdio synchronisation is unnecessary.
    std::ios_base::sync_with_stdio (false);

    // Load the insert file; a record's position in `records` is the index
    // that column 4 of the BED file refers to.
    std::vector<InsertRecord> records;
    std::string line;
    while (std::getline (ist, line)) {
        records.emplace_back ();
        if (!parseInsertLine (line, records.back ())) {
            std::cerr << "Error: " << argv[1] << " line " << records.size ()
                      << " is not \"sequence<TAB>count\": " << line << '\n';
            return 1;
        }
    }

    BedFields fields;
    std::string reason;
    std::size_t lineNumber = 0;

    // First pass: count how many BED lines reference each insert record.
    while (std::getline (bed, line)) {
        ++lineNumber;
        if (!parseBedLine (line, records.size (), fields, reason)) {
            std::cerr << "Error: " << argv[2] << " line " << lineNumber
                      << ": " << reason << ": " << line << '\n';
            return 1;
        }
        ++records[fields.insertIndex].timesMapped;
    }

    // Rewind for the second pass; this fails for pipes and other non-seekable inputs.
    bed.clear ();
    bed.seekg (0, std::ios::beg);
    if (!bed) {
        std::cerr << "Error: cannot rewind BED file " << argv[2]
                  << " (it must be a regular, seekable file)\n";
        return 1;
    }

    // Second pass: emit one BED2 line per BED line.
    lineNumber = 0;
    while (std::getline (bed, line)) {
        ++lineNumber;
        // Re-validated in case the file changed between the two passes.
        if (!parseBedLine (line, records.size (), fields, reason)) {
            std::cerr << "Error: " << argv[2] << " line " << lineNumber
                      << ": " << reason << ": " << line << '\n';
            return 1;
        }
        const InsertRecord& record = records[fields.insertIndex];
        std::cout.write (line.data (), static_cast<std::streamsize> (fields.prefixLength));
        std::cout << record.reads << '\t' << record.timesMapped << '\t'
                  << fields.strand << '\t' << record.sequence << '\n';
    }
    return 0;
}