-
Notifications
You must be signed in to change notification settings - Fork 4
/
anchor_junction.rb
48 lines (41 loc) · 1.4 KB
/
anchor_junction.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Splits the alignments of a SAM file into one file per junction group and
# scores every group against its truth file.
#
# Usage: ruby anchor_junction.rb <sam_file> <dataset> <species>
#   e.g. ruby anchor_junction.rb fixed.sam t3r1 human
#
# Truth files are all files matching
#   /project/itmatlab/aligner_benchmark/dataset/<species>/dataset_<dataset>/junction/*cig
# Each truth file defines one group, named after the file's basename; the
# first tab-separated field of each of its lines is a read name in that group.
# A read name listed in several truth files is assigned to the last one read.
#
# For every group <g> that contains at least one read, this writes:
#   <sam_file>_<g>                             SAM lines whose read name is in <g>
#   <sam_file>_<g>_s                           those lines after `sort -t. -k 2n`
#   <sam_file>_<g>_comp_res_multi_mappers.txt  stdout of compare2truth_multi_mappers.rb
#   <sam_file>_<g>_comp_res.txt                stdout of compare2truth.rb
# The two compare scripts are looked up in the directory of this script.

# Runs an external command with its stdout redirected to +out_path+.
# The command is given as separate arguments, so no shell parses it and paths
# containing spaces or shell metacharacters are passed through untouched.
# A failing command is reported on stderr; processing then continues.
def run_to_file(out_path, *cmd)
  return if system(*cmd, out: out_path)

  warn "command failed: #{cmd.join(' ')}"
end

abort "Usage: ruby #{File.basename(__FILE__)} <sam_file> <dataset> <species>" if ARGV.length < 3

sam_file = ARGV[0]
dataset = ARGV[1]
species = ARGV[2]

d = "/project/itmatlab/aligner_benchmark/dataset/#{species}/dataset_#{dataset}/junction/*cig"
puts d
# Glob once; the same list drives both the grouping and the comparison step.
truth_files = Dir[d]

# read name => group (basename of the truth file that lists the read)
group_for_read = {}
truth_files.each do |fn|
  group = File.basename(fn)
  # File.foreach closes the file when done, unlike File.open(fn).each.
  File.foreach(fn) do |l|
    # chomp first so a line without any tab does not keep its newline in the key
    read_name = l.chomp.split("\t", 2).first
    next if read_name.nil? || read_name.empty?

    group_for_read[read_name] = group
  end
end

files = {}
filenames = []
begin
  group_for_read.values.uniq.each do |names|
    puts names
    path = "#{sam_file}_#{names}"
    filenames << path
    files[names] = File.open(path, "w")
  end
  puts "files: #{files}"
  puts "filenames: #{filenames.join("\t")}"

  File.foreach(sam_file) do |line|
    # Lines whose first field is not a known read name (e.g. SAM headers) are dropped.
    out = files[group_for_read[line.split("\t", 2).first]]
    out.puts line if out
  end
ensure
  # Close the per-group outputs even if reading the SAM file fails.
  files.each_value(&:close)
end

script_dir = File.expand_path(File.dirname(__FILE__))
truth_files.each do |fn|
  # Exact comparison instead of an unescaped regex built from the file name:
  # a group whose name is a suffix of another one can no longer be confused.
  target = "#{sam_file}_#{File.basename(fn)}"
  ind = filenames.index(target)
  puts ind
  if ind.nil?
    # Truth file without any read: no per-group SAM file exists to compare.
    warn "no reads for #{fn}, skipping"
    next
  end

  sorted = "#{target}_s"
  run_to_file(sorted, "sort", "-t", ".", "-k", "2n", target)
  run_to_file("#{target}_comp_res_multi_mappers.txt",
              "ruby", File.join(script_dir, "compare2truth_multi_mappers.rb"), "-s", fn, sorted)
  run_to_file("#{target}_comp_res.txt",
              "ruby", File.join(script_dir, "compare2truth.rb"), "-s", fn, sorted)
end