forked from nsmro/synphoni
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalysis_intervening_genes.py
72 lines (66 loc) · 2.8 KB
/
analysis_intervening_genes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env python3
import argparse
import csv
import textwrap
import synphoni.utils as su
from synphoni.logo import logo_ASCII
# Command-line interface of the analysis script.
# The logo is prepended *after* dedenting the static text: running
# textwrap.dedent over the interpolated logo would let the logo's own lines
# take part in the common-margin computation and would rewrite any
# whitespace-only line of the logo.
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=f"{logo_ASCII()}\n" + textwrap.dedent("""\
        Analysis script of the SYNPHONI (detection of ancestral SYNteny based on PHylogeny and Ortholog Network Inference) pipeline:
        Summarizes the number of intervening genes between consecutive block genes
        """),
)
# Pickled chromosome data (read through su.load_chrom_data).
parser.add_argument("-c", "--chrom_data",
                    help="pickle file of the chromdata, generated by step 1",
                    type=str,
                    required=True)
# Tab-separated synt file; one block per row.
parser.add_argument("-sy", "--synt_file",
                    help="synt file, generated by step 4",
                    required=True,
                    type=str)
# Tab-separated multi-species file; first column is the multi-species id,
# remaining columns are the block ids belonging to it.
parser.add_argument("-ms", "--multi_sp_file",
                    help="multi species file, generated by step 4",
                    required=True,
                    type=str)
# The ".tsv" suffix is appended to this prefix when the output is opened.
parser.add_argument("-o", "--output",
                    help="Prefix of the output tsv file",
                    type=str,
                    required=True)
args = parser.parse_args()
# Chromosome data as returned by su.load_chrom_data for the pickle given
# on the command line.
chrom_dict = su.load_chrom_data(filepath=args.chrom_data)

# Reverse lookup: block id -> multi-species id.
# Each row of the multi-species file holds the multi-species id in its first
# column and the ids of its blocks in the remaining columns. A block id that
# appears on several rows keeps the id of the last row it was seen on.
multi_sp_dict = {}
with open(args.multi_sp_file, "r") as msp_handle:
    multi_sp_dict = {
        block_name: fields[0]
        for fields in csv.reader(msp_handle, delimiter="\t")
        for block_name in fields[1:]
    }
# Write, for every pair of consecutive genes of every block in the synt file,
# the number of genes lying between them on the scaffold.
output_file = f"{args.output}.tsv"
# newline="" is what the csv module asks for on file objects it reads or
# writes: without it the writer's "\r\n" row terminator is translated a second
# time on platforms whose line separator is not "\n", giving "\r\r\n".
with open(args.synt_file, "r", newline="") as synt, \
        open(output_file, "w", newline="") as out:
    synt_reader = csv.reader(synt, delimiter="\t")
    tsv_writer = csv.writer(out, delimiter="\t")
    tsv_writer.writerow(("multi_sp",
                         "block_id",
                         "chromosome",
                         "species",
                         "acc_pair",
                         "para",
                         "intervening_genes"))
    for row in synt_reader:
        # csv.reader yields [] for a blank line; there is nothing to report
        # for it, and indexing it would raise IndexError.
        if not row:
            continue
        block_id = row[0]
        # Raises KeyError if the block is absent from the multi-species file.
        multi_sp = multi_sp_dict[block_id]
        species = row[1]
        # Column 8 is read as "<chromosome>:<rest>"; only the chromosome name
        # is used.
        chrom = row[7].split(":")[0]
        # Columns 10 and 11 are comma-separated lists that are walked in
        # lockstep (accessions and their orthogroups).
        acc_ls = row[9].split(",")
        og_ls = row[10].split(",")
        # NOTE(review): su.window is assumed to yield consecutive pairs of
        # its input -- confirm against synphoni.utils.
        for (acc1, acc2), (og1, og2) in zip(su.window(acc_ls), su.window(og_ls)):
            # NOTE(review): element 0 of the chrom_dict leaf is presumably the
            # gene's ordinal position on the scaffold -- confirm.
            pos1 = chrom_dict[og1][species][chrom][acc1][0]
            pos2 = chrom_dict[og2][species][chrom][acc2][0]
            # Adjacent positions (difference of 1) have 0 genes in between.
            nb_intervening = abs(pos2 - pos1) - 1
            # Two consecutive genes from the same orthogroup are flagged
            # as "para".
            para = "para" if og1 == og2 else "not_para"
            tsv_writer.writerow((multi_sp,
                                 block_id,
                                 chrom,
                                 species,
                                 f"{acc1},{acc2}",
                                 para,
                                 nb_intervening))