-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjust_cancer.py
110 lines (91 loc) · 3.4 KB
/
just_cancer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from oakvar import BasePostAggregator
from pathlib import Path
import sqlite3
class CravatPostAggregator(BasePostAggregator):
    """Post-aggregator that copies selected variants into a ``cancer`` table.

    A variant is written when all of the following hold:

    * its ``clinvar__sig`` value is one of ``significance_filter``;
    * its ``base__hugo`` gene symbol is listed in ``data/genes.txt`` (located
      next to this module);
    * at least one of ``omim__omim_id``, ``ncbigene__ncbi_desc``,
      ``clinvar__id`` or ``pubmed__n`` is neither ``None`` nor ``""``.

    Rows go to ``<output_dir>/<run_name>_longevity.sqlite``.
    """

    # Gene symbols accepted by ``annotate``; replaced with the file contents
    # in ``setup``.
    genes: set[str] = set()

    # ClinVar significance values that let a variant through the first filter.
    significance_filter: list[str] = [
        "Affects, risk factor",
        "Benign, risk factor",
        "Conflicting interpretations of pathogenicity",
        "Conflicting interpretations of pathogenicity, other",
        "Conflicting interpretations of pathogenicity, other, risk factor",
        "Conflicting interpretations of pathogenicity, risk factor",
        "Likely pathogenic",
        "Pathogenic",
        "Pathogenic, protective",
        "Uncertain significance",
        "association, risk factor",
        "protective, risk factor",
        "risk factor",
    ]

    # Not read anywhere in this class; kept because it is a public attribute.
    col_index: int = 0

    # Class-level defaults so ``cleanup`` is safe even when ``setup`` failed
    # before the connection was opened (or was never called).
    longevity_conn: "sqlite3.Connection | None" = None
    longevity_cursor: "sqlite3.Cursor | None" = None

    # NOTE(review): the column names "zegot" and "alelfreq" look like typos of
    # zygosity / allele frequency, but they are the on-disk schema, so they
    # are left unchanged here.
    _CREATE_TABLE_SQL = """ CREATE TABLE IF NOT EXISTS cancer (
        id integer NOT NULL PRIMARY KEY,
        chrom text,
        pos text,
        gene text,
        rsid text,
        cdnachange text,
        zegot text,
        alelfreq text,
        phenotype text,
        significance text,
        ncbi text
        )"""

    _INSERT_SQL = """ INSERT INTO cancer (
        chrom,
        pos,
        gene,
        rsid,
        cdnachange,
        zegot,
        alelfreq,
        phenotype,
        significance,
        ncbi
        ) VALUES (?,?,?,?,?,?,?,?,?,?) """

    def check(self):
        """Return ``True`` unconditionally."""
        return True

    def setup(self):
        """Load the gene list and open (and empty) the result database."""
        genes_path = Path(__file__).parent / "data" / "genes.txt"
        with open(genes_path, encoding="utf-8") as f:
            # Strip each line and drop blanks: a trailing newline would
            # otherwise add "" to the set (letting variants with an empty gene
            # symbol through), and CRLF files would leave "\r" on every symbol.
            self.genes = {line.strip() for line in f if line.strip()}

        self.result_path: Path = Path(
            self.output_dir, self.run_name + "_longevity.sqlite"
        )
        self.longevity_conn = sqlite3.connect(self.result_path)
        self.longevity_cursor = self.longevity_conn.cursor()
        self.longevity_cursor.execute(self._CREATE_TABLE_SQL)
        # Start from an empty table so rows from a previous run on the same
        # output file are not mixed with the rows of this run.
        self.longevity_cursor.execute("DELETE FROM cancer;")
        self.longevity_conn.commit()

    def cleanup(self):
        """Commit pending inserts and close the cursor and the connection.

        Safe to call when ``setup`` did not complete and safe to call twice.
        """
        cursor, conn = self.longevity_cursor, self.longevity_conn
        self.longevity_cursor = None
        self.longevity_conn = None
        try:
            if cursor is not None:
                cursor.close()
        finally:
            # The connection must be released even if closing the cursor or
            # committing raises.
            if conn is not None:
                try:
                    conn.commit()
                finally:
                    conn.close()

    def annotate(self, input_data):
        """Insert one variant into ``cancer`` if it passes every filter.

        ``input_data`` is indexed by column name (``base__chrom``,
        ``clinvar__sig``, ...). Nothing is returned; rows are committed in
        ``cleanup``.
        """
        significance = input_data["clinvar__sig"]
        if significance not in self.significance_filter:
            return

        gene = input_data["base__hugo"]
        if gene not in self.genes:
            return

        omim_id = input_data["omim__omim_id"]
        ncbi_desc = input_data["ncbigene__ncbi_desc"]
        clinvar_id = input_data["clinvar__id"]
        pubmed_n = input_data["pubmed__n"]
        # Explicit None / "" comparison rather than truthiness, so that a
        # value such as 0 still counts as present.
        has_evidence = any(
            value is not None and value != ""
            for value in (omim_id, ncbi_desc, clinvar_id, pubmed_n)
        )
        if not has_evidence:
            return

        row = (
            input_data["base__chrom"],
            input_data["base__pos"],
            gene,
            input_data["dbsnp__rsid"],
            input_data["base__cchange"],
            input_data["vcfinfo__zygosity"],
            input_data["gnomad__af"],
            input_data["clinvar__disease_names"],
            significance,
            ncbi_desc,
        )
        self.longevity_cursor.execute(self._INSERT_SQL, row)