-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDrugTargetPairRetriever.py
95 lines (72 loc) · 3.83 KB
/
DrugTargetPairRetriever.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
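# Return a set of dictionaries providing drug->gene and gene->drug pairs.
# If we don't have them locally, go to www.drugbank.ca to pull them.
# NOTE(review): the code below currently loads the data into a SQLite database
# and returns a placeholder string rather than dictionaries -- confirm the intent.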
import sys
import urllib
import zipfile
import csv
import sqlite3
import ProjectDefinitions
# Full DrugBank dump (all of the drug information). It is more than we need
# right now, so these settings are kept disabled:
#zipped_filename = data_directory + "drugbank.xml.zip"
#filename = data_directory + "drugbank.xml"
#source_url = "http://www.drugbank.ca/system/downloads/current/drugbank.xml.zip"

# All known targets of drugs: remote archive, extracted CSV, local archive.
# The local paths are built by plain string concatenation, so
# ProjectDefinitions.drugbankdir presumably ends with a path separator
# (verify against ProjectDefinitions).
source_url = "http://www.drugbank.ca/system/downloads/current/all_target_ids_all.csv.zip"
filename = ProjectDefinitions.drugbankdir + "all_target_ids_all.csv"
zipped_filename = ProjectDefinitions.drugbankdir + "all_target_ids_all.csv.zip"
def drug_main():
    """Run the drug-data retrieval and return whatever getDrugData() returns."""
    return getDrugData()
def getDrugData():
    """Ensure the DrugBank target CSV exists locally, then load it into SQLite.

    If the CSV at the module-level ``filename`` cannot be opened, the zipped
    copy is downloaded from ``source_url`` to ``zipped_filename`` and
    extracted into ``ProjectDefinitions.drugbankdir``. Afterwards
    ``fileToSQLite()`` is called to (re)build the database.

    Returns:
        The placeholder string ``"test"``; the drug data itself is not
        returned.
    """
    # urlretrieve lives in urllib on Python 2 and in urllib.request on
    # Python 3; resolve it locally so this function works on either.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    debug = ProjectDefinitions.DEBUG

    if debug:
        sys.stdout.write("trying to open DrugBank target data file at: " + filename + " ")
    try:
        # Probe only: open the file to see whether it exists, and let the
        # context manager close the handle again straight away.
        with open(filename, "r"):
            pass
    except IOError:
        print("WARNING: Couldn't find the file, downloading the compressed version...")
        print("source: " + source_url)
        print("destination: " + zipped_filename)
        # Download from the previously known good location.
        urlretrieve(source_url, zipped_filename)
        if debug:
            sys.stdout.write("Download completed, decompressing...")
        # Unzip the results; the context manager closes the archive handle.
        with zipfile.ZipFile(zipped_filename) as archive:
            archive.extractall(ProjectDefinitions.drugbankdir)
        if debug:
            print("file decompressed!")
    else:
        if debug:
            print("File opened successfully!")

    # NOTE(review): the database is rebuilt on every call, whether or not a
    # download was needed -- confirm this matches the original intent.
    fileToSQLite()
    # Placeholder return value; no drug data is handed back to the caller.
    return "test"
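# In the original drug data, some of the drugs are genetic modifiers and some are physical modifiers.
# This function will separate physiological effects from genetic modifications.
# NOTE(review): fileToSQLite() below only loads the CSV into SQLite and does not
# perform that separation -- confirm whether this comment is stale.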
def _open_csv_for_reading(path):
    """Open ``path`` in the mode the ``csv`` module expects on this interpreter."""
    if sys.version_info[0] >= 3:
        # Python 3: text mode with newline translation disabled, as the csv
        # documentation requires. The "U" mode flag was removed in 3.11.
        return open(path, "r", newline="")
    # Python 2: universal-newline mode, as this code has always used.
    return open(path, "rU")


def fileToSQLite():
    """Load the DrugBank target CSV into the ``drugs`` table of the SQLite DB.

    The database file is ``ProjectDefinitions.data_directory +
    ProjectDefinitions.drugbank_db_name``. Any existing ``drugs`` table is
    dropped and recreated, then filled from the CSV at the module-level
    ``filename``. The ``Yeast_ID`` column is created but not populated here.
    After loading, the ``Gene_Name`` of every row whose ``Drug_IDs`` contains
    ``DB00157`` is printed.

    Raises:
        IOError: if the CSV file cannot be opened.
        KeyError: if the CSV header lacks one of the expected columns.
        sqlite3.Error: if a database operation fails.
    """
    print("trying to convert file to a sqlite database...")

    # CSV header names, in insert order. The matching table column is the
    # same name with spaces replaced by underscores.
    csv_columns = (
        "ID", "Name", "Gene Name", "GenBank Protein ID", "GenBank Gene ID",
        "UniProt ID", "Uniprot Title", "PDB ID", "GeneCard ID", "GenAtlas ID",
        "HGNC ID", "HPRD ID", "Species Category", "Species", "Drug IDs",
    )
    insert_sql = "INSERT INTO drugs (%s) VALUES (%s);" % (
        ",".join(name.replace(" ", "_") for name in csv_columns),
        ", ".join("?" for _ in csv_columns),
    )

    conn = sqlite3.connect(ProjectDefinitions.data_directory + ProjectDefinitions.drugbank_db_name)
    try:
        curs = conn.cursor()
        curs.execute("DROP TABLE IF EXISTS drugs")
        curs.execute("CREATE TABLE drugs (ID INTEGER PRIMARY KEY,Name TEXT,Gene_Name TEXT ,GenBank_Protein_ID INTEGER,GenBank_Gene_ID TEXT,UniProt_ID TEXT,Uniprot_Title TEXT ,PDB_ID TEXT,GeneCard_ID TEXT,GenAtlas_ID TEXT,HGNC_ID TEXT,HPRD_ID INTEGER,Species_Category TEXT,Species TEXT,Drug_IDs TEXT, Yeast_ID TEXT);")
        print("database created, reading in...")

        with _open_csv_for_reading(filename) as fin:
            # Stream the rows into SQLite instead of building the whole list
            # first; this must happen while the file is still open.
            rows = (
                tuple(record[name] for name in csv_columns)
                for record in csv.DictReader(fin)
            )
            curs.executemany(insert_sql, rows)
        conn.commit()

        # Sanity check: print the genes targeted by drug DB00157.
        curs.execute("SELECT Gene_Name FROM drugs WHERE Drug_IDs LIKE '%DB00157%'")
        for row in curs:
            print(row[0])
        print("done printing drug results!")
    finally:
        # Always release the database handle, even if loading fails.
        conn.close()
# Script entry point: start the program via drug_main() only when this file
# is executed directly, so nothing runs when it is imported as a module.
if __name__ == "__main__":
    drug_main()