Skip to content

Commit

Permalink
add material views
Browse files Browse the repository at this point in the history
  • Loading branch information
Dylan Hoogduin authored and Dylan Hoogduin committed Apr 17, 2020
1 parent 81ed102 commit 86a6877
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 1 deletion.
39 changes: 39 additions & 0 deletions MaterialViews.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import sqlalchemy


class MaterialViews():
    """Rebuild the ``transcript_mv`` summary table on a database connection.

    Instantiating the class immediately runs :meth:`run_mv_transcript`
    against ``db``; the connection is closed when that method finishes.
    """

    def __init__(self, db):
        """Store ``db`` and build the materialized view right away.

        Args:
            db: Object exposing ``execute(...)`` and ``close()``
                (presumably a SQLAlchemy connection -- confirm with caller).
        """
        self.db = db
        self.run_mv_transcript()

    def run_mv_transcript(self):
        """Recreate ``transcript_mv`` and fill it per tissue.

        Executes the statements in ``mviews/transcript_mv.sql``, then, for
        every id in the ``tissue`` table, selects the 100 genes with the
        highest average ``count`` in ``transcript`` and inserts them into
        ``transcript_mv``. The connection is closed afterwards, even when a
        statement fails.
        """
        with open('mviews/transcript_mv.sql') as file:
            script = file.read()

        # Bound parameters instead of str.format: the driver takes care of
        # quoting and of values such as Decimal or None.
        select_top = sqlalchemy.text(
            "SELECT gene, tissue, avg(count) from transcript "
            "WHERE tissue = :tissue "
            "GROUP BY gene "
            "ORDER BY avg(count) DESC "
            "LIMIT 100 "
        )
        insert_row = sqlalchemy.text(
            "INSERT INTO transcript_mv (gene, tissue, count_avg) "
            "VALUES (:gene, :tissue, :count_avg)"
        )

        try:
            self.prepare(script)
            result = self.db.execute(sqlalchemy.text("SELECT id FROM tissue"))
            tissue_ids = [row[0] for row in result]
            for tissue_id in tissue_ids:
                # Materialize the rows before issuing INSERTs on the same
                # connection, mirroring how the tissue ids are handled.
                rows = list(self.db.execute(select_top, {"tissue": tissue_id}))
                for gene, tissue, count_avg in rows:
                    self.db.execute(
                        insert_row,
                        {"gene": gene, "tissue": tissue, "count_avg": count_avg},
                    )
        finally:
            self.db.close()

    def prepare(self, file):
        """Execute every ``;``-terminated statement found in ``file``.

        Args:
            file: The SQL script, either as one string or as an iterable of
                lines (for example an open text file).

        Raises:
            Whatever ``self.db.execute`` raises for a failing statement;
            later statements are then not executed.
        """
        for statement in self._split_statements(file):
            self.db.execute(sqlalchemy.text(statement))

    @staticmethod
    def _split_statements(source):
        """Split an SQL script into single-line statements.

        Blank lines and lines starting with ``--`` are skipped. The
        remaining lines are stripped and joined with one space until a line
        ends with ``;``. Trailing text without a closing ``;`` is dropped.

        Args:
            source: Script text as a string, or an iterable of lines.

        Returns:
            list[str]: The statements in script order, each ending in ``;``.
        """
        # A plain string iterates character by character, which would defeat
        # the per-line comment check below, so split it into lines first.
        if isinstance(source, str):
            source = source.splitlines()
        statements = []
        parts = []
        for line in source:
            stripped = line.strip()
            if not stripped or stripped.startswith('--'):
                continue
            parts.append(stripped)
            if stripped.endswith(';'):
                # Join with a space so tokens on adjacent lines do not fuse.
                statements.append(' '.join(parts))
                parts = []
        return statements
9 changes: 8 additions & 1 deletion Seeder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os, time, sys
from Database import db
from MaterialViews import MaterialViews
from Merger import GeneAliasRetriever
from sqlalchemy.exc import SQLAlchemyError
from dotenv import load_dotenv
Expand Down Expand Up @@ -150,6 +151,9 @@ def insert_count(self, stage_file, tissue_file):
print('--- All counts inserted! ---')
self.connection.close()

def run_mv(self):
    """Connect to the configured database and build the material views.

    Opens the connection via ``connect_to_db`` and hands it to
    ``MaterialViews``, whose constructor does the actual work.
    """
    self.connect_to_db(self.database)
    connection = self.connection
    MaterialViews(connection)

if __name__ == '__main__':
DB_DIALECT = os.getenv("DB_DIALECT")
Expand All @@ -163,6 +167,7 @@ def insert_count(self, stage_file, tissue_file):
INSERT_REF = os.getenv("INSERT_REF")
INSERT_COUNT = os.getenv("INSERT_COUNT")
CORRECT_GENES = os.getenv("CORRECT_GENES")
RUN_MV = os.getenv("RUN_MV")
seeder = Seeder(
dialect=DB_DIALECT,
driver=DB_DRIVER,
Expand All @@ -178,4 +183,6 @@ def insert_count(self, stage_file, tissue_file):
if INSERT_COUNT == "true":
seeder.insert_count(stage_file='datasets/stage.csv', tissue_file='datasets/tissue.csv')
if CORRECT_GENES == "true":
seeder.correct_genes("updated_genes.txt")
seeder.correct_genes("updated_genes.txt")
if RUN_MV == "true":
seeder.run_mv()
7 changes: 7 additions & 0 deletions mviews/transcript_mv.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/* Summary table filled by MaterialViews.run_mv_transcript with one row per gene and tissue */
DROP TABLE IF EXISTS transcript_mv;
CREATE TABLE transcript_mv (
    gene INT NOT NULL,
    tissue INT NOT NULL,
    count_avg DECIMAL(10,2) NOT NULL,
    UNIQUE INDEX product (gene, tissue)
);

0 comments on commit 86a6877

Please sign in to comment.