From 86a687788c28b938d5baccc5f64428683f1c3359 Mon Sep 17 00:00:00 2001
From: Dylan Hoogduin
Date: Fri, 17 Apr 2020 15:35:21 +0200
Subject: [PATCH] add material views

---
Review note: MaterialViews.py was revised during review; the blob id on
its "index" line predates that revision.

 MaterialViews.py         | 73 ++++++++++++++++++++++++++++++++++++++++
 Seeder.py                |  9 ++++-
 mviews/transcript_mv.sql |  7 ++++
 3 files changed, 88 insertions(+), 1 deletion(-)
 create mode 100644 MaterialViews.py
 create mode 100644 mviews/transcript_mv.sql

diff --git a/MaterialViews.py b/MaterialViews.py
new file mode 100644
index 0000000..548b652
--- /dev/null
+++ b/MaterialViews.py
@@ -0,0 +1,73 @@
+import sqlalchemy
+
+
+class MaterialViews:
+    """Build the pre-aggregated ("materialized view") tables.
+
+    Creating an instance immediately rebuilds ``transcript_mv`` through the
+    given connection and closes that connection afterwards.
+    """
+
+    TRANSCRIPT_DDL = 'mviews/transcript_mv.sql'
+
+    def __init__(self, db):
+        """Keep the connection ``db`` and rebuild ``transcript_mv``."""
+        self.db = db
+        self.run_mv_transcript()
+
+    def run_mv_transcript(self):
+        """Recreate ``transcript_mv`` and fill it tissue by tissue.
+
+        For each tissue, the (at most) 100 genes with the highest average
+        count are copied from ``transcript``. The connection is closed at
+        the end, also when one of the statements fails.
+        """
+        select_top = sqlalchemy.text(
+            "SELECT gene, tissue, avg(count) FROM transcript "
+            "WHERE tissue = :tissue "
+            "GROUP BY gene, tissue "
+            "ORDER BY avg(count) DESC "
+            "LIMIT 100"
+        )
+        insert_row = sqlalchemy.text(
+            "INSERT INTO transcript_mv (gene, tissue, count_avg) "
+            "VALUES (:gene, :tissue, :count_avg)"
+        )
+        try:
+            with open(self.TRANSCRIPT_DDL) as file:
+                self.prepare(file)
+            tissues = self.db.execute(
+                sqlalchemy.text("SELECT id FROM tissue")).fetchall()
+            for tissue in tissues:
+                top_genes = self.db.execute(
+                    select_top, {"tissue": tissue[0]}).fetchall()
+                rows = [
+                    {"gene": gene, "tissue": tissue_id, "count_avg": count_avg}
+                    for gene, tissue_id, count_avg in top_genes
+                ]
+                # Bound parameters replace the string-formatted VALUES;
+                # an empty list is not a valid executemany call, so skip it.
+                if rows:
+                    self.db.execute(insert_row, rows)
+        finally:
+            self.db.close()
+
+    def prepare(self, file):
+        """Execute every ``;``-terminated statement of a SQL script.
+
+        ``file`` is an open text file, another iterable of lines, or the
+        whole script as one string. Blank lines and ``--`` comment lines
+        are skipped; text after the last ``;`` is not executed.
+        """
+        # Iterating a str yields single characters, which would defeat the
+        # line-based comment filter below, so split it into lines first.
+        lines = file.splitlines() if isinstance(file, str) else file
+        parts = []
+        for line in lines:
+            line = line.strip()
+            if not line or line.startswith('--'):
+                continue
+            parts.append(line)
+            if line.endswith(';'):
+                self.db.execute(sqlalchemy.text(' '.join(parts)))
+                parts = []
diff --git a/Seeder.py b/Seeder.py
index 6c3fc5a..044eb9f 100644
--- a/Seeder.py
+++ b/Seeder.py
@@ -1,5 +1,6 @@
 import os, time, sys
 from Database import db
+from MaterialViews import MaterialViews
 from Merger import GeneAliasRetriever
 from sqlalchemy.exc import SQLAlchemyError
 from dotenv import load_dotenv
@@ -150,6 +151,9 @@ def insert_count(self, stage_file, tissue_file):
         print('--- All counts inserted! ---')
         self.connection.close()
 
+    def run_mv(self):
+        self.connect_to_db(self.database)
+        MaterialViews(self.connection)
 
 if __name__ == '__main__':
     DB_DIALECT = os.getenv("DB_DIALECT")
@@ -163,6 +167,7 @@ def insert_count(self, stage_file, tissue_file):
     INSERT_REF = os.getenv("INSERT_REF")
     INSERT_COUNT = os.getenv("INSERT_COUNT")
     CORRECT_GENES = os.getenv("CORRECT_GENES")
+    RUN_MV = os.getenv("RUN_MV")
     seeder = Seeder(
         dialect=DB_DIALECT,
         driver=DB_DRIVER,
@@ -178,4 +183,6 @@ def insert_count(self, stage_file, tissue_file):
     if INSERT_COUNT == "true":
         seeder.insert_count(stage_file='datasets/stage.csv', tissue_file='datasets/tissue.csv')
     if CORRECT_GENES == "true":
-        seeder.correct_genes("updated_genes.txt")
\ No newline at end of file
+        seeder.correct_genes("updated_genes.txt")
+    if RUN_MV == "true":
+        seeder.run_mv()
\ No newline at end of file
diff --git a/mviews/transcript_mv.sql b/mviews/transcript_mv.sql
new file mode 100644
index 0000000..ad18a2b
--- /dev/null
+++ b/mviews/transcript_mv.sql
@@ -0,0 +1,7 @@
+DROP TABLE IF EXISTS transcript_mv;
+CREATE TABLE transcript_mv (
+    gene INT NOT NULL
+    , tissue INT NOT NULL
+    , count_avg DECIMAL(10,2) NOT NULL
+    , UNIQUE INDEX product (gene, tissue)
+);
\ No newline at end of file