-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
16 changed files
with
133 additions
and
163 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,54 +1,44 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import os | ||
from py2neo import Graph | ||
from pandas import DataFrame | ||
from pathlib import Path | ||
import glob | ||
import sys | ||
import sqlite3 | ||
import subprocess | ||
from pandas import DataFrame | ||
import json | ||
from timeit import default_timer as timer | ||
|
||
os.system('echo "dbms.security.auth_enabled=false" >> /var/lib/neo4j/conf/neo4j.conf') | ||
os.system('neo4j start') | ||
os.system('sleep 20') | ||
os.system("neo4j start") | ||
os.system("sleep 20") | ||
|
||
from py2neo import Graph | ||
import yaml | ||
graph = Graph("bolt://localhost:7687") | ||
|
||
for file in glob.glob("/mnt/*.cypher"): | ||
print("Running query: " + file) | ||
query = open(file).read() | ||
df = DataFrame(graph.run(query).data()) | ||
df.to_csv("/out/" + os.path.basename(file).split(".")[0]+".csv", index=False) | ||
|
||
os.system('neo4j stop') | ||
os.system('sleep 20') | ||
|
||
|
||
os.system('rm -f /out/materialised_queries.sqlite') | ||
con = sqlite3.connect("/out/materialised_queries.sqlite") | ||
cur = con.cursor() | ||
|
||
cur.execute("CREATE TABLE metadata(json BLOB);") | ||
for file in glob.glob("/mnt/*.json"): | ||
cur.execute("INSERT INTO metadata VALUES (?);", (open(file).read(),)) | ||
|
||
for file in glob.glob("/out/*.csv"): | ||
table_name = os.path.basename(file).split(".")[0] | ||
with open(file) as f: | ||
header = f.readline() | ||
columns = header.split(",") | ||
cur.execute("CREATE TABLE " | ||
+ table_name | ||
+ " (" + ", ".join([c + " TEXT" for c in columns]) + ");") | ||
|
||
con.close() | ||
|
||
for file in glob.glob("/out/*.csv"): | ||
table_name = os.path.basename(file).split(".")[0] | ||
result = subprocess.run(['sqlite3', | ||
'/out/materialised_queries.sqlite', | ||
'-cmd', | ||
'.mode csv', | ||
'.import --skip 1 ' + file + ' ' + table_name], | ||
capture_output=True) | ||
for file in os.listdir("/materialised_queries"): | ||
if not file.endswith(".yaml"): | ||
continue | ||
|
||
query_id = Path(file).stem | ||
|
||
query = yaml.safe_load(open(f"/materialised_queries/{file}")) | ||
|
||
start_time = timer() | ||
|
||
print(f"Running query {query_id}") | ||
df = DataFrame(graph.run(query['cypher_query']).data()) | ||
|
||
end_time = timer() | ||
|
||
query['start_time'] = start_time | ||
query['end_time'] = end_time | ||
query['time'] = end_time - start_time | ||
|
||
print(f"Saving {len(df)} rows to {Path(f'/out/{query_id}.csv.gz')}") | ||
df.to_csv(Path(f"/out/{query_id}.csv.gz"), index=False, compression="gzip") | ||
|
||
with open(f"/out/{query_id}.json", "w") as f: | ||
json.dump(query, f) | ||
|
||
os.system("sleep 20") | ||
os.system("neo4j stop") | ||
os.system("sleep 20") | ||
|
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
29 changes: 29 additions & 0 deletions
29
dataload/materialised_queries/impc_x_gwas/impc_x_gwas.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
title: Human GWAS variants to mouse models in IMPC | ||
description: Connects human GWAS variants to mouse models in IMPC using multiple different graph paths through phenotype and disease ontologies | ||
uses_datasources: | ||
- IMPC | ||
- GWAS | ||
- OLS.mp | ||
- OLS.hp | ||
- OLS.upheno | ||
- OLS.efo | ||
- OLS.mondo | ||
- OLS.doid | ||
- OLS.oba | ||
cypher_query: |- | ||
MATCH (snp:`gwas:SNP`)-[:`gwas:associated_with`]->(trait) | ||
MATCH (trait)<-[:`upheno:phenotypeToTrait`]-(speciesNeutralPhenotype) | ||
MATCH (speciesNeutralPhenotype)<-[:`biolink:broad_match`]-(descendantPhenotype)-[:sourceId]->(descendantSourceId) | ||
WHERE "OLS.mp" IN descendantPhenotype.`grebi:datasources` | ||
MATCH (descendantPhenotype)<-[:`impc:phenotype`]-(mouseGene) | ||
RETURN "gwas->oba->upheno->mp->impc" AS graph_path, | ||
[id in snp.id WHERE id =~ "rs[0-9]*" | id][0] AS gwas_variant, | ||
[id in trait.id WHERE id =~ "oba:.*" | id][0] AS trait_id, | ||
trait.`grebi:name`[0] as trait_name, | ||
[id in speciesNeutralPhenotype.id WHERE id =~ "upheno:[0-9]*" | id][0] AS species_neutral_phenotype_id, | ||
speciesNeutralPhenotype.`grebi:name`[0] AS species_neutral_phenotype_name, | ||
[id in descendantPhenotype.id WHERE id =~ "mp:[0-9]*" | id][0] AS mouse_phenotype, | ||
descendantPhenotype.`grebi:name`[0] AS mouse_phenotype_name, | ||
mouseGene.`grebi:name`[0] AS mouse_gene_name, | ||
[id in mouseGene.id WHERE id =~ "mgi:[0-9]*" | id][0] AS mouse_gene_id | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
10 changes: 0 additions & 10 deletions
10
dataload/queries/genes_to_diseases_ranked_by_otar_score.cypher
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters