From c5d073a0df6c354f08127913281861d4b32daa32 Mon Sep 17 00:00:00 2001 From: James McLaughlin Date: Wed, 4 Dec 2024 17:54:33 +0000 Subject: [PATCH] add gwas_x_impc notebook --- notebooks/gwas_x_impc.ipynb | 75 +++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 notebooks/gwas_x_impc.ipynb diff --git a/notebooks/gwas_x_impc.ipynb b/notebooks/gwas_x_impc.ipynb new file mode 100644 index 0000000..b0d5aa4 --- /dev/null +++ b/notebooks/gwas_x_impc.ipynb @@ -0,0 +1,75 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "from pathlib import Path\n", + "from py2neo import Graph\n", + "from pandas import DataFrame\n", + "graph = Graph(\"bolt://localhost:7687\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "df = DataFrame(graph.run(\"\"\"\n", + "MATCH (snp:`gwas:SNP`)-[:`gwas:associated_with`]->(measurement)\n", + "MATCH (measurement)<-[:`upheno:phenotypeToTrait`]-(phenotype)\n", + "MATCH (phenotype)<-[:`biolink:broad_match`]-(descendantPhenotype)-[:sourceId]->(descendantSourceId)\n", + "WHERE \"OLS.mp\" IN descendantPhenotype.`grebi:datasources`\n", + "MATCH (descendantPhenotype)<-[:`impc:phenotype`]-(mouseGene)\n", + "WHERE \"IMPC\" IN mouseGene.`grebi:datasources`\n", + "RETURN DISTINCT [id in snp.id WHERE id =~ \"rs[0-9]*\" | id][0] AS gwas_variant,\n", + "[id in measurement.id WHERE id =~ \"oba:.*\" | id][0] AS measurement_id,\n", + "measurement.`grebi:name`[0] as measurement_name,\n", + "[id in phenotype.id WHERE id =~ \"upheno:[0-9]*\" | id][0] AS species_neutral_phenotype_id,\n", + "phenotype.`grebi:name`[0] AS species_neutral_phenotype_name,\n", + "[id in descendantPhenotype.id WHERE id =~ \"mp:[0-9]*\" | id][0] AS mouse_phenotype,\n", + "descendantPhenotype.`grebi:name`[0] AS mouse_phenotype_name,\n", + "mouseGene.`grebi:name`[0] AS mouse_gene_name,\n", + "[id in mouseGene.id WHERE id =~ \"mgi:[0-9]*\" | id][0] AS mouse_gene_id\n", + "\"\"\").data())\n", + "\n", + "#print(df.head(5).to_markdown())\n", + "df.to_csv(\"gwas_to_impc.csv\", index=False)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}