-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial integration to be merged with production.
- Loading branch information
1 parent
266fb77
commit 52a2a4c
Showing
24 changed files
with
492 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[submodule "gennifer"] | ||
path = gennifer | ||
url = git@github.com:di2ag/gennifer.git |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from .celery import app as celery_app | ||
|
||
__all__ = ("celery_app",) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
import os | ||
from celery import Celery | ||
|
||
# Default the settings module to the chp_api project settings; a value that
# is already present in the environment is left untouched.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "chp_api.settings")

# Celery application object for the chp_api project.
app = Celery("chp_api")

# Configure the app from the Django settings object, under the "CELERY"
# namespace.
app.config_from_object("django.conf:settings", namespace="CELERY")

# Let Celery discover task modules instead of listing them by hand.
app.autodiscover_tasks()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from django.contrib import admin | ||
|
||
# Register your models here. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from django.apps import AppConfig | ||
|
||
|
||
class GenniferConfig(AppConfig):
    """Django application configuration for the ``gennifer`` app."""

    # Dotted path of the application this configuration belongs to.
    name = 'gennifer'
    # Primary-key field class for models that do not declare one themselves.
    default_auto_field = 'django.db.models.BigAutoField'
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
from django.db import models | ||
from django.contrib.auth.models import User | ||
|
||
|
||
class Algorithm(models.Model):
    """A named algorithm together with the URL stored in ``run_url``."""

    # Human-readable algorithm name; also used as the display string.
    name = models.CharField(
        max_length=128,
    )
    # URL associated with the algorithm (limited to 128 characters).
    run_url = models.URLField(
        max_length=128,
    )

    def __str__(self):
        """Represent the algorithm by its name."""
        return self.name
|
||
|
||
class Dataset(models.Model):
    """A Zenodo record, keyed by its Zenodo identifier.

    ``doi``, ``description`` and ``title`` are overwritten from the Zenodo
    record every time the instance is saved.
    """

    title = models.CharField(max_length=128)
    zenodo_id = models.CharField(max_length=128, primary_key=True)
    doi = models.CharField(max_length=128)
    description = models.TextField(null=True, blank=True)
    upload_user = models.ForeignKey(User, on_delete=models.CASCADE, null=True, blank=True)

    def save(self, *args, **kwargs):
        """Refresh the metadata from Zenodo, then write the row.

        Positional and keyword arguments are forwarded to ``Model.save``.

        Raises:
            KeyError: if the Zenodo document lacks ``doi`` or the
                ``metadata`` entries read below.
        """
        import re

        # Non-greedy match for anything that looks like a markup tag, so the
        # stored title/description are plain text.
        tag_pattern = re.compile('<.*?>')

        info = self.get_record()
        self.doi = info["doi"]
        self.description = tag_pattern.sub('', info["metadata"]["description"])
        self.title = tag_pattern.sub('', info["metadata"]["title"])
        # Delegate to Model.save(); without this call nothing is persisted.
        super().save(*args, **kwargs)

    def get_record(self):
        """Return the decoded JSON document for this record from Zenodo."""
        # Imported here because this module has no module-level import of
        # requests.
        import requests

        return requests.get(f"https://zenodo.org/api/records/{self.zenodo_id}").json()
|
||
|
||
class Gene(models.Model):
    """A gene identified by name and CURIE, with optional variant text."""

    # Display name of the gene.
    name = models.CharField(
        max_length=128,
    )
    # CURIE-style identifier of the gene.
    curie = models.CharField(
        max_length=128,
    )
    # Optional free-text variant information.
    variant = models.TextField(
        null=True,
        blank=True,
    )

    def __str__(self):
        """Represent the gene by its name."""
        return self.name
|
||
|
||
class InferenceStudy(models.Model):
    """One run of an algorithm over a dataset, plus summary edge statistics."""

    algorithm = models.ForeignKey(
        Algorithm,
        on_delete=models.CASCADE,
        related_name='studies',
    )
    # Owner of the study; optional.
    user = models.ForeignKey(
        User,
        on_delete=models.CASCADE,
        null=True,
        blank=True,
        related_name='studies',
    )
    dataset = models.ForeignKey(
        Dataset,
        on_delete=models.CASCADE,
        related_name='studies',
    )
    # Set once, when the row is first created.
    timestamp = models.DateTimeField(auto_now_add=True)

    # Summary statistics over all edge weights of this study; nullable so a
    # study can exist before any results are available.
    max_study_edge_weight = models.FloatField(null=True)
    min_study_edge_weight = models.FloatField(null=True)
    avg_study_edge_weight = models.FloatField(null=True)
    std_study_edge_weight = models.FloatField(null=True)

    is_public = models.BooleanField(default=False)
    # Short status string (at most 10 characters).
    status = models.CharField(max_length=10)
    # Optional error text.
    error_message = models.TextField(null=True, blank=True)
|
||
class InferenceResult(models.Model):
    """A single weighted regulatory edge produced by an inference study."""

    # Both gene links point at the same model, so each needs its own
    # related_name; otherwise the two default reverse accessors on Gene
    # collide and Django's system checks reject the model.
    # Stands for transcription factor
    tf = models.ForeignKey(
        Gene,
        on_delete=models.CASCADE,
        related_name='inference_result_tf',
    )
    # Target is the gene that is regulated by the transcription factor
    target = models.ForeignKey(
        Gene,
        on_delete=models.CASCADE,
        related_name='inference_result_target',
    )
    edge_weight = models.FloatField()
    study = models.ForeignKey(InferenceStudy, on_delete=models.CASCADE, related_name='results')
    is_public = models.BooleanField(default=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
from rest_framework import serializers | ||
|
||
from .models import Dataset, InferenceStudy, InferenceResult | ||
|
||
class DatasetSerializer(serializers.ModelSerializer):
    """Serialize the descriptive fields of a ``Dataset``."""

    class Meta:
        model = Dataset
        # Dataset has a `title` field and no `name`; listing a field the
        # model does not define makes the serializer fail when it is used.
        fields = ['title', 'zenodo_id', 'doi', 'description']
|
||
|
||
class InferenceStudySerializer(serializers.ModelSerializer):
    """Serialize an ``InferenceStudy``: its relations, timestamp and statistics."""

    class Meta:
        model = InferenceStudy
        fields = [
            # Relations and creation time
            'algorithm', 'user', 'dataset', 'timestamp',
            # Edge-weight summary statistics
            'max_study_edge_weight', 'min_study_edge_weight',
            'avg_study_edge_weight', 'std_study_edge_weight',
            # Visibility flag
            'is_public',
        ]
|
||
class InferenceResultSerializer(serializers.ModelSerializer):
    """Serialize one ``InferenceResult`` edge and the study it belongs to."""

    class Meta:
        model = InferenceResult
        fields = ['tf', 'target', 'edge_weight', 'study', 'is_public']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
import os
import time
import pandas as pd
import requests

from celery import shared_task

from .models import Dataset, Gene, InferenceStudy, InferenceResult
# The package name was misspelled as "dispacter", which fails on import.
from dispatcher.models import DispatcherSettings
|
||
def normalize_nodes(curies):
    """POST ``curies`` to the configured node-normalizer service.

    The base URL comes from ``DispatcherSettings.load()``. The ``requests``
    response object is returned as-is; decoding is left to the caller.
    """
    normalizer_base = DispatcherSettings.load().sri_node_normalizer_baseurl
    endpoint = f'{normalizer_base}/get_normalized_nodes'
    payload = {"curies": curies}
    return requests.post(endpoint, json=payload)
|
||
def extract_variant_info(gene_id):
    """Split ``gene_id`` into its base identifier and variant text.

    The identifier is cut at every ``(``. The first piece is the base
    identifier; if a second piece exists, it is returned with its final
    character removed, otherwise the variant is ``None``.

    Returns:
        A ``(base_identifier, variant_or_None)`` tuple.
    """
    base, *remainder = gene_id.split('(')
    variant_info = remainder[0][:-1] if remainder else None
    return base, variant_info
|
||
def save_inference_study(study, status, failed=False):
    """Store the final outcome of a run on ``study`` and save it.

    Args:
        study: The study object to update; it is saved before returning.
        status: Mapping with ``task_status`` and ``task_result`` entries.
        failed: When true, ``task_result`` is stored as the study's error
            message and no results are created.

    Returns:
        ``True`` in every case.
    """
    study.status = status["task_status"]

    if failed:
        # The model field is `error_message`; assigning to `message` would
        # only set a throwaway attribute and the error would be lost.
        study.error_message = status["task_result"]
        study.save()
        return True

    # Construct Dataframe from result
    df = pd.DataFrame.from_records(status["task_result"])
    if df.empty:
        # No edges: there is nothing to summarise or normalize, and the
        # "EdgeWeight" column would not even exist.
        study.save()
        return True

    # Add study edge weight features
    stats = df["EdgeWeight"].astype(float).describe()
    study.max_study_edge_weight = stats["max"]
    study.min_study_edge_weight = stats["min"]
    study.avg_study_edge_weight = stats["mean"]
    study.std_study_edge_weight = stats["std"]

    # Parse every row once: ((curie, variant), (curie, variant), weight).
    edges = [
        (
            extract_variant_info(row["Gene1"]),
            extract_variant_info(row["Gene2"]),
            row["EdgeWeight"],
        )
        for _, row in df.iterrows()
    ]

    # Normalize all distinct identifiers in a single request.
    curies = {curie for tf, target, _ in edges for curie, _ in (tf, target)}
    # normalize_nodes returns the HTTP response object; the per-curie lookups
    # below need its decoded JSON body.
    normalized = normalize_nodes(list(curies)).json()

    for (tf_curie, tf_variant), (target_curie, target_variant), weight in edges:
        # get_or_create / create already persist the row, so no extra save()
        # round-trip is needed afterwards.
        tf_gene, _ = Gene.objects.get_or_create(
            name=normalized[tf_curie]["id"]["label"],
            curie=tf_curie,
            variant=tf_variant,
        )
        target_gene, _ = Gene.objects.get_or_create(
            name=normalized[target_curie]["id"]["label"],
            curie=target_curie,
            variant=target_variant,
        )
        InferenceResult.objects.create(
            tf=tf_gene,
            target=target_gene,
            edge_weight=weight,
            study=study,
        )

    study.save()
    return True
|
||
def get_status(algo, task_id):
    """Return the decoded status document for ``task_id`` from the algorithm service.

    NOTE(review): the Algorithm model only defines ``run_url`` (there is no
    ``url`` attribute); it is used here as the service base URL — confirm.
    """
    return requests.get(f'{algo.run_url}/status/{task_id}').json()


@shared_task(name="create_gennifer_task")
def create_task(algorithm, zenodo_id, hyperparameters, user):
    """Start a run on the algorithm service and record its outcome.

    Args:
        algorithm: The Algorithm to run.
        zenodo_id: Zenodo identifier of the dataset to run on.
        hyperparameters: Algorithm hyperparameters, forwarded unchanged.
        user: Owner of the created study (and of the dataset, if it is new).

    Returns:
        The value returned by ``save_inference_study`` once the remote task
        reports ``SUCCESS`` or ``FAILURE``.

    NOTE(review): callers hand model instances to ``delay``; Celery's JSON
    serializer cannot encode those — confirm the configured serializer or
    switch to passing primary keys.
    """
    # zenodo_id is the primary key, so it alone must drive the lookup. Putting
    # upload_user in the lookup would try to re-create (and collide with) a
    # dataset that another user already registered.
    dataset, _ = Dataset.objects.get_or_create(
        zenodo_id=zenodo_id,
        defaults={"upload_user": user},
    )

    # Send to gennifer app. The payload nests a hyperparameter mapping, which
    # form encoding (data=) would flatten, so it is sent as JSON.
    gennifer_request = {
        "zenodo_id": zenodo_id,
        "hyperparameters": hyperparameters,
    }
    run_response = requests.post(f'{algorithm.run_url}/run', json=gennifer_request)
    # presumably the service answers with a JSON object carrying "task_id" —
    # TODO confirm against the gennifer service
    task_id = run_response.json()["task_id"]

    # Get initial status
    status = get_status(algorithm, task_id)

    # Create Inference Study (create() already saves the row)
    study = InferenceStudy.objects.create(
        algorithm=algorithm,
        user=user,
        dataset=dataset,
        status=status["task_status"],
    )

    # Poll until the remote task reaches a terminal state, mirroring every
    # intermediate status change onto the study.
    # TODO: Not sure if this is best practice; the loop has no upper bound.
    while True:
        # Check in every 2 seconds
        time.sleep(2)
        status = get_status(algorithm, task_id)
        state = status["task_status"]
        if state == 'SUCCESS':
            return save_inference_study(study, status)
        if state == "FAILURE":
            return save_inference_study(study, status, failed=True)
        if state != study.status:
            study.status = state
            study.save()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from django.test import TestCase | ||
|
||
# Create your tests here. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from django.urls import path, include | ||
from rest_framework.routers import DefaultRouter | ||
|
||
from . import views | ||
|
||
# Router that generates the list/detail routes for the three viewsets.
router = DefaultRouter()
router.register(
    r'datasets',
    views.DatasetViewSet,
    basename='dataset',
)
router.register(
    r'inference_studies',
    views.InferenceStudyViewSet,
    basename='inference_study',
)
router.register(
    r'inference_results',
    views.InferenceResultViewSet,
    basename='inference_result',
)

# Router-generated routes are mounted at the root, next to the "run" endpoint.
urlpatterns = [
    path('', include(router.urls)),
    path('run', views.run.as_view()),
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import requests

from django.http import HttpResponse, JsonResponse
from django.shortcuts import get_object_or_404
from django.core.exceptions import ObjectDoesNotExist

from rest_framework import viewsets
from rest_framework.views import APIView
# Response lives in rest_framework.response; there is no
# rest_framework.requests module.
from rest_framework.response import Response
from rest_framework.permissions import IsAuthenticated
from django_filters.rest_framework import DjangoFilterBackend

from .models import Dataset, InferenceStudy, InferenceResult, Algorithm
from .serializers import DatasetSerializer, InferenceStudySerializer, InferenceResultSerializer
from .tasks import create_task
|
||
class DatasetViewSet(viewsets.ModelViewSet):
    """CRUD endpoints for datasets, available to authenticated users."""

    queryset = Dataset.objects.all()
    serializer_class = DatasetSerializer
    # DRF reads `filter_backends` (plural); under the singular name the
    # backend was never picked up.
    filter_backends = [DjangoFilterBackend]
    # Dataset links to its owner through `upload_user`; it has no `user` field.
    filterset_fields = ['upload_user', 'zenodo_id']
    permission_classes = [IsAuthenticated]
|
||
|
||
class InferenceStudyViewSet(viewsets.ModelViewSet):
    """CRUD endpoints for the requesting user's own inference studies."""

    serializer_class = InferenceStudySerializer
    # DRF reads `filter_backends` (plural); under the singular name the
    # backend was never picked up.
    filter_backends = [DjangoFilterBackend]
    filterset_fields = ['is_public', 'dataset', 'algorithm']
    permission_classes = [IsAuthenticated]

    def get_queryset(self):
        """Return only the studies owned by the requesting user."""
        return InferenceStudy.objects.filter(user=self.request.user)
|
||
|
||
class InferenceResultViewSet(viewsets.ModelViewSet):
    """CRUD endpoints for results of the requesting user's studies."""

    serializer_class = InferenceResultSerializer
    # DRF reads `filter_backends` (plural); under the singular name the
    # backend was never picked up.
    filter_backends = [DjangoFilterBackend]
    filterset_fields = ['is_public', 'study']
    permission_classes = [IsAuthenticated]

    def get_queryset(self):
        """Return only results whose study is owned by the requesting user."""
        # InferenceResult has no `user` field of its own; ownership is
        # recorded on the related study.
        return InferenceResult.objects.filter(study__user=self.request.user)
|
||
|
||
class run(APIView):
    """Queue one Gennifer task per entry of the posted ``tasks`` list."""

    # The submitting user is handed to the task as the study owner, so
    # anonymous requests are rejected, as on the other Gennifer endpoints.
    permission_classes = [IsAuthenticated]

    def post(self, request):
        """ Request comes in as a list of algorithms to run.

        Each task is a mapping with ``algorithm_name``, ``zenodo_id`` and an
        optional ``hyperparameters`` entry. The response echoes every task,
        extended with either an ``error`` message or the queued ``task_id``.
        A body without a ``tasks`` entry is answered with HTTP 400.
        """
        try:
            tasks = request.data['tasks']
        except (KeyError, TypeError):
            return Response({"error": "No tasks provided."}, status=400)

        response = []
        for task in tasks:
            algorithm_name = task.get("algorithm_name")
            zenodo_id = task.get("zenodo_id")
            hyperparameters = task.get("hyperparameters")
            if not algorithm_name:
                task["error"] = "No algorithm name provided."
                response.append(task)
                continue
            if not zenodo_id:
                task["error"] = "No dataset Zenodo identifer provided."
                response.append(task)
                continue
            try:
                algo = Algorithm.objects.get(name=algorithm_name)
            except ObjectDoesNotExist:
                task["error"] = f"The algorithm: {algorithm_name} is not supported in Gennifer."
                response.append(task)
                continue
            # If all pass, now send to gennifer services.
            # NOTE(review): model instances are passed as task arguments;
            # Celery's JSON serializer cannot encode them — confirm the
            # configured serializer or pass primary keys instead.
            task["task_id"] = create_task.delay(algo, zenodo_id, hyperparameters, request.user).id
            response.append(task)
        return Response(response)
Oops, something went wrong.