Skip to content

Commit

Permalink
Initial integration to be merged with production.
Browse files Browse the repository at this point in the history
  • Loading branch information
anonymous-271828 committed May 29, 2023
1 parent 266fb77 commit 52a2a4c
Show file tree
Hide file tree
Showing 24 changed files with 492 additions and 22 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "gennifer"]
path = gennifer
url = git@github.com:di2ag/gennifer.git
1 change: 1 addition & 0 deletions chp_api/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ RUN pip3 install --no-cache /wheels/*
COPY ./chp_api $APP_HOME/chp_api
COPY ./manage.py $APP_HOME
COPY ./dispatcher $APP_HOME/dispatcher
COPY ./gennifer $APP_HOME/gennifer
COPY ./gunicorn.config.py $APP_HOME

# chown all the files to the app user
Expand Down
3 changes: 3 additions & 0 deletions chp_api/chp_api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Re-export the Celery application object defined in the sibling ``celery``
# module under the name ``celery_app``.
from .celery import app as celery_app

# ``celery_app`` is the only name this package exports via ``import *``.
__all__ = ("celery_app",)
7 changes: 7 additions & 0 deletions chp_api/chp_api/celery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import os
from celery import Celery

# Point Django at the project settings unless the environment already names a
# settings module.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "chp_api.settings")
# Celery application object for this project.
app = Celery("chp_api")
# Read the Celery configuration from the Django settings object, using the
# "CELERY" namespace.
app.config_from_object("django.conf:settings", namespace="CELERY")
# Let Celery discover task modules itself (no package list is passed).
app.autodiscover_tasks()
5 changes: 5 additions & 0 deletions chp_api/chp_api/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
'django.contrib.messages',
'django.contrib.staticfiles',
'rest_framework',
'django_filters',
'dispatcher.apps.DispatcherConfig',
'chp_utils',
'django_extensions',
Expand Down Expand Up @@ -173,3 +174,7 @@
os.environ["DJANGO_SUPERUSER_EMAIL"] = dse_file.readline().strip()
with open(env("DJANGO_SUPERUSER_PASSWORD_FILE"), 'r') as dsp_file:
os.environ["DJANGO_SUPERUSER_PASSWORD"] = dsp_file.readline().strip()

# Celery settings: the broker and the result backend are each taken from the
# environment and fall back to a Redis instance on localhost when unset.
CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379")
CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND", "redis://localhost:6379")
1 change: 1 addition & 0 deletions chp_api/chp_api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# Top-level URL routes for the project.
urlpatterns = [
    # Django admin site.
    path('admin/', admin.site.urls),
    # Dispatcher routes are mounted at the site root.
    path('', include('dispatcher.urls')),
    # Gennifer routes live under the gennifer/ prefix.
    path('gennifer/', include('gennifer.urls')),
]


1 change: 1 addition & 0 deletions chp_api/dispatcher/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def load(cls):

class DispatcherSettings(Singleton):
trapi_version = models.CharField(max_length=28, default='1.4')
sri_node_normalizer_baseurl = models.URLField(max_length=128, default='https://nodenormalization-sri.renci.org')

def __str__(self):
return 'settings'
Empty file added chp_api/gennifer/__init__.py
Empty file.
3 changes: 3 additions & 0 deletions chp_api/gennifer/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.contrib import admin

# Register your models here.
6 changes: 6 additions & 0 deletions chp_api/gennifer/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class GenniferConfig(AppConfig):
    """Django application configuration for the ``gennifer`` app."""

    # Default primary-key field type for this app's models.
    default_auto_field = 'django.db.models.BigAutoField'
    # Python package name of the application.
    name = 'gennifer'
Empty file.
64 changes: 64 additions & 0 deletions chp_api/gennifer/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from django.db import models
from django.contrib.auth.models import User


class Algorithm(models.Model):
    """A named inference algorithm and the URL used to reach it."""

    # Algorithm name; also what ``__str__`` returns.
    name = models.CharField(max_length=128)
    # URL associated with running the algorithm.
    # NOTE(review): it is unclear from this model whether this is the service
    # base URL or the full run endpoint — confirm against the code that builds
    # request URLs.
    run_url = models.URLField(max_length=128)

    def __str__(self):
        """Return the algorithm name."""
        return self.name


class Dataset(models.Model):
    """A dataset identified by its Zenodo record id.

    ``doi``, ``title`` and ``description`` are refreshed from the Zenodo
    record every time the instance is saved.
    """

    title = models.CharField(max_length=128)
    # The Zenodo record id doubles as the primary key.
    zenodo_id = models.CharField(max_length=128, primary_key=True)
    doi = models.CharField(max_length=128)
    description = models.TextField(null=True, blank=True)
    upload_user = models.ForeignKey(User, on_delete=models.CASCADE, null=True, blank=True)

    def save(self, *args, **kwargs):
        """Refresh the metadata from Zenodo, then persist the row.

        Positional and keyword arguments are forwarded to ``Model.save()``.

        Raises:
            requests.RequestException: If the Zenodo request fails.
            KeyError: If the record lacks ``doi``, ``metadata`` or a title.
        """
        import re

        # Zenodo metadata may contain HTML markup; strip the tags.
        tag_pattern = re.compile(r'<.*?>')

        info = self.get_record()
        metadata = info["metadata"]
        self.doi = info["doi"]
        self.title = tag_pattern.sub('', metadata["title"])
        # ``description`` is nullable on the model, so tolerate its absence.
        description = metadata.get("description")
        self.description = None if description is None else tag_pattern.sub('', description)
        # Delegate to Model.save() so the row is actually written.
        super().save(*args, **kwargs)

    def get_record(self):
        """Fetch this dataset's record from the Zenodo API and return it as a dict.

        Raises:
            requests.RequestException: On connection problems, timeouts or a
                non-success HTTP status.
        """
        # Imported here because this module does not import requests at the top.
        import requests

        response = requests.get(
            f"https://zenodo.org/api/records/{self.zenodo_id}",
            timeout=30,
        )
        # Fail loudly on e.g. an unknown record id instead of parsing an error body.
        response.raise_for_status()
        return response.json()


class Gene(models.Model):
    """A gene, optionally qualified by variant information."""

    # Gene name; also what ``__str__`` returns.
    name = models.CharField(max_length=128)
    # CURIE-style identifier for the gene.
    curie = models.CharField(max_length=128)
    # Optional free-text variant information.
    variant = models.TextField(null=True, blank=True)

    def __str__(self):
        """Return the gene name."""
        return self.name


class InferenceStudy(models.Model):
    """One run of an algorithm over a dataset, with summary edge-weight statistics."""

    algorithm = models.ForeignKey(Algorithm, on_delete=models.CASCADE, related_name='studies')
    # Optional owner of the study.
    user = models.ForeignKey(User, on_delete=models.CASCADE, null=True, blank=True, related_name='studies')
    dataset = models.ForeignKey(Dataset, on_delete=models.CASCADE, related_name='studies')
    # Set automatically when the row is first created.
    timestamp = models.DateTimeField(auto_now_add=True)
    # Study characteristics for all edge weights in a given study over a dataset
    max_study_edge_weight = models.FloatField(null=True)
    min_study_edge_weight = models.FloatField(null=True)
    avg_study_edge_weight = models.FloatField(null=True)
    std_study_edge_weight = models.FloatField(null=True)
    is_public = models.BooleanField(default=False)
    # Status string for the study (at most 10 characters).
    status = models.CharField(max_length=10)
    # Optional error text.
    error_message = models.TextField(null=True, blank=True)

class InferenceResult(models.Model):
    """A single weighted edge (transcription factor -> target) from a study."""

    # Both foreign keys below point at Gene, so each needs its own
    # related_name; otherwise they would compete for the same default reverse
    # accessor on Gene and Django's model checks would reject the app.

    # Stands for transcription factor
    tf = models.ForeignKey(
        Gene,
        on_delete=models.CASCADE,
        related_name='inference_result_tf',
    )
    # Target is the gene that is regulated by the transcription factor
    target = models.ForeignKey(
        Gene,
        on_delete=models.CASCADE,
        related_name='inference_result_target',
    )
    edge_weight = models.FloatField()
    study = models.ForeignKey(InferenceStudy, on_delete=models.CASCADE, related_name='results')
    is_public = models.BooleanField(default=False)
35 changes: 35 additions & 0 deletions chp_api/gennifer/serializers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from rest_framework import serializers

from .models import Dataset, InferenceStudy, InferenceResult

class DatasetSerializer(serializers.ModelSerializer):
    """Serialize ``Dataset`` rows: title, Zenodo id, DOI and description."""

    class Meta:
        model = Dataset
        # The model's field is ``title``; Dataset has no ``name`` field.
        fields = ['title', 'zenodo_id', 'doi', 'description']


class InferenceStudySerializer(serializers.ModelSerializer):
    """Serialize ``InferenceStudy`` rows.

    The field list below does not include the model's ``status`` or
    ``error_message`` fields.
    """

    class Meta:
        model = InferenceStudy
        fields = [
            'algorithm',
            'user',
            'dataset',
            'timestamp',
            'max_study_edge_weight',
            'min_study_edge_weight',
            'avg_study_edge_weight',
            'std_study_edge_weight',
            'is_public',
        ]

class InferenceResultSerializer(serializers.ModelSerializer):
    """Serialize ``InferenceResult`` rows (one weighted tf -> target edge each)."""

    class Meta:
        model = InferenceResult
        fields = [
            'tf',
            'target',
            'edge_weight',
            'study',
            'is_public',
        ]
129 changes: 129 additions & 0 deletions chp_api/gennifer/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import math
import os
import time

import pandas as pd
import requests
from celery import shared_task
from django.contrib.auth.models import User

from dispatcher.models import DispatcherSettings

from .models import Algorithm, Dataset, Gene, InferenceStudy, InferenceResult

def normalize_nodes(curies):
    """Look up CURIEs with the configured SRI node normalizer.

    Args:
        curies: List of CURIE strings to normalize.

    Returns:
        The decoded JSON body of the normalizer's response.
        NOTE(review): presumably a mapping keyed by the requested CURIEs —
        confirm against the normalizer's API.

    Raises:
        requests.RequestException: On connection problems, timeouts or a
            non-success HTTP status.
    """
    dispatcher_settings = DispatcherSettings.load()
    base_url = dispatcher_settings.sri_node_normalizer_baseurl
    response = requests.post(
        f'{base_url}/get_normalized_nodes',
        json={"curies": curies},
        timeout=60,
    )
    response.raise_for_status()
    # Hand back the parsed body rather than the Response object: the result is
    # indexed by CURIE, which a Response does not support.
    return response.json()

def extract_variant_info(gene_id):
    """Split a ``GENE(variant)`` label into its gene id and variant parts.

    Args:
        gene_id: Gene label, optionally followed by variant information in
            parentheses.

    Returns:
        A ``(gene_id, variant_info)`` tuple. ``variant_info`` is ``None`` when
        the label contains no opening parenthesis.
    """
    base_id, opener, remainder = gene_id.partition('(')
    if not opener:
        return base_id, None
    # Split on the first "(" only, so variant text that itself contains
    # parentheses is kept whole, and drop the closing ")" only if it is
    # really there instead of blindly cutting the last character.
    if remainder.endswith(')'):
        remainder = remainder[:-1]
    return base_id, remainder

def _nan_to_none(value):
    """Return ``value`` as a float, or ``None`` when it is NaN."""
    number = float(value)
    return None if math.isnan(number) else number


def _gene_label(normalized, curie):
    """Return the normalizer's label for ``curie``, or the CURIE itself if there is none."""
    # NOTE(review): assumes CURIEs the normalizer does not know are either
    # absent or mapped to null — confirm against the normalizer's API.
    entry = normalized.get(curie) or {}
    identifier = entry.get("id") or {}
    return identifier.get("label") or curie


def save_inference_study(study, status, failed=False):
    """Record the final state of an inference task on ``study``.

    Args:
        study: The ``InferenceStudy`` to update.
        status: Status payload with a ``"task_status"`` string and a
            ``"task_result"``. On failure the result is stored as the error
            message; on success it is read as records with ``"Gene1"``,
            ``"Gene2"`` and ``"EdgeWeight"`` keys.
        failed: ``True`` when the task failed.

    Returns:
        ``True`` once the study has been saved.
    """
    study.status = status["task_status"]
    if failed:
        # The model field is ``error_message``; assigning any other attribute
        # name would be dropped silently when the study is saved.
        study.error_message = status["task_result"]
        study.save()
        return True

    # Construct Dataframe from result
    df = pd.DataFrame.from_records(status["task_result"])
    if df.empty:
        # No edges: there is nothing to summarise or store.
        study.save()
        return True

    # Add study edge weight features. The standard deviation is NaN for a
    # single edge, so map NaN to NULL.
    stats = df["EdgeWeight"].astype(float).describe()
    study.max_study_edge_weight = _nan_to_none(stats["max"])
    study.min_study_edge_weight = _nan_to_none(stats["min"])
    study.avg_study_edge_weight = _nan_to_none(stats["mean"])
    study.std_study_edge_weight = _nan_to_none(stats["std"])

    # Parse every distinct gene label once.
    parsed = {
        raw: extract_variant_info(raw)
        for raw in set(df["Gene1"]) | set(df["Gene2"])
    }

    # Normalize all gene CURIEs in a single request.
    normalized = normalize_nodes(sorted({curie for curie, _ in parsed.values()}))
    # Accept either the parsed body or a requests Response.
    if hasattr(normalized, "json"):
        normalized = normalized.json()
    normalized = normalized or {}

    # One Gene row per distinct label; get_or_create already saves new rows.
    genes = {}
    for raw, (curie, variant) in parsed.items():
        genes[raw], _ = Gene.objects.get_or_create(
            name=_gene_label(normalized, curie),
            curie=curie,
            variant=variant,
        )

    # Insert all results in one batch instead of one query per row.
    InferenceResult.objects.bulk_create([
        InferenceResult(
            tf=genes[tf_raw],
            target=genes[target_raw],
            edge_weight=float(weight),
            study=study,
        )
        for tf_raw, target_raw, weight in zip(df["Gene1"], df["Gene2"], df["EdgeWeight"])
    ])
    study.save()
    return True

def get_status(algo, task_id):
    """Fetch the status of a task from an algorithm's service.

    Args:
        algo: The ``Algorithm`` whose service is queried.
        task_id: Identifier of the task on that service.

    Returns:
        The decoded JSON body of the status response.

    Raises:
        requests.RequestException: On connection problems, timeouts or a
            non-success HTTP status.
    """
    # Algorithm defines ``run_url``; it has no ``url`` field.
    # NOTE(review): assumes run_url holds the service base URL — confirm.
    response = requests.get(f'{algo.run_url}/status/{task_id}', timeout=30)
    response.raise_for_status()
    return response.json()

@shared_task(name="create_gennifer_task")
def create_task(algorithm, zenodo_id, hyperparameters, user):
    """Run one inference job on an algorithm's service and record its outcome.

    Args:
        algorithm: An ``Algorithm`` instance or its primary key.
        zenodo_id: Zenodo record id of the dataset to run on.
        hyperparameters: Hyperparameters forwarded to the service as given.
        user: A ``User`` instance, its primary key, or ``None``.

    Returns:
        The return value of ``save_inference_study`` once the remote task has
        reported ``SUCCESS`` or ``FAILURE``.
    """
    # Task arguments must survive serialization, so accept primary keys as
    # well as model instances.
    algo = algorithm if isinstance(algorithm, Algorithm) else Algorithm.objects.get(pk=algorithm)
    if user is not None and not isinstance(user, User):
        user = User.objects.get(pk=user)

    # zenodo_id is the primary key, so it alone identifies the dataset. With
    # the user in the lookup, a dataset first registered by someone else would
    # not be found and the insert would hit a duplicate key. get_or_create
    # already saves a new row, so no extra save() is needed.
    dataset, _ = Dataset.objects.get_or_create(
        zenodo_id=zenodo_id,
        defaults={"upload_user": user},
    )

    # Send to gennifer app
    gennifer_request = {
        "zenodo_id": zenodo_id,
        "hyperparameters": hyperparameters,
    }
    # Sent as JSON so a nested hyperparameters dict is transmitted intact.
    # NOTE(review): assumes run_url is the service base URL and that the
    # service replies with JSON containing "task_id" — confirm.
    run_response = requests.post(f'{algo.run_url}/run', json=gennifer_request, timeout=30)
    run_response.raise_for_status()
    task_id = run_response.json()["task_id"]

    # Get initial status
    status = get_status(algo, task_id)

    # Create Inference Study (objects.create saves the row)
    study = InferenceStudy.objects.create(
        algorithm=algo,
        user=user,
        dataset=dataset,
        status=status["task_status"],
    )

    # Enter a loop to keep checking back in and populate the study once it has completed.
    # TODO: Not sure if this is best practice — the loop has no upper bound on
    # how long it waits.
    while True:
        # Check in every 2 seconds
        time.sleep(2)
        status = get_status(algo, task_id)
        state = status["task_status"]
        if state == 'SUCCESS':
            return save_inference_study(study, status)
        if state == "FAILURE":
            return save_inference_study(study, status, failed=True)
        if state != study.status:
            # Persist intermediate state changes.
            study.status = state
            study.save()
3 changes: 3 additions & 0 deletions chp_api/gennifer/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.test import TestCase

# Create your tests here.
15 changes: 15 additions & 0 deletions chp_api/gennifer/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from django.urls import path, include
from rest_framework.routers import DefaultRouter

from . import views

# Create router and register viewsets
router = DefaultRouter()
# Each viewset is registered with an explicit basename.
router.register(r'datasets', views.DatasetViewSet, basename='dataset')
router.register(r'inference_studies', views.InferenceStudyViewSet, basename='inference_study')
router.register(r'inference_results', views.InferenceResultViewSet, basename='inference_result')

urlpatterns = [
    # Router-generated routes for the three viewsets above.
    path('', include(router.urls)),
    # Endpoint served by the ``run`` API view.
    path('run', views.run.as_view()),
]
76 changes: 76 additions & 0 deletions chp_api/gennifer/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import requests

from django.core.exceptions import ObjectDoesNotExist
from django.http import HttpResponse, JsonResponse
from django.shortcuts import get_object_or_404

from django_filters.rest_framework import DjangoFilterBackend
from rest_framework import viewsets
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from rest_framework.views import APIView

from .models import Dataset, InferenceStudy, InferenceResult, Algorithm
from .serializers import DatasetSerializer, InferenceStudySerializer, InferenceResultSerializer
from .tasks import create_task

class DatasetViewSet(viewsets.ModelViewSet):
    """CRUD endpoints for datasets; authenticated users only."""

    queryset = Dataset.objects.all()
    serializer_class = DatasetSerializer
    # The attribute the framework reads is ``filter_backends`` (plural); under
    # the singular name the filter backend is never applied.
    filter_backends = [DjangoFilterBackend]
    # Dataset's owner field is ``upload_user``; the model has no ``user`` field.
    filterset_fields = ['upload_user', 'zenodo_id']
    permission_classes = [IsAuthenticated]


class InferenceStudyViewSet(viewsets.ModelViewSet):
    """CRUD endpoints for the requesting user's inference studies."""

    serializer_class = InferenceStudySerializer
    # The attribute the framework reads is ``filter_backends`` (plural); under
    # the singular name the filter backend is never applied.
    filter_backends = [DjangoFilterBackend]
    filterset_fields = ['is_public', 'dataset', 'algorithm']
    permission_classes = [IsAuthenticated]

    def get_queryset(self):
        """Return only the studies owned by the requesting user."""
        return InferenceStudy.objects.filter(user=self.request.user)


class InferenceResultViewSet(viewsets.ModelViewSet):
    """CRUD endpoints for results of the requesting user's studies."""

    serializer_class = InferenceResultSerializer
    # The attribute the framework reads is ``filter_backends`` (plural); under
    # the singular name the filter backend is never applied.
    filter_backends = [DjangoFilterBackend]
    filterset_fields = ['is_public', 'study']
    permission_classes = [IsAuthenticated]

    def get_queryset(self):
        """Return only the results whose study belongs to the requesting user."""
        # InferenceResult has no ``user`` field; ownership lives on its study.
        return InferenceResult.objects.filter(study__user=self.request.user)


class run(APIView):
    """Queue inference tasks on behalf of the requesting user."""

    # Each queued task is attributed to request.user, so require a logged-in
    # user, as the viewsets in this module do.
    permission_classes = [IsAuthenticated]

    def post(self, request):
        """Queue one task per entry of the request's ``tasks`` list.

        Each entry is an object with ``algorithm_name``, ``zenodo_id`` and
        optional ``hyperparameters``.

        Returns:
            A response listing the submitted entries, each extended with
            either a ``task_id`` (queued) or an ``error`` (rejected). A body
            without a ``tasks`` list is answered with HTTP 400.
        """
        # Build gennifer requests
        payload = request.data
        tasks = payload.get('tasks') if isinstance(payload, dict) else None
        if not isinstance(tasks, list):
            return Response(
                {"error": "Request body must contain a 'tasks' list."},
                status=400,
            )

        response = []
        for task in tasks:
            if not isinstance(task, dict):
                response.append({"error": "Each task must be an object."})
                continue
            algorithm_name = task.get("algorithm_name", None)
            zenodo_id = task.get("zenodo_id", None)
            hyperparameters = task.get("hyperparameters", None)
            if not algorithm_name:
                task["error"] = "No algorithm name provided."
                response.append(task)
                continue
            if not zenodo_id:
                task["error"] = "No dataset Zenodo identifer provided."
                response.append(task)
                continue
            try:
                algo = Algorithm.objects.get(name=algorithm_name)
            except ObjectDoesNotExist:
                task["error"] = f"The algorithm: {algorithm_name} is not supported in Gennifer."
                response.append(task)
                continue
            # If all pass, now send to gennifer services. Primary keys are
            # passed instead of model instances because task arguments are
            # serialized before they reach the worker.
            task["task_id"] = create_task.delay(
                algo.pk,
                zenodo_id,
                hyperparameters,
                request.user.pk,
            ).id
            response.append(task)
        return Response(response)
Loading

0 comments on commit 52a2a4c

Please sign in to comment.