From c6ac2f01d4a577612efbe887308f322bc369f87a Mon Sep 17 00:00:00 2001 From: Dave Lawrence Date: Fri, 13 Sep 2024 17:32:31 +0930 Subject: [PATCH 1/7] issue #1171 - generate files for download --- analysis/tasks/analysis_grid_export_tasks.py | 74 ++++++++++++++++++++ analysis/views/views_grid.py | 37 ++++------ snpdb/graphs/graphcache.py | 18 +---- snpdb/models/models.py | 22 +++++- 4 files changed, 112 insertions(+), 39 deletions(-) create mode 100644 analysis/tasks/analysis_grid_export_tasks.py diff --git a/analysis/tasks/analysis_grid_export_tasks.py b/analysis/tasks/analysis_grid_export_tasks.py new file mode 100644 index 000000000..b3d24b731 --- /dev/null +++ b/analysis/tasks/analysis_grid_export_tasks.py @@ -0,0 +1,74 @@ +import logging +import os +import uuid + +import celery + +from django.conf import settings +from django.utils import timezone + +from analysis.analysis_templates import get_cohort_analysis, get_sample_analysis +from analysis.grid_export import node_grid_get_export_iterator +from analysis.models import AnalysisTemplate, SampleNode +from library.django_utils import FakeRequest +from library.guardian_utils import admin_bot +from library.utils import name_from_filename, sha256sum_str, mk_path_for_file +from snpdb.models import Cohort, Sample, CachedGeneratedFile + + +def get_grid_downloadable_file_params_hash(pk, export_type): + return sha256sum_str(f"{pk}-{export_type}") + + +def _write_cached_generated_file(cgf: CachedGeneratedFile, filename, file_iterator): + logging.info("Starting to write %s", filename) + media_root_filename = os.path.join(settings.MEDIA_ROOT, str(uuid.uuid4()), filename) + try: + mk_path_for_file(media_root_filename) + with open(media_root_filename, "w") as f: + for line in file_iterator(): + f.write(line) # Already has newline + cgf.filename = media_root_filename + cgf.generate_end = timezone.now() + logging.info("Wrote %s", media_root_filename) + except Exception as e: + logging.error("Failed to write %s: %s", media_root_filename, e) + cgf.exception = str(e) + cgf.save() + + +@celery.shared_task +def export_cohort_to_downloadable_file(cohort_id, export_type): + # This should have been created in analysis.views.views_grid.cohort_grid_export + params_hash = get_grid_downloadable_file_params_hash(cohort_id, export_type) + cgf = CachedGeneratedFile.objects.get(generator="export_cohort_to_downloadable_file", + params_hash=params_hash) + + cohort = Cohort.objects.get(pk=cohort_id) + analysis_template = AnalysisTemplate.get_template_from_setting("ANALYSIS_TEMPLATES_AUTO_COHORT_EXPORT") + analysis = get_cohort_analysis(cohort, analysis_template) + node = analysis.analysisnode_set.get_subclass(output_node=True) # Should only be 1 + basename = "_".join([name_from_filename(cohort.name), "annotated", f"v{analysis.annotation_version.pk}", + str(cohort.genome_build)]) + + request = FakeRequest(user=admin_bot()) + filename, file_iterator = node_grid_get_export_iterator(request, node, export_type, basename=basename) + _write_cached_generated_file(cgf, filename, file_iterator) + + +@celery.shared_task +def export_sample_to_downloadable_file(sample_id, export_type): + # This should have been created in analysis.views.views_grid.sample_grid_export + params_hash = get_grid_downloadable_file_params_hash(sample_id, export_type) + cgf = CachedGeneratedFile.objects.get(generator="export_sample_to_downloadable_file", + params_hash=params_hash) + + sample = Sample.objects.get(pk=sample_id) + analysis_template = AnalysisTemplate.get_template_from_setting("ANALYSIS_TEMPLATES_AUTO_SAMPLE") + analysis = get_sample_analysis(sample, analysis_template) + node = SampleNode.objects.get(analysis=analysis, output_node=True) # Should only be 1 + basename = "_".join([name_from_filename(sample.name), "annotated", f"v{analysis.annotation_version.pk}", + str(sample.genome_build)]) + request = FakeRequest(user=admin_bot()) + filename, file_iterator = node_grid_get_export_iterator(request, node, export_type, basename=basename) + _write_cached_generated_file(cgf, filename, file_iterator) diff --git a/analysis/views/views_grid.py b/analysis/views/views_grid.py index c14d5ffaf..e1b1f9bd8 100644 --- a/analysis/views/views_grid.py +++ b/analysis/views/views_grid.py @@ -4,6 +4,7 @@ from django.contrib.postgres.aggregates.general import StringAgg from django.core.cache import cache +from django.http import JsonResponse from django.http.response import StreamingHttpResponse, HttpResponseRedirect from django.urls import reverse from django.utils.decorators import method_decorator @@ -11,14 +12,13 @@ from django.views.decorators.vary import vary_on_cookie from analysis import grids -from analysis.analysis_templates import get_sample_analysis, get_cohort_analysis from analysis.grid_export import node_grid_get_export_iterator -from analysis.models import AnalysisNode, AnalysisTemplate, SampleNode +from analysis.models import AnalysisNode +from analysis.tasks.analysis_grid_export_tasks import export_cohort_to_downloadable_file, export_sample_to_downloadable_file, get_grid_downloadable_file_params_hash from analysis.views.analysis_permissions import get_node_subclass_or_non_fatal_exception from analysis.views.node_json_view import NodeJSONGetView, NodeJSONViewMixin from library.constants import WEEK_SECS -from library.utils import name_from_filename -from snpdb.models import Sample, Cohort +from snpdb.models import Sample, Cohort, CachedGeneratedFile from snpdb.models.models_variant import Variant _NODE_GRID_ALLOWED_PARAMS = { @@ -111,35 +111,26 @@ def _get_data(self, request, node, **kwargs): def cohort_grid_export(request, cohort_id, export_type): EXPORT_TYPES = {"csv", "vcf"} - - cohort = Cohort.get_for_user(request.user, cohort_id) + Cohort.get_for_user(request.user, cohort_id) # Permission check if export_type not in EXPORT_TYPES: raise ValueError(f"{export_type} must be one of: {EXPORT_TYPES}") - analysis_template = AnalysisTemplate.get_template_from_setting("ANALYSIS_TEMPLATES_AUTO_COHORT_EXPORT") - analysis = get_cohort_analysis(cohort, analysis_template) - node = analysis.analysisnode_set.get_subclass(output_node=True) # Should only be 1 - basename = "_".join([name_from_filename(cohort.name), "annotated", f"v{analysis.annotation_version.pk}", - str(cohort.genome_build)]) - - filename, file_iterator = node_grid_get_export_iterator(request, node, export_type, basename=basename) - return _get_streaming_response(filename, file_iterator) + params_hash = get_grid_downloadable_file_params_hash(cohort_id, export_type) + task = export_cohort_to_downloadable_file.si(cohort_id, export_type) + cgf = CachedGeneratedFile.get_or_create_and_launch("export_cohort_to_downloadable_file", params_hash, task) + return JsonResponse({"celery_task": cgf.task_id}) def sample_grid_export(request, sample_id, export_type): EXPORT_TYPES = {"csv", "vcf"} - - sample = Sample.get_for_user(request.user, sample_id) + Sample.get_for_user(request.user, sample_id) # Permission check if export_type not in EXPORT_TYPES: raise ValueError(f"{export_type} must be one of: {EXPORT_TYPES}") - analysis_template = AnalysisTemplate.get_template_from_setting("ANALYSIS_TEMPLATES_AUTO_SAMPLE") - analysis = get_sample_analysis(sample, analysis_template) - node = SampleNode.objects.get(analysis=analysis, output_node=True) # Should only be 1 - basename = "_".join([name_from_filename(sample.name), "annotated", f"v{analysis.annotation_version.pk}", - str(sample.genome_build)]) - filename, file_iterator = node_grid_get_export_iterator(request, node, export_type, basename=basename) - return _get_streaming_response(filename, file_iterator) + params_hash = get_grid_downloadable_file_params_hash(sample_id, export_type) + task = export_sample_to_downloadable_file.si(sample_id, export_type) + cgf = CachedGeneratedFile.get_or_create_and_launch("export_sample_to_downloadable_file", params_hash, task) + return JsonResponse({"celery_task": cgf.task_id}) def node_grid_export(request, analysis_id): diff --git a/snpdb/graphs/graphcache.py b/snpdb/graphs/graphcache.py index 2cbe974c7..e9317a785 100644 --- a/snpdb/graphs/graphcache.py +++ b/snpdb/graphs/graphcache.py @@ -2,6 +2,7 @@ import logging import os +from celery import signature from celery.result import AsyncResult from django.conf import settings from django.utils import timezone @@ -37,18 +38,5 @@ def async_graph(graph_class_name, *args): generator = cacheablegraph.get_name() params_hash = cacheablegraph.get_params_hash() - cached_graph, created = CachedGeneratedFile.objects.get_or_create(generator=generator, - params_hash=params_hash) - if created or not cached_graph.task_id: - logging.debug("Launching Celery Job for graph: generator=%s, params_hash=%s", generator, params_hash) - async_result = generate_graph.delay(graph_class_name, *args) # @UndefinedVariable - cached_graph.task_id = async_result.id - cached_graph.generate_start = timezone.now() - cached_graph.save() - else: - async_result = AsyncResult(cached_graph.task_id) - - if async_result.result: - cached_graph.save_from_async_result(async_result) - - return cached_graph + task: signature = generate_graph.si(graph_class_name, *args) + return CachedGeneratedFile.get_or_create_and_launch(generator, params_hash, task) diff --git a/snpdb/models/models.py b/snpdb/models/models.py index 034bd7a36..2685c9e41 100644 --- a/snpdb/models/models.py +++ b/snpdb/models/models.py @@ -15,6 +15,7 @@ from re import RegexFlag from typing import TypedDict, Optional +from celery import signature from celery.result import AsyncResult from django.conf import settings from django.contrib.auth.models import User, Group @@ -52,7 +53,7 @@ class CachedGeneratedFile(models.Model): filename = models.TextField(null=True) exception = models.TextField(null=True) generator = models.TextField() - params_hash = models.TextField() # sha256 of params used to generate graph + params_hash = models.TextField() # sha256 of params task_id = models.CharField(max_length=36, null=True) task_status = models.TextField(null=True) # TODO: what's the actual size? generate_start = models.DateTimeField(null=True) @@ -68,6 +69,25 @@ def __str__(self): description = f"task: {self.task_id} sent: {self.generate_start}" return f"{self.generator}({self.params_hash}): {description}" + @staticmethod + def get_or_create_and_launch(generator, params_hash, task: signature) -> 'CachedGeneratedFile': + cgf, created = CachedGeneratedFile.objects.get_or_create(generator=generator, + params_hash=params_hash) + if created or not cgf.task_id: + logging.debug("Launching Celery Job for CachedGeneratedFile(generator=%s, params_hash=%s)", + generator, params_hash) + async_result = task.apply_async() + cgf.task_id = async_result.id + cgf.generate_start = timezone.now() + cgf.save() + else: + async_result = AsyncResult(cgf.task_id) + + if async_result.result: + cgf.save_from_async_result(async_result) + + return cgf + def save_from_async_result(self, async_result): self.task_status = async_result.status self.generate_end = timezone.now() From 4f3d973ec0eb6c41e10209532ec3873131efbe01 Mon Sep 17 00:00:00 2001 From: Dave Lawrence Date: Tue, 17 Sep 2024 12:10:23 +0930 Subject: [PATCH 2/7] issue #1171 - generated file download --- analysis/tasks/analysis_grid_export_tasks.py | 2 + analysis/views/views.py | 4 +- analysis/views/views_grid.py | 3 +- library/django_utils/__init__.py | 8 +++ pedigree/views.py | 4 +- seqauto/views.py | 8 +-- snpdb/graphs/graphcache.py | 3 - snpdb/models/models.py | 10 +++ snpdb/models/models_somalier.py | 7 +-- snpdb/templates/snpdb/data/view_vcf.html | 63 ++++++++++++++++++- snpdb/views/views.py | 27 ++++++-- snpdb/views/views_json.py | 6 +- .../default_static/js/generated_graphs.js | 15 +++++ 13 files changed, 131 insertions(+), 29 deletions(-) diff --git a/analysis/tasks/analysis_grid_export_tasks.py b/analysis/tasks/analysis_grid_export_tasks.py index b3d24b731..ee473c037 100644 --- a/analysis/tasks/analysis_grid_export_tasks.py +++ b/analysis/tasks/analysis_grid_export_tasks.py @@ -29,11 +29,13 @@ def _write_cached_generated_file(cgf: CachedGeneratedFile, filename, file_iterat for line in file_iterator(): f.write(line) # Already has newline cgf.filename = media_root_filename + cgf.task_status = "SUCCESS" cgf.generate_end = timezone.now() logging.info("Wrote %s", media_root_filename) except Exception as e: logging.error("Failed to write %s: %s", media_root_filename, e) cgf.exception = str(e) + cgf.task_status = "FAILURE" cgf.save() diff --git a/analysis/views/views.py b/analysis/views/views.py index bf9bacef6..1b34929ab 100644 --- a/analysis/views/views.py +++ b/analysis/views/views.py @@ -609,14 +609,14 @@ def node_graph(request, analysis_id, node_id, graph_type_id, cmap): get_node_subclass_or_404(request.user, node_id) # Permission check node_graph_type = NodeGraphType.objects.get(pk=graph_type_id) cached_graph = graphcache.async_graph(node_graph_type.graph_class, cmap, node_id) - return HttpResponseRedirect(reverse("cached_generated_file_check", kwargs={"cgf_id": cached_graph.id})) + return redirect(cached_graph) def column_summary_boxplot(request, analysis_id, node_id, label, variant_column): get_node_subclass_or_404(request.user, node_id) # Permission check graph_class_name = full_class_name(ColumnBoxplotGraph) cached_graph = graphcache.async_graph(graph_class_name, node_id, label, variant_column) - return HttpResponseRedirect(reverse("cached_generated_file_check", kwargs={"cgf_id": cached_graph.id})) + return redirect(cached_graph) def cohort_zygosity_filters(request, analysis_id, node_id, cohort_id): diff --git a/analysis/views/views_grid.py b/analysis/views/views_grid.py index e1b1f9bd8..1f5f4cb53 100644 --- a/analysis/views/views_grid.py +++ b/analysis/views/views_grid.py @@ -6,6 +6,7 @@ from django.core.cache import cache from django.http import JsonResponse from django.http.response import StreamingHttpResponse, HttpResponseRedirect +from django.shortcuts import redirect from django.urls import reverse from django.utils.decorators import method_decorator from django.views.decorators.cache import cache_page @@ -118,7 +119,7 @@ def cohort_grid_export(request, cohort_id, export_type): params_hash = get_grid_downloadable_file_params_hash(cohort_id, export_type) task = export_cohort_to_downloadable_file.si(cohort_id, export_type) cgf = CachedGeneratedFile.get_or_create_and_launch("export_cohort_to_downloadable_file", params_hash, task) - return JsonResponse({"celery_task": cgf.task_id}) + return redirect(cgf) def sample_grid_export(request, sample_id, export_type): diff --git a/library/django_utils/__init__.py b/library/django_utils/__init__.py index 346b4309b..703ef76d6 100644 --- a/library/django_utils/__init__.py +++ b/library/django_utils/__init__.py @@ -1,5 +1,6 @@ import datetime import operator +import os from functools import reduce from functools import wraps, partial @@ -39,6 +40,13 @@ def get_url_from_view_path(view_path): return f'{protocol}://{current_site.domain}{view_path}' +def get_url_from_media_root_filename(filename): + media_root_with_slash = os.path.join(settings.MEDIA_ROOT, "") + if not filename.startswith(media_root_with_slash): + raise ValueError(f"'{filename}' must start with MEDIA_ROOT: {media_root_with_slash}") + return os.path.join(settings.MEDIA_URL, filename[len(media_root_with_slash):]) + + def add_save_message(request, valid, name, created=False): action = "created" if created else "saved" diff --git a/pedigree/views.py b/pedigree/views.py index 13b2b812a..6858c463d 100644 --- a/pedigree/views.py +++ b/pedigree/views.py @@ -4,7 +4,7 @@ from django.forms.formsets import formset_factory from django.forms.models import ModelChoiceField from django.http.response import HttpResponseRedirect -from django.shortcuts import get_object_or_404, render +from django.shortcuts import get_object_or_404, render, redirect from django.urls.base import reverse from library.utils import full_class_name @@ -37,7 +37,7 @@ def pedigree_chart(request, ped_file_id): ped_file = PedFile.get_for_user(request.user, ped_file_id) # Make sure we can access it graph_class_name = full_class_name(PedigreeChart) cached_graph = graphcache.async_graph(graph_class_name, ped_file.pk) - return HttpResponseRedirect(reverse("cached_generated_file_check", kwargs={"cgf_id": cached_graph.id})) + return redirect(cached_graph) def ped_files(request): diff --git a/seqauto/views.py b/seqauto/views.py index 33682f3e2..d5a90494c 100644 --- a/seqauto/views.py +++ b/seqauto/views.py @@ -8,7 +8,7 @@ from django.core.exceptions import PermissionDenied from django.db.models.aggregates import Count from django.http.response import HttpResponseRedirect, JsonResponse, HttpResponse -from django.shortcuts import render, get_object_or_404 +from django.shortcuts import render, get_object_or_404, redirect from django.urls.base import reverse from django.utils.decorators import method_decorator from django.utils.safestring import mark_safe @@ -478,7 +478,7 @@ def sequencing_run_qc_graph(request, sequencing_run_id, qc_compare_type): _ = QCCompareType(qc_compare_type) # Check valid graph_class_name = full_class_name(SequencingRunQCGraph) cached_graph = graphcache.async_graph(graph_class_name, sequencing_run_id, qc_compare_type) - return HttpResponseRedirect(reverse("cached_generated_file_check", kwargs={"cgf_id": cached_graph.id})) + return redirect(cached_graph) def sequencing_run_qc_json_graph(request, sequencing_run_id, qc_compare_type): @@ -565,13 +565,13 @@ def get_field(f): def index_metrics_qc_graph(request, illumina_qc_id): graph_class_name = full_class_name(IndexMetricsQCGraph) cached_graph = graphcache.async_graph(graph_class_name, illumina_qc_id) - return HttpResponseRedirect(reverse("cached_generated_file_check", kwargs={"cgf_id": cached_graph.id})) + return redirect(cached_graph) def qc_exec_summary_graph(request, qc_exec_summary_id, qc_compare_type): graph_class_name = full_class_name(QCExecSummaryGraph) cached_graph = graphcache.async_graph(graph_class_name, qc_exec_summary_id, qc_compare_type) - return HttpResponseRedirect(reverse("cached_generated_file_check", kwargs={"cgf_id": cached_graph.id})) + return redirect(cached_graph) def qc_exec_summary_json_graph(request, qc_exec_summary_id, qc_compare_type): diff --git a/snpdb/graphs/graphcache.py b/snpdb/graphs/graphcache.py index e9317a785..7fb8505b3 100644 --- a/snpdb/graphs/graphcache.py +++ b/snpdb/graphs/graphcache.py @@ -1,11 +1,8 @@ import abc -import logging import os from celery import signature -from celery.result import AsyncResult from django.conf import settings -from django.utils import timezone from library.graphs import graph_base from library.utils import import_class diff --git a/snpdb/models/models.py b/snpdb/models/models.py index 2685c9e41..fa52c088a 100644 --- a/snpdb/models/models.py +++ b/snpdb/models/models.py @@ -10,6 +10,7 @@ import re from dataclasses import dataclass from datetime import datetime +from fileinput import filename from functools import cached_property, total_ordering from html import escape from re import RegexFlag @@ -33,6 +34,7 @@ from more_itertools import first from classification.enums.classification_enums import ShareLevel +from library.django_utils import get_url_from_media_root_filename from library.django_utils.django_object_managers import ObjectManagerCachingRequest from library.enums.log_level import LogLevel from library.preview_request import PreviewModelMixin @@ -69,6 +71,14 @@ def __str__(self): description = f"task: {self.task_id} sent: {self.generate_start}" return f"{self.generator}({self.params_hash}): {description}" + def get_absolute_url(self): + return reverse("cached_generated_file_check", kwargs={"cgf_id": self.pk}) + + def get_media_url(self): + if self.filename is None: + raise ValueError(f"{self}.filename is None") + return get_url_from_media_root_filename(self.filename) + @staticmethod def get_or_create_and_launch(generator, params_hash, task: signature) -> 'CachedGeneratedFile': cgf, created = CachedGeneratedFile.objects.get_or_create(generator=generator, diff --git a/snpdb/models/models_somalier.py b/snpdb/models/models_somalier.py index f31095fb2..b3008d224 100644 --- a/snpdb/models/models_somalier.py +++ b/snpdb/models/models_somalier.py @@ -14,6 +14,7 @@ from django_extensions.db.models import TimeStampedModel from model_utils.managers import InheritanceManager +from library.django_utils import get_url_from_media_root_filename from library.utils import execute_cmd from patients.models_enums import Sex from pedigree.ped.export_ped import write_unrelated_ped, write_trio_ped @@ -72,11 +73,7 @@ def sample_name_to_id(sample_name: str): @staticmethod def media_url(file_path): # Need to use a slash, so that later joins don't have absolute path - media_root_with_slash = os.path.join(settings.MEDIA_ROOT, "") - if not file_path.startswith(media_root_with_slash): - raise ValueError(f"'{file_path}' must start with MEDIA_ROOT: {media_root_with_slash}") - - return os.path.join(settings.MEDIA_URL, file_path[len(media_root_with_slash):]) + return get_url_from_media_root_filename(file_path) class SomalierVCFExtract(AbstractSomalierModel): diff --git a/snpdb/templates/snpdb/data/view_vcf.html b/snpdb/templates/snpdb/data/view_vcf.html index 7b67cefe5..a946f187b 100644 --- a/snpdb/templates/snpdb/data/view_vcf.html +++ b/snpdb/templates/snpdb/data/view_vcf.html @@ -10,6 +10,7 @@ {% block title %}{{ vcf.name }}{% endblock %} {% block head %} + - + + + + @@ -122,14 +141,13 @@

Sample: {{ sample.name }}

{% csrf_token %} {% crispy form form_helper.horizontal %} - {% if can_download_vcf %} + {% if annotated_download_files %}
- {% if can_download_annotated_vcf %} - - - {% endif %} + {% for file_type in annotated_download_files %} +
+ {% endfor %}
{% endif %} diff --git a/snpdb/templates/snpdb/data/view_vcf.html b/snpdb/templates/snpdb/data/view_vcf.html index 7cea6fd25..e233c16b5 100644 --- a/snpdb/templates/snpdb/data/view_vcf.html +++ b/snpdb/templates/snpdb/data/view_vcf.html @@ -10,7 +10,7 @@ {% block title %}{{ vcf.name }}{% endblock %} {% block head %} - +