diff --git a/analysis/grid_export.py b/analysis/grid_export.py index ea505c848..251baede5 100644 --- a/analysis/grid_export.py +++ b/analysis/grid_export.py @@ -92,6 +92,14 @@ def _grid_export_vcf(genome_build, colmodels, items, sample_ids, sample_names_by pseudo_buffer = StashFile() vcf_writer = Writer(pseudo_buffer, vcf_reader) + # Need to pass escapechar + vcf_writer.writer = csv.writer( + pseudo_buffer, + delimiter="\t", + lineterminator="\n", + quoting=csv.QUOTE_NONE, + escapechar="\\", + ) def iter_row_writer(): for obj in items:
diff --git a/analysis/tasks/analysis_grid_export_tasks.py b/analysis/tasks/analysis_grid_export_tasks.py
new file mode 100644
index 000000000..f380f2bf1
--- /dev/null
+++ b/analysis/tasks/analysis_grid_export_tasks.py
@@ -0,0 +1,154 @@
+import gzip
+import logging
+import os
+import uuid
+import zipfile
+from typing import Optional
+
+import celery
+
+from django.conf import settings
+from django.utils import timezone
+
+from analysis.analysis_templates import get_cohort_analysis, get_sample_analysis
+from analysis.grid_export import node_grid_get_export_iterator
+from analysis.models import AnalysisTemplate, SampleNode
+from library.django_utils import FakeRequest
+from library.guardian_utils import admin_bot
+from library.utils import name_from_filename, sha256sum_str, mk_path_for_file
+from snpdb.models import Cohort, Sample, CachedGeneratedFile
+
+
+def get_annotated_download_files_cgf(generator, pk) -> dict[str, Optional[CachedGeneratedFile]]:
+    """ Return the CachedGeneratedFile for each annotated export type ("vcf", "csv") of an object.
+
+        :param generator: CachedGeneratedFile.generator to look up (the export task name)
+        :param pk: primary key that was hashed into params_hash (see get_grid_downloadable_file_params_hash)
+        :return: {export_type: CachedGeneratedFile or None if no such row exists yet}, or an empty dict
+                 when AnalysisTemplate.get_template_from_setting raises ValueError """
+    try:
+        # Return value is unused - this is only here for the ValueError it can raise
+        # NOTE(review): the cohort setting is checked whatever 'generator' is, while the sample export task
+        # uses ANALYSIS_TEMPLATES_AUTO_SAMPLE - confirm this is intended
+        AnalysisTemplate.get_template_from_setting("ANALYSIS_TEMPLATES_AUTO_COHORT_EXPORT")
+    except ValueError:
+        return {}
+
+    annotated_download_files = {}
+    for export_type in ("vcf", "csv"):
+        params_hash = get_grid_downloadable_file_params_hash(pk, export_type)
+        annotated_download_files[export_type] = CachedGeneratedFile.objects.filter(generator=generator,
+                                                                                   params_hash=params_hash).first()
+    return annotated_download_files
+
+
+def get_grid_downloadable_file_params_hash(pk, export_type):
+    """ Hash identifying the export of object 'pk' as 'export_type' - used as CachedGeneratedFile.params_hash """
+    return sha256sum_str(f"{pk}-{export_type}")
+
+
+def update_cgf_progress_iterator(iterator, cgf_id, total_records, update_size, mark_success=True):
+    """ Yield every record of 'iterator' unchanged, storing progress on the CachedGeneratedFile as it goes.
+
+        :param iterator: records to pass through
+        :param cgf_id: id of the CachedGeneratedFile to update
+        :param total_records: expected number of records, used as the progress denominator
+        :param update_size: write progress every 'update_size' records
+        :param mark_success: also set task_status to 'SUCCESS' once exhausted (default, the original behaviour).
+                             Pass False when the caller has more work to do before the file is usable """
+    update_size = max(1, int(update_size))  # int so modulus below will hit, and never modulo by zero
+    cgf_qs = CachedGeneratedFile.objects.filter(id=cgf_id)
+    cgf_qs.update(progress=0)
+
+    for i, record in enumerate(iterator):
+        # Skip when total_records is 0/None - nothing to divide by, but there may still be lines to pass on
+        if total_records and i % update_size == 0:
+            # Capped as the iterator may yield more items than total_records
+            cgf_qs.update(progress=min(i / total_records, 1))
+        yield record
+
+    if mark_success:
+        cgf_qs.update(progress=1, task_status='SUCCESS')
+    else:
+        cgf_qs.update(progress=1)
+
+
+def _write_node_to_cached_generated_file(cgf, analysis, node, name, export_type):
+    """ Export the grid of 'node' to a file under settings.GENERATED_DIR and record the outcome on 'cgf'.
+
+        'vcf' exports are written gzipped, 'csv' exports are written as plain text then zipped. On success
+        cgf.filename / task_status / generate_end are set, on any exception cgf.exception is set and
+        task_status becomes "FAILURE". 'cgf' is saved either way. """
+    basename = "_".join([name_from_filename(name), "annotated", f"v{analysis.annotation_version.pk}",
+                         str(analysis.genome_build)])
+    request = FakeRequest(user=admin_bot())
+    basename, file_iterator = node_grid_get_export_iterator(request, node, export_type, basename=basename)
+    open_func = open
+    if export_type == 'vcf':
+        open_func = gzip.open
+        basename += ".gz"
+
+    total_records = node.count
+    update_size = max(1000, total_records // 100)  # 1% or every 1k records (whichever is larger)
+    # mark_success=False: SUCCESS is set below, only once the file is finished and cgf.filename is known
+    update_progress_iterator = update_cgf_progress_iterator(file_iterator(), cgf.pk, total_records, update_size,
+                                                            mark_success=False)
+
+    media_root_filename = os.path.join(settings.GENERATED_DIR, cgf.generator, str(cgf.pk), basename)
+    logging.info("Starting to write %s", media_root_filename)
+    try:
+        mk_path_for_file(media_root_filename)
+        with open_func(media_root_filename, "wt") as f:
+            for line in update_progress_iterator:
+                f.write(line)  # Already has newline
+
+        if export_type == 'csv':
+            # Write CSVs to Zip (requires the file to be there already)
+            original_filename = media_root_filename
+            zip_file_path = media_root_filename + ".zip"
+            # ZipFile stores entries uncompressed (ZIP_STORED) unless told otherwise
+            with zipfile.ZipFile(zip_file_path, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
+                zipf.write(original_filename, arcname=os.path.basename(original_filename))
+            os.unlink(original_filename)
+            media_root_filename = zip_file_path
+        cgf.filename = media_root_filename
+        cgf.task_status = "SUCCESS"
+        # The progress updates above went via a queryset, so set it here or save() writes back the old value
+        cgf.progress = 1
+        cgf.generate_end = timezone.now()
+        logging.info("Wrote %s", media_root_filename)
+    except Exception as e:
+        logging.exception("Failed to write %s: %s", media_root_filename, e)
+        cgf.exception = str(e)
+        cgf.task_status = "FAILURE"
+    cgf.save()
+
+
+@celery.shared_task
+def export_cohort_to_downloadable_file(cohort_id, export_type):
+    """ Celery task: write the auto cohort export analysis output node to the cohort's CachedGeneratedFile """
+    # This should have been created in analysis.views.views_grid.cohort_grid_export
+    params_hash = get_grid_downloadable_file_params_hash(cohort_id, export_type)
+    cgf = CachedGeneratedFile.objects.get(generator="export_cohort_to_downloadable_file",
+                                          params_hash=params_hash)
+
+    cohort = Cohort.objects.get(pk=cohort_id)
+    analysis_template = AnalysisTemplate.get_template_from_setting("ANALYSIS_TEMPLATES_AUTO_COHORT_EXPORT")
+    analysis = get_cohort_analysis(cohort, analysis_template)
+    node = analysis.analysisnode_set.get_subclass(output_node=True)  # Should only be 1
+    _write_node_to_cached_generated_file(cgf, analysis, node, cohort.name, export_type)
+
+
+@celery.shared_task
+def export_sample_to_downloadable_file(sample_id, export_type):
+    """ Celery task: write the auto sample analysis output node to the sample's CachedGeneratedFile """
+    # This should have been created in analysis.views.views_grid.sample_grid_export
+    params_hash = get_grid_downloadable_file_params_hash(sample_id, export_type)
+    cgf = CachedGeneratedFile.objects.get(generator="export_sample_to_downloadable_file",
+                                          params_hash=params_hash)
+
+    sample = Sample.objects.get(pk=sample_id)
+    analysis_template = AnalysisTemplate.get_template_from_setting("ANALYSIS_TEMPLATES_AUTO_SAMPLE")
+    analysis = get_sample_analysis(sample, analysis_template)
+    node = SampleNode.objects.get(analysis=analysis, output_node=True)  # Should only be 1
+    _write_node_to_cached_generated_file(cgf, analysis, node, sample.name, export_type)
diff --git a/analysis/templates/analysis/node_data/node_data_graph.html
b/analysis/templates/analysis/node_data/node_data_graph.html index 34e47ad17..8a36fe466 100644 --- a/analysis/templates/analysis/node_data/node_data_graph.html +++ b/analysis/templates/analysis/node_data/node_data_graph.html @@ -2,7 +2,7 @@ {% load static %} {% block top %} - + {% endblock top %} {% block css %} diff --git a/analysis/views/views.py b/analysis/views/views.py index bf9bacef6..1b34929ab 100644 --- a/analysis/views/views.py +++ b/analysis/views/views.py @@ -609,14 +609,14 @@ def node_graph(request, analysis_id, node_id, graph_type_id, cmap): get_node_subclass_or_404(request.user, node_id) # Permission check node_graph_type = NodeGraphType.objects.get(pk=graph_type_id) cached_graph = graphcache.async_graph(node_graph_type.graph_class, cmap, node_id) - return HttpResponseRedirect(reverse("cached_generated_file_check", kwargs={"cgf_id": cached_graph.id})) + return redirect(cached_graph) def column_summary_boxplot(request, analysis_id, node_id, label, variant_column): get_node_subclass_or_404(request.user, node_id) # Permission check graph_class_name = full_class_name(ColumnBoxplotGraph) cached_graph = graphcache.async_graph(graph_class_name, node_id, label, variant_column) - return HttpResponseRedirect(reverse("cached_generated_file_check", kwargs={"cgf_id": cached_graph.id})) + return redirect(cached_graph) def cohort_zygosity_filters(request, analysis_id, node_id, cohort_id): diff --git a/analysis/views/views_grid.py b/analysis/views/views_grid.py index c14d5ffaf..ba1561138 100644 --- a/analysis/views/views_grid.py +++ b/analysis/views/views_grid.py @@ -4,21 +4,22 @@ from django.contrib.postgres.aggregates.general import StringAgg from django.core.cache import cache +from django.http import JsonResponse from django.http.response import StreamingHttpResponse, HttpResponseRedirect +from django.shortcuts import redirect from django.urls import reverse from django.utils.decorators import method_decorator from django.views.decorators.cache import cache_page 
from django.views.decorators.vary import vary_on_cookie from analysis import grids -from analysis.analysis_templates import get_sample_analysis, get_cohort_analysis from analysis.grid_export import node_grid_get_export_iterator -from analysis.models import AnalysisNode, AnalysisTemplate, SampleNode +from analysis.models import AnalysisNode +from analysis.tasks.analysis_grid_export_tasks import export_cohort_to_downloadable_file, export_sample_to_downloadable_file, get_grid_downloadable_file_params_hash from analysis.views.analysis_permissions import get_node_subclass_or_non_fatal_exception from analysis.views.node_json_view import NodeJSONGetView, NodeJSONViewMixin from library.constants import WEEK_SECS -from library.utils import name_from_filename -from snpdb.models import Sample, Cohort +from snpdb.models import Sample, Cohort, CachedGeneratedFile from snpdb.models.models_variant import Variant _NODE_GRID_ALLOWED_PARAMS = { @@ -111,35 +112,30 @@ def _get_data(self, request, node, **kwargs): def cohort_grid_export(request, cohort_id, export_type): EXPORT_TYPES = {"csv", "vcf"} - - cohort = Cohort.get_for_user(request.user, cohort_id) + Cohort.get_for_user(request.user, cohort_id) # Permission check if export_type not in EXPORT_TYPES: raise ValueError(f"{export_type} must be one of: {EXPORT_TYPES}") - analysis_template = AnalysisTemplate.get_template_from_setting("ANALYSIS_TEMPLATES_AUTO_COHORT_EXPORT") - analysis = get_cohort_analysis(cohort, analysis_template) - node = analysis.analysisnode_set.get_subclass(output_node=True) # Should only be 1 - basename = "_".join([name_from_filename(cohort.name), "annotated", f"v{analysis.annotation_version.pk}", - str(cohort.genome_build)]) - - filename, file_iterator = node_grid_get_export_iterator(request, node, export_type, basename=basename) - return _get_streaming_response(filename, file_iterator) + params_hash = get_grid_downloadable_file_params_hash(cohort_id, export_type) + task = 
export_cohort_to_downloadable_file.si(cohort_id, export_type) + cgf = CachedGeneratedFile.get_or_create_and_launch("export_cohort_to_downloadable_file", params_hash, task) + if cgf.exception: + raise ValueError(cgf.exception) + return redirect(cgf) def sample_grid_export(request, sample_id, export_type): EXPORT_TYPES = {"csv", "vcf"} - - sample = Sample.get_for_user(request.user, sample_id) + Sample.get_for_user(request.user, sample_id) # Permission check if export_type not in EXPORT_TYPES: raise ValueError(f"{export_type} must be one of: {EXPORT_TYPES}") - analysis_template = AnalysisTemplate.get_template_from_setting("ANALYSIS_TEMPLATES_AUTO_SAMPLE") - analysis = get_sample_analysis(sample, analysis_template) - node = SampleNode.objects.get(analysis=analysis, output_node=True) # Should only be 1 - basename = "_".join([name_from_filename(sample.name), "annotated", f"v{analysis.annotation_version.pk}", - str(sample.genome_build)]) - filename, file_iterator = node_grid_get_export_iterator(request, node, export_type, basename=basename) - return _get_streaming_response(filename, file_iterator) + params_hash = get_grid_downloadable_file_params_hash(sample_id, export_type) + task = export_sample_to_downloadable_file.si(sample_id, export_type) + cgf = CachedGeneratedFile.get_or_create_and_launch("export_sample_to_downloadable_file", params_hash, task) + if cgf.exception: + raise ValueError(cgf.exception) + return redirect(cgf) def node_grid_export(request, analysis_id): diff --git a/genes/templates/genes/view_gene_symbol.html b/genes/templates/genes/view_gene_symbol.html index 572aacffe..cef225d4c 100644 --- a/genes/templates/genes/view_gene_symbol.html +++ b/genes/templates/genes/view_gene_symbol.html @@ -23,7 +23,7 @@ - + - + + + + @@ -122,14 +141,13 @@

Sample: {{ sample.name }}

{% csrf_token %} {% crispy form form_helper.horizontal %} - {% if can_download_vcf %} + {% if annotated_download_files %}
- {% if can_download_annotated_vcf %} - - - {% endif %} + {% for file_type in annotated_download_files %} +
+ {% endfor %}
{% endif %} diff --git a/snpdb/templates/snpdb/data/view_vcf.html b/snpdb/templates/snpdb/data/view_vcf.html index 7b67cefe5..e233c16b5 100644 --- a/snpdb/templates/snpdb/data/view_vcf.html +++ b/snpdb/templates/snpdb/data/view_vcf.html @@ -10,6 +10,7 @@ {% block title %}{{ vcf.name }}{% endblock %} {% block head %} +