Skip to content

Commit

Permalink
Merge branch 'feature/issue_1171_generate_file_for_download'
Browse files Browse the repository at this point in the history
  • Loading branch information
davmlaw committed Sep 19, 2024
2 parents ad4525d + 4f62a0a commit 539be3e
Show file tree
Hide file tree
Showing 30 changed files with 552 additions and 155 deletions.
8 changes: 8 additions & 0 deletions analysis/grid_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,14 @@ def _grid_export_vcf(genome_build, colmodels, items, sample_ids, sample_names_by
pseudo_buffer = StashFile()

vcf_writer = Writer(pseudo_buffer, vcf_reader)
# Need to pass escapechar
vcf_writer.writer = csv.writer(
pseudo_buffer,
delimiter="\t",
lineterminator="\n",
quoting=csv.QUOTE_NONE,
escapechar="\\",
)

def iter_row_writer():
for obj in items:
Expand Down
123 changes: 123 additions & 0 deletions analysis/tasks/analysis_grid_export_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import gzip
import logging
import os
import uuid
import zipfile
from typing import Optional

import celery

from django.conf import settings
from django.utils import timezone

from analysis.analysis_templates import get_cohort_analysis, get_sample_analysis
from analysis.grid_export import node_grid_get_export_iterator
from analysis.models import AnalysisTemplate, SampleNode
from library.django_utils import FakeRequest
from library.guardian_utils import admin_bot
from library.utils import name_from_filename, sha256sum_str, mk_path_for_file
from snpdb.models import Cohort, Sample, CachedGeneratedFile


def get_annotated_download_files_cgf(generator, pk) -> dict[str, Optional[CachedGeneratedFile]]:
    """Look up the cached annotated download files for ``pk`` under ``generator``.

    Returns a dict keyed by export type ("vcf", "csv"); each value is the first
    CachedGeneratedFile matching the generator and that export type's params
    hash, or None when no row matches. If a ValueError is raised anywhere
    along the way, an empty dict is returned instead.
    """
    try:
        # Return value is deliberately unused - the call is only made so that a
        # ValueError skips the lookups below.
        # NOTE(review): presumably raised when the export template setting is not
        # configured - confirm against AnalysisTemplate.get_template_from_setting
        AnalysisTemplate.get_template_from_setting("ANALYSIS_TEMPLATES_AUTO_COHORT_EXPORT")
        return {
            export_type: CachedGeneratedFile.objects.filter(
                generator=generator,
                params_hash=get_grid_downloadable_file_params_hash(pk, export_type),
            ).first()
            for export_type in ("vcf", "csv")
        }
    except ValueError:
        return {}


def get_grid_downloadable_file_params_hash(pk, export_type):
    """Return the params hash for a grid export: ``sha256sum_str`` of "<pk>-<export_type>"."""
    hash_input = f"{pk}-{export_type}"
    return sha256sum_str(hash_input)


def update_cgf_progress_iterator(iterator, cgf_id, total_records, update_size):
    """Yield every item of ``iterator`` unchanged while recording progress on a CachedGeneratedFile.

    Progress is written via queryset ``update()`` calls on the row with
    ``id=cgf_id``: reset to 0 before iteration starts, refreshed every
    ``update_size`` items, and set to 1 (together with
    ``task_status='SUCCESS'``) once ``iterator`` is exhausted.

    :param iterator: iterable of records to pass through
    :param cgf_id: primary key of the CachedGeneratedFile to update
    :param total_records: expected number of records, used as the progress denominator
    :param update_size: number of items between progress writes (coerced to an int >= 1)
    """
    # int so the modulus below will hit; at least 1 so it can never be a modulo by zero
    update_size = max(1, int(update_size))
    cgf_qs = CachedGeneratedFile.objects.filter(id=cgf_id)
    cgf_qs.update(progress=0)

    for i, record in enumerate(iterator):
        if i % update_size == 0:
            if total_records:
                # Clamp: the iterator may yield more items than total_records
                # (presumably header lines count as items - TODO confirm)
                progress = min(i / total_records, 1)
            else:
                # Nothing to measure against - avoid dividing by zero on the first item
                progress = 0
            cgf_qs.update(progress=progress)
        yield record
    # NOTE(review): this marks SUCCESS as soon as the last item has been handed out,
    # which is before the consumer has finished whatever it does with it - confirm
    # that pollers do not rely on task_status alone.
    cgf_qs.update(progress=1, task_status='SUCCESS')


def _write_node_to_cached_generated_file(cgf, analysis, node, name, export_type):
    """Export ``node``'s grid to a file under GENERATED_DIR and record the outcome on ``cgf``.

    VCF exports are written gzip-compressed (".gz" is appended to the name).
    CSV exports are written as plain text, then wrapped in a ".zip" archive
    and the uncompressed file is removed.

    On success ``cgf.filename``, ``task_status`` ("SUCCESS"), ``generate_end``
    and ``progress`` are set; on any exception ``cgf.exception`` and
    ``task_status`` ("FAILURE") are set. ``cgf`` is saved in both cases.

    :param cgf: CachedGeneratedFile that tracks this export
    :param analysis: analysis the node belongs to (supplies annotation version / genome build for the name)
    :param node: analysis node whose grid is exported
    :param name: sample/cohort name used as the start of the file name
    :param export_type: 'vcf' or 'csv'
    """
    basename = "_".join([name_from_filename(name), "annotated", f"v{analysis.annotation_version.pk}",
                         str(analysis.genome_build)])
    request = FakeRequest(user=admin_bot())
    basename, file_iterator = node_grid_get_export_iterator(request, node, export_type, basename=basename)
    open_func = open
    if export_type == 'vcf':
        open_func = gzip.open
        basename += ".gz"

    total_records = node.count
    update_size = max(1000, total_records / 100)  # 1% or every 1k records
    update_progress_iterator = update_cgf_progress_iterator(file_iterator(), cgf.pk, total_records, update_size)

    media_root_filename = os.path.join(settings.GENERATED_DIR, cgf.generator, str(cgf.pk), basename)
    logging.info("Starting to write %s", media_root_filename)
    try:
        mk_path_for_file(media_root_filename)
        # Explicit encoding so the output does not depend on the worker's locale
        with open_func(media_root_filename, "wt", encoding="utf-8") as f:
            for line in update_progress_iterator:
                f.write(line)  # Already has newline

        if export_type == 'csv':
            # Write CSVs to Zip (requires the file to be there already)
            original_filename = media_root_filename
            zip_file_path = media_root_filename + ".zip"
            # zipfile defaults to ZIP_STORED (no compression) - ask for deflate explicitly
            with zipfile.ZipFile(zip_file_path, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
                zipf.write(original_filename, arcname=os.path.basename(original_filename))
            os.unlink(original_filename)
            media_root_filename = zip_file_path
        cgf.filename = media_root_filename
        cgf.task_status = "SUCCESS"
        cgf.generate_end = timezone.now()
        # The progress iterator updated the row directly; keep this instance in step
        # so the full save() below does not write a stale progress value back.
        cgf.progress = 1
        logging.info("Wrote %s", media_root_filename)
    except Exception as e:
        # exc_info keeps the traceback in the log; the message itself is unchanged
        logging.error("Failed to write %s: %s", media_root_filename, e, exc_info=True)
        cgf.exception = str(e)
        cgf.task_status = "FAILURE"
    cgf.save()


@celery.shared_task
def export_cohort_to_downloadable_file(cohort_id, export_type):
    """Celery task: write the cohort's auto-export analysis output node to its cached download file."""
    # This should have been created in analysis.views.views_grid.cohort_grid_export
    cgf = CachedGeneratedFile.objects.get(
        generator="export_cohort_to_downloadable_file",
        params_hash=get_grid_downloadable_file_params_hash(cohort_id, export_type),
    )

    cohort = Cohort.objects.get(pk=cohort_id)
    template = AnalysisTemplate.get_template_from_setting("ANALYSIS_TEMPLATES_AUTO_COHORT_EXPORT")
    cohort_analysis = get_cohort_analysis(cohort, template)
    # Should only be 1
    output_node = cohort_analysis.analysisnode_set.get_subclass(output_node=True)
    _write_node_to_cached_generated_file(cgf, cohort_analysis, output_node, cohort.name, export_type)


@celery.shared_task
def export_sample_to_downloadable_file(sample_id, export_type):
    """Celery task: write the sample's auto analysis output node to its cached download file."""
    # This should have been created in analysis.views.views_grid.sample_grid_export
    cgf = CachedGeneratedFile.objects.get(
        generator="export_sample_to_downloadable_file",
        params_hash=get_grid_downloadable_file_params_hash(sample_id, export_type),
    )

    sample = Sample.objects.get(pk=sample_id)
    template = AnalysisTemplate.get_template_from_setting("ANALYSIS_TEMPLATES_AUTO_SAMPLE")
    sample_analysis = get_sample_analysis(sample, template)
    # Should only be 1
    output_node = SampleNode.objects.get(analysis=sample_analysis, output_node=True)
    _write_node_to_cached_generated_file(cgf, sample_analysis, output_node, sample.name, export_type)
2 changes: 1 addition & 1 deletion analysis/templates/analysis/node_data/node_data_graph.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
{% load static %}
{% block top %}
<link rel="stylesheet" href="{% static 'css/graphs.css' %}" />
<script src="{% static 'js/generated_graphs.js' %}"></script>
<script src="{% static 'js/cached_generated_files.js' %}"></script>
{% endblock top %}

{% block css %}
Expand Down
4 changes: 2 additions & 2 deletions analysis/views/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,14 +609,14 @@ def node_graph(request, analysis_id, node_id, graph_type_id, cmap):
get_node_subclass_or_404(request.user, node_id) # Permission check
node_graph_type = NodeGraphType.objects.get(pk=graph_type_id)
cached_graph = graphcache.async_graph(node_graph_type.graph_class, cmap, node_id)
return HttpResponseRedirect(reverse("cached_generated_file_check", kwargs={"cgf_id": cached_graph.id}))
return redirect(cached_graph)


def column_summary_boxplot(request, analysis_id, node_id, label, variant_column):
get_node_subclass_or_404(request.user, node_id) # Permission check
graph_class_name = full_class_name(ColumnBoxplotGraph)
cached_graph = graphcache.async_graph(graph_class_name, node_id, label, variant_column)
return HttpResponseRedirect(reverse("cached_generated_file_check", kwargs={"cgf_id": cached_graph.id}))
return redirect(cached_graph)


def cohort_zygosity_filters(request, analysis_id, node_id, cohort_id):
Expand Down
42 changes: 19 additions & 23 deletions analysis/views/views_grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,22 @@

from django.contrib.postgres.aggregates.general import StringAgg
from django.core.cache import cache
from django.http import JsonResponse
from django.http.response import StreamingHttpResponse, HttpResponseRedirect
from django.shortcuts import redirect
from django.urls import reverse
from django.utils.decorators import method_decorator
from django.views.decorators.cache import cache_page
from django.views.decorators.vary import vary_on_cookie

from analysis import grids
from analysis.analysis_templates import get_sample_analysis, get_cohort_analysis
from analysis.grid_export import node_grid_get_export_iterator
from analysis.models import AnalysisNode, AnalysisTemplate, SampleNode
from analysis.models import AnalysisNode
from analysis.tasks.analysis_grid_export_tasks import export_cohort_to_downloadable_file, export_sample_to_downloadable_file, get_grid_downloadable_file_params_hash
from analysis.views.analysis_permissions import get_node_subclass_or_non_fatal_exception
from analysis.views.node_json_view import NodeJSONGetView, NodeJSONViewMixin
from library.constants import WEEK_SECS
from library.utils import name_from_filename
from snpdb.models import Sample, Cohort
from snpdb.models import Sample, Cohort, CachedGeneratedFile
from snpdb.models.models_variant import Variant

_NODE_GRID_ALLOWED_PARAMS = {
Expand Down Expand Up @@ -111,35 +112,30 @@ def _get_data(self, request, node, **kwargs):

def cohort_grid_export(request, cohort_id, export_type):
EXPORT_TYPES = {"csv", "vcf"}

cohort = Cohort.get_for_user(request.user, cohort_id)
Cohort.get_for_user(request.user, cohort_id) # Permission check
if export_type not in EXPORT_TYPES:
raise ValueError(f"{export_type} must be one of: {EXPORT_TYPES}")

analysis_template = AnalysisTemplate.get_template_from_setting("ANALYSIS_TEMPLATES_AUTO_COHORT_EXPORT")
analysis = get_cohort_analysis(cohort, analysis_template)
node = analysis.analysisnode_set.get_subclass(output_node=True) # Should only be 1
basename = "_".join([name_from_filename(cohort.name), "annotated", f"v{analysis.annotation_version.pk}",
str(cohort.genome_build)])

filename, file_iterator = node_grid_get_export_iterator(request, node, export_type, basename=basename)
return _get_streaming_response(filename, file_iterator)
params_hash = get_grid_downloadable_file_params_hash(cohort_id, export_type)
task = export_cohort_to_downloadable_file.si(cohort_id, export_type)
cgf = CachedGeneratedFile.get_or_create_and_launch("export_cohort_to_downloadable_file", params_hash, task)
if cgf.exception:
raise ValueError(cgf.exception)
return redirect(cgf)


def sample_grid_export(request, sample_id, export_type):
EXPORT_TYPES = {"csv", "vcf"}

sample = Sample.get_for_user(request.user, sample_id)
Sample.get_for_user(request.user, sample_id) # Permission check
if export_type not in EXPORT_TYPES:
raise ValueError(f"{export_type} must be one of: {EXPORT_TYPES}")

analysis_template = AnalysisTemplate.get_template_from_setting("ANALYSIS_TEMPLATES_AUTO_SAMPLE")
analysis = get_sample_analysis(sample, analysis_template)
node = SampleNode.objects.get(analysis=analysis, output_node=True) # Should only be 1
basename = "_".join([name_from_filename(sample.name), "annotated", f"v{analysis.annotation_version.pk}",
str(sample.genome_build)])
filename, file_iterator = node_grid_get_export_iterator(request, node, export_type, basename=basename)
return _get_streaming_response(filename, file_iterator)
params_hash = get_grid_downloadable_file_params_hash(sample_id, export_type)
task = export_sample_to_downloadable_file.si(sample_id, export_type)
cgf = CachedGeneratedFile.get_or_create_and_launch("export_sample_to_downloadable_file", params_hash, task)
if cgf.exception:
raise ValueError(cgf.exception)
return redirect(cgf)


def node_grid_export(request, analysis_id):
Expand Down
2 changes: 1 addition & 1 deletion genes/templates/genes/view_gene_symbol.html
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
<link rel="stylesheet" href="{% static 'css/graphs.css' %}" />
<script type="text/javascript" src="{% static 'js/lib/plotly-latest.min.js' %}"></script>
<script type="text/javascript" src="{% static 'js/grid.js' %}"></script>
<script src="{% static 'js/generated_graphs.js' %}"></script>
<script src="{% static 'js/cached_generated_files.js' %}"></script>
<style>
#hotspot-graph, #hotspot-graph img {
width: 100%;
Expand Down
8 changes: 8 additions & 0 deletions library/django_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import operator
import os
from functools import reduce
from functools import wraps, partial

Expand Down Expand Up @@ -39,6 +40,13 @@ def get_url_from_view_path(view_path):
return f'{protocol}://{current_site.domain}{view_path}'


def get_url_from_media_root_filename(filename):
    """Convert a file path under MEDIA_ROOT into the matching URL under MEDIA_URL.

    :param filename: path that must start with ``settings.MEDIA_ROOT`` (plus a trailing separator)
    :return: ``settings.MEDIA_URL`` joined with the part of ``filename`` after MEDIA_ROOT
    :raises ValueError: if ``filename`` is not under MEDIA_ROOT
    """
    import posixpath  # local import: URLs are always "/"-separated, whatever the host OS

    media_root_with_slash = os.path.join(settings.MEDIA_ROOT, "")
    if not filename.startswith(media_root_with_slash):
        raise ValueError(f"'{filename}' must start with MEDIA_ROOT: {media_root_with_slash}")
    relative_path = filename[len(media_root_with_slash):]
    # Same result as os.path.join on POSIX; on other platforms this keeps
    # backslashes out of the URL.
    # NOTE(review): the path is not percent-encoded - confirm callers handle
    # names containing characters such as spaces or '#'.
    return posixpath.join(settings.MEDIA_URL, relative_path.replace(os.sep, "/"))


def add_save_message(request, valid, name, created=False):
action = "created" if created else "saved"

Expand Down
2 changes: 1 addition & 1 deletion library/django_utils/jqgrid_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def grid_export_csv(colmodels, items) -> Iterator[str]:
pseudo_buffer = StashFile()
header, labels = colmodel_header_labels(colmodels, label_overrides=label_overrides)
# Don't use dictwriter as some sample names may be the same
writer = csv.writer(pseudo_buffer, dialect='excel')
writer = csv.writer(pseudo_buffer, dialect='excel', escapechar='\\', quoting=csv.QUOTE_NONE)
writer.writerow(header)

def iter_row_writer():
Expand Down
2 changes: 1 addition & 1 deletion pedigree/templates/pedigree/view_ped_file.html
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
{% block head %}
{{ block.super }}
<link rel="stylesheet" href="{% static 'css/graphs.css' %}" />
<script src="{% static 'js/generated_graphs.js' %}"></script>
<script src="{% static 'js/cached_generated_files.js' %}"></script>
{% endblock %}

{% block css %}
Expand Down
4 changes: 2 additions & 2 deletions pedigree/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from django.forms.formsets import formset_factory
from django.forms.models import ModelChoiceField
from django.http.response import HttpResponseRedirect
from django.shortcuts import get_object_or_404, render
from django.shortcuts import get_object_or_404, render, redirect
from django.urls.base import reverse

from library.utils import full_class_name
Expand Down Expand Up @@ -37,7 +37,7 @@ def pedigree_chart(request, ped_file_id):
ped_file = PedFile.get_for_user(request.user, ped_file_id) # Make sure we can access it
graph_class_name = full_class_name(PedigreeChart)
cached_graph = graphcache.async_graph(graph_class_name, ped_file.pk)
return HttpResponseRedirect(reverse("cached_generated_file_check", kwargs={"cgf_id": cached_graph.id}))
return redirect(cached_graph)


def ped_files(request):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
}

</style>
<script src="{% static 'js/generated_graphs.js' %}"></script>
<script src="{% static 'js/cached_generated_files.js' %}"></script>
<script>
{% if qc.data_state == 'C' and exec_summary.data_state == 'C' %}
EXEC_SUMMARY_ID = {{ exec_summary.pk }};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
{% load seqauto_record_tags %}
<link rel="stylesheet" href="{% static 'css/graphs.css' %}" />
<link rel="stylesheet" href="{% static 'css/seqauto.css' %}" />
<script src="{% static 'js/generated_graphs.js' %}"></script>
<script src="{% static 'js/cached_generated_files.js' %}"></script>
<script>
var DELETE_URL = "{% url 'cached_generated_file_delete' %}";
var BASE_QC_POLL_URL = "{% url 'sequencing_run_qc_graph' sequencing_run.pk 'QC_COMPARE_TYPE' %}";
Expand Down
2 changes: 1 addition & 1 deletion seqauto/templates/seqauto/view_sequencing_run.html
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
{% block head %}
<link rel="stylesheet" href="{% static 'css/graphs.css' %}" />
<link rel="stylesheet" href="{% static 'css/seqauto.css' %}" />
<script src="{% static 'js/generated_graphs.js' %}"></script>
<script src="{% static 'js/cached_generated_files.js' %}"></script>

<style>
input#id_path {
Expand Down
8 changes: 4 additions & 4 deletions seqauto/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from django.core.exceptions import PermissionDenied
from django.db.models.aggregates import Count
from django.http.response import HttpResponseRedirect, JsonResponse, HttpResponse
from django.shortcuts import render, get_object_or_404
from django.shortcuts import render, get_object_or_404, redirect
from django.urls.base import reverse
from django.utils.decorators import method_decorator
from django.utils.safestring import mark_safe
Expand Down Expand Up @@ -478,7 +478,7 @@ def sequencing_run_qc_graph(request, sequencing_run_id, qc_compare_type):
_ = QCCompareType(qc_compare_type) # Check valid
graph_class_name = full_class_name(SequencingRunQCGraph)
cached_graph = graphcache.async_graph(graph_class_name, sequencing_run_id, qc_compare_type)
return HttpResponseRedirect(reverse("cached_generated_file_check", kwargs={"cgf_id": cached_graph.id}))
return redirect(cached_graph)


def sequencing_run_qc_json_graph(request, sequencing_run_id, qc_compare_type):
Expand Down Expand Up @@ -565,13 +565,13 @@ def get_field(f):
def index_metrics_qc_graph(request, illumina_qc_id):
graph_class_name = full_class_name(IndexMetricsQCGraph)
cached_graph = graphcache.async_graph(graph_class_name, illumina_qc_id)
return HttpResponseRedirect(reverse("cached_generated_file_check", kwargs={"cgf_id": cached_graph.id}))
return redirect(cached_graph)


def qc_exec_summary_graph(request, qc_exec_summary_id, qc_compare_type):
graph_class_name = full_class_name(QCExecSummaryGraph)
cached_graph = graphcache.async_graph(graph_class_name, qc_exec_summary_id, qc_compare_type)
return HttpResponseRedirect(reverse("cached_generated_file_check", kwargs={"cgf_id": cached_graph.id}))
return redirect(cached_graph)


def qc_exec_summary_json_graph(request, qc_exec_summary_id, qc_compare_type):
Expand Down
Loading

0 comments on commit 539be3e

Please sign in to comment.