def update_cgf_progress_iterator(iterator, cgf_id, total_records, update_size):
    """Yield every item of *iterator* unchanged while recording progress in the DB.

    Progress is written to the ``CachedGeneratedFile`` row whose id is
    *cgf_id* via queryset ``update()`` calls, so no model instance is loaded
    or saved here.

    Args:
        iterator: Iterable of records to pass through.
        cgf_id: Primary key of the ``CachedGeneratedFile`` row to update.
        total_records: Expected number of records, used as the denominator of
            the progress fraction. May be 0 or ``None``, in which case the
            intermediate progress is reported as 0.0.
        update_size: Write progress every this many records. Coerced to an
            int and clamped to at least 1.

    Yields:
        Each record from *iterator*, in order.
    """
    # Must be an int so the modulus below hits exactly, and >= 1 so that
    # ``i % step`` can never be a modulo-by-zero (e.g. update_size=0.5).
    step = max(1, int(update_size))
    cgf_qs = CachedGeneratedFile.objects.filter(id=cgf_id)
    cgf_qs.update(progress=0)

    for i, record in enumerate(iterator):
        if i % step == 0:
            if total_records:
                # The iterator may yield more items than total_records (for
                # example extra header lines), so cap the fraction at 1.0.
                progress = min(i / total_records, 1.0)
            else:
                # Unknown or empty total: avoid ZeroDivisionError / TypeError.
                progress = 0.0
            cgf_qs.update(progress=progress)
        yield record

    # NOTE(review): this marks the row SUCCESS as soon as the source iterator
    # is exhausted, which may be before the consumer has finished with the
    # last record. Kept for backward compatibility; confirm against callers.
    cgf_qs.update(progress=1, task_status='SUCCESS')
class Migration(migrations.Migration):
    """Add a nullable float ``progress`` field to the ``cachedgeneratedfile`` model."""

    # Must be applied after the previous snpdb migration.
    dependencies = [("snpdb", "0145_one_off_handle_renamed_table_inherited_index")]

    operations = [
        migrations.AddField(
            field=models.FloatField(null=True),
            name="progress",
            model_name="cachedgeneratedfile",
        ),
    ]
a/snpdb/templates/snpdb/data/sample_graphs_tab.html b/snpdb/templates/snpdb/data/sample_graphs_tab.html index ba22ae7f1..70d106893 100644 --- a/snpdb/templates/snpdb/data/sample_graphs_tab.html +++ b/snpdb/templates/snpdb/data/sample_graphs_tab.html @@ -1,5 +1,4 @@ {% load static %} -