Skip to content

Commit

Permalink
issue #1178 - properly quote CSVs (handle commas)
Browse files Browse the repository at this point in the history
  • Loading branch information
davmlaw committed Oct 8, 2024
1 parent f60ae5e commit 72293cc
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 18 deletions.
2 changes: 1 addition & 1 deletion analysis/tasks/analysis_grid_export_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def _write_node_to_cached_generated_file(cgf, analysis, node, name, export_type)
if export_type == 'csv':
original_filename = media_root_filename
zip_file_path = media_root_filename + ".zip"
with zipfile.ZipFile(zip_file_path, 'w') as zipf:
with zipfile.ZipFile(zip_file_path, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
zipf.write(original_filename, arcname=os.path.basename(original_filename))
os.unlink(original_filename)
media_root_filename = zip_file_path
Expand Down
2 changes: 1 addition & 1 deletion library/django_utils/jqgrid_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def grid_export_csv(colmodels, items) -> Iterator[str]:
pseudo_buffer = StashFile()
header, labels = colmodel_header_labels(colmodels, label_overrides=label_overrides)
# Don't use dictwriter as some sample names may be the same
writer = csv.writer(pseudo_buffer, dialect='excel', escapechar='\\', quoting=csv.QUOTE_NONE)
writer = csv.writer(pseudo_buffer, dialect='excel', quoting=csv.QUOTE_MINIMAL)
writer.writerow(header)

def iter_row_writer():
Expand Down
52 changes: 36 additions & 16 deletions snpdb/management/commands/one_off_fix_csv_quoting.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import csv
import logging
import os
import zipfile
from zipfile import ZipFile

from django.core.management.base import BaseCommand
from snpdb.models import CachedGeneratedFile
Expand All @@ -11,25 +13,43 @@ class Command(BaseCommand):
"""
@staticmethod
def fix_csv_zip(zip_filename):
# Move the old one to a backup
backup_name = zip_filename + ".backup"
os.rename(zip_filename, backup_name)
def fix_csv(csv_file, fixed_csv_filename):
reader = csv.reader(csv_file.read().decode('utf-8').splitlines(), dialect='excel', escapechar='\\',
quoting=csv.QUOTE_NONE)

with zipfile.ZipFile(zip_filename, 'r') as zip_file:
csv_filename = os.path.splitext(os.path.basename(zip_filename))[0]
with zip_file.open(csv_filename) as csv_file:
reader = csv.reader(csv_file.read().decode('utf-8').splitlines(), dialect='excel', escapechar='\\',
quoting=csv.QUOTE_NONE)
logging.info("Fixing in file: %s", fixed_csv_filename)
with open(fixed_csv_filename, "wt") as out_csv_file:
writer = csv.writer(out_csv_file, dialect='excel', quoting=csv.QUOTE_MINIMAL)
for r in reader:
writer.writerow(r)

@staticmethod
def fix_csv_zip(zip_file, zip_filename):
csv_filename = os.path.splitext(os.path.basename(zip_filename))[0]
with zip_file.open(csv_filename) as csv_file:
fixed_csv_filename = os.path.splitext(zip_filename)[0]
with open(fixed_csv_filename, "wt") as out_csv_file:
writer = csv.writer(out_csv_file, dialect='excel', quoting=csv.QUOTE_MINIMAL)
for r in reader:
writer.writerow(r)
Command.fix_csv(csv_file, fixed_csv_filename)

zip_file_path = fixed_csv_filename + ".zip"
logging.info("Putting into zip: %s", zip_file_path)
with zipfile.ZipFile(zip_file_path, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
zipf.write(fixed_csv_filename, arcname=os.path.basename(fixed_csv_filename))
os.unlink(fixed_csv_filename)

def handle(self, *args, **options):
AFFECTED_GENERATORS = ["export_sample_to_downloadable_file", "export_cohort_to_downloadable_file"]
for cgf in CachedGeneratedFile.objects.filter(generator__in=AFFECTED_GENERATORS):
self.fix_csv_zip(cgf.filename)

for cgf in CachedGeneratedFile.objects.filter(generator__in=AFFECTED_GENERATORS, filename__isnull=False):
if not (cgf.filename.endswith(".csv") or cgf.filename.endswith(".csv.zip")):
continue

if os.path.exists(cgf.filename):
logging.info("Need to fix: %s", cgf.filename)
# Move the old one to a backup
backup_name = cgf.filename + ".backup"
os.rename(cgf.filename, backup_name)
if cgf.filename.endswith(".zip"):
with zipfile.ZipFile(backup_name, 'r') as zip_file:
self.fix_csv_zip(zip_file, cgf.filename)
else:
with open(backup_name, "rb") as csv_file:
Command.fix_csv(csv_file, cgf.filename)

0 comments on commit 72293cc

Please sign in to comment.