Download CSVs: store files under the download id and derive a descriptive
name at delivery time.

Summary of the patch below:
  * create_csv.create_filename() now returns '{download_id}.csv'; the old
    truncating create_filename() and term_frequency_filename() are removed.
  * tasks.make_csv / tasks.make_term_frequency_csv take the download id
    (parameter `log_id`) and pass it on to create_csv.
  * Download.descriptive_filename() builds
    '{download_type}__{corpus name}__{completed timestamp}.csv'; it is
    called by send_csv_email and send_csv_file.

Reviewer notes:
  * Line structure was restored from a whitespace-collapsed copy of this
    patch. Indentation and the position of blank context lines were
    reconstructed to match the hunk line counts -- TODO confirm against the
    target tree before applying.
  * views.py, ResultsDownloadView.post: the comment "Create download for
    download history" is kept directly above Download.objects.create(...)
    instead of being moved above download.complete(...); the blob hash on
    the views.py index line predates that adjustment.
  * NOTE(review): descriptive_filename() calls self.completed.strftime(), so
    it presumably must only be called after complete() has set `completed`
    -- verify for every caller.

diff --git a/backend/download/conftest.py b/backend/download/conftest.py
index 1eb5e9b54..5ef03873d 100644
--- a/backend/download/conftest.py
+++ b/backend/download/conftest.py
@@ -50,18 +50,22 @@ def index_ml_mock_corpus(es_client, ml_mock_corpus):
 def index_mock_corpus(es_client, mock_corpus, index_small_mock_corpus, index_large_mock_corpus, index_ml_mock_corpus):
     yield mock_corpus
 
-def save_all_results_csv(mock_corpus, mock_corpus_specs):
+def all_results_request_json(mock_corpus, mock_corpus_specs):
     fields = mock_corpus_specs['fields']
     query = mock_corpus_specs['example_query']
 
-    request_json = {
+    return {
         'corpus': mock_corpus,
         'es_query': MATCH_ALL,
         'fields': fields,
         'route': '/search/{};query={}'.format(mock_corpus, query)
     }
+
+def save_all_results_csv(mock_corpus, mock_corpus_specs):
+    request_json = all_results_request_json(mock_corpus, mock_corpus_specs)
     results = tasks.download_scroll(request_json)
-    filename = tasks.make_csv(results, request_json)
+    fake_id = mock_corpus + '_all_results'
+    filename = tasks.make_csv(results, request_json, fake_id)
 
     return filename
 
diff --git a/backend/download/create_csv.py b/backend/download/create_csv.py
index 01e01092b..f0f93b8e9 100644
--- a/backend/download/create_csv.py
+++ b/backend/download/create_csv.py
@@ -21,12 +21,10 @@ def write_file(filename, fieldnames, rows, dialect = 'excel'):
 
     return filepath
 
-def create_filename(descriptive_part, essential_suffix = '.csv'):
-    max_length = 255 - (len(essential_suffix) + len(settings.CSV_FILES_PATH))
-    truncated = descriptive_part[:min(max_length, len(descriptive_part))]
-    return truncated + essential_suffix
+def create_filename(download_id):
+    return f'{download_id}.csv'
 
-def search_results_csv(results, fields, query):
+def search_results_csv(results, fields, query, download_id):
     entries = []
     field_set = set(fields)
     field_set.update(['query'])
@@ -50,14 +48,14 @@ def search_results_csv(results, fields, query):
                     entry.update({highlight_field_name: soup.get_text()})
         entries.append(entry)
 
-    filename = create_filename(query)
+    filename = create_filename(download_id)
     field_set.discard('context')
     fieldnames = sorted(field_set)
     filepath = write_file(filename, fieldnames, entries, dialect = 'resultsDialect')
     return filepath
 
 
-def term_frequency_csv(queries, results, field_name, unit = None):
+def term_frequency_csv(queries, results, field_name, download_id, unit = None):
     has_token_counts = results[0].get('token_count', None) != None
     query_column = ['Query'] if len(queries) > 1 else []
     freq_columns = ['Term frequency', 'Relative term frequency (by # documents)', 'Total documents']
@@ -66,17 +64,10 @@ def term_frequency_csv(queries, results, field_name, unit = None):
 
     rows = term_frequency_csv_rows(queries, results, field_name, unit)
 
-    filename = term_frequency_filename(queries, field_name)
+    filename = create_filename(download_id)
     filepath = write_file(filename, fieldnames, rows)
     return filepath
 
-def term_frequency_filename(queries, field_name):
-    querystring = '_'.join(queries)
-    timestamp = datetime.now().isoformat(sep='_', timespec='minutes') # ensure csv filenames are unique with timestamp
-    suffix = '_' + timestamp + '.csv'
-    description = 'term_frequency_{}_{}'.format(field_name, querystring)
-    return create_filename(description, suffix)
-
 def term_frequency_csv_rows(queries, results, field_name, unit):
     for result in results:
         field_value = format_field_value(result['key'], unit)
diff --git a/backend/download/mail.py b/backend/download/mail.py
index 5dc840ec1..0220edd58 100644
--- a/backend/download/mail.py
+++ b/backend/download/mail.py
@@ -20,8 +20,8 @@ def send_csv_email(user_email, username, download_id):
     subject = 'I-Analyzer CSV download'
     from_email = settings.DEFAULT_FROM_EMAIL
 
-    path = Download.objects.get(id=download_id).filename
-    _, filename = os.path.split(path)
+    download = Download.objects.get(id=download_id)
+    filename = download.descriptive_filename()
 
     context = {
         'email_title': 'Download CSV',
diff --git a/backend/download/models.py b/backend/download/models.py
index ca49e8db6..2e5e9bcd1 100644
--- a/backend/download/models.py
+++ b/backend/download/models.py
@@ -51,3 +51,10 @@ def complete(self, filename = None):
         self.filename = filename
         self.completed = datetime.now()
         self.save()
+
+    def descriptive_filename(self):
+        corpus_name = self.corpus.name
+        type_name = self.download_type
+        timestamp = self.completed.strftime('%Y-%m-%d %H:%M')
+
+        return f'{type_name}__{corpus_name}__{timestamp}.csv'
diff --git a/backend/download/tasks.py b/backend/download/tasks.py
index 91c88f358..a47340ba6 100644
--- a/backend/download/tasks.py
+++ b/backend/download/tasks.py
@@ -37,9 +37,9 @@ def download_scroll(request_json, download_size=10000):
     return results
 
 @shared_task()
-def make_csv(results, request_json):
+def make_csv(results, request_json, log_id):
     query = create_query(request_json)
-    filepath = create_csv.search_results_csv(results, request_json['fields'], query)
+    filepath = create_csv.search_results_csv(results, request_json['fields'], query, log_id)
     return filepath
 
 
@@ -82,7 +82,7 @@ def download_search_results(request_json, user):
 
     make_chain = lambda: chain(
         download_scroll.s(request_json, download_limit),
-        make_csv.s(request_json),
+        make_csv.s(request_json, download.id),
         complete_download.s(download.id),
         csv_data_email.s(user.email, user.username),
     ).on_error(complete_failed_download.s(download.id))
@@ -90,12 +90,12 @@ def download_search_results(request_json, user):
     return try_download(make_chain, download)
 
 @shared_task()
-def make_term_frequency_csv(results_per_series, parameters_per_series):
+def make_term_frequency_csv(results_per_series, parameters_per_series, log_id):
     '''
     Export term frequency results to a csv.
     '''
     query_per_series, field_name, unit = extract_term_frequency_download_metadata(parameters_per_series)
-    return create_csv.term_frequency_csv(query_per_series, results_per_series, field_name, unit = unit)
+    return create_csv.term_frequency_csv(query_per_series, results_per_series, field_name, log_id, unit = unit)
 
 
 def term_frequency_full_data_tasks(parameters_per_series, visualization_type):
@@ -166,7 +166,7 @@ def download_full_data(request_json, user):
 
     make_chain = lambda : chain(
         task,
-        make_term_frequency_csv.s(parameters),
+        make_term_frequency_csv.s(parameters, download.id),
         complete_download.s(download.id),
         csv_data_email.s(user.email, user.username),
     ).on_error(complete_failed_download.s(download.id))
diff --git a/backend/download/tests/test_csv_results.py b/backend/download/tests/test_csv_results.py
index b6ff8b0da..f33a896ef 100644
--- a/backend/download/tests/test_csv_results.py
+++ b/backend/download/tests/test_csv_results.py
@@ -45,7 +45,7 @@ def result_csv_with_highlights(csv_directory):
     route = 'parliament-netherlands_query=test'
     fields = ['speech']
 
-    file = create_csv.search_results_csv(hits(mock_es_result), fields, route)
+    file = create_csv.search_results_csv(hits(mock_es_result), fields, route, 0)
     return file
 
 def test_create_csv(result_csv_with_highlights):
@@ -190,7 +190,7 @@ def test_csv_encoding(ml_mock_corpus_results_csv):
 
 @pytest.fixture()
 def term_frequency_file(index_small_mock_corpus, csv_directory):
-    filename = create_csv.term_frequency_csv(mock_queries, mock_timeline_result, 'date', unit = 'year')
+    filename = create_csv.term_frequency_csv(mock_queries, mock_timeline_result, 'date', 0, unit = 'year')
     return filename
 
 
diff --git a/backend/download/tests/test_file_storage.py b/backend/download/tests/test_file_storage.py
index 767b23f46..0f96c30b7 100644
--- a/backend/download/tests/test_file_storage.py
+++ b/backend/download/tests/test_file_storage.py
@@ -1,7 +1,13 @@
+import os
 from download import tasks
+from download.conftest import all_results_request_json
+from download.models import Download
 
-def test_format_route_to_filename():
-    route = '/search/mock-corpus;query=test'
-    request_json = { 'route': route }
-    output = tasks.create_query(request_json)
-    assert output == 'mock-corpus_query=test'
+def test_download_filename(auth_user, small_mock_corpus, index_small_mock_corpus, small_mock_corpus_specs):
+    request = all_results_request_json(small_mock_corpus, small_mock_corpus_specs)
+    tasks.download_search_results(request, auth_user).apply()
+    download = Download.objects.latest('completed')
+    _, filename = os.path.split(download.filename)
+    name, ext = os.path.splitext(filename)
+    assert name == str(download.id)
+    assert ext == '.csv'
diff --git a/backend/download/tests/test_full_data.py b/backend/download/tests/test_full_data.py
index 385fb701b..47e553310 100644
--- a/backend/download/tests/test_full_data.py
+++ b/backend/download/tests/test_full_data.py
@@ -24,7 +24,8 @@ def test_timeline_full_data(small_mock_corpus, index_small_mock_corpus, small_mo
 
     group = tasks.term_frequency_full_data_tasks(full_data_parameters, 'date_term_frequency')
     results = group.apply().get()
-    filename = tasks.make_term_frequency_csv(results, full_data_parameters)
+    log_id = 0 # fake ID
+    filename = tasks.make_term_frequency_csv(results, full_data_parameters, log_id)
 
     with open(filename) as f:
         reader = csv.DictReader(f)
diff --git a/backend/download/views.py b/backend/download/views.py
index 273bc672f..0e40bfa67 100644
--- a/backend/download/views.py
+++ b/backend/download/views.py
@@ -21,14 +21,15 @@
 logger = logging.getLogger()
 
 
-def send_csv_file(directory, filename, download_type, encoding, format=None):
+def send_csv_file(download, directory, encoding, format=None):
     '''
     Perform final formatting and send a CSV file as a FileResponse
     '''
     converted_filename = convert_csv.convert_csv(
-        directory, filename, download_type, encoding, format)
+        directory, download.filename, download.download_type, encoding, format)
     path = os.path.join(directory, converted_filename)
-    return FileResponse(open(path, 'rb'), filename=filename, as_attachment=True)
+
+    return FileResponse(open(path, 'rb'), filename=download.descriptive_filename(), as_attachment=True)
 
 class ResultsDownloadView(APIView):
     '''
@@ -51,13 +52,13 @@ def post(self, request, *args, **kwargs):
             handle_tags_in_request(request)
             search_results = es_download.normal_search(
                 corpus_name, request.data['es_query'], request.data['size'])
-            csv_path = tasks.make_csv(search_results, request.data)
-            directory, filename = os.path.split(csv_path)
             # Create download for download history
             download = Download.objects.create(
                 download_type='search_results', corpus=corpus, parameters=request.data, user=request.user)
+            csv_path = tasks.make_csv(search_results, request.data, download.id)
+            directory, filename = os.path.split(csv_path)
             download.complete(filename=filename)
-            return send_csv_file(directory, filename, 'search_results', request.data['encoding'])
+            return send_csv_file(download, directory, request.data['encoding'])
         except Exception as e:
             logger.error(e)
             raise APIException(detail = 'Download failed: could not generate csv file')
@@ -138,13 +139,13 @@ def get(self, request, *args, **kwargs):
         encoding = request.query_params.get('encoding', 'utf-8')
         format = request.query_params.get('table_format', None)
 
-        record = Download.objects.get(id=id)
-        if not record.user == request.user:
+        download = Download.objects.get(id=id)
+        if not download.user == request.user:
             raise PermissionDenied(detail='User has no access to this download')
 
         directory = settings.CSV_FILES_PATH
 
-        if not os.path.isfile(os.path.join(directory, record.filename)):
+        if not os.path.isfile(os.path.join(directory, download.filename)):
             raise NotFound(detail='File does not exist')
 
-        return send_csv_file(directory, record.filename, record.download_type, encoding, format)
+        return send_csv_file(download, directory, encoding, format)