From 35abc5e599f57ba0f318ad840048df7226805d25 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Wed, 11 Oct 2023 17:07:29 +0200 Subject: [PATCH] convert ngram data to csv format --- backend/download/create_csv.py | 19 +++++++++++++++++- backend/download/tasks.py | 6 ++++-- backend/download/tests/test_csv_results.py | 23 ++++++++++++++++++++++ 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/backend/download/create_csv.py b/backend/download/create_csv.py index 01e01092b..19b4dd754 100644 --- a/backend/download/create_csv.py +++ b/backend/download/create_csv.py @@ -108,4 +108,21 @@ def format_field_value(value, unit): 'week': '%Y-%m-%d', 'day': '%Y-%m-%d' } - return date.strftime(formats[unit]) + return + +def ngram_csv(results, filename): + rows = ngram_table(results) + fieldnames = ['date', 'N-gram', 'Frequency'] + filepath = write_file(filename, fieldnames, rows) + return filepath + +def ngram_table(results): + rows = [] + for index, time_point in enumerate(results['time_points']): + for ngram in results['words']: + rows.append({ + 'date': time_point, + 'N-gram': ngram['label'], + 'Frequency': ngram['data'][index] + }) + return rows diff --git a/backend/download/tasks.py b/backend/download/tasks.py index ab70f5296..3f2d39938 100644 --- a/backend/download/tasks.py +++ b/backend/download/tasks.py @@ -90,10 +90,12 @@ def download_search_results(request_json, user): return try_download(make_chain, download) @shared_task() -def make_term_frequency_csv(results_per_series, parameters_per_series): +def make_full_data_csv(results_per_series, visualization_type, parameters_per_series): ''' Export term frequency results to a csv. ''' + if visualization_type == 'ngram': + return create_csv.ngram_csv(results_per_series) query_per_series, field_name, unit = extract_term_frequency_download_metadata(parameters_per_series) return create_csv.term_frequency_csv(query_per_series, results_per_series, field_name, unit = unit) @@ -170,7 +172,7 @@ def download_full_data(request_json, user): make_chain = lambda : chain( task, - make_term_frequency_csv.s(parameters), + make_full_data_csv.s(visualization_type, parameters), complete_download.s(download.id), csv_data_email.s(user.email, user.username), ).on_error(complete_failed_download.s(download.id)) diff --git a/backend/download/tests/test_csv_results.py b/backend/download/tests/test_csv_results.py index b6ff8b0da..589881cf3 100644 --- a/backend/download/tests/test_csv_results.py +++ b/backend/download/tests/test_csv_results.py @@ -208,3 +208,26 @@ def test_date_format(): for value, unit, expected in cases: assert create_csv.format_field_value(value, unit) == expected + + +mock_ngram_data = { + 'words': [ + {'label': 'ex parrot', 'data': [2, 3]}, + {'label': 'this parrot what', 'data': [4, 8]}, + {'label': 'dead parrot when', 'data': [4, 6]}, + ], + 'time_points': ['1960-1965', '1962-1967'] +} + +expected_csv_table = [ + {'date': '1960-1965', 'N-gram': 'ex parrot', 'Frequency': 2}, + {'date': '1960-1965', 'N-gram': 'this parrot what', 'Frequency': 4}, + {'date': '1960-1965', 'N-gram': 'dead parrot when', 'Frequency': 4}, + {'date': '1962-1967', 'N-gram': 'ex parrot', 'Frequency': 3}, + {'date': '1962-1967', 'N-gram': 'this parrot what', 'Frequency': 8}, + {'date': '1962-1967', 'N-gram': 'dead parrot when', 'Frequency': 6}, +] + +def test_ngram_table(): + table = create_csv.ngram_table(mock_ngram_data) + assert table == expected_csv_table \ No newline at end of file