Skip to content

Commit

Permalink
convert ngram data to csv format
Browse files Browse the repository at this point in the history
  • Loading branch information
BeritJanssen committed Oct 11, 2023
1 parent a17c744 commit 35abc5e
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 3 deletions.
19 changes: 18 additions & 1 deletion backend/download/create_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,4 +108,21 @@ def format_field_value(value, unit):
'week': '%Y-%m-%d',
'day': '%Y-%m-%d'
}
return date.strftime(formats[unit])
return

def ngram_csv(results, filename=None):
    '''
    Write n-gram results to a csv file and return what `write_file` returns
    (used by this function as the path of the written file).

    Parameters:
    - `results`: n-gram results; passed unchanged to `ngram_table`, which
      turns them into one row per time point and n-gram.
    - `filename`: name handed to `write_file`. Optional: when omitted, a
      unique name is generated, so callers that only supply the results do
      not fail with a TypeError and do not overwrite each other's files.
    '''
    if filename is None:
        # Local import: only needed for the fallback name.
        from uuid import uuid4
        # NOTE(review): assumes write_file expects a complete file name
        # including the extension - confirm against write_file.
        filename = 'ngram_{}.csv'.format(uuid4().hex)
    fieldnames = ['date', 'N-gram', 'Frequency']
    rows = ngram_table(results)
    return write_file(filename, fieldnames, rows)

def ngram_table(results):
    '''
    Flatten n-gram results into a list of csv rows.

    Parameters:
    - `results`: a mapping with
        - `'time_points'`: an iterable of time point labels;
        - `'words'`: an iterable of mappings, each with a `'label'` and a
          `'data'` sequence that is indexed by the position of the time point.

    Returns a list of dicts with the keys `'date'`, `'N-gram'` and
    `'Frequency'`: one dict per combination of time point and n-gram,
    grouped by time point, in the order of the input.

    Raises `KeyError` / `IndexError` if an expected key is missing or a
    `'data'` sequence has fewer values than there are time points.
    '''
    return [
        {
            'date': time_point,
            'N-gram': ngram['label'],
            'Frequency': ngram['data'][index],
        }
        for index, time_point in enumerate(results['time_points'])
        for ngram in results['words']
    ]
6 changes: 4 additions & 2 deletions backend/download/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,12 @@ def download_search_results(request_json, user):
return try_download(make_chain, download)

@shared_task()
def make_term_frequency_csv(results_per_series, parameters_per_series):
def make_full_data_csv(results_per_series, visualization_type, parameters_per_series):
    '''
    Export full data results to a csv.

    Results for the 'ngram' visualization are handed to `create_csv.ngram_csv`;
    results for any other visualization type are exported as term frequency
    results. Returns the value returned by the `create_csv` function used.
    '''
    is_ngram = visualization_type == 'ngram'
    if is_ngram:
        # NOTE(review): ngram_csv also declares a filename parameter, but only
        # the results are passed here - verify that this is intended.
        return create_csv.ngram_csv(results_per_series)

    metadata = extract_term_frequency_download_metadata(parameters_per_series)
    queries, field, time_unit = metadata
    return create_csv.term_frequency_csv(
        queries, results_per_series, field, unit=time_unit
    )

Expand Down Expand Up @@ -170,7 +172,7 @@ def download_full_data(request_json, user):

make_chain = lambda : chain(
task,
make_term_frequency_csv.s(parameters),
make_full_data_csv.s(visualization_type, parameters),
complete_download.s(download.id),
csv_data_email.s(user.email, user.username),
).on_error(complete_failed_download.s(download.id))
Expand Down
23 changes: 23 additions & 0 deletions backend/download/tests/test_csv_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,26 @@ def test_date_format():

for value, unit, expected in cases:
assert create_csv.format_field_value(value, unit) == expected


# Mock n-gram results: every entry in 'words' carries one value in 'data'
# for each entry in 'time_points'.
mock_ngram_data = {
    'words': [
        {
            'label': 'ex parrot',
            'data': [2, 3],
        },
        {
            'label': 'this parrot what',
            'data': [4, 8],
        },
        {
            'label': 'dead parrot when',
            'data': [4, 6],
        },
    ],
    'time_points': [
        '1960-1965',
        '1962-1967',
    ],
}

# Rows expected for the mock n-gram results: grouped by time point, with one
# row per n-gram within each time point.
expected_csv_table = [
    # first time point
    {'date': '1960-1965', 'N-gram': 'ex parrot', 'Frequency': 2},
    {'date': '1960-1965', 'N-gram': 'this parrot what', 'Frequency': 4},
    {'date': '1960-1965', 'N-gram': 'dead parrot when', 'Frequency': 4},
    # second time point
    {'date': '1962-1967', 'N-gram': 'ex parrot', 'Frequency': 3},
    {'date': '1962-1967', 'N-gram': 'this parrot what', 'Frequency': 8},
    {'date': '1962-1967', 'N-gram': 'dead parrot when', 'Frequency': 6},
]

def test_ngram_table():
    '''
    The mock n-gram results are converted to the expected csv rows.
    '''
    assert create_csv.ngram_table(mock_ngram_data) == expected_csv_table

0 comments on commit 35abc5e

Please sign in to comment.