From 27d66d846bb62e05a5bf0298be7fd3c7554ae1f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Ja=C5=A1ek?= Date: Fri, 27 Sep 2024 11:26:49 +0200 Subject: [PATCH] fix encoding in csv files on windows (#1098) excel needs BOM to load it properly CPCN-761 --- newsroom/agenda/formatters/csv_formatter.py | 25 ++++++++++++++------- tests/core/test_csv_formatter.py | 2 +- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/newsroom/agenda/formatters/csv_formatter.py b/newsroom/agenda/formatters/csv_formatter.py index f71a7b141..0188556a3 100644 --- a/newsroom/agenda/formatters/csv_formatter.py +++ b/newsroom/agenda/formatters/csv_formatter.py @@ -1,13 +1,14 @@ -from newsroom.formatter import BaseFormatter import csv import io import arrow -from werkzeug.utils import secure_filename -from newsroom.utils import parse_dates + from datetime import datetime from typing import List, Dict, Any, Union, Tuple, Optional -from newsroom.agenda.utils import get_filtered_subject from flask import current_app as app +from werkzeug.utils import secure_filename +from newsroom.utils import parse_dates +from newsroom.formatter import BaseFormatter +from newsroom.agenda.utils import get_filtered_subject class CSVFormatter(BaseFormatter): @@ -39,7 +40,7 @@ def serialize_to_csv(self, items: List[Dict[str, Any]]) -> bytes: csv_writer.writerow(item) csv_string.seek(0) # Reset the buffer position - return csv_string.getvalue().encode("utf-8") + return csv_string.getvalue().encode("utf-8-sig") def format_event(self, item: Dict[str, Any]) -> Dict[str, Any]: subj_schemas = app.config.get("AGENDA_CSV_SUBJECT_SCHEMES", []) @@ -96,9 +97,7 @@ def format_location(self, item: Dict[str, Any], field: str) -> str: return "" def format_list(self, item: Dict[str, Any], key: str, language: Optional[str] = None) -> str: - values = [ - v.get("translations", {}).get("name", {}).get(language) or v.get("name", "") for v in item.get(key, []) - ] + values = [get_translated_name(v, language) for v in item.get(key, [])] return ",".join(list(filter(bool, values))) def format_contact_info(self, item: Dict[str, Any]) -> str: @@ -129,3 +128,13 @@ def format_coverage(self, item: Dict[str, Any], field: str) -> str: for coverage in coverages: value.append(coverage.get(field, "")) return ",".join(value) + + +def get_translated_name(value: Dict[str, Any], language: Optional[str] = None) -> str: + """ + Get translation for the given language + """ + try: + return value["translations"]["name"][language] + except (KeyError, TypeError): + return value.get("name", "") diff --git a/tests/core/test_csv_formatter.py b/tests/core/test_csv_formatter.py index f73505fb2..422897119 100644 --- a/tests/core/test_csv_formatter.py +++ b/tests/core/test_csv_formatter.py @@ -50,7 +50,7 @@ def read_csv(data): csv_data = formatter.format_item(data, item_type="agenda") - csv_string = csv_data.decode("utf-8") + csv_string = csv_data.decode("utf-8-sig") csv_lines = csv_string.split("\n") csv_reader = csv.reader(csv_lines) header = next(csv_reader)