From bff435dfcdceb6f169fb1e47043eb4e9fa6e5068 Mon Sep 17 00:00:00 2001 From: Matt Hegarty Date: Fri, 3 Mar 2023 20:59:51 +0000 Subject: [PATCH] Escape formulae on export (#540) --- docs/formats.rst | 4 +++ docs/requirements.txt | 2 +- src/tablib/formats/_xlsx.py | 28 +++++++++++-------- tests/test_tablib.py | 55 +++++++++++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+), 12 deletions(-) diff --git a/docs/formats.rst b/docs/formats.rst index c3620da6..268c3c83 100644 --- a/docs/formats.rst +++ b/docs/formats.rst @@ -247,6 +247,10 @@ data. Reads cell values instead of formulas. +You can export data to xlsx format by calling :meth:`export('xlsx') <.export>`. +There are optional parameters to control the export. +For available parameters, see :meth:`tablib.formats._xlsx.XLSXFormat.export_set`. + .. admonition:: Binary Warning The ``xlsx`` file format is binary, so make sure to write in binary mode:: diff --git a/docs/requirements.txt b/docs/requirements.txt index 498f43db..553b0c0f 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1 +1 @@ -sphinx==4.2.0 +sphinx==6.1.3 diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py index c84ad540..974e14bb 100644 --- a/src/tablib/formats/_xlsx.py +++ b/src/tablib/formats/_xlsx.py @@ -35,12 +35,18 @@ def detect(cls, stream): return False @classmethod - def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-"): + def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-", escape=False): """Returns XLSX representation of Dataset. - If dataset.title contains characters which are considered invalid for an XLSX file + If ``freeze_panes`` is True, Export will freeze panes only after first line. + + If ``dataset.title`` contains characters which are considered invalid for an XLSX file sheet name (http://www.excelcodex.com/2012/06/worksheets-naming-conventions/), they will - be replaced with `invalid_char_subst`. + be replaced with ``invalid_char_subst``. 
+ + If ``escape`` is True, formulae will have any leading '=' characters removed. + This is a security measure to prevent formulae from executing by default + in exported XLSX files. """ wb = Workbook() ws = wb.worksheets[0] @@ -50,19 +56,16 @@ def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-"): if dataset.title else 'Tablib Dataset' ) - cls.dset_sheet(dataset, ws, freeze_panes=freeze_panes) + cls.dset_sheet(dataset, ws, freeze_panes=freeze_panes, escape=escape) stream = BytesIO() wb.save(stream) return stream.getvalue() @classmethod - def export_book(cls, databook, freeze_panes=True, invalid_char_subst="-"): + def export_book(cls, databook, freeze_panes=True, invalid_char_subst="-", escape=False): """Returns XLSX representation of DataBook. - - If dataset.title contains characters which are considered invalid for an XLSX file - sheet name (http://www.excelcodex.com/2012/06/worksheets-naming-conventions/), they will - be replaced with `invalid_char_subst`. + See export_set(). 
""" wb = Workbook() @@ -75,7 +78,7 @@ def export_book(cls, databook, freeze_panes=True, invalid_char_subst="-"): if dset.title else 'Sheet%s' % (i) ) - cls.dset_sheet(dset, ws, freeze_panes=freeze_panes) + cls.dset_sheet(dset, ws, freeze_panes=freeze_panes, escape=escape) stream = BytesIO() wb.save(stream) @@ -125,7 +128,7 @@ def import_book(cls, dbook, in_stream, headers=True, read_only=True): dbook.add_sheet(data) @classmethod - def dset_sheet(cls, dataset, ws, freeze_panes=True): + def dset_sheet(cls, dataset, ws, freeze_panes=True, escape=False): """Completes given worksheet from given Dataset.""" _package = dataset._package(dicts=False) @@ -166,3 +169,6 @@ def dset_sheet(cls, dataset, ws, freeze_panes=True): cell.value = col except (ValueError, TypeError): cell.value = str(col) + + if escape and cell.data_type == 'f' and cell.value.startswith('='): + cell.value = cell.value.lstrip("=") diff --git a/tests/test_tablib.py b/tests/test_tablib.py index 29772f36..971e3d1b 100755 --- a/tests/test_tablib.py +++ b/tests/test_tablib.py @@ -14,6 +14,7 @@ from uuid import uuid4 from MarkupPy import markup +from openpyxl.reader.excel import load_workbook import tablib from tablib.core import Row, detect_format @@ -1117,6 +1118,60 @@ def test_xlsx_cell_values(self): data = tablib.Dataset().load(fh) self.assertEqual(data.headers[0], 'Hello World') + def test_xlsx_export_set_escape_formulae(self): + """ + Test that formulae are sanitised on export. + """ + data.append(('=SUM(1+1)',)) + _xlsx = data.export('xlsx') + + # read back using openpyxl because tablib reads formulae as values + wb = load_workbook(filename=BytesIO(_xlsx)) + self.assertEqual('=SUM(1+1)', wb.active['A1'].value) + + _xlsx = data.export('xlsx', escape=True) + wb = load_workbook(filename=BytesIO(_xlsx)) + self.assertEqual('SUM(1+1)', wb.active['A1'].value) + + def test_xlsx_export_book_escape_formulae(self): + """ + Test that formulae are sanitised on export. 
+ """ + data.append(('=SUM(1+1)',)) + _book = tablib.Databook() + _book.add_sheet(data) + _xlsx = _book.export('xlsx') + + # read back using openpyxl because tablib reads formulae as values + wb = load_workbook(filename=BytesIO(_xlsx)) + self.assertEqual('=SUM(1+1)', wb.active['A1'].value) + + _xlsx = _book.export('xlsx', escape=True) + wb = load_workbook(filename=BytesIO(_xlsx)) + self.assertEqual('SUM(1+1)', wb.active['A1'].value) + + def test_xlsx_export_set_escape_formulae_in_header(self): + data.headers = ('=SUM(1+1)',) + _xlsx = data.export('xlsx') + wb = load_workbook(filename=BytesIO(_xlsx)) + self.assertEqual('=SUM(1+1)', wb.active['A1'].value) + + _xlsx = data.export('xlsx', escape=True) + wb = load_workbook(filename=BytesIO(_xlsx)) + self.assertEqual('SUM(1+1)', wb.active['A1'].value) + + def test_xlsx_export_book_escape_formulae_in_header(self): + data.headers = ('=SUM(1+1)',) + _book = tablib.Databook() + _book.add_sheet(data) + _xlsx = _book.export('xlsx') + wb = load_workbook(filename=BytesIO(_xlsx)) + self.assertEqual('=SUM(1+1)', wb.active['A1'].value) + + _xlsx = _book.export('xlsx', escape=True) + wb = load_workbook(filename=BytesIO(_xlsx)) + self.assertEqual('SUM(1+1)', wb.active['A1'].value) + def test_xlsx_bad_dimensions(self): """Test loading file with bad dimension. Must be done with read_only=False."""