From fc4ac6dae043b09ee59586f127db6355e8a185a4 Mon Sep 17 00:00:00 2001 From: matthewhegarty Date: Tue, 28 Feb 2023 09:32:24 +0000 Subject: [PATCH 1/7] initial checkin --- pytest.ini | 2 +- src/tablib/formats/_xlsx.py | 2 ++ tests/test_tablib.py | 18 ++++++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index 32b87e39..b717f9c1 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,3 +1,3 @@ [pytest] norecursedirs = .git .* -addopts = -rsxX --showlocals --tb=native --cov=tablib --cov=tests --cov-report xml --cov-report term --cov-report html +#addopts = -rsxX --showlocals --tb=native --cov=tablib --cov=tests --cov-report xml --cov-report term --cov-report html diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py index c84ad540..29ef88e0 100644 --- a/src/tablib/formats/_xlsx.py +++ b/src/tablib/formats/_xlsx.py @@ -163,6 +163,8 @@ def dset_sheet(cls, dataset, ws, freeze_panes=True): cell.alignment = wrap_text try: + print(cell.data_type) + print(cell.value) cell.value = col except (ValueError, TypeError): cell.value = str(col) diff --git a/tests/test_tablib.py b/tests/test_tablib.py index 29772f36..294593cf 100755 --- a/tests/test_tablib.py +++ b/tests/test_tablib.py @@ -14,6 +14,7 @@ from uuid import uuid4 from MarkupPy import markup +from openpyxl.reader.excel import load_workbook import tablib from tablib.core import Row, detect_format @@ -1117,6 +1118,23 @@ def test_xlsx_cell_values(self): data = tablib.Dataset().load(fh) self.assertEqual(data.headers[0], 'Hello World') + def test_xlsx_safe_export_formulae(self): + """ + Test that formulae are sanitised on export. 
+ """ + # data.append(('first', '=SUM(A1:A2)')) + # _book = tablib.Databook() + # _book.add_sheet(data) + xls_source = Path(__file__).parent / 'files' / 'xlsx_cell_values.xlsx' + with xls_source.open('rb') as fh: + data = tablib.Dataset().load(fh) + _xlsx = data.export('xlsx') + # read back using openpyxl because tablib reads formulae as values + wb = load_workbook(filename=BytesIO(_xlsx)) + ws = wb.active + print(ws) + self.assertEqual('SUM(1+1)', ws['B1'].value) + def test_xlsx_bad_dimensions(self): """Test loading file with bad dimension. Must be done with read_only=False.""" From 5dfb3e5c76fa875c3b4bff3888820604141d6ec1 Mon Sep 17 00:00:00 2001 From: matthewhegarty Date: Wed, 1 Mar 2023 14:37:21 +0000 Subject: [PATCH 2/7] added 'escape' param to XLSX export --- src/tablib/formats/_xlsx.py | 24 +++++++------- tests/test_tablib.py | 63 +++++++++++++++++++++++++++++++------ 2 files changed, 67 insertions(+), 20 deletions(-) diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py index 29ef88e0..2fffded3 100644 --- a/src/tablib/formats/_xlsx.py +++ b/src/tablib/formats/_xlsx.py @@ -35,12 +35,16 @@ def detect(cls, stream): return False @classmethod - def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-"): + def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-", escape=False): """Returns XLSX representation of Dataset. If dataset.title contains characters which are considered invalid for an XLSX file sheet name (http://www.excelcodex.com/2012/06/worksheets-naming-conventions/), they will be replaced with `invalid_char_subst`. + + If escape is True, formulae will have the leading '=' character removed. + This is a security measure to prevent formulae from executing by default + in exported XLSX files. 
""" wb = Workbook() ws = wb.worksheets[0] @@ -50,19 +54,16 @@ def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-"): if dataset.title else 'Tablib Dataset' ) - cls.dset_sheet(dataset, ws, freeze_panes=freeze_panes) + cls.dset_sheet(dataset, ws, freeze_panes=freeze_panes, escape=escape) stream = BytesIO() wb.save(stream) return stream.getvalue() @classmethod - def export_book(cls, databook, freeze_panes=True, invalid_char_subst="-"): + def export_book(cls, databook, freeze_panes=True, invalid_char_subst="-", escape=False): """Returns XLSX representation of DataBook. - - If dataset.title contains characters which are considered invalid for an XLSX file - sheet name (http://www.excelcodex.com/2012/06/worksheets-naming-conventions/), they will - be replaced with `invalid_char_subst`. + See export_set(). """ wb = Workbook() @@ -75,7 +76,7 @@ def export_book(cls, databook, freeze_panes=True, invalid_char_subst="-"): if dset.title else 'Sheet%s' % (i) ) - cls.dset_sheet(dset, ws, freeze_panes=freeze_panes) + cls.dset_sheet(dset, ws, freeze_panes=freeze_panes, escape=escape) stream = BytesIO() wb.save(stream) @@ -125,7 +126,7 @@ def import_book(cls, dbook, in_stream, headers=True, read_only=True): dbook.add_sheet(data) @classmethod - def dset_sheet(cls, dataset, ws, freeze_panes=True): + def dset_sheet(cls, dataset, ws, freeze_panes=True, escape=False): """Completes given worksheet from given Dataset.""" _package = dataset._package(dicts=False) @@ -163,8 +164,9 @@ def dset_sheet(cls, dataset, ws, freeze_panes=True): cell.alignment = wrap_text try: - print(cell.data_type) - print(cell.value) cell.value = col except (ValueError, TypeError): cell.value = str(col) + + if escape and cell.data_type == 'f' and cell.value.startswith('='): + cell.value = cell.value.replace("=", "") diff --git a/tests/test_tablib.py b/tests/test_tablib.py index 294593cf..488f26cd 100755 --- a/tests/test_tablib.py +++ b/tests/test_tablib.py @@ -1118,22 +1118,67 @@ def 
test_xlsx_cell_values(self): data = tablib.Dataset().load(fh) self.assertEqual(data.headers[0], 'Hello World') - def test_xlsx_safe_export_formulae(self): + def test_xlsx_export_set_escape_formulae(self): """ Test that formulae are sanitised on export. """ - # data.append(('first', '=SUM(A1:A2)')) - # _book = tablib.Databook() - # _book.add_sheet(data) - xls_source = Path(__file__).parent / 'files' / 'xlsx_cell_values.xlsx' - with xls_source.open('rb') as fh: - data = tablib.Dataset().load(fh) + data.append(('=SUM(1+1)',)) _xlsx = data.export('xlsx') + + # read back using openpyxl because tablib reads formulae as values + wb = load_workbook(filename=BytesIO(_xlsx)) + ws = wb.active + self.assertEqual('=SUM(1+1)', ws['A1'].value) + + _xlsx = data.export('xlsx', escape=True) + wb = load_workbook(filename=BytesIO(_xlsx)) + ws = wb.active + self.assertEqual('SUM(1+1)', ws['A1'].value) + + def test_xlsx_export_book_escape_formulae(self): + """ + Test that formulae are sanitised on export. + """ + data.append(('=SUM(1+1)',)) + _book = tablib.Databook() + _book.add_sheet(data) + _xlsx = _book.export('xlsx') + # read back using openpyxl because tablib reads formulae as values wb = load_workbook(filename=BytesIO(_xlsx)) ws = wb.active - print(ws) - self.assertEqual('SUM(1+1)', ws['B1'].value) + self.assertEqual('=SUM(1+1)', ws['A1'].value) + + _xlsx = _book.export('xlsx', escape=True) + wb = load_workbook(filename=BytesIO(_xlsx)) + ws = wb.active + self.assertEqual('SUM(1+1)', ws['A1'].value) + + def test_xlsx_export_set_escape_formulae_in_header(self): + data.headers = ('=SUM(1+1)',) + _xlsx = data.export('xlsx') + wb = load_workbook(filename=BytesIO(_xlsx)) + ws = wb.active + self.assertEqual('=SUM(1+1)', ws['A1'].value) + + _xlsx = data.export('xlsx', escape=True) + wb = load_workbook(filename=BytesIO(_xlsx)) + ws = wb.active + self.assertEqual('SUM(1+1)', ws['A1'].value) + + def test_xlsx_export_book_escape_formulae_in_header(self): + data.headers = ('=SUM(1+1)',) + 
_book = tablib.Databook() + _book.add_sheet(data) + _xlsx = _book.export('xlsx') + wb = load_workbook(filename=BytesIO(_xlsx)) + ws = wb.active + self.assertEqual('=SUM(1+1)', ws['A1'].value) + + _xlsx = _book.export('xlsx', escape=True) + wb = load_workbook(filename=BytesIO(_xlsx)) + ws = wb.active + self.assertEqual('SUM(1+1)', ws['A1'].value) def test_xlsx_bad_dimensions(self): """Test loading file with bad dimension. Must be done with From 667bc24203fbe3b561c6953ff2d11adeed34b936 Mon Sep 17 00:00:00 2001 From: matthewhegarty Date: Wed, 1 Mar 2023 14:40:42 +0000 Subject: [PATCH 3/7] fixed pytest config --- pytest.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index b717f9c1..32b87e39 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,3 +1,3 @@ [pytest] norecursedirs = .git .* -#addopts = -rsxX --showlocals --tb=native --cov=tablib --cov=tests --cov-report xml --cov-report term --cov-report html +addopts = -rsxX --showlocals --tb=native --cov=tablib --cov=tests --cov-report xml --cov-report term --cov-report html From 2ffcd8ca10c9c8f8ed6784eda74ea47fb0041e70 Mon Sep 17 00:00:00 2001 From: matthewhegarty Date: Wed, 1 Mar 2023 15:13:44 +0000 Subject: [PATCH 4/7] removed use of temp variable in tests --- tests/test_tablib.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/tests/test_tablib.py b/tests/test_tablib.py index 488f26cd..971e3d1b 100755 --- a/tests/test_tablib.py +++ b/tests/test_tablib.py @@ -1127,13 +1127,11 @@ def test_xlsx_export_set_escape_formulae(self): # read back using openpyxl because tablib reads formulae as values wb = load_workbook(filename=BytesIO(_xlsx)) - ws = wb.active - self.assertEqual('=SUM(1+1)', ws['A1'].value) + self.assertEqual('=SUM(1+1)', wb.active['A1'].value) _xlsx = data.export('xlsx', escape=True) wb = load_workbook(filename=BytesIO(_xlsx)) - ws = wb.active - self.assertEqual('SUM(1+1)', ws['A1'].value) + self.assertEqual('SUM(1+1)', 
wb.active['A1'].value) def test_xlsx_export_book_escape_formulae(self): """ @@ -1146,25 +1144,21 @@ def test_xlsx_export_book_escape_formulae(self): # read back using openpyxl because tablib reads formulae as values wb = load_workbook(filename=BytesIO(_xlsx)) - ws = wb.active - self.assertEqual('=SUM(1+1)', ws['A1'].value) + self.assertEqual('=SUM(1+1)', wb.active['A1'].value) _xlsx = _book.export('xlsx', escape=True) wb = load_workbook(filename=BytesIO(_xlsx)) - ws = wb.active - self.assertEqual('SUM(1+1)', ws['A1'].value) + self.assertEqual('SUM(1+1)', wb.active['A1'].value) def test_xlsx_export_set_escape_formulae_in_header(self): data.headers = ('=SUM(1+1)',) _xlsx = data.export('xlsx') wb = load_workbook(filename=BytesIO(_xlsx)) - ws = wb.active - self.assertEqual('=SUM(1+1)', ws['A1'].value) + self.assertEqual('=SUM(1+1)', wb.active['A1'].value) _xlsx = data.export('xlsx', escape=True) wb = load_workbook(filename=BytesIO(_xlsx)) - ws = wb.active - self.assertEqual('SUM(1+1)', ws['A1'].value) + self.assertEqual('SUM(1+1)', wb.active['A1'].value) def test_xlsx_export_book_escape_formulae_in_header(self): data.headers = ('=SUM(1+1)',) @@ -1172,13 +1166,11 @@ def test_xlsx_export_book_escape_formulae_in_header(self): _book.add_sheet(data) _xlsx = _book.export('xlsx') wb = load_workbook(filename=BytesIO(_xlsx)) - ws = wb.active - self.assertEqual('=SUM(1+1)', ws['A1'].value) + self.assertEqual('=SUM(1+1)', wb.active['A1'].value) _xlsx = _book.export('xlsx', escape=True) wb = load_workbook(filename=BytesIO(_xlsx)) - ws = wb.active - self.assertEqual('SUM(1+1)', ws['A1'].value) + self.assertEqual('SUM(1+1)', wb.active['A1'].value) def test_xlsx_bad_dimensions(self): """Test loading file with bad dimension. 
Must be done with From 1fa1132a9e540a6dbce5fbff8c8fb37bbcf72f89 Mon Sep 17 00:00:00 2001 From: matthewhegarty Date: Fri, 3 Mar 2023 15:40:18 +0000 Subject: [PATCH 5/7] added link to docstring in formats.rst --- docs/formats.rst | 4 ++++ docs/requirements.txt | 2 +- src/tablib/formats/_xlsx.py | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/formats.rst b/docs/formats.rst index c3620da6..268c3c83 100644 --- a/docs/formats.rst +++ b/docs/formats.rst @@ -247,6 +247,10 @@ data. Reads cell values instead of formulas. +You can export data to xlsx format by calling :meth:`export('xlsx') <.export>`. +There are optional parameters to control the export. +For available parameters, see :meth:`tablib.formats._xlsx.XLSXFormat.export_set`. + .. admonition:: Binary Warning The ``xlsx`` file format is binary, so make sure to write in binary mode:: diff --git a/docs/requirements.txt b/docs/requirements.txt index 498f43db..553b0c0f 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1 +1 @@ -sphinx==4.2.0 +sphinx==6.1.3 diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py index 2fffded3..7b6c2298 100644 --- a/src/tablib/formats/_xlsx.py +++ b/src/tablib/formats/_xlsx.py @@ -38,6 +38,8 @@ def detect(cls, stream): def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-", escape=False): """Returns XLSX representation of Dataset. + If freeze_panes is True, Export will freeze panes only after first line. + If dataset.title contains characters which are considered invalid for an XLSX file sheet name (http://www.excelcodex.com/2012/06/worksheets-naming-conventions/), they will be replaced with `invalid_char_subst`. 
From 75840a5351a0c2e016cf164c3e4a37484f3001ee Mon Sep 17 00:00:00 2001 From: matthewhegarty Date: Fri, 3 Mar 2023 15:47:26 +0000 Subject: [PATCH 6/7] updated docstring to define parameters --- src/tablib/formats/_xlsx.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py index 7b6c2298..ef574113 100644 --- a/src/tablib/formats/_xlsx.py +++ b/src/tablib/formats/_xlsx.py @@ -38,13 +38,16 @@ def detect(cls, stream): def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-", escape=False): """Returns XLSX representation of Dataset. - If freeze_panes is True, Export will freeze panes only after first line. + If ``freeze_panes`` is True, Export will freeze panes only after first line. - If dataset.title contains characters which are considered invalid for an XLSX file + ``invalid_char_substring`` controls the character which will be used to replace any + invalid chars which are defined in the dataset title. + + If ``dataset.title`` contains characters which are considered invalid for an XLSX file sheet name (http://www.excelcodex.com/2012/06/worksheets-naming-conventions/), they will be replaced with `invalid_char_subst`. - If escape is True, formulae will have the leading '=' character removed. + If ``escape`` is True, formulae will have the leading '=' character removed. This is a security measure to prevent formulae from executing by default in exported XLSX files. 
""" From 080b55a26b368dd3400cbccf3ec3c2c119ce46c9 Mon Sep 17 00:00:00 2001 From: matthewhegarty Date: Fri, 3 Mar 2023 15:48:19 +0000 Subject: [PATCH 7/7] removed extraneous param definition --- src/tablib/formats/_xlsx.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py index ef574113..974e14bb 100644 --- a/src/tablib/formats/_xlsx.py +++ b/src/tablib/formats/_xlsx.py @@ -40,12 +40,9 @@ def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-", escape=F If ``freeze_panes`` is True, Export will freeze panes only after first line. - ``invalid_char_substring`` controls the character which will be used to replace any - invalid chars which are defined in the dataset title. - If ``dataset.title`` contains characters which are considered invalid for an XLSX file sheet name (http://www.excelcodex.com/2012/06/worksheets-naming-conventions/), they will - be replaced with `invalid_char_subst`. + be replaced with ``invalid_char_subst``. If ``escape`` is True, formulae will have the leading '=' character removed. This is a security measure to prevent formulae from executing by default