Skip to content

Commit

Permalink
Escape formulae on export (#540)
Browse files Browse the repository at this point in the history
  • Loading branch information
matthewhegarty authored Mar 3, 2023
1 parent 4363c43 commit bff435d
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 12 deletions.
4 changes: 4 additions & 0 deletions docs/formats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,10 @@ data.

Reads cell values instead of formulas.

You can export data to xlsx format by calling :meth:`export('xlsx') <.export>`.
There are optional parameters to control the export.
For available parameters, see :meth:`tablib.formats._xlsx.XLSXFormat.export_set`.

.. admonition:: Binary Warning

The ``xlsx`` file format is binary, so make sure to write in binary mode::
Expand Down
2 changes: 1 addition & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sphinx==4.2.0
sphinx==6.1.3
28 changes: 17 additions & 11 deletions src/tablib/formats/_xlsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,18 @@ def detect(cls, stream):
return False

@classmethod
def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-"):
def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-", escape=False):
"""Returns XLSX representation of Dataset.
If dataset.title contains characters which are considered invalid for an XLSX file
If ``freeze_panes`` is True, Export will freeze panes only after first line.
If ``dataset.title`` contains characters which are considered invalid for an XLSX file
sheet name (http://www.excelcodex.com/2012/06/worksheets-naming-conventions/), they will
be replaced with `invalid_char_subst`.
be replaced with ``invalid_char_subst``.
If ``escape`` is True, formulae will have the leading '=' character removed.
This is a security measure to prevent formulae from executing by default
in exported XLSX files.
"""
wb = Workbook()
ws = wb.worksheets[0]
Expand All @@ -50,19 +56,16 @@ def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-"):
if dataset.title else 'Tablib Dataset'
)

cls.dset_sheet(dataset, ws, freeze_panes=freeze_panes)
cls.dset_sheet(dataset, ws, freeze_panes=freeze_panes, escape=escape)

stream = BytesIO()
wb.save(stream)
return stream.getvalue()

@classmethod
def export_book(cls, databook, freeze_panes=True, invalid_char_subst="-"):
def export_book(cls, databook, freeze_panes=True, invalid_char_subst="-", escape=False):
"""Returns XLSX representation of DataBook.
If dataset.title contains characters which are considered invalid for an XLSX file
sheet name (http://www.excelcodex.com/2012/06/worksheets-naming-conventions/), they will
be replaced with `invalid_char_subst`.
See export_set().
"""

wb = Workbook()
Expand All @@ -75,7 +78,7 @@ def export_book(cls, databook, freeze_panes=True, invalid_char_subst="-"):
if dset.title else 'Sheet%s' % (i)
)

cls.dset_sheet(dset, ws, freeze_panes=freeze_panes)
cls.dset_sheet(dset, ws, freeze_panes=freeze_panes, escape=escape)

stream = BytesIO()
wb.save(stream)
Expand Down Expand Up @@ -125,7 +128,7 @@ def import_book(cls, dbook, in_stream, headers=True, read_only=True):
dbook.add_sheet(data)

@classmethod
def dset_sheet(cls, dataset, ws, freeze_panes=True):
def dset_sheet(cls, dataset, ws, freeze_panes=True, escape=False):
"""Completes given worksheet from given Dataset."""
_package = dataset._package(dicts=False)

Expand Down Expand Up @@ -166,3 +169,6 @@ def dset_sheet(cls, dataset, ws, freeze_panes=True):
cell.value = col
except (ValueError, TypeError):
cell.value = str(col)

if escape and cell.data_type == 'f' and cell.value.startswith('='):
    # Neutralise the formula by dropping only its leading '=' run, so the
    # remaining text is exported verbatim. Using str.replace("=", "") here
    # would also delete every '=' inside the expression (e.g. the
    # comparison in "=IF(A1=1,2,3)"), corrupting the exported text.
    # lstrip (rather than removing a single character) guarantees the
    # result no longer starts with '=', e.g. for a value like "==SUM(1+1)".
    cell.value = cell.value.lstrip("=")
55 changes: 55 additions & 0 deletions tests/test_tablib.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from uuid import uuid4

from MarkupPy import markup
from openpyxl.reader.excel import load_workbook

import tablib
from tablib.core import Row, detect_format
Expand Down Expand Up @@ -1117,6 +1118,60 @@ def test_xlsx_cell_values(self):
data = tablib.Dataset().load(fh)
self.assertEqual(data.headers[0], 'Hello World')

def test_xlsx_export_set_escape_formulae(self):
    """
    A formula cell is exported untouched by default and loses its
    leading '=' when ``escape=True`` is passed to the Dataset export.
    """
    formula = '=SUM(1+1)'
    data.append((formula,))

    # Read the result back with openpyxl because tablib reads formulae
    # as values.
    plain_wb = load_workbook(filename=BytesIO(data.export('xlsx')))
    self.assertEqual(formula, plain_wb.active['A1'].value)

    escaped_wb = load_workbook(
        filename=BytesIO(data.export('xlsx', escape=True))
    )
    self.assertEqual('SUM(1+1)', escaped_wb.active['A1'].value)

def test_xlsx_export_book_escape_formulae(self):
    """
    A formula cell is exported untouched by default and loses its
    leading '=' when ``escape=True`` is passed to the Databook export.
    """
    formula = '=SUM(1+1)'
    data.append((formula,))
    workbook_source = tablib.Databook()
    workbook_source.add_sheet(data)

    # Read the result back with openpyxl because tablib reads formulae
    # as values.
    plain_wb = load_workbook(
        filename=BytesIO(workbook_source.export('xlsx'))
    )
    self.assertEqual(formula, plain_wb.active['A1'].value)

    escaped_wb = load_workbook(
        filename=BytesIO(workbook_source.export('xlsx', escape=True))
    )
    self.assertEqual('SUM(1+1)', escaped_wb.active['A1'].value)

def test_xlsx_export_set_escape_formulae_in_header(self):
    """
    A formula used as a header is exported untouched by default and
    loses its leading '=' when ``escape=True`` is passed to the Dataset
    export.
    """
    formula = '=SUM(1+1)'
    data.headers = (formula,)

    plain_wb = load_workbook(filename=BytesIO(data.export('xlsx')))
    self.assertEqual(formula, plain_wb.active['A1'].value)

    escaped_wb = load_workbook(
        filename=BytesIO(data.export('xlsx', escape=True))
    )
    self.assertEqual('SUM(1+1)', escaped_wb.active['A1'].value)

def test_xlsx_export_book_escape_formulae_in_header(self):
    """
    A formula used as a header is exported untouched by default and
    loses its leading '=' when ``escape=True`` is passed to the Databook
    export.
    """
    formula = '=SUM(1+1)'
    data.headers = (formula,)
    workbook_source = tablib.Databook()
    workbook_source.add_sheet(data)

    plain_wb = load_workbook(
        filename=BytesIO(workbook_source.export('xlsx'))
    )
    self.assertEqual(formula, plain_wb.active['A1'].value)

    escaped_wb = load_workbook(
        filename=BytesIO(workbook_source.export('xlsx', escape=True))
    )
    self.assertEqual('SUM(1+1)', escaped_wb.active['A1'].value)

def test_xlsx_bad_dimensions(self):
"""Test loading file with bad dimension. Must be done with
read_only=False."""
Expand Down

0 comments on commit bff435d

Please sign in to comment.