Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support column_width in xlsx format #516

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Here is a list of passed and present much-appreciated contributors:
Bruno Soares
Claude Paroz
Daniel Santos
Egor Osokin
Erik Youngren
Hugo van Kemenade
Iuri de Silvio
Expand Down
13 changes: 13 additions & 0 deletions docs/formats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,19 @@ The ``import_set()`` method also supports a ``skip_lines`` parameter that you
can set to a number of lines that should be skipped before starting to read
data.

The ``export_set()`` method supports a ``column_width`` parameter that controls the
width of the exported columns. It can be ``None``, an integer, or ``"adaptive"`` (the default).
With ``None`` the column widths are left untouched, with an integer every column gets that
width, and with ``"adaptive"`` each column's width is calculated separately from the length
of its longest value. For example::

data = tablib.Dataset()
data.export('xlsx', column_width='adaptive')



.. versionchanged:: 3.3.0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

3.3.0 has been released, let's target 3.4.0:

Suggested change
.. versionchanged:: 3.3.0
.. versionchanged:: 3.4.0

The ``column_width`` parameter for ``export_set()`` was added.

.. versionchanged:: 3.1.0

The ``skip_lines`` parameter for ``import_set()`` was added.
Expand Down
35 changes: 34 additions & 1 deletion src/tablib/formats/_xlsx.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" Tablib - XLSX Support.
"""
from __future__ import annotations

import re
from io import BytesIO
Expand Down Expand Up @@ -35,12 +36,17 @@ def detect(cls, stream):
return False

@classmethod
def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-"):
def export_set(
cls, dataset, freeze_panes=True, invalid_char_subst="-",
column_width: str | int | None = "adaptive"
):
"""Returns XLSX representation of Dataset.

If dataset.title contains characters which are considered invalid for an XLSX file
sheet name (http://www.excelcodex.com/2012/06/worksheets-naming-conventions/), they will
be replaced with `invalid_char_subst`.

column_width: can be None, an integer, or "adaptive"
"""
wb = Workbook()
ws = wb.worksheets[0]
Expand All @@ -51,6 +57,11 @@ def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-"):
)

cls.dset_sheet(dataset, ws, freeze_panes=freeze_panes)
if isinstance(column_width, str) and column_width != "adaptive":
raise ValueError(f"Unsupported value `{column_width}` passed to `column_width` "
"parameter. It supports 'adaptive' or integer values")
Comment on lines +61 to +62
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe something like this, to be closer to existing exceptions?

And let's start assigning exception messages to a variable first, for the reasons set out at https://github.com/henryiii/flake8-errmsg

Suggested change
raise ValueError(f"Unsupported value `{column_width}` passed to `column_width` "
"parameter. It supports 'adaptive' or integer values")
msg = (
f"Invalid value for column_width: {column_width}. "
f"Must be 'adaptive' or an integer."
)
raise ValueError(msg)


cls._adapt_column_width(ws, column_width)

stream = BytesIO()
wb.save(stream)
Expand Down Expand Up @@ -166,3 +177,25 @@ def dset_sheet(cls, dataset, ws, freeze_panes=True):
cell.value = col
except (ValueError, TypeError):
cell.value = str(col)

@classmethod
def _adapt_column_width(cls, worksheet,
                        width: str | int | None) -> None:
    """Set the column widths of ``worksheet`` according to ``width``.

    ``width`` may be:

    * ``None`` -- the worksheet is left untouched;
    * ``"adaptive"`` -- each column is sized to the length of the longest
      ``str()`` representation among the values in that column;
    * an integer -- that width is applied to each of the worksheet's
      ``max_column`` columns.

    Raises ``ValueError`` for any other string, so that a bad value handed
    straight to this helper is reported instead of being written to the
    sheet as a column width.
    """
    if width is None:
        return

    if isinstance(width, str) and width != "adaptive":
        msg = (
            f"Invalid value for column_width: {width}. "
            f"Must be 'adaptive' or an integer."
        )
        raise ValueError(msg)

    if width == "adaptive":
        column_widths = []
        for row in worksheet.values:
            for index, value in enumerate(row):
                # Every value is measured through str(), whatever its type.
                value_length = len(str(value))
                if index < len(column_widths):
                    column_widths[index] = max(column_widths[index], value_length)
                else:
                    # First time this column position is seen.
                    column_widths.append(value_length)
    else:
        column_widths = [width] * worksheet.max_column

    # Column letters are looked up with 1-based indexes.
    for index, column_width in enumerate(column_widths, 1):
        worksheet.column_dimensions[get_column_letter(index)].width = column_width
43 changes: 43 additions & 0 deletions tests/test_tablib.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@
from collections import OrderedDict
from io import BytesIO, StringIO
from pathlib import Path
from tempfile import TemporaryFile
from uuid import uuid4

from MarkupPy import markup
from openpyxl import load_workbook

import tablib
from tablib.core import Row, detect_format
Expand Down Expand Up @@ -1100,6 +1102,47 @@ def test_xlsx_bad_dimensions(self):
data = tablib.Dataset().load(fh, read_only=False)
self.assertEqual(data.height, 3)

def _helper_export_column_width(self, input_arg):
    """Export the fixture to XLSX twice and return column A's width each time.

    The first export uses the fixture data as loaded; the second is made
    after appending a row with a much longer value. Both exports pass
    ``input_arg`` as ``column_width``, so the returned
    ``(width_before, width_after)`` pair shows how that setting reacts to
    longer content.
    """
    def _get_width(data, column_width):
        xlsx_content = data.export('xlsx', column_width=column_width)
        wb = load_workbook(filename=BytesIO(xlsx_content))
        ws = wb.active
        return ws.column_dimensions['A'].width

    xls_source = Path(__file__).parent / 'files' / 'xlsx_cell_values.xlsx'
    with xls_source.open('rb') as fh:
        data = tablib.Dataset().load(fh)
    width_before = _get_width(data, input_arg)
    data.append([
        'verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue',
    ])
    # Re-export with the argument under test. Passing ``width_before`` here
    # would turn the second export into a fixed-width one and hide whether
    # the setting actually adapts to the new row.
    width_after = _get_width(data, input_arg)
    return width_before, width_after

def test_xlsx_column_width_none(self):
    """check column width with None"""
    width_before, width_after = self._helper_export_column_width(None)
    self.assertEqual(width_before, 13)
    self.assertEqual(width_after, 13)

def test_xlsx_column_width_adaptive(self):
    """check column width with 'adaptive'"""
    width_before, width_after = self._helper_export_column_width("adaptive")
    self.assertEqual(width_before, 11)
    # The appended value is far longer than anything in the fixture, so an
    # adaptive column has to grow.
    self.assertGreater(width_after, width_before)

def test_xlsx_column_width_integer(self):
    """An integer ``column_width`` is reported unchanged by both exports."""
    fixed_width = 10
    widths = self._helper_export_column_width(fixed_width)
    self.assertEqual(widths[0], fixed_width)
    self.assertEqual(widths[1], fixed_width)

def test_xlsx_column_width_value_error(self):
    """An unsupported string for ``column_width`` raises ``ValueError``."""
    self.assertRaises(
        ValueError, self._helper_export_column_width, "invalid input"
    )


class JSONTests(BaseTestCase):
def test_json_format_detect(self):
Expand Down