Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support column_width in xlsx format #516

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,4 @@ Here is a list of passed and present much-appreciated contributors:
Tommy Anthony
Tsuyoshi Hombashi
Tushar Makkar
Egor Osokin
Birdi7 marked this conversation as resolved.
Show resolved Hide resolved
14 changes: 14 additions & 0 deletions docs/formats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,20 @@ The ``import_set()`` method also supports a ``skip_lines`` parameter that you
can set to a number of lines that should be skipped before starting to read
data.

The ``export_set()`` method supports a ``column_width`` parameter that controls the
width of the exported columns. It can be ``None`` (column widths are left untouched), an integer (every column gets that width), or ``"adaptive"``.
Birdi7 marked this conversation as resolved.
Show resolved Hide resolved
If ``"adaptive"`` is passed, each column gets its own width, which is
calculated from the length of the longest value in that column. Example of usage:

.. code-block:: python

    data = tablib.Dataset()
    data.export('xlsx', column_width='adaptive')
Birdi7 marked this conversation as resolved.
Show resolved Hide resolved


.. versionchanged:: 3.3.0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

3.3.0 has been released, let's target 3.4.0:

Suggested change
.. versionchanged:: 3.3.0
.. versionchanged:: 3.4.0

The ``column_width`` parameter for ``export_set()`` was added.

.. versionchanged:: 3.1.0

The ``skip_lines`` parameter for ``import_set()`` was added.
Expand Down
35 changes: 34 additions & 1 deletion src/tablib/formats/_xlsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import re
from io import BytesIO
from typing import Optional, Union
hugovk marked this conversation as resolved.
Show resolved Hide resolved

from openpyxl.reader.excel import ExcelReader, load_workbook
from openpyxl.styles import Alignment, Font
Expand Down Expand Up @@ -35,12 +36,17 @@ def detect(cls, stream):
return False

@classmethod
def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-"):
def export_set(
cls, dataset, freeze_panes=True, invalid_char_subst="-",
column_width: Optional[Union[str, int]] = "adaptive"
):
"""Returns XLSX representation of Dataset.

If dataset.title contains characters which are considered invalid for an XLSX file
sheet name (http://www.excelcodex.com/2012/06/worksheets-naming-conventions/), they will
be replaced with `invalid_char_subst`.

column_width: can have int, None, or "adaptive" as a value
Birdi7 marked this conversation as resolved.
Show resolved Hide resolved
"""
wb = Workbook()
ws = wb.worksheets[0]
Expand All @@ -51,6 +57,11 @@ def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-"):
)

cls.dset_sheet(dataset, ws, freeze_panes=freeze_panes)
if isinstance(column_width, str) and column_width != "adaptive":
raise ValueError(f"Unsupported value `{column_width}` passed to `column_width` "
f"parameter. It supports 'adaptive' or integer values")
Birdi7 marked this conversation as resolved.
Show resolved Hide resolved

cls._adapt_column_width(ws, column_width)

stream = BytesIO()
wb.save(stream)
Expand Down Expand Up @@ -166,3 +177,25 @@ def dset_sheet(cls, dataset, ws, freeze_panes=True):
cell.value = col
except (ValueError, TypeError):
cell.value = str(col)

@classmethod
def _adapt_column_width(cls, worksheet,
                        width: Optional[Union[str, int]]) -> None:
    """Set the column widths of ``worksheet`` in place.

    :param worksheet: the worksheet whose ``column_dimensions`` are updated.
    :param width: ``None`` leaves the worksheet untouched; ``"adaptive"``
        sizes each column to the longest ``str()`` representation among its
        values; any other value is applied as-is to every column.
    """
    if width is None:
        return

    if isinstance(width, str) and width == "adaptive":
        column_widths = []
        for row in worksheet.values:
            for i, value in enumerate(row):
                length = len(str(value))
                if i < len(column_widths):
                    column_widths[i] = max(column_widths[i], length)
                else:
                    # First time this column index is seen.
                    column_widths.append(length)
    else:
        # ``worksheet.values`` is a generator of rows: it has no ``len()``
        # and would count rows rather than columns anyway, so take the
        # column count from the worksheet itself.
        column_widths = [width] * worksheet.max_column

    for i, column_width in enumerate(column_widths, 1):  # columns are 1-based
        worksheet.column_dimensions[get_column_letter(i)].width = column_width
20 changes: 20 additions & 0 deletions tests/test_tablib.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@
from collections import OrderedDict
from io import BytesIO, StringIO
from pathlib import Path
from tempfile import TemporaryFile
from uuid import uuid4

from MarkupPy import markup
from openpyxl import load_workbook

import tablib
from tablib.core import Row, detect_format
Expand Down Expand Up @@ -1100,7 +1102,25 @@ def test_xlsx_bad_dimensions(self):
data = tablib.Dataset().load(fh, read_only=False)
self.assertEqual(data.height, 3)

def test_xlsx_column_width(self):
    """Column A's exported width changes after a much longer value is added."""
    def first_column_width(dataset):
        # Export to XLSX, read it back, and report the width of column A.
        workbook = load_workbook(filename=BytesIO(dataset.export('xlsx')))
        return workbook.active.column_dimensions['A'].width

    source_path = Path(__file__).parent / 'files' / 'xlsx_cell_values.xlsx'
    with source_path.open('rb') as stream:
        data = tablib.Dataset().load(stream)
    initial_width = first_column_width(data)

    long_value = (
        'verylongvalue-verylongvalue-verylongvalue-verylongvalue-'
        'verylongvalue-verylongvalue-verylongvalue-verylongvalue'
    )
    data.append([long_value])
    final_width = first_column_width(data)

    self.assertNotEqual(initial_width, final_width)

class JSONTests(BaseTestCase):
def test_json_format_detect(self):
"""Test JSON format detection."""
Expand Down