From ddfee5defb015cc68c14958358203abb96b03bf1 Mon Sep 17 00:00:00 2001 From: Egor Date: Thu, 17 Feb 2022 13:18:54 +0300 Subject: [PATCH 01/12] feat: support column_width in xlsx format --- AUTHORS | 1 + docs/formats.rst | 9 +++++++++ src/tablib/formats/_xlsx.py | 36 +++++++++++++++++++++++++++++++++++- 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index a3d356dc..6d5cc8a6 100644 --- a/AUTHORS +++ b/AUTHORS @@ -31,3 +31,4 @@ Here is a list of passed and present much-appreciated contributors: Tommy Anthony Tsuyoshi Hombashi Tushar Makkar + Egor Osokin diff --git a/docs/formats.rst b/docs/formats.rst index c3620da6..8d462e2c 100644 --- a/docs/formats.rst +++ b/docs/formats.rst @@ -234,6 +234,15 @@ The ``import_set()`` method also supports a ``skip_lines`` parameter that you can set to a number of lines that should be skipped before starting to read data. +The ``export_set()`` method supports a ``column_width`` parameter. Depending on the +value you pass, the column width will be set accordingly. It can be either None, int, or "adapt". +If "adapt" is passed, the column width will be unique for every column and will be +calculated based on values' length + + +.. versionchanged:: 3.2.0 + The ``column_width`` parameter for ``export_set()`` was added. + .. versionchanged:: 3.1.0 The ``skip_lines`` parameter for ``import_set()`` was added. 
diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py index b2628443..aa0fcc3f 100644 --- a/src/tablib/formats/_xlsx.py +++ b/src/tablib/formats/_xlsx.py @@ -3,12 +3,14 @@ import re from io import BytesIO +from typing import Optional, Union from openpyxl.reader.excel import ExcelReader, load_workbook from openpyxl.styles import Alignment, Font from openpyxl.utils import get_column_letter from openpyxl.workbook import Workbook from openpyxl.writer.excel import ExcelWriter +from openpyxl.utils import get_column_letter import tablib @@ -35,12 +37,17 @@ def detect(cls, stream): return False @classmethod - def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-"): + def export_set( + cls, dataset, freeze_panes=True, invalid_char_subst="-", + column_width: Optional[Union[str, int]] = "adaptive" + ): """Returns XLSX representation of Dataset. If dataset.title contains characters which are considered invalid for an XLSX file sheet name (http://www.excelcodex.com/2012/06/worksheets-naming-conventions/), they will be replaced with `invalid_char_subst`. + + column_width: can have int, None, or "adaptive" as a value """ wb = Workbook() ws = wb.worksheets[0] @@ -51,6 +58,11 @@ def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-"): ) cls.dset_sheet(dataset, ws, freeze_panes=freeze_panes) + if isinstance(column_width, str) and column_width != "adaptive": + raise ValueError(f"Unsupported value `{column_width}` passed to `column_width` " + f"parameter. 
It supports 'adaptive' or integer values") + + cls._adapt_column_width(ws, column_width) stream = BytesIO() wb.save(stream) @@ -166,3 +178,25 @@ def dset_sheet(cls, dataset, ws, freeze_panes=True): cell.value = col except (ValueError, TypeError): cell.value = str(col) + + @classmethod + def _adapt_column_width(cls, worksheet, + width: Optional[Union[str, int]]) -> None: + if width is None: + return + + column_widths = [] + if isinstance(width, str) and width == "adaptive": + for row in worksheet.values: + for i, cell in enumerate(row): + cell = str(cell) + if len(column_widths) > i: + if len(cell) > column_widths[i]: + column_widths[i] = len(cell) + else: + column_widths += [len(cell)] + else: + column_widths = [width] * len(worksheet.values) + + for i, column_width in enumerate(column_widths, 1): # start at 1 + worksheet.column_dimensions[get_column_letter(i)].width = column_width From b7d313e703d1c99e4522c0a10b632156e4b6e8c2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 17 Feb 2022 10:20:38 +0000 Subject: [PATCH 02/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/tablib/formats/_xlsx.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py index aa0fcc3f..92bb2994 100644 --- a/src/tablib/formats/_xlsx.py +++ b/src/tablib/formats/_xlsx.py @@ -10,7 +10,6 @@ from openpyxl.utils import get_column_letter from openpyxl.workbook import Workbook from openpyxl.writer.excel import ExcelWriter -from openpyxl.utils import get_column_letter import tablib From ead09ee4779eac001c554fad706743f4651a939c Mon Sep 17 00:00:00 2001 From: Egor Date: Fri, 1 Apr 2022 13:12:25 +0300 Subject: [PATCH 03/12] Update docs/formats.rst Co-authored-by: Hugo van Kemenade --- docs/formats.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/formats.rst b/docs/formats.rst index 
8d462e2c..da98b5db 100644 --- a/docs/formats.rst +++ b/docs/formats.rst @@ -240,7 +240,7 @@ If "adapt" is passed, the column width will be unique for every column and will calculated based on values' length -.. versionchanged:: 3.2.0 +.. versionchanged:: 3.3.0 The ``column_width`` parameter for ``export_set()`` was added. .. versionchanged:: 3.1.0 From 3bcf8aa90f7c0ee9d721abb963f435efce8fb8c8 Mon Sep 17 00:00:00 2001 From: Egor Date: Fri, 1 Apr 2022 14:56:57 +0300 Subject: [PATCH 04/12] chore: add tests for adaptive column width in xlsx format --- docs/formats.rst | 11 ++++++++--- tests/test_tablib.py | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/docs/formats.rst b/docs/formats.rst index da98b5db..adabc80b 100644 --- a/docs/formats.rst +++ b/docs/formats.rst @@ -235,9 +235,14 @@ can set to a number of lines that should be skipped before starting to read data. The ``export_set()`` method supports a ``column_width`` parameter. Depending on the -value you pass, the column width will be set accordingly. It can be either None, int, or "adapt". -If "adapt" is passed, the column width will be unique for every column and will be -calculated based on values' length +value you pass, the column width will be set accordingly. It can be either None, int, or "adaptive". +If "adaptive" is passed, the column width will be unique for every column and will be +calculated based on values' length. Example of usage + +```python3 +data = tablib.Dataset() +data.export('xlsx', column_width='adaptive') +``` .. 
versionchanged:: 3.3.0 diff --git a/tests/test_tablib.py b/tests/test_tablib.py index 0ee4e86d..63afebed 100755 --- a/tests/test_tablib.py +++ b/tests/test_tablib.py @@ -9,9 +9,11 @@ from collections import OrderedDict from io import BytesIO, StringIO from pathlib import Path +from tempfile import TemporaryFile from uuid import uuid4 from MarkupPy import markup +from openpyxl import load_workbook import tablib from tablib.core import Row, detect_format @@ -1092,7 +1094,25 @@ def test_xlsx_bad_dimensions(self): data = tablib.Dataset().load(fh, read_only=False) self.assertEqual(data.height, 3) + def test_xlsx_column_width(self): + """check that column width adapts to value length""" + def _get_width(data): + xlsx_content = data.export('xlsx') + wb = load_workbook(filename=BytesIO(xlsx_content)) + ws = wb.active + return ws.column_dimensions['A'].width + xls_source = Path(__file__).parent / 'files' / 'xlsx_cell_values.xlsx' + with xls_source.open('rb') as fh: + data = tablib.Dataset().load(fh) + width_before = _get_width(data) + data.append([ + 'verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue', + ]) + width_after = _get_width(data) + + assert width_before != width_after + class JSONTests(BaseTestCase): def test_json_format_detect(self): """Test JSON format detection.""" From 79f35934bcec50f56513dd9f6a2299e1e64331eb Mon Sep 17 00:00:00 2001 From: Egor Date: Fri, 1 Apr 2022 15:02:06 +0300 Subject: [PATCH 05/12] chore: change assert style --- tests/test_tablib.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_tablib.py b/tests/test_tablib.py index dd64c504..d5060312 100755 --- a/tests/test_tablib.py +++ b/tests/test_tablib.py @@ -1118,8 +1118,8 @@ def _get_width(data): 'verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue', ]) width_after = _get_width(data) - - assert width_before != width_after + + 
self.assertNotEqual(width_before, width_after) class JSONTests(BaseTestCase): def test_json_format_detect(self): From 7e594544ce94ccbb785eca288e07f2632dd1c59a Mon Sep 17 00:00:00 2001 From: Egor Date: Fri, 1 Apr 2022 15:44:31 +0300 Subject: [PATCH 06/12] Update src/tablib/formats/_xlsx.py Co-authored-by: Hugo van Kemenade --- src/tablib/formats/_xlsx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py index 92bb2994..3dc3dedb 100644 --- a/src/tablib/formats/_xlsx.py +++ b/src/tablib/formats/_xlsx.py @@ -46,7 +46,7 @@ def export_set( sheet name (http://www.excelcodex.com/2012/06/worksheets-naming-conventions/), they will be replaced with `invalid_char_subst`. - column_width: can have int, None, or "adaptive" as a value + column_width: can be None, an integer, or "adaptive" """ wb = Workbook() ws = wb.worksheets[0] From 74eacdb23e94fa04056ccdc8fc5010c42992c1ae Mon Sep 17 00:00:00 2001 From: Egor Date: Fri, 1 Apr 2022 15:44:38 +0300 Subject: [PATCH 07/12] Update src/tablib/formats/_xlsx.py Co-authored-by: Hugo van Kemenade --- src/tablib/formats/_xlsx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py index 3dc3dedb..26f53dd4 100644 --- a/src/tablib/formats/_xlsx.py +++ b/src/tablib/formats/_xlsx.py @@ -59,7 +59,7 @@ def export_set( cls.dset_sheet(dataset, ws, freeze_panes=freeze_panes) if isinstance(column_width, str) and column_width != "adaptive": raise ValueError(f"Unsupported value `{column_width}` passed to `column_width` " - f"parameter. It supports 'adaptive' or integer values") + "parameter. 
It supports 'adaptive' or integer values") cls._adapt_column_width(ws, column_width) From bed476cbb1a12ac71ee3b2f399c01f7292557561 Mon Sep 17 00:00:00 2001 From: Egor Date: Fri, 1 Apr 2022 15:24:26 +0300 Subject: [PATCH 08/12] chore: alphabetical order in AUTHORS --- AUTHORS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index c160ac7f..819eef27 100644 --- a/AUTHORS +++ b/AUTHORS @@ -9,6 +9,7 @@ Here is a list of passed and present much-appreciated contributors: Bruno Soares Claude Paroz Daniel Santos + Egor Osokin Erik Youngren Hugo van Kemenade Iuri de Silvio @@ -32,4 +33,3 @@ Here is a list of passed and present much-appreciated contributors: Tommy Anthony Tsuyoshi Hombashi Tushar Makkar - Egor Osokin From 45ae8ca6654259af58b7216925d63fc578258ae0 Mon Sep 17 00:00:00 2001 From: Egor Date: Fri, 1 Apr 2022 15:44:45 +0300 Subject: [PATCH 09/12] chore: docs --- docs/formats.rst | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/formats.rst b/docs/formats.rst index adabc80b..ab904a0c 100644 --- a/docs/formats.rst +++ b/docs/formats.rst @@ -235,14 +235,13 @@ can set to a number of lines that should be skipped before starting to read data. The ``export_set()`` method supports a ``column_width`` parameter. Depending on the -value you pass, the column width will be set accordingly. It can be either None, int, or "adaptive". +value you pass, the column width will be set accordingly. It can be either ``None``, an integer, or "adaptive". If "adaptive" is passed, the column width will be unique for every column and will be -calculated based on values' length. Example of usage +calculated based on values' length. For example:: + + data = tablib.Dataset() + data.export('xlsx', column_width='adaptive') -```python3 -data = tablib.Dataset() -data.export('xlsx', column_width='adaptive') -``` .. 
versionchanged:: 3.3.0 From 5a458ac43138ae010fb499618f787cd8d4e5195f Mon Sep 17 00:00:00 2001 From: Egor Date: Fri, 1 Apr 2022 15:50:41 +0300 Subject: [PATCH 10/12] chore: add tests and fix bug --- src/tablib/formats/_xlsx.py | 2 +- tests/test_tablib.py | 47 +++++++++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py index 26f53dd4..1d0379d5 100644 --- a/src/tablib/formats/_xlsx.py +++ b/src/tablib/formats/_xlsx.py @@ -195,7 +195,7 @@ def _adapt_column_width(cls, worksheet, else: column_widths += [len(cell)] else: - column_widths = [width] * len(worksheet.values) + column_widths = [width] * worksheet.max_column for i, column_width in enumerate(column_widths, 1): # start at 1 worksheet.column_dimensions[get_column_letter(i)].width = column_width diff --git a/tests/test_tablib.py b/tests/test_tablib.py index d5060312..69148a79 100755 --- a/tests/test_tablib.py +++ b/tests/test_tablib.py @@ -1102,10 +1102,30 @@ def test_xlsx_bad_dimensions(self): data = tablib.Dataset().load(fh, read_only=False) self.assertEqual(data.height, 3) - def test_xlsx_column_width(self): + def test_xlsx_column_width_none(self): + """check that column width adapts to value length""" + + def _get_width(data): + xlsx_content = data.export('xlsx', column_width=None) + wb = load_workbook(filename=BytesIO(xlsx_content)) + ws = wb.active + return ws.column_dimensions['A'].width + + xls_source = Path(__file__).parent / 'files' / 'xlsx_cell_values.xlsx' + with xls_source.open('rb') as fh: + data = tablib.Dataset().load(fh) + width_before = _get_width(data) + data.append([ + 'verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue', + ]) + width_after = _get_width(data) + + self.assertEqual(width_before, width_after) + + def test_xlsx_column_width_adaptive(self): """check that column width adapts to value length""" def _get_width(data): - 
xlsx_content = data.export('xlsx') + xlsx_content = data.export('xlsx', column_width='adaptive') wb = load_workbook(filename=BytesIO(xlsx_content)) ws = wb.active return ws.column_dimensions['A'].width @@ -1120,7 +1140,30 @@ def _get_width(data): width_after = _get_width(data) self.assertNotEqual(width_before, width_after) + + def test_xlsx_column_width_integer(self): + """check that column width adapts to value length""" + _some_integer = 10 + def _get_width(data): + xlsx_content = data.export('xlsx', column_width=_some_integer) + wb = load_workbook(filename=BytesIO(xlsx_content)) + ws = wb.active + return ws.column_dimensions['A'].width + + xls_source = Path(__file__).parent / 'files' / 'xlsx_cell_values.xlsx' + with xls_source.open('rb') as fh: + data = tablib.Dataset().load(fh) + width_before = _get_width(data) + data.append([ + 'verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue', + ]) + width_after = _get_width(data) + + self.assertEqual(_some_integer, width_before) + self.assertEqual(_some_integer, width_after) + + class JSONTests(BaseTestCase): def test_json_format_detect(self): """Test JSON format detection.""" From 2cec185b03f3c58fc174468e896632aa1e4b3497 Mon Sep 17 00:00:00 2001 From: Egor Date: Wed, 8 Jun 2022 14:25:59 +0300 Subject: [PATCH 11/12] refactor: helper function for tests and new test case for error Co-authored-by: Hugo van Kemenade --- tests/test_tablib.py | 70 ++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 45 deletions(-) diff --git a/tests/test_tablib.py b/tests/test_tablib.py index 69148a79..d32cb33c 100755 --- a/tests/test_tablib.py +++ b/tests/test_tablib.py @@ -1102,11 +1102,10 @@ def test_xlsx_bad_dimensions(self): data = tablib.Dataset().load(fh, read_only=False) self.assertEqual(data.height, 3) - def test_xlsx_column_width_none(self): + def _helper_export_column_width(self, input_arg): """check that column width adapts to value length""" - - 
def _get_width(data): - xlsx_content = data.export('xlsx', column_width=None) + def _get_width(data, input_arg): + xlsx_content = data.export('xlsx', column_width=input_arg) wb = load_workbook(filename=BytesIO(xlsx_content)) ws = wb.active return ws.column_dimensions['A'].width @@ -1114,54 +1113,35 @@ def _get_width(data): xls_source = Path(__file__).parent / 'files' / 'xlsx_cell_values.xlsx' with xls_source.open('rb') as fh: data = tablib.Dataset().load(fh) - width_before = _get_width(data) + width_before = _get_width(data, input_arg) data.append([ 'verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue', ]) - width_after = _get_width(data) + width_after = _get_width(data, width_before) + return width_before, width_after - self.assertEqual(width_before, width_after) - - def test_xlsx_column_width_adaptive(self): - """check that column width adapts to value length""" - def _get_width(data): - xlsx_content = data.export('xlsx', column_width='adaptive') - wb = load_workbook(filename=BytesIO(xlsx_content)) - ws = wb.active - return ws.column_dimensions['A'].width + def test_xlsx_column_width_none(self): + """check column width with None""" + width_before, width_after = self._helper_export_column_width(None) + self.assertEqual(width_before, 13) + self.assertEqual(width_after, 13) - xls_source = Path(__file__).parent / 'files' / 'xlsx_cell_values.xlsx' - with xls_source.open('rb') as fh: - data = tablib.Dataset().load(fh) - width_before = _get_width(data) - data.append([ - 'verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue', - ]) - width_after = _get_width(data) - - self.assertNotEqual(width_before, width_after) + def test_xlsx_column_width_adaptive(self): + """check column width with 'adaptive'""" + width_before, width_after = self._helper_export_column_width("adaptive") + self.assertEqual(width_before, 11) + self.assertEqual(width_after, 11) def 
test_xlsx_column_width_integer(self): - """check that column width adapts to value length""" - _some_integer = 10 - - def _get_width(data): - xlsx_content = data.export('xlsx', column_width=_some_integer) - wb = load_workbook(filename=BytesIO(xlsx_content)) - ws = wb.active - return ws.column_dimensions['A'].width - - xls_source = Path(__file__).parent / 'files' / 'xlsx_cell_values.xlsx' - with xls_source.open('rb') as fh: - data = tablib.Dataset().load(fh) - width_before = _get_width(data) - data.append([ - 'verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue-verylongvalue', - ]) - width_after = _get_width(data) - - self.assertEqual(_some_integer, width_before) - self.assertEqual(_some_integer, width_after) + """check column width with an integer""" + width_before, width_after = self._helper_export_column_width(10) + self.assertEqual(width_before, 10) + self.assertEqual(width_after, 10) + + def test_xlsx_column_width_value_error(self): + """check column width with invalid input""" + with self.assertRaises(ValueError): + self._helper_export_column_width("invalid input") class JSONTests(BaseTestCase): From 0f7e2efdafd89a9fe6bfadabca488c6c0737c841 Mon Sep 17 00:00:00 2001 From: Egor Date: Wed, 21 Dec 2022 13:34:48 +0300 Subject: [PATCH 12/12] chore: use from __future__ import annotations --- src/tablib/formats/_xlsx.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py index 1d0379d5..bd200da9 100644 --- a/src/tablib/formats/_xlsx.py +++ b/src/tablib/formats/_xlsx.py @@ -1,9 +1,9 @@ """ Tablib - XLSX Support. 
""" +from __future__ import annotations import re from io import BytesIO -from typing import Optional, Union from openpyxl.reader.excel import ExcelReader, load_workbook from openpyxl.styles import Alignment, Font @@ -38,7 +38,7 @@ def detect(cls, stream): @classmethod def export_set( cls, dataset, freeze_panes=True, invalid_char_subst="-", - column_width: Optional[Union[str, int]] = "adaptive" + column_width: str | int | None = "adaptive" ): """Returns XLSX representation of Dataset. @@ -180,7 +180,7 @@ def dset_sheet(cls, dataset, ws, freeze_panes=True): @classmethod def _adapt_column_width(cls, worksheet, - width: Optional[Union[str, int]]) -> None: + width: str | int | None) -> None: if width is None: return