diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py
index 655e74c5..0ce0b5c3 100644
--- a/tablib/formats/_html.py
+++ b/tablib/formats/_html.py
@@ -11,6 +11,7 @@ else:
     from cStringIO import StringIO
 
 from tablib.packages import markup
+import bs4
 import tablib
 from tablib.compat import unicode
 
@@ -68,3 +69,37 @@ def export_book(databook):
         wrapper.write('\n')
 
     return stream.getvalue().decode('utf-8')
+
+
+def import_set(dset, in_stream, **kwargs):
+    """Load the single HTML table read from ``in_stream`` into ``dset``.
+
+    ``dset`` is wiped first. The ``<th>`` cells of the first ``<thead>``
+    row, if any, are assigned to ``dset.headers``. The first ``<tr>`` is
+    skipped when headers ended up set; the ``<td>`` cells of each
+    remaining ``<tr>`` are appended as one row.
+
+    Raises ``ValueError`` unless the markup has exactly one ``<table>``.
+    """
+    dset.wipe()
+    text = in_stream.read()
+    tables = bs4.BeautifulSoup(markup=text).find_all('table')
+    if len(tables) != 1:
+        raise ValueError('Expected 1 table, found %s' % len(tables))
+    table = tables[0]
+
+    # <thead> is optional, and bs4 returns None for a missing child tag.
+    thead = table.thead
+    header_row = thead.tr if thead is not None else None
+    if header_row is not None:
+        dset.headers = [
+            x.string for x in header_row.find_all('th', recursive=False)]
+
+    # this finds rows inside <thead>, <tbody>, <tfoot> also.
+    for i, row in enumerate(table.find_all('tr')):
+        # skip first row if it was used for the headers
+        if i == 0 and dset.headers:
+            continue
+        dset.append(
+            [cell.get_text() for cell in row.find_all('td', recursive=False)])
+
diff --git a/test_tablib.py b/test_tablib.py
index be41ee71..cfaa791c 100755
--- a/test_tablib.py
+++ b/test_tablib.py
@@ -6,6 +6,12 @@ import unittest
 import sys
 import os
+
+try:
+    from StringIO import StringIO
+except ImportError:  # Python 3 has no StringIO module
+    from io import StringIO
+
 import tablib
 from tablib.compat import markup, unicode, is_py3
 from tablib.core import Row
 
@@ -296,6 +302,58 @@ def test_html_export_none_value(self):
 
         self.assertEqual(html, d.html)
 
+    def test_html_import(self):
+        """HTML import with a header row"""
+        html = markup.page()
+        html.table.open()
+        html.thead.open()
+
+        html.tr(markup.oneliner.th(self.founders.headers))
+        html.thead.close()
+
+        for founder in self.founders:
+            html.tr(markup.oneliner.td(founder))
+
+        html.table.close()
+        html = StringIO(str(html))
+
+        data.html = html
+
+        self.assertEqual(['first_name', 'last_name', 'gpa'], data.headers)
+        self.assertEqual([
+            ('John', 'Adams', '90'),
+            ('George', 'Washington', '67'),
+            ('Thomas', 'Jefferson', '50'),
+        ], data[:])
+
+    def test_html_import_no_headers(self):
+        """HTML import of a table without a <thead>"""
+        html = markup.page()
+        html.table.open()
+
+        for founder in self.founders:
+            html.tr(markup.oneliner.td(founder))
+
+        html.table.close()
+        html = StringIO(str(html))
+
+        data.html = html
+
+        self.assertEqual(None, data.headers)
+        self.assertEqual([
+            ('John', 'Adams', '90'),
+            ('George', 'Washington', '67'),
+            ('Thomas', 'Jefferson', '50'),
+        ], data[:])
+
+    def test_html_import_no_table(self):
+        """HTML import rejects markup with no <table>"""
+        html = StringIO(str(markup.page()))
+
+        with self.assertRaises(ValueError) as e:
+            data.html = html
+        self.assertEqual('Expected 1 table, found 0', str(e.exception))
+
     def test_latex_export(self):
         """LaTeX export"""
 