Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTML input: formats.html.import_set #245

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions tablib/formats/_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
else:
from cStringIO import StringIO
from tablib.packages import markup
import bs4

import tablib
from tablib.compat import unicode
Expand Down Expand Up @@ -68,3 +69,25 @@ def export_book(databook):
wrapper.write('\n')

return stream.getvalue().decode('utf-8')


def import_set(dset, in_stream, **kwargs):
    """Populate ``dset`` from the single HTML ``<table>`` read from ``in_stream``.

    The dataset is wiped first. If the table has a ``<thead>`` whose first
    ``<tr>`` holds ``<th>`` cells, their text becomes the headers and that
    row is not imported as data. Every other ``<tr>`` in the table (whether
    it sits in ``<thead>``, ``<tbody>``, ``<tfoot>`` or directly under
    ``<table>``) is appended as one row built from its direct ``<td>``
    children.

    :param dset: dataset object to fill; must provide ``wipe()``,
        ``append()`` and a ``headers`` attribute.
    :param in_stream: file-like object whose ``read()`` returns the HTML.
    :param kwargs: accepted for interface compatibility; unused here.
    :raises ValueError: if the document does not contain exactly one
        ``<table>`` element.
    """
    dset.wipe()
    text = in_stream.read()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers (lxml, html5lib) happen to be installed.
    tables = bs4.BeautifulSoup(text, 'html.parser').find_all('table')
    if len(tables) != 1:
        raise ValueError('Expected 1 table, found %s' % len(tables))
    table = tables[0]

    # A table without <thead> makes ``table.thead`` None; guard before
    # reaching for its first row.
    thead = table.thead
    header_row = thead.tr if thead is not None else None

    if header_row is not None:
        # get_text() (rather than .string) so a <th> with nested markup
        # still yields its text instead of None.
        dset.headers = [
            th.get_text()
            for th in header_row.find_all('th', recursive=False)]

    # find_all('tr') also returns rows nested in <thead>/<tbody>/<tfoot>.
    for row in table.find_all('tr'):
        # Skip the row that supplied the headers. Compare by identity:
        # bs4 tag equality is structural, so ``==`` could also match a
        # data row that merely looks the same.
        if row is header_row and dset.headers:
            continue
        dset.append(
            [cell.get_text() for cell in row.find_all('td', recursive=False)])

53 changes: 53 additions & 0 deletions test_tablib.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
import unittest
import sys
import os

from StringIO import StringIO

import tablib
from tablib.compat import markup, unicode, is_py3
from tablib.core import Row
Expand Down Expand Up @@ -296,6 +299,56 @@ def test_html_export_none_value(self):

self.assertEqual(html, d.html)

def test_html_import(self):
    """HTML import of a table whose header row sits inside <thead>."""
    page = markup.page()
    page.table.open()

    # Header row: one <th> per founders header, wrapped in <thead>.
    page.thead.open()
    page.tr(markup.oneliner.th(self.founders.headers))
    page.thead.close()

    # Data rows: one <tr> of <td> cells per founder.
    for record in self.founders:
        page.tr(markup.oneliner.td(record))

    page.table.close()

    # Assigning a stream to ``data.html`` triggers the import on the
    # ``data`` object, which is defined outside this method.
    stream = StringIO(str(page))
    data.html = stream

    expected_headers = ['first_name', 'last_name', 'gpa']
    # Cell values are compared as strings.
    expected_rows = [
        ('John', 'Adams', '90'),
        ('George', 'Washington', '67'),
        ('Thomas', 'Jefferson', '50'),
    ]

    self.assertEqual(expected_headers, data.headers)
    self.assertEqual(expected_rows, data[:])

def test_html_import_no_headers(self):
    """HTML import of a table that carries no <th> header cells."""
    page = markup.page()
    page.table.open()
    # NOTE(review): <thead> is opened but never closed, and only <td> rows
    # follow it -- confirm this is the intended "no headers" fixture.
    page.thead.open()

    # Data rows only: one <tr> of <td> cells per founder.
    for record in self.founders:
        page.tr(markup.oneliner.td(record))

    page.table.close()

    # Assigning a stream to ``data.html`` triggers the import on the
    # ``data`` object, which is defined outside this method.
    stream = StringIO(str(page))
    data.html = stream

    # Cell values are compared as strings.
    expected_rows = [
        ('John', 'Adams', '90'),
        ('George', 'Washington', '67'),
        ('Thomas', 'Jefferson', '50'),
    ]

    self.assertEqual(None, data.headers)
    self.assertEqual(expected_rows, data[:])

def test_html_import_no_table(self):
    """HTML import of a page with no <table> must raise ValueError."""
    # An empty markup page contains no <table> element at all.
    html = StringIO(str(markup.page()))

    with self.assertRaises(ValueError) as e:
        data.html = html

    # ``BaseException.message`` exists only on Python 2; str() of the
    # exception gives the same text on both Python 2 and Python 3.
    self.assertEqual('Expected 1 table, found 0', str(e.exception))

def test_latex_export(self):
"""LaTeX export"""

Expand Down