From fcee87132acce5f81e253b4caf1337e413392476 Mon Sep 17 00:00:00 2001
From: klothe <klothe@users.noreply.github.com>
Date: Tue, 5 Jul 2016 13:53:24 -0400
Subject: [PATCH 1/3] add html.import_set and tests

---
 tablib/formats/_html.py | 25 +++++++++++++++++++
 test_tablib.py          | 53 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+)
diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py
index 655e74c5..f72e6fe7 100644
--- a/tablib/formats/_html.py
+++ b/tablib/formats/_html.py
@@ -11,6 +11,7 @@
 else:
     from cStringIO import StringIO
     from tablib.packages import markup
+import bs4
 
 import tablib
 from tablib.compat import unicode
@@ -68,3 +69,27 @@ def export_book(databook):
         wrapper.write('\n')
 
     return stream.getvalue().decode('utf-8')
+
+
+def import_set(dset, in_stream, headers=True, **kwargs):
+    dset.wipe()
+    text = in_stream.read()
+    tables = bs4.BeautifulSoup(markup=text).find_all('table')
+    if len(tables) != 1:
+        raise ValueError('Expected 1 table, found %s' % len(tables))
+    table = tables[0]
+
+    # table.thead is None when the markup has no <thead>; check it before .tr
+    if table.thead and table.thead.tr:
+        dset.headers = [
+            x.string for x in table.thead.tr.find_all('th', recursive=False)]
+
+    # find_all('tr') recurses into <thead> too; skip its row if used above.
+    for i, row in enumerate(table.find_all('tr')):
+        if i == 0 and dset.headers:
+            continue
+        dset.append(
+            [cell.get_text() for cell in row.find_all('td', recursive=False)])
+
+    print dset
+
diff --git a/test_tablib.py b/test_tablib.py
index be41ee71..cfaa791c 100755
--- a/test_tablib.py
+++ b/test_tablib.py
@@ -6,6 +6,9 @@
 import unittest
 import sys
 import os
+
+from StringIO import StringIO
+
 import tablib
 from tablib.compat import markup, unicode, is_py3
 from tablib.core import Row
@@ -296,6 +299,56 @@ def test_html_export_none_value(self):
 
         self.assertEqual(html, d.html)
 
+    def test_html_import(self):
+        """HTML import of a table with a <thead> header row."""
+        page = markup.page()
+        page.table.open()
+        # Header cells go inside an explicit <thead> element.
+        page.thead.open()
+        page.tr(markup.oneliner.th(self.founders.headers))
+        page.thead.close()
+        # Data rows are written directly under <table>.
+        for founder in self.founders:
+            page.tr(markup.oneliner.td(founder))
+        page.table.close()
+
+        data.html = StringIO(str(page))
+
+        expected_rows = [
+            ('John', 'Adams', '90'),
+            ('George', 'Washington', '67'),
+            ('Thomas', 'Jefferson', '50'),
+        ]
+        self.assertEqual(['first_name', 'last_name', 'gpa'], data.headers)
+        self.assertEqual(expected_rows, data[:])
+
+    def test_html_import_no_headers(self):
+        """HTML import of a table that has no <th> header cells."""
+        page = markup.page()
+        page.table.open()
+        # NOTE(review): <thead> is opened here but never closed -- confirm.
+        page.thead.open()
+        for founder in self.founders:
+            page.tr(markup.oneliner.td(founder))
+        page.table.close()
+
+        data.html = StringIO(str(page))
+
+        expected_rows = [
+            ('John', 'Adams', '90'),
+            ('George', 'Washington', '67'),
+            ('Thomas', 'Jefferson', '50'),
+        ]
+        self.assertEqual(None, data.headers)
+        self.assertEqual(expected_rows, data[:])
+
+    def test_html_import_no_table(self):
+        """HTML import rejects markup that contains no table."""
+        html = StringIO(str(markup.page()))
+        with self.assertRaises(ValueError) as e:
+            data.html = html
+        self.assertEqual('Expected 1 table, found 0', str(e.exception))
+
     def test_latex_export(self):
         """LaTeX export"""
 

From 59898105e6b48972a38cff6f9efde3e5d764c6f8 Mon Sep 17 00:00:00 2001
From: klothe <klothe@users.noreply.github.com>
Date: Tue, 5 Jul 2016 14:09:55 -0400
Subject: [PATCH 2/3] remove unused argument to html.import_set

---
 tablib/formats/_html.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py
index f72e6fe7..8389b944 100644
--- a/tablib/formats/_html.py
+++ b/tablib/formats/_html.py
@@ -71,7 +71,7 @@ def export_book(databook):
     return stream.getvalue().decode('utf-8')
 
 
-def import_set(dset, in_stream, headers=True, **kwargs):
+def import_set(dset, in_stream, **kwargs):
     dset.wipe()
     text = in_stream.read()
     tables = bs4.BeautifulSoup(markup=text).find_all('table')

From 2874a2466bcc862e220dc43747db6a9a72f2dbf6 Mon Sep 17 00:00:00 2001
From: klothe <klothe@users.noreply.github.com>
Date: Tue, 5 Jul 2016 14:15:27 -0400
Subject: [PATCH 3/3] remove print

---
 tablib/formats/_html.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py
index 8389b944..0ce0b5c3 100644
--- a/tablib/formats/_html.py
+++ b/tablib/formats/_html.py
@@ -91,5 +91,3 @@ def import_set(dset, in_stream, **kwargs):
         dset.append(
             [cell.get_text() for cell in row.find_all('td', recursive=False)])
 
-    print dset
-