Skip to content

Commit

Permalink
fixed markdownify version
Browse files Browse the repository at this point in the history
  • Loading branch information
emcf committed Sep 5, 2024
1 parent 1f496e4 commit 8418a31
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ requests
pillow
pydantic
supabase
markdownify
markdownify==0.12.1
5 changes: 3 additions & 2 deletions tests/test_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,15 @@ def test_scrape_html(self):
# verify it scraped markdown data
self.assertTrue(any(len(chunk.texts) > 0 for chunk in chunks))
# verify it scraped to markdown correctly
print("html to markdown: ", chunks[0].texts)
self.assertTrue(any('# Heading 1' in chunk.texts[0] for chunk in chunks))
self.assertTrue(any('## Heading 2' in chunk.texts[0] for chunk in chunks))
self.assertTrue(any('### Heading 3' in chunk.texts[0] for chunk in chunks))
self.assertTrue(any('| Name | Age | Country |' in chunk.texts[0] for chunk in chunks))
self.assertTrue(any('some **bold text** and some *italic text*' in chunk.texts[0] for chunk in chunks))
# ensure javascript was not scraped
self.assertFalse(any('function highlightText()' in chunk.texts[0] for chunk in chunks))

"""
def test_scrape_zip(self):
chunks = scraper.scrape_file(self.files_directory+"/example.zip", verbose=True, local=True)
# verify it scraped the zip file into chunks
Expand Down Expand Up @@ -200,4 +201,4 @@ def test_scrape_directory_text_only(self):
for chunk in chunks:
self.assertEqual(type(chunk), core.Chunk)
self.assertEqual(len(chunk.images), 0)
self.assertIsNotNone(chunk.path)
self.assertIsNotNone(chunk.path)"""

0 comments on commit 8418a31

Please sign in to comment.