Skip to content

Commit

Permalink
Filter tables
Browse files Browse the repository at this point in the history
  • Loading branch information
VikParuchuri committed Oct 22, 2024
1 parent cddc918 commit 8598d63
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 7 deletions.
12 changes: 6 additions & 6 deletions marker/tables/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,19 @@ def get_table_boxes(pages: List[Page], doc: PdfDocument, fname):
pnum = page.pnum
# The bbox for the entire table
bbox = [b.bbox for b in page.layout.bboxes if b.label == "Table"]

if len(bbox) == 0:
table_counts.append(0)
img_sizes.append(None)
continue

highres_img = render_image(doc[pnum], dpi=settings.SURYA_TABLE_DPI)

page_table_imgs = []
page_bboxes = []

# Merge tables that are next to each other
bbox = merge_tables(bbox)
bbox = list(filter(lambda b: b[3] - b[1] > 10 and b[2] - b[0] > 10, bbox))

if len(bbox) == 0:
table_counts.append(0)
img_sizes.append(None)
continue

# Number of tables per page
table_counts.append(len(bbox))
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "marker-pdf"
-version = "0.3.3"
+version = "0.3.4"
description = "Convert PDF to markdown with high speed and accuracy."
authors = ["Vik Paruchuri <[email protected]>"]
readme = "README.md"
Expand Down

0 comments on commit 8598d63

Please sign in to comment.