Skip to content

Commit

Permalink
add layout merging changes
Browse files: browse the repository at this point in the history
  • Loading branch information
iammosespaulr committed Nov 14, 2024
1 parent 65b8c56 commit 63cdac1
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 32 deletions.
8 changes: 4 additions & 4 deletions marker/v2/builders/document.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
from marker.settings import settings
from marker.v2.builders import BaseBuilder
from marker.v2.builders.layout import LayoutBuilder
from marker.v2.providers import BaseProvider
from marker.v2.providers.pdf import PdfProvider
from marker.v2.schema.document import Document
from marker.v2.schema.groups.page import PageGroup
from marker.v2.schema.polygon import PolygonBox


class DocumentBuilder(BaseBuilder):
def __call__(self, provider: BaseProvider, layout_builder: LayoutBuilder):
def __call__(self, provider: PdfProvider, layout_builder: LayoutBuilder):
document = self.build_document(provider)
layout_builder(document)
layout_builder(document, provider)
return document

def build_document(self, provider: BaseProvider):
def build_document(self, provider: PdfProvider):
if provider.config.page_range is None:
page_range = range(len(provider))
else:
Expand Down
55 changes: 53 additions & 2 deletions marker/v2/builders/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@

from marker.settings import settings
from marker.v2.builders import BaseBuilder
from marker.v2.schema.blocks import LAYOUT_BLOCK_REGISTRY
from marker.v2.providers.pdf import PdfProvider
from marker.v2.schema.blocks import LAYOUT_BLOCK_REGISTRY, Block, Text
from marker.v2.schema.document import Document
from marker.v2.schema.groups.page import PageGroup
from marker.v2.schema.polygon import PolygonBox
from marker.v2.schema.text.line import Line


class LayoutBuilder(BaseBuilder):
Expand All @@ -17,9 +19,10 @@ def __init__(self, layout_model, config=None):

super().__init__(config)

def __call__(self, document: Document):
def __call__(self, document: Document, provider: PdfProvider):
layout_results = self.surya_layout(document.pages)
self.add_blocks_to_pages(document.pages, layout_results)
self.merge_blocks(document.pages, provider)

@classmethod
def get_batch_size(cls):
Expand All @@ -44,3 +47,51 @@ def add_blocks_to_pages(self, pages: List[PageGroup], layout_results: List[Layou
for bbox in sorted(layout_result.bboxes, key=lambda x: x.position):
block_cls = LAYOUT_BLOCK_REGISTRY[bbox.label]
page.add_block(block_cls, PolygonBox(polygon=bbox.polygon))

def merge_blocks(self, document_pages: List[PageGroup], provider: PdfProvider):
    """Attach every provider text line to a block on its page.

    For each page, the provider's lines are rescaled from the PDF page size
    to the size of the page polygon and then assigned in three passes:

    1. A line goes to the block it overlaps most (highest
       ``intersection_pct``), provided that overlap is greater than zero.
    2. A line with no overlap goes to the block with the smallest
       ``center_distance`` among blocks whose ``block_type`` is set.
    3. A line that still has no home gets a new ``Text`` block built from
       its own polygon.

    In every case the line itself is registered on the page with
    ``add_full_block`` and referenced from the chosen block with
    ``add_structure``.

    Args:
        document_pages: Pages that already carry their layout blocks.
        provider: Source of ``page_lines`` and of the PDF pages (``doc``)
            used to obtain the original page size.
    """
    provider_page_lines = provider.page_lines
    # NOTE(review): pages are paired with ``page_lines`` positionally and the
    # PDF page is looked up by that same position; confirm this holds when a
    # page range is configured.
    for idx, (document_page, provider_lines) in enumerate(zip(document_pages, provider_page_lines.values())):
        all_line_idxs = set(range(len(provider_lines)))
        page_size = provider.doc[idx].get_size()
        target_size = document_page.polygon.size

        # ``rescale`` rewrites the polygon in place, so it must run exactly
        # once per line. Doing it inside the per-block loop would compound
        # the scaling for every block on the page and skip it entirely on
        # pages without blocks.
        for line in provider_lines:
            line.polygon.rescale(page_size, target_size)

        # Pass 1a: find, for each line, the block it overlaps the most.
        # Ties keep the earliest block because only a strictly larger
        # overlap replaces the current best.
        max_intersections = {}
        for line_idx, line in enumerate(provider_lines):
            # ``children`` is None until the first block is added.
            for block_idx, block in enumerate(document_page.children or []):
                intersection_pct = line.polygon.intersection_pct(block.polygon)
                best = max_intersections.get(line_idx)
                if best is None or intersection_pct > best[0]:
                    max_intersections[line_idx] = (intersection_pct, block_idx)

        # Pass 1b: attach overlapping lines. Lines are appended to the end of
        # ``children``, so the block indices recorded above stay valid.
        assigned_line_idxs = set()
        for line_idx, line in enumerate(provider_lines):
            best = max_intersections.get(line_idx)
            if best is not None and best[0] > 0.0:
                document_page.add_full_block(line)
                block: Block = document_page.children[best[1]]
                block.add_structure(line)
                assigned_line_idxs.add(line_idx)

        # Pass 2: lines without any overlap fall back to the nearest block.
        # Sorted so block ids are handed out in a deterministic order.
        for line_idx in sorted(all_line_idxs - assigned_line_idxs):
            min_dist = None
            min_dist_idx = None
            line: Line = provider_lines[line_idx]
            # NOTE(review): ``children`` now also holds the lines added
            # above, so a line can be picked as the nearest "block" unless
            # the ``block_type`` check filters it out. Confirm.
            for block_idx, block in enumerate(document_page.children or []):
                # The line index and the child index come from unrelated
                # sequences, so they are deliberately not compared here.
                if block.block_type is None:
                    continue
                dist = line.polygon.center_distance(block.polygon)
                if min_dist_idx is None or dist < min_dist:
                    min_dist = dist
                    min_dist_idx = block_idx

            if min_dist_idx is not None:
                document_page.add_full_block(line)
                nearest_block = document_page.children[min_dist_idx]
                nearest_block.add_structure(line)
                assigned_line_idxs.add(line_idx)

        # Pass 3: no candidate block at all, so wrap the line in its own
        # Text block.
        for line_idx in sorted(all_line_idxs - assigned_line_idxs):
            line: Line = provider_lines[line_idx]
            document_page.add_full_block(line)
            text_block = document_page.add_block(Text, polygon=line.polygon)
            text_block.add_structure(line)
2 changes: 2 additions & 0 deletions marker/v2/providers/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ def __init__(self, filepath: str, config: PdfProviderConfig):
self.config = config
self.page_lines: Dict[int, List[Line]] = {}

self.doc: pdfium.PdfDocument

self.setup()

def __len__(self) -> int:
Expand Down
12 changes: 9 additions & 3 deletions marker/v2/schema/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,14 @@ class Block(BaseModel):

@property
def _id(self):
page_path = f"/page/{self.pnum}"
if self.block_num is not None:
return f"{page_path}/block/{self.block_num}"
page_path = f"/page/{self.page_id}"
if self.block_id is not None:
return f"{page_path}/block/{self.block_id}"
else:
return page_path

def add_structure(self, block: "Block"):
    """Record ``block`` as part of this block's structure.

    The child is stored by reference: its ``_id`` path is appended to
    ``self.structure``, and the list is created on first use.

    Args:
        block: The child block whose ``_id`` should be recorded.
    """
    # The annotation is quoted because ``Block`` is still being defined when
    # this method is created; the quoted form works with or without
    # postponed annotation evaluation.
    if self.structure is None:
        self.structure = []
    self.structure.append(block._id)
36 changes: 23 additions & 13 deletions marker/v2/schema/groups/page.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from typing import List

from marker.v2.schema import Block
from PIL import Image

from marker.v2.schema.blocks import LAYOUT_BLOCK_REGISTRY
from marker.v2.schema import Block
from marker.v2.schema.polygon import PolygonBox


Expand All @@ -13,22 +12,33 @@ class PageGroup(Block):
highres_image: Image.Image | None = None
children: List[Block] | None = None

def add_block(self, block_cls: Block, polygon: PolygonBox) -> Block:
max_id = max([b.block_id for b in self.children or []], default=0)
def incr_block_id(self):
    """Advance the block counter kept in ``self.block_id``.

    The first call sets the counter to 0; each later call adds one.
    """
    self.block_id = 0 if self.block_id is None else self.block_id + 1

def add_child(self, block: Block):
    """Append ``block`` to ``self.children``, creating the list if needed."""
    if self.children is not None:
        self.children.append(block)
        return
    # No children yet: start the list with this block.
    self.children = [block]

def add_block(self, block_cls: type[Block], polygon: PolygonBox) -> Block:
self.incr_block_id()
block = block_cls(
polygon=polygon,
block_id=max_id + 1,
block_id=self.block_id,
page_id=self.page_id,
)
if isinstance(self.children, list):
self.children.append(block)
else:
self.children = [block]
self.add_child(block)
return block

def add_full_block(self, block: Block) -> Block:
    """Register an already-built block as a child of this page.

    The page's block counter is advanced, the new value is written to
    ``block.block_id``, and the block is appended to the page's children.

    Returns:
        The same ``block`` that was passed in, now carrying its id.
    """
    self.incr_block_id()
    assigned_id = self.block_id
    block.block_id = assigned_id
    self.add_child(block)
    return block

def get_block(self, block_id: str) -> Block | None:
for block in self.children:
if block._id == block_id:
return block
def get_block(self, block_id: int) -> Block | None:
return self.children[block_id]
24 changes: 14 additions & 10 deletions marker/v2/schema/polygon.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,12 @@ def area(self):
return self.width * self.height

@property
def center(bbox):
return [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2]
def center(self):
return [(self.bbox[0] + self.bbox[2]) / 2, (self.bbox[1] + self.bbox[3]) / 2]

@property
def size(self):
    """Return the box dimensions as a ``[width, height]`` list."""
    dimensions = [self.width, self.height]
    return dimensions

@computed_field
@property
Expand Down Expand Up @@ -72,8 +76,8 @@ def rescale(self, processor_size, image_size):

new_corners = copy.deepcopy(self.polygon)
for corner in new_corners:
corner[0] = int(corner[0] * width_scaler)
corner[1] = int(corner[1] * height_scaler)
corner[0] = corner[0] * width_scaler
corner[1] = corner[1] * height_scaler
self.polygon = new_corners

def fit_to_bounds(self, bounds):
Expand All @@ -83,23 +87,23 @@ def fit_to_bounds(self, bounds):
corner[1] = max(min(corner[1], bounds[3]), bounds[1])
self.polygon = new_corners

def merge(self, other):
def merge(self, other: PolygonBox):
x1 = min(self.bbox[0], other.bbox[0])
y1 = min(self.bbox[1], other.bbox[1])
x2 = max(self.bbox[2], other.bbox[2])
y2 = max(self.bbox[3], other.bbox[3])
self.polygon = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]

def overlap_x(self, other):
def overlap_x(self, other: PolygonBox):
return max(0, min(self.bbox[2], other.bbox[2]) - max(self.bbox[0], other.bbox[0]))

def overlap_y(self, other):
def overlap_y(self, other: PolygonBox):
return max(0, min(self.bbox[3], other.bbox[3]) - max(self.bbox[1], other.bbox[1]))

def intersection_area(self, other):
def intersection_area(self, other: PolygonBox):
return self.overlap_x(other) * self.overlap_y(other)

def intersection_pct(self, other, x_margin=0, y_margin=0):
def intersection_pct(self, other: PolygonBox, x_margin=0, y_margin=0):
assert 0 <= x_margin <= 1
assert 0 <= y_margin <= 1
if self.area == 0:
Expand All @@ -110,7 +114,7 @@ def intersection_pct(self, other, x_margin=0, y_margin=0):
if y_margin:
y_margin = int(min(self.height, other.height) * y_margin)

intersection = self.intersection_area(other, x_margin, y_margin)
intersection = self.intersection_area(other)
return intersection / self.area

@classmethod
Expand Down

0 comments on commit 63cdac1

Please sign in to comment.