From 5a000bc8bda0293de74b00b33b3610f94cc68b94 Mon Sep 17 00:00:00 2001 From: Fedor Chervinskii Date: Sun, 3 Dec 2023 09:54:20 +0100 Subject: [PATCH] speed up table extraction --- fitz/table.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fitz/table.py b/fitz/table.py index d42097e88..1154fdf1c 100644 --- a/fitz/table.py +++ b/fitz/table.py @@ -1214,7 +1214,11 @@ def get_text(cell): cell = Rect(cell) # we need a Rect object text = "" # result text for block in TEXTPAGE.extractRAWDICT()["blocks"]: + if Rect(block["bbox"]).intersect(cell).is_empty: + continue for line in block["lines"]: + if Rect(line["bbox"]).intersect(cell).is_empty: + continue for span in line["spans"]: chars = span["chars"] if text and chars: