diff --git a/dedoc/api/api_utils.py b/dedoc/api/api_utils.py
index df8a1286..1287912d 100644
--- a/dedoc/api/api_utils.py
+++ b/dedoc/api/api_utils.py
@@ -133,7 +133,7 @@ def json2html(text: str, paragraph: TreeNode, tables: Optional[List[Table]], tab
if tables is not None and len(tables) > 0:
text += "
Tables:
"
for table in tables:
- text += __table2html(table, table2id)
+ text += table2html(table, table2id)
text += "
"
return text
@@ -201,7 +201,7 @@ def __annotations2html(paragraph: TreeNode, table2id: Dict[str, int]) -> str:
return text.replace("\n", "
")
-def __table2html(table: Table, table2id: Dict[str, int]) -> str:
+def table2html(table: Table, table2id: Dict[str, int]) -> str:
uid = table.metadata.uid
text = f" table {table2id[uid]}:
"
text += f'\n\n'
diff --git a/dedoc/readers/pdf_reader/data_classes/tables/scantable.py b/dedoc/readers/pdf_reader/data_classes/tables/scantable.py
index c7c47fe1..4bc057df 100644
--- a/dedoc/readers/pdf_reader/data_classes/tables/scantable.py
+++ b/dedoc/readers/pdf_reader/data_classes/tables/scantable.py
@@ -5,6 +5,7 @@
import numpy as np
from dedocutils.data_structures import BBox
+from dedoc.data_structures import CellWithMeta, Table, TableMetadata
from dedoc.readers.pdf_reader.data_classes.tables.cell import Cell
from dedoc.readers.pdf_reader.data_classes.tables.location import Location
@@ -27,6 +28,11 @@ def extended(self, table: "ScanTable") -> None:
# extend order
self.order = max(self.order, table.order)
+ def to_table(self) -> Table:
+ metadata = TableMetadata(page_id=self.page_number, uid=self.name, rotated_angle=self.location.rotated_angle)
+ cells_with_meta = [[CellWithMeta.create_from_cell(cell) for cell in row] for row in self.matrix_cells]
+ return Table(metadata=metadata, cells=cells_with_meta)
+
@staticmethod
def get_cells_text(attr_cells: List[List[Cell]]) -> List[List[str]]:
attrs = []
diff --git a/dedoc/readers/pdf_reader/pdf_base_reader.py b/dedoc/readers/pdf_reader/pdf_base_reader.py
index d52e0d3c..fd6ed93b 100644
--- a/dedoc/readers/pdf_reader/pdf_base_reader.py
+++ b/dedoc/readers/pdf_reader/pdf_base_reader.py
@@ -13,10 +13,7 @@
import dedoc.utils.parameter_utils as param_utils
from dedoc.attachments_extractors.concrete_attachments_extractors.pdf_attachments_extractor import PDFAttachmentsExtractor
from dedoc.common.exceptions.bad_file_error import BadFileFormatError
-from dedoc.data_structures.cell_with_meta import CellWithMeta
from dedoc.data_structures.line_with_meta import LineWithMeta
-from dedoc.data_structures.table import Table
-from dedoc.data_structures.table_metadata import TableMetadata
from dedoc.data_structures.unstructured_document import UnstructuredDocument
from dedoc.extensions import recognized_extensions, recognized_mimes
from dedoc.readers.base_reader import BaseReader
@@ -92,12 +89,7 @@ def read(self, file_path: str, parameters: Optional[dict] = None) -> Unstructure
)
lines, scan_tables, attachments, warnings, other_fields = self._parse_document(file_path, params_for_parse)
- tables = []
- for scan_table in scan_tables:
- metadata = TableMetadata(page_id=scan_table.page_number, uid=scan_table.name, rotated_angle=scan_table.location.rotated_angle)
- cells_with_meta = [[CellWithMeta.create_from_cell(cell) for cell in row] for row in scan_table.matrix_cells]
- table = Table(metadata=metadata, cells=cells_with_meta)
- tables.append(table)
+ tables = [scan_table.to_table() for scan_table in scan_tables]
if self._can_contain_attachements(file_path) and self.attachment_extractor.with_attachments(parameters):
attachments += self.attachment_extractor.extract(file_path=file_path, parameters=parameters)
diff --git a/resources/benchmarks/table_benchmark.json b/resources/benchmarks/table_benchmark.json
new file mode 100644
index 00000000..d7a9d7c6
--- /dev/null
+++ b/resources/benchmarks/table_benchmark.json
@@ -0,0 +1,16 @@
+{
+ "mode_metric_structure_only": false,
+ "mean": 0.9468374367023571,
+ "images": {
+ "example_with_table0_0.png": 0.9525583036909738,
+ "example_with_table0_1.png": 0.9264351862896008,
+ "example_with_table6.png": 0.989010989010989,
+ "example_with_table4.jpg": 0.908436211832951,
+ "example_with_table17.jpg": 0.8078952936402488,
+ "example_with_table_hor_vert_union.png": 0.9896091617933723,
+ "example_with_table1.png": 0.9781560283687943,
+ "example_with_table_horizontal_union.jpg": 0.9925757575757576,
+ "example_with_table3.png": 0.9778008866078716,
+ "example_with_table5.png": 0.9458965482130129
+ }
+}
\ No newline at end of file
diff --git a/resources/benchmarks/table_benchmark_on_generated_data.json b/resources/benchmarks/table_benchmark_on_generated_data.json
new file mode 100644
index 00000000..130bcd28
--- /dev/null
+++ b/resources/benchmarks/table_benchmark_on_generated_data.json
@@ -0,0 +1,506 @@
+{
+ "mode_metric_structure_only": true,
+ "mean": 0.9467889492889642,
+ "images": {
+ "0OEG7D5CXUSXDNEXAZ8A3.png": 0.993103448275862,
+ "0IS8OPRTM71QYN821WA5S.png": 0.9878048780487805,
+ "0KX1D4AGMTM3EWR0EF0A5.png": 0.989010989010989,
+ "0QBK1U71YOHBG5Z23MT7E.png": 0.9916666666666667,
+ "0DC57AS1OYZ1BRHZHPIO2.png": 0.96,
+ "0GJE73OG32H2P2SL2AI2J.png": 0.9905660377358491,
+ "0GYAQKWTI3LN6DNZFM2TZ.png": 0.9904761904761905,
+ "0GHKLO6LOH5LBTYEUND3S.png": 0.9917355371900827,
+ "0F831FOUA10K3594FG4IM.png": 0.9896907216494846,
+ "0XG0I2F0MMZ3QMXWLWFMX.png": 0.9935064935064936,
+ "0WMTO9U10ILEB9HCX4C0B.png": 0.9863013698630136,
+ "0C1ZYGFL2YNFM2W3P2KN1.png": 0.9795918367346939,
+ "0A4G5JAZSJS4BT5LBZ2Q3.png": 0.9850746268656716,
+ "0I75SMSDR5JSJXF07PN6J.png": 0.9915966386554622,
+ "0K9EAAIYXSUT80SYF3ML4.png": 0.9836065573770492,
+ "0PH78O2B9CJAM6MMINZXT.png": 0.9876543209876543,
+ "1BRZ4ALOZMMEXGR4AVJWG.png": 0.9876543209876543,
+ "0AFVW6AL3EH9H76ONNDYF.png": 0.9848484848484849,
+ "0EVCQHN9C65AUYG1UAN3C.png": 0.9696969696969697,
+ "0DD9D0ILAPJIH77GEVRGP.png": 0.4098360655737705,
+ "1BHU2JO8ODKS3OL4RIU6A.png": 0.9905660377358491,
+ "0OG2AZLHJPMBX43O2O9LR.png": 0.989010989010989,
+ "0M64SMZT9HTN6LXQ4M24T.png": 0.9846153846153847,
+ "0TY12X0C3U2BPZC81PW66.png": 0.9836065573770492,
+ "0W109P7LI6B5HIYM3SJ5A.png": 0.9873417721518988,
+ "0H6272E6S2YUDJWBSWKQN.png": 0.9873417721518988,
+ "0ZRX97WSSVCVQ3NJ5959P.png": 0.9873417721518988,
+ "0ZFX4HDI3O7YQFDYRRYKI.png": 0.9722222222222222,
+ "1A4SDGAXB66WDBW7OUH58.png": 0.9876543209876543,
+ "0HQVUJMOQRQQ5FIP4PMZF.png": 0.9859154929577465,
+ "0Q335MQBC8UJJMASJUNWZ.png": 0.9938271604938271,
+ "0AF02R419WL1YN97ZV144.png": 0.9868421052631579,
+ "0JWOGY4C0KQ14J958GLYD.png": 0.9922480620155039,
+ "0JCVUE03Y5YD8A45IOIA5.png": 0.972972972972973,
+ "0R10PSLELMJ0SPFCXX92A.png": 0.9814814814814815,
+ "0BF411IVR1HLU1Q44I3K7.png": 0.990990990990991,
+ "0K9C1HJZ8K3L6CRAQ6VCW.png": 0.9850746268656716,
+ "0Q2MRICBMAFRV1GRRR5TA.png": 0.5189873417721519,
+ "0ZK44UG99IWIPKRSCOSJV.png": 0.9795918367346939,
+ "0S09D3ZPVQ8YOT55XIOE1.png": 0.9887640449438202,
+ "0KRLZUD3DQAU1DYDU99ZH.png": 0.9882352941176471,
+ "1ATIOLLN3DOAHKX75560Q.png": 0.5460992907801419,
+ "0KSBBUINDNN16F2ZLQHV4.png": 0.9915254237288136,
+ "0P5IE8XH9BN2EGC0DX27Z.png": 0.989010989010989,
+ "0GJ88Q9SMUOWF3WILKG14.png": 0.98,
+ "0QYA242XOQ0Y9078UC7NI.png": 0.984375,
+ "0F4Z8B4S5RV008LHJBW8S.png": 0.9896907216494846,
+ "0GWJH40B21AJBR1F73FXI.png": 0.9722222222222222,
+ "0ZO44O69QHTV62QJ3X9KH.png": 0.9883720930232558,
+ "0VK3KLUJVLAB9SRQDN6EJ.png": 0.45622119815668205,
+ "0HQHS3BO0IIOJ5L2EP2H4.png": 0.9882352941176471,
+ "1DBQ2M6XQ66Y2895PYNOM.png": 0.9824561403508771,
+ "0A4OYW3ZL5QP76IGF0DK0.png": 0.9836065573770492,
+ "1AFVW6AL3EH9H76ONNDYF.png": 0.9863013698630136,
+ "0J0JQM9WD7B0RCNKWBC5S.png": 0.972972972972973,
+ "0OVSLM3WAA36TZQCOL1WS.png": 0.9821428571428571,
+ "0GMEN2MGE7HN3ROOZQ5YD.png": 0.9879518072289156,
+ "0AJLKOKRHEVOTGE90GEH6.png": 0.9922480620155039,
+ "0DUX4YKT5JYJO3Z573OG8.png": 0.967741935483871,
+ "0A4SDGAXB66WDBW7OUH58.png": 0.9871794871794872,
+ "0G5S5CXGRLABEYII4QG2Z.png": 0.9887640449438202,
+ "0YM92E2EEDDGHAUW2YZ8Q.png": 0.9863013698630136,
+ "0F0E32N4VR4Q9960I0DB8.png": 0.975609756097561,
+ "0JNQPLSGKLPQ0UAAFYL5T.png": 0.967741935483871,
+ "0YL4VFF3LUUQITLVU3U9V.png": 0.9902912621359223,
+ "0ZO2Z3XCHZLB43ARH68WS.png": 0.9803921568627451,
+ "0SHL8BKLII1AGBZ1SEB4U.png": 0.9876543209876543,
+ "0KBLEG9N1SBX956ZCIP5I.png": 0.9795918367346939,
+ "1B1QX4K8U8P9QA3HVLRPN.png": 0.9896907216494846,
+ "0D0ZG3O9YHMQAPHCD0890.png": 0.9924242424242424,
+ "0PTMPFGYNVJWO6FCX1QRZ.png": 0.9933333333333333,
+ "0A7ZA5BA5TPHBN2WP6TT9.png": 0.9904761904761905,
+ "0DE9UIIVMYH3UK0SYFVUG.png": 0.9767441860465116,
+ "0U0LFAJATVD9YEC1Z3497.png": 0.9767441860465116,
+ "1A3YX0911ULBZSCUBNDZS.png": 0.9929577464788732,
+ "0WRQRWHH2CMV2L4CE3SN9.png": 0.9878048780487805,
+ "0C1X00FENSOUN2Y08Y3JT.png": 0.9824561403508771,
+ "0ONRU7A4SU4WAUWF25FRP.png": 0.9878048780487805,
+ "1DD9D0ILAPJIH77GEVRGP.png": 0.9824561403508771,
+ "0CBFM7HG55Z7O8F4Y0O0L.png": 0.9905660377358491,
+ "0C9EM94JJTICVGS6U2T2U.png": 0.9917355371900827,
+ "0KK57808VO3HNS1AW4CJO.png": 0.9767441860465116,
+ "0E7XFLPH56MT23HNK3MZ6.png": 0.9886363636363636,
+ "0J818KH6HIIA83D74FXS3.png": 0.9887640449438202,
+ "0TLMD42BW0F4NSD9PG19X.png": 0.984375,
+ "0PSN5QFZWTPA9U05O7MZ3.png": 0.99,
+ "0GI0JNFJAOXK5OJKRXCND.png": 0.9891304347826086,
+ "0Z4X1LVZ1K4NE2RR8P7EA.png": 0.9859154929577465,
+ "0S709DW5AZF9VPCPMVHXB.png": 0.9891304347826086,
+ "1B87OEX5XX0BHUOQAS50A.png": 0.9767441860465116,
+ "1AAERNSDA06GDA7OFZVCA.png": 0.9850746268656716,
+ "1CORAY089OILX2OWIKU1E.png": 0.98,
+ "0WO86MK2DC2EZUZLSMFA1.png": 0.9911504424778761,
+ "0KNUPYHEXZYSW1TNZ6I7L.png": 0.9846153846153847,
+ "0ONF59OAQYX89LAM941E6.png": 0.9859154929577465,
+ "0E1IVAEMQXKVCH3Q0JCVX.png": 0.9782608695652174,
+ "0K8YJZK75V8SXL0GIM4SU.png": 0.9896907216494846,
+ "0LUL2CVQ1HLC1KL6D2VMP.png": 0.4065934065934066,
+ "0WIEGQEF4G9LN2UM49Y12.png": 0.9876543209876543,
+ "0AQ9EL10BYBSGJO2RLC6Q.png": 0.9888888888888889,
+ "0M0WYXRJONRUQ3ZG24MJJ.png": 0.989010989010989,
+ "0E3XQJO1C4CKR9TNFB4IC.png": 0.9871794871794872,
+ "0H17CYXGJTHXPQUP51TBI.png": 0.9911504424778761,
+ "0NK736IIIHGBF52E1UKQ4.png": 0.9859154929577465,
+ "0DYIHMLOKOR6HNF2XAI8F.png": 0.9836065573770492,
+ "0BZ5GZPTUSCNBNGBNQZEG.png": 0.9859154929577465,
+ "0M47PMX0DRIVKCJBYKHPJ.png": 0.9767441860465116,
+ "00MK8C41M7MW013CJ9SPU.png": 0.9922480620155039,
+ "0DMXCT01TPF8O33UMENE4.png": 0.9917355371900827,
+ "0WS9VI6T1X0M5H6D8O67Z.png": 0.9859154929577465,
+ "0XQ9XQOL15RDKQT4YZUQC.png": 0.5739130434782609,
+ "1DD8FWYLADAY5EJ3UZUD9.png": 0.9876543209876543,
+ "0MXPSYD5A5U86BSSZQMJN.png": 0.975609756097561,
+ "0QAOLXSIIRIRQ3W1OP7Y8.png": 0.9921259842519685,
+ "1BZ5GZPTUSCNBNGBNQZEG.png": 0.9767441860465116,
+ "0Y5AIJNHB8DTPQOC92X6P.png": 0.9882352941176471,
+ "0IL3BP1QRAZ54V54IBK9A.png": 0.9876543209876543,
+ "0MDCUYD9ASW4AGWD3ZYK5.png": 0.9891304347826086,
+ "0MON88TOR16AGTBLDTGJC.png": 0.9904761904761905,
+ "0QVCHWR0EZCMQ5J5P0Z1J.png": 0.9767441860465116,
+ "0IPJ09DW34Q275Z5CMS1X.png": 0.99,
+ "1A7ZU26KX6C0LG0D3T3ZS.png": 0.9863013698630136,
+ "0M49YEV7H4P48EONCBFPS.png": 0.9863013698630136,
+ "1D34PI1NNCV0AB4WCQMB3.png": 0.9863013698630136,
+ "0AGYYXV88WJW2FC6FVV3Q.png": 0.9863013698630136,
+ "0F9W69ODT3GQCQ6F11L2E.png": 0.9767441860465116,
+ "0Q3RJT1DJMPO9D9BE6JNO.png": 0.9868421052631579,
+ "0ETQJY2HRGYIBO46BSD3P.png": 0.4503105590062112,
+ "0BRZ4ALOZMMEXGR4AVJWG.png": 0.43983402489626555,
+ "0WLG2ZXPFXZGF9RM2Z6N6.png": 0.9871794871794872,
+ "1BP5KU2XHXZ0C431B4OL9.png": 0.43450479233226835,
+ "00ZG4J0UMAHQMR57DQ5T7.png": 0.9818181818181818,
+ "0S5HD36LFVDWLLH6UFK9I.png": 0.9939759036144579,
+ "0EW4PZW85MH9BS8VI83KZ.png": 0.9848484848484849,
+ "0EMFKQLMGGAFPLQGUEZSJ.png": 0.96,
+ "0H4TWDI39J0HRG239GQ10.png": 0.9938650306748467,
+ "0BSXNNN0LA94101P5D38I.png": 0.9882352941176471,
+ "0SLVZSD9X7VZPGQU0Q2QN.png": 0.9850746268656716,
+ "0K6WPSDJC0ICOWFEASYB4.png": 0.9911504424778761,
+ "0TY3MTJ6YZDE6QI73SH5A.png": 0.9859154929577465,
+ "0B87OEX5XX0BHUOQAS50A.png": 0.9896907216494846,
+ "1C1X00FENSOUN2Y08Y3JT.png": 0.967741935483871,
+ "0KEM29NIZZ7UI3CTN6NEA.png": 0.9896907216494846,
+ "0JZQMX95783K8QW3ERXSM.png": 0.8827586206896552,
+ "0R47TY8TMFAL346RUY0LW.png": 0.9696969696969697,
+ "0EG83QLMPW7MGGMGBYGPD.png": 0.9882352941176471,
+ "1AGYYXV88WJW2FC6FVV3Q.png": 0.9927536231884058,
+ "0OF74SYX6Q102JCQ5KELF.png": 0.9896907216494846,
+ "1BX1I2HS6BLV92NZHV6J1.png": 0.9940828402366864,
+ "1DDEMI2034QD7F4QRH1IV.png": 0.972972972972973,
+ "0Z8LGXZ1SMLBHV5T6Y4O9.png": 0.9859154929577465,
+ "0NGE5XRBD2YHBZFMDL7VD.png": 0.9795918367346939,
+ "0SX4TWDHV25DCZV3HQEHH.png": 0.9777777777777777,
+ "00Q04QLVCESVWCSMDAURN.png": 0.9855072463768116,
+ "0SK696SAQW3MZNDMD4W85.png": 0.984375,
+ "0F4WBFLG32FAT22W0NGEY.png": 0.45871559633027525,
+ "0TNFF3RUQ2UL3PRNYF45M.png": 0.9868421052631579,
+ "1A4OYW3ZL5QP76IGF0DK0.png": 0.9904761904761905,
+ "0IUNRRJ3JHMEAORR2EXRS.png": 0.9908256880733946,
+ "0L764EQB3ZGC3FYQ20PR9.png": 0.9863013698630136,
+ "0XZJ4SZWY0ZOD9QBZP96A.png": 0.9922480620155039,
+ "0PU3J7NYVCB6XLSJJOEZ9.png": 0.9911504424778761,
+ "0DDKIN1PFJQTFW1JADVHT.png": 0.9863013698630136,
+ "1A4G5JAZSJS4BT5LBZ2Q3.png": 0.9836065573770492,
+ "0DPMX3BRIG9CWZPYKXFWS.png": 0.9921259842519685,
+ "0N7P792721CFI8EDOCB0N.png": 0.9908256880733946,
+ "1BIC4PMO7M3ZB8WUC3STJ.png": 0.9933333333333333,
+ "0XNBY82W4NFSD9GV6ONKU.png": 0.9911504424778761,
+ "0F3P8XGEMBYESYCYAOQPN.png": 0.9923076923076923,
+ "00D983SP0WHF6YGMKSHCR.png": 0.9803921568627451,
+ "0N91H0ZWMHBPPPPON4HUW.png": 0.993006993006993,
+ "0AWZPWR198XN7U8HY1E32.png": 0.9836065573770492,
+ "0S0Z9J05KZWNPKUFRD78Z.png": 0.9927007299270073,
+ "0FCDXM7JS1QEBBY3DCGBM.png": 0.9795918367346939,
+ "0OEVVJNLZKKW7GOPM188W.png": 0.9615384615384616,
+ "0Z7FUMCO707ZDI55EG306.png": 0.9878048780487805,
+ "0DE4P4M2855D754NA8993.png": 0.9722222222222222,
+ "0UZ81HSUQSHVVGU56NIOG.png": 0.9902912621359223,
+ "0AAERNSDA06GDA7OFZVCA.png": 0.992,
+ "0WZXI1YECN77S9GD6GQ4M.png": 0.98989898989899,
+ "0S8HOU13AW544ALTKAB73.png": 0.989010989010989,
+ "0AAPDAAK73MRINE7PM0ZJ.png": 0.41628959276018096,
+ "0UJ2AFVE6RWGTYSB6DKLJ.png": 0.45871559633027525,
+ "0ISYQEE43TA3O41XMA47A.png": 0.993103448275862,
+ "0L2E8S3ICCMGPE9PS3RLV.png": 0.9908256880733946,
+ "0BIC4PMO7M3ZB8WUC3STJ.png": 0.984375,
+ "0CTFYQFHQ1S1FLIEAPZTB.png": 0.9767441860465116,
+ "0A9RJA2I3YJT58JR2MEOT.png": 0.9818181818181818,
+ "0UGHOJ96BTPB57BR0DJS7.png": 0.4505494505494505,
+ "0TW35WW1PRLL2YKVYWYRM.png": 0.9818181818181818,
+ "0HTO45RT9NH5KQUCLOV2H.png": 0.9722222222222222,
+ "0F0TA5W8GO31TXUFMHHTO.png": 0.98989898989899,
+ "0HWMSCT6L3MCGFJV4OXF8.png": 0.975609756097561,
+ "0KCIUQNXNE3ZMX5ECY7V3.png": 0.9925925925925926,
+ "0ET4I24PZATQRKGMGG5KC.png": 0.975609756097561,
+ "0I6WVEL7V26O3KJJ1GGYF.png": 0.9896907216494846,
+ "1A2AT7TW5KOMUUAK7TQXT.png": 0.9767441860465116,
+ "0W8NNJL30MNEY6RTPD6DA.png": 0.9767441860465116,
+ "0XC0XOHP855H9DFG41W9T.png": 0.9803921568627451,
+ "1DC57AS1OYZ1BRHZHPIO2.png": 0.9903846153846154,
+ "00WVVGSQ00B0IZU4OKPHQ.png": 0.9916666666666667,
+ "0RTI5C20W407SL59RANEM.png": 0.991304347826087,
+ "0A0DA327P9Y532UTLHE2N.png": 0.9722222222222222,
+ "0AQZMEU4Q38NKK4USHAC5.png": 0.9896907216494846,
+ "0U7602J86XPC7AVTSPMWL.png": 0.9878048780487805,
+ "0DSGAEKSK52RUNGEOGEXP.png": 0.9921259842519685,
+ "0JD5R5NDJKRRHT1UI6GFW.png": 0.7058823529411764,
+ "0PQ9OK98A29AC6GEI3DKQ.png": 0.4882352941176471,
+ "0TA7SVAQC7PKDE8BUP3NF.png": 0.9887640449438202,
+ "0TQM47CA0F30LG2C0S2KN.png": 0.9887640449438202,
+ "0HA1FE8828DJ86ZIJUIX4.png": 0.967741935483871,
+ "0P1Y0C88Y17DSXE616MQN.png": 0.98989898989899,
+ "0I6GDDWCTMF9V4YLGLBIM.png": 0.4036697247706422,
+ "0SI6DA6CAXUMFYSXBXIF6.png": 0.9906542056074766,
+ "0NM9CUQJV6W2N9434O81D.png": 0.9859154929577465,
+ "0WU8XJP1VJSLZXQ7S43HM.png": 0.9767441860465116,
+ "0P0WR7JJ9JBXO0HVMDETS.png": 0.975609756097561,
+ "0ZNTZMWW1X0QZV4AGDHYL.png": 0.9926470588235294,
+ "0C98HOE9TQ4HZK6DKGF5I.png": 0.989010989010989,
+ "0JCDZWWAMUR9FRGHL9IVN.png": 0.9911504424778761,
+ "0PVN50SJP1LUTHE2TID60.png": 0.9926470588235294,
+ "0D7CMRTBBENLYDO7EWWVZ.png": 0.98,
+ "0JOTZX26K6UJB6LNVK9RH.png": 0.975609756097561,
+ "0ZFOZ6UKG7DCCD5HSUIIX.png": 0.9876543209876543,
+ "0L7V0ZXS2M9JMSBD05I25.png": 0.9873417721518988,
+ "0G1E97R3QFH7FG9AUAIFB.png": 0.9863013698630136,
+ "0CORAY089OILX2OWIKU1E.png": 0.984375,
+ "0EH9JARAL7RYD3CVMM8AZ.png": 0.5185185185185186,
+ "00KDBG5H22KPNCPCK7L2P.png": 0.9848484848484849,
+ "00XJ5C1RWIRVID9IPUX8G.png": 0.8,
+ "0FFJM5ABUDDCT2DOCW2T4.png": 0.9916666666666667,
+ "0D34PI1NNCV0AB4WCQMB3.png": 0.9896907216494846,
+ "0X9D7AJTD7S91BNHMQ4L0.png": 0.9876543209876543,
+ "0W9SN5GJDEWTG3WAPGPDZ.png": 0.9887640449438202,
+ "0ATIOLLN3DOAHKX75560Q.png": 0.9882352941176471,
+ "1C9EM94JJTICVGS6U2T2U.png": 0.9883720930232558,
+ "0TG6BRHGF3C865C2OL6DE.png": 0.9882352941176471,
+ "1BUP8L4PGVBNQE1GSCGJZ.png": 0.9863013698630136,
+ "1AJLKOKRHEVOTGE90GEH6.png": 0.989010989010989,
+ "1C98HOE9TQ4HZK6DKGF5I.png": 0.9859154929577465,
+ "0IH65GI6IN6RQWJE04YPG.png": 0.9859154929577465,
+ "0DNHG32KRYJ9PQ7UU1YL5.png": 0.9863013698630136,
+ "0EV54WP1Y9JDCWMDIT0OM.png": 0.975609756097561,
+ "0BE3I0HX6XWZQA4EFY99C.png": 0.984375,
+ "0O7G4HGEK48J2NUB5RCES.png": 0.9882352941176471,
+ "1BXWVCNXW1Z4N1XG8QOG4.png": 0.9905660377358491,
+ "0M2V36SUMHY2U8FRS9NYZ.png": 0.4424778761061947,
+ "0STJA7OMA59TOQ8XQ54G5.png": 0.98,
+ "0VB0OIQZQXKY5PA111Q8B.png": 0.984375,
+ "0RBPX6DU1W6LIYA2VRAA4.png": 0.972972972972973,
+ "0SP3KJJ2HMQZF088NH2DR.png": 0.9904761904761905,
+ "1D0ZG3O9YHMQAPHCD0890.png": 0.9655172413793104,
+ "0XZ590ZLZXRB09XIADL9V.png": 0.9934640522875817,
+ "0QU6QW0KAWVXZ6TL7FVJE.png": 0.9933774834437086,
+ "0PKH21420YW57OPRJR21R.png": 0.9922480620155039,
+ "0TX7Y5KWQ2MVU3579QIYH.png": 0.9777777777777777,
+ "0Y6OW4PMMWG05F4ZFYQ40.png": 0.9767441860465116,
+ "0EK5DRITVR9G3KDVF1CTJ.png": 0.9876543209876543,
+ "0DDEMI2034QD7F4QRH1IV.png": 0.9933774834437086,
+ "0HJXUBEZQCR1DEUQ8V30I.png": 0.9932885906040269,
+ "0BG5K95UCWQ3JXWC501XA.png": 0.9886363636363636,
+ "00TNQG8N9T3KUVMZ7AWTB.png": 0.967741935483871,
+ "0TJSB9YOUAG7C9OZW3U80.png": 0.9848484848484849,
+ "0SYEGYPSNLKCALCQBPGK2.png": 0.9929577464788732,
+ "0IP23CAYMTIVE93KLVMRA.png": 0.9824561403508771,
+ "0KFRN6DX1A6MMGS24B39T.png": 0.9850746268656716,
+ "1CTFYQFHQ1S1FLIEAPZTB.png": 0.9803921568627451,
+ "0U9U2Q7VBD1V6HBT7FQKM.png": 0.9923076923076923,
+ "0S7MUFP120D8OP4ZCCCUV.png": 0.4873417721518988,
+ "0BXWVCNXW1Z4N1XG8QOG4.png": 0.9873417721518988,
+ "1A7ZA5BA5TPHBN2WP6TT9.png": 0.9824561403508771,
+ "1ACY14LU0VWSKDOHEAVZM.png": 0.9924812030075187,
+ "0MPO1XXHHM8I5BOIT3DB9.png": 0.9876543209876543,
+ "0RSQ19UNM98CNWII5Q25F.png": 0.975609756097561,
+ "0EAA9XEBN9W7XDBPK31UZ.png": 0.9803921568627451,
+ "0U0BR4A64P7CE7YZ57HQ1.png": 0.9911504424778761,
+ "0XFNT3NMKFW1DB0F2LVY3.png": 0.9916666666666667,
+ "1AQZMEU4Q38NKK4USHAC5.png": 0.9904761904761905,
+ "0VGZMTO2VCZVZKGAOHZEU.png": 0.9910714285714286,
+ "0DBQ2M6XQ66Y2895PYNOM.png": 0.984375,
+ "0BP5KU2XHXZ0C431B4OL9.png": 0.9811320754716981,
+ "0PYCGJHF1705P4NTCM8AS.png": 0.9824561403508771,
+ "0RAGYZ9465I7GLXZXCLCQ.png": 0.9924812030075187,
+ "1A9560NY0NQ5OVZQQBJRQ.png": 0.4636363636363636,
+ "0KXDSHWWWYQJBXT2Y6U8S.png": 0.9803921568627451,
+ "1BF411IVR1HLU1Q44I3K7.png": 0.984375,
+ "0T1ZL9NSVN3385DR7B86C.png": 0.9824561403508771,
+ "0SYKTWM1EF4KS646AWQEL.png": 0.9803921568627451,
+ "0S104IFNSN5EJ31212IOP.png": 0.989010989010989,
+ "0H2RZUXKBQEVFJ2JT29R4.png": 0.9818181818181818,
+ "0SVC8WRHPF38HHKBN65YD.png": 0.9926470588235294,
+ "0HVIW7DPWCJSWJ5PCJDM2.png": 0.9855072463768116,
+ "0PRIZA7CG2JAL9GTN265B.png": 0.9929577464788732,
+ "0FXLG8PO267BZPBBXIX4E.png": 0.9922480620155039,
+ "1B0LNAITDDPPCJ4I6XIWK.png": 0.9868421052631579,
+ "0YNQ2KZ01B1TWP9FR5DE7.png": 0.45360824742268047,
+ "0A8AVSZNK6GTNOCBEVFOY.png": 0.9722222222222222,
+ "0XM8RQF6JQDOTJ5WQVHFE.png": 0.9873417721518988,
+ "0JBU3LJRDTMJI2XGB6NUE.png": 0.9868421052631579,
+ "0FKIASN9E4KCZ0JRCAJLQ.png": 0.9917355371900827,
+ "0A2AT7TW5KOMUUAK7TQXT.png": 0.9882352941176471,
+ "0QISJETVE3HGF1PMBD1BM.png": 0.9848484848484849,
+ "0KBOWWQLYSIZ0P4SIZMHJ.png": 0.993421052631579,
+ "0OMZO818L9AC4U3JJTKGD.png": 0.9863013698630136,
+ "0IVOAVCWOJ4CA92H7CM1Q.png": 0.9917355371900827,
+ "0SH9F7EHAT35OVT003OC5.png": 0.3728813559322034,
+ "0F7BJ4Z9F1R95HUG4RRZD.png": 0.9767441860465116,
+ "00RJGV4A4UTMTLDEIR1IG.png": 0.975609756097561,
+ "0BUP8L4PGVBNQE1GSCGJZ.png": 0.967741935483871,
+ "0B1QX4K8U8P9QA3HVLRPN.png": 0.9923664122137404,
+ "0IZ8M2UHYSA9H6K8XIOKS.png": 0.9855072463768116,
+ "0KLEV2650Z6X2DAUO94QK.png": 0.9876543209876543,
+ "0MRYJGMAVHEDMZ3XSX9XI.png": 0.9871794871794872,
+ "0I6PWVE3HEK6ZZ5K53UY4.png": 0.9818181818181818,
+ "1BE3I0HX6XWZQA4EFY99C.png": 0.984375,
+ "0M7CJCA8K3PX504PNHJRT.png": 0.9883720930232558,
+ "0ESACK4QILSDBXRS54UK0.png": 0.9795918367346939,
+ "0KLU5K631Q9RHQOY6771B.png": 0.4444444444444444,
+ "0RAZV12CY84ZGA4BRZQUC.png": 0.9871794871794872,
+ "0HZ4TDEJG6BY7B2RTALZK.png": 0.9868421052631579,
+ "0ROPMUV96VG8PTONLNGV9.png": 0.9887640449438202,
+ "0L194VI2NIOAX4AUCU2WG.png": 0.9767441860465116,
+ "0PG6K8IFJM2PHHLA1S4Y6.png": 0.9905660377358491,
+ "0H5AHQVKHAKQ1W636PLCS.png": 0.9878048780487805,
+ "0ZAHJJUMYDOQIMIUUFAUD.png": 0.9863013698630136,
+ "0MO39PWU9N82Y88WNANVM.png": 0.984375,
+ "0ZSUP0IMF3PK86DIVWQ8V.png": 0.967741935483871,
+ "0M1B6J5CTPBITI79C68MO.png": 0.9824561403508771,
+ "0BWJOYJSDHL1XJH6UG2RM.png": 0.9882352941176471,
+ "0SPYHIS3OEEZ082CFJEGF.png": 0.9871794871794872,
+ "0A3YX0911ULBZSCUBNDZS.png": 0.9896907216494846,
+ "0FK1CU21TAIHIR7YWZ2W7.png": 0.9818181818181818,
+ "0WP1ZBKQCK8W2W0ZXI2Z4.png": 0.7916666666666666,
+ "1AF02R419WL1YN97ZV144.png": 0.9767441860465116,
+ "0BKBFKJTQPLQBNIBZSM7E.png": 0.9916666666666667,
+ "1C1ZYGFL2YNFM2W3P2KN1.png": 0.9871794871794872,
+ "0IHOYC7KXLECI1F3G1WAF.png": 0.9848484848484849,
+ "1A0DA327P9Y532UTLHE2N.png": 0.9868421052631579,
+ "0SK9B35AHQ2OQA1RDKHHP.png": 0.9917355371900827,
+ "0EECJZYQ42MZLSWPOK9ZH.png": 0.9887640449438202,
+ "0UFBWJZOD5PBKMVX7G231.png": 0.9824561403508771,
+ "0OZ6DU5POAFSM589UXX4S.png": 0.9876543209876543,
+ "0OUIP8MTUSWLFQ6J13VXT.png": 0.967741935483871,
+ "0NFAI2Z8TAUKU6S7892KH.png": 0.975609756097561,
+ "0F3VUGWY35HLOJYHPT78G.png": 0.9883720930232558,
+ "0AYZOGNX998RYQVPWP1OA.png": 0.9846153846153847,
+ "0UC2QTKS4ITXYK4E6HU9T.png": 0.9939759036144579,
+ "0KK6YAU45B9B34SSZTAS7.png": 0.9836065573770492,
+ "0WV2Q54214D8ARYKCMBE0.png": 0.547945205479452,
+ "0TUDLFORB7K1BVA4U0ULU.png": 0.9917355371900827,
+ "0XZRML313QJ6X82YZJLYT.png": 0.9848484848484849,
+ "0ACY14LU0VWSKDOHEAVZM.png": 0.9873417721518988,
+ "0HH9NAZ1I95NJINORKJIM.png": 0.9795918367346939,
+ "1AWZPWR198XN7U8HY1E32.png": 0.9795918367346939,
+ "0TLG8NFY9BXHB15A47OGW.png": 0.9926470588235294,
+ "1CBFM7HG55Z7O8F4Y0O0L.png": 0.9848484848484849,
+ "0EV3WT6VJG3QH2HFJEIBA.png": 0.975609756097561,
+ "0OBPU21JDPO0KPYEQGLFO.png": 0.9722222222222222,
+ "0MJ27YD7XBYLQKM87RM3Y.png": 0.9887640449438202,
+ "0BHU2JO8ODKS3OL4RIU6A.png": 0.99,
+ "0WVB351NNWY8OOQQRRW6F.png": 0.476878612716763,
+ "1BCT1VG1R4HUK3Q6NMZGU.png": 0.9916666666666667,
+ "0YJ043WAWUTW4AEMDTD4R.png": 0.9782608695652174,
+ "0YS08VVMS1YPOHVJOFXXA.png": 0.98989898989899,
+ "0EWWFSOUCGGD5BK6RKMKO.png": 0.522875816993464,
+ "0VCTD6BP09MBAXOOM5Y5E.png": 0.975609756097561,
+ "0S7ZGBZ7OBI15CZS5V95A.png": 0.984375,
+ "0JJ9O2OQ6O13OAOFM7643.png": 0.99,
+ "00TXY79AHYWJ7WLXB3VLV.png": 0.9846153846153847,
+ "0J2UQ7WIZXFK4I5TV9UHW.png": 0.9935064935064936,
+ "0TYF1PBQCH64LANCKYWY7.png": 0.9859154929577465,
+ "0SWG2OW7F5RLADFAHJ9A4.png": 0.9882352941176471,
+ "0RV3TKC89HQD4FRFCTNSK.png": 0.9767441860465116,
+ "1BQBJ8UFLH7H3JQ965JF6.png": 0.9863013698630136,
+ "0C70JEJWPOAT1S8RUWCVB.png": 0.972972972972973,
+ "0RCE6GI0QYPCA15RH6HM7.png": 0.49382716049382713,
+ "0SB1QV5XRJM6W0HRU4AH7.png": 0.9891304347826086,
+ "0I1HQDO584A6ODC54PLNA.png": 0.9891304347826086,
+ "1AWHACFMS9KSHM18INN41.png": 0.9836065573770492,
+ "1BKBFKJTQPLQBNIBZSM7E.png": 0.9863013698630136,
+ "0T0Q44ALMC9WURWEESEMP.png": 0.9875,
+ "1A9RJA2I3YJT58JR2MEOT.png": 0.9615384615384616,
+ "0DD8FWYLADAY5EJ3UZUD9.png": 0.9868421052631579,
+ "0F078JDZMTC8C8H2P8IVA.png": 0.9921875,
+ "0L5KEP1L6K1ALH88LLMEY.png": 0.9795918367346939,
+ "0U2FXJ2H3K5SQTZNJ1WV1.png": 0.98,
+ "0U49K9QPO02GF77TU5JB8.png": 0.9863013698630136,
+ "0A9560NY0NQ5OVZQQBJRQ.png": 0.9836065573770492,
+ "0MRQ2DF27RW94C36QLLTZ.png": 0.9863013698630136,
+ "0BCT1VG1R4HUK3Q6NMZGU.png": 0.9795918367346939,
+ "0GQC64N9E830BWDTF8L0Q.png": 0.9910714285714286,
+ "0HIESCSLITYADXZHOO7IA.png": 0.989010989010989,
+ "0FZFGRN9B0WT3XCQMOVPJ.png": 0.9767441860465116,
+ "00LQMDL10JL253UW69YUO.png": 0.9818181818181818,
+ "0U79XK18POJ6HCLLOXS4Z.png": 0.9905660377358491,
+ "0I3RG6GXJ2VILV3BPFIY4.png": 0.9767441860465116,
+ "0X8PV0Z6SNEKPIPOCP5HR.png": 0.3931034482758621,
+ "0UFQOEKLIWTX65AY778BD.png": 0.5275590551181102,
+ "0HZUERFF8VNKXAZLV8RO5.png": 0.9850746268656716,
+ "0FWXHCMHZ7KG6WYRNWD6Q.png": 0.9922480620155039,
+ "0A9B6NHM7J57SCT1Z8TAS.png": 0.9861111111111112,
+ "0WOTQFWQFAEPN0HZ6MYIL.png": 0.9929577464788732,
+ "0IUNSDMCG8WWVJJ758NN9.png": 0.9887640449438202,
+ "0XLK4S5OWK77LRNU2JAG9.png": 0.46543778801843316,
+ "00FMSMFBJU5732FGUTLIF.png": 0.9821428571428571,
+ "0YOETJE558OS77GHG5L5U.png": 0.9876543209876543,
+ "0BKXE7HQJOJV0I1LL8YOF.png": 0.9821428571428571,
+ "0AWHACFMS9KSHM18INN41.png": 0.5416666666666667,
+ "1BL58Q9DLPBQF73ROGFDX.png": 0.9921875,
+ "0Q7EACO6OF8WQFZXI1MRQ.png": 0.9896907216494846,
+ "0R1IOV08YNRVC0KQS84EF.png": 0.9818181818181818,
+ "0SEF4O8YR8ULW23U32SE6.png": 0.9836065573770492,
+ "0IQGTS9QZK0ZYRL80GOSD.png": 0.9767441860465116,
+ "0E00IBZTY74DGR1SSX77L.png": 0.975609756097561,
+ "0BR0V61AWXYXVQSK6RMY7.png": 0.9911504424778761,
+ "0MESCFGQYOQNMVWD6B1VU.png": 0.9885057471264368,
+ "0F3GIMIL9E4UNWEFYLKGV.png": 0.9824561403508771,
+ "1A8AVSZNK6GTNOCBEVFOY.png": 0.9910714285714286,
+ "0E7WX1NX5ZKR24SEIUKRN.png": 0.9811320754716981,
+ "0QZOZCFYQ2TK5C0Q3KN5C.png": 0.5106382978723405,
+ "0SDC2B1I853GR50G545IX.png": 0.9891304347826086,
+ "0FEKB24PHTZNT3KIZZVIS.png": 0.9876543209876543,
+ "0SIW9Q9NWY3TWRC712D4J.png": 0.9876543209876543,
+ "0JFFFUOFXDOLV2ZGQJAPB.png": 0.9887640449438202,
+ "0O976W9Y9NDSJ24YV7HU9.png": 0.975609756097561,
+ "0B0LNAITDDPPCJ4I6XIWK.png": 0.9811320754716981,
+ "1AAPDAAK73MRINE7PM0ZJ.png": 0.9852941176470589,
+ "0HC8F1RENJE297WV8RW0N.png": 0.45517241379310347,
+ "0OXJ4SWAYILOZVQCGO1OB.png": 0.9937106918238994,
+ "0I3S2Z8YWZ0JOIMKGU51B.png": 0.972972972972973,
+ "0Y0LZ2LRH7BR5ZDYBTH7U.png": 0.9824561403508771,
+ "0T0LAS5REAE827IQO0Q9U.png": 0.98989898989899,
+ "1AQ9EL10BYBSGJO2RLC6Q.png": 0.9868421052631579,
+ "0L1YL688ZRRPYAJ07UOFQ.png": 0.9911504424778761,
+ "1BWJOYJSDHL1XJH6UG2RM.png": 0.9922480620155039,
+ "0TELO9B7QI0QQVFMJXAQ1.png": 0.9896907216494846,
+ "0XDX2OT3OG575I0U99YAQ.png": 0.54,
+ "0X49B57NNHU6FEB4J21VY.png": 0.993006993006993,
+ "0DHJ8WY2XLWKG7K345LAK.png": 0.975609756097561,
+ "0BQBJ8UFLH7H3JQ965JF6.png": 0.989010989010989,
+ "00CBN2MRTC48ZY50RUSBW.png": 0.9767441860465116,
+ "1D7CMRTBBENLYDO7EWWVZ.png": 0.9863013698630136,
+ "0BX1I2HS6BLV92NZHV6J1.png": 0.9722222222222222,
+ "0XQE375V4J34MLJYN711T.png": 0.9722222222222222,
+ "1BKXE7HQJOJV0I1LL8YOF.png": 0.9795918367346939,
+ "0E3OA2PY1K3B44GN9AS0Y.png": 0.9863013698630136,
+ "0SCRALC3GPIO2ZD918U8L.png": 0.478021978021978,
+ "0ITKDLWB7SDGMM8980ZSS.png": 0.9911504424778761,
+ "1BYRMKANKN4PL6JFPG8AR.png": 0.989010989010989,
+ "0BYRMKANKN4PL6JFPG8AR.png": 0.9803921568627451,
+ "0R8W6O2N25AVQI9FQ5IL7.png": 0.972972972972973,
+ "0J9TV59N7U65CB7YCHD38.png": 0.9922480620155039,
+ "0VX41MM59ET2MK09202C3.png": 0.9896907216494846,
+ "1CGP5R7FMVCKR47XK6IVA.png": 0.9896907216494846,
+ "1BSXNNN0LA94101P5D38I.png": 0.9926470588235294,
+ "0UMVEM9RUVZDRJRFA1W2V.png": 0.9722222222222222,
+ "0KPHJHUXB0MS3B9RHL57O.png": 0.9868421052631579,
+ "0TYH6IN161KXZT369VVWQ.png": 0.9795918367346939,
+ "0AUTW1OL7IAPO1JH1TQUR.png": 0.984375,
+ "0GNCKEB99NZ0J9GCAI0TH.png": 0.9850746268656716,
+ "0CGP5R7FMVCKR47XK6IVA.png": 0.9915966386554622,
+ "0NNLAUZDCGVKZP852ZJ7X.png": 0.9836065573770492,
+ "0EJW9DEXTHUR17CZCUPB1.png": 0.9850746268656716,
+ "0JZRIWIFSATGGFL8P0NZF.png": 0.9908256880733946,
+ "0VNHMSVYYS2Q0H0VJDNAK.png": 0.9782608695652174,
+ "0Z2ZZWW84O21E70F5RGIA.png": 0.993103448275862,
+ "0UODYVKUWDGD6S5D7LNAW.png": 0.9930555555555556,
+ "0NGNPB7KAJSSKSHQV1KZS.png": 0.9767441860465116,
+ "0Q3C8N8G8GXV2EP88XEXI.png": 0.9795918367346939,
+ "0IU89E255WY0KPUD6L7Y9.png": 0.9902912621359223,
+ "0F22CQYG638LSZROETJ9V.png": 0.9904761904761905,
+ "0HBX9X0EJVVL4TA9CJ25G.png": 0.9873417721518988,
+ "0IRDSID7UDBLOIRB9JQ9S.png": 0.9883720930232558,
+ "0IHCMVD5NO41KSAB3ODC0.png": 0.5213270142180095,
+ "1AYZOGNX998RYQVPWP1OA.png": 0.9931506849315068,
+ "0WM2Y66O2ZJA831TN2E7Z.png": 0.9615384615384616,
+ "0CPW27F5C8I03UQBVBL2Y.png": 0.42500000000000004,
+ "1BR0V61AWXYXVQSK6RMY7.png": 0.989010989010989,
+ "0JJPRMSYFQLJKD3JYA1JP.png": 0.9850746268656716,
+ "0RT937QPOOWU9LKZVU0G3.png": 0.9922480620155039,
+ "0EFBK546D496KI033ACDF.png": 0.972972972972973,
+ "0EP1D1EXZC4VOMGZJGQQT.png": 0.9891304347826086,
+ "0DSQ4IAVY32EHCJ0AJM1Y.png": 0.9824561403508771,
+ "0F4HFOUP4374O8RL4E914.png": 0.9824561403508771,
+ "0IMS5FXCTVU6GSCR5CHTK.png": 0.984375,
+ "0P82SO3E98ECMRNRS62D4.png": 0.9868421052631579,
+ "1AUTW1OL7IAPO1JH1TQUR.png": 0.9911504424778761,
+ "0WFIWI83FBAOLU16M27NL.png": 0.9939024390243902,
+ "1A9B6NHM7J57SCT1Z8TAS.png": 0.9859154929577465,
+ "0UU3AG1PSZ1H78B6J17PA.png": 0.9882352941176471,
+ "0UVW81GETVKT5GPM6ZX0S.png": 0.9803921568627451,
+ "0IKFXKSQ9OA3OCRGQBZFI.png": 0.9795918367346939,
+ "1C70JEJWPOAT1S8RUWCVB.png": 0.9803921568627451,
+ "0A7ZU26KX6C0LG0D3T3ZS.png": 0.9818181818181818,
+ "0BL58Q9DLPBQF73ROGFDX.png": 0.9937106918238994,
+ "0UQWQMAYVXUFY65GH4ION.png": 0.9836065573770492,
+ "0R77TU5P7A0F1YTLIGSOA.png": 0.9863013698630136,
+ "0Q740R8QE6ZAF034ZMGQG.png": 0.9917355371900827,
+ "1BG5K95UCWQ3JXWC501XA.png": 0.9926470588235294,
+ "1CPW27F5C8I03UQBVBL2Y.png": 0.9916666666666667
+ }
+}
\ No newline at end of file
diff --git a/scripts/benchmark_table/benchmark_table.py b/scripts/benchmark_table/benchmark_table.py
new file mode 100644
index 00000000..c6cbd7cb
--- /dev/null
+++ b/scripts/benchmark_table/benchmark_table.py
@@ -0,0 +1,167 @@
+import zipfile
+from pathlib import Path
+import json
+import pprint
+from typing import Optional, List
+import numpy as np
+import wget
+
+from dedoc.api.api_utils import table2html
+from dedoc.config import get_config
+from dedoc.readers import PdfImageReader
+from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_recognizer import TableRecognizer
+from scripts.benchmark_table.metric import TEDS
+
+# Benchmark results are stored in <repository root>/resources/benchmarks (two levels above this script).
+path_result = Path(__file__).parent.joinpath("..", "..", "resources", "benchmarks")
+path_result.absolute().mkdir(parents=True, exist_ok=True)
+
+# Module-level instances, each one built from a configuration returned by get_config().
+table_recognizer = TableRecognizer(config=get_config())
+image_reader = PdfImageReader(config=get_config())
+
+# Names of the available benchmarks; TYPE_BENCHMARK selects the one run by the script.
+GENERATED_BENCHMARK = "on_generated_data"
+OURDATA_BENCHMARK = "on_our_data"
+TYPE_BENCHMARK = OURDATA_BENCHMARK
+
+
+def call_metric(pred_json: dict, true_json: dict, structure_only: bool = False, ignore_nodes: Optional[List] = None) -> dict:
+    """
+    Compute the TEDS score of every sample, print the scores and return them.
+
+    :param pred_json: predictions, passed to TEDS.batch_evaluate as is
+    :param true_json: ground truth, passed to TEDS.batch_evaluate as is
+    :param structure_only: forwarded to the TEDS constructor
+    :param ignore_nodes: forwarded to the TEDS constructor
+    :return: the scores returned by TEDS.batch_evaluate
+    """
+    evaluator = TEDS(structure_only=structure_only, ignore_nodes=ignore_nodes)
+    scores = evaluator.batch_evaluate(pred_json, true_json)
+    pprint.PrettyPrinter().pprint(scores)
+
+    return scores
+
+
+def get_tables(image_path: Path) -> str:
+    """
+    Recognize the tables of an image and return the first one as HTML.
+
+    :param image_path: path to the image, passed to the image reader as a string
+    :return: HTML of the first recognized table, an empty string when no table was recognized
+    """
+    document = image_reader.read(str(image_path))
+
+    # table2html looks the table number up by uid, so every table gets a uid that table2id knows
+    for table in document.tables:
+        table.metadata.uid = "test_id"
+    table2id = {"test_id": 0}
+
+    if not document.tables:
+        # nothing was recognized: return an empty prediction instead of failing with IndexError
+        return ""
+
+    # TODO: only one table per image is supported for now, the other tables are ignored
+    return table2html(document.tables[0], table2id)
+
+
+def make_predict_json(data_path: Path) -> dict:
+    """
+    Build the prediction dictionary for all files of a directory.
+
+    :param data_path: directory whose entries are passed to get_tables one by one
+    :return: {file name: {"html": predicted table wrapped into an HTML document}}
+    """
+    predict_json = {}
+    for pathname in data_path.iterdir():
+        print(pathname)
+
+        # TEDS.evaluate finds the table with the "body/table" xpath, so the fragment needs a document around it
+        predict_json[pathname.name] = {"html": "<html><body>" + get_tables(pathname) + "</body></html>"}
+
+    return predict_json
+
+
+def download_dataset(data_dir: Path, name_zip: str, url: str) -> None:
+    """
+    Download and unpack a benchmark archive unless the data directory already exists.
+
+    :param data_dir: directory the archive is unpacked to; when it exists, nothing is downloaded
+    :param name_zip: file name the archive is saved under inside data_dir (removed after unpacking)
+    :param url: address the archive is downloaded from
+    """
+    import shutil
+
+    if data_dir.exists():
+        print(f"Use cached benchmark data from {data_dir}")
+        return
+
+    data_dir.mkdir(parents=True, exist_ok=True)
+    pdfs_zip_path = data_dir / name_zip
+    try:
+        # save under the explicit name: a name chosen from the server response may differ from name_zip
+        wget.download(url, out=str(pdfs_zip_path))
+
+        with zipfile.ZipFile(pdfs_zip_path, "r") as zip_ref:
+            zip_ref.extractall(data_dir)
+        pdfs_zip_path.unlink()
+    except BaseException:
+        # the directory did not exist before this call; remove it (also on Ctrl+C),
+        # otherwise the next run would take the incomplete directory for cached data
+        shutil.rmtree(data_dir, ignore_errors=True)
+        raise
+
+    print(f"Benchmark data downloaded to {data_dir}")
+
+
+def prediction(path_pred: Path, path_images: Path) -> dict:
+    """
+    Predict the tables of all images and save the predictions as JSON.
+
+    :param path_pred: file the predictions are written to
+    :param path_images: directory with the images, passed to make_predict_json
+    :return: the predictions returned by make_predict_json
+    """
+    pred_json = make_predict_json(path_images)
+    # ensure_ascii=False writes non-ASCII characters as is, so the encoding must not depend on the locale
+    with path_pred.open("w", encoding="utf-8") as fd:
+        json.dump(pred_json, fd, indent=2, ensure_ascii=False)
+
+    return pred_json
+
+
+def benchmark_on_our_data() -> dict:
+    """
+    Run the table benchmark on the labeled data set that is downloaded by this function.
+
+    :return: dictionary with the metric mode ("mode_metric_structure_only"), the mean score ("mean")
+        and the scores of the separate images ("images")
+    """
+    data_dir = Path(get_config()["intermediate_data_path"]) / "benchmark_table_data"
+    path_images = data_dir / "images"
+    path_gt = data_dir / "gt.json"
+    path_pred = data_dir / "pred.json"
+    download_dataset(data_dir,
+                     name_zip="benchmark_table_data.zip",
+                     url="https://at.ispras.ru/owncloud/index.php/s/Xaf4OyHj6xN2RHH/download")
+
+    mode_metric_structure_only = False
+
+    # read explicitly as UTF-8 so that the result does not depend on the locale of the machine
+    with open(path_gt, "r", encoding="utf-8") as fp:
+        gt_json = json.load(fp)
+
+    # Creating base html (based on method predictions for future labeling):
+    #   path_images = data_dir / "images_tmp"
+    #   pred_json = prediction("gt_tmp.json", path_images)
+    pred_json = prediction(path_pred, path_images)
+    scores = call_metric(pred_json=pred_json, true_json=gt_json, structure_only=mode_metric_structure_only)
+
+    return {
+        "mode_metric_structure_only": mode_metric_structure_only,
+        "mean": np.mean(list(scores.values())),
+        "images": scores,
+    }
+
+
+def benchmark_on_generated_table() -> dict:
+    """
+    Run the table benchmark (structure only) on generated tables.
+
+    Generated data from https://github.com/hassan-mahmood/TIES_DataGeneration
+    Article with the generation details: https://arxiv.org/pdf/1905.13391.pdf
+    Note: only tables of the 1st type (category) are generated
+    Note: the header cell tag is not used, it is replaced in the ground truth before scoring
+        (presumably <th> is replaced with <td>; the tag names are lost in this text, confirm against the repository)
+    Note: all generated data (all four categories) can be downloaded from
+        https://at.ispras.ru/owncloud/index.php/s/cjpCIR7I0G4JzZU
+    TODO: some tables have a low quality, the reason should be traced.
+
+    :return: dictionary with the metric mode ("mode_metric_structure_only"), the mean score ("mean")
+        and the scores of the separate images ("images")
+    """
+
+    data_dir = Path(get_config()["intermediate_data_path"]) / "visualizeimgs" / "category1"
+    path_images = data_dir / "img_500"
+    path_gt = data_dir / "html_500"
+    download_dataset(data_dir,
+                     name_zip="benchmark_table_data_generated_500_tables_category_1.zip",
+                     url="https://at.ispras.ru/owncloud/index.php/s/gItWxupnF2pve6B/download")
+    mode_metric_structure_only = True
+
+    # make common ground-truth file
+    common_gt_json = {}
+    for pathname in path_gt.iterdir():
+        # the ground truth of an image is stored in a file with the same name before the first dot
+        image_name = pathname.name.split(".")[0] + ".png"
+        with open(pathname, "r", encoding="utf-8") as fp:
+            table_html = fp.read()
+        # exclude header tags
+        # NOTE(review): the tag literals of this call look lost (markup stripped from the patch text),
+        # as written it removes " | " - restore the arguments from the repository
+        table_html = table_html.replace(" | ", "")
+
+        common_gt_json[image_name] = {"html": table_html}
+
+    # ensure_ascii=False writes non-ASCII characters as is, so the encoding must not depend on the locale
+    file_common_gt = data_dir / "common_gt.json"
+    with file_common_gt.open("w", encoding="utf-8") as fd:
+        json.dump(common_gt_json, fd, indent=2, ensure_ascii=False)
+
+    # calculate metrics
+    path_pred = data_dir / "pred.json"
+
+    pred_json = prediction(path_pred, path_images)
+    scores = call_metric(pred_json=pred_json, true_json=common_gt_json,
+                         structure_only=mode_metric_structure_only,
+                         ignore_nodes=["span", "style", "head", "h4"])
+
+    return {
+        "mode_metric_structure_only": mode_metric_structure_only,
+        "mean": np.mean(list(scores.values())),
+        "images": scores,
+    }
+
+
+if __name__ == "__main__":
+    # run the benchmark selected by TYPE_BENCHMARK
+    if TYPE_BENCHMARK == OURDATA_BENCHMARK:
+        result = benchmark_on_our_data()
+    else:
+        result = benchmark_on_generated_table()
+
+    # save benchmarks (explicit encoding: ensure_ascii=False writes non-ASCII file names as is)
+    file_result = path_result / f"table_benchmark_{TYPE_BENCHMARK}.json"
+    with file_result.open("w", encoding="utf-8") as fd:
+        json.dump(result, fd, indent=2, ensure_ascii=False)
diff --git a/scripts/benchmark_table/metric.py b/scripts/benchmark_table/metric.py
new file mode 100644
index 00000000..ff84a4a7
--- /dev/null
+++ b/scripts/benchmark_table/metric.py
@@ -0,0 +1,161 @@
+# Copyright 2020 IBM
+# Author: peter.zhong@au1.ibm.com
+#
+# This is free software; you can redistribute it and/or modify
+# it under the terms of the Apache 2.0 License.
+#
+# This software is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# Apache 2.0 License for more details.
+
+# Source: https://github.com/ibm-aur-nlp/PubTabNet
+
+import distance
+from apted import APTED, Config
+from apted.helpers import Tree
+from lxml import etree, html
+from collections import deque
+
+from tqdm import tqdm
+
+
+class TableTree(Tree):
+    """
+    Node of an HTML table tree in the form passed to APTED.
+
+    Apart from the tag, a node keeps the cell attributes (colspan, rowspan, content, visible)
+    that are compared in CustomConfig.rename, and the list of its child nodes.
+    """
+
+    def __init__(self, tag, colspan=None, rowspan=None, content=None, visible=None, *children):
+        self.tag = tag
+        self.colspan = colspan
+        self.rowspan = rowspan
+        self.content = content
+        self.visible = visible
+        self.children = list(children)
+
+    def bracket(self):
+        """Show tree using brackets notation
+        """
+        if self.tag in ("td", "th"):
+            result = f'"tag": {self.tag}, "colspan": {self.colspan}, "rowspan": {self.rowspan}, "text": {self.content}'
+        else:
+            result = f'"tag": {self.tag}'
+        for child in self.children:
+            result += child.bracket()
+        # a node is wrapped into ONE pair of braces: "{{" is an escape only inside format strings,
+        # in a plain literal it would double every brace of the notation
+        return "{" + result + "}"
+
+
+class CustomConfig(Config):
+    """Configuration passed to APTED: defines the cost of renaming one TableTree node into another."""
+
+    @staticmethod
+    def maximum(*sequences):
+        """Get the length of the longest of the given sequences
+        """
+        return max(len(sequence) for sequence in sequences)
+
+    def normalized_distance(self, *sequences) -> float:
+        """Get the Levenshtein distance of the sequences divided by the length of the longest one
+        """
+        raw_distance = float(distance.levenshtein(*sequences))
+        return raw_distance / self.maximum(*sequences)
+
+    def rename(self, node1: TableTree, node2: TableTree) -> float:
+        """Compares attributes of trees"""
+        same_kind = (node1.tag == node2.tag) and (node1.colspan == node2.colspan) and (node1.rowspan == node2.rowspan)
+        if not same_kind:
+            return 1.
+        if node1.tag != "td":
+            # only the content of "td" cells is compared
+            return 0.
+        if not (node1.visible and node2.visible):
+            return 0.
+        if node1.content or node2.content:
+            return self.normalized_distance(node1.content, node2.content)
+        return 0.
+
+
+class TEDS(object):
+    """ Tree Edit Distance based Similarity
+    """
+
+    def __init__(self, structure_only=False, n_jobs=1, ignore_nodes=None):
+        """
+        :param structure_only: when True, the content of the cells is not compared
+        :param n_jobs: stored only, must be an integer >= 1
+        :param ignore_nodes: tags that are stripped from both tables before the comparison
+        """
+        assert isinstance(n_jobs, int) and (n_jobs >= 1), "n_jobs must be an integer greater than or equal to 1"
+        self.structure_only = structure_only
+        self.n_jobs = n_jobs
+        self.ignore_nodes = ignore_nodes
+        self.__tokens__ = []
+
+    def tokenize(self, node):
+        """ Tokenizes table cells: appends the tags and the separate characters of the text to self.__tokens__
+        """
+        self.__tokens__.append(f"<{node.tag}>")
+        if node.text is not None:
+            self.__tokens__ += list(node.text)
+        for child in node:
+            self.tokenize(child)
+        if node.tag != "unk":
+            # closing tag token, the pair of the opening token appended above
+            self.__tokens__.append(f"</{node.tag}>")
+        if node.tag != "td" and node.tail is not None:
+            self.__tokens__ += list(node.tail)
+
+    def get_span(self, node, name_span: str) -> int:
+        """ Returns the integer value of the span attribute, 1 when it is absent or not positive
+        """
+        value = int(node.attrib.get(name_span, "1"))
+        return 1 if value <= 0 else value
+
+    def load_html_tree(self, node, parent=None):
+        """ Converts HTML tree to the format required by apted
+
+        Returns the converted node for the root call (parent is None),
+        nested calls attach the node to parent.children and return None.
+        """
+        if node.tag == "td":
+            if self.structure_only:
+                cell = []
+            else:
+                self.__tokens__ = []
+                self.tokenize(node)
+                # drop the opening and the closing token of the cell itself
+                cell = self.__tokens__[1:-1].copy()
+
+            try:
+                new_node = TableTree(tag=node.tag,
+                                     colspan=self.get_span(node, "colspan"),
+                                     rowspan=self.get_span(node, "rowspan"),
+                                     content=cell,
+                                     visible=node.attrib.get("style") != "display: none")
+            except Exception as ex:
+                print(f"Bad html file. HTML parse exception. Exception's msg: {ex}")
+                raise
+        else:
+            new_node = TableTree(node.tag, None, None, None, True)
+        if parent is not None:
+            parent.children.append(new_node)
+        if node.tag != "td":
+            for child in node:
+                self.load_html_tree(child, new_node)
+        if parent is None:
+            return new_node
+
+    def evaluate(self, pred: str, true: str) -> float:
+        """ Computes TEDS score between the prediction and the ground truth of a given sample
+
+        Returns 0.0 when one of the documents is empty or has no table matching the "body/table" xpath.
+        """
+        if (not pred) or (not true):
+            return 0.0
+        parser = html.HTMLParser(remove_comments=True, encoding="utf-8")
+        pred_tables = html.fromstring(pred, parser=parser).xpath("body/table")
+        true_tables = html.fromstring(true, parser=parser).xpath("body/table")
+        if not pred_tables or not true_tables:
+            return 0.0
+
+        pred_table, true_table = pred_tables[0], true_tables[0]
+        if self.ignore_nodes:
+            etree.strip_tags(pred_table, *self.ignore_nodes)
+            etree.strip_tags(true_table, *self.ignore_nodes)
+        n_nodes = max(len(pred_table.xpath(".//*")), len(true_table.xpath(".//*")))
+        if n_nodes == 0:
+            # both tables are empty "table" elements: they are equal and there is nothing to normalize by
+            return 1.0
+        tree_pred = self.load_html_tree(pred_table)
+        tree_true = self.load_html_tree(true_table)
+
+        # named edit_distance so that the imported "distance" module is not shadowed
+        edit_distance = APTED(tree_pred, tree_true, CustomConfig()).compute_edit_distance()
+        return 1.0 - (float(edit_distance) / n_nodes)
+
+    def batch_evaluate(self, pred_json, true_json):
+        """ Computes TEDS score between the prediction and the ground truth of
+            a batch of samples
+            @params pred_json: {'FILENAME': {'html': 'HTML CODE'}, ...}
+            @params true_json: {'FILENAME': {'html': 'HTML CODE'}, ...}
+            @output: {'FILENAME': 'TEDS SCORE', ...}
+            A sample without a prediction is scored as 0.0
+        """
+        return {
+            filename: self.evaluate(pred_json.get(filename, {}).get("html", ""), true_json[filename]["html"])
+            for filename in tqdm(true_json)
+        }
diff --git a/scripts/benchmark_table/requirements.txt b/scripts/benchmark_table/requirements.txt
new file mode 100644
index 00000000..99314805
--- /dev/null
+++ b/scripts/benchmark_table/requirements.txt
@@ -0,0 +1,3 @@
+# for metric TEDS:
+apted==1.0.3
+distance==0.1.3
\ No newline at end of file
|