-
Notifications
You must be signed in to change notification settings - Fork 338
/
Copy pathlayout_parsing.py
158 lines (134 loc) · 4.91 KB
/
layout_parsing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import json
import os
import sys
import time

import ailia
import cv2
from numpy import fix
sys.path.append("../../util")
from logging import getLogger # noqa: E402
from arg_utils import get_base_parser, update_parser
from detector_utils import plot_results, reverse_letterbox
from model_utils import check_and_download_models # noqa: E402
from layout_parsing_utils import pdf_to_images, preprocess
from yolox import YOLOX_LABEL_MAP
# Base URL handed to check_and_download_models() as the remote model location.
REMOTE_PATH = "https://storage.googleapis.com/ailia-models/unstructured-inference/"
# Default input file, used as the default of the -i/--input option.
IMAGE_PATH = "input.jpg"
# Sample PDF file name; not referenced by the code visible in this file.
PDF_PATH = "sample.pdf"
# Default save path passed to get_base_parser() (empty string).
SAVE_IMAGE_PATH = ""
# Model files: the ONNX weights and the matching ".prototxt" file name.
WEIGHT_PATH = "layout_parsing_yolox.onnx"
MODEL_PATH = WEIGHT_PATH + ".prototxt"
# Target shape passed to preprocess().
# NOTE(review): axis order (height, width) vs (width, height) is not visible here - confirm.
INPUT_SHAPE = (1024, 768)
# Default of the -th/--threshold option (detection score threshold).
SCORE_THR = 0.25
# Default of the -iou/--iou option.
NMS_THR = 0.1
# Module-level logger for this script.
logger = getLogger(__name__)

# Start from the shared base parser, then register the script-specific options.
parser = get_base_parser("Layout parsing", IMAGE_PATH, SAVE_IMAGE_PATH)

# Input file (image, or PDF when --from_pdf is given).
parser.add_argument(
    "-i", "--input",
    default=IMAGE_PATH,
    type=str,
    help="input file name",
)

# Resolution forwarded to pdf_to_images() in PDF mode.
parser.add_argument("-d", "--dpi", type=int, default=200, help="dpi")

# Detection thresholds forwarded to detector.compute().
parser.add_argument(
    "-th", "--threshold",
    type=float,
    default=SCORE_THR,
    help=f"The detection threshold for yolo. (default: {SCORE_THR})",
)
parser.add_argument(
    "-iou", "--iou",
    type=float,
    default=NMS_THR,
    help=f"The detection iou for yolo. (default: {NMS_THR})",
)

# Mode switches.
parser.add_argument(
    "-fp", "--from_pdf",
    help="set this option when target file is pdf",
    action="store_true",
)
parser.add_argument(
    "-w", "--write_json",
    help="Flag to output results to json file.",
    action="store_true",
)

# Parsed options; the inference functions below read args.input[0].
args = update_parser(parser)
def infer_from_pdf(detector: ailia.Detector):
    """Convert a PDF to page images and run layout detection on each page.

    The PDF path is read from ``args.input[0]``. ``pdf_to_images`` is called
    with ``args.dpi`` and ``args.savepath`` and returns the page image names.
    For every page the detection result is drawn with ``plot_results`` and
    written to ``<page name without extension>_parsed.jpg``.

    When ``args.benchmark`` is set, inference is repeated
    ``args.benchmark_count`` times per page and the per-run time is logged;
    the first run is excluded from the reported average.

    Args:
        detector: Detector whose ``compute`` method is run on each page image.

    Raises:
        OSError: If a page image cannot be read back with ``cv2.imread``.
    """
    image_names = pdf_to_images(
        args.input[0], dpi=args.dpi, output_folder=args.savepath
    )

    for image_name in image_names:
        img_orig = cv2.imread(image_name)
        if img_orig is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"Failed to read image: {image_name}")

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            total_time = 0
            for i in range(args.benchmark_count):
                start = int(round(time.time() * 1000))
                detector.compute(img_orig, args.threshold, args.iou)
                end = int(round(time.time() * 1000))
                if i != 0:
                    total_time = total_time + (end - start)
                logger.info(f'\tailia processing time {end - start} ms')
            # The first run is not counted, so an average only exists when
            # at least two runs were made (avoids dividing by zero).
            counted_runs = args.benchmark_count - 1
            if counted_runs > 0:
                logger.info(f'\taverage time {total_time / counted_runs} ms')
        else:
            detector.compute(img_orig, args.threshold, args.iou)

        res_img = plot_results(detector, img_orig, YOLOX_LABEL_MAP)

        # Derive the output name from the stem so that a page image with an
        # extension other than ".ppm" is never overwritten by the result.
        stem = os.path.splitext(image_name)[0]
        cv2.imwrite(stem + "_parsed.jpg", res_img)
def infer_from_image(detector: ailia.Detector):
    """Run layout detection on a single image and save the results.

    The image path is read from ``args.input[0]``. The detection result is
    drawn with ``plot_results`` and written next to the input as
    ``<input without extension>_parsed.jpg``. When ``args.write_json`` is
    set, the detected objects are also dumped to
    ``<input without extension>_parsed.json``.

    When ``args.benchmark`` is set, inference is repeated
    ``args.benchmark_count`` times and the per-run time is logged; the first
    run is excluded from the reported average.

    Args:
        detector: Detector whose ``compute`` method is run on the image and
            whose detected objects are read back for the JSON output.

    Raises:
        OSError: If the input image cannot be read with ``cv2.imread``.
    """
    image_filename = args.input[0]
    img_orig = cv2.imread(image_filename)
    if img_orig is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"Failed to read image: {image_filename}")

    logger.info("Start: inference...")
    if args.benchmark:
        logger.info("BENCHMARK mode")
        total_time = 0
        for i in range(args.benchmark_count):
            start = int(round(time.time() * 1000))
            detector.compute(img_orig, args.threshold, args.iou)
            end = int(round(time.time() * 1000))
            if i != 0:
                total_time = total_time + (end - start)
            logger.info(f"\tailia processing time {end - start} ms")
        # The first run is not counted, so an average only exists when at
        # least two runs were made (avoids dividing by zero).
        counted_runs = args.benchmark_count - 1
        if counted_runs > 0:
            logger.info(f"\taverage time {total_time / counted_runs} ms")
    else:
        detector.compute(img_orig, args.threshold, args.iou)

    res_img = plot_results(detector, img_orig, YOLOX_LABEL_MAP)

    # Build output names from the stem: str.replace on the extension would
    # touch every occurrence in the path and misbehaves for an empty extension.
    stem = os.path.splitext(image_filename)[0]
    cv2.imwrite(stem + "_parsed.jpg", res_img)

    if args.write_json:
        detections = (
            detector.get_object(idx)
            for idx in range(detector.get_object_count())
        )
        out_list = [
            {
                "category": d.category,
                "category_name": YOLOX_LABEL_MAP[d.category],
                "prob": float(d.prob),
                "x": float(d.x),
                "y": float(d.y),
                "w": float(d.w),
                "h": float(d.h),
            }
            for d in detections
        ]
        with open(stem + "_parsed.json", "w", encoding="utf-8") as f:
            json.dump(out_list, f, indent=2)
if __name__ == "__main__":
    # Fetch the model files if needed before the detector is created.
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    # Build the YOLOX detector with one category per label-map entry.
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(YOLOX_LABEL_MAP),
        format=ailia.NETWORK_IMAGE_FORMAT_BGR,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_INT8,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOX,
        env_id=args.env_id,
    )

    # Pick the PDF or single-image pipeline based on --from_pdf.
    run_inference = infer_from_pdf if args.from_pdf else infer_from_image
    run_inference(detector)