-
Notifications
You must be signed in to change notification settings - Fork 0
/
calc_taildirection3.py
367 lines (286 loc) · 12.4 KB
/
calc_taildirection3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
import numpy as np
from scipy import stats
from scipy import signal
from scipy.interpolate import UnivariateSpline
from PIL import Image
import cv2
from tqdm import tqdm
import pandas as pd
from pathlib import Path
import math
import settings
# Directory holding the per-book dataset CSVs and character lists.
DATASET_DIR = 'data/dataset'
# Directory under which the per-book score CSVs are written.
OUTPUT_DIR = 'data/scores'
# Script name without its 'calc_' prefix; used in the output file name.
# NOTE: str.lstrip('calc_') strips any leading run of the characters
# 'c', 'a', 'l', '_' (not the literal prefix), which would mangle names such
# as 'calc_location' -> 'ocation'. Strip the prefix explicitly instead.
_SCRIPT_STEM = Path(__file__).stem
CALC_NAME = _SCRIPT_STEM[len('calc_'):] if _SCRIPT_STEM.startswith('calc_') else _SCRIPT_STEM
DATASET_NAME = settings.DATASET_NAME
manga109_parser = settings.manga109_parser
LENGTH_T = 0.05  # ratio (of the page diagonal) by which the tail line is extended
TOP_DEG = 30  # opening angle, in degrees, of the search triangle
def main():
    """Score candidate speakers of each text from the direction of its balloon tail.

    For every book returned by ``manga109_parser.books`` this:

    1. loads the annotation, the dataset CSV and the target character list
       via ``init``;
    2. for every annotated text whose id is listed in the dataset, extracts
       the balloon contour, locates the tail tip from the contour curvature
       and builds a triangle that opens ``TOP_DEG`` degrees from the tip in
       the direction the tail points;
    3. collects every body/face ROI whose bounding-box edges cross an edge of
       that triangle and gives each of those target characters a score of
       ``1 / (number of hit characters)``;
    4. writes the per-text scores to
       ``{OUTPUT_DIR}/{NNN}_{book}/{CALC_NAME}_{DATASET_NAME}.csv``.

    Texts for which any step fails (no balloon, no tail, ...) keep a score
    of 0 for every character.
    """
    books = manga109_parser.books
    with tqdm(total=len(books)) as pbar:
        for i, book in enumerate(books):
            annotation, dataset, characters_target = init(i, book)
            annotation_ids = dataset['annotation_id'].values
            # Build the lookup sets once per book instead of scanning the
            # array / rebuilding the set for every text.
            target_ids = set(annotation_ids)
            target_characters = set(characters_target)
            scores_output_df = pd.DataFrame(0, index=annotation_ids, columns=characters_target)
            # One Series per scored text; turned into a DataFrame after the
            # page loop (DataFrame.append was quadratic and no longer exists
            # in pandas >= 2.0).
            score_rows = []
            first_page = annotation['page'][0]
            # Length of the extended tail line: a fixed ratio of the page diagonal.
            w = np.linalg.norm([first_page['@width'], first_page['@height']]) * LENGTH_T
            for page in annotation['page']:
                texts = page['text']
                bodys = page['body']
                faces = page['face']
                frames = page['frame']
                with Image.open(manga109_parser.img_path(book=book, index=page['@index'])) as page_img:
                    img = np.array(page_img)
                gray = conv_gray(img)
                thresh = conv_thresh(gray)
                labels = get_labels(thresh)
                for text in texts:
                    if text['@id'] not in target_ids:
                        continue
                    # The actual computation starts here.
                    try:
                        contours = get_balloon_contours(labels, text)
                    except Exception:
                        print('err')
                        continue
                    if not is_balloon(contours, text):
                        continue
                    try:
                        curve = curvature_splines(contours[:, 0], contours[:, 1])
                        # Normalise the sign so that the mean curvature is
                        # non-positive; the tail tip is then searched as a minimum.
                        if curve.mean() > 0:
                            curve = curve * -1
                    except Exception:
                        continue
                    # Ignore contour points lying on the border of the frame
                    # that contains the text.
                    target_frame = whitemap_target_frame(text, frames, thresh.shape, thickness=15)
                    target_bool = target_frame[contours[:, 1], contours[:, 0]] == 0
                    contours_target = contours[target_bool]
                    curve_target = curve[target_bool]
                    try:
                        tail_index = calc_tail_index(curve_target)
                    except Exception:
                        continue
                    if tail_index is None:
                        continue
                    left_index, right_index = calc_vertex_index(curve_target, tail_index)
                    if left_index is None:
                        continue
                    vertex_top = contours_target[tail_index]
                    vertex_left = contours_target[left_index]
                    vertex_right = contours_target[right_index]
                    line_center = np.array([
                        (vertex_left[0] + vertex_right[0]) / 2,
                        (vertex_left[1] + vertex_right[1]) / 2,
                    ])
                    if not is_tail2(vertex_top, vertex_left, vertex_right, curve_target):
                        continue
                    endpoint = calc_direction(vertex_top, line_center, w)
                    # Widen the tail line into a triangle: its apex is the
                    # tail tip and its base is centred on ``endpoint``,
                    # perpendicular to the tail direction.
                    line_size = np.linalg.norm(endpoint - vertex_top)
                    vector = (endpoint - vertex_top) / line_size
                    vertical = np.array([vector[1], vector[0] * -1])
                    vertical_size = line_size * math.tan(math.radians(TOP_DEG / 2))
                    vertex_2 = endpoint + (vertical * vertical_size)
                    vertex_3 = endpoint + (vertical * vertical_size * -1)
                    triangle_edges = [
                        (vertex_top, vertex_2),
                        (vertex_2, vertex_3),
                        (vertex_3, vertex_top),
                    ]
                    # Exhaustively hit-test every body and face box against
                    # the triangle edges.
                    speaker_charas = set()
                    for rois in (bodys, faces):
                        for roi in rois:
                            if any(is_in_bounding(p1, p2, roi) for p1, p2 in triangle_edges):
                                speaker_charas.add(roi['@character'])
                    speaker_charas &= target_characters
                    if not speaker_charas:
                        # An all-zero row would add nothing to the output,
                        # which already holds 0 for this text id.
                        continue
                    score = 1.0 / len(speaker_charas)
                    score_rows.append(
                        pd.Series(score, index=sorted(speaker_charas), name=text['@id'])
                    )
                pbar.set_postfix(page=f'{page["@index"] + 1}/{len(annotation["page"])}')
            scores_df = pd.DataFrame(score_rows, columns=characters_target).fillna(0.0)
            scores_output_df = scores_output_df.add(scores_df, fill_value=0)
            with open(f'{OUTPUT_DIR}/{i+1:03}_{book}/{CALC_NAME}_{DATASET_NAME}.csv', 'w') as f:
                scores_output_df.to_csv(f)
            pbar.update(1)
def init(i, book):
    """Load everything needed to process one book.

    Parameters
    ----------
    i : int
        Zero-based position of the book; files are numbered ``i + 1``,
        zero-padded to three digits.
    book : str
        Book title as used in the file names and by ``manga109_parser``.

    Returns
    -------
    tuple
        ``(annotation, dataset, characters_target)``: the parser's annotation
        for the book, the dataset CSV as a DataFrame, and the lines of the
        ``*_character.txt`` file as a list of strings.
    """
    annotation = manga109_parser.get_annotation(book)

    with open(Path(f'{DATASET_DIR}/{i+1:03}_{book}.csv'), 'r') as csv_file:
        dataset = pd.read_csv(csv_file)

    # NOTE(review): a trailing newline in the file yields a final '' entry.
    with open(f'{DATASET_DIR}/{i+1:03}_{book}_character.txt', 'r') as chara_file:
        characters_target = chara_file.read().split('\n')

    return annotation, dataset, characters_target
def conv_gray(img):
    """Return ``img`` converted with ``cv2.COLOR_RGB2GRAY``."""
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    return gray
def conv_thresh(gray):
    """Binarise ``gray`` with Otsu's method and return the thresholded image."""
    otsu_binary = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    # cv2.threshold returns (threshold value used, image); only the image is needed.
    return cv2.threshold(gray, 0, 255, otsu_binary)[1]
def get_labels(thresh):
    """Return the connected-component label image of ``thresh``."""
    # cv2.connectedComponents returns (label count, label image).
    return cv2.connectedComponents(thresh)[1]
# Draw the outline of the frame(s) that the text belongs to onto a blank mask
def whitemap_target_frame(text, frames, img_shape, thickness=10):
    """Return a mask with the borders of the frames containing ``text`` in white.

    Parameters
    ----------
    text : dict
        Text annotation; its box centre decides which frames it belongs to.
    frames : iterable of dict
        Frame annotations with ``@xmin``/``@ymin``/``@xmax``/``@ymax``.
    img_shape : tuple
        Shape of the mask to create.
    thickness : int
        Line thickness passed to ``cv2.rectangle``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of ``img_shape``: 255 on the drawn rectangles,
        0 elsewhere (all zeros when no frame contains the text).
    """
    frame_img = np.zeros(img_shape, dtype='uint8')
    for frame in frames:
        if not is_inframe(frame, text):
            continue
        top_left = (frame['@xmin'], frame['@ymin'])
        bottom_right = (frame['@xmax'], frame['@ymax'])
        cv2.rectangle(frame_img, top_left, bottom_right, 255, thickness=thickness)
    return frame_img
# Hit test between a frame rectangle and an ROI (body/face/text): true when the ROI's center lies inside the frame
def is_inframe(frame, roi):
    """Return True when the centre of ``roi`` lies inside ``frame`` (borders included)."""
    x, y = calc_box_center(roi)
    if not (frame['@xmin'] <= x <= frame['@xmax']):
        return False
    if not (frame['@ymin'] <= y <= frame['@ymax']):
        return False
    return True
# Compute the center of a rectangle
def calc_box_center(roi):
    """Return the ``(x, y)`` centre of the box described by ``roi``.

    ``roi`` must provide ``@xmin``, ``@xmax``, ``@ymin`` and ``@ymax``.
    """
    axis_bounds = (('@xmin', '@xmax'), ('@ymin', '@ymax'))
    center = tuple((roi[low] + roi[high]) / 2 for low, high in axis_bounds)
    return center
def get_balloon_contours(labels, text):
    """Return the outer contour of the balloon that contains ``text``.

    The balloon is taken to be the connected component whose label occurs
    most often (ignoring label 0) inside the text's bounding box.

    Parameters
    ----------
    labels : numpy.ndarray
        Label image of the page (as produced by ``get_labels``).
    text : dict
        Text annotation with ``@xmin``/``@xmax``/``@ymin``/``@ymax``.

    Returns
    -------
    numpy.ndarray
        Array of contour points with columns ``(x, y)``, or an empty
        ``(0, 0)`` array when the box contains no non-zero label or the
        contour is degenerate (not two-dimensional after squeezing).
    """
    # Fetch the labels of the target region from the text box coordinates.
    xmin, xmax, ymin, ymax = text['@xmin'], text['@xmax'], text['@ymin'], text['@ymax']
    labels_target = labels[ymin:ymax, xmin:xmax]

    # Most frequent non-zero label. np.unique is used instead of
    # scipy.stats.mode because the latter returns a scalar on recent SciPy
    # releases, which breaks len()/[0]. Ties resolve to the smallest label
    # in both implementations.
    candidates = labels_target[labels_target != 0]
    if candidates.size == 0:
        return np.empty((0, 0))
    label_values, label_counts = np.unique(candidates, return_counts=True)
    balloon_label = label_values[label_counts.argmax()]

    # Binary image containing only the balloon.
    balloon_img = np.zeros(labels.shape, dtype='uint8')
    balloon_img[labels == balloon_label] = 255

    # [-2] selects the contour list for both the 2-tuple (OpenCV 2/4) and
    # the 3-tuple (OpenCV 3) return conventions.
    contours = cv2.findContours(balloon_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
    contours = contours[0].squeeze()
    if contours.ndim != 2:
        return np.empty((0, 0))
    return contours
# Curvature calculation
def curvature_splines(x, y=None, error=0.1):
    """Compute the signed curvature of a 2D curve at every sample point.

    Both coordinates are fitted with degree-4 ``UnivariateSpline`` objects
    parameterised by the sample index; the curvature is then evaluated from
    the first and second spline derivatives.

    Parameters
    ----------
    x, y : numpy.ndarray, shape (n_points,)
        Coordinates of the curve. When ``y`` is None, ``x`` is read as a
        complex array whose real and imaginary parts are the coordinates.
    error : float
        Every sample gets the spline weight ``1 / sqrt(error)``.

    Returns
    -------
    numpy.ndarray, shape (n_points,)
        ``(x' * y'' - y' * x'') / (x'**2 + y'**2) ** 1.5`` at each sample.
    """
    # Complex-array form: split into real and imaginary coordinates.
    if y is None:
        x, y = x.real, x.imag

    param = np.arange(x.shape[0])
    weights = 1 / np.sqrt(error * np.ones_like(x))

    spline_x = UnivariateSpline(param, x, k=4, w=weights)
    spline_y = UnivariateSpline(param, y, k=4, w=weights)

    dx = spline_x.derivative(1)(param)
    ddx = spline_x.derivative(2)(param)
    dy = spline_y.derivative(1)(param)
    ddy = spline_y.derivative(2)(param)

    cross = dx * ddy - dy * ddx
    speed_squared = dx ** 2 + dy ** 2
    return cross / np.power(speed_squared, 3 / 2)
def is_balloon(contours, text):
    """Return whether ``contours`` plausibly outlines the balloon of ``text``.

    Parameters
    ----------
    contours : numpy.ndarray
        Contour points of the candidate balloon (may be empty).
    text : dict
        Text annotation with ``@xmin``/``@xmax``/``@ymin``/``@ymax``.

    Returns
    -------
    bool
        False when the contour is empty or has more points than four times
        the perimeter of the text box; True otherwise.
    """
    # Nothing to judge when the contour is empty.
    if len(contours) == 0:
        return False

    text_w = text['@xmax'] - text['@xmin']
    text_h = text['@ymax'] - text['@ymin']

    # A contour that is too long is not regarded as a balloon.
    if len(contours) > (text_w * 2 + text_h * 2) * 4:
        return False
    return True
# Get the index of the tail tip based on the curvature
def calc_tail_index(curve):
    """Return the index in ``curve`` of the tail tip, or None if there is none.

    Local minima of the curvature are searched with a window of one tenth
    of the curve length on each side; the deepest one is the tail tip when
    ``is_tail`` accepts the set of minima.

    Parameters
    ----------
    curve : numpy.ndarray
        Curvature value at each contour point.

    Returns
    -------
    int or None
        Index of the smallest local minimum, or None when the curve is
        shorter than 10 points, has no local minimum, or the minima do not
        look like a tail.
    """
    order = len(curve) // 10
    # argrelmin requires order >= 1; report "no tail" for too-short curves
    # instead of letting it raise ValueError.
    if order < 1:
        return None

    minima_idx = signal.argrelmin(curve, order=order)[0]
    if minima_idx.size == 0:
        return None

    curve_mins = curve[minima_idx]
    if not is_tail(curve_mins):
        return None
    return minima_idx[curve_mins.argmin()]
# Decide from the local minima whether this is a tail
def is_tail(curve_mins):
    """Decide from the curvature local minima whether the balloon has a tail.

    Parameters
    ----------
    curve_mins : numpy.ndarray
        Curvature values at the local minima of the contour.

    Returns
    -------
    bool
        False for an empty input. True when every minimum equals the
        smallest one, or when fewer than five other minima exist and the
        absolute value of their mean is less than half the absolute value
        of the smallest one.
    """
    if len(curve_mins) == 0:
        return False

    deepest = curve_mins.min()  # tail tip candidate
    rivals = curve_mins[curve_mins != deepest]

    # No competing minimum: the single deepest value is taken as the tail.
    if len(rivals) == 0:
        return True

    ratio = np.abs(rivals.mean()) / np.abs(deepest)
    # Not too many competitors, and on average less than half as deep.
    return bool(len(rivals) < 5 and ratio < 0.5)
def is_tail2(vertex_top, vertex_left, vertex_right, curve):
    """Check that the triangle tip/left/right has the shape of a balloon tail.

    Parameters
    ----------
    vertex_top : numpy.ndarray
        Tail tip.
    vertex_left, vertex_right : numpy.ndarray
        Base points on either side of the tip.
    curve : sequence
        Curvature values of the contour; only its length is used.

    Returns
    -------
    bool
        False when either edge from the tip has zero length, when the angle
        at the tip exceeds 125 degrees, or when either edge is longer than
        20% of ``len(curve)``; True otherwise.
    """
    u = vertex_left - vertex_top
    v = vertex_right - vertex_top
    s = np.linalg.norm(u)
    t = np.linalg.norm(v)

    # A zero-length edge makes the angle undefined (0/0 -> NaN); NaN would
    # slip through the ``deg > 125`` check, so reject it explicitly.
    if s == 0 or t == 0:
        return False

    cos = np.inner(u, v) / (s * t)
    # Clip to guard arccos against rounding slightly outside [-1, 1].
    deg = np.rad2deg(np.arccos(np.clip(cos, -1.0, 1.0)))
    if deg > 125:
        return False

    max_edge = len(curve) * 0.2
    if s > max_edge or t > max_edge:
        return False
    return True
# Compute the end point of the line segment
def calc_direction(vertex, center, w):
    """Return the point reached by moving ``w`` from ``vertex`` away from ``center``.

    Parameters
    ----------
    vertex : numpy.ndarray
        Start point of the segment.
    center : numpy.ndarray
        Point the direction is measured from; the direction is
        ``vertex - center``, normalised to unit length.
    w : float
        Distance to move along that direction.

    Returns
    -------
    numpy.ndarray
        ``vertex + w * unit(vertex - center)``.
    """
    offset = vertex - center
    unit = offset / np.linalg.norm(offset)
    return vertex + w * unit
# Find the indices of the base (root) points of the tail
def calc_vertex_index(curve, tail_index):
    """Return the indices of the two base points on either side of the tail tip.

    Parameters
    ----------
    curve : numpy.ndarray
        Curvature value at each contour point.
    tail_index : int
        Index of the tail tip in ``curve``.

    Returns
    -------
    tuple
        ``(left, right)`` where ``left`` is one past the last index before
        ``tail_index`` at which the curvature increases, and ``right`` is the
        first index after ``tail_index`` at which it decreases.
        ``(None, None)`` when either side has no such index.
    """
    curve_diff = np.diff(curve)
    rising = np.where(curve_diff > 0)[0]
    falling = np.where(curve_diff < 0)[0]

    left_candidates = rising[rising < tail_index]
    right_candidates = falling[falling > tail_index]
    # Explicit emptiness check instead of catching the IndexError that
    # indexing an empty array would raise.
    if left_candidates.size == 0 or right_candidates.size == 0:
        return None, None

    return left_candidates[-1] + 1, right_candidates[0]
# Segment intersection test
def intersect(p1, p2, p3, p4):
    """Return whether segment ``p1-p2`` strictly crosses segment ``p3-p4``.

    Each point is an indexable ``(x, y)`` pair. The segments cross when the
    end points of each lie strictly on opposite sides of the line through
    the other; touching or collinear configurations give False.
    """
    def side(a, b, c):
        # Signed value whose sign tells on which side of line a-b point c lies.
        return (a[0] - b[0]) * (c[1] - a[1]) + (a[1] - b[1]) * (a[0] - c[0])

    tc1 = side(p1, p2, p3)
    tc2 = side(p1, p2, p4)
    td1 = side(p3, p4, p1)
    td2 = side(p3, p4, p2)
    return tc1 * tc2 < 0 and td1 * td2 < 0
# Intersection test between a rectangle and a line segment
def is_in_bounding(vertex, endpoint, b):
    """Return whether segment ``vertex-endpoint`` crosses any edge of box ``b``.

    ``b`` must provide ``@xmin``, ``@ymin``, ``@xmax`` and ``@ymax``. Only
    crossings with the four box edges are detected; a segment lying entirely
    inside (or outside) the box yields False.
    """
    corners = [
        [b['@xmin'], b['@ymin']],  # left-top
        [b['@xmax'], b['@ymin']],  # right-top
        [b['@xmax'], b['@ymax']],  # right-bottom
        [b['@xmin'], b['@ymax']],  # left-bottom
    ]
    # Pair each corner with the next one, wrapping around to close the box.
    edges = zip(corners, corners[1:] + corners[:1])
    return any(intersect(vertex, endpoint, start, end) for start, end in edges)
# Run the scoring pipeline only when this file is executed as a script.
if __name__ == '__main__':
    main()