-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimage_utils.py
175 lines (144 loc) · 5.4 KB
/
image_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
import base64
import hashlib
import os
import uuid
import warnings
from io import BytesIO
from pathlib import Path
import requests
from PIL import Image, ImageSequence, PngImagePlugin, ImageOps
# Modified from: https://github.com/gradio-app/gradio/blob/main/gradio/processing_utils.py
# Default directory for cached images; used as the default `cache_dir` of
# format_image and as the target of its PNG -> JPG conversion.
CACHE_DIR = '/tmp'
# File in which get_hash_seed persists the hashing seed between runs.
HASH_SEED_PATH = f'{CACHE_DIR}/hash_seed.txt'
def get_pil_exif_bytes(pil_image):
    """Return the raw EXIF payload stored in ``pil_image.info``.

    Returns:
        The value stored under the ``"exif"`` key, or ``None`` when the image
        carries no such entry.
    """
    return pil_image.info.get("exif")
def get_pil_metadata(pil_image):
    """Build a ``PngInfo`` holding the text-only entries of ``pil_image.info``.

    Only entries whose key and value are both ``str`` are copied; everything
    else in ``info`` is skipped.

    Returns:
        A new ``PngImagePlugin.PngInfo`` instance.
    """
    png_info = PngImagePlugin.PngInfo()
    text_entries = (
        (field, text)
        for field, text in pil_image.info.items()
        if isinstance(field, str) and isinstance(text, str)
    )
    for field, text in text_entries:
        png_info.add_text(field, text)
    return png_info
def encode_pil_to_bytes(pil_image, format="png"):
    """Serialize a PIL image to bytes in the requested format.

    Args:
        pil_image: The image to encode.
        format: Target format name; compared case-insensitively to pick the
            encoding strategy, but passed to ``save`` unchanged.

    Returns:
        The encoded image as ``bytes``.
    """
    normalized = format.lower()
    with BytesIO() as buffer:
        if normalized == "gif":
            # Copy every frame so the whole animation is written, looping forever.
            frames = [frame.copy() for frame in ImageSequence.Iterator(pil_image)]
            frames[0].save(
                buffer,
                format=format,
                save_all=True,
                append_images=frames[1:],
                loop=0,
            )
            return buffer.getvalue()

        if normalized == "png":
            # PNG keeps its text metadata.
            save_kwargs = {"pnginfo": get_pil_metadata(pil_image)}
        else:
            # Other formats keep EXIF data when the image has any.
            exif_bytes = get_pil_exif_bytes(pil_image)
            save_kwargs = {"exif": exif_bytes} if exif_bytes else {}
        pil_image.save(buffer, format, **save_kwargs)
        return buffer.getvalue()
def get_hash_seed() -> str:
    """Return the seed mixed into cache hashes, creating it on first use.

    The seed is read from ``HASH_SEED_PATH`` when that file exists; otherwise
    a fresh random hex string is written there and returned. If anything goes
    wrong, a random hex seed is returned without being persisted.
    """
    try:
        if not os.path.exists(HASH_SEED_PATH):
            fresh_seed = uuid.uuid4().hex
            with open(HASH_SEED_PATH, "w") as seed_file:
                seed_file.write(fresh_seed)
            return fresh_seed
        with open(HASH_SEED_PATH) as seed_file:
            return seed_file.read().strip()
    except Exception:
        # Best effort: fall back to an ephemeral seed rather than failing.
        return uuid.uuid4().hex
# Seed bytes fed into every digest computed by hash_bytes; evaluated once when
# the module is loaded.
hash_seed = get_hash_seed().encode("utf-8")
def hash_bytes(bytes: bytes):
    """Return the hex SHA-256 digest of the module seed followed by ``bytes``."""
    digest = hashlib.sha256(hash_seed)
    digest.update(bytes)
    return digest.hexdigest()
def save_pil_to_cache(
    img: Image.Image,
    cache_dir: str,
    name: str = "image",
    format: str = "webp",
) -> str:
    """Encode ``img`` and store it under a content-hashed folder in ``cache_dir``.

    Args:
        img: Image to store.
        cache_dir: Root directory of the cache.
        name: File stem of the stored image.
        format: Encoding format; also used as the file extension.

    Returns:
        The resolved path of the written file, as a string.
    """
    encoded = encode_pil_to_bytes(img, format)
    # One sub-directory per distinct encoded payload.
    target_dir = Path(cache_dir) / hash_bytes(encoded)
    target_dir.mkdir(exist_ok=True, parents=True)
    target_file = (target_dir / f"{name}.{format}").resolve()
    target_file.write_bytes(encoded)
    return str(target_file)
# Downscale high-resolution images to improve recognition quality.
# This only helps for some images: when the image is large but the content to
# be recognized occupies only a small part of it, downscaling is harmful.
def resize_image(input_image, max_size=499):
    """Shrink an image so its longer side equals ``max_size``, keeping aspect ratio.

    The image is resized only when BOTH its width and height exceed
    ``max_size``; otherwise the input object is returned unchanged.

    Args:
        input_image: Image exposing ``size`` and ``resize`` (a PIL image).
        max_size: Target length in pixels of the longer side; must be >= 1.

    Returns:
        The resized image, or ``input_image`` itself when no resize is needed.

    Raises:
        ValueError: If ``max_size`` is smaller than 1.
    """
    if max_size < 1:
        raise ValueError(f"max_size must be a positive integer, got {max_size}")

    width, height = input_image.size
    if width <= max_size or height <= max_size:
        return input_image

    # Clamp the shorter side to at least 1 pixel: for extreme aspect ratios the
    # integer truncation would otherwise yield 0, which resize() rejects.
    if width > height:
        new_width = max_size
        new_height = max(1, int(max_size * height / width))
    else:
        new_height = max_size
        new_width = max(1, int(max_size * width / height))
    return input_image.resize((new_width, new_height), Image.LANCZOS)
def _split_image_name(image_path: str) -> tuple[str, str]:
    """Return ``(stem, extension)`` used to name the cached copy of an image.

    The extension is lowercase, has no leading dot, and is empty when the
    reference does not reveal one.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import unquote, urlparse

    if image_path.startswith("data:image"):
        # "data:image/png;base64,<payload>" -> "png". The payload is not a file
        # name, so a fixed stem is used instead.
        header = image_path.split(",", 1)[0]
        subtype = header[len("data:image"):].lstrip("/").split(";", 1)[0]
        return "image", subtype.lower()
    if image_path.startswith(("http://", "https://")):
        # Drop the query string and fragment before looking for a file name.
        image_path = unquote(urlparse(image_path).path)
    stem, ext = os.path.splitext(os.path.basename(image_path))
    return stem or "image", ext[1:].lower()


def format_image(
    image_path: str,
    cache_dir: str = CACHE_DIR,
    resize: bool = True,
) -> Image.Image | str | None:
    """Load an image, normalize it, and store the result in the cache.

    Normalization steps: PNG input is converted to RGB and stored as JPEG,
    large images are optionally downscaled, and the EXIF orientation is
    applied when present.

    Args:
        image_path: Local path, ``file://`` path, ``http(s)://`` URL, or a
            base64 ``data:image`` URI.
        cache_dir: Root directory in which the processed image is stored.
        resize: Whether to downscale large images with ``resize_image``.

    Returns:
        The path of the cached, processed image file.

    Raises:
        ValueError: If ``image_path`` is a ``data:image`` URI without a
            base64 payload.
        requests.RequestException: If downloading a remote image fails.
    """
    image_obj = None
    if image_path.startswith(("http://", "https://")):
        # Bound the wait on unresponsive hosts and fail early on HTTP errors.
        # `.content` (unlike `.raw`) is decoded from any transfer encoding.
        response = requests.get(image_path, timeout=30)
        response.raise_for_status()
        image_obj = Image.open(BytesIO(response.content))
    elif image_path.startswith("file://"):
        image_obj = Image.open(image_path[7:])
    elif image_path.startswith("data:image"):
        if "base64," in image_path:
            _, base64_data = image_path.split("base64,", 1)
            image_obj = Image.open(BytesIO(base64.b64decode(base64_data)))
    else:
        image_obj = Image.open(image_path)

    if image_obj is None:
        raise ValueError(
            f"Unrecognized image input, support local path, http url, base64 and PIL.Image, got {image_path}")

    image_name, image_ext = _split_image_name(image_path)
    if not image_ext:
        # No extension in the reference: fall back to what PIL detected.
        image_ext = (image_obj.format or "png").lower()

    # Convert PNG to JPG to improve recognition quality. The conversion is
    # done in memory so that concurrent calls cannot clobber a shared file.
    if image_ext == "png":
        image_obj = image_obj.convert("RGB")
        image_ext = "jpg"

    if resize:
        image_obj = resize_image(image_obj)

    exif = image_obj.getexif()
    # 274 is the code for image rotation and 1 means "correct orientation"
    if exif.get(274, 1) != 1 and hasattr(ImageOps, "exif_transpose"):
        try:
            image_obj = ImageOps.exif_transpose(image_obj)
        except Exception:
            warnings.warn(
                f"Failed to transpose image {image_path} based on EXIF data."
            )

    # PIL names the format "jpeg", not "jpg".
    if image_ext in ("jpg", "jpeg"):
        image_ext = "jpeg"

    try:
        path = save_pil_to_cache(
            image_obj, cache_dir=cache_dir, name=image_name, format=image_ext
        )
    # Catch error if format is not supported by PIL
    except (KeyError, ValueError):
        path = save_pil_to_cache(
            image_obj,
            cache_dir=cache_dir,
            name=image_name,
            format="png",  # type: ignore
        )
    return path
def convert_image_to_base64(image_path):
    """Read the file at ``image_path`` and return its content base64-encoded.

    Returns:
        The base64 text of the file's bytes, as a ``str``.
    """
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')