-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocess_kather.py
88 lines (62 loc) · 2.65 KB
/
process_kather.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# Credit: adapted from the PLIP reproducibility code: https://github.com/PathologyFoundation/plip/blob/main/reproducibility/generate_validation_datasets
# =============================================================================
import pandas as pd
import sys, os, platform, copy, shutil
opj = os.path.join
import numpy as np
from tqdm import tqdm
from PIL import Image, ImageFile
import shutil
from functools import partial
import warnings
warnings.filterwarnings("ignore")
import multiprocess as mp
ImageFile.LOAD_TRUNCATED_IMAGES = True
seed=1
import random
random.seed(seed)
def process_images_in_parallel(image_paths, num_workers=4):
    """Resize every image in ``image_paths`` using a pool of worker processes.

    Each path is handed to ``resizeimg``, which overwrites the file in place.

    Args:
        image_paths: Iterable of image file paths to process.
        num_workers: Number of worker processes. Values below 1 are raised
            to 1, because a pool cannot be created with zero processes.
    """
    image_paths = list(image_paths)
    if not image_paths:
        # Nothing to do; avoid starting worker processes for no work.
        return

    pool = mp.Pool(max(1, num_workers))
    try:
        # map() blocks until every path has been processed.
        pool.map(resizeimg, image_paths)
    except BaseException:
        # Stop the workers immediately so a failure (or Ctrl-C) does not
        # leave orphaned processes behind.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
def resizeimg(fp):
    """Resize the image at ``fp`` to 224x224 pixels and overwrite the file.

    Square images are resized directly. Non-square images are first scaled
    so that their shorter side is 224 pixels (aspect ratio preserved) and
    then center-cropped to 224x224.

    Args:
        fp: Path of the image file to process. The file is overwritten.
    """
    # ``pbar`` and ``num_cpus`` are module globals created by the script
    # entry point. Look them up defensively so this function does not raise
    # NameError when they are absent (e.g. spawned workers, or when the
    # module is imported and the function is called directly).
    progress = globals().get('pbar')
    if progress is not None:
        # NOTE(review): presumably advances by the worker count because each
        # worker only handles a fraction of the images - confirm.
        progress.update(globals().get('num_cpus', 1))

    newsize = 224
    # Use a context manager so the underlying file handle is released
    # before the file is overwritten below.
    with Image.open(fp) as img:
        width, height = img.size
        if width != height:
            # Scale so that the shorter side becomes ``newsize``.
            scale_factor = newsize / min(width, height)
            # Clamp to ``newsize`` so float rounding can never leave a side
            # one pixel short of the crop window.
            new_width = max(newsize, round(width * scale_factor))
            new_height = max(newsize, round(height * scale_factor))
            scaled = img.resize((new_width, new_height))
            # Center crop: the box must be computed from the *resized*
            # dimensions, not the original ones.
            left = (new_width - newsize) // 2
            top = (new_height - newsize) // 2
            img_resize = scaled.crop((left, top, left + newsize, top + newsize))
        else:
            img_resize = img.resize((newsize, newsize))
    img_resize.save(fp)
if __name__ == '__main__':
    # Script entry point: resize every .tif image under ./images in place.
    cwd = os.getcwd()
    # Explicit check instead of ``assert`` so the guard still runs under
    # ``python -O``; the images are overwritten, so the location matters.
    if not cwd.endswith('kather'):
        raise SystemExit(f"Please make sure this script is in main 'kather' dataset directory and run it from the 'kather' directory. Current working directory is: {cwd}")

    # Collect the path of every .tif file below the images directory.
    paths = []
    for root, dirs, files in os.walk(opj(cwd, 'images')):
        for file in files:
            if file.endswith('.tif'):
                paths.append(opj(root, file))

    # Use half of the available cores, but never fewer than one: a pool
    # cannot be created with zero processes (e.g. on a single-core machine).
    num_cpus = max(1, mp.cpu_count() // 2)

    # ``pbar`` and ``num_cpus`` are read as module globals by ``resizeimg``.
    pbar = tqdm(total=len(paths))
    pbar.set_description('Resizing images')
    try:
        process_images_in_parallel(paths, num_workers=num_cpus)
    finally:
        pbar.close()
    print('Finished processing.')