-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgenerate_region_embeddings.py
99 lines (70 loc) · 2.92 KB
/
generate_region_embeddings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from PIL import Image
from HIPT_4K.hipt_4k import HIPT_4K
from HIPT_4K.hipt_model_utils import eval_transforms
import torch
import sys
import numpy as np
import argparse
import os
import glob
import random
# function for generating embedding for a 4k patch
def generate_embedding(region, model):
    """Run the model on a single region image and return its embedding.

    Args:
        region (PIL.Image): image to run inference on
        model (HIPT_4K): HIPT model

    Returns:
        numpy.ndarray: embedding for the image, exactly as produced by
            ``model.forward`` on a batch of size one and moved to the CPU
    """
    # Apply the transform returned by eval_transforms() and prepend a batch
    # axis so the model receives a batch containing one image.
    x = eval_transforms()(region).unsqueeze(dim=0)
    # Inference only: disabling gradient tracking avoids building an autograd
    # graph per region and guarantees the result can be converted with
    # .numpy(), which raises on tensors that require grad.
    with torch.no_grad():
        out = model.forward(x)
    embedding = out.cpu().numpy()
    return embedding
def main():
    """Generate and save one embedding matrix per bag found in --patch_dir.

    Every entry directly inside ``--patch_dir`` is treated as a bag (one WSI)
    whose children are region images. Each region is embedded with
    ``generate_embedding`` and the resulting ``(num_regions, 192)`` matrix is
    saved with ``torch.save`` to ``<feats_dir>/<bag name>.pt``. Bags whose
    output file already exists are skipped, so an interrupted run can be
    resumed.

    Regions that cannot be opened are logged and skipped; their row in the
    saved matrix stays all-zero.
    """
    parser = argparse.ArgumentParser()
    # Both directories are mandatory: without them the path handling below
    # would fail with an opaque TypeError on None.
    parser.add_argument('--patch_dir', type=str, required=True, help="path to where patches are")
    parser.add_argument("--vit256", type=str, default='HIPT_4K/Checkpoints/vit256_small_dino.pth', help="path to checkpoint")
    parser.add_argument("--vit4k", type=str, default='HIPT_4K/Checkpoints/vit4k_xs_dino.pth', help="path to checkpoint")
    parser.add_argument('--feats_dir', type=str, required=True, help='Path to where embeddings are stored')
    args = parser.parse_args()

    # get the list of bags
    bag_list = glob.glob(os.path.join(args.patch_dir, '*'))
    print("total WSIs to be processed:", len(bag_list))

    # model init
    model = HIPT_4K(model256_path=args.vit256, model4k_path=args.vit4k)
    model.eval()

    # Process bags in random order.
    # NOTE(review): presumably so that several concurrent runs over the same
    # patch_dir mostly pick different bags - confirm.
    random.shuffle(bag_list)

    os.makedirs(args.feats_dir, exist_ok=True)
    # Absolute form used to recognise the embeddings folder among the bags
    # regardless of trailing slashes or relative/absolute spelling.
    feats_dir = os.path.abspath(args.feats_dir)

    for bag in bag_list:
        # since the embeddings folder may live in the same folder as the
        # patches, we should ignore the embeddings folder
        if os.path.abspath(bag) == feats_dir:
            continue

        bag_name = os.path.basename(os.path.normpath(bag))
        out_path = os.path.join(args.feats_dir, bag_name + ".pt")
        if os.path.exists(out_path):
            print("skipping, features already generated")
            continue

        # get the list of patches for this bag; sorted so that the row order
        # of the saved matrix is deterministic
        regions = sorted(glob.glob(os.path.join(bag, "*")))
        # init a matrix that holds embeddings for all patches
        # (192 is the hard-coded embedding width expected from the model)
        patch_array = np.zeros((len(regions), 192))
        print("processing", bag, "num regions:", len(regions))

        for i, region_path in enumerate(regions):
            try:
                # try to open patch; the context manager releases the file
                # handle once the RGB copy has been made
                with Image.open(region_path) as img:
                    region = img.convert('RGB')
            except Exception as e:
                # best effort: log and move on, leaving row i as zeros
                print("region could not be loaded:", region_path)
                print(e)
                continue
            # generate the embedding and store it
            patch_array[i] = generate_embedding(region, model)

        # save embeddings as a pt file
        torch.save(torch.from_numpy(patch_array), out_path)
# Script entry point: run the embedding generation only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()