-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathupload_media_ifcb.py
executable file
·92 lines (71 loc) · 3.32 KB
/
upload_media_ifcb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#! /usr/bin/env python
import argparse
import os
from pprint import pprint
import io
import base64
import urllib.request, json
from time import perf_counter as tictoc
import pandas as pd
import tator
import api_util
def get_args(argv=None):
    """Parse the command-line options of the IFCB-to-Tator upload script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is what
            the script entry point relies on.

    Returns:
        argparse.Namespace with the attributes ``CSV``, ``dashboard``,
        ``host``, ``token``, ``media_type`` and ``project``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('CSV', help='csv with ifcb "pid,class,score" columns')
    parser.add_argument('--dashboard', default='https://ifcb-data.whoi.edu',
                        help='ifcb dashboard url to pull bin images from')

    # Tator connection options are grouped so they show up together in --help.
    tator_args = parser.add_argument_group(title='Tator Parameters', description=None)
    tator_args.add_argument('--host', default='https://tator.whoi.edu',
                            help='Default is "https://tator.whoi.edu"')
    tator_args.add_argument('--token', required=True,
                            help='Tator user-access token (required)')
    tator_args.add_argument('--media_type', required=True,
                            help='Name or ID of MediaType (required). If name, PROJECT must also be specified')
    tator_args.add_argument('--project',
                            help='Name or ID of project. Only required when MEDIA_TYPE given is a Name')  # isiis

    return parser.parse_args(argv)
def main():
    """Fetch the images listed in a CSV from the IFCB dashboard and upload them to Tator.

    For every row of the CSV named on the command line the script:
    downloads the image JSON from ``<dashboard>/api/image_data/<bin>/<roi_num>``,
    base64-decodes its ``data`` field into a temporary ``.png`` file, uploads
    that file to Tator with the row's ``pid``/``bin``/``class``/``score`` as
    attributes, and removes the temporary file again. At the end it prints
    how the elapsed time was split between download, disk write and upload.
    """
    download_time = 0
    todisk_time = 0
    upload_time = 0

    # 0) inputs: CSV, ifcb dashboard params, tator_configs
    args = get_args()
    api = tator.get_api(args.host, args.token)
    args.media_type_id = api_util.get_mediatype(api, args.media_type, project=args.project)

    # 1) ingest csv
    df = pd.read_csv(args.CSV)
    # Split each pid at its LAST underscore into the 'bin' and 'roi_num' columns.
    pid_parts = df.pid.str.rsplit("_", n=1, expand=True)
    df['bin'], df['roi_num'] = pid_parts[0], pid_parts[1]
    df['url'] = df[['bin', 'roi_num']].apply(
        lambda row: f'{args.dashboard}/api/image_data/{row.bin}/{row.roi_num}', axis=1)

    # The temporary images are written here; create it up front so the first
    # open() does not fail on a fresh checkout.
    workspace = 'tator_transcode_workspace'
    os.makedirs(workspace, exist_ok=True)

    start_time = tictoc()
    for idx, row in df.iterrows():
        print(idx, row.bin, row.roi_num, end=' ... ', flush=True)
        local_fname = f'{workspace}/{row.pid}.png'

        # 2) download image locally (dashboard answers with JSON whose
        #    'data' field is decoded as base64 below)
        tic = tictoc()
        with urllib.request.urlopen(row.url) as url:
            img_data = json.load(url)['data']
        download_time += tictoc() - tic

        try:
            # 3) save img to disk
            tic = tictoc()
            with open(local_fname, "wb") as f:
                f.write(base64.b64decode(img_data))
            todisk_time += tictoc() - tic

            # 4) create attributes dict
            attribs = {
                'pid': row['pid'],
                'bin': row['bin'],
                'Class': row['class'],
                'ModelScore': row['score'],
            }

            # 5) upload to tator; the generator must be drained for the
            #    upload to complete, only the last response is reported.
            tic = tictoc()
            response = None
            for _progress, response in tator.util.upload_media(
                    api, args.media_type_id, path=local_fname, attributes=attribs):
                pass
            upload_time += tictoc() - tic

            if response is None:
                # Nothing was yielded; avoid a NameError and still end the line.
                print('no response from upload')
            else:
                print(response.message)
        finally:
            # 6) remove downloaded image, also when writing or uploading failed
            if os.path.exists(local_fname):
                os.remove(local_fname)

    total_time = tictoc() - start_time
    for label, spent in (('Download', download_time),
                         ('ToDisk', todisk_time),
                         ('Upload', upload_time)):
        print(f'{label}: {spent}s ({spent/total_time:.1%})')


if __name__ == '__main__':
    main()