-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathmake_file_summary.py
186 lines (148 loc) · 5.88 KB
/
make_file_summary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#!/usr/bin/env python3
"""
Script that gathers counts of localizations and states (e.g. detections and tracks)
for each file in a particular section. Also gathers the last edit session duration for
each of the media.
"""
import argparse
import datetime
import logging
import urllib.parse
import sys
import traceback
import progressbar
import pandas as pd
import tator
# Route all log records of INFO level and above to a log file in the current
# working directory. The file is opened in write mode, so each run starts
# with a fresh log rather than appending to the previous one.
logging.basicConfig(
    level=logging.INFO,
    filename='make_file_summary.log',
    filemode='w',
    datefmt='%m/%d/%Y %I:%M:%S %p',
    format='%(asctime)s %(levelname)s:%(message)s',
)

# Module-level logger used by the functions in this script.
logger = logging.getLogger(__name__)
def parse_args() -> argparse.Namespace:
    """ Build the command-line parser and parse the script arguments

    The parser is first handed to ``tator.get_parser`` (presumably this adds
    the ``--host`` and ``--token`` options that ``main`` reads -- confirm
    against the tator-py documentation), then the script-specific
    ``--project`` and ``--section`` options are registered. The parsed
    namespace is written to the log before it is returned.

    Returns:
        Namespace object holding the parsed arguments.
    """
    description = "Creates a file (media) summary for the provided project/section. Localization counts, state counts, and last session duration are recorded."
    base_parser = argparse.ArgumentParser(description=description)
    parser = tator.get_parser(parser=base_parser)

    # Both script-specific options are mandatory: (flag, value type, help text)
    required_options = (
        ('--project', int, 'Unique project id'),
        ('--section', str, 'Name of section to process'),
    )
    for flag, value_type, help_text in required_options:
        parser.add_argument(flag, type=value_type, required=True, help=help_text)

    parsed = parser.parse_args()
    logger.info(parsed)
    return parsed
def process_section(
        host: str,
        token: str,
        project_id: int,
        section_name: str) -> str:
    """ Processes media in section and creates a summary .csv file

    For every media returned for the section, one row is produced holding the
    section name, media id, media name, an annotation URL, the duration of the
    last edit session, and one count column per localization type and per
    state type defined in the project.

    Args:
        host (str): Tator url server
        token (str): User access token to tator server
        project_id (int): Unique identifier of section's project
        section_name (str): Section name to process

    Returns:
        The filename of the report created (``<section_name>.csv``)

    Postconditions:
        <section_name>.csv is written to the current working directory with
        the summary information
    """

    # Get the interface to Tator
    tator_api = tator.get_api(host=host, token=token)

    # Map each localization type id to its report column name
    localization_type_id_name_map = {
        loc_type.id: f"{loc_type.name} (counts)"
        for loc_type in tator_api.get_localization_type_list(project=project_id)
    }

    # Map each state type id (e.g. track types) to its report column name
    state_type_id_name_map = {
        state_type.id: f"{state_type.name} (counts)"
        for state_type in tator_api.get_state_type_list(project=project_id)
    }

    # Grab all the media in this section
    attribute_filter = [f'tator_user_sections::{section_name}']
    medias = tator_api.get_media_list(project=project_id, attribute_contains=attribute_filter)

    # Fixed columns first, then one column per localization type and state type
    column_names = [
        'Section',
        'Media ID',
        'Media',
        'URL',
        'Last Session Duration',
        *localization_type_id_name_map.values(),
        *state_type_id_name_map.values(),
    ]

    # Loop over each of the media, extract the appropriate data, and save it for later processing
    report_data = []
    progress = progressbar.ProgressBar()
    for media in progress(medias):

        # User may or may not have edited the media. If not, the datetime entries will be None.
        # Both timestamps are checked so that a media with only one of them set is
        # reported as "N/A" instead of raising on the subtraction and aborting the report.
        if media.last_edit_start is None or media.last_edit_end is None:
            last_session_duration = "N/A"
        else:
            last_session_duration = str(media.last_edit_end - media.last_edit_start)

        # For each localization type, fetch this media's localizations of that type
        # and record how many there are.
        localization_type_counts = {
            column_name: len(tator_api.get_localization_list(
                project=project_id,
                media_id=[media.id],
                type=type_id))
            for type_id, column_name in localization_type_id_name_map.items()
        }

        # Same for the states (e.g. tracks) of each state type.
        state_type_counts = {
            column_name: len(tator_api.get_state_list(
                project=project_id,
                media_id=[media.id],
                type=type_id))
            for type_id, column_name in state_type_id_name_map.items()
        }

        # Gather all the data into a dictionary to be later converted
        report_data.append({
            'Section': section_name,
            'Media ID': media.id,
            'Media': media.name,
            'URL': urllib.parse.urljoin(host, f"{project_id}/annotation/{media.id}"),
            'Last Session Duration': last_session_duration,
            **localization_type_counts,
            **state_type_counts,
        })

    # Create the summary report
    output_name = f"{section_name}.csv"
    df = pd.DataFrame(data=report_data, columns=column_names)
    df.to_csv(output_name, index=False)

    return output_name
def main():
    """ Main routine of this script

    Parses the command-line arguments, builds the section summary report and
    prints the name of the created file. Any exception raised while doing so
    is caught here: a short notice is printed and the full traceback is
    written to the log. The finishing banner is printed in either case.
    """
    args = parse_args()
    section_kwargs = {
        'host': args.host,
        'token': args.token,
        'project_id': args.project,
        'section_name': args.section,
    }

    try:
        report_filename = process_section(**section_kwargs)
        print(f"Created {report_filename}")

    except Exception:
        # Top-level boundary: keep the console output short and put the
        # details in the log file.
        print("[ERROR] Problem occurred. Review the .log file.")
        logging.error(traceback.format_exc())

    print("[FINISHED] make_file_summary.py ")


# Only run when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()