-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrunconfig_parser.py
79 lines (60 loc) · 2.36 KB
/
runconfig_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
"""
Combine resource metadata and sbx2hf arguments into a single Config object.
"""
import logging
import requests
from urllib.parse import urlsplit
def _fetch_metadata(api_endpoint: str, resource_name, timeout: float = 30.0):
    """Fetch the metadata record for one resource from the metadata API.

    Issues ``GET <api_endpoint>?resource=<resource_name>`` and returns the
    decoded JSON body of the response.

    Args:
        api_endpoint: URL of the metadata API.
        resource_name: Value sent as the ``resource`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON response body.

    Raises:
        SystemExit: If the request fails for any reason (connection error,
            timeout, or an HTTP error status).
    """
    metadata_query = f"{api_endpoint}?resource={resource_name}"
    logging.info("Fetching metadata from %s", metadata_query)
    try:
        # Let requests build the query string so the resource name is
        # URL-encoded, and bound the wait so a dead server cannot hang us.
        resp = requests.get(
            api_endpoint,
            params={"resource": resource_name},
            timeout=timeout,
        )
        resp.raise_for_status()
    except requests.exceptions.RequestException as err:
        # RequestException covers HTTPError as well as connection failures
        # and timeouts, all of which are fatal here.
        logging.error("Fatal error: could not fetch metadata from metadata api")
        raise SystemExit(err) from err
    return resp.json()
def create_runconfig(sbx2hf_args: dict):
    """Build the run configuration that matches the given sbx2hf arguments.

    When ``sbx2hf_args['url']`` is set, the last path segment of that URL is
    used as the resource name, its metadata is fetched from
    ``sbx2hf_args['sbx_metadata_api']`` and a ``ConfigFromURL`` is returned.
    Otherwise a ``ConfigFromPaths`` is returned, using the optional
    ``sbx2hf_args['metadata']`` (an empty dict when missing or ``None``).

    Args:
        sbx2hf_args: Argument dictionary of the sbx2hf run.

    Returns:
        A ``ConfigFromURL`` or ``ConfigFromPaths`` instance.
    """
    url = sbx2hf_args.get('url')
    if not url:
        # `or {}` also covers an explicit None, which would otherwise break
        # later `.get` calls on the metadata.
        return ConfigFromPaths(sbx2hf_args, sbx2hf_args.get('metadata') or {})

    # Take the name from the URL *path* so a query string or fragment does
    # not end up in the resource name.
    resource_name = urlsplit(url).path.strip('/').split('/')[-1]
    if not resource_name:
        # No usable path component: fall back to the raw last segment.
        resource_name = url.strip('/').split('/')[-1]
    metadata = _fetch_metadata(sbx2hf_args['sbx_metadata_api'], resource_name)
    return ConfigFromURL(sbx2hf_args, metadata)
class Config:
    """Base run configuration combining sbx2hf arguments and metadata.

    Attributes are stored as given; derived values are exposed as properties.
    """

    def __init__(self, sbx2hf_args: dict, metadata: dict):
        """Store the argument dictionary and the resource metadata.

        Args:
            sbx2hf_args: Argument dictionary of the sbx2hf run.
            metadata: Metadata dictionary of the resource; ``None`` is
                treated as an empty dictionary.
        """
        self.sbx2hf_args = sbx2hf_args
        # Guard against None so the properties below can always call .get().
        self.metadata = {} if metadata is None else metadata

    @property
    def resource_name(self):
        """Return the metadata ``id``, or ``'unnamed_resource'`` if absent."""
        return self.metadata.get('id', 'unnamed_resource')

    @property
    def output_folder(self):
        """Return ``hf_output_folder`` if set, otherwise the resource name."""
        # .get() keeps a missing key from raising: the folder is optional.
        return self.sbx2hf_args.get('hf_output_folder') or self.resource_name
class ConfigFromPaths(Config):
    """Run configuration for data supplied as local paths."""

    @property
    def datapaths(self):
        """Return ``self.path`` after checking that it is a list.

        Raises:
            ValueError: If ``self.path`` is not a list.
        """
        # NOTE(review): nothing in the visible code assigns `path`;
        # presumably it is set elsewhere before this is read - confirm.
        paths = self.path
        # isinstance() is required here: `paths is not list` compares against
        # the list *type* itself and is true for every actual list.
        if not isinstance(paths, list):
            raise ValueError(
                f"path must be a list, got {type(paths).__name__}"
            )
        return paths
class ConfigFromURL(Config):
    """Run configuration for a resource identified by its URL."""

    @property
    def bz2link(self):
        """Return the download URL of the resource's ``.xml.bz2`` file.

        Uses the scheme and host of ``sbx2hf_args['url']`` together with the
        fixed ``/lb/resurser/meningsmangder/`` path and the resource name.
        """
        split_path = urlsplit(self.sbx2hf_args['url'])
        return f'{split_path.scheme}://{split_path.netloc}/lb/resurser/meningsmangder/{self.resource_name}.xml.bz2'

    @property
    def bz2_local_path(self):
        """Return the default local path of the downloaded ``.xml.bz2`` file."""
        return f'{self.output_folder}/{self.resource_name}.xml.bz2'

    def download_file(self, to: str = None):
        """Stream the ``.xml.bz2`` file from ``bz2link`` to disk.

        Args:
            to: Destination file path; defaults to ``bz2_local_path``.

        Returns:
            The path the file was written to.

        Raises:
            requests.exceptions.HTTPError: If the server answers with an
                error status.
        """
        to = to if to else self.bz2_local_path
        link = self.bz2link
        logging.info(f"Downloading file {link} file to {to}")
        with requests.get(link, stream=True) as r:
            r.raise_for_status()
            # Write to the requested destination, not unconditionally to the
            # default path, so the `to` argument is actually honoured.
            with open(to, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        return to