# data_ingestion.py
# Origin: repository forked from mamurak/object-detection-rest.
from os import environ, path
from boto3 import resource
def ingest_data(data_folder='./data', max_images=5,
                prefix='onnx-models/vision/tiny-yolo/'):
    """Download ``.jpg`` objects from an S3 bucket into a local folder.

    Connection settings are read from environment variables. For each
    setting the first name is looked up and, if it is not set, the second
    name is used instead:

    * endpoint URL: ``S3_ENDPOINT_URL``, then ``AWS_S3_ENDPOINT``
    * access key:   ``S3_ACCESS_KEY``, then ``AWS_ACCESS_KEY_ID``
    * secret key:   ``S3_SECRET_KEY``, then ``AWS_SECRET_ACCESS_KEY``
    * bucket name:  ``S3_BUCKET_NAME``, then ``AWS_S3_BUCKET``

    Args:
        data_folder: Local directory the files are written to. It is
            created (including parents) if it does not exist yet.
        max_images: Iteration stops once exactly this many ``.jpg`` objects
            have been downloaded. The check is an equality test, so a
            negative value never stops the loop early.
        prefix: Only objects whose key starts with this prefix are listed.

    Raises:
        ValueError: If no bucket name is configured in the environment.
    """
    # Local import keeps this block self-contained: the module header only
    # imports ``environ`` and ``path`` from ``os``.
    from os import makedirs

    print('Commencing data ingestion.')

    s3_endpoint_url = environ.get(
        'S3_ENDPOINT_URL', environ.get('AWS_S3_ENDPOINT')
    )
    s3_access_key = environ.get(
        'S3_ACCESS_KEY', environ.get('AWS_ACCESS_KEY_ID')
    )
    s3_secret_key = environ.get(
        'S3_SECRET_KEY', environ.get('AWS_SECRET_ACCESS_KEY')
    )
    s3_bucket_name = environ.get(
        'S3_BUCKET_NAME', environ.get('AWS_S3_BUCKET')
    )

    # Fail fast with an actionable message instead of building a client and
    # only then tripping over a missing bucket identifier.
    if not s3_bucket_name:
        raise ValueError(
            'No S3 bucket configured: set S3_BUCKET_NAME or AWS_S3_BUCKET.'
        )

    print(f'Downloading data from bucket "{s3_bucket_name}" '
          f'from S3 storage at {s3_endpoint_url}')

    # The download writes straight to a path inside data_folder, so the
    # directory has to exist before the first file arrives.
    makedirs(data_folder, exist_ok=True)

    s3 = resource(
        's3', endpoint_url=s3_endpoint_url,
        aws_access_key_id=s3_access_key, aws_secret_access_key=s3_secret_key
    )
    bucket = s3.Bucket(s3_bucket_name)

    download_count = 0
    for s3_object in bucket.objects.filter(Prefix=prefix):
        if download_count == max_images:
            break
        key = s3_object.key
        if key.endswith('.jpg'):
            # Only the base name is kept, so keys in different "folders"
            # that share a file name map to the same local path.
            local_file_path = path.join(data_folder, path.basename(key))
            bucket.download_file(key, local_file_path)
            download_count += 1

    print('Finished data ingestion.')
if __name__ == '__main__':
    # When executed as a script, ingest into the absolute /data directory
    # rather than the function's relative './data' default.
    target_dir = '/data'
    ingest_data(data_folder=target_dir)