-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathaws-glue-export-job.py
82 lines (68 loc) · 2.18 KB
/
aws-glue-export-job.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import sys
import zipfile
from zipfile import ZipFile
import os
from os.path import basename
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
import boto3
from botocore.exceptions import ClientError
## @params: [JOB_NAME]
# Resolve the job arguments from the command line; only JOB_NAME is requested.
args = getResolvedOptions(sys.argv, ['JOB_NAME'])
# Create the Spark context and wrap it in a Glue context.
sc = SparkContext()
glueContext = GlueContext(sc)
# The Spark session and the logger are both obtained from the Glue context.
# The logger is the one the helper functions in this script write to.
spark = glueContext.spark_session
logger = glueContext.get_logger()
# Initialise the Glue job under the resolved job name.
job = Job(glueContext)
job.init(args['JOB_NAME'], args)
# Parameters
# Name of the S3 bucket that receives the exported archive.
TARGET_BUCKET='glue3dev'
# Send file to S3
def upload_file(file_name, bucket, object_name=None):
    """Upload a file to an S3 bucket.

    :param file_name: Path of the local file to upload
    :param bucket: Name of the bucket to upload to
    :param object_name: S3 object name. If not specified, the base name of
        file_name is used
    :return: True if file was uploaded, else False
    """
    # boto3's managed transfer re-raises a ClientError hit during the upload
    # as S3UploadFailedError, which is not a ClientError subclass, so both
    # must be caught to honour the "else False" contract.
    from boto3.exceptions import S3UploadFailedError

    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = os.path.basename(file_name)

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        s3_client.upload_file(file_name, bucket, object_name)
    except (ClientError, S3UploadFailedError) as e:
        # Log the message text rather than the exception object itself.
        logger.error(str(e))
        return False
    return True
def zipfolder(zipFileName, target_dir):
    """Zip every file found under target_dir into a new archive.

    The archive is created (or overwritten) at zipFileName using DEFLATE
    compression. Entries are stored under their path relative to target_dir,
    so the archive does not contain the leading directories of target_dir.

    :param zipFileName: Path of the zip file to write
    :param target_dir: Directory whose files are added recursively
    :return: None
    """
    # The context manager guarantees the central directory is written and the
    # file handle is released, even if adding a file raises.
    with zipfile.ZipFile(zipFileName, 'w', compression=zipfile.ZIP_DEFLATED) as zipobj:
        for base, _dirs, files in os.walk(target_dir):
            for file in files:
                fn = os.path.join(base, file)
                # relpath (instead of slicing by len(target_dir) + 1) keeps the
                # entry names correct when target_dir has a trailing separator.
                zipobj.write(fn, os.path.relpath(fn, target_dir))
def env():
    """Write the process environment to the job log, sorted by variable name."""
    logger.info('Env:')
    # One "\n<name> : <value>" fragment per variable, sent as a single log
    # message so the listing stays together in the log.
    dump = "".join(f'\n{k} : {v}' for k, v in sorted(os.environ.items()))
    logger.info(dump)
def content():
    """Write an indented listing of everything under /opt/amazon to the job log."""
    logger.info('Content:')
    pieces = []
    for root, _subdirs, files in os.walk("/opt/amazon"):
        # The number of path components determines how many "---" markers
        # prefix each entry; files sit one level deeper than their directory.
        path = root.split(os.sep)
        pieces.append(f'\n{(len(path) - 1) * "---"} {os.path.basename(root)}')
        pieces.extend(f'\n{len(path) * "---"} {file}' for file in files)
    # Emit the whole tree as a single log message.
    logger.info("".join(pieces))
# Run the export: log the /opt/amazon file listing and the environment, then
# zip /opt/amazon into /tmp/glue3.zip and upload that archive to TARGET_BUCKET.
content()
env()
zipfolder('/tmp/glue3.zip', '/opt/amazon')
# NOTE(review): upload_file's docstring says it returns False on failure; that
# result is not checked here, so the job is committed either way.
upload_file('/tmp/glue3.zip', TARGET_BUCKET)
job.commit()