Two more samples for Airflow + S3
luisbelloch committed Dec 8, 2021
1 parent f6048ba commit f25a69f
Showing 2 changed files with 43 additions and 16 deletions.
34 changes: 34 additions & 0 deletions airflow/dags/s3_bucket_operations.py
@@ -0,0 +1,34 @@
import os
from datetime import datetime

from airflow.decorators import task
from airflow.models.dag import DAG
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
from airflow.providers.amazon.aws.operators.s3_bucket import S3CreateBucketOperator, S3DeleteBucketOperator

# By default, it will use 'aws_default' connection. You can create it here by running `make minio_credentials`
# If you want to change it, use a variable and pass it as `aws_conn_id` to all AWS operators.
AWS_CONN_ID = 'aws_default'

BUCKET_NAME = os.environ.get('BUCKET_NAME', 'patatas')

@task(task_id="s3_bucket_dag_add_keys_to_bucket")
def upload_keys():
    s3_hook = S3Hook()
    for i in range(0, 3):
        s3_hook.load_string(string_data="input", key=f"path/data{i}", bucket_name=BUCKET_NAME)

with DAG(
    dag_id='s3_bucket_operations',
    schedule_interval=None,
    start_date=datetime(2021, 1, 1),
    catchup=False,
    default_args={"bucket_name": BUCKET_NAME},
    max_active_runs=1,
    tags=['upv'],
) as dag:

    create_bucket = S3CreateBucketOperator(task_id='s3_bucket_dag_create', region_name='us-east-1')
    add_keys_to_bucket = upload_keys()
    delete_bucket = S3DeleteBucketOperator(task_id='s3_bucket_dag_delete', force_delete=True)
    create_bucket >> add_keys_to_bucket >> delete_bucket
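
The comments above point out that the DAG relies on the implicit 'aws_default' connection and that `aws_conn_id` could instead be passed to every AWS operator; the AWS_CONN_ID constant is declared but never used. Below is a minimal, self-contained sketch of that variant; the dag_id and task_ids are illustrative and not taken from the repository:

# Sketch only: same DAG as above, but with aws_conn_id passed explicitly to the hook and operators.
import os
from datetime import datetime

from airflow.decorators import task
from airflow.models.dag import DAG
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
from airflow.providers.amazon.aws.operators.s3_bucket import S3CreateBucketOperator, S3DeleteBucketOperator

AWS_CONN_ID = 'aws_default'
BUCKET_NAME = os.environ.get('BUCKET_NAME', 'patatas')

@task(task_id="add_keys_to_bucket")
def upload_keys():
    # The hook takes the connection id directly instead of falling back to 'aws_default'.
    s3_hook = S3Hook(aws_conn_id=AWS_CONN_ID)
    for i in range(0, 3):
        s3_hook.load_string(string_data="input", key=f"path/data{i}", bucket_name=BUCKET_NAME)

with DAG(
    dag_id='s3_bucket_operations_explicit_conn',  # illustrative name, not in the repo
    schedule_interval=None,
    start_date=datetime(2021, 1, 1),
    catchup=False,
    default_args={"bucket_name": BUCKET_NAME},
    tags=['upv'],
) as dag:
    create_bucket = S3CreateBucketOperator(task_id='create_bucket', region_name='us-east-1', aws_conn_id=AWS_CONN_ID)
    delete_bucket = S3DeleteBucketOperator(task_id='delete_bucket', force_delete=True, aws_conn_id=AWS_CONN_ID)
    create_bucket >> upload_keys() >> delete_bucket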
25 changes: 9 additions & 16 deletions airflow/dags/s3_file_sensor.py
@@ -3,23 +3,17 @@

from airflow.decorators import task
from airflow.models.dag import DAG
-from airflow.providers.amazon.aws.hooks.s3 import S3Hook
-from airflow.providers.amazon.aws.operators.s3_bucket import S3CreateBucketOperator, S3DeleteBucketOperator

-# By default, it will use 'aws_default' connection. You can create it here by running `make minio_credentials`
-# If you want to change it, use a variable and pass it as `aws_conn_id` to all AWS operators.
-AWS_CONN_ID = 'aws_default'
+from airflow.models.variable import Variable
+from airflow.providers.amazon.aws.sensors.s3_key import S3KeySensor, S3KeySizeSensor

BUCKET_NAME = os.environ.get('BUCKET_NAME', 'patatas')

@task(task_id="s3_bucket_dag_add_keys_to_bucket")
def upload_keys():
s3_hook = S3Hook()
for i in range(0, 3):
s3_hook.load_string(string_data="input", key=f"path/data{i}", bucket_name=BUCKET_NAME)
@task(task_id="do_something")
def do_something():
print("Something!")

with DAG(
-    dag_id='s3_bucket_dag',
+    dag_id='s3_file_sensor',
    schedule_interval=None,
    start_date=datetime(2021, 1, 1),
    catchup=False,
@@ -28,7 +22,6 @@ def upload_keys():
    tags=['upv'],
) as dag:

-    create_bucket = S3CreateBucketOperator(task_id='s3_bucket_dag_create', region_name='us-east-1')
-    add_keys_to_bucket = upload_keys()
-    delete_bucket = S3DeleteBucketOperator(task_id='s3_bucket_dag_delete', force_delete=True)
-    create_bucket >> add_keys_to_bucket >> delete_bucket
+    op = S3KeySensor(task_id="s3_key_sensor", bucket_key="s3://gasolina/some_file.json", bucket_name=None, dag=dag)
+    end_task = do_something()
+    op >> end_task
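
The new DAG simply blocks on s3://gasolina/some_file.json and then runs do_something; the imported S3KeySizeSensor is not used in this sample. As a rough usage sketch, reusing the imports and the dag object from the file above, the same sensor can also match keys by wildcard and have its polling tuned through the standard sensor arguments (the task_id, key pattern and timings below are illustrative, not part of the commit):

# Illustrative variant of the sensor above; values are placeholders, not from the repo.
wait_for_any_json = S3KeySensor(
    task_id="s3_key_sensor_wildcard",
    bucket_key="s3://gasolina/*.json",  # full s3:// URL, so bucket_name stays None
    bucket_name=None,
    wildcard_match=True,                # interpret bucket_key as a Unix glob
    poke_interval=30,                   # seconds between checks
    timeout=60 * 60,                    # fail the task after one hour of waiting
    dag=dag,
)
wait_for_any_json >> end_task           # end_task is the do_something() task defined above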
