An Ansible role that installs Airflow
Install the role with:
ansible-galaxy install -r requirements.yml
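A minimal requirements.yml for that command might look like the sketch below; it assumes the role is published on Ansible Galaxy as kyungw00k.airflow, the name used in the example playbook further down.

# requirements.yml (sketch)
- src: kyungw00k.airflow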
Available variables are listed below, along with default values:
# The home folder for airflow, default is ~/airflow
airflow_home: ~/airflow
# The folder where your airflow pipelines live, most likely a
# subfolder in a code repository
airflow_dags_folder: "{{airflow_home}}/dags"
# The folder where airflow should store its log files
airflow_base_log_folder: "{{airflow_home}}/logs"
# An S3 location can be provided for log backups
# For S3, use the full URL to the base folder (starting with "s3://...")
airflow_s3_log_folder: "None"
# The executor class that airflow should use. Choices include
# SequentialExecutor, LocalExecutor, CeleryExecutor
airflow_executor: "SequentialExecutor"
# The SqlAlchemy connection string to the metadata database.
# SqlAlchemy supports many different database engines; more information
# is available on their website
airflow_sql_alchemy_conn: "sqlite:///{{airflow_home}}/airflow.db"
# The SqlAlchemy pool size is the maximum number of database connections
# in the pool.
airflow_sql_alchemy_pool_size: 5
# The SqlAlchemy pool recycle is the number of seconds a connection
# can be idle in the pool before it is invalidated. This config does
# not apply to sqlite.
airflow_sql_alchemy_pool_recycle: 3600
# The amount of parallelism as a setting to the executor. This defines
# the max number of task instances that should run simultaneously
# on this airflow installation
airflow_parallelism: 32
# The number of task instances allowed to run concurrently by the scheduler
airflow_dag_concurrency: 16
# Are DAGs paused by default at creation
airflow_dags_are_paused_at_creation: False
# The maximum number of active DAG runs per DAG
airflow_max_active_runs_per_dag: 16
# Whether to load the examples that ship with Airflow. They're useful for
# getting started, but you probably want to set this to False in a
# production environment
airflow_load_examples: True
# Where your Airflow plugins are stored
airflow_plugins_folder: "{{airflow_home}}/plugins"
# Secret key to save connection passwords in the db
airflow_fernet_key: temporary_fernet_key
# Whether to disable pickling dags
airflow_donot_pickle: False
# How long before timing out a python file import while filling the DagBag
airflow_dagbag_import_timeout: 30
# The base url of your website as airflow cannot guess what domain or
# cname you are using. This is used in automated emails that
# airflow sends to point links to the right web server
airflow_base_url: http://localhost:8080
# The ip specified when starting the web server
airflow_web_server_host: 0.0.0.0
# The port on which to run the web server
airflow_web_server_port: 8080
# Secret key used to run your flask app
airflow_secret_key: temporary_key
# Number of workers to run the Gunicorn web server
airflow_workers: 4
# The worker class gunicorn should use. Choices include
# sync (default), eventlet, gevent
airflow_worker_class: sync
# Expose the configuration file in the web server
airflow_expose_config: true
# Set to true to turn on authentication: http://pythonhosted.org/airflow/installation.html#web-authentication
airflow_authenticate: False
# Filter the list of dags by owner name (requires authentication to be enabled)
airflow_filter_by_owner: False
airflow_email_backend: airflow.utils.send_email_smtp
# If you want airflow to send emails on retries and failures, and you want to
# use the airflow.utils.send_email function, you have to configure an SMTP
# server here
airflow_smtp_host: localhost
airflow_smtp_starttls: True
airflow_smtp_ssl: False
airflow_smtp_user: airflow
airflow_smtp_port: 25
airflow_smtp_password: airflow
airflow_smtp_mail_from: [email protected]
# This section only applies if you are using the CeleryExecutor in the
# core section above
# The app name that will be used by celery
airflow_celery_app_name: airflow.executors.celery_executor
# The concurrency that will be used when starting workers with the
# "airflow worker" command. This defines the number of task instances that
# a worker will take, so size up your workers based on the resources on
# your worker box and the nature of your tasks
airflow_celeryd_concurrency: 16
# When you start an airflow worker, airflow starts a tiny web server
# subprocess to serve the worker's local log files to the airflow main
# web server, which then builds pages and sends them to users. This defines
# the port on which the logs are served. It needs to be unused and
# reachable from the main web server so it can connect to the workers.
airflow_worker_log_server_port: 8793
# The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally
# a sqlalchemy database. Refer to the Celery documentation for more
# information.
airflow_broker_url: sqla+mysql://airflow:airflow@localhost:3306/airflow
# The Celery result backend, used to store task state and results
airflow_celery_result_backend: db+mysql://airflow:airflow@localhost:3306/airflow
# Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start
# it `airflow flower`. This defines the port that Celery Flower runs on
airflow_flower_port: 5555
# Default queue that tasks get assigned to and that workers listen on.
airflow_default_queue: default
# Task instances listen for an external kill signal (when you clear tasks
# from the CLI or the UI); this defines the frequency at which they should
# listen (in seconds).
airflow_job_heartbeat_sec: 5
# The scheduler constantly tries to trigger new tasks (look at the
# scheduler section in the docs for more information). This defines
# how often the scheduler should run (in seconds).
airflow_scheduler_heartbeat_sec: 5
# Statsd (https://github.com/etsy/statsd) integration settings
airflow_statsd_on: False
airflow_statsd_host: localhost
airflow_statsd_port: 8125
airflow_statsd_prefix: airflow
# Mesos master address which MesosExecutor will connect to.
airflow_mesos_master: localhost:5050
# The framework name under which the Airflow scheduler will register itself on Mesos
airflow_mesos_framework_name: Airflow
# Number of cpu cores required for running one task instance using
# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
# command on a mesos slave
airflow_mesos_task_cpu: 1
# Memory in MB required for running one task instance using
# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
# command on a mesos slave
airflow_mesos_task_memory: 256
# Enable framework checkpointing for mesos
# See http://mesos.apache.org/documentation/latest/slave-recovery/
airflow_mesos_checkpoint: False
# Failover timeout in milliseconds.
# When checkpointing is enabled and this option is set, Mesos waits until the configured timeout for
# the MesosExecutor framework to re-register after a failover. Mesos shuts down running tasks if the
# MesosExecutor framework fails to re-register within this timeframe.
airflow_mesos_failover_timeout: 604800
# Enable framework authentication for mesos
# See http://mesos.apache.org/documentation/latest/configuration/
airflow_mesos_authenticate: False
# Mesos credentials, if authentication is enabled
airflow_mesos_default_principal: admin
airflow_mesos_default_secret: admin
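Note that the shipped airflow_fernet_key is only a placeholder. A real key can be generated with the Python cryptography package and passed to the role; the snippet below is a sketch, and the AIRFLOW_FERNET_KEY environment variable is an assumption, not something the role defines.

# Generate a key once, e.g. with:
#   python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
# then hand it to the role, for instance via an environment variable lookup:
airflow_fernet_key: "{{ lookup('env', 'AIRFLOW_FERNET_KEY') }}"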
Example playbook:
- hosts: servers
  roles:
    - { role: kyungw00k.airflow }
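Role variables can also be overridden in the same play. The sketch below switches to the LocalExecutor with a MySQL metadata database; the connection string, DAGs path, and host group are placeholders for illustration, not values shipped with the role.

- hosts: servers
  roles:
    - role: kyungw00k.airflow
      airflow_executor: "LocalExecutor"
      airflow_sql_alchemy_conn: "mysql://airflow:airflow@localhost:3306/airflow"
      airflow_dags_folder: "/srv/airflow/dags"
      airflow_load_examples: False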
License: BSD
Author: Kyungwook Park, @kyungw00k (Twitter, GitHub)