Skip to content

Commit b51ab0a

Browse files
authored
feat!: Use Dockerfile based on upstream [DSRE-1116] (#1638)
* CI runtime decreased by 70-80% * docker-compose setup time decreased significantly. **Local use: takes a few seconds as opposed to a few minutes before**. Airflow Variables and Connections are loaded via `import` CLI command; replaces `bin/run` script using Airflow CLI. * Improved dev secrets security by dynamically generating a Fernet key in the Makefile * `.env` file is generated from `make up`, contains UID and Fernet key. `.env` is automatically loaded into environment variables by `docker-compose` * Retire shell scripts to use builtin features in our stack * `bin/run` replaced by docker-compose and CI * `bin/test-dag-tags` replaced by pytest unit tests * `bin/test-parse` replaced by pytest unit tests
1 parent f5977b1 commit b51ab0a

23 files changed

+652
-929
lines changed

Diff for: .circleci/config.yml

+16-28
Original file line numberDiff line numberDiff line change
@@ -16,43 +16,30 @@ orbs:
1616

1717
jobs:
1818
unit-tests:
19-
executor:
19+
executor: &python-executor
2020
name: python/default
2121
tag: 3.8.12
2222
steps:
2323
- checkout
2424
- python/install-packages:
2525
pip-dependency-file: requirements.txt
2626
pkg-manager: pip
27-
- run:
28-
name: 🧪 Verify requirements.txt is sync'ed with requirements.in
29-
command: |
30-
pip install pip-tools
31-
pip-compile --quiet
32-
git diff --exit-code requirements.txt
3327
- run:
3428
name: 🧪 Pytest
35-
command: python -m pytest
36-
- run:
37-
name: 🧪 Compile DAGs
38-
command: python -m py_compile dags/*.py
39-
- run:
40-
name: 🧹 Clean up
41-
command: find . -name *.pyc -delete
29+
command: python -m pytest --junitxml=test-results/junit.xml
30+
- store_test_results:
31+
path: test-results
4232

43-
integration-tests:
44-
executor:
45-
name: docker/machine
46-
dlc: true
33+
validate-requirements:
34+
executor: *python-executor
4735
steps:
4836
- checkout
49-
- run: docker-compose pull
50-
- run: docker-compose build
51-
- run: sudo chown -R 10001:10001 .
5237
- run:
53-
name: Check DAGs can be parsed and are correctly tagged
54-
# Valid DAG tags are defined in: `bin/test-dag-tags.py`
55-
command: bash bin/test-parse
38+
name: 🧪 Verify requirements.txt is sync'ed with requirements.in
39+
command: |
40+
pip install pip-tools
41+
pip-compile --quiet
42+
git diff --exit-code requirements.txt
5643
5744
sync_gcs:
5845
docker:
@@ -72,7 +59,7 @@ workflows:
7259
ci:
7360
jobs:
7461
- docker/publish:
75-
name: Docker build test
62+
name: 🛠️ Docker build test
7663
before_build: &version
7764
- run:
7865
name: Generate build version.json
@@ -95,13 +82,14 @@ workflows:
9582
ignore: /.*/
9683

9784
- unit-tests:
98-
name: Unit tests
85+
name: 🧪 Unit tests
9986
filters: *ci-filter
10087

101-
- integration-tests:
102-
name: Integration tests
88+
- validate-requirements:
89+
name: 🧪 Validate requirements
10390
filters: *ci-filter
10491

92+
10593
publish:
10694
jobs:
10795
- docker/publish:

Diff for: .dockerignore

+3-2
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,15 @@ __pycache__/
1414

1515
# Airflow stuff
1616
logs/
17-
{AIRFLOW_HOME}/
18-
examples/
1917

2018
# Virtual environment
2119
.env/
2220
.venv/
2321
venv/
2422

23+
# Airflow dev resources
24+
resources/
25+
2526
# IDE
2627
.idea
2728
.vscode/

Diff for: Dockerfile

+22-65
Original file line numberDiff line numberDiff line change
@@ -1,79 +1,36 @@
1-
# Use buster image because the default bullseye image has updated coreutils that require a newer
2-
# linux kernel than provided by CircleCI, per
3-
# https://forums.docker.com/t/multiple-projects-stopped-building-on-docker-hub-operation-not-permitted/92570/6
4-
# and https://forums.docker.com/t/multiple-projects-stopped-building-on-docker-hub-operation-not-permitted/92570/11
5-
FROM python:3.8.12-slim-buster
6-
MAINTAINER Harold Woo <[email protected]>
1+
FROM apache/airflow:slim-2.3.3-python3.8
72

8-
ARG AIRFLOW_UID=10001
9-
ARG AIRFLOW_GID=10001
10-
ARG PROJECT_DIR="/app"
3+
ARG PROJECT_DIR="/opt/airflow"
114

12-
# add a non-privileged user for installing and running the application
13-
RUN mkdir $PROJECT_DIR && \
14-
chown $AIRFLOW_UID:$AIRFLOW_GID $PROJECT_DIR && \
15-
groupadd --gid $AIRFLOW_GID app && \
16-
useradd --no-create-home --uid $AIRFLOW_UID --gid $AIRFLOW_GID --home-dir $PROJECT_DIR app
5+
ENV PYTHONUNBUFFERED=1
6+
ENV PYTHONPATH="$PYTHONPATH:$PROJECT_DIR"
7+
ENV AIRFLOW_HOME=$PROJECT_DIR
178

18-
RUN apt-get update && \
19-
apt-get install -y --no-install-recommends \
20-
apt-transport-https build-essential curl git libpq-dev python-dev \
21-
default-libmysqlclient-dev gettext sqlite3 libffi-dev \
22-
lsb-release gnupg vim screen procps default-mysql-client && \
9+
USER root
10+
RUN apt-get update \
11+
&& apt-get install -y --no-install-recommends build-essential default-libmysqlclient-dev
12+
13+
# Legacy docker image dependencies to be reviewed
14+
RUN apt-get install -y --no-install-recommends \
15+
lsb-release gnupg curl && \
2316
CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
2417
echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
2518
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
2619
apt-get update -y && apt-get install google-cloud-sdk -y && apt-get install google-cloud-sdk-gke-gcloud-auth-plugin && \
27-
apt-get remove -y lsb-release gnupg && \
28-
apt-get autoremove -y && \
20+
apt-get remove -y lsb-release gnupg
21+
22+
RUN apt-get autoremove -yqq --purge && \
2923
apt-get clean && \
30-
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
24+
rm -rf /var/lib/apt/lists/*
3125

32-
# Install Python dependencies
33-
COPY requirements.txt /tmp/
34-
# Switch to /tmp to install dependencies outside home dir
35-
WORKDIR /tmp
26+
USER airflow
3627

37-
RUN pip install --upgrade pip
38-
RUN export SLUGIFY_USES_TEXT_UNIDECODE=yes && pip install --no-cache-dir -r requirements.txt
28+
COPY requirements.txt /
29+
RUN pip install --no-cache-dir -r /requirements.txt
3930

40-
# Switch back to home directory
4131
WORKDIR $PROJECT_DIR
4232

43-
COPY . $PROJECT_DIR
44-
45-
RUN chown -R $AIRFLOW_UID:$AIRFLOW_GID $PROJECT_DIR
46-
47-
USER $AIRFLOW_UID
48-
49-
ENV PYTHONUNBUFFERED=1 \
50-
PORT=8000\
51-
PYTHONPATH="$PYTHONPATH:$PROJECT_DIR"
52-
# AWS_ACCESS_KEY_ID= \
53-
# AWS_SECRET_ACCESS_KEY= \
54-
# DEPLOY_ENVIRONMENT =
55-
56-
ENV AIRFLOW_HOME=$PROJECT_DIR \
57-
AIRFLOW_EMAIL_BACKEND="airflow.utils.email.send_email_smtp"
58-
# AIRFLOW_AUTHENTICATE= \
59-
# AIRFLOW_AUTH_BACKEND= \
60-
# AIRFLOW_BROKER_URL= \
61-
# AIRFLOW_RESULT_URL= \
62-
# AIRFLOW_FLOWER_PORT= \
63-
# AIRFLOW_DATABASE_URL= \
64-
# AIRFLOW__CORE__FERNET_KEY= \
65-
# AIRFLOW_SECRET_KEY= \
66-
# AIRFLOW_SMTP_HOST= \
67-
# AIRFLOW_SMTP_USER= \
68-
# AIRFLOW_SMTP_PASSWORD= \
69-
# AIRFLOW_SMTP_FROM= \
70-
71-
72-
EXPOSE $PORT
73-
74-
# Using /bin/bash as the entrypoint works around some volume mount issues on Windows
75-
# where volume-mounted files do not have execute bits set.
76-
# https://github.com/docker/compose/issues/2301#issuecomment-154450785 has additional background.
77-
ENTRYPOINT ["/bin/bash", "/app/bin/run"]
33+
# deploylib expects /app/version.json, copy the file if it exists
34+
COPY *version.json /app/version.json
7835

79-
CMD ["web"]
36+
COPY . .

Diff for: Makefile

+9-10
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,27 @@
1-
.PHONY: build clean migrate redis-cli run secret shell stop up
1+
.PHONY: build clean redis-cli run shell stop up
22

33

44
help:
55
@echo "Welcome to the Telemetry Airflow\n"
66
@echo "The list of commands for local development:\n"
77
@echo " build Builds the docker images for the docker-compose setup"
8-
@echo " build-linux Builds the docker images using the current user ID and group ID"
98
@echo " clean Stops and removes all docker containers"
109
@echo " pip-compile Compile dependencies from 'requirements.in' into 'requirements.txt'"
1110
@echo " pip-install-local Install pip project requirements to your local environment"
12-
@echo " migrate Runs the Django database migrations"
1311
@echo " redis-cli Opens a Redis CLI"
1412
@echo " shell Opens a Bash shell"
15-
@echo " up Runs the whole stack, served under http://localhost:8000/"
13+
@echo " up Runs the whole stack, served under http://localhost:8080/"
1614
@echo " gke Create a sandbox gke cluster for testing"
1715
@echo " clean-gke Delete the sandbox gke cluster"
1816
@echo " stop Stops the docker containers"
1917

2018
build:
2119
docker-compose build
2220

23-
build-linux:
24-
docker-compose build --build-arg AIRFLOW_UID="$$(id -u)" --build-arg AIRFLOW_GID="$$(id -g)"
25-
2621
pip-compile:
2722
pip-compile
2823

29-
clean: stop
24+
clean: stop
3025
docker-compose rm -f
3126
rm -rf logs/*
3227
if [ -f airflow-worker.pid ]; then rm airflow-worker.pid; fi
@@ -35,7 +30,7 @@ pip-install-local: pip-compile
3530
pip install -r requirements.txt
3631

3732
shell:
38-
docker-compose run web bash
33+
docker-compose run airflow-webserver bash
3934

4035
redis-cli:
4136
docker-compose run redis redis-cli -h redis
@@ -45,7 +40,11 @@ stop:
4540
docker-compose stop
4641

4742
up:
48-
docker-compose up
43+
grep -qF 'AIRFLOW_UID=' .env || echo "AIRFLOW_UID=$$(id -u)" >> .env
44+
grep -qF 'FERNET_KEY=' .env || echo "FERNET_KEY=$$(python -c "from cryptography.fernet import Fernet; fernet_key = Fernet.generate_key(); print(fernet_key.decode())")" >> .env
45+
docker-compose up --wait
46+
docker-compose exec airflow-webserver airflow variables import dev_variables.json
47+
docker-compose exec airflow-webserver airflow connections import dev_connections.json
4948

5049
gke:
5150
bin/start_gke

0 commit comments

Comments
 (0)