Skip to content

Commit

Permalink
feat(ingest): use uv for python package installs (datahub-project#9885
Browse files Browse the repository at this point in the history
)
  • Loading branch information
hsheth2 authored Feb 26, 2024
1 parent a1f2216 commit 02f41b7
Show file tree
Hide file tree
Showing 16 changed files with 245 additions and 237 deletions.
6 changes: 5 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
**/node_modules/
*/build/
*/*/build/
*/venv/
**/venv/
**/.tox/
**/.mypy_cache/
**/.pytest_cache/
**/__pycache__/
out
**/*.class
# Have to copy gradle/wrapper/gradle-wrapper.jar, can't exclude ALL jars
Expand Down
4 changes: 2 additions & 2 deletions .github/scripts/docker_helpers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ function get_tag_full {
}

function get_python_docker_release_v {
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},0.0.0+docker.${SHORT_SHA},g" -e 's,refs/tags/v\(.*\),\1+docker,g' -e 's,refs/pull/\([0-9]*\).*,0.0.0+docker.pr\1,g')
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},1!0.0.0+docker.${SHORT_SHA},g" -e 's,refs/tags/v\(.*\),1!\1+docker,g' -e 's,refs/pull/\([0-9]*\).*,1!0.0.0+docker.pr\1,g')
}

function get_unique_tag {
Expand All @@ -37,4 +37,4 @@ function get_unique_tag_slim {

function get_unique_tag_full {
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA}-full,g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g')
}
}
22 changes: 14 additions & 8 deletions .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,17 @@ jobs:
with:
python-version: "3.10"
cache: "pip"
- uses: actions/cache@v4
with:
path: |
~/.cache/uv
key: ${{ runner.os }}-uv-${{ hashFiles('**/requirements.txt') }}
- name: Set up JDK 17
uses: actions/setup-java@v3
with:
distribution: "zulu"
java-version: 17
- uses: gradle/gradle-build-action@v2
- name: Ensure packages are correct
run: |
python ./.github/scripts/check_python_package.py
Expand Down Expand Up @@ -978,14 +984,14 @@ jobs:
if: failure()
run: |
docker ps -a
docker logs datahub-gms >& gms-${{ matrix.test_strategy }}.log || true
docker logs datahub-actions >& actions-${{ matrix.test_strategy }}.log || true
docker logs datahub-mae-consumer >& mae-${{ matrix.test_strategy }}.log || true
docker logs datahub-mce-consumer >& mce-${{ matrix.test_strategy }}.log || true
docker logs broker >& broker-${{ matrix.test_strategy }}.log || true
docker logs mysql >& mysql-${{ matrix.test_strategy }}.log || true
docker logs elasticsearch >& elasticsearch-${{ matrix.test_strategy }}.log || true
docker logs datahub-frontend-react >& frontend-${{ matrix.test_strategy }}.log || true
docker logs datahub-datahub-gms-1 >& gms-${{ matrix.test_strategy }}.log || true
docker logs datahub-datahub-actions-1 >& actions-${{ matrix.test_strategy }}.log || true
docker logs datahub-datahub-mae-consumer-1 >& mae-${{ matrix.test_strategy }}.log || true
docker logs datahub-datahub-mce-consumer-1 >& mce-${{ matrix.test_strategy }}.log || true
docker logs datahub-broker-1 >& broker-${{ matrix.test_strategy }}.log || true
docker logs datahub-mysql-1 >& mysql-${{ matrix.test_strategy }}.log || true
docker logs datahub-elasticsearch-1 >& elasticsearch-${{ matrix.test_strategy }}.log || true
docker logs datahub-datahub-frontend-react-1 >& frontend-${{ matrix.test_strategy }}.log || true
- name: Upload logs
uses: actions/upload-artifact@v3
if: failure()
Expand Down
9 changes: 9 additions & 0 deletions .github/workflows/metadata-ingestion.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,15 @@ jobs:
java-version: 17
- uses: gradle/gradle-build-action@v2
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: "pip"
- uses: actions/cache@v4
with:
path: |
~/.cache/uv
key: ${{ runner.os }}-uv-${{ hashFiles('**/requirements.txt') }}
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
Expand Down
21 changes: 14 additions & 7 deletions docker/datahub-ingestion-base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ RUN apt-get update && apt-get install -y -qq \
ldap-utils \
unixodbc \
libodbc2 \
&& python -m pip install --no-cache --upgrade pip wheel setuptools \
&& python -m pip install --no-cache --upgrade pip uv>=0.1.10 wheel setuptools \
&& rm -rf /var/lib/apt/lists/* /var/cache/apk/*

# compiled against newer golang for security fixes
Expand All @@ -59,16 +59,22 @@ COPY --from=dockerize-binary /go/bin/dockerize /usr/local/bin
COPY ./docker/datahub-ingestion-base/base-requirements.txt requirements.txt
COPY ./docker/datahub-ingestion-base/entrypoint.sh /entrypoint.sh

RUN pip install --no-cache -r requirements.txt && \
pip uninstall -y acryl-datahub && \
chmod +x /entrypoint.sh && \
addgroup --gid 1000 datahub && \
adduser --disabled-password --uid 1000 --gid 1000 --home /datahub-ingestion datahub
RUN addgroup --gid 1000 datahub && \
adduser --disabled-password --uid 1000 --gid 1000 --home /datahub-ingestion datahub && \
chmod +x /entrypoint.sh

USER datahub
ENV VIRTUAL_ENV=/datahub-ingestion/.venv
ENV PATH="${VIRTUAL_ENV}/bin:$PATH"
RUN python3 -m venv $VIRTUAL_ENV && \
uv pip install --no-cache -r requirements.txt && \
pip uninstall -y acryl-datahub

ENTRYPOINT [ "/entrypoint.sh" ]

FROM ${BASE_IMAGE} as full-install

USER 0
RUN apt-get update && apt-get install -y -qq \
default-jre-headless \
&& rm -rf /var/lib/apt/lists/* /var/cache/apk/*
Expand All @@ -91,10 +97,11 @@ RUN if [ $(arch) = "x86_64" ]; then \
ldconfig; \
fi;

USER datahub

FROM ${BASE_IMAGE} as slim-install
# Do nothing else on top of base

FROM ${APP_ENV}-install

USER datahub
ENV PATH="/datahub-ingestion/.local/bin:$PATH"
Loading

0 comments on commit 02f41b7

Please sign in to comment.