From 02f41b74b669bca6e2e49b4cc87db9e508479d2c Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 26 Feb 2024 15:02:47 -0800 Subject: [PATCH] feat(ingest): use `uv` for python package installs (#9885) --- .dockerignore | 6 +- .github/scripts/docker_helpers.sh | 4 +- .github/workflows/docker-unified.yml | 22 +- .github/workflows/metadata-ingestion.yml | 9 + docker/datahub-ingestion-base/Dockerfile | 21 +- .../base-requirements.txt | 269 ++++++++---------- docker/datahub-ingestion-base/entrypoint.sh | 6 +- .../regenerate-base-requirements.sh | 7 +- docker/datahub-ingestion/Dockerfile | 23 +- docker/datahub-ingestion/Dockerfile-slim-only | 5 +- docker/datahub-ingestion/pyspark_jars.sh | 2 +- .../airflow-plugin/build.gradle | 44 ++- .../airflow-plugin/setup.py | 5 +- metadata-ingestion/build.gradle | 35 ++- metadata-ingestion/setup.py | 16 +- smoke-test/build.gradle | 8 +- 16 files changed, 245 insertions(+), 237 deletions(-) diff --git a/.dockerignore b/.dockerignore index 701263f5fedded..602b46750d3708 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,7 +1,11 @@ **/node_modules/ */build/ */*/build/ -*/venv/ +**/venv/ +**/.tox/ +**/.mypy_cache/ +**/.pytest_cache/ +**/__pycache__/ out **/*.class # Have to copy gradle/wrapper/gradle-wrapper.jar, can't exclude ALL jars diff --git a/.github/scripts/docker_helpers.sh b/.github/scripts/docker_helpers.sh index 334465532db06b..0487c69eee0ef4 100755 --- a/.github/scripts/docker_helpers.sh +++ b/.github/scripts/docker_helpers.sh @@ -24,7 +24,7 @@ function get_tag_full { } function get_python_docker_release_v { - echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},0.0.0+docker.${SHORT_SHA},g" -e 's,refs/tags/v\(.*\),\1+docker,g' -e 's,refs/pull/\([0-9]*\).*,0.0.0+docker.pr\1,g') + echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},1!0.0.0+docker.${SHORT_SHA},g" -e 's,refs/tags/v\(.*\),1!\1+docker,g' -e 's,refs/pull/\([0-9]*\).*,1!0.0.0+docker.pr\1,g') } function get_unique_tag { @@ -37,4 +37,4 @@ 
function get_unique_tag_slim { function get_unique_tag_full { echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA}-full,g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g') -} \ No newline at end of file +} diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 56d3a45e09095a..af9f2c4415eb98 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -38,11 +38,17 @@ jobs: with: python-version: "3.10" cache: "pip" + - uses: actions/cache@v4 + with: + path: | + ~/.cache/uv + key: ${{ runner.os }}-uv-${{ hashFiles('**/requirements.txt') }} - name: Set up JDK 17 uses: actions/setup-java@v3 with: distribution: "zulu" java-version: 17 + - uses: gradle/gradle-build-action@v2 - name: Ensure packages are correct run: | python ./.github/scripts/check_python_package.py @@ -978,14 +984,14 @@ jobs: if: failure() run: | docker ps -a - docker logs datahub-gms >& gms-${{ matrix.test_strategy }}.log || true - docker logs datahub-actions >& actions-${{ matrix.test_strategy }}.log || true - docker logs datahub-mae-consumer >& mae-${{ matrix.test_strategy }}.log || true - docker logs datahub-mce-consumer >& mce-${{ matrix.test_strategy }}.log || true - docker logs broker >& broker-${{ matrix.test_strategy }}.log || true - docker logs mysql >& mysql-${{ matrix.test_strategy }}.log || true - docker logs elasticsearch >& elasticsearch-${{ matrix.test_strategy }}.log || true - docker logs datahub-frontend-react >& frontend-${{ matrix.test_strategy }}.log || true + docker logs datahub-datahub-gms-1 >& gms-${{ matrix.test_strategy }}.log || true + docker logs datahub-datahub-actions-1 >& actions-${{ matrix.test_strategy }}.log || true + docker logs datahub-datahub-mae-consumer-1 >& mae-${{ matrix.test_strategy }}.log || true + docker logs datahub-datahub-mce-consumer-1 >& mce-${{ matrix.test_strategy }}.log || true + docker logs datahub-broker-1 >& broker-${{ 
matrix.test_strategy }}.log || true + docker logs datahub-mysql-1 >& mysql-${{ matrix.test_strategy }}.log || true + docker logs datahub-elasticsearch-1 >& elasticsearch-${{ matrix.test_strategy }}.log || true + docker logs datahub-datahub-frontend-react-1 >& frontend-${{ matrix.test_strategy }}.log || true - name: Upload logs uses: actions/upload-artifact@v3 if: failure() diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index e7d6b7b97c0993..c3d2e71c08011d 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -51,6 +51,15 @@ jobs: java-version: 17 - uses: gradle/gradle-build-action@v2 - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + - uses: actions/cache@v4 + with: + path: | + ~/.cache/uv + key: ${{ runner.os }}-uv-${{ hashFiles('**/requirements.txt') }} - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} diff --git a/docker/datahub-ingestion-base/Dockerfile b/docker/datahub-ingestion-base/Dockerfile index 0bf0d2f88af738..220c6f7d448aff 100644 --- a/docker/datahub-ingestion-base/Dockerfile +++ b/docker/datahub-ingestion-base/Dockerfile @@ -50,7 +50,7 @@ RUN apt-get update && apt-get install -y -qq \ ldap-utils \ unixodbc \ libodbc2 \ - && python -m pip install --no-cache --upgrade pip wheel setuptools \ + && python -m pip install --no-cache --upgrade pip 'uv>=0.1.10' wheel setuptools \ && rm -rf /var/lib/apt/lists/* /var/cache/apk/* # compiled against newer golang for security fixes @@ -59,16 +59,22 @@ COPY --from=dockerize-binary /go/bin/dockerize /usr/local/bin COPY ./docker/datahub-ingestion-base/base-requirements.txt requirements.txt COPY ./docker/datahub-ingestion-base/entrypoint.sh /entrypoint.sh -RUN pip install --no-cache -r requirements.txt && \ - pip uninstall -y acryl-datahub && \ - chmod +x /entrypoint.sh && \ - addgroup --gid 1000 datahub && \
- adduser --disabled-password --uid 1000 --gid 1000 --home /datahub-ingestion datahub +RUN addgroup --gid 1000 datahub && \ + adduser --disabled-password --uid 1000 --gid 1000 --home /datahub-ingestion datahub && \ + chmod +x /entrypoint.sh + +USER datahub +ENV VIRTUAL_ENV=/datahub-ingestion/.venv +ENV PATH="${VIRTUAL_ENV}/bin:$PATH" +RUN python3 -m venv $VIRTUAL_ENV && \ + uv pip install --no-cache -r requirements.txt && \ + pip uninstall -y acryl-datahub ENTRYPOINT [ "/entrypoint.sh" ] FROM ${BASE_IMAGE} as full-install +USER 0 RUN apt-get update && apt-get install -y -qq \ default-jre-headless \ && rm -rf /var/lib/apt/lists/* /var/cache/apk/* @@ -91,10 +97,11 @@ RUN if [ $(arch) = "x86_64" ]; then \ ldconfig; \ fi; +USER datahub + FROM ${BASE_IMAGE} as slim-install # Do nothing else on top of base FROM ${APP_ENV}-install -USER datahub ENV PATH="/datahub-ingestion/.local/bin:$PATH" diff --git a/docker/datahub-ingestion-base/base-requirements.txt b/docker/datahub-ingestion-base/base-requirements.txt index 607004d2a99e73..b7cd91898c97d4 100644 --- a/docker/datahub-ingestion-base/base-requirements.txt +++ b/docker/datahub-ingestion-base/base-requirements.txt @@ -3,22 +3,20 @@ acryl-datahub-classify==0.0.9 acryl-PyHive==0.6.16 acryl-sqlglot==20.4.1.dev14 aenum==3.1.15 -aiohttp==3.9.1 +aiohttp==3.9.3 aiosignal==1.3.1 alembic==1.13.1 altair==4.2.0 -annotated-types==0.6.0 -anyio==3.7.1 +anyio==4.3.0 apache-airflow==2.7.3 -apache-airflow-providers-common-sql==1.9.0 +apache-airflow-providers-common-sql==1.11.0 apache-airflow-providers-ftp==3.7.0 -apache-airflow-providers-http==4.8.0 +apache-airflow-providers-http==4.9.1 apache-airflow-providers-imap==3.5.0 -apache-airflow-providers-sqlite==3.6.0 -apispec==6.3.1 -appdirs==1.4.4 -appnope==0.1.3 -argcomplete==3.2.1 +apache-airflow-providers-sqlite==3.7.1 +apispec==6.4.0 +appnope==0.1.4 +argcomplete==3.2.2 argon2-cffi==23.1.0 argon2-cffi-bindings==21.2.0 asgiref==3.7.2 @@ -26,25 +24,24 @@ asn1crypto==1.5.1 asttokens==2.4.1 
async-timeout==4.0.3 asynch==0.2.3 -attrs==23.1.0 +attrs==23.2.0 avro==1.11.3 avro-gen3==0.7.11 Babel==2.14.0 backoff==2.2.1 -beautifulsoup4==4.12.2 +beautifulsoup4==4.12.3 bleach==6.1.0 blinker==1.7.0 blis==0.7.11 -boto3==1.34.8 -botocore==1.34.8 -bowler==0.9.0 +boto3==1.34.49 +botocore==1.34.49 bracex==2.4 cached-property==1.5.2 cachelib==0.9.0 cachetools==5.3.2 catalogue==2.0.10 cattrs==23.2.3 -certifi==2023.11.17 +certifi==2024.2.2 cffi==1.16.0 chardet==5.2.0 charset-normalizer==3.3.2 @@ -53,33 +50,31 @@ click==8.1.7 click-default-group==1.2.4 click-spinner==0.1.10 clickclick==20.10.2 -clickhouse-driver==0.2.6 +clickhouse-driver==0.2.7 clickhouse-sqlalchemy==0.2.4 cloudpickle==3.0.0 colorama==0.4.6 colorlog==4.8.0 -comm==0.2.0 +comm==0.2.1 confection==0.1.4 ConfigUpdater==3.2 confluent-kafka==2.3.0 -connexion==2.14.2 -cron-descriptor==1.4.0 +connexion==2.14.1 +cron-descriptor==1.4.3 croniter==2.0.1 -cryptography==41.0.7 -cx-Oracle==8.3.0 +cryptography==42.0.4 +cx_Oracle==8.3.0 cymem==2.0.8 -dask==2023.12.1 -databricks-cli==0.18.0 databricks-dbapi==0.6.0 -databricks-sdk==0.15.0 -databricks-sql-connector==2.9.3 -debugpy==1.8.0 +databricks-sdk==0.20.0 +databricks-sql-connector==2.9.4 +debugpy==1.8.1 decorator==5.1.1 defusedxml==0.7.1 -deltalake==0.14.0 +deltalake==0.15.3 Deprecated==1.2.14 -dill==0.3.7 -dnspython==2.4.2 +dill==0.3.8 +dnspython==2.6.1 docker==7.0.0 docutils==0.20.1 ecdsa==0.18.0 @@ -90,25 +85,22 @@ et-xmlfile==1.1.0 exceptiongroup==1.2.0 executing==2.0.1 expandvars==0.12.0 -fastapi==0.109.1 -fastavro==1.9.2 -fastjsonschema==2.19.0 -feast==0.31.1 +fastavro==1.9.4 +fastjsonschema==2.19.1 filelock==3.13.1 -fissix==21.11.13 Flask==2.2.5 flatdict==4.0.1 frozenlist==1.4.1 fsspec==2023.12.2 -future==0.18.3 -GeoAlchemy2==0.14.3 +future==1.0.0 +GeoAlchemy2==0.14.4 gitdb==4.0.11 -GitPython==3.1.41 -google-api-core==2.15.0 -google-auth==2.25.2 -google-cloud-appengine-logging==1.4.0 +GitPython==3.1.42 +google-api-core==2.17.1 +google-auth==2.28.1 
+google-cloud-appengine-logging==1.4.2 google-cloud-audit-log==0.2.5 -google-cloud-bigquery==3.14.1 +google-cloud-bigquery==3.17.2 google-cloud-core==2.4.1 google-cloud-datacatalog-lineage==0.2.2 google-cloud-logging==3.5.0 @@ -116,98 +108,92 @@ google-crc32c==1.5.0 google-re2==1.1 google-resumable-media==2.7.0 googleapis-common-protos==1.62.0 -gql==3.4.1 +gql==3.5.0 graphql-core==3.2.3 graphviz==0.20.1 great-expectations==0.15.50 greenlet==3.0.3 grpc-google-iam-v1==0.13.0 -grpcio==1.60.0 -grpcio-reflection==1.60.0 -grpcio-status==1.60.0 -grpcio-tools==1.60.0 +grpcio==1.62.0 +grpcio-status==1.62.0 +grpcio-tools==1.62.0 gssapi==1.8.3 gunicorn==21.2.0 h11==0.14.0 -hdbcli==2.19.20 -httpcore==1.0.2 -httptools==0.6.1 -httpx==0.26.0 +hdbcli==2.19.21 +httpcore==1.0.4 +httpx==0.27.0 humanfriendly==10.0 idna==3.6 ijson==3.2.3 -importlib-metadata==6.11.0 +importlib-metadata==7.0.1 importlib-resources==6.1.1 inflection==0.5.1 ipaddress==1.0.23 ipykernel==6.17.1 -ipython==8.19.0 +ipython==8.21.0 ipython-genutils==0.2.0 -ipywidgets==8.1.1 +ipywidgets==8.1.2 iso3166==2.1.1 isodate==0.6.1 itsdangerous==2.1.2 jedi==0.19.1 -Jinja2==3.1.2 +Jinja2==3.1.3 jmespath==1.0.1 JPype1==1.5.0 jsonlines==4.0.0 jsonpatch==1.33 jsonpointer==2.4 jsonref==1.1.0 -jsonschema==4.20.0 +jsonschema==4.21.1 jsonschema-specifications==2023.12.1 -jupyter-server==1.24.0 +jupyter-server==1.16.0 jupyter_client==7.4.9 -jupyter_core==4.12.0 -jupyterlab-widgets==3.0.9 +jupyter_core==5.0.0 jupyterlab_pygments==0.3.0 +jupyterlab_widgets==3.0.10 langcodes==3.3.0 lark==1.1.4 lazy-object-proxy==1.10.0 leb128==1.0.5 -limits==3.7.0 +limits==3.9.0 linear-tsv==1.1.0 -linkify-it-py==2.0.2 -lkml==1.3.3 -locket==1.0.0 +linkify-it-py==2.0.3 +lkml==1.3.4 lockfile==0.12.2 looker-sdk==23.0.0 -lxml==4.9.4 -lz4==4.3.2 +lxml==5.1.0 +lz4==4.3.3 makefun==1.15.2 -Mako==1.3.0 -Markdown==3.5.1 +Mako==1.3.2 +Markdown==3.5.2 markdown-it-py==3.0.0 -MarkupSafe==2.1.3 -marshmallow==3.20.1 -marshmallow-oneofschema==3.0.1 +MarkupSafe==2.1.5 
+marshmallow==3.20.2 +marshmallow-oneofschema==3.1.1 marshmallow-sqlalchemy==0.26.1 matplotlib-inline==0.1.6 mdit-py-plugins==0.4.0 mdurl==0.1.2 mistune==3.0.2 mixpanel==4.10.0 -mlflow-skinny==2.9.2 -mmh3==4.0.1 +mlflow-skinny==2.10.2 mmhash3==3.0.1 -more-itertools==10.1.0 -moreorless==0.4.0 -moto==4.2.12 +more-itertools==10.2.0 +moto==4.2.14 msal==1.22.0 -multidict==6.0.4 +multidict==6.0.5 murmurhash==1.0.10 -mypy==1.8.0 mypy-extensions==1.0.0 nbclassic==1.0.0 nbclient==0.6.3 -nbconvert==7.13.1 -nbformat==5.9.1 -nest-asyncio==1.5.8 +nbconvert==7.16.1 +nbformat==5.9.2 +nest-asyncio==1.6.0 networkx==3.2.1 notebook==6.5.6 -notebook_shim==0.2.3 -numpy==1.26.2 +notebook_shim==0.2.4 +numpy==1.26.4 oauthlib==3.2.2 okta==1.7.0 openlineage-airflow==1.2.0 @@ -215,133 +201,127 @@ openlineage-integration-common==1.2.0 openlineage-python==1.2.0 openlineage_sql==1.2.0 openpyxl==3.1.2 -opentelemetry-api==1.22.0 -opentelemetry-exporter-otlp==1.22.0 -opentelemetry-exporter-otlp-proto-common==1.22.0 -opentelemetry-exporter-otlp-proto-grpc==1.22.0 -opentelemetry-exporter-otlp-proto-http==1.22.0 -opentelemetry-proto==1.22.0 -opentelemetry-sdk==1.22.0 -opentelemetry-semantic-conventions==0.43b0 +opentelemetry-api==1.16.0 +opentelemetry-exporter-otlp==1.16.0 +opentelemetry-exporter-otlp-proto-grpc==1.16.0 +opentelemetry-exporter-otlp-proto-http==1.16.0 +opentelemetry-proto==1.16.0 +opentelemetry-sdk==1.16.0 +opentelemetry-semantic-conventions==0.37b0 ordered-set==4.1.0 packaging==23.2 -pandas==1.5.3 -pandavro==1.5.2 -pandocfilters==1.5.0 -parse==1.20.0 +pandas==2.2.1 +pandocfilters==1.5.1 +parse==1.20.1 parso==0.8.3 -partd==1.4.1 +pathlib_abc==0.1.1 pathspec==0.12.1 -pathy==0.10.3 +pathy==0.11.0 pendulum==2.1.2 pexpect==4.9.0 phonenumbers==8.13.0 platformdirs==3.11.0 -pluggy==1.3.0 +pluggy==1.4.0 preshed==3.0.9 prison==0.2.1 progressbar2==4.3.2 -prometheus-client==0.19.0 +prometheus_client==0.20.0 prompt-toolkit==3.0.43 proto-plus==1.23.0 -protobuf==4.25.1 -psutil==5.9.7 
+protobuf==4.25.3 +psutil==5.9.8 psycopg2-binary==2.9.9 ptyprocess==0.7.0 pure-eval==0.2.2 pure-sasl==0.6.2 py-partiql-parser==0.5.0 -pyarrow==11.0.0 +pyarrow==12.0.1 +pyarrow-hotfix==0.6 pyasn1==0.5.1 pyasn1-modules==0.3.0 pyathena==2.25.2 pycountry==23.12.11 pycparser==2.21 -pycryptodome==3.19.0 -pydantic==1.10.13 -pydantic_core==2.14.6 -pydash==7.0.6 +pycryptodome==3.20.0 +pydantic==1.10.14 +pydash==7.0.7 pydruid==0.6.6 Pygments==2.17.2 pyiceberg==0.4.0 -pymongo==4.6.1 +pymongo==4.6.2 PyMySQL==1.1.0 -pyOpenSSL==23.3.0 +pyOpenSSL==24.0.0 pyparsing==3.0.9 pyspnego==0.10.2 python-daemon==3.0.1 python-dateutil==2.8.2 -python-dotenv==1.0.0 python-jose==3.3.0 python-ldap==3.4.4 python-nvd3==0.15.0 -python-slugify==8.0.1 +python-slugify==8.0.4 python-stdnum==1.19 -python-tds==1.14.0 -python-utils==3.8.1 -python3-openid==3.2.0 -pytz==2023.3.post1 +python-tds==1.15.0 +python-utils==3.8.2 +pytz==2023.4 pytzdata==2020.1 PyYAML==6.0.1 pyzmq==24.0.1 redash-toolbelt==0.1.9 -redshift-connector==2.0.918 -referencing==0.32.0 +redshift-connector==2.1.0 +referencing==0.33.0 regex==2023.12.25 requests==2.31.0 -requests-file==1.5.1 -requests-gssapi==1.2.3 +requests-file==2.0.0 +requests-gssapi==1.3.0 requests-ntlm==1.2.0 -requests-toolbelt==0.10.1 -responses==0.24.1 +requests-toolbelt==1.0.0 +responses==0.25.0 rfc3339-validator==0.1.4 rfc3986==2.0.0 rich==13.7.0 rich-argparse==1.4.0 -rpds-py==0.15.2 +rpds-py==0.18.0 rsa==4.9 ruamel.yaml==0.17.17 -ruamel.yaml.clib==0.2.8 s3transfer==0.10.0 -schwifty==2023.11.2 -scipy==1.11.4 +schwifty==2024.1.1.post0 +scipy==1.12.0 scramp==1.4.4 Send2Trash==1.8.2 -sentry-sdk==1.39.1 +sentry-sdk==1.40.5 setproctitle==1.3.3 simple-salesforce==1.12.5 six==1.16.0 +slack-sdk==3.18.1 smart-open==6.4.0 smmap==5.0.1 sniffio==1.3.0 -snowflake-connector-python==3.6.0 +snowflake-connector-python==3.7.1 snowflake-sqlalchemy==1.5.1 sortedcontainers==2.4.0 soupsieve==2.5 -spacy==3.4.3 +spacy==3.5.0 spacy-legacy==3.0.12 spacy-loggers==1.0.5 sql-metadata==2.2.2 
SQLAlchemy==1.4.44 sqlalchemy-bigquery==1.9.0 -sqlalchemy-hana==1.1.1 +sqlalchemy-hana==1.3.0 SQLAlchemy-JSONField==1.0.2 sqlalchemy-pytds==0.3.5 sqlalchemy-redshift==0.8.14 SQLAlchemy-Utils==0.41.1 -sqlalchemy2-stubs==0.0.2a37 sqllineage==1.3.8 sqlparse==0.4.4 srsly==2.4.8 stack-data==0.6.3 -starlette==0.32.0.post1 strictyaml==1.7.3 tableauserverclient==0.25 tableschema==1.20.2 tabulate==0.9.0 tabulator==1.53.5 tenacity==8.2.3 -teradatasql==20.0.0.2 +teradatasql==20.0.0.7 teradatasqlalchemy==17.20.0.0 termcolor==2.4.0 terminado==0.18.0 @@ -351,38 +331,31 @@ thrift==0.16.0 thrift-sasl==0.4.3 tinycss2==1.2.1 toml==0.10.2 -tomli==2.0.1 tomlkit==0.12.3 -toolz==0.12.0 +toolz==0.12.1 tornado==6.4 -tqdm==4.66.1 +tqdm==4.66.2 traitlets==5.2.1.post0 -trino==0.327.0 -typeguard==2.13.3 +trino==0.328.0 typer==0.7.0 typing-inspect==0.9.0 typing_extensions==4.9.0 +tzdata==2024.1 tzlocal==5.2 -uc-micro-py==1.0.2 +uc-micro-py==1.0.3 ujson==5.9.0 unicodecsv==0.14.1 -universal-pathlib==0.1.4 urllib3==1.26.18 -uvicorn==0.25.0 -uvloop==0.19.0 vertica-python==1.3.8 vertica-sqlalchemy-dialect==0.0.8.1 -vininfo==1.7.0 -volatile==2.1.0 -wasabi==0.10.1 -watchfiles==0.21.0 -wcmatch==8.5 -wcwidth==0.2.12 +vininfo==1.8.0 +wasabi==1.1.2 +wcmatch==8.5.1 +wcwidth==0.2.13 webencodings==0.5.1 websocket-client==1.7.0 -websockets==12.0 -Werkzeug==2.2.3 -widgetsnbextension==4.0.9 +Werkzeug==2.3.8 +widgetsnbextension==4.0.10 wrapt==1.16.0 WTForms==3.0.1 xlrd==2.0.1 diff --git a/docker/datahub-ingestion-base/entrypoint.sh b/docker/datahub-ingestion-base/entrypoint.sh index 518bb215614678..73cc9358d03c9b 100644 --- a/docker/datahub-ingestion-base/entrypoint.sh +++ b/docker/datahub-ingestion-base/entrypoint.sh @@ -1,10 +1,10 @@ #!/usr/bin/bash -if [ ! -z "$ACTIONS_EXTRA_PACKAGES" ]; then - pip install --user $ACTIONS_EXTRA_PACKAGES +if [ -n "$ACTIONS_EXTRA_PACKAGES" ]; then + uv pip install $ACTIONS_EXTRA_PACKAGES fi -if [[ ! -z "$ACTIONS_CONFIG" && ! 
-z "$ACTIONS_EXTRA_PACKAGES" ]]; then +if [[ -n "$ACTIONS_CONFIG" && -n "$ACTIONS_EXTRA_PACKAGES" ]]; then mkdir -p /tmp/datahub/logs curl -q "$ACTIONS_CONFIG" -o config.yaml exec dockerize -wait ${DATAHUB_GMS_PROTOCOL:-http}://$DATAHUB_GMS_HOST:$DATAHUB_GMS_PORT/health -timeout 240s \ diff --git a/docker/datahub-ingestion-base/regenerate-base-requirements.sh b/docker/datahub-ingestion-base/regenerate-base-requirements.sh index 6fb331afa484a3..13d74922d9013b 100755 --- a/docker/datahub-ingestion-base/regenerate-base-requirements.sh +++ b/docker/datahub-ingestion-base/regenerate-base-requirements.sh @@ -13,14 +13,12 @@ VENV_DIR=$(mktemp -d) python -c "import sys; assert sys.version_info >= (3, 9), 'Python 3.9 or higher is required.'" python -m venv $VENV_DIR source $VENV_DIR/bin/activate -pip install --upgrade pip setuptools wheel +pip install --upgrade pip uv setuptools wheel echo "Using virtualenv at $VENV_DIR" # Install stuff. pushd $DATAHUB_DIR/metadata-ingestion -pip install -e . -pip install -e '../metadata-ingestion-modules/airflow-plugin/[plugin-v2]' -pip install -e '.[all]' +uv pip install -e '.[all]' -e '../metadata-ingestion-modules/airflow-plugin/[plugin-v2]' popd # Generate the requirements file. @@ -31,6 +29,7 @@ popd echo "# Generated requirements file. Run ./$SCRIPT_NAME to regenerate." 
> base-requirements.txt pip freeze \ | grep -v -E "^-e" \ + | grep -v -E "^uv==" \ | grep -v "Flask-" \ | grep -v -E "(py4j|PyJWT)==" \ | grep -v -E "(pyspark|pydeequ)==" \ diff --git a/docker/datahub-ingestion/Dockerfile b/docker/datahub-ingestion/Dockerfile index 4f0e66251b1542..6c8829557837cf 100644 --- a/docker/datahub-ingestion/Dockerfile +++ b/docker/datahub-ingestion/Dockerfile @@ -6,26 +6,23 @@ ARG PIP_MIRROR_URL=null ARG DEBIAN_REPO_URL=https://deb.debian.org/debian FROM $BASE_IMAGE:$DOCKER_VERSION as base -USER 0 +USER datahub -COPY ./metadata-ingestion /datahub-ingestion -COPY ./metadata-ingestion-modules/airflow-plugin /datahub-ingestion/airflow-plugin +COPY --chown=datahub ./metadata-ingestion /datahub-ingestion +COPY --chown=datahub ./metadata-ingestion-modules/airflow-plugin /datahub-ingestion/airflow-plugin ARG RELEASE_VERSION WORKDIR /datahub-ingestion RUN sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/datahub/__init__.py && \ sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" airflow-plugin/src/datahub_airflow_plugin/__init__.py && \ - cat src/datahub/__init__.py && \ - chown -R datahub /datahub-ingestion - -USER datahub -ENV PATH="/datahub-ingestion/.local/bin:$PATH" + cat src/datahub/__init__.py | grep __version__ && \ + cat airflow-plugin/src/datahub_airflow_plugin/__init__.py | grep __version__ FROM base as slim-install ARG PIP_MIRROR_URL RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi -RUN pip install --no-cache --user ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]" +RUN uv pip install --no-cache 
"acryl-datahub[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary] @ ." FROM base as full-install-build ARG PIP_MIRROR_URL @@ -39,14 +36,13 @@ USER datahub COPY ./docker/datahub-ingestion/pyspark_jars.sh . RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi -RUN pip install --no-cache --user ".[base]" && \ - pip install --no-cache --user "./airflow-plugin[acryl-datahub-airflow-plugin]" && \ - pip install --no-cache --user ".[all]" +RUN uv pip install --no-cache "acryl-datahub[base,all] @ ." "acryl-datahub-airflow-plugin[plugin-v2] @ ./airflow-plugin" && \ + datahub --version RUN ./pyspark_jars.sh FROM base as full-install -COPY --from=full-install-build /datahub-ingestion/.local /datahub-ingestion/.local +COPY --from=full-install-build ${VIRTUAL_ENV} ${VIRTUAL_ENV} FROM base as dev-install # Dummy stage for development. Assumes code is built on your machine and mounted to this image. 
@@ -55,4 +51,3 @@ FROM base as dev-install FROM ${APP_ENV}-install as final USER datahub -ENV PATH="/datahub-ingestion/.local/bin:$PATH" diff --git a/docker/datahub-ingestion/Dockerfile-slim-only b/docker/datahub-ingestion/Dockerfile-slim-only index 24412958a2d08c..ba43bd3c3c6be7 100644 --- a/docker/datahub-ingestion/Dockerfile-slim-only +++ b/docker/datahub-ingestion/Dockerfile-slim-only @@ -15,16 +15,15 @@ RUN sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEAS chown -R datahub /datahub-ingestion USER datahub -ENV PATH="/datahub-ingestion/.local/bin:$PATH" FROM base as slim-install ARG PIP_MIRROR_URL RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi -RUN pip install --no-cache --user ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]" +RUN uv pip install --no-cache "acryl-datahub[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary] @ ." 
&& \ + datahub --version FROM slim-install as final USER datahub -ENV PATH="/datahub-ingestion/.local/bin:$PATH" diff --git a/docker/datahub-ingestion/pyspark_jars.sh b/docker/datahub-ingestion/pyspark_jars.sh index ab4b223f0358a5..833c3079b82df2 100755 --- a/docker/datahub-ingestion/pyspark_jars.sh +++ b/docker/datahub-ingestion/pyspark_jars.sh @@ -2,7 +2,7 @@ set -ex -PYSPARK_JARS="$(python -m site --user-site)/pyspark/jars" +PYSPARK_JARS="$(python -c 'import site; print(site.getsitepackages()[0])')/pyspark/jars" function replace_jar { JAR_PREFIX=$1 diff --git a/metadata-ingestion-modules/airflow-plugin/build.gradle b/metadata-ingestion-modules/airflow-plugin/build.gradle index 9555f92c8831dd..554da819af41d4 100644 --- a/metadata-ingestion-modules/airflow-plugin/build.gradle +++ b/metadata-ingestion-modules/airflow-plugin/build.gradle @@ -18,7 +18,7 @@ if (extra_pip_extras != "") { ext.extra_pip_extras = "," + extra_pip_extras } -def pip_install_command = "${venv_name}/bin/pip install -e ../../metadata-ingestion" +def pip_install_command = "VIRTUAL_ENV=${venv_name} ${venv_name}/bin/uv pip install -e ../../metadata-ingestion" task checkPythonVersion(type: Exec) { commandLine python_executable, '-c', 'import sys; assert sys.version_info >= (3, 7)' @@ -29,8 +29,8 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) { inputs.file file('setup.py') outputs.file(sentinel_file) commandLine 'bash', '-c', - "${python_executable} -m venv ${venv_name} &&" + - "${venv_name}/bin/python -m pip install --upgrade pip wheel 'setuptools>=63.0.0' && " + + "${python_executable} -m venv ${venv_name} && set -x && " + + "${venv_name}/bin/python -m pip install --upgrade pip uv wheel 'setuptools>=63.0.0' && " + "touch ${sentinel_file}" } @@ -41,8 +41,9 @@ task installPackage(type: Exec, dependsOn: [environmentSetup, ':metadata-ingesti // Workaround for https://github.com/yaml/pyyaml/issues/601. // See https://github.com/yaml/pyyaml/issues/601#issuecomment-1638509577. 
// and https://github.com/datahub-project/datahub/pull/8435. - commandLine 'bash', '-x', '-c', - "${pip_install_command} install 'Cython<3.0' 'PyYAML<6' --no-build-isolation && " + + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "pip install 'Cython<3.0' 'PyYAML<6' --no-build-isolation && " + "${pip_install_command} -e .[ignore${extra_pip_extras}] ${extra_pip_requirements} &&" + "touch ${sentinel_file}" } @@ -53,7 +54,8 @@ task installDev(type: Exec, dependsOn: [install]) { def sentinel_file = "${venv_name}/.build_install_dev_sentinel" inputs.file file('setup.py') outputs.file("${sentinel_file}") - commandLine 'bash', '-x', '-c', + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + "${pip_install_command} -e .[dev${extra_pip_extras}] ${extra_pip_requirements} && " + "touch ${sentinel_file}" } @@ -72,8 +74,8 @@ task lint(type: Exec, dependsOn: installDev) { "mypy --show-traceback --show-error-codes src/ tests/" } task lintFix(type: Exec, dependsOn: installDev) { - commandLine 'bash', '-x', '-c', - "source ${venv_name}/bin/activate && " + + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + "black src/ tests/ && " + "isort src/ tests/ && " + "flake8 src/ tests/ && " + @@ -85,30 +87,18 @@ task installDevTest(type: Exec, dependsOn: [installDev]) { inputs.file file('setup.py') outputs.dir("${venv_name}") outputs.file("${sentinel_file}") - commandLine 'bash', '-x', '-c', + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + "${pip_install_command} -e .[dev,integration-tests${extra_pip_extras}] ${extra_pip_requirements} && " + "touch ${sentinel_file}" } -def testFile = hasProperty('testFile') ? 
testFile : 'unknown' -task testSingle(dependsOn: [installDevTest]) { - doLast { - if (testFile != 'unknown') { - exec { - commandLine 'bash', '-x', '-c', - "source ${venv_name}/bin/activate && pytest ${testFile}" - } - } else { - throw new GradleException("No file provided. Use -PtestFile=") - } - } -} - task testQuick(type: Exec, dependsOn: installDevTest) { inputs.files(project.fileTree(dir: "src/", include: "**/*.py")) inputs.files(project.fileTree(dir: "tests/")) - commandLine 'bash', '-x', '-c', - "source ${venv_name}/bin/activate && pytest --cov-config=setup.cfg --cov-report xml:coverage_quick.xml -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "pytest --cov-config=setup.cfg --cov-report xml:coverage_quick.xml -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" } @@ -117,7 +107,9 @@ task cleanPythonCache(type: Exec) { "find src -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete" } task buildWheel(type: Exec, dependsOn: [install, cleanPythonCache]) { - commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + 'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' } build.dependsOn install diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index 1a3e844cedc1ff..40e1e9eb03ac0a 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -15,7 +15,9 @@ def get_long_description(): _version: str = package_metadata["__version__"] -_self_pin = f"=={_version}" if not 
_version.endswith("dev0") else "" +_self_pin = ( + f"=={_version}" if not (_version.endswith("dev0") or "docker" in _version) else "" +) base_requirements = { @@ -163,6 +165,7 @@ def get_long_description(): # Dependencies. install_requires=list(base_requirements), extras_require={ + "ignore": [], # This is a dummy extra to allow for trailing commas in the list. **{plugin: list(dependencies) for plugin, dependencies in plugins.items()}, "dev": list(dev_requirements), "integration-tests": list(integration_test_requirements), diff --git a/metadata-ingestion/build.gradle b/metadata-ingestion/build.gradle index f493e33ce596c8..1aae02ab8bede9 100644 --- a/metadata-ingestion/build.gradle +++ b/metadata-ingestion/build.gradle @@ -26,7 +26,7 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) { outputs.file(sentinel_file) commandLine 'bash', '-c', "${python_executable} -m venv ${venv_name} && " + - "${venv_name}/bin/python -m pip install --upgrade pip wheel 'setuptools>=63.0.0' && " + + "${venv_name}/bin/python -m pip install --upgrade pip uv wheel 'setuptools>=63.0.0' && " + "touch ${sentinel_file}" } @@ -43,7 +43,8 @@ task installPackageOnly(type: Exec, dependsOn: runPreFlightScript) { inputs.file file('setup.py') outputs.file(sentinel_file) commandLine 'bash', '-x', '-c', - "${venv_name}/bin/pip install -e . &&" + + "source ${venv_name}/bin/activate && " + + "uv pip install -e . &&" + "touch ${sentinel_file}" } @@ -52,7 +53,8 @@ task installPackage(type: Exec, dependsOn: installPackageOnly) { inputs.file file('setup.py') outputs.file(sentinel_file) commandLine 'bash', '-x', '-c', - "${venv_name}/bin/pip install -e . ${extra_pip_requirements} && " + + "source ${venv_name}/bin/activate && " + + "uv pip install -e . 
${extra_pip_requirements} && " + "touch ${sentinel_file}" } @@ -70,7 +72,7 @@ task customPackageGenerate(type: Exec, dependsOn: [environmentSetup, installPack def package_version = project.findProperty('package_version') commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + - "pip install build && " + + "uv pip install build && " + "./scripts/custom_package_codegen.sh '${package_name}' '${package_version}'" } @@ -82,7 +84,7 @@ task installDev(type: Exec, dependsOn: [install]) { outputs.file(sentinel_file) commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "${venv_name}/bin/pip install -e .[dev] ${extra_pip_requirements} && " + + "uv pip install -e .[dev] ${extra_pip_requirements} && " + "touch ${sentinel_file}" } @@ -92,7 +94,7 @@ task installAll(type: Exec, dependsOn: [install]) { outputs.file(sentinel_file) commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "${venv_name}/bin/pip install -e .[all] ${extra_pip_requirements} && " + + "uv pip install -e .[all] ${extra_pip_requirements} && " + "touch ${sentinel_file}" } @@ -142,7 +144,8 @@ task testQuick(type: Exec, dependsOn: [installDev, ':metadata-models:generateJso outputs.dir("${venv_name}") def cvg_arg = get_coverage_arg("quick") commandLine 'bash', '-c', - "source ${venv_name}/bin/activate && pytest ${cvg_arg} tests/unit --random-order --durations=20 -m 'not integration' -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" + "source ${venv_name}/bin/activate && set -x && " + + "pytest ${cvg_arg} tests/unit --random-order --durations=20 -m 'not integration' -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" } task installDevTest(type: Exec, dependsOn: [install]) { @@ -151,7 +154,9 @@ task installDevTest(type: Exec, dependsOn: [install]) { outputs.dir("${venv_name}") outputs.file(sentinel_file) commandLine 'bash', '-c', - "${venv_name}/bin/pip install -e .[dev,integration-tests] ${extra_pip_requirements} && touch 
${sentinel_file}" + "source ${venv_name}/bin/activate && set -x && " + + "uv pip install -e .[dev,integration-tests] ${extra_pip_requirements} && " + + "touch ${sentinel_file}" } def testFile = hasProperty('testFile') ? testFile : 'unknown' @@ -171,22 +176,26 @@ task testSingle(dependsOn: [installDevTest]) { task testIntegrationBatch0(type: Exec, dependsOn: [installDevTest]) { def cvg_arg = get_coverage_arg("intBatch0") commandLine 'bash', '-c', - "source ${venv_name}/bin/activate && pytest ${cvg_arg} --durations=50 -m 'integration_batch_0' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch0.xml" + "source ${venv_name}/bin/activate && set -x && " + + "pytest ${cvg_arg} --durations=50 -m 'integration_batch_0' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch0.xml" } task testIntegrationBatch1(type: Exec, dependsOn: [installDevTest]) { def cvg_arg = get_coverage_arg("intBatch1") commandLine 'bash', '-c', - "source ${venv_name}/bin/activate && pytest ${cvg_arg} --durations=50 -m 'integration_batch_1' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch1.xml" + "source ${venv_name}/bin/activate && set -x && " + + "pytest ${cvg_arg} --durations=50 -m 'integration_batch_1' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch1.xml" } task testIntegrationBatch2(type: Exec, dependsOn: [installDevTest]) { def cvg_arg = get_coverage_arg("intBatch2") commandLine 'bash', '-c', - "source ${venv_name}/bin/activate && pytest ${cvg_arg} --durations=20 -m 'integration_batch_2' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch2.xml" + "source ${venv_name}/bin/activate && set -x && " + + "pytest ${cvg_arg} --durations=20 -m 'integration_batch_2' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch2.xml" } task testFull(type: Exec, dependsOn: [installDevTest]) { commandLine 'bash', '-c', - "source ${venv_name}/bin/activate && pytest --durations=50 -vv 
--continue-on-collection-errors --junit-xml=junit.full.xml" + "source ${venv_name}/bin/activate && set -x && " + + "pytest --durations=50 -vv --continue-on-collection-errors --junit-xml=junit.full.xml" } task specGen(type: Exec, dependsOn: [codegen, installDevTest]) { @@ -203,7 +212,7 @@ task cleanPythonCache(type: Exec) { "find src tests -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete" } task buildWheel(type: Exec, dependsOn: [install, codegen, cleanPythonCache]) { - commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' + commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'uv pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' } build.dependsOn install diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 3c6aafec261851..f29869638f3f21 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -6,6 +6,10 @@ with open("./src/datahub/__init__.py") as fp: exec(fp.read(), package_metadata) +_version: str = package_metadata["__version__"] +_self_pin = ( + f"=={_version}" if not (_version.endswith("dev0") or "docker" in _version) else "" +) base_requirements = { # Typing extension should be >=3.10.0.2 ideally but we can't restrict due to a Airflow 2.1 dependency conflict. @@ -17,7 +21,7 @@ # pydantic 1.10.3 is incompatible with typing-extensions 4.1.1 - https://github.com/pydantic/pydantic/issues/4885 "pydantic>=1.10.0,!=1.10.3", "mixpanel>=4.9.0", - "sentry-sdk", + "sentry-sdk>=1.40.5", } framework_common = { @@ -272,7 +276,7 @@ }, # Integrations. 
"airflow": { - f"acryl-datahub-airflow-plugin == {package_metadata['__version__']}", + f"acryl-datahub-airflow-plugin{_self_pin}", }, "circuit-breaker": { "gql>=3.3.0", @@ -398,12 +402,18 @@ # This is mainly used to exclude plugins from the Docker image. all_exclude_plugins: Set[str] = { + # The Airflow extra is only retained for compatibility, but new users should + # be using the datahub-airflow-plugin package instead. + "airflow", # SQL Server ODBC requires additional drivers, and so we don't want to keep # it included in the default "all" installation. "mssql-odbc", # duckdb doesn't have a prebuilt wheel for Linux arm7l or aarch64, so we # simply exclude it. "datahub-lite", + # Feast tends to have overly restrictive dependencies and hence doesn't + # play nice with the "all" installation. + "feast", } mypy_stubs = { @@ -678,7 +688,7 @@ setuptools.setup( # Package metadata. name=package_metadata["__package_name__"], - version=package_metadata["__version__"], + version=_version, url="https://datahubproject.io/", project_urls={ "Documentation": "https://datahubproject.io/docs/", diff --git a/smoke-test/build.gradle b/smoke-test/build.gradle index 3cba93c452a101..2cedb3c7570b81 100644 --- a/smoke-test/build.gradle +++ b/smoke-test/build.gradle @@ -49,10 +49,12 @@ task installDev(type: Exec) { inputs.file file('pyproject.toml') inputs.file file('requirements.txt') outputs.file("${venv_name}/.build_install_dev_sentinel") - commandLine 'bash', '-x', '-c', + commandLine 'bash', '-c', + "set -x && " + "${python_executable} -m venv ${venv_name} && " + - "${venv_name}/bin/pip install --upgrade pip wheel setuptools && " + - "${venv_name}/bin/pip install -r requirements.txt && " + + "${venv_name}/bin/python -m pip install --upgrade pip uv wheel setuptools && " + + "set +x && source ${venv_name}/bin/activate && set -x && " + + "uv pip install -r requirements.txt && " + "touch ${venv_name}/.build_install_dev_sentinel" }