Skip to content

Commit

Permalink
python312Packages.mlflow: 2.14.3 -> 2.16.2 (#347104)
Browse files Browse the repository at this point in the history
  • Loading branch information
vcunat committed Oct 13, 2024
2 parents 8528157 + 8e03a09 commit fba3090
Show file tree
Hide file tree
Showing 4 changed files with 225 additions and 63 deletions.
85 changes: 85 additions & 0 deletions pkgs/development/python-modules/databricks-sdk/default.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  google-auth,
  requests,

  # tests
  pyfakefs,
  pytestCheckHook,
  pytest-mock,
  requests-mock,
}:

# Databricks SDK for Python, built from the upstream GitHub repository.
buildPythonPackage rec {
  pname = "databricks-sdk";
  version = "0.34.0";
  pyproject = true;

  # Source is fetched at the git tag derived from `version`.
  src = fetchFromGitHub {
    owner = "databricks";
    repo = "databricks-sdk-py";
    rev = "refs/tags/v${version}";
    hash = "sha256-pbOm1aTHtIAwk/TJ5CCT9/CqSTuHTWkRgJuflObkU54=";
  };

  build-system = [ setuptools ];

  dependencies = [
    google-auth
    requests
  ];

  # Test tooling; the suite itself is driven by pytestCheckHook.
  nativeCheckInputs = [
    pyfakefs
    pytestCheckHook
    pytest-mock
    requests-mock
  ];

  # Allow local networking inside the Darwin build sandbox.
  __darwinAllowLocalNetworking = true;

  disabledTests =
    [
      # These tests need internet access and otherwise fail with:
      # ValueError: default auth: cannot configure default credentials, please chec...
      "test_azure_cli_does_not_specify_tenant_id_with_msi"
      "test_azure_cli_fallback"
      "test_azure_cli_user_no_management_access"
      "test_azure_cli_user_with_management_access"
      "test_azure_cli_with_warning_on_stderr"
      "test_azure_cli_workspace_header_present"
      "test_config_azure_cli_host"
      "test_config_azure_cli_host_and_resource_id"
      "test_config_azure_cli_host_and_resource_i_d_configuration_precedence"
      "test_load_azure_tenant_id_404"
      "test_load_azure_tenant_id_happy_path"
      "test_load_azure_tenant_id_no_location_header"
      "test_load_azure_tenant_id_unparsable_location_header"
    ]
    # Additionally skipped on x86_64-darwin only.
    ++ lib.optionals (stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isx86_64) [
      # requests.exceptions.ChunkedEncodingError: ("Connection broken: ConnectionResetError(54, 'Connection reset by peer')", ConnectionResetError(54, 'Connection reset by peer'))
      "test_github_oidc_flow_works_with_azure"
    ];

  # Smoke test: the top-level module must be importable after installation.
  pythonImportsCheck = [ "databricks.sdk" ];

  meta = {
    description = "Databricks SDK for Python";
    homepage = "https://github.com/databricks/databricks-sdk-py";
    changelog = "https://github.com/databricks/databricks-sdk-py/blob/${src.rev}/CHANGELOG.md";
    license = lib.licenses.asl20;
    maintainers = [ lib.maintainers.GaetanLepage ];
  };
}
158 changes: 115 additions & 43 deletions pkgs/development/python-modules/mlflow/default.nix
Original file line number Diff line number Diff line change
@@ -1,118 +1,190 @@
{
lib,
fetchFromGitHub,

# build-system
setuptools,

# dependencies
alembic,
buildPythonPackage,
cachetools,
click,
cloudpickle,
databricks-cli,
databricks-sdk,
docker,
entrypoints,
fetchPypi,
flask,
gitpython,
gorilla,
graphene,
gunicorn,
importlib-metadata,
jinja2,
markdown,
matplotlib,
numpy,
opentelemetry-api,
opentelemetry-sdk,
packaging,
pandas,
prometheus-flask-exporter,
protobuf,
python-dateutil,
pythonOlder,
pyarrow,
pytz,
pyyaml,
querystring-parser,
requests,
setuptools,
scikit-learn,
scipy,
simplejson,
sqlalchemy,
sqlparse,

# tests
aiohttp,
azure-core,
azure-storage-blob,
azure-storage-file,
boto3,
botocore,
catboost,
datasets,
fastapi,
google-cloud-storage,
httpx,
jwt,
keras,
langchain,
librosa,
moto,
opentelemetry-exporter-otlp,
optuna,
pydantic,
pyspark,
pytestCheckHook,
pytorch-lightning,
sentence-transformers,
starlette,
statsmodels,
tensorflow,
torch,
transformers,
uvicorn,
xgboost,
}:

buildPythonPackage rec {
pname = "mlflow";
version = "2.14.3";
version = "2.16.2";
pyproject = true;

disabled = pythonOlder "3.8";

src = fetchPypi {
inherit pname version;
hash = "sha256-KSyuS4NXSgyyIxF+IkyqZ5iTMHivAjNxnCthK+pkVhc=";
src = fetchFromGitHub {
owner = "mlflow";
repo = "mlflow";
rev = "refs/tags/v${version}";
hash = "sha256-7W1gpVgJSN/iXoW987eCHfcOeE3D/ZJ2W/eilDdzOww=";
};

# Remove currently broken dependency `shap`, a model explainability package.
# This seems quite unprincipled especially with tests not being enabled,
# but now mlflow has a 'skinny' install option which does not require `shap`.
pythonRemoveDeps = [ "shap" ];
pythonRelaxDeps = [
"gunicorn"
"packaging"
"pytz"
"pyarrow"
build-system = [
setuptools
];

propagatedBuildInputs = [
dependencies = [
alembic
cachetools
click
cloudpickle
databricks-cli
databricks-sdk
docker
entrypoints
flask
gitpython
gorilla
graphene
gunicorn
importlib-metadata
jinja2
markdown
matplotlib
numpy
opentelemetry-api
opentelemetry-sdk
packaging
pandas
prometheus-flask-exporter
protobuf
pyarrow
python-dateutil
pytz
pyyaml
querystring-parser
requests
scikit-learn
scipy
setuptools
#shap
simplejson
sqlalchemy
sqlparse
];

pythonImportsCheck = [ "mlflow" ];

# no tests in PyPI dist
# run into https://stackoverflow.com/questions/51203641/attributeerror-module-alembic-context-has-no-attribute-config
# also, tests use conda so can't run on NixOS without buildFHSEnv
nativeCheckInputs = [
aiohttp
azure-core
azure-storage-blob
azure-storage-file
boto3
botocore
catboost
datasets
fastapi
google-cloud-storage
httpx
jwt
keras
langchain
librosa
moto
opentelemetry-exporter-otlp
optuna
pydantic
pyspark
pytestCheckHook
pytorch-lightning
sentence-transformers
starlette
statsmodels
tensorflow
torch
transformers
uvicorn
xgboost
];

disabledTestPaths = [
# Requires unpackaged `autogen`
"tests/autogen/test_autogen_autolog.py"

# Requires unpackaged `diviner`
"tests/diviner/test_diviner_model_export.py"

# Requires unpackaged `sktime`
"examples/sktime/test_sktime_model_export.py"

# Requires `fastai` which would cause a circular dependency
"tests/fastai/test_fastai_autolog.py"
"tests/fastai/test_fastai_model_export.py"

# Requires `spacy` which would cause a circular dependency
"tests/spacy/test_spacy_model_export.py"

# Requires `tensorflow.keras` which is not included in our outdated version of `tensorflow` (2.13.0)
"tests/gateway/providers/test_ai21labs.py"
"tests/tensorflow/test_keras_model_export.py"
"tests/tensorflow/test_keras_pyfunc_model_works_with_all_input_types.py"
"tests/tensorflow/test_mlflow_callback.py"
];

# I (@GaetanLepage) gave up on enabling tests:
# - They require a lot of dependencies (some unpackaged);
# - Many errors occur at collection time;
# - Most (all?) tests require internet access anyway.
doCheck = false;

meta = with lib; {
meta = {
description = "Open source platform for the machine learning lifecycle";
mainProgram = "mlflow";
homepage = "https://github.com/mlflow/mlflow";
changelog = "https://github.com/mlflow/mlflow/blob/v${version}/CHANGELOG.md";
license = licenses.asl20;
maintainers = with maintainers; [ tbenst ];
license = lib.licenses.asl20;
maintainers = with lib.maintainers; [ tbenst ];
};
}
43 changes: 23 additions & 20 deletions pkgs/servers/mlflow-server/default.nix
Original file line number Diff line number Diff line change
@@ -1,36 +1,39 @@
{ python3, writeText}:
{ python3Packages, writers }:

let
py = python3.pkgs;
py = python3Packages;

gunicornScript = writers.writePython3 "gunicornMlflow" { } ''
import re
import sys
from gunicorn.app.wsgiapp import run
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', ''', sys.argv[0])
sys.exit(run())
'';
in
py.toPythonApplication
(py.mlflow.overridePythonAttrs(old: rec {
py.toPythonApplication (
py.mlflow.overridePythonAttrs (old: {

propagatedBuildInputs = old.propagatedBuildInputs ++ [
propagatedBuildInputs = old.dependencies ++ [
py.boto3
py.mysqlclient
];

postPatch = (old.postPatch or "") + ''
substituteInPlace mlflow/utils/process.py --replace \
"child = subprocess.Popen(cmd, env=cmd_env, cwd=cwd, universal_newlines=True," \
"cmd[0]='$out/bin/gunicornMlflow'; child = subprocess.Popen(cmd, env=cmd_env, cwd=cwd, universal_newlines=True,"
'';
postPatch =
(old.postPatch or "")
+ ''
cat mlflow/utils/process.py
gunicornScript = writeText "gunicornMlflow"
''
#!/usr/bin/env python
import re
import sys
from gunicorn.app.wsgiapp import run
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', ''', sys.argv[0])
sys.exit(run())
substituteInPlace mlflow/utils/process.py --replace-fail \
"process = subprocess.Popen(" \
"cmd[0]='${gunicornScript}'; process = subprocess.Popen("
'';

postInstall = ''
gpath=$out/bin/gunicornMlflow
cp ${gunicornScript} $gpath
chmod 555 $gpath
'';
}))
})
)
2 changes: 2 additions & 0 deletions pkgs/top-level/python-packages.nix
Original file line number Diff line number Diff line change
Expand Up @@ -2886,6 +2886,8 @@ self: super: with self; {

databricks-sql-connector = callPackage ../development/python-modules/databricks-sql-connector { };

databricks-sdk = callPackage ../development/python-modules/databricks-sdk { };

dataclass-factory = callPackage ../development/python-modules/dataclass-factory { };

dataclass-wizard = callPackage ../development/python-modules/dataclass-wizard { };
Expand Down

0 comments on commit fba3090

Please sign in to comment.