diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cfa2b332..41b8f8a0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,5 +66,7 @@ jobs: run: | mkdir ../temp cp README.md ../temp + mkdir ../temp/data + cp data/housing.parquet ../temp/data cd ../temp pixi run --manifest-path ../glum/pixi.toml -e ${{ matrix.environment }} python -m doctest -v README.md diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 118587a1..a2ee4f7b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,10 +7,10 @@ Changelog ========= -UNRELEASED ----------- +3.1.1 - 2025-01-13 +------------------ -**Bug fix: +**Bug fix:** - Fixed a bug where :meth:`~glum.TweedieDistribution._rowwise_gradient_hessian` and :meth:`~glum.TweedieDistribution._eta_mu_deviance` would call functions with wrong arguments in the ``p = 3`` case. - Fixed :class:`glum.InverseGaussianDistribution` not using the optimized gradient, Hessian and deviance implementations, as well as those derivatives having the wrong sign. @@ -34,6 +34,7 @@ UNRELEASED - Fixed a bug where :meth:`glum.GeneralizedLinearRegressor.fit` would raise a ``dtype`` mismatch error if fit with ``alpha_search=True``. - Use data type (``float64`` or ``float32``) dependent precision in solvers. + 3.0.2 - 2024-06-25 ------------------ @@ -48,6 +49,7 @@ UNRELEASED - Removed libblas MKL from the development environment. - Replaced deprecated 'oldest-supported-numpy' dependency with 'numpy' to support 2.0 release. + 3.0.1 - 2024-05-23 ------------------ diff --git a/README.md b/README.md index 110cf195..e55c625d 100644 --- a/README.md +++ b/README.md @@ -33,15 +33,18 @@ Why did we choose the name `glum`? We wanted a name that had the letters GLM and # A classic example predicting housing prices ```python +>>> import pandas as pd >>> from sklearn.datasets import fetch_openml >>> from glum import GeneralizedLinearRegressor >>> >>> # This dataset contains house sale prices for King County, which includes >>> # Seattle. It includes homes sold between May 2014 and May 2015. ->>> house_data = fetch_openml(name="house_sales", version=3, as_frame=True) +>>> # The full version of this dataset can be found at: +>>> # https://www.openml.org/search?type=data&status=active&id=42092 +>>> house_data = pd.read_parquet("data/housing.parquet") >>> >>> # Use only select features ->>> X = house_data.data[ +>>> X = house_data[ ... [ ... "bedrooms", ... "bathrooms", @@ -59,7 +62,7 @@ Why did we choose the name `glum`? We wanted a name that had the letters GLM and >>> >>> # Model whether a house had an above or below median price via a Binomial >>> # distribution. We'll be doing L1-regularized logistic regression. ->>> price = house_data.target +>>> price = house_data["price"] >>> y = (price < price.median()).values.astype(int) >>> model = GeneralizedLinearRegressor( ... family='binomial', @@ -88,7 +91,7 @@ n_iter ... alpha=0.001, ... formula="bedrooms + np.log(bathrooms + 1) + bs(sqft_living, 3) + C(waterfront)" ... ) ->>> _ = model_formula.fit(X=house_data.data, y=y) +>>> _ = model_formula.fit(X=house_data, y=y) ``` diff --git a/pixi.toml b/pixi.toml index 31344b83..de08f5a7 100644 --- a/pixi.toml +++ b/pixi.toml @@ -13,8 +13,8 @@ store-benchmark-golden-master = "python tests/glm/test_benchmark_golden_master.p [feature.docs.tasks] make-docs = "cd docs && make html" -serve-docs = { cmd = "python -m http.server --directory docs/_build/html", depends_on = ["make-docs"] } -readthedocs = { cmd = "rm -rf $READTHEDOCS_OUTPUT/html && cp -r docs/_build/html $READTHEDOCS_OUTPUT/html", depends_on = ["make-docs"] } +serve-docs = { cmd = "python -m http.server --directory docs/_build/html", depends-on = ["make-docs"] } +readthedocs = { cmd = "rm -rf $READTHEDOCS_OUTPUT/html && cp -r docs/_build/html $READTHEDOCS_OUTPUT/html", depends-on = ["make-docs"] } [feature.benchmark.tasks] glm-benchmarks-run = "glm_benchmarks_run"