From 82a12f5afdbda07c9d0c467b20a5c8c9bb2d93ec Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Wed, 10 Apr 2024 08:47:42 +0100 Subject: [PATCH] feat: support weekend argument in business_day_count (#15544) --- crates/polars-ops/src/series/ops/business.rs | 17 ++++++-- .../src/dsl/function_expr/business.rs | 12 +++--- .../polars-plan/src/dsl/functions/business.rs | 4 +- py-polars/polars/functions/business.py | 29 ++++++++++--- py-polars/src/functions/business.rs | 4 +- .../time_series/test_business_day_count.py | 13 ++++-- .../business/test_business_day_count.py | 41 +++++++++++++++++++ 7 files changed, 98 insertions(+), 22 deletions(-) diff --git a/crates/polars-ops/src/series/ops/business.rs b/crates/polars-ops/src/series/ops/business.rs index 5b792453b5c4..115ccf8ae389 100644 --- a/crates/polars-ops/src/series/ops/business.rs +++ b/crates/polars-ops/src/series/ops/business.rs @@ -2,12 +2,21 @@ use polars_core::prelude::arity::binary_elementwise_values; use polars_core::prelude::*; /// Count the number of business days between `start` and `end`, excluding `end`. -pub fn business_day_count(start: &Series, end: &Series) -> PolarsResult { +/// +/// # Arguments +/// - `start`: Series holding start dates. +/// - `end`: Series holding end dates. +/// - `week_mask`: A boolean array of length 7, where `true` indicates that the day is a business day. +pub fn business_day_count( + start: &Series, + end: &Series, + week_mask: [bool; 7], +) -> PolarsResult { + if !week_mask.iter().any(|&x| x) { + polars_bail!(ComputeError:"`week_mask` must have at least one business day"); + } let start_dates = start.date()?; let end_dates = end.date()?; - - // TODO: support customising weekdays - let week_mask: [bool; 7] = [true, true, true, true, true, false, false]; let n_business_days_in_week_mask = week_mask.iter().filter(|&x| *x).count() as i32; let out = match (start_dates.len(), end_dates.len()) { diff --git a/crates/polars-plan/src/dsl/function_expr/business.rs b/crates/polars-plan/src/dsl/function_expr/business.rs index f9a38b1165cc..745dcfdff8f5 100644 --- a/crates/polars-plan/src/dsl/function_expr/business.rs +++ b/crates/polars-plan/src/dsl/function_expr/business.rs @@ -12,7 +12,7 @@ use crate::prelude::SeriesUdf; #[derive(Clone, PartialEq, Debug, Eq, Hash)] pub enum BusinessFunction { #[cfg(feature = "business")] - BusinessDayCount, + BusinessDayCount { week_mask: [bool; 7] }, } impl Display for BusinessFunction { @@ -20,7 +20,7 @@ impl Display for BusinessFunction { use BusinessFunction::*; let s = match self { #[cfg(feature = "business")] - &BusinessDayCount => "business_day_count", + &BusinessDayCount { .. } => "business_day_count", }; write!(f, "{s}") } @@ -30,16 +30,16 @@ impl From for SpecialEq> { use BusinessFunction::*; match func { #[cfg(feature = "business")] - BusinessDayCount => { - map_as_slice!(business_day_count) + BusinessDayCount { week_mask } => { + map_as_slice!(business_day_count, week_mask) }, } } } #[cfg(feature = "business")] -pub(super) fn business_day_count(s: &[Series]) -> PolarsResult { +pub(super) fn business_day_count(s: &[Series], week_mask: [bool; 7]) -> PolarsResult { let start = &s[0]; let end = &s[1]; - polars_ops::prelude::business_day_count(start, end) + polars_ops::prelude::business_day_count(start, end, week_mask) } diff --git a/crates/polars-plan/src/dsl/functions/business.rs b/crates/polars-plan/src/dsl/functions/business.rs index 4bfdcc0b20cc..0a0210ced57f 100644 --- a/crates/polars-plan/src/dsl/functions/business.rs +++ b/crates/polars-plan/src/dsl/functions/business.rs @@ -1,12 +1,12 @@ use super::*; #[cfg(feature = "dtype-date")] -pub fn business_day_count(start: Expr, end: Expr) -> Expr { +pub fn business_day_count(start: Expr, end: Expr, week_mask: [bool; 7]) -> Expr { let input = vec![start, end]; Expr::Function { input, - function: FunctionExpr::Business(BusinessFunction::BusinessDayCount {}), + function: FunctionExpr::Business(BusinessFunction::BusinessDayCount { week_mask }), options: FunctionOptions { allow_rename: true, ..Default::default() diff --git a/py-polars/polars/functions/business.py b/py-polars/polars/functions/business.py index ae5791fde2a6..125bda15113e 100644 --- a/py-polars/polars/functions/business.py +++ b/py-polars/polars/functions/business.py @@ -1,7 +1,7 @@ from __future__ import annotations import contextlib -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Iterable from polars._utils.parse_expr_input import parse_as_expression from polars._utils.wrap import wrap_expr @@ -19,19 +19,21 @@ def business_day_count( start: date | IntoExprColumn, end: date | IntoExprColumn, + week_mask: Iterable[bool] = (True, True, True, True, True, False, False), ) -> Expr: """ Count the number of business days between `start` and `end` (not including `end`). - By default, Saturday and Sunday are excluded. The ability to - customise week mask and holidays is not yet implemented. - Parameters ---------- start Start dates. end End dates. + week_mask + Which days of the week to count. The default is Monday to Friday. + If you wanted to count only Monday to Thursday, you would pass + `(True, True, True, True, False, False, False)`. Returns ------- @@ -62,7 +64,24 @@ def business_day_count( Note how the two "count" columns differ due to the weekend (2020-01-04 - 2020-01-05) not being counted by `business_day_count`. + + You can pass a custom weekend - for example, if you only take Sunday off: + + >>> week_mask = (True, True, True, True, True, True, False) + >>> df.with_columns( + ... total_day_count=(pl.col("end") - pl.col("start")).dt.total_days(), + ... business_day_count=pl.business_day_count("start", "end", week_mask), + ... ) + shape: (2, 4) + ┌────────────┬────────────┬─────────────────┬────────────────────┐ + │ start ┆ end ┆ total_day_count ┆ business_day_count │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ date ┆ date ┆ i64 ┆ i32 │ + ╞════════════╪════════════╪═════════════════╪════════════════════╡ + │ 2020-01-01 ┆ 2020-01-02 ┆ 1 ┆ 1 │ + │ 2020-01-02 ┆ 2020-01-10 ┆ 8 ┆ 7 │ + └────────────┴────────────┴─────────────────┴────────────────────┘ """ start_pyexpr = parse_as_expression(start) end_pyexpr = parse_as_expression(end) - return wrap_expr(plr.business_day_count(start_pyexpr, end_pyexpr)) + return wrap_expr(plr.business_day_count(start_pyexpr, end_pyexpr, week_mask)) diff --git a/py-polars/src/functions/business.rs b/py-polars/src/functions/business.rs index 246f902b895a..0ca6ec058d4a 100644 --- a/py-polars/src/functions/business.rs +++ b/py-polars/src/functions/business.rs @@ -4,8 +4,8 @@ use pyo3::prelude::*; use crate::PyExpr; #[pyfunction] -pub fn business_day_count(start: PyExpr, end: PyExpr) -> PyExpr { +pub fn business_day_count(start: PyExpr, end: PyExpr, week_mask: [bool; 7]) -> PyExpr { let start = start.inner; let end = end.inner; - dsl::business_day_count(start, end).into() + dsl::business_day_count(start, end, week_mask).into() } diff --git a/py-polars/tests/parametric/time_series/test_business_day_count.py b/py-polars/tests/parametric/time_series/test_business_day_count.py index 0cb1bf95df33..7d9d61fbc5cb 100644 --- a/py-polars/tests/parametric/time_series/test_business_day_count.py +++ b/py-polars/tests/parametric/time_series/test_business_day_count.py @@ -4,7 +4,7 @@ import hypothesis.strategies as st import numpy as np -from hypothesis import given, reject +from hypothesis import assume, given, reject import polars as pl from polars._utils.various import parse_version @@ -13,17 +13,24 @@ @given( start=st.dates(min_value=dt.date(1969, 1, 1), max_value=dt.date(1970, 12, 31)), end=st.dates(min_value=dt.date(1969, 1, 1), max_value=dt.date(1970, 12, 31)), + week_mask=st.lists( + st.sampled_from([True, False]), + min_size=7, + max_size=7, + ), ) def test_against_np_busday_count( start: dt.date, end: dt.date, + week_mask: tuple[bool, ...], ) -> None: + assume(any(week_mask)) result = ( pl.DataFrame({"start": [start], "end": [end]}) - .select(n=pl.business_day_count("start", "end"))["n"] + .select(n=pl.business_day_count("start", "end", week_mask=week_mask))["n"] .item() ) - expected = np.busday_count(start, end) + expected = np.busday_count(start, end, weekmask=week_mask) if start > end and parse_version(np.__version__) < parse_version("1.25"): # Bug in old versions of numpy reject() diff --git a/py-polars/tests/unit/functions/business/test_business_day_count.py b/py-polars/tests/unit/functions/business/test_business_day_count.py index 74befbd3268b..13a1a05dbb7b 100644 --- a/py-polars/tests/unit/functions/business/test_business_day_count.py +++ b/py-polars/tests/unit/functions/business/test_business_day_count.py @@ -1,5 +1,7 @@ from datetime import date +import pytest + import polars as pl from polars.testing import assert_series_equal @@ -50,6 +52,45 @@ def test_business_day_count() -> None: assert_series_equal(result, expected) +def test_business_day_count_w_week_mask() -> None: + df = pl.DataFrame( + { + "start": [date(2020, 1, 1), date(2020, 1, 2)], + "end": [date(2020, 1, 2), date(2020, 1, 10)], + } + ) + result = df.select( + business_day_count=pl.business_day_count( + "start", "end", week_mask=(True, True, True, True, True, True, False) + ), + )["business_day_count"] + expected = pl.Series("business_day_count", [1, 7], pl.Int32) + assert_series_equal(result, expected) + + result = df.select( + business_day_count=pl.business_day_count( + "start", "end", week_mask=(True, True, True, False, False, False, True) + ), + )["business_day_count"] + expected = pl.Series("business_day_count", [1, 4], pl.Int32) + assert_series_equal(result, expected) + + +def test_business_day_count_w_week_mask_invalid() -> None: + with pytest.raises(ValueError, match=r"expected a sequence of length 7 \(got 2\)"): + pl.business_day_count("start", "end", week_mask=(False, 0)) # type: ignore[arg-type] + df = pl.DataFrame( + { + "start": [date(2020, 1, 1), date(2020, 1, 2)], + "end": [date(2020, 1, 2), date(2020, 1, 10)], + } + ) + with pytest.raises( + pl.ComputeError, match="`week_mask` must have at least one business day" + ): + df.select(pl.business_day_count("start", "end", week_mask=[False] * 7)) + + def test_business_day_count_schema() -> None: lf = pl.LazyFrame( {