From e2730e205ede26d447191b715048c315afa3ff53 Mon Sep 17 00:00:00 2001 From: Martin Vonk Date: Thu, 7 Mar 2024 13:51:34 +0100 Subject: [PATCH] add check for duplicated index #30 --- src/spei/utils.py | 9 +++++++++ tests/test_validate.py | 20 +++++++++++++++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/spei/utils.py b/src/spei/utils.py index 93ed57e..2b086bd 100644 --- a/src/spei/utils.py +++ b/src/spei/utils.py @@ -49,6 +49,15 @@ def validate_index(index: Index) -> DatetimeIndex: ) index = DatetimeIndex(to_datetime(index)) + if index.has_duplicates: + msg = ( + "Duplicated indices found. Please remove them. For instance by" + " using `series = " + "series.loc[~series.index.duplicated(keep='first/last')]`" + ) + logging.error(msg) + raise ValueError(msg) + return index diff --git a/tests/test_validate.py b/tests/test_validate.py index 537466d..17ea6dd 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -1,23 +1,37 @@ import logging import pytest -from pandas import DataFrame, Series, to_datetime +from pandas import DataFrame, DatetimeIndex, Series, Timestamp, to_datetime from spei.utils import validate_index, validate_series def test_validate_index(caplog) -> None: caplog.set_level(logging.INFO) - series = Series([1, 2, 3], index=["2018", "2019", "2020"]) + series = Series([1.0, 2.0, 3.0], index=["2018", "2019", "2020"]) validate_index(series.index) msg = ( f"Expected the index to be a DatetimeIndex. Automatically converted " f"{type(series.index)} using pd.to_datetime(Index)\n" ) - print(f"{caplog.text=}") assert msg in caplog.text +def test_validate_index_duplicated(caplog) -> None: + caplog.set_level(logging.ERROR) + series = Series( + [1.0, 1.0], + index=DatetimeIndex([Timestamp("2000-01-01"), Timestamp("2000-01-01")]), + ) + with pytest.raises(ValueError): + validate_index(series.index) + msg = ( + "Duplicated indices found. Please remove them. 
For instance by using" + " `series = series.loc[~series.index.duplicated(keep='first/last')]`" + ) + assert msg in caplog.text + + def test_validate_series() -> None: with pytest.raises(TypeError): validate_series([1, 2, 3])