diff --git a/src/data_loader.py b/src/data_loader.py index c1c1f1d..5878b87 100644 --- a/src/data_loader.py +++ b/src/data_loader.py @@ -31,6 +31,6 @@ def load_ecommerce_data(path=None, encoding="ISO-8859-1"): if data.empty: print(f"The file {path} is empty!") - raise EmptyDataError(f"No data found in {path}!") + raise ValueError("The loaded dataframe is empty.") return data diff --git a/src/data_preprocessing.py b/src/data_preprocessing.py new file mode 100644 index 0000000..02ff270 --- /dev/null +++ b/src/data_preprocessing.py @@ -0,0 +1,22 @@ +import pandas as pd + +def remove_and_check_missing(df): + """ + Remove rows with missing values in 'CustomerID' and 'Description' columns. + Then, check if there are any missing values left in the dataframe. + If there are, raise a MissingValueError. + """ + + # Remove rows with missing values in 'CustomerID' and 'Description' + df = df.dropna(subset=['CustomerID', 'Description']) + + # Check if there are any missing values left + if df.isna().sum().sum() != 0: + missing_count = df.isna().sum().sum() + message = f"There are {missing_count} missing values left in the dataframe." + print(message) + raise ValueError(message) + + return df + +