Skip to content

Commit

Permalink
update data_loader with the error message and add data_preprocessing.…
Browse files Browse the repository at this point in the history
…py module for removing missing values
  • Loading branch information
AshyScripts committed Oct 24, 2023
1 parent 82eb994 commit a34d653
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@ def load_ecommerce_data(path=None, encoding="ISO-8859-1"):

if data.empty:
print(f"The file {path} is empty!")
raise EmptyDataError(f"No data found in {path}!")
raise ValueError("The loaded dataframe is empty.")

return data
22 changes: 22 additions & 0 deletions src/data_preprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import pandas as pd

def remove_and_check_missing(df: pd.DataFrame) -> pd.DataFrame:
    """Drop rows lacking 'CustomerID' or 'Description', then verify no NaNs remain.

    Args:
        df: Dataframe containing at least the 'CustomerID' and
            'Description' columns. It is not modified; ``dropna`` returns
            a new dataframe.

    Returns:
        A new dataframe without the rows that had a missing value in
        'CustomerID' or 'Description'.

    Raises:
        ValueError: If any missing value is still present in any column
            after the rows above have been removed.
        KeyError: Propagated from ``DataFrame.dropna`` when one of the two
            required columns is absent.
    """
    # Remove rows with missing values in 'CustomerID' and 'Description'.
    cleaned = df.dropna(subset=['CustomerID', 'Description'])

    # Count the remaining missing cells once and reuse the result for both
    # the check and the error message.
    missing_count = int(cleaned.isna().sum().sum())
    if missing_count:
        message = f"There are {missing_count} missing values left in the dataframe."
        print(message)
        raise ValueError(message)

    return cleaned


0 comments on commit a34d653

Please sign in to comment.