From bb7807467207ab2ddcd097b136edc99cae244be2 Mon Sep 17 00:00:00 2001 From: Ashkan Ghanavati Date: Wed, 25 Oct 2023 14:21:34 -0400 Subject: [PATCH] final cleaning to follow pylint rules for spacing and docstrings --- src/anomaly_code_handler.py | 27 +++++++++++---------- src/data_loader.py | 32 ++++++++++--------------- src/duplicates_handler.py | 23 +++++++++++------- src/missing_values_handler.py | 21 ++++++++-------- src/transaction_status_handler.py | 25 ++++++++++--------- test/test_anomaly_code_handler.py | 25 ++++++++++--------- test/test_data_loader.py | 16 ++++++------- test/test_duplicates_handler.py | 30 +++++++++++++---------- test/test_missing_values_handler.py | 22 ++++++++++------- test/test_transaction_status_handler.py | 25 ++++++++++--------- 10 files changed, 130 insertions(+), 116 deletions(-) diff --git a/src/anomaly_code_handler.py b/src/anomaly_code_handler.py index 529cf46..82bcf07 100644 --- a/src/anomaly_code_handler.py +++ b/src/anomaly_code_handler.py @@ -1,40 +1,43 @@ -import pandas as pd +""" +A module for removing anomalies from StockCode column if they have 0 or 1 +digit characters since the normal values are 5 or 6 digits. 
+""" + import pickle import os # Determine the absolute path of the project directory PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +INPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', + 'processed','after_transaction_status.pkl') +OUTPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', + 'processed', 'after_anomaly_code.pkl') -INPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', 'processed','after_transaction_status.pkl') -OUTPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', 'processed', 'after_anomaly_code.pkl') - -def handle_anomalous_codes(input_pickle_path=INPUT_PICKLE_PATH, output_pickle_path=OUTPUT_PICKLE_PATH): +def handle_anomalous_codes(input_pickle_path=INPUT_PICKLE_PATH, + output_pickle_path=OUTPUT_PICKLE_PATH): """ Load the DataFrame from the input pickle, remove rows with stock codes that - have 0 or 1 numeric characters, then save the DataFrame back to a pickle and return its path. + have 0 or 1 numeric characters, + then save the DataFrame back to a pickle and return its path. :param input_pickle_path: Path to the input pickle file. :param output_pickle_path: Path to the output pickle file. :return: Path to the saved pickle file. 
""" - # Load DataFrame from input pickle if os.path.exists(input_pickle_path): with open(input_pickle_path, "rb") as file: df = pickle.load(file) else: raise FileNotFoundError(f"No data found at the specified path: {input_pickle_path}") - # Finding the stock codes with 0 and 1 numeric characters unique_stock_codes = df['StockCode'].unique() - anomalous_stock_codes = [code for code in unique_stock_codes if sum(c.isdigit() for c in str(code)) in (0, 1)] - + anomalous_stock_codes = [code for code in unique_stock_codes if + sum(c.isdigit() for c in str(code)) in (0, 1)] # Removing rows with these anomalous stock codes df = df[~df['StockCode'].isin(anomalous_stock_codes)] - # Save the data to output pickle with open(output_pickle_path, "wb") as file: pickle.dump(df, file) - print(f"Data saved to {output_pickle_path}.") return output_pickle_path diff --git a/src/data_loader.py b/src/data_loader.py index 67dc4e4..149f296 100644 --- a/src/data_loader.py +++ b/src/data_loader.py @@ -1,55 +1,47 @@ +""" +Module to handle the loading of e-commerce dataset from either pickle or Excel file format. +""" + +import pickle import os import pandas as pd -import pickle # Determine the absolute path of the project directory PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # Use the project directory to construct paths to other directories -DEFAULT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', 'processed', 'raw_data.pkl') +DEFAULT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', + 'processed', 'raw_data.pkl') DEFAULT_EXCEL_PATH = os.path.join(PROJECT_DIR, 'data', 'Online Retail.xlsx') def load_data(pickle_path=DEFAULT_PICKLE_PATH, excel_path=DEFAULT_EXCEL_PATH): """ - Load the e-commerce dataset. + Load the e-commerce dataset. First, try to load from the pickle file. If it doesn't exist, load from the excel file. - Regardless of the source, save the loaded data as a pickle for future use and return the path to that pickle. 
+ Regardless of the source, save the loaded data as a pickle for future use and + return the path to that pickle. :param pickle_path: Path to the pickle file. - :param csv_path: Path to the Excel file. + :param excel_path: Path to the Excel file. :return: Path to the saved pickle file. """ - # Placeholder for the DataFrame df = None - # Check if pickle file exists if os.path.exists(pickle_path): with open(pickle_path, "rb") as file: df = pickle.load(file) print(f"Data loaded successfully from {pickle_path}.") - - # If pickle doesn't exist, load CSV + # If pickle doesn't exist, load from Excel elif os.path.exists(excel_path): df = pd.read_excel(excel_path) print(f"Data loaded from {excel_path}.") - - else: error_message = f"No data found in the specified paths: {pickle_path} or {excel_path}" print(error_message) raise FileNotFoundError(error_message) - # Save the data to pickle for future use (or re-save it if loaded from existing pickle) with open(pickle_path, "wb") as file: pickle.dump(df, file) - print(f"Data saved to {pickle_path} for future use.") return pickle_path - - - - - - - diff --git a/src/duplicates_handler.py b/src/duplicates_handler.py index 2b03919..82824d8 100644 --- a/src/duplicates_handler.py +++ b/src/duplicates_handler.py @@ -1,12 +1,22 @@ -import pandas as pd +""" +A module for removing duplicates in dataset based on subset of +following columns: +- InvoiceNo +- StockCode +- Description +- CustomerID +- Quantity +""" + import pickle import os # Determine the absolute path of the project directory PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - -INPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', 'processed','after_missing_values.pkl') -OUTPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', 'processed', 'after_duplicates.pkl') +INPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', + 'processed','after_missing_values.pkl') +OUTPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', + 'processed', 'after_duplicates.pkl') 
 def remove_duplicates(input_pickle_path=INPUT_PICKLE_PATH, output_pickle_path=OUTPUT_PICKLE_PATH): """ @@ -17,23 +27,18 @@ def remove_duplicates(input_pickle_path=INPUT_PICKLE_PATH, output_pickle_path=OU :param output_pickle_path: Path to the output pickle file. :return: Path to the saved pickle file. """ - # Load DataFrame from input pickle if os.path.exists(input_pickle_path): with open(input_pickle_path, "rb") as file: df = pickle.load(file) else: raise FileNotFoundError(f"No data found at the specified path: {input_pickle_path}") - # Columns to check for duplicates columns_to_check = ['InvoiceNo', 'StockCode', 'Description', 'CustomerID', 'Quantity'] - # Drop duplicates df = df.drop_duplicates(subset=columns_to_check) - # Save the data to output pickle with open(output_pickle_path, "wb") as file: pickle.dump(df, file) - print(f"Data saved to {output_pickle_path}.") return output_pickle_path diff --git a/src/missing_values_handler.py b/src/missing_values_handler.py index ae6bec4..b4e41d7 100644 --- a/src/missing_values_handler.py +++ b/src/missing_values_handler.py @@ -1,7 +1,10 @@ -import pickle -import pandas as pd +""" +A module for removing missing values from dataset based on CustomerID +and Description column. +""" + import os -# ======================== Missing Values ======================== +import pickle # Determine the absolute path of the project directory PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -11,36 +14,32 @@ def handle_missing(input_pickle_path=INPUT_PICKLE_PATH, output_pickle_path=OUTPUT_PICKLE_PATH): """ - Load the DataFrame from the input pickle, remove rows with missing values in 'CustomerID' and 'Description' columns. + Load the DataFrame from the input pickle, + remove rows with missing values in 'CustomerID' and 'Description' columns. Then, check if there are any missing values left in the dataframe. - If there are, raise a ValueError. Finally, save the DataFrame back to a pickle and return its path. 
+ If there are, raise a ValueError. Finally, + save the DataFrame back to a pickle and return its path. :param input_pickle_path: Path to the input pickle file. :param output_pickle_path: Path to the output pickle file. :return: Path to the saved pickle file. """ - # Load DataFrame from input pickle if os.path.exists(input_pickle_path): with open(input_pickle_path, "rb") as file: df = pickle.load(file) else: raise FileNotFoundError(f"No data found at the specified path: {input_pickle_path}") - # Remove rows with missing values in 'CustomerID' and 'Description' df = df.dropna(subset=['CustomerID', 'Description']) - # Check if there are any missing values left if df.isna().sum().sum() != 0: missing_count = df.isna().sum().sum() message = f"There are {missing_count} missing values left in the dataframe." print(message) raise ValueError(message) - # Save the data to output pickle with open(output_pickle_path, "wb") as file: pickle.dump(df, file) - print(f"Data saved to {output_pickle_path}.") return output_pickle_path - diff --git a/src/transaction_status_handler.py b/src/transaction_status_handler.py index 63c90eb..ace6197 100644 --- a/src/transaction_status_handler.py +++ b/src/transaction_status_handler.py @@ -1,15 +1,22 @@ -import pandas as pd +""" +A module for adding a new column named transaction_status based on the +starting character of InvoiceNo column. +transaction_status values are Cancelled or Completed. 
+""" + import pickle import os import numpy as np # Determine the absolute path of the project directory PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +INPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', + 'processed','after_duplicates.pkl') +OUTPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', + 'processed', 'after_transaction_status.pkl') -INPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', 'processed','after_duplicates.pkl') -OUTPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', 'processed', 'after_transaction_status.pkl') - -def handle_transaction_status(input_pickle_path=INPUT_PICKLE_PATH, output_pickle_path=OUTPUT_PICKLE_PATH): +def handle_transaction_status(input_pickle_path=INPUT_PICKLE_PATH, + output_pickle_path=OUTPUT_PICKLE_PATH): """ Load the DataFrame from the input pickle, add a 'transaction_status' column to indicate whether the transaction was 'Cancelled' or 'Completed'. @@ -21,24 +28,20 @@ def handle_transaction_status(input_pickle_path=INPUT_PICKLE_PATH, output_pickle :raises KeyError: If the 'InvoiceNo' column doesn't exist in the dataframe. 
""" - # Load DataFrame from input pickle if os.path.exists(input_pickle_path): with open(input_pickle_path, "rb") as file: df = pickle.load(file) else: raise FileNotFoundError(f"No data found at the specified path: {input_pickle_path}") - # Check if 'InvoiceNo' column exists if 'InvoiceNo' not in df.columns: raise KeyError("The input dataframe does not contain an 'InvoiceNo' column.") - # Add the 'Transaction_Status' column - df['transaction_status'] = np.where(df['InvoiceNo'].astype(str).str.startswith('C'), 'Cancelled', 'Completed') - + df['transaction_status'] = np.where(df['InvoiceNo'].astype(str).str.startswith('C'), + 'Cancelled', 'Completed') # Save the data to output pickle with open(output_pickle_path, "wb") as file: pickle.dump(df, file) - print(f"Data saved to {output_pickle_path}.") return output_pickle_path diff --git a/test/test_anomaly_code_handler.py b/test/test_anomaly_code_handler.py index 0bf3f9b..8058a71 100644 --- a/test/test_anomaly_code_handler.py +++ b/test/test_anomaly_code_handler.py @@ -1,30 +1,33 @@ -import pytest +""" +A module for testing anomaly_code_handler module. +""" + import os import pickle from src.anomaly_code_handler import handle_anomalous_codes # Determine the absolute path of the project directory PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - -INPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', 'processed','after_transaction_status.pkl') -OUTPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', 'processed', 'after_anomaly_code.pkl') +INPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', + 'processed','after_transaction_status.pkl') +OUTPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', + 'processed', 'after_anomaly_code.pkl') def test_handle_anomalous_codes(): """ Test that handle_anomalous_codes correctly removes rows with stock codes that have 0 or 1 numeric characters. 
""" - result = handle_anomalous_codes(input_pickle_path=INPUT_PICKLE_PATH, output_pickle_path=OUTPUT_PICKLE_PATH) - assert result == OUTPUT_PICKLE_PATH, f"Expected {OUTPUT_PICKLE_PATH}, but got {result}." - + result = handle_anomalous_codes(input_pickle_path=INPUT_PICKLE_PATH, + output_pickle_path=OUTPUT_PICKLE_PATH) + assert result == OUTPUT_PICKLE_PATH,\ + f"Expected {OUTPUT_PICKLE_PATH}, but got {result}." # Load the output pickle file to check the 'StockCode' column with open(OUTPUT_PICKLE_PATH, "rb") as file: df = pickle.load(file) - # Check for stock codes with 0 or 1 numeric characters unique_stock_codes = df['StockCode'].unique() - anomalous_stock_codes = [code for code in unique_stock_codes if sum(c.isdigit() for c in str(code)) in (0, 1)] - + anomalous_stock_codes = [code for code in unique_stock_codes if + sum(c.isdigit() for c in str(code)) in (0, 1)] # Assert that no such anomalous stock codes exist assert len(anomalous_stock_codes) == 0, "Anomalous stock codes found in the dataframe." - diff --git a/test/test_data_loader.py b/test/test_data_loader.py index 8cfba50..b580cc5 100644 --- a/test/test_data_loader.py +++ b/test/test_data_loader.py @@ -1,5 +1,9 @@ -import pytest +""" +Tests for data_loader module. +""" + import os +import pytest from src.data_loader import load_data # Determine the absolute path of the project directory @@ -15,25 +19,22 @@ # """ # # Ensure the pickle file exists for this test # assert os.path.exists(PICKLE_PATH), "Pickle file doesn't exist for testing." - # result = load_data(pickle_path=PICKLE_PATH, csv_path=CSV_PATH) # assert result == PICKLE_PATH, f"Expected {PICKLE_PATH}, but got {result}." def test_load_data_from_excel(): """ - Test that load_data correctly loads data from Excel and saves as pickle when pickle doesn't exist. + Test that load_data correctly loads data from Excel and saves as pickle + when pickle doesn't exist. 
""" # Temporarily rename the pickle to simulate its absence if os.path.exists(PICKLE_PATH): os.rename(PICKLE_PATH, PICKLE_PATH + ".bak") - result = load_data(pickle_path=PICKLE_PATH, excel_path=EXCEL_PATH) assert result == PICKLE_PATH, f"Expected {PICKLE_PATH}, but got {result}." - # Rename pickle back to its original name if os.path.exists(PICKLE_PATH + ".bak"): os.rename(PICKLE_PATH + ".bak", PICKLE_PATH) - def test_load_data_no_files(): """ Test that load_data raises an error when neither pickle nor Excel exists. @@ -43,13 +44,10 @@ def test_load_data_no_files(): os.rename(PICKLE_PATH, PICKLE_PATH + ".bak") if os.path.exists(EXCEL_PATH): os.rename(EXCEL_PATH, EXCEL_PATH + ".bak") - with pytest.raises(FileNotFoundError): load_data(pickle_path=PICKLE_PATH, excel_path=EXCEL_PATH) - # Rename files back to their original names if os.path.exists(PICKLE_PATH + ".bak"): os.rename(PICKLE_PATH + ".bak", PICKLE_PATH) if os.path.exists(EXCEL_PATH + ".bak"): os.rename(EXCEL_PATH + ".bak", EXCEL_PATH) - diff --git a/test/test_duplicates_handler.py b/test/test_duplicates_handler.py index 47d6522..7f66c34 100644 --- a/test/test_duplicates_handler.py +++ b/test/test_duplicates_handler.py @@ -1,14 +1,18 @@ -import pytest +""" +A test for duplicates_handler module. 
+""" + import os -import pandas as pd import pickle +import pytest from src.duplicates_handler import remove_duplicates # Determine the absolute path of the project directory PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - -INPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', 'processed','after_missing_values.pkl') -OUTPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', 'processed', 'after_duplicates.pkl') +INPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', + 'processed','after_missing_values.pkl') +OUTPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', + 'processed', 'after_duplicates.pkl') def test_remove_duplicates_no_input_file(): """ @@ -17,24 +21,24 @@ def test_remove_duplicates_no_input_file(): # Temporarily rename the input file if os.path.exists(INPUT_PICKLE_PATH): os.rename(INPUT_PICKLE_PATH, INPUT_PICKLE_PATH + ".bak") - with pytest.raises(FileNotFoundError): - remove_duplicates(input_pickle_path=INPUT_PICKLE_PATH, output_pickle_path=OUTPUT_PICKLE_PATH) - + remove_duplicates(input_pickle_path=INPUT_PICKLE_PATH, + output_pickle_path=OUTPUT_PICKLE_PATH) # Rename input file back to its original name if os.path.exists(INPUT_PICKLE_PATH + ".bak"): os.rename(INPUT_PICKLE_PATH + ".bak", INPUT_PICKLE_PATH) def test_remove_duplicates(): """ - Test that remove_duplicates correctly removes duplicates and saves to the output pickle. + Test that remove_duplicates correctly removes duplicates and + saves to the output pickle. """ - result = remove_duplicates(input_pickle_path=INPUT_PICKLE_PATH, output_pickle_path=OUTPUT_PICKLE_PATH) + result = remove_duplicates(input_pickle_path=INPUT_PICKLE_PATH, + output_pickle_path=OUTPUT_PICKLE_PATH) assert result == OUTPUT_PICKLE_PATH, f"Expected {OUTPUT_PICKLE_PATH}, but got {result}." 
- # Check if duplicates are truly removed with open(OUTPUT_PICKLE_PATH, "rb") as file: df = pickle.load(file) columns_to_check = ['InvoiceNo', 'StockCode', 'Description', 'CustomerID', 'Quantity'] - assert not df.duplicated(subset=columns_to_check).any(), "There are still duplicates in the dataframe." - + assert not df.duplicated(subset=columns_to_check).any(),\ + "There are still duplicates in the dataframe." diff --git a/test/test_missing_values_handler.py b/test/test_missing_values_handler.py index 106f943..fb9f5e2 100644 --- a/test/test_missing_values_handler.py +++ b/test/test_missing_values_handler.py @@ -1,20 +1,25 @@ +""" +A test module for testing missing_values_handler module. +""" + +import os import pytest from src.missing_values_handler import handle_missing -import os # Determine the absolute path of the project directory PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - -INPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', 'processed', 'raw_data.pkl') -OUTPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', 'processed','after_missing_values.pkl') +INPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', + 'processed', 'raw_data.pkl') +OUTPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', + 'processed','after_missing_values.pkl') def test_handle_missing_success(): """ Test successful removal of rows with missing values and saving of the dataframe. """ - result = handle_missing(input_pickle_path=INPUT_PICKLE_PATH, output_pickle_path=OUTPUT_PICKLE_PATH) + result = handle_missing(input_pickle_path=INPUT_PICKLE_PATH, + output_pickle_path=OUTPUT_PICKLE_PATH) assert result == OUTPUT_PICKLE_PATH, f"Expected {OUTPUT_PICKLE_PATH}, but got {result}." - def test_handle_missing_file_not_found(): """ Test that handle_missing raises an error when the input pickle doesn't exist. 
@@ -22,10 +27,9 @@ def test_handle_missing_file_not_found(): # Rename the input pickle temporarily to simulate its absence if os.path.exists(INPUT_PICKLE_PATH): os.rename(INPUT_PICKLE_PATH, INPUT_PICKLE_PATH + ".bak") - with pytest.raises(FileNotFoundError): - handle_missing(input_pickle_path=INPUT_PICKLE_PATH, output_pickle_path=OUTPUT_PICKLE_PATH) - + handle_missing(input_pickle_path=INPUT_PICKLE_PATH, + output_pickle_path=OUTPUT_PICKLE_PATH) # Rename the input pickle back to its original name if os.path.exists(INPUT_PICKLE_PATH + ".bak"): os.rename(INPUT_PICKLE_PATH + ".bak", INPUT_PICKLE_PATH) diff --git a/test/test_transaction_status_handler.py b/test/test_transaction_status_handler.py index aacfb38..83df9cb 100644 --- a/test/test_transaction_status_handler.py +++ b/test/test_transaction_status_handler.py @@ -1,30 +1,33 @@ -import pytest +""" +A module for testing transaction_status_handler module. +""" + import os import pickle from src.transaction_status_handler import handle_transaction_status # Determine the absolute path of the project directory PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - -INPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', 'processed','after_duplicates.pkl') -OUTPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', 'processed', 'after_transaction_status.pkl') +INPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', + 'processed','after_duplicates.pkl') +OUTPUT_PICKLE_PATH = os.path.join(PROJECT_DIR, 'data', + 'processed', 'after_transaction_status.pkl') def test_handle_transaction_status(): """ Test that handle_transaction_status correctly adds the 'transaction_status' column based on the 'InvoiceNo' and ensures statuses are 'Cancelled' or 'Completed'. 
""" - result = handle_transaction_status(input_pickle_path=INPUT_PICKLE_PATH, output_pickle_path=OUTPUT_PICKLE_PATH) + result = handle_transaction_status(input_pickle_path=INPUT_PICKLE_PATH, + output_pickle_path=OUTPUT_PICKLE_PATH) assert result == OUTPUT_PICKLE_PATH, f"Expected {OUTPUT_PICKLE_PATH}, but got {result}." - # Load the output pickle file and check the 'transaction_status' column with open(OUTPUT_PICKLE_PATH, "rb") as file: df = pickle.load(file) - # Assert that 'transaction_status' column exists - assert 'transaction_status' in df.columns, "'transaction_status' column not found in the dataframe." - + assert 'transaction_status' in df.columns,\ + "'transaction_status' column not found in the dataframe." # Check if all values in 'transaction_status' are either 'Cancelled' or 'Completed' unique_statuses = df['transaction_status'].unique() - assert set(unique_statuses) == {'Cancelled', 'Completed'}, "Unexpected values found in 'transaction_status' column." - + assert set(unique_statuses) == {'Cancelled', 'Completed'},\ + "Unexpected values found in 'transaction_status' column."