Skip to content

Commit

Permalink
Final modifications
Browse files Browse the repository at this point in the history
  • Loading branch information
zazass8 committed Oct 23, 2024
1 parent 97ae9cf commit ab30da2
Show file tree
Hide file tree
Showing 4 changed files with 280 additions and 53 deletions.
38 changes: 28 additions & 10 deletions mlxtend/frequent_patterns/association_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,13 @@
# License: BSD 3 clause

from itertools import combinations
from typing import Optional

import numpy as np
import pandas as pd

from ..frequent_patterns import fpcommon as fpc

_metrics = [
"antecedent support",
"consequent support",
Expand All @@ -31,8 +34,8 @@

def association_rules(
df: pd.DataFrame,
df_or: pd.DataFrame,
num_itemsets: int,
df_orig: Optional[pd.DataFrame] = None,
null_values=False,
metric="confidence",
min_threshold=0.8,
Expand All @@ -48,13 +51,13 @@ def association_rules(
pandas DataFrame of frequent itemsets
with columns ['support', 'itemsets']
df_or : pandas DataFrame
DataFrame with original input data
df_orig : pandas DataFrame (default: None)
DataFrame with original input data. Only provided when null_values exist
num_itemsets : int
Number of transactions in original input data
null_values : bool (default: True)
null_values : bool (default: False)
In case there are null values as NaNs in the original input data
metric : string (default: 'confidence')
Expand Down Expand Up @@ -112,6 +115,13 @@ def association_rules(
https://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/
"""
# if null values exist, df_orig must be provided
if null_values and df_orig is None:
raise TypeError("If null values exist, df_orig must be provided.")

# check for valid input
fpc.valid_input_check(df_orig, null_values)

if not df.shape[0]:
raise ValueError(
"The input DataFrame `df` containing " "the frequent itemsets is empty."
Expand All @@ -125,8 +135,8 @@ def association_rules(
)

def kulczynski_helper(sAC, sA, sC, disAC, disA, disC, dis_int, dis_int_):
    """Return the Kulczynski measure for a rule A -> C.

    The measure is the mean of the two confidences conf(A->C) and
    conf(C->A). Each confidence is computed from the supports (sAC, sA,
    sC) rescaled by ``num_itemsets`` (taken from the enclosing
    association_rules scope) minus the corresponding disabled counts
    (disAC, disA, disC), with dis_int / dis_int_ subtracted from the
    denominators — presumably correcting for transactions disabled by
    NaN values (see the "disabled counts" section of the caller).
    """
    # NOTE(review): the pre-change unadjusted forms (sAC / sA and
    # sAC / sC) were dead assignments immediately overwritten below,
    # so only the NaN-adjusted confidences are kept here.
    conf_AC = sAC * (num_itemsets - disAC) / (sA * (num_itemsets - disA) - dis_int)
    conf_CA = sAC * (num_itemsets - disAC) / (sC * (num_itemsets - disC) - dis_int_)
    return (conf_AC + conf_CA) / 2

Expand Down Expand Up @@ -234,13 +244,21 @@ def certainty_metric_helper(sAC, sA, sC, disAC, disA, disC, dis_int, dis_int_):
rule_supports = []

# Define the disabled df; give it the same columns as the original df.
disabled = df_or.copy()
if null_values:
disabled = df_orig.copy()
disabled = np.where(pd.isna(disabled), 1, np.nan) + np.where(
(disabled == 0) | (disabled == 1), np.nan, 0
)
disabled = pd.DataFrame(disabled)
disabled.columns = df_or.columns
if all(isinstance(key, str) for key in list(frequent_items_dict.keys())[0]):
disabled.columns = df_orig.columns

if all(
isinstance(key, np.int64) for key in list(frequent_items_dict.keys())[0]
):
cols = np.arange(0, len(df_orig.columns), 1)
disabled.columns = cols
df_orig.columns = cols

# iterate over all frequent itemsets
for k in frequent_items_dict.keys():
Expand Down Expand Up @@ -280,8 +298,8 @@ def certainty_metric_helper(sAC, sA, sC, disAC, disA, disC, dis_int, dis_int_):
__dec = disabled.loc[:, list(consequent)]

# select data of antecedent and consequent from original
dec_ = df_or.loc[:, list(antecedent)]
dec__ = df_or.loc[:, list(consequent)]
dec_ = df_orig.loc[:, list(antecedent)]
dec__ = df_orig.loc[:, list(consequent)]

# disabled counts
disAC, disA, disC, dis_int, dis_int_ = 0, 0, 0, 0, 0
Expand Down
31 changes: 26 additions & 5 deletions mlxtend/frequent_patterns/fpcommon.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,18 +68,15 @@ def setup_fptree(df, min_support):
return tree, disabled, rank


def generate_itemsets(
generator, df_or, disabled, min_support, num_itemsets, colname_map
):
def generate_itemsets(generator, df, disabled, min_support, num_itemsets, colname_map):
itemsets = []
supports = []
df = df_or.copy().values
for sup, iset in generator:
itemsets.append(frozenset(iset))
# select data of iset from disabled dataset
dec = disabled[:, iset]
# select data of iset from original dataset
_dec = df[:, iset]
_dec = df.values[:, iset]

# case if iset only has one element
if len(iset) == 1:
Expand Down Expand Up @@ -122,6 +119,10 @@ def generate_itemsets(


def valid_input_check(df, null_values=False):
# Return early if df is None
if df is None:
return

if f"{type(df)}" == "<class 'pandas.core.frame.SparseDataFrame'>":
msg = (
"SparseDataFrame support has been deprecated in pandas 1.0,"
Expand Down Expand Up @@ -163,6 +164,20 @@ def valid_input_check(df, null_values=False):
"Please use a DataFrame with bool type",
DeprecationWarning,
)

# If null_values is True but no NaNs are found, warn that this is inefficient
has_nans = pd.isna(df).any().any()
if null_values and not has_nans:
warnings.warn(
"null_values=True is inefficient when there are no NaN values in the DataFrame."
"Set null_values=False for faster output."
)
# If null_values is False but NaNs are found, raise an error
if not null_values and has_nans:
raise ValueError(
"NaN values are not permitted in the DataFrame when null_values=False."
)

# Pandas is much slower than numpy, so use np.where on Numpy arrays
if hasattr(df, "sparse"):
if df.size == 0:
Expand All @@ -185,6 +200,12 @@ def valid_input_check(df, null_values=False):
"The allowed values for a DataFrame"
" are True, False, 0, 1. Found value %s" % (val)
)

if null_values:
s = (
"The allowed values for a DataFrame"
" are True, False, 0, 1, NaN. Found value %s" % (val)
)
raise ValueError(s)


Expand Down
2 changes: 1 addition & 1 deletion mlxtend/frequent_patterns/fpgrowth.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def fpgrowth(
The support is computed as the fraction
transactions_where_item(s)_occur / total_transactions.
null_values : bool (default: True)
null_values : bool (default: False)
In case there are null values as NaNs in the original input data
use_colnames : bool (default: False)
Expand Down
Loading

0 comments on commit ab30da2

Please sign in to comment.