From dd727403b65732cd46c3cd501e6bc4086d50859a Mon Sep 17 00:00:00 2001 From: Nachiket18 Date: Sat, 18 May 2024 16:07:10 -0400 Subject: [PATCH 01/17] Adding the code for calculating Jaccard Similiarity --- mlxtend/frequent_patterns/association_rules.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/mlxtend/frequent_patterns/association_rules.py b/mlxtend/frequent_patterns/association_rules.py index bcd243482..39853577d 100644 --- a/mlxtend/frequent_patterns/association_rules.py +++ b/mlxtend/frequent_patterns/association_rules.py @@ -14,7 +14,7 @@ import pandas as pd -def association_rules(df, metric="confidence", min_threshold=0.8, support_only=False): +def association_rules(df: pd.DataFrame, metric="confidence", min_threshold=0.8, support_only=False) -> pd.DataFrame: """Generates a DataFrame of association rules including the metrics 'score', 'confidence', and 'lift' @@ -116,6 +116,14 @@ def zhangs_metric_helper(sAC, sA, sC): zhangs_metric = np.where(denominator == 0, 0, numerator / denominator) return zhangs_metric + + def jaccard_metric_helper(sAC, sA, sC): + numerator = metric_dict["support"](sAC, sA, sC) + denominator = sA + sC - numerator + + jaccard_metric = numerator / denominator + return jaccard_metric + # metrics for association rules metric_dict = { @@ -127,6 +135,7 @@ def zhangs_metric_helper(sAC, sA, sC): "leverage": lambda sAC, sA, sC: metric_dict["support"](sAC, sA, sC) - sA * sC, "conviction": lambda sAC, sA, sC: conviction_helper(sAC, sA, sC), "zhangs_metric": lambda sAC, sA, sC: zhangs_metric_helper(sAC, sA, sC), + "jaccard": lambda sAC, sA, sC: jaccard_metric_helper(sAC, sA, sC), } columns_ordered = [ @@ -138,6 +147,7 @@ def zhangs_metric_helper(sAC, sA, sC): "leverage", "conviction", "zhangs_metric", + "jaccard" ] # check for metric compliance From 96eed5bf010904efb51aaad033a30cde052084c2 Mon Sep 17 00:00:00 2001 From: Rany Kamel Date: Tue, 21 May 2024 00:48:40 -0400 Subject: [PATCH 02/17] Added some jaccard 
tests --- .../tests/test_association_rules.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/mlxtend/frequent_patterns/tests/test_association_rules.py b/mlxtend/frequent_patterns/tests/test_association_rules.py index c2850924b..be42039ba 100644 --- a/mlxtend/frequent_patterns/tests/test_association_rules.py +++ b/mlxtend/frequent_patterns/tests/test_association_rules.py @@ -46,6 +46,7 @@ "leverage", "conviction", "zhangs_metric", + "jaccard", ] @@ -58,15 +59,15 @@ def test_default(): expect = pd.DataFrame( [ - [(8,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0], - [(6,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0], - [(8, 3), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0], - [(8, 5), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5], - [(8,), (3, 5), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5], - [(3,), (5,), 0.8, 1.0, 0.8, 1.0, 1.0, 0.0, np.inf, 0], - [(5,), (3,), 1.0, 0.8, 0.8, 0.8, 1.0, 0.0, 1.0, 0], - [(10,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0], - [(8,), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5], + [(8,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6], + [(6,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6], + [(8, 3), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6], + [(8, 5), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75], + [(8,), (3, 5), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75], + [(3,), (5,), 0.8, 1.0, 0.8, 1.0, 1.0, 0.0, np.inf, 0, 0.8], + [(5,), (3,), 1.0, 0.8, 0.8, 0.8, 1.0, 0.0, 1.0, 0, 0.8], + [(10,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6], + [(8,), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75], ], columns=columns_ordered, ) @@ -130,6 +131,7 @@ def test_empty_result(): "leverage", "conviction", "zhangs_metric", + "jaccard", ] ) res_df = association_rules(df_freq_items, min_threshold=2) From 3548d8905475a679277a47c2b12460e857591115 Mon Sep 17 00:00:00 2001 From: Rany Kamel Date: Tue, 21 May 2024 01:04:50 -0400 Subject: [PATCH 
03/17] More jaccard tests --- .../frequent_patterns/tests/test_association_rules.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mlxtend/frequent_patterns/tests/test_association_rules.py b/mlxtend/frequent_patterns/tests/test_association_rules.py index be42039ba..f72d279e5 100644 --- a/mlxtend/frequent_patterns/tests/test_association_rules.py +++ b/mlxtend/frequent_patterns/tests/test_association_rules.py @@ -178,6 +178,17 @@ def test_confidence(): assert res_df.values.shape[0] == 9 +def test_jaccard(): + res_df = association_rules(df_freq_items, min_threshold=0.7, metric="jaccard") + assert res_df.values.shape[0] == 8 + + res_df = association_rules( + df_freq_items_with_colnames, min_threshold=0.7, metric="jaccard" + ) + assert res_df.values.shape[0] == 8 + + + def test_frozenset_selection(): res_df = association_rules(df_freq_items) From 2e04122138297a56620580c77c54f686e631a12c Mon Sep 17 00:00:00 2001 From: Rany Kamel Date: Tue, 21 May 2024 01:20:58 -0400 Subject: [PATCH 04/17] Implemented Centered Confidence and Certainty Factor --- mlxtend/frequent_patterns/association_rules.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/mlxtend/frequent_patterns/association_rules.py b/mlxtend/frequent_patterns/association_rules.py index 39853577d..b67f934c8 100644 --- a/mlxtend/frequent_patterns/association_rules.py +++ b/mlxtend/frequent_patterns/association_rules.py @@ -123,6 +123,13 @@ def jaccard_metric_helper(sAC, sA, sC): jaccard_metric = numerator / denominator return jaccard_metric + + def certainty_metric_helper(sAC, sA, sC): + certainty_num = metric_dict["centered_confidence"](sAC, sA, sC) + certainty_denom = 1 - sC + + cert_metric = np.where(certainty_denom == 0, 0, certainty_num / certainty_denom) + return cert_metric # metrics for association rules @@ -136,6 +143,8 @@ def jaccard_metric_helper(sAC, sA, sC): "conviction": lambda sAC, sA, sC: conviction_helper(sAC, sA, sC), "zhangs_metric": lambda sAC, sA, sC: 
zhangs_metric_helper(sAC, sA, sC), "jaccard": lambda sAC, sA, sC: jaccard_metric_helper(sAC, sA, sC), + "centered_confidence": lambda sAC, sA, sC: metric_dict["confidence"](sAC, sA, sC) - sC, + "certainty": lambda sAC, sA, sC: certainty_metric_helper(sAC, sA, sC), } columns_ordered = [ @@ -147,7 +156,9 @@ def jaccard_metric_helper(sAC, sA, sC): "leverage", "conviction", "zhangs_metric", - "jaccard" + "jaccard", + "centered_confidence", + "certainty", ] # check for metric compliance From 776c1ada80b26f8f60d7350b1e426d53b7986dda Mon Sep 17 00:00:00 2001 From: Rany Kamel Date: Tue, 21 May 2024 01:28:07 -0400 Subject: [PATCH 05/17] Added parameter to choose only certain metrics for association_rules --- .../frequent_patterns/association_rules.py | 35 +++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/mlxtend/frequent_patterns/association_rules.py b/mlxtend/frequent_patterns/association_rules.py index b67f934c8..289d1b876 100644 --- a/mlxtend/frequent_patterns/association_rules.py +++ b/mlxtend/frequent_patterns/association_rules.py @@ -13,8 +13,21 @@ import numpy as np import pandas as pd +_metrics = [ + "antecedent support", + "consequent support", + "support", + "confidence", + "lift", + "leverage", + "conviction", + "zhangs_metric", + "jaccard", + "centered_confidence", + "certainty", +] -def association_rules(df: pd.DataFrame, metric="confidence", min_threshold=0.8, support_only=False) -> pd.DataFrame: +def association_rules(df: pd.DataFrame, metric = "confidence", min_threshold = 0.8, support_only = False, return_metrics: list = _metrics) -> pd.DataFrame: """Generates a DataFrame of association rules including the metrics 'score', 'confidence', and 'lift' @@ -147,20 +160,6 @@ def certainty_metric_helper(sAC, sA, sC): "certainty": lambda sAC, sA, sC: certainty_metric_helper(sAC, sA, sC), } - columns_ordered = [ - "antecedent support", - "consequent support", - "support", - "confidence", - "lift", - "leverage", - "conviction", - 
"zhangs_metric", - "jaccard", - "centered_confidence", - "certainty", - ] - # check for metric compliance if support_only: metric = "support" @@ -221,7 +220,7 @@ def certainty_metric_helper(sAC, sA, sC): # check if frequent rule was generated if not rule_supports: - return pd.DataFrame(columns=["antecedents", "consequents"] + columns_ordered) + return pd.DataFrame(columns=["antecedents", "consequents"] + return_metrics) else: # generate metrics @@ -233,7 +232,7 @@ def certainty_metric_helper(sAC, sA, sC): if support_only: sAC = rule_supports[0] - for m in columns_ordered: + for m in return_metrics: df_res[m] = np.nan df_res["support"] = sAC @@ -241,7 +240,7 @@ def certainty_metric_helper(sAC, sA, sC): sAC = rule_supports[0] sA = rule_supports[1] sC = rule_supports[2] - for m in columns_ordered: + for m in return_metrics: df_res[m] = metric_dict[m](sAC, sA, sC) return df_res From 4f5b4a5550e7d31440627f2215a82f7134822370 Mon Sep 17 00:00:00 2001 From: ankithn30 Date: Thu, 6 Jun 2024 19:50:09 -0400 Subject: [PATCH 06/17] --- .../tests/test_association_rules.py | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/mlxtend/frequent_patterns/tests/test_association_rules.py b/mlxtend/frequent_patterns/tests/test_association_rules.py index f72d279e5..d2e21948f 100644 --- a/mlxtend/frequent_patterns/tests/test_association_rules.py +++ b/mlxtend/frequent_patterns/tests/test_association_rules.py @@ -47,6 +47,8 @@ "conviction", "zhangs_metric", "jaccard", + "centered_confidence", + "certainty" ] @@ -59,24 +61,30 @@ def test_default(): expect = pd.DataFrame( [ - [(8,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6], - [(6,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6], - [(8, 3), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6], - [(8, 5), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75], - [(8,), (3, 5), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75], - [(3,), (5,), 0.8, 1.0, 0.8, 1.0, 1.0, 0.0, np.inf, 0, 0.8], - 
[(5,), (3,), 1.0, 0.8, 0.8, 0.8, 1.0, 0.0, 1.0, 0, 0.8], - [(10,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6], - [(8,), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75], + [(8,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0, 0.0], + [(6,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0, 0.0], + [(8, 3), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0, 0.0], + [(8, 5), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75, 0.2, 1.0], + [(8,), (3, 5), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75, 0.2, 1.0], + [(3,), (5,), 0.8, 1.0, 0.8, 1.0, 1.0, 0.0, np.inf, 0, 0.8, 0.0, 0.0], + [(5,), (3,), 1.0, 0.8, 0.8, 0.8, 1.0, 0.0, 1.0, 0.0, 0.8, 0.0, 0.0], + [(10,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0, 0.0], + [(8,), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75, 0.2, 1.0], + + ], + columns=columns_ordered, ) - + print(res_df.columns) + expect["antecedents"] = expect["antecedents"].apply(lambda x: str(frozenset(x))) expect["consequents"] = expect["consequents"].apply(lambda x: str(frozenset(x))) expect.sort_values(columns_ordered, inplace=True) expect.reset_index(inplace=True, drop=True) - + print(expect == res_df) + print(expect) + print(res_df) assert res_df.equals(expect), res_df @@ -132,6 +140,8 @@ def test_empty_result(): "conviction", "zhangs_metric", "jaccard", + "centered_confidence", + "certainty" ] ) res_df = association_rules(df_freq_items, min_threshold=2) From c8ced372137bf8ac32b48563787fc7798efbb867 Mon Sep 17 00:00:00 2001 From: AnkithN <53663967+ankithn30@users.noreply.github.com> Date: Mon, 10 Jun 2024 20:22:56 -0400 Subject: [PATCH 07/17] removed centered confidence --- .../frequent_patterns/association_rules.py | 4 +--- .../tests/test_association_rules.py | 20 +++++++++---------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/mlxtend/frequent_patterns/association_rules.py b/mlxtend/frequent_patterns/association_rules.py index 289d1b876..9689f6623 100644 --- 
a/mlxtend/frequent_patterns/association_rules.py +++ b/mlxtend/frequent_patterns/association_rules.py @@ -23,7 +23,6 @@ "conviction", "zhangs_metric", "jaccard", - "centered_confidence", "certainty", ] @@ -138,7 +137,7 @@ def jaccard_metric_helper(sAC, sA, sC): return jaccard_metric def certainty_metric_helper(sAC, sA, sC): - certainty_num = metric_dict["centered_confidence"](sAC, sA, sC) + certainty_num = metric_dict["confidence"](sAC, sA, sC) - sC certainty_denom = 1 - sC cert_metric = np.where(certainty_denom == 0, 0, certainty_num / certainty_denom) @@ -156,7 +155,6 @@ def certainty_metric_helper(sAC, sA, sC): "conviction": lambda sAC, sA, sC: conviction_helper(sAC, sA, sC), "zhangs_metric": lambda sAC, sA, sC: zhangs_metric_helper(sAC, sA, sC), "jaccard": lambda sAC, sA, sC: jaccard_metric_helper(sAC, sA, sC), - "centered_confidence": lambda sAC, sA, sC: metric_dict["confidence"](sAC, sA, sC) - sC, "certainty": lambda sAC, sA, sC: certainty_metric_helper(sAC, sA, sC), } diff --git a/mlxtend/frequent_patterns/tests/test_association_rules.py b/mlxtend/frequent_patterns/tests/test_association_rules.py index d2e21948f..2c7d93923 100644 --- a/mlxtend/frequent_patterns/tests/test_association_rules.py +++ b/mlxtend/frequent_patterns/tests/test_association_rules.py @@ -47,7 +47,6 @@ "conviction", "zhangs_metric", "jaccard", - "centered_confidence", "certainty" ] @@ -61,15 +60,15 @@ def test_default(): expect = pd.DataFrame( [ - [(8,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0, 0.0], - [(6,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0, 0.0], - [(8, 3), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0, 0.0], - [(8, 5), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75, 0.2, 1.0], - [(8,), (3, 5), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75, 0.2, 1.0], - [(3,), (5,), 0.8, 1.0, 0.8, 1.0, 1.0, 0.0, np.inf, 0, 0.8, 0.0, 0.0], - [(5,), (3,), 1.0, 0.8, 0.8, 0.8, 1.0, 0.0, 1.0, 0.0, 0.8, 0.0, 0.0], - [(10,), (5,), 0.6, 1.0, 0.6, 
1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0, 0.0], - [(8,), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75, 0.2, 1.0], + [(8,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0], + [(6,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0], + [(8, 3), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0], + [(8, 5), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75, 1.0], + [(8,), (3, 5), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75, 1.0], + [(3,), (5,), 0.8, 1.0, 0.8, 1.0, 1.0, 0.0, np.inf, 0, 0.8, 0.0], + [(5,), (3,), 1.0, 0.8, 0.8, 0.8, 1.0, 0.0, 1.0, 0.0, 0.8, 0.0], + [(10,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0], + [(8,), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75, 1.0], ], @@ -140,7 +139,6 @@ def test_empty_result(): "conviction", "zhangs_metric", "jaccard", - "centered_confidence", "certainty" ] ) From f57ee76d13eb874d352134a30a2fc816d29f074d Mon Sep 17 00:00:00 2001 From: Rany Kamel Date: Sun, 23 Jun 2024 15:27:32 -0400 Subject: [PATCH 08/17] remove print statements in test --- mlxtend/frequent_patterns/tests/test_association_rules.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/mlxtend/frequent_patterns/tests/test_association_rules.py b/mlxtend/frequent_patterns/tests/test_association_rules.py index 2c7d93923..937768ad9 100644 --- a/mlxtend/frequent_patterns/tests/test_association_rules.py +++ b/mlxtend/frequent_patterns/tests/test_association_rules.py @@ -75,15 +75,11 @@ def test_default(): columns=columns_ordered, ) - print(res_df.columns) - + expect["antecedents"] = expect["antecedents"].apply(lambda x: str(frozenset(x))) expect["consequents"] = expect["consequents"].apply(lambda x: str(frozenset(x))) expect.sort_values(columns_ordered, inplace=True) expect.reset_index(inplace=True, drop=True) - print(expect == res_df) - print(expect) - print(res_df) assert res_df.equals(expect), res_df From 5fd361d9ad63416df02f20f604a343e39957762c Mon Sep 17 00:00:00 2001 From: Rany 
Kamel Date: Sun, 23 Jun 2024 15:44:57 -0400 Subject: [PATCH 09/17] Implemented kulczynski algorithm Nachiket Deo --- mlxtend/frequent_patterns/association_rules.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mlxtend/frequent_patterns/association_rules.py b/mlxtend/frequent_patterns/association_rules.py index 9689f6623..7905cb7f2 100644 --- a/mlxtend/frequent_patterns/association_rules.py +++ b/mlxtend/frequent_patterns/association_rules.py @@ -102,6 +102,12 @@ def association_rules(df: pd.DataFrame, metric = "confidence", min_threshold = 0 "Dataframe needs to contain the\ columns 'support' and 'itemsets'" ) + + def kulczynski_helper(sAC, sA, sC): + conf_AC = sAC / sA + conf_CA = sAC / sC + kulczynski = (conf_AC + conf_CA) / 2 + return kulczynski def conviction_helper(sAC, sA, sC): confidence = sAC / sA @@ -156,6 +162,7 @@ def certainty_metric_helper(sAC, sA, sC): "zhangs_metric": lambda sAC, sA, sC: zhangs_metric_helper(sAC, sA, sC), "jaccard": lambda sAC, sA, sC: jaccard_metric_helper(sAC, sA, sC), "certainty": lambda sAC, sA, sC: certainty_metric_helper(sAC, sA, sC), + "kulczynski": lambda sAC, sA, sC: kulczynski_helper(sAC, sA, sC), } # check for metric compliance From 5f598771f58c634cd3c2ed401b67ae8818c452fd Mon Sep 17 00:00:00 2001 From: Rany Kamel Date: Sun, 23 Jun 2024 16:32:38 -0400 Subject: [PATCH 10/17] Implemented Kulcynski + Certainty tests Co-authored-by: Nachiket18 Co-authored-by: AnkithN --- .../frequent_patterns/association_rules.py | 1 + .../tests/test_association_rules.py | 38 ++++++++++++------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/mlxtend/frequent_patterns/association_rules.py b/mlxtend/frequent_patterns/association_rules.py index 7905cb7f2..cd0a6e754 100644 --- a/mlxtend/frequent_patterns/association_rules.py +++ b/mlxtend/frequent_patterns/association_rules.py @@ -24,6 +24,7 @@ "zhangs_metric", "jaccard", "certainty", + "kulczynski" ] def association_rules(df: pd.DataFrame, metric = "confidence", 
min_threshold = 0.8, support_only = False, return_metrics: list = _metrics) -> pd.DataFrame: diff --git a/mlxtend/frequent_patterns/tests/test_association_rules.py b/mlxtend/frequent_patterns/tests/test_association_rules.py index 937768ad9..2fc69c007 100644 --- a/mlxtend/frequent_patterns/tests/test_association_rules.py +++ b/mlxtend/frequent_patterns/tests/test_association_rules.py @@ -47,7 +47,8 @@ "conviction", "zhangs_metric", "jaccard", - "certainty" + "certainty", + "kulczynski" ] @@ -60,17 +61,15 @@ def test_default(): expect = pd.DataFrame( [ - [(8,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0], - [(6,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0], - [(8, 3), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0], - [(8, 5), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75, 1.0], - [(8,), (3, 5), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75, 1.0], - [(3,), (5,), 0.8, 1.0, 0.8, 1.0, 1.0, 0.0, np.inf, 0, 0.8, 0.0], - [(5,), (3,), 1.0, 0.8, 0.8, 0.8, 1.0, 0.0, 1.0, 0.0, 0.8, 0.0], - [(10,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0], - [(8,), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75, 1.0], - - + [(8,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0, 0.8], + [(6,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0, 0.8], + [(8, 3), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0, 0.8], + [(8, 5), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75, 1.0, 0.875], + [(8,), (3, 5), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75, 1.0, 0.875], + [(3,), (5,), 0.8, 1.0, 0.8, 1.0, 1.0, 0.0, np.inf, 0, 0.8, 0.0, 0.9], + [(5,), (3,), 1.0, 0.8, 0.8, 0.8, 1.0, 0.0, 1.0, 0.0, 0.8, 0.0, 0.9], + [(10,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0, 0.8], + [(8,), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75, 1.0, 0.875], ], columns=columns_ordered, @@ -135,7 +134,8 @@ def test_empty_result(): "conviction", "zhangs_metric", "jaccard", - "certainty" + 
"certainty", + "kulczynski", ] ) res_df = association_rules(df_freq_items, min_threshold=2) @@ -191,7 +191,19 @@ def test_jaccard(): ) assert res_df.values.shape[0] == 8 +def test_certainty(): + res_df = association_rules(df_freq_items, metric="certainty", min_threshold=0.6) + assert res_df.values.shape[0] == 3 + + res_df = association_rules(df_freq_items_with_colnames, metric="certainty", min_threshold=0.6) + assert res_df.values.shape[0] == 3 + +def test_kulczynski(): + res_df = association_rules(df_freq_items, metric="kulczynski", min_threshold=0.9) + assert res_df.values.shape[0] == 2 + res_df = association_rules(df_freq_items_with_colnames, metric="kulczynski", min_threshold=0.6) + assert res_df.values.shape[0] == 16 def test_frozenset_selection(): res_df = association_rules(df_freq_items) From d797f65c477bbd711e332cda670f056a60b7b3c7 Mon Sep 17 00:00:00 2001 From: Rany Kamel Date: Sun, 23 Jun 2024 16:38:38 -0400 Subject: [PATCH 11/17] fixed flake8 formatting --- .../frequent_patterns/association_rules.py | 41 +++++++++---------- .../tests/test_association_rules.py | 3 ++ 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/mlxtend/frequent_patterns/association_rules.py b/mlxtend/frequent_patterns/association_rules.py index cd0a6e754..d0b76931b 100644 --- a/mlxtend/frequent_patterns/association_rules.py +++ b/mlxtend/frequent_patterns/association_rules.py @@ -13,21 +13,21 @@ import numpy as np import pandas as pd -_metrics = [ - "antecedent support", - "consequent support", - "support", - "confidence", - "lift", - "leverage", - "conviction", - "zhangs_metric", - "jaccard", - "certainty", - "kulczynski" -] - -def association_rules(df: pd.DataFrame, metric = "confidence", min_threshold = 0.8, support_only = False, return_metrics: list = _metrics) -> pd.DataFrame: +_metrics = ["antecedent support", + "consequent support", + "support", + "confidence", + "lift", + "leverage", + "conviction", + "zhangs_metric", + "jaccard", + "certainty", + "kulczynski" + 
] + + +def association_rules(df: pd.DataFrame, metric="confidence", min_threshold=0.8, support_only=False, return_metrics: list = _metrics) -> pd.DataFrame: """Generates a DataFrame of association rules including the metrics 'score', 'confidence', and 'lift' @@ -103,10 +103,10 @@ def association_rules(df: pd.DataFrame, metric = "confidence", min_threshold = 0 "Dataframe needs to contain the\ columns 'support' and 'itemsets'" ) - + def kulczynski_helper(sAC, sA, sC): - conf_AC = sAC / sA - conf_CA = sAC / sC + conf_AC = sAC / sA + conf_CA = sAC / sC kulczynski = (conf_AC + conf_CA) / 2 return kulczynski @@ -135,14 +135,14 @@ def zhangs_metric_helper(sAC, sA, sC): zhangs_metric = np.where(denominator == 0, 0, numerator / denominator) return zhangs_metric - + def jaccard_metric_helper(sAC, sA, sC): numerator = metric_dict["support"](sAC, sA, sC) denominator = sA + sC - numerator jaccard_metric = numerator / denominator return jaccard_metric - + def certainty_metric_helper(sAC, sA, sC): certainty_num = metric_dict["confidence"](sAC, sA, sC) - sC certainty_denom = 1 - sC @@ -150,7 +150,6 @@ def certainty_metric_helper(sAC, sA, sC): cert_metric = np.where(certainty_denom == 0, 0, certainty_num / certainty_denom) return cert_metric - # metrics for association rules metric_dict = { "antecedent support": lambda _, sA, __: sA, diff --git a/mlxtend/frequent_patterns/tests/test_association_rules.py b/mlxtend/frequent_patterns/tests/test_association_rules.py index 2fc69c007..f98bf24a7 100644 --- a/mlxtend/frequent_patterns/tests/test_association_rules.py +++ b/mlxtend/frequent_patterns/tests/test_association_rules.py @@ -191,6 +191,7 @@ def test_jaccard(): ) assert res_df.values.shape[0] == 8 + def test_certainty(): res_df = association_rules(df_freq_items, metric="certainty", min_threshold=0.6) assert res_df.values.shape[0] == 3 @@ -198,6 +199,7 @@ def test_certainty(): res_df = association_rules(df_freq_items_with_colnames, metric="certainty", min_threshold=0.6) assert 
res_df.values.shape[0] == 3 + def test_kulczynski(): res_df = association_rules(df_freq_items, metric="kulczynski", min_threshold=0.9) assert res_df.values.shape[0] == 2 @@ -205,6 +207,7 @@ def test_kulczynski(): res_df = association_rules(df_freq_items_with_colnames, metric="kulczynski", min_threshold=0.6) assert res_df.values.shape[0] == 16 + def test_frozenset_selection(): res_df = association_rules(df_freq_items) From 5f6f582f08b6292c5dbe76c9dea3f332159ad35a Mon Sep 17 00:00:00 2001 From: Rany Kamel Date: Sun, 23 Jun 2024 17:59:46 -0400 Subject: [PATCH 12/17] Added jaccard, certainty, and kulczynski to notebook documentation Co-authored-by: AnkithN --- .../frequent_patterns/association_rules.ipynb | 1708 +---------------- 1 file changed, 53 insertions(+), 1655 deletions(-) diff --git a/docs/sources/user_guide/frequent_patterns/association_rules.ipynb b/docs/sources/user_guide/frequent_patterns/association_rules.ipynb index f81a8562d..3f963b78f 100644 --- a/docs/sources/user_guide/frequent_patterns/association_rules.ipynb +++ b/docs/sources/user_guide/frequent_patterns/association_rules.ipynb @@ -96,6 +96,29 @@ "- introduced in [7]\n", "\n", "Measures both association and dissociation. Value ranges between -1 and 1. A positive value (>0) indicates Association and negative value indicated dissociation.\n", + "\n", + "#### 'jaccard':\n", + "\n", + "$$\\text{jaccard}(A\\rightarrow C) = \\frac{\\text{support}(A\\rightarrow C)}{\\text{support}(A) + \\text{support}(C) - \\text{support}(A\\rightarrow C)}, \\;\\;\\; \\text{range: } [0, 1]$$\n", + "\n", + "- introduced in [8]\n", + "\n", + "Measures similarity between A and C. Value ranges between 0 and 1. 
A value of 0 indicates complete dissimilarity, and a value of 1 indicates complete similarity.\n", + " \n", + "#### 'certainty':\n", + " $$\\text{certainty}(A\\rightarrow C) = \\frac{\\text{confidence}(A\\rightarrow C) - \\text{support}(C)}{1 - \\text{support}(C)}, \\;\\;\\; \\text{range: } [-1, 1]$$ \n", + "\n", + " - introduced in [9]\n", + " \n", + " Measures\n", + "the certainty between A and C. Value ranges from -1 to 1, where 0 indicates independence. \n", + "\n", + "#### 'kulczynski':\n", + " $$\\text{Kulczynski}(A\\rightarrow C) = \\frac{1}{2}\\left(\\frac{\\text{support}(A\\rightarrow C)}{\\text{support}(A)} + \\frac{\\text{support}(A\\rightarrow C)}{\\text{support}(C)}\\right), \\;\\;\\; \\text{range: } [0, 1]$$\n", + "\n", + " - introduced in [10]\n", + "\n", + " Measures the association between A and C. Value ranges from 0 to 1. Rules near 0 or 1 are considered negatively or positively associated, respectively. Rules near 0.5 are considered to be uninteresting.\n", + " " ] }, @@ -122,7 +145,13 @@ "\n", "[6] Sergey Brin, Rajeev Motwani, Jeffrey D. Ullman, and Shalom Turk. Dynamic itemset counting and implication rules for market basket data. In SIGMOD 1997, Proceedings ACM SIGMOD International Conference on Management of Data, pages 255-264, Tucson, Arizona, USA, May 1997\n", "\n", - "[7] Xiaowei Yan , Chengqi Zhang & Shichao Zhang (2009) CONFIDENCE METRICS FOR ASSOCIATION RULE MINING, Applied Artificial Intelligence, 23:8, 713-737 https://www.tandfonline.com/doi/pdf/10.1080/08839510903208062.\n" + "[7] Xiaowei Yan , Chengqi Zhang & Shichao Zhang (2009) CONFIDENCE METRICS FOR ASSOCIATION RULE MINING, Applied Artificial Intelligence, 23:8, 713-737 https://www.tandfonline.com/doi/pdf/10.1080/08839510903208062.\n", + "\n", + "[8] Pang-Ning Tan, Vipin Kumar, Jaideep Srivastava. Selecting the right objective measure for association analysis.
Information Systems, Volume 29, Issue 4, 2004, Pages 293-313.\n", + "\n", + "[9] Berzal Fernando, Blanco Ignacio, Sánchez Daniel, Vila, María-Amparo. Measuring the accuracy and interest of association rules: A new framework. Intelligent Data Analysis, Volume 6, no. 3, 2002, Pages 221-235.\n", + "\n", + "[10] Wu, T., Chen, Y., Han, J. Re-examination of interestingness measures in pattern mining: a unified framework. Data Min Knowl Disc 21, 371–397 (2010). https://doi.org/10.1007/s10618-009-0161-2." ] }, { @@ -141,114 +170,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
supportitemsets
01.0(Kidney Beans)
10.8(Eggs)
20.6(Yogurt)
30.6(Onion)
40.6(Milk)
50.8(Kidney Beans, Eggs)
60.6(Kidney Beans, Yogurt)
70.6(Eggs, Onion)
80.6(Kidney Beans, Onion)
90.6(Eggs, Kidney Beans, Onion)
100.6(Kidney Beans, Milk)
\n", - "
" - ], - "text/plain": [ - " support itemsets\n", - "0 1.0 (Kidney Beans)\n", - "1 0.8 (Eggs)\n", - "2 0.6 (Yogurt)\n", - "3 0.6 (Onion)\n", - "4 0.6 (Milk)\n", - "5 0.8 (Kidney Beans, Eggs)\n", - "6 0.6 (Kidney Beans, Yogurt)\n", - "7 0.6 (Eggs, Onion)\n", - "8 0.6 (Kidney Beans, Onion)\n", - "9 0.6 (Eggs, Kidney Beans, Onion)\n", - "10 0.6 (Kidney Beans, Milk)" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import pandas as pd\n", "from mlxtend.preprocessing import TransactionEncoder\n", @@ -282,252 +206,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
antecedentsconsequentsantecedent supportconsequent supportsupportconfidenceliftleverageconvictionzhangs_metric
0(Kidney Beans)(Eggs)1.00.80.80.801.000.001.00.0
1(Eggs)(Kidney Beans)0.81.00.81.001.000.00inf0.0
2(Yogurt)(Kidney Beans)0.61.00.61.001.000.00inf0.0
3(Eggs)(Onion)0.80.60.60.751.250.121.61.0
4(Onion)(Eggs)0.60.80.61.001.250.12inf0.5
5(Onion)(Kidney Beans)0.61.00.61.001.000.00inf0.0
6(Kidney Beans, Eggs)(Onion)0.80.60.60.751.250.121.61.0
7(Onion, Eggs)(Kidney Beans)0.61.00.61.001.000.00inf0.0
8(Kidney Beans, Onion)(Eggs)0.60.80.61.001.250.12inf0.5
9(Eggs)(Kidney Beans, Onion)0.80.60.60.751.250.121.61.0
10(Onion)(Kidney Beans, Eggs)0.60.80.61.001.250.12inf0.5
11(Milk)(Kidney Beans)0.61.00.61.001.000.00inf0.0
\n", - "
" - ], - "text/plain": [ - " antecedents consequents antecedent support \\\n", - "0 (Kidney Beans) (Eggs) 1.0 \n", - "1 (Eggs) (Kidney Beans) 0.8 \n", - "2 (Yogurt) (Kidney Beans) 0.6 \n", - "3 (Eggs) (Onion) 0.8 \n", - "4 (Onion) (Eggs) 0.6 \n", - "5 (Onion) (Kidney Beans) 0.6 \n", - "6 (Kidney Beans, Eggs) (Onion) 0.8 \n", - "7 (Onion, Eggs) (Kidney Beans) 0.6 \n", - "8 (Kidney Beans, Onion) (Eggs) 0.6 \n", - "9 (Eggs) (Kidney Beans, Onion) 0.8 \n", - "10 (Onion) (Kidney Beans, Eggs) 0.6 \n", - "11 (Milk) (Kidney Beans) 0.6 \n", - "\n", - " consequent support support confidence lift leverage conviction \\\n", - "0 0.8 0.8 0.80 1.00 0.00 1.0 \n", - "1 1.0 0.8 1.00 1.00 0.00 inf \n", - "2 1.0 0.6 1.00 1.00 0.00 inf \n", - "3 0.6 0.6 0.75 1.25 0.12 1.6 \n", - "4 0.8 0.6 1.00 1.25 0.12 inf \n", - "5 1.0 0.6 1.00 1.00 0.00 inf \n", - "6 0.6 0.6 0.75 1.25 0.12 1.6 \n", - "7 1.0 0.6 1.00 1.00 0.00 inf \n", - "8 0.8 0.6 1.00 1.25 0.12 inf \n", - "9 0.6 0.6 0.75 1.25 0.12 1.6 \n", - "10 0.8 0.6 1.00 1.25 0.12 inf \n", - "11 1.0 0.6 1.00 1.00 0.00 inf \n", - "\n", - " zhangs_metric \n", - "0 0.0 \n", - "1 0.0 \n", - "2 0.0 \n", - "3 1.0 \n", - "4 0.5 \n", - "5 0.0 \n", - "6 1.0 \n", - "7 0.0 \n", - "8 0.5 \n", - "9 1.0 \n", - "10 0.5 \n", - "11 0.0 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from mlxtend.frequent_patterns import association_rules\n", "\n", @@ -550,156 +231,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
antecedentsconsequentsantecedent supportconsequent supportsupportconfidenceliftleverageconvictionzhangs_metric
0(Eggs)(Onion)0.80.60.60.751.250.121.61.0
1(Onion)(Eggs)0.60.80.61.001.250.12inf0.5
2(Kidney Beans, Eggs)(Onion)0.80.60.60.751.250.121.61.0
3(Kidney Beans, Onion)(Eggs)0.60.80.61.001.250.12inf0.5
4(Eggs)(Kidney Beans, Onion)0.80.60.60.751.250.121.61.0
5(Onion)(Kidney Beans, Eggs)0.60.80.61.001.250.12inf0.5
\n", - "
" - ], - "text/plain": [ - " antecedents consequents antecedent support \\\n", - "0 (Eggs) (Onion) 0.8 \n", - "1 (Onion) (Eggs) 0.6 \n", - "2 (Kidney Beans, Eggs) (Onion) 0.8 \n", - "3 (Kidney Beans, Onion) (Eggs) 0.6 \n", - "4 (Eggs) (Kidney Beans, Onion) 0.8 \n", - "5 (Onion) (Kidney Beans, Eggs) 0.6 \n", - "\n", - " consequent support support confidence lift leverage conviction \\\n", - "0 0.6 0.6 0.75 1.25 0.12 1.6 \n", - "1 0.8 0.6 1.00 1.25 0.12 inf \n", - "2 0.6 0.6 0.75 1.25 0.12 1.6 \n", - "3 0.8 0.6 1.00 1.25 0.12 inf \n", - "4 0.6 0.6 0.75 1.25 0.12 1.6 \n", - "5 0.8 0.6 1.00 1.25 0.12 inf \n", - "\n", - " zhangs_metric \n", - "0 1.0 \n", - "1 0.5 \n", - "2 1.0 \n", - "3 0.5 \n", - "4 1.0 \n", - "5 0.5 " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "rules = association_rules(frequent_itemsets, metric=\"lift\", min_threshold=1.2)\n", "rules" @@ -720,163 +254,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
antecedentsconsequentsantecedent supportconsequent supportsupportconfidenceliftleverageconvictionzhangs_metricantecedent_len
0(Eggs)(Onion)0.80.60.60.751.250.121.61.01
1(Onion)(Eggs)0.60.80.61.001.250.12inf0.51
2(Kidney Beans, Eggs)(Onion)0.80.60.60.751.250.121.61.02
3(Kidney Beans, Onion)(Eggs)0.60.80.61.001.250.12inf0.52
4(Eggs)(Kidney Beans, Onion)0.80.60.60.751.250.121.61.01
5(Onion)(Kidney Beans, Eggs)0.60.80.61.001.250.12inf0.51
\n", - "
" - ], - "text/plain": [ - " antecedents consequents antecedent support \\\n", - "0 (Eggs) (Onion) 0.8 \n", - "1 (Onion) (Eggs) 0.6 \n", - "2 (Kidney Beans, Eggs) (Onion) 0.8 \n", - "3 (Kidney Beans, Onion) (Eggs) 0.6 \n", - "4 (Eggs) (Kidney Beans, Onion) 0.8 \n", - "5 (Onion) (Kidney Beans, Eggs) 0.6 \n", - "\n", - " consequent support support confidence lift leverage conviction \\\n", - "0 0.6 0.6 0.75 1.25 0.12 1.6 \n", - "1 0.8 0.6 1.00 1.25 0.12 inf \n", - "2 0.6 0.6 0.75 1.25 0.12 1.6 \n", - "3 0.8 0.6 1.00 1.25 0.12 inf \n", - "4 0.6 0.6 0.75 1.25 0.12 1.6 \n", - "5 0.8 0.6 1.00 1.25 0.12 inf \n", - "\n", - " zhangs_metric antecedent_len \n", - "0 1.0 1 \n", - "1 0.5 1 \n", - "2 1.0 2 \n", - "3 0.5 2 \n", - "4 1.0 1 \n", - "5 0.5 1 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "rules[\"antecedent_len\"] = rules[\"antecedents\"].apply(lambda x: len(x))\n", "rules" @@ -891,78 +271,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
antecedentsconsequentsantecedent supportconsequent supportsupportconfidenceliftleverageconvictionzhangs_metricantecedent_len
3(Kidney Beans, Onion)(Eggs)0.60.80.61.01.250.12inf0.52
\n", - "
" - ], - "text/plain": [ - " antecedents consequents antecedent support consequent support \\\n", - "3 (Kidney Beans, Onion) (Eggs) 0.6 0.8 \n", - "\n", - " support confidence lift leverage conviction zhangs_metric \\\n", - "3 0.6 1.0 1.25 0.12 inf 0.5 \n", - "\n", - " antecedent_len \n", - "3 2 " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "rules[ (rules['antecedent_len'] >= 2) &\n", " (rules['confidence'] > 0.75) &\n", @@ -978,78 +289,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
antecedentsconsequentsantecedent supportconsequent supportsupportconfidenceliftleverageconvictionzhangs_metricantecedent_len
2(Kidney Beans, Eggs)(Onion)0.80.60.60.751.250.121.61.02
\n", - "
" - ], - "text/plain": [ - " antecedents consequents antecedent support consequent support \\\n", - "2 (Kidney Beans, Eggs) (Onion) 0.8 0.6 \n", - "\n", - " support confidence lift leverage conviction zhangs_metric \\\n", - "2 0.6 0.75 1.25 0.12 1.6 1.0 \n", - "\n", - " antecedent_len \n", - "2 2 " - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "rules[rules['antecedents'] == {'Eggs', 'Kidney Beans'}]" ] @@ -1089,90 +331,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
itemsetssupport
0[177, 176]0.253623
1[177, 179]0.253623
2[176, 178]0.217391
3[176, 179]0.217391
4[93, 100]0.181159
5[177, 178]0.108696
6[177, 176, 178]0.108696
\n", - "
" - ], - "text/plain": [ - " itemsets support\n", - "0 [177, 176] 0.253623\n", - "1 [177, 179] 0.253623\n", - "2 [176, 178] 0.217391\n", - "3 [176, 179] 0.217391\n", - "4 [93, 100] 0.181159\n", - "5 [177, 178] 0.108696\n", - "6 [177, 176, 178] 0.108696" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import pandas as pd\n", "\n", @@ -1209,328 +370,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
antecedentsconsequentsantecedent supportconsequent supportsupportconfidenceliftleverageconvictionzhangs_metric
0(176)(177)NaNNaN0.253623NaNNaNNaNNaNNaN
1(177)(176)NaNNaN0.253623NaNNaNNaNNaNNaN
2(179)(177)NaNNaN0.253623NaNNaNNaNNaNNaN
3(177)(179)NaNNaN0.253623NaNNaNNaNNaNNaN
4(178)(176)NaNNaN0.217391NaNNaNNaNNaNNaN
5(176)(178)NaNNaN0.217391NaNNaNNaNNaNNaN
6(179)(176)NaNNaN0.217391NaNNaNNaNNaNNaN
7(176)(179)NaNNaN0.217391NaNNaNNaNNaNNaN
8(100)(93)NaNNaN0.181159NaNNaNNaNNaNNaN
9(93)(100)NaNNaN0.181159NaNNaNNaNNaNNaN
10(178)(177)NaNNaN0.108696NaNNaNNaNNaNNaN
11(177)(178)NaNNaN0.108696NaNNaNNaNNaNNaN
12(178, 176)(177)NaNNaN0.108696NaNNaNNaNNaNNaN
13(178, 177)(176)NaNNaN0.108696NaNNaNNaNNaNNaN
14(177, 176)(178)NaNNaN0.108696NaNNaNNaNNaNNaN
15(178)(177, 176)NaNNaN0.108696NaNNaNNaNNaNNaN
16(176)(178, 177)NaNNaN0.108696NaNNaNNaNNaNNaN
17(177)(178, 176)NaNNaN0.108696NaNNaNNaNNaNNaN
\n", - "
" - ], - "text/plain": [ - " antecedents consequents antecedent support consequent support support \\\n", - "0 (176) (177) NaN NaN 0.253623 \n", - "1 (177) (176) NaN NaN 0.253623 \n", - "2 (179) (177) NaN NaN 0.253623 \n", - "3 (177) (179) NaN NaN 0.253623 \n", - "4 (178) (176) NaN NaN 0.217391 \n", - "5 (176) (178) NaN NaN 0.217391 \n", - "6 (179) (176) NaN NaN 0.217391 \n", - "7 (176) (179) NaN NaN 0.217391 \n", - "8 (100) (93) NaN NaN 0.181159 \n", - "9 (93) (100) NaN NaN 0.181159 \n", - "10 (178) (177) NaN NaN 0.108696 \n", - "11 (177) (178) NaN NaN 0.108696 \n", - "12 (178, 176) (177) NaN NaN 0.108696 \n", - "13 (178, 177) (176) NaN NaN 0.108696 \n", - "14 (177, 176) (178) NaN NaN 0.108696 \n", - "15 (178) (177, 176) NaN NaN 0.108696 \n", - "16 (176) (178, 177) NaN NaN 0.108696 \n", - "17 (177) (178, 176) NaN NaN 0.108696 \n", - "\n", - " confidence lift leverage conviction zhangs_metric \n", - "0 NaN NaN NaN NaN NaN \n", - "1 NaN NaN NaN NaN NaN \n", - "2 NaN NaN NaN NaN NaN \n", - "3 NaN NaN NaN NaN NaN \n", - "4 NaN NaN NaN NaN NaN \n", - "5 NaN NaN NaN NaN NaN \n", - "6 NaN NaN NaN NaN NaN \n", - "7 NaN NaN NaN NaN NaN \n", - "8 NaN NaN NaN NaN NaN \n", - "9 NaN NaN NaN NaN NaN \n", - "10 NaN NaN NaN NaN NaN \n", - "11 NaN NaN NaN NaN NaN \n", - "12 NaN NaN NaN NaN NaN \n", - "13 NaN NaN NaN NaN NaN \n", - "14 NaN NaN NaN NaN NaN \n", - "15 NaN NaN NaN NaN NaN \n", - "16 NaN NaN NaN NaN NaN \n", - "17 NaN NaN NaN NaN NaN " - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from mlxtend.frequent_patterns import association_rules\n", "\n", @@ -1547,175 +389,9 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
antecedentsconsequentssupport
0(176)(177)0.253623
1(177)(176)0.253623
2(179)(177)0.253623
3(177)(179)0.253623
4(178)(176)0.217391
5(176)(178)0.217391
6(179)(176)0.217391
7(176)(179)0.217391
8(100)(93)0.181159
9(93)(100)0.181159
10(178)(177)0.108696
11(177)(178)0.108696
12(178, 176)(177)0.108696
13(178, 177)(176)0.108696
14(177, 176)(178)0.108696
15(178)(177, 176)0.108696
16(176)(178, 177)0.108696
17(177)(178, 176)0.108696
\n", - "
" - ], - "text/plain": [ - " antecedents consequents support\n", - "0 (176) (177) 0.253623\n", - "1 (177) (176) 0.253623\n", - "2 (179) (177) 0.253623\n", - "3 (177) (179) 0.253623\n", - "4 (178) (176) 0.217391\n", - "5 (176) (178) 0.217391\n", - "6 (179) (176) 0.217391\n", - "7 (176) (179) 0.217391\n", - "8 (100) (93) 0.181159\n", - "9 (93) (100) 0.181159\n", - "10 (178) (177) 0.108696\n", - "11 (177) (178) 0.108696\n", - "12 (178, 176) (177) 0.108696\n", - "13 (178, 177) (176) 0.108696\n", - "14 (177, 176) (178) 0.108696\n", - "15 (178) (177, 176) 0.108696\n", - "16 (176) (178, 177) 0.108696\n", - "17 (177) (178, 176) 0.108696" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "res = res[['antecedents', 'consequents', 'support']]\n", "res" @@ -1738,156 +414,9 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
antecedentsconsequentsantecedent supportconsequent supportsupportconfidenceliftleverageconvictionzhangs_metric
0(Eggs)(Onion)0.80.60.60.751.250.121.61.0
1(Onion)(Eggs)0.60.80.61.001.250.12inf0.5
2(Kidney Beans, Eggs)(Onion)0.80.60.60.751.250.121.61.0
3(Kidney Beans, Onion)(Eggs)0.60.80.61.001.250.12inf0.5
4(Eggs)(Kidney Beans, Onion)0.80.60.60.751.250.121.61.0
5(Onion)(Kidney Beans, Eggs)0.60.80.61.001.250.12inf0.5
\n", - "
" - ], - "text/plain": [ - " antecedents consequents antecedent support \\\n", - "0 (Eggs) (Onion) 0.8 \n", - "1 (Onion) (Eggs) 0.6 \n", - "2 (Kidney Beans, Eggs) (Onion) 0.8 \n", - "3 (Kidney Beans, Onion) (Eggs) 0.6 \n", - "4 (Eggs) (Kidney Beans, Onion) 0.8 \n", - "5 (Onion) (Kidney Beans, Eggs) 0.6 \n", - "\n", - " consequent support support confidence lift leverage conviction \\\n", - "0 0.6 0.6 0.75 1.25 0.12 1.6 \n", - "1 0.8 0.6 1.00 1.25 0.12 inf \n", - "2 0.6 0.6 0.75 1.25 0.12 1.6 \n", - "3 0.8 0.6 1.00 1.25 0.12 inf \n", - "4 0.6 0.6 0.75 1.25 0.12 1.6 \n", - "5 0.8 0.6 1.00 1.25 0.12 inf \n", - "\n", - " zhangs_metric \n", - "0 1.0 \n", - "1 0.5 \n", - "2 1.0 \n", - "3 0.5 \n", - "4 1.0 \n", - "5 0.5 " - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import pandas as pd\n", "from mlxtend.preprocessing import TransactionEncoder\n", @@ -1919,140 +448,9 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
antecedentsconsequentsantecedent supportconsequent supportsupportconfidenceliftleverageconvictionzhangs_metric
0(Eggs)(Onion)0.80.60.60.751.250.121.61.0
1(Onion)(Eggs)0.60.80.61.001.250.12inf0.5
2(Kidney Beans, Eggs)(Onion)0.80.60.60.751.250.121.61.0
4(Eggs)(Kidney Beans, Onion)0.80.60.60.751.250.121.61.0
5(Onion)(Kidney Beans, Eggs)0.60.80.61.001.250.12inf0.5
\n", - "
" - ], - "text/plain": [ - " antecedents consequents antecedent support \\\n", - "0 (Eggs) (Onion) 0.8 \n", - "1 (Onion) (Eggs) 0.6 \n", - "2 (Kidney Beans, Eggs) (Onion) 0.8 \n", - "4 (Eggs) (Kidney Beans, Onion) 0.8 \n", - "5 (Onion) (Kidney Beans, Eggs) 0.6 \n", - "\n", - " consequent support support confidence lift leverage conviction \\\n", - "0 0.6 0.6 0.75 1.25 0.12 1.6 \n", - "1 0.8 0.6 1.00 1.25 0.12 inf \n", - "2 0.6 0.6 0.75 1.25 0.12 1.6 \n", - "4 0.6 0.6 0.75 1.25 0.12 1.6 \n", - "5 0.8 0.6 1.00 1.25 0.12 inf \n", - "\n", - " zhangs_metric \n", - "0 1.0 \n", - "1 0.5 \n", - "2 1.0 \n", - "4 1.0 \n", - "5 0.5 " - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "antecedent_sele = rules['antecedents'] == frozenset({'Onion', 'Kidney Beans'}) # or frozenset({'Kidney Beans', 'Onion'})\n", "consequent_sele = rules['consequents'] == frozenset({'Eggs'})\n", @@ -2096,7 +494,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.11.9" }, "toc": { "nav_menu": {}, From 8402ac3743eb35fa2301d527804af6fcb4b6fff5 Mon Sep 17 00:00:00 2001 From: Rany Kamel Date: Sun, 23 Jun 2024 18:25:27 -0400 Subject: [PATCH 13/17] Added changes to changelog --- docs/sources/CHANGELOG.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md index aeb7c4687..c605a1bfc 100755 --- a/docs/sources/CHANGELOG.md +++ b/docs/sources/CHANGELOG.md @@ -6,6 +6,24 @@ The CHANGELOG for the current development version is available at [https://github.com/rasbt/mlxtend/blob/master/docs/sources/CHANGELOG.md](https://github.com/rasbt/mlxtend/blob/master/docs/sources/CHANGELOG.md). 
--- + +### Version 0.23.3 (TBD) + +##### Downloads + +- [Source code (zip)](https://github.com/rasbt/mlxtend/archive/v0.23.3.zip) + +- [Source code (tar.gz)](https://github.com/rasbt/mlxtend/archive/v0.23.3.tar.gz) + +##### New Features and Enhancements + +- [`mlxtend.frequent_patterns.association_rules`](https://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/) Implemented three new metrics: Jaccard, Certainty, and Kulczynski. ([#1096](https://github.com/rasbt/mlxtend/issues/1096)) + +##### Changes + +- [`mlxtend.frequent_patterns.association_rules`](https://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/) Added optional parameter 'return_metrics' to only return a given list of metrics, rather than every possible metric. + + ### Version 0.23.2 (TBD) ##### Downloads From 261a37d73a903af77fd99324bd9f795d909d14d7 Mon Sep 17 00:00:00 2001 From: Sebastian Raschka Date: Mon, 1 Jul 2024 19:19:14 -0500 Subject: [PATCH 14/17] Update test_association_rules.py --- mlxtend/frequent_patterns/tests/test_association_rules.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mlxtend/frequent_patterns/tests/test_association_rules.py b/mlxtend/frequent_patterns/tests/test_association_rules.py index f98bf24a7..f7807cfc2 100644 --- a/mlxtend/frequent_patterns/tests/test_association_rules.py +++ b/mlxtend/frequent_patterns/tests/test_association_rules.py @@ -52,6 +52,7 @@ ] +# fmt: off def test_default(): res_df = association_rules(df_freq_items) res_df["antecedents"] = res_df["antecedents"].apply(lambda x: str(frozenset(x))) From f313efe8a7fff05de0fc0967e80c274c36f65f7f Mon Sep 17 00:00:00 2001 From: Sebastian Raschka Date: Mon, 1 Jul 2024 19:28:05 -0500 Subject: [PATCH 15/17] Update test_association_rules.py --- .../frequent_patterns/tests/test_association_rules.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/mlxtend/frequent_patterns/tests/test_association_rules.py 
b/mlxtend/frequent_patterns/tests/test_association_rules.py index f7807cfc2..1035183c9 100644 --- a/mlxtend/frequent_patterns/tests/test_association_rules.py +++ b/mlxtend/frequent_patterns/tests/test_association_rules.py @@ -48,7 +48,7 @@ "zhangs_metric", "jaccard", "certainty", - "kulczynski" + "kulczynski", ] @@ -81,6 +81,7 @@ def test_default(): expect.sort_values(columns_ordered, inplace=True) expect.reset_index(inplace=True, drop=True) assert res_df.equals(expect), res_df +# fmt: on def test_datatypes(): @@ -197,7 +198,9 @@ def test_certainty(): res_df = association_rules(df_freq_items, metric="certainty", min_threshold=0.6) assert res_df.values.shape[0] == 3 - res_df = association_rules(df_freq_items_with_colnames, metric="certainty", min_threshold=0.6) + res_df = association_rules( + df_freq_items_with_colnames, metric="certainty", min_threshold=0.6 + ) assert res_df.values.shape[0] == 3 @@ -205,7 +208,9 @@ def test_kulczynski(): res_df = association_rules(df_freq_items, metric="kulczynski", min_threshold=0.9) assert res_df.values.shape[0] == 2 - res_df = association_rules(df_freq_items_with_colnames, metric="kulczynski", min_threshold=0.6) + res_df = association_rules( + df_freq_items_with_colnames, metric="kulczynski", min_threshold=0.6 + ) assert res_df.values.shape[0] == 16 From aad9473f17977639b44889d85160868aaab83106 Mon Sep 17 00:00:00 2001 From: Sebastian Raschka Date: Mon, 1 Jul 2024 19:28:39 -0500 Subject: [PATCH 16/17] Update association_rules.py --- .../frequent_patterns/association_rules.py | 37 +++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/mlxtend/frequent_patterns/association_rules.py b/mlxtend/frequent_patterns/association_rules.py index d0b76931b..16cba9b3d 100644 --- a/mlxtend/frequent_patterns/association_rules.py +++ b/mlxtend/frequent_patterns/association_rules.py @@ -13,21 +13,28 @@ import numpy as np import pandas as pd -_metrics = ["antecedent support", - "consequent support", - "support", - 
"confidence", - "lift", - "leverage", - "conviction", - "zhangs_metric", - "jaccard", - "certainty", - "kulczynski" - ] - - -def association_rules(df: pd.DataFrame, metric="confidence", min_threshold=0.8, support_only=False, return_metrics: list = _metrics) -> pd.DataFrame: +_metrics = [ + "antecedent support", + "consequent support", + "support", + "confidence", + "lift", + "leverage", + "conviction", + "zhangs_metric", + "jaccard", + "certainty", + "kulczynski", +] + + +def association_rules( + df: pd.DataFrame, + metric="confidence", + min_threshold=0.8, + support_only=False, + return_metrics: list = _metrics, +) -> pd.DataFrame: """Generates a DataFrame of association rules including the metrics 'score', 'confidence', and 'lift' From d3367480a1dfc23a1cc6121a18d43711b2f2a1e7 Mon Sep 17 00:00:00 2001 From: Sebastian Raschka Date: Mon, 1 Jul 2024 19:31:26 -0500 Subject: [PATCH 17/17] Update CHANGELOG.md --- docs/sources/CHANGELOG.md | 30 +++--------------------------- 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md index c605a1bfc..914cce35c 100755 --- a/docs/sources/CHANGELOG.md +++ b/docs/sources/CHANGELOG.md @@ -7,22 +7,6 @@ The CHANGELOG for the current development version is available at --- -### Version 0.23.3 (TBD) - -##### Downloads - -- [Source code (zip)](https://github.com/rasbt/mlxtend/archive/v0.23.3.zip) - -- [Source code (tar.gz)](https://github.com/rasbt/mlxtend/archive/v0.23.3.tar.gz) - -##### New Features and Enhancements - -- [`mlxtend.frequent_patterns.association_rules`](https://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/) Implemented three new metrics: Jaccard, Certainty, and Kulczynski. 
([#1096](https://github.com/rasbt/mlxtend/issues/1096)) - -##### Changes - -- [`mlxtend.frequent_patterns.association_rules`](https://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/) Added optional parameter 'return_metrics' to only return a given list of metrics, rather than every possible metric. - ### Version 0.23.2 (TBD) @@ -34,21 +18,13 @@ The CHANGELOG for the current development version is available at ##### New Features and Enhancements +- [`mlxtend.frequent_patterns.association_rules`](https://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/) Implemented three new metrics: Jaccard, Certainty, and Kulczynski. ([#1096](https://github.com/rasbt/mlxtend/issues/1096)) - Integrated scikit-learn's `set_output` method into `TransactionEncoder` ([#1087](https://github.com/rasbt/mlxtend/issues/1087) via [it176131](https://github.com/it176131)) - - - -### Version 0.23.2 (TBD) - -##### Downloads - -- [Source code (zip)](https://github.com/rasbt/mlxtend/archive/v0.23.2.zip) - -- [Source code (tar.gz)](https://github.com/rasbt/mlxtend/archive/v0.23.2.tar.gz) - ##### Changes +- [`mlxtend.frequent_patterns.association_rules`](https://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/) Added optional parameter 'return_metrics' to only return a given list of metrics, rather than every possible metric. + - Add `n_classes_` attribute to stacking classifiers for compatibility with scikit-learn 1.3 ([#1091](https://github.com/rasbt/mlxtend/issues/1091)) - Use Scipy's instead of NumPy's decompositions in PCA for improved accuracy in edge cases ([#1080](https://github.com/rasbt/mlxtend/issues/1080) via [[fkdosilovic](https://github.com/rasbt/mlxtend/issues?q=is%3Apr+is%3Aopen+author%3Afkdosilovic)])