diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md
index aeb7c4687..914cce35c 100755
--- a/docs/sources/CHANGELOG.md
+++ b/docs/sources/CHANGELOG.md
@@ -6,6 +6,8 @@ The CHANGELOG for the current development version is available at
[https://github.com/rasbt/mlxtend/blob/master/docs/sources/CHANGELOG.md](https://github.com/rasbt/mlxtend/blob/master/docs/sources/CHANGELOG.md).
---
+
+
### Version 0.23.2 (TBD)
##### Downloads
@@ -16,21 +18,13 @@ The CHANGELOG for the current development version is available at
##### New Features and Enhancements
+- [`mlxtend.frequent_patterns.association_rules`](https://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/) Implemented three new metrics: Jaccard, Certainty, and Kulczynski. ([#1096](https://github.com/rasbt/mlxtend/issues/1096))
- Integrated scikit-learn's `set_output` method into `TransactionEncoder` ([#1087](https://github.com/rasbt/mlxtend/issues/1087) via [it176131](https://github.com/it176131))
-
-
-
-### Version 0.23.2 (TBD)
-
-##### Downloads
-
-- [Source code (zip)](https://github.com/rasbt/mlxtend/archive/v0.23.2.zip)
-
-- [Source code (tar.gz)](https://github.com/rasbt/mlxtend/archive/v0.23.2.tar.gz)
-
##### Changes
+- [`mlxtend.frequent_patterns.association_rules`](https://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/) Added optional parameter `return_metrics` to only return a given list of metrics, rather than every possible metric.
+
- Add `n_classes_` attribute to stacking classifiers for compatibility with scikit-learn 1.3 ([#1091](https://github.com/rasbt/mlxtend/issues/1091))
- Use Scipy's instead of NumPy's decompositions in PCA for improved accuracy in edge cases ([#1080](https://github.com/rasbt/mlxtend/issues/1080) via [[fkdosilovic](https://github.com/rasbt/mlxtend/issues?q=is%3Apr+is%3Aopen+author%3Afkdosilovic)])
diff --git a/docs/sources/user_guide/frequent_patterns/association_rules.ipynb b/docs/sources/user_guide/frequent_patterns/association_rules.ipynb
index f81a8562d..3f963b78f 100644
--- a/docs/sources/user_guide/frequent_patterns/association_rules.ipynb
+++ b/docs/sources/user_guide/frequent_patterns/association_rules.ipynb
@@ -96,6 +96,29 @@
"- introduced in [7]\n",
"\n",
"Measures both association and dissociation. Value ranges between -1 and 1. A positive value (>0) indicates Association and negative value indicated dissociation.\n",
+ "\n",
+ "#### 'jaccard':\n",
+ "\n",
+ "$$\\text{jaccard}(A\\rightarrow C) = \\frac{\\text{support}(A\\rightarrow C)}{\\text{support}(A) + \\text{support}(C) - \\text{support}(A\\rightarrow C)}, \\;\\;\\; \\text{range: } [0, 1]$$\n",
+ "\n",
+ "- introduced in [8]\n",
+ "\n",
+ "Measures similarity between A and C. Value ranges between 0 and 1. A value of 0 indicates complete dissimilarity, and a value of 1 indicates complete similarity.\n",
+ " \n",
+ "#### 'certainty':\n",
+ " $$\\text{certainty}(A\\rightarrow C) = \\frac{\\text{confidence}(A\\rightarrow C) - \\text{support}(C)}{1 - \\text{support}(C)}, \\;\\;\\; \\text{range: } [-1, 1]$$ \n",
+ "\n",
+ " - introduced in [9]\n",
+ " \n",
+ " Measures\n",
"the certainty between A and C. Value ranges from -1 to 1, where 0 indicates independence. \n",
+ "\n",
+ "#### 'kulczynski':\n",
+ " $$\\text{Kulczynski}(A\\rightarrow C) = \\frac{1}{2}\\left(\\frac{\\text{support}(A\\rightarrow C)}{\\text{support}(A)} + \\frac{\\text{support}(A\\rightarrow C)}{\\text{support}(C)}\\right), \\;\\;\\; \\text{range: } [0, 1]$$\n",
+ "\n",
+ " - introduced in [10]\n",
+ "\n",
" Measures the association between A and C. Value ranges from 0 to 1. Rules near 0 or 1 are considered negatively or positively associated, respectively. Rules near 0.5 are considered to be uninteresting.\n",
" "
]
},
@@ -122,7 +145,13 @@
"\n",
"[6] Sergey Brin, Rajeev Motwani, Jeffrey D. Ullman, and Shalom Turk. Dynamic itemset counting and implication rules for market basket data. In SIGMOD 1997, Proceedings ACM SIGMOD International Conference on Management of Data, pages 255-264, Tucson, Arizona, USA, May 1997\n",
"\n",
- "[7] Xiaowei Yan , Chengqi Zhang & Shichao Zhang (2009) CONFIDENCE METRICS FOR ASSOCIATION RULE MINING, Applied Artificial Intelligence, 23:8, 713-737 https://www.tandfonline.com/doi/pdf/10.1080/08839510903208062.\n"
+ "[7] Xiaowei Yan , Chengqi Zhang & Shichao Zhang (2009) CONFIDENCE METRICS FOR ASSOCIATION RULE MINING, Applied Artificial Intelligence, 23:8, 713-737 https://www.tandfonline.com/doi/pdf/10.1080/08839510903208062.\n",
+ "\n",
+ "[8] Pang-Ning Tan, Vipin Kumar, Jaideep Srivastava. Selecting the right objective measure for association analysis. Information Systems, Volume 29, Issue 4, 2004, Pages 293-313.\n",
+ "\n",
+ "[9] Berzal Fernando, Blanco Ignacio, Sánchez Daniel, Vila, María-Amparo. Measuring the accuracy and interest of association rules: A new framework. Intelligent Data Analysis, Volume 6, no. 3, 2002, Pages 221-235.\n",
+ "\n",
+ "[10] Wu, T., Chen, Y., Han, J. Re-examination of interestingness measures in pattern mining: a unified framework. Data Min Knowl Disc 21, 371–397 (2010). https://doi.org/10.1007/s10618-009-0161-2."
]
},
{
@@ -141,114 +170,9 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " support | \n",
- " itemsets | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 1.0 | \n",
- " (Kidney Beans) | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 0.8 | \n",
- " (Eggs) | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 0.6 | \n",
- " (Yogurt) | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 0.6 | \n",
- " (Onion) | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 0.6 | \n",
- " (Milk) | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 0.8 | \n",
- " (Kidney Beans, Eggs) | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " 0.6 | \n",
- " (Kidney Beans, Yogurt) | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " 0.6 | \n",
- " (Eggs, Onion) | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " 0.6 | \n",
- " (Kidney Beans, Onion) | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " 0.6 | \n",
- " (Eggs, Kidney Beans, Onion) | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " 0.6 | \n",
- " (Kidney Beans, Milk) | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " support itemsets\n",
- "0 1.0 (Kidney Beans)\n",
- "1 0.8 (Eggs)\n",
- "2 0.6 (Yogurt)\n",
- "3 0.6 (Onion)\n",
- "4 0.6 (Milk)\n",
- "5 0.8 (Kidney Beans, Eggs)\n",
- "6 0.6 (Kidney Beans, Yogurt)\n",
- "7 0.6 (Eggs, Onion)\n",
- "8 0.6 (Kidney Beans, Onion)\n",
- "9 0.6 (Eggs, Kidney Beans, Onion)\n",
- "10 0.6 (Kidney Beans, Milk)"
- ]
- },
- "execution_count": 1,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"import pandas as pd\n",
"from mlxtend.preprocessing import TransactionEncoder\n",
@@ -282,252 +206,9 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " antecedents | \n",
- " consequents | \n",
- " antecedent support | \n",
- " consequent support | \n",
- " support | \n",
- " confidence | \n",
- " lift | \n",
- " leverage | \n",
- " conviction | \n",
- " zhangs_metric | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " (Kidney Beans) | \n",
- " (Eggs) | \n",
- " 1.0 | \n",
- " 0.8 | \n",
- " 0.8 | \n",
- " 0.80 | \n",
- " 1.00 | \n",
- " 0.00 | \n",
- " 1.0 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " (Eggs) | \n",
- " (Kidney Beans) | \n",
- " 0.8 | \n",
- " 1.0 | \n",
- " 0.8 | \n",
- " 1.00 | \n",
- " 1.00 | \n",
- " 0.00 | \n",
- " inf | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " (Yogurt) | \n",
- " (Kidney Beans) | \n",
- " 0.6 | \n",
- " 1.0 | \n",
- " 0.6 | \n",
- " 1.00 | \n",
- " 1.00 | \n",
- " 0.00 | \n",
- " inf | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " (Eggs) | \n",
- " (Onion) | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 0.6 | \n",
- " 0.75 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " 1.6 | \n",
- " 1.0 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " (Onion) | \n",
- " (Eggs) | \n",
- " 0.6 | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 1.00 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " inf | \n",
- " 0.5 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " (Onion) | \n",
- " (Kidney Beans) | \n",
- " 0.6 | \n",
- " 1.0 | \n",
- " 0.6 | \n",
- " 1.00 | \n",
- " 1.00 | \n",
- " 0.00 | \n",
- " inf | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " (Kidney Beans, Eggs) | \n",
- " (Onion) | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 0.6 | \n",
- " 0.75 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " 1.6 | \n",
- " 1.0 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " (Onion, Eggs) | \n",
- " (Kidney Beans) | \n",
- " 0.6 | \n",
- " 1.0 | \n",
- " 0.6 | \n",
- " 1.00 | \n",
- " 1.00 | \n",
- " 0.00 | \n",
- " inf | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " (Kidney Beans, Onion) | \n",
- " (Eggs) | \n",
- " 0.6 | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 1.00 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " inf | \n",
- " 0.5 | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " (Eggs) | \n",
- " (Kidney Beans, Onion) | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 0.6 | \n",
- " 0.75 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " 1.6 | \n",
- " 1.0 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " (Onion) | \n",
- " (Kidney Beans, Eggs) | \n",
- " 0.6 | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 1.00 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " inf | \n",
- " 0.5 | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " (Milk) | \n",
- " (Kidney Beans) | \n",
- " 0.6 | \n",
- " 1.0 | \n",
- " 0.6 | \n",
- " 1.00 | \n",
- " 1.00 | \n",
- " 0.00 | \n",
- " inf | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " antecedents consequents antecedent support \\\n",
- "0 (Kidney Beans) (Eggs) 1.0 \n",
- "1 (Eggs) (Kidney Beans) 0.8 \n",
- "2 (Yogurt) (Kidney Beans) 0.6 \n",
- "3 (Eggs) (Onion) 0.8 \n",
- "4 (Onion) (Eggs) 0.6 \n",
- "5 (Onion) (Kidney Beans) 0.6 \n",
- "6 (Kidney Beans, Eggs) (Onion) 0.8 \n",
- "7 (Onion, Eggs) (Kidney Beans) 0.6 \n",
- "8 (Kidney Beans, Onion) (Eggs) 0.6 \n",
- "9 (Eggs) (Kidney Beans, Onion) 0.8 \n",
- "10 (Onion) (Kidney Beans, Eggs) 0.6 \n",
- "11 (Milk) (Kidney Beans) 0.6 \n",
- "\n",
- " consequent support support confidence lift leverage conviction \\\n",
- "0 0.8 0.8 0.80 1.00 0.00 1.0 \n",
- "1 1.0 0.8 1.00 1.00 0.00 inf \n",
- "2 1.0 0.6 1.00 1.00 0.00 inf \n",
- "3 0.6 0.6 0.75 1.25 0.12 1.6 \n",
- "4 0.8 0.6 1.00 1.25 0.12 inf \n",
- "5 1.0 0.6 1.00 1.00 0.00 inf \n",
- "6 0.6 0.6 0.75 1.25 0.12 1.6 \n",
- "7 1.0 0.6 1.00 1.00 0.00 inf \n",
- "8 0.8 0.6 1.00 1.25 0.12 inf \n",
- "9 0.6 0.6 0.75 1.25 0.12 1.6 \n",
- "10 0.8 0.6 1.00 1.25 0.12 inf \n",
- "11 1.0 0.6 1.00 1.00 0.00 inf \n",
- "\n",
- " zhangs_metric \n",
- "0 0.0 \n",
- "1 0.0 \n",
- "2 0.0 \n",
- "3 1.0 \n",
- "4 0.5 \n",
- "5 0.0 \n",
- "6 1.0 \n",
- "7 0.0 \n",
- "8 0.5 \n",
- "9 1.0 \n",
- "10 0.5 \n",
- "11 0.0 "
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"from mlxtend.frequent_patterns import association_rules\n",
"\n",
@@ -550,156 +231,9 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " antecedents | \n",
- " consequents | \n",
- " antecedent support | \n",
- " consequent support | \n",
- " support | \n",
- " confidence | \n",
- " lift | \n",
- " leverage | \n",
- " conviction | \n",
- " zhangs_metric | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " (Eggs) | \n",
- " (Onion) | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 0.6 | \n",
- " 0.75 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " 1.6 | \n",
- " 1.0 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " (Onion) | \n",
- " (Eggs) | \n",
- " 0.6 | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 1.00 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " inf | \n",
- " 0.5 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " (Kidney Beans, Eggs) | \n",
- " (Onion) | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 0.6 | \n",
- " 0.75 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " 1.6 | \n",
- " 1.0 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " (Kidney Beans, Onion) | \n",
- " (Eggs) | \n",
- " 0.6 | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 1.00 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " inf | \n",
- " 0.5 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " (Eggs) | \n",
- " (Kidney Beans, Onion) | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 0.6 | \n",
- " 0.75 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " 1.6 | \n",
- " 1.0 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " (Onion) | \n",
- " (Kidney Beans, Eggs) | \n",
- " 0.6 | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 1.00 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " inf | \n",
- " 0.5 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " antecedents consequents antecedent support \\\n",
- "0 (Eggs) (Onion) 0.8 \n",
- "1 (Onion) (Eggs) 0.6 \n",
- "2 (Kidney Beans, Eggs) (Onion) 0.8 \n",
- "3 (Kidney Beans, Onion) (Eggs) 0.6 \n",
- "4 (Eggs) (Kidney Beans, Onion) 0.8 \n",
- "5 (Onion) (Kidney Beans, Eggs) 0.6 \n",
- "\n",
- " consequent support support confidence lift leverage conviction \\\n",
- "0 0.6 0.6 0.75 1.25 0.12 1.6 \n",
- "1 0.8 0.6 1.00 1.25 0.12 inf \n",
- "2 0.6 0.6 0.75 1.25 0.12 1.6 \n",
- "3 0.8 0.6 1.00 1.25 0.12 inf \n",
- "4 0.6 0.6 0.75 1.25 0.12 1.6 \n",
- "5 0.8 0.6 1.00 1.25 0.12 inf \n",
- "\n",
- " zhangs_metric \n",
- "0 1.0 \n",
- "1 0.5 \n",
- "2 1.0 \n",
- "3 0.5 \n",
- "4 1.0 \n",
- "5 0.5 "
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"rules = association_rules(frequent_itemsets, metric=\"lift\", min_threshold=1.2)\n",
"rules"
@@ -720,163 +254,9 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " antecedents | \n",
- " consequents | \n",
- " antecedent support | \n",
- " consequent support | \n",
- " support | \n",
- " confidence | \n",
- " lift | \n",
- " leverage | \n",
- " conviction | \n",
- " zhangs_metric | \n",
- " antecedent_len | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " (Eggs) | \n",
- " (Onion) | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 0.6 | \n",
- " 0.75 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " 1.6 | \n",
- " 1.0 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " (Onion) | \n",
- " (Eggs) | \n",
- " 0.6 | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 1.00 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " inf | \n",
- " 0.5 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " (Kidney Beans, Eggs) | \n",
- " (Onion) | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 0.6 | \n",
- " 0.75 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " 1.6 | \n",
- " 1.0 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " (Kidney Beans, Onion) | \n",
- " (Eggs) | \n",
- " 0.6 | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 1.00 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " inf | \n",
- " 0.5 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " (Eggs) | \n",
- " (Kidney Beans, Onion) | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 0.6 | \n",
- " 0.75 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " 1.6 | \n",
- " 1.0 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " (Onion) | \n",
- " (Kidney Beans, Eggs) | \n",
- " 0.6 | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 1.00 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " inf | \n",
- " 0.5 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " antecedents consequents antecedent support \\\n",
- "0 (Eggs) (Onion) 0.8 \n",
- "1 (Onion) (Eggs) 0.6 \n",
- "2 (Kidney Beans, Eggs) (Onion) 0.8 \n",
- "3 (Kidney Beans, Onion) (Eggs) 0.6 \n",
- "4 (Eggs) (Kidney Beans, Onion) 0.8 \n",
- "5 (Onion) (Kidney Beans, Eggs) 0.6 \n",
- "\n",
- " consequent support support confidence lift leverage conviction \\\n",
- "0 0.6 0.6 0.75 1.25 0.12 1.6 \n",
- "1 0.8 0.6 1.00 1.25 0.12 inf \n",
- "2 0.6 0.6 0.75 1.25 0.12 1.6 \n",
- "3 0.8 0.6 1.00 1.25 0.12 inf \n",
- "4 0.6 0.6 0.75 1.25 0.12 1.6 \n",
- "5 0.8 0.6 1.00 1.25 0.12 inf \n",
- "\n",
- " zhangs_metric antecedent_len \n",
- "0 1.0 1 \n",
- "1 0.5 1 \n",
- "2 1.0 2 \n",
- "3 0.5 2 \n",
- "4 1.0 1 \n",
- "5 0.5 1 "
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"rules[\"antecedent_len\"] = rules[\"antecedents\"].apply(lambda x: len(x))\n",
"rules"
@@ -891,78 +271,9 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " antecedents | \n",
- " consequents | \n",
- " antecedent support | \n",
- " consequent support | \n",
- " support | \n",
- " confidence | \n",
- " lift | \n",
- " leverage | \n",
- " conviction | \n",
- " zhangs_metric | \n",
- " antecedent_len | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 3 | \n",
- " (Kidney Beans, Onion) | \n",
- " (Eggs) | \n",
- " 0.6 | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 1.0 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " inf | \n",
- " 0.5 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " antecedents consequents antecedent support consequent support \\\n",
- "3 (Kidney Beans, Onion) (Eggs) 0.6 0.8 \n",
- "\n",
- " support confidence lift leverage conviction zhangs_metric \\\n",
- "3 0.6 1.0 1.25 0.12 inf 0.5 \n",
- "\n",
- " antecedent_len \n",
- "3 2 "
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"rules[ (rules['antecedent_len'] >= 2) &\n",
" (rules['confidence'] > 0.75) &\n",
@@ -978,78 +289,9 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " antecedents | \n",
- " consequents | \n",
- " antecedent support | \n",
- " consequent support | \n",
- " support | \n",
- " confidence | \n",
- " lift | \n",
- " leverage | \n",
- " conviction | \n",
- " zhangs_metric | \n",
- " antecedent_len | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 2 | \n",
- " (Kidney Beans, Eggs) | \n",
- " (Onion) | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 0.6 | \n",
- " 0.75 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " 1.6 | \n",
- " 1.0 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " antecedents consequents antecedent support consequent support \\\n",
- "2 (Kidney Beans, Eggs) (Onion) 0.8 0.6 \n",
- "\n",
- " support confidence lift leverage conviction zhangs_metric \\\n",
- "2 0.6 0.75 1.25 0.12 1.6 1.0 \n",
- "\n",
- " antecedent_len \n",
- "2 2 "
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"rules[rules['antecedents'] == {'Eggs', 'Kidney Beans'}]"
]
@@ -1089,90 +331,9 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " itemsets | \n",
- " support | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " [177, 176] | \n",
- " 0.253623 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " [177, 179] | \n",
- " 0.253623 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " [176, 178] | \n",
- " 0.217391 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " [176, 179] | \n",
- " 0.217391 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " [93, 100] | \n",
- " 0.181159 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " [177, 178] | \n",
- " 0.108696 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " [177, 176, 178] | \n",
- " 0.108696 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " itemsets support\n",
- "0 [177, 176] 0.253623\n",
- "1 [177, 179] 0.253623\n",
- "2 [176, 178] 0.217391\n",
- "3 [176, 179] 0.217391\n",
- "4 [93, 100] 0.181159\n",
- "5 [177, 178] 0.108696\n",
- "6 [177, 176, 178] 0.108696"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"import pandas as pd\n",
"\n",
@@ -1209,328 +370,9 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " antecedents | \n",
- " consequents | \n",
- " antecedent support | \n",
- " consequent support | \n",
- " support | \n",
- " confidence | \n",
- " lift | \n",
- " leverage | \n",
- " conviction | \n",
- " zhangs_metric | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " (176) | \n",
- " (177) | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.253623 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " (177) | \n",
- " (176) | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.253623 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " (179) | \n",
- " (177) | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.253623 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " (177) | \n",
- " (179) | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.253623 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " (178) | \n",
- " (176) | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.217391 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " (176) | \n",
- " (178) | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.217391 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " (179) | \n",
- " (176) | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.217391 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " (176) | \n",
- " (179) | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.217391 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " (100) | \n",
- " (93) | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.181159 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " (93) | \n",
- " (100) | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.181159 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " (178) | \n",
- " (177) | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.108696 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " (177) | \n",
- " (178) | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.108696 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " (178, 176) | \n",
- " (177) | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.108696 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " (178, 177) | \n",
- " (176) | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.108696 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " (177, 176) | \n",
- " (178) | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.108696 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " (178) | \n",
- " (177, 176) | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.108696 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " (176) | \n",
- " (178, 177) | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.108696 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " (177) | \n",
- " (178, 176) | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.108696 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " antecedents consequents antecedent support consequent support support \\\n",
- "0 (176) (177) NaN NaN 0.253623 \n",
- "1 (177) (176) NaN NaN 0.253623 \n",
- "2 (179) (177) NaN NaN 0.253623 \n",
- "3 (177) (179) NaN NaN 0.253623 \n",
- "4 (178) (176) NaN NaN 0.217391 \n",
- "5 (176) (178) NaN NaN 0.217391 \n",
- "6 (179) (176) NaN NaN 0.217391 \n",
- "7 (176) (179) NaN NaN 0.217391 \n",
- "8 (100) (93) NaN NaN 0.181159 \n",
- "9 (93) (100) NaN NaN 0.181159 \n",
- "10 (178) (177) NaN NaN 0.108696 \n",
- "11 (177) (178) NaN NaN 0.108696 \n",
- "12 (178, 176) (177) NaN NaN 0.108696 \n",
- "13 (178, 177) (176) NaN NaN 0.108696 \n",
- "14 (177, 176) (178) NaN NaN 0.108696 \n",
- "15 (178) (177, 176) NaN NaN 0.108696 \n",
- "16 (176) (178, 177) NaN NaN 0.108696 \n",
- "17 (177) (178, 176) NaN NaN 0.108696 \n",
- "\n",
- " confidence lift leverage conviction zhangs_metric \n",
- "0 NaN NaN NaN NaN NaN \n",
- "1 NaN NaN NaN NaN NaN \n",
- "2 NaN NaN NaN NaN NaN \n",
- "3 NaN NaN NaN NaN NaN \n",
- "4 NaN NaN NaN NaN NaN \n",
- "5 NaN NaN NaN NaN NaN \n",
- "6 NaN NaN NaN NaN NaN \n",
- "7 NaN NaN NaN NaN NaN \n",
- "8 NaN NaN NaN NaN NaN \n",
- "9 NaN NaN NaN NaN NaN \n",
- "10 NaN NaN NaN NaN NaN \n",
- "11 NaN NaN NaN NaN NaN \n",
- "12 NaN NaN NaN NaN NaN \n",
- "13 NaN NaN NaN NaN NaN \n",
- "14 NaN NaN NaN NaN NaN \n",
- "15 NaN NaN NaN NaN NaN \n",
- "16 NaN NaN NaN NaN NaN \n",
- "17 NaN NaN NaN NaN NaN "
- ]
- },
- "execution_count": 11,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"from mlxtend.frequent_patterns import association_rules\n",
"\n",
@@ -1547,175 +389,9 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " antecedents | \n",
- " consequents | \n",
- " support | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " (176) | \n",
- " (177) | \n",
- " 0.253623 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " (177) | \n",
- " (176) | \n",
- " 0.253623 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " (179) | \n",
- " (177) | \n",
- " 0.253623 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " (177) | \n",
- " (179) | \n",
- " 0.253623 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " (178) | \n",
- " (176) | \n",
- " 0.217391 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " (176) | \n",
- " (178) | \n",
- " 0.217391 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " (179) | \n",
- " (176) | \n",
- " 0.217391 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " (176) | \n",
- " (179) | \n",
- " 0.217391 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " (100) | \n",
- " (93) | \n",
- " 0.181159 | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " (93) | \n",
- " (100) | \n",
- " 0.181159 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " (178) | \n",
- " (177) | \n",
- " 0.108696 | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " (177) | \n",
- " (178) | \n",
- " 0.108696 | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " (178, 176) | \n",
- " (177) | \n",
- " 0.108696 | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " (178, 177) | \n",
- " (176) | \n",
- " 0.108696 | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " (177, 176) | \n",
- " (178) | \n",
- " 0.108696 | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " (178) | \n",
- " (177, 176) | \n",
- " 0.108696 | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " (176) | \n",
- " (178, 177) | \n",
- " 0.108696 | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " (177) | \n",
- " (178, 176) | \n",
- " 0.108696 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " antecedents consequents support\n",
- "0 (176) (177) 0.253623\n",
- "1 (177) (176) 0.253623\n",
- "2 (179) (177) 0.253623\n",
- "3 (177) (179) 0.253623\n",
- "4 (178) (176) 0.217391\n",
- "5 (176) (178) 0.217391\n",
- "6 (179) (176) 0.217391\n",
- "7 (176) (179) 0.217391\n",
- "8 (100) (93) 0.181159\n",
- "9 (93) (100) 0.181159\n",
- "10 (178) (177) 0.108696\n",
- "11 (177) (178) 0.108696\n",
- "12 (178, 176) (177) 0.108696\n",
- "13 (178, 177) (176) 0.108696\n",
- "14 (177, 176) (178) 0.108696\n",
- "15 (178) (177, 176) 0.108696\n",
- "16 (176) (178, 177) 0.108696\n",
- "17 (177) (178, 176) 0.108696"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"res = res[['antecedents', 'consequents', 'support']]\n",
"res"
@@ -1738,156 +414,9 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " antecedents | \n",
- " consequents | \n",
- " antecedent support | \n",
- " consequent support | \n",
- " support | \n",
- " confidence | \n",
- " lift | \n",
- " leverage | \n",
- " conviction | \n",
- " zhangs_metric | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " (Eggs) | \n",
- " (Onion) | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 0.6 | \n",
- " 0.75 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " 1.6 | \n",
- " 1.0 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " (Onion) | \n",
- " (Eggs) | \n",
- " 0.6 | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 1.00 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " inf | \n",
- " 0.5 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " (Kidney Beans, Eggs) | \n",
- " (Onion) | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 0.6 | \n",
- " 0.75 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " 1.6 | \n",
- " 1.0 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " (Kidney Beans, Onion) | \n",
- " (Eggs) | \n",
- " 0.6 | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 1.00 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " inf | \n",
- " 0.5 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " (Eggs) | \n",
- " (Kidney Beans, Onion) | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 0.6 | \n",
- " 0.75 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " 1.6 | \n",
- " 1.0 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " (Onion) | \n",
- " (Kidney Beans, Eggs) | \n",
- " 0.6 | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 1.00 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " inf | \n",
- " 0.5 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " antecedents consequents antecedent support \\\n",
- "0 (Eggs) (Onion) 0.8 \n",
- "1 (Onion) (Eggs) 0.6 \n",
- "2 (Kidney Beans, Eggs) (Onion) 0.8 \n",
- "3 (Kidney Beans, Onion) (Eggs) 0.6 \n",
- "4 (Eggs) (Kidney Beans, Onion) 0.8 \n",
- "5 (Onion) (Kidney Beans, Eggs) 0.6 \n",
- "\n",
- " consequent support support confidence lift leverage conviction \\\n",
- "0 0.6 0.6 0.75 1.25 0.12 1.6 \n",
- "1 0.8 0.6 1.00 1.25 0.12 inf \n",
- "2 0.6 0.6 0.75 1.25 0.12 1.6 \n",
- "3 0.8 0.6 1.00 1.25 0.12 inf \n",
- "4 0.6 0.6 0.75 1.25 0.12 1.6 \n",
- "5 0.8 0.6 1.00 1.25 0.12 inf \n",
- "\n",
- " zhangs_metric \n",
- "0 1.0 \n",
- "1 0.5 \n",
- "2 1.0 \n",
- "3 0.5 \n",
- "4 1.0 \n",
- "5 0.5 "
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"import pandas as pd\n",
"from mlxtend.preprocessing import TransactionEncoder\n",
@@ -1919,140 +448,9 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " antecedents | \n",
- " consequents | \n",
- " antecedent support | \n",
- " consequent support | \n",
- " support | \n",
- " confidence | \n",
- " lift | \n",
- " leverage | \n",
- " conviction | \n",
- " zhangs_metric | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " (Eggs) | \n",
- " (Onion) | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 0.6 | \n",
- " 0.75 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " 1.6 | \n",
- " 1.0 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " (Onion) | \n",
- " (Eggs) | \n",
- " 0.6 | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 1.00 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " inf | \n",
- " 0.5 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " (Kidney Beans, Eggs) | \n",
- " (Onion) | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 0.6 | \n",
- " 0.75 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " 1.6 | \n",
- " 1.0 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " (Eggs) | \n",
- " (Kidney Beans, Onion) | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 0.6 | \n",
- " 0.75 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " 1.6 | \n",
- " 1.0 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " (Onion) | \n",
- " (Kidney Beans, Eggs) | \n",
- " 0.6 | \n",
- " 0.8 | \n",
- " 0.6 | \n",
- " 1.00 | \n",
- " 1.25 | \n",
- " 0.12 | \n",
- " inf | \n",
- " 0.5 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " antecedents consequents antecedent support \\\n",
- "0 (Eggs) (Onion) 0.8 \n",
- "1 (Onion) (Eggs) 0.6 \n",
- "2 (Kidney Beans, Eggs) (Onion) 0.8 \n",
- "4 (Eggs) (Kidney Beans, Onion) 0.8 \n",
- "5 (Onion) (Kidney Beans, Eggs) 0.6 \n",
- "\n",
- " consequent support support confidence lift leverage conviction \\\n",
- "0 0.6 0.6 0.75 1.25 0.12 1.6 \n",
- "1 0.8 0.6 1.00 1.25 0.12 inf \n",
- "2 0.6 0.6 0.75 1.25 0.12 1.6 \n",
- "4 0.6 0.6 0.75 1.25 0.12 1.6 \n",
- "5 0.8 0.6 1.00 1.25 0.12 inf \n",
- "\n",
- " zhangs_metric \n",
- "0 1.0 \n",
- "1 0.5 \n",
- "2 1.0 \n",
- "4 1.0 \n",
- "5 0.5 "
- ]
- },
- "execution_count": 14,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"antecedent_sele = rules['antecedents'] == frozenset({'Onion', 'Kidney Beans'}) # or frozenset({'Kidney Beans', 'Onion'})\n",
"consequent_sele = rules['consequents'] == frozenset({'Eggs'})\n",
@@ -2096,7 +494,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.6"
+ "version": "3.11.9"
},
"toc": {
"nav_menu": {},
diff --git a/mlxtend/frequent_patterns/association_rules.py b/mlxtend/frequent_patterns/association_rules.py
index bcd243482..16cba9b3d 100644
--- a/mlxtend/frequent_patterns/association_rules.py
+++ b/mlxtend/frequent_patterns/association_rules.py
@@ -13,8 +13,28 @@
import numpy as np
import pandas as pd
-
-def association_rules(df, metric="confidence", min_threshold=0.8, support_only=False):
+_metrics = [
+ "antecedent support",
+ "consequent support",
+ "support",
+ "confidence",
+ "lift",
+ "leverage",
+ "conviction",
+ "zhangs_metric",
+ "jaccard",
+ "certainty",
+ "kulczynski",
+]
+
+
+def association_rules(
+ df: pd.DataFrame,
+ metric="confidence",
+ min_threshold=0.8,
+ support_only=False,
+    return_metrics: list = _metrics,  # NOTE(review): shared mutable default — must not be mutated in place
+) -> pd.DataFrame:
"""Generates a DataFrame of association rules including the
metrics 'score', 'confidence', and 'lift'
@@ -91,6 +111,12 @@ def association_rules(df, metric="confidence", min_threshold=0.8, support_only=F
columns 'support' and 'itemsets'"
)
+ def kulczynski_helper(sAC, sA, sC):
+ conf_AC = sAC / sA
+ conf_CA = sAC / sC
+ kulczynski = (conf_AC + conf_CA) / 2
+ return kulczynski
+
def conviction_helper(sAC, sA, sC):
confidence = sAC / sA
conviction = np.empty(confidence.shape, dtype=float)
@@ -117,6 +143,20 @@ def zhangs_metric_helper(sAC, sA, sC):
return zhangs_metric
+ def jaccard_metric_helper(sAC, sA, sC):
+ numerator = metric_dict["support"](sAC, sA, sC)
+ denominator = sA + sC - numerator
+
+ jaccard_metric = numerator / denominator
+ return jaccard_metric
+
+ def certainty_metric_helper(sAC, sA, sC):
+ certainty_num = metric_dict["confidence"](sAC, sA, sC) - sC
+ certainty_denom = 1 - sC
+
+        cert_metric = np.where(certainty_denom == 0, 0, certainty_num / np.where(certainty_denom == 0, 1, certainty_denom))
+ return cert_metric
+
# metrics for association rules
metric_dict = {
"antecedent support": lambda _, sA, __: sA,
@@ -127,19 +167,11 @@ def zhangs_metric_helper(sAC, sA, sC):
"leverage": lambda sAC, sA, sC: metric_dict["support"](sAC, sA, sC) - sA * sC,
"conviction": lambda sAC, sA, sC: conviction_helper(sAC, sA, sC),
"zhangs_metric": lambda sAC, sA, sC: zhangs_metric_helper(sAC, sA, sC),
+ "jaccard": lambda sAC, sA, sC: jaccard_metric_helper(sAC, sA, sC),
+ "certainty": lambda sAC, sA, sC: certainty_metric_helper(sAC, sA, sC),
+ "kulczynski": lambda sAC, sA, sC: kulczynski_helper(sAC, sA, sC),
}
- columns_ordered = [
- "antecedent support",
- "consequent support",
- "support",
- "confidence",
- "lift",
- "leverage",
- "conviction",
- "zhangs_metric",
- ]
-
# check for metric compliance
if support_only:
metric = "support"
@@ -200,7 +232,7 @@ def zhangs_metric_helper(sAC, sA, sC):
# check if frequent rule was generated
if not rule_supports:
- return pd.DataFrame(columns=["antecedents", "consequents"] + columns_ordered)
+ return pd.DataFrame(columns=["antecedents", "consequents"] + return_metrics)
else:
# generate metrics
@@ -212,7 +244,7 @@ def zhangs_metric_helper(sAC, sA, sC):
if support_only:
sAC = rule_supports[0]
- for m in columns_ordered:
+ for m in return_metrics:
df_res[m] = np.nan
df_res["support"] = sAC
@@ -220,7 +252,7 @@ def zhangs_metric_helper(sAC, sA, sC):
sAC = rule_supports[0]
sA = rule_supports[1]
sC = rule_supports[2]
- for m in columns_ordered:
+ for m in return_metrics:
df_res[m] = metric_dict[m](sAC, sA, sC)
return df_res
diff --git a/mlxtend/frequent_patterns/tests/test_association_rules.py b/mlxtend/frequent_patterns/tests/test_association_rules.py
index c2850924b..1035183c9 100644
--- a/mlxtend/frequent_patterns/tests/test_association_rules.py
+++ b/mlxtend/frequent_patterns/tests/test_association_rules.py
@@ -46,9 +46,13 @@
"leverage",
"conviction",
"zhangs_metric",
+ "jaccard",
+ "certainty",
+ "kulczynski",
]
+# fmt: off
def test_default():
res_df = association_rules(df_freq_items)
res_df["antecedents"] = res_df["antecedents"].apply(lambda x: str(frozenset(x)))
@@ -58,16 +62,17 @@ def test_default():
expect = pd.DataFrame(
[
- [(8,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0],
- [(6,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0],
- [(8, 3), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0],
- [(8, 5), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5],
- [(8,), (3, 5), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5],
- [(3,), (5,), 0.8, 1.0, 0.8, 1.0, 1.0, 0.0, np.inf, 0],
- [(5,), (3,), 1.0, 0.8, 0.8, 0.8, 1.0, 0.0, 1.0, 0],
- [(10,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0],
- [(8,), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5],
+ [(8,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0, 0.8],
+ [(6,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0, 0.8],
+ [(8, 3), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0, 0.8],
+ [(8, 5), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75, 1.0, 0.875],
+ [(8,), (3, 5), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75, 1.0, 0.875],
+ [(3,), (5,), 0.8, 1.0, 0.8, 1.0, 1.0, 0.0, np.inf, 0, 0.8, 0.0, 0.9],
+ [(5,), (3,), 1.0, 0.8, 0.8, 0.8, 1.0, 0.0, 1.0, 0.0, 0.8, 0.0, 0.9],
+ [(10,), (5,), 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, np.inf, 0, 0.6, 0.0, 0.8],
+ [(8,), (3,), 0.6, 0.8, 0.6, 1.0, 1.25, 0.12, np.inf, 0.5, 0.75, 1.0, 0.875],
],
+
columns=columns_ordered,
)
@@ -75,8 +80,8 @@ def test_default():
expect["consequents"] = expect["consequents"].apply(lambda x: str(frozenset(x)))
expect.sort_values(columns_ordered, inplace=True)
expect.reset_index(inplace=True, drop=True)
-
assert res_df.equals(expect), res_df
+# fmt: on
def test_datatypes():
@@ -130,6 +135,9 @@ def test_empty_result():
"leverage",
"conviction",
"zhangs_metric",
+ "jaccard",
+ "certainty",
+ "kulczynski",
]
)
res_df = association_rules(df_freq_items, min_threshold=2)
@@ -176,6 +184,36 @@ def test_confidence():
assert res_df.values.shape[0] == 9
+def test_jaccard():
+ res_df = association_rules(df_freq_items, min_threshold=0.7, metric="jaccard")
+ assert res_df.values.shape[0] == 8
+
+ res_df = association_rules(
+ df_freq_items_with_colnames, min_threshold=0.7, metric="jaccard"
+ )
+ assert res_df.values.shape[0] == 8
+
+
+def test_certainty():
+ res_df = association_rules(df_freq_items, metric="certainty", min_threshold=0.6)
+ assert res_df.values.shape[0] == 3
+
+ res_df = association_rules(
+ df_freq_items_with_colnames, metric="certainty", min_threshold=0.6
+ )
+ assert res_df.values.shape[0] == 3
+
+
+def test_kulczynski():
+ res_df = association_rules(df_freq_items, metric="kulczynski", min_threshold=0.9)
+ assert res_df.values.shape[0] == 2
+
+ res_df = association_rules(
+ df_freq_items_with_colnames, metric="kulczynski", min_threshold=0.6
+ )
+ assert res_df.values.shape[0] == 16
+
+
def test_frozenset_selection():
res_df = association_rules(df_freq_items)