Skip to content

Commit abe5032

Browse files
author
Devin Lu
committed
docs(eda): added target analysis description to docstrings and eda docs
1 parent 03bc7b4 commit abe5032

File tree

3 files changed

+41
-0
lines changed

3 files changed

+41
-0
lines changed

dataprep/eda/create_diff_report/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ def create_diff_report(
3636
----------
3737
df_list
3838
The DataFrames for which data are calculated.
39+
target
40+
Target feature to be compared against all other columns.
3941
config
4042
A dictionary for configuring the visualizations
4143
E.g. config={"hist.bins": 20}

dataprep/eda/create_diff_report/diff_formatter.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@ def format_diff_report(
8484
Currently only 'basic' is fully implemented.
8585
progress
8686
Whether to show the progress bar.
87+
target
88+
Target feature to be compared against all other columns.
8789
8890
Returns
8991
-------
@@ -122,6 +124,13 @@ def format_diff_report(
122124
def validate_target(target: str, df_list: List[pd.DataFrame]):
123125
"""
124126
Helper function that verifies that the target column exists.
127+
128+
Parameters
129+
----------
130+
target
131+
Target feature to be compared against all other columns.
132+
df_list
133+
The DataFrames for which data are calculated.
125134
"""
126135
exists = False
127136
for df in df_list:
@@ -140,6 +149,8 @@ def format_basic(df_list: List[pd.DataFrame], target: Optional[str], cfg: Config
140149
----------
141150
df_list
142151
The DataFrames for which data are calculated.
152+
target
153+
Target feature to be compared against all other columns.
143154
cfg
144155
The config dict user passed in. E.g. config = {"hist.bins": 20}
145156
Without user's specifications, the default is "auto"
@@ -242,6 +253,8 @@ def compute_plot_data(
242253
E.g. dtype = {"a": Continuous, "b": "Nominal"} or
243254
dtype = {"a": Continuous(), "b": "nominal"}
244255
or dtype = Continuous() or dtype = "Continuous"
256+
target
257+
Target feature to be compared against all other columns.
245258
"""
246259
# pylint: disable=too-many-branches, too-many-locals
247260

docs/source/user_guide/eda/create_diff_report.ipynb

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,32 @@
8181
"source": [
8282
"create_diff_report({\"df_train\": df_train, \"df_test\": df_test})"
8383
]
84+
},
85+
{
86+
"cell_type": "markdown",
87+
"id": "b15f40d6",
88+
"metadata": {},
89+
"source": [
90+
"## Target Feature Analysis"
91+
]
92+
},
93+
{
94+
"cell_type": "markdown",
95+
"id": "a79fe776",
96+
"metadata": {},
97+
"source": [
98+
"Users can also pass in a target column to compare how that feature relates to all other features in both DataFrames."
99+
]
100+
},
101+
{
102+
"cell_type": "code",
103+
"execution_count": null,
104+
"id": "8d6bed85",
105+
"metadata": {},
106+
"outputs": [],
107+
"source": [
108+
"create_diff_report([df_train, df_test], target='LotArea')"
109+
]
84110
}
85111
],
86112
"metadata": {

0 commit comments

Comments
 (0)