Skip to content

Commit 0b801fa

Browse files
author
Devin Lu
committed
refactor(eda): validated numerical target dtype, removed comments
1 parent abe5032 commit 0b801fa

File tree

3 files changed

+6
-14
lines changed

3 files changed

+6
-14
lines changed

dataprep/eda/create_diff_report/__init__.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -85,16 +85,6 @@ def create_diff_report(
8585
"legend_labels": components["legend_lables"],
8686
}
8787

88-
# {% for div in value.plots[1] %}
89-
# <div class="vp-plot">
90-
# {{ div }}
91-
# {% if key in context.components.dfs[1].variables %}
92-
# {{ context.components.dfs[1].variables[key].plots[1][loop.index0] }}
93-
# {% endif %}
94-
# </div>
95-
96-
# return context
97-
9888
template_base = ENV_LOADER.get_template("base.html")
9989
report = template_base.render(context=context, zip=zip)
10090
return Report(report)

dataprep/eda/create_diff_report/diff_formatter.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,8 @@ def compute_plot_data(
282282
col_dtype = col_dtype[0]
283283

284284
orig = [src for src, seq in labeled_cols.items() if col in seq]
285+
if col == target and not is_dtype(col_dtype, Continuous_v1()):
286+
raise ValueError("Sorry, target must be a numerical feature.")
285287

286288
if is_dtype(col_dtype, Continuous_v1()):
287289
data.append((col, Continuous_v1(), diff_cont_calcs(srs.apply("dropna"), cfg), orig))

dataprep/eda/diff/render.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ def bar_viz(
101101
("Source", "@orig"),
102102
]
103103

104+
# Used to add y-padding to the graphs
104105
col1_min = df[0][col].min()
105106
col2_min = df[1][col].min()
106107
col1_max = df[0][col].max()
@@ -159,6 +160,7 @@ def bar_viz(
159160
_format_axis(fig, 0, df[baseline].max(), "y")
160161

161162
df1, df2 = df_list[0], df_list[1]
163+
# Feature analysis: runs only when a target is set, differs from this column, and the column exists in both dataframes
162164
if target != col and target and col in df1.columns and col in df2.columns:
163165
col1, col2 = df_list[0][col], df_list[1][col]
164166
row_avgs_1 = []
@@ -261,7 +263,6 @@ def hist_viz(
261263
)
262264
bottom = 0 if yscale == "linear" or df.empty else counts.min() / 2
263265
if y_start is not None and y_end is not None:
264-
# fig.y_range = (y_start * (1 - y_inc), y_end * (1 + y_inc))
265266
fig.extra_y_ranges = {
266267
"Counts": Range1d(start=y_start * (1 - y_inc), end=y_end * (1 + y_inc))
267268
}
@@ -287,8 +288,6 @@ def hist_viz(
287288
fill_color=CATEGORY10[i],
288289
line_color=CATEGORY10[i],
289290
)
290-
# if col == 'LotFrontage':
291-
# breakpoint()
292291

293292
hover = HoverTool(tooltips=tooltips, attachment="vertical", mode="vline")
294293
fig.add_tools(hover)
@@ -312,6 +311,7 @@ def hist_viz(
312311
fig.xaxis.axis_label = x_axis_label
313312
fig.xaxis.axis_label_standoff = 0
314313

314+
# Feature analysis: when a target other than this column is set, plot against the secondary "Averages" y-axis
315315
if target and target != col and col in df1.columns and col in df2.columns:
316316
col1, col2 = df1[col], df2[col]
317317
source1, source2 = col1, col2
@@ -347,7 +347,7 @@ def hist_viz(
347347
y_range_name="Averages",
348348
line_width=4,
349349
)
350-
fig.add_layout(LinearAxis(y_range_name="Averages", axis_label="Bin Averages"), "right")
350+
fig.add_layout(LinearAxis(y_range_name="Averages"), "right")
351351
return fig
352352

353353

0 commit comments

Comments
 (0)