Skip to content

Commit 891a8c2

Browse files
committed
code quality
1 parent 6e03159 commit 891a8c2

File tree

2 files changed

+122
-123
lines changed

2 files changed

+122
-123
lines changed

klib/__init__.py

+7 -8
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
11
"""
22
Data Science Module for Python
33
==================================
4-
klib is an easy to use Python library of customized functions for cleaning and analyzing data.
4+
klib is an easy-to-use Python library of customized functions for cleaning and \
5+
analyzing data.
56
"""
67

78
__author__ = """Andreas Kanz"""
@@ -16,9 +17,7 @@
1617
pool_duplicate_subsets,
1718
)
1819
from .describe import cat_plot, corr_mat, corr_plot, dist_plot, missingval_plot
19-
from .preprocess import feature_selection_pipe, num_pipe, cat_pipe, train_dev_test_split
20-
21-
# __version__ = __version__
20+
from .preprocess import cat_pipe, feature_selection_pipe, num_pipe, train_dev_test_split
2221

2322
__all__ = [
2423
"clean_column_names",
@@ -39,10 +38,10 @@
3938
"__version__",
4039
]
4140

42-
# in future versions and especially with an increased number of functions, only the
41+
# In future versions, and especially as the number of functions grows, only the
4342
# most frequently used functions will be imported into the namespace to be accessible
4443
# from klib.function directly. The remaining functions can be found in the respective
4544
# modules:
46-
# 'clean',
47-
# 'describe',
48-
# 'preprocess',
45+
# - klib.clean
46+
# - klib.describe
47+
# - klib.preprocess

klib/describe.py

+115 -115
Original file line number | Diff line number | Diff line change
@@ -672,123 +672,123 @@ def missingval_plot(
672672
if mv_total == 0:
673673
print("No missing values found in the dataset.")
674674
return None
675-
else:
676-
# Create figure and axes
677-
fig = plt.figure(figsize=figsize)
678-
gs = fig.add_gridspec(nrows=6, ncols=6, left=0.1, wspace=0.05)
679-
ax1 = fig.add_subplot(gs[:1, :5])
680-
ax2 = fig.add_subplot(gs[1:, :5])
681-
ax3 = fig.add_subplot(gs[:1, 5:])
682-
ax4 = fig.add_subplot(gs[1:, 5:])
683-
684-
# ax1 - Barplot
685-
colors = plt.get_cmap(cmap)(mv_cols / np.max(mv_cols)) # color bars by height
686-
ax1.bar(range(len(mv_cols)), np.round((mv_cols_ratio) * 100, 2), color=colors)
687-
ax1.get_xaxis().set_visible(False)
688-
ax1.set(frame_on=False, xlim=(-0.5, len(mv_cols) - 0.5))
689-
ax1.set_ylim(0, np.max(mv_cols_ratio) * 100)
690-
ax1.grid(linestyle=":", linewidth=1)
691-
ax1.yaxis.set_major_formatter(ticker.PercentFormatter(decimals=0))
692-
ax1.tick_params(axis="y", colors="#111111", length=1)
693-
694-
# annotate values on top of the bars
695-
for rect, label in zip(ax1.patches, mv_cols):
696-
height = rect.get_height()
697-
ax1.text(
698-
0.1 + rect.get_x() + rect.get_width() / 2,
699-
height + 0.5,
700-
label,
701-
ha="center",
702-
va="bottom",
703-
rotation="90",
704-
alpha=0.5,
705-
fontsize="11",
706-
)
707675

708-
ax1.set_frame_on(True)
709-
for _, spine in ax1.spines.items():
710-
spine.set_visible(True)
711-
spine.set_color(spine_color)
712-
ax1.spines["top"].set_color(None)
713-
714-
# ax2 - Heatmap
715-
sns.heatmap(data.isna(), cbar=False, cmap="binary", ax=ax2)
716-
ax2.set_yticks(np.round(ax2.get_yticks()[0::5], -1))
717-
ax2.set_yticklabels(ax2.get_yticks())
718-
ax2.set_xticklabels(
719-
ax2.get_xticklabels(),
720-
horizontalalignment="center",
721-
fontweight="light",
722-
fontsize="12",
723-
)
724-
ax2.tick_params(length=1, colors="#111111")
725-
for _, spine in ax2.spines.items():
726-
spine.set_visible(True)
727-
spine.set_color(spine_color)
728-
729-
# ax3 - Summary
730-
fontax3 = {"color": "#111111", "weight": "normal", "size": 14}
731-
ax3.get_xaxis().set_visible(False)
732-
ax3.get_yaxis().set_visible(False)
733-
ax3.set(frame_on=False)
734-
735-
ax3.text(
736-
0.025,
737-
0.875,
738-
f"Total: {np.round(total_datapoints/1000,1)}K",
739-
transform=ax3.transAxes,
740-
fontdict=fontax3,
741-
)
742-
ax3.text(
743-
0.025,
744-
0.675,
745-
f"Missing: {np.round(mv_total/1000,1)}K",
746-
transform=ax3.transAxes,
747-
fontdict=fontax3,
748-
)
749-
ax3.text(
750-
0.025,
751-
0.475,
752-
f"Relative: {np.round(mv_total/total_datapoints*100,1)}%",
753-
transform=ax3.transAxes,
754-
fontdict=fontax3,
755-
)
756-
ax3.text(
757-
0.025,
758-
0.275,
759-
f"Max-col: {np.round(mv_cols.max()/data.shape[0]*100)}%",
760-
transform=ax3.transAxes,
761-
fontdict=fontax3,
762-
)
763-
ax3.text(
764-
0.025,
765-
0.075,
766-
f"Max-row: {np.round(mv_rows.max()/data.shape[1]*100)}%",
767-
transform=ax3.transAxes,
768-
fontdict=fontax3,
676+
# Create figure and axes
677+
fig = plt.figure(figsize=figsize)
678+
gs = fig.add_gridspec(nrows=6, ncols=6, left=0.1, wspace=0.05)
679+
ax1 = fig.add_subplot(gs[:1, :5])
680+
ax2 = fig.add_subplot(gs[1:, :5])
681+
ax3 = fig.add_subplot(gs[:1, 5:])
682+
ax4 = fig.add_subplot(gs[1:, 5:])
683+
684+
# ax1 - Barplot
685+
colors = plt.get_cmap(cmap)(mv_cols / np.max(mv_cols)) # color bars by height
686+
ax1.bar(range(len(mv_cols)), np.round((mv_cols_ratio) * 100, 2), color=colors)
687+
ax1.get_xaxis().set_visible(False)
688+
ax1.set(frame_on=False, xlim=(-0.5, len(mv_cols) - 0.5))
689+
ax1.set_ylim(0, np.max(mv_cols_ratio) * 100)
690+
ax1.grid(linestyle=":", linewidth=1)
691+
ax1.yaxis.set_major_formatter(ticker.PercentFormatter(decimals=0))
692+
ax1.tick_params(axis="y", colors="#111111", length=1)
693+
694+
# annotate values on top of the bars
695+
for rect, label in zip(ax1.patches, mv_cols):
696+
height = rect.get_height()
697+
ax1.text(
698+
0.1 + rect.get_x() + rect.get_width() / 2,
699+
height + 0.5,
700+
label,
701+
ha="center",
702+
va="bottom",
703+
rotation="90",
704+
alpha=0.5,
705+
fontsize="11",
769706
)
770707

771-
# ax4 - Scatter plot
772-
ax4.get_yaxis().set_visible(False)
773-
for _, spine in ax4.spines.items():
774-
spine.set_color(spine_color)
775-
ax4.tick_params(axis="x", colors="#111111", length=1)
776-
777-
ax4.scatter(
778-
mv_rows,
779-
range(len(mv_rows)),
780-
s=mv_rows,
781-
c=mv_rows,
782-
cmap=cmap,
783-
marker=".",
784-
vmin=1,
785-
)
786-
ax4.set_ylim((0, len(mv_rows))[::-1]) # limit and invert y-axis
787-
ax4.set_xlim(0, max(mv_rows) + 0.5)
788-
ax4.grid(linestyle=":", linewidth=1)
708+
ax1.set_frame_on(True)
709+
for _, spine in ax1.spines.items():
710+
spine.set_visible(True)
711+
spine.set_color(spine_color)
712+
ax1.spines["top"].set_color(None)
713+
714+
# ax2 - Heatmap
715+
sns.heatmap(data.isna(), cbar=False, cmap="binary", ax=ax2)
716+
ax2.set_yticks(np.round(ax2.get_yticks()[0::5], -1))
717+
ax2.set_yticklabels(ax2.get_yticks())
718+
ax2.set_xticklabels(
719+
ax2.get_xticklabels(),
720+
horizontalalignment="center",
721+
fontweight="light",
722+
fontsize="12",
723+
)
724+
ax2.tick_params(length=1, colors="#111111")
725+
for _, spine in ax2.spines.items():
726+
spine.set_visible(True)
727+
spine.set_color(spine_color)
728+
729+
# ax3 - Summary
730+
fontax3 = {"color": "#111111", "weight": "normal", "size": 14}
731+
ax3.get_xaxis().set_visible(False)
732+
ax3.get_yaxis().set_visible(False)
733+
ax3.set(frame_on=False)
734+
735+
ax3.text(
736+
0.025,
737+
0.875,
738+
f"Total: {np.round(total_datapoints/1000,1)}K",
739+
transform=ax3.transAxes,
740+
fontdict=fontax3,
741+
)
742+
ax3.text(
743+
0.025,
744+
0.675,
745+
f"Missing: {np.round(mv_total/1000,1)}K",
746+
transform=ax3.transAxes,
747+
fontdict=fontax3,
748+
)
749+
ax3.text(
750+
0.025,
751+
0.475,
752+
f"Relative: {np.round(mv_total/total_datapoints*100,1)}%",
753+
transform=ax3.transAxes,
754+
fontdict=fontax3,
755+
)
756+
ax3.text(
757+
0.025,
758+
0.275,
759+
f"Max-col: {np.round(mv_cols.max()/data.shape[0]*100)}%",
760+
transform=ax3.transAxes,
761+
fontdict=fontax3,
762+
)
763+
ax3.text(
764+
0.025,
765+
0.075,
766+
f"Max-row: {np.round(mv_rows.max()/data.shape[1]*100)}%",
767+
transform=ax3.transAxes,
768+
fontdict=fontax3,
769+
)
789770

790-
gs.figure.suptitle(
791-
"Missing value plot", x=0.45, y=0.94, fontsize=18, color="#111111"
792-
)
771+
# ax4 - Scatter plot
772+
ax4.get_yaxis().set_visible(False)
773+
for _, spine in ax4.spines.items():
774+
spine.set_color(spine_color)
775+
ax4.tick_params(axis="x", colors="#111111", length=1)
776+
777+
ax4.scatter(
778+
mv_rows,
779+
range(len(mv_rows)),
780+
s=mv_rows,
781+
c=mv_rows,
782+
cmap=cmap,
783+
marker=".",
784+
vmin=1,
785+
)
786+
ax4.set_ylim((0, len(mv_rows))[::-1]) # limit and invert y-axis
787+
ax4.set_xlim(0, max(mv_rows) + 0.5)
788+
ax4.grid(linestyle=":", linewidth=1)
793789

794-
return gs
790+
gs.figure.suptitle(
791+
"Missing value plot", x=0.45, y=0.94, fontsize=18, color="#111111"
792+
)
793+
794+
return gs

0 commit comments

Comments
 (0)