@@ -672,123 +672,123 @@ def missingval_plot(
672
672
if mv_total == 0 :
673
673
print ("No missing values found in the dataset." )
674
674
return None
675
- else :
676
- # Create figure and axes
677
- fig = plt .figure (figsize = figsize )
678
- gs = fig .add_gridspec (nrows = 6 , ncols = 6 , left = 0.1 , wspace = 0.05 )
679
- ax1 = fig .add_subplot (gs [:1 , :5 ])
680
- ax2 = fig .add_subplot (gs [1 :, :5 ])
681
- ax3 = fig .add_subplot (gs [:1 , 5 :])
682
- ax4 = fig .add_subplot (gs [1 :, 5 :])
683
-
684
- # ax1 - Barplot
685
- colors = plt .get_cmap (cmap )(mv_cols / np .max (mv_cols )) # color bars by height
686
- ax1 .bar (range (len (mv_cols )), np .round ((mv_cols_ratio ) * 100 , 2 ), color = colors )
687
- ax1 .get_xaxis ().set_visible (False )
688
- ax1 .set (frame_on = False , xlim = (- 0.5 , len (mv_cols ) - 0.5 ))
689
- ax1 .set_ylim (0 , np .max (mv_cols_ratio ) * 100 )
690
- ax1 .grid (linestyle = ":" , linewidth = 1 )
691
- ax1 .yaxis .set_major_formatter (ticker .PercentFormatter (decimals = 0 ))
692
- ax1 .tick_params (axis = "y" , colors = "#111111" , length = 1 )
693
-
694
- # annotate values on top of the bars
695
- for rect , label in zip (ax1 .patches , mv_cols ):
696
- height = rect .get_height ()
697
- ax1 .text (
698
- 0.1 + rect .get_x () + rect .get_width () / 2 ,
699
- height + 0.5 ,
700
- label ,
701
- ha = "center" ,
702
- va = "bottom" ,
703
- rotation = "90" ,
704
- alpha = 0.5 ,
705
- fontsize = "11" ,
706
- )
707
675
708
- ax1 .set_frame_on (True )
709
- for _ , spine in ax1 .spines .items ():
710
- spine .set_visible (True )
711
- spine .set_color (spine_color )
712
- ax1 .spines ["top" ].set_color (None )
713
-
714
- # ax2 - Heatmap
715
- sns .heatmap (data .isna (), cbar = False , cmap = "binary" , ax = ax2 )
716
- ax2 .set_yticks (np .round (ax2 .get_yticks ()[0 ::5 ], - 1 ))
717
- ax2 .set_yticklabels (ax2 .get_yticks ())
718
- ax2 .set_xticklabels (
719
- ax2 .get_xticklabels (),
720
- horizontalalignment = "center" ,
721
- fontweight = "light" ,
722
- fontsize = "12" ,
723
- )
724
- ax2 .tick_params (length = 1 , colors = "#111111" )
725
- for _ , spine in ax2 .spines .items ():
726
- spine .set_visible (True )
727
- spine .set_color (spine_color )
728
-
729
- # ax3 - Summary
730
- fontax3 = {"color" : "#111111" , "weight" : "normal" , "size" : 14 }
731
- ax3 .get_xaxis ().set_visible (False )
732
- ax3 .get_yaxis ().set_visible (False )
733
- ax3 .set (frame_on = False )
734
-
735
- ax3 .text (
736
- 0.025 ,
737
- 0.875 ,
738
- f"Total: { np .round (total_datapoints / 1000 ,1 )} K" ,
739
- transform = ax3 .transAxes ,
740
- fontdict = fontax3 ,
741
- )
742
- ax3 .text (
743
- 0.025 ,
744
- 0.675 ,
745
- f"Missing: { np .round (mv_total / 1000 ,1 )} K" ,
746
- transform = ax3 .transAxes ,
747
- fontdict = fontax3 ,
748
- )
749
- ax3 .text (
750
- 0.025 ,
751
- 0.475 ,
752
- f"Relative: { np .round (mv_total / total_datapoints * 100 ,1 )} %" ,
753
- transform = ax3 .transAxes ,
754
- fontdict = fontax3 ,
755
- )
756
- ax3 .text (
757
- 0.025 ,
758
- 0.275 ,
759
- f"Max-col: { np .round (mv_cols .max ()/ data .shape [0 ]* 100 )} %" ,
760
- transform = ax3 .transAxes ,
761
- fontdict = fontax3 ,
762
- )
763
- ax3 .text (
764
- 0.025 ,
765
- 0.075 ,
766
- f"Max-row: { np .round (mv_rows .max ()/ data .shape [1 ]* 100 )} %" ,
767
- transform = ax3 .transAxes ,
768
- fontdict = fontax3 ,
676
+ # Create figure and axes
677
+ fig = plt .figure (figsize = figsize )
678
+ gs = fig .add_gridspec (nrows = 6 , ncols = 6 , left = 0.1 , wspace = 0.05 )
679
+ ax1 = fig .add_subplot (gs [:1 , :5 ])
680
+ ax2 = fig .add_subplot (gs [1 :, :5 ])
681
+ ax3 = fig .add_subplot (gs [:1 , 5 :])
682
+ ax4 = fig .add_subplot (gs [1 :, 5 :])
683
+
684
+ # ax1 - Barplot
685
+ colors = plt .get_cmap (cmap )(mv_cols / np .max (mv_cols )) # color bars by height
686
+ ax1 .bar (range (len (mv_cols )), np .round ((mv_cols_ratio ) * 100 , 2 ), color = colors )
687
+ ax1 .get_xaxis ().set_visible (False )
688
+ ax1 .set (frame_on = False , xlim = (- 0.5 , len (mv_cols ) - 0.5 ))
689
+ ax1 .set_ylim (0 , np .max (mv_cols_ratio ) * 100 )
690
+ ax1 .grid (linestyle = ":" , linewidth = 1 )
691
+ ax1 .yaxis .set_major_formatter (ticker .PercentFormatter (decimals = 0 ))
692
+ ax1 .tick_params (axis = "y" , colors = "#111111" , length = 1 )
693
+
694
+ # annotate values on top of the bars
695
+ for rect , label in zip (ax1 .patches , mv_cols ):
696
+ height = rect .get_height ()
697
+ ax1 .text (
698
+ 0.1 + rect .get_x () + rect .get_width () / 2 ,
699
+ height + 0.5 ,
700
+ label ,
701
+ ha = "center" ,
702
+ va = "bottom" ,
703
+ rotation = "90" ,
704
+ alpha = 0.5 ,
705
+ fontsize = "11" ,
769
706
)
770
707
771
- # ax4 - Scatter plot
772
- ax4 .get_yaxis ().set_visible (False )
773
- for _ , spine in ax4 .spines .items ():
774
- spine .set_color (spine_color )
775
- ax4 .tick_params (axis = "x" , colors = "#111111" , length = 1 )
776
-
777
- ax4 .scatter (
778
- mv_rows ,
779
- range (len (mv_rows )),
780
- s = mv_rows ,
781
- c = mv_rows ,
782
- cmap = cmap ,
783
- marker = "." ,
784
- vmin = 1 ,
785
- )
786
- ax4 .set_ylim ((0 , len (mv_rows ))[::- 1 ]) # limit and invert y-axis
787
- ax4 .set_xlim (0 , max (mv_rows ) + 0.5 )
788
- ax4 .grid (linestyle = ":" , linewidth = 1 )
708
+ ax1 .set_frame_on (True )
709
+ for _ , spine in ax1 .spines .items ():
710
+ spine .set_visible (True )
711
+ spine .set_color (spine_color )
712
+ ax1 .spines ["top" ].set_color (None )
713
+
714
+ # ax2 - Heatmap
715
+ sns .heatmap (data .isna (), cbar = False , cmap = "binary" , ax = ax2 )
716
+ ax2 .set_yticks (np .round (ax2 .get_yticks ()[0 ::5 ], - 1 ))
717
+ ax2 .set_yticklabels (ax2 .get_yticks ())
718
+ ax2 .set_xticklabels (
719
+ ax2 .get_xticklabels (),
720
+ horizontalalignment = "center" ,
721
+ fontweight = "light" ,
722
+ fontsize = "12" ,
723
+ )
724
+ ax2 .tick_params (length = 1 , colors = "#111111" )
725
+ for _ , spine in ax2 .spines .items ():
726
+ spine .set_visible (True )
727
+ spine .set_color (spine_color )
728
+
729
+ # ax3 - Summary
730
+ fontax3 = {"color" : "#111111" , "weight" : "normal" , "size" : 14 }
731
+ ax3 .get_xaxis ().set_visible (False )
732
+ ax3 .get_yaxis ().set_visible (False )
733
+ ax3 .set (frame_on = False )
734
+
735
+ ax3 .text (
736
+ 0.025 ,
737
+ 0.875 ,
738
+ f"Total: { np .round (total_datapoints / 1000 ,1 )} K" ,
739
+ transform = ax3 .transAxes ,
740
+ fontdict = fontax3 ,
741
+ )
742
+ ax3 .text (
743
+ 0.025 ,
744
+ 0.675 ,
745
+ f"Missing: { np .round (mv_total / 1000 ,1 )} K" ,
746
+ transform = ax3 .transAxes ,
747
+ fontdict = fontax3 ,
748
+ )
749
+ ax3 .text (
750
+ 0.025 ,
751
+ 0.475 ,
752
+ f"Relative: { np .round (mv_total / total_datapoints * 100 ,1 )} %" ,
753
+ transform = ax3 .transAxes ,
754
+ fontdict = fontax3 ,
755
+ )
756
+ ax3 .text (
757
+ 0.025 ,
758
+ 0.275 ,
759
+ f"Max-col: { np .round (mv_cols .max ()/ data .shape [0 ]* 100 )} %" ,
760
+ transform = ax3 .transAxes ,
761
+ fontdict = fontax3 ,
762
+ )
763
+ ax3 .text (
764
+ 0.025 ,
765
+ 0.075 ,
766
+ f"Max-row: { np .round (mv_rows .max ()/ data .shape [1 ]* 100 )} %" ,
767
+ transform = ax3 .transAxes ,
768
+ fontdict = fontax3 ,
769
+ )
789
770
790
- gs .figure .suptitle (
791
- "Missing value plot" , x = 0.45 , y = 0.94 , fontsize = 18 , color = "#111111"
792
- )
771
+ # ax4 - Scatter plot
772
+ ax4 .get_yaxis ().set_visible (False )
773
+ for _ , spine in ax4 .spines .items ():
774
+ spine .set_color (spine_color )
775
+ ax4 .tick_params (axis = "x" , colors = "#111111" , length = 1 )
776
+
777
+ ax4 .scatter (
778
+ mv_rows ,
779
+ range (len (mv_rows )),
780
+ s = mv_rows ,
781
+ c = mv_rows ,
782
+ cmap = cmap ,
783
+ marker = "." ,
784
+ vmin = 1 ,
785
+ )
786
+ ax4 .set_ylim ((0 , len (mv_rows ))[::- 1 ]) # limit and invert y-axis
787
+ ax4 .set_xlim (0 , max (mv_rows ) + 0.5 )
788
+ ax4 .grid (linestyle = ":" , linewidth = 1 )
793
789
794
- return gs
790
+ gs .figure .suptitle (
791
+ "Missing value plot" , x = 0.45 , y = 0.94 , fontsize = 18 , color = "#111111"
792
+ )
793
+
794
+ return gs
0 commit comments