Skip to content

Commit c48990a

Browse files
committed
z test, anova, and plot with external pvalue
1 parent eedd761 commit c48990a

File tree

2 files changed

+136
-1
lines changed

2 files changed

+136
-1
lines changed
Binary file not shown.

individual_feature_and_statistics/feat_stat.py

+136-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import matplotlib.pyplot as plt
33
from scipy.stats import normaltest
44
from statannotations.Annotator import Annotator
5+
from statannotations.stats.StatTest import StatTest
56

67
def col_generator(df, cols_to_join = ['Metadata_Compound', 'Metadata_Concentration']):
78
"""
@@ -156,4 +157,138 @@ def plot_with_markers(df, feature, x, palette_boxplot=None, order_to_plot = [],
156157
annot.new_plot(g.ax, pairs_stat, data=df, x=x, y=feature, order=order_to_plot)
157158
annot.configure(test=stat_test, text_format='star', loc='inside', verbose=2).apply_test().annotate()
158159
g.set_xticklabels(new_labels, fontsize=15)
159-
plt.show()
160+
plt.show()
161+
162+
163+
def plot_with_anova(df, feature, x, palette_boxplot=None, order_to_plot=None, hue_order_boxplot=None,
                    pairs_stat=None, show_hist=False, rotation=75,
                    new_labels=None, ylabel="", hue_col="Metadata_Time", title_plot=None,
                    title_legend='Time (days)',
                    set_lim=False, xlim=None, ylim=None,
                    col_groupby=None, category_mapping=None,
                    perform_stat_test=True):
    """
    Box plot of ``feature`` per ``x`` category with a marker overlay and
    pairwise annotations computed with ``scipy.stats.f_oneway``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding the ``x`` and ``feature`` columns (and ``col_groupby``
        when given).
    feature : str
        Column plotted on the y axis.
    x : str
        Categorical column plotted on the x axis.
    palette_boxplot : optional
        Forwarded to ``sns.catplot`` as ``palette``.
    order_to_plot : sequence, optional
        Category order forwarded to ``sns.catplot`` and ``Annotator``.
        ``None`` or an empty sequence is forwarded as ``None``.
    pairs_stat : sequence of pairs, optional
        Pairs of ``x`` categories to annotate. ``None`` or an empty
        sequence skips the annotation step.
    rotation : float
        Rotation passed to ``plt.xticks``.
    new_labels : sequence of str, optional
        Replacement x tick labels. ``None`` or an empty sequence is
        forwarded to ``g.set_xticklabels`` as ``None``.
    ylabel : str
        Y axis label.
    title_plot : str, optional
        Plot title.
    set_lim : bool
        When true, ``g.ax.set_ylim(xlim, ylim)`` is applied.
    xlim, ylim : float, optional
        Despite the names, these are the lower and upper bounds of the
        **y** axis (both are passed to ``set_ylim``).
    col_groupby : str, optional
        Column used to split ``df`` into groups for the scatter overlay.
        ``None`` skips the overlay.
    category_mapping : dict, optional
        Maps a group key to ``{"marker": ..., "color": ...}``. Groups that
        are absent from the mapping use a black ``"o"`` marker.
    perform_stat_test : bool
        Whether to compute and draw the annotations.
    hue_order_boxplot, show_hist, hue_col, title_legend
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    from scipy.stats import f_oneway

    # Normalise "nothing given" (None or empty) to None so the plotting
    # calls never receive an explicit empty order or label list.
    order = order_to_plot if order_to_plot is not None and len(order_to_plot) else None
    tick_labels = new_labels if new_labels is not None and len(new_labels) else None
    pairs = pairs_stat if pairs_stat is not None else []

    sns.set(font_scale=1.7)
    sns.set_style("dark")
    g = sns.catplot(data=df,
                    x=x,
                    y=feature,
                    kind="box",
                    legend=False,
                    height=7,
                    aspect=1.5,
                    palette=palette_boxplot,
                    boxprops={'alpha': 0.6},
                    order=order,
                    saturation=1
                    )

    # Overlay the individual observations, one scatter call per group so
    # every group can carry its own marker and edge colour.
    if col_groupby is not None:
        marker_styles = category_mapping if category_mapping is not None else {}
        fallback_style = {"marker": "o", "color": "black"}
        for category, group_data in df.groupby(col_groupby):
            style = marker_styles.get(category, fallback_style)
            plt.scatter(group_data[x], group_data[feature], label=category,
                        marker=style["marker"],
                        color="none",  # no fill
                        edgecolor=style["color"],  # coloured outline only
                        )
    plt.legend([], [], frameon=False)
    g.add_legend(title='')
    g.set(xlabel=None, title=title_plot, ylabel=ylabel)

    if set_lim:
        # xlim/ylim are the lower/upper y-axis bounds (see docstring).
        g.ax.set_ylim(xlim, ylim)
    plt.xticks(rotation=rotation)
    plt.legend([], [], frameon=False)

    if perform_stat_test and len(pairs):
        # Descriptors required by statannotations for a custom test.
        anova_test = StatTest(f_oneway,
                              'One-way ANOVA statistical test',
                              'One-way ANOVA')
        annot = Annotator(g.ax, pairs,
                          data=df, x=x, y=feature, order=order)
        annot.reset_configuration()
        annot.new_plot(g.ax, pairs, data=df, x=x, y=feature, order=order)
        annot.configure(test=anova_test, text_format='star', loc='inside', verbose=2).apply_test().annotate()

    g.set_xticklabels(tick_labels, fontsize=15)
    plt.show()
222+
223+
def plot_pvalue_calculated_elsewhere(df, feature, x, palette_boxplot=None, order_to_plot=None, hue_order_boxplot=None,
                                     pairs_stat=None, show_hist=False, rotation=75,
                                     new_labels=None, ylabel="", hue_col="Metadata_Time", title_plot=None,
                                     title_legend='Time (days)',
                                     set_lim=False, xlim=None, ylim=None,
                                     col_groupby=None, category_mapping=None,
                                     perform_stat_test=True,
                                     p_values=None):
    """
    Box plot of ``feature`` per ``x`` category with a marker overlay,
    annotated with p-values that the caller has already computed.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding the ``x`` and ``feature`` columns (and ``col_groupby``
        when given).
    feature : str
        Column plotted on the y axis.
    x : str
        Categorical column plotted on the x axis.
    palette_boxplot : optional
        Forwarded to ``sns.catplot`` as ``palette``.
    order_to_plot : sequence, optional
        Category order forwarded to ``sns.catplot`` and ``Annotator``.
        ``None`` or an empty sequence is forwarded as ``None``.
    pairs_stat : sequence of pairs, optional
        Pairs of ``x`` categories to annotate. ``None`` or an empty
        sequence skips the annotation step.
    rotation : float
        Rotation passed to ``plt.xticks``.
    new_labels : sequence of str, optional
        Replacement x tick labels. ``None`` or an empty sequence is
        forwarded to ``g.set_xticklabels`` as ``None``.
    ylabel : str
        Y axis label.
    title_plot : str, optional
        Plot title.
    set_lim : bool
        When true, ``g.ax.set_ylim(xlim, ylim)`` is applied.
    xlim, ylim : float, optional
        Despite the names, these are the lower and upper bounds of the
        **y** axis (both are passed to ``set_ylim``).
    col_groupby : str, optional
        Column used to split ``df`` into groups for the scatter overlay.
        ``None`` skips the overlay.
    category_mapping : dict, optional
        Maps a group key to ``{"marker": ..., "color": ...}``. Groups that
        are absent from the mapping use a black ``"o"`` marker.
    perform_stat_test : bool
        Whether to draw the annotations.
    p_values : sequence of float, optional
        One p-value per entry of ``pairs_stat``, in the same order; passed
        to ``Annotator.set_pvalues_and_annotate``.
    hue_order_boxplot, show_hist, hue_col, title_legend
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.

    Raises
    ------
    ValueError
        If annotations are requested for a non-empty ``pairs_stat`` but
        ``p_values`` is missing or has a different length.
    """
    # Normalise "nothing given" (None or empty) to None so the plotting
    # calls never receive an explicit empty order or label list.
    order = order_to_plot if order_to_plot is not None and len(order_to_plot) else None
    tick_labels = new_labels if new_labels is not None and len(new_labels) else None
    pairs = pairs_stat if pairs_stat is not None else []

    annotate = bool(perform_stat_test and len(pairs))
    # Validate before drawing anything so a bad call does not leave a
    # half-built figure behind.
    if annotate and (p_values is None or len(p_values) != len(pairs)):
        raise ValueError(
            "p_values must contain exactly one p-value per pair in pairs_stat"
        )

    sns.set(font_scale=1.7)
    sns.set_style("dark")
    g = sns.catplot(data=df,
                    x=x,
                    y=feature,
                    kind="box",
                    legend=False,
                    height=7,
                    aspect=1.5,
                    palette=palette_boxplot,
                    boxprops={'alpha': 0.6},
                    order=order,
                    saturation=1
                    )

    # Overlay the individual observations, one scatter call per group so
    # every group can carry its own marker and edge colour.
    if col_groupby is not None:
        marker_styles = category_mapping if category_mapping is not None else {}
        fallback_style = {"marker": "o", "color": "black"}
        for category, group_data in df.groupby(col_groupby):
            style = marker_styles.get(category, fallback_style)
            plt.scatter(group_data[x], group_data[feature], label=category,
                        marker=style["marker"],
                        color="none",  # no fill
                        edgecolor=style["color"],  # coloured outline only
                        )
    plt.legend([], [], frameon=False)
    g.add_legend(title='')
    g.set(xlabel=None, title=title_plot, ylabel=ylabel)

    if set_lim:
        # xlim/ylim are the lower/upper y-axis bounds (see docstring).
        g.ax.set_ylim(xlim, ylim)
    plt.xticks(rotation=rotation)
    plt.legend([], [], frameon=False)

    if annotate:
        annot = Annotator(g.ax, pairs,
                          data=df, x=x, y=feature, order=order)
        annot.reset_configuration()
        annot.new_plot(g.ax, pairs, data=df, x=x, y=feature, order=order)
        annot.configure(text_format="star", loc="inside")
        annot.set_pvalues_and_annotate(p_values)

    g.set_xticklabels(tick_labels, fontsize=15)
    plt.show()
278+
279+
280+
def z_test_pairs(df, feature, pairs, label_column, alpha=0.05):
    """
    Run a two-sample z-test on ``feature`` for every pair of labels.

    For each pair, the rows of ``df`` whose ``label_column`` equals the
    first label are compared with the rows whose ``label_column`` equals
    the second label, using ``statsmodels.stats.weightstats.ztest``. One
    line per pair is printed reporting the p-value and whether it falls
    below ``alpha``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding ``feature`` and ``label_column``.
    feature : str
        Column with the values to compare.
    pairs : iterable of sequences
        Each item provides the two labels to compare as its first two
        elements.
    label_column : str
        Column holding the group labels.
    alpha : float, default 0.05
        Threshold below which the printed message reports a rejection of
        the null hypothesis. It does not affect the returned values.

    Returns
    -------
    list of float
        The p-values, in the same order as ``pairs``.

    Raises
    ------
    KeyError
        If ``feature`` or ``label_column`` is not a column of ``df``.
    """
    # Fail fast on a misspelled column, before any statistics are computed.
    missing = [col for col in (feature, label_column) if col not in df.columns]
    if missing:
        raise KeyError(f"Column(s) not found in df: {missing}")

    from statsmodels.stats.weightstats import ztest

    # Select rows with a boolean mask instead of a string-built query so
    # labels containing quotes, non-string labels and column names that
    # are not valid identifiers are all handled.
    labels = df[label_column]
    pvalues_list = []
    for pair in pairs:
        first, second = pair[0], pair[1]
        _, p_value = ztest(x1=df.loc[labels == first, feature],
                           x2=df.loc[labels == second, feature])
        if p_value < alpha:
            print(f'{first} x {second}: p-value {p_value}, REJECT null hypothesis')
        else:
            print(f"{first} x {second}: p-value {p_value}, failed to reject null hypothesis")
        pvalues_list.append(p_value)

    return pvalues_list

0 commit comments

Comments
 (0)