From 7e9e6e87ad90f1c2a654906a40bd19f6599341e5 Mon Sep 17 00:00:00 2001 From: JGarciaCondado Date: Thu, 21 Mar 2024 17:20:48 +0100 Subject: [PATCH] [ENH] Standardize naming of figures and prints --- src/ageml/ui.py | 90 ++++++++++++++--------------- src/ageml/utils.py | 9 +++ src/ageml/visualizer.py | 76 +++++++++++------------- tests/test_ageml/test_ui.py | 47 +++++++-------- tests/test_ageml/test_visualizer.py | 16 ++--- 5 files changed, 120 insertions(+), 118 deletions(-) diff --git a/src/ageml/ui.py b/src/ageml/ui.py index 380249d..5834240 100644 --- a/src/ageml/ui.py +++ b/src/ageml/ui.py @@ -23,7 +23,7 @@ import ageml.messages as messages from ageml.visualizer import Visualizer -from ageml.utils import create_directory, feature_extractor, significant_markers, convert, log +from ageml.utils import create_directory, feature_extractor, significant_markers, convert, log, NameTag from ageml.modelling import AgeML, Classifier from ageml.processing import find_correlations, covariate_correction @@ -153,21 +153,21 @@ def command_setup(self, dir_path): dir_path: directory path to create""" # Create directory - command_dir = os.path.join(self.dir_path, dir_path) - if os.path.exists(command_dir): - warnings.warn("Directory %s already exists files may be overwritten." % command_dir, + self.command_dir = os.path.join(self.dir_path, dir_path) + if os.path.exists(self.command_dir): + warnings.warn("Directory %s already exists files may be overwritten." % self.command_dir, category=UserWarning) else: - create_directory(command_dir) + create_directory(self.command_dir) # Create .txt log file to save results and log time - self.log_path = os.path.join(command_dir, "log.txt") + self.log_path = os.path.join(self.command_dir, "log.txt") with open(self.log_path, "a") as f: current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") f.write(current_time + "\n") # Set visualizer as command directory - self.set_visualizer(command_dir) + self.set_visualizer(self.command_dir) # Reset flags self.set_flags() @@ -637,9 +637,9 @@ def age_distribution(self, ages_dict: dict, name=""): # Select age information print("-----------------------------------") - print("Age distribution %s" % name) + print("Age distribution of %s" % name) for key, vals in ages_dict.items(): - print(key) + print("[Group: %s]" % key) print("Mean age: %.2f" % np.mean(vals)) print("Std age: %.2f" % np.std(vals)) print("Age range: [%d,%d]" % (np.min(vals), np.max(vals))) @@ -664,7 +664,7 @@ def age_distribution(self, ages_dict: dict, name=""): # Use visualiser self.visualizer.age_distribution(ages, labels, name) - def features_vs_age(self, features_dict: dict, significance: float = 0.05, name: str = ""): + def features_vs_age(self, features_dict: dict, tag, significance: float = 0.05, ): """Use visualizer to explore relationship between features and age. Parameters @@ -674,7 +674,7 @@ def features_vs_age(self, features_dict: dict, significance: float = 0.05, name: # Select data to visualize print("-----------------------------------") - print("Features by correlation with Age of Controls %s" % name) + print("Features by correlation with Age of Controls [System: %s]" % tag.system) print("significance: %.2g * -> FDR, ** -> bonferroni" % significance) # Make lists to store covariate info for each dataframe @@ -703,9 +703,9 @@ def features_vs_age(self, features_dict: dict, significance: float = 0.05, name: # Use visualizer to show results self.visualizer.features_vs_age(X_list, y_list, corr_list, order_list, - significance_list, feature_names, list(features_dict.keys()), name) + significance_list, feature_names, tag, list(features_dict.keys())) - def model_age(self, df, model, name: str = ""): + def model_age(self, df, model, tag): """Use AgeML to fit age model with data. Parameters @@ -716,11 +716,7 @@ def model_age(self, df, model, name: str = ""): # Show training pipeline print("-----------------------------------") - if name == "": - print(f"Training Age Model for all controls ({self.args.model_type})") - - else: - print(f"Training Age Model ({self.args.model_type}): {name}") + print(f"Training Age Model [Covariate:{tag.covar}, System:{tag.system}]") print(model.pipeline) # Select data to model @@ -735,8 +731,8 @@ def model_age(self, df, model, name: str = ""): # Fit model and plot results y_pred, y_corrected = model.fit_age(X, y) - self.visualizer.true_vs_pred_age(y, y_pred, name) - self.visualizer.age_bias_correction(y, y_pred, y_corrected, name) + self.visualizer.true_vs_pred_age(y, y_pred, tag) + self.visualizer.age_bias_correction(y, y_pred, y_corrected, tag) # Calculate deltas deltas = y_corrected - y @@ -753,20 +749,21 @@ def model_all(self): for covar in self.covars: for system in self.systems: - model_name = f"{covar}_{system}" + tag = NameTag(covar=covar, system=system) ageml_model = self.generate_model() self.models[covar][system], df_pred, self.betas[covar][system] = self.model_age(self.dfs['cn'][covar][system], - ageml_model, name=model_name) + ageml_model, tag=tag) df_pred = df_pred.drop(columns=['age']) df_pred.rename(columns=lambda x: f"{x}_{system}", inplace=True) self.preds['cn'][covar][system] = df_pred - def predict_age(self, df, model, beta: np.ndarray = None, model_name: str = None): + def predict_age(self, df, model, tag: NameTag, beta: np.ndarray = None,): """Use AgeML to predict age with data.""" # Show prediction pipeline print("-----------------------------------") - print(f"Predicting with Age Model ({self.args.model_type}): {model_name}") + print(f"Predicting for {tag.group}") + print(f"with Age Model [Covariate:{tag.covar}, System:{tag.system}]") print(model.pipeline) # Select data to model @@ -799,9 +796,9 @@ def predict_all(self): continue for covar in self.covars: for system in self.systems: - model_name = f"{covar}_{system}" + tag = NameTag(group=subject_type, covar=covar, system=system) df_pred = self.predict_age(self.dfs[subject_type][covar][system], self.models[covar][system], - self.betas[covar][system], model_name=model_name) + tag, self.betas[covar][system]) df_pred = df_pred.drop(columns=['age']) df_pred.rename(columns=lambda x: f"{x}_{system}", inplace=True) self.preds[subject_type][covar][system] = df_pred @@ -828,9 +825,9 @@ def save_predictions(self): # Save dataframe to csv filename = "predicted_age" + self.naming + ".csv" - df_ages.to_csv(os.path.join(self.dir_path, filename)) + df_ages.to_csv(os.path.join(self.command_dir, filename)) - def factors_vs_deltas(self, dict_ages, df_factors, group="", significance=0.05): + def factors_vs_deltas(self, dict_ages, df_factors, tag, significance=0.05): """Calculate correlations between factors and deltas. Parameters @@ -841,7 +838,7 @@ def factors_vs_deltas(self, dict_ages, df_factors, group="", significance=0.05): # Select age information print("-----------------------------------") - print("Correlations between lifestyle factors for %s" % group) + print("Correlations between lifestyle factors for %s" % tag.group) print("significance: %.2g * -> FDR, ** -> bonferroni" % significance) # Iterate over systems @@ -852,7 +849,7 @@ def factors_vs_deltas(self, dict_ages, df_factors, group="", significance=0.05): factor_names = df_factors.columns.to_list() for system, df in dict_ages.items(): - print(system) + print(f"System: {system}") # Select data to visualize deltas = df['delta_%s' % system].to_numpy() @@ -872,9 +869,9 @@ def factors_vs_deltas(self, dict_ages, df_factors, group="", significance=0.05): print("%d. %s %s: %.2f (%.2g)" % (i + 1, significant[o], factor_names[o], corr[o], p_values[o])) # Use visualizer to show bar graph - self.visualizer.factors_vs_deltas(corrs, list(dict_ages.keys()), factor_names, significants, group) + self.visualizer.factors_vs_deltas(corrs, list(dict_ages.keys()), factor_names, significants, tag) - def deltas_by_group(self, dfs, system: str = None, significance: float = 0.05): + def deltas_by_group(self, dfs, tag, significance: float = 0.05): """Calculate summary metrics of deltas by group. Parameters @@ -885,14 +882,14 @@ def deltas_by_group(self, dfs, system: str = None, significance: float = 0.05): # Select age information print("-----------------------------------") - print("Delta distribution by group %s" % system) + print("Delta distribution for System:%s" % tag.system) # Obtain deltas means and stds deltas = [] for group, df in dfs.items(): - vals = df["delta_%s" % system].to_numpy() + vals = df["delta_%s" % tag.system].to_numpy() deltas.append(vals) - print(group) + print(f"[Group: {group}]") print("Mean delta: %.2f" % np.mean(vals)) print("Std delta: %.2f" % np.std(vals)) print("Delta range: [%d, %d]" % (np.min(vals), np.max(vals))) @@ -926,9 +923,9 @@ def deltas_by_group(self, dfs, system: str = None, significance: float = 0.05): print(pval_message) # Use visualizer - self.visualizer.deltas_by_groups(deltas, labels, system) + self.visualizer.deltas_by_groups(deltas, labels, tag) - def classify(self, df1, df2, groups, system: str = None, beta: np.ndarray = None): + def classify(self, df1, df2, groups, tag, beta: np.ndarray = None): """Classify two groups based on deltas. Parameters @@ -941,7 +938,7 @@ def classify(self, df1, df2, groups, system: str = None, beta: np.ndarray = None # Classification print("-----------------------------------") - print(f"Classification between groups {groups[0]} and {groups[1]} (system: {system})") + print(f"Classification between groups {groups[0]} and {groups[1]} [System: {tag.system}]") # Select delta information delta_cols = [col for col in df1.columns if "delta" in col] @@ -976,7 +973,7 @@ def classify(self, df1, df2, groups, system: str = None, beta: np.ndarray = None print(f"{delta} = {coef:.3f} ({np.abs(coef)/max_coef:.3f})") # Visualize AUC - self.visualizer.classification_auc(y, y_pred, groups, system) + self.visualizer.classification_auc(y, y_pred, groups, tag) @log def run_wrapper(self, run): @@ -1003,12 +1000,12 @@ def run_age(self): # Use visualizer to show age distribution of controls per covariate (all systems share the age distribution) cn_ages = {covar: self.dfs['cn'][covar][self.systems[0]]['age'].to_list() for covar in self.covars} - self.age_distribution(cn_ages, name="controls" + self.naming) + self.age_distribution(cn_ages, name="Controls") # Show features vs age for controls for each system for system in self.systems: cn_features = {covar: self.dfs['cn'][covar][system] for covar in self.covars} - self.features_vs_age(cn_features, name="controls" + self.naming + "_" + system) + self.features_vs_age(cn_features, tag=NameTag(system=system)) # Model age for each system on controls self.model_all() @@ -1032,13 +1029,14 @@ def run_factor_correlation(self): # For each subject type and system run correlation analysis for subject_type in self.subject_types: + tag = NameTag(group=subject_type) dfs_systems = {} df_sub = self.df_ages.loc[self.df_clinical[subject_type]] df_factors = self.df_factors.loc[df_sub.index] for system in self.systems: df_sys = df_sub[[col for col in df_sub.columns if system in col]] dfs_systems[system] = df_sys - self.factors_vs_deltas(dfs_systems, df_factors, subject_type) + self.factors_vs_deltas(dfs_systems, df_factors, tag) def run_clinical(self): """Analyse differences between deltas in clinical groups.""" @@ -1056,12 +1054,12 @@ def run_clinical(self): # Use visualizer to show age distribution per clinical group ages = {g: dfs[g].iloc[:, 0].to_list() for g in self.subject_types} - self.age_distribution(ages, name="clinical_groups") + self.age_distribution(ages, name="Clinical Groups") # Show differences in groups per system for system in self.systems: dfs_systems = {g: dfs[g][[col for col in dfs[g].columns if system in col]] for g in self.subject_types} - self.deltas_by_group(dfs_systems, system=system) + self.deltas_by_group(dfs_systems, tag=NameTag(system=system)) def run_classification(self): """Run classification between two different clinical groups.""" @@ -1087,11 +1085,11 @@ def run_classification(self): for system in self.systems: df_group1_system = df_group1[[col for col in df_group1.columns if system in col]] df_group2_system = df_group2[[col for col in df_group2.columns if system in col]] - self.classify(df_group1_system, df_group2_system, [self.args.group1, self.args.group2], system=system) + self.classify(df_group1_system, df_group2_system, [self.args.group1, self.args.group2], tag=NameTag(system=system)) # Create a classifier for all systems if len(self.systems) > 1: - self.classify(df_group1, df_group2, [self.args.group1, self.args.group2], system="all") + self.classify(df_group1, df_group2, [self.args.group1, self.args.group2], tag=NameTag(system="all")) class CLI(Interface): diff --git a/src/ageml/utils.py b/src/ageml/utils.py index 373a3c7..54cf026 100644 --- a/src/ageml/utils.py +++ b/src/ageml/utils.py @@ -103,3 +103,12 @@ def wrapper(instance, *args, **kwargs): return result return wrapper + + +class NameTag: + """Class to create unique names for objects.""" + + def __init__(self, group="", covar="", system=""): + self.group = group + self.covar = covar + self.system = system diff --git a/src/ageml/visualizer.py b/src/ageml/visualizer.py index 733c3c9..36eeef0 100644 --- a/src/ageml/visualizer.py +++ b/src/ageml/visualizer.py @@ -16,7 +16,7 @@ from sklearn.linear_model import LinearRegression from sklearn.metrics import roc_curve, roc_auc_score -from .utils import insert_newlines, create_directory +from .utils import insert_newlines, create_directory, NameTag plt.rcParams.update({'font.size': 12}) @@ -83,11 +83,14 @@ def age_distribution(self, Ys: list, labels=None, name: str = ""): plt.xlabel("Age (years)") plt.ylabel("Count") plt.title("Age distribution") - plt.savefig(os.path.join(self.path_for_fig, "age_distribution_%s.png" % name)) + + # Save fig + filename = "age_distribution_" + name.lower().replace(" ", "_") + ".png" + plt.savefig(os.path.join(self.path_for_fig, filename)) plt.close() def features_vs_age(self, X: list, Y: list, corr: list, order: list, markers, - feature_names, labels: list = None, name: str = ""): + feature_names, tag: NameTag = None, labels: list = None): """Plot correlation between features and age. Parameters @@ -133,17 +136,15 @@ def features_vs_age(self, X: list, Y: list, corr: list, order: list, markers, title += "\n$\\rho_{%s}$: %s%.3f" % (label, markers[n][o], corr[n][o]) ax.set_title(title) ax.legend(labels) - plt.suptitle(f"Features vs. Age\n{name}", y=0.99) + plt.suptitle(f"Features vs. Age\n{tag.system}", y=0.99) plt.tight_layout() - if name == "": - filename = "features_vs_age.png" - else: - filename = f"features_vs_age_{name}.png" + # Save file + filename = f"features_vs_age_controls{'_'+tag.system if tag.system != '' else ''}.png" plt.savefig(os.path.join(self.path_for_fig, filename)) plt.close() - def true_vs_pred_age(self, y_true, y_pred, name: str = ""): + def true_vs_pred_age(self, y_true, y_pred, tag: NameTag): """Plot true age vs predicted age. Parameters @@ -157,17 +158,18 @@ def true_vs_pred_age(self, y_true, y_pred, name: str = ""): # Plot true vs predicted age plt.scatter(y_true, y_pred) plt.plot(age_range, age_range, color="k", linestyle="dashed") - plt.title(f"Chronological vs Predicted Age \n{name}") + plt.title(f"Chronological vs Predicted Age \n [Covariate: {tag.covar}, System:{tag.system}") plt.xlabel("Chronological Age") plt.ylabel("Predicted Age") - if name == "": - filename = "chronological_vs_pred_age.png" - else: - filename = f"chronological_vs_pred_age_{name}.png" + + # Save file + filename = (f"chronological_vs_pred_age" + f"{'_' + tag.covar if tag.covar != '' else ''}" + f"{'_' + tag.system if tag.system != '' else ''}.png") plt.savefig(os.path.join(self.path_for_fig, filename)) plt.close() - def age_bias_correction(self, y_true, y_pred, y_corrected, name: str = ""): + def age_bias_correction(self, y_true, y_pred, y_corrected, tag: NameTag): """Plot before and after age bias correction procedure. Parameters @@ -203,15 +205,16 @@ def age_bias_correction(self, y_true, y_pred, y_corrected, name: str = ""): plt.ylabel("Predicted Age") plt.xlabel("Chronological Age") plt.tight_layout() - if name == "": - filename = "age_bias_correction.png" - else: - filename = f"age_bias_correction_{name}.png" - plt.suptitle(f"{name}\n", y=1.00) + + # Save file + filename = (f"age_bias_correction" + f"{'_' + tag.covar if tag.covar != '' else ''}" + f"{'_' + tag.system if tag.system != '' else ''}.png") + plt.suptitle(f"[Covariate: {tag.covar}, System:{tag.system}\n", y=1.00) plt.savefig(os.path.join(self.path_for_fig, filename)) plt.close() - def factors_vs_deltas(self, corrs, groups, labels, markers, system: str = None): + def factors_vs_deltas(self, corrs, groups, labels, markers, tag: NameTag): """Plot bar graph for correlation between factors and deltas. Parameters @@ -254,18 +257,13 @@ def bargraph(ax, labels, corrs, markers, group): # Save figure fig.set_size_inches(10, 5 * len(corrs)) - if system is not None: - fig.suptitle(f"Correlation of factors with age deltas. System: {system}", y=0.99) - filename = f"factors_vs_deltas_system_{system}.png" - else: - fig.suptitle("Correlation of factors with age deltas.", y=0.99) - filename = "factors_vs_deltas.png" - + fig.suptitle(f"Correlation of factors with age deltas of {tag.group}", y=0.99) + filename = f"factors_vs_deltas{'_' + tag.group if tag.group != '' else ''}.png" plt.tight_layout() plt.savefig(os.path.join(self.path_for_fig, filename)) plt.close() - def deltas_by_groups(self, deltas, labels, system: str = None): + def deltas_by_groups(self, deltas, labels, tag: NameTag): """Plot box plot for deltas in each group. Parameters @@ -288,17 +286,14 @@ def deltas_by_groups(self, deltas, labels, system: str = None): plt.scatter(x, vals, color=self.cmap(clevel)) plt.xlabel("Gruop") plt.ylabel("Delta") - if system is None: - filename = "clinical_groups_box_plot.png" - plt.suptitle("Age Delta by clinical group.") - else: - filename = f"clinical_groups_box_plot_{system}.png" - plt.suptitle(f"Age Delta by clinical group. System: {system}", y=0.99) + # Save file + filename = f"clinical_groups_box_plot{'_' + tag.system if tag.system != '' else ''}.png" + plt.suptitle(f"Age Delta by clinical group. System: {tag.system}", y=0.99) plt.savefig(os.path.join(self.path_for_fig, filename)) plt.close() - def classification_auc(self, y, y_pred, groups, system: str = None): + def classification_auc(self, y, y_pred, groups, tag: NameTag): """Plot ROC curve. Parameters @@ -318,10 +313,9 @@ def classification_auc(self, y, y_pred, groups, system: str = None): plt.ylabel('True Positive Rate') plt.title('ROC curve %s vs %s' % (groups[0], groups[1])) plt.legend(loc="lower right") - if system is not None: - filename = f"roc_curve_{groups[0]}_vs_{groups[1]}_{system}.png" - plt.suptitle(f"System: {system}") - else: - filename = f"roc_curve_{groups[0]}_vs_{groups[1]}.png" + + # Save file + filename = f"roc_curve_{groups[0]}_vs_{groups[1]}{'_' + tag.system if tag.system != '' else ''}.png" + plt.suptitle(f"System: {tag.system}") plt.savefig(os.path.join(self.path_for_fig, filename)) plt.close() diff --git a/tests/test_ageml/test_ui.py b/tests/test_ageml/test_ui.py index e0be429..4cae463 100644 --- a/tests/test_ageml/test_ui.py +++ b/tests/test_ageml/test_ui.py @@ -418,14 +418,15 @@ def test_run_age(dummy_interface, features): svg_paths = [ os.path.join(dummy_interface.dir_path, f"model_age/figures/{fig}.png") for fig in figs ] + print(os.listdir(os.path.join(dummy_interface.dir_path, "model_age/figures"))) assert all([os.path.exists(svg_path) for svg_path in svg_paths]) # Check for the existence of the log - log_path = os.path.join(dummy_interface.dir_path, "model_age/log.txt") + log_path = os.path.join(dummy_interface.command_dir, "log.txt") assert os.path.exists(log_path) # Check for the existence of the output CSV - csv_path = os.path.join(dummy_interface.dir_path, "predicted_age.csv") + csv_path = os.path.join(dummy_interface.command_dir, "predicted_age.csv") assert os.path.exists(csv_path) # Check that the output CSV has the right columns @@ -462,7 +463,7 @@ def test_run_age_clinical(dummy_interface, features, clinical): assert os.path.exists(log_path) # Check for the existence of the output CSV - csv_path = os.path.join(dummy_interface.dir_path, "predicted_age.csv") + csv_path = os.path.join(dummy_interface.dir_path, "model_age/predicted_age.csv") assert os.path.exists(csv_path) # Check that the output CSV has the right columns @@ -490,15 +491,15 @@ def test_run_age_cov(dummy_interface, features, covariates): "age_bias_correction_m_all", "chronological_vs_pred_age_f_all", "chronological_vs_pred_age_m_all", - f"age_distribution_controls_{dummy_interface.args.covar_name}", - f"features_vs_age_controls_{dummy_interface.args.covar_name}_all"] + "age_distribution_controls", + "features_vs_age_controls_all"] # Print files in path svg_paths = [os.path.join(dummy_interface.dir_path, f"model_age/figures/{fig}.png") for fig in figs] assert all([os.path.exists(svg_path) for svg_path in svg_paths]) # Check for the existence of the output CSV csv_path = os.path.join(dummy_interface.dir_path, - f"predicted_age_{dummy_interface.args.covar_name}.csv") + f"model_age/predicted_age_{dummy_interface.args.covar_name}.csv") assert os.path.exists(csv_path) # Check that the output CSV has the right columns @@ -529,14 +530,14 @@ def test_run_age_cov_clinical(dummy_interface, features, covariates, clinical): "age_bias_correction_m_all", "chronological_vs_pred_age_f_all", "chronological_vs_pred_age_m_all", - f"age_distribution_controls_{dummy_interface.args.covar_name}", - f"features_vs_age_controls_{dummy_interface.args.covar_name}_all"] + "age_distribution_controls", + "features_vs_age_controls_all"] svg_paths = [os.path.join(dummy_interface.dir_path, f"model_age/figures/{fig}.png") for fig in figs] assert all([os.path.exists(svg_path) for svg_path in svg_paths]) # Check for the existence of the output CSV csv_path = os.path.join(dummy_interface.dir_path, - f"predicted_age_{dummy_interface.args.covar_name}.csv") + f"model_age/predicted_age_{dummy_interface.args.covar_name}.csv") assert os.path.exists(csv_path) # Check that the output CSV has the right columns @@ -559,18 +560,18 @@ def test_run_age_systems(dummy_interface, systems, features): # Systems names system_names = list(dummy_interface.dict_systems.keys()) - figs = ["age_distribution_controls_multisystem"] + figs = ["age_distribution_controls"] for system_name in system_names: figs.append(f"age_bias_correction_all_{system_name}") figs.append(f"chronological_vs_pred_age_all_{system_name}") - figs.append(f"features_vs_age_controls_multisystem_{system_name}") + figs.append(f"features_vs_age_controls_{system_name}") # Check existance of figures svg_paths = [os.path.join(dummy_interface.dir_path, f"model_age/figures/{fig}.png") for fig in figs] assert all([os.path.exists(svg_path) for svg_path in svg_paths]) # Check existence of output CSV csv_path = os.path.join(dummy_interface.dir_path, - "predicted_age_multisystem.csv") + "model_age/predicted_age_multisystem.csv") assert os.path.exists(csv_path) # Check that the output CSV has the right columns @@ -596,18 +597,18 @@ def test_run_age_systems_clinical(dummy_interface, systems, features, clinical): # Systems names system_names = list(dummy_interface.dict_systems.keys()) - figs = ["age_distribution_controls_multisystem"] + figs = ["age_distribution_controls"] for system_name in system_names: figs.append(f"age_bias_correction_all_{system_name}") figs.append(f"chronological_vs_pred_age_all_{system_name}") - figs.append(f"features_vs_age_controls_multisystem_{system_name}") + figs.append(f"features_vs_age_controls_{system_name}") # Check existance of figures svg_paths = [os.path.join(dummy_interface.dir_path, f"model_age/figures/{fig}.png") for fig in figs] assert all([os.path.exists(svg_path) for svg_path in svg_paths]) # Check existence of output CSV csv_path = os.path.join(dummy_interface.dir_path, - "predicted_age_multisystem.csv") + "model_age/predicted_age_multisystem.csv") assert os.path.exists(csv_path) # Check that the output CSV has the right columns @@ -635,20 +636,20 @@ def test_run_age_cov_and_systems(dummy_interface, systems, features, covariates) # Systems names system_names = list(dummy_interface.dict_systems.keys()) - figs = ["age_distribution_controls_gender_multisystem"] + figs = ["age_distribution_controls"] for system_name in system_names: figs.append(f"age_bias_correction_f_{system_name}") figs.append(f"age_bias_correction_m_{system_name}") figs.append(f"chronological_vs_pred_age_f_{system_name}") figs.append(f"chronological_vs_pred_age_m_{system_name}") - figs.append(f"features_vs_age_controls_gender_multisystem_{system_name}") + figs.append(f"features_vs_age_controls_{system_name}") # Check existance of figures svg_paths = [os.path.join(dummy_interface.dir_path, f"model_age/figures/{fig}.png") for fig in figs] assert all([os.path.exists(svg_path) for svg_path in svg_paths]) # Check existence of output CSV csv_path = os.path.join(dummy_interface.dir_path, - f"predicted_age_{dummy_interface.args.covar_name}_multisystem.csv") + f"model_age/predicted_age_{dummy_interface.args.covar_name}_multisystem.csv") assert os.path.exists(csv_path) # Check that the output CSV has the right columns @@ -679,20 +680,20 @@ def test_run_age_cov_and_systems_clinical(dummy_interface, systems, features, co # Systems names system_names = list(dummy_interface.dict_systems.keys()) - figs = ["age_distribution_controls_gender_multisystem"] + figs = ["age_distribution_controls"] for system_name in system_names: figs.append(f"age_bias_correction_f_{system_name}") figs.append(f"age_bias_correction_m_{system_name}") figs.append(f"chronological_vs_pred_age_f_{system_name}") figs.append(f"chronological_vs_pred_age_m_{system_name}") - figs.append(f"features_vs_age_controls_gender_multisystem_{system_name}") + figs.append(f"features_vs_age_controls_{system_name}") # Check existance of figures svg_paths = [os.path.join(dummy_interface.dir_path, f"model_age/figures/{fig}.png") for fig in figs] assert all([os.path.exists(svg_path) for svg_path in svg_paths]) # Check existence of output CSV csv_path = os.path.join(dummy_interface.dir_path, - f"predicted_age_{dummy_interface.args.covar_name}_multisystem.csv") + f"model_age/predicted_age_{dummy_interface.args.covar_name}_multisystem.csv") assert os.path.exists(csv_path) # Check that the output CSV has the right columns @@ -712,7 +713,7 @@ def test_run_factor_correlation(dummy_interface, ages, factors): assert os.path.exists(dummy_interface.dir_path) # Check for the existence of the output figures - figs = ["factors_vs_deltas_system_cn"] + figs = ["factors_vs_deltas_cn"] svg_paths = [ os.path.join(dummy_interface.dir_path, f"factor_correlation/figures/{fig}.png") for fig in figs ] @@ -736,7 +737,7 @@ def test_run_factor_correlation_systems(dummy_interface, ages_multisystem, facto # Check for the existence of the output figures figs = [] - figs.append("factors_vs_deltas_system_cn") + figs.append("factors_vs_deltas_cn") svg_paths = [ os.path.join(dummy_interface.dir_path, f"factor_correlation/figures/{fig}.png") for fig in figs ] diff --git a/tests/test_ageml/test_visualizer.py b/tests/test_ageml/test_visualizer.py index e7b9501..f34ca12 100644 --- a/tests/test_ageml/test_visualizer.py +++ b/tests/test_ageml/test_visualizer.py @@ -7,7 +7,7 @@ import ageml.modelling as modelling import ageml.ui as ui import ageml.utils as utils -from ageml.utils import significant_markers +from ageml.utils import significant_markers, NameTag import ageml.visualizer as viz from ageml.datasets import SyntheticData from .test_modelling import AgeMLTest @@ -61,11 +61,11 @@ def test_features_vs_age(dummy_viz, np_test_data): reject_bon, _, _, _ = multipletests(p_values, alpha=0.05, method='bonferroni') reject_fdr, _, _, _ = multipletests(p_values, alpha=0.05, method='fdr_bh') significant = significant_markers(reject_bon, reject_fdr) - dummy_viz.features_vs_age([X], [Y], [corr], [order], [significant], ["X1", "X2", "X3"], + dummy_viz.features_vs_age([X], [Y], [corr], [order], [significant], ["X1", "X2", "X3"], tag=NameTag(), labels=["all"]) # Check file existence - svg_path = os.path.join(dummy_viz.dir, "figures/features_vs_age.png") + svg_path = os.path.join(dummy_viz.dir, "figures/features_vs_age_controls.png") assert os.path.exists(svg_path) # Cleanup shutil.rmtree(os.path.dirname(svg_path)) @@ -77,7 +77,7 @@ def test_true_vs_pred_age(dummy_viz, np_test_data, dummy_ml): Y = np_test_data[:, -1] # Fit Age Y_pred, _ = dummy_ml.fit_age(X, Y) - dummy_viz.true_vs_pred_age(Y, Y_pred) + dummy_viz.true_vs_pred_age(Y, Y_pred, tag=NameTag()) # Check file existence svg_path = os.path.join(dummy_viz.dir, "figures/chronological_vs_pred_age.png") assert os.path.exists(svg_path) @@ -91,7 +91,7 @@ def test_age_bias_correction(dummy_viz, np_test_data, dummy_ml): Y = np_test_data[:, -1] # Fit Age Y_pred, Y_corrected = dummy_ml.fit_age(X, Y) - dummy_viz.age_bias_correction(Y, Y_pred, Y_corrected) + dummy_viz.age_bias_correction(Y, Y_pred, Y_corrected, tag=NameTag()) # Check file existence svg_path = os.path.join(dummy_viz.dir, "figures/age_bias_correction.png") assert os.path.exists(svg_path) @@ -106,7 +106,7 @@ def test_factors_vs_deltas(dummy_viz): labels = ["factor1", "factor2", "factor3", "factor4", "factor5"] markers = [['', '*', '', '*', '**']] # Plot - dummy_viz.factors_vs_deltas(corrs, groups, labels, markers) + dummy_viz.factors_vs_deltas(corrs, groups, labels, markers, tag=NameTag()) # Check file existence svg_path = os.path.join(dummy_viz.dir, "figures/factors_vs_deltas.png") assert os.path.exists(svg_path) @@ -125,7 +125,7 @@ def test_deltas_by_groups(dummy_viz, np_test_data, dummy_ml): # Create dummy labels labels = ["Group 1"] # Plot - dummy_viz.deltas_by_groups([deltas], labels) + dummy_viz.deltas_by_groups([deltas], labels, tag=NameTag()) # Check file existence svg_path = os.path.join(dummy_viz.dir, "figures/clinical_groups_box_plot.png") assert os.path.exists(svg_path) @@ -139,7 +139,7 @@ def test_classification_auc(dummy_viz): y_pred = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.6, 0.8, 0.9, 0.99] groups = ["group1", "group2"] # Plot - dummy_viz.classification_auc(y, y_pred, groups) + dummy_viz.classification_auc(y, y_pred, groups, tag=NameTag()) # Check file existence svg_path = os.path.join(dummy_viz.dir, "figures/roc_curve_%s_vs_%s.png" % (groups[0], groups[1])) assert os.path.exists(svg_path)