diff --git a/MorphoMapping/__init__.py b/MorphoMapping/__init__.py deleted file mode 100644 index 6dbfedd..0000000 --- a/MorphoMapping/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .morphomapping import MM - -__all__ = ["MM"] diff --git a/MorphoMapping/morphomapping.py b/MorphoMapping/morphomapping.py deleted file mode 100644 index 7399092..0000000 --- a/MorphoMapping/morphomapping.py +++ /dev/null @@ -1,1220 +0,0 @@ -# Author: Amelie Bauerdick -# WabnitzLab -# 2024 - - -""" -Create interactive umap and densmap visualizations. -Plot feature importance and apply several clustering algorithms after dimensionality reduction. -Class MM is based on pandas DataFrames. - -Class: - - MM - -Functions: - - convert_to_CSV(fcs_path: str, csv_path: str) - read_CSV(path: str, add_index: bool, index_name: str) -> df - get_features -> list - get_df -> df - add_metadata(label: str, value) -> df - rename_variables(label_mapping) - select_condition(condition: str, value) - select_events(event_size: int) - drop_variables(*labels) - drop_events(first_row: int, last_row: int) - save_feature(*features) -> df - concat_variables(*dataframes) -> df - save_xlsx(path: str) - save_csv(path: str) - concat_df(*new_df, join='inner') - update_column_values(column_name: str, rename_values) - minmax_norm(first_column: str =None, last_column: str =None) - quant_scaler(first_column: str =None, last_column: str =None) - umap(nn: int, mdist: float, met: str) - dmap(dlambda: float, nn: int, mdist: float, met: str) - feature_importance(dep: str, indep: str) -> df - plot_feature_importance(features, path: str, base_width: int, base_height: int) - cluster_kmeans(n_cluster: int) - cluster_gmm(number_component: int, random_s: int) - cluster_hdbscan(cluster_size: int) - check_dataframe() - prepare_data_source() - configure_hover_tooltips(feature: str, hover_tooltips: list[tuple[str, str]] = None) - create_base_plot(fig_width: int, fig_height: int, fig_title: str, label_x: str, label_y: str, range_x: list[float], range_y: list[float], tools_emb, title_align: str) - configure_axes_and_legend(plot, show_axes: bool, show_legend: bool) - cat_plot(feature: str, subs: list[str], colors: list[str], outputf: str, fig_width: int, fig_height: int, fig_title: str, label_x: str,label_y: str, range_x: list[float], range_y: list[float], - hover_tooltips: list[tuple[str, str]] = None, show_legend: bool = False, point_size: int, point_alpha: float, show_axes: bool = False, title_align: str = 'center') -> None: - lin_plot(outputf: str, feature: str, colors: list[str], fig_width: int, fig_height: int, fig_title: str, label_x: str, label_y: str, range_x: list[float], range_y: list[float], - hover_tooltips: list[tuple[str, str]] = None, show_legend: bool = False, point_size: int, point_alpha: float, show_axes: bool = True, title_align: str = 'center') -> None: - -variables: - - self.df - -""" - - - -# Import Packages - -import flowkit as fk -import pandas as pd -import numpy as np -from matplotlib import pyplot as plt -from sklearn.ensemble import RandomForestRegressor -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import MinMaxScaler, QuantileTransformer -from sklearn.metrics import r2_score -import sklearn.cluster as cluster -from sklearn.mixture import GaussianMixture -import umap -import hdbscan -from bokeh.models import (HoverTool, - ColumnDataSource, - Range1d, - LinearColorMapper, - CategoricalColorMapper) -from bokeh.plotting import figure, show, output_file -import bokeh - -from typing import Optional - -bokeh_version = bokeh.__version__ -bokeh_above_three = int(bokeh_version.split(".")[0]) >= 3 - - -class MM: - - """\ - A class to create interactive dimensionality reduction plots. - Based on pandas DataFrame. - - Parameters - ---------- - df - DataFrame containing all Imaging Flow Cytometry data - - Returns - ------- - None - - """ - - - - def __init__(self): - self.df = pd.DataFrame() - - def convert_to_CSV(self, fcs_path: str, csv_path: str): - """\ - Converts fcs-file to .csv file and saves it to csv_path. - - Parameters - ---------- - fcs_path - path to fcs file - csv_path - path to csv file - - Returns - ------- - None - - """ - try: - if not csv_path: - raise ValueError("The CSV path is empty or invalid.") - - sample = fk.Sample(fcs_path) - sample.export(filename=csv_path, source='raw') - - print(f"File successfully converted to {csv_path}.") - - except FileNotFoundError: - print(f"FCS file not found: {fcs_path}") - raise - except Exception as e: - print(f"Conversion failed: {e}") - raise - - def read_CSV(self, path: str, add_index: bool =False, index_name: str ='Index'): - """\ - Load csv-file and save it as self.df - - Parameters - ---------- - path - path to csv file - add_index - add index to df - index_name - set name of index - - Returns - ---------- - DataFrame - - """ - - self.df = pd.read_csv(path) - - #rename columns - self.df.columns = ( - self.df.columns.str.strip() - .str.replace(' ', '_') - .str.replace('&', 'and') - .str.replace('+', 'plus') - .str.replace('-', 'minus') - ) - - if add_index: - self.df[index_name] = range(len(self.df)) - self.df.set_index(index_name, inplace=True) - - return self.df - - def get_features(self): - """\ - return list of self.df columns - """ - return list(self.df.columns) - - def get_df(self): - """\ - return self.df - """ - return self.df - - def add_metadata(self, label: str, value): - """\ - add column with specific value to self.df - - Parameters - ---------- - label - name of column - value - value of column - - Returns - ---------- - DataFrame - - """ - empty = self.df.empty - if empty: - raise ValueError("Dataframe is empty.") - - self.df[label] = value - return self.df - - def rename_variables(self, label_mapping): - """\ - rename column(s) with new column labels - - Parameters - ---------- - label_mapping - old and new assigned column labels - - Returns - ---------- - None - - """ - - missing_columns = [col for col in label_mapping.keys() if col not in self.df.columns] - - if missing_columns: - print(f"The following columns do not exist: {missing_columns}") - else: - self.df.rename(columns=label_mapping, inplace=True) - - def select_condition(self, condition: str, value): - """\ - select specific rows by condition and save new df as self.df - - Parameters - ---------- - condition - name of column - value - value of column - - Returns - ---------- - None - """ - - if condition not in self.df.columns: - raise ValueError(f"Column '{condition}' does not exist.") - if value not in self.df[condition].values: - raise ValueError(f"Value '{value}' does not exist in column '{condition}'.") - # select rows and save new df - self.df = self.df.loc[self.df[condition] == value] - - def select_events(self, event_size: int): - """\ - randomly select events and save as self.df - - Parameters - ---------- - event_size - number of events - - Returns - ---------- - None - """ - - if event_size > self.df.shape[0]: - raise ValueError(f"Number of events '{event_size}' is larger than the number of rows ({self.df.shape[0]}).") - else: - self.df = self.df.sample(event_size, random_state=1).copy() - self.df.sort_index(inplace=True) - - def drop_variables(self, *labels): - """\ - drop certain columns from self.df - - Parameters - ---------- - labels - name of column(s) - - Returns - ---------- - None - """ - missing_labels = [label for label in labels if label not in self.df.columns] - if missing_labels: - raise ValueError(f"Column(s) {missing_labels} do not exist.") - else: - self.df = self.df.drop(columns=list(labels)) - - def drop_events(self, first_row: int, last_row: int): - """\ - drop specific rows from self.df - - Parameters - ---------- - first_row - number of first row that should be dropped - last_row - number of last row that should be dropped - - Returns - ---------- - None - """ - if first_row not in self.df.index: - raise ValueError(f"First row '{first_row}' does not exist.") - if last_row not in self.df.index: - raise ValueError(f"Last row '{last_row}' does not exist.") - if first_row > last_row: - raise ValueError(f"First row number '{first_row}' is greater than last row number '{last_row}'.") - self.df = self.df.drop(index=self.df.index[first_row:last_row + 1]) - - def save_feature(self, *features): - """\ - save specific columns of self.df in new DataFrame and return new DataFrame - - Parameters - ---------- - features - name of features which should be returned as df - - Returns - ---------- - DataFrame - """ - labels = [feature for feature in features if feature not in self.df.columns] - if labels: - raise ValueError(f"Column(s) {labels} do not exist.") - - df = self.df[list(features)].copy() - - return df - - def concat_variables(self, *dataframes): - """\ - attach new columns as df to self.df and return resulting df - - Parameters - ---------- - dataframes - new dataframes which should be attached to self.df - - Returns - ---------- - DataFrame - """ - result_df = pd.concat([self.df] + list(dataframes), axis=1) - self.df = result_df - return result_df - - def save_xlsx(self, path: str): - """\ - save self.df as xlsx file to chosen path - - Parameters - ---------- - path - chosen path - - Returns - ---------- - None - """ - self.df.to_excel(path, index=False) - print(f"DataFrame successfully saved to {path}") - - def save_csv(self, path: str): - """\ - save self.df as csv to chosen path - - Parameters - ---------- - path - chosen path - - Returns - ---------- - None - """ - self.df.to_csv(path, index=False) - print(f"DataFrame successfully saved to {path}") - - def concat_df(self, *new_df, join='inner'): - """\ - concatenate self.df and new DataFrame(s) (joining inner by default) - - Parameters - ---------- - new_df - DataFrames that should be concatenated - join - type of joining DataFrames - - Returns - ---------- - None - """ - self.df = pd.concat([self.df, *new_df], join=join) - - def update_column_values(self, column_name: str, rename_values): - """\ - replace values in a specific column with a specific new value - - Parameters - ---------- - column_name - name of column - rename_values - new value of column - - Returns - ---------- - None - """ - - if column_name not in self.df.columns: - raise ValueError(f"Column '{column_name}' does not exist.") - - self.df[column_name] = self.df[column_name].astype(str) - - for original_value, new_value in rename_values.items(): - self.df[column_name] = self.df[column_name].str.replace(str(original_value), new_value) - - def minmax_norm(self, first_column: Optional[str] = None, last_column: str =None): - """\ - Apply MinMax normalization to self.df. - Specify whether all columns should be normalized by setting parameters. - - Parameters - ---------- - first_column - name of first column. Start of normalization. - last_column: - name of last column. End of normalization. - - Returns - ---------- - None - - """ - - #check df - if first_column is None or last_column is None: - df1 = self.df - else: - if first_column not in self.df.columns: - raise ValueError(f"First column '{first_column}' does not exist.") - if last_column not in self.df.columns: - raise ValueError(f"Last column '{last_column}' does not exist.") - - first_id = self.df.columns.get_loc(first_column) - last_id = self.df.columns.get_loc(last_column) - if first_id > last_id: - raise ValueError(f"First column '{first_column}' is after last column '{last_column}'.") - - df1 = self.df.iloc[:, first_id:last_id + 1] - - # Apply min-max normalization - df = (df1 - df1.min()) / (df1.max() - df1.min()) - - # Replace the subset - if first_column is not None and last_column is not None: - self.df.iloc[:, first_id:last_id + 1] = df - else: - self.df = df - - - def quant_scaler(self, first_column: Optional[str] = None, last_column: str =None): - """\ - Apply QuantileTransformer to self.df. - Specify whether all columns should be normalized by setting parameters. - - Parameters - ---------- - first_column - name of first column. Start of normalization. - last_column - name of last column. End of normalization. - - Returns - ---------- - None - """ - - if first_column is None or last_column is None: - df1 = self.df - else: - if first_column not in self.df.columns: - raise ValueError(f"First column '{first_column}' does not exist.") - if last_column not in self.df.columns: - raise ValueError(f"Last column '{last_column}' does not exist.") - - first_id = self.df.columns.get_loc(first_column) - last_id = self.df.columns.get_loc(last_column) - if first_id > last_id: - raise ValueError(f"First column '{first_column}' is after last column '{last_column}'.") - - df1 = self.df.iloc[:, first_id:last_id + 1] - - # Apply Quantile Transformation - scaler = QuantileTransformer(output_distribution='uniform') - df2 = pd.DataFrame(scaler.fit_transform(df1), - columns=df1.columns, - index=df1.index) - - if first_column is not None and last_column is not None: - self.df.iloc[:, first_id:last_id + 1] = df2 - else: - self.df = df2 - - - def umap(self, nn: int, mdist: float, met: str): - """\ - Run umap with self.df. Adds x and y values to self.df as extra columns. - - Parameters - ---------- - nn - nearest neighbours for umap settings - mdist - minimum distance for umap settings - met - metric for umap settings - - Returns - ---------- - None - - """ - reducer = umap.UMAP( - n_neighbors=nn, - min_dist=mdist, - metric=met - ) - - embedding = reducer.fit_transform(self.df) - - x = embedding[:, 0] - y = embedding[:, 1] - - self.df['x'] = x - self.df['y'] = y - - def dmap(self, dlambda: float, nn: int, mdist: float, met: str): - """\ - Run dmap with self.df. Adds x and y values to self.df as extra columns. - - Parameters - ---------- - dlambda - denslambda for densmap settings - nn - nearest neighbours for densmap settings - mdist - minimum distance for densmap settings - met - metric for densmap settings - - Returns - ---------- - None - """ - - reducer = umap.UMAP( - densmap=True, - dens_lambda=dlambda, - n_neighbors=nn, - min_dist=mdist, - metric=met - ) - - embedding = reducer.fit_transform(self.df) - x = embedding[:, 0] - y = embedding[:, 1] - - self.df['x'] = x - self.df['y'] = y - - def feature_importance(self, dep: str, indep: str): - """\ - Calculates feature importance of columns in self.df (especially for x and y after dmap/umap were run). - Returns DataFrame with the 10 most important features and their according importance values. - - Parameters - ---------- - dep - name of column which should represent the dependent variable - indep - name of column which should represent the independent variable - - Returns - ---------- - DataFrame - - """ - - data = self.df.copy() - data = data.drop(indep, axis=1) - - #split data - train_df, test_df = train_test_split(data, test_size=0.2, random_state=42) - - X_train = train_df.drop(dep, axis=1) - y_train = train_df[dep] - X_test = test_df.drop(dep, axis=1) - y_test = test_df[dep] - - print("length of data for training:", len(X_train)) - print("length of data for testing:", len(X_test)) - - # run RandomForestRegressor - model = RandomForestRegressor(n_estimators=100, random_state=42) - model.fit(X_train, y_train) - - # predict dependent variable - y_pred = model.predict(X_test) - - # r²-value calculation - r2 = r2_score(y_test, y_pred) - print("r² Score:", r2) - - # Feature Importance - importance = model.feature_importances_ - - # sort features according to importance - s_id = np.argsort(importance) - pos = np.arange(s_id.shape[0]) - - # MinMax scaling - scaler = MinMaxScaler() - importance_scaled = scaler.fit_transform(importance.reshape(-1, 1)).flatten() - - # importance - total_importance = np.sum(importance_scaled) - percentage_importance = (importance_scaled / total_importance) * 100 - - # show top ten - top_n = 10 - s_id = s_id[-top_n:] - features = pd.DataFrame( - {'index1': np.array(X_train.columns)[s_id], 'importance_normalized': importance_scaled[s_id], - 'percentage_importance': percentage_importance[s_id]}) - return features - - - def plot_feature_importance(self, features, path: str, base_width: int =10, base_height: int =6): - """\ - Plots the ten most important features and returns a pyplot. - Needs the ten top features and their importances as parameters. - - Parameters - ---------- - features - DataFrame returned by function feature_importance - path - path to dict where plot should be saved - base_width - width of plot - base_height - height of plot - - Returns - ---------- - Show plot - - """ - - num_features = len(features) - - plot_width = base_width - plot_height = base_height + 0.2 * num_features - - ax = features.plot.bar(x='index1', - y='importance_normalized', - color='darkgray', - legend=False, - figsize=(plot_width, plot_height), - width=0.8, fontsize=20) - - plt.xlabel('') - plt.ylabel('Importance', fontsize=20) - - #adjust text height - for i, v in enumerate(features['percentage_importance']): - if features['importance_normalized'][i] + 0.01 > 1.1: - text_height = 1 - else: - text_height = features['importance_normalized'][i] + 0.01 - ax.text(i, text_height, f'{v:.1f}%', ha='center', va='bottom', fontsize=16, color='black') - - plt.title('Top 10 Features', fontsize=30, loc='left') - plt.ylim(0, 1.1) - plt.xticks(rotation=45, ha='right') - - if path is not None: - try: - plt.savefig(path, dpi=300, bbox_inches='tight') - print(f"Plot successfully saved to {path}") - except Exception as e: - print(f"An error occurred while saving to png: {e}") - - return plt.show() - - def cluster_kmeans(self, n_cluster: int, label_x: str, label_y: str): - """\ - Cluster self.df by kmeans clustering and show result as plt.show(). - - Parameters - ---------- - number_cluster - number of clusters - label_x - x-axis label - label_y - y-axis label - - Returns - ---------- - Show plot - """ - - kmeans_labels = cluster.KMeans(n_clusters=n_cluster).fit_predict(self.df) - - #plot - plt.style.use('seaborn-v0_8-poster') - plt.figure(figsize=(6, 6)) - - plt.scatter(self.df[['x']], - self.df[['y']], - c=kmeans_labels, - s=1, - cmap='Set1'); - - plt.title('K-Means Clustering') - plt.xlabel(label_x) - plt.ylabel(label_y) - plt.xticks([]) - plt.yticks([]) - - self.df['kmeans_cluster'] = kmeans_labels.tolist() - return plt.show() - - def cluster_gmm(self, number_component: int, random_s: int, label_x: str, label_y: str): - """\ - Cluster self.df by Gaussian Mixture Modeles and plot result. - - Parameters - ---------- - number_component - number of components of gmm clustering - random_s - random state of gmm clustering - label_x - x-axis label - label_y - y-axis label - - Returns - ---------- - Show plot - - """ - - gmm = GaussianMixture(n_components=number_component, random_state=random_s) - - gmm.fit(self.df) - gaussian_labels = gmm.predict(self.df) - - #plot - plt.style.use('seaborn-v0_8-poster') - plt.figure(figsize=(6, 6)) - - plt.scatter(self.df[['x']], - self.df[['y']], - c=gaussian_labels, - s=1, - cmap='Set1'); - - plt.title('Gaussian Mixture Clustering') - plt.xlabel(label_x) - plt.ylabel(label_y) - plt.xticks([]) - plt.yticks([]) - - self.df['GMM_cluster'] = gaussian_labels.tolist() - return plt.show() - - def cluster_hdbscan(self, cluster_size: int, label_x: str, label_y: str): - """\ - Cluster self.df by hdbscan and plot result. - - Parameters - ---------- - cluster_size - size of clusters - label_x - x-axis label - label_y - y-axis label - - Returns - ---------- - Show plot - - """ - - clusterer = hdbscan.HDBSCAN(min_cluster_size=cluster_size, gen_min_span_tree=True) - - clusterer.fit(self.df) - hdbscan_labels = clusterer.labels_ - - # Plot - outliers_mask = hdbscan_labels == -1 - plt.style.use('seaborn-v0_8-poster') - plt.figure(figsize=(6, 6)) - - plt.scatter(self.df[['x']], - self.df[['y']], - c=hdbscan_labels, - cmap='Spectral', - s=5) - - plt.scatter(self.df.loc[outliers_mask, 'x'], - self.df.loc[outliers_mask, 'y'], - s=4, - c='gray', - marker='v', - label='Outliers', - alpha=0.5) - - plt.title('HDBSCAN Clustering') - plt.xlabel(label_x) - plt.ylabel(label_y) - plt.legend(markerscale=6) - - plt.xticks([]) - plt.yticks([]) - - self.df['hdbscan_cluster'] = hdbscan_labels.tolist() - return plt.show() - - - - #def for dmap/umap plots - - def check_dataframe(self): - """\ - check if df is empty - """ - if self.df.empty: - raise ValueError("Dataframe is empty.") - - def prepare_data_source(self): - """\ - set ColumnDataSource as self.df and return ColumnDataSource - """ - return ColumnDataSource(self.df) - - def configure_hover_tooltips(self, feature: str, hover_tooltips: list[tuple[str, str]] = None): - """\ - Set hover tooltips. Either standard or personalized. - - Parameters - ---------- - feature - column of dataset - hover_tooltips - - Returns - ---------- - HoverTool - - """ - if hover_tooltips is None: - hover_tooltips = [ - ("Feature", f"{feature}"), - ("Index", "ID"), - ("X-Value", "x"), - ("Y-Value", "y") - ] - - hover_tooltips_formatted = "".join([ - f"
{label}: @{field}
" - for label, field in hover_tooltips - ]) - - hovertool_kwargs = { - "name": "data", - "tooltips": hover_tooltips_formatted - } - return HoverTool(**hovertool_kwargs) - - def create_base_plot(self, fig_width: int, - fig_height: int, - fig_title: str, - label_x: str, - label_y: str, - range_x: list[float], - range_y: list[float], - tools_emb, - title_align: str): - """\ - Settings for plotting umap/densmap. - - Parameters - ---------- - fig_width - width of figure - fig_height - height of figure - fig_title - title of figure - label_x - x-axis label - label_y - y-axis label - range_x - range of x-axis - range_y - range of y-axis - tools_emb - tools that should be embedded - title_align - position of title - - Returns - ---------- - plot - - """ - if not bokeh_above_three: - plot_kwargs = dict( - plot_width=fig_width, - plot_height=fig_height, - title=fig_title, - tools=tools_emb, - x_axis_label=label_x, - y_axis_label=label_y, - x_range=Range1d(start=range_x[0], end=range_x[1]), - y_range=Range1d(start=range_y[0], end=range_y[1]) - ) - else: - plot_kwargs = dict( - width=fig_width, - height=fig_height, - title=fig_title, - tools=tools_emb, - x_axis_label=label_x, - y_axis_label=label_y, - x_range=Range1d(start=range_x[0], end=range_x[1]), - y_range=Range1d(start=range_y[0], end=range_y[1]) - ) - - plot = figure(**plot_kwargs) - - # configure title - plot.title.align = title_align - plot.title.text_font_size = '30pt' - - return plot - - def configure_axes_and_legend(self, plot, show_axes: bool, show_legend: bool): - """\ - Settings for axes and legend. - - Parameters - ---------- - plot - show_axes - add axes - show_legend - add legend - - Returns - ---------- - None - - """ - - #axes - if not show_axes: - plot.xaxis.visible = False - plot.yaxis.visible = False - - plot.grid.visible = False - plot.outline_line_color = None - - plot.xaxis.axis_label_text_font_size = "20pt" - plot.yaxis.axis_label_text_font_size = "20pt" - - plot.xaxis.ticker = [] - plot.yaxis.ticker = [] - - # legend - if show_legend: - plot.legend.title_text_font_style = "bold" - plot.legend.background_fill_alpha = 0.0 - plot.legend.border_line_alpha = 0 - plot.legend.label_text_font_size = '20pt' - plot.legend.title_text_font_size = '20pt' - plot.legend.glyph_height = 30 - plot.legend.glyph_width = 30 - plot.add_layout(plot.legend[0], 'center') - else: - plot.legend.visible = False - - - def cat_plot(self, feature: str, - subs: list[str], - colors: list[str], - outputf: str, - fig_width: int, - fig_height: int, - fig_title: str, - label_x: str, - label_y: str, - range_x: list[float], - range_y: list[float], - hover_tooltips: list[tuple[str, str]] = None, - show_legend: bool = False, - point_size: int = 10, - point_alpha: float = 0.6, - show_axes: bool = False, - title_align: str = 'center') -> None: - - """\ - Create plot with categorical color mapper.Choose feature, colors and more. - Loads html file. - - Parameters - ---------- - feature - feature for categorical mapper - subs - values of feature - colors - list of colors - outputf - path to outputfile - fig_width - width of figure - fig_height - height of figure - fig_title - title of figure - label_x - x-axis label - label_y - y-axis label - range_x - x-axis range - range_y - y-axis range - hover_tooltips - hover tooltips - show_legend - possibility to add legend - point_size - size of points - point_alpha - alpha of points - show_axes - add axes - title_align - position of title - - Returns - ---------- - show plot - - """ - - self.check_dataframe() - output_file(outputf) - - source = self.prepare_data_source() - hover_emb = self.configure_hover_tooltips(feature, hover_tooltips) - cm = CategoricalColorMapper(palette=colors, factors=subs) - - tools_emb = ['save', 'lasso_select', 'pan', 'wheel_zoom', 'reset', hover_emb] - plot = self.create_base_plot(fig_width, - fig_height, - fig_title, - label_x, - label_y, - range_x, - range_y, - tools_emb, - title_align) - - plot.circle('x', 'y', - size=point_size, - color={'field': feature, 'transform': cm}, - alpha=point_alpha, - source=source, - name="data", - legend_group=feature) - - self.configure_axes_and_legend(plot, show_axes, show_legend) - - show(plot) - - def lin_plot(self, - outputf: str, - feature: str, - colors: list[str], - fig_width: int, - fig_height: int, - fig_title: str, - label_x: str, - label_y: str, - range_x: list[float], - range_y: list[float], - hover_tooltips: list[tuple[str, str]] = None, - show_legend: bool = False, - point_size: int = 3, - point_alpha: float = 0.7, - show_axes: bool = True, - title_align: str = 'center') -> None: - - - """\ - Create plot with Linear color mapper.Choose feature, colors and more. - Loads html file. - - Parameters - ---------- - outputf - path to output file - feature - feature for linear mapper - colors - list of colors - fig_width - width of figure - fig_height - height of figure - fig_title - title of figure - label_x - x-axis label - label_y - y-axis label - range_x - range of x-axis - range_y - range of y-axis - hover_tooltips - hover tooltips - show_legend - add legend - point_size - size of points - point_alpha - alpha of points - show_axes - add axes - title_align - title position - - Returns - ---------- - show plot - - """ - - - self.check_dataframe() - output_file(outputf) - - source = self.prepare_data_source() - hover_emb = self.configure_hover_tooltips(feature, hover_tooltips) - lm = LinearColorMapper(palette=colors, low=min(self.df[feature]), high=max(self.df[feature])) - - tools_emb = ['save', 'lasso_select', 'pan', 'wheel_zoom', 'reset', hover_emb] - plot = self.create_base_plot(fig_width, - fig_height, - fig_title, - label_x, - label_y, - range_x, - range_y, - tools_emb, - title_align) - - plot.circle('x', 'y', - size=point_size, - fill_color={'field': feature, 'transform': lm}, - alpha=point_alpha, - line_alpha=0, - line_width=0.03, - source=source, - name="data", - legend_group=feature) - - self.configure_axes_and_legend(plot, show_axes, show_legend) - - show(plot) - - - - - - - - - - - - - - - diff --git a/morphomapping/__init__.py b/morphomapping/__init__.py deleted file mode 100644 index 6dbfedd..0000000 --- a/morphomapping/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .morphomapping import MM - -__all__ = ["MM"] diff --git a/morphomapping/morphomapping.py b/morphomapping/morphomapping.py deleted file mode 100644 index 493d7af..0000000 --- a/morphomapping/morphomapping.py +++ /dev/null @@ -1,1220 +0,0 @@ -# Author: Amelie Bauerdick -# WabnitzLab -# 2024 - - -""" -Create interactive umap and densmap visualizations. -Plot feature importance and apply several clustering algorithms after dimensionality reduction. -Class MM is based on pandas DataFrames. - -Class: - - MM - -Functions: - - convert_to_CSV(fcs_path: str, csv_path: str) - read_CSV(path: str, add_index: bool, index_name: str) -> df - get_features -> list - get_df -> df - add_metadata(label: str, value) -> df - rename_variables(label_mapping) - select_condition(condition: str, value) - select_events(event_size: int) - drop_variables(*labels) - drop_events(first_row: int, last_row: int) - save_feature(*features) -> df - concat_variables(*dataframes) -> df - save_xlsx(path: str) - save_csv(path: str) - concat_df(*new_df, join='inner') - update_column_values(column_name: str, rename_values) - minmax_norm(first_column: str =None, last_column: str =None) - quant_scaler(first_column: str =None, last_column: str =None) - umap(nn: int, mdist: float, met: str) - dmap(dlambda: float, nn: int, mdist: float, met: str) - feature_importance(dep: str, indep: str) -> df - plot_feature_importance(features, path: str, base_width: int, base_height: int) - cluster_kmeans(n_cluster: int) - cluster_gmm(number_component: int, random_s: int) - cluster_hdbscan(cluster_size: int) - check_dataframe() - prepare_data_source() - configure_hover_tooltips(feature: str, hover_tooltips: list[tuple[str, str]] = None) - create_base_plot(fig_width: int, fig_height: int, fig_title: str, label_x: str, label_y: str, range_x: list[float], range_y: list[float], tools_emb, title_align: str) - configure_axes_and_legend(plot, show_axes: bool, show_legend: bool) - cat_plot(feature: str, subs: list[str], colors: list[str], outputf: str, fig_width: int, fig_height: int, fig_title: str, label_x: str,label_y: str, range_x: list[float], range_y: list[float], - hover_tooltips: list[tuple[str, str]] = None, show_legend: bool = False, point_size: int, point_alpha: float, show_axes: bool = False, title_align: str = 'center') -> None: - lin_plot(outputf: str, feature: str, colors: list[str], fig_width: int, fig_height: int, fig_title: str, label_x: str, label_y: str, range_x: list[float], range_y: list[float], - hover_tooltips: list[tuple[str, str]] = None, show_legend: bool = False, point_size: int, point_alpha: float, show_axes: bool = True, title_align: str = 'center') -> None: - -variables: - - self.df - -""" - - - -# Import Packages - -import flowkit as fk -import pandas as pd -import numpy as np -from matplotlib import pyplot as plt -from sklearn.ensemble import RandomForestRegressor -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import MinMaxScaler, QuantileTransformer -from sklearn.metrics import r2_score -import sklearn.cluster as cluster -from sklearn.mixture import GaussianMixture -import umap -import hdbscan -from bokeh.models import (HoverTool, - ColumnDataSource, - Range1d, - LinearColorMapper, - CategoricalColorMapper) -from bokeh.plotting import figure, show, output_file -import bokeh - -from typing import Optional - -bokeh_version = bokeh.__version__ -bokeh_above_three = int(bokeh_version.split(".")[0]) >= 3 - - -class MM: - - """\ - A class to create interactive dimensionality reduction plots. - Based on pandas DataFrame. - - Parameters - ---------- - df - DataFrame containing all Imaging Flow Cytometry data - - Returns - ------- - None - - """ - - - - def __init__(self): - self.df = pd.DataFrame() - - def convert_to_CSV(self, fcs_path: str, csv_path: str): - """\ - Converts fcs-file to .csv file and saves it to csv_path. - - Parameters - ---------- - fcs_path - path to fcs file - csv_path - path to csv file - - Returns - ------- - None - - """ - try: - if not csv_path: - raise ValueError("The CSV path is empty or invalid.") - - sample = fk.Sample(fcs_path) - sample.export(filename=csv_path, source='raw') - - print(f"File successfully converted to {csv_path}.") - - except FileNotFoundError: - print(f"FCS file not found: {fcs_path}") - raise - except Exception as e: - print(f"Conversion failed: {e}") - raise - - def read_CSV(self, path: str, add_index: bool =False, index_name: str ='Index'): - """\ - Load csv-file and save it as self.df - - Parameters - ---------- - path - path to csv file - add_index - add index to df - index_name - set name of index - - Returns - ---------- - DataFrame - - """ - - self.df = pd.read_csv(path) - - #rename columns - self.df.columns = ( - self.df.columns.str.strip() - .str.replace(' ', '_') - .str.replace('&', 'and') - .str.replace('+', 'plus') - .str.replace('-', 'minus') - ) - - if add_index: - self.df[index_name] = range(len(self.df)) - self.df.set_index(index_name, inplace=True) - - return self.df - - def get_features(self): - """\ - return list of self.df columns - """ - return list(self.df.columns) - - def get_df(self): - """\ - return self.df - """ - return self.df - - def add_metadata(self, label: str, value): - """\ - add column with specific value to self.df - - Parameters - ---------- - label - name of column - value - value of column - - Returns - ---------- - DataFrame - - """ - empty = self.df.empty - if empty: - raise ValueError("Dataframe is empty.") - - self.df[label] = value - return self.df - - def rename_variables(self, label_mapping): - """\ - rename column(s) with new column labels - - Parameters - ---------- - label_mapping - old and new assigned column labels - - Returns - ---------- - None - - """ - - missing_columns = [col for col in label_mapping.keys() if col not in self.df.columns] - - if missing_columns: - print(f"The following columns do not exist: {missing_columns}") - else: - self.df.rename(columns=label_mapping, inplace=True) - - def select_condition(self, condition: str, value): - """\ - select specific rows by condition and save new df as self.df - - Parameters - ---------- - condition - name of column - value - value of column - - Returns - ---------- - None - """ - - if condition not in self.df.columns: - raise ValueError(f"Column '{condition}' does not exist.") - if value not in self.df[condition].values: - raise ValueError(f"Value '{value}' does not exist in column '{condition}'.") - # select rows and save new df - self.df = self.df.loc[self.df[condition] == value] - - def select_events(self, event_size: int): - """\ - randomly select events and save as self.df - - Parameters - ---------- - event_size - number of events - - Returns - ---------- - None - """ - - if event_size > self.df.shape[0]: - raise ValueError(f"Number of events '{event_size}' is larger than the number of rows ({self.df.shape[0]}).") - else: - self.df = self.df.sample(event_size, random_state=1).copy() - self.df.sort_index(inplace=True) - - def drop_variables(self, *labels): - """\ - drop certain columns from self.df - - Parameters - ---------- - labels - name of column(s) - - Returns - ---------- - None - """ - missing_labels = [label for label in labels if label not in self.df.columns] - if missing_labels: - raise ValueError(f"Column(s) {missing_labels} do not exist.") - else: - self.df = self.df.drop(columns=list(labels)) - - def drop_events(self, first_row: int, last_row: int): - """\ - drop specific rows from self.df - - Parameters - ---------- - first_row - number of first row that should be dropped - last_row - number of last row that should be dropped - - Returns - ---------- - None - """ - if first_row not in self.df.index: - raise ValueError(f"First row '{first_row}' does not exist.") - if last_row not in self.df.index: - raise ValueError(f"Last row '{last_row}' does not exist.") - if first_row > last_row: - raise ValueError(f"First row number '{first_row}' is greater than last row number '{last_row}'.") - self.df = self.df.drop(index=self.df.index[first_row:last_row + 1]) - - def save_feature(self, *features): - """\ - save specific columns of self.df in new DataFrame and return new DataFrame - - Parameters - ---------- - features - name of features which should be returned as df - - Returns - ---------- - DataFrame - """ - labels = [feature for feature in features if feature not in self.df.columns] - if labels: - raise ValueError(f"Column(s) {labels} do not exist.") - - df = self.df[list(features)].copy() - - return df - - def concat_variables(self, *dataframes): - """\ - attach new columns as df to self.df and return resulting df - - Parameters - ---------- - dataframes - new dataframes which should be attached to self.df - - Returns - ---------- - DataFrame - """ - result_df = pd.concat([self.df] + list(dataframes), axis=1) - self.df = result_df - return result_df - - def save_xlsx(self, path: str): - """\ - save self.df as xlsx file to chosen path - - Parameters - ---------- - path - chosen path - - Returns - ---------- - None - """ - self.df.to_excel(path, index=False) - print(f"DataFrame successfully saved to {path}") - - def save_csv(self, path: str): - """\ - save self.df as csv to chosen path - - Parameters - ---------- - path - chosen path - - Returns - ---------- - None - """ - self.df.to_csv(path, index=False) - print(f"DataFrame successfully saved to {path}") - - def concat_df(self, *new_df, join='inner'): - """\ - concatenate self.df and new DataFrame(s) (joining inner by default) - - Parameters - ---------- - new_df - DataFrames that should be concatenated - join - type of joining DataFrames - - Returns - ---------- - None - """ - self.df = pd.concat([self.df, *new_df], join=join) - - def update_column_values(self, column_name: str, rename_values): - """\ - replace values in a specific column with a specific new value - - Parameters - ---------- - column_name - name of column - rename_values - new value of column - - Returns - ---------- - None - """ - - if column_name not in self.df.columns: - raise ValueError(f"Column '{column_name}' does not exist.") - - self.df[column_name] = self.df[column_name].astype(str) - - for original_value, new_value in rename_values.items(): - self.df[column_name] = self.df[column_name].str.replace(str(original_value), new_value) - - def minmax_norm(self, first_column: Optional[str] = None, last_column: str =None): - """\ - Apply MinMax normalization to self.df. - Specify whether all columns should be normalized by setting parameters. - - Parameters - ---------- - first_column - name of first column. Start of normalization. - last_column: - name of last column. End of normalization. - - Returns - ---------- - None - - """ - - #check df - if first_column is None or last_column is None: - df1 = self.df - else: - if first_column not in self.df.columns: - raise ValueError(f"First column '{first_column}' does not exist.") - if last_column not in self.df.columns: - raise ValueError(f"Last column '{last_column}' does not exist.") - - first_id = self.df.columns.get_loc(first_column) - last_id = self.df.columns.get_loc(last_column) - if first_id > last_id: - raise ValueError(f"First column '{first_column}' is after last column '{last_column}'.") - - df1 = self.df.iloc[:, first_id:last_id + 1] - - # Apply min-max normalization - df = (df1 - df1.min()) / (df1.max() - df1.min()) - - # Replace the subset - if first_column is not None and last_column is not None: - self.df.iloc[:, first_id:last_id + 1] = df - else: - self.df = df - - - def quant_scaler(self, first_column: Optional[str] = None, last_column: str =None): - """\ - Apply QuantileTransformer to self.df. - Specify whether all columns should be normalized by setting parameters. - - Parameters - ---------- - first_column - name of first column. Start of normalization. - last_column - name of last column. End of normalization. - - Returns - ---------- - None - """ - - if first_column is None or last_column is None: - df1 = self.df - else: - if first_column not in self.df.columns: - raise ValueError(f"First column '{first_column}' does not exist.") - if last_column not in self.df.columns: - raise ValueError(f"Last column '{last_column}' does not exist.") - - first_id = self.df.columns.get_loc(first_column) - last_id = self.df.columns.get_loc(last_column) - if first_id > last_id: - raise ValueError(f"First column '{first_column}' is after last column '{last_column}'.") - - df1 = self.df.iloc[:, first_id:last_id + 1] - - # Apply Quantile Transformation - scaler = QuantileTransformer(output_distribution='uniform') - df2 = pd.DataFrame(scaler.fit_transform(df1), - columns=df1.columns, - index=df1.index) - - if first_column is not None and last_column is not None: - self.df.iloc[:, first_id:last_id + 1] = df2 - else: - self.df = df2 - - - def umap(self, nn: int, mdist: float, met: str): - """\ - Run umap with self.df. Adds x and y values to self.df as extra columns. - - Parameters - ---------- - nn - nearest neighbours for umap settings - mdist - minimum distance for umap settings - met - metric for umap settings - - Returns - ---------- - None - - """ - reducer = umap.UMAP( - n_neighbors=nn, - min_dist=mdist, - metric=met - ) - - embedding = reducer.fit_transform(self.df) - - x = embedding[:, 0] - y = embedding[:, 1] - - self.df['x'] = x - self.df['y'] = y - - def dmap(self, dlambda: float, nn: int, mdist: float, met: str): - """\ - Run dmap with self.df. Adds x and y values to self.df as extra columns. - - Parameters - ---------- - dlambda - denslambda for densmap settings - nn - nearest neighbours for densmap settings - mdist - minimum distance for densmap settings - met - metric for densmap settings - - Returns - ---------- - None - """ - - reducer = umap.UMAP( - densmap=True, - dens_lambda=dlambda, - n_neighbors=nn, - min_dist=mdist, - metric=met - ) - - embedding = reducer.fit_transform(self.df) - x = embedding[:, 0] - y = embedding[:, 1] - - self.df['x'] = x - self.df['y'] = y - - def feature_importance(self, dep: str, indep: str): - """\ - Calculates feature importance of columns in self.df (especially for x and y after dmap/umap were run). - Returns DataFrame with the 10 most important features and their according importance values. - - Parameters - ---------- - dep - name of column which should represent the dependent variable - indep - name of column which should represent the independent variable - - Returns - ---------- - DataFrame - - """ - - data = self.df.copy() - data = data.drop(indep, axis=1) - - #split data - train_df, test_df = train_test_split(data, test_size=0.2, random_state=42) - - X_train = train_df.drop(dep, axis=1) - y_train = train_df[dep] - X_test = test_df.drop(dep, axis=1) - y_test = test_df[dep] - - print("length of data for training:", len(X_train)) - print("length of data for testing:", len(X_test)) - - # run RandomForestRegressor - model = RandomForestRegressor(n_estimators=100, random_state=42) - model.fit(X_train, y_train) - - # predict dependent variable - y_pred = model.predict(X_test) - - # r²-value calculation - r2 = r2_score(y_test, y_pred) - print("r² Score:", r2) - - # Feature Importance - importance = model.feature_importances_ - - # sort features according to importance - s_id = np.argsort(importance) - pos = np.arange(s_id.shape[0]) - - # MinMax scaling - scaler = MinMaxScaler() - importance_scaled = scaler.fit_transform(importance.reshape(-1, 1)).flatten() - - # importance - total_importance = np.sum(importance_scaled) - percentage_importance = (importance_scaled / total_importance) * 100 - - # show top ten - top_n = 10 - s_id = s_id[-top_n:] - features = pd.DataFrame( - {'index1': np.array(X_train.columns)[s_id], 'importance_normalized': importance_scaled[s_id], - 'percentage_importance': percentage_importance[s_id]}) - return features - - - def plot_feature_importance(self, features, path: str, base_width: int =10, base_height: int =6): - """\ - Plots the ten most important features and returns a pyplot. - Needs the ten top features and their importances as parameters. - - Parameters - ---------- - features - DataFrame returned by function feature_importance - path - path to dict where plot should be saved - base_width - width of plot - base_height - height of plot - - Returns - ---------- - Show plot - - """ - - num_features = len(features) - - plot_width = base_width - plot_height = base_height + 0.2 * num_features - - ax = features.plot.bar(x='index1', - y='importance_normalized', - color='darkgray', - legend=False, - figsize=(plot_width, plot_height), - width=0.8, fontsize=20) - - plt.xlabel('') - plt.ylabel('Importance', fontsize=20) - - #adjust text height - for i, v in enumerate(features['percentage_importance']): - if features['importance_normalized'][i] + 0.01 > 1.1: - text_height = 1 - else: - text_height = features['importance_normalized'][i] + 0.01 - ax.text(i, text_height, f'{v:.1f}%', ha='center', va='bottom', fontsize=16, color='black') - - plt.title('Top 10 Features', fontsize=30, loc='left') - plt.ylim(0, 1.1) - plt.xticks(rotation=45, ha='right') - - if path is not None: - try: - plt.savefig(path, dpi=300, bbox_inches='tight') - print(f"Plot successfully saved to {path}") - except Exception as e: - print(f"An error occurred while saving to png: {e}") - - return plt.show() - - def cluster_kmeans(self, n_cluster: int, label_x: str, label_y: str): - """\ - Cluster self.df by kmeans clustering and show result as plt.show(). - - Parameters - ---------- - number_cluster - number of clusters - label_x - x-axis label - label_y - y-axis label - - Returns - ---------- - Show plot - """ - - kmeans_labels = cluster.KMeans(n_clusters=n_cluster).fit_predict(self.df) - - #plot - plt.style.use('seaborn-v0_8-poster') - plt.figure(figsize=(6, 6)) - - plt.scatter(self.df[['x']], - self.df[['y']], - c=kmeans_labels, - s=1, - cmap='Set1'); - - plt.title('K-Means Clustering') - plt.xlabel(label_x) - plt.ylabel(label_y) - plt.xticks([]) - plt.yticks([]) - - self.df['kmeans_cluster'] = kmeans_labels.tolist() - return plt.show() - - def cluster_gmm(self, number_component: int, random_s: int, label_x: str, label_y: str): - """\ - Cluster self.df by Gaussian Mixture Modeles and plot result. - - Parameters - ---------- - number_component - number of components of gmm clustering - random_s - random state of gmm clustering - label_x - x-axis label - label_y - y-axis label - - Returns - ---------- - Show plot - - """ - - gmm = GaussianMixture(n_components=number_component, random_state=random_s) - - gmm.fit(self.df) - gaussian_labels = gmm.predict(self.df) - - #plot - plt.style.use('seaborn-v0_8-poster') - plt.figure(figsize=(6, 6)) - - plt.scatter(self.df[['x']], - self.df[['y']], - c=gaussian_labels, - s=1, - cmap='Set1'); - - plt.title('Gaussian Mixture Clustering') - plt.xlabel(label_x) - plt.ylabel(label_y) - plt.xticks([]) - plt.yticks([]) - - self.df['GMM_cluster'] = gaussian_labels.tolist() - return plt.show() - - def cluster_hdbscan(self, cluster_size: int, label_x: str, label_y: str): - """\ - Cluster self.df by hdbscan and plot result. - - Parameters - ---------- - cluster_size - size of clusters - label_x - x-axis label - label_y - y-axis label - - Returns - ---------- - Show plot - - """ - - clusterer = hdbscan.HDBSCAN(min_cluster_size=cluster_size, gen_min_span_tree=True) - - clusterer.fit(self.df) - hdbscan_labels = clusterer.labels_ - - # Plot - outliers_mask = hdbscan_labels == -1 - plt.style.use('seaborn-v0_8-poster') - plt.figure(figsize=(6, 6)) - - plt.scatter(self.df[['x']], - self.df[['y']], - c=hdbscan_labels, - cmap='Spectral', - s=5) - - plt.scatter(self.df.loc[outliers_mask, 'x'], - self.df.loc[outliers_mask, 'y'], - s=4, - c='gray', - marker='v', - label='Outliers', - alpha=0.5) - - plt.title('HDBSCAN Clustering') - plt.xlabel(label_x) - plt.ylabel(label_y) - plt.legend(markerscale=6) - - plt.xticks([]) - plt.yticks([]) - - self.df['hdbscan_cluster'] = hdbscan_labels.tolist() - return plt.show() - - - - #def for dmap/umap plots - - def check_dataframe(self): - """\ - check if df is empty - """ - if self.df.empty: - raise ValueError("Dataframe is empty.") - - def prepare_data_source(self): - """\ - set ColumnDataSource as self.df and return ColumnDataSource - """ - return ColumnDataSource(self.df) - - def configure_hover_tooltips(self, feature: str, hover_tooltips: list[tuple[str, str]] = None): - """\ - Set hover tooltips. Either standard or personalized. - - Parameters - ---------- - feature - column of dataset - hover_tooltips - - Returns - ---------- - HoverTool - - """ - if hover_tooltips is None: - hover_tooltips = [ - ("Feature", f"{feature}"), - ("Index", "ID"), - ("X-Value", "x"), - ("Y-Value", "y") - ] - - hover_tooltips_formatted = "".join([ - f"
{label}: @{field}
" - for label, field in hover_tooltips - ]) - - hovertool_kwargs = { - "name": "data", - "tooltips": hover_tooltips_formatted - } - return HoverTool(**hovertool_kwargs) - - def create_base_plot(self, fig_width: int, - fig_height: int, - fig_title: str, - label_x: str, - label_y: str, - range_x: list[float], - range_y: list[float], - tools_emb, - title_align: str): - """\ - Settings for plotting umap/densmap. - - Parameters - ---------- - fig_width - width of figure - fig_height - height of figure - fig_title - title of figure - label_x - x-axis label - label_y - y-axis label - range_x - range of x-axis - range_y - range of y-axis - tools_emb - tools that should be embedded - title_align - position of title - - Returns - ---------- - plot - - """ - if not bokeh_above_three: - plot_kwargs = dict( - plot_width=fig_width, - plot_height=fig_height, - title=fig_title, - tools=tools_emb, - x_axis_label=label_x, - y_axis_label=label_y, - x_range=Range1d(start=range_x[0], end=range_x[1]), - y_range=Range1d(start=range_y[0], end=range_y[1]) - ) - else: - plot_kwargs = dict( - width=fig_width, - height=fig_height, - title=fig_title, - tools=tools_emb, - x_axis_label=label_x, - y_axis_label=label_y, - x_range=Range1d(start=range_x[0], end=range_x[1]), - y_range=Range1d(start=range_y[0], end=range_y[1]) - ) - - plot = figure(**plot_kwargs) - - # configure title - plot.title.align = title_align - plot.title.text_font_size = '30pt' - - return plot - - def configure_axes_and_legend(self, plot, show_axes: bool, show_legend: bool): - """\ - Settings for axes and legend. - - Parameters - ---------- - plot - show_axes - add axes - show_legend - add legend - - Returns - ---------- - None - - """ - - #axes - if not show_axes: - plot.xaxis.visible = False - plot.yaxis.visible = False - - plot.grid.visible = False - plot.outline_line_color = None - - plot.xaxis.axis_label_text_font_size = "20pt" - plot.yaxis.axis_label_text_font_size = "20pt" - - plot.xaxis.ticker = [] - plot.yaxis.ticker = [] - - # legend - if show_legend: - plot.legend.title_text_font_style = "bold" - plot.legend.background_fill_alpha = 0.0 - plot.legend.border_line_alpha = 0 - plot.legend.label_text_font_size = '20pt' - plot.legend.title_text_font_size = '20pt' - plot.legend.glyph_height = 30 - plot.legend.glyph_width = 30 - plot.add_layout(plot.legend[0], 'center') - else: - plot.legend.visible = False - - - def cat_plot(self, feature: str, - subs: list[str], - colors: list[str], - outputf: str, - fig_width: int, - fig_height: int, - fig_title: str, - label_x: str, - label_y: str, - range_x: list[float], - range_y: list[float], - hover_tooltips: list[tuple[str, str]] = None, - show_legend: bool = False, - point_size: int = 10, - point_alpha: float = 0.6, - show_axes: bool = False, - title_align: str = 'center') -> None: - - """\ - Create plot with categorical color mapper.Choose feature, colors and more. - Loads html file. - - Parameters - ---------- - feature - feature for categorical mapper - subs - values of feature - colors - list of colors - outputf - path to outputfile - fig_width - width of figure - fig_height - height of figure - fig_title - title of figure - label_x - x-axis label - label_y - y-axis label - range_x - x-axis range - range_y - y-axis range - hover_tooltips - hover tooltips - show_legend - possibility to add legend - point_size - size of points - point_alpha - alpha of points - show_axes - add axes - title_align - position of title - - Returns - ---------- - show plot - - """ - - self.check_dataframe() - output_file(outputf) - - source = self.prepare_data_source() - hover_emb = self.configure_hover_tooltips(feature, hover_tooltips) - cm = CategoricalColorMapper(palette=colors, factors=subs) - - tools_emb = ['save', 'lasso_select', 'pan', 'wheel_zoom', 'reset', hover_emb] - plot = self.create_base_plot(fig_width, - fig_height, - fig_title, - label_x, - label_y, - range_x, - range_y, - tools_emb, - title_align) - - plot.circle('x', 'y', - size=point_size, - color={'field': feature, 'transform': cm}, - alpha=point_alpha, - source=source, - name="data", - legend_group=feature) - - self.configure_axes_and_legend(plot, show_axes, show_legend) - - show(plot) - - def lin_plot(self, - outputf: str, - feature: str, - colors: list[str], - fig_width: int, - fig_height: int, - fig_title: str, - label_x: str, - label_y: str, - range_x: list[float], - range_y: list[float], - hover_tooltips: list[tuple[str, str]] = None, - show_legend: bool = False, - point_size: int = 3, - point_alpha: float = 0.7, - show_axes: bool = True, - title_align: str = 'center') -> None: - - - """\ - Create plot with Linear color mapper.Choose feature, colors and more. - Loads html file. - - Parameters - ---------- - outputf - path to output file - feature - feature for linear mapper - colors - list of colors - fig_width - width of figure - fig_height - height of figure - fig_title - title of figure - label_x - x-axis label - label_y - y-axis label - range_x - range of x-axis - range_y - range of y-axis - hover_tooltips - hover tooltips - show_legend - add legend - point_size - size of points - point_alpha - alpha of points - show_axes - add axes - title_align - title position - - Returns - ---------- - show plot - - """ - - - self.check_dataframe() - output_file(outputf) - - source = self.prepare_data_source() - hover_emb = self.configure_hover_tooltips(feature, hover_tooltips) - lm = LinearColorMapper(palette=colors, low=min(self.df[feature]), high=max(self.df[feature])) - - tools_emb = ['save', 'lasso_select', 'pan', 'wheel_zoom', 'reset', hover_emb] - plot = self.create_base_plot(fig_width, - fig_height, - fig_title, - label_x, - label_y, - range_x, - range_y, - tools_emb, - title_align) - - plot.circle('x', 'y', - size=point_size, - fill_color={'field': feature, 'transform': lm}, - alpha=point_alpha, - line_alpha=0, - line_width=0.03, - source=source, - name="data", - legend_group=feature) - - self.configure_axes_and_legend(plot, show_axes, show_legend) - - show(plot) - - - - - - - - - - - - - - -