Skip to content

Commit

Permalink
test with copd data
Browse files Browse the repository at this point in the history
  • Loading branch information
cwieder committed Jul 25, 2023
1 parent 85cdf37 commit fd7e80c
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 18 deletions.
30 changes: 30 additions & 0 deletions COPD_PI_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import pandas as pd
import numpy as np
import pathintegrate
import sspa


# Test script: fit a PathIntegrate MultiView model on the COPDgene data
# (metabolomics, proteomics, transcriptomics) and open the network explorer.

# Phenotype table (tab-separated) and the three omics matrices; for the omics
# files the first CSV column is used as the row index.
md = pd.read_csv('H:/Documents/pathway-integration/COPDgene/COPDgene_phonotype.txt', sep='\t')
prot = pd.read_csv('H:/Documents/pathway-integration/COPDgene/COPDgene_proteomics_UniProt.csv', index_col=0)
metab = pd.read_csv('H:/Documents/pathway-integration/COPDgene/COPDgene_metabolomics_CHEBI_mapped.csv', index_col=0)
trans = pd.read_csv('D:/COPDgene/Processed/COPDgene_transcriptomics_filt_Q1_scaled.csv', index_col=0)

# Label each metabolomics row with the 'COPD' value of the phenotype row whose
# 'sid' equals the row's index label (rows without a match get NaN).
metab['Group'] = metab.index.map(dict(zip(md['sid'], md["COPD"])))

# Keep only rows whose label is 0 or 1; this also drops unmatched (NaN) rows.
metab = metab[metab['Group'].isin([0, 1])]

# Restrict all three tables to the samples they have in common. `.loc` does not
# accept a set (deprecated in pandas 1.5, TypeError in 2.0+), so index with a
# sorted list, which also gives the same row order in every table and every run.
intersect_samples = sorted(set(metab.index) & set(prot.index) & set(trans.index))
prot = prot.loc[intersect_samples, :]
metab = metab.loc[intersect_samples, :]
trans = trans.loc[intersect_samples, :]

# Forward slash instead of the original '\R', which is an invalid escape
# sequence in a normal string literal (SyntaxWarning on recent Python).
mo_paths_all = pd.read_csv('D:/Pathway_databases/Reactome_multi_omics_ChEBI_Uniprot_Ensembl.csv', index_col=0, dtype=object)

# NOTE(review): `metab` is passed with its appended 'Group' column, while the
# last column of `prot` and `trans` is dropped. Confirm that those two files
# really end with a non-feature column and that keeping 'Group' in the
# metabolomics block is intended.
pi_model = pathintegrate.PathIntegrate(
    omics_data={
        'Metabolomics': metab,
        'Proteomics': prot.iloc[:, :-1],
        'Transcriptomics': trans.iloc[:, :-1],
    },
    metadata=metab['Group'],
    pathway_source=mo_paths_all,
    sspa_scoring='svd',
    min_coverage=2,
)

copdgene_multi_view = pi_model.MultiView(ncomp=4)

# launch the pathway network explorer on a local server
pathintegrate.launch_network_app(copdgene_multi_view, mo_paths_all)
34 changes: 20 additions & 14 deletions PathIntegrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,11 @@ def get_multi_omics_coverage(self):
return coverage

def MultiView(self, ncomp=2):
print('Generating pathway scores...')
sspa_scores = [self.sspa_method(i, self.pathway_source, self.min_coverage, return_molecular_importance=True) for i in self.omics_data.values()]
self.sspa_scores_mv = dict(zip(self.omics_data.keys(), [i[0] for i in sspa_scores]))
# self.sspa_scores_mv = {k: self.sspa_method(v, self.pathway_source, self.min_coverage) for k, v in self.omics_data.items()}
print('Fitting MultiView model')
mv = MBPLS(n_components=ncomp)
mv.fit([i.copy(deep=True) for i in self.sspa_scores_mv.values()], self.labels)

Expand All @@ -107,13 +109,17 @@ def MultiView(self, ncomp=2):

def SingleView(self, model=sklearn.linear_model.LogisticRegression, model_params=None):
concat_data = pd.concat(self.omics_data.values(), axis=1)
print('Generating pathway scores...')

sspa_scores =self.sspa_method(concat_data, self.pathway_source, self.min_coverage, return_molecular_importance=True)
self.sspa_scores_sv = sspa_scores[0]

if model_params:
sv = model(**model_params)
else:
sv = model()
print('Fitting SingleView model')

sv.fit(X=self.sspa_scores_sv, y=self.labels)
sv.sspa_scores = self.sspa_scores_sv
sv.name = 'SingleView'
Expand Down Expand Up @@ -142,31 +148,31 @@ def VIP_multiBlock(x_weights, x_superscores, x_loadings, y_loadings):
vip_scores = np.sqrt(p * np.sum(sumsquares*(weights_norm**2), axis=1) / np.sum(sumsquares))
return vip_scores

metab = pd.read_csv('data/metabolomics_example.csv', index_col=0)
prot = pd.read_csv('data/proteomics_example.csv', index_col=0)
# metab = pd.read_csv('data/metabolomics_example.csv', index_col=0)
# prot = pd.read_csv('data/proteomics_example.csv', index_col=0)

# make possible to download MO paths from reactome
# mo_paths = sspa.process_reactome(
# organism='Homo sapiens',
# download_latest=True,
# omics_type='multiomics',
# filepath='data/')
# # make possible to download MO paths from reactome
# # mo_paths = sspa.process_reactome(
# # organism='Homo sapiens',
# # download_latest=True,
# # omics_type='multiomics',
# # filepath='data/')

# load pre-loaded pathways
mo_paths = sspa.process_gmt(infile='data/Reactome_Homo_sapiens_pathways_multiomics_R85.gmt')
# # load pre-loaded pathways
# mo_paths = sspa.process_gmt(infile='data/Reactome_Homo_sapiens_pathways_multiomics_R85.gmt')

pi_model = PathIntegrate({'Metabolomics': metab, 'Proteomics':prot.iloc[:, :-1]}, metadata=prot['Group'], pathway_source=mo_paths, sspa_scoring='svd', min_coverage=2)
# pi_model = PathIntegrate({'Metabolomics': metab, 'Proteomics':prot.iloc[:, :-1]}, metadata=prot['Group'], pathway_source=mo_paths, sspa_scoring='svd', min_coverage=2)

# covid_multi_view = pi_model.MultiView(ncomp=5)

# launch the pathwy network explorer on a local server
# # launch the pathwy network explorer on a local server
# launch_network_app(covid_multi_view, mo_paths)

# print(covid_multi_view.A_corrected_)
# print(covid_multi_view.vip)

# plot_functs.plot_block_importance(covid_multi_view)

covid_single_view = pi_model.SingleView(model_params={'random_state':0})
launch_network_app(covid_single_view, mo_paths)
# covid_single_view = pi_model.SingleView(model_params={'random_state':0})
# launch_network_app(covid_single_view, mo_paths)
# print(covid_single_view.intercept_)
6 changes: 2 additions & 4 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def find_root(G,child):
# the default graph is the pathway hierarchy coloured by root pathway membership as defined by Reactome
G = nx.from_pandas_edgelist(hierarchy_hsa, source=0, target=1, create_using=nx.DiGraph())
hierarchy_hsa_all['Root'] = [find_root(G, i) for i in hierarchy_hsa_all[1]]
root_cmap = dict(zip(set(hierarchy_hsa_all['Root']), sns.color_palette("hls", len(set(hierarchy_hsa_all['Root']))).as_hex()))
root_cmap = dict(zip(set(hierarchy_hsa_all['Root']), sns.color_palette("husl", len(set(hierarchy_hsa_all['Root']))).as_hex()))
cy_mo = nx.readwrite.json_graph.cytoscape_data(G)


Expand Down Expand Up @@ -346,7 +346,7 @@ def update_bar_chart(pathway):
pass

pathway_df_molec = pd.concat(pathways_dfs, axis=1)
fig = make_subplots(rows=1, cols=len(pathways_dfs), shared_xaxes=False)
fig = make_subplots(rows=1, cols=len(pathways_dfs), shared_xaxes='rows')

for i in range(0, len(pathways_dfs)):
fig.add_trace(go.Bar(x=pathway_df_molec.index, y=pathway_df_molec.iloc[:,i], name=pathway_df_molec.columns[i]), row=1, col=i+1)
Expand Down Expand Up @@ -428,8 +428,6 @@ def launch_network_app(pi_model, pathway_source, hierarchy_source='preloaded'):
# style=CONTENT_STYLE
)



app.layout = html.Div([
navbar,
sidebar,
Expand Down

0 comments on commit fd7e80c

Please sign in to comment.