Skip to content

Commit

Permalink
add quickstart
Browse files Browse the repository at this point in the history
  • Loading branch information
cwieder committed Sep 29, 2023
1 parent dec763f commit ebdc7dc
Show file tree
Hide file tree
Showing 5 changed files with 1,437 additions and 276 deletions.
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,14 @@ PathIntegrate Python package for pathway-based multi-omics data integration

![PathIntegrate graphical abstract](PathIntegrateGraphic.png "Title")

Stable build, docs, and tutorials coming soon!
## Features

## Installation
```
pip install pathintegrate
```

## Tutorials and documentation
Please see our Quickstart guide on Google Colab.

The full documentation and function reference for PathIntegrate are available on our ReadTheDocs page.
1,532 changes: 1,340 additions & 192 deletions quickstart.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
sspa>=1.0.0
114 changes: 70 additions & 44 deletions src/pathintegrate/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ def find_root(G,child):
G = nx.from_pandas_edgelist(hierarchy_hsa, source=0, target=1, create_using=nx.DiGraph())
hierarchy_hsa_all['Root'] = [find_root(G, i) for i in hierarchy_hsa_all[1]]
root_cmap = dict(zip(set(hierarchy_hsa_all['Root']), sns.color_palette("husl", len(set(hierarchy_hsa_all['Root']))).as_hex()))
# save the root-pathway colour map to CSV (root_cmap.csv)
pd.DataFrame.from_dict(root_cmap, orient='index').to_csv('root_cmap.csv')
cy_mo = nx.readwrite.json_graph.cytoscape_data(G)


Expand Down Expand Up @@ -142,38 +144,6 @@ def find_root(G,child):
)


sidebar2 = html.Div(
[html.P("Node information"),
html.Hr(),
dbc.ListGroup(
[
dbc.ListGroupItem(
html.Div(
[html.P("Pathway name"), html.P(id='cytoscape-mouseoverNodeData-output-name')
])),
dbc.ListGroupItem(html.Div(
[html.P("Parent pathway"), html.P(id='cytoscape-mouseoverNodeData-output-root')
])),
dbc.ListGroupItem(html.Div(
[html.P("Coverage"), html.P(id='cytoscape-mouseoverNodeData-output-coverage')
])),
]),

html.Br(),

],
style={
"position": "fixed",
"top": 0,
"right": 0,
"bottom": 0,
"width": "16rem",
"padding": "1rem",
"padding-top": "5rem",
"background-color": "#BBDEFB",
},
)



navbar = dbc.NavbarSimple(
Expand Down Expand Up @@ -215,15 +185,15 @@ def find_root(G,child):
'style': {
'background-color': 'data(color)',
'shape': 'ellipse',
'label': 'data(label)',
# 'label': 'data(label)',
'text-wrap': 'wrap',
'text-background-color': 'yellow',
'text-max-width': '120px',
'width': 'data(MO_coverage)',
'height':'data(MO_coverage)',
'text-justification': 'auto',
'font-family': ['Verdana', 'Roboto', 'Arial'],
'font-size': '10px'
'font-size': '0px'
}
},
{
Expand Down Expand Up @@ -290,7 +260,7 @@ def displayTapNodeData(data):
Input('mo_graph', 'mouseoverNodeData'))
def displayTapNodeData(data):
if data:
return data['MO_coverage']
return data['Coverage']


# Download image
Expand Down Expand Up @@ -381,14 +351,6 @@ def launch_network_app(pi_model, pathway_source, hierarchy_source='preloaded', p
global name_dict
name_dict = dict(zip(pathway_source.index, pathway_source['Pathway_name']))
G.add_nodes_from([(node, {'Name': attr, 'label': attr}) for (node, attr) in name_dict.items()])
G.add_nodes_from([(node, {'Root': attr,
'RootCol': root_cmap[attr],
'color': root_cmap[attr],
'RootName': name_dict[attr]}) for (node, attr) in dict(zip(hierarchy_hsa_all[1], hierarchy_hsa_all['Root'])).items()])
G.add_nodes_from([(node, {'MO_coverage': np.sqrt(attr)*2.5}) for (node, attr) in pi_model.coverage.items()])
if p_values:
pval_cmap = dict(zip(p_values.keys(), get_hex_colors(p_values.values(), 'cmc.lajolla_r')))
G.add_nodes_from([(node, {'PvalColour': attr}) for (node, attr) in pval_cmap.items()])

global modelname
modelname = pi_model.name
Expand All @@ -412,6 +374,20 @@ def launch_network_app(pi_model, pathway_source, hierarchy_source='preloaded', p
# # add vip as node colour
# vip_cmap = dict(zip(pathways_accessible, get_hex_colors(pi_model.vip['VIP_scaled'].tolist(), 'Blues')))
# G.add_nodes_from([(node, {'VIPColour': attr}) for (node, attr) in vip_cmap.items()])
#
# filter root pathways for pathways accessible by the model
hierarchy_hsa_all_filt = hierarchy_hsa_all[hierarchy_hsa_all[1].isin(pathways_accessible)]
root_cmap = dict(zip(set(hierarchy_hsa_all_filt['Root']), sns.color_palette("husl", len(set(hierarchy_hsa_all_filt['Root']))).as_hex()))

G.add_nodes_from([(node, {'Root': attr,
'RootCol': root_cmap[attr],
'color': root_cmap[attr],
'RootName': name_dict[attr]}) for (node, attr) in dict(zip(hierarchy_hsa_all_filt[1], hierarchy_hsa_all_filt['Root'])).items()])
G.add_nodes_from([(node, {'MO_coverage': np.sqrt(attr)*2.5}) for (node, attr) in pi_model.coverage.items()])
G.add_nodes_from([(node, {'Coverage': attr}) for (node, attr) in pi_model.coverage.items()])
if p_values:
pval_cmap = dict(zip(p_values.keys(), get_hex_colors(p_values.values(), 'cmc.lajolla_r')))
G.add_nodes_from([(node, {'PvalColour': attr}) for (node, attr) in pval_cmap.items()])

# add molecular importances for plotting
global molecule_importances
Expand All @@ -434,6 +410,56 @@ def launch_network_app(pi_model, pathway_source, hierarchy_source='preloaded', p
# style=CONTENT_STYLE
)

sidebar2 = html.Div(
[html.P("Node information"),
html.Hr(),
dbc.ListGroup(
[
dbc.ListGroupItem(
html.Div(
[html.P("Pathway name"), html.P(id='cytoscape-mouseoverNodeData-output-name')
])),
dbc.ListGroupItem(html.Div(
[html.P("Parent pathway"), html.P(id='cytoscape-mouseoverNodeData-output-root')
])),
dbc.ListGroupItem(html.Div(
[html.P("Coverage"), html.P(id='cytoscape-mouseoverNodeData-output-coverage')
])),
]),

html.Br(),
# Legend for node colours
html.P("Node colour legend"),
html.Hr(),
# make a legend for the root pathway colours
html.Div([
html.P("Root pathway"),
dbc.ListGroup(
[
dbc.ListGroupItem(
html.Div(
[html.P(i), html.P(name_dict[i])
], style={'background-color': root_cmap[i]})
) for i in root_cmap.keys()
]),
]),
html.Br(),

],
style={
"position": "fixed",
"top": 0,
"right": 0,
"bottom": 0,
"width": "16rem",
"padding": "1rem",
"padding-top": "5rem",
"background-color": "#BBDEFB",
},
)



app.layout = html.Div([
navbar,
sidebar,
Expand Down Expand Up @@ -471,7 +497,7 @@ def launch_network_app(pi_model, pathway_source, hierarchy_source='preloaded', p
# sidebar2,
# ]),],fluid=True)
# app.layout = html.Div([dcc.Location(id="url"), navbar, sidebar, content, sidebar2])
app.run(debug=False, use_reloader=False)
app.run(debug=True, use_reloader=False)



Expand Down
54 changes: 15 additions & 39 deletions src/pathintegrate/pathintegrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ def get_multi_omics_coverage(self):

def MultiView(self, ncomp=2):
print('Generating pathway scores...')
sspa_scores = [self.sspa_method(self.pathway_source, self.min_coverage).fit_transform(i) for i in self.omics_data_scaled.values()]
sspa_scores_ = [self.sspa_method(self.pathway_source, self.min_coverage) for i in self.omics_data_scaled.values()]
sspa_scores = [sspa_scores_[n].fit_transform(i) for n, i in enumerate(self.omics_data_scaled.values())]
# sspa_scores = [self.sspa_method(self.pathway_source, self.min_coverage).fit_transform(i) for i in self.omics_data_scaled.values()]
# sspa_scores = [self.sspa_method(i, self.pathway_source, self.min_coverage, return_molecular_importance=True) for i in self.omics_data.values()]

self.sspa_scores_mv = dict(zip(self.omics_data.keys(), sspa_scores))
Expand All @@ -51,12 +53,12 @@ def MultiView(self, ncomp=2):
vip_df['Name'] = vip_df.index.map(dict(zip(self.pathway_source.index, self.pathway_source['Pathway_name'])))
vip_df['Source'] = sum([[k] * v.shape[1] for k, v in self.sspa_scores_mv.items()], [])
vip_df['VIP_scaled'] = vip_df.groupby('Source')[0].transform(lambda x: StandardScaler().fit_transform(x.values[:,np.newaxis]).ravel())

vip_df['VIP'] = vip_scores
mv.name = 'MultiView'

# only some sspa methods can return the molecular importance
if hasattr(sspa_scores[0], 'molecular_importance'):
mv.molecular_importances = dict(zip(self.omics_data.keys(), [i.molecular_importance for i in sspa_scores]))
if hasattr(sspa_scores_[0], 'molecular_importance'):
mv.molecular_importance = dict(zip(self.omics_data.keys(), [i.molecular_importance for i in sspa_scores_]))
mv.beta = mv.beta_.flatten()
mv.vip = vip_df
mv.omics_names = list(self.omics_data.keys())
Expand Down Expand Up @@ -124,7 +126,7 @@ def SingleViewGridSearchCV(self, param_grid, model=sklearn.linear_model.Logistic
pipe_sv = sklearn.pipeline.Pipeline([
('Scaler', StandardScaler().set_output(transform="pandas")),
('sspa', self.sspa_method(self.pathway_source, self.min_coverage)),
('sv', model())
('model', model())
])

# Set up cross-validation
Expand All @@ -135,53 +137,27 @@ def SingleViewGridSearchCV(self, param_grid, model=sklearn.linear_model.Logistic
def MultiViewCV(self):
# Set up sklearn pipeline
pipe_mv = sklearn.pipeline.Pipeline([
('sspa', self.sspa_method(self.pathway_source, self.min_coverage)),
('mbpls', MBPLS(n_components=2))
])
pass

# Set up cross-validation
cv_res = cross_val_score(pipe_mv, X=[i.copy(deep=True) for i in self.omics_data.values()], y=self.labels)
return cv_res

def MultiViewGridSearchCV(self):
pass



def VIP_multiBlock(x_weights, x_superscores, x_loadings, y_loadings):
# stack the weights from all blocks
weights = np.vstack(x_weights)
# normalise the weights
weights_norm = weights / np.sqrt(np.sum(weights**2, axis=0))
# calculate product of sum of squares of superscores and y loadings
sumsquares = np.sum(x_superscores**2, axis=0) * np.sum(y_loadings**2, axis=0)
# p = number of variables - stack the loadings from all blocks
p = np.vstack(x_loadings).shape[0]

# VIP is a weighted sum of squares of PLS weights
vip_scores = np.sqrt(p * np.sum(sumsquares*(weights_norm**2), axis=1) / np.sum(sumsquares))
return vip_scores

# metab = pd.read_csv('data/metabolomics_example.csv', index_col=0)
# prot = pd.read_csv('data/proteomics_example.csv', index_col=0)

# # make possible to download MO paths from reactome
# # mo_paths = sspa.process_reactome(
# # organism='Homo sapiens',
# # download_latest=True,
# # omics_type='multiomics',
# # filepath='data/')

# # load pre-loaded pathways
# mo_paths = sspa.process_gmt(infile='data/Reactome_Homo_sapiens_pathways_multiomics_R85.gmt')

# pi_model = PathIntegrate({'Metabolomics': metab, 'Proteomics':prot.iloc[:, :-1]}, metadata=prot['Group'], pathway_source=mo_paths, sspa_scoring='svd', min_coverage=2)

# covid_multi_view = pi_model.MultiView(ncomp=5)

# # launch the pathway network explorer on a local server
# launch_network_app(covid_multi_view, mo_paths)

# print(covid_multi_view.A_corrected_)
# print(covid_multi_view.vip)

# plot_functs.plot_block_importance(covid_multi_view)

# covid_single_view = pi_model.SingleView(model_params={'random_state':0})
# launch_network_app(covid_single_view, mo_paths)
# print(covid_single_view.intercept_)
vip_scores = np.sqrt(p * np.sum(sumsquares*(weights**2), axis=1) / np.sum(sumsquares))
return vip_scores

0 comments on commit ebdc7dc

Please sign in to comment.