add quickstart

cwieder · Sep 29, 2023 · ebdc7dc · ebdc7dc
1 parent dec763f
commit ebdc7dc
Show file tree

Hide file tree

Showing 5 changed files with 1,437 additions and 276 deletions.
diff --git a/README.md b/README.md
@@ -3,4 +3,14 @@ PathIntegrate Python package for pathway-based multi-omics data integration
 
 ![PathIntegrate graphical abstract](PathIntegrateGraphic.png "Title")
 
-Stable build, docs, and tutorials coming soon!
+## Features
+
+## Installation
+```
+pip install pathintegrate
+```
+
+## Tutorials and documentation
+Please see our Quickstart guide on Google Colab.
+
+Full documentation and a function reference for PathIntegrate are available on our ReadTheDocs page.
diff --git a/quickstart.ipynb b/quickstart.ipynb
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1 @@
+sspa>=1.0.0
diff --git a/src/pathintegrate/app.py b/src/pathintegrate/app.py
@@ -62,6 +62,8 @@ def find_root(G,child):
 G = nx.from_pandas_edgelist(hierarchy_hsa, source=0, target=1, create_using=nx.DiGraph())
 hierarchy_hsa_all['Root'] = [find_root(G, i) for i in hierarchy_hsa_all[1]]
 root_cmap = dict(zip(set(hierarchy_hsa_all['Root']), sns.color_palette("husl", len(set(hierarchy_hsa_all['Root']))).as_hex()))
+# Save the root-pathway colour map (root ID -> hex colour) to root_cmap.csv in the current working directory
+pd.DataFrame.from_dict(root_cmap, orient='index').to_csv('root_cmap.csv')
 cy_mo = nx.readwrite.json_graph.cytoscape_data(G)
 
 
@@ -142,38 +144,6 @@ def find_root(G,child):
 )
 
 
-sidebar2 = html.Div(
-    [html.P("Node information"),
-    html.Hr(),
-    dbc.ListGroup(
-    [
-        dbc.ListGroupItem(
-            html.Div(
-                [html.P("Pathway name"), html.P(id='cytoscape-mouseoverNodeData-output-name')
-                ])),
-        dbc.ListGroupItem(html.Div(
-                [html.P("Parent pathway"), html.P(id='cytoscape-mouseoverNodeData-output-root')
-                ])),
-        dbc.ListGroupItem(html.Div(
-                [html.P("Coverage"), html.P(id='cytoscape-mouseoverNodeData-output-coverage')
-                ])),
-    ]),
-
-    html.Br(),
-
-    ],
-    style={
-    "position": "fixed",
-    "top": 0,
-    "right": 0,
-    "bottom": 0,
-    "width": "16rem",
-    "padding": "1rem",
-    "padding-top": "5rem",
-    "background-color": "#BBDEFB",
-},
-)
-
 
 
 navbar = dbc.NavbarSimple(
@@ -215,15 +185,15 @@ def find_root(G,child):
         'style': {
             'background-color': 'data(color)',
             'shape': 'ellipse',
-            'label': 'data(label)',
+            # 'label': 'data(label)',
             'text-wrap': 'wrap',
             'text-background-color': 'yellow',
             'text-max-width': '120px',
             'width': 'data(MO_coverage)',
             'height':'data(MO_coverage)',
             'text-justification': 'auto',
             'font-family': ['Verdana', 'Roboto', 'Arial'],
-            'font-size': '10px'
+            'font-size': '0px'
         }
     },
     {
@@ -290,7 +260,7 @@ def displayTapNodeData(data):
               Input('mo_graph', 'mouseoverNodeData'))
 def displayTapNodeData(data):
     if data:
-        return data['MO_coverage']
+        return data['Coverage']
 
 
 # Download image
@@ -381,14 +351,6 @@ def launch_network_app(pi_model, pathway_source, hierarchy_source='preloaded', p
     global name_dict
     name_dict = dict(zip(pathway_source.index, pathway_source['Pathway_name']))
     G.add_nodes_from([(node, {'Name': attr, 'label': attr}) for (node, attr) in name_dict.items()])
-    G.add_nodes_from([(node, {'Root': attr, 
-                              'RootCol': root_cmap[attr], 
-                              'color': root_cmap[attr], 
-                              'RootName': name_dict[attr]}) for (node, attr) in dict(zip(hierarchy_hsa_all[1], hierarchy_hsa_all['Root'])).items()])
-    G.add_nodes_from([(node, {'MO_coverage': np.sqrt(attr)*2.5}) for (node, attr) in pi_model.coverage.items()])
-    if p_values:
-        pval_cmap = dict(zip(p_values.keys(), get_hex_colors(p_values.values(), 'cmc.lajolla_r')))
-        G.add_nodes_from([(node, {'PvalColour': attr}) for (node, attr) in pval_cmap.items()])
 
     global modelname
     modelname = pi_model.name
@@ -412,6 +374,20 @@ def launch_network_app(pi_model, pathway_source, hierarchy_source='preloaded', p
         # # add vip as node colour
         # vip_cmap = dict(zip(pathways_accessible, get_hex_colors(pi_model.vip['VIP_scaled'].tolist(), 'Blues')))
         # G.add_nodes_from([(node, {'VIPColour': attr}) for (node, attr) in vip_cmap.items()])
+        #
+    # filter root pathways for pathways accessible by the model
+    hierarchy_hsa_all_filt = hierarchy_hsa_all[hierarchy_hsa_all[1].isin(pathways_accessible)]
+    root_cmap = dict(zip(set(hierarchy_hsa_all_filt['Root']), sns.color_palette("husl", len(set(hierarchy_hsa_all_filt['Root']))).as_hex()))
+
+    G.add_nodes_from([(node, {'Root': attr, 
+                              'RootCol': root_cmap[attr], 
+                              'color': root_cmap[attr], 
+                              'RootName': name_dict[attr]}) for (node, attr) in dict(zip(hierarchy_hsa_all_filt[1], hierarchy_hsa_all_filt['Root'])).items()])
+    G.add_nodes_from([(node, {'MO_coverage': np.sqrt(attr)*2.5}) for (node, attr) in pi_model.coverage.items()])
+    G.add_nodes_from([(node, {'Coverage': attr}) for (node, attr) in pi_model.coverage.items()])
+    if p_values:
+        pval_cmap = dict(zip(p_values.keys(), get_hex_colors(p_values.values(), 'cmc.lajolla_r')))
+        G.add_nodes_from([(node, {'PvalColour': attr}) for (node, attr) in pval_cmap.items()])
 
     # add molecular importances for plotting
     global molecule_importances
@@ -434,6 +410,56 @@ def launch_network_app(pi_model, pathway_source, hierarchy_source='preloaded', p
     # style=CONTENT_STYLE
     )
 
+    sidebar2 = html.Div(
+        [html.P("Node information"),
+        html.Hr(),
+        dbc.ListGroup(
+        [
+            dbc.ListGroupItem(
+                html.Div(
+                    [html.P("Pathway name"), html.P(id='cytoscape-mouseoverNodeData-output-name')
+                    ])),
+            dbc.ListGroupItem(html.Div(
+                    [html.P("Parent pathway"), html.P(id='cytoscape-mouseoverNodeData-output-root')
+                    ])),
+            dbc.ListGroupItem(html.Div(
+                    [html.P("Coverage"), html.P(id='cytoscape-mouseoverNodeData-output-coverage')
+                    ])),
+        ]),
+
+        html.Br(),
+        # Legend for node colours 
+        html.P("Node colour legend"),
+        html.Hr(),
+        # make a legend for the root pathway colours
+        html.Div([
+            html.P("Root pathway"),
+            dbc.ListGroup(
+            [
+                dbc.ListGroupItem(
+                    html.Div(
+                        [html.P(i), html.P(name_dict[i])
+                        ], style={'background-color': root_cmap[i]})
+                ) for i in root_cmap.keys()
+            ]),
+        ]),
+        html.Br(),
+
+        ],
+        style={
+        "position": "fixed",
+        "top": 0,
+        "right": 0,
+        "bottom": 0,
+        "width": "16rem",
+        "padding": "1rem",
+        "padding-top": "5rem",
+        "background-color": "#BBDEFB",
+    },
+    )
+
+
+
     app.layout = html.Div([
         navbar,
         sidebar,
@@ -471,7 +497,7 @@ def launch_network_app(pi_model, pathway_source, hierarchy_source='preloaded', p
     #                         sidebar2,
     #                         ]),],fluid=True)
     # app.layout = html.Div([dcc.Location(id="url"), navbar, sidebar, content, sidebar2])
-    app.run(debug=False, use_reloader=False)
+    app.run(debug=True, use_reloader=False)
 
 
 

diff --git a/src/pathintegrate/pathintegrate.py b/src/pathintegrate/pathintegrate.py
@@ -37,7 +37,9 @@ def get_multi_omics_coverage(self):
 
     def MultiView(self, ncomp=2):
         print('Generating pathway scores...')
-        sspa_scores = [self.sspa_method(self.pathway_source, self.min_coverage).fit_transform(i) for i in self.omics_data_scaled.values()]
+        sspa_scores_ = [self.sspa_method(self.pathway_source, self.min_coverage) for i in self.omics_data_scaled.values()]
+        sspa_scores = [sspa_scores_[n].fit_transform(i) for n, i in enumerate(self.omics_data_scaled.values())]
+        # sspa_scores = [self.sspa_method(self.pathway_source, self.min_coverage).fit_transform(i) for i in self.omics_data_scaled.values()]
         # sspa_scores = [self.sspa_method(i, self.pathway_source, self.min_coverage, return_molecular_importance=True) for i in self.omics_data.values()]
 
         self.sspa_scores_mv = dict(zip(self.omics_data.keys(), sspa_scores))
@@ -51,12 +53,12 @@ def MultiView(self, ncomp=2):
         vip_df['Name'] = vip_df.index.map(dict(zip(self.pathway_source.index, self.pathway_source['Pathway_name'])))
         vip_df['Source'] = sum([[k] * v.shape[1] for k, v in self.sspa_scores_mv.items()], [])
         vip_df['VIP_scaled'] = vip_df.groupby('Source')[0].transform(lambda x: StandardScaler().fit_transform(x.values[:,np.newaxis]).ravel())
-
+        vip_df['VIP'] = vip_scores
         mv.name = 'MultiView'
 
         # only some sspa methods can return the molecular importance
-        if hasattr(sspa_scores[0], 'molecular_importance'):
-            mv.molecular_importances = dict(zip(self.omics_data.keys(), [i.molecular_importance for i in sspa_scores]))
+        if hasattr(sspa_scores_[0], 'molecular_importance'):
+            mv.molecular_importance = dict(zip(self.omics_data.keys(), [i.molecular_importance for i in sspa_scores_]))
         mv.beta = mv.beta_.flatten()
         mv.vip = vip_df
         mv.omics_names = list(self.omics_data.keys())
@@ -124,7 +126,7 @@ def SingleViewGridSearchCV(self, param_grid, model=sklearn.linear_model.Logistic
         pipe_sv = sklearn.pipeline.Pipeline([
             ('Scaler', StandardScaler().set_output(transform="pandas")),
             ('sspa', self.sspa_method(self.pathway_source, self.min_coverage)),
-            ('sv', model())
+            ('model', model())
         ])
 
         # Set up cross-validation
@@ -135,53 +137,27 @@ def SingleViewGridSearchCV(self, param_grid, model=sklearn.linear_model.Logistic
     def MultiViewCV(self):
         # Set up sklearn pipeline
         pipe_mv = sklearn.pipeline.Pipeline([
+            ('sspa', self.sspa_method(self.pathway_source, self.min_coverage)),
             ('mbpls', MBPLS(n_components=2))
         ])
-        pass
 
         # Set up cross-validation
+        cv_res = cross_val_score(pipe_mv, X=[i.copy(deep=True) for i in self.omics_data.values()], y=self.labels)
+        return cv_res
+
+    def MultiViewGridSearchCV(self):
+        pass
 
 
 
 def VIP_multiBlock(x_weights, x_superscores, x_loadings, y_loadings):
     # stack the weights from all blocks 
     weights = np.vstack(x_weights)
-    # normalise the weights
-    weights_norm = weights / np.sqrt(np.sum(weights**2, axis=0))
     # calculate product of sum of squares of superscores and y loadings
     sumsquares = np.sum(x_superscores**2, axis=0) * np.sum(y_loadings**2, axis=0)
     # p = number of variables - stack the loadings from all blocks
     p = np.vstack(x_loadings).shape[0]
 
     # VIP is a weighted sum of squares of PLS weights 
-    vip_scores = np.sqrt(p * np.sum(sumsquares*(weights_norm**2), axis=1) / np.sum(sumsquares))
-    return vip_scores
-
-# metab = pd.read_csv('data/metabolomics_example.csv', index_col=0)
-# prot = pd.read_csv('data/proteomics_example.csv', index_col=0)
-
-# # make possible to download MO paths from reactome
-# # mo_paths = sspa.process_reactome(
-# #     organism='Homo sapiens',
-# #     download_latest=True,
-# #     omics_type='multiomics',
-# #     filepath='data/')
-
-# # load pre-loaded pathways 
-# mo_paths = sspa.process_gmt(infile='data/Reactome_Homo_sapiens_pathways_multiomics_R85.gmt')
-
-# pi_model = PathIntegrate({'Metabolomics': metab, 'Proteomics':prot.iloc[:, :-1]}, metadata=prot['Group'], pathway_source=mo_paths, sspa_scoring='svd', min_coverage=2)
-
-# covid_multi_view = pi_model.MultiView(ncomp=5)
-
-# # launch the pathwy network explorer on a local server
-# launch_network_app(covid_multi_view, mo_paths)
-
-# print(covid_multi_view.A_corrected_)
-# print(covid_multi_view.vip)
-
-# plot_functs.plot_block_importance(covid_multi_view)
-
-# covid_single_view = pi_model.SingleView(model_params={'random_state':0})
-# launch_network_app(covid_single_view, mo_paths)
-# print(covid_single_view.intercept_)
+    vip_scores = np.sqrt(p * np.sum(sumsquares*(weights**2), axis=1) / np.sum(sumsquares))
+    return vip_scores