Adding clustering without subsampling to UI

labsyspharm · Jul 1, 2022 · e1b9c97
1 parent 8cfedfc
commit e1b9c97
Show file tree

Hide file tree

Showing 9 changed files with 37 additions and 10 deletions.
diff --git a/.gitignore b/.gitignore
@@ -13,6 +13,7 @@ static/frontend/node_modules
 .ipynb_checkpoints/
 minerva_analysis/data/
 visinity/data/
+testmatrix.npy
 
 # testing
 /coverage

diff --git a/minerva_analysis/client/src/css/main.css b/minerva_analysis/client/src/css/main.css
@@ -619,6 +619,15 @@ polygon:hover {
     margin-bottom: 0;
 }
 
+
+.subsample_label {
+    font-size: 9pt;
+    color: white;
+    margin-bottom: 0;
+    margin-right:10px;
+}
+
+
 #similarity_val {
     font-size: 9pt;
     color: rgba(255, 165, 0);

diff --git a/minerva_analysis/client/src/js/services/dataLayer.js b/minerva_analysis/client/src/js/services/dataLayer.js
@@ -495,11 +495,12 @@ class DataLayer {
         }
     }
 
-    async customCluster(numClusters) {
+    async customCluster(numClusters, subsample=true) {
         try {
             let response = await fetch('/custom_cluster?' + new URLSearchParams({
                 datasource: datasource,
                 numClusters: numClusters,
+                subsample: subsample,
                 mode: mode
 
             }))

diff --git a/minerva_analysis/client/src/js/views/scatter.js b/minerva_analysis/client/src/js/views/scatter.js
@@ -419,13 +419,14 @@ class Scatterplot {
     async customCluster() {
         const self = this;
         let numberOfClusters = document.getElementById('custom_cluster_number')
+        let subsample = document.getElementById('subsample_cluster').checked;
         if (!numberOfClusters || !numberOfClusters.value) {
             return;
         }
         numberOfClusters = _.toInteger(numberOfClusters.value);
         // document.getElementById('custom_cluster_loading').innerHTML += '<span class="spinner-border spinner-border-sm" role="status" aria-hidden="true"></span>'
         try {
-            let updatedNeighborhoods = await self.dataLayer.customCluster(numberOfClusters)
+            let updatedNeighborhoods = await self.dataLayer.customCluster(numberOfClusters, subsample)
             self.neighborhoodTable.updateNeighborhoods(updatedNeighborhoods);
         } catch (e) {
         }

diff --git a/minerva_analysis/client/templates/index.html b/minerva_analysis/client/templates/index.html
@@ -61,6 +61,13 @@ <h5 id="cluster_header">Cluster</h5>
                                         <div class="row" id="cluster-number-input">
                                             <input type="number" id="custom_cluster_number" placeholder="Num. Clusters">
                                         </div>
+                                        <div class="row" id="cluster-subsample">
+                                            <label>
+                                                <span class="subsample_label">Subsample</span>
+                                                <input id="subsample_cluster" type="checkbox" class="checkbox style-2 " checked="checked">
+                                            </label>
+
+                                        </div>
                                     </div>
                                     <div class="col-md-1 col">
                                         <span class="material-icons" id="custom_cluster_submit" data-toggle="tooltip"

diff --git a/minerva_analysis/server/models/data_model.py b/minerva_analysis/server/models/data_model.py
@@ -35,6 +35,7 @@
 import zarr
 from numcodecs import Blosc
 from scipy import spatial
+from pycave.bayes import gmm
 
 # from line_profiler_pycharm import profile
 
@@ -416,7 +417,7 @@ def brush_selection(datasource_name, brush, selection_ids):
         pheno_col = phenotype_list.index(pheno)
         these_ids = np.argwhere(
             (neighborhoods[:, pheno_col] >= brush_range[0]) & (
-                        neighborhoods[:, pheno_col] <= brush_range[1])).flatten()
+                    neighborhoods[:, pheno_col] <= brush_range[1])).flatten()
         valid_ids = np.intersect1d(valid_ids, these_ids)
     obj = get_neighborhood_stats(datasource_name, valid_ids, np_datasource, fields=fields)
     return obj
@@ -425,7 +426,7 @@ def brush_selection(datasource_name, brush, selection_ids):
 #
 
 
-def create_custom_clusters(datasource_name, num_clusters, mode='single'):
+def create_custom_clusters(datasource_name, num_clusters, mode='single', subsample=True):
     global config
     global datasource
     database_model.delete(database_model.Neighborhood, custom=True)
@@ -436,15 +437,18 @@ def create_custom_clusters(datasource_name, num_clusters, mode='single'):
 
     if mode == 'single':
 
-        g_mixtures = GaussianMixture(n_components=num_clusters)
         data = np.load(Path(config[datasource_name]['embedding']))
         # # TODO REMOVE CLUSTER HARDCODE
         # coords = data[:, 0:2]
         # neighborhoods = np.load(Path(config[datasource_name]['neighborhoods']))
         # pcaed = PCA(n_components=2).fit_transform(neighborhoods)
         # data = np.hstack((data, pcaed))
+        if subsample:
+            g_mixtures = GaussianMixture(n_components=num_clusters)
+        else:
+            g_mixtures = gmm.GaussianMixture(num_components=num_clusters)
         g_mixtures.fit(data)
-        clusters = g_mixtures.predict(data)
+        clusters = np.array(g_mixtures.predict(data))
         for cluster in np.sort(np.unique(clusters)).astype(int).tolist():
             indices = np.argwhere(clusters == cluster).flatten()
             f = io.BytesIO()
@@ -486,8 +490,10 @@ def create_custom_clusters(datasource_name, num_clusters, mode='single'):
         max_cluster_id = database_model.max(database_model.NeighborhoodStats, 'neighborhood_id')
         if max_cluster_id is None:
             max_cluster_id = 0
-
-        g_mixtures = GaussianMixture(n_components=num_clusters)
+        if subsample:
+            g_mixtures = GaussianMixture(n_components=num_clusters)
+        else:
+            g_mixtures = gmm.GaussianMixture(num_components=num_clusters)
 
         pca = PCA(n_components=2).fit(combined_neighborhoods['full_neighborhoods'])
         pcaed = pca.transform(combined_neighborhoods['full_neighborhoods'])

diff --git a/minerva_analysis/server/routes/data_routes.py b/minerva_analysis/server/routes/data_routes.py
@@ -295,8 +295,9 @@ def get_custom_clusters():
     # Parse (rect - [x, y, r], channels [string])
     datasource = request.args.get('datasource')
     num_clusters = int(request.args.get('numClusters'))
+    subsample = request.args.get('subsample') == 'true'
     mode = request.args.get('mode')
-    data_model.create_custom_clusters(datasource, num_clusters, mode)
+    data_model.create_custom_clusters(datasource, num_clusters, mode, subsample)
     resp = data_model.get_neighborhood_list(datasource)
     return serialize_and_submit_json(resp)
 

diff --git a/minerva_analysis/server/utils/timing.py b/minerva_analysis/server/utils/timing.py
@@ -201,7 +201,7 @@ def create_matrix(_phenotypes_array, _len_phenos, _neighbors, _distances, _lengt
             times_dict[num_cells_str][radius_str]['perm_test'] = time.time() - timer
             timer = time.time()
 
-    pickle.dump(times_dict, open('rev_dict_perm_'+str(k)+'.pk', 'wb'))
+    pickle.dump(times_dict, open('mon_dict_perm_'+str(k)+'.pk', 'wb'))
 
 #
 

diff --git a/requirements.yml b/requirements.yml
@@ -24,6 +24,7 @@ dependencies:
   - pip
   - pip:
       - elementpath==2.3.2
+      - pycave==3.1.3
       - numpy-indexed
       - ome-types==0.2.9
       - opencv-python