-
Notifications
You must be signed in to change notification settings - Fork 0
/
clustering.py
44 lines (38 loc) · 1.28 KB
/
clustering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import scanpy as sc
import matplotlib.pyplot as plt
import time
import numpy as np
import c_index
from sklearn.cluster import KMeans, HDBSCAN
from validclust.validclust import ValidClust
from gapstatistics.gapstatistics import GapStatistics
def cluster(adata, embedding, method="leiden", *, n_clusters=2,
            min_cluster_size=10):
    """Cluster the observations of ``adata`` and time the run.

    A neighbour graph is first built from ``adata.obsm[embedding]`` and
    stored under the key ``embedding``. The graph-based methods then cluster
    on that graph; the scikit-learn methods cluster on
    ``adata.obsm['X_pca']``.

    Args:
        adata: Annotated data object accepted by scanpy; modified in place.
        embedding: Key in ``adata.obsm`` used to build the neighbour graph.
            Also used as the key under which the graph is stored.
        method: One of ``"leiden"``, ``"louvain"``, ``"kmeans"`` or
            ``"HDBSCAN"`` (case-sensitive).
        n_clusters: Number of clusters for ``"kmeans"``.
        min_cluster_size: Minimum cluster size for ``"HDBSCAN"``.

    Returns:
        A ``(adata, elapsed)`` tuple where ``elapsed`` is the wall-clock
        duration in seconds of the neighbour-graph construction plus the
        clustering step.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    supported = ("leiden", "louvain", "kmeans", "HDBSCAN")
    if method not in supported:
        # Fail before any expensive work instead of silently returning an
        # unlabelled object.
        raise ValueError(
            f"Unknown clustering method {method!r}; "
            f"expected one of {supported}"
        )

    start = time.perf_counter()

    # The graph is built for every method (kept for backward compatibility),
    # so its cost is part of the reported time even for kmeans/HDBSCAN.
    sc.pp.neighbors(adata, use_rep=embedding, key_added=embedding)

    if method == "leiden":
        # neighbors_key must match key_added above; otherwise scanpy looks
        # for the default "neighbors" slot, which this function never fills.
        sc.tl.leiden(
            adata,
            flavor="igraph",
            n_iterations=2,
            directed=False,
            neighbors_key=embedding,
        )
    elif method == "louvain":
        # sc.tl.louvain has no n_iterations parameter; passing it raised
        # TypeError.
        sc.tl.louvain(
            adata,
            flavor="igraph",
            directed=False,
            neighbors_key=embedding,
        )
    elif method == "kmeans":
        # NOTE(review): clusters on X_pca regardless of `embedding` --
        # confirm whether `embedding` should be used here instead.
        x_pca = adata.obsm['X_pca']
        kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(x_pca)
        adata.obs['kmeans'] = kmeans.labels_.astype(str)
    else:  # method == "HDBSCAN"
        # NOTE(review): same X_pca caveat as the kmeans branch.
        x_pca = adata.obsm['X_pca']
        hdb = HDBSCAN(min_cluster_size=min_cluster_size).fit(x_pca)
        adata.obs['hdbscan'] = hdb.labels_.astype(str)

    elapsed = time.perf_counter() - start
    return adata, elapsed
def plot_embedding(adata, basis, color):
    """Draw a scanpy embedding plot and show the current figure.

    Args:
        adata: Data object passed straight to ``sc.pl.embedding``.
        basis: Embedding name forwarded as the ``basis`` argument.
        color: Value forwarded as the ``color`` argument.
    """
    sc.pl.embedding(adata, basis=basis, color=color)
    plt.show()
"""
Cluster-evaluation utilities listed for reference (none of them is called in
the code above):
metrics.fowlkes_mallows_score()
GapStatistics()
ValidClust()
"""