diff --git a/examples/nearest_node.py b/examples/nearest_node.py
new file mode 100644
index 00000000..5f430422
--- /dev/null
+++ b/examples/nearest_node.py
@@ -0,0 +1,45 @@
+"""
+nearest_nodes example based on breast cancer data.
+
+Perturbs one randomly chosen sample of the breast cancer data to stand in for
+a new patient, projects it through the two lenses used by plot_breast_cancer
+and prints the mapper graph nodes nearest to it.
+"""
+
+import numpy as np
+from sklearn import neighbors, preprocessing
+
+# Importing plot_breast_cancer executes that example script; the data, lens-1
+# model, lens, mapper and graph it builds are re-used here.
+from plot_breast_cancer import X, graph, lens, mapper, model, y
+
+# new patient data incoming: an existing sample scaled by 5%, as a single row
+i = np.random.randint(len(X))
+new_patient_data = (1.05 * X[i]).reshape(1, -1)
+
+# re-use lens1 model
+newlens1 = model.decision_function(new_patient_data)
+
+# re-construct lens2 model: min-max scaled l2-norm fitted on the original data
+X_norm = np.linalg.norm(X, axis=1)
+scaler = preprocessing.MinMaxScaler()
+scaler.fit(X_norm.reshape(-1, 1))
+
+# One norm per row, shaped as a single-feature column just like the data the
+# scaler was fitted on; reshape(1, -1) only matches that for a single patient.
+new_norm = np.linalg.norm(new_patient_data, axis=1)
+newlens2 = scaler.transform(new_norm.reshape(-1, 1))
+
+newlens = np.c_[newlens1, newlens2]
+
+# find nearest nodes
+nn = neighbors.NearestNeighbors(n_neighbors=3)
+node_ids = mapper.nearest_nodes(
+    newlens, new_patient_data, graph, mapper.cover, lens, X, nn
+)
+
+print("Nearest nodes:")
+for node_id in node_ids:
+    # entries of y for the samples that are members of this node
+    diags = y[graph['nodes'][node_id]]
+    print(" {}: diagnosis {:.1f}%".format(node_id, np.sum(diags)*100.0/len(diags)))
diff --git a/examples/output/breast-cancer.html b/examples/output/breast-cancer.html
index 0c70daf9..1d34353f 100644
--- a/examples/output/breast-cancer.html
+++ b/examples/output/breast-cancer.html
@@ -280,7 +280,7 @@
-
+
@@ -418,62 +418,62 @@

Node Distribution

-
-
10.8%
+
+
22.8%
-
-
1.9%
+
+
2.5%
-
-
8.9%
+
+
1.9%
-
-
13.3%
+
+
1.9%
-
-
17.7%
+
+
1.3%
-
-
26.6%
+
+
3.8%
-
-
12.0%
+
+
0.6%
-
-
1.9%
+
+
1.3%
-
+
1.3%
-
-
5.7%
+
+
62.7%
diff --git a/examples/plot_breast_cancer.py b/examples/plot_breast_cancer.py index 47ea0afa..f85d026b 100644 --- a/examples/plot_breast_cancer.py +++ b/examples/plot_breast_cancer.py @@ -68,8 +68,13 @@ # Visualization mapper.visualize(graph, + X=X, + X_names=feature_names, + lens=lens, + lens_names=["Isolation forest", "l2-norm"], path_html="output/breast-cancer.html", title="Wisconsin Breast Cancer Dataset", + color_function=y, custom_tooltips=y)