Skip to content

Commit

Permalink
ENH: update logic of kmeans entropy, increase logging
Browse files Browse the repository at this point in the history
  • Loading branch information
maffettone committed Dec 14, 2023
1 parent 422a54c commit 7e00df5
Showing 1 changed file with 13 additions and 9 deletions.
22 changes: 13 additions & 9 deletions pdf_agents/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from numpy.typing import ArrayLike
from scipy.stats import rv_discrete
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression

from .agents import PDFBaseAgent
from .utils import discretize, make_hashable, make_wafer_grid_list
Expand All @@ -19,7 +19,7 @@

class PassiveKmeansAgent(PDFBaseAgent, ClusterAgentBase):
def __init__(self, k_clusters, *args, **kwargs):
estimator = KMeans(k_clusters)
estimator = KMeans(k_clusters, n_init="auto")
_default_kwargs = self.get_beamline_objects()
_default_kwargs.update(kwargs)
super().__init__(*args, estimator=estimator, **kwargs)
Expand Down Expand Up @@ -195,11 +195,13 @@ def _sample_uncertainty_proxy(self, batch_size=1):
self.model.fit(sorted_observables)
# retreive centers
centers = self.model.cluster_centers_
# calculate distances of all measurements from the centers
distances = self.model.transform(sorted_observables)
# determine golf-score of each point (minimum value)
min_landscape = distances.min(axis=1)

if self.bounds.size == 2:
# One dimensional case, Use the Dan Olds approach
# calculate distances of all measurements from the centers
distances = self.model.transform(sorted_observables)
# determine golf-score of each point (minimum value)
min_landscape = distances.min(axis=1)
# Assume a 1d scan
# generate 'uncertainty weights' - as a polynomial fit of the golf-score for each point
_x = np.arange(*self.bounds, self.motor_resolution)
Expand All @@ -209,8 +211,10 @@ def _sample_uncertainty_proxy(self, batch_size=1):
else:
# assume a 2d scan, use a linear model to predict the uncertainty
grid = make_wafer_grid_list(*self.bounds.ravel(), step=self.motor_resolution)
uncertainty_preds = LinearRegression().fit(sorted_independents, min_landscape).predict(grid)
top_indicies = np.argsort(uncertainty_preds)[-batch_size:]
labels = self.model.predict(sorted_independents)
proby_preds = LogisticRegression().fit(sorted_independents, labels).predict_proba(grid)
shannon = -np.sum(proby_preds * np.log(1 / proby_preds), axis=-1)
top_indicies = np.argsort(shannon)[-batch_size:]
return grid[top_indicies], centers

def ask(self, batch_size=1):
Expand All @@ -223,7 +227,7 @@ def ask(self, batch_size=1):
for suggestion in suggestions:
hashable_suggestion = make_hashable(discretize(suggestion, self.motor_resolution))
if hashable_suggestion in self.knowledge_cache:
logger.info(
logger.warn(
f"Suggestion {suggestion} is ignored as already in the knowledge cache: {hashable_suggestion}"
)
continue
Expand Down

0 comments on commit 7e00df5

Please sign in to comment.