From 1bc6d07f9f0ef56baa31bfbd45b118f344b8f016 Mon Sep 17 00:00:00 2001
From: maffettone
Date: Wed, 13 Dec 2023 08:32:50 -0800
Subject: [PATCH] ENH: Modifications for 2d kmeans search

Generalize PDFBaseAgent from a single motor to a list of motors so that an
agent can search a 2d grid:

- agents.py: `motor_name` becomes `motor_names`; `unpack_run` returns the
  motor positions as an array with one entry per motor.
- sklearn.py: 1d bounds keep the polynomial-fit sampling; otherwise the
  wafer grid points are ranked with a linear regression fitted to the
  per-point minimum of `model.transform`. The knowledge cache is now
  checked with the same discretized, hashable key that is stored in it.
- utils.py: add `make_wafer_grid_list`.
- startup_scripts/mmm4-kmeans.py: new startup script for a Grid_X/Grid_Y
  search.

---
Review notes (ignored by `git am`):
- The patch layout (one line per diff line, Python indentation) was
  reconstructed and checked against every hunk's line counts.
- Only added ('+') lines were amended, one for one, so hunk sizes and the
  diffstat are unchanged; the post-image blob ids on the `index` lines are
  therefore stale.
  * sklearn.py: `make_wafer_grid_list(*self.bounds, ...)` unpacked a (2, 2)
    bounds array into two rows instead of four scalars (TypeError); now
    `*np.ravel(self.bounds)`.
  * sklearn.py: `self.bounds.size` -> `np.size(self.bounds)` so tuple/list
    bounds keep working in the 1d case.
  * agents.py: copy `motor_names` so the mutable default list is not shared
    between instances; setter annotation corrected to `List[str]`.
  * Typos: "Chose" -> "Choose", `top_indicies` -> `top_indices`;
    `make_wafer_grid_list` docstring now matches the code.
- TODO confirm: `self.min_step_size` is not defined in any hunk of this
  patch; verify it exists on the agent (the old code used
  `self.motor_resolution`).

 pdf_agents/agents.py                      | 24 ++++++++--------
 pdf_agents/sklearn.py                     | 34 +++++++++++++++++------
 pdf_agents/startup_scripts/mmm4-kmeans.py | 26 +++++++++++++++++
 pdf_agents/utils.py                       | 13 +++++++++
 4 files changed, 77 insertions(+), 20 deletions(-)
 create mode 100644 pdf_agents/startup_scripts/mmm4-kmeans.py

diff --git a/pdf_agents/agents.py b/pdf_agents/agents.py
index 67fb56a..fd8eb5b 100644
--- a/pdf_agents/agents.py
+++ b/pdf_agents/agents.py
@@ -23,7 +23,7 @@ class PDFBaseAgent(Agent, ABC):
     def __init__(
         self,
         *args,
-        motor_name: str = "Grid_X",
+        motor_names: List[str] = ["Grid_X"],
         motor_resolution: float = 0.0002,
         data_key: str = "chi_I",
         roi_key: str = "chi_Q",
@@ -32,7 +32,7 @@ def __init__(
         **kwargs,
     ):
         self._rkvs = redis.Redis(host="info.pdf.nsls2.bnl.gov", port=6379, db=0)  # redis key value store
-        self._motor_name = motor_name
+        self._motor_names = list(motor_names)
         self._motor_resolution = motor_resolution
         self._data_key = data_key
         self._roi_key = roi_key
@@ -52,7 +52,7 @@ def __init__(
         _default_kwargs = self.get_beamline_objects()
         _default_kwargs.update(kwargs)
         md = dict(
-            motor_name=self.motor_name,
+            motor_names=self.motor_names,
             motor_resolution=self.motor_resolution,
             data_key=self.data_key,
             roi_key=self.roi_key,
@@ -88,14 +88,16 @@ def unpack_run(self, run) -> Tuple[Union[float, ArrayLike], Union[float, ArrayLi
         idx_max = np.where(ordinate > self.roi[1])[0][-1] if len(np.where(ordinate > self.roi[1])[0]) else None
         y = y[idx_min:idx_max]
         try:
-            x = run.start["more_info"][self.motor_name][self.motor_name]["value"]
+            x = np.array(
+                [run.start["more_info"][motor_name][motor_name]["value"] for motor_name in self.motor_names]
+            )
         except KeyError:
-            x = run.start[self.motor_name][self.motor_name]["value"]
+            x = np.array([run.start[motor_name][motor_name]["value"] for motor_name in self.motor_names])
         return x, y
 
     def server_registrations(self) -> None:
         self._register_property("motor_resolution")
-        self._register_property("motor_name")
+        self._register_property("motor_names")
         self._register_property("exposure_time")
         self._register_property("sample_number")
         self._register_property("data_key")
@@ -105,13 +107,13 @@ def server_registrations(self) -> None:
         return super().server_registrations()
 
     @property
-    def motor_name(self):
+    def motor_names(self):
         """Name of motor to be used as the independent variable in the experiment"""
-        return self._motor_name
+        return self._motor_names
 
-    @motor_name.setter
-    def motor_name(self, value: str):
-        self._motor_name = value
+    @motor_names.setter
+    def motor_names(self, value: List[str]):
+        self._motor_names = value
 
     @property
     def motor_resolution(self):
diff --git a/pdf_agents/sklearn.py b/pdf_agents/sklearn.py
index 3020987..35c44cf 100644
--- a/pdf_agents/sklearn.py
+++ b/pdf_agents/sklearn.py
@@ -9,9 +9,10 @@
 from numpy.typing import ArrayLike
 from scipy.stats import rv_discrete
 from sklearn.cluster import KMeans
+from sklearn.linear_model import LinearRegression
 
 from .agents import PDFBaseAgent
-from .utils import discretize, make_hashable
+from .utils import discretize, make_hashable, make_wafer_grid_list
 
 logger = logging.getLogger(__name__)
 
@@ -125,6 +126,10 @@ def __init__(self, *args, bounds: ArrayLike, **kwargs):
         self._bounds = bounds
         self.knowledge_cache = set()  # Discretized knowledge cache of previously asked/told points
 
+    @property
+    def name(self):
+        return "PDFActiveKMeans"
+
     @property
     def bounds(self):
         return self._bounds
@@ -169,11 +174,19 @@ def _sample_uncertainty_proxy(self, batch_size=1):
         distances = self.model.transform(sorted_observables)
         # determine golf-score of each point (minimum value)
         min_landscape = distances.min(axis=1)
-        # generate 'uncertainty weights' - as a polynomial fit of the golf-score for each point
-        _x = np.arange(*self.bounds, self.motor_resolution)
-        uwx = polyval(_x, polyfit(sorted_independents, min_landscape, deg=5))
-        # Chose from the polynomial fit
-        return pick_from_distribution(_x, uwx, num_picks=batch_size), centers
+        if np.size(self.bounds) == 2:
+            # Assume a 1d scan
+            # generate 'uncertainty weights' - as a polynomial fit of the golf-score for each point
+            _x = np.arange(*self.bounds, self.motor_resolution)
+            uwx = polyval(_x, polyfit(sorted_independents, min_landscape, deg=5))
+            # Choose from the polynomial fit
+            return pick_from_distribution(_x, uwx, num_picks=batch_size), centers
+        else:
+            # assume a 2d scan, use a linear model to predict the uncertainty
+            grid = make_wafer_grid_list(*np.ravel(self.bounds), step=self.min_step_size)
+            uncertainty_preds = LinearRegression().fit(sorted_independents, min_landscape).predict(grid)
+            top_indices = np.argsort(uncertainty_preds)[-batch_size:]
+            return grid[top_indices], centers
 
     def ask(self, batch_size=1):
         suggestions, centers = self._sample_uncertainty_proxy(batch_size)
@@ -182,11 +195,14 @@ def ask(self, batch_size=1):
             suggestions = [suggestions]
         # Keep non redundant suggestions and add to knowledge cache
         for suggestion in suggestions:
-            if suggestion in self.knowledge_cache:
-                logger.info(f"Suggestion {suggestion} is ignored as already in the knowledge cache")
+            hashable_suggestion = make_hashable(discretize(suggestion, self.min_step_size))
+            if hashable_suggestion in self.knowledge_cache:
+                logger.info(
+                    f"Suggestion {suggestion} is ignored as already in the knowledge cache: {hashable_suggestion}"
+                )
                 continue
             else:
-                self.knowledge_cache.add(make_hashable(discretize(suggestion, self.motor_resolution)))
+                self.knowledge_cache.add(hashable_suggestion)
                 kept_suggestions.append(suggestion)
 
         base_doc = dict(
diff --git a/pdf_agents/startup_scripts/mmm4-kmeans.py b/pdf_agents/startup_scripts/mmm4-kmeans.py
new file mode 100644
index 0000000..d69a521
--- /dev/null
+++ b/pdf_agents/startup_scripts/mmm4-kmeans.py
@@ -0,0 +1,26 @@
+import numpy as np
+from bluesky_adaptive.server import register_variable, shutdown_decorator, startup_decorator
+
+from pdf_agents.sklearn import ActiveKmeansAgent
+
+agent = ActiveKmeansAgent(
+    bounds=np.array([(-32, 32), (-32, 32)]),
+    ask_on_tell=False,
+    report_on_tell=True,
+    k_clusters=4,
+    motor_names=["Grid_X", "Grid_Y"],
+)
+
+
+@startup_decorator
+def startup():
+    agent.start()
+
+
+@shutdown_decorator
+def shutdown_agent():
+    return agent.stop()
+
+
+register_variable("Tell Cache", agent, "tell_cache")
+register_variable("Agent Name", agent, "instance_name")
diff --git a/pdf_agents/utils.py b/pdf_agents/utils.py
index eacd4f4..aacdbc7 100644
--- a/pdf_agents/utils.py
+++ b/pdf_agents/utils.py
@@ -27,3 +27,16 @@ def make_hashable(x):
         return tuple(map(float, x))
     except TypeError:
         return float(x)
+
+
+def make_wafer_grid_list(x_min, x_max, y_min, y_max, step):
+    """
+    Make the list of all 2d grid points strictly inside the largest circle centered on the bounds' midpoint
+    """
+    x = np.arange(x_min, x_max, step)
+    y = np.arange(y_min, y_max, step)
+    xx, yy = np.meshgrid(x, y)
+    center = np.array([x_min + (x_max - x_min) / 2, y_min + (y_max - y_min) / 2])
+    distance = np.sqrt((xx - center[0]) ** 2 + (yy - center[1]) ** 2)
+    radius = min((x_max - x_min) / 2, (y_max - y_min) / 2)
+    return np.array([xx[distance < radius], yy[distance < radius]]).T