From aec9e8d2534c88982bc0e76698962c19117b04fe Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Sat, 3 Feb 2024 20:40:22 -0800 Subject: [PATCH 01/62] Add component_editor.py file --- .gitignore | 2 - src/dash_component_editor.py | 407 +++++++++++++++++++++++++++++++++++ 2 files changed, 407 insertions(+), 2 deletions(-) create mode 100644 src/dash_component_editor.py diff --git a/.gitignore b/.gitignore index 496e1ee..57516b2 100644 --- a/.gitignore +++ b/.gitignore @@ -11,8 +11,6 @@ data/.file_manager_vars.pkl data/mlexchange_store/ .DS_Store -src/dash_component_editor.py - # C extensions *.so diff --git a/src/dash_component_editor.py b/src/dash_component_editor.py new file mode 100644 index 0000000..181bae3 --- /dev/null +++ b/src/dash_component_editor.py @@ -0,0 +1,407 @@ +import re +from typing import Callable +# noinspection PyUnresolvedReferences +from inspect import signature, _empty + +from dash import html, dcc, dash_table, Input, Output, State, MATCH, ALL +import dash_bootstrap_components as dbc +import dash_daq as daq + +import base64 +#import PIL.Image +import io +#import plotly.express as px +# Procedural dash form generation + +""" +{'name', 'title', 'value', 'type', +""" + + +class SimpleItem(dbc.Col): + def __init__(self, + name, + base_id, + title=None, + param_key=None, + type='number', + debounce=True, + **kwargs): + + if param_key == None: + param_key = name + self.label = dbc.Label(title) + self.input = dbc.Input(type=type, + debounce=debounce, + id={**base_id, + 'name': name, + 'param_key': param_key}, + **kwargs) + + super(SimpleItem, self).__init__(children=[self.label, self.input]) + + +class FloatItem(SimpleItem): + pass + + +class IntItem(SimpleItem): + def __init__(self, *args, **kwargs): + if 'min' not in kwargs: + kwargs['min'] = -9007199254740991 + super(IntItem, self).__init__(*args, step=1, **kwargs) + + +class StrItem(SimpleItem): + def __init__(self, *args, **kwargs): + super(StrItem, self).__init__(*args, type='text', **kwargs) + + +class SliderItem(dbc.Col): + def __init__(self, + name, + base_id, + title=None, + param_key=None, + debounce=True, + visible=True, + **kwargs): + + if param_key == None: + param_key = name + self.label = dbc.Label(title) + self.input = dcc.Slider(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'input'}, + tooltip={"placement": "bottom", "always_visible": True}, + **kwargs) + + style = {} + if not visible: + style['display'] = 'none' + + super(SliderItem, self).__init__(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'form_group'}, + children=[self.label, self.input], + style=style) + + +class DropdownItem(dbc.Col): + def __init__(self, + name, + base_id, + title=None, + param_key=None, + debounce=True, + visible=True, + **kwargs): + + if param_key == None: + param_key = name + self.label = dbc.Label(title) + self.input = dcc.Dropdown(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'input'}, + **kwargs) + + style = {} + if not visible: + style['display'] = 'none' + + super(DropdownItem, self).__init__(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'form_group'}, + children=[self.label, self.input], + style=style) + + +class RadioItem(dbc.Col): + def __init__(self, + name, + base_id, + title=None, + param_key=None, + visible=True, + **kwargs): + + if param_key == None: + param_key = name + self.label = dbc.Label(title) + self.input = dbc.RadioItems(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'input'}, + **kwargs) + + style = 
{} + if not visible: + style['display'] = 'none' + + super(RadioItem, self).__init__(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'form_group'}, + children=[self.label, self.input], + style=style) + + +class BoolItem(dbc.Col): + def __init__(self, + name, + base_id, + title=None, + param_key=None, + visible=True, + **kwargs): + + if param_key == None: + param_key = name + self.label = dbc.Label(title) + self.input = daq.ToggleSwitch(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'input'}, + **kwargs) + self.output_label = dbc.Label('False/True') + + style = {} + if not visible: + style['display'] = 'none' + + super(BoolItem, self).__init__(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'form_group'}, + children=[self.label, self.input, self.output_label], + style=style) + + +class ImgItem(dbc.Col): + def __init__(self, + name, + src, + base_id, + title=None, + param_key=None, + width='100px', + visible=True, + **kwargs): + + if param_key == None: + param_key = name + + if not (width.endswith('px') or width.endswith('%')): + width = width + 'px' + + self.label = dbc.Label(title) + + encoded_image = base64.b64encode(open(src, 'rb').read()) + self.src = 'data:image/png;base64,{}'.format(encoded_image.decode()) + self.input_img = html.Img(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'input'}, + src=self.src, + style={'height':'auto', 'width':width}, + **kwargs) + + style = {} + if not visible: + style['display'] = 'none' + + super(ImgItem, self).__init__(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'form_group'}, + children=[self.label, self.input_img], + style=style) + + +# class GraphItem(dbc.Col): +# def __init__(self, +# name, +# base_id, +# title=None, +# param_key=None, +# visible=True, +# figure = None, +# **kwargs): +# +# self.name = name +# if param_key == None: +# param_key = name +# self.label = dbc.Label(title) +# self.input_graph = dcc.Graph(id={**base_id, +# 'name': name, +# 'param_key': param_key, +# 'layer': 'input'}, +# **kwargs) +# +# self.input_upload = dcc.Upload(id={**base_id, +# 'name': name+'_upload', +# 'param_key': param_key, +# 'layer': 'input'}, +# children=html.Div([ +# 'Drag and Drop or ', +# html.A('Select Files') +# ]), +# style={ +# 'width': '95%', +# 'height': '60px', +# 'lineHeight': '60px', +# 'borderWidth': '1px', +# 'borderStyle': 'dashed', +# 'borderRadius': '5px', +# 'textAlign': 'center', +# 'margin': '10px' +# }, +# multiple = False) +# +# style = {} +# if not visible: +# style['display'] = 'none' +# +# super(GraphItem, self).__init__(id={**base_id, +# 'name': name, +# 'param_key': param_key, +# 'layer': 'form_group'}, +# children=[self.label, self.input_upload, self.input_graph], +# style=style) +# +# # Issue: cannot get inputs from the callback decorator +# def return_upload(self, *args): +# print(f'before if, args {args}') +# if args: +# print(f'args {args}') +# img_bytes = base64.b64decode(contents.split(",")[1]) +# img = PIL.Image.open(io.BytesIO(img_bytes)) +# fig = px.imshow(img, binary_string=True) +# return fig +# +# def init_callbacks(self, app): +# app.callback(Output({**self.id, +# 'name': self.name, +# 'layer': 'input'}, 'figure', allow_duplicate=True), +# Input({**self.id, +# 'name': self.name+'_upload', +# 'layer': 'input'}, +# 'contents'), +# State({**self.id, +# 'name': self.name+'_upload', +# 'layer': 'input'}, 'last_modified'), +# State({**self.id, +# 'name': self.name+'_upload', +# 'layer': 'input'}, 'filename'), +# 
prevent_initial_call=True)(self.return_upload()) + + + +class ParameterEditor(dbc.Form): + + type_map = {float: FloatItem, + int: IntItem, + str: StrItem, + } + + def __init__(self, _id, parameters, **kwargs): + self._parameters = parameters + + super(ParameterEditor, self).__init__(id=_id, children=[], className='kwarg-editor', **kwargs) + self.children = self.build_children() + + def init_callbacks(self, app): + app.callback(Output(self.id, 'n_submit'), + Input({**self.id, + 'name': ALL}, + 'value'), + State(self.id, 'n_submit'), + ) + + for child in self.children: + if hasattr(child,"init_callbacks"): + child.init_callbacks(app) + + + @property + def values(self): + return {param['name']: param.get('value', None) for param in self._parameters} + + @property + def parameters(self): + return {param['name']: param for param in self._parameters} + + def _determine_type(self, parameter_dict): + if 'type' in parameter_dict: + if parameter_dict['type'] in self.type_map: + return parameter_dict['type'] + elif parameter_dict['type'].__name__ in self.type_map: + return parameter_dict['type'].__name__ + elif type(parameter_dict['value']) in self.type_map: + return type(parameter_dict['value']) + raise TypeError(f'No item type could be determined for this parameter: {parameter_dict}') + + def build_children(self, values=None): + children = [] + for parameter_dict in self._parameters: + parameter_dict = parameter_dict.copy() + if values and parameter_dict['name'] in values: + parameter_dict['value'] = values[parameter_dict['name']] + type = self._determine_type(parameter_dict) + parameter_dict.pop('type', None) + item = self.type_map[type](**parameter_dict, base_id=self.id) + children.append(item) + + return children + + +class JSONParameterEditor(ParameterEditor): + type_map = {'float': FloatItem, + 'int': IntItem, + 'str': StrItem, + 'slider': SliderItem, + 'dropdown': DropdownItem, + 'radio': RadioItem, + 'bool': BoolItem, + 'img': ImgItem, + #'graph': GraphItem, + } + + def __init__(self, _id, json_blob, **kwargs): + super(ParameterEditor, self).__init__(id=_id, children=[], className='kwarg-editor', **kwargs) + self._json_blob = json_blob + self.children = self.build_children() + + def build_children(self, values=None): + children = [] + for json_record in self._json_blob: + ... + # build a parameter dict from self.json_blob + ... 
+ type = json_record.get('type', self._determine_type(json_record)) + json_record = json_record.copy() + if values and json_record['name'] in values: + json_record['value'] = values[json_record['name']] + json_record.pop('type', None) + item = self.type_map[type](**json_record, base_id=self.id) + children.append(item) + + return children + + +class KwargsEditor(ParameterEditor): + def __init__(self, instance_index, func: Callable, **kwargs): + self.func = func + self._instance_index = instance_index + + parameters = [{'name': name, 'value': param.default} for name, param in signature(func).parameters.items() + if param.default is not _empty] + + super(KwargsEditor, self).__init__(dict(index=instance_index, type='kwargs-editor'), parameters=parameters, **kwargs) + + def new_record(self): + return {name: p.default for name, p in signature(self.func).parameters.items() if p.default is not _empty} From ec15317257ceb141d3296f2042a6e7ad34a22080 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Mon, 5 Feb 2024 14:29:40 -0800 Subject: [PATCH 02/62] Correct typo - first commit to fix the filepath issue --- src/frontend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frontend.py b/src/frontend.py index 5018236..59feb61 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -576,7 +576,7 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): def toggle_modal(n_submit, n_apply, is_open, input_data): ''' - This callback pop up a winder to remind user to follow this flow: + This callback pop up a window to remind user to follow this flow: select dataset -> Submit dimension reduction job -> Apply clustering Args: n_submit (int): Number of clicks on the 'Submit' button. From 19c7bf12306165793a33c392da1b6b4cc2c6789a Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 7 Feb 2024 21:19:31 -0800 Subject: [PATCH 03/62] Remove input_data dependence on the submit_dimension_reduction_job --- src/app_layout.py | 2 ++ src/frontend.py | 12 ++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/app_layout.py b/src/app_layout.py index 5664c1b..60ce790 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -279,6 +279,8 @@ dcc.Store(id='experiment-id', data=None), # data_label_schema, latent vectors, clusters dcc.Store(id='input_data', data=None), + # to store the example dataset + dcc.Store(id='example_data', data=None), dcc.Store(id='input_labels', data=None), dcc.Store(id='label_schema', data=None), dcc.Store(id='model_id', data=None), diff --git a/src/frontend.py b/src/frontend.py index 59feb61..b034705 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -82,12 +82,12 @@ def show_clustering_gui_layouts(selected_algo): return item_list @app.callback( - Output('input_data', 'data'), + Output('input_data', 'data'), #Output('example_data', 'data'), Output('input_labels', 'data'), Output('label_schema', 'data'), Output('label-dropdown', 'options'), Output('user-upload-data-dir', 'data'), - Input('dataset-selection', 'value'), # Example dataset + Input('dataset-selection', 'value'), Input({'base_id': 'file-manager', 'name': 'docker-file-paths'},'data'), # FM Input('feature-vector-model-list', 'value'), # data clinic ) @@ -180,7 +180,6 @@ def job_content_dict(content): State('dataset-selection', 'value'), State('user-upload-data-dir', 'data'), State('feature-vector-model-list', 'value'), - State('input_data', 'data'), State('model_id', 'data'), State('algo-dropdown', 'value'), State('additional-model-params', 'children'), @@ -188,8 +187,7 @@ 
def job_content_dict(content): prevent_initial_call=True ) def submit_dimension_reduction_job(submit_n_clicks, - selected_dataset, user_upload_data_dir, data_clinic_file_path, - input_data, model_id, selected_algo, children): + selected_dataset, user_upload_data_dir, data_clinic_file_path, model_id, selected_algo, children): """ This callback is triggered every time the Submit button is hit: - compute latent vectors, which will be saved in data/output/experiment_id @@ -210,7 +208,9 @@ def submit_dimension_reduction_job(submit_n_clicks, heatmap: empty heatmap figure interval: set interval component to trigger to find the latent_vectors.npy file (-1) """ - if not submit_n_clicks or not input_data: + if not submit_n_clicks: + raise PreventUpdate + if not selected_dataset and not user_upload_data_dir and not data_clinic_file_path: raise PreventUpdate input_params = {} From 23b86bee90cf6a18e58d6c7b704ff249e3d3aa74 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 7 Feb 2024 21:46:01 -0800 Subject: [PATCH 04/62] Remove dependency on input_data of toggle_modal function --- src/frontend.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index b034705..3c7e8e5 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -570,11 +570,13 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): ], [ State("modal", "is_open"), - State('input_data', 'data'), + State('dataset-selection', 'value'), + State('user-upload-data-dir', 'data'), + State('feature-vector-model-list', 'value'), ] ) def toggle_modal(n_submit, n_apply, - is_open, input_data): + is_open, selected_dataset, user_upload_data_dir, data_clinic_file_path): ''' This callback pop up a window to remind user to follow this flow: select dataset -> Submit dimension reduction job -> Apply clustering @@ -587,10 +589,13 @@ def toggle_modal(n_submit, n_apply, is_open (bool): New state of the modal window. modal_body_text (str): Text to be displayed in the modal body. ''' + at_least_one_dataset_selected = False + if selected_dataset or user_upload_data_dir or data_clinic_file_path: + at_least_one_dataset_selected = True - if n_submit and input_data is None: + if n_submit and not at_least_one_dataset_selected: return True, "Please select an example dataset or upload your own zipped dataset." - elif n_apply and input_data is None: + elif n_apply and not at_least_one_dataset_selected: return True, "Please select an example dataset or upload your own zipped dataset." elif n_apply and n_submit is None: return True, "Please select a dimension reduction algorithm and click 'Submit' button before clustering." 
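[Editorial sketch between patches 04 and 05 — not part of the patch series.] Patches 03 and 04 move validation off the cached input_data store: the callbacks now read the three possible dataset sources (example dataset, FileManager upload, DataClinic output) as State and warn through a modal when none of them is selected. The snippet below is a minimal, self-contained sketch of that pattern; the component ids ("example-dataset", "upload-dir", "data-clinic-file", "modal") are hypothetical stand-ins and only illustrate the shape of the callback, not the app's actual layout.

from dash import Dash, html, dcc, Input, Output, State
import dash_bootstrap_components as dbc

app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

app.layout = html.Div(
    [
        dcc.Dropdown(id="example-dataset", options=["demo-shapes"], value=None),
        dcc.Store(id="upload-dir", data=None),       # stand-in for the FileManager output
        dcc.Dropdown(id="data-clinic-file", value=None),
        dbc.Button("Submit", id="submit"),
        dbc.Modal(dbc.ModalBody(id="modal-body"), id="modal", is_open=False),
    ]
)


@app.callback(
    Output("modal", "is_open"),
    Output("modal-body", "children"),
    Input("submit", "n_clicks"),
    State("example-dataset", "value"),
    State("upload-dir", "data"),
    State("data-clinic-file", "value"),
    prevent_initial_call=True,
)
def warn_if_no_dataset(n_clicks, example_dataset, upload_dir, data_clinic_file):
    # Any one of the three sources is enough; only warn when all are empty.
    if example_dataset or upload_dir or data_clinic_file:
        return False, ""
    return True, "Please select an example dataset or upload your own zipped dataset."


if __name__ == "__main__":
    app.run(debug=True)
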
From d1803414614302d3791d0b0a7f2bce7762c53ec4 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 7 Feb 2024 22:06:40 -0800 Subject: [PATCH 05/62] Rename selected_dataset to selected_example_dataset, and keep select ed_dataset to determine which 3 choice --- src/frontend.py | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index 3c7e8e5..b9d9b26 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -87,15 +87,15 @@ def show_clustering_gui_layouts(selected_algo): Output('label_schema', 'data'), Output('label-dropdown', 'options'), Output('user-upload-data-dir', 'data'), - Input('dataset-selection', 'value'), - Input({'base_id': 'file-manager', 'name': 'docker-file-paths'},'data'), # FM - Input('feature-vector-model-list', 'value'), # data clinic + Input('example-dataset-selection', 'value'), # example dataset + Input({'base_id': 'file-manager', 'name': 'docker-file-paths'},'data'), # FM dataset + Input('feature-vector-model-list', 'value'), # data clinic dataset ) -def update_data_n_label_schema(selected_dataset, upload_file_paths, data_clinic_file_path): +def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data_clinic_file_path): ''' This callback updates the selected dataset from the provided example datasets, as well as labels, and label schema Args: - dataset-selection: selected dataset from the provided example datasets, not the one that user uploaded + example-dataset-selection: selected dataset from the provided example datasets, not the one that user uploaded upload_file_pahts: Data project info, the user uploaded zip file using FileManager, list Returns: input_data: input image data, numpy.ndarray @@ -126,14 +126,14 @@ def update_data_n_label_schema(selected_dataset, upload_file_paths, data_clinic_ labels = np.full((data.shape[0],), -1) user_upload_data_dir = os.path.dirname(upload_file_paths[0]['uri']) # Example dataset option 1 - elif selected_dataset == "data/example_shapes/Demoshapes.npz": - data = np.load("/app/work/" + selected_dataset)['arr_0'] + elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": + data = np.load("/app/work/" + selected_example_dataset)['arr_0'] labels = np.load("/app/work/data/example_shapes/DemoLabels.npy") f = open("/app/work/data/example_shapes/label_schema.json") label_schema = json.load(f) # Example dataset option 2 - elif selected_dataset == "data/example_latentrepresentation/f_vectors.parquet": - df = pd.read_parquet("/app/work/" + selected_dataset) + elif selected_example_dataset == "data/example_latentrepresentation/f_vectors.parquet": + df = pd.read_parquet("/app/work/" + selected_example_dataset) data = df.values labels = np.full((df.shape[0],), -1) # DataClinic options @@ -177,7 +177,7 @@ def job_content_dict(content): ], Input('run-algo', 'n_clicks'), [ - State('dataset-selection', 'value'), + State('example-dataset-selection', 'value'), State('user-upload-data-dir', 'data'), State('feature-vector-model-list', 'value'), State('model_id', 'data'), @@ -187,7 +187,7 @@ def job_content_dict(content): prevent_initial_call=True ) def submit_dimension_reduction_job(submit_n_clicks, - selected_dataset, user_upload_data_dir, data_clinic_file_path, model_id, selected_algo, children): + selected_example_dataset, user_upload_data_dir, data_clinic_file_path, model_id, selected_algo, children): """ This callback is triggered every time the Submit button is hit: - compute latent vectors, which will be saved in 
data/output/experiment_id @@ -195,7 +195,7 @@ def submit_dimension_reduction_job(submit_n_clicks, - reset heatmap to no image Args: submit_n_clicks: num of clicks for the submit button - selected_dataset: selected example dataset + selected_example_dataset: selected example dataset user_upload_data_dir: user uploaded dataset model_id: uid of selected dimension reduciton algo selected_algo: selected dimension reduction algo @@ -210,7 +210,7 @@ def submit_dimension_reduction_job(submit_n_clicks, """ if not submit_n_clicks: raise PreventUpdate - if not selected_dataset and not user_upload_data_dir and not data_clinic_file_path: + if not selected_example_dataset and not user_upload_data_dir and not data_clinic_file_path: raise PreventUpdate input_params = {} @@ -247,6 +247,8 @@ def submit_dimension_reduction_job(submit_n_clicks, selected_dataset = user_upload_data_dir elif data_clinic_file_path is not None: selected_dataset = data_clinic_file_path + else: + selected_dataset = selected_example_dataset # check which dimension reduction algo, then compose command if selected_algo == 'PCA': @@ -570,13 +572,13 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): ], [ State("modal", "is_open"), - State('dataset-selection', 'value'), + State('example-dataset-selection', 'value'), State('user-upload-data-dir', 'data'), State('feature-vector-model-list', 'value'), ] ) def toggle_modal(n_submit, n_apply, - is_open, selected_dataset, user_upload_data_dir, data_clinic_file_path): + is_open, selected_example_dataset, user_upload_data_dir, data_clinic_file_path): ''' This callback pop up a window to remind user to follow this flow: select dataset -> Submit dimension reduction job -> Apply clustering @@ -590,7 +592,7 @@ def toggle_modal(n_submit, n_apply, modal_body_text (str): Text to be displayed in the modal body. 
''' at_least_one_dataset_selected = False - if selected_dataset or user_upload_data_dir or data_clinic_file_path: + if selected_example_dataset or user_upload_data_dir or data_clinic_file_path: at_least_one_dataset_selected = True if n_submit and not at_least_one_dataset_selected: From 080b0878a8687475f18e8e51a9a145e55c273ca1 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Thu, 8 Feb 2024 09:23:08 -0800 Subject: [PATCH 06/62] Simplify the warining logic in toggle_modal --- src/app_layout.py | 5 ++--- src/frontend.py | 7 +++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/app_layout.py b/src/app_layout.py index 60ce790..d59fa00 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -60,7 +60,7 @@ ), dbc.Label('Or try Example Dataset', className='mr-2'), dcc.Dropdown( - id='dataset-selection', + id='example-dataset-selection', options=DATA_OPTION, #value = DATA_OPTION[0]['value'], clearable=False, @@ -279,8 +279,7 @@ dcc.Store(id='experiment-id', data=None), # data_label_schema, latent vectors, clusters dcc.Store(id='input_data', data=None), - # to store the example dataset - dcc.Store(id='example_data', data=None), + dcc.Store(id='example_data', data=None), # to store the example dataset dcc.Store(id='input_labels', data=None), dcc.Store(id='label_schema', data=None), dcc.Store(id='model_id', data=None), diff --git a/src/frontend.py b/src/frontend.py index b9d9b26..2e6d896 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -595,10 +595,9 @@ def toggle_modal(n_submit, n_apply, if selected_example_dataset or user_upload_data_dir or data_clinic_file_path: at_least_one_dataset_selected = True - if n_submit and not at_least_one_dataset_selected: - return True, "Please select an example dataset or upload your own zipped dataset." - elif n_apply and not at_least_one_dataset_selected: - return True, "Please select an example dataset or upload your own zipped dataset." + if ((n_submit and not at_least_one_dataset_selected) or + (n_apply and not at_least_one_dataset_selected)): + return True, "Please select an example dataset or upload your own zipped dataset or choose DataClinic outpu." elif n_apply and n_submit is None: return True, "Please select a dimension reduction algorithm and click 'Submit' button before clustering." 
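[Editorial sketch between patches 06 and 07 — not part of the patch series.] Patches 05 and 06 repeat the same three-way source check (FileManager upload, DataClinic output, example dataset) that already appears in submit_dimension_reduction_job, and the next patch applies it to update_heatmap as well, always with the priority FileManager > DataClinic > example dataset. The helper below is a hypothetical sketch of how that check could be factored out into one place; it does not appear in any patch in this series.

def resolve_dataset_source(user_upload_data_dir, data_clinic_file_path, selected_example_dataset):
    """Return (kind, path) for the active dataset, or (None, None) when nothing is selected.

    Priority mirrors the callbacks in this series: FileManager > DataClinic > example dataset.
    """
    if user_upload_data_dir:
        return "file_manager", user_upload_data_dir
    if data_clinic_file_path:
        return "data_clinic", data_clinic_file_path
    if selected_example_dataset:
        return "example", selected_example_dataset
    return None, None


# Gate a callback the same way toggle_modal and submit_dimension_reduction_job do:
kind, path = resolve_dataset_source(None, None, "data/example_shapes/Demoshapes.npz")
assert kind == "example" and path.endswith("Demoshapes.npz")
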
From b7b0dae18ef2619707dc8bec42a123b18db71a0e Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Thu, 8 Feb 2024 13:32:44 -0800 Subject: [PATCH 07/62] Remove dependency of input_data in update_heatmap function --- src/app_layout.py | 2 - src/frontend.py | 147 ++++++++++++++++++++++++++++++++-------------- 2 files changed, 104 insertions(+), 45 deletions(-) diff --git a/src/app_layout.py b/src/app_layout.py index d59fa00..08b3c36 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -278,8 +278,6 @@ dcc.Store(id='run-counter', data=0), dcc.Store(id='experiment-id', data=None), # data_label_schema, latent vectors, clusters - dcc.Store(id='input_data', data=None), - dcc.Store(id='example_data', data=None), # to store the example dataset dcc.Store(id='input_labels', data=None), dcc.Store(id='label_schema', data=None), dcc.Store(id='model_id', data=None), diff --git a/src/frontend.py b/src/frontend.py index 2e6d896..227c967 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -82,14 +82,17 @@ def show_clustering_gui_layouts(selected_algo): return item_list @app.callback( - Output('input_data', 'data'), #Output('example_data', 'data'), - Output('input_labels', 'data'), - Output('label_schema', 'data'), - Output('label-dropdown', 'options'), - Output('user-upload-data-dir', 'data'), - Input('example-dataset-selection', 'value'), # example dataset - Input({'base_id': 'file-manager', 'name': 'docker-file-paths'},'data'), # FM dataset - Input('feature-vector-model-list', 'value'), # data clinic dataset + [ + Output('input_labels', 'data'), + Output('label_schema', 'data'), + Output('label-dropdown', 'options'), + Output('user-upload-data-dir', 'data'), + ], + [ + Input('example-dataset-selection', 'value'), # example dataset + Input({'base_id': 'file-manager', 'name': 'docker-file-paths'},'data'), # FM dataset + Input('feature-vector-model-list', 'value'), # data clinic dataset + ] ) def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data_clinic_file_path): ''' @@ -104,50 +107,45 @@ def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data label_dropdown: label dropdown options user_upload_data_dir: dir name for the user uploaded zip file ''' - # FM + labels = None + label_schema = {} + + # check if user is using user uploaded zip file or example dataset or data clinic file + # priority level: FileManage > DataClinic > Example Datasets + + # FileManager - user uploaded zip file of images data_project = DataProject() data_project.init_from_dict(upload_file_paths) data_set = data_project.data # list of len 1920, each element is a local_dataset.LocalDataset object - - data = None - labels = None - label_schema = {} options = [] user_upload_data_dir = None - - # FM options if len(data_set) > 0: - data = [] - for i in range(len(data_set)): #if dataset too large, dash will exit with code 247, 137 - image, uri = data_project.data[i].read_data(export='pillow') - data.append(np.array(image)) - data = np.array(data) - print(data.shape) - labels = np.full((data.shape[0],), -1) + labels = np.full((len(data_set),), -1) user_upload_data_dir = os.path.dirname(upload_file_paths[0]['uri']) + # DataClinic options + elif data_clinic_file_path is not None: + df = pd.read_parquet(data_clinic_file_path) + # data = df.values + labels = np.full((df.shape[0],), -1) # Example dataset option 1 elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": - data = np.load("/app/work/" + selected_example_dataset)['arr_0'] + # data = np.load("/app/work/" + 
selected_example_dataset)['arr_0'] labels = np.load("/app/work/data/example_shapes/DemoLabels.npy") f = open("/app/work/data/example_shapes/label_schema.json") label_schema = json.load(f) # Example dataset option 2 elif selected_example_dataset == "data/example_latentrepresentation/f_vectors.parquet": df = pd.read_parquet("/app/work/" + selected_example_dataset) - data = df.values - labels = np.full((df.shape[0],), -1) - # DataClinic options - elif data_clinic_file_path is not None: - df = pd.read_parquet(data_clinic_file_path) - data = df.values + # data = df.values labels = np.full((df.shape[0],), -1) + if label_schema: options = [{'label': f'Label {label}', 'value': label} for label in label_schema] options.insert(0, {'label': 'Unlabeled', 'value': -1}) options.insert(0, {'label': 'All', 'value': -2}) - return data, labels, label_schema, options, user_upload_data_dir + return labels, label_schema, options, user_upload_data_dir def job_content_dict(content): job_content = {# 'mlex_app': content['name'], @@ -177,9 +175,9 @@ def job_content_dict(content): ], Input('run-algo', 'n_clicks'), [ - State('example-dataset-selection', 'value'), - State('user-upload-data-dir', 'data'), - State('feature-vector-model-list', 'value'), + State('example-dataset-selection', 'value'), # 2 example dataset + State('user-upload-data-dir', 'data'), # FM + State('feature-vector-model-list', 'value'), # DataClinic State('model_id', 'data'), State('algo-dropdown', 'value'), State('additional-model-params', 'children'), @@ -187,7 +185,8 @@ def job_content_dict(content): prevent_initial_call=True ) def submit_dimension_reduction_job(submit_n_clicks, - selected_example_dataset, user_upload_data_dir, data_clinic_file_path, model_id, selected_algo, children): + selected_example_dataset, user_upload_data_dir, data_clinic_file_path, + model_id, selected_algo, children): """ This callback is triggered every time the Submit button is hit: - compute latent vectors, which will be saved in data/output/experiment_id @@ -242,7 +241,7 @@ def submit_dimension_reduction_job(submit_n_clicks, output_path = OUTPUT_DIR / experiment_id output_path.mkdir(parents=True, exist_ok=True) - # check if user is using user uploaded zip file or example dataset or data clinic file + # check if user is using user uploaded zip file or example dataset or data clinic file if user_upload_data_dir is not None: selected_dataset = user_upload_data_dir elif data_clinic_file_path is not None: @@ -266,7 +265,6 @@ def submit_dimension_reduction_job(submit_n_clicks, print("respnse: ", response) # job_response = get_job(user=None, mlex_app=job_content['mlex_app']) - return experiment_id, 'cluster', -1, -2, go.Figure(go.Heatmap()), -1 @app.callback( @@ -404,6 +402,7 @@ def update_scatter_plot(latent_vectors, selected_cluster, selected_label, scatte if latent_vectors is None or children is None: raise PreventUpdate latent_vectors = np.array(latent_vectors) + print("latent vector shape:", latent_vectors.shape) n_components = children['props']['children'][0]["props"]["children"][1]["props"]["value"] @@ -461,34 +460,96 @@ def update_scatter_plot(latent_vectors, selected_cluster, selected_label, scatte Input('scatter', 'selectedData'), Input('mean-std-toggle', 'value'), ], - State('input_data', 'data'), + [ + State('example-dataset-selection', 'value'), # example dataset + State({'base_id': 'file-manager', 'name': 'docker-file-paths'},'data'), # FM dataset + State('feature-vector-model-list', 'value'), # data clinic dataset + + ], prevent_initial_call=True + ) -def 
update_heatmap(click_data, selected_data, display_option, input_data): +def update_heatmap(click_data, selected_data, display_option, + selected_example_dataset, upload_file_paths, data_clinic_file_path): ''' This callback update the heatmap Args: click_data: clicked data on scatter figure selected_data: lasso or rect selected data points on scatter figure display_option: option to display mean or std - input_data: input image data Returns: fig: updated heatmap ''' - if input_data is None: + ################## + print("seleced_example_dataset:", selected_example_dataset) + + if not selected_example_dataset and not upload_file_paths and not data_clinic_file_path: raise PreventUpdate - images = np.array(input_data) + print(selected_data is not None) + + # user select a group of points if selected_data is not None and len(selected_data['points']) > 0: selected_indices = [point['customdata'][0] for point in selected_data['points']] # Access customdata for the original indices - selected_images = images[selected_indices] + print("selected_indices", selected_indices) + + ### FileManager + # print("upload_file_paths") # if not selected, its an empty list not None + selected_images = [] + data_project = DataProject() + data_project.init_from_dict(upload_file_paths) + data_set = data_project.data + if len(data_set) > 0: + for i in selected_indices: + image, uri = data_project.data[i].read_data(export='pillow') + selected_images.append(np.array(image)) + ### DataClinic + elif data_clinic_file_path is not None: + print("data_clinic_file_path") + df = pd.read_parquet(data_clinic_file_path) + selected_images = df.iloc[selected_indices].values + ### Example dataset + elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": + print("Demoshapes.npz") + selected_images = np.load("/app/work/" + selected_example_dataset)['arr_0'][selected_indices] + print(selected_images.shape) + elif selected_example_dataset == "data/example_latentrepresentation/f_vectors.parquet": + print("f_vectors.parque") + df = pd.read_parquet("/app/work/" + selected_example_dataset) + selected_images = df.iloc[selected_indices].values + selected_images = np.array(selected_images) + + print("selected_images shape:", selected_images.shape) + + # display options if display_option == 'mean': heatmap_data = go.Heatmap(z=np.mean(selected_images, axis=0)) elif display_option == 'sigma': heatmap_data = go.Heatmap(z=np.std(selected_images, axis=0)) + elif click_data is not None and len(click_data['points']) > 0: selected_index = click_data['points'][0]['customdata'][0] # click_data['points'][0]['pointIndex'] - heatmap_data = go.Heatmap(z=images[selected_index]) + ### FileManager + clicked_image = [] + data_project = DataProject() + data_project.init_from_dict(upload_file_paths) + data_set = data_project.data + if len(data_set) > 0: + clicked_image = data_project.data[selected_index].read_data(export='pillow') + ### DataClinic + elif data_clinic_file_path is not None: + df = pd.read_parquet(data_clinic_file_path) + clicked_image = df.iloc[selected_index].values + ### Example dataset + elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": + clicked_image = np.load("/app/work/" + selected_example_dataset)['arr_0'][selected_index] + elif selected_example_dataset == "data/example_latentrepresentation/f_vectors.parquet": + df = pd.read_parquet("/app/work/" + selected_example_dataset) + clicked_image = df.iloc[selected_index].values + clicked_image = np.array(clicked_image) + + heatmap_data = go.Heatmap(z=clicked_image) 
+ else: heatmap_data = go.Heatmap() From 3251a13a14341b562af6e5405c9aab64e36e9d42 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Thu, 8 Feb 2024 13:41:45 -0800 Subject: [PATCH 08/62] Show clicked single iamge works --- src/frontend.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index 227c967..8a2ae8a 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -530,12 +530,11 @@ def update_heatmap(click_data, selected_data, display_option, elif click_data is not None and len(click_data['points']) > 0: selected_index = click_data['points'][0]['customdata'][0] # click_data['points'][0]['pointIndex'] ### FileManager - clicked_image = [] data_project = DataProject() data_project.init_from_dict(upload_file_paths) data_set = data_project.data if len(data_set) > 0: - clicked_image = data_project.data[selected_index].read_data(export='pillow') + clicked_image, uri = data_project.data[selected_index].read_data(export='pillow') ### DataClinic elif data_clinic_file_path is not None: df = pd.read_parquet(data_clinic_file_path) @@ -548,6 +547,7 @@ def update_heatmap(click_data, selected_data, display_option, clicked_image = df.iloc[selected_index].values clicked_image = np.array(clicked_image) + heatmap_data = go.Heatmap(z=clicked_image) else: @@ -600,7 +600,7 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): selected_indices = [point['customdata'][0] for point in selected_data['points']] # Access customdata for the original indices selected_clusters = [] - if clusters: + if clusters is not None: selected_clusters = clusters[selected_indices] selected_labels = assigned_labels[selected_indices] From 73d1b4736902738a982ecc6afc8e7cae91167e24 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Thu, 8 Feb 2024 14:17:29 -0800 Subject: [PATCH 09/62] Correct the error before clustering was done, and select --- src/frontend.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index 8a2ae8a..f207539 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -480,14 +480,9 @@ def update_heatmap(click_data, selected_data, display_option, Returns: fig: updated heatmap ''' - ################## - print("seleced_example_dataset:", selected_example_dataset) - if not selected_example_dataset and not upload_file_paths and not data_clinic_file_path: raise PreventUpdate - print(selected_data is not None) - # user select a group of points if selected_data is not None and len(selected_data['points']) > 0: selected_indices = [point['customdata'][0] for point in selected_data['points']] # Access customdata for the original indices @@ -500,12 +495,14 @@ def update_heatmap(click_data, selected_data, display_option, data_project.init_from_dict(upload_file_paths) data_set = data_project.data if len(data_set) > 0: + print("FM file") for i in selected_indices: image, uri = data_project.data[i].read_data(export='pillow') selected_images.append(np.array(image)) ### DataClinic elif data_clinic_file_path is not None: print("data_clinic_file_path") + print(data_clinic_file_path) df = pd.read_parquet(data_clinic_file_path) selected_images = df.iloc[selected_indices].values ### Example dataset @@ -546,7 +543,6 @@ def update_heatmap(click_data, selected_data, display_option, df = pd.read_parquet("/app/work/" + selected_example_dataset) clicked_image = df.iloc[selected_index].values clicked_image = np.array(clicked_image) - heatmap_data = go.Heatmap(z=clicked_image) @@ -592,8 +588,7 @@ def 
update_statistics(selected_data, clusters, assigned_labels, label_names): Returns: [num_images, clusters, labels]: statistics ''' - - clusters = np.array(clusters) + assigned_labels = np.array(assigned_labels) if selected_data is not None and len(selected_data['points']) > 0: @@ -601,6 +596,7 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): selected_data['points']] # Access customdata for the original indices selected_clusters = [] if clusters is not None: + clusters = np.array(clusters) selected_clusters = clusters[selected_indices] selected_labels = assigned_labels[selected_indices] From 0306abd48295b0fed9d42cee481cc41197f0923d Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Thu, 8 Feb 2024 14:41:56 -0800 Subject: [PATCH 10/62] Able to read images when using DataClinic generated parquet file --- docker/requirements.txt | 3 ++- src/frontend.py | 11 ++++++----- src/latentxp_utils.py | 36 +++++++++++++++++++++++++++++++++++- 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/docker/requirements.txt b/docker/requirements.txt index 13c2d7d..bc93d1e 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -11,4 +11,5 @@ requests==2.26.0 pyarrow==11.0.0 diskcache==5.6.3 pandas -numpy \ No newline at end of file +numpy +Pillow \ No newline at end of file diff --git a/src/frontend.py b/src/frontend.py index f207539..8b07789 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -15,7 +15,7 @@ from file_manager.data_project import DataProject from app_layout import app, DOCKER_DATA, UPLOAD_FOLDER_ROOT -from latentxp_utils import kmeans_kwargs, dbscan_kwargs, hdbscan_kwargs, hex_to_rgba, generate_scatter_data, remove_key_from_dict_list, get_content, get_trained_models_list +from latentxp_utils import kmeans_kwargs, dbscan_kwargs, hdbscan_kwargs, hex_to_rgba, generate_scatter_data, remove_key_from_dict_list, get_content, get_trained_models_list, load_images_by_indices from dash_component_editor import JSONParameterEditor @@ -503,8 +503,9 @@ def update_heatmap(click_data, selected_data, display_option, elif data_clinic_file_path is not None: print("data_clinic_file_path") print(data_clinic_file_path) - df = pd.read_parquet(data_clinic_file_path) - selected_images = df.iloc[selected_indices].values + directory_path = os.path.dirname(data_clinic_file_path) + selected_images = load_images_by_indices(directory_path, selected_indices) + ### Example dataset elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": print("Demoshapes.npz") @@ -534,8 +535,8 @@ def update_heatmap(click_data, selected_data, display_option, clicked_image, uri = data_project.data[selected_index].read_data(export='pillow') ### DataClinic elif data_clinic_file_path is not None: - df = pd.read_parquet(data_clinic_file_path) - clicked_image = df.iloc[selected_index].values + directory_path = os.path.dirname(data_clinic_file_path) + selected_images = load_images_by_indices(directory_path, selected_indices) ### Example dataset elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": clicked_image = np.load("/app/work/" + selected_example_dataset)['arr_0'][selected_index] diff --git a/src/latentxp_utils.py b/src/latentxp_utils.py index 977823b..090d8fd 100755 --- a/src/latentxp_utils.py +++ b/src/latentxp_utils.py @@ -4,6 +4,7 @@ from copy import deepcopy import requests import os +from PIL import Image kmeans_kwargs = {"gui_parameters": [{"type": "dropdown", "name": "ncluster-dropdown-menu", "title": "Number of clusters", "param_key": "n_clusters", 
"options": [{"label": i, "value": i} for i in range(1, 21)], @@ -353,4 +354,37 @@ def get_trained_models_list(user, app): trained_models.append({'label': app+': '+model['job_kwargs']['kwargs']['job_type'], 'value': out_path+filename}) trained_models.reverse() - return trained_models \ No newline at end of file + return trained_models + + +def load_images_from_directory(directory_path, indices): + image_data = [] + for filename in os.listdir(directory_path): + if filename.endswith(".png") or filename.endswith(".jpg"): + file_path = os.path.join(directory_path, filename) + try: + img = Image.open(file_path) + img_array = np.array(img) + image_data.append(img_array) + except Exception as e: + print(f"Error processing {file_path}: {e}") + + image_data = np.array(image_data) + return image_data + +def load_images_by_indices(directory_path, indices): + image_data = [] + filenames = [filename for filename in sorted(os.listdir(directory_path)) if filename.lower().endswith(('.png', '.jpg'))] + for index in indices: + if index in range(len(filenames)): + filename = filenames[index] + file_path = os.path.join(directory_path, filename) + try: + img = Image.open(file_path) + img_array = np.array(img) + image_data.append(img_array) + except Exception as e: + print(f"Error processing {file_path}: {e}") + + image_data = np.array(image_data) + return image_data From 4e1b1750b256bfbef3927256cbb8faa48a4cc284 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 21 Feb 2024 09:58:15 -0800 Subject: [PATCH 11/62] Try to fix the data-click clickdata no show issue - does not report error- but still no show --- src/frontend.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index 8b07789..823be33 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -536,7 +536,7 @@ def update_heatmap(click_data, selected_data, display_option, ### DataClinic elif data_clinic_file_path is not None: directory_path = os.path.dirname(data_clinic_file_path) - selected_images = load_images_by_indices(directory_path, selected_indices) + clicked_image = load_images_by_indices(directory_path, [selected_index]) ### Example dataset elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": clicked_image = np.load("/app/work/" + selected_example_dataset)['arr_0'][selected_index] @@ -559,7 +559,9 @@ def update_heatmap(click_data, selected_data, display_option, aspect_y = 1 if heatmap_data['z'] is not None: if heatmap_data['z'].size > 0: - aspect_y, aspect_x = np.shape(heatmap_data['z']) + print("aaa") + print(np.shape(heatmap_data['z'])) + aspect_y, aspect_x = np.shape(heatmap_data['z'])[-2:] return go.Figure( data=heatmap_data, From e802e08b87c41cb9dbfa47ab881d61a326cbac83 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 21 Feb 2024 14:05:06 -0800 Subject: [PATCH 12/62] Rename job contetn, mlex_app name --- src/frontend.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index 823be33..b89bb7f 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -149,7 +149,7 @@ def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data def job_content_dict(content): job_content = {# 'mlex_app': content['name'], - 'mlex_app': 'dimension reduction demo', + 'mlex_app': 'latent_space_explorer', 'service_type': content['service_type'], 'working_directory': DATA_DIR, 'job_kwargs': {'uri': content['uri'], @@ -559,7 +559,6 @@ def update_heatmap(click_data, selected_data, display_option, aspect_y = 1 if 
heatmap_data['z'] is not None: if heatmap_data['z'].size > 0: - print("aaa") print(np.shape(heatmap_data['z'])) aspect_y, aspect_x = np.shape(heatmap_data['z'])[-2:] From 0a19fc67d953c36b7fe0a9bc33eeaa50290980a9 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 21 Feb 2024 14:11:16 -0800 Subject: [PATCH 13/62] Change output file path --- src/frontend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frontend.py b/src/frontend.py index b89bb7f..e03c028 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -21,7 +21,7 @@ #### GLOBAL PARAMS #### DATA_DIR = str(os.environ['DATA_DIR']) -OUTPUT_DIR = pathlib.Path('data/output') +OUTPUT_DIR = pathlib.Path('data/mlexchange_store/admin') USER = 'admin' #'mlexchange-team' # move to env file UPLOAD_FOLDER_ROOT = "data/upload" From 612887815297b5c9ffe16027ad07400fc8aa8554 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 21 Feb 2024 14:12:03 -0800 Subject: [PATCH 14/62] Update output path) --- src/frontend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frontend.py b/src/frontend.py index e03c028..7c33462 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -21,8 +21,8 @@ #### GLOBAL PARAMS #### DATA_DIR = str(os.environ['DATA_DIR']) -OUTPUT_DIR = pathlib.Path('data/mlexchange_store/admin') USER = 'admin' #'mlexchange-team' # move to env file +OUTPUT_DIR = pathlib.Path('data/mlexchange_store/' + USER) UPLOAD_FOLDER_ROOT = "data/upload" @app.callback( From c039dff14053782e7025ac823afebd0a8e048e44 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 13 Mar 2024 13:39:05 -0700 Subject: [PATCH 15/62] Add pre-commit-config file --- .pre-commit-config.yaml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..83d178f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,34 @@ +default_language_version: + python: python3 +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-ast + - id: check-case-conflict + - id: check-merge-conflict + - id: check-symlinks + - id: check-yaml + - id: debug-statements + - repo: https://github.com/gitguardian/ggshield + rev: v1.25.0 + hooks: + - id: ggshield + language_version: python3 + stages: [commit] + # Using this mirror lets us use mypyc-compiled black, which is about 2x faster + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 24.2.0 + hooks: + - id: black + - repo: https://github.com/pycqa/flake8 + rev: 7.0.0 + hooks: + - id: flake8 + - repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + args: ["--profile", "black"] \ No newline at end of file From 7b9ce84182145185d0c329bd9a12bf720b0c24c4 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 13 Mar 2024 13:49:48 -0700 Subject: [PATCH 16/62] Include .env file --- .env | 26 ++++++++++++++++++++++++++ .gitignore | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 .env diff --git a/.env b/.env new file mode 100644 index 0000000..6a3b729 --- /dev/null +++ b/.env @@ -0,0 +1,26 @@ +USER = admin + +TILED_SINGLE_USER_API_KEY= + +PREFECT_DB_PW=unique_password +PREFECT_DB_USER=prefect_user +PREFECT_DB_NAME=prefect +PREFECT_DB_SERVER=prefect_db + +TILED_DB_PW= +TILED_DB_USER=tiled_user +TILED_DB_NAME=tiled +TILED_DB_SERVER=tiled_db + +TILED_SINGLE_USER_API_KEY= + + +MLEX_SEGM_USER=mlex_segm_user 
+MLEX_SEGM_PW= + +TILED_API_KEY= + +TILED_INGEST_TILED_CONFIG_PATH=/deploy/config +TILED_INGEST_RMQ_HOST=rabbitmq +TILED_INGEST_RMQ_USER=guest +TILED_INGEST_RMQ_PW=guest \ No newline at end of file diff --git a/.gitignore b/.gitignore index 57516b2..6968502 100644 --- a/.gitignore +++ b/.gitignore @@ -128,7 +128,7 @@ celerybeat.pid *.sage.py # Environments -.env +# .env .venv env/ venv/ From cb68701aacc0f40736400d71c1a4ff05682679de Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 13 Mar 2024 13:50:31 -0700 Subject: [PATCH 17/62] Update docker-compose to replace compute api with prefect, does not work --- docker-compose.yml | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 383494d..e62e4a6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,34 @@ version: "3" services: + + prefect: + image: prefecthq/prefect:2.14-python3.11 + command: prefect server start + environment: + - PREFECT_SERVER_API_HOST=0.0.0.0 + - PREFECT_API_DATABASE_CONNECTION_URL=postgresql+asyncpg://${PREFECT_DB_USER}:${PREFECT_DB_PW}@prefect_db:5432/${PREFECT_DB_NAME} # Needed if using postgres and not sqlite + # - PREFECT_UI_API_URL=https://localhost/api. needed if nginx is handling ssl termination + - PREFECT_LOGGING_LEVEL=DEBUG + ports: + - 4200:4200 + depends_on: + - prefect_db + networks: + mle_net: + + prefect_db: + image: postgres:14.5-alpine + environment: + - POSTGRES_USER=${PREFECT_DB_USER} + - POSTGRES_PASSWORD=${PREFECT_DB_PW} + - POSTGRES_DB=${PREFECT_DB_NAME} + volumes: + - ./data/prefect_db:/var/lib/postgresql/data:rw + restart: unless-stopped + networks: + mle_net: + front-end: restart: "unless-stopped" container_name: "latentxp" @@ -16,11 +44,17 @@ services: - ./src:/app/work/src ports: - "8070:8070" + # networks: + # - computing_api_default networks: - - computing_api_default + mle_net: + +# networks: +# computing_api_default: +# external: true networks: - computing_api_default: - external: true + mle_net: + driver: bridge # env file: set up pwd \ No newline at end of file From b1ba1f4753a8a7415244d0fe42bdd751e818cf28 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Fri, 15 Mar 2024 13:21:20 -0700 Subject: [PATCH 18/62] Remove prefect --- docker-compose.yml | 39 +++------------------------------------ 1 file changed, 3 insertions(+), 36 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index e62e4a6..84e1ee3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,33 +2,6 @@ version: "3" services: - prefect: - image: prefecthq/prefect:2.14-python3.11 - command: prefect server start - environment: - - PREFECT_SERVER_API_HOST=0.0.0.0 - - PREFECT_API_DATABASE_CONNECTION_URL=postgresql+asyncpg://${PREFECT_DB_USER}:${PREFECT_DB_PW}@prefect_db:5432/${PREFECT_DB_NAME} # Needed if using postgres and not sqlite - # - PREFECT_UI_API_URL=https://localhost/api. 
needed if nginx is handling ssl termination - - PREFECT_LOGGING_LEVEL=DEBUG - ports: - - 4200:4200 - depends_on: - - prefect_db - networks: - mle_net: - - prefect_db: - image: postgres:14.5-alpine - environment: - - POSTGRES_USER=${PREFECT_DB_USER} - - POSTGRES_PASSWORD=${PREFECT_DB_PW} - - POSTGRES_DB=${PREFECT_DB_NAME} - volumes: - - ./data/prefect_db:/var/lib/postgresql/data:rw - restart: unless-stopped - networks: - mle_net: - front-end: restart: "unless-stopped" container_name: "latentxp" @@ -44,17 +17,11 @@ services: - ./src:/app/work/src ports: - "8070:8070" - # networks: - # - computing_api_default networks: - mle_net: - -# networks: -# computing_api_default: -# external: true + mlex_mle_net: networks: - mle_net: - driver: bridge + mlex_mle_net: + external: true # env file: set up pwd \ No newline at end of file From 58cab54d8fdb5173799c92c17c80ed6b3e660049 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Fri, 15 Mar 2024 13:38:54 -0700 Subject: [PATCH 19/62] add prefect: --- docker/requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/requirements.txt b/docker/requirements.txt index bc93d1e..3861b96 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -12,4 +12,6 @@ pyarrow==11.0.0 diskcache==5.6.3 pandas numpy -Pillow \ No newline at end of file +Pillow +# prefect +prefect-client==2.14.21 \ No newline at end of file From bbb35c1ab785bda57a56199589c51b53f9e5d945 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Fri, 15 Mar 2024 13:39:15 -0700 Subject: [PATCH 20/62] Add prefect components --- src/frontend.py | 7 +++++++ src/utils_prefect.py | 0 2 files changed, 7 insertions(+) create mode 100644 src/utils_prefect.py diff --git a/src/frontend.py b/src/frontend.py index 7c33462..09a304d 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -18,6 +18,13 @@ from latentxp_utils import kmeans_kwargs, dbscan_kwargs, hdbscan_kwargs, hex_to_rgba, generate_scatter_data, remove_key_from_dict_list, get_content, get_trained_models_list, load_images_by_indices from dash_component_editor import JSONParameterEditor +from utils_prefect import ( + get_children_flow_run_ids, + get_flow_run_name, + get_flow_runs_by_name, + schedule_prefect_flow, +) + #### GLOBAL PARAMS #### DATA_DIR = str(os.environ['DATA_DIR']) diff --git a/src/utils_prefect.py b/src/utils_prefect.py new file mode 100644 index 0000000..e69de29 From da3210031294d2aa21bc2a5bb4120c4d3b72c645 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Fri, 15 Mar 2024 13:43:00 -0700 Subject: [PATCH 21/62] Prefect update, not done yet; --- src/frontend.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/frontend.py b/src/frontend.py index 09a304d..5316a20 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -231,7 +231,16 @@ def submit_dimension_reduction_job(submit_n_clicks, job_content = job_content_dict(model_content) job_content['job_kwargs']['kwargs'] = {} job_content['job_kwargs']['kwargs']['parameters'] = input_params - #TODO: other kwargs + + + # prefect + job_uid = schedule_prefect_flow( + FLOW__NAME, + parameters=TRAIN_PARAMS_EXAMPLE, + flow_run_name=f"{job_name} {current_time}", + tags=PREFECT_TAGS + ["train", project_name], + ) + job_message = f"Job has been succesfully submitted with uid: {job_uid} and mask uri: {mask_uri}" compute_dict = {'user_uid': USER, 'host_list': ['mlsandbox.als.lbl.gov', 'local.als.lbl.gov', 'vaughan.als.lbl.gov'], From ffc570920b89bda1286bd98bc9d60d67dc693ebb Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Tue, 19 Mar 2024 
15:10:30 -0700 Subject: [PATCH 22/62] update .env file --- .env | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.env b/.env index 6a3b729..4ea6411 100644 --- a/.env +++ b/.env @@ -6,6 +6,11 @@ PREFECT_DB_PW=unique_password PREFECT_DB_USER=prefect_user PREFECT_DB_NAME=prefect PREFECT_DB_SERVER=prefect_db +PREFECT_API_URL=http://prefect:4200/api +FLOW_NAME="Parent flow/launch_parent_flow" +TIMEZONE="US/Pacific" + +PREFECT_TAGS='["latent-space-explorer"]' TILED_DB_PW= TILED_DB_USER=tiled_user From 0aec2b5d65cb55e0b89c6edd5155f2733bc4355c Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Tue, 19 Mar 2024 15:10:59 -0700 Subject: [PATCH 23/62] Update to use prefect --- docker-compose.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 84e1ee3..4210039 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,7 +11,11 @@ services: mem_limit: 2g environment: DATA_DIR: "${PWD}/data/" - # USER: "$USER" + PREFECT_TAGS: "${PREFECT_TAGS}" + PREFECT_API_URL: '${PREFECT_API_URL}' + FLOW_NAME: '${FLOW_NAME}' + TIMEZONE: "${TIMEZONE}" + USER: "${USER}" volumes: - ./data:/app/work/data - ./src:/app/work/src From 5e15f92b2ab993e23d4389ee172ddb3023c95679 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Tue, 19 Mar 2024 15:11:30 -0700 Subject: [PATCH 24/62] Prefect related utilies --- src/latentxp_utils.py | 2 + src/utils_prefect.py | 105 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) diff --git a/src/latentxp_utils.py b/src/latentxp_utils.py index 090d8fd..76be66f 100755 --- a/src/latentxp_utils.py +++ b/src/latentxp_utils.py @@ -388,3 +388,5 @@ def load_images_by_indices(directory_path, indices): image_data = np.array(image_data) return image_data + + diff --git a/src/utils_prefect.py b/src/utils_prefect.py index e69de29..2b6612a 100644 --- a/src/utils_prefect.py +++ b/src/utils_prefect.py @@ -0,0 +1,105 @@ +import asyncio +from typing import Optional + +from prefect import get_client +from prefect.client.schemas.filters import ( + FlowRunFilter, + FlowRunFilterName, + FlowRunFilterParentFlowRunId, + FlowRunFilterTags, +) + + +async def _schedule( + deployment_name: str, + flow_run_name: str, + parameters: Optional[dict] = None, + tags: Optional[list] = [], +): + async with get_client() as client: + deployment = await client.read_deployment_by_name(deployment_name) + assert ( + deployment + ), f"No deployment found in config for deployment_name {deployment_name}" + flow_run = await client.create_flow_run_from_deployment( + deployment.id, + parameters=parameters, + name=flow_run_name, + tags=tags, + ) + return flow_run.id + + +def schedule_prefect_flow( + deployment_name: str, + parameters: Optional[dict] = None, + flow_run_name: Optional[str] = None, + tags: Optional[list] = [], +): + if not flow_run_name: + model_name = parameters["model_name"] + flow_run_name = f"{deployment_name}: {model_name}" + flow_run_id = asyncio.run( + _schedule(deployment_name, flow_run_name, parameters, tags) + ) + return flow_run_id + + +async def _get_name(flow_run_id): + async with get_client() as client: + flow_run = await client.read_flow_run(flow_run_id) + if flow_run.state.is_final(): + if flow_run.state.is_completed(): + return flow_run.name + return None + + +def get_flow_run_name(flow_run_id): + """Retrieves the name of the flow with the given id.""" + return asyncio.run(_get_name(flow_run_id)) + + +async def _flow_run_query( + tags=None, flow_run_name=None, parent_flow_run_id=None, sort="START_TIME_DESC" +): + 
flow_run_filter_parent_flow_run_id = ( + FlowRunFilterParentFlowRunId(any_=[parent_flow_run_id]) + if parent_flow_run_id + else None + ) + async with get_client() as client: + flow_runs = await client.read_flow_runs( + flow_run_filter=FlowRunFilter( + name=FlowRunFilterName(like_=flow_run_name), + parent_flow_run_id=flow_run_filter_parent_flow_run_id, + tags=FlowRunFilterTags(all_=tags), + ), + sort=sort, + ) + return flow_runs + + +def get_flow_runs_by_name(flow_run_name=None, tags=None): + flow_runs_by_name = [] + flow_runs = asyncio.run(_flow_run_query(tags, flow_run_name=flow_run_name)) + for flow_run in flow_runs: + if flow_run.state_name in {"Failed", "Crashed"}: + flow_name = f"❌ {flow_run.name}" + elif flow_run.state_name == "Completed": + flow_name = f"✅ {flow_run.name}" + elif flow_run.state_name == "Cancelled": + flow_name = f"🚫 {flow_run.name}" + else: + flow_name = f"🕑 {flow_run.name}" + flow_runs_by_name.append({"label": flow_name, "value": str(flow_run.id)}) + return flow_runs_by_name + + +def get_children_flow_run_ids(parent_flow_run_id, sort="START_TIME_ASC"): + children_flow_runs = asyncio.run( + _flow_run_query(parent_flow_run_id=parent_flow_run_id, sort=sort) + ) + children_flow_run_ids = [ + str(children_flow_run.id) for children_flow_run in children_flow_runs + ] + return children_flow_run_ids \ No newline at end of file From a8098f74c8be92f159f138f578f4e3cb9bbf6f3b Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Tue, 19 Mar 2024 15:11:53 -0700 Subject: [PATCH 25/62] Add a component to ask user input a job name --- src/app_layout.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/app_layout.py b/src/app_layout.py index 08b3c36..c43c17b 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -104,6 +104,17 @@ html.Hr(), html.Div( [ + dbc.Label('Name your job', className='mr-2'), + dcc.Input( + id="job_name", + placeholder="test0", + style={'width':'100%'} + ), + ] + ), + html.Hr(), + html.Div( + [ dbc.Button( "Submit", color="secondary", From a2743da67500a8801993d2aae4187e4e931eeead Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Tue, 19 Mar 2024 15:27:10 -0700 Subject: [PATCH 26/62] get job name from user input: --- src/app_layout.py | 2 +- src/frontend.py | 148 ++++++++++++++++++++++++++++++++-------------- 2 files changed, 105 insertions(+), 45 deletions(-) diff --git a/src/app_layout.py b/src/app_layout.py index c43c17b..2aac91e 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -106,7 +106,7 @@ [ dbc.Label('Name your job', className='mr-2'), dcc.Input( - id="job_name", + id="job-name", placeholder="test0", style={'width':'100%'} ), diff --git a/src/frontend.py b/src/frontend.py index 5316a20..f928624 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -11,6 +11,9 @@ import requests import os import requests +import pytz +from datetime import datetime + from file_manager.data_project import DataProject @@ -31,6 +34,52 @@ USER = 'admin' #'mlexchange-team' # move to env file OUTPUT_DIR = pathlib.Path('data/mlexchange_store/' + USER) UPLOAD_FOLDER_ROOT = "data/upload" +PREFECT_TAGS = json.loads(os.getenv("PREFECT_TAGS", '["latent-space-explorer"]')) +TIMEZONE = os.getenv("TIMEZONE", "US/Pacific") +FLOW_NAME = os.getenv("FLOW_NAME", "") + + +# TODO: Get model parameters from UI +TRAIN_PARAMS_EXAMPLE = { + "flow_type": "podman", + "params_list": [ + { + "image_name": "ghcr.io/mlexchange/mlex_latent_explorer", + "image_tag": "main", + "command": 'python -c \\"import time; time.sleep(30)\\"', + "params": { + "io_parameters": {"uid_save": 
"uid0001", "uid_retrieve": "uid0001"} + }, + "volumes": [f"{DATA_DIR}:/app/work/data"], + }, + { + "image_name": "ghcr.io/mlexchange/mlex_latent_explorer", + "image_tag": "main", + "command": 'python -c \\"import time; time.sleep(10)\\"', + "params": { + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + }, + "volumes": [f"{DATA_DIR}:/app/work/data"], + }, + ], +} + +INFERENCE_PARAMS_EXAMPLE = { + "flow_type": "podman", + "params_list": [ + { + "image_name": "ghcr.io/mlexchange/mlex_latent_explorer", + "image_tag": "main", + "command": 'python -c \\"import time; time.sleep(30)\\"', + "params": { + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + }, + "volumes": [f"{DATA_DIR}:/app/work/data"], + }, + ], +} + + @app.callback( Output('additional-model-params', 'children'), @@ -182,6 +231,7 @@ def job_content_dict(content): ], Input('run-algo', 'n_clicks'), [ + State('job-name', 'value'), # job_name State('example-dataset-selection', 'value'), # 2 example dataset State('user-upload-data-dir', 'data'), # FM State('feature-vector-model-list', 'value'), # DataClinic @@ -192,6 +242,7 @@ def job_content_dict(content): prevent_initial_call=True ) def submit_dimension_reduction_job(submit_n_clicks, + job_name, selected_example_dataset, user_upload_data_dir, data_clinic_file_path, model_id, selected_algo, children): """ @@ -226,44 +277,53 @@ def submit_dimension_reduction_job(submit_n_clicks, value = child["props"]["children"][1]["props"]["value"] input_params[key] = value print("Dimension reduction algo params: ", input_params) + model_content = get_content(model_id) print(model_content) job_content = job_content_dict(model_content) job_content['job_kwargs']['kwargs'] = {} job_content['job_kwargs']['kwargs']['parameters'] = input_params + # check if user is using user uploaded zip file or example dataset or data clinic file + if user_upload_data_dir is not None: + selected_dataset = user_upload_data_dir + elif data_clinic_file_path is not None: + selected_dataset = data_clinic_file_path + else: + selected_dataset = selected_example_dataset + print(selected_dataset) # prefect + current_time = datetime.now(pytz.timezone(TIMEZONE)).strftime("%Y/%m/%d %H:%M:%S") + if not job_name: + job_name = "test0" + project_name = selected_dataset.split("/")[-1] # name of the dataset, get it from FM + print(PREFECT_TAGS, flush=True) + # job_uid is the job_uid = schedule_prefect_flow( - FLOW__NAME, + FLOW_NAME, parameters=TRAIN_PARAMS_EXAMPLE, flow_run_name=f"{job_name} {current_time}", tags=PREFECT_TAGS + ["train", project_name], ) - job_message = f"Job has been succesfully submitted with uid: {job_uid} and mask uri: {mask_uri}" - - compute_dict = {'user_uid': USER, - 'host_list': ['mlsandbox.als.lbl.gov', 'local.als.lbl.gov', 'vaughan.als.lbl.gov'], - 'requirements': {'num_processors': 2, - 'num_gpus': 0, - 'num_nodes': 2}, - } - compute_dict['job_list'] = [job_content] - compute_dict['dependencies'] = {'0':[]} - compute_dict['requirements']['num_nodes'] = 1 + job_message = f"Job has been succesfully submitted with uid: {job_uid}." 
+ print("Job message") + print(job_message, flush=True) + + # compute_dict = {'user_uid': USER, + # 'host_list': ['mlsandbox.als.lbl.gov', 'local.als.lbl.gov', 'vaughan.als.lbl.gov'], + # 'requirements': {'num_processors': 2, + # 'num_gpus': 0, + # 'num_nodes': 2}, + # } + # compute_dict['job_list'] = [job_content] + # compute_dict['dependencies'] = {'0':[]} + # compute_dict['requirements']['num_nodes'] = 1 # create user directory to store users data/experiments experiment_id = str(uuid.uuid4()) # create unique id for experiment output_path = OUTPUT_DIR / experiment_id output_path.mkdir(parents=True, exist_ok=True) - - # check if user is using user uploaded zip file or example dataset or data clinic file - if user_upload_data_dir is not None: - selected_dataset = user_upload_data_dir - elif data_clinic_file_path is not None: - selected_dataset = data_clinic_file_path - else: - selected_dataset = selected_example_dataset # check which dimension reduction algo, then compose command if selected_algo == 'PCA': @@ -275,10 +335,10 @@ def submit_dimension_reduction_job(submit_n_clicks, #print(docker_cmd) docker_cmd = docker_cmd + ' \'' + json.dumps(input_params) + '\'' #print(docker_cmd) - job_content['job_kwargs']['cmd'] = docker_cmd + #job_content['job_kwargs']['cmd'] = docker_cmd - response = requests.post('http://job-service:8080/api/v0/workflows', json=compute_dict) - print("respnse: ", response) + # response = requests.post('http://job-service:8080/api/v0/workflows', json=compute_dict) + # print("respnse: ", response) # job_response = get_job(user=None, mlex_app=job_content['mlex_app']) return experiment_id, 'cluster', -1, -2, go.Figure(go.Heatmap()), -1 @@ -679,27 +739,27 @@ def toggle_modal(n_submit, n_apply, return False, "No alert." -@app.callback( - Output('feature-vector-model-list', 'options'), - Input('interval-component', 'n_intervals'), -) -def update_trained_model_list(interval): - ''' - This callback updates the list of trained models - Args: - tab_value: Tab option - prob_refresh_n_clicks: Button to refresh the list of probability-based trained models - similarity_refresh_n_clicks: Button to refresh the list of similarity-based trained models - Returns: - prob_model_list: List of trained models in mlcoach - similarity_model_list: List of trained models in data clinic and mlcoach - ''' - data_clinic_models = get_trained_models_list(USER, 'data_clinic') - ml_coach_models = get_trained_models_list(USER, 'mlcoach') - feature_vector_models = data_clinic_models + ml_coach_models - #print(feature_vector_models) - - return feature_vector_models +# @app.callback( +# Output('feature-vector-model-list', 'options'), +# Input('interval-component', 'n_intervals'), +# ) +# def update_trained_model_list(interval): +# ''' +# This callback updates the list of trained models +# Args: +# tab_value: Tab option +# prob_refresh_n_clicks: Button to refresh the list of probability-based trained models +# similarity_refresh_n_clicks: Button to refresh the list of similarity-based trained models +# Returns: +# prob_model_list: List of trained models in mlcoach +# similarity_model_list: List of trained models in data clinic and mlcoach +# ''' +# data_clinic_models = get_trained_models_list(USER, 'data_clinic') +# ml_coach_models = get_trained_models_list(USER, 'mlcoach') +# feature_vector_models = data_clinic_models + ml_coach_models +# #print(feature_vector_models) + +# return feature_vector_models if __name__ == '__main__': From 173977d90477287075ff28700c77352fb232e3e3 Mon Sep 17 00:00:00 2001 From: 
Runbo Jiang Date: Wed, 20 Mar 2024 12:04:34 -0700 Subject: [PATCH 27/62] Success run DR job through Prefect and show the latent vectors --- src/frontend.py | 98 +++++++++++++++++-------------------------------- 1 file changed, 34 insertions(+), 64 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index f928624..c188172 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -44,23 +44,14 @@ "flow_type": "podman", "params_list": [ { - "image_name": "ghcr.io/mlexchange/mlex_latent_explorer", + "image_name": "ghcr.io/runboj/mlex_dimension_reduction_pca", "image_tag": "main", "command": 'python -c \\"import time; time.sleep(30)\\"', "params": { "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} }, "volumes": [f"{DATA_DIR}:/app/work/data"], - }, - { - "image_name": "ghcr.io/mlexchange/mlex_latent_explorer", - "image_tag": "main", - "command": 'python -c \\"import time; time.sleep(10)\\"', - "params": { - "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} - }, - "volumes": [f"{DATA_DIR}:/app/work/data"], - }, + } ], } @@ -68,7 +59,7 @@ "flow_type": "podman", "params_list": [ { - "image_name": "ghcr.io/mlexchange/mlex_latent_explorer", + "image_name": "ghcr.io/runboj/mlex_dimension_reduction_pca", "image_tag": "main", "command": 'python -c \\"import time; time.sleep(30)\\"', "params": { @@ -203,18 +194,18 @@ def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data return labels, label_schema, options, user_upload_data_dir -def job_content_dict(content): - job_content = {# 'mlex_app': content['name'], - 'mlex_app': 'latent_space_explorer', - 'service_type': content['service_type'], - 'working_directory': DATA_DIR, - 'job_kwargs': {'uri': content['uri'], - 'cmd': content['cmd'][0]} - } - if 'map' in content: - job_content['job_kwargs']['map'] = content['map'] +# def job_content_dict(content): +# job_content = {# 'mlex_app': content['name'], +# 'mlex_app': 'latent_space_explorer', +# 'service_type': content['service_type'], +# 'working_directory': DATA_DIR, +# 'job_kwargs': {'uri': content['uri'], +# 'cmd': content['cmd'][0]} +# } +# if 'map' in content: +# job_content['job_kwargs']['map'] = content['map'] - return job_content +# return job_content @app.callback( [ @@ -277,12 +268,6 @@ def submit_dimension_reduction_job(submit_n_clicks, value = child["props"]["children"][1]["props"]["value"] input_params[key] = value print("Dimension reduction algo params: ", input_params) - - model_content = get_content(model_id) - print(model_content) - job_content = job_content_dict(model_content) - job_content['job_kwargs']['kwargs'] = {} - job_content['job_kwargs']['kwargs']['parameters'] = input_params # check if user is using user uploaded zip file or example dataset or data clinic file if user_upload_data_dir is not None: @@ -295,11 +280,28 @@ def submit_dimension_reduction_job(submit_n_clicks, # prefect current_time = datetime.now(pytz.timezone(TIMEZONE)).strftime("%Y/%m/%d %H:%M:%S") - if not job_name: - job_name = "test0" + if not job_name: job_name = "test0" + # job_name += " " + str(current_time) project_name = selected_dataset.split("/")[-1] # name of the dataset, get it from FM print(PREFECT_TAGS, flush=True) - # job_uid is the + + # create user directory to store users data/experiments + experiment_id = experiment_id = str(uuid.uuid4()) + output_path = OUTPUT_DIR / experiment_id + output_path.mkdir(parents=True, exist_ok=True) + + # check which dimension reduction algo, then compose command + if selected_algo == 'PCA': + 
TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python pca_run.py" + elif selected_algo == 'UMAP': + TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python umap_run.py" + + TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["images_dir"] = selected_dataset + TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["output_dir"] = str(output_path) + TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["model_parameters"] = input_params + print(TRAIN_PARAMS_EXAMPLE) + + # run prefect job, job_uid is the new experiment id job_uid = schedule_prefect_flow( FLOW_NAME, parameters=TRAIN_PARAMS_EXAMPLE, @@ -307,39 +309,7 @@ def submit_dimension_reduction_job(submit_n_clicks, tags=PREFECT_TAGS + ["train", project_name], ) job_message = f"Job has been succesfully submitted with uid: {job_uid}." - print("Job message") print(job_message, flush=True) - - # compute_dict = {'user_uid': USER, - # 'host_list': ['mlsandbox.als.lbl.gov', 'local.als.lbl.gov', 'vaughan.als.lbl.gov'], - # 'requirements': {'num_processors': 2, - # 'num_gpus': 0, - # 'num_nodes': 2}, - # } - # compute_dict['job_list'] = [job_content] - # compute_dict['dependencies'] = {'0':[]} - # compute_dict['requirements']['num_nodes'] = 1 - - # create user directory to store users data/experiments - experiment_id = str(uuid.uuid4()) # create unique id for experiment - output_path = OUTPUT_DIR / experiment_id - output_path.mkdir(parents=True, exist_ok=True) - - # check which dimension reduction algo, then compose command - if selected_algo == 'PCA': - cmd_list = ["python pca_run.py", selected_dataset, str(output_path)] - elif selected_algo == 'UMAP': - cmd_list = ["python umap_run.py", selected_dataset, str(output_path)] - - docker_cmd = " ".join(cmd_list) - #print(docker_cmd) - docker_cmd = docker_cmd + ' \'' + json.dumps(input_params) + '\'' - #print(docker_cmd) - #job_content['job_kwargs']['cmd'] = docker_cmd - - # response = requests.post('http://job-service:8080/api/v0/workflows', json=compute_dict) - # print("respnse: ", response) - # job_response = get_job(user=None, mlex_app=job_content['mlex_app']) return experiment_id, 'cluster', -1, -2, go.Figure(go.Heatmap()), -1 From 58de19499e4630926ea778f3323ba969517dede7 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 20 Mar 2024 13:15:09 -0700 Subject: [PATCH 28/62] Remove compute api related function --- src/frontend.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index c188172..7a73a08 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -194,19 +194,6 @@ def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data return labels, label_schema, options, user_upload_data_dir -# def job_content_dict(content): -# job_content = {# 'mlex_app': content['name'], -# 'mlex_app': 'latent_space_explorer', -# 'service_type': content['service_type'], -# 'working_directory': DATA_DIR, -# 'job_kwargs': {'uri': content['uri'], -# 'cmd': content['cmd'][0]} -# } -# if 'map' in content: -# job_content['job_kwargs']['map'] = content['map'] - -# return job_content - @app.callback( [ # flag the read variable From b6ee3c488838b8158760b40b430fd39364790a21 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Fri, 22 Mar 2024 15:26:38 -0700 Subject: [PATCH 29/62] Work with FM and tiled, need minor error remove --- docker-compose.yml | 1 + src/frontend.py | 120 ++++++++++++++++++++++++++------------------- 2 files changed, 70 insertions(+), 51 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 
4210039..75a44f5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,6 +19,7 @@ services: volumes: - ./data:/app/work/data - ./src:/app/work/src + - ../mlex_file_manager/file_manager:/app/work/src/file_manager ports: - "8070:8070" networks: diff --git a/src/frontend.py b/src/frontend.py index 7a73a08..cd3f173 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -133,15 +133,15 @@ def show_clustering_gui_layouts(selected_algo): Output('input_labels', 'data'), Output('label_schema', 'data'), Output('label-dropdown', 'options'), - Output('user-upload-data-dir', 'data'), + # Output('user-upload-data-dir', 'data'), ], [ Input('example-dataset-selection', 'value'), # example dataset - Input({'base_id': 'file-manager', 'name': 'docker-file-paths'},'data'), # FM dataset + Input({'base_id': 'file-manager', 'name': 'data-project-dict'},'data'), # FM dataset Input('feature-vector-model-list', 'value'), # data clinic dataset ] ) -def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data_clinic_file_path): +def update_data_n_label_schema(selected_example_dataset, data_project_dict, data_clinic_file_path): ''' This callback updates the selected dataset from the provided example datasets, as well as labels, and label schema Args: @@ -161,14 +161,16 @@ def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data # priority level: FileManage > DataClinic > Example Datasets # FileManager - user uploaded zip file of images - data_project = DataProject() - data_project.init_from_dict(upload_file_paths) - data_set = data_project.data # list of len 1920, each element is a local_dataset.LocalDataset object + # data_project = DataProject() + # data_project.init_from_dict(upload_file_paths) + + data_project = DataProject.from_dict(data_project_dict) + data_set_len = data_project.datasets[-1].cumulative_data_count - 1 # list of len 1920, each element is a local_dataset.LocalDataset object options = [] - user_upload_data_dir = None - if len(data_set) > 0: - labels = np.full((len(data_set),), -1) - user_upload_data_dir = os.path.dirname(upload_file_paths[0]['uri']) + #user_upload_data_dir = None + if data_set_len > 0: + labels = np.full((data_set_len,), -1) + # user_upload_data_dir = os.path.dirname(data_project_dict[0]['uri']) # DataClinic options elif data_clinic_file_path is not None: df = pd.read_parquet(data_clinic_file_path) @@ -192,7 +194,7 @@ def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data options.insert(0, {'label': 'Unlabeled', 'value': -1}) options.insert(0, {'label': 'All', 'value': -2}) - return labels, label_schema, options, user_upload_data_dir + return labels, label_schema, options #, user_upload_data_dir @app.callback( [ @@ -211,18 +213,20 @@ def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data [ State('job-name', 'value'), # job_name State('example-dataset-selection', 'value'), # 2 example dataset - State('user-upload-data-dir', 'data'), # FM + # State('user-upload-data-dir', 'data'), # FM State('feature-vector-model-list', 'value'), # DataClinic State('model_id', 'data'), State('algo-dropdown', 'value'), State('additional-model-params', 'children'), + State({"base_id": "file-manager", "name": "data-project-dict"}, "data") # DataProject for FM + ], prevent_initial_call=True ) def submit_dimension_reduction_job(submit_n_clicks, job_name, - selected_example_dataset, user_upload_data_dir, data_clinic_file_path, - model_id, selected_algo, children): + selected_example_dataset, 
data_clinic_file_path, + model_id, selected_algo, children, data_project_dict): """ This callback is triggered every time the Submit button is hit: - compute latent vectors, which will be saved in data/output/experiment_id @@ -245,7 +249,7 @@ def submit_dimension_reduction_job(submit_n_clicks, """ if not submit_n_clicks: raise PreventUpdate - if not selected_example_dataset and not user_upload_data_dir and not data_clinic_file_path: + if not selected_example_dataset and not data_project_dict and not data_clinic_file_path: raise PreventUpdate input_params = {} @@ -254,28 +258,41 @@ def submit_dimension_reduction_job(submit_n_clicks, key = child["props"]["children"][1]["props"]["id"]["param_key"] value = child["props"]["children"][1]["props"]["value"] input_params[key] = value - print("Dimension reduction algo params: ", input_params) + print("Dimension reduction algo params: ", input_params, flush=True) # check if user is using user uploaded zip file or example dataset or data clinic file - if user_upload_data_dir is not None: - selected_dataset = user_upload_data_dir - elif data_clinic_file_path is not None: - selected_dataset = data_clinic_file_path + data_project = DataProject.from_dict(data_project_dict) + if len(data_project.datasets) > 0: + print("FMM", flush=True) + data_project = DataProject.from_dict(data_project_dict) + io_parameters = {"data_uris": [dataset.uri for dataset in data_project.datasets], + "data_tiled_api_key": data_project.api_key, + "data_type": data_project.data_type, + "root_uri": data_project.root_uri, + } + + # elif data_clinic_file_path is not None: + # selected_dataset = data_clinic_file_path else: - selected_dataset = selected_example_dataset - print(selected_dataset) + print("selected_example_dataset: " + selected_example_dataset, flush=True) + io_parameters = {"data_uris": [selected_example_dataset], + "data_tiled_api_key": None, + "data_type": "file", + "root_uri": None, + } # prefect current_time = datetime.now(pytz.timezone(TIMEZONE)).strftime("%Y/%m/%d %H:%M:%S") if not job_name: job_name = "test0" - # job_name += " " + str(current_time) - project_name = selected_dataset.split("/")[-1] # name of the dataset, get it from FM + job_name += " " + str(current_time) + # project_name = selected_dataset.split("/")[-1] # name of the dataset, get it from FM ## this is an issue + project_name = "fake_name" print(PREFECT_TAGS, flush=True) # create user directory to store users data/experiments - experiment_id = experiment_id = str(uuid.uuid4()) - output_path = OUTPUT_DIR / experiment_id - output_path.mkdir(parents=True, exist_ok=True) + # experiment_id = str(uuid.uuid4()) + # output_path = OUTPUT_DIR / experiment_id + # output_path.mkdir(parents=True, exist_ok=True) # check which dimension reduction algo, then compose command if selected_algo == 'PCA': @@ -283,12 +300,13 @@ def submit_dimension_reduction_job(submit_n_clicks, elif selected_algo == 'UMAP': TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python umap_run.py" - TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["images_dir"] = selected_dataset - TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["output_dir"] = str(output_path) + TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"] = io_parameters + TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["output_dir"] = str(OUTPUT_DIR) + TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["uid_save"] = "" TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["model_parameters"] = 
input_params print(TRAIN_PARAMS_EXAMPLE) - # run prefect job, job_uid is the new experiment id + # run prefect job, job_uid is the new experiment id -> uid_save in the pca_example.yaml file job_uid = schedule_prefect_flow( FLOW_NAME, parameters=TRAIN_PARAMS_EXAMPLE, @@ -298,7 +316,7 @@ def submit_dimension_reduction_job(submit_n_clicks, job_message = f"Job has been succesfully submitted with uid: {job_uid}." print(job_message, flush=True) - return experiment_id, 'cluster', -1, -2, go.Figure(go.Heatmap()), -1 + return job_uid, 'cluster', -1, -2, go.Figure(go.Heatmap()), -1 @app.callback( [ @@ -325,9 +343,13 @@ def read_latent_vectors(n_intervals, experiment_id, max_intervals): """ if experiment_id is None or n_intervals == 0 or max_intervals == 0: raise PreventUpdate + + children_flows = get_children_flow_run_ids(experiment_id) + print("child flow") + print(children_flows) #read the latent vectors from the output dir - output_path = OUTPUT_DIR / experiment_id + output_path = OUTPUT_DIR / children_flows[0] npz_files = list(output_path.glob('*.npy')) if len(npz_files) > 0 : lv_filepath = npz_files[0] # latent vector file path @@ -494,16 +516,16 @@ def update_scatter_plot(latent_vectors, selected_cluster, selected_label, scatte Input('mean-std-toggle', 'value'), ], [ - State('example-dataset-selection', 'value'), # example dataset - State({'base_id': 'file-manager', 'name': 'docker-file-paths'},'data'), # FM dataset - State('feature-vector-model-list', 'value'), # data clinic dataset + State('example-dataset-selection', 'value'), # example dataset + State({"base_id": "file-manager", "name": "data-project-dict"}, "data"), # DataProject for FM + State('feature-vector-model-list', 'value'), # data clinic dataset ], prevent_initial_call=True ) def update_heatmap(click_data, selected_data, display_option, - selected_example_dataset, upload_file_paths, data_clinic_file_path): + selected_example_dataset, data_project_dict, data_clinic_file_path): ''' This callback update the heatmap Args: @@ -513,7 +535,7 @@ def update_heatmap(click_data, selected_data, display_option, Returns: fig: updated heatmap ''' - if not selected_example_dataset and not upload_file_paths and not data_clinic_file_path: + if not selected_example_dataset and not data_project_dict and not data_clinic_file_path: raise PreventUpdate # user select a group of points @@ -524,21 +546,18 @@ def update_heatmap(click_data, selected_data, display_option, ### FileManager # print("upload_file_paths") # if not selected, its an empty list not None selected_images = [] - data_project = DataProject() - data_project.init_from_dict(upload_file_paths) - data_set = data_project.data - if len(data_set) > 0: + + data_project = DataProject.from_dict(data_project_dict) + data_set_len = data_project.datasets[-1].cumulative_data_count - 1 + if data_set_len > 0: print("FM file") - for i in selected_indices: - image, uri = data_project.data[i].read_data(export='pillow') - selected_images.append(np.array(image)) + selected_images, _ = data_project.read(selected_indices, export='pillow') ### DataClinic elif data_clinic_file_path is not None: print("data_clinic_file_path") print(data_clinic_file_path) directory_path = os.path.dirname(data_clinic_file_path) selected_images = load_images_by_indices(directory_path, selected_indices) - ### Example dataset elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": print("Demoshapes.npz") @@ -559,13 +578,12 @@ def update_heatmap(click_data, selected_data, display_option, heatmap_data = 
go.Heatmap(z=np.std(selected_images, axis=0)) elif click_data is not None and len(click_data['points']) > 0: - selected_index = click_data['points'][0]['customdata'][0] # click_data['points'][0]['pointIndex'] + selected_index = click_data['points'][0]['customdata'][0] ### FileManager - data_project = DataProject() - data_project.init_from_dict(upload_file_paths) - data_set = data_project.data - if len(data_set) > 0: - clicked_image, uri = data_project.data[selected_index].read_data(export='pillow') + data_project = DataProject.from_dict(data_project_dict) + data_set_len = data_project.datasets[-1].cumulative_data_count - 1 + if data_set_len > 0: + selected_images, _ = data_project.read([selected_index], export='pillow') ### DataClinic elif data_clinic_file_path is not None: directory_path = os.path.dirname(data_clinic_file_path) From e9b7c223aeb639e0ef4c5fbc17d04c170d55a32e Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Fri, 22 Mar 2024 16:51:58 -0700 Subject: [PATCH 30/62] Remove wrong dataset length method --- src/frontend.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index cd3f173..9c8a820 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -165,11 +165,10 @@ def update_data_n_label_schema(selected_example_dataset, data_project_dict, data # data_project.init_from_dict(upload_file_paths) data_project = DataProject.from_dict(data_project_dict) - data_set_len = data_project.datasets[-1].cumulative_data_count - 1 # list of len 1920, each element is a local_dataset.LocalDataset object options = [] #user_upload_data_dir = None - if data_set_len > 0: - labels = np.full((data_set_len,), -1) + if len(data_project.datasets) > 0: + labels = np.full((len(data_project.datasets),), -1) # user_upload_data_dir = os.path.dirname(data_project_dict[0]['uri']) # DataClinic options elif data_clinic_file_path is not None: @@ -263,7 +262,7 @@ def submit_dimension_reduction_job(submit_n_clicks, # check if user is using user uploaded zip file or example dataset or data clinic file data_project = DataProject.from_dict(data_project_dict) if len(data_project.datasets) > 0: - print("FMM", flush=True) + print("FM", flush=True) data_project = DataProject.from_dict(data_project_dict) io_parameters = {"data_uris": [dataset.uri for dataset in data_project.datasets], "data_tiled_api_key": data_project.api_key, @@ -548,8 +547,7 @@ def update_heatmap(click_data, selected_data, display_option, selected_images = [] data_project = DataProject.from_dict(data_project_dict) - data_set_len = data_project.datasets[-1].cumulative_data_count - 1 - if data_set_len > 0: + if len(data_project.datasets) > 0: print("FM file") selected_images, _ = data_project.read(selected_indices, export='pillow') ### DataClinic @@ -581,8 +579,7 @@ def update_heatmap(click_data, selected_data, display_option, selected_index = click_data['points'][0]['customdata'][0] ### FileManager data_project = DataProject.from_dict(data_project_dict) - data_set_len = data_project.datasets[-1].cumulative_data_count - 1 - if data_set_len > 0: + if len(data_project.datasets) > 0: selected_images, _ = data_project.read([selected_index], export='pillow') ### DataClinic elif data_clinic_file_path is not None: From 65c8917160e21072dddcfe91bf39ddd758fa57cd Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Fri, 22 Mar 2024 16:53:00 -0700 Subject: [PATCH 31/62] Remove alert for now --- src/frontend.py | 72 ++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) 
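Aside, not part of the patch: the file-manager access pattern that the two preceding patches settle on, condensed into a standalone sketch. It uses only the DataProject calls that already appear in these diffs; data_project_dict stands for the contents of the file-manager's data-project-dict store, and the index list is a placeholder.

    from file_manager.data_project import DataProject

    data_project = DataProject.from_dict(data_project_dict)
    if len(data_project.datasets) > 0:
        # Read selected images back as PIL objects; [0, 1, 2] is a placeholder index list.
        selected_images, uris = data_project.read([0, 1, 2], export="pillow")
        # Dataset metadata that gets forwarded to the Prefect flow as io_parameters.
        io_parameters = {
            "data_uris": [dataset.uri for dataset in data_project.datasets],
            "data_tiled_api_key": data_project.api_key,
            "data_type": data_project.data_type,
            "root_uri": data_project.root_uri,
        }
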
diff --git a/src/frontend.py b/src/frontend.py index 9c8a820..b41994a 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -671,44 +671,44 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): f"Labels represented: {labels_str}", ] -@app.callback( - [Output("modal", "is_open"), Output("modal-body", "children")], - [ - Input('run-algo', 'n_clicks'), - Input('run-cluster-algo', 'n_clicks'), - ], - [ - State("modal", "is_open"), - State('example-dataset-selection', 'value'), - State('user-upload-data-dir', 'data'), - State('feature-vector-model-list', 'value'), - ] -) -def toggle_modal(n_submit, n_apply, - is_open, selected_example_dataset, user_upload_data_dir, data_clinic_file_path): - ''' - This callback pop up a window to remind user to follow this flow: - select dataset -> Submit dimension reduction job -> Apply clustering - Args: - n_submit (int): Number of clicks on the 'Submit' button. - n_apply (int): Number of clicks on the 'Apply' button. - is_open (bool): Current state of the modal window (open/closed). - input_data (list): User selected data - Returns: - is_open (bool): New state of the modal window. - modal_body_text (str): Text to be displayed in the modal body. - ''' - at_least_one_dataset_selected = False - if selected_example_dataset or user_upload_data_dir or data_clinic_file_path: - at_least_one_dataset_selected = True +# @app.callback( +# [Output("modal", "is_open"), Output("modal-body", "children")], +# [ +# Input('run-algo', 'n_clicks'), +# Input('run-cluster-algo', 'n_clicks'), +# ], +# [ +# State("modal", "is_open"), +# State('example-dataset-selection', 'value'), +# State('user-upload-data-dir', 'data'), +# State('feature-vector-model-list', 'value'), +# ] +# ) +# def toggle_modal(n_submit, n_apply, +# is_open, selected_example_dataset, user_upload_data_dir, data_clinic_file_path): +# ''' +# This callback pop up a window to remind user to follow this flow: +# select dataset -> Submit dimension reduction job -> Apply clustering +# Args: +# n_submit (int): Number of clicks on the 'Submit' button. +# n_apply (int): Number of clicks on the 'Apply' button. +# is_open (bool): Current state of the modal window (open/closed). +# input_data (list): User selected data +# Returns: +# is_open (bool): New state of the modal window. +# modal_body_text (str): Text to be displayed in the modal body. +# ''' +# at_least_one_dataset_selected = False +# if selected_example_dataset or user_upload_data_dir or data_clinic_file_path: +# at_least_one_dataset_selected = True - if ((n_submit and not at_least_one_dataset_selected) or - (n_apply and not at_least_one_dataset_selected)): - return True, "Please select an example dataset or upload your own zipped dataset or choose DataClinic outpu." - elif n_apply and n_submit is None: - return True, "Please select a dimension reduction algorithm and click 'Submit' button before clustering." +# if ((n_submit and not at_least_one_dataset_selected) or +# (n_apply and not at_least_one_dataset_selected)): +# return True, "Please select an example dataset or upload your own zipped dataset or choose DataClinic outpu." +# elif n_apply and n_submit is None: +# return True, "Please select a dimension reduction algorithm and click 'Submit' button before clustering." - return False, "No alert." +# return False, "No alert." 
# @app.callback( From 44f85ad324643fea738c130dc42f77b67e4be553 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 11:59:15 -0700 Subject: [PATCH 32/62] formatting --- src/app_layout.py | 428 ++++++++++++++++++++++++++-------------------- 1 file changed, 239 insertions(+), 189 deletions(-) diff --git a/src/app_layout.py b/src/app_layout.py index 2aac91e..702128d 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -1,47 +1,55 @@ -from dash import Dash, html, dcc +import pathlib + import dash_bootstrap_components as dbc -from dash_iconify import DashIconify -from dash.long_callback import DiskcacheLongCallbackManager -import plotly.graph_objects as go import dash_uploader as du import diskcache -import pathlib -import os +import plotly.graph_objects as go +from dash import Dash, dcc, html +from dash.long_callback import DiskcacheLongCallbackManager +from dash_iconify import DashIconify +from file_manager.main import FileManager import templates -from file_manager.main import FileManager -### GLOBAL VARIABLES -ALGORITHM_DATABASE = {"PCA": "PCA", "UMAP": "UMAP",} -CLUSTER_ALGORITHM_DATABASE = {"KMeans": "KMeans", "DBSCAN": "DBSCAN", "HDBSCAN": "HDBSCAN"} +# GLOBAL VARIABLES +ALGORITHM_DATABASE = { + "PCA": "PCA", + "UMAP": "UMAP", +} +CLUSTER_ALGORITHM_DATABASE = { + "KMeans": "KMeans", + "DBSCAN": "DBSCAN", + "HDBSCAN": "HDBSCAN", +} DATA_OPTION = [ {"label": "Synthetic Shapes", "value": "data/example_shapes/Demoshapes.npz"}, - {"label": "Latent representations from encoder-decoder model", "value": "data/example_latentrepresentation/f_vectors.parquet"} + { + "label": "Latent representations from encoder-decoder model", + "value": "data/example_latentrepresentation/f_vectors.parquet", + }, ] -DOCKER_DATA = pathlib.Path.home() / 'data' #/app/work/data -UPLOAD_FOLDER_ROOT = DOCKER_DATA / 'upload' #/app/work/data/upload - -# DATA_CLINIC_OPTION = +DOCKER_DATA = pathlib.Path.home() / "data" # /app/work/data +UPLOAD_FOLDER_ROOT = DOCKER_DATA / "upload" # /app/work/data/upload -#### SETUP DASH APP #### +# SETUP DASH APP cache = diskcache.Cache("./cache") long_callback_manager = DiskcacheLongCallbackManager(cache) external_stylesheets = [dbc.themes.BOOTSTRAP, "../assets/segmentation-style.css"] -app = Dash(__name__, - external_stylesheets=external_stylesheets, - suppress_callback_exceptions=True, - long_callback_manager=long_callback_manager) +app = Dash( + __name__, + external_stylesheets=external_stylesheets, + suppress_callback_exceptions=True, + long_callback_manager=long_callback_manager, +) server = app.server -dash_file_explorer = FileManager(DOCKER_DATA, - UPLOAD_FOLDER_ROOT, - open_explorer=False) +dash_file_explorer = FileManager(DOCKER_DATA, UPLOAD_FOLDER_ROOT, open_explorer=False) dash_file_explorer.init_callbacks(app) du.configure_upload(app, UPLOAD_FOLDER_ROOT, use_upload_id=False) -#### BEGIN DASH CODE #### +# BEGIN DASH CODE header = templates.header() # right panel: uploader, scatter plot, individual image plot image_panel = [ @@ -49,72 +57,72 @@ id="image-card", children=[ dbc.CardHeader( - [ - dbc.Label('Upload your own zipped dataset', className='mr-2'), + [ + dbc.Label("Upload your own zipped dataset", className="mr-2"), dash_file_explorer.file_explorer, - dbc.Label('Or select Data Clinic modal', className='mr-2'), + dbc.Label("Or select Data Clinic modal", className="mr-2"), dcc.Dropdown( - id='feature-vector-model-list', + id="feature-vector-model-list", clearable=False, - style={'margin-bottom': '1rem'} + style={"margin-bottom": "1rem"}, ), - dbc.Label('Or try 
Example Dataset', className='mr-2'), + dbc.Label("Or try Example Dataset", className="mr-2"), dcc.Dropdown( - id='example-dataset-selection', + id="example-dataset-selection", options=DATA_OPTION, - #value = DATA_OPTION[0]['value'], clearable=False, - style={'margin-bottom': '1rem'} + style={"margin-bottom": "1rem"}, ), ] ), dbc.CardBody( dcc.Graph( id="scatter", - figure=go.Figure(go.Scattergl(mode='markers')), + figure=go.Figure(go.Scattergl(mode="markers")), ) ), - dbc.CardFooter( - dcc.Graph( - id="heatmap", - figure=go.Figure(go.Heatmap()) - ) - ) - ] + dbc.CardFooter(dcc.Graph(id="heatmap", figure=go.Figure(go.Heatmap()))), + ], ) ] # left panel: choose algorithms, submit job, choose scatter plot attributes, and statistics... algo_panel = html.Div( - [dbc.Card( - id="algo-card", - style={"width": "100%"}, - children=[ - dbc.Collapse(children=[ - dbc.CardHeader("Select Dimension Reduction Algorithms"), - dbc.CardBody( - [ - dbc.Label("Algorithm", className='mr-2'), - dcc.Dropdown(id="algo-dropdown", - options=[{"label": entry, "value": entry} for entry in ALGORITHM_DATABASE], - style={'min-width': '250px'}, - value='PCA', - ), - html.Div(id='additional-model-params'), + [ + dbc.Card( + id="algo-card", + style={"width": "100%"}, + children=[ + dbc.Collapse( + children=[ + dbc.CardHeader("Select Dimension Reduction Algorithms"), + dbc.CardBody( + [ + dbc.Label("Algorithm", className="mr-2"), + dcc.Dropdown( + id="algo-dropdown", + options=[ + {"label": entry, "value": entry} + for entry in ALGORITHM_DATABASE + ], + style={"min-width": "250px"}, + value="PCA", + ), + html.Div(id="additional-model-params"), html.Hr(), html.Div( [ - dbc.Label('Name your job', className='mr-2'), + dbc.Label("Name your job", className="mr-2"), dcc.Input( id="job-name", placeholder="test0", - style={'width':'100%'} + style={"width": "100%"}, ), ] ), html.Hr(), html.Div( - [ + [ dbc.Button( "Submit", color="secondary", @@ -122,22 +130,25 @@ outline=True, size="lg", className="m-1", - style={'width':'50%'} + style={"width": "50%"}, ), ], - className='row', - style={'align-items': 'center', 'justify-content': 'center'} + className="row", + style={ + "align-items": "center", + "justify-content": "center", + }, ), - html.Div(id='invisible-apply-div') - ] + html.Div(id="invisible-apply-div"), + ] + ), + ], + id="model-collapse", + is_open=True, + style={"margin-bottom": "0rem"}, ) ], - id="model-collapse", - is_open=True, - style = {'margin-bottom': '0rem'} - ) - ] - ) + ) ] ) @@ -147,116 +158,149 @@ id="cluster-algo-card", style={"width": "100%"}, children=[ - dbc.Collapse(children=[ + dbc.Collapse( + children=[ dbc.CardHeader("Select Clustering Algorithms"), - dbc.CardBody([ - dbc.Label("Algorithm", className='mr-2'), - dcc.Dropdown(id="cluster-algo-dropdown", - options=[{"label": entry, "value": entry} for entry in CLUSTER_ALGORITHM_DATABASE], - style={'min-width': '250px'}, - value='DBSCAN', - ), - html.Div(id='additional-cluster-params'), - html.Hr(), - html.Div( - [ - dbc.Button( - "Apply", - color="secondary", - id="run-cluster-algo", - outline=True, - size="lg", - className="m-1", - style={'width':'50%'} - ), - ], - className='row', - style={'align-items': 'center', 'justify-content': 'center'} - ), - html.Div(id='invisible-submit-div') - ] - - ) + dbc.CardBody( + [ + dbc.Label("Algorithm", className="mr-2"), + dcc.Dropdown( + id="cluster-algo-dropdown", + options=[ + {"label": entry, "value": entry} + for entry in CLUSTER_ALGORITHM_DATABASE + ], + style={"min-width": "250px"}, + value="DBSCAN", + ), + 
html.Div(id="additional-cluster-params"), + html.Hr(), + html.Div( + [ + dbc.Button( + "Apply", + color="secondary", + id="run-cluster-algo", + outline=True, + size="lg", + className="m-1", + style={"width": "50%"}, + ), + ], + className="row", + style={ + "align-items": "center", + "justify-content": "center", + }, + ), + html.Div(id="invisible-submit-div"), + ] + ), ], - id="cluster-model-collapse", - is_open=True, - style = {'margin-bottom': '0rem'} + id="cluster-model-collapse", + is_open=True, + style={"margin-bottom": "0rem"}, ) - ] + ], ) ] ) -scatter_control_panel = html.Div( - [dbc.Card( - style={"width": "100%"}, - children=[ - dbc.CardHeader("Scatter Plot Control Panel"), - dbc.CardBody([ - dbc.Label('Scatter Colors', className='mr-3'), - dcc.RadioItems(id='scatter-color', - options=[ - {'label': 'cluster', 'value': 'cluster'}, - {'label': 'label', 'value': 'label'} - ], - value = 'cluster', - style={'min-width': '250px'}, - className='mb-2'), - dbc.Label("Select cluster", className='mr-3'), - dcc.Dropdown(id='cluster-dropdown', - value=-1, - style={'min-width': '250px'}, - className='mb-2'), - dbc.Label("Select label", className='mr-3'), - dcc.Dropdown(id='label-dropdown', - value=-2, - style={'min-width': '250px'}, - ) - ]) - ] - ), - dcc.Interval( - id='interval-component', - interval=3000, # in milliseconds - max_intervals=-1, # keep triggering indefinitely, None - n_intervals=0, - ), +scatter_control_panel = html.Div( + [ + dbc.Card( + style={"width": "100%"}, + children=[ + dbc.CardHeader("Scatter Plot Control Panel"), + dbc.CardBody( + [ + dbc.Label("Scatter Colors", className="mr-3"), + dcc.RadioItems( + id="scatter-color", + options=[ + {"label": "cluster", "value": "cluster"}, + {"label": "label", "value": "label"}, + ], + value="cluster", + style={"min-width": "250px"}, + className="mb-2", + ), + dbc.Label("Select cluster", className="mr-3"), + dcc.Dropdown( + id="cluster-dropdown", + value=-1, + style={"min-width": "250px"}, + className="mb-2", + ), + dbc.Label("Select label", className="mr-3"), + dcc.Dropdown( + id="label-dropdown", + value=-2, + style={"min-width": "250px"}, + ), + ] + ), + ], + ), + dcc.Interval( + id="interval-component", + interval=3000, # in milliseconds + max_intervals=-1, # keep triggering indefinitely, None + n_intervals=0, + ), ] ) -heatmap_control_panel = html.Div( - [dbc.Card( - style={"width": "100%"}, - children=[ - dbc.CardHeader("Heatmap Control Panel"), - dbc.CardBody([ - dbc.Label([ - 'Select a Group of Points using ', - html.Span(html.I(DashIconify(icon="lucide:lasso")), className='icon'), - ' or ', - html.Span(html.I(DashIconify(icon="lucide:box-select")), className='icon'), - ' Tools :' - ], - className='mb-3'), - dbc.Label(id='stats-div', children=[ - 'Number of images selected: 0', - html.Br(), - 'Clusters represented: N/A', - html.Br(), - 'Labels represented: N/A', - ]), - dbc.Label('Display Image Options', className='mr-3'), - dcc.RadioItems(id='mean-std-toggle', - options=[ - {'label': 'Mean', 'value': 'mean'}, - {'label': 'Standard Deviation', 'value': 'sigma'} - ], - value = 'mean', - style={'min-width': '250px'}, - className='mb-2'), - ]) - ] - )] +heatmap_control_panel = html.Div( + [ + dbc.Card( + style={"width": "100%"}, + children=[ + dbc.CardHeader("Heatmap Control Panel"), + dbc.CardBody( + [ + dbc.Label( + [ + "Select a Group of Points using ", + html.Span( + html.I(DashIconify(icon="lucide:lasso")), + className="icon", + ), + " or ", + html.Span( + html.I(DashIconify(icon="lucide:box-select")), + className="icon", 
+ ), + " Tools :", + ], + className="mb-3", + ), + dbc.Label( + id="stats-div", + children=[ + "Number of images selected: 0", + html.Br(), + "Clusters represented: N/A", + html.Br(), + "Labels represented: N/A", + ], + ), + dbc.Label("Display Image Options", className="mr-3"), + dcc.RadioItems( + id="mean-std-toggle", + options=[ + {"label": "Mean", "value": "mean"}, + {"label": "Standard Deviation", "value": "sigma"}, + ], + value="mean", + style={"min-width": "250px"}, + className="mb-2", + ), + ] + ), + ], + ) + ] ) # add alert pop up window @@ -274,7 +318,13 @@ ) -control_panel = [algo_panel, cluster_algo_panel, scatter_control_panel, heatmap_control_panel, modal] +control_panel = [ + algo_panel, + cluster_algo_panel, + scatter_control_panel, + heatmap_control_panel, + modal, +] # metadata @@ -283,34 +333,34 @@ id="no-display", children=[ # Store for user created contents - dcc.Store(id='image-length', data=0), - dcc.Store(id='user-upload-data-dir', data=None), - dcc.Store(id='dataset-options', data=DATA_OPTION), - dcc.Store(id='run-counter', data=0), - dcc.Store(id='experiment-id', data=None), + dcc.Store(id="image-length", data=0), + dcc.Store(id="user-upload-data-dir", data=None), + dcc.Store(id="dataset-options", data=DATA_OPTION), + dcc.Store(id="run-counter", data=0), + dcc.Store(id="experiment-id", data=None), # data_label_schema, latent vectors, clusters - dcc.Store(id='input_labels', data=None), - dcc.Store(id='label_schema', data=None), - dcc.Store(id='model_id', data=None), - dcc.Store(id='latent_vectors', data=None), - dcc.Store(id='clusters', data=None), + dcc.Store(id="input_labels", data=None), + dcc.Store(id="label_schema", data=None), + dcc.Store(id="model_id", data=None), + dcc.Store(id="latent_vectors", data=None), + dcc.Store(id="clusters", data=None), ], ) ] -##### DEFINE LAYOUT #### +# DEFINE LAYOUT app.layout = html.Div( [ - header, + header, dbc.Container( - children = [ - dbc.Row([ dbc.Col(control_panel, width=4), - dbc.Col(image_panel, width=7) - ]), + children=[ + dbc.Row( + [dbc.Col(control_panel, width=4), dbc.Col(image_panel, width=7)] + ), dbc.Row(dbc.Col(meta)), ] ), - modal + modal, ] -) \ No newline at end of file +) From 28a5b1021d391425094bc9bee38186426c7ee1a1 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 12:04:50 -0700 Subject: [PATCH 33/62] formatting --- src/frontend.py | 681 ++++++++++++++++++++++++------------------------ 1 file changed, 344 insertions(+), 337 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index b41994a..d3d470f 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -1,38 +1,36 @@ -import dash -from dash import html, Input, Output, State -from dash.exceptions import PreventUpdate -import plotly.graph_objects as go -import pandas as pd -import numpy as np -from sklearn.cluster import MiniBatchKMeans, DBSCAN, HDBSCAN -import pathlib import json -import uuid -import requests import os -import requests -import pytz +import pathlib +import uuid from datetime import datetime - +import numpy as np +import pandas as pd +import plotly.graph_objects as go +import pytz +import requests +from dash import Input, Output, State, html +from dash.exceptions import PreventUpdate from file_manager.data_project import DataProject +from sklearn.cluster import DBSCAN, HDBSCAN, MiniBatchKMeans -from app_layout import app, DOCKER_DATA, UPLOAD_FOLDER_ROOT -from latentxp_utils import kmeans_kwargs, dbscan_kwargs, hdbscan_kwargs, hex_to_rgba, generate_scatter_data, remove_key_from_dict_list, get_content, 
get_trained_models_list, load_images_by_indices +from app_layout import app from dash_component_editor import JSONParameterEditor - -from utils_prefect import ( - get_children_flow_run_ids, - get_flow_run_name, - get_flow_runs_by_name, - schedule_prefect_flow, +from latentxp_utils import ( + dbscan_kwargs, + generate_scatter_data, + hdbscan_kwargs, + hex_to_rgba, + kmeans_kwargs, + load_images_by_indices, + remove_key_from_dict_list, ) +from utils_prefect import get_children_flow_run_ids, schedule_prefect_flow - -#### GLOBAL PARAMS #### -DATA_DIR = str(os.environ['DATA_DIR']) -USER = 'admin' #'mlexchange-team' # move to env file -OUTPUT_DIR = pathlib.Path('data/mlexchange_store/' + USER) +# GLOBAL PARAMS +DATA_DIR = str(os.environ["DATA_DIR"]) +USER = "admin" # 'mlexchange-team' move to env file +OUTPUT_DIR = pathlib.Path("data/mlexchange_store/" + USER) UPLOAD_FOLDER_ROOT = "data/upload" PREFECT_TAGS = json.loads(os.getenv("PREFECT_TAGS", '["latent-space-explorer"]')) TIMEZONE = os.getenv("TIMEZONE", "US/Pacific") @@ -71,78 +69,86 @@ } - @app.callback( - Output('additional-model-params', 'children'), - Output('model_id', 'data'), - Input('algo-dropdown', 'value') + Output("additional-model-params", "children"), + Output("model_id", "data"), + Input("algo-dropdown", "value"), ) def show_dimension_reduction_gui_layouts(selected_algo): - ''' + """ This callback display dropdown menu in the frontend for different dimension reduction algos Args: selected_algo: Selected dimension reduction algorithm Returns: item_list: dropdown menu html code model_uid: selected algo's uid - ''' - data = requests.get('http://content-api:8000/api/v0/models').json() # all model - - if selected_algo == 'PCA': - conditions = {'name': 'PCA'} - elif selected_algo == 'UMAP': - conditions = {'name': 'UMAP'} - - model = [d for d in data if all((k in d and d[k] == v) for k, v in conditions.items())] # filter pca or umap - model_uid = model[0]['content_id'] - new_model = remove_key_from_dict_list(model[0]["gui_parameters"], 'comp_group') - - item_list = JSONParameterEditor(_id={'type': str(uuid.uuid4())}, - json_blob=new_model, + """ + data = requests.get("http://content-api:8000/api/v0/models").json() # all model + + if selected_algo == "PCA": + conditions = {"name": "PCA"} + elif selected_algo == "UMAP": + conditions = {"name": "UMAP"} + + model = [ + d for d in data if all((k in d and d[k] == v) for k, v in conditions.items()) + ] # filter pca or umap + model_uid = model[0]["content_id"] + new_model = remove_key_from_dict_list(model[0]["gui_parameters"], "comp_group") + + item_list = JSONParameterEditor( + _id={"type": str(uuid.uuid4())}, + json_blob=new_model, ) item_list.init_callbacks(app) - + return item_list, model_uid + @app.callback( - Output('additional-cluster-params', 'children'), - Input('cluster-algo-dropdown', 'value'), + Output("additional-cluster-params", "children"), + Input("cluster-algo-dropdown", "value"), ) def show_clustering_gui_layouts(selected_algo): - ''' + """ This callback display drop down menu in the fronend for different clustering algos Args: selected_algo: selected clustering algorithm Returns: item_list: dropdown menu html code - ''' - if selected_algo == 'KMeans': + """ + if selected_algo == "KMeans": kwargs = kmeans_kwargs - elif selected_algo == 'DBSCAN': + elif selected_algo == "DBSCAN": kwargs = dbscan_kwargs - elif selected_algo == 'HDBSCAN': + elif selected_algo == "HDBSCAN": kwargs = hdbscan_kwargs - - item_list = JSONParameterEditor(_id={'type': str(uuid.uuid4())}, - 
json_blob=kwargs["gui_parameters"]) + + item_list = JSONParameterEditor( + _id={"type": str(uuid.uuid4())}, json_blob=kwargs["gui_parameters"] + ) item_list.init_callbacks(app) return item_list + @app.callback( [ - Output('input_labels', 'data'), - Output('label_schema', 'data'), - Output('label-dropdown', 'options'), - # Output('user-upload-data-dir', 'data'), + Output("input_labels", "data"), + Output("label_schema", "data"), + Output("label-dropdown", "options"), ], [ - Input('example-dataset-selection', 'value'), # example dataset - Input({'base_id': 'file-manager', 'name': 'data-project-dict'},'data'), # FM dataset - Input('feature-vector-model-list', 'value'), # data clinic dataset - ] + Input("example-dataset-selection", "value"), # example dataset + Input( + {"base_id": "file-manager", "name": "data-project-dict"}, "data" + ), # FM dataset + Input("feature-vector-model-list", "value"), # data clinic dataset + ], ) -def update_data_n_label_schema(selected_example_dataset, data_project_dict, data_clinic_file_path): - ''' +def update_data_n_label_schema( + selected_example_dataset, data_project_dict, data_clinic_file_path +): + """ This callback updates the selected dataset from the provided example datasets, as well as labels, and label schema Args: example-dataset-selection: selected dataset from the provided example datasets, not the one that user uploaded @@ -153,79 +159,82 @@ def update_data_n_label_schema(selected_example_dataset, data_project_dict, data label_schema: the text of each unique label label_dropdown: label dropdown options user_upload_data_dir: dir name for the user uploaded zip file - ''' + """ labels = None label_schema = {} # check if user is using user uploaded zip file or example dataset or data clinic file # priority level: FileManage > DataClinic > Example Datasets - # FileManager - user uploaded zip file of images - # data_project = DataProject() - # data_project.init_from_dict(upload_file_paths) - data_project = DataProject.from_dict(data_project_dict) options = [] - #user_upload_data_dir = None + # user_upload_data_dir = None if len(data_project.datasets) > 0: labels = np.full((len(data_project.datasets),), -1) - # user_upload_data_dir = os.path.dirname(data_project_dict[0]['uri']) # DataClinic options elif data_clinic_file_path is not None: df = pd.read_parquet(data_clinic_file_path) - # data = df.values labels = np.full((df.shape[0],), -1) # Example dataset option 1 elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": - # data = np.load("/app/work/" + selected_example_dataset)['arr_0'] labels = np.load("/app/work/data/example_shapes/DemoLabels.npy") f = open("/app/work/data/example_shapes/label_schema.json") label_schema = json.load(f) # Example dataset option 2 - elif selected_example_dataset == "data/example_latentrepresentation/f_vectors.parquet": + elif ( + selected_example_dataset + == "data/example_latentrepresentation/f_vectors.parquet" + ): df = pd.read_parquet("/app/work/" + selected_example_dataset) - # data = df.values labels = np.full((df.shape[0],), -1) + if label_schema: + options = [ + {"label": f"Label {label}", "value": label} for label in label_schema + ] + options.insert(0, {"label": "Unlabeled", "value": -1}) + options.insert(0, {"label": "All", "value": -2}) - if label_schema: - options = [{'label': f'Label {label}', 'value': label} for label in label_schema] - options.insert(0, {'label': 'Unlabeled', 'value': -1}) - options.insert(0, {'label': 'All', 'value': -2}) + return labels, label_schema, options - return 
labels, label_schema, options #, user_upload_data_dir @app.callback( [ # flag the read variable - Output('experiment-id', 'data'), + Output("experiment-id", "data"), # reset scatter plot control panel - Output('scatter-color', 'value'), - Output('cluster-dropdown', 'value'), - Output('label-dropdown', 'value'), + Output("scatter-color", "value"), + Output("cluster-dropdown", "value"), + Output("label-dropdown", "value"), # reset heatmap - Output('heatmap', 'figure', allow_duplicate=True), + Output("heatmap", "figure", allow_duplicate=True), # reset interval value to - Output('interval-component', 'max_intervals'), + Output("interval-component", "max_intervals"), ], - Input('run-algo', 'n_clicks'), + Input("run-algo", "n_clicks"), [ - State('job-name', 'value'), # job_name - State('example-dataset-selection', 'value'), # 2 example dataset - # State('user-upload-data-dir', 'data'), # FM - State('feature-vector-model-list', 'value'), # DataClinic - State('model_id', 'data'), - State('algo-dropdown', 'value'), - State('additional-model-params', 'children'), - State({"base_id": "file-manager", "name": "data-project-dict"}, "data") # DataProject for FM - + State("job-name", "value"), # job_name + State("example-dataset-selection", "value"), # 2 example dataset + State("feature-vector-model-list", "value"), # DataClinic + State("model_id", "data"), + State("algo-dropdown", "value"), + State("additional-model-params", "children"), + State( + {"base_id": "file-manager", "name": "data-project-dict"}, "data" + ), # DataProject for FM ], - prevent_initial_call=True + prevent_initial_call=True, ) -def submit_dimension_reduction_job(submit_n_clicks, - job_name, - selected_example_dataset, data_clinic_file_path, - model_id, selected_algo, children, data_project_dict): +def submit_dimension_reduction_job( + submit_n_clicks, + job_name, + selected_example_dataset, + data_clinic_file_path, + model_id, + selected_algo, + children, + data_project_dict, +): """ This callback is triggered every time the Submit button is hit: - compute latent vectors, which will be saved in data/output/experiment_id @@ -248,84 +257,87 @@ def submit_dimension_reduction_job(submit_n_clicks, """ if not submit_n_clicks: raise PreventUpdate - if not selected_example_dataset and not data_project_dict and not data_clinic_file_path: + if ( + not selected_example_dataset + and not data_project_dict + and not data_clinic_file_path + ): raise PreventUpdate input_params = {} if children: - for child in children['props']['children']: - key = child["props"]["children"][1]["props"]["id"]["param_key"] + for child in children["props"]["children"]: + key = child["props"]["children"][1]["props"]["id"]["param_key"] value = child["props"]["children"][1]["props"]["value"] input_params[key] = value print("Dimension reduction algo params: ", input_params, flush=True) - - # check if user is using user uploaded zip file or example dataset or data clinic file + + # check if user is using user uploaded zip file or example dataset or data clinic file data_project = DataProject.from_dict(data_project_dict) if len(data_project.datasets) > 0: print("FM", flush=True) data_project = DataProject.from_dict(data_project_dict) - io_parameters = {"data_uris": [dataset.uri for dataset in data_project.datasets], - "data_tiled_api_key": data_project.api_key, - "data_type": data_project.data_type, - "root_uri": data_project.root_uri, - } - - # elif data_clinic_file_path is not None: - # selected_dataset = data_clinic_file_path + io_parameters = { + "data_uris": 
[dataset.uri for dataset in data_project.datasets], + "data_tiled_api_key": data_project.api_key, + "data_type": data_project.data_type, + "root_uri": data_project.root_uri, + } + else: print("selected_example_dataset: " + selected_example_dataset, flush=True) - io_parameters = {"data_uris": [selected_example_dataset], - "data_tiled_api_key": None, - "data_type": "file", - "root_uri": None, - } + io_parameters = { + "data_uris": [selected_example_dataset], + "data_tiled_api_key": None, + "data_type": "file", + "root_uri": None, + } # prefect current_time = datetime.now(pytz.timezone(TIMEZONE)).strftime("%Y/%m/%d %H:%M:%S") - if not job_name: job_name = "test0" + if not job_name: + job_name = "test0" job_name += " " + str(current_time) # project_name = selected_dataset.split("/")[-1] # name of the dataset, get it from FM ## this is an issue project_name = "fake_name" print(PREFECT_TAGS, flush=True) - - # create user directory to store users data/experiments - # experiment_id = str(uuid.uuid4()) - # output_path = OUTPUT_DIR / experiment_id - # output_path.mkdir(parents=True, exist_ok=True) # check which dimension reduction algo, then compose command - if selected_algo == 'PCA': + if selected_algo == "PCA": TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python pca_run.py" - elif selected_algo == 'UMAP': + elif selected_algo == "UMAP": TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python umap_run.py" - + TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"] = io_parameters - TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["output_dir"] = str(OUTPUT_DIR) + TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["output_dir"] = ( + str(OUTPUT_DIR) + ) TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["uid_save"] = "" TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["model_parameters"] = input_params print(TRAIN_PARAMS_EXAMPLE) # run prefect job, job_uid is the new experiment id -> uid_save in the pca_example.yaml file job_uid = schedule_prefect_flow( - FLOW_NAME, - parameters=TRAIN_PARAMS_EXAMPLE, - flow_run_name=f"{job_name} {current_time}", - tags=PREFECT_TAGS + ["train", project_name], - ) + FLOW_NAME, + parameters=TRAIN_PARAMS_EXAMPLE, + flow_run_name=f"{job_name} {current_time}", + tags=PREFECT_TAGS + ["train", project_name], + ) job_message = f"Job has been succesfully submitted with uid: {job_uid}." 
print(job_message, flush=True) - - return job_uid, 'cluster', -1, -2, go.Figure(go.Heatmap()), -1 + + return job_uid, "cluster", -1, -2, go.Figure(go.Heatmap()), -1 + @app.callback( - [ - Output('latent_vectors', 'data'), - Output('interval-component', 'max_intervals', allow_duplicate=True), + [ + Output("latent_vectors", "data"), + Output("interval-component", "max_intervals", allow_duplicate=True), ], - Input('interval-component', 'n_intervals'), - State('experiment-id', 'data'), - State('interval-component', 'max_intervals'), - prevent_initial_call=True + Input("interval-component", "n_intervals"), + State("experiment-id", "data"), + State("interval-component", "max_intervals"), + prevent_initial_call=True, ) def read_latent_vectors(n_intervals, experiment_id, max_intervals): """ @@ -342,37 +354,39 @@ def read_latent_vectors(n_intervals, experiment_id, max_intervals): """ if experiment_id is None or n_intervals == 0 or max_intervals == 0: raise PreventUpdate - + children_flows = get_children_flow_run_ids(experiment_id) print("child flow") print(children_flows) - #read the latent vectors from the output dir + # read the latent vectors from the output dir output_path = OUTPUT_DIR / children_flows[0] - npz_files = list(output_path.glob('*.npy')) - if len(npz_files) > 0 : - lv_filepath = npz_files[0] # latent vector file path + npz_files = list(output_path.glob("*.npy")) + if len(npz_files) > 0: + lv_filepath = npz_files[0] # latent vector file path latent_vectors = np.load(str(lv_filepath)) print("latent vector", latent_vectors.shape) return latent_vectors, 0 else: return None, -1 - + + @app.callback( [ - Output('clusters', 'data'), - Output('cluster-dropdown', 'options'), + Output("clusters", "data"), + Output("cluster-dropdown", "options"), ], - Input('run-cluster-algo', 'n_clicks'), + Input("run-cluster-algo", "n_clicks"), [ - State('latent_vectors', 'data'), - State('cluster-algo-dropdown', 'value'), - State('additional-cluster-params', 'children'), - State('experiment-id', 'data'), - ] + State("latent_vectors", "data"), + State("cluster-algo-dropdown", "value"), + State("additional-cluster-params", "children"), + State("experiment-id", "data"), + ], ) -def apply_clustering(apply_n_clicks, - latent_vectors, selected_algo, children, experiment_id): +def apply_clustering( + apply_n_clicks, latent_vectors, selected_algo, children, experiment_id +): """ This callback is triggered by click the 'Apply' button at the clustering panel: - apply cluster @@ -386,58 +400,72 @@ def apply_clustering(apply_n_clicks, Returns: clusters: clustering result for each data point """ - ## TODO: pop up a widow to ask user to first run diemnsion reduction then apply + # TODO: pop up a widow to ask user to first run diemnsion reduction then apply if apply_n_clicks == 0 or experiment_id is None: raise PreventUpdate latent_vectors = np.array(latent_vectors) input_params = {} if children: - for child in children['props']['children']: - key = child["props"]["children"][1]["props"]["id"]["param_key"] + for child in children["props"]["children"]: + key = child["props"]["children"][1]["props"]["id"]["param_key"] value = child["props"]["children"][1]["props"]["value"] input_params[key] = value print("Clustering params:", input_params) - + if selected_algo == "KMeans": - obj = MiniBatchKMeans(n_clusters=input_params['n_clusters']) + obj = MiniBatchKMeans(n_clusters=input_params["n_clusters"]) elif selected_algo == "DBSCAN": - obj = DBSCAN(eps=input_params['eps'], min_samples=input_params['min_samples']) + obj = 
DBSCAN(eps=input_params["eps"], min_samples=input_params["min_samples"]) elif selected_algo == "HDBSCAN": - obj = HDBSCAN(min_cluster_size=input_params['min_cluster_size']) + obj = HDBSCAN(min_cluster_size=input_params["min_cluster_size"]) clusters, options = None, None if obj: clusters = obj.fit_predict(latent_vectors) output_path = OUTPUT_DIR / experiment_id - np.save(output_path/'clusters.npy', clusters) + np.save(output_path / "clusters.npy", clusters) unique_clusters = np.unique(clusters) - options = [{'label': f'Cluster {cluster}', 'value': cluster} for cluster in unique_clusters if cluster != -1] - options.insert(0, {'label': 'All', 'value': -1}) + options = [ + {"label": f"Cluster {cluster}", "value": cluster} + for cluster in unique_clusters + if cluster != -1 + ] + options.insert(0, {"label": "All", "value": -1}) return clusters, options + @app.callback( - Output('scatter', 'figure'), + Output("scatter", "figure"), [ - Input('latent_vectors', 'data'), - Input('cluster-dropdown', 'value'), - Input('label-dropdown', 'value'), - Input('scatter-color', 'value'), - Input('clusters', 'data'), #move clusters to the input + Input("latent_vectors", "data"), + Input("cluster-dropdown", "value"), + Input("label-dropdown", "value"), + Input("scatter-color", "value"), + Input("clusters", "data"), # move clusters to the input ], [ - State('scatter', 'figure'), - State('scatter', 'selectedData'), - State('additional-model-params', 'children'), - - State('input_labels', 'data'), - State('label_schema', 'data'), - ] + State("scatter", "figure"), + State("scatter", "selectedData"), + State("additional-model-params", "children"), + State("input_labels", "data"), + State("label_schema", "data"), + ], ) -def update_scatter_plot(latent_vectors, selected_cluster, selected_label, scatter_color, clusters, - current_figure, selected_data, children, labels, label_names): - ''' +def update_scatter_plot( + latent_vectors, + selected_cluster, + selected_label, + scatter_color, + clusters, + current_figure, + selected_data, + children, + labels, + label_names, +): + """ This callback update the scater plot Args: latent_vectors: data from dimension reduction algos @@ -452,48 +480,56 @@ def update_scatter_plot(latent_vectors, selected_cluster, selected_label, scatte label_names: same as label_schema defined earlier Returns: fig: updated scatter figure - ''' + """ if latent_vectors is None or children is None: raise PreventUpdate latent_vectors = np.array(latent_vectors) print("latent vector shape:", latent_vectors.shape) - n_components = children['props']['children'][0]["props"]["children"][1]["props"]["value"] + n_components = children["props"]["children"][0]["props"]["children"][1]["props"][ + "value" + ] - # if selected_data is not None and len(selected_data.get('points', [])) > 0: - # selected_indices = [point['customdata'][0] for point in selected_data['points']] - if selected_data is not None and len(selected_data.get('points', [])) > 0: + if selected_data is not None and len(selected_data.get("points", [])) > 0: selected_indices = [] - for point in selected_data['points']: - if 'customdata' in point and len(point['customdata']): - selected_indices.append(point['customdata'][0]) + for point in selected_data["points"]: + if "customdata" in point and len(point["customdata"]): + selected_indices.append(point["customdata"][0]) print("selected indices: ", selected_indices) else: selected_indices = None - - if not clusters: # when clusters is None, i.e., after submit dimension reduction but before apply 
clustering + + if ( + not clusters + ): # when clusters is None, i.e., after submit dimension reduction but before apply clustering clusters = [-1 for i in range(latent_vectors.shape[0])] cluster_names = {a: a for a in np.unique(clusters).astype(int)} - - scatter_data = generate_scatter_data(latent_vectors, - n_components, - selected_cluster, - clusters, - cluster_names, - selected_label, - labels, - label_names, - scatter_color) + + scatter_data = generate_scatter_data( + latent_vectors, + n_components, + selected_cluster, + clusters, + cluster_names, + selected_label, + labels, + label_names, + scatter_color, + ) fig = go.Figure(scatter_data) fig.update_layout(legend=dict(tracegroupgap=20)) - if current_figure and 'xaxis' in current_figure['layout'] and 'yaxis' in current_figure[ - 'layout'] and 'autorange' in current_figure['layout']['xaxis'] and current_figure['layout']['xaxis'][ - 'autorange'] is False: + if ( + current_figure + and "xaxis" in current_figure["layout"] + and "yaxis" in current_figure["layout"] + and "autorange" in current_figure["layout"]["xaxis"] + and current_figure["layout"]["xaxis"]["autorange"] is False + ): # Update the axis range with current figure's values if available and if autorange is False - fig.update_xaxes(range=current_figure['layout']['xaxis']['range']) - fig.update_yaxes(range=current_figure['layout']['yaxis']['range']) + fig.update_xaxes(range=current_figure["layout"]["xaxis"]["range"]) + fig.update_yaxes(range=current_figure["layout"]["yaxis"]["range"]) else: # If it's the initial figure or autorange is True, set autorange to True to fit all points in view fig.update_xaxes(autorange=True) @@ -503,29 +539,38 @@ def update_scatter_plot(latent_vectors, selected_cluster, selected_label, scatte # Use the selected indices to highlight the selected points in the updated figure for trace in fig.data: if trace.marker.color is not None: - trace.marker.color = [hex_to_rgba('grey', 0.3) if i not in selected_indices else 'red' for i in - range(len(trace.marker.color))] + trace.marker.color = [ + hex_to_rgba("grey", 0.3) if i not in selected_indices else "red" + for i in range(len(trace.marker.color)) + ] return fig + @app.callback( - Output('heatmap', 'figure', allow_duplicate=True), + Output("heatmap", "figure", allow_duplicate=True), [ - Input('scatter', 'clickData'), - Input('scatter', 'selectedData'), - Input('mean-std-toggle', 'value'), + Input("scatter", "clickData"), + Input("scatter", "selectedData"), + Input("mean-std-toggle", "value"), ], [ - State('example-dataset-selection', 'value'), # example dataset - State({"base_id": "file-manager", "name": "data-project-dict"}, "data"), # DataProject for FM - State('feature-vector-model-list', 'value'), # data clinic dataset - + State("example-dataset-selection", "value"), # example dataset + State( + {"base_id": "file-manager", "name": "data-project-dict"}, "data" + ), # DataProject for FM + State("feature-vector-model-list", "value"), # data clinic dataset ], - prevent_initial_call=True - + prevent_initial_call=True, ) -def update_heatmap(click_data, selected_data, display_option, - selected_example_dataset, data_project_dict, data_clinic_file_path): - ''' +def update_heatmap( + click_data, + selected_data, + display_option, + selected_example_dataset, + data_project_dict, + data_clinic_file_path, +): + """ This callback update the heatmap Args: click_data: clicked data on scatter figure @@ -533,117 +578,135 @@ def update_heatmap(click_data, selected_data, display_option, display_option: option to display 
mean or std Returns: fig: updated heatmap - ''' - if not selected_example_dataset and not data_project_dict and not data_clinic_file_path: + """ + if ( + not selected_example_dataset + and not data_project_dict + and not data_clinic_file_path + ): raise PreventUpdate - + # user select a group of points - if selected_data is not None and len(selected_data['points']) > 0: - selected_indices = [point['customdata'][0] for point in selected_data['points']] # Access customdata for the original indices + if selected_data is not None and len(selected_data["points"]) > 0: + selected_indices = [ + point["customdata"][0] for point in selected_data["points"] + ] # Access customdata for the original indices print("selected_indices", selected_indices) - - ### FileManager + + # FileManager # print("upload_file_paths") # if not selected, its an empty list not None selected_images = [] data_project = DataProject.from_dict(data_project_dict) if len(data_project.datasets) > 0: print("FM file") - selected_images, _ = data_project.read(selected_indices, export='pillow') - ### DataClinic + selected_images, _ = data_project.read(selected_indices, export="pillow") + # DataClinic elif data_clinic_file_path is not None: print("data_clinic_file_path") print(data_clinic_file_path) directory_path = os.path.dirname(data_clinic_file_path) selected_images = load_images_by_indices(directory_path, selected_indices) - ### Example dataset + # Example dataset elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": print("Demoshapes.npz") - selected_images = np.load("/app/work/" + selected_example_dataset)['arr_0'][selected_indices] + selected_images = np.load("/app/work/" + selected_example_dataset)["arr_0"][ + selected_indices + ] print(selected_images.shape) - elif selected_example_dataset == "data/example_latentrepresentation/f_vectors.parquet": + elif ( + selected_example_dataset + == "data/example_latentrepresentation/f_vectors.parquet" + ): print("f_vectors.parque") df = pd.read_parquet("/app/work/" + selected_example_dataset) selected_images = df.iloc[selected_indices].values selected_images = np.array(selected_images) print("selected_images shape:", selected_images.shape) - + # display options - if display_option == 'mean': + if display_option == "mean": heatmap_data = go.Heatmap(z=np.mean(selected_images, axis=0)) - elif display_option == 'sigma': + elif display_option == "sigma": heatmap_data = go.Heatmap(z=np.std(selected_images, axis=0)) - elif click_data is not None and len(click_data['points']) > 0: - selected_index = click_data['points'][0]['customdata'][0] - ### FileManager + elif click_data is not None and len(click_data["points"]) > 0: + selected_index = click_data["points"][0]["customdata"][0] + # FileManager data_project = DataProject.from_dict(data_project_dict) if len(data_project.datasets) > 0: - selected_images, _ = data_project.read([selected_index], export='pillow') - ### DataClinic + selected_images, _ = data_project.read([selected_index], export="pillow") + # DataClinic elif data_clinic_file_path is not None: directory_path = os.path.dirname(data_clinic_file_path) clicked_image = load_images_by_indices(directory_path, [selected_index]) - ### Example dataset + # Example dataset elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": - clicked_image = np.load("/app/work/" + selected_example_dataset)['arr_0'][selected_index] - elif selected_example_dataset == "data/example_latentrepresentation/f_vectors.parquet": + clicked_image = np.load("/app/work/" + 
selected_example_dataset)["arr_0"][ + selected_index + ] + elif ( + selected_example_dataset + == "data/example_latentrepresentation/f_vectors.parquet" + ): df = pd.read_parquet("/app/work/" + selected_example_dataset) clicked_image = df.iloc[selected_index].values clicked_image = np.array(clicked_image) - + heatmap_data = go.Heatmap(z=clicked_image) else: heatmap_data = go.Heatmap() # only update heat map when the input data is 2d images, do not update for input latent vectors - if heatmap_data['z'] is None or len(np.shape(heatmap_data['z'])) < 2: + if heatmap_data["z"] is None or len(np.shape(heatmap_data["z"])) < 2: raise PreventUpdate - + # Determine the aspect ratio based on the shape of the heatmap_data's z-values aspect_x = 1 aspect_y = 1 - if heatmap_data['z'] is not None: - if heatmap_data['z'].size > 0: - print(np.shape(heatmap_data['z'])) - aspect_y, aspect_x = np.shape(heatmap_data['z'])[-2:] + if heatmap_data["z"] is not None: + if heatmap_data["z"].size > 0: + print(np.shape(heatmap_data["z"])) + aspect_y, aspect_x = np.shape(heatmap_data["z"])[-2:] return go.Figure( data=heatmap_data, layout=dict( autosize=True, yaxis=dict(scaleanchor="x", scaleratio=aspect_y / aspect_x), - ) + ), ) + @app.callback( - Output('stats-div', 'children'), - Input('scatter', 'selectedData'), + Output("stats-div", "children"), + Input("scatter", "selectedData"), [ - State('clusters', 'data'), - State('input_labels', 'data'), - State('label_schema', 'data'), - ] + State("clusters", "data"), + State("input_labels", "data"), + State("label_schema", "data"), + ], ) def update_statistics(selected_data, clusters, assigned_labels, label_names): - ''' + """ This callback update the statistics panel Args: selected_data: lasso or rect selected data points on scatter figure clusters: clusters for latent vectors assigned_labels: labels for each latent vector - label_names: same as label schema + label_names: same as label schema Returns: [num_images, clusters, labels]: statistics - ''' - + """ + assigned_labels = np.array(assigned_labels) - if selected_data is not None and len(selected_data['points']) > 0: - selected_indices = [point['customdata'][0] for point in - selected_data['points']] # Access customdata for the original indices + if selected_data is not None and len(selected_data["points"]) > 0: + selected_indices = [ + point["customdata"][0] for point in selected_data["points"] + ] # Access customdata for the original indices selected_clusters = [] if clusters is not None: clusters = np.array(clusters) @@ -657,7 +720,9 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): # Format the clusters and labels as comma-separated strings clusters_str = ", ".join(str(cluster) for cluster in unique_clusters) label_int_to_str_map = {val: key for key, val in label_names.items()} - labels_str = ", ".join(str(label_int_to_str_map[label]) for label in unique_labels if label >= 0) + labels_str = ", ".join( + str(label_int_to_str_map[label]) for label in unique_labels if label >= 0 + ) else: num_images = 0 clusters_str = "N/A" @@ -671,68 +736,10 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): f"Labels represented: {labels_str}", ] -# @app.callback( -# [Output("modal", "is_open"), Output("modal-body", "children")], -# [ -# Input('run-algo', 'n_clicks'), -# Input('run-cluster-algo', 'n_clicks'), -# ], -# [ -# State("modal", "is_open"), -# State('example-dataset-selection', 'value'), -# State('user-upload-data-dir', 'data'), -# State('feature-vector-model-list', 
'value'), -# ] -# ) -# def toggle_modal(n_submit, n_apply, -# is_open, selected_example_dataset, user_upload_data_dir, data_clinic_file_path): -# ''' -# This callback pop up a window to remind user to follow this flow: -# select dataset -> Submit dimension reduction job -> Apply clustering -# Args: -# n_submit (int): Number of clicks on the 'Submit' button. -# n_apply (int): Number of clicks on the 'Apply' button. -# is_open (bool): Current state of the modal window (open/closed). -# input_data (list): User selected data -# Returns: -# is_open (bool): New state of the modal window. -# modal_body_text (str): Text to be displayed in the modal body. -# ''' -# at_least_one_dataset_selected = False -# if selected_example_dataset or user_upload_data_dir or data_clinic_file_path: -# at_least_one_dataset_selected = True - -# if ((n_submit and not at_least_one_dataset_selected) or -# (n_apply and not at_least_one_dataset_selected)): -# return True, "Please select an example dataset or upload your own zipped dataset or choose DataClinic outpu." -# elif n_apply and n_submit is None: -# return True, "Please select a dimension reduction algorithm and click 'Submit' button before clustering." - -# return False, "No alert." - - -# @app.callback( -# Output('feature-vector-model-list', 'options'), -# Input('interval-component', 'n_intervals'), -# ) -# def update_trained_model_list(interval): -# ''' -# This callback updates the list of trained models -# Args: -# tab_value: Tab option -# prob_refresh_n_clicks: Button to refresh the list of probability-based trained models -# similarity_refresh_n_clicks: Button to refresh the list of similarity-based trained models -# Returns: -# prob_model_list: List of trained models in mlcoach -# similarity_model_list: List of trained models in data clinic and mlcoach -# ''' -# data_clinic_models = get_trained_models_list(USER, 'data_clinic') -# ml_coach_models = get_trained_models_list(USER, 'mlcoach') -# feature_vector_models = data_clinic_models + ml_coach_models -# #print(feature_vector_models) - -# return feature_vector_models - - -if __name__ == '__main__': - app.run_server(debug=True, host='0.0.0.0', port=8070, ) + +if __name__ == "__main__": + app.run_server( + debug=True, + host="0.0.0.0", + port=8070, + ) From 7105ae392ba5c1f51a2b73bdf74e12789f6c786e Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 12:09:51 -0700 Subject: [PATCH 34/62] replacing env for example and updating gitignore --- .env | 31 ------------------------------- .env.example | 8 ++++++++ .gitignore | 2 +- 3 files changed, 9 insertions(+), 32 deletions(-) delete mode 100644 .env create mode 100644 .env.example diff --git a/.env b/.env deleted file mode 100644 index 4ea6411..0000000 --- a/.env +++ /dev/null @@ -1,31 +0,0 @@ -USER = admin - -TILED_SINGLE_USER_API_KEY= - -PREFECT_DB_PW=unique_password -PREFECT_DB_USER=prefect_user -PREFECT_DB_NAME=prefect -PREFECT_DB_SERVER=prefect_db -PREFECT_API_URL=http://prefect:4200/api -FLOW_NAME="Parent flow/launch_parent_flow" -TIMEZONE="US/Pacific" - -PREFECT_TAGS='["latent-space-explorer"]' - -TILED_DB_PW= -TILED_DB_USER=tiled_user -TILED_DB_NAME=tiled -TILED_DB_SERVER=tiled_db - -TILED_SINGLE_USER_API_KEY= - - -MLEX_SEGM_USER=mlex_segm_user -MLEX_SEGM_PW= - -TILED_API_KEY= - -TILED_INGEST_TILED_CONFIG_PATH=/deploy/config -TILED_INGEST_RMQ_HOST=rabbitmq -TILED_INGEST_RMQ_USER=guest -TILED_INGEST_RMQ_PW=guest \ No newline at end of file diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..269e4d2 --- /dev/null +++ 
b/.env.example @@ -0,0 +1,8 @@ +USER=admin + +PREFECT_API_URL=http://prefect:4200/api +FLOW_NAME="Parent flow/launch_parent_flow" +TIMEZONE="US/Pacific" +PREFECT_TAGS='["latent-space-explorer"]' + +TILED_API_KEY= diff --git a/.gitignore b/.gitignore index 6968502..57516b2 100644 --- a/.gitignore +++ b/.gitignore @@ -128,7 +128,7 @@ celerybeat.pid *.sage.py # Environments -# .env +.env .venv env/ venv/ From 4b471fdbba2081b531c53cb04968908c2e9adc1f Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 12:38:54 -0700 Subject: [PATCH 35/62] removed modal duplicate --- src/app_layout.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/app_layout.py b/src/app_layout.py index 702128d..a1e7f3b 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -361,6 +361,5 @@ dbc.Row(dbc.Col(meta)), ] ), - modal, ] ) From 9b44de0255232ce7c52ffa17b5f649236d136044 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 12:43:22 -0700 Subject: [PATCH 36/62] removed unused packages from Dockerfile and added data directory env var --- .env.example | 1 + docker-compose.yml | 8 +++---- docker/Dockerfile | 50 +++-------------------------------------- docker/requirements.txt | 4 +--- 4 files changed, 8 insertions(+), 55 deletions(-) diff --git a/.env.example b/.env.example index 269e4d2..8b37080 100644 --- a/.env.example +++ b/.env.example @@ -6,3 +6,4 @@ TIMEZONE="US/Pacific" PREFECT_TAGS='["latent-space-explorer"]' TILED_API_KEY= +DATA_DIR=$PWD/data diff --git a/docker-compose.yml b/docker-compose.yml index 75a44f5..61c05cc 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,23 +10,21 @@ services: dockerfile: "docker/Dockerfile" mem_limit: 2g environment: - DATA_DIR: "${PWD}/data/" + DATA_DIR: "${DATA_DIR}" PREFECT_TAGS: "${PREFECT_TAGS}" PREFECT_API_URL: '${PREFECT_API_URL}' FLOW_NAME: '${FLOW_NAME}' TIMEZONE: "${TIMEZONE}" USER: "${USER}" volumes: - - ./data:/app/work/data + - $DATA_DIR:/app/work/data - ./src:/app/work/src - ../mlex_file_manager/file_manager:/app/work/src/file_manager ports: - - "8070:8070" + - "127.0.0.1:8070:8070" networks: mlex_mle_net: networks: mlex_mle_net: external: true - -# env file: set up pwd \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile index d57f623..87d8d55 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,61 +1,17 @@ -# FROM python:3.9 -# LABEL maintainer="THE MLEXCHANGE TEAM" - -# RUN ls -# COPY docker/requirements.txt requirements.txt - -# RUN apt-get update && apt-get install -y \ -# build-essential \ -# wget \ -# python3-pip\ -# ffmpeg\ -# libsm6\ -# libxext6 - -# RUN pip3 install --upgrade pip &&\ -# pip3 install --timeout=2000 -r requirements.txt\ -# pip install git+https://github.com/taxe10/mlex_file_manager - -# RUN git clone https://github.com/mlexchange/mlex_dash_component_editor - -# # EXPOSE 8000 - -# WORKDIR /app/work -# ENV HOME /app/work -# COPY src src -# # ENV PYTHONUNBUFFERED=1 -# RUN mv /mlex_dash_component_editor/src/dash_component_editor.py /app/work/src/dash_component_editor.py - -# CMD ["bash"] -# #CMD python3 src/frontend.py -# CMD sleep 3600 - FROM python:3.9 LABEL maintainer="THE MLEXCHANGE TEAM" RUN ls COPY docker/requirements.txt requirements.txt -RUN apt-get update && apt-get install -y \ - build-essential \ - wget \ - python3-pip\ - ffmpeg\ - libsm6\ - libxext6 - RUN pip3 install --upgrade pip &&\ - pip3 install --timeout=2000 -r requirements.txt\ - pip install git+https://github.com/taxe10/mlex_file_manager + pip3 install -r requirements.txt\ + pip install 
git+https://github.com/mlexchange/mlex_file_manager\ + pip install git+https://github.com/mlexchange/mlex_dash_component_editor -RUN git clone https://github.com/mlexchange/mlex_dash_component_editor WORKDIR /app/work ENV HOME /app/work COPY src src -RUN mv /mlex_dash_component_editor/src/dash_component_editor.py /app/work/src/dash_component_editor.py CMD ["bash"] -#CMD sleep 3600 CMD python3 src/frontend.py - - diff --git a/docker/requirements.txt b/docker/requirements.txt index 3861b96..658ca06 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -1,5 +1,4 @@ dash==2.9.3 -# dash_component_editor==0.0.7 dash-core-components==2.0.0 dash-bootstrap-components==1.0.2 dash-html-components==2.0.0 @@ -13,5 +12,4 @@ diskcache==5.6.3 pandas numpy Pillow -# prefect -prefect-client==2.14.21 \ No newline at end of file +prefect-client==2.14.21 From a982f0c61d855d463790dd6a98342fb7b7d14e1f Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 12:44:00 -0700 Subject: [PATCH 37/62] removed dash_component_editor file --- src/dash_component_editor.py | 407 ----------------------------------- 1 file changed, 407 deletions(-) delete mode 100644 src/dash_component_editor.py diff --git a/src/dash_component_editor.py b/src/dash_component_editor.py deleted file mode 100644 index 181bae3..0000000 --- a/src/dash_component_editor.py +++ /dev/null @@ -1,407 +0,0 @@ -import re -from typing import Callable -# noinspection PyUnresolvedReferences -from inspect import signature, _empty - -from dash import html, dcc, dash_table, Input, Output, State, MATCH, ALL -import dash_bootstrap_components as dbc -import dash_daq as daq - -import base64 -#import PIL.Image -import io -#import plotly.express as px -# Procedural dash form generation - -""" -{'name', 'title', 'value', 'type', -""" - - -class SimpleItem(dbc.Col): - def __init__(self, - name, - base_id, - title=None, - param_key=None, - type='number', - debounce=True, - **kwargs): - - if param_key == None: - param_key = name - self.label = dbc.Label(title) - self.input = dbc.Input(type=type, - debounce=debounce, - id={**base_id, - 'name': name, - 'param_key': param_key}, - **kwargs) - - super(SimpleItem, self).__init__(children=[self.label, self.input]) - - -class FloatItem(SimpleItem): - pass - - -class IntItem(SimpleItem): - def __init__(self, *args, **kwargs): - if 'min' not in kwargs: - kwargs['min'] = -9007199254740991 - super(IntItem, self).__init__(*args, step=1, **kwargs) - - -class StrItem(SimpleItem): - def __init__(self, *args, **kwargs): - super(StrItem, self).__init__(*args, type='text', **kwargs) - - -class SliderItem(dbc.Col): - def __init__(self, - name, - base_id, - title=None, - param_key=None, - debounce=True, - visible=True, - **kwargs): - - if param_key == None: - param_key = name - self.label = dbc.Label(title) - self.input = dcc.Slider(id={**base_id, - 'name': name, - 'param_key': param_key, - 'layer': 'input'}, - tooltip={"placement": "bottom", "always_visible": True}, - **kwargs) - - style = {} - if not visible: - style['display'] = 'none' - - super(SliderItem, self).__init__(id={**base_id, - 'name': name, - 'param_key': param_key, - 'layer': 'form_group'}, - children=[self.label, self.input], - style=style) - - -class DropdownItem(dbc.Col): - def __init__(self, - name, - base_id, - title=None, - param_key=None, - debounce=True, - visible=True, - **kwargs): - - if param_key == None: - param_key = name - self.label = dbc.Label(title) - self.input = dcc.Dropdown(id={**base_id, - 'name': name, - 'param_key': param_key, - 
'layer': 'input'}, - **kwargs) - - style = {} - if not visible: - style['display'] = 'none' - - super(DropdownItem, self).__init__(id={**base_id, - 'name': name, - 'param_key': param_key, - 'layer': 'form_group'}, - children=[self.label, self.input], - style=style) - - -class RadioItem(dbc.Col): - def __init__(self, - name, - base_id, - title=None, - param_key=None, - visible=True, - **kwargs): - - if param_key == None: - param_key = name - self.label = dbc.Label(title) - self.input = dbc.RadioItems(id={**base_id, - 'name': name, - 'param_key': param_key, - 'layer': 'input'}, - **kwargs) - - style = {} - if not visible: - style['display'] = 'none' - - super(RadioItem, self).__init__(id={**base_id, - 'name': name, - 'param_key': param_key, - 'layer': 'form_group'}, - children=[self.label, self.input], - style=style) - - -class BoolItem(dbc.Col): - def __init__(self, - name, - base_id, - title=None, - param_key=None, - visible=True, - **kwargs): - - if param_key == None: - param_key = name - self.label = dbc.Label(title) - self.input = daq.ToggleSwitch(id={**base_id, - 'name': name, - 'param_key': param_key, - 'layer': 'input'}, - **kwargs) - self.output_label = dbc.Label('False/True') - - style = {} - if not visible: - style['display'] = 'none' - - super(BoolItem, self).__init__(id={**base_id, - 'name': name, - 'param_key': param_key, - 'layer': 'form_group'}, - children=[self.label, self.input, self.output_label], - style=style) - - -class ImgItem(dbc.Col): - def __init__(self, - name, - src, - base_id, - title=None, - param_key=None, - width='100px', - visible=True, - **kwargs): - - if param_key == None: - param_key = name - - if not (width.endswith('px') or width.endswith('%')): - width = width + 'px' - - self.label = dbc.Label(title) - - encoded_image = base64.b64encode(open(src, 'rb').read()) - self.src = 'data:image/png;base64,{}'.format(encoded_image.decode()) - self.input_img = html.Img(id={**base_id, - 'name': name, - 'param_key': param_key, - 'layer': 'input'}, - src=self.src, - style={'height':'auto', 'width':width}, - **kwargs) - - style = {} - if not visible: - style['display'] = 'none' - - super(ImgItem, self).__init__(id={**base_id, - 'name': name, - 'param_key': param_key, - 'layer': 'form_group'}, - children=[self.label, self.input_img], - style=style) - - -# class GraphItem(dbc.Col): -# def __init__(self, -# name, -# base_id, -# title=None, -# param_key=None, -# visible=True, -# figure = None, -# **kwargs): -# -# self.name = name -# if param_key == None: -# param_key = name -# self.label = dbc.Label(title) -# self.input_graph = dcc.Graph(id={**base_id, -# 'name': name, -# 'param_key': param_key, -# 'layer': 'input'}, -# **kwargs) -# -# self.input_upload = dcc.Upload(id={**base_id, -# 'name': name+'_upload', -# 'param_key': param_key, -# 'layer': 'input'}, -# children=html.Div([ -# 'Drag and Drop or ', -# html.A('Select Files') -# ]), -# style={ -# 'width': '95%', -# 'height': '60px', -# 'lineHeight': '60px', -# 'borderWidth': '1px', -# 'borderStyle': 'dashed', -# 'borderRadius': '5px', -# 'textAlign': 'center', -# 'margin': '10px' -# }, -# multiple = False) -# -# style = {} -# if not visible: -# style['display'] = 'none' -# -# super(GraphItem, self).__init__(id={**base_id, -# 'name': name, -# 'param_key': param_key, -# 'layer': 'form_group'}, -# children=[self.label, self.input_upload, self.input_graph], -# style=style) -# -# # Issue: cannot get inputs from the callback decorator -# def return_upload(self, *args): -# print(f'before if, args {args}') -# if args: -# 
print(f'args {args}') -# img_bytes = base64.b64decode(contents.split(",")[1]) -# img = PIL.Image.open(io.BytesIO(img_bytes)) -# fig = px.imshow(img, binary_string=True) -# return fig -# -# def init_callbacks(self, app): -# app.callback(Output({**self.id, -# 'name': self.name, -# 'layer': 'input'}, 'figure', allow_duplicate=True), -# Input({**self.id, -# 'name': self.name+'_upload', -# 'layer': 'input'}, -# 'contents'), -# State({**self.id, -# 'name': self.name+'_upload', -# 'layer': 'input'}, 'last_modified'), -# State({**self.id, -# 'name': self.name+'_upload', -# 'layer': 'input'}, 'filename'), -# prevent_initial_call=True)(self.return_upload()) - - - -class ParameterEditor(dbc.Form): - - type_map = {float: FloatItem, - int: IntItem, - str: StrItem, - } - - def __init__(self, _id, parameters, **kwargs): - self._parameters = parameters - - super(ParameterEditor, self).__init__(id=_id, children=[], className='kwarg-editor', **kwargs) - self.children = self.build_children() - - def init_callbacks(self, app): - app.callback(Output(self.id, 'n_submit'), - Input({**self.id, - 'name': ALL}, - 'value'), - State(self.id, 'n_submit'), - ) - - for child in self.children: - if hasattr(child,"init_callbacks"): - child.init_callbacks(app) - - - @property - def values(self): - return {param['name']: param.get('value', None) for param in self._parameters} - - @property - def parameters(self): - return {param['name']: param for param in self._parameters} - - def _determine_type(self, parameter_dict): - if 'type' in parameter_dict: - if parameter_dict['type'] in self.type_map: - return parameter_dict['type'] - elif parameter_dict['type'].__name__ in self.type_map: - return parameter_dict['type'].__name__ - elif type(parameter_dict['value']) in self.type_map: - return type(parameter_dict['value']) - raise TypeError(f'No item type could be determined for this parameter: {parameter_dict}') - - def build_children(self, values=None): - children = [] - for parameter_dict in self._parameters: - parameter_dict = parameter_dict.copy() - if values and parameter_dict['name'] in values: - parameter_dict['value'] = values[parameter_dict['name']] - type = self._determine_type(parameter_dict) - parameter_dict.pop('type', None) - item = self.type_map[type](**parameter_dict, base_id=self.id) - children.append(item) - - return children - - -class JSONParameterEditor(ParameterEditor): - type_map = {'float': FloatItem, - 'int': IntItem, - 'str': StrItem, - 'slider': SliderItem, - 'dropdown': DropdownItem, - 'radio': RadioItem, - 'bool': BoolItem, - 'img': ImgItem, - #'graph': GraphItem, - } - - def __init__(self, _id, json_blob, **kwargs): - super(ParameterEditor, self).__init__(id=_id, children=[], className='kwarg-editor', **kwargs) - self._json_blob = json_blob - self.children = self.build_children() - - def build_children(self, values=None): - children = [] - for json_record in self._json_blob: - ... - # build a parameter dict from self.json_blob - ... 
- type = json_record.get('type', self._determine_type(json_record)) - json_record = json_record.copy() - if values and json_record['name'] in values: - json_record['value'] = values[json_record['name']] - json_record.pop('type', None) - item = self.type_map[type](**json_record, base_id=self.id) - children.append(item) - - return children - - -class KwargsEditor(ParameterEditor): - def __init__(self, instance_index, func: Callable, **kwargs): - self.func = func - self._instance_index = instance_index - - parameters = [{'name': name, 'value': param.default} for name, param in signature(func).parameters.items() - if param.default is not _empty] - - super(KwargsEditor, self).__init__(dict(index=instance_index, type='kwargs-editor'), parameters=parameters, **kwargs) - - def new_record(self): - return {name: p.default for name, p in signature(self.func).parameters.items() if p.default is not _empty} From 1820a47c7c6f466b3f0ebc845132ca21259aeb3e Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 12:44:39 -0700 Subject: [PATCH 38/62] add flake file --- .flake8 | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .flake8 diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..a9f89c5 --- /dev/null +++ b/.flake8 @@ -0,0 +1,7 @@ +[flake8] +# 127 is width of the Github code viewer, +# black default is 88 so this will only warn about comments >127 +max-line-length = 127 +# Ignore errors due to incompatibility with black +#https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html +extend-ignore = E203,E701 From 5f2191a7e45ec8b25bd6ce4f5b74cede8af0e35b Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 16:33:48 -0700 Subject: [PATCH 39/62] Placed plots side by side and fixed callback errors when a child flow has not started yet --- src/app_layout.py | 56 ++++++++++++++++++++++++++++++++++++++++------- src/frontend.py | 53 +++++++++++++++++++++++++++----------------- 2 files changed, 81 insertions(+), 28 deletions(-) diff --git a/src/app_layout.py b/src/app_layout.py index a1e7f3b..7acb7a8 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -76,12 +76,51 @@ ] ), dbc.CardBody( - dcc.Graph( - id="scatter", - figure=go.Figure(go.Scattergl(mode="markers")), - ) + [ + dbc.Row( + [ + dbc.Col( + dcc.Graph( + id="scatter", + figure=go.Figure( + go.Scattergl(mode="markers"), + layout=go.Layout( + autosize=True, + margin=go.layout.Margin( + l=20, + r=20, + b=20, + t=20, + pad=0, + ), + ), + ), + ), + width=6, + ), + dbc.Col( + dcc.Graph( + id="heatmap", + figure=go.Figure( + go.Heatmap(), + layout=go.Layout( + autosize=True, + margin=go.layout.Margin( + l=20, + r=20, + b=20, + t=20, + pad=0, + ), + ), + ), + ), + width=6, + ), + ] + ), + ] ), - dbc.CardFooter(dcc.Graph(id="heatmap", figure=go.Figure(go.Heatmap()))), ], ) ] @@ -356,10 +395,11 @@ dbc.Container( children=[ dbc.Row( - [dbc.Col(control_panel, width=4), dbc.Col(image_panel, width=7)] + [dbc.Col(control_panel, width=4), dbc.Col(image_panel, width=8)] ), dbc.Row(dbc.Col(meta)), - ] + ], + fluid=True, ), - ] + ], ) diff --git a/src/frontend.py b/src/frontend.py index d3d470f..cacfdba 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -11,11 +11,11 @@ import requests from dash import Input, Output, State, html from dash.exceptions import PreventUpdate +from dash_component_editor import JSONParameterEditor from file_manager.data_project import DataProject from sklearn.cluster import DBSCAN, HDBSCAN, MiniBatchKMeans from app_layout import app -from dash_component_editor import 
JSONParameterEditor from latentxp_utils import ( dbscan_kwargs, generate_scatter_data, @@ -36,7 +36,6 @@ TIMEZONE = os.getenv("TIMEZONE", "US/Pacific") FLOW_NAME = os.getenv("FLOW_NAME", "") - # TODO: Get model parameters from UI TRAIN_PARAMS_EXAMPLE = { "flow_type": "podman", @@ -326,7 +325,14 @@ def submit_dimension_reduction_job( job_message = f"Job has been succesfully submitted with uid: {job_uid}." print(job_message, flush=True) - return job_uid, "cluster", -1, -2, go.Figure(go.Heatmap()), -1 + fig = go.Figure( + go.Heatmap(), + layout=go.Layout( + autosize=True, + margin=go.layout.Margin(l=20, r=20, b=20, t=20, pad=0), + ), + ) + return job_uid, "cluster", -1, -2, fig, -1 @app.callback( @@ -356,19 +362,16 @@ def read_latent_vectors(n_intervals, experiment_id, max_intervals): raise PreventUpdate children_flows = get_children_flow_run_ids(experiment_id) - print("child flow") - print(children_flows) - - # read the latent vectors from the output dir - output_path = OUTPUT_DIR / children_flows[0] - npz_files = list(output_path.glob("*.npy")) - if len(npz_files) > 0: - lv_filepath = npz_files[0] # latent vector file path - latent_vectors = np.load(str(lv_filepath)) - print("latent vector", latent_vectors.shape) - return latent_vectors, 0 - else: - return None, -1 + if len(children_flows) > 0: + # read the latent vectors from the output dir + output_path = OUTPUT_DIR / children_flows[0] + npz_files = list(output_path.glob("*.npy")) + if len(npz_files) > 0: + lv_filepath = npz_files[0] # latent vector file path + latent_vectors = np.load(str(lv_filepath)) + print("latent vector", latent_vectors.shape) + return latent_vectors, 0 + return None, -1 @app.callback( @@ -518,7 +521,10 @@ def update_scatter_plot( ) fig = go.Figure(scatter_data) - fig.update_layout(legend=dict(tracegroupgap=20)) + fig.update_layout( + margin=go.layout.Margin(l=20, r=20, b=20, t=20, pad=0), + legend=dict(tracegroupgap=20), + ) if ( current_figure @@ -600,7 +606,9 @@ def update_heatmap( data_project = DataProject.from_dict(data_project_dict) if len(data_project.datasets) > 0: print("FM file") - selected_images, _ = data_project.read(selected_indices, export="pillow") + selected_images, _ = data_project.read_datasets( + selected_indices, export="pillow" + ) # DataClinic elif data_clinic_file_path is not None: print("data_clinic_file_path") @@ -675,6 +683,7 @@ def update_heatmap( data=heatmap_data, layout=dict( autosize=True, + margin=go.layout.Margin(l=20, r=20, b=20, t=20, pad=0), yaxis=dict(scaleanchor="x", scaleratio=aspect_y / aspect_x), ), ) @@ -700,10 +709,14 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): Returns: [num_images, clusters, labels]: statistics """ - assigned_labels = np.array(assigned_labels) + print("assigned_labels", assigned_labels, flush=True) - if selected_data is not None and len(selected_data["points"]) > 0: + if ( + selected_data is not None + and len(selected_data["points"]) > 0 + and assigned_labels != [-1] + ): selected_indices = [ point["customdata"][0] for point in selected_data["points"] ] # Access customdata for the original indices From 0debabd20465d656c7bddb065e2fafe000bc1e43 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 16:43:39 -0700 Subject: [PATCH 40/62] check for child flow --- src/frontend.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index cacfdba..c9223ef 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -425,16 +425,18 @@ def 
apply_clustering( clusters, options = None, None if obj: - clusters = obj.fit_predict(latent_vectors) - output_path = OUTPUT_DIR / experiment_id - np.save(output_path / "clusters.npy", clusters) - unique_clusters = np.unique(clusters) - options = [ - {"label": f"Cluster {cluster}", "value": cluster} - for cluster in unique_clusters - if cluster != -1 - ] - options.insert(0, {"label": "All", "value": -1}) + children_flows = get_children_flow_run_ids(experiment_id) + if len(children_flows) > 0: + clusters = obj.fit_predict(latent_vectors) + output_path = OUTPUT_DIR / children_flows[0] + np.save(output_path / "clusters.npy", clusters) + unique_clusters = np.unique(clusters) + options = [ + {"label": f"Cluster {cluster}", "value": cluster} + for cluster in unique_clusters + if cluster != -1 + ] + options.insert(0, {"label": "All", "value": -1}) return clusters, options From 0ed60a1af4b284a112598ba434cb51b3c88328af Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 17:12:05 -0700 Subject: [PATCH 41/62] add job dropdown --- src/app_layout.py | 7 ++++++- src/frontend.py | 35 +++++++++++++++++++++++++---------- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/src/app_layout.py b/src/app_layout.py index 7acb7a8..180d870 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -155,8 +155,13 @@ dcc.Input( id="job-name", placeholder="test0", - style={"width": "100%"}, + style={ + "width": "100%", + "margin-bottom": "1rem", + }, ), + dbc.Label("Select a job..."), + dcc.Dropdown(id="job-selector"), ] ), html.Hr(), diff --git a/src/frontend.py b/src/frontend.py index c9223ef..a197e5e 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -25,7 +25,11 @@ load_images_by_indices, remove_key_from_dict_list, ) -from utils_prefect import get_children_flow_run_ids, schedule_prefect_flow +from utils_prefect import ( + get_children_flow_run_ids, + get_flow_runs_by_name, + schedule_prefect_flow, +) # GLOBAL PARAMS DATA_DIR = str(os.environ["DATA_DIR"]) @@ -130,6 +134,16 @@ def show_clustering_gui_layouts(selected_algo): return item_list +@app.callback( + Output("job-selector", "options"), + Input("interval-component", "n_intervals"), +) +def update_job_selector(n_intervals): + # TODO: Split train/inference and add data project name + jobs = get_flow_runs_by_name(tags=PREFECT_TAGS) + return jobs + + @app.callback( [ Output("input_labels", "data"), @@ -199,8 +213,6 @@ def update_data_n_label_schema( @app.callback( [ - # flag the read variable - Output("experiment-id", "data"), # reset scatter plot control panel Output("scatter-color", "value"), Output("cluster-dropdown", "value"), @@ -247,7 +259,6 @@ def submit_dimension_reduction_job( selected_algo: selected dimension reduction algo children: div for algo's parameters Returns: - experiment-id: uuid for current run cluster-dropdown: options for cluster dropdown scatter-color: default scatter-color value cluster-dropdown: default cluster-dropdown value @@ -332,7 +343,7 @@ def submit_dimension_reduction_job( margin=go.layout.Margin(l=20, r=20, b=20, t=20, pad=0), ), ) - return job_uid, "cluster", -1, -2, fig, -1 + return "cluster", -1, -2, fig, -1 @app.callback( @@ -341,7 +352,7 @@ def submit_dimension_reduction_job( Output("interval-component", "max_intervals", allow_duplicate=True), ], Input("interval-component", "n_intervals"), - State("experiment-id", "data"), + State("job-selector", "value"), State("interval-component", "max_intervals"), prevent_initial_call=True, ) @@ -384,7 +395,7 @@ def read_latent_vectors(n_intervals, experiment_id, 
max_intervals): State("latent_vectors", "data"), State("cluster-algo-dropdown", "value"), State("additional-cluster-params", "children"), - State("experiment-id", "data"), + State("job-selector", "value"), ], ) def apply_clustering( @@ -414,7 +425,7 @@ def apply_clustering( key = child["props"]["children"][1]["props"]["id"]["param_key"] value = child["props"]["children"][1]["props"]["value"] input_params[key] = value - print("Clustering params:", input_params) + print("Clustering params:", input_params, flush=True) if selected_algo == "KMeans": obj = MiniBatchKMeans(n_clusters=input_params["n_clusters"]) @@ -438,6 +449,8 @@ def apply_clustering( ] options.insert(0, {"label": "All", "value": -1}) + print("clusters", clusters, flush=True) + return clusters, options @@ -489,7 +502,7 @@ def update_scatter_plot( if latent_vectors is None or children is None: raise PreventUpdate latent_vectors = np.array(latent_vectors) - print("latent vector shape:", latent_vectors.shape) + print("latent vector shape:", latent_vectors.shape, flush=True) n_components = children["props"]["children"][0]["props"]["children"][1]["props"][ "value" @@ -646,7 +659,9 @@ def update_heatmap( # FileManager data_project = DataProject.from_dict(data_project_dict) if len(data_project.datasets) > 0: - selected_images, _ = data_project.read([selected_index], export="pillow") + selected_images, _ = data_project.read_datasets( + [selected_index], export="pillow" + ) # DataClinic elif data_clinic_file_path is not None: directory_path = os.path.dirname(data_clinic_file_path) From 6b11ca3808f76cb20d170599c1e24ad5d42a10f0 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 19:36:23 -0700 Subject: [PATCH 42/62] add conda flows and simplify data clinic options temporarily --- src/frontend.py | 150 ++++++++++++++++++++++++++++++------------------ 1 file changed, 93 insertions(+), 57 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index a197e5e..0a14c5f 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -22,7 +22,6 @@ hdbscan_kwargs, hex_to_rgba, kmeans_kwargs, - load_images_by_indices, remove_key_from_dict_list, ) from utils_prefect import ( @@ -39,37 +38,65 @@ PREFECT_TAGS = json.loads(os.getenv("PREFECT_TAGS", '["latent-space-explorer"]')) TIMEZONE = os.getenv("TIMEZONE", "US/Pacific") FLOW_NAME = os.getenv("FLOW_NAME", "") - -# TODO: Get model parameters from UI -TRAIN_PARAMS_EXAMPLE = { - "flow_type": "podman", - "params_list": [ - { - "image_name": "ghcr.io/runboj/mlex_dimension_reduction_pca", - "image_tag": "main", - "command": 'python -c \\"import time; time.sleep(30)\\"', - "params": { - "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} +MODEL_DIR = "data/models" +FLOW_TYPE = "conda" +CONDA_ENV_NAME = "dimension_reduction_pca" + + +if FLOW_TYPE == "podman": + TRAIN_PARAMS_EXAMPLE = { + "flow_type": "podman", + "params_list": [ + { + "image_name": "ghcr.io/runboj/mlex_dimension_reduction_pca", + "image_tag": "main", + "command": 'python -c \\"import time; time.sleep(30)\\"', + "params": { + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + }, + "volumes": [f"{DATA_DIR}:/app/work/data"], + } + ], + } + + INFERENCE_PARAMS_EXAMPLE = { + "flow_type": "podman", + "params_list": [ + { + "image_name": "ghcr.io/runboj/mlex_dimension_reduction_pca", + "image_tag": "main", + "command": 'python -c \\"import time; time.sleep(30)\\"', + "params": { + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + }, + "volumes": [f"{DATA_DIR}:/app/work/data"], }, - 
"volumes": [f"{DATA_DIR}:/app/work/data"], - } - ], -} - -INFERENCE_PARAMS_EXAMPLE = { - "flow_type": "podman", - "params_list": [ - { - "image_name": "ghcr.io/runboj/mlex_dimension_reduction_pca", - "image_tag": "main", - "command": 'python -c \\"import time; time.sleep(30)\\"', - "params": { - "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + ], + } +else: + TRAIN_PARAMS_EXAMPLE = { + "flow_type": "conda", + "params_list": [ + { + "conda_env_name": f"{CONDA_ENV_NAME}", + "params": { + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + }, + } + ], + } + + INFERENCE_PARAMS_EXAMPLE = { + "flow_type": "conda", + "params_list": [ + { + "conda_env_name": f"{CONDA_ENV_NAME}", + "params": { + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + }, }, - "volumes": [f"{DATA_DIR}:/app/work/data"], - }, - ], -} + ], + } @app.callback( @@ -155,12 +182,9 @@ def update_job_selector(n_intervals): Input( {"base_id": "file-manager", "name": "data-project-dict"}, "data" ), # FM dataset - Input("feature-vector-model-list", "value"), # data clinic dataset ], ) -def update_data_n_label_schema( - selected_example_dataset, data_project_dict, data_clinic_file_path -): +def update_data_n_label_schema(selected_example_dataset, data_project_dict): """ This callback updates the selected dataset from the provided example datasets, as well as labels, and label schema Args: @@ -184,10 +208,6 @@ def update_data_n_label_schema( # user_upload_data_dir = None if len(data_project.datasets) > 0: labels = np.full((len(data_project.datasets),), -1) - # DataClinic options - elif data_clinic_file_path is not None: - df = pd.read_parquet(data_clinic_file_path) - labels = np.full((df.shape[0],), -1) # Example dataset option 1 elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": labels = np.load("/app/work/data/example_shapes/DemoLabels.npy") @@ -314,9 +334,19 @@ def submit_dimension_reduction_job( # check which dimension reduction algo, then compose command if selected_algo == "PCA": - TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python pca_run.py" + if FLOW_TYPE == "podman": + TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python pca_run.py" + else: + TRAIN_PARAMS_EXAMPLE["params_list"][0][ + "python_file_name" + ] = "mlex_dimension_reduction_pca/pca_run.py" elif selected_algo == "UMAP": - TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python umap_run.py" + if FLOW_TYPE == "podman": + TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python umap_run.py" + else: + TRAIN_PARAMS_EXAMPLE["params_list"][0][ + "python_file_name" + ] = "mlex_dimension_reduction_umap/umap_run.py" TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"] = io_parameters TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["output_dir"] = ( @@ -579,7 +609,6 @@ def update_scatter_plot( State( {"base_id": "file-manager", "name": "data-project-dict"}, "data" ), # DataProject for FM - State("feature-vector-model-list", "value"), # data clinic dataset ], prevent_initial_call=True, ) @@ -589,7 +618,6 @@ def update_heatmap( display_option, selected_example_dataset, data_project_dict, - data_clinic_file_path, ): """ This callback update the heatmap @@ -600,11 +628,7 @@ def update_heatmap( Returns: fig: updated heatmap """ - if ( - not selected_example_dataset - and not data_project_dict - and not data_clinic_file_path - ): + if not selected_example_dataset and not data_project_dict: raise PreventUpdate # user select a group of points @@ -624,12 
+648,6 @@ def update_heatmap( selected_images, _ = data_project.read_datasets( selected_indices, export="pillow" ) - # DataClinic - elif data_clinic_file_path is not None: - print("data_clinic_file_path") - print(data_clinic_file_path) - directory_path = os.path.dirname(data_clinic_file_path) - selected_images = load_images_by_indices(directory_path, selected_indices) # Example dataset elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": print("Demoshapes.npz") @@ -662,10 +680,6 @@ def update_heatmap( selected_images, _ = data_project.read_datasets( [selected_index], export="pillow" ) - # DataClinic - elif data_clinic_file_path is not None: - directory_path = os.path.dirname(data_clinic_file_path) - clicked_image = load_images_by_indices(directory_path, [selected_index]) # Example dataset elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": clicked_image = np.load("/app/work/" + selected_example_dataset)["arr_0"][ @@ -767,6 +781,28 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): ] +@app.callback( + Output("feature-vector-model-list", "options"), + Input("interval-component", "n_intervals"), +) +def update_feature_vector_model_list(n_intervals): + """ + This callback update the feature vector model list + Args: + n_intervals: interval component + Returns: + options: feature vector model list + """ + # TODO: Connect to data clinic + # TODO: Check if inference has already taken place in this dataset + folder_names = [ + os.path.join(dirpath, dir) + for dirpath, dirs, _ in os.walk(MODEL_DIR) + for dir in dirs + ] + return folder_names + + if __name__ == "__main__": app.run_server( debug=True, From 437c7f25ef09242cae49176f3321887b1a9f97eb Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 21:49:38 -0700 Subject: [PATCH 43/62] adding inference step for autoencoder and temporarily removing using user --- src/frontend.py | 62 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index 0a14c5f..c7ff338 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -1,3 +1,4 @@ +import copy import json import os import pathlib @@ -32,7 +33,7 @@ # GLOBAL PARAMS DATA_DIR = str(os.environ["DATA_DIR"]) -USER = "admin" # 'mlexchange-team' move to env file +USER = "" # 'mlexchange-team' move to env file OUTPUT_DIR = pathlib.Path("data/mlexchange_store/" + USER) UPLOAD_FOLDER_ROOT = "data/upload" PREFECT_TAGS = json.loads(os.getenv("PREFECT_TAGS", '["latent-space-explorer"]')) @@ -294,6 +295,7 @@ def submit_dimension_reduction_job( ): raise PreventUpdate + job_params = job_params = copy.deepcopy(TRAIN_PARAMS_EXAMPLE) input_params = {} if children: for child in children["props"]["children"]: @@ -323,43 +325,75 @@ def submit_dimension_reduction_job( "root_uri": None, } + # Autoencoder + if data_clinic_file_path is not None: + auto_io_params = io_parameters.copy() + auto_io_params["model_dir"] = data_clinic_file_path + "/last.ckpt" + if FLOW_TYPE == "podman": + autoencoder_params = { + "image_name": "ghcr.io/mlexchange/mlex_pytorch_autoencoders:main", + "image_tag": "main", + "command": "python src/predict_model.py", + "params": { + "io_parameters": auto_io_params, + "target_width": 64, + "target_height": 64, + "batch_size": 32, + }, + "volumes": [f"{DATA_DIR}:/app/work/data"], + } + else: + autoencoder_params = { + "conda_env_name": "pytorch_autoencoders", + "params": { + "io_parameters": auto_io_params, + "target_width": 64, + "target_height": 64, 
+ "batch_size": 32, + }, + "python_file_name": "mlex_pytorch_autoencoders/src/predict_model.py", + } + job_params["params_list"].insert(0, autoencoder_params) + # prefect current_time = datetime.now(pytz.timezone(TIMEZONE)).strftime("%Y/%m/%d %H:%M:%S") if not job_name: job_name = "test0" job_name += " " + str(current_time) - # project_name = selected_dataset.split("/")[-1] # name of the dataset, get it from FM ## this is an issue + # TODO: Hash root_uri + data_uris project_name = "fake_name" print(PREFECT_TAGS, flush=True) # check which dimension reduction algo, then compose command if selected_algo == "PCA": if FLOW_TYPE == "podman": - TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python pca_run.py" + job_params["params_list"][-1]["command"] = "python pca_run.py" else: - TRAIN_PARAMS_EXAMPLE["params_list"][0][ + job_params["params_list"][-1][ "python_file_name" ] = "mlex_dimension_reduction_pca/pca_run.py" elif selected_algo == "UMAP": if FLOW_TYPE == "podman": - TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python umap_run.py" + job_params["params_list"][-1]["command"] = "python umap_run.py" else: - TRAIN_PARAMS_EXAMPLE["params_list"][0][ + job_params["params_list"][-1][ "python_file_name" ] = "mlex_dimension_reduction_umap/umap_run.py" - TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"] = io_parameters - TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["output_dir"] = ( - str(OUTPUT_DIR) + job_params["params_list"][-1]["params"]["io_parameters"] = io_parameters + job_params["params_list"][-1]["params"]["io_parameters"]["output_dir"] = str( + OUTPUT_DIR ) - TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["uid_save"] = "" - TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["model_parameters"] = input_params - print(TRAIN_PARAMS_EXAMPLE) + job_params["params_list"][-1]["params"]["io_parameters"]["uid_save"] = "" + job_params["params_list"][-1]["params"]["io_parameters"]["uid_retrieve"] = "" + job_params["params_list"][-1]["params"]["model_parameters"] = input_params + print(job_params) + print(TRAIN_PARAMS_EXAMPLE, flush=True) # run prefect job, job_uid is the new experiment id -> uid_save in the pca_example.yaml file job_uid = schedule_prefect_flow( FLOW_NAME, - parameters=TRAIN_PARAMS_EXAMPLE, + parameters=job_params, flow_run_name=f"{job_name} {current_time}", tags=PREFECT_TAGS + ["train", project_name], ) @@ -405,7 +439,7 @@ def read_latent_vectors(n_intervals, experiment_id, max_intervals): children_flows = get_children_flow_run_ids(experiment_id) if len(children_flows) > 0: # read the latent vectors from the output dir - output_path = OUTPUT_DIR / children_flows[0] + output_path = OUTPUT_DIR / children_flows[-1] npz_files = list(output_path.glob("*.npy")) if len(npz_files) > 0: lv_filepath = npz_files[0] # latent vector file path From d2afe4c0653766f7df67fb8c5be94136ac7048d5 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Wed, 27 Mar 2024 19:12:10 -0700 Subject: [PATCH 44/62] loading env variables from file when running outside docker and adding tiled api key --- docker/requirements.txt | 1 + src/app_layout.py | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/docker/requirements.txt b/docker/requirements.txt index 658ca06..1fda0c8 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -12,4 +12,5 @@ diskcache==5.6.3 pandas numpy Pillow +python-dotenv prefect-client==2.14.21 diff --git a/src/app_layout.py b/src/app_layout.py index 180d870..b195cdd 100644 --- 
a/src/app_layout.py +++ b/src/app_layout.py @@ -1,3 +1,4 @@ +import os import pathlib import dash_bootstrap_components as dbc @@ -7,10 +8,13 @@ from dash import Dash, dcc, html from dash.long_callback import DiskcacheLongCallbackManager from dash_iconify import DashIconify +from dotenv import load_dotenv from file_manager.main import FileManager import templates +load_dotenv(".env") + # GLOBAL VARIABLES ALGORITHM_DATABASE = { "PCA": "PCA", @@ -29,8 +33,11 @@ "value": "data/example_latentrepresentation/f_vectors.parquet", }, ] -DOCKER_DATA = pathlib.Path.home() / "data" # /app/work/data -UPLOAD_FOLDER_ROOT = DOCKER_DATA / "upload" # /app/work/data/upload +DATA_DIR = pathlib.Path( + os.getenv("DATA_DIR") +) # pathlib.Path.home() / "data" # /app/work/data +UPLOAD_FOLDER_ROOT = DATA_DIR / "upload" # /app/work/data/upload +TILED_API_KEY = os.getenv("TILED_API_KEY") # SETUP DASH APP cache = diskcache.Cache("./cache") @@ -45,7 +52,9 @@ server = app.server -dash_file_explorer = FileManager(DOCKER_DATA, UPLOAD_FOLDER_ROOT, open_explorer=False) +dash_file_explorer = FileManager( + DATA_DIR, UPLOAD_FOLDER_ROOT, open_explorer=False, api_key=TILED_API_KEY +) dash_file_explorer.init_callbacks(app) du.configure_upload(app, UPLOAD_FOLDER_ROOT, use_upload_id=False) From 825673d021edcdfddd8f04ca50f4bfadd9dfae94 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Wed, 27 Mar 2024 21:47:11 -0700 Subject: [PATCH 45/62] arranged filepaths according to env variables to enable running outside docker --- .env.example | 8 +++++-- docker-compose.yml | 2 +- src/app_layout.py | 10 ++++---- src/frontend.py | 57 ++++++++++++++++++++++++++++------------------ 4 files changed, 46 insertions(+), 31 deletions(-) diff --git a/.env.example b/.env.example index 8b37080..712e091 100644 --- a/.env.example +++ b/.env.example @@ -1,9 +1,13 @@ USER=admin +DEFAULT_ALGORITHM_DESCRIPTION=/path/to/PCA_v1.0.0.json -PREFECT_API_URL=http://prefect:4200/api +PREFECT_API_URL=http://localhost:4200/api FLOW_NAME="Parent flow/launch_parent_flow" TIMEZONE="US/Pacific" PREFECT_TAGS='["latent-space-explorer"]' +CONTENT_API_URL="http://localhost:8000/api/v0/models" + TILED_API_KEY= -DATA_DIR=$PWD/data + +DATA_DIR=/path/to/data diff --git a/docker-compose.yml b/docker-compose.yml index 61c05cc..b2b550e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,13 +13,13 @@ services: DATA_DIR: "${DATA_DIR}" PREFECT_TAGS: "${PREFECT_TAGS}" PREFECT_API_URL: '${PREFECT_API_URL}' + CONTENT_API_URL: '${CONTENT_API_URL}' FLOW_NAME: '${FLOW_NAME}' TIMEZONE: "${TIMEZONE}" USER: "${USER}" volumes: - $DATA_DIR:/app/work/data - ./src:/app/work/src - - ../mlex_file_manager/file_manager:/app/work/src/file_manager ports: - "127.0.0.1:8070:8070" networks: diff --git a/src/app_layout.py b/src/app_layout.py index b195cdd..fd6f845 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -13,7 +13,7 @@ import templates -load_dotenv(".env") +load_dotenv(".env", override=True) # GLOBAL VARIABLES ALGORITHM_DATABASE = { @@ -33,11 +33,9 @@ "value": "data/example_latentrepresentation/f_vectors.parquet", }, ] -DATA_DIR = pathlib.Path( - os.getenv("DATA_DIR") -) # pathlib.Path.home() / "data" # /app/work/data -UPLOAD_FOLDER_ROOT = DATA_DIR / "upload" # /app/work/data/upload -TILED_API_KEY = os.getenv("TILED_API_KEY") +DATA_DIR = pathlib.Path(os.getenv("DATA_DIR")) +UPLOAD_FOLDER_ROOT = "data/upload" +TILED_API_KEY = os.getenv("TILED_API_KEY", None) # SETUP DASH APP cache = diskcache.Cache("./cache") diff --git a/src/frontend.py b/src/frontend.py index c7ff338..6a426ea 100755 
--- a/src/frontend.py +++ b/src/frontend.py @@ -1,7 +1,6 @@ import copy import json import os -import pathlib import uuid from datetime import datetime @@ -12,11 +11,12 @@ import requests from dash import Input, Output, State, html from dash.exceptions import PreventUpdate -from dash_component_editor import JSONParameterEditor +from dotenv import load_dotenv from file_manager.data_project import DataProject from sklearn.cluster import DBSCAN, HDBSCAN, MiniBatchKMeans from app_layout import app +from dash_component_editor import JSONParameterEditor from latentxp_utils import ( dbscan_kwargs, generate_scatter_data, @@ -31,18 +31,25 @@ schedule_prefect_flow, ) +load_dotenv(".env") + # GLOBAL PARAMS -DATA_DIR = str(os.environ["DATA_DIR"]) -USER = "" # 'mlexchange-team' move to env file -OUTPUT_DIR = pathlib.Path("data/mlexchange_store/" + USER) -UPLOAD_FOLDER_ROOT = "data/upload" +USER = os.getenv("USER", "") # 'mlexchange-team' move to env file + +DATA_DIR = os.getenv("DATA_DIR", "data") +MODEL_DIR = "data/models" +OUTPUT_DIR = f"data/mlexchange_store/{USER}" +UPLOAD_FOLDER_ROOT = f"{DATA_DIR}/upload" + PREFECT_TAGS = json.loads(os.getenv("PREFECT_TAGS", '["latent-space-explorer"]')) TIMEZONE = os.getenv("TIMEZONE", "US/Pacific") FLOW_NAME = os.getenv("FLOW_NAME", "") -MODEL_DIR = "data/models" FLOW_TYPE = "conda" CONDA_ENV_NAME = "dimension_reduction_pca" +CONTENT_API_URL = os.getenv("CONTENT_API_URL", "http://localhost:8000/api/v0/models") +DEFAULT_ALGORITHM_DESCRIPTION = os.getenv("DEFAULT_ALGORITHM_DESCRIPTION") + if FLOW_TYPE == "podman": TRAIN_PARAMS_EXAMPLE = { @@ -53,7 +60,7 @@ "image_tag": "main", "command": 'python -c \\"import time; time.sleep(30)\\"', "params": { - "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": None} }, "volumes": [f"{DATA_DIR}:/app/work/data"], } @@ -68,7 +75,7 @@ "image_tag": "main", "command": 'python -c \\"import time; time.sleep(30)\\"', "params": { - "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": None} }, "volumes": [f"{DATA_DIR}:/app/work/data"], }, @@ -81,7 +88,7 @@ { "conda_env_name": f"{CONDA_ENV_NAME}", "params": { - "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": None} }, } ], @@ -93,7 +100,7 @@ { "conda_env_name": f"{CONDA_ENV_NAME}", "params": { - "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": None} }, }, ], @@ -114,7 +121,12 @@ def show_dimension_reduction_gui_layouts(selected_algo): item_list: dropdown menu html code model_uid: selected algo's uid """ - data = requests.get("http://content-api:8000/api/v0/models").json() # all model + try: + data = requests.get(CONTENT_API_URL).json() # all model + except Exception as e: + print(f"Cannot access content api: {e}", flush=True) + with open(DEFAULT_ALGORITHM_DESCRIPTION, "r") as f: + data = [json.load(f)] if selected_algo == "PCA": conditions = {"name": "PCA"} @@ -385,7 +397,7 @@ def submit_dimension_reduction_job( OUTPUT_DIR ) job_params["params_list"][-1]["params"]["io_parameters"]["uid_save"] = "" - job_params["params_list"][-1]["params"]["io_parameters"]["uid_retrieve"] = "" + job_params["params_list"][-1]["params"]["io_parameters"]["uid_retrieve"] = None job_params["params_list"][-1]["params"]["model_parameters"] = input_params print(job_params) print(TRAIN_PARAMS_EXAMPLE, flush=True) 
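For orientation, the payload handed to schedule_prefect_flow for a conda-based PCA run ends up looking roughly like the sketch below. This is illustrative only: the dataset fields, uid values and model_parameters depend on the file-manager selection and the GUI form, and when a DataClinic checkpoint is chosen an autoencoder inference entry is prepended to params_list, which is why the dimension-reduction step is always addressed as params_list[-1].

    job_params = {
        "flow_type": "conda",
        "params_list": [
            {
                "conda_env_name": "dimension_reduction_pca",
                "python_file_name": "mlex_dimension_reduction_pca/pca_run.py",
                "params": {
                    "io_parameters": {
                        # dataset location fields gathered from the file manager go here
                        "root_uri": None,
                        "output_dir": str(OUTPUT_DIR),  # data/mlexchange_store/<USER> at this point in the series
                        "uid_save": "",
                        "uid_retrieve": None,
                    },
                    "model_parameters": {"n_components": 2},  # collected from the GUI form
                },
            }
        ],
    }
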
@@ -439,11 +451,10 @@ def read_latent_vectors(n_intervals, experiment_id, max_intervals): children_flows = get_children_flow_run_ids(experiment_id) if len(children_flows) > 0: # read the latent vectors from the output dir - output_path = OUTPUT_DIR / children_flows[-1] - npz_files = list(output_path.glob("*.npy")) - if len(npz_files) > 0: - lv_filepath = npz_files[0] # latent vector file path - latent_vectors = np.load(str(lv_filepath)) + output_path = f"{OUTPUT_DIR}/{children_flows[-1]}/latent_vectors.npy" + print(output_path, flush=True) + if os.path.exists(output_path): + latent_vectors = np.load(output_path) print("latent vector", latent_vectors.shape) return latent_vectors, 0 return None, -1 @@ -503,8 +514,8 @@ def apply_clustering( children_flows = get_children_flow_run_ids(experiment_id) if len(children_flows) > 0: clusters = obj.fit_predict(latent_vectors) - output_path = OUTPUT_DIR / children_flows[0] - np.save(output_path / "clusters.npy", clusters) + output_path = f"{OUTPUT_DIR}/{children_flows[0]}" + np.save(f"{output_path}/clusters.npy", clusters) unique_clusters = np.unique(clusters) options = [ {"label": f"Cluster {cluster}", "value": cluster} @@ -749,7 +760,9 @@ def update_heatmap( layout=dict( autosize=True, margin=go.layout.Margin(l=20, r=20, b=20, t=20, pad=0), - yaxis=dict(scaleanchor="x", scaleratio=aspect_y / aspect_x), + yaxis=dict( + scaleanchor="x", scaleratio=aspect_y / aspect_x, autorange="reversed" + ), ), ) @@ -780,7 +793,7 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): if ( selected_data is not None and len(selected_data["points"]) > 0 - and assigned_labels != [-1] + and (assigned_labels != [-1]).all() ): selected_indices = [ point["customdata"][0] for point in selected_data["points"] From bcaa30811948f3dbe83b066cbf32d5e796dd9134 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Wed, 27 Mar 2024 22:13:42 -0700 Subject: [PATCH 46/62] added optional output directory --- .env.example | 3 +++ docker-compose.yml | 1 + src/frontend.py | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index 712e091..29d423f 100644 --- a/.env.example +++ b/.env.example @@ -11,3 +11,6 @@ CONTENT_API_URL="http://localhost:8000/api/v0/models" TILED_API_KEY= DATA_DIR=/path/to/data +OUTPUT_DIR=/path/to/output # optional - if not provided, will default to data + # If running on a container and this path is not mounted, the container + # will not have access to this output directory diff --git a/docker-compose.yml b/docker-compose.yml index b2b550e..dce28fb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,6 +11,7 @@ services: mem_limit: 2g environment: DATA_DIR: "${DATA_DIR}" + OUTPUT_DIR: "${OUTPUT_DIR}" PREFECT_TAGS: "${PREFECT_TAGS}" PREFECT_API_URL: '${PREFECT_API_URL}' CONTENT_API_URL: '${CONTENT_API_URL}' diff --git a/src/frontend.py b/src/frontend.py index 6a426ea..5b4cb77 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -38,7 +38,8 @@ DATA_DIR = os.getenv("DATA_DIR", "data") MODEL_DIR = "data/models" -OUTPUT_DIR = f"data/mlexchange_store/{USER}" +OUTPUT_DIR = os.getenv("OUTPUT_DIR", "data") +OUTPUT_DIR = f"{OUTPUT_DIR}/mlexchange_store/{USER}" UPLOAD_FOLDER_ROOT = f"{DATA_DIR}/upload" PREFECT_TAGS = json.loads(os.getenv("PREFECT_TAGS", '["latent-space-explorer"]')) From 307fc58bd03c81eeae84c07763c0d47b8fdf5214 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Fri, 29 Mar 2024 10:23:25 -0700 Subject: [PATCH 47/62] removing output directory and replacing it with read and write directories --- 
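In short, the split resolves as in the sketch below (host paths are placeholders; the defaults and mount points match the code changed in this patch):

    # .env on the host
    # READ_DIR=/path/to/read/data       mounted at /app/work/data
    # WRITE_DIR=/path/to/write/results  mounted at /app/work/mlex_store

    import os

    READ_DIR = os.getenv("READ_DIR", "data")          # raw datasets and the upload folder
    WRITE_DIR = os.getenv("WRITE_DIR", "mlex_store")  # latent vectors and cluster results
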
.env.example | 6 ++---- docker-compose.yml | 8 ++++---- src/app_layout.py | 5 ++--- src/frontend.py | 34 ++++++++++++++++++++-------------- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/.env.example b/.env.example index 29d423f..8f4a37c 100644 --- a/.env.example +++ b/.env.example @@ -10,7 +10,5 @@ CONTENT_API_URL="http://localhost:8000/api/v0/models" TILED_API_KEY= -DATA_DIR=/path/to/data -OUTPUT_DIR=/path/to/output # optional - if not provided, will default to data - # If running on a container and this path is not mounted, the container - # will not have access to this output directory +READ_DIR=/path/to/read/data +WRITE_DIR=/path/to/write/mlex_store diff --git a/docker-compose.yml b/docker-compose.yml index dce28fb..8f729c3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,16 +10,16 @@ services: dockerfile: "docker/Dockerfile" mem_limit: 2g environment: - DATA_DIR: "${DATA_DIR}" - OUTPUT_DIR: "${OUTPUT_DIR}" + READ_DIR: "${READ_DIR}" + WRITE_DIR: "${WRITE_DIR}" PREFECT_TAGS: "${PREFECT_TAGS}" PREFECT_API_URL: '${PREFECT_API_URL}' CONTENT_API_URL: '${CONTENT_API_URL}' FLOW_NAME: '${FLOW_NAME}' TIMEZONE: "${TIMEZONE}" - USER: "${USER}" volumes: - - $DATA_DIR:/app/work/data + - $READ_DIR:/app/work/data + - $WRITE_DIR:/app/work/mlex_store - ./src:/app/work/src ports: - "127.0.0.1:8070:8070" diff --git a/src/app_layout.py b/src/app_layout.py index fd6f845..493fdff 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -1,5 +1,4 @@ import os -import pathlib import dash_bootstrap_components as dbc import dash_uploader as du @@ -33,7 +32,7 @@ "value": "data/example_latentrepresentation/f_vectors.parquet", }, ] -DATA_DIR = pathlib.Path(os.getenv("DATA_DIR")) +READ_DIR = os.getenv("READ_DIR") UPLOAD_FOLDER_ROOT = "data/upload" TILED_API_KEY = os.getenv("TILED_API_KEY", None) @@ -51,7 +50,7 @@ server = app.server dash_file_explorer = FileManager( - DATA_DIR, UPLOAD_FOLDER_ROOT, open_explorer=False, api_key=TILED_API_KEY + READ_DIR, UPLOAD_FOLDER_ROOT, open_explorer=False, api_key=TILED_API_KEY ) dash_file_explorer.init_callbacks(app) du.configure_upload(app, UPLOAD_FOLDER_ROOT, use_upload_id=False) diff --git a/src/frontend.py b/src/frontend.py index 5b4cb77..2aff44e 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -34,13 +34,11 @@ load_dotenv(".env") # GLOBAL PARAMS -USER = os.getenv("USER", "") # 'mlexchange-team' move to env file +READ_DIR = os.getenv("READ_DIR", "data") +WRITE_DIR = os.getenv("WRITE_DIR", "mlex_store") -DATA_DIR = os.getenv("DATA_DIR", "data") MODEL_DIR = "data/models" -OUTPUT_DIR = os.getenv("OUTPUT_DIR", "data") -OUTPUT_DIR = f"{OUTPUT_DIR}/mlexchange_store/{USER}" -UPLOAD_FOLDER_ROOT = f"{DATA_DIR}/upload" +UPLOAD_FOLDER_ROOT = "data/upload" PREFECT_TAGS = json.loads(os.getenv("PREFECT_TAGS", '["latent-space-explorer"]')) TIMEZONE = os.getenv("TIMEZONE", "US/Pacific") @@ -63,7 +61,10 @@ "params": { "io_parameters": {"uid_save": "uid0001", "uid_retrieve": None} }, - "volumes": [f"{DATA_DIR}:/app/work/data"], + "volumes": [ + f"{READ_DIR}:/app/work/data", + f"{WRITE_DIR}:/app/work/mlex_store", + ], } ], } @@ -78,7 +79,10 @@ "params": { "io_parameters": {"uid_save": "uid0001", "uid_retrieve": None} }, - "volumes": [f"{DATA_DIR}:/app/work/data"], + "volumes": [ + f"{READ_DIR}:/app/work/data", + f"{WRITE_DIR}:/app/work/mlex_store", + ], }, ], } @@ -219,7 +223,6 @@ def update_data_n_label_schema(selected_example_dataset, data_project_dict): data_project = DataProject.from_dict(data_project_dict) options = [] - # user_upload_data_dir = None 
if len(data_project.datasets) > 0: labels = np.full((len(data_project.datasets),), -1) # Example dataset option 1 @@ -353,7 +356,10 @@ def submit_dimension_reduction_job( "target_height": 64, "batch_size": 32, }, - "volumes": [f"{DATA_DIR}:/app/work/data"], + "volumes": [ + f"{READ_DIR}:/app/work/data", + f"{WRITE_DIR}:/app/work/mlex_store", + ], } else: autoencoder_params = { @@ -394,9 +400,9 @@ def submit_dimension_reduction_job( ] = "mlex_dimension_reduction_umap/umap_run.py" job_params["params_list"][-1]["params"]["io_parameters"] = io_parameters - job_params["params_list"][-1]["params"]["io_parameters"]["output_dir"] = str( - OUTPUT_DIR - ) + job_params["params_list"][-1]["params"]["io_parameters"][ + "output_dir" + ] = "mlex_store" job_params["params_list"][-1]["params"]["io_parameters"]["uid_save"] = "" job_params["params_list"][-1]["params"]["io_parameters"]["uid_retrieve"] = None job_params["params_list"][-1]["params"]["model_parameters"] = input_params @@ -452,7 +458,7 @@ def read_latent_vectors(n_intervals, experiment_id, max_intervals): children_flows = get_children_flow_run_ids(experiment_id) if len(children_flows) > 0: # read the latent vectors from the output dir - output_path = f"{OUTPUT_DIR}/{children_flows[-1]}/latent_vectors.npy" + output_path = f"mlex_store/{children_flows[-1]}/latent_vectors.npy" print(output_path, flush=True) if os.path.exists(output_path): latent_vectors = np.load(output_path) @@ -515,7 +521,7 @@ def apply_clustering( children_flows = get_children_flow_run_ids(experiment_id) if len(children_flows) > 0: clusters = obj.fit_predict(latent_vectors) - output_path = f"{OUTPUT_DIR}/{children_flows[0]}" + output_path = f"mlex_store/{children_flows[0]}" np.save(f"{output_path}/clusters.npy", clusters) unique_clusters = np.unique(clusters) options = [ From 409dc2a6f36c073f393bc983af97b8748fc7e756 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Fri, 29 Mar 2024 10:25:14 -0700 Subject: [PATCH 48/62] adding description --- .env.example | 1 + 1 file changed, 1 insertion(+) diff --git a/.env.example b/.env.example index 8f4a37c..d8d342e 100644 --- a/.env.example +++ b/.env.example @@ -6,6 +6,7 @@ FLOW_NAME="Parent flow/launch_parent_flow" TIMEZONE="US/Pacific" PREFECT_TAGS='["latent-space-explorer"]' +# MLEx Content Registry API CONTENT_API_URL="http://localhost:8000/api/v0/models" TILED_API_KEY= From dc0f8b71930416053519677f5e747a26321551a3 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Fri, 29 Mar 2024 10:29:13 -0700 Subject: [PATCH 49/62] rearranging example --- .env.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.env.example b/.env.example index d8d342e..6c5e611 100644 --- a/.env.example +++ b/.env.example @@ -12,4 +12,4 @@ CONTENT_API_URL="http://localhost:8000/api/v0/models" TILED_API_KEY= READ_DIR=/path/to/read/data -WRITE_DIR=/path/to/write/mlex_store +WRITE_DIR=/path/to/write/results From 4b899d8097f9758df4110469ab6f2df61231a08c Mon Sep 17 00:00:00 2001 From: taxe10 Date: Fri, 29 Mar 2024 11:12:10 -0700 Subject: [PATCH 50/62] added dotenv --- docker/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/requirements.txt b/docker/requirements.txt index 1fda0c8..c4f1660 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -3,6 +3,7 @@ dash-core-components==2.0.0 dash-bootstrap-components==1.0.2 dash-html-components==2.0.0 dash-iconify==0.1.2 +dotenv plotly==5.14.1 scikit-learn==1.3.0 dash-uploader==0.6.0 From d1d4b72c61e9a2079d0f9dcd76c44fd998f93e76 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Fri, 29 
Mar 2024 11:12:57 -0700 Subject: [PATCH 51/62] updating defaults to docker --- .env.example | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.env.example b/.env.example index 6c5e611..13f6ffd 100644 --- a/.env.example +++ b/.env.example @@ -1,13 +1,13 @@ USER=admin DEFAULT_ALGORITHM_DESCRIPTION=/path/to/PCA_v1.0.0.json -PREFECT_API_URL=http://localhost:4200/api +PREFECT_API_URL=http://prefect:4200/api FLOW_NAME="Parent flow/launch_parent_flow" TIMEZONE="US/Pacific" PREFECT_TAGS='["latent-space-explorer"]' # MLEx Content Registry API -CONTENT_API_URL="http://localhost:8000/api/v0/models" +CONTENT_API_URL="http://content-registry:8000/api/v0/models" TILED_API_KEY= From 3ad68f14ee89f363eb1683172a374721ad7097bf Mon Sep 17 00:00:00 2001 From: taxe10 Date: Fri, 29 Mar 2024 11:14:29 -0700 Subject: [PATCH 52/62] removed duplicate --- docker/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/requirements.txt b/docker/requirements.txt index c4f1660..1fda0c8 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -3,7 +3,6 @@ dash-core-components==2.0.0 dash-bootstrap-components==1.0.2 dash-html-components==2.0.0 dash-iconify==0.1.2 -dotenv plotly==5.14.1 scikit-learn==1.3.0 dash-uploader==0.6.0 From d4e4888e0d123304c81adb50feba0c2f30603904 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Fri, 29 Mar 2024 12:33:13 -0700 Subject: [PATCH 53/62] removed user and fixed typo --- .env.example | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.env.example b/.env.example index 13f6ffd..7ee3a5b 100644 --- a/.env.example +++ b/.env.example @@ -1,4 +1,3 @@ -USER=admin DEFAULT_ALGORITHM_DESCRIPTION=/path/to/PCA_v1.0.0.json PREFECT_API_URL=http://prefect:4200/api @@ -7,7 +6,7 @@ TIMEZONE="US/Pacific" PREFECT_TAGS='["latent-space-explorer"]' # MLEx Content Registry API -CONTENT_API_URL="http://content-registry:8000/api/v0/models" +CONTENT_API_URL="http://content-api:8000/api/v0/models" TILED_API_KEY= From 1dfac55bab15a3f661908f02b5ef99f0b192eeb4 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Fri, 29 Mar 2024 12:33:40 -0700 Subject: [PATCH 54/62] using relative path for docker --- src/app_layout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/app_layout.py b/src/app_layout.py index 493fdff..917705c 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -32,7 +32,7 @@ "value": "data/example_latentrepresentation/f_vectors.parquet", }, ] -READ_DIR = os.getenv("READ_DIR") +READ_DIR = "data" UPLOAD_FOLDER_ROOT = "data/upload" TILED_API_KEY = os.getenv("TILED_API_KEY", None) From 945e07431534348a81e60243e2eb087e878f2d1b Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Fri, 29 Mar 2024 20:34:30 -0700 Subject: [PATCH 55/62] Mount file manager --- docker-compose.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 8f729c3..c9568aa 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -20,7 +20,8 @@ services: volumes: - $READ_DIR:/app/work/data - $WRITE_DIR:/app/work/mlex_store - - ./src:/app/work/src + # - ./src:/app/work/src + - ../mlex_file_manager/file_manager:/app/work/src/file_manager ports: - "127.0.0.1:8070:8070" networks: From 6bf4d5d71bab8bf79ae404e8108979512a56fdd4 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Fri, 29 Mar 2024 21:24:53 -0700 Subject: [PATCH 56/62] Reorganize the control panels and select job panel --- src/app_layout.py | 225 +++++++++++++++++++++++++--------------------- 1 file changed, 124 insertions(+), 101 deletions(-) diff 
--git a/src/app_layout.py b/src/app_layout.py index 917705c..2ab620d 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -57,7 +57,104 @@ # BEGIN DASH CODE header = templates.header() -# right panel: uploader, scatter plot, individual image plot +# right panel: file manager, scatter plot, individual image plot +scatter_control_panel = html.Div( + [ + dbc.Card( + style={"width": "100%"}, + children=[ + dbc.CardHeader("Scatter Plot Control Panel"), + dbc.CardBody( + [ + dbc.Label("Scatter Colors", className="mr-3"), + dcc.RadioItems( + id="scatter-color", + options=[ + {"label": "cluster", "value": "cluster"}, + {"label": "label", "value": "label"}, + ], + value="cluster", + style={"min-width": "250px"}, + className="mb-2", + ), + dbc.Label("Select cluster", className="mr-3"), + dcc.Dropdown( + id="cluster-dropdown", + value=-1, + style={"min-width": "250px"}, + className="mb-2", + ), + dbc.Label("Select label", className="mr-3"), + dcc.Dropdown( + id="label-dropdown", + value=-2, + style={"min-width": "250px"}, + ), + ] + ), + ], + ), + dcc.Interval( + id="interval-component", + interval=3000, # in milliseconds + max_intervals=-1, # keep triggering indefinitely, None + n_intervals=0, + ), + ] +) + +heatmap_control_panel = html.Div( + [ + dbc.Card( + style={"width": "100%"}, + children=[ + dbc.CardHeader("Heatmap Control Panel"), + dbc.CardBody( + [ + dbc.Label( + [ + "Select a Group of Points using ", + html.Span( + html.I(DashIconify(icon="lucide:lasso")), + className="icon", + ), + " or ", + html.Span( + html.I(DashIconify(icon="lucide:box-select")), + className="icon", + ), + " Tools :", + ], + className="mb-3", + ), + dbc.Label( + id="stats-div", + children=[ + "Number of images selected: 0", + html.Br(), + "Clusters represented: N/A", + html.Br(), + "Labels represented: N/A", + ], + ), + dbc.Label("Display Image Options", className="mr-3"), + dcc.RadioItems( + id="mean-std-toggle", + options=[ + {"label": "Mean", "value": "mean"}, + {"label": "Standard Deviation", "value": "sigma"}, + ], + value="mean", + style={"min-width": "250px"}, + className="mb-2", + ), + ] + ), + ], + ) + ] +) + image_panel = [ dbc.Card( id="image-card", @@ -127,6 +224,23 @@ ), ] ), + dbc.CardFooter( + [ + dbc.Row( + [ + dbc.Col( + scatter_control_panel, + width=6, + ), + dbc.Col( + heatmap_control_panel, + width=6, + ), + ] + ) + ] + + ) ], ) ] @@ -166,11 +280,8 @@ "margin-bottom": "1rem", }, ), - dbc.Label("Select a job..."), - dcc.Dropdown(id="job-selector"), ] ), - html.Hr(), html.Div( [ dbc.Button( @@ -189,6 +300,13 @@ "justify-content": "center", }, ), + html.Hr(), + html.Div( + [ + dbc.Label("Select a job..."), + dcc.Dropdown(id="job-selector"), + ] + ), html.Div(id="invisible-apply-div"), ] ), @@ -256,102 +374,7 @@ ] ) -scatter_control_panel = html.Div( - [ - dbc.Card( - style={"width": "100%"}, - children=[ - dbc.CardHeader("Scatter Plot Control Panel"), - dbc.CardBody( - [ - dbc.Label("Scatter Colors", className="mr-3"), - dcc.RadioItems( - id="scatter-color", - options=[ - {"label": "cluster", "value": "cluster"}, - {"label": "label", "value": "label"}, - ], - value="cluster", - style={"min-width": "250px"}, - className="mb-2", - ), - dbc.Label("Select cluster", className="mr-3"), - dcc.Dropdown( - id="cluster-dropdown", - value=-1, - style={"min-width": "250px"}, - className="mb-2", - ), - dbc.Label("Select label", className="mr-3"), - dcc.Dropdown( - id="label-dropdown", - value=-2, - style={"min-width": "250px"}, - ), - ] - ), - ], - ), - dcc.Interval( - id="interval-component", - 
interval=3000, # in milliseconds - max_intervals=-1, # keep triggering indefinitely, None - n_intervals=0, - ), - ] -) -heatmap_control_panel = html.Div( - [ - dbc.Card( - style={"width": "100%"}, - children=[ - dbc.CardHeader("Heatmap Control Panel"), - dbc.CardBody( - [ - dbc.Label( - [ - "Select a Group of Points using ", - html.Span( - html.I(DashIconify(icon="lucide:lasso")), - className="icon", - ), - " or ", - html.Span( - html.I(DashIconify(icon="lucide:box-select")), - className="icon", - ), - " Tools :", - ], - className="mb-3", - ), - dbc.Label( - id="stats-div", - children=[ - "Number of images selected: 0", - html.Br(), - "Clusters represented: N/A", - html.Br(), - "Labels represented: N/A", - ], - ), - dbc.Label("Display Image Options", className="mr-3"), - dcc.RadioItems( - id="mean-std-toggle", - options=[ - {"label": "Mean", "value": "mean"}, - {"label": "Standard Deviation", "value": "sigma"}, - ], - value="mean", - style={"min-width": "250px"}, - className="mb-2", - ), - ] - ), - ], - ) - ] -) # add alert pop up window modal = html.Div( @@ -371,8 +394,8 @@ control_panel = [ algo_panel, cluster_algo_panel, - scatter_control_panel, - heatmap_control_panel, + # scatter_control_panel, + # heatmap_control_panel, modal, ] From 782d22ab0ce7a2eb6846135d63bc2d681c4c43dc Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Mon, 1 Apr 2024 15:39:51 -0700 Subject: [PATCH 57/62] Ignore /result --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 57516b2..96318fb 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ __pycache__/ test.py # output dir +results/ data/output/ data/upload/ data/.file_manager_vars.pkl From e3efbbb1daeb1071d21a9b4433bfd7d6d1dc578a Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Mon, 1 Apr 2024 15:40:10 -0700 Subject: [PATCH 58/62] Use accordion and update style --- src/app_layout.py | 236 ++++++++++++++---------------- src/assets/segmentation-style.css | 5 + src/frontend.py | 2 +- 3 files changed, 114 insertions(+), 129 deletions(-) diff --git a/src/app_layout.py b/src/app_layout.py index 2ab620d..035e2fb 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -246,136 +246,109 @@ ] # left panel: choose algorithms, submit job, choose scatter plot attributes, and statistics... 
-algo_panel = html.Div( +algo_panel = dbc.AccordionItem( [ - dbc.Card( - id="algo-card", - style={"width": "100%"}, - children=[ - dbc.Collapse( - children=[ - dbc.CardHeader("Select Dimension Reduction Algorithms"), - dbc.CardBody( - [ - dbc.Label("Algorithm", className="mr-2"), - dcc.Dropdown( - id="algo-dropdown", - options=[ - {"label": entry, "value": entry} - for entry in ALGORITHM_DATABASE - ], - style={"min-width": "250px"}, - value="PCA", - ), - html.Div(id="additional-model-params"), - html.Hr(), - html.Div( - [ - dbc.Label("Name your job", className="mr-2"), - dcc.Input( - id="job-name", - placeholder="test0", - style={ - "width": "100%", - "margin-bottom": "1rem", - }, - ), - ] - ), - html.Div( - [ - dbc.Button( - "Submit", - color="secondary", - id="run-algo", - outline=True, - size="lg", - className="m-1", - style={"width": "50%"}, - ), - ], - className="row", - style={ - "align-items": "center", - "justify-content": "center", - }, - ), - html.Hr(), - html.Div( - [ - dbc.Label("Select a job..."), - dcc.Dropdown(id="job-selector"), - ] - ), - html.Div(id="invisible-apply-div"), - ] + dbc.CardBody( + [ + dbc.Label("Algorithm", className="mr-2"), + dcc.Dropdown( + id="algo-dropdown", + options=[ + {"label": entry, "value": entry} + for entry in ALGORITHM_DATABASE + ], + style={"min-width": "250px"}, + value="PCA", + ), + html.Div(id="additional-model-params"), + html.Hr(), + html.Div( + [ + dbc.Label("Name your job", className="mr-2"), + dcc.Input( + id="job-name", + placeholder="test0", + style={ + "width": "100%", + "margin-bottom": "1rem", + }, + ), + ] + ), + html.Div( + [ + dbc.Button( + "Submit", + color="secondary", + id="run-algo", + outline=True, + size="lg", + className="m-1", + style={"width": "50%"}, ), ], - id="model-collapse", - is_open=True, - style={"margin-bottom": "0rem"}, - ) - ], - ) - ] + className="row", + style={ + "align-items": "center", + "justify-content": "center", + }, + ), + html.Hr(), + html.Div( + [ + dbc.Label("Select a job..."), + dcc.Dropdown(id="job-selector"), + ] + ), + html.Div(id="invisible-apply-div"), + ] + ), + ], + title="Select Dimension Reduction Algorithms", ) -cluster_algo_panel = html.Div( +cluster_algo_panel = dbc.AccordionItem( [ - dbc.Card( - id="cluster-algo-card", - style={"width": "100%"}, - children=[ - dbc.Collapse( - children=[ - dbc.CardHeader("Select Clustering Algorithms"), - dbc.CardBody( - [ - dbc.Label("Algorithm", className="mr-2"), - dcc.Dropdown( - id="cluster-algo-dropdown", - options=[ - {"label": entry, "value": entry} - for entry in CLUSTER_ALGORITHM_DATABASE - ], - style={"min-width": "250px"}, - value="DBSCAN", - ), - html.Div(id="additional-cluster-params"), - html.Hr(), - html.Div( - [ - dbc.Button( - "Apply", - color="secondary", - id="run-cluster-algo", - outline=True, - size="lg", - className="m-1", - style={"width": "50%"}, - ), - ], - className="row", - style={ - "align-items": "center", - "justify-content": "center", - }, - ), - html.Div(id="invisible-submit-div"), - ] + dbc.CardBody( + [ + dbc.Label("Algorithm", className="mr-2"), + dcc.Dropdown( + id="cluster-algo-dropdown", + options=[ + {"label": entry, "value": entry} + for entry in CLUSTER_ALGORITHM_DATABASE + ], + style={"min-width": "250px"}, + value="DBSCAN", + ), + html.Div(id="additional-cluster-params"), + html.Hr(), + html.Div( + [ + dbc.Button( + "Apply", + color="secondary", + id="run-cluster-algo", + outline=True, + size="lg", + className="m-1", + style={"width": "50%"}, ), ], - id="cluster-model-collapse", - is_open=True, - 
style={"margin-bottom": "0rem"}, - ) - ], - ) - ] + className="row", + style={ + "align-items": "center", + "justify-content": "center", + }, + ), + html.Div(id="invisible-submit-div"), + ] + ), + ], + title="Select Clustering Algorithms", ) - # add alert pop up window modal = html.Div( [ @@ -390,14 +363,17 @@ ] ) - -control_panel = [ - algo_panel, - cluster_algo_panel, - # scatter_control_panel, - # heatmap_control_panel, - modal, -] +control_panel = dbc.Accordion( + [ + algo_panel, + cluster_algo_panel + ], + style={ + 'position': 'sticky', + 'top': '10%', + 'width': '100%' + } + ) # metadata @@ -429,8 +405,12 @@ dbc.Container( children=[ dbc.Row( - [dbc.Col(control_panel, width=4), dbc.Col(image_panel, width=8)] + [ + dbc.Col(control_panel, width=4, style={'display': 'flex', 'margin-top': '1em'}), + dbc.Col(image_panel, width=8) + ] ), + dbc.Row(dbc.Col(modal)), dbc.Row(dbc.Col(meta)), ], fluid=True, diff --git a/src/assets/segmentation-style.css b/src/assets/segmentation-style.css index 4cb18ba..8e76b71 100644 --- a/src/assets/segmentation-style.css +++ b/src/assets/segmentation-style.css @@ -32,3 +32,8 @@ label { margin: 0; border-style: solid; } + +.accordion-button { + font-size: large; + font-weight: bold; +} diff --git a/src/frontend.py b/src/frontend.py index 2aff44e..7a1315c 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -43,7 +43,7 @@ PREFECT_TAGS = json.loads(os.getenv("PREFECT_TAGS", '["latent-space-explorer"]')) TIMEZONE = os.getenv("TIMEZONE", "US/Pacific") FLOW_NAME = os.getenv("FLOW_NAME", "") -FLOW_TYPE = "conda" +FLOW_TYPE = "podman" #"conda" CONDA_ENV_NAME = "dimension_reduction_pca" CONTENT_API_URL = os.getenv("CONTENT_API_URL", "http://localhost:8000/api/v0/models") From ab89eb0df92777d199a1a29d2566e713a2b58321 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Tue, 2 Apr 2024 14:45:59 -0700 Subject: [PATCH 59/62] adding current path to output directories and default examples directories --- src/app_layout.py | 11 ++++++----- src/frontend.py | 14 +++++++------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/app_layout.py b/src/app_layout.py index 2ab620d..24b022b 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -26,10 +26,13 @@ } DATA_OPTION = [ - {"label": "Synthetic Shapes", "value": "data/example_shapes/Demoshapes.npz"}, + { + "label": "Synthetic Shapes", + "value": f"{os.getcwd()}/data/example_shapes/Demoshapes.npz", + }, { "label": "Latent representations from encoder-decoder model", - "value": "data/example_latentrepresentation/f_vectors.parquet", + "value": f"{os.getcwd()}/data/example_latentrepresentation/f_vectors.parquet", }, ] READ_DIR = "data" @@ -239,8 +242,7 @@ ] ) ] - - ) + ), ], ) ] @@ -375,7 +377,6 @@ ) - # add alert pop up window modal = html.Div( [ diff --git a/src/frontend.py b/src/frontend.py index 2aff44e..b74355f 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -402,7 +402,7 @@ def submit_dimension_reduction_job( job_params["params_list"][-1]["params"]["io_parameters"] = io_parameters job_params["params_list"][-1]["params"]["io_parameters"][ "output_dir" - ] = "mlex_store" + ] = f"{os.getcwd()}/mlex_store" job_params["params_list"][-1]["params"]["io_parameters"]["uid_save"] = "" job_params["params_list"][-1]["params"]["io_parameters"]["uid_retrieve"] = None job_params["params_list"][-1]["params"]["model_parameters"] = input_params @@ -701,18 +701,18 @@ def update_heatmap( selected_indices, export="pillow" ) # Example dataset - elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": + elif 
"data/example_shapes/Demoshapes.npz" in selected_example_dataset: print("Demoshapes.npz") - selected_images = np.load("/app/work/" + selected_example_dataset)["arr_0"][ + selected_images = np.load(selected_example_dataset)["arr_0"][ selected_indices ] print(selected_images.shape) elif ( - selected_example_dataset - == "data/example_latentrepresentation/f_vectors.parquet" + "data/example_latentrepresentation/f_vectors.parquet" + in selected_example_dataset ): - print("f_vectors.parque") - df = pd.read_parquet("/app/work/" + selected_example_dataset) + print("f_vectors.parquet") + df = pd.read_parquet(selected_example_dataset) selected_images = df.iloc[selected_indices].values selected_images = np.array(selected_images) From 2e40a2c3bef6766686049914bb980f1bcf0de621 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Tue, 2 Apr 2024 14:46:18 -0700 Subject: [PATCH 60/62] added tiled api key --- docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yml b/docker-compose.yml index c9568aa..33854e9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,6 +15,7 @@ services: PREFECT_TAGS: "${PREFECT_TAGS}" PREFECT_API_URL: '${PREFECT_API_URL}' CONTENT_API_URL: '${CONTENT_API_URL}' + TILED_API_KEY: '${TILED_API_KEY}' FLOW_NAME: '${FLOW_NAME}' TIMEZONE: "${TIMEZONE}" volumes: From 6666baa80d40384d16cf3ea067afb58f20667291 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Tue, 2 Apr 2024 15:30:15 -0700 Subject: [PATCH 61/62] adding sample dimension reduction techniques --- src/assets/sample_models.json | 192 ++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 src/assets/sample_models.json diff --git a/src/assets/sample_models.json b/src/assets/sample_models.json new file mode 100644 index 0000000..9d601b9 --- /dev/null +++ b/src/assets/sample_models.json @@ -0,0 +1,192 @@ +[ + { + "content_id": "uid1", + "content_type": "model", + "name": "PCA", + "public": true, + "version": "1.0.0", + "type": "unsupervised", + "owner": "mlexchange team", + "service_type": "frontend", + "docker_image_uri": "ghcr.io/runboj/mlex_dimension_reduction_pca:main", + "conda_env_name": "mlex_dimension_reduction_pca", + "reference": "PCA algorithm", + "application": [ + "dimension reduction" + ], + "description": "PCA-based dimension reduction", + "gui_parameters": [ + { + "type": "dropdown", + "name": "ncomp-dropdown-menu", + "title": "Number of Components", + "value": 2, + "options": [ + { + "label": "2", + "value": 2 + }, + { + "label": "3", + "value": 3 + } + ], + "param_key": "n_components", + "comp_group": "all" + } + ], + "cmd": [ + "python pca_run.py" + ], + "kwargs": {}, + "compute_resources": { + "num_processors": 1, + "num_gpus": 0 + } + }, + { + "content_id": "uid2", + "content_type": "model", + "name": "UMAP", + "public": true, + "version": "1.0.0", + "type": "unsupervised", + "owner": "mlexchange team", + "service_type": "frontend", + "docker_image_uri": "ghcr.io/runboj/mlex_dimension_reduction_umap:main", + "conda_env_name": "mlex_dimension_reduction_umap", + "reference": "UMAP algorithm", + "application": [ + "dimension reduction" + ], + "description": "UMAP algotihtm for dimension reduction", + "gui_parameters": [ + { + "type": "dropdown", + "name": "ncomp-dropdown-menu-2", + "title": "Number of Components", + "value": 2, + "options": [ + { + "label": "2", + "value": 2 + }, + { + "label": "3", + "value": 3 + } + ], + "param_key": "n_components", + "comp_group": "all" + }, + { + "type": "dropdown", + "name": "mindist-dropdown-menu", + "title": "Min 
distance between points", + "value": 0.1, + "options": [ + { + "label": 0.1, + "value": 0.1 + }, + { + "label": 0.2, + "value": 0.2 + }, + { + "label": 0.3, + "value": 0.3 + }, + { + "label": 0.4, + "value": 0.4 + }, + { + "label": 0.5, + "value": 0.5 + }, + { + "label": 0.6, + "value": 0.6 + }, + { + "label": 0.7, + "value": 0.7 + }, + { + "label": 0.8, + "value": 0.8 + }, + { + "label": 0.9, + "value": 0.9 + }, + { + "label": 1.0, + "value": 1.0 + } + ], + "param_key": "min_dist", + "comp_group": "all" + }, + { + "type": "dropdown", + "name": "nneighbor-dropdown-menu", + "title": "Number of Nearest Neighbors", + "value": 15, + "options": [ + { + "label": 5, + "value": 5 + }, + { + "label": 10, + "value": 10 + }, + { + "label": 15, + "value": 15 + }, + { + "label": 20, + "value": 20 + }, + { + "label": 25, + "value": 25 + }, + { + "label": 30, + "value": 30 + }, + { + "label": 35, + "value": 35 + }, + { + "label": 40, + "value": 40 + }, + { + "label": 45, + "value": 45 + }, + { + "label": 50, + "value": 50 + } + ], + "param_key": "n_neighbors", + "comp_group": "all" + } + ], + "cmd": [ + "python umap_run.py" + ], + "kwargs": {}, + "compute_resources": { + "num_processors": 1, + "num_gpus": 0 + } + } +] From 0d5b82d7a3001e299ec6d6c4a4ea7dd56d1260df Mon Sep 17 00:00:00 2001 From: taxe10 Date: Tue, 2 Apr 2024 15:33:37 -0700 Subject: [PATCH 62/62] add slurm jobs --- .env.example | 5 +++ docker-compose.yml | 3 ++ src/frontend.py | 78 +++++++++++++++++++++++----------------------- 3 files changed, 47 insertions(+), 39 deletions(-) diff --git a/.env.example b/.env.example index 7ee3a5b..be2df74 100644 --- a/.env.example +++ b/.env.example @@ -12,3 +12,8 @@ TILED_API_KEY= READ_DIR=/path/to/read/data WRITE_DIR=/path/to/write/results + +# Slurm jobs +PARTITIONS='["p1", "p2"]' +RESERVATIONS='["r1", "r2"]' +MAX_TIME="1:00:00" diff --git a/docker-compose.yml b/docker-compose.yml index 33854e9..4f3a7e6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,6 +18,9 @@ services: TILED_API_KEY: '${TILED_API_KEY}' FLOW_NAME: '${FLOW_NAME}' TIMEZONE: "${TIMEZONE}" + PARTITIONS: "${PARTITIONS}" + RESERVATIONS: "${RESERVATIONS}" + MAX_TIME: "${MAX_TIME}" volumes: - $READ_DIR:/app/work/data - $WRITE_DIR:/app/work/mlex_store diff --git a/src/frontend.py b/src/frontend.py index b74355f..b448d97 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -44,11 +44,13 @@ TIMEZONE = os.getenv("TIMEZONE", "US/Pacific") FLOW_NAME = os.getenv("FLOW_NAME", "") FLOW_TYPE = "conda" -CONDA_ENV_NAME = "dimension_reduction_pca" CONTENT_API_URL = os.getenv("CONTENT_API_URL", "http://localhost:8000/api/v0/models") DEFAULT_ALGORITHM_DESCRIPTION = os.getenv("DEFAULT_ALGORITHM_DESCRIPTION") +PARTITIONS = os.getenv("PARTITIONS", None) +RESERVATIONS = os.getenv("RESERVATIONS", None) +MAX_TIME = os.getenv("MAX_TIME", "1:00:00") if FLOW_TYPE == "podman": TRAIN_PARAMS_EXAMPLE = { @@ -69,29 +71,12 @@ ], } - INFERENCE_PARAMS_EXAMPLE = { - "flow_type": "podman", - "params_list": [ - { - "image_name": "ghcr.io/runboj/mlex_dimension_reduction_pca", - "image_tag": "main", - "command": 'python -c \\"import time; time.sleep(30)\\"', - "params": { - "io_parameters": {"uid_save": "uid0001", "uid_retrieve": None} - }, - "volumes": [ - f"{READ_DIR}:/app/work/data", - f"{WRITE_DIR}:/app/work/mlex_store", - ], - }, - ], - } -else: +elif FLOW_TYPE == "conda": TRAIN_PARAMS_EXAMPLE = { "flow_type": "conda", "params_list": [ { - "conda_env_name": f"{CONDA_ENV_NAME}", + "conda_env_name": "mlex_dimension_reduction_pca", "params": { "io_parameters": 
{"uid_save": "uid0001", "uid_retrieve": None} }, @@ -99,15 +84,21 @@ ], } - INFERENCE_PARAMS_EXAMPLE = { - "flow_type": "conda", +else: + TRAIN_PARAMS_EXAMPLE = { + "flow_type": "slurm", "params_list": [ { - "conda_env_name": f"{CONDA_ENV_NAME}", + "job_name": "latent_space_explorer", + "num_nodes": 1, + "partitions": PARTITIONS, + "reservations": RESERVATIONS, + "max_time": MAX_TIME, + "conda_env_name": "mlex_dimension_reduction_pca", "params": { "io_parameters": {"uid_save": "uid0001", "uid_retrieve": None} }, - }, + } ], } @@ -130,8 +121,8 @@ def show_dimension_reduction_gui_layouts(selected_algo): data = requests.get(CONTENT_API_URL).json() # all model except Exception as e: print(f"Cannot access content api: {e}", flush=True) - with open(DEFAULT_ALGORITHM_DESCRIPTION, "r") as f: - data = [json.load(f)] + with open("src/assets/sample_models.json", "r") as f: + data = json.load(f) if selected_algo == "PCA": conditions = {"name": "PCA"} @@ -345,33 +336,42 @@ def submit_dimension_reduction_job( if data_clinic_file_path is not None: auto_io_params = io_parameters.copy() auto_io_params["model_dir"] = data_clinic_file_path + "/last.ckpt" + auto_params = ( + { + "io_parameters": auto_io_params, + "target_width": 64, + "target_height": 64, + "batch_size": 32, + }, + ) + # TODO: Use content registry to retrieve the model parameters if FLOW_TYPE == "podman": autoencoder_params = { "image_name": "ghcr.io/mlexchange/mlex_pytorch_autoencoders:main", "image_tag": "main", "command": "python src/predict_model.py", - "params": { - "io_parameters": auto_io_params, - "target_width": 64, - "target_height": 64, - "batch_size": 32, - }, + "params": auto_params, "volumes": [ f"{READ_DIR}:/app/work/data", f"{WRITE_DIR}:/app/work/mlex_store", ], } - else: + elif FLOW_TYPE == "conda": autoencoder_params = { "conda_env_name": "pytorch_autoencoders", - "params": { - "io_parameters": auto_io_params, - "target_width": 64, - "target_height": 64, - "batch_size": 32, - }, + "params": auto_params, "python_file_name": "mlex_pytorch_autoencoders/src/predict_model.py", } + else: + autoencoder_params = { + "job_name": "latent_space_explorer", + "num_nodes": 1, + "partitions": PARTITIONS, + "reservations": RESERVATIONS, + "max_time": MAX_TIME, + "conda_env_name": "pytorch_autoencoders", + "params": auto_params, + } job_params["params_list"].insert(0, autoencoder_params) # prefect