From aec9e8d2534c88982bc0e76698962c19117b04fe Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Sat, 3 Feb 2024 20:40:22 -0800 Subject: [PATCH 01/62] Add component_editor.py file --- .gitignore | 2 - src/dash_component_editor.py | 407 +++++++++++++++++++++++++++++++++++ 2 files changed, 407 insertions(+), 2 deletions(-) create mode 100644 src/dash_component_editor.py diff --git a/.gitignore b/.gitignore index 496e1ee..57516b2 100644 --- a/.gitignore +++ b/.gitignore @@ -11,8 +11,6 @@ data/.file_manager_vars.pkl data/mlexchange_store/ .DS_Store -src/dash_component_editor.py - # C extensions *.so diff --git a/src/dash_component_editor.py b/src/dash_component_editor.py new file mode 100644 index 0000000..181bae3 --- /dev/null +++ b/src/dash_component_editor.py @@ -0,0 +1,407 @@ +import re +from typing import Callable +# noinspection PyUnresolvedReferences +from inspect import signature, _empty + +from dash import html, dcc, dash_table, Input, Output, State, MATCH, ALL +import dash_bootstrap_components as dbc +import dash_daq as daq + +import base64 +#import PIL.Image +import io +#import plotly.express as px +# Procedural dash form generation + +""" +{'name', 'title', 'value', 'type', +""" + + +class SimpleItem(dbc.Col): + def __init__(self, + name, + base_id, + title=None, + param_key=None, + type='number', + debounce=True, + **kwargs): + + if param_key == None: + param_key = name + self.label = dbc.Label(title) + self.input = dbc.Input(type=type, + debounce=debounce, + id={**base_id, + 'name': name, + 'param_key': param_key}, + **kwargs) + + super(SimpleItem, self).__init__(children=[self.label, self.input]) + + +class FloatItem(SimpleItem): + pass + + +class IntItem(SimpleItem): + def __init__(self, *args, **kwargs): + if 'min' not in kwargs: + kwargs['min'] = -9007199254740991 + super(IntItem, self).__init__(*args, step=1, **kwargs) + + +class StrItem(SimpleItem): + def __init__(self, *args, **kwargs): + super(StrItem, self).__init__(*args, type='text', **kwargs) + + +class SliderItem(dbc.Col): + def __init__(self, + name, + base_id, + title=None, + param_key=None, + debounce=True, + visible=True, + **kwargs): + + if param_key == None: + param_key = name + self.label = dbc.Label(title) + self.input = dcc.Slider(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'input'}, + tooltip={"placement": "bottom", "always_visible": True}, + **kwargs) + + style = {} + if not visible: + style['display'] = 'none' + + super(SliderItem, self).__init__(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'form_group'}, + children=[self.label, self.input], + style=style) + + +class DropdownItem(dbc.Col): + def __init__(self, + name, + base_id, + title=None, + param_key=None, + debounce=True, + visible=True, + **kwargs): + + if param_key == None: + param_key = name + self.label = dbc.Label(title) + self.input = dcc.Dropdown(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'input'}, + **kwargs) + + style = {} + if not visible: + style['display'] = 'none' + + super(DropdownItem, self).__init__(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'form_group'}, + children=[self.label, self.input], + style=style) + + +class RadioItem(dbc.Col): + def __init__(self, + name, + base_id, + title=None, + param_key=None, + visible=True, + **kwargs): + + if param_key == None: + param_key = name + self.label = dbc.Label(title) + self.input = dbc.RadioItems(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'input'}, + **kwargs) + + style = 
{} + if not visible: + style['display'] = 'none' + + super(RadioItem, self).__init__(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'form_group'}, + children=[self.label, self.input], + style=style) + + +class BoolItem(dbc.Col): + def __init__(self, + name, + base_id, + title=None, + param_key=None, + visible=True, + **kwargs): + + if param_key == None: + param_key = name + self.label = dbc.Label(title) + self.input = daq.ToggleSwitch(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'input'}, + **kwargs) + self.output_label = dbc.Label('False/True') + + style = {} + if not visible: + style['display'] = 'none' + + super(BoolItem, self).__init__(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'form_group'}, + children=[self.label, self.input, self.output_label], + style=style) + + +class ImgItem(dbc.Col): + def __init__(self, + name, + src, + base_id, + title=None, + param_key=None, + width='100px', + visible=True, + **kwargs): + + if param_key == None: + param_key = name + + if not (width.endswith('px') or width.endswith('%')): + width = width + 'px' + + self.label = dbc.Label(title) + + encoded_image = base64.b64encode(open(src, 'rb').read()) + self.src = 'data:image/png;base64,{}'.format(encoded_image.decode()) + self.input_img = html.Img(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'input'}, + src=self.src, + style={'height':'auto', 'width':width}, + **kwargs) + + style = {} + if not visible: + style['display'] = 'none' + + super(ImgItem, self).__init__(id={**base_id, + 'name': name, + 'param_key': param_key, + 'layer': 'form_group'}, + children=[self.label, self.input_img], + style=style) + + +# class GraphItem(dbc.Col): +# def __init__(self, +# name, +# base_id, +# title=None, +# param_key=None, +# visible=True, +# figure = None, +# **kwargs): +# +# self.name = name +# if param_key == None: +# param_key = name +# self.label = dbc.Label(title) +# self.input_graph = dcc.Graph(id={**base_id, +# 'name': name, +# 'param_key': param_key, +# 'layer': 'input'}, +# **kwargs) +# +# self.input_upload = dcc.Upload(id={**base_id, +# 'name': name+'_upload', +# 'param_key': param_key, +# 'layer': 'input'}, +# children=html.Div([ +# 'Drag and Drop or ', +# html.A('Select Files') +# ]), +# style={ +# 'width': '95%', +# 'height': '60px', +# 'lineHeight': '60px', +# 'borderWidth': '1px', +# 'borderStyle': 'dashed', +# 'borderRadius': '5px', +# 'textAlign': 'center', +# 'margin': '10px' +# }, +# multiple = False) +# +# style = {} +# if not visible: +# style['display'] = 'none' +# +# super(GraphItem, self).__init__(id={**base_id, +# 'name': name, +# 'param_key': param_key, +# 'layer': 'form_group'}, +# children=[self.label, self.input_upload, self.input_graph], +# style=style) +# +# # Issue: cannot get inputs from the callback decorator +# def return_upload(self, *args): +# print(f'before if, args {args}') +# if args: +# print(f'args {args}') +# img_bytes = base64.b64decode(contents.split(",")[1]) +# img = PIL.Image.open(io.BytesIO(img_bytes)) +# fig = px.imshow(img, binary_string=True) +# return fig +# +# def init_callbacks(self, app): +# app.callback(Output({**self.id, +# 'name': self.name, +# 'layer': 'input'}, 'figure', allow_duplicate=True), +# Input({**self.id, +# 'name': self.name+'_upload', +# 'layer': 'input'}, +# 'contents'), +# State({**self.id, +# 'name': self.name+'_upload', +# 'layer': 'input'}, 'last_modified'), +# State({**self.id, +# 'name': self.name+'_upload', +# 'layer': 'input'}, 'filename'), +# 
prevent_initial_call=True)(self.return_upload()) + + + +class ParameterEditor(dbc.Form): + + type_map = {float: FloatItem, + int: IntItem, + str: StrItem, + } + + def __init__(self, _id, parameters, **kwargs): + self._parameters = parameters + + super(ParameterEditor, self).__init__(id=_id, children=[], className='kwarg-editor', **kwargs) + self.children = self.build_children() + + def init_callbacks(self, app): + app.callback(Output(self.id, 'n_submit'), + Input({**self.id, + 'name': ALL}, + 'value'), + State(self.id, 'n_submit'), + ) + + for child in self.children: + if hasattr(child,"init_callbacks"): + child.init_callbacks(app) + + + @property + def values(self): + return {param['name']: param.get('value', None) for param in self._parameters} + + @property + def parameters(self): + return {param['name']: param for param in self._parameters} + + def _determine_type(self, parameter_dict): + if 'type' in parameter_dict: + if parameter_dict['type'] in self.type_map: + return parameter_dict['type'] + elif parameter_dict['type'].__name__ in self.type_map: + return parameter_dict['type'].__name__ + elif type(parameter_dict['value']) in self.type_map: + return type(parameter_dict['value']) + raise TypeError(f'No item type could be determined for this parameter: {parameter_dict}') + + def build_children(self, values=None): + children = [] + for parameter_dict in self._parameters: + parameter_dict = parameter_dict.copy() + if values and parameter_dict['name'] in values: + parameter_dict['value'] = values[parameter_dict['name']] + type = self._determine_type(parameter_dict) + parameter_dict.pop('type', None) + item = self.type_map[type](**parameter_dict, base_id=self.id) + children.append(item) + + return children + + +class JSONParameterEditor(ParameterEditor): + type_map = {'float': FloatItem, + 'int': IntItem, + 'str': StrItem, + 'slider': SliderItem, + 'dropdown': DropdownItem, + 'radio': RadioItem, + 'bool': BoolItem, + 'img': ImgItem, + #'graph': GraphItem, + } + + def __init__(self, _id, json_blob, **kwargs): + super(ParameterEditor, self).__init__(id=_id, children=[], className='kwarg-editor', **kwargs) + self._json_blob = json_blob + self.children = self.build_children() + + def build_children(self, values=None): + children = [] + for json_record in self._json_blob: + ... + # build a parameter dict from self.json_blob + ... 
+ type = json_record.get('type', self._determine_type(json_record)) + json_record = json_record.copy() + if values and json_record['name'] in values: + json_record['value'] = values[json_record['name']] + json_record.pop('type', None) + item = self.type_map[type](**json_record, base_id=self.id) + children.append(item) + + return children + + +class KwargsEditor(ParameterEditor): + def __init__(self, instance_index, func: Callable, **kwargs): + self.func = func + self._instance_index = instance_index + + parameters = [{'name': name, 'value': param.default} for name, param in signature(func).parameters.items() + if param.default is not _empty] + + super(KwargsEditor, self).__init__(dict(index=instance_index, type='kwargs-editor'), parameters=parameters, **kwargs) + + def new_record(self): + return {name: p.default for name, p in signature(self.func).parameters.items() if p.default is not _empty} From ec15317257ceb141d3296f2042a6e7ad34a22080 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Mon, 5 Feb 2024 14:29:40 -0800 Subject: [PATCH 02/62] Correct typo - first commit to fix the filepath issue --- src/frontend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frontend.py b/src/frontend.py index 5018236..59feb61 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -576,7 +576,7 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): def toggle_modal(n_submit, n_apply, is_open, input_data): ''' - This callback pop up a winder to remind user to follow this flow: + This callback pop up a window to remind user to follow this flow: select dataset -> Submit dimension reduction job -> Apply clustering Args: n_submit (int): Number of clicks on the 'Submit' button. From 19c7bf12306165793a33c392da1b6b4cc2c6789a Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 7 Feb 2024 21:19:31 -0800 Subject: [PATCH 03/62] Remove input_data dependence on the submit_dimension_reduction_job --- src/app_layout.py | 2 ++ src/frontend.py | 12 ++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/app_layout.py b/src/app_layout.py index 5664c1b..60ce790 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -279,6 +279,8 @@ dcc.Store(id='experiment-id', data=None), # data_label_schema, latent vectors, clusters dcc.Store(id='input_data', data=None), + # to store the example dataset + dcc.Store(id='example_data', data=None), dcc.Store(id='input_labels', data=None), dcc.Store(id='label_schema', data=None), dcc.Store(id='model_id', data=None), diff --git a/src/frontend.py b/src/frontend.py index 59feb61..b034705 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -82,12 +82,12 @@ def show_clustering_gui_layouts(selected_algo): return item_list @app.callback( - Output('input_data', 'data'), + Output('input_data', 'data'), #Output('example_data', 'data'), Output('input_labels', 'data'), Output('label_schema', 'data'), Output('label-dropdown', 'options'), Output('user-upload-data-dir', 'data'), - Input('dataset-selection', 'value'), # Example dataset + Input('dataset-selection', 'value'), Input({'base_id': 'file-manager', 'name': 'docker-file-paths'},'data'), # FM Input('feature-vector-model-list', 'value'), # data clinic ) @@ -180,7 +180,6 @@ def job_content_dict(content): State('dataset-selection', 'value'), State('user-upload-data-dir', 'data'), State('feature-vector-model-list', 'value'), - State('input_data', 'data'), State('model_id', 'data'), State('algo-dropdown', 'value'), State('additional-model-params', 'children'), @@ -188,8 +187,7 @@ 
def job_content_dict(content): prevent_initial_call=True ) def submit_dimension_reduction_job(submit_n_clicks, - selected_dataset, user_upload_data_dir, data_clinic_file_path, - input_data, model_id, selected_algo, children): + selected_dataset, user_upload_data_dir, data_clinic_file_path, model_id, selected_algo, children): """ This callback is triggered every time the Submit button is hit: - compute latent vectors, which will be saved in data/output/experiment_id @@ -210,7 +208,9 @@ def submit_dimension_reduction_job(submit_n_clicks, heatmap: empty heatmap figure interval: set interval component to trigger to find the latent_vectors.npy file (-1) """ - if not submit_n_clicks or not input_data: + if not submit_n_clicks: + raise PreventUpdate + if not selected_dataset and not user_upload_data_dir and not data_clinic_file_path: raise PreventUpdate input_params = {} From 23b86bee90cf6a18e58d6c7b704ff249e3d3aa74 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 7 Feb 2024 21:46:01 -0800 Subject: [PATCH 04/62] Remove dependency on input_data of toggle_modal function --- src/frontend.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index b034705..3c7e8e5 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -570,11 +570,13 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): ], [ State("modal", "is_open"), - State('input_data', 'data'), + State('dataset-selection', 'value'), + State('user-upload-data-dir', 'data'), + State('feature-vector-model-list', 'value'), ] ) def toggle_modal(n_submit, n_apply, - is_open, input_data): + is_open, selected_dataset, user_upload_data_dir, data_clinic_file_path): ''' This callback pop up a window to remind user to follow this flow: select dataset -> Submit dimension reduction job -> Apply clustering @@ -587,10 +589,13 @@ def toggle_modal(n_submit, n_apply, is_open (bool): New state of the modal window. modal_body_text (str): Text to be displayed in the modal body. ''' + at_least_one_dataset_selected = False + if selected_dataset or user_upload_data_dir or data_clinic_file_path: + at_least_one_dataset_selected = True - if n_submit and input_data is None: + if n_submit and not at_least_one_dataset_selected: return True, "Please select an example dataset or upload your own zipped dataset." - elif n_apply and input_data is None: + elif n_apply and not at_least_one_dataset_selected: return True, "Please select an example dataset or upload your own zipped dataset." elif n_apply and n_submit is None: return True, "Please select a dimension reduction algorithm and click 'Submit' button before clustering." 
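[Editorial sketch between patches 04 and 05 — not part of the patch series.] Patches 03 and 04 move validation off the cached input_data store: the callbacks now read the three possible dataset sources (example dataset, FileManager upload, DataClinic output) as State and warn through a modal when none of them is selected. The snippet below is a minimal, self-contained sketch of that pattern; the component ids ("example-dataset", "upload-dir", "data-clinic-file", "modal") are hypothetical stand-ins and only illustrate the shape of the callback, not the app's actual layout.

from dash import Dash, html, dcc, Input, Output, State
import dash_bootstrap_components as dbc

app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

app.layout = html.Div(
    [
        dcc.Dropdown(id="example-dataset", options=["demo-shapes"], value=None),
        dcc.Store(id="upload-dir", data=None),       # stand-in for the FileManager output
        dcc.Dropdown(id="data-clinic-file", value=None),
        dbc.Button("Submit", id="submit"),
        dbc.Modal(dbc.ModalBody(id="modal-body"), id="modal", is_open=False),
    ]
)


@app.callback(
    Output("modal", "is_open"),
    Output("modal-body", "children"),
    Input("submit", "n_clicks"),
    State("example-dataset", "value"),
    State("upload-dir", "data"),
    State("data-clinic-file", "value"),
    prevent_initial_call=True,
)
def warn_if_no_dataset(n_clicks, example_dataset, upload_dir, data_clinic_file):
    # Any one of the three sources is enough; only warn when all are empty.
    if example_dataset or upload_dir or data_clinic_file:
        return False, ""
    return True, "Please select an example dataset or upload your own zipped dataset."


if __name__ == "__main__":
    app.run(debug=True)
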
From d1803414614302d3791d0b0a7f2bce7762c53ec4 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 7 Feb 2024 22:06:40 -0800 Subject: [PATCH 05/62] Rename selected_dataset to selected_example_dataset, and keep select ed_dataset to determine which 3 choice --- src/frontend.py | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index 3c7e8e5..b9d9b26 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -87,15 +87,15 @@ def show_clustering_gui_layouts(selected_algo): Output('label_schema', 'data'), Output('label-dropdown', 'options'), Output('user-upload-data-dir', 'data'), - Input('dataset-selection', 'value'), - Input({'base_id': 'file-manager', 'name': 'docker-file-paths'},'data'), # FM - Input('feature-vector-model-list', 'value'), # data clinic + Input('example-dataset-selection', 'value'), # example dataset + Input({'base_id': 'file-manager', 'name': 'docker-file-paths'},'data'), # FM dataset + Input('feature-vector-model-list', 'value'), # data clinic dataset ) -def update_data_n_label_schema(selected_dataset, upload_file_paths, data_clinic_file_path): +def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data_clinic_file_path): ''' This callback updates the selected dataset from the provided example datasets, as well as labels, and label schema Args: - dataset-selection: selected dataset from the provided example datasets, not the one that user uploaded + example-dataset-selection: selected dataset from the provided example datasets, not the one that user uploaded upload_file_pahts: Data project info, the user uploaded zip file using FileManager, list Returns: input_data: input image data, numpy.ndarray @@ -126,14 +126,14 @@ def update_data_n_label_schema(selected_dataset, upload_file_paths, data_clinic_ labels = np.full((data.shape[0],), -1) user_upload_data_dir = os.path.dirname(upload_file_paths[0]['uri']) # Example dataset option 1 - elif selected_dataset == "data/example_shapes/Demoshapes.npz": - data = np.load("/app/work/" + selected_dataset)['arr_0'] + elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": + data = np.load("/app/work/" + selected_example_dataset)['arr_0'] labels = np.load("/app/work/data/example_shapes/DemoLabels.npy") f = open("/app/work/data/example_shapes/label_schema.json") label_schema = json.load(f) # Example dataset option 2 - elif selected_dataset == "data/example_latentrepresentation/f_vectors.parquet": - df = pd.read_parquet("/app/work/" + selected_dataset) + elif selected_example_dataset == "data/example_latentrepresentation/f_vectors.parquet": + df = pd.read_parquet("/app/work/" + selected_example_dataset) data = df.values labels = np.full((df.shape[0],), -1) # DataClinic options @@ -177,7 +177,7 @@ def job_content_dict(content): ], Input('run-algo', 'n_clicks'), [ - State('dataset-selection', 'value'), + State('example-dataset-selection', 'value'), State('user-upload-data-dir', 'data'), State('feature-vector-model-list', 'value'), State('model_id', 'data'), @@ -187,7 +187,7 @@ def job_content_dict(content): prevent_initial_call=True ) def submit_dimension_reduction_job(submit_n_clicks, - selected_dataset, user_upload_data_dir, data_clinic_file_path, model_id, selected_algo, children): + selected_example_dataset, user_upload_data_dir, data_clinic_file_path, model_id, selected_algo, children): """ This callback is triggered every time the Submit button is hit: - compute latent vectors, which will be saved in 
data/output/experiment_id @@ -195,7 +195,7 @@ def submit_dimension_reduction_job(submit_n_clicks, - reset heatmap to no image Args: submit_n_clicks: num of clicks for the submit button - selected_dataset: selected example dataset + selected_example_dataset: selected example dataset user_upload_data_dir: user uploaded dataset model_id: uid of selected dimension reduciton algo selected_algo: selected dimension reduction algo @@ -210,7 +210,7 @@ def submit_dimension_reduction_job(submit_n_clicks, """ if not submit_n_clicks: raise PreventUpdate - if not selected_dataset and not user_upload_data_dir and not data_clinic_file_path: + if not selected_example_dataset and not user_upload_data_dir and not data_clinic_file_path: raise PreventUpdate input_params = {} @@ -247,6 +247,8 @@ def submit_dimension_reduction_job(submit_n_clicks, selected_dataset = user_upload_data_dir elif data_clinic_file_path is not None: selected_dataset = data_clinic_file_path + else: + selected_dataset = selected_example_dataset # check which dimension reduction algo, then compose command if selected_algo == 'PCA': @@ -570,13 +572,13 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): ], [ State("modal", "is_open"), - State('dataset-selection', 'value'), + State('example-dataset-selection', 'value'), State('user-upload-data-dir', 'data'), State('feature-vector-model-list', 'value'), ] ) def toggle_modal(n_submit, n_apply, - is_open, selected_dataset, user_upload_data_dir, data_clinic_file_path): + is_open, selected_example_dataset, user_upload_data_dir, data_clinic_file_path): ''' This callback pop up a window to remind user to follow this flow: select dataset -> Submit dimension reduction job -> Apply clustering @@ -590,7 +592,7 @@ def toggle_modal(n_submit, n_apply, modal_body_text (str): Text to be displayed in the modal body. 
''' at_least_one_dataset_selected = False - if selected_dataset or user_upload_data_dir or data_clinic_file_path: + if selected_example_dataset or user_upload_data_dir or data_clinic_file_path: at_least_one_dataset_selected = True if n_submit and not at_least_one_dataset_selected: From 080b0878a8687475f18e8e51a9a145e55c273ca1 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Thu, 8 Feb 2024 09:23:08 -0800 Subject: [PATCH 06/62] Simplify the warining logic in toggle_modal --- src/app_layout.py | 5 ++--- src/frontend.py | 7 +++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/app_layout.py b/src/app_layout.py index 60ce790..d59fa00 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -60,7 +60,7 @@ ), dbc.Label('Or try Example Dataset', className='mr-2'), dcc.Dropdown( - id='dataset-selection', + id='example-dataset-selection', options=DATA_OPTION, #value = DATA_OPTION[0]['value'], clearable=False, @@ -279,8 +279,7 @@ dcc.Store(id='experiment-id', data=None), # data_label_schema, latent vectors, clusters dcc.Store(id='input_data', data=None), - # to store the example dataset - dcc.Store(id='example_data', data=None), + dcc.Store(id='example_data', data=None), # to store the example dataset dcc.Store(id='input_labels', data=None), dcc.Store(id='label_schema', data=None), dcc.Store(id='model_id', data=None), diff --git a/src/frontend.py b/src/frontend.py index b9d9b26..2e6d896 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -595,10 +595,9 @@ def toggle_modal(n_submit, n_apply, if selected_example_dataset or user_upload_data_dir or data_clinic_file_path: at_least_one_dataset_selected = True - if n_submit and not at_least_one_dataset_selected: - return True, "Please select an example dataset or upload your own zipped dataset." - elif n_apply and not at_least_one_dataset_selected: - return True, "Please select an example dataset or upload your own zipped dataset." + if ((n_submit and not at_least_one_dataset_selected) or + (n_apply and not at_least_one_dataset_selected)): + return True, "Please select an example dataset or upload your own zipped dataset or choose DataClinic outpu." elif n_apply and n_submit is None: return True, "Please select a dimension reduction algorithm and click 'Submit' button before clustering." 
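[Editorial sketch between patches 06 and 07 — not part of the patch series.] Patches 05 and 06 repeat the same three-way source check (FileManager upload, DataClinic output, example dataset) that already appears in submit_dimension_reduction_job, and the next patch applies it to update_heatmap as well, always with the priority FileManager > DataClinic > example dataset. The helper below is a hypothetical sketch of how that check could be factored out into one place; it does not appear in any patch in this series.

def resolve_dataset_source(user_upload_data_dir, data_clinic_file_path, selected_example_dataset):
    """Return (kind, path) for the active dataset, or (None, None) when nothing is selected.

    Priority mirrors the callbacks in this series: FileManager > DataClinic > example dataset.
    """
    if user_upload_data_dir:
        return "file_manager", user_upload_data_dir
    if data_clinic_file_path:
        return "data_clinic", data_clinic_file_path
    if selected_example_dataset:
        return "example", selected_example_dataset
    return None, None


# Gate a callback the same way toggle_modal and submit_dimension_reduction_job do:
kind, path = resolve_dataset_source(None, None, "data/example_shapes/Demoshapes.npz")
assert kind == "example" and path.endswith("Demoshapes.npz")
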
From b7b0dae18ef2619707dc8bec42a123b18db71a0e Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Thu, 8 Feb 2024 13:32:44 -0800 Subject: [PATCH 07/62] Remove dependency of input_data in update_heatmap function --- src/app_layout.py | 2 - src/frontend.py | 147 ++++++++++++++++++++++++++++++++-------------- 2 files changed, 104 insertions(+), 45 deletions(-) diff --git a/src/app_layout.py b/src/app_layout.py index d59fa00..08b3c36 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -278,8 +278,6 @@ dcc.Store(id='run-counter', data=0), dcc.Store(id='experiment-id', data=None), # data_label_schema, latent vectors, clusters - dcc.Store(id='input_data', data=None), - dcc.Store(id='example_data', data=None), # to store the example dataset dcc.Store(id='input_labels', data=None), dcc.Store(id='label_schema', data=None), dcc.Store(id='model_id', data=None), diff --git a/src/frontend.py b/src/frontend.py index 2e6d896..227c967 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -82,14 +82,17 @@ def show_clustering_gui_layouts(selected_algo): return item_list @app.callback( - Output('input_data', 'data'), #Output('example_data', 'data'), - Output('input_labels', 'data'), - Output('label_schema', 'data'), - Output('label-dropdown', 'options'), - Output('user-upload-data-dir', 'data'), - Input('example-dataset-selection', 'value'), # example dataset - Input({'base_id': 'file-manager', 'name': 'docker-file-paths'},'data'), # FM dataset - Input('feature-vector-model-list', 'value'), # data clinic dataset + [ + Output('input_labels', 'data'), + Output('label_schema', 'data'), + Output('label-dropdown', 'options'), + Output('user-upload-data-dir', 'data'), + ], + [ + Input('example-dataset-selection', 'value'), # example dataset + Input({'base_id': 'file-manager', 'name': 'docker-file-paths'},'data'), # FM dataset + Input('feature-vector-model-list', 'value'), # data clinic dataset + ] ) def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data_clinic_file_path): ''' @@ -104,50 +107,45 @@ def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data label_dropdown: label dropdown options user_upload_data_dir: dir name for the user uploaded zip file ''' - # FM + labels = None + label_schema = {} + + # check if user is using user uploaded zip file or example dataset or data clinic file + # priority level: FileManage > DataClinic > Example Datasets + + # FileManager - user uploaded zip file of images data_project = DataProject() data_project.init_from_dict(upload_file_paths) data_set = data_project.data # list of len 1920, each element is a local_dataset.LocalDataset object - - data = None - labels = None - label_schema = {} options = [] user_upload_data_dir = None - - # FM options if len(data_set) > 0: - data = [] - for i in range(len(data_set)): #if dataset too large, dash will exit with code 247, 137 - image, uri = data_project.data[i].read_data(export='pillow') - data.append(np.array(image)) - data = np.array(data) - print(data.shape) - labels = np.full((data.shape[0],), -1) + labels = np.full((len(data_set),), -1) user_upload_data_dir = os.path.dirname(upload_file_paths[0]['uri']) + # DataClinic options + elif data_clinic_file_path is not None: + df = pd.read_parquet(data_clinic_file_path) + # data = df.values + labels = np.full((df.shape[0],), -1) # Example dataset option 1 elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": - data = np.load("/app/work/" + selected_example_dataset)['arr_0'] + # data = np.load("/app/work/" + 
selected_example_dataset)['arr_0'] labels = np.load("/app/work/data/example_shapes/DemoLabels.npy") f = open("/app/work/data/example_shapes/label_schema.json") label_schema = json.load(f) # Example dataset option 2 elif selected_example_dataset == "data/example_latentrepresentation/f_vectors.parquet": df = pd.read_parquet("/app/work/" + selected_example_dataset) - data = df.values - labels = np.full((df.shape[0],), -1) - # DataClinic options - elif data_clinic_file_path is not None: - df = pd.read_parquet(data_clinic_file_path) - data = df.values + # data = df.values labels = np.full((df.shape[0],), -1) + if label_schema: options = [{'label': f'Label {label}', 'value': label} for label in label_schema] options.insert(0, {'label': 'Unlabeled', 'value': -1}) options.insert(0, {'label': 'All', 'value': -2}) - return data, labels, label_schema, options, user_upload_data_dir + return labels, label_schema, options, user_upload_data_dir def job_content_dict(content): job_content = {# 'mlex_app': content['name'], @@ -177,9 +175,9 @@ def job_content_dict(content): ], Input('run-algo', 'n_clicks'), [ - State('example-dataset-selection', 'value'), - State('user-upload-data-dir', 'data'), - State('feature-vector-model-list', 'value'), + State('example-dataset-selection', 'value'), # 2 example dataset + State('user-upload-data-dir', 'data'), # FM + State('feature-vector-model-list', 'value'), # DataClinic State('model_id', 'data'), State('algo-dropdown', 'value'), State('additional-model-params', 'children'), @@ -187,7 +185,8 @@ def job_content_dict(content): prevent_initial_call=True ) def submit_dimension_reduction_job(submit_n_clicks, - selected_example_dataset, user_upload_data_dir, data_clinic_file_path, model_id, selected_algo, children): + selected_example_dataset, user_upload_data_dir, data_clinic_file_path, + model_id, selected_algo, children): """ This callback is triggered every time the Submit button is hit: - compute latent vectors, which will be saved in data/output/experiment_id @@ -242,7 +241,7 @@ def submit_dimension_reduction_job(submit_n_clicks, output_path = OUTPUT_DIR / experiment_id output_path.mkdir(parents=True, exist_ok=True) - # check if user is using user uploaded zip file or example dataset or data clinic file + # check if user is using user uploaded zip file or example dataset or data clinic file if user_upload_data_dir is not None: selected_dataset = user_upload_data_dir elif data_clinic_file_path is not None: @@ -266,7 +265,6 @@ def submit_dimension_reduction_job(submit_n_clicks, print("respnse: ", response) # job_response = get_job(user=None, mlex_app=job_content['mlex_app']) - return experiment_id, 'cluster', -1, -2, go.Figure(go.Heatmap()), -1 @app.callback( @@ -404,6 +402,7 @@ def update_scatter_plot(latent_vectors, selected_cluster, selected_label, scatte if latent_vectors is None or children is None: raise PreventUpdate latent_vectors = np.array(latent_vectors) + print("latent vector shape:", latent_vectors.shape) n_components = children['props']['children'][0]["props"]["children"][1]["props"]["value"] @@ -461,34 +460,96 @@ def update_scatter_plot(latent_vectors, selected_cluster, selected_label, scatte Input('scatter', 'selectedData'), Input('mean-std-toggle', 'value'), ], - State('input_data', 'data'), + [ + State('example-dataset-selection', 'value'), # example dataset + State({'base_id': 'file-manager', 'name': 'docker-file-paths'},'data'), # FM dataset + State('feature-vector-model-list', 'value'), # data clinic dataset + + ], prevent_initial_call=True + ) -def 
update_heatmap(click_data, selected_data, display_option, input_data): +def update_heatmap(click_data, selected_data, display_option, + selected_example_dataset, upload_file_paths, data_clinic_file_path): ''' This callback update the heatmap Args: click_data: clicked data on scatter figure selected_data: lasso or rect selected data points on scatter figure display_option: option to display mean or std - input_data: input image data Returns: fig: updated heatmap ''' - if input_data is None: + ################## + print("seleced_example_dataset:", selected_example_dataset) + + if not selected_example_dataset and not upload_file_paths and not data_clinic_file_path: raise PreventUpdate - images = np.array(input_data) + print(selected_data is not None) + + # user select a group of points if selected_data is not None and len(selected_data['points']) > 0: selected_indices = [point['customdata'][0] for point in selected_data['points']] # Access customdata for the original indices - selected_images = images[selected_indices] + print("selected_indices", selected_indices) + + ### FileManager + # print("upload_file_paths") # if not selected, its an empty list not None + selected_images = [] + data_project = DataProject() + data_project.init_from_dict(upload_file_paths) + data_set = data_project.data + if len(data_set) > 0: + for i in selected_indices: + image, uri = data_project.data[i].read_data(export='pillow') + selected_images.append(np.array(image)) + ### DataClinic + elif data_clinic_file_path is not None: + print("data_clinic_file_path") + df = pd.read_parquet(data_clinic_file_path) + selected_images = df.iloc[selected_indices].values + ### Example dataset + elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": + print("Demoshapes.npz") + selected_images = np.load("/app/work/" + selected_example_dataset)['arr_0'][selected_indices] + print(selected_images.shape) + elif selected_example_dataset == "data/example_latentrepresentation/f_vectors.parquet": + print("f_vectors.parque") + df = pd.read_parquet("/app/work/" + selected_example_dataset) + selected_images = df.iloc[selected_indices].values + selected_images = np.array(selected_images) + + print("selected_images shape:", selected_images.shape) + + # display options if display_option == 'mean': heatmap_data = go.Heatmap(z=np.mean(selected_images, axis=0)) elif display_option == 'sigma': heatmap_data = go.Heatmap(z=np.std(selected_images, axis=0)) + elif click_data is not None and len(click_data['points']) > 0: selected_index = click_data['points'][0]['customdata'][0] # click_data['points'][0]['pointIndex'] - heatmap_data = go.Heatmap(z=images[selected_index]) + ### FileManager + clicked_image = [] + data_project = DataProject() + data_project.init_from_dict(upload_file_paths) + data_set = data_project.data + if len(data_set) > 0: + clicked_image = data_project.data[selected_index].read_data(export='pillow') + ### DataClinic + elif data_clinic_file_path is not None: + df = pd.read_parquet(data_clinic_file_path) + clicked_image = df.iloc[selected_index].values + ### Example dataset + elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": + clicked_image = np.load("/app/work/" + selected_example_dataset)['arr_0'][selected_index] + elif selected_example_dataset == "data/example_latentrepresentation/f_vectors.parquet": + df = pd.read_parquet("/app/work/" + selected_example_dataset) + clicked_image = df.iloc[selected_index].values + clicked_image = np.array(clicked_image) + + heatmap_data = go.Heatmap(z=clicked_image) 
+ else: heatmap_data = go.Heatmap() From 3251a13a14341b562af6e5405c9aab64e36e9d42 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Thu, 8 Feb 2024 13:41:45 -0800 Subject: [PATCH 08/62] Show clicked single iamge works --- src/frontend.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index 227c967..8a2ae8a 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -530,12 +530,11 @@ def update_heatmap(click_data, selected_data, display_option, elif click_data is not None and len(click_data['points']) > 0: selected_index = click_data['points'][0]['customdata'][0] # click_data['points'][0]['pointIndex'] ### FileManager - clicked_image = [] data_project = DataProject() data_project.init_from_dict(upload_file_paths) data_set = data_project.data if len(data_set) > 0: - clicked_image = data_project.data[selected_index].read_data(export='pillow') + clicked_image, uri = data_project.data[selected_index].read_data(export='pillow') ### DataClinic elif data_clinic_file_path is not None: df = pd.read_parquet(data_clinic_file_path) @@ -548,6 +547,7 @@ def update_heatmap(click_data, selected_data, display_option, clicked_image = df.iloc[selected_index].values clicked_image = np.array(clicked_image) + heatmap_data = go.Heatmap(z=clicked_image) else: @@ -600,7 +600,7 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): selected_indices = [point['customdata'][0] for point in selected_data['points']] # Access customdata for the original indices selected_clusters = [] - if clusters: + if clusters is not None: selected_clusters = clusters[selected_indices] selected_labels = assigned_labels[selected_indices] From 73d1b4736902738a982ecc6afc8e7cae91167e24 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Thu, 8 Feb 2024 14:17:29 -0800 Subject: [PATCH 09/62] Correct the error before clustering was done, and select --- src/frontend.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index 8a2ae8a..f207539 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -480,14 +480,9 @@ def update_heatmap(click_data, selected_data, display_option, Returns: fig: updated heatmap ''' - ################## - print("seleced_example_dataset:", selected_example_dataset) - if not selected_example_dataset and not upload_file_paths and not data_clinic_file_path: raise PreventUpdate - print(selected_data is not None) - # user select a group of points if selected_data is not None and len(selected_data['points']) > 0: selected_indices = [point['customdata'][0] for point in selected_data['points']] # Access customdata for the original indices @@ -500,12 +495,14 @@ def update_heatmap(click_data, selected_data, display_option, data_project.init_from_dict(upload_file_paths) data_set = data_project.data if len(data_set) > 0: + print("FM file") for i in selected_indices: image, uri = data_project.data[i].read_data(export='pillow') selected_images.append(np.array(image)) ### DataClinic elif data_clinic_file_path is not None: print("data_clinic_file_path") + print(data_clinic_file_path) df = pd.read_parquet(data_clinic_file_path) selected_images = df.iloc[selected_indices].values ### Example dataset @@ -546,7 +543,6 @@ def update_heatmap(click_data, selected_data, display_option, df = pd.read_parquet("/app/work/" + selected_example_dataset) clicked_image = df.iloc[selected_index].values clicked_image = np.array(clicked_image) - heatmap_data = go.Heatmap(z=clicked_image) @@ -592,8 +588,7 @@ def 
update_statistics(selected_data, clusters, assigned_labels, label_names): Returns: [num_images, clusters, labels]: statistics ''' - - clusters = np.array(clusters) + assigned_labels = np.array(assigned_labels) if selected_data is not None and len(selected_data['points']) > 0: @@ -601,6 +596,7 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): selected_data['points']] # Access customdata for the original indices selected_clusters = [] if clusters is not None: + clusters = np.array(clusters) selected_clusters = clusters[selected_indices] selected_labels = assigned_labels[selected_indices] From 0306abd48295b0fed9d42cee481cc41197f0923d Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Thu, 8 Feb 2024 14:41:56 -0800 Subject: [PATCH 10/62] Able to read images when using DataClinic generated parquet file --- docker/requirements.txt | 3 ++- src/frontend.py | 11 ++++++----- src/latentxp_utils.py | 36 +++++++++++++++++++++++++++++++++++- 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/docker/requirements.txt b/docker/requirements.txt index 13c2d7d..bc93d1e 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -11,4 +11,5 @@ requests==2.26.0 pyarrow==11.0.0 diskcache==5.6.3 pandas -numpy \ No newline at end of file +numpy +Pillow \ No newline at end of file diff --git a/src/frontend.py b/src/frontend.py index f207539..8b07789 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -15,7 +15,7 @@ from file_manager.data_project import DataProject from app_layout import app, DOCKER_DATA, UPLOAD_FOLDER_ROOT -from latentxp_utils import kmeans_kwargs, dbscan_kwargs, hdbscan_kwargs, hex_to_rgba, generate_scatter_data, remove_key_from_dict_list, get_content, get_trained_models_list +from latentxp_utils import kmeans_kwargs, dbscan_kwargs, hdbscan_kwargs, hex_to_rgba, generate_scatter_data, remove_key_from_dict_list, get_content, get_trained_models_list, load_images_by_indices from dash_component_editor import JSONParameterEditor @@ -503,8 +503,9 @@ def update_heatmap(click_data, selected_data, display_option, elif data_clinic_file_path is not None: print("data_clinic_file_path") print(data_clinic_file_path) - df = pd.read_parquet(data_clinic_file_path) - selected_images = df.iloc[selected_indices].values + directory_path = os.path.dirname(data_clinic_file_path) + selected_images = load_images_by_indices(directory_path, selected_indices) + ### Example dataset elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": print("Demoshapes.npz") @@ -534,8 +535,8 @@ def update_heatmap(click_data, selected_data, display_option, clicked_image, uri = data_project.data[selected_index].read_data(export='pillow') ### DataClinic elif data_clinic_file_path is not None: - df = pd.read_parquet(data_clinic_file_path) - clicked_image = df.iloc[selected_index].values + directory_path = os.path.dirname(data_clinic_file_path) + selected_images = load_images_by_indices(directory_path, selected_indices) ### Example dataset elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": clicked_image = np.load("/app/work/" + selected_example_dataset)['arr_0'][selected_index] diff --git a/src/latentxp_utils.py b/src/latentxp_utils.py index 977823b..090d8fd 100755 --- a/src/latentxp_utils.py +++ b/src/latentxp_utils.py @@ -4,6 +4,7 @@ from copy import deepcopy import requests import os +from PIL import Image kmeans_kwargs = {"gui_parameters": [{"type": "dropdown", "name": "ncluster-dropdown-menu", "title": "Number of clusters", "param_key": "n_clusters", 
"options": [{"label": i, "value": i} for i in range(1, 21)], @@ -353,4 +354,37 @@ def get_trained_models_list(user, app): trained_models.append({'label': app+': '+model['job_kwargs']['kwargs']['job_type'], 'value': out_path+filename}) trained_models.reverse() - return trained_models \ No newline at end of file + return trained_models + + +def load_images_from_directory(directory_path, indices): + image_data = [] + for filename in os.listdir(directory_path): + if filename.endswith(".png") or filename.endswith(".jpg"): + file_path = os.path.join(directory_path, filename) + try: + img = Image.open(file_path) + img_array = np.array(img) + image_data.append(img_array) + except Exception as e: + print(f"Error processing {file_path}: {e}") + + image_data = np.array(image_data) + return image_data + +def load_images_by_indices(directory_path, indices): + image_data = [] + filenames = [filename for filename in sorted(os.listdir(directory_path)) if filename.lower().endswith(('.png', '.jpg'))] + for index in indices: + if index in range(len(filenames)): + filename = filenames[index] + file_path = os.path.join(directory_path, filename) + try: + img = Image.open(file_path) + img_array = np.array(img) + image_data.append(img_array) + except Exception as e: + print(f"Error processing {file_path}: {e}") + + image_data = np.array(image_data) + return image_data From 4e1b1750b256bfbef3927256cbb8faa48a4cc284 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 21 Feb 2024 09:58:15 -0800 Subject: [PATCH 11/62] Try to fix the data-click clickdata no show issue - does not report error- but still no show --- src/frontend.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index 8b07789..823be33 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -536,7 +536,7 @@ def update_heatmap(click_data, selected_data, display_option, ### DataClinic elif data_clinic_file_path is not None: directory_path = os.path.dirname(data_clinic_file_path) - selected_images = load_images_by_indices(directory_path, selected_indices) + clicked_image = load_images_by_indices(directory_path, [selected_index]) ### Example dataset elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": clicked_image = np.load("/app/work/" + selected_example_dataset)['arr_0'][selected_index] @@ -559,7 +559,9 @@ def update_heatmap(click_data, selected_data, display_option, aspect_y = 1 if heatmap_data['z'] is not None: if heatmap_data['z'].size > 0: - aspect_y, aspect_x = np.shape(heatmap_data['z']) + print("aaa") + print(np.shape(heatmap_data['z'])) + aspect_y, aspect_x = np.shape(heatmap_data['z'])[-2:] return go.Figure( data=heatmap_data, From e802e08b87c41cb9dbfa47ab881d61a326cbac83 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 21 Feb 2024 14:05:06 -0800 Subject: [PATCH 12/62] Rename job contetn, mlex_app name --- src/frontend.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index 823be33..b89bb7f 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -149,7 +149,7 @@ def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data def job_content_dict(content): job_content = {# 'mlex_app': content['name'], - 'mlex_app': 'dimension reduction demo', + 'mlex_app': 'latent_space_explorer', 'service_type': content['service_type'], 'working_directory': DATA_DIR, 'job_kwargs': {'uri': content['uri'], @@ -559,7 +559,6 @@ def update_heatmap(click_data, selected_data, display_option, aspect_y = 1 if 
heatmap_data['z'] is not None: if heatmap_data['z'].size > 0: - print("aaa") print(np.shape(heatmap_data['z'])) aspect_y, aspect_x = np.shape(heatmap_data['z'])[-2:] From 0a19fc67d953c36b7fe0a9bc33eeaa50290980a9 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 21 Feb 2024 14:11:16 -0800 Subject: [PATCH 13/62] Change output file path --- src/frontend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frontend.py b/src/frontend.py index b89bb7f..e03c028 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -21,7 +21,7 @@ #### GLOBAL PARAMS #### DATA_DIR = str(os.environ['DATA_DIR']) -OUTPUT_DIR = pathlib.Path('data/output') +OUTPUT_DIR = pathlib.Path('data/mlexchange_store/admin') USER = 'admin' #'mlexchange-team' # move to env file UPLOAD_FOLDER_ROOT = "data/upload" From 612887815297b5c9ffe16027ad07400fc8aa8554 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 21 Feb 2024 14:12:03 -0800 Subject: [PATCH 14/62] Update output path) --- src/frontend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frontend.py b/src/frontend.py index e03c028..7c33462 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -21,8 +21,8 @@ #### GLOBAL PARAMS #### DATA_DIR = str(os.environ['DATA_DIR']) -OUTPUT_DIR = pathlib.Path('data/mlexchange_store/admin') USER = 'admin' #'mlexchange-team' # move to env file +OUTPUT_DIR = pathlib.Path('data/mlexchange_store/' + USER) UPLOAD_FOLDER_ROOT = "data/upload" @app.callback( From c039dff14053782e7025ac823afebd0a8e048e44 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 13 Mar 2024 13:39:05 -0700 Subject: [PATCH 15/62] Add pre-commit-config file --- .pre-commit-config.yaml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..83d178f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,34 @@ +default_language_version: + python: python3 +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-ast + - id: check-case-conflict + - id: check-merge-conflict + - id: check-symlinks + - id: check-yaml + - id: debug-statements + - repo: https://github.com/gitguardian/ggshield + rev: v1.25.0 + hooks: + - id: ggshield + language_version: python3 + stages: [commit] + # Using this mirror lets us use mypyc-compiled black, which is about 2x faster + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 24.2.0 + hooks: + - id: black + - repo: https://github.com/pycqa/flake8 + rev: 7.0.0 + hooks: + - id: flake8 + - repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + args: ["--profile", "black"] \ No newline at end of file From 7b9ce84182145185d0c329bd9a12bf720b0c24c4 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 13 Mar 2024 13:49:48 -0700 Subject: [PATCH 16/62] Include .env file --- .env | 26 ++++++++++++++++++++++++++ .gitignore | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 .env diff --git a/.env b/.env new file mode 100644 index 0000000..6a3b729 --- /dev/null +++ b/.env @@ -0,0 +1,26 @@ +USER = admin + +TILED_SINGLE_USER_API_KEY= + +PREFECT_DB_PW=unique_password +PREFECT_DB_USER=prefect_user +PREFECT_DB_NAME=prefect +PREFECT_DB_SERVER=prefect_db + +TILED_DB_PW= +TILED_DB_USER=tiled_user +TILED_DB_NAME=tiled +TILED_DB_SERVER=tiled_db + +TILED_SINGLE_USER_API_KEY= + + +MLEX_SEGM_USER=mlex_segm_user 
+MLEX_SEGM_PW= + +TILED_API_KEY= + +TILED_INGEST_TILED_CONFIG_PATH=/deploy/config +TILED_INGEST_RMQ_HOST=rabbitmq +TILED_INGEST_RMQ_USER=guest +TILED_INGEST_RMQ_PW=guest \ No newline at end of file diff --git a/.gitignore b/.gitignore index 57516b2..6968502 100644 --- a/.gitignore +++ b/.gitignore @@ -128,7 +128,7 @@ celerybeat.pid *.sage.py # Environments -.env +# .env .venv env/ venv/ From cb68701aacc0f40736400d71c1a4ff05682679de Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 13 Mar 2024 13:50:31 -0700 Subject: [PATCH 17/62] Update docker-compose to replace compute api with prefect, does not work --- docker-compose.yml | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 383494d..e62e4a6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,34 @@ version: "3" services: + + prefect: + image: prefecthq/prefect:2.14-python3.11 + command: prefect server start + environment: + - PREFECT_SERVER_API_HOST=0.0.0.0 + - PREFECT_API_DATABASE_CONNECTION_URL=postgresql+asyncpg://${PREFECT_DB_USER}:${PREFECT_DB_PW}@prefect_db:5432/${PREFECT_DB_NAME} # Needed if using postgres and not sqlite + # - PREFECT_UI_API_URL=https://localhost/api. needed if nginx is handling ssl termination + - PREFECT_LOGGING_LEVEL=DEBUG + ports: + - 4200:4200 + depends_on: + - prefect_db + networks: + mle_net: + + prefect_db: + image: postgres:14.5-alpine + environment: + - POSTGRES_USER=${PREFECT_DB_USER} + - POSTGRES_PASSWORD=${PREFECT_DB_PW} + - POSTGRES_DB=${PREFECT_DB_NAME} + volumes: + - ./data/prefect_db:/var/lib/postgresql/data:rw + restart: unless-stopped + networks: + mle_net: + front-end: restart: "unless-stopped" container_name: "latentxp" @@ -16,11 +44,17 @@ services: - ./src:/app/work/src ports: - "8070:8070" + # networks: + # - computing_api_default networks: - - computing_api_default + mle_net: + +# networks: +# computing_api_default: +# external: true networks: - computing_api_default: - external: true + mle_net: + driver: bridge # env file: set up pwd \ No newline at end of file From b1ba1f4753a8a7415244d0fe42bdd751e818cf28 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Fri, 15 Mar 2024 13:21:20 -0700 Subject: [PATCH 18/62] Remove prefect --- docker-compose.yml | 39 +++------------------------------------ 1 file changed, 3 insertions(+), 36 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index e62e4a6..84e1ee3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,33 +2,6 @@ version: "3" services: - prefect: - image: prefecthq/prefect:2.14-python3.11 - command: prefect server start - environment: - - PREFECT_SERVER_API_HOST=0.0.0.0 - - PREFECT_API_DATABASE_CONNECTION_URL=postgresql+asyncpg://${PREFECT_DB_USER}:${PREFECT_DB_PW}@prefect_db:5432/${PREFECT_DB_NAME} # Needed if using postgres and not sqlite - # - PREFECT_UI_API_URL=https://localhost/api. 
needed if nginx is handling ssl termination - - PREFECT_LOGGING_LEVEL=DEBUG - ports: - - 4200:4200 - depends_on: - - prefect_db - networks: - mle_net: - - prefect_db: - image: postgres:14.5-alpine - environment: - - POSTGRES_USER=${PREFECT_DB_USER} - - POSTGRES_PASSWORD=${PREFECT_DB_PW} - - POSTGRES_DB=${PREFECT_DB_NAME} - volumes: - - ./data/prefect_db:/var/lib/postgresql/data:rw - restart: unless-stopped - networks: - mle_net: - front-end: restart: "unless-stopped" container_name: "latentxp" @@ -44,17 +17,11 @@ services: - ./src:/app/work/src ports: - "8070:8070" - # networks: - # - computing_api_default networks: - mle_net: - -# networks: -# computing_api_default: -# external: true + mlex_mle_net: networks: - mle_net: - driver: bridge + mlex_mle_net: + external: true # env file: set up pwd \ No newline at end of file From 58cab54d8fdb5173799c92c17c80ed6b3e660049 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Fri, 15 Mar 2024 13:38:54 -0700 Subject: [PATCH 19/62] add prefect: --- docker/requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/requirements.txt b/docker/requirements.txt index bc93d1e..3861b96 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -12,4 +12,6 @@ pyarrow==11.0.0 diskcache==5.6.3 pandas numpy -Pillow \ No newline at end of file +Pillow +# prefect +prefect-client==2.14.21 \ No newline at end of file From bbb35c1ab785bda57a56199589c51b53f9e5d945 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Fri, 15 Mar 2024 13:39:15 -0700 Subject: [PATCH 20/62] Add prefect components --- src/frontend.py | 7 +++++++ src/utils_prefect.py | 0 2 files changed, 7 insertions(+) create mode 100644 src/utils_prefect.py diff --git a/src/frontend.py b/src/frontend.py index 7c33462..09a304d 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -18,6 +18,13 @@ from latentxp_utils import kmeans_kwargs, dbscan_kwargs, hdbscan_kwargs, hex_to_rgba, generate_scatter_data, remove_key_from_dict_list, get_content, get_trained_models_list, load_images_by_indices from dash_component_editor import JSONParameterEditor +from utils_prefect import ( + get_children_flow_run_ids, + get_flow_run_name, + get_flow_runs_by_name, + schedule_prefect_flow, +) + #### GLOBAL PARAMS #### DATA_DIR = str(os.environ['DATA_DIR']) diff --git a/src/utils_prefect.py b/src/utils_prefect.py new file mode 100644 index 0000000..e69de29 From da3210031294d2aa21bc2a5bb4120c4d3b72c645 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Fri, 15 Mar 2024 13:43:00 -0700 Subject: [PATCH 21/62] Prefect update, not done yet; --- src/frontend.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/frontend.py b/src/frontend.py index 09a304d..5316a20 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -231,7 +231,16 @@ def submit_dimension_reduction_job(submit_n_clicks, job_content = job_content_dict(model_content) job_content['job_kwargs']['kwargs'] = {} job_content['job_kwargs']['kwargs']['parameters'] = input_params - #TODO: other kwargs + + + # prefect + job_uid = schedule_prefect_flow( + FLOW__NAME, + parameters=TRAIN_PARAMS_EXAMPLE, + flow_run_name=f"{job_name} {current_time}", + tags=PREFECT_TAGS + ["train", project_name], + ) + job_message = f"Job has been succesfully submitted with uid: {job_uid} and mask uri: {mask_uri}" compute_dict = {'user_uid': USER, 'host_list': ['mlsandbox.als.lbl.gov', 'local.als.lbl.gov', 'vaughan.als.lbl.gov'], From ffc570920b89bda1286bd98bc9d60d67dc693ebb Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Tue, 19 Mar 2024 
15:10:30 -0700 Subject: [PATCH 22/62] update .env file --- .env | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.env b/.env index 6a3b729..4ea6411 100644 --- a/.env +++ b/.env @@ -6,6 +6,11 @@ PREFECT_DB_PW=unique_password PREFECT_DB_USER=prefect_user PREFECT_DB_NAME=prefect PREFECT_DB_SERVER=prefect_db +PREFECT_API_URL=http://prefect:4200/api +FLOW_NAME="Parent flow/launch_parent_flow" +TIMEZONE="US/Pacific" + +PREFECT_TAGS='["latent-space-explorer"]' TILED_DB_PW= TILED_DB_USER=tiled_user From 0aec2b5d65cb55e0b89c6edd5155f2733bc4355c Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Tue, 19 Mar 2024 15:10:59 -0700 Subject: [PATCH 23/62] Update to use prefect --- docker-compose.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 84e1ee3..4210039 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,7 +11,11 @@ services: mem_limit: 2g environment: DATA_DIR: "${PWD}/data/" - # USER: "$USER" + PREFECT_TAGS: "${PREFECT_TAGS}" + PREFECT_API_URL: '${PREFECT_API_URL}' + FLOW_NAME: '${FLOW_NAME}' + TIMEZONE: "${TIMEZONE}" + USER: "${USER}" volumes: - ./data:/app/work/data - ./src:/app/work/src From 5e15f92b2ab993e23d4389ee172ddb3023c95679 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Tue, 19 Mar 2024 15:11:30 -0700 Subject: [PATCH 24/62] Prefect related utilies --- src/latentxp_utils.py | 2 + src/utils_prefect.py | 105 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) diff --git a/src/latentxp_utils.py b/src/latentxp_utils.py index 090d8fd..76be66f 100755 --- a/src/latentxp_utils.py +++ b/src/latentxp_utils.py @@ -388,3 +388,5 @@ def load_images_by_indices(directory_path, indices): image_data = np.array(image_data) return image_data + + diff --git a/src/utils_prefect.py b/src/utils_prefect.py index e69de29..2b6612a 100644 --- a/src/utils_prefect.py +++ b/src/utils_prefect.py @@ -0,0 +1,105 @@ +import asyncio +from typing import Optional + +from prefect import get_client +from prefect.client.schemas.filters import ( + FlowRunFilter, + FlowRunFilterName, + FlowRunFilterParentFlowRunId, + FlowRunFilterTags, +) + + +async def _schedule( + deployment_name: str, + flow_run_name: str, + parameters: Optional[dict] = None, + tags: Optional[list] = [], +): + async with get_client() as client: + deployment = await client.read_deployment_by_name(deployment_name) + assert ( + deployment + ), f"No deployment found in config for deployment_name {deployment_name}" + flow_run = await client.create_flow_run_from_deployment( + deployment.id, + parameters=parameters, + name=flow_run_name, + tags=tags, + ) + return flow_run.id + + +def schedule_prefect_flow( + deployment_name: str, + parameters: Optional[dict] = None, + flow_run_name: Optional[str] = None, + tags: Optional[list] = [], +): + if not flow_run_name: + model_name = parameters["model_name"] + flow_run_name = f"{deployment_name}: {model_name}" + flow_run_id = asyncio.run( + _schedule(deployment_name, flow_run_name, parameters, tags) + ) + return flow_run_id + + +async def _get_name(flow_run_id): + async with get_client() as client: + flow_run = await client.read_flow_run(flow_run_id) + if flow_run.state.is_final(): + if flow_run.state.is_completed(): + return flow_run.name + return None + + +def get_flow_run_name(flow_run_id): + """Retrieves the name of the flow with the given id.""" + return asyncio.run(_get_name(flow_run_id)) + + +async def _flow_run_query( + tags=None, flow_run_name=None, parent_flow_run_id=None, sort="START_TIME_DESC" +): + 
flow_run_filter_parent_flow_run_id = ( + FlowRunFilterParentFlowRunId(any_=[parent_flow_run_id]) + if parent_flow_run_id + else None + ) + async with get_client() as client: + flow_runs = await client.read_flow_runs( + flow_run_filter=FlowRunFilter( + name=FlowRunFilterName(like_=flow_run_name), + parent_flow_run_id=flow_run_filter_parent_flow_run_id, + tags=FlowRunFilterTags(all_=tags), + ), + sort=sort, + ) + return flow_runs + + +def get_flow_runs_by_name(flow_run_name=None, tags=None): + flow_runs_by_name = [] + flow_runs = asyncio.run(_flow_run_query(tags, flow_run_name=flow_run_name)) + for flow_run in flow_runs: + if flow_run.state_name in {"Failed", "Crashed"}: + flow_name = f"❌ {flow_run.name}" + elif flow_run.state_name == "Completed": + flow_name = f"✅ {flow_run.name}" + elif flow_run.state_name == "Cancelled": + flow_name = f"🚫 {flow_run.name}" + else: + flow_name = f"🕑 {flow_run.name}" + flow_runs_by_name.append({"label": flow_name, "value": str(flow_run.id)}) + return flow_runs_by_name + + +def get_children_flow_run_ids(parent_flow_run_id, sort="START_TIME_ASC"): + children_flow_runs = asyncio.run( + _flow_run_query(parent_flow_run_id=parent_flow_run_id, sort=sort) + ) + children_flow_run_ids = [ + str(children_flow_run.id) for children_flow_run in children_flow_runs + ] + return children_flow_run_ids \ No newline at end of file From a8098f74c8be92f159f138f578f4e3cb9bbf6f3b Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Tue, 19 Mar 2024 15:11:53 -0700 Subject: [PATCH 25/62] Add a component to ask user input a job name --- src/app_layout.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/app_layout.py b/src/app_layout.py index 08b3c36..c43c17b 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -104,6 +104,17 @@ html.Hr(), html.Div( [ + dbc.Label('Name your job', className='mr-2'), + dcc.Input( + id="job_name", + placeholder="test0", + style={'width':'100%'} + ), + ] + ), + html.Hr(), + html.Div( + [ dbc.Button( "Submit", color="secondary", From a2743da67500a8801993d2aae4187e4e931eeead Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Tue, 19 Mar 2024 15:27:10 -0700 Subject: [PATCH 26/62] get job name from user input: --- src/app_layout.py | 2 +- src/frontend.py | 148 ++++++++++++++++++++++++++++++++-------------- 2 files changed, 105 insertions(+), 45 deletions(-) diff --git a/src/app_layout.py b/src/app_layout.py index c43c17b..2aac91e 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -106,7 +106,7 @@ [ dbc.Label('Name your job', className='mr-2'), dcc.Input( - id="job_name", + id="job-name", placeholder="test0", style={'width':'100%'} ), diff --git a/src/frontend.py b/src/frontend.py index 5316a20..f928624 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -11,6 +11,9 @@ import requests import os import requests +import pytz +from datetime import datetime + from file_manager.data_project import DataProject @@ -31,6 +34,52 @@ USER = 'admin' #'mlexchange-team' # move to env file OUTPUT_DIR = pathlib.Path('data/mlexchange_store/' + USER) UPLOAD_FOLDER_ROOT = "data/upload" +PREFECT_TAGS = json.loads(os.getenv("PREFECT_TAGS", '["latent-space-explorer"]')) +TIMEZONE = os.getenv("TIMEZONE", "US/Pacific") +FLOW_NAME = os.getenv("FLOW_NAME", "") + + +# TODO: Get model parameters from UI +TRAIN_PARAMS_EXAMPLE = { + "flow_type": "podman", + "params_list": [ + { + "image_name": "ghcr.io/mlexchange/mlex_latent_explorer", + "image_tag": "main", + "command": 'python -c \\"import time; time.sleep(30)\\"', + "params": { + "io_parameters": {"uid_save": 
"uid0001", "uid_retrieve": "uid0001"} + }, + "volumes": [f"{DATA_DIR}:/app/work/data"], + }, + { + "image_name": "ghcr.io/mlexchange/mlex_latent_explorer", + "image_tag": "main", + "command": 'python -c \\"import time; time.sleep(10)\\"', + "params": { + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + }, + "volumes": [f"{DATA_DIR}:/app/work/data"], + }, + ], +} + +INFERENCE_PARAMS_EXAMPLE = { + "flow_type": "podman", + "params_list": [ + { + "image_name": "ghcr.io/mlexchange/mlex_latent_explorer", + "image_tag": "main", + "command": 'python -c \\"import time; time.sleep(30)\\"', + "params": { + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + }, + "volumes": [f"{DATA_DIR}:/app/work/data"], + }, + ], +} + + @app.callback( Output('additional-model-params', 'children'), @@ -182,6 +231,7 @@ def job_content_dict(content): ], Input('run-algo', 'n_clicks'), [ + State('job-name', 'value'), # job_name State('example-dataset-selection', 'value'), # 2 example dataset State('user-upload-data-dir', 'data'), # FM State('feature-vector-model-list', 'value'), # DataClinic @@ -192,6 +242,7 @@ def job_content_dict(content): prevent_initial_call=True ) def submit_dimension_reduction_job(submit_n_clicks, + job_name, selected_example_dataset, user_upload_data_dir, data_clinic_file_path, model_id, selected_algo, children): """ @@ -226,44 +277,53 @@ def submit_dimension_reduction_job(submit_n_clicks, value = child["props"]["children"][1]["props"]["value"] input_params[key] = value print("Dimension reduction algo params: ", input_params) + model_content = get_content(model_id) print(model_content) job_content = job_content_dict(model_content) job_content['job_kwargs']['kwargs'] = {} job_content['job_kwargs']['kwargs']['parameters'] = input_params + # check if user is using user uploaded zip file or example dataset or data clinic file + if user_upload_data_dir is not None: + selected_dataset = user_upload_data_dir + elif data_clinic_file_path is not None: + selected_dataset = data_clinic_file_path + else: + selected_dataset = selected_example_dataset + print(selected_dataset) # prefect + current_time = datetime.now(pytz.timezone(TIMEZONE)).strftime("%Y/%m/%d %H:%M:%S") + if not job_name: + job_name = "test0" + project_name = selected_dataset.split("/")[-1] # name of the dataset, get it from FM + print(PREFECT_TAGS, flush=True) + # job_uid is the job_uid = schedule_prefect_flow( - FLOW__NAME, + FLOW_NAME, parameters=TRAIN_PARAMS_EXAMPLE, flow_run_name=f"{job_name} {current_time}", tags=PREFECT_TAGS + ["train", project_name], ) - job_message = f"Job has been succesfully submitted with uid: {job_uid} and mask uri: {mask_uri}" - - compute_dict = {'user_uid': USER, - 'host_list': ['mlsandbox.als.lbl.gov', 'local.als.lbl.gov', 'vaughan.als.lbl.gov'], - 'requirements': {'num_processors': 2, - 'num_gpus': 0, - 'num_nodes': 2}, - } - compute_dict['job_list'] = [job_content] - compute_dict['dependencies'] = {'0':[]} - compute_dict['requirements']['num_nodes'] = 1 + job_message = f"Job has been succesfully submitted with uid: {job_uid}." 
+ print("Job message") + print(job_message, flush=True) + + # compute_dict = {'user_uid': USER, + # 'host_list': ['mlsandbox.als.lbl.gov', 'local.als.lbl.gov', 'vaughan.als.lbl.gov'], + # 'requirements': {'num_processors': 2, + # 'num_gpus': 0, + # 'num_nodes': 2}, + # } + # compute_dict['job_list'] = [job_content] + # compute_dict['dependencies'] = {'0':[]} + # compute_dict['requirements']['num_nodes'] = 1 # create user directory to store users data/experiments experiment_id = str(uuid.uuid4()) # create unique id for experiment output_path = OUTPUT_DIR / experiment_id output_path.mkdir(parents=True, exist_ok=True) - - # check if user is using user uploaded zip file or example dataset or data clinic file - if user_upload_data_dir is not None: - selected_dataset = user_upload_data_dir - elif data_clinic_file_path is not None: - selected_dataset = data_clinic_file_path - else: - selected_dataset = selected_example_dataset # check which dimension reduction algo, then compose command if selected_algo == 'PCA': @@ -275,10 +335,10 @@ def submit_dimension_reduction_job(submit_n_clicks, #print(docker_cmd) docker_cmd = docker_cmd + ' \'' + json.dumps(input_params) + '\'' #print(docker_cmd) - job_content['job_kwargs']['cmd'] = docker_cmd + #job_content['job_kwargs']['cmd'] = docker_cmd - response = requests.post('http://job-service:8080/api/v0/workflows', json=compute_dict) - print("respnse: ", response) + # response = requests.post('http://job-service:8080/api/v0/workflows', json=compute_dict) + # print("respnse: ", response) # job_response = get_job(user=None, mlex_app=job_content['mlex_app']) return experiment_id, 'cluster', -1, -2, go.Figure(go.Heatmap()), -1 @@ -679,27 +739,27 @@ def toggle_modal(n_submit, n_apply, return False, "No alert." -@app.callback( - Output('feature-vector-model-list', 'options'), - Input('interval-component', 'n_intervals'), -) -def update_trained_model_list(interval): - ''' - This callback updates the list of trained models - Args: - tab_value: Tab option - prob_refresh_n_clicks: Button to refresh the list of probability-based trained models - similarity_refresh_n_clicks: Button to refresh the list of similarity-based trained models - Returns: - prob_model_list: List of trained models in mlcoach - similarity_model_list: List of trained models in data clinic and mlcoach - ''' - data_clinic_models = get_trained_models_list(USER, 'data_clinic') - ml_coach_models = get_trained_models_list(USER, 'mlcoach') - feature_vector_models = data_clinic_models + ml_coach_models - #print(feature_vector_models) - - return feature_vector_models +# @app.callback( +# Output('feature-vector-model-list', 'options'), +# Input('interval-component', 'n_intervals'), +# ) +# def update_trained_model_list(interval): +# ''' +# This callback updates the list of trained models +# Args: +# tab_value: Tab option +# prob_refresh_n_clicks: Button to refresh the list of probability-based trained models +# similarity_refresh_n_clicks: Button to refresh the list of similarity-based trained models +# Returns: +# prob_model_list: List of trained models in mlcoach +# similarity_model_list: List of trained models in data clinic and mlcoach +# ''' +# data_clinic_models = get_trained_models_list(USER, 'data_clinic') +# ml_coach_models = get_trained_models_list(USER, 'mlcoach') +# feature_vector_models = data_clinic_models + ml_coach_models +# #print(feature_vector_models) + +# return feature_vector_models if __name__ == '__main__': From 173977d90477287075ff28700c77352fb232e3e3 Mon Sep 17 00:00:00 2001 From: 
Runbo Jiang Date: Wed, 20 Mar 2024 12:04:34 -0700 Subject: [PATCH 27/62] Success run DR job through Prefect and show the latent vectors --- src/frontend.py | 98 +++++++++++++++++-------------------------------- 1 file changed, 34 insertions(+), 64 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index f928624..c188172 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -44,23 +44,14 @@ "flow_type": "podman", "params_list": [ { - "image_name": "ghcr.io/mlexchange/mlex_latent_explorer", + "image_name": "ghcr.io/runboj/mlex_dimension_reduction_pca", "image_tag": "main", "command": 'python -c \\"import time; time.sleep(30)\\"', "params": { "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} }, "volumes": [f"{DATA_DIR}:/app/work/data"], - }, - { - "image_name": "ghcr.io/mlexchange/mlex_latent_explorer", - "image_tag": "main", - "command": 'python -c \\"import time; time.sleep(10)\\"', - "params": { - "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} - }, - "volumes": [f"{DATA_DIR}:/app/work/data"], - }, + } ], } @@ -68,7 +59,7 @@ "flow_type": "podman", "params_list": [ { - "image_name": "ghcr.io/mlexchange/mlex_latent_explorer", + "image_name": "ghcr.io/runboj/mlex_dimension_reduction_pca", "image_tag": "main", "command": 'python -c \\"import time; time.sleep(30)\\"', "params": { @@ -203,18 +194,18 @@ def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data return labels, label_schema, options, user_upload_data_dir -def job_content_dict(content): - job_content = {# 'mlex_app': content['name'], - 'mlex_app': 'latent_space_explorer', - 'service_type': content['service_type'], - 'working_directory': DATA_DIR, - 'job_kwargs': {'uri': content['uri'], - 'cmd': content['cmd'][0]} - } - if 'map' in content: - job_content['job_kwargs']['map'] = content['map'] +# def job_content_dict(content): +# job_content = {# 'mlex_app': content['name'], +# 'mlex_app': 'latent_space_explorer', +# 'service_type': content['service_type'], +# 'working_directory': DATA_DIR, +# 'job_kwargs': {'uri': content['uri'], +# 'cmd': content['cmd'][0]} +# } +# if 'map' in content: +# job_content['job_kwargs']['map'] = content['map'] - return job_content +# return job_content @app.callback( [ @@ -277,12 +268,6 @@ def submit_dimension_reduction_job(submit_n_clicks, value = child["props"]["children"][1]["props"]["value"] input_params[key] = value print("Dimension reduction algo params: ", input_params) - - model_content = get_content(model_id) - print(model_content) - job_content = job_content_dict(model_content) - job_content['job_kwargs']['kwargs'] = {} - job_content['job_kwargs']['kwargs']['parameters'] = input_params # check if user is using user uploaded zip file or example dataset or data clinic file if user_upload_data_dir is not None: @@ -295,11 +280,28 @@ def submit_dimension_reduction_job(submit_n_clicks, # prefect current_time = datetime.now(pytz.timezone(TIMEZONE)).strftime("%Y/%m/%d %H:%M:%S") - if not job_name: - job_name = "test0" + if not job_name: job_name = "test0" + # job_name += " " + str(current_time) project_name = selected_dataset.split("/")[-1] # name of the dataset, get it from FM print(PREFECT_TAGS, flush=True) - # job_uid is the + + # create user directory to store users data/experiments + experiment_id = experiment_id = str(uuid.uuid4()) + output_path = OUTPUT_DIR / experiment_id + output_path.mkdir(parents=True, exist_ok=True) + + # check which dimension reduction algo, then compose command + if selected_algo == 'PCA': + 
TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python pca_run.py" + elif selected_algo == 'UMAP': + TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python umap_run.py" + + TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["images_dir"] = selected_dataset + TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["output_dir"] = str(output_path) + TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["model_parameters"] = input_params + print(TRAIN_PARAMS_EXAMPLE) + + # run prefect job, job_uid is the new experiment id job_uid = schedule_prefect_flow( FLOW_NAME, parameters=TRAIN_PARAMS_EXAMPLE, @@ -307,39 +309,7 @@ def submit_dimension_reduction_job(submit_n_clicks, tags=PREFECT_TAGS + ["train", project_name], ) job_message = f"Job has been succesfully submitted with uid: {job_uid}." - print("Job message") print(job_message, flush=True) - - # compute_dict = {'user_uid': USER, - # 'host_list': ['mlsandbox.als.lbl.gov', 'local.als.lbl.gov', 'vaughan.als.lbl.gov'], - # 'requirements': {'num_processors': 2, - # 'num_gpus': 0, - # 'num_nodes': 2}, - # } - # compute_dict['job_list'] = [job_content] - # compute_dict['dependencies'] = {'0':[]} - # compute_dict['requirements']['num_nodes'] = 1 - - # create user directory to store users data/experiments - experiment_id = str(uuid.uuid4()) # create unique id for experiment - output_path = OUTPUT_DIR / experiment_id - output_path.mkdir(parents=True, exist_ok=True) - - # check which dimension reduction algo, then compose command - if selected_algo == 'PCA': - cmd_list = ["python pca_run.py", selected_dataset, str(output_path)] - elif selected_algo == 'UMAP': - cmd_list = ["python umap_run.py", selected_dataset, str(output_path)] - - docker_cmd = " ".join(cmd_list) - #print(docker_cmd) - docker_cmd = docker_cmd + ' \'' + json.dumps(input_params) + '\'' - #print(docker_cmd) - #job_content['job_kwargs']['cmd'] = docker_cmd - - # response = requests.post('http://job-service:8080/api/v0/workflows', json=compute_dict) - # print("respnse: ", response) - # job_response = get_job(user=None, mlex_app=job_content['mlex_app']) return experiment_id, 'cluster', -1, -2, go.Figure(go.Heatmap()), -1 From 58de19499e4630926ea778f3323ba969517dede7 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Wed, 20 Mar 2024 13:15:09 -0700 Subject: [PATCH 28/62] Remove compute api related function --- src/frontend.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index c188172..7a73a08 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -194,19 +194,6 @@ def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data return labels, label_schema, options, user_upload_data_dir -# def job_content_dict(content): -# job_content = {# 'mlex_app': content['name'], -# 'mlex_app': 'latent_space_explorer', -# 'service_type': content['service_type'], -# 'working_directory': DATA_DIR, -# 'job_kwargs': {'uri': content['uri'], -# 'cmd': content['cmd'][0]} -# } -# if 'map' in content: -# job_content['job_kwargs']['map'] = content['map'] - -# return job_content - @app.callback( [ # flag the read variable From b6ee3c488838b8158760b40b430fd39364790a21 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Fri, 22 Mar 2024 15:26:38 -0700 Subject: [PATCH 29/62] Work with FM and tiled, need minor error remove --- docker-compose.yml | 1 + src/frontend.py | 120 ++++++++++++++++++++++++++------------------- 2 files changed, 70 insertions(+), 51 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 
4210039..75a44f5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,6 +19,7 @@ services: volumes: - ./data:/app/work/data - ./src:/app/work/src + - ../mlex_file_manager/file_manager:/app/work/src/file_manager ports: - "8070:8070" networks: diff --git a/src/frontend.py b/src/frontend.py index 7a73a08..cd3f173 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -133,15 +133,15 @@ def show_clustering_gui_layouts(selected_algo): Output('input_labels', 'data'), Output('label_schema', 'data'), Output('label-dropdown', 'options'), - Output('user-upload-data-dir', 'data'), + # Output('user-upload-data-dir', 'data'), ], [ Input('example-dataset-selection', 'value'), # example dataset - Input({'base_id': 'file-manager', 'name': 'docker-file-paths'},'data'), # FM dataset + Input({'base_id': 'file-manager', 'name': 'data-project-dict'},'data'), # FM dataset Input('feature-vector-model-list', 'value'), # data clinic dataset ] ) -def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data_clinic_file_path): +def update_data_n_label_schema(selected_example_dataset, data_project_dict, data_clinic_file_path): ''' This callback updates the selected dataset from the provided example datasets, as well as labels, and label schema Args: @@ -161,14 +161,16 @@ def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data # priority level: FileManage > DataClinic > Example Datasets # FileManager - user uploaded zip file of images - data_project = DataProject() - data_project.init_from_dict(upload_file_paths) - data_set = data_project.data # list of len 1920, each element is a local_dataset.LocalDataset object + # data_project = DataProject() + # data_project.init_from_dict(upload_file_paths) + + data_project = DataProject.from_dict(data_project_dict) + data_set_len = data_project.datasets[-1].cumulative_data_count - 1 # list of len 1920, each element is a local_dataset.LocalDataset object options = [] - user_upload_data_dir = None - if len(data_set) > 0: - labels = np.full((len(data_set),), -1) - user_upload_data_dir = os.path.dirname(upload_file_paths[0]['uri']) + #user_upload_data_dir = None + if data_set_len > 0: + labels = np.full((data_set_len,), -1) + # user_upload_data_dir = os.path.dirname(data_project_dict[0]['uri']) # DataClinic options elif data_clinic_file_path is not None: df = pd.read_parquet(data_clinic_file_path) @@ -192,7 +194,7 @@ def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data options.insert(0, {'label': 'Unlabeled', 'value': -1}) options.insert(0, {'label': 'All', 'value': -2}) - return labels, label_schema, options, user_upload_data_dir + return labels, label_schema, options #, user_upload_data_dir @app.callback( [ @@ -211,18 +213,20 @@ def update_data_n_label_schema(selected_example_dataset, upload_file_paths, data [ State('job-name', 'value'), # job_name State('example-dataset-selection', 'value'), # 2 example dataset - State('user-upload-data-dir', 'data'), # FM + # State('user-upload-data-dir', 'data'), # FM State('feature-vector-model-list', 'value'), # DataClinic State('model_id', 'data'), State('algo-dropdown', 'value'), State('additional-model-params', 'children'), + State({"base_id": "file-manager", "name": "data-project-dict"}, "data") # DataProject for FM + ], prevent_initial_call=True ) def submit_dimension_reduction_job(submit_n_clicks, job_name, - selected_example_dataset, user_upload_data_dir, data_clinic_file_path, - model_id, selected_algo, children): + selected_example_dataset, 
data_clinic_file_path, + model_id, selected_algo, children, data_project_dict): """ This callback is triggered every time the Submit button is hit: - compute latent vectors, which will be saved in data/output/experiment_id @@ -245,7 +249,7 @@ def submit_dimension_reduction_job(submit_n_clicks, """ if not submit_n_clicks: raise PreventUpdate - if not selected_example_dataset and not user_upload_data_dir and not data_clinic_file_path: + if not selected_example_dataset and not data_project_dict and not data_clinic_file_path: raise PreventUpdate input_params = {} @@ -254,28 +258,41 @@ def submit_dimension_reduction_job(submit_n_clicks, key = child["props"]["children"][1]["props"]["id"]["param_key"] value = child["props"]["children"][1]["props"]["value"] input_params[key] = value - print("Dimension reduction algo params: ", input_params) + print("Dimension reduction algo params: ", input_params, flush=True) # check if user is using user uploaded zip file or example dataset or data clinic file - if user_upload_data_dir is not None: - selected_dataset = user_upload_data_dir - elif data_clinic_file_path is not None: - selected_dataset = data_clinic_file_path + data_project = DataProject.from_dict(data_project_dict) + if len(data_project.datasets) > 0: + print("FMM", flush=True) + data_project = DataProject.from_dict(data_project_dict) + io_parameters = {"data_uris": [dataset.uri for dataset in data_project.datasets], + "data_tiled_api_key": data_project.api_key, + "data_type": data_project.data_type, + "root_uri": data_project.root_uri, + } + + # elif data_clinic_file_path is not None: + # selected_dataset = data_clinic_file_path else: - selected_dataset = selected_example_dataset - print(selected_dataset) + print("selected_example_dataset: " + selected_example_dataset, flush=True) + io_parameters = {"data_uris": [selected_example_dataset], + "data_tiled_api_key": None, + "data_type": "file", + "root_uri": None, + } # prefect current_time = datetime.now(pytz.timezone(TIMEZONE)).strftime("%Y/%m/%d %H:%M:%S") if not job_name: job_name = "test0" - # job_name += " " + str(current_time) - project_name = selected_dataset.split("/")[-1] # name of the dataset, get it from FM + job_name += " " + str(current_time) + # project_name = selected_dataset.split("/")[-1] # name of the dataset, get it from FM ## this is an issue + project_name = "fake_name" print(PREFECT_TAGS, flush=True) # create user directory to store users data/experiments - experiment_id = experiment_id = str(uuid.uuid4()) - output_path = OUTPUT_DIR / experiment_id - output_path.mkdir(parents=True, exist_ok=True) + # experiment_id = str(uuid.uuid4()) + # output_path = OUTPUT_DIR / experiment_id + # output_path.mkdir(parents=True, exist_ok=True) # check which dimension reduction algo, then compose command if selected_algo == 'PCA': @@ -283,12 +300,13 @@ def submit_dimension_reduction_job(submit_n_clicks, elif selected_algo == 'UMAP': TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python umap_run.py" - TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["images_dir"] = selected_dataset - TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["output_dir"] = str(output_path) + TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"] = io_parameters + TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["output_dir"] = str(OUTPUT_DIR) + TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["uid_save"] = "" TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["model_parameters"] = 
input_params print(TRAIN_PARAMS_EXAMPLE) - # run prefect job, job_uid is the new experiment id + # run prefect job, job_uid is the new experiment id -> uid_save in the pca_example.yaml file job_uid = schedule_prefect_flow( FLOW_NAME, parameters=TRAIN_PARAMS_EXAMPLE, @@ -298,7 +316,7 @@ def submit_dimension_reduction_job(submit_n_clicks, job_message = f"Job has been succesfully submitted with uid: {job_uid}." print(job_message, flush=True) - return experiment_id, 'cluster', -1, -2, go.Figure(go.Heatmap()), -1 + return job_uid, 'cluster', -1, -2, go.Figure(go.Heatmap()), -1 @app.callback( [ @@ -325,9 +343,13 @@ def read_latent_vectors(n_intervals, experiment_id, max_intervals): """ if experiment_id is None or n_intervals == 0 or max_intervals == 0: raise PreventUpdate + + children_flows = get_children_flow_run_ids(experiment_id) + print("child flow") + print(children_flows) #read the latent vectors from the output dir - output_path = OUTPUT_DIR / experiment_id + output_path = OUTPUT_DIR / children_flows[0] npz_files = list(output_path.glob('*.npy')) if len(npz_files) > 0 : lv_filepath = npz_files[0] # latent vector file path @@ -494,16 +516,16 @@ def update_scatter_plot(latent_vectors, selected_cluster, selected_label, scatte Input('mean-std-toggle', 'value'), ], [ - State('example-dataset-selection', 'value'), # example dataset - State({'base_id': 'file-manager', 'name': 'docker-file-paths'},'data'), # FM dataset - State('feature-vector-model-list', 'value'), # data clinic dataset + State('example-dataset-selection', 'value'), # example dataset + State({"base_id": "file-manager", "name": "data-project-dict"}, "data"), # DataProject for FM + State('feature-vector-model-list', 'value'), # data clinic dataset ], prevent_initial_call=True ) def update_heatmap(click_data, selected_data, display_option, - selected_example_dataset, upload_file_paths, data_clinic_file_path): + selected_example_dataset, data_project_dict, data_clinic_file_path): ''' This callback update the heatmap Args: @@ -513,7 +535,7 @@ def update_heatmap(click_data, selected_data, display_option, Returns: fig: updated heatmap ''' - if not selected_example_dataset and not upload_file_paths and not data_clinic_file_path: + if not selected_example_dataset and not data_project_dict and not data_clinic_file_path: raise PreventUpdate # user select a group of points @@ -524,21 +546,18 @@ def update_heatmap(click_data, selected_data, display_option, ### FileManager # print("upload_file_paths") # if not selected, its an empty list not None selected_images = [] - data_project = DataProject() - data_project.init_from_dict(upload_file_paths) - data_set = data_project.data - if len(data_set) > 0: + + data_project = DataProject.from_dict(data_project_dict) + data_set_len = data_project.datasets[-1].cumulative_data_count - 1 + if data_set_len > 0: print("FM file") - for i in selected_indices: - image, uri = data_project.data[i].read_data(export='pillow') - selected_images.append(np.array(image)) + selected_images, _ = data_project.read(selected_indices, export='pillow') ### DataClinic elif data_clinic_file_path is not None: print("data_clinic_file_path") print(data_clinic_file_path) directory_path = os.path.dirname(data_clinic_file_path) selected_images = load_images_by_indices(directory_path, selected_indices) - ### Example dataset elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": print("Demoshapes.npz") @@ -559,13 +578,12 @@ def update_heatmap(click_data, selected_data, display_option, heatmap_data = 
go.Heatmap(z=np.std(selected_images, axis=0)) elif click_data is not None and len(click_data['points']) > 0: - selected_index = click_data['points'][0]['customdata'][0] # click_data['points'][0]['pointIndex'] + selected_index = click_data['points'][0]['customdata'][0] ### FileManager - data_project = DataProject() - data_project.init_from_dict(upload_file_paths) - data_set = data_project.data - if len(data_set) > 0: - clicked_image, uri = data_project.data[selected_index].read_data(export='pillow') + data_project = DataProject.from_dict(data_project_dict) + data_set_len = data_project.datasets[-1].cumulative_data_count - 1 + if data_set_len > 0: + selected_images, _ = data_project.read([selected_index], export='pillow') ### DataClinic elif data_clinic_file_path is not None: directory_path = os.path.dirname(data_clinic_file_path) From e9b7c223aeb639e0ef4c5fbc17d04c170d55a32e Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Fri, 22 Mar 2024 16:51:58 -0700 Subject: [PATCH 30/62] Remove wrong dataset length method --- src/frontend.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index cd3f173..9c8a820 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -165,11 +165,10 @@ def update_data_n_label_schema(selected_example_dataset, data_project_dict, data # data_project.init_from_dict(upload_file_paths) data_project = DataProject.from_dict(data_project_dict) - data_set_len = data_project.datasets[-1].cumulative_data_count - 1 # list of len 1920, each element is a local_dataset.LocalDataset object options = [] #user_upload_data_dir = None - if data_set_len > 0: - labels = np.full((data_set_len,), -1) + if len(data_project.datasets) > 0: + labels = np.full((len(data_project.datasets),), -1) # user_upload_data_dir = os.path.dirname(data_project_dict[0]['uri']) # DataClinic options elif data_clinic_file_path is not None: @@ -263,7 +262,7 @@ def submit_dimension_reduction_job(submit_n_clicks, # check if user is using user uploaded zip file or example dataset or data clinic file data_project = DataProject.from_dict(data_project_dict) if len(data_project.datasets) > 0: - print("FMM", flush=True) + print("FM", flush=True) data_project = DataProject.from_dict(data_project_dict) io_parameters = {"data_uris": [dataset.uri for dataset in data_project.datasets], "data_tiled_api_key": data_project.api_key, @@ -548,8 +547,7 @@ def update_heatmap(click_data, selected_data, display_option, selected_images = [] data_project = DataProject.from_dict(data_project_dict) - data_set_len = data_project.datasets[-1].cumulative_data_count - 1 - if data_set_len > 0: + if len(data_project.datasets) > 0: print("FM file") selected_images, _ = data_project.read(selected_indices, export='pillow') ### DataClinic @@ -581,8 +579,7 @@ def update_heatmap(click_data, selected_data, display_option, selected_index = click_data['points'][0]['customdata'][0] ### FileManager data_project = DataProject.from_dict(data_project_dict) - data_set_len = data_project.datasets[-1].cumulative_data_count - 1 - if data_set_len > 0: + if len(data_project.datasets) > 0: selected_images, _ = data_project.read([selected_index], export='pillow') ### DataClinic elif data_clinic_file_path is not None: From 65c8917160e21072dddcfe91bf39ddd758fa57cd Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Fri, 22 Mar 2024 16:53:00 -0700 Subject: [PATCH 31/62] Remove alert for now --- src/frontend.py | 72 ++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) 
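Aside, not part of the patch: the file-manager access pattern that the two preceding patches settle on, condensed into a standalone sketch. It uses only the DataProject calls that already appear in these diffs; data_project_dict stands for the contents of the file-manager's data-project-dict store, and the index list is a placeholder.

    from file_manager.data_project import DataProject

    data_project = DataProject.from_dict(data_project_dict)
    if len(data_project.datasets) > 0:
        # Read selected images back as PIL objects; [0, 1, 2] is a placeholder index list.
        selected_images, uris = data_project.read([0, 1, 2], export="pillow")
        # Dataset metadata that gets forwarded to the Prefect flow as io_parameters.
        io_parameters = {
            "data_uris": [dataset.uri for dataset in data_project.datasets],
            "data_tiled_api_key": data_project.api_key,
            "data_type": data_project.data_type,
            "root_uri": data_project.root_uri,
        }
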
diff --git a/src/frontend.py b/src/frontend.py index 9c8a820..b41994a 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -671,44 +671,44 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): f"Labels represented: {labels_str}", ] -@app.callback( - [Output("modal", "is_open"), Output("modal-body", "children")], - [ - Input('run-algo', 'n_clicks'), - Input('run-cluster-algo', 'n_clicks'), - ], - [ - State("modal", "is_open"), - State('example-dataset-selection', 'value'), - State('user-upload-data-dir', 'data'), - State('feature-vector-model-list', 'value'), - ] -) -def toggle_modal(n_submit, n_apply, - is_open, selected_example_dataset, user_upload_data_dir, data_clinic_file_path): - ''' - This callback pop up a window to remind user to follow this flow: - select dataset -> Submit dimension reduction job -> Apply clustering - Args: - n_submit (int): Number of clicks on the 'Submit' button. - n_apply (int): Number of clicks on the 'Apply' button. - is_open (bool): Current state of the modal window (open/closed). - input_data (list): User selected data - Returns: - is_open (bool): New state of the modal window. - modal_body_text (str): Text to be displayed in the modal body. - ''' - at_least_one_dataset_selected = False - if selected_example_dataset or user_upload_data_dir or data_clinic_file_path: - at_least_one_dataset_selected = True +# @app.callback( +# [Output("modal", "is_open"), Output("modal-body", "children")], +# [ +# Input('run-algo', 'n_clicks'), +# Input('run-cluster-algo', 'n_clicks'), +# ], +# [ +# State("modal", "is_open"), +# State('example-dataset-selection', 'value'), +# State('user-upload-data-dir', 'data'), +# State('feature-vector-model-list', 'value'), +# ] +# ) +# def toggle_modal(n_submit, n_apply, +# is_open, selected_example_dataset, user_upload_data_dir, data_clinic_file_path): +# ''' +# This callback pop up a window to remind user to follow this flow: +# select dataset -> Submit dimension reduction job -> Apply clustering +# Args: +# n_submit (int): Number of clicks on the 'Submit' button. +# n_apply (int): Number of clicks on the 'Apply' button. +# is_open (bool): Current state of the modal window (open/closed). +# input_data (list): User selected data +# Returns: +# is_open (bool): New state of the modal window. +# modal_body_text (str): Text to be displayed in the modal body. +# ''' +# at_least_one_dataset_selected = False +# if selected_example_dataset or user_upload_data_dir or data_clinic_file_path: +# at_least_one_dataset_selected = True - if ((n_submit and not at_least_one_dataset_selected) or - (n_apply and not at_least_one_dataset_selected)): - return True, "Please select an example dataset or upload your own zipped dataset or choose DataClinic outpu." - elif n_apply and n_submit is None: - return True, "Please select a dimension reduction algorithm and click 'Submit' button before clustering." +# if ((n_submit and not at_least_one_dataset_selected) or +# (n_apply and not at_least_one_dataset_selected)): +# return True, "Please select an example dataset or upload your own zipped dataset or choose DataClinic outpu." +# elif n_apply and n_submit is None: +# return True, "Please select a dimension reduction algorithm and click 'Submit' button before clustering." - return False, "No alert." +# return False, "No alert." 
# @app.callback( From 44f85ad324643fea738c130dc42f77b67e4be553 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 11:59:15 -0700 Subject: [PATCH 32/62] formatting --- src/app_layout.py | 428 ++++++++++++++++++++++++++-------------------- 1 file changed, 239 insertions(+), 189 deletions(-) diff --git a/src/app_layout.py b/src/app_layout.py index 2aac91e..702128d 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -1,47 +1,55 @@ -from dash import Dash, html, dcc +import pathlib + import dash_bootstrap_components as dbc -from dash_iconify import DashIconify -from dash.long_callback import DiskcacheLongCallbackManager -import plotly.graph_objects as go import dash_uploader as du import diskcache -import pathlib -import os +import plotly.graph_objects as go +from dash import Dash, dcc, html +from dash.long_callback import DiskcacheLongCallbackManager +from dash_iconify import DashIconify +from file_manager.main import FileManager import templates -from file_manager.main import FileManager -### GLOBAL VARIABLES -ALGORITHM_DATABASE = {"PCA": "PCA", "UMAP": "UMAP",} -CLUSTER_ALGORITHM_DATABASE = {"KMeans": "KMeans", "DBSCAN": "DBSCAN", "HDBSCAN": "HDBSCAN"} +# GLOBAL VARIABLES +ALGORITHM_DATABASE = { + "PCA": "PCA", + "UMAP": "UMAP", +} +CLUSTER_ALGORITHM_DATABASE = { + "KMeans": "KMeans", + "DBSCAN": "DBSCAN", + "HDBSCAN": "HDBSCAN", +} DATA_OPTION = [ {"label": "Synthetic Shapes", "value": "data/example_shapes/Demoshapes.npz"}, - {"label": "Latent representations from encoder-decoder model", "value": "data/example_latentrepresentation/f_vectors.parquet"} + { + "label": "Latent representations from encoder-decoder model", + "value": "data/example_latentrepresentation/f_vectors.parquet", + }, ] -DOCKER_DATA = pathlib.Path.home() / 'data' #/app/work/data -UPLOAD_FOLDER_ROOT = DOCKER_DATA / 'upload' #/app/work/data/upload - -# DATA_CLINIC_OPTION = +DOCKER_DATA = pathlib.Path.home() / "data" # /app/work/data +UPLOAD_FOLDER_ROOT = DOCKER_DATA / "upload" # /app/work/data/upload -#### SETUP DASH APP #### +# SETUP DASH APP cache = diskcache.Cache("./cache") long_callback_manager = DiskcacheLongCallbackManager(cache) external_stylesheets = [dbc.themes.BOOTSTRAP, "../assets/segmentation-style.css"] -app = Dash(__name__, - external_stylesheets=external_stylesheets, - suppress_callback_exceptions=True, - long_callback_manager=long_callback_manager) +app = Dash( + __name__, + external_stylesheets=external_stylesheets, + suppress_callback_exceptions=True, + long_callback_manager=long_callback_manager, +) server = app.server -dash_file_explorer = FileManager(DOCKER_DATA, - UPLOAD_FOLDER_ROOT, - open_explorer=False) +dash_file_explorer = FileManager(DOCKER_DATA, UPLOAD_FOLDER_ROOT, open_explorer=False) dash_file_explorer.init_callbacks(app) du.configure_upload(app, UPLOAD_FOLDER_ROOT, use_upload_id=False) -#### BEGIN DASH CODE #### +# BEGIN DASH CODE header = templates.header() # right panel: uploader, scatter plot, individual image plot image_panel = [ @@ -49,72 +57,72 @@ id="image-card", children=[ dbc.CardHeader( - [ - dbc.Label('Upload your own zipped dataset', className='mr-2'), + [ + dbc.Label("Upload your own zipped dataset", className="mr-2"), dash_file_explorer.file_explorer, - dbc.Label('Or select Data Clinic modal', className='mr-2'), + dbc.Label("Or select Data Clinic modal", className="mr-2"), dcc.Dropdown( - id='feature-vector-model-list', + id="feature-vector-model-list", clearable=False, - style={'margin-bottom': '1rem'} + style={"margin-bottom": "1rem"}, ), - dbc.Label('Or try 
Example Dataset', className='mr-2'), + dbc.Label("Or try Example Dataset", className="mr-2"), dcc.Dropdown( - id='example-dataset-selection', + id="example-dataset-selection", options=DATA_OPTION, - #value = DATA_OPTION[0]['value'], clearable=False, - style={'margin-bottom': '1rem'} + style={"margin-bottom": "1rem"}, ), ] ), dbc.CardBody( dcc.Graph( id="scatter", - figure=go.Figure(go.Scattergl(mode='markers')), + figure=go.Figure(go.Scattergl(mode="markers")), ) ), - dbc.CardFooter( - dcc.Graph( - id="heatmap", - figure=go.Figure(go.Heatmap()) - ) - ) - ] + dbc.CardFooter(dcc.Graph(id="heatmap", figure=go.Figure(go.Heatmap()))), + ], ) ] # left panel: choose algorithms, submit job, choose scatter plot attributes, and statistics... algo_panel = html.Div( - [dbc.Card( - id="algo-card", - style={"width": "100%"}, - children=[ - dbc.Collapse(children=[ - dbc.CardHeader("Select Dimension Reduction Algorithms"), - dbc.CardBody( - [ - dbc.Label("Algorithm", className='mr-2'), - dcc.Dropdown(id="algo-dropdown", - options=[{"label": entry, "value": entry} for entry in ALGORITHM_DATABASE], - style={'min-width': '250px'}, - value='PCA', - ), - html.Div(id='additional-model-params'), + [ + dbc.Card( + id="algo-card", + style={"width": "100%"}, + children=[ + dbc.Collapse( + children=[ + dbc.CardHeader("Select Dimension Reduction Algorithms"), + dbc.CardBody( + [ + dbc.Label("Algorithm", className="mr-2"), + dcc.Dropdown( + id="algo-dropdown", + options=[ + {"label": entry, "value": entry} + for entry in ALGORITHM_DATABASE + ], + style={"min-width": "250px"}, + value="PCA", + ), + html.Div(id="additional-model-params"), html.Hr(), html.Div( [ - dbc.Label('Name your job', className='mr-2'), + dbc.Label("Name your job", className="mr-2"), dcc.Input( id="job-name", placeholder="test0", - style={'width':'100%'} + style={"width": "100%"}, ), ] ), html.Hr(), html.Div( - [ + [ dbc.Button( "Submit", color="secondary", @@ -122,22 +130,25 @@ outline=True, size="lg", className="m-1", - style={'width':'50%'} + style={"width": "50%"}, ), ], - className='row', - style={'align-items': 'center', 'justify-content': 'center'} + className="row", + style={ + "align-items": "center", + "justify-content": "center", + }, ), - html.Div(id='invisible-apply-div') - ] + html.Div(id="invisible-apply-div"), + ] + ), + ], + id="model-collapse", + is_open=True, + style={"margin-bottom": "0rem"}, ) ], - id="model-collapse", - is_open=True, - style = {'margin-bottom': '0rem'} - ) - ] - ) + ) ] ) @@ -147,116 +158,149 @@ id="cluster-algo-card", style={"width": "100%"}, children=[ - dbc.Collapse(children=[ + dbc.Collapse( + children=[ dbc.CardHeader("Select Clustering Algorithms"), - dbc.CardBody([ - dbc.Label("Algorithm", className='mr-2'), - dcc.Dropdown(id="cluster-algo-dropdown", - options=[{"label": entry, "value": entry} for entry in CLUSTER_ALGORITHM_DATABASE], - style={'min-width': '250px'}, - value='DBSCAN', - ), - html.Div(id='additional-cluster-params'), - html.Hr(), - html.Div( - [ - dbc.Button( - "Apply", - color="secondary", - id="run-cluster-algo", - outline=True, - size="lg", - className="m-1", - style={'width':'50%'} - ), - ], - className='row', - style={'align-items': 'center', 'justify-content': 'center'} - ), - html.Div(id='invisible-submit-div') - ] - - ) + dbc.CardBody( + [ + dbc.Label("Algorithm", className="mr-2"), + dcc.Dropdown( + id="cluster-algo-dropdown", + options=[ + {"label": entry, "value": entry} + for entry in CLUSTER_ALGORITHM_DATABASE + ], + style={"min-width": "250px"}, + value="DBSCAN", + ), + 
html.Div(id="additional-cluster-params"), + html.Hr(), + html.Div( + [ + dbc.Button( + "Apply", + color="secondary", + id="run-cluster-algo", + outline=True, + size="lg", + className="m-1", + style={"width": "50%"}, + ), + ], + className="row", + style={ + "align-items": "center", + "justify-content": "center", + }, + ), + html.Div(id="invisible-submit-div"), + ] + ), ], - id="cluster-model-collapse", - is_open=True, - style = {'margin-bottom': '0rem'} + id="cluster-model-collapse", + is_open=True, + style={"margin-bottom": "0rem"}, ) - ] + ], ) ] ) -scatter_control_panel = html.Div( - [dbc.Card( - style={"width": "100%"}, - children=[ - dbc.CardHeader("Scatter Plot Control Panel"), - dbc.CardBody([ - dbc.Label('Scatter Colors', className='mr-3'), - dcc.RadioItems(id='scatter-color', - options=[ - {'label': 'cluster', 'value': 'cluster'}, - {'label': 'label', 'value': 'label'} - ], - value = 'cluster', - style={'min-width': '250px'}, - className='mb-2'), - dbc.Label("Select cluster", className='mr-3'), - dcc.Dropdown(id='cluster-dropdown', - value=-1, - style={'min-width': '250px'}, - className='mb-2'), - dbc.Label("Select label", className='mr-3'), - dcc.Dropdown(id='label-dropdown', - value=-2, - style={'min-width': '250px'}, - ) - ]) - ] - ), - dcc.Interval( - id='interval-component', - interval=3000, # in milliseconds - max_intervals=-1, # keep triggering indefinitely, None - n_intervals=0, - ), +scatter_control_panel = html.Div( + [ + dbc.Card( + style={"width": "100%"}, + children=[ + dbc.CardHeader("Scatter Plot Control Panel"), + dbc.CardBody( + [ + dbc.Label("Scatter Colors", className="mr-3"), + dcc.RadioItems( + id="scatter-color", + options=[ + {"label": "cluster", "value": "cluster"}, + {"label": "label", "value": "label"}, + ], + value="cluster", + style={"min-width": "250px"}, + className="mb-2", + ), + dbc.Label("Select cluster", className="mr-3"), + dcc.Dropdown( + id="cluster-dropdown", + value=-1, + style={"min-width": "250px"}, + className="mb-2", + ), + dbc.Label("Select label", className="mr-3"), + dcc.Dropdown( + id="label-dropdown", + value=-2, + style={"min-width": "250px"}, + ), + ] + ), + ], + ), + dcc.Interval( + id="interval-component", + interval=3000, # in milliseconds + max_intervals=-1, # keep triggering indefinitely, None + n_intervals=0, + ), ] ) -heatmap_control_panel = html.Div( - [dbc.Card( - style={"width": "100%"}, - children=[ - dbc.CardHeader("Heatmap Control Panel"), - dbc.CardBody([ - dbc.Label([ - 'Select a Group of Points using ', - html.Span(html.I(DashIconify(icon="lucide:lasso")), className='icon'), - ' or ', - html.Span(html.I(DashIconify(icon="lucide:box-select")), className='icon'), - ' Tools :' - ], - className='mb-3'), - dbc.Label(id='stats-div', children=[ - 'Number of images selected: 0', - html.Br(), - 'Clusters represented: N/A', - html.Br(), - 'Labels represented: N/A', - ]), - dbc.Label('Display Image Options', className='mr-3'), - dcc.RadioItems(id='mean-std-toggle', - options=[ - {'label': 'Mean', 'value': 'mean'}, - {'label': 'Standard Deviation', 'value': 'sigma'} - ], - value = 'mean', - style={'min-width': '250px'}, - className='mb-2'), - ]) - ] - )] +heatmap_control_panel = html.Div( + [ + dbc.Card( + style={"width": "100%"}, + children=[ + dbc.CardHeader("Heatmap Control Panel"), + dbc.CardBody( + [ + dbc.Label( + [ + "Select a Group of Points using ", + html.Span( + html.I(DashIconify(icon="lucide:lasso")), + className="icon", + ), + " or ", + html.Span( + html.I(DashIconify(icon="lucide:box-select")), + className="icon", 
+ ), + " Tools :", + ], + className="mb-3", + ), + dbc.Label( + id="stats-div", + children=[ + "Number of images selected: 0", + html.Br(), + "Clusters represented: N/A", + html.Br(), + "Labels represented: N/A", + ], + ), + dbc.Label("Display Image Options", className="mr-3"), + dcc.RadioItems( + id="mean-std-toggle", + options=[ + {"label": "Mean", "value": "mean"}, + {"label": "Standard Deviation", "value": "sigma"}, + ], + value="mean", + style={"min-width": "250px"}, + className="mb-2", + ), + ] + ), + ], + ) + ] ) # add alert pop up window @@ -274,7 +318,13 @@ ) -control_panel = [algo_panel, cluster_algo_panel, scatter_control_panel, heatmap_control_panel, modal] +control_panel = [ + algo_panel, + cluster_algo_panel, + scatter_control_panel, + heatmap_control_panel, + modal, +] # metadata @@ -283,34 +333,34 @@ id="no-display", children=[ # Store for user created contents - dcc.Store(id='image-length', data=0), - dcc.Store(id='user-upload-data-dir', data=None), - dcc.Store(id='dataset-options', data=DATA_OPTION), - dcc.Store(id='run-counter', data=0), - dcc.Store(id='experiment-id', data=None), + dcc.Store(id="image-length", data=0), + dcc.Store(id="user-upload-data-dir", data=None), + dcc.Store(id="dataset-options", data=DATA_OPTION), + dcc.Store(id="run-counter", data=0), + dcc.Store(id="experiment-id", data=None), # data_label_schema, latent vectors, clusters - dcc.Store(id='input_labels', data=None), - dcc.Store(id='label_schema', data=None), - dcc.Store(id='model_id', data=None), - dcc.Store(id='latent_vectors', data=None), - dcc.Store(id='clusters', data=None), + dcc.Store(id="input_labels", data=None), + dcc.Store(id="label_schema", data=None), + dcc.Store(id="model_id", data=None), + dcc.Store(id="latent_vectors", data=None), + dcc.Store(id="clusters", data=None), ], ) ] -##### DEFINE LAYOUT #### +# DEFINE LAYOUT app.layout = html.Div( [ - header, + header, dbc.Container( - children = [ - dbc.Row([ dbc.Col(control_panel, width=4), - dbc.Col(image_panel, width=7) - ]), + children=[ + dbc.Row( + [dbc.Col(control_panel, width=4), dbc.Col(image_panel, width=7)] + ), dbc.Row(dbc.Col(meta)), ] ), - modal + modal, ] -) \ No newline at end of file +) From 28a5b1021d391425094bc9bee38186426c7ee1a1 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 12:04:50 -0700 Subject: [PATCH 33/62] formatting --- src/frontend.py | 681 ++++++++++++++++++++++++------------------------ 1 file changed, 344 insertions(+), 337 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index b41994a..d3d470f 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -1,38 +1,36 @@ -import dash -from dash import html, Input, Output, State -from dash.exceptions import PreventUpdate -import plotly.graph_objects as go -import pandas as pd -import numpy as np -from sklearn.cluster import MiniBatchKMeans, DBSCAN, HDBSCAN -import pathlib import json -import uuid -import requests import os -import requests -import pytz +import pathlib +import uuid from datetime import datetime - +import numpy as np +import pandas as pd +import plotly.graph_objects as go +import pytz +import requests +from dash import Input, Output, State, html +from dash.exceptions import PreventUpdate from file_manager.data_project import DataProject +from sklearn.cluster import DBSCAN, HDBSCAN, MiniBatchKMeans -from app_layout import app, DOCKER_DATA, UPLOAD_FOLDER_ROOT -from latentxp_utils import kmeans_kwargs, dbscan_kwargs, hdbscan_kwargs, hex_to_rgba, generate_scatter_data, remove_key_from_dict_list, get_content, 
get_trained_models_list, load_images_by_indices +from app_layout import app from dash_component_editor import JSONParameterEditor - -from utils_prefect import ( - get_children_flow_run_ids, - get_flow_run_name, - get_flow_runs_by_name, - schedule_prefect_flow, +from latentxp_utils import ( + dbscan_kwargs, + generate_scatter_data, + hdbscan_kwargs, + hex_to_rgba, + kmeans_kwargs, + load_images_by_indices, + remove_key_from_dict_list, ) +from utils_prefect import get_children_flow_run_ids, schedule_prefect_flow - -#### GLOBAL PARAMS #### -DATA_DIR = str(os.environ['DATA_DIR']) -USER = 'admin' #'mlexchange-team' # move to env file -OUTPUT_DIR = pathlib.Path('data/mlexchange_store/' + USER) +# GLOBAL PARAMS +DATA_DIR = str(os.environ["DATA_DIR"]) +USER = "admin" # 'mlexchange-team' move to env file +OUTPUT_DIR = pathlib.Path("data/mlexchange_store/" + USER) UPLOAD_FOLDER_ROOT = "data/upload" PREFECT_TAGS = json.loads(os.getenv("PREFECT_TAGS", '["latent-space-explorer"]')) TIMEZONE = os.getenv("TIMEZONE", "US/Pacific") @@ -71,78 +69,86 @@ } - @app.callback( - Output('additional-model-params', 'children'), - Output('model_id', 'data'), - Input('algo-dropdown', 'value') + Output("additional-model-params", "children"), + Output("model_id", "data"), + Input("algo-dropdown", "value"), ) def show_dimension_reduction_gui_layouts(selected_algo): - ''' + """ This callback display dropdown menu in the frontend for different dimension reduction algos Args: selected_algo: Selected dimension reduction algorithm Returns: item_list: dropdown menu html code model_uid: selected algo's uid - ''' - data = requests.get('http://content-api:8000/api/v0/models').json() # all model - - if selected_algo == 'PCA': - conditions = {'name': 'PCA'} - elif selected_algo == 'UMAP': - conditions = {'name': 'UMAP'} - - model = [d for d in data if all((k in d and d[k] == v) for k, v in conditions.items())] # filter pca or umap - model_uid = model[0]['content_id'] - new_model = remove_key_from_dict_list(model[0]["gui_parameters"], 'comp_group') - - item_list = JSONParameterEditor(_id={'type': str(uuid.uuid4())}, - json_blob=new_model, + """ + data = requests.get("http://content-api:8000/api/v0/models").json() # all model + + if selected_algo == "PCA": + conditions = {"name": "PCA"} + elif selected_algo == "UMAP": + conditions = {"name": "UMAP"} + + model = [ + d for d in data if all((k in d and d[k] == v) for k, v in conditions.items()) + ] # filter pca or umap + model_uid = model[0]["content_id"] + new_model = remove_key_from_dict_list(model[0]["gui_parameters"], "comp_group") + + item_list = JSONParameterEditor( + _id={"type": str(uuid.uuid4())}, + json_blob=new_model, ) item_list.init_callbacks(app) - + return item_list, model_uid + @app.callback( - Output('additional-cluster-params', 'children'), - Input('cluster-algo-dropdown', 'value'), + Output("additional-cluster-params", "children"), + Input("cluster-algo-dropdown", "value"), ) def show_clustering_gui_layouts(selected_algo): - ''' + """ This callback display drop down menu in the fronend for different clustering algos Args: selected_algo: selected clustering algorithm Returns: item_list: dropdown menu html code - ''' - if selected_algo == 'KMeans': + """ + if selected_algo == "KMeans": kwargs = kmeans_kwargs - elif selected_algo == 'DBSCAN': + elif selected_algo == "DBSCAN": kwargs = dbscan_kwargs - elif selected_algo == 'HDBSCAN': + elif selected_algo == "HDBSCAN": kwargs = hdbscan_kwargs - - item_list = JSONParameterEditor(_id={'type': str(uuid.uuid4())}, - 
json_blob=kwargs["gui_parameters"]) + + item_list = JSONParameterEditor( + _id={"type": str(uuid.uuid4())}, json_blob=kwargs["gui_parameters"] + ) item_list.init_callbacks(app) return item_list + @app.callback( [ - Output('input_labels', 'data'), - Output('label_schema', 'data'), - Output('label-dropdown', 'options'), - # Output('user-upload-data-dir', 'data'), + Output("input_labels", "data"), + Output("label_schema", "data"), + Output("label-dropdown", "options"), ], [ - Input('example-dataset-selection', 'value'), # example dataset - Input({'base_id': 'file-manager', 'name': 'data-project-dict'},'data'), # FM dataset - Input('feature-vector-model-list', 'value'), # data clinic dataset - ] + Input("example-dataset-selection", "value"), # example dataset + Input( + {"base_id": "file-manager", "name": "data-project-dict"}, "data" + ), # FM dataset + Input("feature-vector-model-list", "value"), # data clinic dataset + ], ) -def update_data_n_label_schema(selected_example_dataset, data_project_dict, data_clinic_file_path): - ''' +def update_data_n_label_schema( + selected_example_dataset, data_project_dict, data_clinic_file_path +): + """ This callback updates the selected dataset from the provided example datasets, as well as labels, and label schema Args: example-dataset-selection: selected dataset from the provided example datasets, not the one that user uploaded @@ -153,79 +159,82 @@ def update_data_n_label_schema(selected_example_dataset, data_project_dict, data label_schema: the text of each unique label label_dropdown: label dropdown options user_upload_data_dir: dir name for the user uploaded zip file - ''' + """ labels = None label_schema = {} # check if user is using user uploaded zip file or example dataset or data clinic file # priority level: FileManage > DataClinic > Example Datasets - # FileManager - user uploaded zip file of images - # data_project = DataProject() - # data_project.init_from_dict(upload_file_paths) - data_project = DataProject.from_dict(data_project_dict) options = [] - #user_upload_data_dir = None + # user_upload_data_dir = None if len(data_project.datasets) > 0: labels = np.full((len(data_project.datasets),), -1) - # user_upload_data_dir = os.path.dirname(data_project_dict[0]['uri']) # DataClinic options elif data_clinic_file_path is not None: df = pd.read_parquet(data_clinic_file_path) - # data = df.values labels = np.full((df.shape[0],), -1) # Example dataset option 1 elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": - # data = np.load("/app/work/" + selected_example_dataset)['arr_0'] labels = np.load("/app/work/data/example_shapes/DemoLabels.npy") f = open("/app/work/data/example_shapes/label_schema.json") label_schema = json.load(f) # Example dataset option 2 - elif selected_example_dataset == "data/example_latentrepresentation/f_vectors.parquet": + elif ( + selected_example_dataset + == "data/example_latentrepresentation/f_vectors.parquet" + ): df = pd.read_parquet("/app/work/" + selected_example_dataset) - # data = df.values labels = np.full((df.shape[0],), -1) + if label_schema: + options = [ + {"label": f"Label {label}", "value": label} for label in label_schema + ] + options.insert(0, {"label": "Unlabeled", "value": -1}) + options.insert(0, {"label": "All", "value": -2}) - if label_schema: - options = [{'label': f'Label {label}', 'value': label} for label in label_schema] - options.insert(0, {'label': 'Unlabeled', 'value': -1}) - options.insert(0, {'label': 'All', 'value': -2}) + return labels, label_schema, options - return 
labels, label_schema, options #, user_upload_data_dir @app.callback( [ # flag the read variable - Output('experiment-id', 'data'), + Output("experiment-id", "data"), # reset scatter plot control panel - Output('scatter-color', 'value'), - Output('cluster-dropdown', 'value'), - Output('label-dropdown', 'value'), + Output("scatter-color", "value"), + Output("cluster-dropdown", "value"), + Output("label-dropdown", "value"), # reset heatmap - Output('heatmap', 'figure', allow_duplicate=True), + Output("heatmap", "figure", allow_duplicate=True), # reset interval value to - Output('interval-component', 'max_intervals'), + Output("interval-component", "max_intervals"), ], - Input('run-algo', 'n_clicks'), + Input("run-algo", "n_clicks"), [ - State('job-name', 'value'), # job_name - State('example-dataset-selection', 'value'), # 2 example dataset - # State('user-upload-data-dir', 'data'), # FM - State('feature-vector-model-list', 'value'), # DataClinic - State('model_id', 'data'), - State('algo-dropdown', 'value'), - State('additional-model-params', 'children'), - State({"base_id": "file-manager", "name": "data-project-dict"}, "data") # DataProject for FM - + State("job-name", "value"), # job_name + State("example-dataset-selection", "value"), # 2 example dataset + State("feature-vector-model-list", "value"), # DataClinic + State("model_id", "data"), + State("algo-dropdown", "value"), + State("additional-model-params", "children"), + State( + {"base_id": "file-manager", "name": "data-project-dict"}, "data" + ), # DataProject for FM ], - prevent_initial_call=True + prevent_initial_call=True, ) -def submit_dimension_reduction_job(submit_n_clicks, - job_name, - selected_example_dataset, data_clinic_file_path, - model_id, selected_algo, children, data_project_dict): +def submit_dimension_reduction_job( + submit_n_clicks, + job_name, + selected_example_dataset, + data_clinic_file_path, + model_id, + selected_algo, + children, + data_project_dict, +): """ This callback is triggered every time the Submit button is hit: - compute latent vectors, which will be saved in data/output/experiment_id @@ -248,84 +257,87 @@ def submit_dimension_reduction_job(submit_n_clicks, """ if not submit_n_clicks: raise PreventUpdate - if not selected_example_dataset and not data_project_dict and not data_clinic_file_path: + if ( + not selected_example_dataset + and not data_project_dict + and not data_clinic_file_path + ): raise PreventUpdate input_params = {} if children: - for child in children['props']['children']: - key = child["props"]["children"][1]["props"]["id"]["param_key"] + for child in children["props"]["children"]: + key = child["props"]["children"][1]["props"]["id"]["param_key"] value = child["props"]["children"][1]["props"]["value"] input_params[key] = value print("Dimension reduction algo params: ", input_params, flush=True) - - # check if user is using user uploaded zip file or example dataset or data clinic file + + # check if user is using user uploaded zip file or example dataset or data clinic file data_project = DataProject.from_dict(data_project_dict) if len(data_project.datasets) > 0: print("FM", flush=True) data_project = DataProject.from_dict(data_project_dict) - io_parameters = {"data_uris": [dataset.uri for dataset in data_project.datasets], - "data_tiled_api_key": data_project.api_key, - "data_type": data_project.data_type, - "root_uri": data_project.root_uri, - } - - # elif data_clinic_file_path is not None: - # selected_dataset = data_clinic_file_path + io_parameters = { + "data_uris": 
[dataset.uri for dataset in data_project.datasets], + "data_tiled_api_key": data_project.api_key, + "data_type": data_project.data_type, + "root_uri": data_project.root_uri, + } + else: print("selected_example_dataset: " + selected_example_dataset, flush=True) - io_parameters = {"data_uris": [selected_example_dataset], - "data_tiled_api_key": None, - "data_type": "file", - "root_uri": None, - } + io_parameters = { + "data_uris": [selected_example_dataset], + "data_tiled_api_key": None, + "data_type": "file", + "root_uri": None, + } # prefect current_time = datetime.now(pytz.timezone(TIMEZONE)).strftime("%Y/%m/%d %H:%M:%S") - if not job_name: job_name = "test0" + if not job_name: + job_name = "test0" job_name += " " + str(current_time) # project_name = selected_dataset.split("/")[-1] # name of the dataset, get it from FM ## this is an issue project_name = "fake_name" print(PREFECT_TAGS, flush=True) - - # create user directory to store users data/experiments - # experiment_id = str(uuid.uuid4()) - # output_path = OUTPUT_DIR / experiment_id - # output_path.mkdir(parents=True, exist_ok=True) # check which dimension reduction algo, then compose command - if selected_algo == 'PCA': + if selected_algo == "PCA": TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python pca_run.py" - elif selected_algo == 'UMAP': + elif selected_algo == "UMAP": TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python umap_run.py" - + TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"] = io_parameters - TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["output_dir"] = str(OUTPUT_DIR) + TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["output_dir"] = ( + str(OUTPUT_DIR) + ) TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["uid_save"] = "" TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["model_parameters"] = input_params print(TRAIN_PARAMS_EXAMPLE) # run prefect job, job_uid is the new experiment id -> uid_save in the pca_example.yaml file job_uid = schedule_prefect_flow( - FLOW_NAME, - parameters=TRAIN_PARAMS_EXAMPLE, - flow_run_name=f"{job_name} {current_time}", - tags=PREFECT_TAGS + ["train", project_name], - ) + FLOW_NAME, + parameters=TRAIN_PARAMS_EXAMPLE, + flow_run_name=f"{job_name} {current_time}", + tags=PREFECT_TAGS + ["train", project_name], + ) job_message = f"Job has been succesfully submitted with uid: {job_uid}." 
print(job_message, flush=True) - - return job_uid, 'cluster', -1, -2, go.Figure(go.Heatmap()), -1 + + return job_uid, "cluster", -1, -2, go.Figure(go.Heatmap()), -1 + @app.callback( - [ - Output('latent_vectors', 'data'), - Output('interval-component', 'max_intervals', allow_duplicate=True), + [ + Output("latent_vectors", "data"), + Output("interval-component", "max_intervals", allow_duplicate=True), ], - Input('interval-component', 'n_intervals'), - State('experiment-id', 'data'), - State('interval-component', 'max_intervals'), - prevent_initial_call=True + Input("interval-component", "n_intervals"), + State("experiment-id", "data"), + State("interval-component", "max_intervals"), + prevent_initial_call=True, ) def read_latent_vectors(n_intervals, experiment_id, max_intervals): """ @@ -342,37 +354,39 @@ def read_latent_vectors(n_intervals, experiment_id, max_intervals): """ if experiment_id is None or n_intervals == 0 or max_intervals == 0: raise PreventUpdate - + children_flows = get_children_flow_run_ids(experiment_id) print("child flow") print(children_flows) - #read the latent vectors from the output dir + # read the latent vectors from the output dir output_path = OUTPUT_DIR / children_flows[0] - npz_files = list(output_path.glob('*.npy')) - if len(npz_files) > 0 : - lv_filepath = npz_files[0] # latent vector file path + npz_files = list(output_path.glob("*.npy")) + if len(npz_files) > 0: + lv_filepath = npz_files[0] # latent vector file path latent_vectors = np.load(str(lv_filepath)) print("latent vector", latent_vectors.shape) return latent_vectors, 0 else: return None, -1 - + + @app.callback( [ - Output('clusters', 'data'), - Output('cluster-dropdown', 'options'), + Output("clusters", "data"), + Output("cluster-dropdown", "options"), ], - Input('run-cluster-algo', 'n_clicks'), + Input("run-cluster-algo", "n_clicks"), [ - State('latent_vectors', 'data'), - State('cluster-algo-dropdown', 'value'), - State('additional-cluster-params', 'children'), - State('experiment-id', 'data'), - ] + State("latent_vectors", "data"), + State("cluster-algo-dropdown", "value"), + State("additional-cluster-params", "children"), + State("experiment-id", "data"), + ], ) -def apply_clustering(apply_n_clicks, - latent_vectors, selected_algo, children, experiment_id): +def apply_clustering( + apply_n_clicks, latent_vectors, selected_algo, children, experiment_id +): """ This callback is triggered by click the 'Apply' button at the clustering panel: - apply cluster @@ -386,58 +400,72 @@ def apply_clustering(apply_n_clicks, Returns: clusters: clustering result for each data point """ - ## TODO: pop up a widow to ask user to first run diemnsion reduction then apply + # TODO: pop up a widow to ask user to first run diemnsion reduction then apply if apply_n_clicks == 0 or experiment_id is None: raise PreventUpdate latent_vectors = np.array(latent_vectors) input_params = {} if children: - for child in children['props']['children']: - key = child["props"]["children"][1]["props"]["id"]["param_key"] + for child in children["props"]["children"]: + key = child["props"]["children"][1]["props"]["id"]["param_key"] value = child["props"]["children"][1]["props"]["value"] input_params[key] = value print("Clustering params:", input_params) - + if selected_algo == "KMeans": - obj = MiniBatchKMeans(n_clusters=input_params['n_clusters']) + obj = MiniBatchKMeans(n_clusters=input_params["n_clusters"]) elif selected_algo == "DBSCAN": - obj = DBSCAN(eps=input_params['eps'], min_samples=input_params['min_samples']) + obj = 
DBSCAN(eps=input_params["eps"], min_samples=input_params["min_samples"]) elif selected_algo == "HDBSCAN": - obj = HDBSCAN(min_cluster_size=input_params['min_cluster_size']) + obj = HDBSCAN(min_cluster_size=input_params["min_cluster_size"]) clusters, options = None, None if obj: clusters = obj.fit_predict(latent_vectors) output_path = OUTPUT_DIR / experiment_id - np.save(output_path/'clusters.npy', clusters) + np.save(output_path / "clusters.npy", clusters) unique_clusters = np.unique(clusters) - options = [{'label': f'Cluster {cluster}', 'value': cluster} for cluster in unique_clusters if cluster != -1] - options.insert(0, {'label': 'All', 'value': -1}) + options = [ + {"label": f"Cluster {cluster}", "value": cluster} + for cluster in unique_clusters + if cluster != -1 + ] + options.insert(0, {"label": "All", "value": -1}) return clusters, options + @app.callback( - Output('scatter', 'figure'), + Output("scatter", "figure"), [ - Input('latent_vectors', 'data'), - Input('cluster-dropdown', 'value'), - Input('label-dropdown', 'value'), - Input('scatter-color', 'value'), - Input('clusters', 'data'), #move clusters to the input + Input("latent_vectors", "data"), + Input("cluster-dropdown", "value"), + Input("label-dropdown", "value"), + Input("scatter-color", "value"), + Input("clusters", "data"), # move clusters to the input ], [ - State('scatter', 'figure'), - State('scatter', 'selectedData'), - State('additional-model-params', 'children'), - - State('input_labels', 'data'), - State('label_schema', 'data'), - ] + State("scatter", "figure"), + State("scatter", "selectedData"), + State("additional-model-params", "children"), + State("input_labels", "data"), + State("label_schema", "data"), + ], ) -def update_scatter_plot(latent_vectors, selected_cluster, selected_label, scatter_color, clusters, - current_figure, selected_data, children, labels, label_names): - ''' +def update_scatter_plot( + latent_vectors, + selected_cluster, + selected_label, + scatter_color, + clusters, + current_figure, + selected_data, + children, + labels, + label_names, +): + """ This callback update the scater plot Args: latent_vectors: data from dimension reduction algos @@ -452,48 +480,56 @@ def update_scatter_plot(latent_vectors, selected_cluster, selected_label, scatte label_names: same as label_schema defined earlier Returns: fig: updated scatter figure - ''' + """ if latent_vectors is None or children is None: raise PreventUpdate latent_vectors = np.array(latent_vectors) print("latent vector shape:", latent_vectors.shape) - n_components = children['props']['children'][0]["props"]["children"][1]["props"]["value"] + n_components = children["props"]["children"][0]["props"]["children"][1]["props"][ + "value" + ] - # if selected_data is not None and len(selected_data.get('points', [])) > 0: - # selected_indices = [point['customdata'][0] for point in selected_data['points']] - if selected_data is not None and len(selected_data.get('points', [])) > 0: + if selected_data is not None and len(selected_data.get("points", [])) > 0: selected_indices = [] - for point in selected_data['points']: - if 'customdata' in point and len(point['customdata']): - selected_indices.append(point['customdata'][0]) + for point in selected_data["points"]: + if "customdata" in point and len(point["customdata"]): + selected_indices.append(point["customdata"][0]) print("selected indices: ", selected_indices) else: selected_indices = None - - if not clusters: # when clusters is None, i.e., after submit dimension reduction but before apply 
clustering + + if ( + not clusters + ): # when clusters is None, i.e., after submit dimension reduction but before apply clustering clusters = [-1 for i in range(latent_vectors.shape[0])] cluster_names = {a: a for a in np.unique(clusters).astype(int)} - - scatter_data = generate_scatter_data(latent_vectors, - n_components, - selected_cluster, - clusters, - cluster_names, - selected_label, - labels, - label_names, - scatter_color) + + scatter_data = generate_scatter_data( + latent_vectors, + n_components, + selected_cluster, + clusters, + cluster_names, + selected_label, + labels, + label_names, + scatter_color, + ) fig = go.Figure(scatter_data) fig.update_layout(legend=dict(tracegroupgap=20)) - if current_figure and 'xaxis' in current_figure['layout'] and 'yaxis' in current_figure[ - 'layout'] and 'autorange' in current_figure['layout']['xaxis'] and current_figure['layout']['xaxis'][ - 'autorange'] is False: + if ( + current_figure + and "xaxis" in current_figure["layout"] + and "yaxis" in current_figure["layout"] + and "autorange" in current_figure["layout"]["xaxis"] + and current_figure["layout"]["xaxis"]["autorange"] is False + ): # Update the axis range with current figure's values if available and if autorange is False - fig.update_xaxes(range=current_figure['layout']['xaxis']['range']) - fig.update_yaxes(range=current_figure['layout']['yaxis']['range']) + fig.update_xaxes(range=current_figure["layout"]["xaxis"]["range"]) + fig.update_yaxes(range=current_figure["layout"]["yaxis"]["range"]) else: # If it's the initial figure or autorange is True, set autorange to True to fit all points in view fig.update_xaxes(autorange=True) @@ -503,29 +539,38 @@ def update_scatter_plot(latent_vectors, selected_cluster, selected_label, scatte # Use the selected indices to highlight the selected points in the updated figure for trace in fig.data: if trace.marker.color is not None: - trace.marker.color = [hex_to_rgba('grey', 0.3) if i not in selected_indices else 'red' for i in - range(len(trace.marker.color))] + trace.marker.color = [ + hex_to_rgba("grey", 0.3) if i not in selected_indices else "red" + for i in range(len(trace.marker.color)) + ] return fig + @app.callback( - Output('heatmap', 'figure', allow_duplicate=True), + Output("heatmap", "figure", allow_duplicate=True), [ - Input('scatter', 'clickData'), - Input('scatter', 'selectedData'), - Input('mean-std-toggle', 'value'), + Input("scatter", "clickData"), + Input("scatter", "selectedData"), + Input("mean-std-toggle", "value"), ], [ - State('example-dataset-selection', 'value'), # example dataset - State({"base_id": "file-manager", "name": "data-project-dict"}, "data"), # DataProject for FM - State('feature-vector-model-list', 'value'), # data clinic dataset - + State("example-dataset-selection", "value"), # example dataset + State( + {"base_id": "file-manager", "name": "data-project-dict"}, "data" + ), # DataProject for FM + State("feature-vector-model-list", "value"), # data clinic dataset ], - prevent_initial_call=True - + prevent_initial_call=True, ) -def update_heatmap(click_data, selected_data, display_option, - selected_example_dataset, data_project_dict, data_clinic_file_path): - ''' +def update_heatmap( + click_data, + selected_data, + display_option, + selected_example_dataset, + data_project_dict, + data_clinic_file_path, +): + """ This callback update the heatmap Args: click_data: clicked data on scatter figure @@ -533,117 +578,135 @@ def update_heatmap(click_data, selected_data, display_option, display_option: option to display 
mean or std Returns: fig: updated heatmap - ''' - if not selected_example_dataset and not data_project_dict and not data_clinic_file_path: + """ + if ( + not selected_example_dataset + and not data_project_dict + and not data_clinic_file_path + ): raise PreventUpdate - + # user select a group of points - if selected_data is not None and len(selected_data['points']) > 0: - selected_indices = [point['customdata'][0] for point in selected_data['points']] # Access customdata for the original indices + if selected_data is not None and len(selected_data["points"]) > 0: + selected_indices = [ + point["customdata"][0] for point in selected_data["points"] + ] # Access customdata for the original indices print("selected_indices", selected_indices) - - ### FileManager + + # FileManager # print("upload_file_paths") # if not selected, its an empty list not None selected_images = [] data_project = DataProject.from_dict(data_project_dict) if len(data_project.datasets) > 0: print("FM file") - selected_images, _ = data_project.read(selected_indices, export='pillow') - ### DataClinic + selected_images, _ = data_project.read(selected_indices, export="pillow") + # DataClinic elif data_clinic_file_path is not None: print("data_clinic_file_path") print(data_clinic_file_path) directory_path = os.path.dirname(data_clinic_file_path) selected_images = load_images_by_indices(directory_path, selected_indices) - ### Example dataset + # Example dataset elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": print("Demoshapes.npz") - selected_images = np.load("/app/work/" + selected_example_dataset)['arr_0'][selected_indices] + selected_images = np.load("/app/work/" + selected_example_dataset)["arr_0"][ + selected_indices + ] print(selected_images.shape) - elif selected_example_dataset == "data/example_latentrepresentation/f_vectors.parquet": + elif ( + selected_example_dataset + == "data/example_latentrepresentation/f_vectors.parquet" + ): print("f_vectors.parque") df = pd.read_parquet("/app/work/" + selected_example_dataset) selected_images = df.iloc[selected_indices].values selected_images = np.array(selected_images) print("selected_images shape:", selected_images.shape) - + # display options - if display_option == 'mean': + if display_option == "mean": heatmap_data = go.Heatmap(z=np.mean(selected_images, axis=0)) - elif display_option == 'sigma': + elif display_option == "sigma": heatmap_data = go.Heatmap(z=np.std(selected_images, axis=0)) - elif click_data is not None and len(click_data['points']) > 0: - selected_index = click_data['points'][0]['customdata'][0] - ### FileManager + elif click_data is not None and len(click_data["points"]) > 0: + selected_index = click_data["points"][0]["customdata"][0] + # FileManager data_project = DataProject.from_dict(data_project_dict) if len(data_project.datasets) > 0: - selected_images, _ = data_project.read([selected_index], export='pillow') - ### DataClinic + selected_images, _ = data_project.read([selected_index], export="pillow") + # DataClinic elif data_clinic_file_path is not None: directory_path = os.path.dirname(data_clinic_file_path) clicked_image = load_images_by_indices(directory_path, [selected_index]) - ### Example dataset + # Example dataset elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": - clicked_image = np.load("/app/work/" + selected_example_dataset)['arr_0'][selected_index] - elif selected_example_dataset == "data/example_latentrepresentation/f_vectors.parquet": + clicked_image = np.load("/app/work/" + 
selected_example_dataset)["arr_0"][ + selected_index + ] + elif ( + selected_example_dataset + == "data/example_latentrepresentation/f_vectors.parquet" + ): df = pd.read_parquet("/app/work/" + selected_example_dataset) clicked_image = df.iloc[selected_index].values clicked_image = np.array(clicked_image) - + heatmap_data = go.Heatmap(z=clicked_image) else: heatmap_data = go.Heatmap() # only update heat map when the input data is 2d images, do not update for input latent vectors - if heatmap_data['z'] is None or len(np.shape(heatmap_data['z'])) < 2: + if heatmap_data["z"] is None or len(np.shape(heatmap_data["z"])) < 2: raise PreventUpdate - + # Determine the aspect ratio based on the shape of the heatmap_data's z-values aspect_x = 1 aspect_y = 1 - if heatmap_data['z'] is not None: - if heatmap_data['z'].size > 0: - print(np.shape(heatmap_data['z'])) - aspect_y, aspect_x = np.shape(heatmap_data['z'])[-2:] + if heatmap_data["z"] is not None: + if heatmap_data["z"].size > 0: + print(np.shape(heatmap_data["z"])) + aspect_y, aspect_x = np.shape(heatmap_data["z"])[-2:] return go.Figure( data=heatmap_data, layout=dict( autosize=True, yaxis=dict(scaleanchor="x", scaleratio=aspect_y / aspect_x), - ) + ), ) + @app.callback( - Output('stats-div', 'children'), - Input('scatter', 'selectedData'), + Output("stats-div", "children"), + Input("scatter", "selectedData"), [ - State('clusters', 'data'), - State('input_labels', 'data'), - State('label_schema', 'data'), - ] + State("clusters", "data"), + State("input_labels", "data"), + State("label_schema", "data"), + ], ) def update_statistics(selected_data, clusters, assigned_labels, label_names): - ''' + """ This callback update the statistics panel Args: selected_data: lasso or rect selected data points on scatter figure clusters: clusters for latent vectors assigned_labels: labels for each latent vector - label_names: same as label schema + label_names: same as label schema Returns: [num_images, clusters, labels]: statistics - ''' - + """ + assigned_labels = np.array(assigned_labels) - if selected_data is not None and len(selected_data['points']) > 0: - selected_indices = [point['customdata'][0] for point in - selected_data['points']] # Access customdata for the original indices + if selected_data is not None and len(selected_data["points"]) > 0: + selected_indices = [ + point["customdata"][0] for point in selected_data["points"] + ] # Access customdata for the original indices selected_clusters = [] if clusters is not None: clusters = np.array(clusters) @@ -657,7 +720,9 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): # Format the clusters and labels as comma-separated strings clusters_str = ", ".join(str(cluster) for cluster in unique_clusters) label_int_to_str_map = {val: key for key, val in label_names.items()} - labels_str = ", ".join(str(label_int_to_str_map[label]) for label in unique_labels if label >= 0) + labels_str = ", ".join( + str(label_int_to_str_map[label]) for label in unique_labels if label >= 0 + ) else: num_images = 0 clusters_str = "N/A" @@ -671,68 +736,10 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): f"Labels represented: {labels_str}", ] -# @app.callback( -# [Output("modal", "is_open"), Output("modal-body", "children")], -# [ -# Input('run-algo', 'n_clicks'), -# Input('run-cluster-algo', 'n_clicks'), -# ], -# [ -# State("modal", "is_open"), -# State('example-dataset-selection', 'value'), -# State('user-upload-data-dir', 'data'), -# State('feature-vector-model-list', 
'value'), -# ] -# ) -# def toggle_modal(n_submit, n_apply, -# is_open, selected_example_dataset, user_upload_data_dir, data_clinic_file_path): -# ''' -# This callback pop up a window to remind user to follow this flow: -# select dataset -> Submit dimension reduction job -> Apply clustering -# Args: -# n_submit (int): Number of clicks on the 'Submit' button. -# n_apply (int): Number of clicks on the 'Apply' button. -# is_open (bool): Current state of the modal window (open/closed). -# input_data (list): User selected data -# Returns: -# is_open (bool): New state of the modal window. -# modal_body_text (str): Text to be displayed in the modal body. -# ''' -# at_least_one_dataset_selected = False -# if selected_example_dataset or user_upload_data_dir or data_clinic_file_path: -# at_least_one_dataset_selected = True - -# if ((n_submit and not at_least_one_dataset_selected) or -# (n_apply and not at_least_one_dataset_selected)): -# return True, "Please select an example dataset or upload your own zipped dataset or choose DataClinic outpu." -# elif n_apply and n_submit is None: -# return True, "Please select a dimension reduction algorithm and click 'Submit' button before clustering." - -# return False, "No alert." - - -# @app.callback( -# Output('feature-vector-model-list', 'options'), -# Input('interval-component', 'n_intervals'), -# ) -# def update_trained_model_list(interval): -# ''' -# This callback updates the list of trained models -# Args: -# tab_value: Tab option -# prob_refresh_n_clicks: Button to refresh the list of probability-based trained models -# similarity_refresh_n_clicks: Button to refresh the list of similarity-based trained models -# Returns: -# prob_model_list: List of trained models in mlcoach -# similarity_model_list: List of trained models in data clinic and mlcoach -# ''' -# data_clinic_models = get_trained_models_list(USER, 'data_clinic') -# ml_coach_models = get_trained_models_list(USER, 'mlcoach') -# feature_vector_models = data_clinic_models + ml_coach_models -# #print(feature_vector_models) - -# return feature_vector_models - - -if __name__ == '__main__': - app.run_server(debug=True, host='0.0.0.0', port=8070, ) + +if __name__ == "__main__": + app.run_server( + debug=True, + host="0.0.0.0", + port=8070, + ) From 7105ae392ba5c1f51a2b73bdf74e12789f6c786e Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 12:09:51 -0700 Subject: [PATCH 34/62] replacing env for example and updating gitignore --- .env | 31 ------------------------------- .env.example | 8 ++++++++ .gitignore | 2 +- 3 files changed, 9 insertions(+), 32 deletions(-) delete mode 100644 .env create mode 100644 .env.example diff --git a/.env b/.env deleted file mode 100644 index 4ea6411..0000000 --- a/.env +++ /dev/null @@ -1,31 +0,0 @@ -USER = admin - -TILED_SINGLE_USER_API_KEY= - -PREFECT_DB_PW=unique_password -PREFECT_DB_USER=prefect_user -PREFECT_DB_NAME=prefect -PREFECT_DB_SERVER=prefect_db -PREFECT_API_URL=http://prefect:4200/api -FLOW_NAME="Parent flow/launch_parent_flow" -TIMEZONE="US/Pacific" - -PREFECT_TAGS='["latent-space-explorer"]' - -TILED_DB_PW= -TILED_DB_USER=tiled_user -TILED_DB_NAME=tiled -TILED_DB_SERVER=tiled_db - -TILED_SINGLE_USER_API_KEY= - - -MLEX_SEGM_USER=mlex_segm_user -MLEX_SEGM_PW= - -TILED_API_KEY= - -TILED_INGEST_TILED_CONFIG_PATH=/deploy/config -TILED_INGEST_RMQ_HOST=rabbitmq -TILED_INGEST_RMQ_USER=guest -TILED_INGEST_RMQ_PW=guest \ No newline at end of file diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..269e4d2 --- /dev/null +++ 
b/.env.example @@ -0,0 +1,8 @@ +USER=admin + +PREFECT_API_URL=http://prefect:4200/api +FLOW_NAME="Parent flow/launch_parent_flow" +TIMEZONE="US/Pacific" +PREFECT_TAGS='["latent-space-explorer"]' + +TILED_API_KEY= diff --git a/.gitignore b/.gitignore index 6968502..57516b2 100644 --- a/.gitignore +++ b/.gitignore @@ -128,7 +128,7 @@ celerybeat.pid *.sage.py # Environments -# .env +.env .venv env/ venv/ From 4b471fdbba2081b531c53cb04968908c2e9adc1f Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 12:38:54 -0700 Subject: [PATCH 35/62] removed modal duplicate --- src/app_layout.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/app_layout.py b/src/app_layout.py index 702128d..a1e7f3b 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -361,6 +361,5 @@ dbc.Row(dbc.Col(meta)), ] ), - modal, ] ) From 9b44de0255232ce7c52ffa17b5f649236d136044 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 12:43:22 -0700 Subject: [PATCH 36/62] removed unused packages from Dockerfile and added data directory env var --- .env.example | 1 + docker-compose.yml | 8 +++---- docker/Dockerfile | 50 +++-------------------------------------- docker/requirements.txt | 4 +--- 4 files changed, 8 insertions(+), 55 deletions(-) diff --git a/.env.example b/.env.example index 269e4d2..8b37080 100644 --- a/.env.example +++ b/.env.example @@ -6,3 +6,4 @@ TIMEZONE="US/Pacific" PREFECT_TAGS='["latent-space-explorer"]' TILED_API_KEY= +DATA_DIR=$PWD/data diff --git a/docker-compose.yml b/docker-compose.yml index 75a44f5..61c05cc 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,23 +10,21 @@ services: dockerfile: "docker/Dockerfile" mem_limit: 2g environment: - DATA_DIR: "${PWD}/data/" + DATA_DIR: "${DATA_DIR}" PREFECT_TAGS: "${PREFECT_TAGS}" PREFECT_API_URL: '${PREFECT_API_URL}' FLOW_NAME: '${FLOW_NAME}' TIMEZONE: "${TIMEZONE}" USER: "${USER}" volumes: - - ./data:/app/work/data + - $DATA_DIR:/app/work/data - ./src:/app/work/src - ../mlex_file_manager/file_manager:/app/work/src/file_manager ports: - - "8070:8070" + - "127.0.0.1:8070:8070" networks: mlex_mle_net: networks: mlex_mle_net: external: true - -# env file: set up pwd \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile index d57f623..87d8d55 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,61 +1,17 @@ -# FROM python:3.9 -# LABEL maintainer="THE MLEXCHANGE TEAM" - -# RUN ls -# COPY docker/requirements.txt requirements.txt - -# RUN apt-get update && apt-get install -y \ -# build-essential \ -# wget \ -# python3-pip\ -# ffmpeg\ -# libsm6\ -# libxext6 - -# RUN pip3 install --upgrade pip &&\ -# pip3 install --timeout=2000 -r requirements.txt\ -# pip install git+https://github.com/taxe10/mlex_file_manager - -# RUN git clone https://github.com/mlexchange/mlex_dash_component_editor - -# # EXPOSE 8000 - -# WORKDIR /app/work -# ENV HOME /app/work -# COPY src src -# # ENV PYTHONUNBUFFERED=1 -# RUN mv /mlex_dash_component_editor/src/dash_component_editor.py /app/work/src/dash_component_editor.py - -# CMD ["bash"] -# #CMD python3 src/frontend.py -# CMD sleep 3600 - FROM python:3.9 LABEL maintainer="THE MLEXCHANGE TEAM" RUN ls COPY docker/requirements.txt requirements.txt -RUN apt-get update && apt-get install -y \ - build-essential \ - wget \ - python3-pip\ - ffmpeg\ - libsm6\ - libxext6 - RUN pip3 install --upgrade pip &&\ - pip3 install --timeout=2000 -r requirements.txt\ - pip install git+https://github.com/taxe10/mlex_file_manager + pip3 install -r requirements.txt\ + pip install 
git+https://github.com/mlexchange/mlex_file_manager\ + pip install git+https://github.com/mlexchange/mlex_dash_component_editor -RUN git clone https://github.com/mlexchange/mlex_dash_component_editor WORKDIR /app/work ENV HOME /app/work COPY src src -RUN mv /mlex_dash_component_editor/src/dash_component_editor.py /app/work/src/dash_component_editor.py CMD ["bash"] -#CMD sleep 3600 CMD python3 src/frontend.py - - diff --git a/docker/requirements.txt b/docker/requirements.txt index 3861b96..658ca06 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -1,5 +1,4 @@ dash==2.9.3 -# dash_component_editor==0.0.7 dash-core-components==2.0.0 dash-bootstrap-components==1.0.2 dash-html-components==2.0.0 @@ -13,5 +12,4 @@ diskcache==5.6.3 pandas numpy Pillow -# prefect -prefect-client==2.14.21 \ No newline at end of file +prefect-client==2.14.21 From a982f0c61d855d463790dd6a98342fb7b7d14e1f Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 12:44:00 -0700 Subject: [PATCH 37/62] removed dash_component_editor file --- src/dash_component_editor.py | 407 ----------------------------------- 1 file changed, 407 deletions(-) delete mode 100644 src/dash_component_editor.py diff --git a/src/dash_component_editor.py b/src/dash_component_editor.py deleted file mode 100644 index 181bae3..0000000 --- a/src/dash_component_editor.py +++ /dev/null @@ -1,407 +0,0 @@ -import re -from typing import Callable -# noinspection PyUnresolvedReferences -from inspect import signature, _empty - -from dash import html, dcc, dash_table, Input, Output, State, MATCH, ALL -import dash_bootstrap_components as dbc -import dash_daq as daq - -import base64 -#import PIL.Image -import io -#import plotly.express as px -# Procedural dash form generation - -""" -{'name', 'title', 'value', 'type', -""" - - -class SimpleItem(dbc.Col): - def __init__(self, - name, - base_id, - title=None, - param_key=None, - type='number', - debounce=True, - **kwargs): - - if param_key == None: - param_key = name - self.label = dbc.Label(title) - self.input = dbc.Input(type=type, - debounce=debounce, - id={**base_id, - 'name': name, - 'param_key': param_key}, - **kwargs) - - super(SimpleItem, self).__init__(children=[self.label, self.input]) - - -class FloatItem(SimpleItem): - pass - - -class IntItem(SimpleItem): - def __init__(self, *args, **kwargs): - if 'min' not in kwargs: - kwargs['min'] = -9007199254740991 - super(IntItem, self).__init__(*args, step=1, **kwargs) - - -class StrItem(SimpleItem): - def __init__(self, *args, **kwargs): - super(StrItem, self).__init__(*args, type='text', **kwargs) - - -class SliderItem(dbc.Col): - def __init__(self, - name, - base_id, - title=None, - param_key=None, - debounce=True, - visible=True, - **kwargs): - - if param_key == None: - param_key = name - self.label = dbc.Label(title) - self.input = dcc.Slider(id={**base_id, - 'name': name, - 'param_key': param_key, - 'layer': 'input'}, - tooltip={"placement": "bottom", "always_visible": True}, - **kwargs) - - style = {} - if not visible: - style['display'] = 'none' - - super(SliderItem, self).__init__(id={**base_id, - 'name': name, - 'param_key': param_key, - 'layer': 'form_group'}, - children=[self.label, self.input], - style=style) - - -class DropdownItem(dbc.Col): - def __init__(self, - name, - base_id, - title=None, - param_key=None, - debounce=True, - visible=True, - **kwargs): - - if param_key == None: - param_key = name - self.label = dbc.Label(title) - self.input = dcc.Dropdown(id={**base_id, - 'name': name, - 'param_key': param_key, - 
'layer': 'input'}, - **kwargs) - - style = {} - if not visible: - style['display'] = 'none' - - super(DropdownItem, self).__init__(id={**base_id, - 'name': name, - 'param_key': param_key, - 'layer': 'form_group'}, - children=[self.label, self.input], - style=style) - - -class RadioItem(dbc.Col): - def __init__(self, - name, - base_id, - title=None, - param_key=None, - visible=True, - **kwargs): - - if param_key == None: - param_key = name - self.label = dbc.Label(title) - self.input = dbc.RadioItems(id={**base_id, - 'name': name, - 'param_key': param_key, - 'layer': 'input'}, - **kwargs) - - style = {} - if not visible: - style['display'] = 'none' - - super(RadioItem, self).__init__(id={**base_id, - 'name': name, - 'param_key': param_key, - 'layer': 'form_group'}, - children=[self.label, self.input], - style=style) - - -class BoolItem(dbc.Col): - def __init__(self, - name, - base_id, - title=None, - param_key=None, - visible=True, - **kwargs): - - if param_key == None: - param_key = name - self.label = dbc.Label(title) - self.input = daq.ToggleSwitch(id={**base_id, - 'name': name, - 'param_key': param_key, - 'layer': 'input'}, - **kwargs) - self.output_label = dbc.Label('False/True') - - style = {} - if not visible: - style['display'] = 'none' - - super(BoolItem, self).__init__(id={**base_id, - 'name': name, - 'param_key': param_key, - 'layer': 'form_group'}, - children=[self.label, self.input, self.output_label], - style=style) - - -class ImgItem(dbc.Col): - def __init__(self, - name, - src, - base_id, - title=None, - param_key=None, - width='100px', - visible=True, - **kwargs): - - if param_key == None: - param_key = name - - if not (width.endswith('px') or width.endswith('%')): - width = width + 'px' - - self.label = dbc.Label(title) - - encoded_image = base64.b64encode(open(src, 'rb').read()) - self.src = 'data:image/png;base64,{}'.format(encoded_image.decode()) - self.input_img = html.Img(id={**base_id, - 'name': name, - 'param_key': param_key, - 'layer': 'input'}, - src=self.src, - style={'height':'auto', 'width':width}, - **kwargs) - - style = {} - if not visible: - style['display'] = 'none' - - super(ImgItem, self).__init__(id={**base_id, - 'name': name, - 'param_key': param_key, - 'layer': 'form_group'}, - children=[self.label, self.input_img], - style=style) - - -# class GraphItem(dbc.Col): -# def __init__(self, -# name, -# base_id, -# title=None, -# param_key=None, -# visible=True, -# figure = None, -# **kwargs): -# -# self.name = name -# if param_key == None: -# param_key = name -# self.label = dbc.Label(title) -# self.input_graph = dcc.Graph(id={**base_id, -# 'name': name, -# 'param_key': param_key, -# 'layer': 'input'}, -# **kwargs) -# -# self.input_upload = dcc.Upload(id={**base_id, -# 'name': name+'_upload', -# 'param_key': param_key, -# 'layer': 'input'}, -# children=html.Div([ -# 'Drag and Drop or ', -# html.A('Select Files') -# ]), -# style={ -# 'width': '95%', -# 'height': '60px', -# 'lineHeight': '60px', -# 'borderWidth': '1px', -# 'borderStyle': 'dashed', -# 'borderRadius': '5px', -# 'textAlign': 'center', -# 'margin': '10px' -# }, -# multiple = False) -# -# style = {} -# if not visible: -# style['display'] = 'none' -# -# super(GraphItem, self).__init__(id={**base_id, -# 'name': name, -# 'param_key': param_key, -# 'layer': 'form_group'}, -# children=[self.label, self.input_upload, self.input_graph], -# style=style) -# -# # Issue: cannot get inputs from the callback decorator -# def return_upload(self, *args): -# print(f'before if, args {args}') -# if args: -# 
print(f'args {args}') -# img_bytes = base64.b64decode(contents.split(",")[1]) -# img = PIL.Image.open(io.BytesIO(img_bytes)) -# fig = px.imshow(img, binary_string=True) -# return fig -# -# def init_callbacks(self, app): -# app.callback(Output({**self.id, -# 'name': self.name, -# 'layer': 'input'}, 'figure', allow_duplicate=True), -# Input({**self.id, -# 'name': self.name+'_upload', -# 'layer': 'input'}, -# 'contents'), -# State({**self.id, -# 'name': self.name+'_upload', -# 'layer': 'input'}, 'last_modified'), -# State({**self.id, -# 'name': self.name+'_upload', -# 'layer': 'input'}, 'filename'), -# prevent_initial_call=True)(self.return_upload()) - - - -class ParameterEditor(dbc.Form): - - type_map = {float: FloatItem, - int: IntItem, - str: StrItem, - } - - def __init__(self, _id, parameters, **kwargs): - self._parameters = parameters - - super(ParameterEditor, self).__init__(id=_id, children=[], className='kwarg-editor', **kwargs) - self.children = self.build_children() - - def init_callbacks(self, app): - app.callback(Output(self.id, 'n_submit'), - Input({**self.id, - 'name': ALL}, - 'value'), - State(self.id, 'n_submit'), - ) - - for child in self.children: - if hasattr(child,"init_callbacks"): - child.init_callbacks(app) - - - @property - def values(self): - return {param['name']: param.get('value', None) for param in self._parameters} - - @property - def parameters(self): - return {param['name']: param for param in self._parameters} - - def _determine_type(self, parameter_dict): - if 'type' in parameter_dict: - if parameter_dict['type'] in self.type_map: - return parameter_dict['type'] - elif parameter_dict['type'].__name__ in self.type_map: - return parameter_dict['type'].__name__ - elif type(parameter_dict['value']) in self.type_map: - return type(parameter_dict['value']) - raise TypeError(f'No item type could be determined for this parameter: {parameter_dict}') - - def build_children(self, values=None): - children = [] - for parameter_dict in self._parameters: - parameter_dict = parameter_dict.copy() - if values and parameter_dict['name'] in values: - parameter_dict['value'] = values[parameter_dict['name']] - type = self._determine_type(parameter_dict) - parameter_dict.pop('type', None) - item = self.type_map[type](**parameter_dict, base_id=self.id) - children.append(item) - - return children - - -class JSONParameterEditor(ParameterEditor): - type_map = {'float': FloatItem, - 'int': IntItem, - 'str': StrItem, - 'slider': SliderItem, - 'dropdown': DropdownItem, - 'radio': RadioItem, - 'bool': BoolItem, - 'img': ImgItem, - #'graph': GraphItem, - } - - def __init__(self, _id, json_blob, **kwargs): - super(ParameterEditor, self).__init__(id=_id, children=[], className='kwarg-editor', **kwargs) - self._json_blob = json_blob - self.children = self.build_children() - - def build_children(self, values=None): - children = [] - for json_record in self._json_blob: - ... - # build a parameter dict from self.json_blob - ... 
- type = json_record.get('type', self._determine_type(json_record)) - json_record = json_record.copy() - if values and json_record['name'] in values: - json_record['value'] = values[json_record['name']] - json_record.pop('type', None) - item = self.type_map[type](**json_record, base_id=self.id) - children.append(item) - - return children - - -class KwargsEditor(ParameterEditor): - def __init__(self, instance_index, func: Callable, **kwargs): - self.func = func - self._instance_index = instance_index - - parameters = [{'name': name, 'value': param.default} for name, param in signature(func).parameters.items() - if param.default is not _empty] - - super(KwargsEditor, self).__init__(dict(index=instance_index, type='kwargs-editor'), parameters=parameters, **kwargs) - - def new_record(self): - return {name: p.default for name, p in signature(self.func).parameters.items() if p.default is not _empty} From 1820a47c7c6f466b3f0ebc845132ca21259aeb3e Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 12:44:39 -0700 Subject: [PATCH 38/62] add flake file --- .flake8 | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .flake8 diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..a9f89c5 --- /dev/null +++ b/.flake8 @@ -0,0 +1,7 @@ +[flake8] +# 127 is width of the Github code viewer, +# black default is 88 so this will only warn about comments >127 +max-line-length = 127 +# Ignore errors due to incompatibility with black +#https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html +extend-ignore = E203,E701 From 5f2191a7e45ec8b25bd6ce4f5b74cede8af0e35b Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 16:33:48 -0700 Subject: [PATCH 39/62] Placed plots side by side and fixed callback errors when a child flow has not started yet --- src/app_layout.py | 56 ++++++++++++++++++++++++++++++++++++++++------- src/frontend.py | 53 +++++++++++++++++++++++++++----------------- 2 files changed, 81 insertions(+), 28 deletions(-) diff --git a/src/app_layout.py b/src/app_layout.py index a1e7f3b..7acb7a8 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -76,12 +76,51 @@ ] ), dbc.CardBody( - dcc.Graph( - id="scatter", - figure=go.Figure(go.Scattergl(mode="markers")), - ) + [ + dbc.Row( + [ + dbc.Col( + dcc.Graph( + id="scatter", + figure=go.Figure( + go.Scattergl(mode="markers"), + layout=go.Layout( + autosize=True, + margin=go.layout.Margin( + l=20, + r=20, + b=20, + t=20, + pad=0, + ), + ), + ), + ), + width=6, + ), + dbc.Col( + dcc.Graph( + id="heatmap", + figure=go.Figure( + go.Heatmap(), + layout=go.Layout( + autosize=True, + margin=go.layout.Margin( + l=20, + r=20, + b=20, + t=20, + pad=0, + ), + ), + ), + ), + width=6, + ), + ] + ), + ] ), - dbc.CardFooter(dcc.Graph(id="heatmap", figure=go.Figure(go.Heatmap()))), ], ) ] @@ -356,10 +395,11 @@ dbc.Container( children=[ dbc.Row( - [dbc.Col(control_panel, width=4), dbc.Col(image_panel, width=7)] + [dbc.Col(control_panel, width=4), dbc.Col(image_panel, width=8)] ), dbc.Row(dbc.Col(meta)), - ] + ], + fluid=True, ), - ] + ], ) diff --git a/src/frontend.py b/src/frontend.py index d3d470f..cacfdba 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -11,11 +11,11 @@ import requests from dash import Input, Output, State, html from dash.exceptions import PreventUpdate +from dash_component_editor import JSONParameterEditor from file_manager.data_project import DataProject from sklearn.cluster import DBSCAN, HDBSCAN, MiniBatchKMeans from app_layout import app -from dash_component_editor import 
JSONParameterEditor from latentxp_utils import ( dbscan_kwargs, generate_scatter_data, @@ -36,7 +36,6 @@ TIMEZONE = os.getenv("TIMEZONE", "US/Pacific") FLOW_NAME = os.getenv("FLOW_NAME", "") - # TODO: Get model parameters from UI TRAIN_PARAMS_EXAMPLE = { "flow_type": "podman", @@ -326,7 +325,14 @@ def submit_dimension_reduction_job( job_message = f"Job has been succesfully submitted with uid: {job_uid}." print(job_message, flush=True) - return job_uid, "cluster", -1, -2, go.Figure(go.Heatmap()), -1 + fig = go.Figure( + go.Heatmap(), + layout=go.Layout( + autosize=True, + margin=go.layout.Margin(l=20, r=20, b=20, t=20, pad=0), + ), + ) + return job_uid, "cluster", -1, -2, fig, -1 @app.callback( @@ -356,19 +362,16 @@ def read_latent_vectors(n_intervals, experiment_id, max_intervals): raise PreventUpdate children_flows = get_children_flow_run_ids(experiment_id) - print("child flow") - print(children_flows) - - # read the latent vectors from the output dir - output_path = OUTPUT_DIR / children_flows[0] - npz_files = list(output_path.glob("*.npy")) - if len(npz_files) > 0: - lv_filepath = npz_files[0] # latent vector file path - latent_vectors = np.load(str(lv_filepath)) - print("latent vector", latent_vectors.shape) - return latent_vectors, 0 - else: - return None, -1 + if len(children_flows) > 0: + # read the latent vectors from the output dir + output_path = OUTPUT_DIR / children_flows[0] + npz_files = list(output_path.glob("*.npy")) + if len(npz_files) > 0: + lv_filepath = npz_files[0] # latent vector file path + latent_vectors = np.load(str(lv_filepath)) + print("latent vector", latent_vectors.shape) + return latent_vectors, 0 + return None, -1 @app.callback( @@ -518,7 +521,10 @@ def update_scatter_plot( ) fig = go.Figure(scatter_data) - fig.update_layout(legend=dict(tracegroupgap=20)) + fig.update_layout( + margin=go.layout.Margin(l=20, r=20, b=20, t=20, pad=0), + legend=dict(tracegroupgap=20), + ) if ( current_figure @@ -600,7 +606,9 @@ def update_heatmap( data_project = DataProject.from_dict(data_project_dict) if len(data_project.datasets) > 0: print("FM file") - selected_images, _ = data_project.read(selected_indices, export="pillow") + selected_images, _ = data_project.read_datasets( + selected_indices, export="pillow" + ) # DataClinic elif data_clinic_file_path is not None: print("data_clinic_file_path") @@ -675,6 +683,7 @@ def update_heatmap( data=heatmap_data, layout=dict( autosize=True, + margin=go.layout.Margin(l=20, r=20, b=20, t=20, pad=0), yaxis=dict(scaleanchor="x", scaleratio=aspect_y / aspect_x), ), ) @@ -700,10 +709,14 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): Returns: [num_images, clusters, labels]: statistics """ - assigned_labels = np.array(assigned_labels) + print("assigned_labels", assigned_labels, flush=True) - if selected_data is not None and len(selected_data["points"]) > 0: + if ( + selected_data is not None + and len(selected_data["points"]) > 0 + and assigned_labels != [-1] + ): selected_indices = [ point["customdata"][0] for point in selected_data["points"] ] # Access customdata for the original indices From 0debabd20465d656c7bddb065e2fafe000bc1e43 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 16:43:39 -0700 Subject: [PATCH 40/62] check for child flow --- src/frontend.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index cacfdba..c9223ef 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -425,16 +425,18 @@ def 
apply_clustering( clusters, options = None, None if obj: - clusters = obj.fit_predict(latent_vectors) - output_path = OUTPUT_DIR / experiment_id - np.save(output_path / "clusters.npy", clusters) - unique_clusters = np.unique(clusters) - options = [ - {"label": f"Cluster {cluster}", "value": cluster} - for cluster in unique_clusters - if cluster != -1 - ] - options.insert(0, {"label": "All", "value": -1}) + children_flows = get_children_flow_run_ids(experiment_id) + if len(children_flows) > 0: + clusters = obj.fit_predict(latent_vectors) + output_path = OUTPUT_DIR / children_flows[0] + np.save(output_path / "clusters.npy", clusters) + unique_clusters = np.unique(clusters) + options = [ + {"label": f"Cluster {cluster}", "value": cluster} + for cluster in unique_clusters + if cluster != -1 + ] + options.insert(0, {"label": "All", "value": -1}) return clusters, options From 0ed60a1af4b284a112598ba434cb51b3c88328af Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 17:12:05 -0700 Subject: [PATCH 41/62] add job dropdown --- src/app_layout.py | 7 ++++++- src/frontend.py | 35 +++++++++++++++++++++++++---------- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/src/app_layout.py b/src/app_layout.py index 7acb7a8..180d870 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -155,8 +155,13 @@ dcc.Input( id="job-name", placeholder="test0", - style={"width": "100%"}, + style={ + "width": "100%", + "margin-bottom": "1rem", + }, ), + dbc.Label("Select a job..."), + dcc.Dropdown(id="job-selector"), ] ), html.Hr(), diff --git a/src/frontend.py b/src/frontend.py index c9223ef..a197e5e 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -25,7 +25,11 @@ load_images_by_indices, remove_key_from_dict_list, ) -from utils_prefect import get_children_flow_run_ids, schedule_prefect_flow +from utils_prefect import ( + get_children_flow_run_ids, + get_flow_runs_by_name, + schedule_prefect_flow, +) # GLOBAL PARAMS DATA_DIR = str(os.environ["DATA_DIR"]) @@ -130,6 +134,16 @@ def show_clustering_gui_layouts(selected_algo): return item_list +@app.callback( + Output("job-selector", "options"), + Input("interval-component", "n_intervals"), +) +def update_job_selector(n_intervals): + # TODO: Split train/inference and add data project name + jobs = get_flow_runs_by_name(tags=PREFECT_TAGS) + return jobs + + @app.callback( [ Output("input_labels", "data"), @@ -199,8 +213,6 @@ def update_data_n_label_schema( @app.callback( [ - # flag the read variable - Output("experiment-id", "data"), # reset scatter plot control panel Output("scatter-color", "value"), Output("cluster-dropdown", "value"), @@ -247,7 +259,6 @@ def submit_dimension_reduction_job( selected_algo: selected dimension reduction algo children: div for algo's parameters Returns: - experiment-id: uuid for current run cluster-dropdown: options for cluster dropdown scatter-color: default scatter-color value cluster-dropdown: default cluster-dropdown value @@ -332,7 +343,7 @@ def submit_dimension_reduction_job( margin=go.layout.Margin(l=20, r=20, b=20, t=20, pad=0), ), ) - return job_uid, "cluster", -1, -2, fig, -1 + return "cluster", -1, -2, fig, -1 @app.callback( @@ -341,7 +352,7 @@ def submit_dimension_reduction_job( Output("interval-component", "max_intervals", allow_duplicate=True), ], Input("interval-component", "n_intervals"), - State("experiment-id", "data"), + State("job-selector", "value"), State("interval-component", "max_intervals"), prevent_initial_call=True, ) @@ -384,7 +395,7 @@ def read_latent_vectors(n_intervals, experiment_id, 
max_intervals): State("latent_vectors", "data"), State("cluster-algo-dropdown", "value"), State("additional-cluster-params", "children"), - State("experiment-id", "data"), + State("job-selector", "value"), ], ) def apply_clustering( @@ -414,7 +425,7 @@ def apply_clustering( key = child["props"]["children"][1]["props"]["id"]["param_key"] value = child["props"]["children"][1]["props"]["value"] input_params[key] = value - print("Clustering params:", input_params) + print("Clustering params:", input_params, flush=True) if selected_algo == "KMeans": obj = MiniBatchKMeans(n_clusters=input_params["n_clusters"]) @@ -438,6 +449,8 @@ def apply_clustering( ] options.insert(0, {"label": "All", "value": -1}) + print("clusters", clusters, flush=True) + return clusters, options @@ -489,7 +502,7 @@ def update_scatter_plot( if latent_vectors is None or children is None: raise PreventUpdate latent_vectors = np.array(latent_vectors) - print("latent vector shape:", latent_vectors.shape) + print("latent vector shape:", latent_vectors.shape, flush=True) n_components = children["props"]["children"][0]["props"]["children"][1]["props"][ "value" @@ -646,7 +659,9 @@ def update_heatmap( # FileManager data_project = DataProject.from_dict(data_project_dict) if len(data_project.datasets) > 0: - selected_images, _ = data_project.read([selected_index], export="pillow") + selected_images, _ = data_project.read_datasets( + [selected_index], export="pillow" + ) # DataClinic elif data_clinic_file_path is not None: directory_path = os.path.dirname(data_clinic_file_path) From 6b11ca3808f76cb20d170599c1e24ad5d42a10f0 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 19:36:23 -0700 Subject: [PATCH 42/62] add conda flows and simplify data clinic options temporarily --- src/frontend.py | 150 ++++++++++++++++++++++++++++++------------------ 1 file changed, 93 insertions(+), 57 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index a197e5e..0a14c5f 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -22,7 +22,6 @@ hdbscan_kwargs, hex_to_rgba, kmeans_kwargs, - load_images_by_indices, remove_key_from_dict_list, ) from utils_prefect import ( @@ -39,37 +38,65 @@ PREFECT_TAGS = json.loads(os.getenv("PREFECT_TAGS", '["latent-space-explorer"]')) TIMEZONE = os.getenv("TIMEZONE", "US/Pacific") FLOW_NAME = os.getenv("FLOW_NAME", "") - -# TODO: Get model parameters from UI -TRAIN_PARAMS_EXAMPLE = { - "flow_type": "podman", - "params_list": [ - { - "image_name": "ghcr.io/runboj/mlex_dimension_reduction_pca", - "image_tag": "main", - "command": 'python -c \\"import time; time.sleep(30)\\"', - "params": { - "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} +MODEL_DIR = "data/models" +FLOW_TYPE = "conda" +CONDA_ENV_NAME = "dimension_reduction_pca" + + +if FLOW_TYPE == "podman": + TRAIN_PARAMS_EXAMPLE = { + "flow_type": "podman", + "params_list": [ + { + "image_name": "ghcr.io/runboj/mlex_dimension_reduction_pca", + "image_tag": "main", + "command": 'python -c \\"import time; time.sleep(30)\\"', + "params": { + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + }, + "volumes": [f"{DATA_DIR}:/app/work/data"], + } + ], + } + + INFERENCE_PARAMS_EXAMPLE = { + "flow_type": "podman", + "params_list": [ + { + "image_name": "ghcr.io/runboj/mlex_dimension_reduction_pca", + "image_tag": "main", + "command": 'python -c \\"import time; time.sleep(30)\\"', + "params": { + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + }, + "volumes": [f"{DATA_DIR}:/app/work/data"], }, - 
"volumes": [f"{DATA_DIR}:/app/work/data"], - } - ], -} - -INFERENCE_PARAMS_EXAMPLE = { - "flow_type": "podman", - "params_list": [ - { - "image_name": "ghcr.io/runboj/mlex_dimension_reduction_pca", - "image_tag": "main", - "command": 'python -c \\"import time; time.sleep(30)\\"', - "params": { - "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + ], + } +else: + TRAIN_PARAMS_EXAMPLE = { + "flow_type": "conda", + "params_list": [ + { + "conda_env_name": f"{CONDA_ENV_NAME}", + "params": { + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + }, + } + ], + } + + INFERENCE_PARAMS_EXAMPLE = { + "flow_type": "conda", + "params_list": [ + { + "conda_env_name": f"{CONDA_ENV_NAME}", + "params": { + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + }, }, - "volumes": [f"{DATA_DIR}:/app/work/data"], - }, - ], -} + ], + } @app.callback( @@ -155,12 +182,9 @@ def update_job_selector(n_intervals): Input( {"base_id": "file-manager", "name": "data-project-dict"}, "data" ), # FM dataset - Input("feature-vector-model-list", "value"), # data clinic dataset ], ) -def update_data_n_label_schema( - selected_example_dataset, data_project_dict, data_clinic_file_path -): +def update_data_n_label_schema(selected_example_dataset, data_project_dict): """ This callback updates the selected dataset from the provided example datasets, as well as labels, and label schema Args: @@ -184,10 +208,6 @@ def update_data_n_label_schema( # user_upload_data_dir = None if len(data_project.datasets) > 0: labels = np.full((len(data_project.datasets),), -1) - # DataClinic options - elif data_clinic_file_path is not None: - df = pd.read_parquet(data_clinic_file_path) - labels = np.full((df.shape[0],), -1) # Example dataset option 1 elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": labels = np.load("/app/work/data/example_shapes/DemoLabels.npy") @@ -314,9 +334,19 @@ def submit_dimension_reduction_job( # check which dimension reduction algo, then compose command if selected_algo == "PCA": - TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python pca_run.py" + if FLOW_TYPE == "podman": + TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python pca_run.py" + else: + TRAIN_PARAMS_EXAMPLE["params_list"][0][ + "python_file_name" + ] = "mlex_dimension_reduction_pca/pca_run.py" elif selected_algo == "UMAP": - TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python umap_run.py" + if FLOW_TYPE == "podman": + TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python umap_run.py" + else: + TRAIN_PARAMS_EXAMPLE["params_list"][0][ + "python_file_name" + ] = "mlex_dimension_reduction_umap/umap_run.py" TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"] = io_parameters TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["output_dir"] = ( @@ -579,7 +609,6 @@ def update_scatter_plot( State( {"base_id": "file-manager", "name": "data-project-dict"}, "data" ), # DataProject for FM - State("feature-vector-model-list", "value"), # data clinic dataset ], prevent_initial_call=True, ) @@ -589,7 +618,6 @@ def update_heatmap( display_option, selected_example_dataset, data_project_dict, - data_clinic_file_path, ): """ This callback update the heatmap @@ -600,11 +628,7 @@ def update_heatmap( Returns: fig: updated heatmap """ - if ( - not selected_example_dataset - and not data_project_dict - and not data_clinic_file_path - ): + if not selected_example_dataset and not data_project_dict: raise PreventUpdate # user select a group of points @@ -624,12 
+648,6 @@ def update_heatmap( selected_images, _ = data_project.read_datasets( selected_indices, export="pillow" ) - # DataClinic - elif data_clinic_file_path is not None: - print("data_clinic_file_path") - print(data_clinic_file_path) - directory_path = os.path.dirname(data_clinic_file_path) - selected_images = load_images_by_indices(directory_path, selected_indices) # Example dataset elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": print("Demoshapes.npz") @@ -662,10 +680,6 @@ def update_heatmap( selected_images, _ = data_project.read_datasets( [selected_index], export="pillow" ) - # DataClinic - elif data_clinic_file_path is not None: - directory_path = os.path.dirname(data_clinic_file_path) - clicked_image = load_images_by_indices(directory_path, [selected_index]) # Example dataset elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": clicked_image = np.load("/app/work/" + selected_example_dataset)["arr_0"][ @@ -767,6 +781,28 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): ] +@app.callback( + Output("feature-vector-model-list", "options"), + Input("interval-component", "n_intervals"), +) +def update_feature_vector_model_list(n_intervals): + """ + This callback update the feature vector model list + Args: + n_intervals: interval component + Returns: + options: feature vector model list + """ + # TODO: Connect to data clinic + # TODO: Check if inference has already taken place in this dataset + folder_names = [ + os.path.join(dirpath, dir) + for dirpath, dirs, _ in os.walk(MODEL_DIR) + for dir in dirs + ] + return folder_names + + if __name__ == "__main__": app.run_server( debug=True, From 437c7f25ef09242cae49176f3321887b1a9f97eb Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sun, 24 Mar 2024 21:49:38 -0700 Subject: [PATCH 43/62] adding inference step for autoencoder and temporarily removing using user --- src/frontend.py | 62 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index 0a14c5f..c7ff338 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -1,3 +1,4 @@ +import copy import json import os import pathlib @@ -32,7 +33,7 @@ # GLOBAL PARAMS DATA_DIR = str(os.environ["DATA_DIR"]) -USER = "admin" # 'mlexchange-team' move to env file +USER = "" # 'mlexchange-team' move to env file OUTPUT_DIR = pathlib.Path("data/mlexchange_store/" + USER) UPLOAD_FOLDER_ROOT = "data/upload" PREFECT_TAGS = json.loads(os.getenv("PREFECT_TAGS", '["latent-space-explorer"]')) @@ -294,6 +295,7 @@ def submit_dimension_reduction_job( ): raise PreventUpdate + job_params = job_params = copy.deepcopy(TRAIN_PARAMS_EXAMPLE) input_params = {} if children: for child in children["props"]["children"]: @@ -323,43 +325,75 @@ def submit_dimension_reduction_job( "root_uri": None, } + # Autoencoder + if data_clinic_file_path is not None: + auto_io_params = io_parameters.copy() + auto_io_params["model_dir"] = data_clinic_file_path + "/last.ckpt" + if FLOW_TYPE == "podman": + autoencoder_params = { + "image_name": "ghcr.io/mlexchange/mlex_pytorch_autoencoders:main", + "image_tag": "main", + "command": "python src/predict_model.py", + "params": { + "io_parameters": auto_io_params, + "target_width": 64, + "target_height": 64, + "batch_size": 32, + }, + "volumes": [f"{DATA_DIR}:/app/work/data"], + } + else: + autoencoder_params = { + "conda_env_name": "pytorch_autoencoders", + "params": { + "io_parameters": auto_io_params, + "target_width": 64, + "target_height": 64, 
+ "batch_size": 32, + }, + "python_file_name": "mlex_pytorch_autoencoders/src/predict_model.py", + } + job_params["params_list"].insert(0, autoencoder_params) + # prefect current_time = datetime.now(pytz.timezone(TIMEZONE)).strftime("%Y/%m/%d %H:%M:%S") if not job_name: job_name = "test0" job_name += " " + str(current_time) - # project_name = selected_dataset.split("/")[-1] # name of the dataset, get it from FM ## this is an issue + # TODO: Hash root_uri + data_uris project_name = "fake_name" print(PREFECT_TAGS, flush=True) # check which dimension reduction algo, then compose command if selected_algo == "PCA": if FLOW_TYPE == "podman": - TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python pca_run.py" + job_params["params_list"][-1]["command"] = "python pca_run.py" else: - TRAIN_PARAMS_EXAMPLE["params_list"][0][ + job_params["params_list"][-1][ "python_file_name" ] = "mlex_dimension_reduction_pca/pca_run.py" elif selected_algo == "UMAP": if FLOW_TYPE == "podman": - TRAIN_PARAMS_EXAMPLE["params_list"][0]["command"] = "python umap_run.py" + job_params["params_list"][-1]["command"] = "python umap_run.py" else: - TRAIN_PARAMS_EXAMPLE["params_list"][0][ + job_params["params_list"][-1][ "python_file_name" ] = "mlex_dimension_reduction_umap/umap_run.py" - TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"] = io_parameters - TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["output_dir"] = ( - str(OUTPUT_DIR) + job_params["params_list"][-1]["params"]["io_parameters"] = io_parameters + job_params["params_list"][-1]["params"]["io_parameters"]["output_dir"] = str( + OUTPUT_DIR ) - TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["io_parameters"]["uid_save"] = "" - TRAIN_PARAMS_EXAMPLE["params_list"][0]["params"]["model_parameters"] = input_params - print(TRAIN_PARAMS_EXAMPLE) + job_params["params_list"][-1]["params"]["io_parameters"]["uid_save"] = "" + job_params["params_list"][-1]["params"]["io_parameters"]["uid_retrieve"] = "" + job_params["params_list"][-1]["params"]["model_parameters"] = input_params + print(job_params) + print(TRAIN_PARAMS_EXAMPLE, flush=True) # run prefect job, job_uid is the new experiment id -> uid_save in the pca_example.yaml file job_uid = schedule_prefect_flow( FLOW_NAME, - parameters=TRAIN_PARAMS_EXAMPLE, + parameters=job_params, flow_run_name=f"{job_name} {current_time}", tags=PREFECT_TAGS + ["train", project_name], ) @@ -405,7 +439,7 @@ def read_latent_vectors(n_intervals, experiment_id, max_intervals): children_flows = get_children_flow_run_ids(experiment_id) if len(children_flows) > 0: # read the latent vectors from the output dir - output_path = OUTPUT_DIR / children_flows[0] + output_path = OUTPUT_DIR / children_flows[-1] npz_files = list(output_path.glob("*.npy")) if len(npz_files) > 0: lv_filepath = npz_files[0] # latent vector file path From d2afe4c0653766f7df67fb8c5be94136ac7048d5 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Wed, 27 Mar 2024 19:12:10 -0700 Subject: [PATCH 44/62] loading env variables from file when running outside docker and adding tiled api key --- docker/requirements.txt | 1 + src/app_layout.py | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/docker/requirements.txt b/docker/requirements.txt index 658ca06..1fda0c8 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -12,4 +12,5 @@ diskcache==5.6.3 pandas numpy Pillow +python-dotenv prefect-client==2.14.21 diff --git a/src/app_layout.py b/src/app_layout.py index 180d870..b195cdd 100644 --- 
a/src/app_layout.py +++ b/src/app_layout.py @@ -1,3 +1,4 @@ +import os import pathlib import dash_bootstrap_components as dbc @@ -7,10 +8,13 @@ from dash import Dash, dcc, html from dash.long_callback import DiskcacheLongCallbackManager from dash_iconify import DashIconify +from dotenv import load_dotenv from file_manager.main import FileManager import templates +load_dotenv(".env") + # GLOBAL VARIABLES ALGORITHM_DATABASE = { "PCA": "PCA", @@ -29,8 +33,11 @@ "value": "data/example_latentrepresentation/f_vectors.parquet", }, ] -DOCKER_DATA = pathlib.Path.home() / "data" # /app/work/data -UPLOAD_FOLDER_ROOT = DOCKER_DATA / "upload" # /app/work/data/upload +DATA_DIR = pathlib.Path( + os.getenv("DATA_DIR") +) # pathlib.Path.home() / "data" # /app/work/data +UPLOAD_FOLDER_ROOT = DATA_DIR / "upload" # /app/work/data/upload +TILED_API_KEY = os.getenv("TILED_API_KEY") # SETUP DASH APP cache = diskcache.Cache("./cache") @@ -45,7 +52,9 @@ server = app.server -dash_file_explorer = FileManager(DOCKER_DATA, UPLOAD_FOLDER_ROOT, open_explorer=False) +dash_file_explorer = FileManager( + DATA_DIR, UPLOAD_FOLDER_ROOT, open_explorer=False, api_key=TILED_API_KEY +) dash_file_explorer.init_callbacks(app) du.configure_upload(app, UPLOAD_FOLDER_ROOT, use_upload_id=False) From 825673d021edcdfddd8f04ca50f4bfadd9dfae94 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Wed, 27 Mar 2024 21:47:11 -0700 Subject: [PATCH 45/62] arranged filepaths according to env variables to enable running outside docker --- .env.example | 8 +++++-- docker-compose.yml | 2 +- src/app_layout.py | 10 ++++---- src/frontend.py | 57 ++++++++++++++++++++++++++++------------------ 4 files changed, 46 insertions(+), 31 deletions(-) diff --git a/.env.example b/.env.example index 8b37080..712e091 100644 --- a/.env.example +++ b/.env.example @@ -1,9 +1,13 @@ USER=admin +DEFAULT_ALGORITHM_DESCRIPTION=/path/to/PCA_v1.0.0.json -PREFECT_API_URL=http://prefect:4200/api +PREFECT_API_URL=http://localhost:4200/api FLOW_NAME="Parent flow/launch_parent_flow" TIMEZONE="US/Pacific" PREFECT_TAGS='["latent-space-explorer"]' +CONTENT_API_URL="http://localhost:8000/api/v0/models" + TILED_API_KEY= -DATA_DIR=$PWD/data + +DATA_DIR=/path/to/data diff --git a/docker-compose.yml b/docker-compose.yml index 61c05cc..b2b550e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,13 +13,13 @@ services: DATA_DIR: "${DATA_DIR}" PREFECT_TAGS: "${PREFECT_TAGS}" PREFECT_API_URL: '${PREFECT_API_URL}' + CONTENT_API_URL: '${CONTENT_API_URL}' FLOW_NAME: '${FLOW_NAME}' TIMEZONE: "${TIMEZONE}" USER: "${USER}" volumes: - $DATA_DIR:/app/work/data - ./src:/app/work/src - - ../mlex_file_manager/file_manager:/app/work/src/file_manager ports: - "127.0.0.1:8070:8070" networks: diff --git a/src/app_layout.py b/src/app_layout.py index b195cdd..fd6f845 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -13,7 +13,7 @@ import templates -load_dotenv(".env") +load_dotenv(".env", override=True) # GLOBAL VARIABLES ALGORITHM_DATABASE = { @@ -33,11 +33,9 @@ "value": "data/example_latentrepresentation/f_vectors.parquet", }, ] -DATA_DIR = pathlib.Path( - os.getenv("DATA_DIR") -) # pathlib.Path.home() / "data" # /app/work/data -UPLOAD_FOLDER_ROOT = DATA_DIR / "upload" # /app/work/data/upload -TILED_API_KEY = os.getenv("TILED_API_KEY") +DATA_DIR = pathlib.Path(os.getenv("DATA_DIR")) +UPLOAD_FOLDER_ROOT = "data/upload" +TILED_API_KEY = os.getenv("TILED_API_KEY", None) # SETUP DASH APP cache = diskcache.Cache("./cache") diff --git a/src/frontend.py b/src/frontend.py index c7ff338..6a426ea 100755 
--- a/src/frontend.py +++ b/src/frontend.py @@ -1,7 +1,6 @@ import copy import json import os -import pathlib import uuid from datetime import datetime @@ -12,11 +11,12 @@ import requests from dash import Input, Output, State, html from dash.exceptions import PreventUpdate -from dash_component_editor import JSONParameterEditor +from dotenv import load_dotenv from file_manager.data_project import DataProject from sklearn.cluster import DBSCAN, HDBSCAN, MiniBatchKMeans from app_layout import app +from dash_component_editor import JSONParameterEditor from latentxp_utils import ( dbscan_kwargs, generate_scatter_data, @@ -31,18 +31,25 @@ schedule_prefect_flow, ) +load_dotenv(".env") + # GLOBAL PARAMS -DATA_DIR = str(os.environ["DATA_DIR"]) -USER = "" # 'mlexchange-team' move to env file -OUTPUT_DIR = pathlib.Path("data/mlexchange_store/" + USER) -UPLOAD_FOLDER_ROOT = "data/upload" +USER = os.getenv("USER", "") # 'mlexchange-team' move to env file + +DATA_DIR = os.getenv("DATA_DIR", "data") +MODEL_DIR = "data/models" +OUTPUT_DIR = f"data/mlexchange_store/{USER}" +UPLOAD_FOLDER_ROOT = f"{DATA_DIR}/upload" + PREFECT_TAGS = json.loads(os.getenv("PREFECT_TAGS", '["latent-space-explorer"]')) TIMEZONE = os.getenv("TIMEZONE", "US/Pacific") FLOW_NAME = os.getenv("FLOW_NAME", "") -MODEL_DIR = "data/models" FLOW_TYPE = "conda" CONDA_ENV_NAME = "dimension_reduction_pca" +CONTENT_API_URL = os.getenv("CONTENT_API_URL", "http://localhost:8000/api/v0/models") +DEFAULT_ALGORITHM_DESCRIPTION = os.getenv("DEFAULT_ALGORITHM_DESCRIPTION") + if FLOW_TYPE == "podman": TRAIN_PARAMS_EXAMPLE = { @@ -53,7 +60,7 @@ "image_tag": "main", "command": 'python -c \\"import time; time.sleep(30)\\"', "params": { - "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": None} }, "volumes": [f"{DATA_DIR}:/app/work/data"], } @@ -68,7 +75,7 @@ "image_tag": "main", "command": 'python -c \\"import time; time.sleep(30)\\"', "params": { - "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": None} }, "volumes": [f"{DATA_DIR}:/app/work/data"], }, @@ -81,7 +88,7 @@ { "conda_env_name": f"{CONDA_ENV_NAME}", "params": { - "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": None} }, } ], @@ -93,7 +100,7 @@ { "conda_env_name": f"{CONDA_ENV_NAME}", "params": { - "io_parameters": {"uid_save": "uid0001", "uid_retrieve": "uid0001"} + "io_parameters": {"uid_save": "uid0001", "uid_retrieve": None} }, }, ], @@ -114,7 +121,12 @@ def show_dimension_reduction_gui_layouts(selected_algo): item_list: dropdown menu html code model_uid: selected algo's uid """ - data = requests.get("http://content-api:8000/api/v0/models").json() # all model + try: + data = requests.get(CONTENT_API_URL).json() # all model + except Exception as e: + print(f"Cannot access content api: {e}", flush=True) + with open(DEFAULT_ALGORITHM_DESCRIPTION, "r") as f: + data = [json.load(f)] if selected_algo == "PCA": conditions = {"name": "PCA"} @@ -385,7 +397,7 @@ def submit_dimension_reduction_job( OUTPUT_DIR ) job_params["params_list"][-1]["params"]["io_parameters"]["uid_save"] = "" - job_params["params_list"][-1]["params"]["io_parameters"]["uid_retrieve"] = "" + job_params["params_list"][-1]["params"]["io_parameters"]["uid_retrieve"] = None job_params["params_list"][-1]["params"]["model_parameters"] = input_params print(job_params) print(TRAIN_PARAMS_EXAMPLE, flush=True) 
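For orientation, the payload handed to schedule_prefect_flow for a conda-based PCA run ends up looking roughly like the sketch below. This is illustrative only: the dataset fields, uid values and model_parameters depend on the file-manager selection and the GUI form, and when a DataClinic checkpoint is chosen an autoencoder inference entry is prepended to params_list, which is why the dimension-reduction step is always addressed as params_list[-1].

    job_params = {
        "flow_type": "conda",
        "params_list": [
            {
                "conda_env_name": "dimension_reduction_pca",
                "python_file_name": "mlex_dimension_reduction_pca/pca_run.py",
                "params": {
                    "io_parameters": {
                        # dataset location fields gathered from the file manager go here
                        "root_uri": None,
                        "output_dir": str(OUTPUT_DIR),  # data/mlexchange_store/<USER> at this point in the series
                        "uid_save": "",
                        "uid_retrieve": None,
                    },
                    "model_parameters": {"n_components": 2},  # collected from the GUI form
                },
            }
        ],
    }
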
@@ -439,11 +451,10 @@ def read_latent_vectors(n_intervals, experiment_id, max_intervals): children_flows = get_children_flow_run_ids(experiment_id) if len(children_flows) > 0: # read the latent vectors from the output dir - output_path = OUTPUT_DIR / children_flows[-1] - npz_files = list(output_path.glob("*.npy")) - if len(npz_files) > 0: - lv_filepath = npz_files[0] # latent vector file path - latent_vectors = np.load(str(lv_filepath)) + output_path = f"{OUTPUT_DIR}/{children_flows[-1]}/latent_vectors.npy" + print(output_path, flush=True) + if os.path.exists(output_path): + latent_vectors = np.load(output_path) print("latent vector", latent_vectors.shape) return latent_vectors, 0 return None, -1 @@ -503,8 +514,8 @@ def apply_clustering( children_flows = get_children_flow_run_ids(experiment_id) if len(children_flows) > 0: clusters = obj.fit_predict(latent_vectors) - output_path = OUTPUT_DIR / children_flows[0] - np.save(output_path / "clusters.npy", clusters) + output_path = f"{OUTPUT_DIR}/{children_flows[0]}" + np.save(f"{output_path}/clusters.npy", clusters) unique_clusters = np.unique(clusters) options = [ {"label": f"Cluster {cluster}", "value": cluster} @@ -749,7 +760,9 @@ def update_heatmap( layout=dict( autosize=True, margin=go.layout.Margin(l=20, r=20, b=20, t=20, pad=0), - yaxis=dict(scaleanchor="x", scaleratio=aspect_y / aspect_x), + yaxis=dict( + scaleanchor="x", scaleratio=aspect_y / aspect_x, autorange="reversed" + ), ), ) @@ -780,7 +793,7 @@ def update_statistics(selected_data, clusters, assigned_labels, label_names): if ( selected_data is not None and len(selected_data["points"]) > 0 - and assigned_labels != [-1] + and (assigned_labels != [-1]).all() ): selected_indices = [ point["customdata"][0] for point in selected_data["points"] From bcaa30811948f3dbe83b066cbf32d5e796dd9134 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Wed, 27 Mar 2024 22:13:42 -0700 Subject: [PATCH 46/62] added optional output directory --- .env.example | 3 +++ docker-compose.yml | 1 + src/frontend.py | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index 712e091..29d423f 100644 --- a/.env.example +++ b/.env.example @@ -11,3 +11,6 @@ CONTENT_API_URL="http://localhost:8000/api/v0/models" TILED_API_KEY= DATA_DIR=/path/to/data +OUTPUT_DIR=/path/to/output # optional - if not provided, will default to data + # If running on a container and this path is not mounted, the container + # will not have access to this output directory diff --git a/docker-compose.yml b/docker-compose.yml index b2b550e..dce28fb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,6 +11,7 @@ services: mem_limit: 2g environment: DATA_DIR: "${DATA_DIR}" + OUTPUT_DIR: "${OUTPUT_DIR}" PREFECT_TAGS: "${PREFECT_TAGS}" PREFECT_API_URL: '${PREFECT_API_URL}' CONTENT_API_URL: '${CONTENT_API_URL}' diff --git a/src/frontend.py b/src/frontend.py index 6a426ea..5b4cb77 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -38,7 +38,8 @@ DATA_DIR = os.getenv("DATA_DIR", "data") MODEL_DIR = "data/models" -OUTPUT_DIR = f"data/mlexchange_store/{USER}" +OUTPUT_DIR = os.getenv("OUTPUT_DIR", "data") +OUTPUT_DIR = f"{OUTPUT_DIR}/mlexchange_store/{USER}" UPLOAD_FOLDER_ROOT = f"{DATA_DIR}/upload" PREFECT_TAGS = json.loads(os.getenv("PREFECT_TAGS", '["latent-space-explorer"]')) From 307fc58bd03c81eeae84c07763c0d47b8fdf5214 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Fri, 29 Mar 2024 10:23:25 -0700 Subject: [PATCH 47/62] removing output directory and replacing it with read and write directories --- 
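In short, the split resolves as in the sketch below (host paths are placeholders; the defaults and mount points match the code changed in this patch):

    # .env on the host
    # READ_DIR=/path/to/read/data       mounted at /app/work/data
    # WRITE_DIR=/path/to/write/results  mounted at /app/work/mlex_store

    import os

    READ_DIR = os.getenv("READ_DIR", "data")          # raw datasets and the upload folder
    WRITE_DIR = os.getenv("WRITE_DIR", "mlex_store")  # latent vectors and cluster results
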
.env.example | 6 ++---- docker-compose.yml | 8 ++++---- src/app_layout.py | 5 ++--- src/frontend.py | 34 ++++++++++++++++++++-------------- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/.env.example b/.env.example index 29d423f..8f4a37c 100644 --- a/.env.example +++ b/.env.example @@ -10,7 +10,5 @@ CONTENT_API_URL="http://localhost:8000/api/v0/models" TILED_API_KEY= -DATA_DIR=/path/to/data -OUTPUT_DIR=/path/to/output # optional - if not provided, will default to data - # If running on a container and this path is not mounted, the container - # will not have access to this output directory +READ_DIR=/path/to/read/data +WRITE_DIR=/path/to/write/mlex_store diff --git a/docker-compose.yml b/docker-compose.yml index dce28fb..8f729c3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,16 +10,16 @@ services: dockerfile: "docker/Dockerfile" mem_limit: 2g environment: - DATA_DIR: "${DATA_DIR}" - OUTPUT_DIR: "${OUTPUT_DIR}" + READ_DIR: "${READ_DIR}" + WRITE_DIR: "${WRITE_DIR}" PREFECT_TAGS: "${PREFECT_TAGS}" PREFECT_API_URL: '${PREFECT_API_URL}' CONTENT_API_URL: '${CONTENT_API_URL}' FLOW_NAME: '${FLOW_NAME}' TIMEZONE: "${TIMEZONE}" - USER: "${USER}" volumes: - - $DATA_DIR:/app/work/data + - $READ_DIR:/app/work/data + - $WRITE_DIR:/app/work/mlex_store - ./src:/app/work/src ports: - "127.0.0.1:8070:8070" diff --git a/src/app_layout.py b/src/app_layout.py index fd6f845..493fdff 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -1,5 +1,4 @@ import os -import pathlib import dash_bootstrap_components as dbc import dash_uploader as du @@ -33,7 +32,7 @@ "value": "data/example_latentrepresentation/f_vectors.parquet", }, ] -DATA_DIR = pathlib.Path(os.getenv("DATA_DIR")) +READ_DIR = os.getenv("READ_DIR") UPLOAD_FOLDER_ROOT = "data/upload" TILED_API_KEY = os.getenv("TILED_API_KEY", None) @@ -51,7 +50,7 @@ server = app.server dash_file_explorer = FileManager( - DATA_DIR, UPLOAD_FOLDER_ROOT, open_explorer=False, api_key=TILED_API_KEY + READ_DIR, UPLOAD_FOLDER_ROOT, open_explorer=False, api_key=TILED_API_KEY ) dash_file_explorer.init_callbacks(app) du.configure_upload(app, UPLOAD_FOLDER_ROOT, use_upload_id=False) diff --git a/src/frontend.py b/src/frontend.py index 5b4cb77..2aff44e 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -34,13 +34,11 @@ load_dotenv(".env") # GLOBAL PARAMS -USER = os.getenv("USER", "") # 'mlexchange-team' move to env file +READ_DIR = os.getenv("READ_DIR", "data") +WRITE_DIR = os.getenv("WRITE_DIR", "mlex_store") -DATA_DIR = os.getenv("DATA_DIR", "data") MODEL_DIR = "data/models" -OUTPUT_DIR = os.getenv("OUTPUT_DIR", "data") -OUTPUT_DIR = f"{OUTPUT_DIR}/mlexchange_store/{USER}" -UPLOAD_FOLDER_ROOT = f"{DATA_DIR}/upload" +UPLOAD_FOLDER_ROOT = "data/upload" PREFECT_TAGS = json.loads(os.getenv("PREFECT_TAGS", '["latent-space-explorer"]')) TIMEZONE = os.getenv("TIMEZONE", "US/Pacific") @@ -63,7 +61,10 @@ "params": { "io_parameters": {"uid_save": "uid0001", "uid_retrieve": None} }, - "volumes": [f"{DATA_DIR}:/app/work/data"], + "volumes": [ + f"{READ_DIR}:/app/work/data", + f"{WRITE_DIR}:/app/work/mlex_store", + ], } ], } @@ -78,7 +79,10 @@ "params": { "io_parameters": {"uid_save": "uid0001", "uid_retrieve": None} }, - "volumes": [f"{DATA_DIR}:/app/work/data"], + "volumes": [ + f"{READ_DIR}:/app/work/data", + f"{WRITE_DIR}:/app/work/mlex_store", + ], }, ], } @@ -219,7 +223,6 @@ def update_data_n_label_schema(selected_example_dataset, data_project_dict): data_project = DataProject.from_dict(data_project_dict) options = [] - # user_upload_data_dir = None 
if len(data_project.datasets) > 0: labels = np.full((len(data_project.datasets),), -1) # Example dataset option 1 @@ -353,7 +356,10 @@ def submit_dimension_reduction_job( "target_height": 64, "batch_size": 32, }, - "volumes": [f"{DATA_DIR}:/app/work/data"], + "volumes": [ + f"{READ_DIR}:/app/work/data", + f"{WRITE_DIR}:/app/work/mlex_store", + ], } else: autoencoder_params = { @@ -394,9 +400,9 @@ def submit_dimension_reduction_job( ] = "mlex_dimension_reduction_umap/umap_run.py" job_params["params_list"][-1]["params"]["io_parameters"] = io_parameters - job_params["params_list"][-1]["params"]["io_parameters"]["output_dir"] = str( - OUTPUT_DIR - ) + job_params["params_list"][-1]["params"]["io_parameters"][ + "output_dir" + ] = "mlex_store" job_params["params_list"][-1]["params"]["io_parameters"]["uid_save"] = "" job_params["params_list"][-1]["params"]["io_parameters"]["uid_retrieve"] = None job_params["params_list"][-1]["params"]["model_parameters"] = input_params @@ -452,7 +458,7 @@ def read_latent_vectors(n_intervals, experiment_id, max_intervals): children_flows = get_children_flow_run_ids(experiment_id) if len(children_flows) > 0: # read the latent vectors from the output dir - output_path = f"{OUTPUT_DIR}/{children_flows[-1]}/latent_vectors.npy" + output_path = f"mlex_store/{children_flows[-1]}/latent_vectors.npy" print(output_path, flush=True) if os.path.exists(output_path): latent_vectors = np.load(output_path) @@ -515,7 +521,7 @@ def apply_clustering( children_flows = get_children_flow_run_ids(experiment_id) if len(children_flows) > 0: clusters = obj.fit_predict(latent_vectors) - output_path = f"{OUTPUT_DIR}/{children_flows[0]}" + output_path = f"mlex_store/{children_flows[0]}" np.save(f"{output_path}/clusters.npy", clusters) unique_clusters = np.unique(clusters) options = [ From 409dc2a6f36c073f393bc983af97b8748fc7e756 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Fri, 29 Mar 2024 10:25:14 -0700 Subject: [PATCH 48/62] adding description --- .env.example | 1 + 1 file changed, 1 insertion(+) diff --git a/.env.example b/.env.example index 8f4a37c..d8d342e 100644 --- a/.env.example +++ b/.env.example @@ -6,6 +6,7 @@ FLOW_NAME="Parent flow/launch_parent_flow" TIMEZONE="US/Pacific" PREFECT_TAGS='["latent-space-explorer"]' +# MLEx Content Registry API CONTENT_API_URL="http://localhost:8000/api/v0/models" TILED_API_KEY= From dc0f8b71930416053519677f5e747a26321551a3 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Fri, 29 Mar 2024 10:29:13 -0700 Subject: [PATCH 49/62] rearranging example --- .env.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.env.example b/.env.example index d8d342e..6c5e611 100644 --- a/.env.example +++ b/.env.example @@ -12,4 +12,4 @@ CONTENT_API_URL="http://localhost:8000/api/v0/models" TILED_API_KEY= READ_DIR=/path/to/read/data -WRITE_DIR=/path/to/write/mlex_store +WRITE_DIR=/path/to/write/results From 4b899d8097f9758df4110469ab6f2df61231a08c Mon Sep 17 00:00:00 2001 From: taxe10 Date: Fri, 29 Mar 2024 11:12:10 -0700 Subject: [PATCH 50/62] added dotenv --- docker/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/requirements.txt b/docker/requirements.txt index 1fda0c8..c4f1660 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -3,6 +3,7 @@ dash-core-components==2.0.0 dash-bootstrap-components==1.0.2 dash-html-components==2.0.0 dash-iconify==0.1.2 +dotenv plotly==5.14.1 scikit-learn==1.3.0 dash-uploader==0.6.0 From d1d4b72c61e9a2079d0f9dcd76c44fd998f93e76 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Fri, 29 
Mar 2024 11:12:57 -0700 Subject: [PATCH 51/62] updating defaults to docker --- .env.example | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.env.example b/.env.example index 6c5e611..13f6ffd 100644 --- a/.env.example +++ b/.env.example @@ -1,13 +1,13 @@ USER=admin DEFAULT_ALGORITHM_DESCRIPTION=/path/to/PCA_v1.0.0.json -PREFECT_API_URL=http://localhost:4200/api +PREFECT_API_URL=http://prefect:4200/api FLOW_NAME="Parent flow/launch_parent_flow" TIMEZONE="US/Pacific" PREFECT_TAGS='["latent-space-explorer"]' # MLEx Content Registry API -CONTENT_API_URL="http://localhost:8000/api/v0/models" +CONTENT_API_URL="http://content-registry:8000/api/v0/models" TILED_API_KEY= From 3ad68f14ee89f363eb1683172a374721ad7097bf Mon Sep 17 00:00:00 2001 From: taxe10 Date: Fri, 29 Mar 2024 11:14:29 -0700 Subject: [PATCH 52/62] removed duplicate --- docker/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/requirements.txt b/docker/requirements.txt index c4f1660..1fda0c8 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -3,7 +3,6 @@ dash-core-components==2.0.0 dash-bootstrap-components==1.0.2 dash-html-components==2.0.0 dash-iconify==0.1.2 -dotenv plotly==5.14.1 scikit-learn==1.3.0 dash-uploader==0.6.0 From d4e4888e0d123304c81adb50feba0c2f30603904 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Fri, 29 Mar 2024 12:33:13 -0700 Subject: [PATCH 53/62] removed user and fixed typo --- .env.example | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.env.example b/.env.example index 13f6ffd..7ee3a5b 100644 --- a/.env.example +++ b/.env.example @@ -1,4 +1,3 @@ -USER=admin DEFAULT_ALGORITHM_DESCRIPTION=/path/to/PCA_v1.0.0.json PREFECT_API_URL=http://prefect:4200/api @@ -7,7 +6,7 @@ TIMEZONE="US/Pacific" PREFECT_TAGS='["latent-space-explorer"]' # MLEx Content Registry API -CONTENT_API_URL="http://content-registry:8000/api/v0/models" +CONTENT_API_URL="http://content-api:8000/api/v0/models" TILED_API_KEY= From 1dfac55bab15a3f661908f02b5ef99f0b192eeb4 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Fri, 29 Mar 2024 12:33:40 -0700 Subject: [PATCH 54/62] using relative path for docker --- src/app_layout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/app_layout.py b/src/app_layout.py index 493fdff..917705c 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -32,7 +32,7 @@ "value": "data/example_latentrepresentation/f_vectors.parquet", }, ] -READ_DIR = os.getenv("READ_DIR") +READ_DIR = "data" UPLOAD_FOLDER_ROOT = "data/upload" TILED_API_KEY = os.getenv("TILED_API_KEY", None) From 945e07431534348a81e60243e2eb087e878f2d1b Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Fri, 29 Mar 2024 20:34:30 -0700 Subject: [PATCH 55/62] Mount file manager --- docker-compose.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 8f729c3..c9568aa 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -20,7 +20,8 @@ services: volumes: - $READ_DIR:/app/work/data - $WRITE_DIR:/app/work/mlex_store - - ./src:/app/work/src + # - ./src:/app/work/src + - ../mlex_file_manager/file_manager:/app/work/src/file_manager ports: - "127.0.0.1:8070:8070" networks: From 6bf4d5d71bab8bf79ae404e8108979512a56fdd4 Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Fri, 29 Mar 2024 21:24:53 -0700 Subject: [PATCH 56/62] Reorganize the control panels and select job panel --- src/app_layout.py | 225 +++++++++++++++++++++++++--------------------- 1 file changed, 124 insertions(+), 101 deletions(-) diff 
--git a/src/app_layout.py b/src/app_layout.py index 917705c..2ab620d 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -57,7 +57,104 @@ # BEGIN DASH CODE header = templates.header() -# right panel: uploader, scatter plot, individual image plot +# right panel: file manager, scatter plot, individual image plot +scatter_control_panel = html.Div( + [ + dbc.Card( + style={"width": "100%"}, + children=[ + dbc.CardHeader("Scatter Plot Control Panel"), + dbc.CardBody( + [ + dbc.Label("Scatter Colors", className="mr-3"), + dcc.RadioItems( + id="scatter-color", + options=[ + {"label": "cluster", "value": "cluster"}, + {"label": "label", "value": "label"}, + ], + value="cluster", + style={"min-width": "250px"}, + className="mb-2", + ), + dbc.Label("Select cluster", className="mr-3"), + dcc.Dropdown( + id="cluster-dropdown", + value=-1, + style={"min-width": "250px"}, + className="mb-2", + ), + dbc.Label("Select label", className="mr-3"), + dcc.Dropdown( + id="label-dropdown", + value=-2, + style={"min-width": "250px"}, + ), + ] + ), + ], + ), + dcc.Interval( + id="interval-component", + interval=3000, # in milliseconds + max_intervals=-1, # keep triggering indefinitely, None + n_intervals=0, + ), + ] +) + +heatmap_control_panel = html.Div( + [ + dbc.Card( + style={"width": "100%"}, + children=[ + dbc.CardHeader("Heatmap Control Panel"), + dbc.CardBody( + [ + dbc.Label( + [ + "Select a Group of Points using ", + html.Span( + html.I(DashIconify(icon="lucide:lasso")), + className="icon", + ), + " or ", + html.Span( + html.I(DashIconify(icon="lucide:box-select")), + className="icon", + ), + " Tools :", + ], + className="mb-3", + ), + dbc.Label( + id="stats-div", + children=[ + "Number of images selected: 0", + html.Br(), + "Clusters represented: N/A", + html.Br(), + "Labels represented: N/A", + ], + ), + dbc.Label("Display Image Options", className="mr-3"), + dcc.RadioItems( + id="mean-std-toggle", + options=[ + {"label": "Mean", "value": "mean"}, + {"label": "Standard Deviation", "value": "sigma"}, + ], + value="mean", + style={"min-width": "250px"}, + className="mb-2", + ), + ] + ), + ], + ) + ] +) + image_panel = [ dbc.Card( id="image-card", @@ -127,6 +224,23 @@ ), ] ), + dbc.CardFooter( + [ + dbc.Row( + [ + dbc.Col( + scatter_control_panel, + width=6, + ), + dbc.Col( + heatmap_control_panel, + width=6, + ), + ] + ) + ] + + ) ], ) ] @@ -166,11 +280,8 @@ "margin-bottom": "1rem", }, ), - dbc.Label("Select a job..."), - dcc.Dropdown(id="job-selector"), ] ), - html.Hr(), html.Div( [ dbc.Button( @@ -189,6 +300,13 @@ "justify-content": "center", }, ), + html.Hr(), + html.Div( + [ + dbc.Label("Select a job..."), + dcc.Dropdown(id="job-selector"), + ] + ), html.Div(id="invisible-apply-div"), ] ), @@ -256,102 +374,7 @@ ] ) -scatter_control_panel = html.Div( - [ - dbc.Card( - style={"width": "100%"}, - children=[ - dbc.CardHeader("Scatter Plot Control Panel"), - dbc.CardBody( - [ - dbc.Label("Scatter Colors", className="mr-3"), - dcc.RadioItems( - id="scatter-color", - options=[ - {"label": "cluster", "value": "cluster"}, - {"label": "label", "value": "label"}, - ], - value="cluster", - style={"min-width": "250px"}, - className="mb-2", - ), - dbc.Label("Select cluster", className="mr-3"), - dcc.Dropdown( - id="cluster-dropdown", - value=-1, - style={"min-width": "250px"}, - className="mb-2", - ), - dbc.Label("Select label", className="mr-3"), - dcc.Dropdown( - id="label-dropdown", - value=-2, - style={"min-width": "250px"}, - ), - ] - ), - ], - ), - dcc.Interval( - id="interval-component", - 
interval=3000, # in milliseconds - max_intervals=-1, # keep triggering indefinitely, None - n_intervals=0, - ), - ] -) -heatmap_control_panel = html.Div( - [ - dbc.Card( - style={"width": "100%"}, - children=[ - dbc.CardHeader("Heatmap Control Panel"), - dbc.CardBody( - [ - dbc.Label( - [ - "Select a Group of Points using ", - html.Span( - html.I(DashIconify(icon="lucide:lasso")), - className="icon", - ), - " or ", - html.Span( - html.I(DashIconify(icon="lucide:box-select")), - className="icon", - ), - " Tools :", - ], - className="mb-3", - ), - dbc.Label( - id="stats-div", - children=[ - "Number of images selected: 0", - html.Br(), - "Clusters represented: N/A", - html.Br(), - "Labels represented: N/A", - ], - ), - dbc.Label("Display Image Options", className="mr-3"), - dcc.RadioItems( - id="mean-std-toggle", - options=[ - {"label": "Mean", "value": "mean"}, - {"label": "Standard Deviation", "value": "sigma"}, - ], - value="mean", - style={"min-width": "250px"}, - className="mb-2", - ), - ] - ), - ], - ) - ] -) # add alert pop up window modal = html.Div( @@ -371,8 +394,8 @@ control_panel = [ algo_panel, cluster_algo_panel, - scatter_control_panel, - heatmap_control_panel, + # scatter_control_panel, + # heatmap_control_panel, modal, ] From 782d22ab0ce7a2eb6846135d63bc2d681c4c43dc Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Mon, 1 Apr 2024 15:39:51 -0700 Subject: [PATCH 57/62] Ignore /result --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 57516b2..96318fb 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ __pycache__/ test.py # output dir +results/ data/output/ data/upload/ data/.file_manager_vars.pkl From e3efbbb1daeb1071d21a9b4433bfd7d6d1dc578a Mon Sep 17 00:00:00 2001 From: Runbo Jiang Date: Mon, 1 Apr 2024 15:40:10 -0700 Subject: [PATCH 58/62] Use accordion and update style --- src/app_layout.py | 236 ++++++++++++++---------------- src/assets/segmentation-style.css | 5 + src/frontend.py | 2 +- 3 files changed, 114 insertions(+), 129 deletions(-) diff --git a/src/app_layout.py b/src/app_layout.py index 2ab620d..035e2fb 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -246,136 +246,109 @@ ] # left panel: choose algorithms, submit job, choose scatter plot attributes, and statistics... 
-algo_panel = html.Div( +algo_panel = dbc.AccordionItem( [ - dbc.Card( - id="algo-card", - style={"width": "100%"}, - children=[ - dbc.Collapse( - children=[ - dbc.CardHeader("Select Dimension Reduction Algorithms"), - dbc.CardBody( - [ - dbc.Label("Algorithm", className="mr-2"), - dcc.Dropdown( - id="algo-dropdown", - options=[ - {"label": entry, "value": entry} - for entry in ALGORITHM_DATABASE - ], - style={"min-width": "250px"}, - value="PCA", - ), - html.Div(id="additional-model-params"), - html.Hr(), - html.Div( - [ - dbc.Label("Name your job", className="mr-2"), - dcc.Input( - id="job-name", - placeholder="test0", - style={ - "width": "100%", - "margin-bottom": "1rem", - }, - ), - ] - ), - html.Div( - [ - dbc.Button( - "Submit", - color="secondary", - id="run-algo", - outline=True, - size="lg", - className="m-1", - style={"width": "50%"}, - ), - ], - className="row", - style={ - "align-items": "center", - "justify-content": "center", - }, - ), - html.Hr(), - html.Div( - [ - dbc.Label("Select a job..."), - dcc.Dropdown(id="job-selector"), - ] - ), - html.Div(id="invisible-apply-div"), - ] + dbc.CardBody( + [ + dbc.Label("Algorithm", className="mr-2"), + dcc.Dropdown( + id="algo-dropdown", + options=[ + {"label": entry, "value": entry} + for entry in ALGORITHM_DATABASE + ], + style={"min-width": "250px"}, + value="PCA", + ), + html.Div(id="additional-model-params"), + html.Hr(), + html.Div( + [ + dbc.Label("Name your job", className="mr-2"), + dcc.Input( + id="job-name", + placeholder="test0", + style={ + "width": "100%", + "margin-bottom": "1rem", + }, + ), + ] + ), + html.Div( + [ + dbc.Button( + "Submit", + color="secondary", + id="run-algo", + outline=True, + size="lg", + className="m-1", + style={"width": "50%"}, ), ], - id="model-collapse", - is_open=True, - style={"margin-bottom": "0rem"}, - ) - ], - ) - ] + className="row", + style={ + "align-items": "center", + "justify-content": "center", + }, + ), + html.Hr(), + html.Div( + [ + dbc.Label("Select a job..."), + dcc.Dropdown(id="job-selector"), + ] + ), + html.Div(id="invisible-apply-div"), + ] + ), + ], + title="Select Dimension Reduction Algorithms", ) -cluster_algo_panel = html.Div( +cluster_algo_panel = dbc.AccordionItem( [ - dbc.Card( - id="cluster-algo-card", - style={"width": "100%"}, - children=[ - dbc.Collapse( - children=[ - dbc.CardHeader("Select Clustering Algorithms"), - dbc.CardBody( - [ - dbc.Label("Algorithm", className="mr-2"), - dcc.Dropdown( - id="cluster-algo-dropdown", - options=[ - {"label": entry, "value": entry} - for entry in CLUSTER_ALGORITHM_DATABASE - ], - style={"min-width": "250px"}, - value="DBSCAN", - ), - html.Div(id="additional-cluster-params"), - html.Hr(), - html.Div( - [ - dbc.Button( - "Apply", - color="secondary", - id="run-cluster-algo", - outline=True, - size="lg", - className="m-1", - style={"width": "50%"}, - ), - ], - className="row", - style={ - "align-items": "center", - "justify-content": "center", - }, - ), - html.Div(id="invisible-submit-div"), - ] + dbc.CardBody( + [ + dbc.Label("Algorithm", className="mr-2"), + dcc.Dropdown( + id="cluster-algo-dropdown", + options=[ + {"label": entry, "value": entry} + for entry in CLUSTER_ALGORITHM_DATABASE + ], + style={"min-width": "250px"}, + value="DBSCAN", + ), + html.Div(id="additional-cluster-params"), + html.Hr(), + html.Div( + [ + dbc.Button( + "Apply", + color="secondary", + id="run-cluster-algo", + outline=True, + size="lg", + className="m-1", + style={"width": "50%"}, ), ], - id="cluster-model-collapse", - is_open=True, - 
style={"margin-bottom": "0rem"}, - ) - ], - ) - ] + className="row", + style={ + "align-items": "center", + "justify-content": "center", + }, + ), + html.Div(id="invisible-submit-div"), + ] + ), + ], + title="Select Clustering Algorithms", ) - # add alert pop up window modal = html.Div( [ @@ -390,14 +363,17 @@ ] ) - -control_panel = [ - algo_panel, - cluster_algo_panel, - # scatter_control_panel, - # heatmap_control_panel, - modal, -] +control_panel = dbc.Accordion( + [ + algo_panel, + cluster_algo_panel + ], + style={ + 'position': 'sticky', + 'top': '10%', + 'width': '100%' + } + ) # metadata @@ -429,8 +405,12 @@ dbc.Container( children=[ dbc.Row( - [dbc.Col(control_panel, width=4), dbc.Col(image_panel, width=8)] + [ + dbc.Col(control_panel, width=4, style={'display': 'flex', 'margin-top': '1em'}), + dbc.Col(image_panel, width=8) + ] ), + dbc.Row(dbc.Col(modal)), dbc.Row(dbc.Col(meta)), ], fluid=True, diff --git a/src/assets/segmentation-style.css b/src/assets/segmentation-style.css index 4cb18ba..8e76b71 100644 --- a/src/assets/segmentation-style.css +++ b/src/assets/segmentation-style.css @@ -32,3 +32,8 @@ label { margin: 0; border-style: solid; } + +.accordion-button { + font-size: large; + font-weight: bold; +} diff --git a/src/frontend.py b/src/frontend.py index 2aff44e..7a1315c 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -43,7 +43,7 @@ PREFECT_TAGS = json.loads(os.getenv("PREFECT_TAGS", '["latent-space-explorer"]')) TIMEZONE = os.getenv("TIMEZONE", "US/Pacific") FLOW_NAME = os.getenv("FLOW_NAME", "") -FLOW_TYPE = "conda" +FLOW_TYPE = "podman" #"conda" CONDA_ENV_NAME = "dimension_reduction_pca" CONTENT_API_URL = os.getenv("CONTENT_API_URL", "http://localhost:8000/api/v0/models") From ab89eb0df92777d199a1a29d2566e713a2b58321 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Tue, 2 Apr 2024 14:45:59 -0700 Subject: [PATCH 59/62] adding current path to output directories and default examples directories --- src/app_layout.py | 11 ++++++----- src/frontend.py | 14 +++++++------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/app_layout.py b/src/app_layout.py index 2ab620d..24b022b 100644 --- a/src/app_layout.py +++ b/src/app_layout.py @@ -26,10 +26,13 @@ } DATA_OPTION = [ - {"label": "Synthetic Shapes", "value": "data/example_shapes/Demoshapes.npz"}, + { + "label": "Synthetic Shapes", + "value": f"{os.getcwd()}/data/example_shapes/Demoshapes.npz", + }, { "label": "Latent representations from encoder-decoder model", - "value": "data/example_latentrepresentation/f_vectors.parquet", + "value": f"{os.getcwd()}/data/example_latentrepresentation/f_vectors.parquet", }, ] READ_DIR = "data" @@ -239,8 +242,7 @@ ] ) ] - - ) + ), ], ) ] @@ -375,7 +377,6 @@ ) - # add alert pop up window modal = html.Div( [ diff --git a/src/frontend.py b/src/frontend.py index 2aff44e..b74355f 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -402,7 +402,7 @@ def submit_dimension_reduction_job( job_params["params_list"][-1]["params"]["io_parameters"] = io_parameters job_params["params_list"][-1]["params"]["io_parameters"][ "output_dir" - ] = "mlex_store" + ] = f"{os.getcwd()}/mlex_store" job_params["params_list"][-1]["params"]["io_parameters"]["uid_save"] = "" job_params["params_list"][-1]["params"]["io_parameters"]["uid_retrieve"] = None job_params["params_list"][-1]["params"]["model_parameters"] = input_params @@ -701,18 +701,18 @@ def update_heatmap( selected_indices, export="pillow" ) # Example dataset - elif selected_example_dataset == "data/example_shapes/Demoshapes.npz": + elif 
"data/example_shapes/Demoshapes.npz" in selected_example_dataset: print("Demoshapes.npz") - selected_images = np.load("/app/work/" + selected_example_dataset)["arr_0"][ + selected_images = np.load(selected_example_dataset)["arr_0"][ selected_indices ] print(selected_images.shape) elif ( - selected_example_dataset - == "data/example_latentrepresentation/f_vectors.parquet" + "data/example_latentrepresentation/f_vectors.parquet" + in selected_example_dataset ): - print("f_vectors.parque") - df = pd.read_parquet("/app/work/" + selected_example_dataset) + print("f_vectors.parquet") + df = pd.read_parquet(selected_example_dataset) selected_images = df.iloc[selected_indices].values selected_images = np.array(selected_images) From 2e40a2c3bef6766686049914bb980f1bcf0de621 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Tue, 2 Apr 2024 14:46:18 -0700 Subject: [PATCH 60/62] added tiled api key --- docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yml b/docker-compose.yml index c9568aa..33854e9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,6 +15,7 @@ services: PREFECT_TAGS: "${PREFECT_TAGS}" PREFECT_API_URL: '${PREFECT_API_URL}' CONTENT_API_URL: '${CONTENT_API_URL}' + TILED_API_KEY: '${TILED_API_KEY}' FLOW_NAME: '${FLOW_NAME}' TIMEZONE: "${TIMEZONE}" volumes: From 6666baa80d40384d16cf3ea067afb58f20667291 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Tue, 2 Apr 2024 15:30:15 -0700 Subject: [PATCH 61/62] adding sample dimension reduction techniques --- src/assets/sample_models.json | 192 ++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 src/assets/sample_models.json diff --git a/src/assets/sample_models.json b/src/assets/sample_models.json new file mode 100644 index 0000000..9d601b9 --- /dev/null +++ b/src/assets/sample_models.json @@ -0,0 +1,192 @@ +[ + { + "content_id": "uid1", + "content_type": "model", + "name": "PCA", + "public": true, + "version": "1.0.0", + "type": "unsupervised", + "owner": "mlexchange team", + "service_type": "frontend", + "docker_image_uri": "ghcr.io/runboj/mlex_dimension_reduction_pca:main", + "conda_env_name": "mlex_dimension_reduction_pca", + "reference": "PCA algorithm", + "application": [ + "dimension reduction" + ], + "description": "PCA-based dimension reduction", + "gui_parameters": [ + { + "type": "dropdown", + "name": "ncomp-dropdown-menu", + "title": "Number of Components", + "value": 2, + "options": [ + { + "label": "2", + "value": 2 + }, + { + "label": "3", + "value": 3 + } + ], + "param_key": "n_components", + "comp_group": "all" + } + ], + "cmd": [ + "python pca_run.py" + ], + "kwargs": {}, + "compute_resources": { + "num_processors": 1, + "num_gpus": 0 + } + }, + { + "content_id": "uid2", + "content_type": "model", + "name": "UMAP", + "public": true, + "version": "1.0.0", + "type": "unsupervised", + "owner": "mlexchange team", + "service_type": "frontend", + "docker_image_uri": "ghcr.io/runboj/mlex_dimension_reduction_umap:main", + "conda_env_name": "mlex_dimension_reduction_umap", + "reference": "UMAP algorithm", + "application": [ + "dimension reduction" + ], + "description": "UMAP algotihtm for dimension reduction", + "gui_parameters": [ + { + "type": "dropdown", + "name": "ncomp-dropdown-menu-2", + "title": "Number of Components", + "value": 2, + "options": [ + { + "label": "2", + "value": 2 + }, + { + "label": "3", + "value": 3 + } + ], + "param_key": "n_components", + "comp_group": "all" + }, + { + "type": "dropdown", + "name": "mindist-dropdown-menu", + "title": "Min 
distance between points", + "value": 0.1, + "options": [ + { + "label": 0.1, + "value": 0.1 + }, + { + "label": 0.2, + "value": 0.2 + }, + { + "label": 0.3, + "value": 0.3 + }, + { + "label": 0.4, + "value": 0.4 + }, + { + "label": 0.5, + "value": 0.5 + }, + { + "label": 0.6, + "value": 0.6 + }, + { + "label": 0.7, + "value": 0.7 + }, + { + "label": 0.8, + "value": 0.8 + }, + { + "label": 0.9, + "value": 0.9 + }, + { + "label": 1.0, + "value": 1.0 + } + ], + "param_key": "min_dist", + "comp_group": "all" + }, + { + "type": "dropdown", + "name": "nneighbor-dropdown-menu", + "title": "Number of Nearest Neighbors", + "value": 15, + "options": [ + { + "label": 5, + "value": 5 + }, + { + "label": 10, + "value": 10 + }, + { + "label": 15, + "value": 15 + }, + { + "label": 20, + "value": 20 + }, + { + "label": 25, + "value": 25 + }, + { + "label": 30, + "value": 30 + }, + { + "label": 35, + "value": 35 + }, + { + "label": 40, + "value": 40 + }, + { + "label": 45, + "value": 45 + }, + { + "label": 50, + "value": 50 + } + ], + "param_key": "n_neighbors", + "comp_group": "all" + } + ], + "cmd": [ + "python umap_run.py" + ], + "kwargs": {}, + "compute_resources": { + "num_processors": 1, + "num_gpus": 0 + } + } +] From 0d5b82d7a3001e299ec6d6c4a4ea7dd56d1260df Mon Sep 17 00:00:00 2001 From: taxe10 Date: Tue, 2 Apr 2024 15:33:37 -0700 Subject: [PATCH 62/62] add slurm jobs --- .env.example | 5 +++ docker-compose.yml | 3 ++ src/frontend.py | 78 +++++++++++++++++++++++----------------------- 3 files changed, 47 insertions(+), 39 deletions(-) diff --git a/.env.example b/.env.example index 7ee3a5b..be2df74 100644 --- a/.env.example +++ b/.env.example @@ -12,3 +12,8 @@ TILED_API_KEY= READ_DIR=/path/to/read/data WRITE_DIR=/path/to/write/results + +# Slurm jobs +PARTITIONS='["p1", "p2"]' +RESERVATIONS='["r1", "r2"]' +MAX_TIME="1:00:00" diff --git a/docker-compose.yml b/docker-compose.yml index 33854e9..4f3a7e6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,6 +18,9 @@ services: TILED_API_KEY: '${TILED_API_KEY}' FLOW_NAME: '${FLOW_NAME}' TIMEZONE: "${TIMEZONE}" + PARTITIONS: "${PARTITIONS}" + RESERVATIONS: "${RESERVATIONS}" + MAX_TIME: "${MAX_TIME}" volumes: - $READ_DIR:/app/work/data - $WRITE_DIR:/app/work/mlex_store diff --git a/src/frontend.py b/src/frontend.py index b74355f..b448d97 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -44,11 +44,13 @@ TIMEZONE = os.getenv("TIMEZONE", "US/Pacific") FLOW_NAME = os.getenv("FLOW_NAME", "") FLOW_TYPE = "conda" -CONDA_ENV_NAME = "dimension_reduction_pca" CONTENT_API_URL = os.getenv("CONTENT_API_URL", "http://localhost:8000/api/v0/models") DEFAULT_ALGORITHM_DESCRIPTION = os.getenv("DEFAULT_ALGORITHM_DESCRIPTION") +PARTITIONS = os.getenv("PARTITIONS", None) +RESERVATIONS = os.getenv("RESERVATIONS", None) +MAX_TIME = os.getenv("MAX_TIME", "1:00:00") if FLOW_TYPE == "podman": TRAIN_PARAMS_EXAMPLE = { @@ -69,29 +71,12 @@ ], } - INFERENCE_PARAMS_EXAMPLE = { - "flow_type": "podman", - "params_list": [ - { - "image_name": "ghcr.io/runboj/mlex_dimension_reduction_pca", - "image_tag": "main", - "command": 'python -c \\"import time; time.sleep(30)\\"', - "params": { - "io_parameters": {"uid_save": "uid0001", "uid_retrieve": None} - }, - "volumes": [ - f"{READ_DIR}:/app/work/data", - f"{WRITE_DIR}:/app/work/mlex_store", - ], - }, - ], - } -else: +elif FLOW_TYPE == "conda": TRAIN_PARAMS_EXAMPLE = { "flow_type": "conda", "params_list": [ { - "conda_env_name": f"{CONDA_ENV_NAME}", + "conda_env_name": "mlex_dimension_reduction_pca", "params": { "io_parameters": 
{"uid_save": "uid0001", "uid_retrieve": None} }, @@ -99,15 +84,21 @@ ], } - INFERENCE_PARAMS_EXAMPLE = { - "flow_type": "conda", +else: + TRAIN_PARAMS_EXAMPLE = { + "flow_type": "slurm", "params_list": [ { - "conda_env_name": f"{CONDA_ENV_NAME}", + "job_name": "latent_space_explorer", + "num_nodes": 1, + "partitions": PARTITIONS, + "reservations": RESERVATIONS, + "max_time": MAX_TIME, + "conda_env_name": "mlex_dimension_reduction_pca", "params": { "io_parameters": {"uid_save": "uid0001", "uid_retrieve": None} }, - }, + } ], } @@ -130,8 +121,8 @@ def show_dimension_reduction_gui_layouts(selected_algo): data = requests.get(CONTENT_API_URL).json() # all model except Exception as e: print(f"Cannot access content api: {e}", flush=True) - with open(DEFAULT_ALGORITHM_DESCRIPTION, "r") as f: - data = [json.load(f)] + with open("src/assets/sample_models.json", "r") as f: + data = json.load(f) if selected_algo == "PCA": conditions = {"name": "PCA"} @@ -345,33 +336,42 @@ def submit_dimension_reduction_job( if data_clinic_file_path is not None: auto_io_params = io_parameters.copy() auto_io_params["model_dir"] = data_clinic_file_path + "/last.ckpt" + auto_params = ( + { + "io_parameters": auto_io_params, + "target_width": 64, + "target_height": 64, + "batch_size": 32, + }, + ) + # TODO: Use content registry to retrieve the model parameters if FLOW_TYPE == "podman": autoencoder_params = { "image_name": "ghcr.io/mlexchange/mlex_pytorch_autoencoders:main", "image_tag": "main", "command": "python src/predict_model.py", - "params": { - "io_parameters": auto_io_params, - "target_width": 64, - "target_height": 64, - "batch_size": 32, - }, + "params": auto_params, "volumes": [ f"{READ_DIR}:/app/work/data", f"{WRITE_DIR}:/app/work/mlex_store", ], } - else: + elif FLOW_TYPE == "conda": autoencoder_params = { "conda_env_name": "pytorch_autoencoders", - "params": { - "io_parameters": auto_io_params, - "target_width": 64, - "target_height": 64, - "batch_size": 32, - }, + "params": auto_params, "python_file_name": "mlex_pytorch_autoencoders/src/predict_model.py", } + else: + autoencoder_params = { + "job_name": "latent_space_explorer", + "num_nodes": 1, + "partitions": PARTITIONS, + "reservations": RESERVATIONS, + "max_time": MAX_TIME, + "conda_env_name": "pytorch_autoencoders", + "params": auto_params, + } job_params["params_list"].insert(0, autoencoder_params) # prefect