-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Boilerplate for triggering ml job run #91
Changes from 6 commits
a34cb14
65b6a0e
bf44264
24f1a45
9f4b442
e01ad6b
f8575f1
39c8956
9e0438f
355f64c
0f68330
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,50 +1,147 @@ | ||
from dash import callback, Input, Output, State, no_update | ||
from utils.annotations import Annotations | ||
from utils.data_utils import data | ||
import numpy as np | ||
from dash.exceptions import PreventUpdate | ||
import os | ||
import uuid | ||
import requests | ||
import time | ||
import dash_mantine_components as dmc | ||
from utils import data_utils | ||
|
||
# Deployment mode flag; "dev" makes the callbacks below short-circuit the
# computing API and use placeholder job ids instead.
MODE = os.environ.get("MODE", "")
||
# Hard-coded example workflow payload submitted to the computing API.
# NOTE(review): user id, container uri, file paths, hosts and experiment ids
# are fixed placeholders — TODO: parameterize from user inputs and relevant
# file paths (see run_job below).
DEMO_WORKFLOW = {
    "user_uid": "high_res_user",
    "job_list": [
        # Job 0: train a random-forest model (random_forest.py) from
        # annotated training images/masks.
        {
            "mlex_app": "high-res-segmentation",
            "description": "test_1",
            "service_type": "backend",
            "working_directory": "/data/mlex_repo/mlex_tiled/data",
            "job_kwargs": {
                "uri": "mlexchange1/random-forest-dc:1.1",
                "type": "docker",
                "cmd": 'python random_forest.py data/seg-results/spiral/image-train data/seg-results-test/spiral/feature data/seg-results/spiral/mask data/seg-results-test/spiral/model \'{"n_estimators": 30, "oob_score": true, "max_depth": 8}\'',
                "kwargs": {
                    "job_type": "train",
                    "experiment_id": "123",
                    "dataset": "name_of_dataset",
                    "params": '{"n_estimators": 30, "oob_score": true, "max_depth": 8}',
                },
            },
        },
        # Job 1: run segmentation (segment.py) with the model produced by job 0.
        # NOTE(review): "job_type" is still "train" here even though the cmd
        # performs inference — confirm the expected value against the
        # computing API before relying on it.
        {
            "mlex_app": "high-res-segmentation",
            "description": "test_1",
            "service_type": "backend",
            "working_directory": "/data/mlex_repo/mlex_tiled/data",
            "job_kwargs": {
                "uri": "mlexchange1/random-forest-dc:1.1",
                "type": "docker",
                "cmd": "python segment.py data/data/20221222_085501_looking_from_above_spiralUP_CounterClockwise_endPointAtDoor_0-1000 data/seg-results-test/spiral/model/random-forest.model data/seg-results-test/spiral/output '{\"show_progress\": 1}'",
                "kwargs": {
                    "job_type": "train",
                    "experiment_id": "124",
                    "dataset": "name_of_dataset",
                    "params": '{"show_progress": 1}',
                },
            },
        },
    ],
    "host_list": ["vaughan.als.lbl.gov"],
    # Job 1 depends on job 0 (the model must exist before inference runs).
    "dependencies": {"0": [], "1": [0]},
    "requirements": {"num_processors": 2, "num_gpus": 0, "num_nodes": 1},
}
|
||
|
||
# NEXT STEPS:
# - this function returns a job ID, which would be associated with the workflow run on vaughan
# - then we need another callback to pick up this ID and start polling for successful output
@callback(
    Output("output-details", "children"),
    Output("submitted-job-id", "data"),
    Input("run-model", "n_clicks"),
    State("annotation-store", "data"),
    State("project-name-src", "value"),
)
def run_job(n_clicks, annotation_store, project_name):
    """
    This callback collects parameters from the UI and submits a job to the computing api.
    If the app is run from "dev" mode, then only a placeholder job_uid will be created.
    The job_uid is saved in a dcc.Store for reference by the check_job callback below.

    # TODO: Appropriately parameterize the DEMO_WORKFLOW json depending on user inputs
    and relevant file paths
    """
    if not n_clicks:
        # Initial call / button not pressed yet — leave both outputs untouched.
        return no_update, no_update

    if MODE == "dev":
        # Dev mode: fabricate a job id without touching the computing API.
        job_uid = str(uuid.uuid4())
        return (
            dmc.Text(
                f"Workflow has been successfully submitted with uid: {job_uid}",
                size="sm",
            ),
            job_uid,
        )

    # Persist the current annotations so the submitted job can read them.
    data_utils.save_annotations_data(annotation_store, project_name)
    job_submitted = requests.post(
        "http://job-service:8080/api/v0/workflows", json=DEMO_WORKFLOW
    )
    if job_submitted.status_code == 200:
        # Only parse the body on success — an error response may not be JSON.
        job_uid = job_submitted.json()
        return (
            dmc.Text(
                f"Workflow has been successfully submitted with uid: {job_uid}",
                size="sm",
            ),
            job_uid,
        )
    # Submission failed: report the status code and do NOT store a job id,
    # so check_job does not start polling for a job that was never created.
    return (
        dmc.Text(
            f"Workflow presented error code: {job_submitted.status_code}",
            size="sm",
        ),
        no_update,
    )
|
||
|
||
@callback(
    Output("output-details", "children", allow_duplicate=True),
    Output("submitted-job-id", "data", allow_duplicate=True),
    Input("submitted-job-id", "data"),
    Input("model-check", "n_intervals"),
    prevent_initial_call=True,
)
def check_job(job_id, n_intervals):
    """
    This callback checks to see if a job has completed successfully and will only
    update if there is a job_id present in the submitted-job-id dcc.Store. Will
    wait 3sec in "dev" mode to simulate.

    # TODO: Connect with the computing API when not in "dev" mode
    """
    if MODE != "dev":
        # TODO - connect with API
        raise PreventUpdate
    if not job_id:
        # No submitted job to check — skip this interval tick.
        raise PreventUpdate

    # Simulate a long-running job, then surface the completion message and
    # the toggle for showing segmentation results.
    time.sleep(3)
    output_layout = [
        dmc.Text(
            f"Workflow {job_id} completed successfully. Click button below to view segmentation results.",
            size="sm",
        ),
        dmc.Space(h=20),
        dmc.Switch(
            size="sm",
            radius="lg",
            label="Show output results",
            id="show-results",
            checked=False,
        ),
    ]
    # Clear the stored job id (None) so the interval stops re-triggering
    # this completed-job path.
    return output_layout, None
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Testing on Vaughan gives us the following error:
I suspect this may come from interacting with a Tiled server that has only a single tiff-sequence in it, so we technically never actively selected a project. Interacting with the GUI more (changing slider value, 'selecting' the single project) does remove this error and the attempt to submit the job is made.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could also test: App is loaded, then immediately click the "Run Model" button. And what if the annotation store is empty?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Wiebke I think this is happening because of this line, where my guess is that
`DATA_OPTIONS`
evaluates to `None`, which means that the slider is disabled, so this block isn't hit. I think you're probably right in that this is because of a different structure on the Tiled server on your end. What's the structure of the
`data`
variable you get after running:

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems to have been indeed an issue with our previous local Tiled setup and resolved with the updated population of the project list.