-
Notifications
You must be signed in to change notification settings - Fork 44
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding API to fetch tokenizer config for model #1052
Changes from 11 commits
707e96c
30592c8
3cc7e8a
41a305c
b95aeb6
74601c0
8997594
dbed1dd
aa0ee81
d6b728e
43f94b0
035c4ee
a5cb572
766b201
8356379
e449f0a
ef2c40e
88fa1cc
c66bdd6
a2f0ebe
be96c70
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,7 +6,7 @@ | |
import os | ||
import traceback | ||
from dataclasses import fields | ||
from typing import Dict, Union | ||
from typing import Dict, Optional, Union | ||
|
||
import oci | ||
from oci.data_science.models import UpdateModelDetails, UpdateModelProvenanceDetails | ||
|
@@ -268,7 +268,12 @@ def if_artifact_exist(self, model_id: str, **kwargs) -> bool: | |
logger.info(f"Artifact not found in model {model_id}.") | ||
return False | ||
|
||
def get_config(self, model_id: str, config_file_name: str) -> Dict: | ||
def get_config( | ||
self, | ||
model_id: str, | ||
config_file_name: str, | ||
config_folder: Optional[str] = "config", | ||
) -> Dict: | ||
"""Gets the config for the given Aqua model. | ||
|
||
Parameters | ||
|
@@ -277,6 +282,9 @@ def get_config(self, model_id: str, config_file_name: str) -> Dict: | |
The OCID of the Aqua model. | ||
config_file_name: str | ||
name of the config file | ||
config_folder: Optional[str] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's add the default value to the docstring as well. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added. |
||
subfolder path where config_file_name needs to be searched | ||
default value: config | ||
|
||
Returns | ||
------- | ||
|
@@ -306,19 +314,18 @@ def get_config(self, model_id: str, config_file_name: str) -> Dict: | |
) | ||
base_model = self.ds_client.get_model(base_model_ocid).data | ||
artifact_path = get_artifact_path(base_model.custom_metadata_list) | ||
config_path = f"{os.path.dirname(artifact_path)}/config/" | ||
config_path = f"{os.path.dirname(artifact_path)}/{config_folder}/" | ||
else: | ||
logger.info(f"Loading {config_file_name} for model {oci_model.id}...") | ||
artifact_path = get_artifact_path(oci_model.custom_metadata_list) | ||
config_path = f"{artifact_path.rstrip('/')}/config/" | ||
|
||
config_path = f"{artifact_path.rstrip('/')}/{config_folder}/" | ||
if not artifact_path: | ||
logger.debug( | ||
f"Failed to get artifact path from custom metadata for the model: {model_id}" | ||
) | ||
return config | ||
|
||
config_file_path = f"{config_path}{config_file_name}" | ||
config_file_path = f"{config_path.rstrip('/')}/{config_file_name}" | ||
if is_path_exists(config_file_path): | ||
try: | ||
config = load_config( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
from ads.aqua.common.errors import AquaRuntimeError, AquaValueError | ||
from ads.aqua.common.utils import ( | ||
get_hf_model_info, | ||
is_valid_ocid, | ||
list_hf_models, | ||
) | ||
from ads.aqua.extension.base_handler import AquaAPIhandler | ||
|
@@ -316,8 +317,24 @@ def post(self, *args, **kwargs): # noqa: ARG002 | |
) | ||
|
||
|
||
class AquaModelTokenizerConfigHandler(AquaAPIhandler): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's add a docstring to the class.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated. |
||
def get(self, model_id): | ||
url_parse = urlparse(self.request.path) | ||
paths = url_parse.path.strip("/") | ||
path_list = paths.split("/") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. NIT: Looks like we only use [inline code reference lost in extraction].
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated. |
||
if ( | ||
len(path_list) == 4 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is `4`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The request path here is `/aqua/models/ocid1.iad.ahdxxx/tokenizer`. Added comments. |
||
and is_valid_ocid(path_list[2]) | ||
and path_list[3] == "tokenizer" | ||
): | ||
return self.finish(AquaModelApp().get_hf_tokenizer_config(model_id)) | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. NIT: It looks like the `else` is not required here. I think it would be clearer to do something like this: [code snippet lost in extraction]
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated. |
||
raise HTTPError(400, f"The request {self.request.path} is invalid.") | ||
|
||
|
||
__handlers__ = [ | ||
("model/?([^/]*)", AquaModelHandler), | ||
("model/?([^/]*)/license", AquaModelLicenseHandler), | ||
("model/?([^/]*)/tokenizer", AquaModelTokenizerConfigHandler), | ||
("model/hf/search/?([^/]*)", AquaHuggingFaceHandler), | ||
] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What if `None` is passed as the `config_folder`? I guess in the code we should add something like [code snippet lost in extraction]. Let's also move the strings to the constants.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated