forked from skypilot-org/skypilot
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[OCI] Support more OS types in addition to ubuntu (skypilot-org#4080)
* Bug fix for sky config file path resolution. * format * [OCI] Bug fix for image_id in Task YAML * [OCI]: Support more OS types (esp. oraclelinux) in addition to ubuntu. * format * Disable system firewall * Bug fix for validation of the Marketplace images * Update sky/clouds/oci.py Co-authored-by: Zhanghao Wu <[email protected]> * Update sky/clouds/oci.py Co-authored-by: Zhanghao Wu <[email protected]> * variable/function naming * address review comments: not to change the service_catalog api. call oci_catalog directly for get os type for a image. * Update sky/clouds/oci.py Co-authored-by: Zhanghao Wu <[email protected]> * Update sky/clouds/oci.py Co-authored-by: Zhanghao Wu <[email protected]> * Update sky/clouds/oci.py Co-authored-by: Zhanghao Wu <[email protected]> * address review comments --------- Co-authored-by: Zhanghao Wu <[email protected]>
- Loading branch information
1 parent
067a0a3
commit 3c3bcee
Showing
4 changed files
with
81 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,8 @@ | |
make_deploy_resources_variables(): Bug fix for specify the image_id as | ||
the ocid of the image in the task.yaml file, in this case the image_id | ||
for the node config should be set to the ocid instead of a dict. | ||
- Hysun He ([email protected]) @ Oct 13, 2024: | ||
Support more OS types additional to ubuntu for OCI resources. | ||
""" | ||
import json | ||
import logging | ||
|
@@ -295,10 +297,21 @@ def make_deploy_resources_variables( | |
cpus=None if cpus is None else float(cpus), | ||
disk_tier=resources.disk_tier) | ||
|
||
image_str = self._get_image_str(image_id=resources.image_id, | ||
instance_type=resources.instance_type, | ||
region=region.name) | ||
|
||
# pylint: disable=import-outside-toplevel | ||
from sky.clouds.service_catalog import oci_catalog | ||
os_type = oci_catalog.get_image_os_from_tag(tag=image_str, | ||
region=region.name) | ||
logger.debug(f'OS type for the image {image_str} is {os_type}') | ||
|
||
return { | ||
'instance_type': instance_type, | ||
'custom_resources': custom_resources, | ||
'region': region.name, | ||
'os_type': os_type, | ||
'cpus': str(cpus), | ||
'memory': resources.memory, | ||
'disk_size': resources.disk_size, | ||
|
@@ -501,59 +514,45 @@ def _get_image_id( | |
region_name: str, | ||
instance_type: str, | ||
) -> str: | ||
if image_id is None: | ||
return self._get_default_image(region_name=region_name, | ||
instance_type=instance_type) | ||
if None in image_id: | ||
image_id_str = image_id[None] | ||
else: | ||
assert region_name in image_id, image_id | ||
image_id_str = image_id[region_name] | ||
image_id_str = self._get_image_str(image_id=image_id, | ||
instance_type=instance_type, | ||
region=region_name) | ||
|
||
if image_id_str.startswith('skypilot:'): | ||
image_id_str = service_catalog.get_image_id_from_tag(image_id_str, | ||
region_name, | ||
clouds='oci') | ||
if image_id_str is None: | ||
logger.critical( | ||
'! Real image_id not found! - {region_name}:{image_id}') | ||
# Raise ResourcesUnavailableError to make sure the failover | ||
# in CloudVMRayBackend will be correctly triggered. | ||
# TODO(zhwu): This is a information leakage to the cloud | ||
# implementor, we need to find a better way to handle this. | ||
raise exceptions.ResourcesUnavailableError( | ||
'! ERR: No image found in catalog for region ' | ||
f'{region_name}. Try setting a valid image_id.') | ||
|
||
# Image_id should be impossible be None, except for the case when | ||
# user specify an image tag which does not exist in the image.csv | ||
# catalog file which only possible in "test" / "evaluation" phase. | ||
# Therefore, we use assert here. | ||
assert image_id_str is not None | ||
|
||
logger.debug(f'Got real image_id {image_id_str}') | ||
return image_id_str | ||
|
||
def _get_default_image(self, region_name: str, instance_type: str) -> str: | ||
def _get_image_str(self, image_id: Optional[Dict[Optional[str], str]], | ||
instance_type: str, region: str): | ||
if image_id is None: | ||
image_str = self._get_default_image_tag(instance_type) | ||
elif None in image_id: | ||
image_str = image_id[None] | ||
else: | ||
assert region in image_id, image_id | ||
image_str = image_id[region] | ||
return image_str | ||
|
||
def _get_default_image_tag(self, instance_type: str) -> str: | ||
acc = self.get_accelerators_from_instance_type(instance_type) | ||
|
||
if acc is None: | ||
image_tag = oci_utils.oci_config.get_default_image_tag() | ||
image_id_str = service_catalog.get_image_id_from_tag(image_tag, | ||
region_name, | ||
clouds='oci') | ||
else: | ||
assert len(acc) == 1, acc | ||
image_tag = oci_utils.oci_config.get_default_gpu_image_tag() | ||
image_id_str = service_catalog.get_image_id_from_tag(image_tag, | ||
region_name, | ||
clouds='oci') | ||
|
||
if image_id_str is not None: | ||
logger.debug( | ||
f'Got default image_id {image_id_str} from tag {image_tag}') | ||
return image_id_str | ||
|
||
# Raise ResourcesUnavailableError to make sure the failover in | ||
# CloudVMRayBackend will be correctly triggered. | ||
# TODO(zhwu): This is a information leakage to the cloud implementor, | ||
# we need to find a better way to handle this. | ||
raise exceptions.ResourcesUnavailableError( | ||
'ERR: No image found in catalog for region ' | ||
f'{region_name}. Try update your default image_id settings.') | ||
return image_tag | ||
|
||
def get_vpu_from_disktier( | ||
self, cpus: Optional[float], | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,8 @@ | |
- Hysun He ([email protected]) @ Apr, 2023: Initial implementation | ||
- Hysun He ([email protected]) @ Jun, 2023: Reduce retry times by | ||
excluding those unsubscribed regions. | ||
- Hysun He ([email protected]) @ Oct 14, 2024: Bug fix for validation | ||
of the Marketplace images | ||
""" | ||
|
||
import logging | ||
|
@@ -206,4 +208,24 @@ def get_image_id_from_tag(tag: str, region: Optional[str]) -> Optional[str]: | |
|
||
def is_image_tag_valid(tag: str, region: Optional[str]) -> bool: | ||
"""Returns whether the image tag is valid.""" | ||
# Oct.14, 2024 by Hysun He: Marketplace images are region neutral, so don't | ||
# check with region for the Marketplace images. | ||
df = _image_df[_image_df['Tag'].str.fullmatch(tag)] | ||
if df.empty: | ||
return False | ||
app_catalog_listing_id = df['AppCatalogListingId'].iloc[0] | ||
if app_catalog_listing_id: | ||
return True | ||
return common.is_image_tag_valid_impl(_image_df, tag, region) | ||
|
||
|
||
def get_image_os_from_tag(tag: str, region: Optional[str]) -> Optional[str]: | ||
del region | ||
df = _image_df[_image_df['Tag'].str.fullmatch(tag)] | ||
if df.empty: | ||
os_type = oci_utils.oci_config.get_default_image_os() | ||
else: | ||
os_type = df['OS'].iloc[0] | ||
|
||
logger.debug(f'Operation system for the image {tag} is {os_type}') | ||
return os_type |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,9 @@ | ||
"""OCI Configuration. | ||
History: | ||
- Zhanghao Wu @ Oct 2023: Formatting and refactoring | ||
- Hysun He ([email protected]) @ Apr, 2023: Initial implementation | ||
- Zhanghao Wu @ Oct 2023: Formatting and refactoring | ||
- Hysun He ([email protected]) @ Oct, 2024: Add default image OS | ||
configuration. | ||
""" | ||
import logging | ||
import os | ||
|
@@ -121,5 +123,13 @@ def get_profile(cls) -> str: | |
return skypilot_config.get_nested( | ||
('oci', 'default', 'oci_config_profile'), 'DEFAULT') | ||
|
||
@classmethod | ||
def get_default_image_os(cls) -> str: | ||
# Get the default image OS. Instead of hardcoding, we give a choice to | ||
# set the default image OS type in the sky's user-config file. (if not | ||
# specified, use the hardcode one at last) | ||
return skypilot_config.get_nested(('oci', 'default', 'image_os_type'), | ||
'ubuntu') | ||
|
||
|
||
oci_config = OCIConfig() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters