Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Triton gpt uids #8

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/dsmlp/app/config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
GPU_LABEL = "nvidia.com/gpu"
GPU_LIMIT_ANNOTATION = 'gpu-limit'
LOW_PRIORITY_CLASS = "low"
LOW_PRIORITY_CLASS = "low"
33 changes: 33 additions & 0 deletions src/dsmlp/app/tritongpt_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from dataclasses import dataclass
import json
from typing import List, Optional

from dataclasses_json import dataclass_json
from dsmlp.plugin.awsed import AwsedClient, UnsuccessfulRequest
from dsmlp.plugin.console import Console
from dsmlp.plugin.course import ConfigProvider
from dsmlp.plugin.kube import KubeClient, NotFound
import jsonify

from dsmlp.plugin.logger import Logger
from dsmlp.app.types import *
from dsmlp.app.config import *

# used in order to bypass awsed for tritonGPT while still maintaining UID security.
class TritonGPTValidator(ComponentValidator):
    """Check a pod's requested UID against the UIDs permitted by its namespace.

    The permitted UIDs are read from the namespace through
    ``KubeClient.get_tgpt_uids`` rather than from AWSEd.
    """

    def __init__(self, kube: KubeClient, logger: Logger) -> None:
        """Keep the Kubernetes client and logger for later use."""
        self.kube = kube
        self.logger = logger

    def validate_pod(self, request: Request):
        """Deny the pod unless its pod-level ``runAsUser`` is a permitted UID.

        Args:
            request: the admission request describing the pod.

        Raises:
            ValidationFailure: if the pod-level ``runAsUser`` is missing or is
                not one of the namespace's permitted UIDs.
        """
        namespace = self.kube.get_namespace(request.namespace)
        permitted_uids = self.kube.get_tgpt_uids(namespace)

        # securityContext is optional in a Kubernetes pod spec. Guard against
        # it being None so a pod without one is denied explicitly instead of
        # failing with AttributeError.
        security_context = request.object.spec.securityContext
        requested_uid = None if security_context is None else security_context.runAsUser

        # NOTE(review): only the pod-level runAsUser is checked here; a
        # container-level securityContext.runAsUser is not inspected - confirm
        # that is acceptable for TritonGPT namespaces.

        # permitted_uids holds strings, so compare against the string form.
        if requested_uid is None or str(requested_uid) not in permitted_uids:
            raise ValidationFailure(f"TritonGPT Validator: user with access to UIDs {permitted_uids} attempted to run a pod as {requested_uid}. Pod denied.")
23 changes: 22 additions & 1 deletion src/dsmlp/app/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,15 @@
from abc import ABCMeta, abstractmethod
from dsmlp.app.id_validator import IDValidator
from dsmlp.app.gpu_validator import GPUValidator
from dsmlp.app.tritongpt_validator import TritonGPTValidator
from dsmlp.app.types import *
from dsmlp.app.config import *

class Validator:
def __init__(self, awsed: AwsedClient, kube: KubeClient, logger: Logger) -> None:
    """Store the collaborators and build the default component validators."""
    self.awsed, self.logger, self.kube = awsed, logger, kube

    # Validators run in list order: ID checks first, then GPU checks.
    id_validator = IDValidator(awsed, logger)
    gpu_validator = GPUValidator(awsed, kube, logger)
    self.component_validators = [id_validator, gpu_validator]

def validate_request(self, admission_review_json):
Expand Down Expand Up @@ -51,6 +54,24 @@ def handle_request(self, request: Request):
return self.admission_response(request.uid, True, "Allowed")

def validate_pod(self, request: Request):

### if tgpt-validator == enabled
### run special tritongpt validator that gets permitted UIDs from namespace instead of sicad

try:
namespace = self.kube.get_namespace(request.namespace)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to streamline logic - how about something like this?
` #

def validate_pod(self, request: Request):
    """Validate a pod, using the TritonGPT validator when the namespace opts in.

    If the namespace's TGPT label is "enabled", only ``TritonGPTValidator``
    runs. Otherwise, or when the label cannot be read, every validator in
    ``self.component_validators`` runs once.
    """

    ### if tgpt-validator == enabled
    ### run special tritongpt validator that gets permitted UIDs from namespace instead of sicad

    # Keep the try body to the label lookup only, so a KeyError raised while
    # the TritonGPT validator itself runs is not mistaken for a lookup failure.
    try:
        namespace = self.kube.get_namespace(request.namespace)
        tgpt_label = self.kube.get_tgpt_label(namespace)
    except KeyError as err:
        self.logger.debug("Failed to evaluate TGPT label logic. Falling back on regular validator components. Error: " + str(err))
        tgpt_label = ""

    if tgpt_label == "enabled":
        self.logger.info("Triton GPT Mode Activated. Only running TritonGPT Validator.")
        TritonGPTValidator(self.kube, self.logger).validate_pod(request)
        return

    for component_validator in self.component_validators:
        component_validator.validate_pod(request)

`

tgpt_label = self.kube.get_tgpt_label(namespace)

except Exception as err:
self.logger.info("Failed to evaluate TGPT label logic. Falling back on regular validator components. Error: " + str(err))
for component_validator in self.component_validators:
component_validator.validate_pod(request)

if(tgpt_label == "enabled"):
self.logger.info("Triton GPT Mode Activated. Only running TritonGPT Validator.")
TritonGPTValidator(self.kube, self.logger).validate_pod(request)
return

for component_validator in self.component_validators:
component_validator.validate_pod(request)

Expand All @@ -65,4 +86,4 @@ def admission_response(self, uid, allowed, message):
"message": message
}
}
}
}
9 changes: 9 additions & 0 deletions src/dsmlp/ext/kube.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@ def get_gpus_in_namespace(self, name: str) -> int:

return gpu_count

def get_tgpt_label(self, namespace) -> str:
    """Return the namespace's ``tgpt-validator`` label value, or ``""`` if unset.

    The key is spelled ``tgpt-validator`` to match the label that the
    namespaces in the tests carry and that the fake client reads.
    """
    return namespace.labels.get("tgpt-validator", "")

# TODO: add a generic helper for reading an arbitrary namespace label.
def get_tgpt_uids(self, namespace) -> list:
    """Return the UIDs listed in the namespace's ``permitted-uids`` label.

    The label value should be comma delimited, i.e. ``2000,100,2,20``.
    Each entry is returned as a string with surrounding whitespace removed.
    Empty entries are dropped, so a missing or empty label yields ``[]``.
    """
    # NOTE(review): Kubernetes label values may not contain ',' - confirm that
    # this value is stored somewhere that permits a comma-delimited list.
    raw_uids = namespace.labels.get("permitted-uids", "")
    return [uid.strip() for uid in raw_uids.split(',') if uid.strip()]

# noinspection PyMethodMayBeStatic

def get_policy_api(self) -> CoreV1Api:
Expand Down
165 changes: 165 additions & 0 deletions tests/app/test_tgpt_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import inspect
from operator import contains
from dsmlp.app.validator import Validator
from dsmlp.plugin.awsed import ListTeamsResponse, TeamJson, UserResponse
from dsmlp.plugin.kube import Namespace
from hamcrest import assert_that, contains_inanyorder, equal_to, has_item
from tests.fakes import FakeAwsedClient, FakeLogger, FakeKubeClient


class TestTGPTValidator:
    """Exercise the TritonGPT validation path through ``Validator``.

    Two namespaces are registered: ``user10`` has the ``tgpt-validator``
    label set to ``enabled`` and permits UIDs 30 and 3000; ``user100`` has the
    label set to ``disabled`` and so goes through the regular validators.
    """

    REQUEST_UID = "705ab4f5-6393-11e8-b7cc-42010a800002"
    USERNAME = "system:kube-system"

    def setup_method(self) -> None:
        self.logger = FakeLogger()
        self.awsed_client = FakeAwsedClient()
        self.kube_client = FakeKubeClient()

        # Namespace with the TritonGPT validator enabled.
        self.awsed_client.add_user(
            'user10', UserResponse(uid=30, enrollments=[]))
        self.awsed_client.add_teams('user10', ListTeamsResponse(
            teams=[TeamJson(gid=1000)]
        ))
        self.kube_client.add_namespace('user10', Namespace(
            name='user10', labels={'k8s-sync': 'true', 'tgpt-validator': 'enabled', 'permitted-uids': '30,3000'}, gpu_quota=10))

        # Namespace with the TritonGPT validator disabled. The teams are
        # registered under 'user100' so they belong to this user rather than
        # being added to 'user10' a second time.
        self.awsed_client.add_user(
            'user100', UserResponse(uid=10, enrollments=[]))
        self.awsed_client.add_teams('user100', ListTeamsResponse(
            teams=[TeamJson(gid=1000)]
        ))
        self.kube_client.add_namespace('user100', Namespace(
            name='user100', labels={'k8s-sync': 'true', 'tgpt-validator': 'disabled', 'permitted-uids': '10'}, gpu_quota=10))

    def pod_request(self, namespace: str, run_as_user: int) -> dict:
        """Build an admission review for a pod with the given pod-level runAsUser."""
        return {
            "request": {
                "uid": self.REQUEST_UID,
                "namespace": namespace,
                "userInfo": {
                    "username": self.USERNAME
                },
                "object": {
                    "metadata": {
                        "labels": {}
                    },
                    "spec": {
                        "containers": [{}],
                        "securityContext": {"runAsUser": run_as_user},
                    },
                }
            }
        }

    def test_good_request(self):
        self.when_validate(self.pod_request("user10", 30))

        assert_that(self.logger.messages, has_item(
            "INFO Allowed request username=system:kube-system namespace=user10 uid=705ab4f5-6393-11e8-b7cc-42010a800002"))

    def test_good_request_2(self):
        self.when_validate(self.pod_request("user10", 3000))

        assert_that(self.logger.messages, has_item(
            "INFO Allowed request username=system:kube-system namespace=user10 uid=705ab4f5-6393-11e8-b7cc-42010a800002"))

    def test_bad_request(self):
        # 300 is not one of the permitted UIDs (30, 3000) for user10.
        self.when_validate(self.pod_request("user10", 300))

        assert_that(self.logger.messages, has_item(
            "INFO Denied request username=system:kube-system namespace=user10 reason=TritonGPT Validator: user with access to UIDs ['30', '3000'] attempted to run a pod as 300. Pod denied. uid=705ab4f5-6393-11e8-b7cc-42010a800002"))

    def test_good_request_not_enabled_permitted_on(self):
        # user100 has tgpt-validator=disabled, so the regular validators run.
        self.when_validate(self.pod_request("user100", 10))

        assert_that(self.logger.messages, has_item(
            "INFO Allowed request username=system:kube-system namespace=user100 uid=705ab4f5-6393-11e8-b7cc-42010a800002"))

    # TODO: add a GPU-quota test for a namespace with the TritonGPT validator enabled.

    def when_validate(self, json):
        """Run ``json`` through a fresh ``Validator`` and return its response."""
        validator = Validator(self.awsed_client, self.kube_client, self.logger)
        response = validator.validate_request(json)

        return response
12 changes: 12 additions & 0 deletions tests/fakes.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,18 @@ def add_namespace(self, name: str, namespace: Namespace):
def set_existing_gpus(self, name: str, gpus: int):
    """Store ``gpus`` in ``self.existing_gpus`` under the key ``name``."""
    gpu_registry = self.existing_gpus
    gpu_registry[name] = gpus

def get_tgpt_label(self, namespace) -> str:
    """Return the namespace's ``tgpt-validator`` label value, or ``""`` if unset."""
    # dict.get with a default never raises KeyError, so no handler is needed.
    return namespace.labels.get("tgpt-validator", "")

def get_tgpt_uids(self, namespace) -> list:
    """Return the ``permitted-uids`` label split on commas, as a list of strings.

    A missing label is treated as an empty string, so the call yields ``[""]``
    instead of raising AttributeError from calling ``.split`` on ``None``.
    """
    return namespace.labels.get("permitted-uids", "").split(',')


class FakeLogger(Logger):
def __init__(self) -> None:
Expand Down
Loading