Skip to content

Commit

Permalink
feat: Add the ability to override resources on a per-user basis
Browse files Browse the repository at this point in the history
Closes #1951
  • Loading branch information
zusorio committed Nov 20, 2024
1 parent 5290f43 commit b821a01
Show file tree
Hide file tree
Showing 21 changed files with 439 additions and 47 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# SPDX-FileCopyrightText: Copyright DB InfraGO AG and contributors
# SPDX-License-Identifier: Apache-2.0

"""Add resource override
Revision ID: 4d42177579a2
Revises: 2f8449c217fa
Create Date: 2024-11-12 17:43:23.486104
"""
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "4d42177579a2"
down_revision = "2f8449c217fa"
branch_labels = None
depends_on = None


# Standalone description of the `tools` table used by this migration.
# It lists the `id` column plus the two JSONB columns `integrations` and
# `config`; `upgrade()` below selects from and updates this table.
t_tools = sa.Table(
"tools",
sa.MetaData(),
sa.Column("id", sa.Integer()),
sa.Column("integrations", postgresql.JSONB(astext_type=sa.Text())),
sa.Column("config", postgresql.JSONB(astext_type=sa.Text())),
)


def upgrade():
    """Move the flat resource settings of every tool into a default profile.

    For each row of the ``tools`` table, the existing ``cpu`` and ``memory``
    entries under ``config["resources"]`` are nested below a new
    ``default_profile`` key, and an empty ``additional`` mapping is added
    next to it. The rewritten config is then written back to the same row.
    """
    bind = op.get_bind()
    tools = bind.execute(sa.select(t_tools)).mappings().all()

    for tool in tools:
        tool_config = tool["config"]
        flat_resources = tool_config["resources"]

        # Replace the flat structure with the profile-based one.
        tool_config["resources"] = {
            "default_profile": {
                "cpu": flat_resources["cpu"],
                "memory": flat_resources["memory"],
            },
            "additional": {},
        }

        statement = (
            sa.update(t_tools)
            .where(t_tools.c.id == tool["id"])
            .values(config=tool_config)
        )
        bind.execute(statement)
18 changes: 12 additions & 6 deletions backend/capellacollab/core/database/migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,13 @@ def get_eclipse_session_configuration() -> (
"""
return tools_models.ToolSessionConfiguration(
resources=tools_models.Resources(
cpu=tools_models.CPUResources(requests=0.4, limits=2),
memory=tools_models.MemoryResources(
requests="1.6Gi", limits="6Gi"
default_profile=tools_models.DefaultResourceProfile(
cpu=tools_models.CPUResources(requests=0.4, limits=2),
memory=tools_models.MemoryResources(
requests="1.6Gi", limits="6Gi"
),
),
additional={},
),
environment={
"RMT_PASSWORD": "{CAPELLACOLLAB_SESSION_TOKEN}",
Expand Down Expand Up @@ -264,10 +267,13 @@ def create_jupyter_tool(db: orm.Session) -> tools_models.DatabaseTool:
integrations=tools_models.ToolIntegrations(jupyter=True),
config=tools_models.ToolSessionConfiguration(
resources=tools_models.Resources(
cpu=tools_models.CPUResources(requests=1, limits=2),
memory=tools_models.MemoryResources(
requests="500Mi", limits="3Gi"
default_profile=tools_models.DefaultResourceProfile(
cpu=tools_models.CPUResources(requests=1, limits=2),
memory=tools_models.MemoryResources(
requests="500Mi", limits="3Gi"
),
),
additional={},
),
environment={
"JUPYTER_PORT": "8888",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def create_backup(
body.include_commit_history,
),
labels=core.get_pipeline_labels(toolmodel),
tool_resources=toolmodel.tool.config.resources,
tool_resources=toolmodel.tool.config.resources.get_profile(None),
command="backup",
schedule=pipeline_config.cron,
timezone=pipeline_config.timezone,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ def _schedule_pending_jobs():
pending_run.pipeline.t4c_password,
pending_run.pipeline.include_commit_history,
),
tool_resources=pending_run.pipeline.model.tool.config.resources,
tool_resources=pending_run.pipeline.model.tool.config.resources.get_profile(
None
),
)
pending_run.reference_id = job_name
pending_run.status = models.PipelineRunStatus.SCHEDULED
Expand Down
30 changes: 21 additions & 9 deletions backend/capellacollab/sessions/operators/k8s.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def start_session(
ports=ports,
volumes=volumes,
init_volumes=init_volumes,
tool_resources=tool.config.resources,
tool_resources_profile=tool.config.resources.get_profile(username),
annotations=annotations,
labels=labels,
)
Expand Down Expand Up @@ -268,7 +268,10 @@ def create_cronjob(
image: str,
command: str,
labels: dict[str, str],
tool_resources: tools_models.Resources,
tool_resources: (
tools_models.DefaultResourceProfile
| tools_models.AdditionalResourceProfile
),
environment: dict[str, str | None],
schedule="* * * * *",
timezone="UTC",
Expand Down Expand Up @@ -310,7 +313,10 @@ def create_job(
command: str,
labels: dict[str, str],
environment: dict[str, str | None],
tool_resources: tools_models.Resources,
tool_resources: (
tools_models.DefaultResourceProfile
| tools_models.AdditionalResourceProfile
),
timeout: int = 18000,
) -> str:
_id = self._generate_id()
Expand Down Expand Up @@ -553,7 +559,10 @@ def _create_session_pod(
ports: dict[str, int],
volumes: list[models.Volume],
init_volumes: list[models.Volume],
tool_resources: tools_models.Resources,
tool_resources_profile: (
tools_models.DefaultResourceProfile
| tools_models.AdditionalResourceProfile
),
annotations: dict[str, str],
labels: dict[str, str],
) -> client.V1Pod:
Expand All @@ -567,12 +576,12 @@ def _create_session_pod(

resources = client.V1ResourceRequirements(
limits={
"cpu": tool_resources.cpu.limits,
"memory": tool_resources.memory.limits,
"cpu": tool_resources_profile.cpu.limits,
"memory": tool_resources_profile.memory.limits,
},
requests={
"cpu": tool_resources.cpu.requests,
"memory": tool_resources.memory.requests,
"cpu": tool_resources_profile.cpu.requests,
"memory": tool_resources_profile.memory.requests,
},
)

Expand Down Expand Up @@ -779,7 +788,10 @@ def _create_job_spec(
image: str,
job_labels: dict[str, str],
environment: dict[str, str | None],
tool_resources: tools_models.Resources,
tool_resources: (
tools_models.DefaultResourceProfile
| tools_models.AdditionalResourceProfile
),
args: list[str] | None = None,
timeout: int = 18000,
) -> client.V1JobSpec:
Expand Down
18 changes: 18 additions & 0 deletions backend/capellacollab/tools/crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,24 @@ def update_tool(
return tool


def update_tools_username(
    db: orm.Session, old_username: str, new_username: str
):
    """Rename a user in the additional resource profiles of all tools.

    Every occurrence of ``old_username`` in the ``usernames`` list of an
    additional resource profile is replaced by ``new_username``. Tools whose
    profiles were changed are flagged as modified before the session is
    committed.
    """
    for tool in get_tools(db):
        touched = False

        for profile in tool.config.resources.additional.values():
            if old_username not in profile.usernames:
                continue

            profile.usernames = [
                new_username if name == old_username else name
                for name in profile.usernames
            ]
            touched = True

        if touched:
            # The usernames were edited on the nested config object, so the
            # `config` attribute is explicitly marked as changed.
            orm.attributes.flag_modified(tool, "config")

    db.commit()


def delete_tool(db: orm.Session, tool: models.DatabaseTool) -> None:
db.delete(tool)
db.commit()
Expand Down
56 changes: 55 additions & 1 deletion backend/capellacollab/tools/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ class MemoryResources(core_pydantic.BaseModel):
)


class Resources(core_pydantic.BaseModelStrict):
class DefaultResourceProfile(core_pydantic.BaseModel):
cpu: CPUResources = pydantic.Field(
default=CPUResources(),
description="Configuration about the number of CPU cores that sessions can use.",
Expand All @@ -243,6 +243,60 @@ class Resources(core_pydantic.BaseModelStrict):
)


class AdditionalResourceProfile(DefaultResourceProfile):
    # Resource profile that is tied to an explicit list of usernames.
    #
    # `usernames` defaults to an empty list rather than `None`: the field is
    # annotated as `list[str]`, and code working with profiles uses `in` and
    # `len()` on it, both of which raise a TypeError on `None`.
    usernames: list[str] = pydantic.Field(
        default_factory=list,
        description="List of usernames, which are allowed to use this resource profile.",
    )


class Resources(core_pydantic.BaseModelStrict):
    # Resource configuration consisting of one default profile and any
    # number of named additional profiles that are bound to usernames.
    default_profile: DefaultResourceProfile = pydantic.Field(
        default_factory=DefaultResourceProfile,
        description="Default resource profile, which is used when no other profile matches.",
    )
    additional: dict[str, AdditionalResourceProfile] = pydantic.Field(
        default={},
        description="Additional resource profiles, which can be used to limit the resource usage of sessions.",
    )

    def get_profile(
        self, username: str | None
    ) -> DefaultResourceProfile | AdditionalResourceProfile:
        """Return the resource profile that applies to ``username``.

        The first additional profile listing the username is returned. If
        ``username`` is ``None`` or no additional profile lists it, the
        default profile is returned.
        """
        if username is None:
            return self.default_profile

        for profile in self.additional.values():
            # `usernames` can be None for a profile created without any
            # usernames; treat that as "matches nobody" instead of crashing.
            if username in (profile.usernames or ()):
                return profile

        return self.default_profile

    @pydantic.field_validator("additional")
    @classmethod
    def check_additional_profiles(
        cls,
        value: dict[str, AdditionalResourceProfile],
    ) -> dict[str, AdditionalResourceProfile]:
        """Ensure that every username is assigned to at most one profile.

        Raises:
            ValueError: If a username is listed twice within one profile,
                or if the same username appears in more than one profile.
        """
        # First pass: no duplicates inside a single profile.
        for profile_name, profile in value.items():
            names = profile.usernames or []
            if len(names) != len(set(names)):
                raise ValueError(
                    f"Usernames in profile '{profile_name}' must be unique."
                )

        # Second pass: no username shared between different profiles.
        assigned: set[str] = set()
        for profile in value.values():
            names_in_profile = set(profile.usernames or [])
            if not assigned.isdisjoint(names_in_profile):
                raise ValueError(
                    "Usernames must be unique across all profiles."
                )
            assigned |= names_in_profile

        return value


class PrometheusConfiguration(core_pydantic.BaseModel):
path: str = pydantic.Field(default="/prometheus")

Expand Down
3 changes: 3 additions & 0 deletions backend/capellacollab/users/crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from sqlalchemy import orm

from capellacollab.core import database
from capellacollab.tools import crud as tools_crud
from capellacollab.users import models


Expand Down Expand Up @@ -80,6 +81,8 @@ def create_user(
def update_user(
    db: orm.Session, user: models.DatabaseUser, patch_user: models.PatchUser
) -> models.DatabaseUser:
    """Apply ``patch_user`` to ``user`` and commit the change.

    When the patch renames the user, the rename is first passed to
    ``tools_crud.update_tools_username`` with the old and new name.

    Returns:
        The patched ``user`` object.
    """
    new_name = patch_user.name

    # Only propagate real renames: if the name is unset or unchanged, there
    # is nothing to replace, so the tools update is skipped.
    if new_name and new_name != user.name:
        tools_crud.update_tools_username(db, user.name, new_name)

    database.patch_database_with_pydantic_object(user, patch_user)
    db.commit()
    return user
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def test_create_job(monkeypatch: pytest.MonkeyPatch):
command="fakecmd",
labels={"key": "value"},
environment={"ENVVAR": "value"},
tool_resources=tools_models.Resources(),
tool_resources=tools_models.DefaultResourceProfile(),
)

assert result
Expand All @@ -136,7 +136,7 @@ def test_create_cronjob(monkeypatch: pytest.MonkeyPatch):
command="fakecmd",
environment={"ENVVAR": "value"},
labels={},
tool_resources=tools_models.Resources(),
tool_resources=tools_models.DefaultResourceProfile(),
)

assert result
Expand Down
60 changes: 60 additions & 0 deletions backend/tests/tools/test_tools_resources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# SPDX-FileCopyrightText: Copyright DB InfraGO AG and contributors
# SPDX-License-Identifier: Apache-2.0

import pydantic
import pytest

from capellacollab.tools import models as tools_models


def test_validate_tools():
    """Duplicate usernames must be rejected when building ``Resources``."""
    profile_cls = tools_models.AdditionalResourceProfile

    # The same username twice within a single profile.
    with pytest.raises(pydantic.ValidationError):
        duplicated_within_profile = {
            "test1": profile_cls(usernames=["test", "test"]),
        }
        tools_models.Resources(
            default_profile=tools_models.DefaultResourceProfile(),
            additional=duplicated_within_profile,
        )

    # The same username in two different profiles.
    with pytest.raises(pydantic.ValidationError):
        duplicated_across_profiles = {
            "test1": profile_cls(usernames=["test"]),
            "test2": profile_cls(usernames=["test"]),
        }
        tools_models.Resources(
            default_profile=tools_models.DefaultResourceProfile(),
            additional=duplicated_across_profiles,
        )


def test_get_profile():
    """``get_profile`` falls back to the default unless a username matches."""
    default_profile = tools_models.DefaultResourceProfile(
        memory=tools_models.MemoryResources(requests="1Gi", limits="2Gi"),
        cpu=tools_models.CPUResources(requests=0.4, limits=2),
    )
    different_profile = tools_models.AdditionalResourceProfile(
        usernames=["testuser"],
        memory=tools_models.MemoryResources(requests="1Gi", limits="2Gi"),
        cpu=tools_models.CPUResources(requests=0.4, limits=2),
    )
    resources = tools_models.Resources(
        default_profile=default_profile,
        additional={"test": different_profile},
    )

    # (username passed to get_profile, profile expected in return)
    expectations = [
        (None, default_profile),
        ("fakeuser", default_profile),
        ("testuser", different_profile),
    ]
    for username, expected_profile in expectations:
        assert resources.get_profile(username) == expected_profile
Loading

0 comments on commit b821a01

Please sign in to comment.