feat: Add the ability to override resources on a per-user basis

Closes #1951
DSD-DBS · Nov 20, 2024 · b821a01 · b821a01
1 parent 5290f43
commit b821a01
Show file tree

Hide file tree

Showing 21 changed files with 439 additions and 47 deletions.
diff --git a/backend/capellacollab/alembic/versions/4d42177579a2_add_resource_override.py b/backend/capellacollab/alembic/versions/4d42177579a2_add_resource_override.py
@@ -0,0 +1,49 @@
+# SPDX-FileCopyrightText: Copyright DB InfraGO AG and contributors
+# SPDX-License-Identifier: Apache-2.0
+
+"""Add resource override
+
+Revision ID: 4d42177579a2
+Revises: 2f8449c217fa
+Create Date: 2024-11-12 17:43:23.486104
+
+"""
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "4d42177579a2"
+down_revision = "2f8449c217fa"
+branch_labels = None
+depends_on = None
+
+
+# Standalone definition of the `tools` table for this migration (declared
+# here rather than imported); `upgrade()` selects from it and updates the
+# `config` column.
+t_tools = sa.Table(
+    "tools",
+    sa.MetaData(),
+    sa.Column("id", sa.Integer()),
+    sa.Column("integrations", postgresql.JSONB(astext_type=sa.Text())),
+    sa.Column("config", postgresql.JSONB(astext_type=sa.Text())),
+)
+
+
+def upgrade():
+    """Move each tool's flat CPU/memory resources into a default profile.
+
+    Rewrites ``config["resources"]`` of every row in ``tools`` from
+    ``{"cpu": ..., "memory": ...}`` to
+    ``{"default_profile": {"cpu": ..., "memory": ...}, "additional": {}}``.
+
+    Rows without a config and rows whose resources already contain a
+    ``default_profile`` are left untouched, so running the migration on
+    partially migrated data does not fail.
+    """
+    connection = op.get_bind()
+    results = connection.execute(sa.select(t_tools)).mappings().all()
+
+    for row in results:
+        config = row["config"]
+        if config is None:
+            # Nothing to migrate for rows without a configuration.
+            continue
+
+        old_resources = config.get("resources") or {}
+        if "default_profile" in old_resources:
+            # Already stored in the new format.
+            continue
+
+        # Build a new dict instead of mutating the value fetched from the row.
+        new_config = {
+            **config,
+            "resources": {
+                # Carry over only the keys that exist; absent keys are
+                # presumably filled by the model defaults when loaded.
+                "default_profile": {
+                    key: old_resources[key]
+                    for key in ("cpu", "memory")
+                    if key in old_resources
+                },
+                "additional": {},
+            },
+        }
+
+        connection.execute(
+            sa.update(t_tools)
+            .where(t_tools.c.id == row["id"])
+            .values(config=new_config)
+        )
diff --git a/backend/capellacollab/core/database/migration.py b/backend/capellacollab/core/database/migration.py
@@ -119,10 +119,13 @@ def get_eclipse_session_configuration() -> (
     """
     return tools_models.ToolSessionConfiguration(
         resources=tools_models.Resources(
-            cpu=tools_models.CPUResources(requests=0.4, limits=2),
-            memory=tools_models.MemoryResources(
-                requests="1.6Gi", limits="6Gi"
+            default_profile=tools_models.DefaultResourceProfile(
+                cpu=tools_models.CPUResources(requests=0.4, limits=2),
+                memory=tools_models.MemoryResources(
+                    requests="1.6Gi", limits="6Gi"
+                ),
             ),
+            additional={},
         ),
         environment={
             "RMT_PASSWORD": "{CAPELLACOLLAB_SESSION_TOKEN}",
@@ -264,10 +267,13 @@ def create_jupyter_tool(db: orm.Session) -> tools_models.DatabaseTool:
         integrations=tools_models.ToolIntegrations(jupyter=True),
         config=tools_models.ToolSessionConfiguration(
             resources=tools_models.Resources(
-                cpu=tools_models.CPUResources(requests=1, limits=2),
-                memory=tools_models.MemoryResources(
-                    requests="500Mi", limits="3Gi"
+                default_profile=tools_models.DefaultResourceProfile(
+                    cpu=tools_models.CPUResources(requests=1, limits=2),
+                    memory=tools_models.MemoryResources(
+                        requests="500Mi", limits="3Gi"
+                    ),
                 ),
+                additional={},
             ),
             environment={
                 "JUPYTER_PORT": "8888",

diff --git a/backend/capellacollab/projects/toolmodels/backups/routes.py b/backend/capellacollab/projects/toolmodels/backups/routes.py
@@ -118,7 +118,7 @@ def create_backup(
                 body.include_commit_history,
             ),
             labels=core.get_pipeline_labels(toolmodel),
-            tool_resources=toolmodel.tool.config.resources,
+            tool_resources=toolmodel.tool.config.resources.get_profile(None),
             command="backup",
             schedule=pipeline_config.cron,
             timezone=pipeline_config.timezone,

diff --git a/backend/capellacollab/projects/toolmodels/backups/runs/interface.py b/backend/capellacollab/projects/toolmodels/backups/runs/interface.py
@@ -80,7 +80,9 @@ def _schedule_pending_jobs():
                         pending_run.pipeline.t4c_password,
                         pending_run.pipeline.include_commit_history,
                     ),
-                    tool_resources=pending_run.pipeline.model.tool.config.resources,
+                    tool_resources=pending_run.pipeline.model.tool.config.resources.get_profile(
+                        None
+                    ),
                 )
                 pending_run.reference_id = job_name
                 pending_run.status = models.PipelineRunStatus.SCHEDULED

diff --git a/backend/capellacollab/sessions/operators/k8s.py b/backend/capellacollab/sessions/operators/k8s.py
@@ -113,7 +113,7 @@ def start_session(
             ports=ports,
             volumes=volumes,
             init_volumes=init_volumes,
-            tool_resources=tool.config.resources,
+            tool_resources_profile=tool.config.resources.get_profile(username),
             annotations=annotations,
             labels=labels,
         )
@@ -268,7 +268,10 @@ def create_cronjob(
         image: str,
         command: str,
         labels: dict[str, str],
-        tool_resources: tools_models.Resources,
+        tool_resources: (
+            tools_models.DefaultResourceProfile
+            | tools_models.AdditionalResourceProfile
+        ),
         environment: dict[str, str | None],
         schedule="* * * * *",
         timezone="UTC",
@@ -310,7 +313,10 @@ def create_job(
         command: str,
         labels: dict[str, str],
         environment: dict[str, str | None],
-        tool_resources: tools_models.Resources,
+        tool_resources: (
+            tools_models.DefaultResourceProfile
+            | tools_models.AdditionalResourceProfile
+        ),
         timeout: int = 18000,
     ) -> str:
         _id = self._generate_id()
@@ -553,7 +559,10 @@ def _create_session_pod(
         ports: dict[str, int],
         volumes: list[models.Volume],
         init_volumes: list[models.Volume],
-        tool_resources: tools_models.Resources,
+        tool_resources_profile: (
+            tools_models.DefaultResourceProfile
+            | tools_models.AdditionalResourceProfile
+        ),
         annotations: dict[str, str],
         labels: dict[str, str],
     ) -> client.V1Pod:
@@ -567,12 +576,12 @@ def _create_session_pod(
 
         resources = client.V1ResourceRequirements(
             limits={
-                "cpu": tool_resources.cpu.limits,
-                "memory": tool_resources.memory.limits,
+                "cpu": tool_resources_profile.cpu.limits,
+                "memory": tool_resources_profile.memory.limits,
             },
             requests={
-                "cpu": tool_resources.cpu.requests,
-                "memory": tool_resources.memory.requests,
+                "cpu": tool_resources_profile.cpu.requests,
+                "memory": tool_resources_profile.memory.requests,
             },
         )
 
@@ -779,7 +788,10 @@ def _create_job_spec(
         image: str,
         job_labels: dict[str, str],
         environment: dict[str, str | None],
-        tool_resources: tools_models.Resources,
+        tool_resources: (
+            tools_models.DefaultResourceProfile
+            | tools_models.AdditionalResourceProfile
+        ),
         args: list[str] | None = None,
         timeout: int = 18000,
     ) -> client.V1JobSpec:

diff --git a/backend/capellacollab/tools/crud.py b/backend/capellacollab/tools/crud.py
@@ -67,6 +67,24 @@ def update_tool(
     return tool
 
 
+def update_tools_username(
+    db: orm.Session, old_username: str, new_username: str
+):
+    """Replace a username in the additional resource profiles of all tools.
+
+    Every occurrence of `old_username` in the `usernames` list of any
+    additional resource profile is replaced with `new_username`. Tools
+    whose configuration changed are flagged as modified, and the session
+    is committed at the end.
+    """
+    for tool in get_tools(db):
+        renamed = False
+        for profile in tool.config.resources.additional.values():
+            if old_username not in profile.usernames:
+                continue
+            profile.usernames = [
+                new_username if name == old_username else name
+                for name in profile.usernames
+            ]
+            renamed = True
+        if renamed:
+            # The change happens inside the `config` value, so the
+            # attribute is explicitly marked as modified.
+            orm.attributes.flag_modified(tool, "config")
+    db.commit()
+
+
 def delete_tool(db: orm.Session, tool: models.DatabaseTool) -> None:
     db.delete(tool)
     db.commit()

diff --git a/backend/capellacollab/tools/models.py b/backend/capellacollab/tools/models.py
@@ -232,7 +232,7 @@ class MemoryResources(core_pydantic.BaseModel):
     )
 
 
-class Resources(core_pydantic.BaseModelStrict):
+class DefaultResourceProfile(core_pydantic.BaseModel):
     cpu: CPUResources = pydantic.Field(
         default=CPUResources(),
         description="Configuration about the number of CPU cores that sessions can use.",
@@ -243,6 +243,60 @@ class Resources(core_pydantic.BaseModelStrict):
     )
 
 
+class AdditionalResourceProfile(DefaultResourceProfile):
+    """Resource profile that applies only to the users in `usernames`."""
+
+    # `default_factory=list` rather than `default=None`: the field is typed
+    # as `list[str]` and is used with `in` and `len()`, which raise a
+    # `TypeError` on `None`.
+    usernames: list[str] = pydantic.Field(
+        default_factory=list,
+        description="List of usernames, which are allowed to use this resource profile.",
+    )
+
+
+class Resources(core_pydantic.BaseModelStrict):
+    """Resource configuration: a default profile plus per-user overrides."""
+
+    default_profile: DefaultResourceProfile = pydantic.Field(
+        default_factory=DefaultResourceProfile,
+        description="Default resource profile, which is used when no other profile matches.",
+    )
+    additional: dict[str, AdditionalResourceProfile] = pydantic.Field(
+        default={},
+        description="Additional resource profiles, which can be used to limit the resource usage of sessions.",
+    )
+
+    def get_profile(
+        self, username: str | None
+    ) -> DefaultResourceProfile | AdditionalResourceProfile:
+        """Return the resource profile that applies to `username`.
+
+        The first additional profile that lists `username` is returned.
+        If `username` is None or no additional profile lists it, the
+        default profile is returned.
+        """
+        if username is None:
+            return self.default_profile
+
+        for profile in self.additional.values():
+            # `usernames` can be None when a profile was created without it.
+            if username in (profile.usernames or ()):
+                return profile
+
+        return self.default_profile
+
+    @pydantic.field_validator("additional")
+    @classmethod
+    def check_additional_profiles(
+        cls,
+        value: dict[str, AdditionalResourceProfile],
+    ) -> dict[str, AdditionalResourceProfile]:
+        """Ensure each username appears at most once over all profiles.
+
+        Raises:
+            ValueError: If a profile lists a username more than once, or
+                if a username is listed in more than one profile.
+        """
+        for profile_name, profile in value.items():
+            profile_usernames = profile.usernames or []
+            if len(profile_usernames) != len(set(profile_usernames)):
+                raise ValueError(
+                    f"Usernames in profile '{profile_name}' must be unique."
+                )
+
+        # Each profile is duplicate-free at this point, so a duplicate in
+        # the flattened list means a username is in multiple profiles.
+        all_usernames = [
+            username
+            for profile in value.values()
+            for username in (profile.usernames or [])
+        ]
+        if len(all_usernames) != len(set(all_usernames)):
+            raise ValueError("Usernames must be unique across all profiles.")
+
+        return value
+
+
 class PrometheusConfiguration(core_pydantic.BaseModel):
     path: str = pydantic.Field(default="/prometheus")
 

diff --git a/backend/capellacollab/users/crud.py b/backend/capellacollab/users/crud.py
@@ -8,6 +8,7 @@
 from sqlalchemy import orm
 
 from capellacollab.core import database
+from capellacollab.tools import crud as tools_crud
 from capellacollab.users import models
 
 
@@ -80,6 +81,8 @@ def create_user(
 def update_user(
     db: orm.Session, user: models.DatabaseUser, patch_user: models.PatchUser
 ) -> models.DatabaseUser:
+    """Apply `patch_user` to `user`, commit, and return the user.
+
+    When the patch renames the user, the username is first replaced in the
+    resource profiles of all tools.
+    """
+    # Runs before the patch is applied because `user.name` is passed as the
+    # old username. Skipped when the name is unchanged, to avoid rewriting
+    # and committing every tool configuration for a no-op rename.
+    if patch_user.name and patch_user.name != user.name:
+        tools_crud.update_tools_username(db, user.name, patch_user.name)
     database.patch_database_with_pydantic_object(user, patch_user)
     db.commit()
     return user

diff --git a/backend/tests/sessions/k8s_operator/test_session_k8s_operator.py b/backend/tests/sessions/k8s_operator/test_session_k8s_operator.py
@@ -118,7 +118,7 @@ def test_create_job(monkeypatch: pytest.MonkeyPatch):
         command="fakecmd",
         labels={"key": "value"},
         environment={"ENVVAR": "value"},
-        tool_resources=tools_models.Resources(),
+        tool_resources=tools_models.DefaultResourceProfile(),
     )
 
     assert result
@@ -136,7 +136,7 @@ def test_create_cronjob(monkeypatch: pytest.MonkeyPatch):
         command="fakecmd",
         environment={"ENVVAR": "value"},
         labels={},
-        tool_resources=tools_models.Resources(),
+        tool_resources=tools_models.DefaultResourceProfile(),
     )
 
     assert result

diff --git a/backend/tests/tools/test_tools_resources.py b/backend/tests/tools/test_tools_resources.py
@@ -0,0 +1,60 @@
+# SPDX-FileCopyrightText: Copyright DB InfraGO AG and contributors
+# SPDX-License-Identifier: Apache-2.0
+
+import pydantic
+import pytest
+
+from capellacollab.tools import models as tools_models
+
+
+def test_validate_tools():
+    """Duplicate usernames within or across profiles are rejected."""
+    invalid_profile_usernames = (
+        # The same username twice in a single profile
+        {"test1": ["test", "test"]},
+        # The same username in two different profiles
+        {"test1": ["test"], "test2": ["test"]},
+    )
+
+    for profile_usernames in invalid_profile_usernames:
+        with pytest.raises(pydantic.ValidationError):
+            tools_models.Resources(
+                default_profile=tools_models.DefaultResourceProfile(),
+                additional={
+                    profile_name: tools_models.AdditionalResourceProfile(
+                        usernames=usernames
+                    )
+                    for profile_name, usernames in profile_usernames.items()
+                },
+            )
+
+
+def test_get_profile():
+    """`get_profile` returns the user's profile or the default profile."""
+    default_profile = tools_models.DefaultResourceProfile(
+        memory=tools_models.MemoryResources(requests="1Gi", limits="2Gi"),
+        cpu=tools_models.CPUResources(requests=0.4, limits=2),
+    )
+    # Use resource values that differ from the default profile, so that
+    # the assertions below can tell the two profiles apart.
+    different_profile = tools_models.AdditionalResourceProfile(
+        usernames=["testuser"],
+        memory=tools_models.MemoryResources(requests="500Mi", limits="3Gi"),
+        cpu=tools_models.CPUResources(requests=1, limits=2),
+    )
+
+    resources = tools_models.Resources(
+        default_profile=default_profile,
+        additional={
+            "test": different_profile,
+        },
+    )
+
+    # No username: default profile
+    resource_profile = resources.get_profile(None)
+    assert resource_profile == default_profile
+
+    # Username that isn't listed in any profile: default profile
+    resource_profile = resources.get_profile("fakeuser")
+    assert resource_profile == default_profile
+
+    # Username listed in the additional profile
+    resource_profile = resources.get_profile("testuser")
+    assert resource_profile == different_profile
+    assert resource_profile.memory != default_profile.memory