Merge branch 'staging' into prod

tapis-project · Sep 14, 2024 · 7d9fcfb · 7d9fcfb
2 parents 9c0c485 + b48dfe6
commit 7d9fcfb
Show file tree

Hide file tree

Showing 65 changed files with 5,818 additions and 1,387 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,19 +1,34 @@
-# Change Log
+# Change Log for the Tapis Pods Service
+
 All notable changes to this project will be documented in this file.
 
-## 1.6.1 - 2024-02-xx:
+Please find documentation here:
+https://tapis.readthedocs.io/en/latest/index.html
+
+You may also reference live-docs based on the OpenAPI v3 specification here:
+https://tapis-project.github.io/live-docs
+
+## 1.7.0 - 2024-09-13:
 
 ### Breaking Changes:
-- No change.
+- Large DB model changes. Migrations should automate changes, but be warned.
 
 ### New features:
+- Added Pod Templates and Template Tags to define sharable Pod templates.
+- Added Image endpoints.
+- Added Volume & Snapshot download endpoints.
+- Added compute_queues, configurable with a Kubernetes flag, allowing multi-GPU configuration.
+- Added initial support for a tapis-auth option so that Pods can use Tapis auth. It will be fully implemented after client changes.
+- PVC pod volume option now exists permanently. Non-sharable, but can be useful.
+- Revamped auth logic for organizational purposes.
 - Changed CORS for tapis-ui integration.
+- Lots of changes for TapisUI.
 - Added auto saving openapi.json, removing manual step of copy/paste.
 - Updating openapi.json.
 - Added dev_tools useful links to `make vars`.
 
 ### Bug fixes:
-- No change.
+- Multi-slash object routing is now much better.
 
 
 ## 1.6.0 - 2024-02-05

diff --git a/Dockerfile b/Dockerfile
@@ -7,8 +7,8 @@ RUN useradd tapis -u 4872
 WORKDIR /home/tapis/
 
 # set the name of the api, for use by some of the common modules.
-ENV TAPIS_API pods
-ENV PYTHONPATH .:*:pods:pods/*
+ENV TAPIS_API=pods
+ENV PYTHONPATH=.:*:pods:pods/*
 
 ## PACKAGE INITIALIZATION
 COPY requirements.txt /home/tapis/

diff --git a/Makefile b/Makefile
@@ -115,6 +115,11 @@ endif
 	@echo ""
 
 
+#: Initialize a few templates
+init-data:
+	@echo "Not yet implemented"
+
+
 # Runs pytest in the pods-api container
 #: Run tests in pods-api container
 test:
@@ -132,6 +137,7 @@ build: vars
 	@echo "Makefile: $(GREEN)build$(NC)"
 	@echo "  🔨 : Running image build."
 	@echo "  🌎 : Using daemon: $(LCYAN)minikube$(NC)"
+	@echo "  🏃 : Building: This part takes a while if it takes a while."
 	@echo ""
 	minikube image build -t $(SERVICE_NAME)/pods-api:$$TAG ./
 	@echo ""

diff --git a/alembic/env.py b/alembic/env.py
@@ -69,7 +69,9 @@
 from models_pods import Pod, Password
 from models_volumes import Volume
 from models_snapshots import Snapshot
-from models_admin import Template
+from models_templates import Template
+from models_images import Image
+from models_templates_tags import TemplateTag
 
 target_metadata = SQLModel.metadata
 

diff --git a/alembic/versions/6333debf3f60_init8.py b/alembic/versions/6333debf3f60_init8.py
@@ -0,0 +1,86 @@
+"""init8
+
+Revision ID: 6333debf3f60
+Revises: 96f8878302f1
+Create Date: 2024-06-14 02:36:42.894511
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import sqlmodel              ##### Required when using sqlmodel instead of plain sqlalchemy
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = '6333debf3f60'
+down_revision = '96f8878302f1'
+branch_labels = None
+depends_on = None
+
+
+def upgrade(engine_name):  # Alembic entry point; engine_name is accepted but not used here.
+    globals()["upgrade_alltenants"]()  # always dispatches to the single all-tenants upgrade
+
+
+def downgrade(engine_name):  # Alembic entry point; engine_name is accepted but not used here.
+    globals()["downgrade_alltenants"]()  # always dispatches to the single all-tenants downgrade
+
+
+def upgrade_alltenants():  # Creates the image, template and templatetag tables, then reshapes the pod table.
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('image',  # new table; primary key is the image column
+    sa.Column('tenants', postgresql.ARRAY(sa.String()), nullable=True),
+    sa.Column('image', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
+    sa.Column('description', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
+    sa.Column('creation_ts', sa.DateTime(), nullable=True),
+    sa.Column('added_by', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
+    sa.PrimaryKeyConstraint('image')
+    )
+    op.create_table('template',  # new table; primary key is template_id
+    sa.Column('metatags', postgresql.ARRAY(sa.String(), dimensions=1), nullable=True),
+    sa.Column('permissions', postgresql.ARRAY(sa.String(), dimensions=1), nullable=True),
+    sa.Column('template_id', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
+    sa.Column('description', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
+    sa.Column('archive_message', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
+    sa.Column('creation_ts', sa.DateTime(), nullable=True),
+    sa.Column('update_ts', sa.DateTime(), nullable=True),
+    sa.Column('tenant_id', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
+    sa.Column('site_id', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
+    sa.PrimaryKeyConstraint('template_id')
+    )
+    op.create_table('templatetag',  # new table; primary key is tag_timestamp alone
+    sa.Column('pod_definition', sa.JSON(), nullable=True),
+    sa.Column('template_id', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
+    sa.Column('commit_message', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
+    sa.Column('tag', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
+    sa.Column('tag_timestamp', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
+    sa.Column('added_by', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
+    sa.Column('creation_ts', sa.DateTime(), nullable=True),
+    sa.PrimaryKeyConstraint('tag_timestamp')
+    )
+    op.add_column('pod', sa.Column('arguments', postgresql.ARRAY(sa.String()), nullable=True))  # the only new pod column without a server default
+    op.add_column('pod', sa.Column('modified_fields', postgresql.ARRAY(sa.String(), dimensions=1), nullable=True, server_default='{}'))
+    op.add_column('pod', sa.Column('template', sqlmodel.sql.sqltypes.AutoString(), nullable=True, server_default=''))
+    op.add_column('pod', sa.Column('compute_queue', sqlmodel.sql.sqltypes.AutoString(), nullable=True, server_default='default'))
+    op.alter_column('pod', 'pod_template', new_column_name='image', existing_type=sqlmodel.sql.sqltypes.AutoString(), nullable=True, server_default='')  # renames pod_template to image and makes it nullable
+    op.drop_column('pod', 'data_attached')  # this and the three drops below discard the columns' data
+    op.drop_column('pod', 'roles_inherited')
+    op.drop_column('pod', 'roles_required')
+    op.drop_column('pod', 'data_requests')
+    # ### end Alembic commands ###
+
+
+def downgrade_alltenants():  # Undoes upgrade_alltenants, applying the inverse operations in reverse order.
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('pod', sa.Column('data_requests', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True))  # the four re-added columns are nullable and get no default, so they come back empty
+    op.add_column('pod', sa.Column('roles_required', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True))
+    op.add_column('pod', sa.Column('roles_inherited', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True))
+    op.add_column('pod', sa.Column('data_attached', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True))
+    op.alter_column('pod', 'image', new_column_name='pod_template', existing_type=sqlmodel.sql.sqltypes.AutoString(), nullable=False, server_default="")  # NOTE(review): nullable=False here vs nullable=True in the upgrade; presumably fails if any row holds a NULL image - confirm
+    op.drop_column('pod', 'compute_queue')
+    op.drop_column('pod', 'template')
+    op.drop_column('pod', 'modified_fields')
+    op.drop_column('pod', 'arguments')
+    op.drop_table('templatetag')  # tables are dropped in the reverse of their creation order
+    op.drop_table('template')
+    op.drop_table('image')
+    # ### end Alembic commands ###
diff --git a/alembic/versions/96f8878302f1_init7.py b/alembic/versions/96f8878302f1_init7.py
@@ -0,0 +1,36 @@
+"""init7
+
+Revision ID: 96f8878302f1
+Revises: 4e04cfb7cbbe
+Create Date: 2024-06-03 08:32:29.116348
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import sqlmodel              ##### Required when using sqlmodel instead of plain sqlalchemy
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = '96f8878302f1'
+down_revision = '4e04cfb7cbbe'
+branch_labels = None
+depends_on = None
+
+
+def upgrade(engine_name):  # Alembic entry point; engine_name is accepted but not used here.
+    globals()["upgrade_alltenants"]()  # always dispatches to the single all-tenants upgrade
+
+
+def downgrade(engine_name):  # Alembic entry point; engine_name is accepted but not used here.
+    globals()["downgrade_alltenants"]()  # always dispatches to the single all-tenants downgrade
+
+
+
+
+def upgrade_alltenants():  # Drops the 'template' table; this revision performs no other operation.
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table('template')
+
+def downgrade_alltenants():  # NOTE(review): does not restore the columns of the table dropped by the upgrade.
+    op.create_table('template')  # called with the table name only; no column definitions are passed
+    # ### end Alembic commands ###
diff --git a/configschema.json b/configschema.json
@@ -80,6 +80,150 @@
         "description": "Maximum GPU allocation pod is allowed to have in resources.gpus.",
         "default": 1
       },
+      "postgres_engine_echo_for_debug": {
+        "type": "boolean",
+        "description": "Whether or not to echo postgres engine queries for debugging.",
+        "default": false
+      },
+      "compute_queues": {
+        "type": "array",
+        "description": "List of node objects available to the spawner using this config.",
+        "required": ["queue_name"],
+        "additionalProperties": false,
+        "items": {
+          "type": "object",
+          "properties": {
+            "queue_name": {
+              "type": "string",
+              "description": "Name of the queue being made available."
+            },
+            "default": {
+              "type": "boolean",
+              "description": "Whether or not this is the default queue to use."
+            },
+            "node_selector": {
+              "type": "string",
+              "description": "Kubernetes spec.nodeSelector required to access node. [A-z][0-9]-_ comma separated. e.g. 'gpu,v100', 'region,us-west', 'name_id,my_node_id'"
+            },
+            "description": {
+              "type": "string",
+              "description": "Description to be displayed in the UI."
+            },
+            "cpu_info": {
+              "type": "string",
+              "description": "CPU information of the node. e.g. 'Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz'",
+              "default": "Unknown - Not defined in config.yml"
+            },
+            "cpu_architecture": {
+              "type": "string",
+              "description": "Architecture of the node. e.g. 'x86_64' or 'arm64'",
+              "default": "Unknown - Not defined in config.yml"
+            },
+            "max_memory": {
+              "type": "integer",
+              "description": "Maximum memory (RAM) allowed for this queue in GB."
+            },
+            "default_memory_limit": {
+              "type": "integer",
+              "description": "Default memory (RAM) limit for this queue in GB."
+            },
+            "default_memory_request": {
+              "type": "integer",
+              "description": "Default memory (RAM) request for this queue in GB."
+            },
+            "min_memory": {
+              "type": "integer",
+              "description": "Minimum memory (RAM) allowed for this queue in GB.",
+              "default": 256
+            },
+            "max_cpus": {
+              "type": "integer",
+              "description": "Maximum CPU value allowed for this queue."
+            },
+            "default_cpu_limit": {
+              "type": "integer",
+              "description": "Default CPU limit for pods on this queue."
+            },
+            "default_cpu_request": {
+              "type": "integer",
+              "description": "Default CPU request for pods on this queue."
+            },
+            "min_cpus": {
+              "type": "integer",
+              "description": "Minimum CPU value allowed for this queue.",
+              "default": 250
+            },
+            "tolerations": {
+              "type": "array",
+              "description": "Kubernetes spec.tolerations required to tolerate the node.",
+              "required": ["key"],
+              "additionalProperties": false,
+              "items": {
+                "type": "object",
+                "properties": {
+                  "key": {
+                    "type": "string",
+                    "description": "Key of the toleration."
+                  },
+                  "operator": {
+                    "type": "string",
+                    "description": "Operator of the toleration."
+                  },
+                  "value": {
+                    "type": "string",
+                    "description": "Value of the toleration."
+                  },
+                  "effect": {
+                    "type": "string",
+                    "description": "Effect of the toleration."
+                  }
+                }
+              }
+            },
+            "gpu_resources": {
+              "type": "array",
+              "description": "List of GPU resources available to the spawner using this config.",
+              "required": ["device_name", "activation_resource"],
+              "additionalProperties": false,
+              "items": {
+                "type": "object",
+                "properties": {
+                  "gpu_name": {
+                    "type": "string",
+                    "description": "Colloquial name for the GPU device. e.g. 'NVIDIA Tesla V100-SXM2-16GB' or 'NVIDIA Tesla T4'"
+                  },
+                  "description": {
+                    "type": "string",
+                    "description": "Description to be displayed in the UI for the device."
+                  },
+                  "brand": {
+                    "type": "string",
+                    "description": "Brand of the GPU device. e.g. 'NVIDIA' or 'AMD'"
+                  },
+                  "vram": {
+                    "type": "integer",
+                    "description": "Amount of VRAM on the device in GB."
+                  },
+                  "activation_resource": {
+                    "type": "string",
+                    "description": "Activation resource for the GPU device. What you put in K8 'spec.containers[X].resources.requests' to activate this GPU. e.g. 'nvidia.com/gpu' or 'nvidia.com/gpu.shared'"
+                  },
+                  "default_gpu_request": {
+                    "type": "integer",
+                    "description": "Default number of GPUs a user can request for this device.",
+                    "default": 1
+                  },
+                  "max_gpu_request": {
+                    "type": "integer",
+                    "description": "Maximum GPUs a user can request for this device. Time-sliced GPUs should say 1 unless they work differently; if so, contact an admin.",
+                    "default": 1
+                  }
+                }
+              }
+            }
+          }
+        }
+      },
       "spawner_host_id": {
         "type": "integer",
         "description": "Unique host_id for worker host. Each host should have at least one spawner and health check worker."

diff --git a/deploymentTemplate/api.yml b/deploymentTemplate/api.yml
@@ -20,8 +20,8 @@ spec:
 #DEV        args: ["/home/tapis/entry.sh & jupyter lab service --ip=0.0.0.0 --allow-root --no-browser --ServerApp.token=''"]
         resources:
           requests:
-            cpu: ".5"
-            memory: "1G"
+            cpu: ".4"
+            memory: ".4G"
           limits:
             cpu: "3"
             memory: "3G"