diff --git a/.gitignore b/.gitignore index b8c321d6..b199849f 100755 --- a/.gitignore +++ b/.gitignore @@ -42,4 +42,5 @@ secret.yaml *scheduler-deployment.yaml *outputs-processor-deployment.yaml -*google-creds.json \ No newline at end of file +*google-creds.json +*cs-config.yaml \ No newline at end of file diff --git a/workers/cs-config.yaml b/workers/cs-config.yaml deleted file mode 100644 index 79015a0d..00000000 --- a/workers/cs-config.yaml +++ /dev/null @@ -1,4 +0,0 @@ -CS_URL: null -CS_API_TOKEN: null -PROJECT: null -TAG: null diff --git a/workers/cs_workers/cli.py b/workers/cs_workers/cli.py index d160d1da..617e19a7 100644 --- a/workers/cs_workers/cli.py +++ b/workers/cs_workers/cli.py @@ -15,14 +15,16 @@ TAG = os.environ.get("TAG", "") -PROJECT = os.environ.get("PROJECT", "cs-workers-dev") +PROJECT = os.environ.get("PROJECT") CS_URL = os.environ.get("CS_URL", None) +BUCKET = os.environ.get("BUCKET") defaults = dict( TAG=datetime.datetime.now().strftime("%Y-%m-%d"), - PROJECT="cs-workers-dev", + PROJECT=None, CS_URL=None, CS_API_TOKEN=None, + BUCKET=None, ) @@ -36,7 +38,7 @@ def load_env(): else: user_config = {} - for var in ["TAG", "PROJECT", "CS_URL", "CS_API_TOKEN"]: + for var in ["TAG", "PROJECT", "CS_URL", "CS_API_TOKEN", "BUCKET"]: if os.environ.get(var): config[var] = os.environ.get(var) elif user_config.get(var): @@ -49,6 +51,7 @@ def cli(): parser = argparse.ArgumentParser(description="C/S Workers CLI") parser.add_argument("--tag", required=False, default=config["TAG"]) parser.add_argument("--project", required=False, default=config["PROJECT"]) + parser.add_argument("--bucket", required=False, default=config["BUCKET"]) parser.add_argument("--cs-url", required=False, default=config["CS_URL"]) parser.add_argument( "--cs-api-token", required=False, default=config["CS_API_TOKEN"] diff --git a/workers/cs_workers/models/clients/job.py b/workers/cs_workers/models/clients/job.py index 9b4915a2..38741f08 100644 --- a/workers/cs_workers/models/clients/job.py +++ b/workers/cs_workers/models/clients/job.py @@ -61,6 +61,7 @@ def env(self, owner, title, config): ] for sec in [ "CS_URL", + "BUCKET", "REDIS_HOST", "REDIS_PORT", "REDIS_EXECUTOR_PW", @@ -143,7 +144,7 @@ def save_job_kwargs(self, job_id, job_kwargs): self.rclient.set(job_id, json.dumps(job_kwargs)) def create(self): - return self.api_client.create_namespaced_job(body=self.job, namespace="prod") + return self.api_client.create_namespaced_job(body=self.job, namespace="default") def delete(self): return self.api_client.delete_namespaced_job( diff --git a/workers/cs_workers/models/manage.py b/workers/cs_workers/models/manage.py index 71c7a22c..3b90a68a 100644 --- a/workers/cs_workers/models/manage.py +++ b/workers/cs_workers/models/manage.py @@ -38,11 +38,12 @@ def __init__( tag, models=None, base_branch="origin/master", - cs_url=os.environ.get("CS_URL"), + cs_url=None, cs_api_token=None, kubernetes_target=None, use_kind=False, staging_tag=None, + use_latest_tag=False, cr="gcr.io", ignore_ci_errors=False, quiet=False, @@ -61,6 +62,7 @@ def __init__( self.use_kind = use_kind self.staging_tag = staging_tag + self.use_latest_tag = use_latest_tag self.ignore_ci_errors = ignore_ci_errors @@ -200,9 +202,17 @@ def push_app_image(self, app): img_name = f"{safeowner}_{safetitle}_tasks" if self.use_kind: cmd_prefix = "kind load docker-image --name cs --nodes cs-worker2" + elif self.use_latest_tag: + raise Exception("Unable to push latest tag for use outside of kind.") else: cmd_prefix = "docker push" - run(f"{cmd_prefix} {self.cr}/{self.project}/{img_name}:{self.tag}") + + if self.use_latest_tag: + tag = self.get_latest_tag(app) + else: + tag = self.tag + + run(f"{cmd_prefix} {self.cr}/{self.project}/{img_name}:{tag}") def stage_app(self, app): resp = httpx.post( @@ -248,7 +258,7 @@ def write_secrets(self, app): secret_config["stringData"][name] = value if not secret_config["stringData"]: - return + secret_config["stringData"] = dict() if self.kubernetes_target == "-": sys.stdout.write(yaml.dump(secret_config)) @@ -272,10 +282,15 @@ def _write_api_task(self, app): container_config = deployment["spec"]["template"]["spec"]["containers"][0] + if self.use_latest_tag: + tag = self.get_latest_tag(app) + else: + tag = self.tag + container_config.update( { "name": name, - "image": f"{self.cr}/{self.project}/{safeowner}_{safetitle}_tasks:{self.tag}", + "image": f"{self.cr}/{self.project}/{safeowner}_{safetitle}_tasks:{tag}", "command": ["csw", "api-task", "--start"], } ) @@ -332,6 +347,16 @@ def cs_api_token(self): self._cs_api_token = svc_secrets.get_secret("CS_API_TOKEN") return self._cs_api_token + def get_latest_tag(self, app): + resp = httpx.get( + f"{self.config.cs_url}/publish/api/{app['owner']}/{app['title']}/deployments/", + headers={"Authorization": f"Token {self.cs_api_token}"}, + ) + assert ( + resp.status_code == 200 + ), f"Got: {resp.url} {resp.status_code} {resp.text}" + return resp.json()["latest_tag"] + def build(args: argparse.Namespace): manager = Manager( @@ -370,6 +395,7 @@ def push(args: argparse.Namespace): cr=args.cr, cs_api_token=getattr(args, "cs_api_token", None), ignore_ci_errors=args.ignore_ci_errors, + use_latest_tag=args.use_latest_tag, ) manager.push() @@ -385,6 +411,7 @@ def config(args: argparse.Namespace): cr=args.cr, cs_api_token=getattr(args, "cs_api_token", None), ignore_ci_errors=args.ignore_ci_errors, + use_latest_tag=args.use_latest_tag, ) manager.write_app_config() @@ -433,10 +460,11 @@ def cli(subparsers: argparse._SubParsersAction): push_parser = model_subparsers.add_parser("push") push_parser.add_argument("--use-kind", action="store_true") - push_parser.add_argument("--latest-tag", action="store_true") + push_parser.add_argument("--use-latest-tag", action="store_true") push_parser.set_defaults(func=push) config_parser = model_subparsers.add_parser("config") + config_parser.add_argument("--use-latest-tag", action="store_true") config_parser.add_argument("--out", "-o", default=None) config_parser.set_defaults(func=config) diff --git a/workers/cs_workers/models/secrets.py b/workers/cs_workers/models/secrets.py index fc9182fd..2a0034bd 100644 --- a/workers/cs_workers/models/secrets.py +++ b/workers/cs_workers/models/secrets.py @@ -5,8 +5,6 @@ from cs_workers.utils import clean from cs_workers import secrets -PROJECT = os.environ.get("PROJECT", "cs-workers-dev") - class ModelSecrets(secrets.Secrets): def __init__(self, owner=None, title=None, name=None, project=None): diff --git a/workers/cs_workers/services/manage.py b/workers/cs_workers/services/manage.py index 78d691c6..380ddd58 100644 --- a/workers/cs_workers/services/manage.py +++ b/workers/cs_workers/services/manage.py @@ -72,6 +72,7 @@ def __init__( self, tag, project, + bucket=None, kubernetes_target="kubernetes/", use_kind=False, cs_url=None, @@ -79,6 +80,7 @@ def __init__( ): self.tag = tag self.project = project + self.bucket = bucket self.use_kind = use_kind self.cs_url = cs_url self._cs_api_token = cs_api_token @@ -163,6 +165,8 @@ def config(self): "scheduler-RBAC.yaml", "outputs-processor-Service.yaml", "redis-master-Service.yaml", + "job-cleanup-Deployment.yaml", + "job-cleanup-RBAC.yaml", ] for filename in config_filenames: with open(self.templates_dir / "services" / f"{filename}", "r") as f: @@ -220,9 +224,15 @@ def write_redis_deployment(self): self.write_config("redis-master-Deployment.yaml", deployment) def write_secret(self): + assert self.bucket + assert self.cs_url + assert self.cs_api_token + assert self.project secrets = copy.deepcopy(self.secret_template) secrets["stringData"]["CS_URL"] = self.cs_url secrets["stringData"]["CS_API_TOKEN"] = self.cs_api_token + secrets["stringData"]["BUCKET"] = self.bucket + secrets["stringData"]["PROJECT"] = self.project redis_secrets = self.redis_secrets() for name, sec in redis_secrets.items(): if sec is not None: @@ -281,6 +291,7 @@ def manager_from_args(args: argparse.Namespace): return Manager( tag=args.tag, project=args.project, + bucket=args.bucket, kubernetes_target=getattr(args, "out", None), use_kind=getattr(args, "use_kind", None), cs_url=getattr(args, "cs_url", None), diff --git a/workers/cs_workers/services/outputs_processor.py b/workers/cs_workers/services/outputs_processor.py index 42889709..113b2500 100644 --- a/workers/cs_workers/services/outputs_processor.py +++ b/workers/cs_workers/services/outputs_processor.py @@ -12,6 +12,7 @@ CS_URL = os.environ.get("CS_URL") CS_API_TOKEN = os.environ.get("CS_API_TOKEN") +BUCKET = os.environ.get("BUCKET") async def write(task_id, outputs): @@ -54,6 +55,7 @@ async def post(self): def get_app(): + assert CS_URL and CS_API_TOKEN and BUCKET return tornado.web.Application( [(r"/write/", Write), (r"/push/", Push)], debug=True, autoreload=True ) diff --git a/workers/cs_workers/services/scheduler.py b/workers/cs_workers/services/scheduler.py index e405d799..fd4758a5 100644 --- a/workers/cs_workers/services/scheduler.py +++ b/workers/cs_workers/services/scheduler.py @@ -15,7 +15,7 @@ CS_URL = os.environ.get("CS_URL") - +PROJECT = os.environ.get("PROJECT") redis_conn = dict( username="scheduler", @@ -73,7 +73,7 @@ async def post(self, owner, title): elif task_name == "sim": tag = payload["tag"] client = job.Job( - "cs-workers-dev", + PROJECT, owner, title, tag=tag, @@ -104,8 +104,9 @@ def post(self): def get_app(): + assert PROJECT and CS_URL rclient = redis.Redis(**redis_conn) - config = ModelConfig("cs-workers-dev", cs_url=CS_URL, rclient=rclient) + config = ModelConfig(PROJECT, cs_url=CS_URL, rclient=rclient) config.set_projects() return tornado.web.Application( [ diff --git a/workers/cs_workers/templates/secret.template.yaml b/workers/cs_workers/templates/secret.template.yaml index ce97cb0b..5ec4d604 100644 --- a/workers/cs_workers/templates/secret.template.yaml +++ b/workers/cs_workers/templates/secret.template.yaml @@ -4,9 +4,8 @@ metadata: name: worker-secret type: Opaque stringData: - # CS_URL: https://dev.compute.studio - CS_URL: http://hdoupe.ngrok.io - BUCKET: cs-outputs-dev + CS_URL: "" + BUCKET: "" OUTPUTS_VERSION: "v1" REDIS_HOST: redis-master REDIS_DB: "" diff --git a/workers/cs_workers/templates/services/job-cleanup-Deployment.yaml b/workers/cs_workers/templates/services/job-cleanup-Deployment.yaml new file mode 100644 index 00000000..72f9325a --- /dev/null +++ b/workers/cs_workers/templates/services/job-cleanup-Deployment.yaml @@ -0,0 +1,17 @@ +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: job-cleanup +spec: + schedule: "*/30 * * * *" + successfulJobsHistoryLimit: 0 + jobTemplate: + spec: + template: + spec: + serviceAccountName: job-cleanup + containers: + - name: kubectl-container + image: bitnami/kubectl:latest + command: ["sh", "-c", "kubectl delete jobs --field-selector status.successful=1"] + restartPolicy: Never diff --git a/workers/cs_workers/templates/services/job-cleanup-RBAC.yaml b/workers/cs_workers/templates/services/job-cleanup-RBAC.yaml new file mode 100644 index 00000000..61ddb5e2 --- /dev/null +++ b/workers/cs_workers/templates/services/job-cleanup-RBAC.yaml @@ -0,0 +1,28 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: job-cleanup +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: job-remove + namespace: default +rules: + - apiGroups: ["batch", "extensions"] + resources: ["jobs"] + verbs: ["get", "list", "watch", "delete"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: job-remove + namespace: default +subjects: + - kind: ServiceAccount + name: job-cleanup + namespace: default +roleRef: + kind: Role + name: job-remove + apiGroup: rbac.authorization.k8s.io diff --git a/workers/cs_workers/templates/services/scheduler-Deployment.template.yaml b/workers/cs_workers/templates/services/scheduler-Deployment.template.yaml index e95123ce..0f9b8f77 100755 --- a/workers/cs_workers/templates/services/scheduler-Deployment.template.yaml +++ b/workers/cs_workers/templates/services/scheduler-Deployment.template.yaml @@ -24,6 +24,11 @@ spec: secretKeyRef: name: worker-secret key: CS_URL + - name: PROJECT + valueFrom: + secretKeyRef: + name: worker-secret + key: PROJECT - name: REDIS_HOST valueFrom: secretKeyRef: diff --git a/workers/cs_workers/templates/services/scheduler-RBAC.yaml b/workers/cs_workers/templates/services/scheduler-RBAC.yaml index 9651cb96..25ea334f 100644 --- a/workers/cs_workers/templates/services/scheduler-RBAC.yaml +++ b/workers/cs_workers/templates/services/scheduler-RBAC.yaml @@ -7,7 +7,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: job-admin - namespace: prod + namespace: default rules: - apiGroups: ["batch", "extensions"] resources: ["jobs"] @@ -17,11 +17,11 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: job-admin - namespace: prod + namespace: default subjects: - kind: ServiceAccount name: scheduler - namespace: prod + namespace: default roleRef: kind: Role name: job-admin