From 1ae95e943ce4cdf448ff777170d53b8892365501 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Wed, 23 Mar 2022 15:42:02 -0500 Subject: [PATCH 01/18] Update RELEASE_NOTES.txt --- RELEASE_NOTES.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE_NOTES.txt b/RELEASE_NOTES.txt index 3e5ea29..d74dea5 100644 --- a/RELEASE_NOTES.txt +++ b/RELEASE_NOTES.txt @@ -3,7 +3,7 @@ * Add GHA Actions * Move from quay to GHCR.io * New Base Image from HTCondor Dockerhub -* Update security policies for HTCondor Version 9 Series +* Update security policies for HTCondor Version 9 Series 1.0.8.4 ======= From 57bf894dcf1f546b6204e5b1bc079f28752f553a Mon Sep 17 00:00:00 2001 From: bio-boris Date: Wed, 23 Mar 2022 15:44:33 -0500 Subject: [PATCH 02/18] Update RELEASE_NOTES.txt --- RELEASE_NOTES.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE_NOTES.txt b/RELEASE_NOTES.txt index d74dea5..3e5ea29 100644 --- a/RELEASE_NOTES.txt +++ b/RELEASE_NOTES.txt @@ -3,7 +3,7 @@ * Add GHA Actions * Move from quay to GHCR.io * New Base Image from HTCondor Dockerhub -* Update security policies for HTCondor Version 9 Series +* Update security policies for HTCondor Version 9 Series 1.0.8.4 ======= From b528df9689bc1a83a020e5f768c8e77b53ad1a9d Mon Sep 17 00:00:00 2001 From: bio-boris Date: Wed, 23 Mar 2022 16:45:44 -0500 Subject: [PATCH 03/18] Update start-condor.sh --- deployment/bin/start-condor.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/bin/start-condor.sh b/deployment/bin/start-condor.sh index 59428d2..d2d7ac2 100755 --- a/deployment/bin/start-condor.sh +++ b/deployment/bin/start-condor.sh @@ -9,7 +9,7 @@ fi if [ "$POOL_PASSWORD" ] ; then /usr/sbin/condor_store_cred -p "$POOL_PASSWORD" -f "$(condor_config_val SEC_PASSWORD_FILE)" - echo "$POOL_PASSWORD" | condor_store_cred -c add + condor_store_cred -p "$POOL_PASSWORD" -c add umask 0077; condor_token_create -identity condor@mypool > /etc/condor/tokens.d/condor@mypool fi From 7e7676c8945545aaaf592bb3450999a3383aba6a Mon Sep 17 00:00:00 2001 From: bio-boris Date: Fri, 25 Mar 2022 10:13:07 -0500 Subject: [PATCH 04/18] Update Dockerfile --- Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Dockerfile b/Dockerfile index 03b2de4..7bac474 100644 --- a/Dockerfile +++ b/Dockerfile @@ -37,6 +37,9 @@ RUN rm -rf /var/cache/yum COPY --chown=kbase deployment/ /kb/deployment/ +# Install dependencies for JobRunner +ENV PATH /miniconda/bin:$PATH +RUN wget https://raw.githubusercontent.com/kbase/JobRunner/master/requirements.txt && pip install -r requirements.txt && rm requirements.txt RUN /kb/deployment/bin/install_python_dependencies.sh # The BUILD_DATE value seem to bust the docker cache when the timestamp changes, move to From 97c95704d213230a93bcf4167809310de03ce1b8 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Fri, 25 Mar 2022 10:43:59 -0500 Subject: [PATCH 05/18] Update build_prodrc_pr.yaml --- .github/workflows/build_prodrc_pr.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build_prodrc_pr.yaml b/.github/workflows/build_prodrc_pr.yaml index 2e5034e..a548960 100644 --- a/.github/workflows/build_prodrc_pr.yaml +++ b/.github/workflows/build_prodrc_pr.yaml @@ -5,6 +5,7 @@ name: Build Prod RC Image branches: - master - main + - workflow_dispatch types: - opened - synchronize From eafc23d63d1339c463cdc8a9032e31662c2dd6cb Mon Sep 17 00:00:00 2001 From: bio-boris Date: Fri, 25 Mar 2022 10:44:55 -0500 Subject: [PATCH 06/18] Update build_test_pr.yaml --- 
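Note on the workflow_dispatch trigger introduced across PATCH 05-09: GitHub Actions expects
`workflow_dispatch:` as a top-level key under `on:`, not as an entry in the `branches:` list,
which is where PATCH 05 first places it. A minimal sketch of the trigger layout these patches
converge on (branch names taken from build_prodrc_pr.yaml; shown for illustration only):

    on:
      pull_request:
        branches:
          - master
          - main
        types:
          - opened
          - synchronize
          - ready_for_review
      workflow_dispatch:

With this layout the workflow still builds on pull requests against master/main and can also be
started manually from the Actions tab.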
.github/workflows/build_test_pr.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build_test_pr.yaml b/.github/workflows/build_test_pr.yaml index b6b5328..80673b0 100644 --- a/.github/workflows/build_test_pr.yaml +++ b/.github/workflows/build_test_pr.yaml @@ -8,6 +8,7 @@ name: Build Test Image - opened - synchronize - ready_for_review + workflow_dispatch: jobs: docker_build: runs-on: ubuntu-latest From 648b13336c7fb0d4f959c8eb0081b98df02432f1 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Fri, 25 Mar 2022 10:45:33 -0500 Subject: [PATCH 07/18] Update tag_test_latest.yaml --- .github/workflows/tag_test_latest.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/tag_test_latest.yaml b/.github/workflows/tag_test_latest.yaml index d8cac46..5231241 100644 --- a/.github/workflows/tag_test_latest.yaml +++ b/.github/workflows/tag_test_latest.yaml @@ -6,6 +6,7 @@ name: Tag Latest Test Image - develop types: - closed + workflow_dispatch: jobs: docker_tag: runs-on: ubuntu-latest From f7103e88c2b1ba35998233afab09aabc2ecc85a0 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Fri, 25 Mar 2022 11:04:39 -0500 Subject: [PATCH 08/18] Update build_prodrc_pr.yaml --- .github/workflows/build_prodrc_pr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_prodrc_pr.yaml b/.github/workflows/build_prodrc_pr.yaml index a548960..84f570b 100644 --- a/.github/workflows/build_prodrc_pr.yaml +++ b/.github/workflows/build_prodrc_pr.yaml @@ -5,11 +5,11 @@ name: Build Prod RC Image branches: - master - main - - workflow_dispatch types: - opened - synchronize - ready_for_review + workflow_dispatch: jobs: docker_build: runs-on: ubuntu-latest From 97c5dccc6e28265c6ffd35d7d22f09c9b3b0f984 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Fri, 25 Mar 2022 13:06:00 -0500 Subject: [PATCH 09/18] Update build_test_pr.yaml --- .github/workflows/build_test_pr.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build_test_pr.yaml b/.github/workflows/build_test_pr.yaml index 80673b0..ee0caf1 100644 --- a/.github/workflows/build_test_pr.yaml +++ b/.github/workflows/build_test_pr.yaml @@ -9,6 +9,7 @@ name: Build Test Image - synchronize - ready_for_review workflow_dispatch: + jobs: docker_build: runs-on: ubuntu-latest From 89a1927d541e508e5c4333c1a9ed777d745f3782 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Wed, 16 Nov 2022 13:51:15 -0600 Subject: [PATCH 10/18] Create manual-build.yml --- .github/workflows/manual-build.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/workflows/manual-build.yml diff --git a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml new file mode 100644 index 0000000..944f903 --- /dev/null +++ b/.github/workflows/manual-build.yml @@ -0,0 +1,11 @@ +--- +name: Manual Build & Push +on: + workflow_dispatch: +jobs: + build-push: + uses: kbase/.github/.github/workflows/reusable_build-push.yml@main + with: + name: '${{ github.event.repository.name }}-develop' + tags: br-${{ github.ref_name }} + secrets: inherit From eee4bee40ba331c30e370619c782ebf4a7e36655 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Fri, 4 Aug 2023 15:26:16 -0500 Subject: [PATCH 11/18] DEVOPS-1465 Update health check (#57) * Create manual-build.yml * Update health_check.py * Update Dockerfile * Update install_python_dependencies.sh * Update RELEASE_NOTES.txt --------- Co-authored-by: Boris --- Dockerfile | 6 +++--- RELEASE_NOTES.txt | 8 ++++++++ deployment/bin/cron/health_check.py | 13 +++++++++---- 
deployment/bin/install_python_dependencies.sh | 13 ++++++++++--- 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7bac474..b364dee 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,10 @@ -FROM htcondor/execute:9.7-el7 +FROM htcondor/execute:lts-el8 ENV container docker # Ge$t commonly used utilities -RUN yum install -y deltarpm RUN yum -y update && yum upgrade -y -RUN yum -y install -y epel-release wget which git deltarpm gcc libcgroup libcgroup-tools stress-ng tmpwatch +RUN yum install -y drpm +RUN yum -y install -y epel-release wget which git gcc libcgroup libcgroup-tools stress-ng tmpwatch # Install docker binaries RUN yum install -y yum-utils device-mapper-persistent-data lvm2 && yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo && yum install -y docker-ce diff --git a/RELEASE_NOTES.txt b/RELEASE_NOTES.txt index 3e5ea29..4b70aa6 100644 --- a/RELEASE_NOTES.txt +++ b/RELEASE_NOTES.txt @@ -1,3 +1,11 @@ +1.0.10 +======== +* Fix health check +* Fix dependencies for JobRunner +* Pin versions due for Python +* Update base image to `htcondor/execute:lts-el8` + + 1.0.9 ======= * Add GHA Actions diff --git a/deployment/bin/cron/health_check.py b/deployment/bin/cron/health_check.py index bc5c6db..80cebc4 100755 --- a/deployment/bin/cron/health_check.py +++ b/deployment/bin/cron/health_check.py @@ -203,8 +203,7 @@ def checkEndpoints(): Check auth/njs/catalog/ws """ - services = { - f"{endpoint}/auth": {}, + post_services = { f"{endpoint}/catalog": { "method": "Catalog.status", "version": "1.1", @@ -218,10 +217,14 @@ def checkEndpoints(): "params": [], }, } + get_services = {f"{endpoint}/auth": {}} - for service in services: + for service in {**post_services, **get_services}: try: - response = requests.post(url=service, json=services[service], timeout=30) + if service in post_services: + response = requests.post(url=service, json=post_services[service], timeout=30) + else: + response = requests.get(url=service, timeout=30) if response.status_code != 200: message = f"{service} is not available." exit_unsuccessfully(message) @@ -229,6 +232,8 @@ def checkEndpoints(): message = f"Couldn't reach {service}. 
{e}" exit_unsuccessfully(message) + + def main(): try: diff --git a/deployment/bin/install_python_dependencies.sh b/deployment/bin/install_python_dependencies.sh index 401325c..191437c 100755 --- a/deployment/bin/install_python_dependencies.sh +++ b/deployment/bin/install_python_dependencies.sh @@ -1,6 +1,13 @@ #!/usr/bin/env bash -#Install Python3 Libraries -#TODO Requirements.txt +#Install Python3 Libraries for cronjobs and for job runner + source /miniconda/bin/activate -pip install requests docker slackclient htcondor psutil lockfile +pip install requests==2.29.0 +pip install docker==6.1.3 +pip install slackclient==2.9.4 +pip install htcondor==10.7.0 +pip install psutil==5.9.5 +pip install lockfile==0.12.2 +pip install sanic==21.9.3 +pip install websockets==10.4 From b9d074e582c82e0c3fe12c4916f69ec3631a4e9a Mon Sep 17 00:00:00 2001 From: bio-boris Date: Wed, 24 Jan 2024 22:12:30 -0600 Subject: [PATCH 12/18] DEVOPS-1593 Update worker cronjob (#58) * Modify cronjobs --------- Co-authored-by: Boris --- .github/workflows/build_prodrc_pr.yaml | 32 -- .github/workflows/build_test_pr.yaml | 29 -- .github/workflows/pr_build.yml | 43 ++ .github/workflows/prod_release.yaml | 38 -- .github/workflows/release-main.yml | 25 ++ .github/workflows/scripts/build_prodrc_pr.sh | 17 - .github/workflows/scripts/build_test_pr.sh | 17 - .github/workflows/scripts/deploy_tag.sh | 34 -- .github/workflows/scripts/prod_release.sh | 24 - .github/workflows/scripts/tag_environments.sh | 22 - .github/workflows/scripts/tag_prod_latest.sh | 12 - .github/workflows/scripts/tag_test_latest.sh | 12 - .github/workflows/tag_environments.yaml | 19 - .github/workflows/tag_prod_latest.yaml | 27 -- .github/workflows/tag_test_latest.yaml | 27 -- Dockerfile | 10 +- README.md | 44 +- RELEASE_NOTES.txt | 9 + deployment/README.md | 3 - .../cron/clients/NarrativeJobServiceClient.py | 416 ------------------ deployment/bin/cron/clients/baseclient.py | 311 ------------- deployment/bin/cron/container_reaper.py | 229 +++++----- deployment/bin/cron/container_reaper_ee2.py | 306 +++++++------ .../bin/cron/delete_exited_containers.py | 21 +- deployment/bin/cron/health_check.py | 77 ++-- .../bin/cruft/check_abandoned_containers.py | 90 ---- .../bin/cruft/check_abandoned_containers.sh | 33 -- .../bin/cruft/delete_exited_containers.sh | 24 - .../conf/.templates/cronjobs.config.templ | 36 +- 29 files changed, 432 insertions(+), 1555 deletions(-) delete mode 100644 .github/workflows/build_prodrc_pr.yaml delete mode 100644 .github/workflows/build_test_pr.yaml create mode 100644 .github/workflows/pr_build.yml delete mode 100644 .github/workflows/prod_release.yaml create mode 100644 .github/workflows/release-main.yml delete mode 100755 .github/workflows/scripts/build_prodrc_pr.sh delete mode 100755 .github/workflows/scripts/build_test_pr.sh delete mode 100755 .github/workflows/scripts/deploy_tag.sh delete mode 100755 .github/workflows/scripts/prod_release.sh delete mode 100755 .github/workflows/scripts/tag_environments.sh delete mode 100755 .github/workflows/scripts/tag_prod_latest.sh delete mode 100755 .github/workflows/scripts/tag_test_latest.sh delete mode 100644 .github/workflows/tag_environments.yaml delete mode 100644 .github/workflows/tag_prod_latest.yaml delete mode 100644 .github/workflows/tag_test_latest.yaml delete mode 100644 deployment/README.md delete mode 100644 deployment/bin/cron/clients/NarrativeJobServiceClient.py delete mode 100644 deployment/bin/cron/clients/baseclient.py delete mode 100644 
deployment/bin/cruft/check_abandoned_containers.py delete mode 100644 deployment/bin/cruft/check_abandoned_containers.sh delete mode 100755 deployment/bin/cruft/delete_exited_containers.sh diff --git a/.github/workflows/build_prodrc_pr.yaml b/.github/workflows/build_prodrc_pr.yaml deleted file mode 100644 index 84f570b..0000000 --- a/.github/workflows/build_prodrc_pr.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -name: Build Prod RC Image -'on': - pull_request: - branches: - - master - - main - types: - - opened - - synchronize - - ready_for_review - workflow_dispatch: -jobs: - docker_build: - runs-on: ubuntu-latest - steps: - - name: Verify merge is develop -> main - if: github.head_ref != 'develop' - run: echo "Must merge from develop -> main/master"; exit 1 - - name: Check out GitHub Repo - if: github.event.pull_request.draft == false && github.head_ref == 'develop' - with: - ref: "${{ github.event.pull_request.head.sha }}" - uses: actions/checkout@v2 - - name: Build and Push to Packages - if: github.event.pull_request.draft == false && github.head_ref == 'develop' - env: - PR: "${{ github.event.pull_request.number }}" - SHA: "${{ github.event.pull_request.head.sha }}" - DOCKER_ACTOR: "${{ secrets.GHCR_USERNAME }}" - DOCKER_TOKEN: "${{ secrets.GHCR_TOKEN }}" - run: "./.github/workflows/scripts/build_prodrc_pr.sh\n" diff --git a/.github/workflows/build_test_pr.yaml b/.github/workflows/build_test_pr.yaml deleted file mode 100644 index ee0caf1..0000000 --- a/.github/workflows/build_test_pr.yaml +++ /dev/null @@ -1,29 +0,0 @@ ---- -name: Build Test Image -'on': - pull_request: - branches: - - develop - types: - - opened - - synchronize - - ready_for_review - workflow_dispatch: - -jobs: - docker_build: - runs-on: ubuntu-latest - steps: - - name: Check out GitHub Repo - if: github.event.pull_request.draft == false - with: - ref: "${{ github.event.pull_request.head.sha }}" - uses: actions/checkout@v2 - - name: Build and Push to Packages - if: github.event.pull_request.draft == false - env: - PR: "${{ github.event.pull_request.number }}" - SHA: "${{ github.event.pull_request.head.sha }}" - DOCKER_ACTOR: "${{ secrets.GHCR_USERNAME }}" - DOCKER_TOKEN: "${{ secrets.GHCR_TOKEN }}" - run: "./.github/workflows/scripts/build_test_pr.sh\n" diff --git a/.github/workflows/pr_build.yml b/.github/workflows/pr_build.yml new file mode 100644 index 0000000..0fa1c46 --- /dev/null +++ b/.github/workflows/pr_build.yml @@ -0,0 +1,43 @@ +--- +name: Pull Request Build, Tag, & Push +on: + pull_request: + branches: + - develop + - main + - master + types: + - opened + - reopened + - synchronize + - closed +jobs: + build-develop-open: + if: github.base_ref == 'develop' && github.event.pull_request.merged == false + uses: kbase/.github/.github/workflows/reusable_build.yml@main + secrets: inherit + build-develop-merge: + if: github.base_ref == 'develop' && github.event.pull_request.merged == true + uses: kbase/.github/.github/workflows/reusable_build-push.yml@main + with: + name: '${{ github.event.repository.name }}-develop' + tags: pr-${{ github.event.number }},latest + secrets: inherit + build-main-open: + if: (github.base_ref == 'main' || github.base_ref == 'master') && github.event.pull_request.merged == false + uses: kbase/.github/.github/workflows/reusable_build-push.yml@main + with: + name: '${{ github.event.repository.name }}' + tags: pr-${{ github.event.number }} + secrets: inherit + build-main-merge: + if: (github.base_ref == 'main' || github.base_ref == 'master') && github.event.pull_request.merged == true + uses: 
kbase/.github/.github/workflows/reusable_build-push.yml@main + with: + name: '${{ github.event.repository.name }}' + tags: pr-${{ github.event.number }},latest-rc + secrets: inherit + trivy-scans: + if: (github.base_ref == 'develop' || github.base_ref == 'main' || github.base_ref == 'master' ) && github.event.pull_request.merged == false + uses: kbase/.github/.github/workflows/reusable_trivy-scans.yml@main + secrets: inherit diff --git a/.github/workflows/prod_release.yaml b/.github/workflows/prod_release.yaml deleted file mode 100644 index ffa1453..0000000 --- a/.github/workflows/prod_release.yaml +++ /dev/null @@ -1,38 +0,0 @@ ---- -name: Publish Release Image -'on': - release: - branches: - - main - - master - types: - - published -jobs: - docker_build: - runs-on: ubuntu-latest - steps: - - name: Check Tag - id: check-tag - run: |- - if [[ ${{ github.ref_name }} =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then - echo ::set-output name=match::true - fi - - name: Report SemVer Check - if: steps.check-tag.outputs.match != 'true' - run: echo "Release version must follow semantic naming (e.g. 1.0.2)"; exit 1 - - name: Check Source Branch - if: github.event.release.target_commitish != 'master' && github.event.release.target_commitish != 'main' - run: echo "Releases must be built from master/main branch"; exit 1 - - name: Check out GitHub Repo - with: - ref: "${{ github.event.pull_request.head.sha }}" - uses: actions/checkout@v2 - - name: Build and Push to Packages - env: - ISH: "${{ github.event.release.target_commitish }}" - PR: "${{ github.event.pull_request.number }}" - SHA: "${{ github.event.pull_request.head.sha }}" - VER: "${{ github.event.release.tag_name }}" - DOCKER_ACTOR: "${{ secrets.GHCR_USERNAME }}" - DOCKER_TOKEN: "${{ secrets.GHCR_TOKEN }}" - run: "./.github/workflows/scripts/prod_release.sh\n" diff --git a/.github/workflows/release-main.yml b/.github/workflows/release-main.yml new file mode 100644 index 0000000..a254678 --- /dev/null +++ b/.github/workflows/release-main.yml @@ -0,0 +1,25 @@ +--- +name: Release - Build & Push Image +on: + release: + branches: + - main + - master + types: [ published ] +jobs: + check-source-branch: + uses: kbase/.github/.github/workflows/reusable_validate-branch.yml@main + with: + build_branch: '${{ github.event.release.target_commitish }}' + validate-release-tag: + needs: check-source-branch + uses: kbase/.github/.github/workflows/reusable_validate-release-tag.yml@main + with: + release_tag: '${{ github.event.release.tag_name }}' + build-push: + needs: validate-release-tag + uses: kbase/.github/.github/workflows/reusable_build-push.yml@main + with: + name: '${{ github.event.repository.name }}' + tags: '${{ github.event.release.tag_name }},latest' + secrets: inherit diff --git a/.github/workflows/scripts/build_prodrc_pr.sh b/.github/workflows/scripts/build_prodrc_pr.sh deleted file mode 100755 index 4c7bdf2..0000000 --- a/.github/workflows/scripts/build_prodrc_pr.sh +++ /dev/null @@ -1,17 +0,0 @@ -#! 
/usr/bin/env bash - -export MY_ORG=$(echo "${GITHUB_REPOSITORY}" | awk -F / '{print $1}') -export MY_APP=$(echo "${GITHUB_REPOSITORY}" | awk -F / '{print $2}') -export DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ") -export BUILD_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ") -export COMMIT=$(echo "$SHA" | cut -c -7) - -echo "Branch is:" ${GITHUB_HEAD_REF} -docker login -u "$DOCKER_ACTOR" -p "$DOCKER_TOKEN" ghcr.io -docker build --build-arg BUILD_DATE="$DATE" \ - --build-arg COMMIT="$COMMIT" \ - --build-arg BRANCH="$GITHUB_HEAD_REF" \ - --build-arg PULL_REQUEST="$PR" \ - --label us.kbase.vcs-pull-req="$PR" \ - -t ghcr.io/"$MY_ORG"/"$MY_APP":"pr-""$PR" . -docker push ghcr.io/"$MY_ORG"/"$MY_APP":"pr-""$PR" diff --git a/.github/workflows/scripts/build_test_pr.sh b/.github/workflows/scripts/build_test_pr.sh deleted file mode 100755 index 546b1b4..0000000 --- a/.github/workflows/scripts/build_test_pr.sh +++ /dev/null @@ -1,17 +0,0 @@ -#! /usr/bin/env bash - -export MY_ORG=$(echo "${GITHUB_REPOSITORY}" | awk -F / '{print $1}') -export MY_APP=$(echo $(echo "${GITHUB_REPOSITORY}" | awk -F / '{print $2}')"-develop") -export DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ") -export BUILD_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ") -export COMMIT=$(echo "$SHA" | cut -c -7) - -echo $DOCKER_TOKEN | docker login ghcr.io -u $DOCKER_ACTOR --password-stdin -docker build --build-arg BUILD_DATE="$DATE" \ - --build-arg COMMIT="$COMMIT" \ - --build-arg BRANCH="$GITHUB_HEAD_REF" \ - --build-arg PULL_REQUEST="$PR" \ - --label us.kbase.vcs-pull-req="$PR" \ - -t ghcr.io/"$MY_ORG"/"$MY_APP":"pr-""$PR" . -docker push ghcr.io/"$MY_ORG"/"$MY_APP":"pr-""$PR" - \ No newline at end of file diff --git a/.github/workflows/scripts/deploy_tag.sh b/.github/workflows/scripts/deploy_tag.sh deleted file mode 100755 index 5fb928a..0000000 --- a/.github/workflows/scripts/deploy_tag.sh +++ /dev/null @@ -1,34 +0,0 @@ -#! /usr/bin/env bash - -# Usage: ./deploy_tag.sh -e TARGET -o ORG -r REPO -s DEV_PROD -t IMAGE_TAG -# -# Example 1: ./deploy_tag.sh -o "kbase" -r "narrative-traefiker" -s "dev" -t "pr-9001" -e "ci" -# Example 2: ./deploy_tag.sh -o "kbase" -r "narrative" -s "prod" -t "latest" -e "next" -# -# Where: -# -o ORG is the organization (`kbase`, `kbaseapps`, etc.) -# -r REPO is the repository (e.g. `narrative`) -# -s DEV_PROD determines whether to pull the development {APPNAME}-develop or production {APPNAME} image. -# -t IMAGE_TAG is the *current* Docker image tag, typically `pr-#` or `latest` -# -e TARGET is one of: `appdsshev`, `ci`, or `next` -# -# Be sure to set $TOKEN first! -# See: https://docs.github.com/en/packages/getting-started-with-github-container-registry/migrating-to-github-container-registry-for-docker-images#authenticating-with-the-container-registry - - -while getopts e:o:r:s:t: option - do - case "${option}" - in - e) TARGET=${OPTARG};; - o) ORG=${OPTARG};; - r) REPO=${OPTARG};; - s) DEV_PROD=${OPTARG};; - t) IMAGE_TAG=${OPTARG};; - esac -done - -curl -H "Authorization: token $TOKEN" \ - -H 'Accept: application/vnd.github.everest-preview+json' \ - "https://api.github.com/repos/$ORG/$REPO/dispatches" \ - -d '{"event_type":"Tag '"$DEV_PROD"' '"$IMAGE_TAG"' for '"$TARGET"'", "client_payload": {"image_tag": "'"$IMAGE_TAG"'","target": "'"$TARGET"'","dev_prod": "'"$DEV_PROD"'"}}' diff --git a/.github/workflows/scripts/prod_release.sh b/.github/workflows/scripts/prod_release.sh deleted file mode 100755 index 46d008c..0000000 --- a/.github/workflows/scripts/prod_release.sh +++ /dev/null @@ -1,24 +0,0 @@ -#! 
/usr/bin/env bash - -export MY_ORG=$(echo "${GITHUB_REPOSITORY}" | awk -F / '{print $1}') -export MY_APP=$(echo "${GITHUB_REPOSITORY}" | awk -F / '{print $2}') -export DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ") -export BUILD_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ") -export COMMIT=$(echo "$SHA" | cut -c -7) - -echo "ISH is:" $ISH -echo "GITHUB_REF is:" $GITHUB_REF -echo "HEAD_REF is:" $GITHUB_HEAD_REF -echo "BASE_REF is:" $GITHUB_BASE_REF -echo "Release is:" $GITHUB_REF_NAME -echo $DOCKER_TOKEN | docker login ghcr.io -u $DOCKER_ACTOR --password-stdin -docker build --build-arg BUILD_DATE="$DATE" \ - --build-arg COMMIT="$COMMIT" \ - --build-arg BRANCH="$GITHUB_HEAD_REF" \ - --build-arg PULL_REQUEST="$PR" \ - --build-arg VERSION="$VER" \ - --label us.kbase.vcs-pull-req="$PR" \ - -t ghcr.io/"$MY_ORG"/"$MY_APP":"$VER" \ - -t ghcr.io/"$MY_ORG"/"$MY_APP":"latest" . -docker push ghcr.io/"$MY_ORG"/"$MY_APP":"$VER" -docker push ghcr.io/"$MY_ORG"/"$MY_APP":"latest" diff --git a/.github/workflows/scripts/tag_environments.sh b/.github/workflows/scripts/tag_environments.sh deleted file mode 100755 index b39732a..0000000 --- a/.github/workflows/scripts/tag_environments.sh +++ /dev/null @@ -1,22 +0,0 @@ - -#! /usr/bin/env bash -# Add vars for PR & environments to yaml, as called from external script - -export MY_ORG=$(echo "${GITHUB_REPOSITORY}" | awk -F / '{print $1}') -export MY_APP=$(echo "${GITHUB_REPOSITORY}" | awk -F / '{print $2}') -export DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ") -export BUILD_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ") -export COMMIT=$(echo "$SHA" | cut -c -7) - -if [ $DEV_PROD = "dev" ] || [ $DEV_PROD = "develop" ] -then - IMAGE=$MY_APP"-develop" -else - IMAGE=$MY_APP -fi - -echo "Dev or Prod:" $DEV_PROD -docker login -u "$DOCKER_ACTOR" -p "$DOCKER_TOKEN" ghcr.io -docker pull ghcr.io/"$MY_ORG"/"$IMAGE":"$IMAGE_TAG" -docker tag ghcr.io/"$MY_ORG"/"$IMAGE":"$IMAGE_TAG" ghcr.io/"$MY_ORG"/"$IMAGE":"$TARGET" -docker push ghcr.io/"$MY_ORG"/"$IMAGE":"$TARGET" diff --git a/.github/workflows/scripts/tag_prod_latest.sh b/.github/workflows/scripts/tag_prod_latest.sh deleted file mode 100755 index c3c4225..0000000 --- a/.github/workflows/scripts/tag_prod_latest.sh +++ /dev/null @@ -1,12 +0,0 @@ -#! /usr/bin/env bash - -export MY_ORG=$(echo "${GITHUB_REPOSITORY}" | awk -F / '{print $1}') -export MY_APP=$(echo "${GITHUB_REPOSITORY}" | awk -F / '{print $2}') -export DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ") -export BUILD_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ") -export COMMIT=$(echo "$SHA" | cut -c -7) - -docker login -u "$DOCKER_ACTOR" -p "$DOCKER_TOKEN" ghcr.io -docker pull ghcr.io/"$MY_ORG"/"$MY_APP":"pr-""$PR" -docker tag ghcr.io/"$MY_ORG"/"$MY_APP":"pr-""$PR" ghcr.io/"$MY_ORG"/"$MY_APP":"latest-rc" -docker push ghcr.io/"$MY_ORG"/"$MY_APP":"latest-rc" diff --git a/.github/workflows/scripts/tag_test_latest.sh b/.github/workflows/scripts/tag_test_latest.sh deleted file mode 100755 index c0dc504..0000000 --- a/.github/workflows/scripts/tag_test_latest.sh +++ /dev/null @@ -1,12 +0,0 @@ -#! 
/usr/bin/env bash - -export MY_ORG=$(echo "${GITHUB_REPOSITORY}" | awk -F / '{print $1}') -export MY_APP=$(echo $(echo "${GITHUB_REPOSITORY}" | awk -F / '{print $2}')"-develop") -export DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ") -export BUILD_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ") -export COMMIT=$(echo "$SHA" | cut -c -7) - -docker login -u "$DOCKER_ACTOR" -p "$DOCKER_TOKEN" ghcr.io -docker pull ghcr.io/"$MY_ORG"/"$MY_APP":"pr-""$PR" -docker tag ghcr.io/"$MY_ORG"/"$MY_APP":"pr-""$PR" ghcr.io/"$MY_ORG"/"$MY_APP":"latest" -docker push ghcr.io/"$MY_ORG"/"$MY_APP":"latest" diff --git a/.github/workflows/tag_environments.yaml b/.github/workflows/tag_environments.yaml deleted file mode 100644 index 6dba743..0000000 --- a/.github/workflows/tag_environments.yaml +++ /dev/null @@ -1,19 +0,0 @@ ---- -name: Tag Image For Deploy -'on': - repository_dispatch -jobs: - tag_environments: - runs-on: ubuntu-latest - steps: - - name: Check out GitHub Repo - uses: actions/checkout@v2 - - name: Tag Deploy Environments - env: - DOCKER_ACTOR: "${{ secrets.GHCR_USERNAME }}" - DOCKER_TOKEN: ${{ secrets.GHCR_TOKEN }} - IMAGE_TAG: ${{ github.event.client_payload.image_tag }} - SHA: ${{ github.event.pull_request.head.sha }} - TARGET: ${{ github.event.client_payload.target }} - DEV_PROD: ${{ github.event.client_payload.dev_prod }} - run: './.github/workflows/scripts/tag_environments.sh' diff --git a/.github/workflows/tag_prod_latest.yaml b/.github/workflows/tag_prod_latest.yaml deleted file mode 100644 index 12b23df..0000000 --- a/.github/workflows/tag_prod_latest.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- -name: Tag Prod Latest -'on': - pull_request: - branches: - - master - - main - types: - - closed -jobs: - docker_tag: - runs-on: ubuntu-latest - steps: - - name: Check out GitHub Repo - if: github.event_name == 'pull_request' && github.event.action == 'closed' && - github.event.pull_request.merged == true - with: - ref: "${{ github.event.pull_request.head.sha }}" - uses: actions/checkout@v2 - - name: Build and Push to Packages - if: github.event.pull_request.draft == false - env: - PR: "${{ github.event.pull_request.number }}" - SHA: "${{ github.event.pull_request.head.sha }}" - DOCKER_ACTOR: "${{ secrets.GHCR_USERNAME }}" - DOCKER_TOKEN: "${{ secrets.GHCR_TOKEN }}" - run: "./.github/workflows/scripts/tag_prod_latest.sh\n" diff --git a/.github/workflows/tag_test_latest.yaml b/.github/workflows/tag_test_latest.yaml deleted file mode 100644 index 5231241..0000000 --- a/.github/workflows/tag_test_latest.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- -name: Tag Latest Test Image -'on': - pull_request: - branches: - - develop - types: - - closed - workflow_dispatch: -jobs: - docker_tag: - runs-on: ubuntu-latest - steps: - - name: Check out GitHub Repo - if: github.event_name == 'pull_request' && github.event.action == 'closed' && - github.event.pull_request.merged == true - with: - ref: "${{ github.event.pull_request.head.sha }}" - uses: actions/checkout@v2 - - name: Build and Push to Packages - if: github.event.pull_request.draft == false - env: - PR: "${{ github.event.pull_request.number }}" - SHA: "${{ github.event.pull_request.head.sha }}" - DOCKER_ACTOR: "${{ secrets.GHCR_USERNAME }}" - DOCKER_TOKEN: "${{ secrets.GHCR_TOKEN }}" - run: "./.github/workflows/scripts/tag_test_latest.sh\n" diff --git a/Dockerfile b/Dockerfile index b364dee..d134337 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,8 @@ ENV container docker # Ge$t commonly used utilities RUN yum -y update && yum upgrade -y RUN yum install -y drpm -RUN yum -y install 
-y epel-release wget which git gcc libcgroup libcgroup-tools stress-ng tmpwatch +RUN yum -y install -y epel-release wget which git gcc libcgroup libcgroup-tools stress-ng tmpwatch procps + # Install docker binaries RUN yum install -y yum-utils device-mapper-persistent-data lvm2 && yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo && yum install -y docker-ce @@ -14,7 +15,9 @@ RUN yum install -y yum-utils device-mapper-persistent-data lvm2 && yum-config-ma RUN yum install -y bzip2 \ && wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh \ && bash ~/miniconda.sh -b -p /miniconda \ -&& export PATH="/miniconda/bin:$PATH" + + +ENV PATH="/miniconda/bin:${PATH}" # Add kbase user and set up directories RUN useradd -c "KBase user" -rd /kb/deployment/ -u 998 -s /bin/bash kbase && \ @@ -37,9 +40,6 @@ RUN rm -rf /var/cache/yum COPY --chown=kbase deployment/ /kb/deployment/ -# Install dependencies for JobRunner -ENV PATH /miniconda/bin:$PATH -RUN wget https://raw.githubusercontent.com/kbase/JobRunner/master/requirements.txt && pip install -r requirements.txt && rm requirements.txt RUN /kb/deployment/bin/install_python_dependencies.sh # The BUILD_DATE value seem to bust the docker cache when the timestamp changes, move to diff --git a/README.md b/README.md index 0f5a173..a855d50 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# condor-worker requirements +# Condor-worker requirements The condor workers require @@ -10,22 +10,54 @@ The condor workers require * Docker needs privileges to set cgroups/namespaces * CLIENTGROUPS set with extra apostrophes -Environmental variables to be set in rancher +# Required Environmental Variables for the worker * COLLECTOR_HOST * CONDOR_HOST * POOL_PASSWORD * SCHEDD_HOST * UID_DOMAIN * USE_TCP -* SET_NOBODY_USER_GUID +* SET_NOBODY_USER_GUID * SET_NOBODY_USER_UID * CONDOR_SUBMIT_WORKDIR * EXECUTE_SUFFIX -* SLACK_WEBHOOK_URL -* DELETE_ABANDONED_CONTAINERS -* NJS_ENDPOINT * EE2_ENDPOINT * SERVICE_ENDPOINT * DOCKER_CACHE * CGROUP_MEMORY_LIMIT_POLICY * USE_POOL_PASSWORD=yes + +## HTCondor STARTD_CRON Environment Variables + +* The cronjobs pass their environmental variables to the scripts they run. +* You can check the condor start log for their status and output when something goes wrong. +* You won't know if the cronjob is running unless you check the condor start log for a missing env var or possibly a job is stuck in a NODE_IS_HEALTHY=false state +* If an env var is present in the cronjobs.config, it is required, otherwise the template engine won't render it +* Q: Why are they in both the cronjobs.config and ALSO in environmental vars section? A: I'm not sure. Need to look at that why https://github.com/kbase/condor-worker/issues/59 + + +### NodeHealth Health Check + +#### Required Environmental Variables +* SLACK_WEBHOOK_URL (dev or prod channels) +* SERVICE_ENDPOINT, e.g. https://kbase.us/services/ee2 + +#### Optional Environmental Variables +* DOCKER_CACHE (default: /var/lib/docker/) +* CONDOR_SUBMIT_WORKDIR (default: /cdr) +* EXECUTE_SUFFIX (default: "") +* CHECK_CONDOR_STARTER_HEALTH (default: true) +* DEBUG (default: false) +* CHECK_CONDOR_STARTER_HEALTH (default: true) + +### DeleteExitedContainers +#### Required Environmental Variables +* SLACK_WEBHOOK_URL (dev or prod channels) + + +### EE2ContainerREAPER +#### Required Environmental Variables +* SLACK_WEBHOOK_URL (dev or prod channels) +* CONTAINER_REAPER_ENDPOINTS, e.g. 
https://kbase.us/services/ee2,https://services.kbase.us/services/ee2, +* DELETE_ABANDONED_CONTAINERS required to be set to true in order to run both checks + diff --git a/RELEASE_NOTES.txt b/RELEASE_NOTES.txt index 4b70aa6..68f6411 100644 --- a/RELEASE_NOTES.txt +++ b/RELEASE_NOTES.txt @@ -1,3 +1,12 @@ +1.0.10 +======== +* Modify Cronjobs that look for runaway containers +* Update Documentation +* Deprecate container_reaper_ee2.py in favor of container_reaper.py + + + + 1.0.10 ======== * Fix health check diff --git a/deployment/README.md b/deployment/README.md deleted file mode 100644 index 139083d..0000000 --- a/deployment/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# condor -KBase specific Condor scheduler image. Based on the andypohl/htcondor image. - diff --git a/deployment/bin/cron/clients/NarrativeJobServiceClient.py b/deployment/bin/cron/clients/NarrativeJobServiceClient.py deleted file mode 100644 index ad12f30..0000000 --- a/deployment/bin/cron/clients/NarrativeJobServiceClient.py +++ /dev/null @@ -1,416 +0,0 @@ -# -*- coding: utf-8 -*- -############################################################ -# -# Autogenerated by the KBase type compiler - -# any changes made here will be overwritten -# -############################################################ - -from __future__ import print_function - -# the following is a hack to get the baseclient to import whether we're in a -# package or not. This makes pep8 unhappy hence the annotations. -try: - # baseclient and this client are in a package - from .baseclient import BaseClient as _BaseClient # @UnusedImport -except Exception: - # no they aren't - from baseclient import BaseClient as _BaseClient # @Reimport - - -class NarrativeJobService(object): - def __init__( - self, - url=None, - timeout=30 * 60, - user_id=None, - password=None, - token=None, - ignore_authrc=False, - trust_all_ssl_certificates=False, - auth_svc="https://ci.kbase.us/services/auth/api/legacy/KBase/Sessions/Login", - ): - if url is None: - raise ValueError("A url is required") - self._service_ver = None - self._client = _BaseClient( - url, - timeout=timeout, - user_id=user_id, - password=password, - token=token, - ignore_authrc=ignore_authrc, - trust_all_ssl_certificates=trust_all_ssl_certificates, - auth_svc=auth_svc, - ) - - def list_config(self, context=None): - """ - :returns: instance of mapping from String to String - """ - return self._client.call_method( - "NarrativeJobService.list_config", [], self._service_ver, context - ) - - def ver(self, context=None): - """ - Returns the current running version of the NarrativeJobService. - :returns: instance of String - """ - return self._client.call_method( - "NarrativeJobService.ver", [], self._service_ver, context - ) - - def status(self, context=None): - """ - Simply check the status of this service to see queue details - :returns: instance of type "Status" -> structure: parameter - "reboot_mode" of type "boolean" (@range [0,1]), parameter - "stopping_mode" of type "boolean" (@range [0,1]), parameter - "running_tasks_total" of Long, parameter "running_tasks_per_user" - of mapping from String to Long, parameter "tasks_in_queue" of - Long, parameter "config" of mapping from String to String, - parameter "git_commit" of String - """ - return self._client.call_method( - "NarrativeJobService.status", [], self._service_ver, context - ) - - def run_job(self, params, context=None): - """ - Start a new job (long running method of service registered in ServiceRegistery). 
- Such job runs Docker image for this service in script mode. - :param params: instance of type "RunJobParams" (method - service - defined in standard JSON RPC way, typically it's module name from - spec-file followed by '.' and name of funcdef from spec-file - corresponding to running method (e.g. - 'KBaseTrees.construct_species_tree' from trees service); params - - the parameters of the method that performed this call; Optional - parameters: service_ver - specific version of deployed service, - last version is used if this parameter is not defined rpc_context - - context of current method call including nested call history - remote_url - run remote service call instead of local command line - execution. source_ws_objects - denotes the workspace objects that - will serve as a source of data when running the SDK method. These - references will be added to the autogenerated provenance. app_id - - the id of the Narrative application running this job (e.g. - repo/name) mapping meta - user defined metadata to - associate with the job. This data is passed to the User and Job - State (UJS) service. wsid - a workspace id to associate with the - job. This is passed to the UJS service, which will share the job - based on the permissions of the workspace rather than UJS ACLs. - parent_job_id - UJS id of the parent of a batch job. Sub jobs will - add this id to the NJS database under the field "parent_job_id") - -> structure: parameter "method" of String, parameter "params" of - list of unspecified object, parameter "service_ver" of String, - parameter "rpc_context" of type "RpcContext" (call_stack - - upstream calls details including nested service calls and parent - jobs where calls are listed in order from outer to inner.) -> - structure: parameter "call_stack" of list of type "MethodCall" - (time - the time the call was started; method - service defined in - standard JSON RPC way, typically it's module name from spec-file - followed by '.' and name of funcdef from spec-file corresponding - to running method (e.g. 'KBaseTrees.construct_species_tree' from - trees service); job_id - job id if method is asynchronous - (optional field).) -> structure: parameter "time" of type - "timestamp" (A time in the format YYYY-MM-DDThh:mm:ssZ, where Z is - either the character Z (representing the UTC timezone) or the - difference in time to UTC in the format +/-HHMM, eg: - 2012-12-17T23:24:06-0500 (EST time) 2013-04-03T08:56:32+0000 (UTC - time) 2013-04-03T08:56:32Z (UTC time)), parameter "method" of - String, parameter "job_id" of type "job_id" (A job id.), parameter - "run_id" of String, parameter "remote_url" of String, parameter - "source_ws_objects" of list of type "wsref" (A workspace object - reference of the form X/Y/Z, where X is the workspace name or id, - Y is the object name or id, Z is the version, which is optional.), - parameter "app_id" of String, parameter "meta" of mapping from - String to String, parameter "wsid" of Long, parameter - "parent_job_id" of String - :returns: instance of type "job_id" (A job id.) - """ - return self._client.call_method( - "NarrativeJobService.run_job", [params], self._service_ver, context - ) - - def get_job_params(self, job_id, context=None): - """ - Get job params necessary for job execution - :param job_id: instance of type "job_id" (A job id.) - :returns: multiple set - (1) parameter "params" of type - "RunJobParams" (method - service defined in standard JSON RPC way, - typically it's module name from spec-file followed by '.' 
and name - of funcdef from spec-file corresponding to running method (e.g. - 'KBaseTrees.construct_species_tree' from trees service); params - - the parameters of the method that performed this call; Optional - parameters: service_ver - specific version of deployed service, - last version is used if this parameter is not defined rpc_context - - context of current method call including nested call history - remote_url - run remote service call instead of local command line - execution. source_ws_objects - denotes the workspace objects that - will serve as a source of data when running the SDK method. These - references will be added to the autogenerated provenance. app_id - - the id of the Narrative application running this job (e.g. - repo/name) mapping meta - user defined metadata to - associate with the job. This data is passed to the User and Job - State (UJS) service. wsid - a workspace id to associate with the - job. This is passed to the UJS service, which will share the job - based on the permissions of the workspace rather than UJS ACLs. - parent_job_id - UJS id of the parent of a batch job. Sub jobs will - add this id to the NJS database under the field "parent_job_id") - -> structure: parameter "method" of String, parameter "params" of - list of unspecified object, parameter "service_ver" of String, - parameter "rpc_context" of type "RpcContext" (call_stack - - upstream calls details including nested service calls and parent - jobs where calls are listed in order from outer to inner.) -> - structure: parameter "call_stack" of list of type "MethodCall" - (time - the time the call was started; method - service defined in - standard JSON RPC way, typically it's module name from spec-file - followed by '.' and name of funcdef from spec-file corresponding - to running method (e.g. 'KBaseTrees.construct_species_tree' from - trees service); job_id - job id if method is asynchronous - (optional field).) -> structure: parameter "time" of type - "timestamp" (A time in the format YYYY-MM-DDThh:mm:ssZ, where Z is - either the character Z (representing the UTC timezone) or the - difference in time to UTC in the format +/-HHMM, eg: - 2012-12-17T23:24:06-0500 (EST time) 2013-04-03T08:56:32+0000 (UTC - time) 2013-04-03T08:56:32Z (UTC time)), parameter "method" of - String, parameter "job_id" of type "job_id" (A job id.), parameter - "run_id" of String, parameter "remote_url" of String, parameter - "source_ws_objects" of list of type "wsref" (A workspace object - reference of the form X/Y/Z, where X is the workspace name or id, - Y is the object name or id, Z is the version, which is optional.), - parameter "app_id" of String, parameter "meta" of mapping from - String to String, parameter "wsid" of Long, parameter - "parent_job_id" of String, (2) parameter "config" of mapping from - String to String - """ - return self._client.call_method( - "NarrativeJobService.get_job_params", [job_id], self._service_ver, context - ) - - def update_job(self, params, context=None): - """ - :param params: instance of type "UpdateJobParams" (is_started - - optional flag marking job as started (and triggering - exec_start_time statistics to be stored).) 
-> structure: parameter - "job_id" of type "job_id" (A job id.), parameter "is_started" of - type "boolean" (@range [0,1]) - :returns: instance of type "UpdateJobResults" -> structure: parameter - "messages" of list of String - """ - return self._client.call_method( - "NarrativeJobService.update_job", [params], self._service_ver, context - ) - - def add_job_logs(self, job_id, lines, context=None): - """ - :param job_id: instance of type "job_id" (A job id.) - :param lines: instance of list of type "LogLine" -> structure: - parameter "line" of String, parameter "is_error" of type "boolean" - (@range [0,1]) - :returns: instance of Long - """ - return self._client.call_method( - "NarrativeJobService.add_job_logs", - [job_id, lines], - self._service_ver, - context, - ) - - def get_job_logs(self, params, context=None): - """ - :param params: instance of type "GetJobLogsParams" (skip_lines - - optional parameter, number of lines to skip (in case they were - already loaded before).) -> structure: parameter "job_id" of type - "job_id" (A job id.), parameter "skip_lines" of Long - :returns: instance of type "GetJobLogsResults" (last_line_number - - common number of lines (including those in skip_lines parameter), - this number can be used as next skip_lines value to skip already - loaded lines next time.) -> structure: parameter "lines" of list - of type "LogLine" -> structure: parameter "line" of String, - parameter "is_error" of type "boolean" (@range [0,1]), parameter - "last_line_number" of Long - """ - return self._client.call_method( - "NarrativeJobService.get_job_logs", [params], self._service_ver, context - ) - - def finish_job(self, job_id, params, context=None): - """ - Register results of already started job - :param job_id: instance of type "job_id" (A job id.) - :param params: instance of type "FinishJobParams" (Either 'result', - 'error' or 'is_canceled' field should be defined; result - keeps - exact copy of what original server method puts in result block of - JSON RPC response; error - keeps exact copy of what original - server method puts in error block of JSON RPC response; - is_cancelled - Deprecated (field is kept for backward - compatibility), please use 'is_canceled' instead.) -> structure: - parameter "result" of unspecified object, parameter "error" of - type "JsonRpcError" (Error block of JSON RPC response) -> - structure: parameter "name" of String, parameter "code" of Long, - parameter "message" of String, parameter "error" of String, - parameter "is_cancelled" of type "boolean" (@range [0,1]), - parameter "is_canceled" of type "boolean" (@range [0,1]) - """ - return self._client.call_method( - "NarrativeJobService.finish_job", - [job_id, params], - self._service_ver, - context, - ) - - def check_job(self, job_id, context=None): - """ - Check if a job is finished and get results/error - :param job_id: instance of type "job_id" (A job id.) 
- :returns: instance of type "JobState" (job_id - id of job running - method finished - indicates whether job is done (including - error/cancel cases) or not, if the value is true then either of - 'returned_data' or 'detailed_error' should be defined; ujs_url - - url of UserAndJobState service used by job service status - tuple - returned by UserAndJobState.get_job_status method result - keeps - exact copy of what original server method puts in result block of - JSON RPC response; error - keeps exact copy of what original - server method puts in error block of JSON RPC response; job_state - - 'queued', 'in-progress', 'completed', or 'suspend'; position - - position of the job in execution waiting queue; creation_time, - exec_start_time and finish_time - time moments of submission, - execution start and finish events in milliseconds since Unix - Epoch, canceled - whether the job is canceled or not. cancelled - - Deprecated field, please use 'canceled' field instead.) -> - structure: parameter "job_id" of String, parameter "finished" of - type "boolean" (@range [0,1]), parameter "ujs_url" of String, - parameter "status" of unspecified object, parameter "result" of - unspecified object, parameter "error" of type "JsonRpcError" - (Error block of JSON RPC response) -> structure: parameter "name" - of String, parameter "code" of Long, parameter "message" of - String, parameter "error" of String, parameter "job_state" of - String, parameter "position" of Long, parameter "creation_time" of - Long, parameter "exec_start_time" of Long, parameter "finish_time" - of Long, parameter "cancelled" of type "boolean" (@range [0,1]), - parameter "canceled" of type "boolean" (@range [0,1]) - """ - return self._client.call_method( - "NarrativeJobService.check_job", [job_id], self._service_ver, context - ) - - def check_jobs(self, params, context=None): - """ - :param params: instance of type "CheckJobsParams" -> structure: - parameter "job_ids" of list of type "job_id" (A job id.), - parameter "with_job_params" of type "boolean" (@range [0,1]) - :returns: instance of type "CheckJobsResults" (job_states - states of - jobs, job_params - parameters of jobs, check_error - this map - includes info about errors happening during job checking.) -> - structure: parameter "job_states" of mapping from type "job_id" (A - job id.) to type "JobState" (job_id - id of job running method - finished - indicates whether job is done (including error/cancel - cases) or not, if the value is true then either of 'returned_data' - or 'detailed_error' should be defined; ujs_url - url of - UserAndJobState service used by job service status - tuple - returned by UserAndJobState.get_job_status method result - keeps - exact copy of what original server method puts in result block of - JSON RPC response; error - keeps exact copy of what original - server method puts in error block of JSON RPC response; job_state - - 'queued', 'in-progress', 'completed', or 'suspend'; position - - position of the job in execution waiting queue; creation_time, - exec_start_time and finish_time - time moments of submission, - execution start and finish events in milliseconds since Unix - Epoch, canceled - whether the job is canceled or not. cancelled - - Deprecated field, please use 'canceled' field instead.) 
-> - structure: parameter "job_id" of String, parameter "finished" of - type "boolean" (@range [0,1]), parameter "ujs_url" of String, - parameter "status" of unspecified object, parameter "result" of - unspecified object, parameter "error" of type "JsonRpcError" - (Error block of JSON RPC response) -> structure: parameter "name" - of String, parameter "code" of Long, parameter "message" of - String, parameter "error" of String, parameter "job_state" of - String, parameter "position" of Long, parameter "creation_time" of - Long, parameter "exec_start_time" of Long, parameter "finish_time" - of Long, parameter "cancelled" of type "boolean" (@range [0,1]), - parameter "canceled" of type "boolean" (@range [0,1]), parameter - "job_params" of mapping from type "job_id" (A job id.) to type - "RunJobParams" (method - service defined in standard JSON RPC way, - typically it's module name from spec-file followed by '.' and name - of funcdef from spec-file corresponding to running method (e.g. - 'KBaseTrees.construct_species_tree' from trees service); params - - the parameters of the method that performed this call; Optional - parameters: service_ver - specific version of deployed service, - last version is used if this parameter is not defined rpc_context - - context of current method call including nested call history - remote_url - run remote service call instead of local command line - execution. source_ws_objects - denotes the workspace objects that - will serve as a source of data when running the SDK method. These - references will be added to the autogenerated provenance. app_id - - the id of the Narrative application running this job (e.g. - repo/name) mapping meta - user defined metadata to - associate with the job. This data is passed to the User and Job - State (UJS) service. wsid - a workspace id to associate with the - job. This is passed to the UJS service, which will share the job - based on the permissions of the workspace rather than UJS ACLs. - parent_job_id - UJS id of the parent of a batch job. Sub jobs will - add this id to the NJS database under the field "parent_job_id") - -> structure: parameter "method" of String, parameter "params" of - list of unspecified object, parameter "service_ver" of String, - parameter "rpc_context" of type "RpcContext" (call_stack - - upstream calls details including nested service calls and parent - jobs where calls are listed in order from outer to inner.) -> - structure: parameter "call_stack" of list of type "MethodCall" - (time - the time the call was started; method - service defined in - standard JSON RPC way, typically it's module name from spec-file - followed by '.' and name of funcdef from spec-file corresponding - to running method (e.g. 'KBaseTrees.construct_species_tree' from - trees service); job_id - job id if method is asynchronous - (optional field).) 
-> structure: parameter "time" of type - "timestamp" (A time in the format YYYY-MM-DDThh:mm:ssZ, where Z is - either the character Z (representing the UTC timezone) or the - difference in time to UTC in the format +/-HHMM, eg: - 2012-12-17T23:24:06-0500 (EST time) 2013-04-03T08:56:32+0000 (UTC - time) 2013-04-03T08:56:32Z (UTC time)), parameter "method" of - String, parameter "job_id" of type "job_id" (A job id.), parameter - "run_id" of String, parameter "remote_url" of String, parameter - "source_ws_objects" of list of type "wsref" (A workspace object - reference of the form X/Y/Z, where X is the workspace name or id, - Y is the object name or id, Z is the version, which is optional.), - parameter "app_id" of String, parameter "meta" of mapping from - String to String, parameter "wsid" of Long, parameter - "parent_job_id" of String, parameter "check_error" of mapping from - type "job_id" (A job id.) to type "JsonRpcError" (Error block of - JSON RPC response) -> structure: parameter "name" of String, - parameter "code" of Long, parameter "message" of String, parameter - "error" of String - """ - return self._client.call_method( - "NarrativeJobService.check_jobs", [params], self._service_ver, context - ) - - def cancel_job(self, params, context=None): - """ - :param params: instance of type "CancelJobParams" -> structure: - parameter "job_id" of type "job_id" (A job id.) - """ - return self._client.call_method( - "NarrativeJobService.cancel_job", [params], self._service_ver, context - ) - - def check_job_canceled(self, params, context=None): - """ - Check whether a job has been canceled. This method is lightweight compared to check_job. - :param params: instance of type "CancelJobParams" -> structure: - parameter "job_id" of type "job_id" (A job id.) - :returns: instance of type "CheckJobCanceledResult" (job_id - id of - job running method finished - indicates whether job is done - (including error/cancel cases) or not canceled - whether the job - is canceled or not. 
ujs_url - url of UserAndJobState service used - by job service) -> structure: parameter "job_id" of type "job_id" - (A job id.), parameter "finished" of type "boolean" (@range - [0,1]), parameter "canceled" of type "boolean" (@range [0,1]), - parameter "ujs_url" of String - """ - return self._client.call_method( - "NarrativeJobService.check_job_canceled", - [params], - self._service_ver, - context, - ) diff --git a/deployment/bin/cron/clients/baseclient.py b/deployment/bin/cron/clients/baseclient.py deleted file mode 100644 index 1f78b54..0000000 --- a/deployment/bin/cron/clients/baseclient.py +++ /dev/null @@ -1,311 +0,0 @@ -############################################################ -# -# Autogenerated by the KBase type compiler - -# any changes made here will be overwritten -# -############################################################ - -from __future__ import print_function - -import json as _json -import requests as _requests -import random as _random -import os as _os -import traceback as _traceback -from requests.exceptions import ConnectionError -from urllib3.exceptions import ProtocolError - -try: - from configparser import ConfigParser as _ConfigParser # py 3 -except ImportError: - from ConfigParser import ConfigParser as _ConfigParser # py 2 - -try: - from urllib.parse import urlparse as _urlparse # py3 -except ImportError: - from urlparse import urlparse as _urlparse # py2 -import time - -_CT = "content-type" -_AJ = "application/json" -_URL_SCHEME = frozenset(["http", "https"]) -_CHECK_JOB_RETRYS = 3 - - -def _get_token(user_id, password, auth_svc): - # This is bandaid helper function until we get a full - # KBase python auth client released - # note that currently globus usernames, and therefore kbase usernames, - # cannot contain non-ascii characters. In python 2, quote doesn't handle - # unicode, so if this changes this client will need to change. - body = ( - "user_id=" - + _requests.utils.quote(user_id) - + "&password=" - + _requests.utils.quote(password) - + "&fields=token" - ) - ret = _requests.post(auth_svc, data=body, allow_redirects=True) - status = ret.status_code - if status >= 200 and status <= 299: - tok = _json.loads(ret.text) - elif status == 403: - raise Exception( - "Authentication failed: Bad user_id/password " - + "combination for user %s" % (user_id) - ) - else: - raise Exception(ret.text) - return tok["token"] - - -def _read_inifile( - file=_os.environ.get( # @ReservedAssignment - "KB_DEPLOYMENT_CONFIG", _os.environ["HOME"] + "/.kbase_config" - ) -): - # Another bandaid to read in the ~/.kbase_config file if one is present - authdata = None - if _os.path.exists(file): - try: - config = _ConfigParser() - config.read(file) - # strip down whatever we read to only what is legit - authdata = { - x: config.get("authentication", x) - if config.has_option("authentication", x) - else None - for x in ( - "user_id", - "token", - "client_secret", - "keyfile", - "keyfile_passphrase", - "password", - ) - } - except Exception as e: - print("Error while reading INI file {}: {}".format(file, e)) - return authdata - - -class ServerError(Exception): - def __init__(self, name, code, message, data=None, error=None): - super(Exception, self).__init__(message) - self.name = name - self.code = code - self.message = "" if message is None else message - self.data = data or error or "" - # data = JSON RPC 2.0, error = 1.1 - - def __str__(self): - return ( - self.name + ": " + str(self.code) + ". 
" + self.message + "\n" + self.data - ) - - -class _JSONObjectEncoder(_json.JSONEncoder): - def default(self, obj): - if isinstance(obj, set): - return list(obj) - if isinstance(obj, frozenset): - return list(obj) - return _json.JSONEncoder.default(self, obj) - - -class BaseClient(object): - """ - The KBase base client. - Required initialization arguments (positional): - url - the url of the the service to contact: - For SDK methods: either the url of the callback service or the - Narrative Job Service Wrapper. - For SDK dynamic services: the url of the Service Wizard. - For other services: the url of the service. - Optional arguments (keywords in positional order): - timeout - methods will fail if they take longer than this value in seconds. - Default 1800. - user_id - a KBase user name. - password - the password corresponding to the user name. - token - a KBase authentication token. - ignore_authrc - if True, don't read auth configuration from - ~/.kbase_config. - trust_all_ssl_certificates - set to True to trust self-signed certificates. - If you don't understand the implications, leave as the default, False. - auth_svc - the url of the KBase authorization service. - lookup_url - set to true when contacting KBase dynamic services. - async_job_check_time_ms - the wait time between checking job state for - asynchronous jobs run with the run_job method. - """ - - def __init__( - self, - url=None, - timeout=30 * 60, - user_id=None, - password=None, - token=None, - ignore_authrc=False, - trust_all_ssl_certificates=False, - auth_svc="https://kbase.us/services/auth/api/legacy/KBase/Sessions/Login", - lookup_url=False, - async_job_check_time_ms=100, - async_job_check_time_scale_percent=150, - async_job_check_max_time_ms=300000, - ): - if url is None: - raise ValueError("A url is required") - scheme, _, _, _, _, _ = _urlparse(url) - if scheme not in _URL_SCHEME: - raise ValueError(url + " isn't a valid http url") - self.url = url - self.timeout = int(timeout) - self._headers = dict() - self.trust_all_ssl_certificates = trust_all_ssl_certificates - self.lookup_url = lookup_url - self.async_job_check_time = async_job_check_time_ms / 1000.0 - self.async_job_check_time_scale_percent = async_job_check_time_scale_percent - self.async_job_check_max_time = async_job_check_max_time_ms / 1000.0 - # token overrides user_id and password - if token is not None: - self._headers["AUTHORIZATION"] = token - elif user_id is not None and password is not None: - self._headers["AUTHORIZATION"] = _get_token(user_id, password, auth_svc) - elif "KB_AUTH_TOKEN" in _os.environ: - self._headers["AUTHORIZATION"] = _os.environ.get("KB_AUTH_TOKEN") - elif not ignore_authrc: - authdata = _read_inifile() - if authdata is not None: - if authdata.get("token") is not None: - self._headers["AUTHORIZATION"] = authdata["token"] - elif ( - authdata.get("user_id") is not None - and authdata.get("password") is not None - ): - self._headers["AUTHORIZATION"] = _get_token( - authdata["user_id"], authdata["password"], auth_svc - ) - if self.timeout < 1: - raise ValueError("Timeout value must be at least 1 second") - - def _call(self, url, method, params, context=None): - arg_hash = { - "method": method, - "params": params, - "version": "1.1", - "id": str(_random.random())[2:], - } - if context: - if type(context) is not dict: - raise ValueError("context is not type dict as required.") - arg_hash["context"] = context - - body = _json.dumps(arg_hash, cls=_JSONObjectEncoder) - ret = _requests.post( - url, - data=body, - headers=self._headers, 
- timeout=self.timeout, - verify=not self.trust_all_ssl_certificates, - ) - ret.encoding = "utf-8" - if ret.status_code == 500: - if ret.headers.get(_CT) == _AJ: - err = ret.json() - if "error" in err: - raise ServerError(**err["error"]) - else: - raise ServerError("Unknown", 0, ret.text) - else: - raise ServerError("Unknown", 0, ret.text) - if not ret.ok: - ret.raise_for_status() - resp = ret.json() - if "result" not in resp: - raise ServerError("Unknown", 0, "An unknown server error occurred") - if not resp["result"]: - return - if len(resp["result"]) == 1: - return resp["result"][0] - return resp["result"] - - def _get_service_url(self, service_method, service_version): - if not self.lookup_url: - return self.url - service, _ = service_method.split(".") - service_status_ret = self._call( - self.url, - "ServiceWizard.get_service_status", - [{"module_name": service, "version": service_version}], - ) - return service_status_ret["url"] - - def _set_up_context(self, service_ver=None, context=None): - if service_ver: - if not context: - context = {} - context["service_ver"] = service_ver - return context - - def _check_job(self, service, job_id): - return self._call(self.url, service + "._check_job", [job_id]) - - def _submit_job(self, service_method, args, service_ver=None, context=None): - context = self._set_up_context(service_ver, context) - mod, meth = service_method.split(".") - return self._call(self.url, mod + "._" + meth + "_submit", args, context) - - def run_job(self, service_method, args, service_ver=None, context=None): - """ - Run a SDK method asynchronously. - Required arguments: - service_method - the service and method to run, e.g. myserv.mymeth. - args - a list of arguments to the method. - Optional arguments: - service_ver - the version of the service to run, e.g. a git hash - or dev/beta/release. - context - the rpc context dict. - """ - mod, _ = service_method.split(".") - job_id = self._submit_job(service_method, args, service_ver, context) - async_job_check_time = self.async_job_check_time - check_job_failures = 0 - while check_job_failures < _CHECK_JOB_RETRYS: - time.sleep(async_job_check_time) - async_job_check_time = ( - async_job_check_time * self.async_job_check_time_scale_percent / 100.0 - ) - if async_job_check_time > self.async_job_check_max_time: - async_job_check_time = self.async_job_check_max_time - - try: - job_state = self._check_job(mod, job_id) - except (ConnectionError, ProtocolError): - _traceback.print_exc() - check_job_failures += 1 - continue - - if job_state["finished"]: - if not job_state["result"]: - return - if len(job_state["result"]) == 1: - return job_state["result"][0] - return job_state["result"] - raise RuntimeError( - "_check_job failed {} times and exceeded limit".format(check_job_failures) - ) - - def call_method(self, service_method, args, service_ver=None, context=None): - """ - Call a standard or dynamic service synchronously. - Required arguments: - service_method - the service and method to run, e.g. myserv.mymeth. - args - a list of arguments to the method. - Optional arguments: - service_ver - the version of the service to run, e.g. a git hash - or dev/beta/release. - context - the rpc context dict. 
- """ - url = self._get_service_url(service_method, service_ver) - context = self._set_up_context(service_ver, context) - return self._call(url, service_method, args, context) diff --git a/deployment/bin/cron/container_reaper.py b/deployment/bin/cron/container_reaper.py index 8a6f948..522fadd 100755 --- a/deployment/bin/cron/container_reaper.py +++ b/deployment/bin/cron/container_reaper.py @@ -1,163 +1,140 @@ #!/miniconda/bin/python -import datetime -import fnmatch +""" +This script is automatically run by the condor cronjob periodically +in order to clean up containers > 7 days or running without a starter +Required env vars are +# CONTAINER_REAPER_ENDPOINTS - A comma separated list of EE2 endpoints to manage containers for +# DELETE_ABANDONED_CONTAINERS - Set to true to enable the container reaper +# SLACK_WEBHOOK_URL - The slack webhook url to send messages to +""" + import json -import logging import os import socket +import subprocess +import time +from datetime import datetime, timedelta +from typing import Set import docker -import psutil import requests -from clients.NarrativeJobServiceClient import NarrativeJobService - -from typing import List, Dict - -slack_key = os.environ.get("SLACK_WEBHOOK_KEY", None) -# ee_notifications_channel -webhook_url = os.environ.get("SLACK_WEBHOOK_URL", None) - -kill = os.environ.get("DELETE_ABANDONED_CONTAINERS", "false") -if kill.lower() == "true": - kill = True -else: - kill = False - -njs_endpoint_url = os.environ.get("NJS_ENDPOINT", None) - -if njs_endpoint_url is None: - raise Exception("NJS Endpoint not set") - -hostname = socket.gethostname() -dc = docker.from_env() - - -def find_dockerhub_jobs() -> Dict: - # send_slack_message(f"Job CONTAINER_REAPER is FINDING DOCKERHUB JOBS at {datetime.datetime.now()}") - - try: - all_containers = dc.containers - list = all_containers.list() - except Exception as e: - send_slack_message(str(e) + hostname) - - job_containers = {} - - for container in list: - cnt_id = container.id - try: - cnt = all_containers.get(cnt_id) - labels = cnt.labels - if "condor_id" in labels.keys() and "njs_endpoint" in labels.keys(): - labels["image"] = cnt.image - job_containers[cnt_id] = labels - except Exception as e: - logging.error(f"Container {cnt_id} doesn't exist anymore") - logging.error(e) - - return job_containers - - -def find_running_jobs(ps_name: str): - # send_slack_message(f"Job CONTAINER_REAPER is FINDING RUNNING JOBS at {datetime.datetime.now()}") - - "Return a list of processes matching 'name'." 
- ls = [] - for p in psutil.process_iter(attrs=["name", "cmdline"]): - if ps_name in p.info["cmdline"]: - ls.append(p.info["cmdline"][-2]) - return ls +from docker.models.containers import Container def send_slack_message(message: str): """ - :param message: Escaped Message to send to slack - :return: """ - + webhook_url = os.environ.get("SLACK_WEBHOOK_URL", None) slack_data = {"text": message} - response = requests.post( + requests.post( webhook_url, data=json.dumps(slack_data), headers={"Content-Type": "application/json"}, ) -def notify_slack(cnt_id: str, labels: dict(), running_job_ids: List): - now = datetime.datetime.now() +def filter_containers_by_time(potential_containers, days=0, minutes=0): + filtered_containers = [] + seven_days_ago = datetime.now() - timedelta(days=days, minutes=minutes) - job_id = labels.get("job_id", None) - # app_id = labels['app_id'] - app_name = labels.get("app_name", None) - method_name = labels.get("method_name", None) - condor_id = labels.get("condor_id", None) - username = labels.get("user_name", None) + for old_container in potential_containers: + # Do we need to catch the chance that there is no created attribute? + created_time_str = old_container.attrs['Created'][:26] + created_time = datetime.fromisoformat(created_time_str) + if created_time <= seven_days_ago: + filtered_containers.append(old_container) + return filtered_containers - msg = f"cnt_id:{cnt_id} job_id:{job_id} condor_id:{condor_id} for {username} not in running_job_ids {running_job_ids} ({now}) hostname:({hostname}) app:{app_name} method:{method_name} (kill = {kill}) " - send_slack_message(msg) +def get_running_time_message(container, title=""): + image_name = container.attrs['Config']['Image'] + if "kbase" in image_name: + image_name = image_name.split(":")[1] + user_name = container.attrs['Config']['Labels'].get('user_name') -# @deprecated for EVENTLOG -def notify_user(cnt_id: str, labels: Dict): - username = labels.get("user_name", None) - job_id = labels.get("job_id", None) - # TODO add this to a configuration somewhere or ENV variable - job_directory = f"/mnt/awe/condor/{username}/{job_id}" + total_running_time = datetime.now() - datetime.fromisoformat(container.attrs['Created'][:26]) + days = total_running_time.days + hours = total_running_time.seconds // 3600 - print("About to notify") - print(labels) + formatted_running_time = f"{days}D:{hours}H" + return f"{title}:{hostname} {image_name}:{user_name}:{formatted_running_time}" - env_files = [] - for file in os.listdir(job_directory): - if fnmatch.fnmatch(file, "env_*"): - env_files.append(file) +def remove_with_backoff(container,message,backoff=30): + try: + container.stop() + time.sleep(backoff) # Wait for backoff period before attempting to remove + container.remove() + except Exception as e: + # Not much we can do here, just hope that the next pass will remove it + pass +def reap_containers_running_more_than_7_days(potential_containers: Set[Container]): + old_containers = filter_containers_by_time(potential_containers, days=7) - print(env_files) - env_filepath = env_files[0] - if os.path.isfile(env_filepath): - with open(env_filepath, "r") as content_file: - content = content_file.readlines() + if old_containers: + for old_container in old_containers: + message = get_running_time_message(old_container, title="reaper7daylimit") + send_slack_message(message) + remove_with_backoff(old_container, message) - token = None - for line in content: - if "KB_AUTH_TOKEN" in line: - token = line.split("=")[1] - if token: - njs = 
NarrativeJobService(token=token, url=njs_endpoint_url) - status = njs.check_job(job_id) - print(status) +def reap_containers_when_there_is_no_starter(potential_containers: Set[Container]): + """ + This function will reap containers that are running but have no starter, and have been running for 30 mins + """ + condor_starter = check_for_condor_starter() + if condor_starter: + return -def kill_docker_container(cnt_id: str): - if kill is True: - cnt = dc.containers.get(cnt_id) - cnt.kill() - else: - pass + runaway_containers = filter_containers_by_time(potential_containers, minutes=30) + if runaway_containers: + for runaway_container in runaway_containers: + message = get_running_time_message(runaway_container, title="reaper_no_starter") + send_slack_message(message) + remove_with_backoff(container,message) -def kill_dead_jobs(running_jobs: List, docker_processes: Dict): - # send_slack_message(f"Job CONTAINER_REAPER is KILLING DEAD JOBS at {datetime.datetime.now()}") - for cnt_id in docker_processes: - labels = docker_processes[cnt_id] - job_id = labels.get("job_id", None) - if job_id not in running_jobs: - if kill is True: - kill_docker_container(cnt_id) - notify_slack(cnt_id, labels, running_jobs) +def check_for_condor_starter(): + result = subprocess.run("ps -ef | grep '[c]ondor_starter'", shell=True, stdout=subprocess.PIPE, text=True) + count = len(result.stdout.strip().split('\n')) if result.stdout.strip() else 0 + return count > 0 if __name__ == "__main__": - try: - # send_slack_message(f"Job CONTAINER_REAPER is beginning at {datetime.datetime.now()}") - name = "us.kbase.narrativejobservice.sdkjobs.SDKLocalMethodRunner" + """ + PDSH_SSH_ARGS_APPEND="-o StrictHostKeyChecking=no -q" pdsh -w rancher@km[2-28]-p "docker ps | grep kbase| grep days" | sort -V | grep -v worker + """ - running_java_jobs = find_running_jobs(name) - docker_jobs = find_dockerhub_jobs() - kill_dead_jobs(running_java_jobs, docker_jobs) - # send_slack_message(f"Job CONTAINER_REAPER is ENDING at {datetime.datetime.now()}") - except Exception as e: - send_slack_message(f"FAILURE on {hostname}" + str(e.with_traceback())) - logging.error(e.with_traceback()) + CONTAINER_REAPER_ENDPOINTS = os.environ.get("CONTAINER_REAPER_ENDPOINTS", "").split(",") + DELETE_ABANDONED_CONTAINERS = os.environ.get("DELETE_ABANDONED_CONTAINERS", "false").lower() == "true" + + if not DELETE_ABANDONED_CONTAINERS: + exit("DELETE_ABANDONED_CONTAINERS is not set to true") + if not CONTAINER_REAPER_ENDPOINTS or CONTAINER_REAPER_ENDPOINTS == [""]: + exit("No CONTAINER_REAPER_ENDPOINTS set, unsure where to manage containers") + + hostname = socket.gethostname() + dc = docker.from_env() + + # Define the filters to specify that you are searching for only your specific containers in a multi worker environment + # Also add user_name as a filter to make sure you aren't killing containers that happen to have EE2_ENDPOINT set, + # The chances of EE2_endpoint and user_name as labels on a container should be very small. 
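+    # As a rough illustration, the label query assembled below boils down to a
+    # docker-py call of this shape (the endpoint value here is hypothetical):
+    #
+    #   filters = {
+    #       "status": "running",
+    #       "label": ["ee2_endpoint=https://kbase.example.org/services/ee2", "user_name"],
+    #   }
+    #   matching = docker.from_env().containers.list(filters=filters)
+    #
+    # A "key=value" label entry must match exactly, while a bare "user_name"
+    # entry only requires that the label exists on the container.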
+ # CONTAINER_REAPER_ENDPOINTS = ["https://kbase.us/services/ee2", "https://appdev.kbase.us/services/ee2", "https://services.kbase.us/services/ee2/"] + unique_containers = set() + filters = {} + for endpoint in CONTAINER_REAPER_ENDPOINTS: + + filters.update({ + "status": "running", + "label": [ + f"ee2_endpoint={endpoint.strip()}", + "user_name" + ] + }) + containers = dc.containers.list(filters=filters) + for container in containers: + unique_containers.add(container) + + reap_containers_running_more_than_7_days(potential_containers=unique_containers) + reap_containers_when_there_is_no_starter(potential_containers=unique_containers) diff --git a/deployment/bin/cron/container_reaper_ee2.py b/deployment/bin/cron/container_reaper_ee2.py index d93ca77..56d936d 100755 --- a/deployment/bin/cron/container_reaper_ee2.py +++ b/deployment/bin/cron/container_reaper_ee2.py @@ -1,154 +1,152 @@ -#!/miniconda/bin/python -import datetime -import json -import logging -import os -import socket -from typing import List, Dict - -import docker -from docker.models.containers import Container -import psutil -import requests - -logging.basicConfig(level=logging.INFO) - -slack_key = os.environ.get("SLACK_WEBHOOK_KEY", None) -# ee_notifications_channel -webhook_url = os.environ.get("SLACK_WEBHOOK_URL", None) - -kill = os.environ.get("DELETE_ABANDONED_CONTAINERS", "false") -if kill.lower() == "true": - kill = True -else: - kill = False - -ee2_endpoint_url = os.environ.get("EE2_ENDPOINT", None) - -if ee2_endpoint_url is None: - raise Exception("EE2 Endpoint not set") - -hostname = socket.gethostname() -dc = docker.from_env() - - -def find_dockerhub_jobs() -> Dict: - # send_slack_message(f"Job CONTAINER_REAPER is FINDING DOCKERHUB JOBS at {datetime.datetime.now()}") - - try: - all_containers = dc.containers - container_list = all_containers.list() - except Exception as e: - send_slack_message(str(e) + hostname) - raise e - - job_containers = {} - - for container in container_list: - cnt_id = container.id - try: - cnt = all_containers.get(cnt_id) - labels = cnt.labels - label_keys = labels.keys() - if ( - "condor_id" in label_keys - and "ee2_endpoint" in label_keys - and "worker_hostname" in label_keys - ): - if ( - labels.get("worker_hostname") == hostname - and labels.get("ee2_endpoint") == ee2_endpoint_url - ): - labels["image"] = cnt.image - job_containers[cnt_id] = labels - except Exception as e: - logging.error(f"Container {cnt_id} doesn't exist anymore") - logging.error(e) - - return job_containers - - -def find_running_jobs(): - "Return a list of job ids from running job processes. 
Since python procs have multiple entries, keep only 1 version" - # send_slack_message(f"Job CONTAINER_REAPER is FINDING RUNNING JOBS at {datetime.datetime.now()}") - ls = [] - for p in psutil.process_iter(attrs=["name", "cmdline"]): - if ( - "/miniconda/bin/python" in p.info["cmdline"] - and "./jobrunner.py" in p.info["cmdline"] - ): - ls.append(p.info["cmdline"][-2]) - return list(set(ls)) - - -def send_slack_message(message: str): - """ - - :param message: Escaped Message to send to slack - :return: - """ - - slack_data = {"text": message} - response = requests.post( - webhook_url, - data=json.dumps(slack_data), - headers={"Content-Type": "application/json"}, - ) - - -def notify_slack(cnt_id: str, labels: dict(), running_job_ids: List): - now = datetime.datetime.now() - - job_id = labels.get("job_id", None) - # app_id = labels['app_id'] - app_name = labels.get("app_name", None) - method_name = labels.get("method_name", None) - condor_id = labels.get("condor_id", None) - username = labels.get("user_name", None) - - msg = f"cnt_id:{cnt_id} job_id:{job_id} condor_id:{condor_id} for {username} not in running_job_ids {running_job_ids} ({now}) hostname:({hostname}) app:{app_name} method:{method_name} (kill = {kill}) " - send_slack_message(msg) - - -def kill_docker_container(cnt_id: str): - """ - Kill a docker container. The job finish script should clean up after itself. - :param cnt_id: The container to kill/remove - """ - if kill is True: - cnt = dc.containers.get(cnt_id) # type: Container - try: - cnt.kill() - except Exception: - try: - cnt.remove(force=True) - except Exception: - send_slack_message(f"Couldn't delete {cnt_id} on {hostname}") - - -def kill_dead_jobs(running_jobs: List, docker_processes: Dict): - """ - Check whether there are runaway docker containers - :param running_jobs: A list of condor jobs gathered from the starter scripts - :param docker_processes: A list of docker containers - """ - # send_slack_message(f"Job CONTAINER_REAPER is KILLING DEAD JOBS at {datetime.datetime.now()}") - for cnt_id in docker_processes: - labels = docker_processes[cnt_id] - job_id = labels.get("job_id", None) - if job_id not in running_jobs: - notify_slack(cnt_id, labels, running_jobs) - if kill is True: - kill_docker_container(cnt_id) - - -if __name__ == "__main__": - try: - # send_slack_message(f"Job CONTAINER_REAPER is beginning at {datetime.datetime.now()}") - locally_running_jobrunners = find_running_jobs() - docker_jobs = find_dockerhub_jobs() - kill_dead_jobs(locally_running_jobrunners, docker_jobs) - # send_slack_message(f"Job CONTAINER_REAPER is ENDING at {datetime.datetime.now()}") - except Exception as e: - send_slack_message(f"FAILURE on {hostname}" + str(e)) - logging.error(str(e)) +# #!/miniconda/bin/python +# import datetime +# import json +# import logging +# import os +# import socket +# from typing import List, Dict +# +# import docker +# import psutil +# import requests +# from docker.models.containers import Container +# +# # REQUIRED ENVIRONMENT VARIABLES +# ee2_endpoint_url = os.environ.get("EE2_ENDPOINT") +# if not ee2_endpoint_url: +# raise Exception("EE2 Endpoint not set") +# +# webhook_url = os.environ.get("SLACK_WEBHOOK_URL") +# if not webhook_url: +# raise Exception("SLACK_WEBHOOK_URL is not defined") +# +# # OPTIONAL ENVIRONMENT VARIABLES +# kill = os.environ.get("DELETE_ABANDONED_CONTAINERS", "false").lower() == "true" +# +# logging.basicConfig(level=logging.INFO) +# hostname = socket.gethostname() +# dc = docker.from_env() +# +# +# def find_dockerhub_jobs() -> 
Dict: +# try: +# all_containers = dc.containers +# container_list = all_containers.list() +# except Exception as e: +# send_slack_message(str(e) + hostname) +# raise e +# +# job_containers = {} +# +# for container in container_list: +# cnt_id = container.id +# try: +# cnt = all_containers.get(cnt_id) +# labels = cnt.labels +# label_keys = labels.keys() +# if ( +# "condor_id" in label_keys +# and "ee2_endpoint" in label_keys +# and "worker_hostname" in label_keys +# ): +# if ( +# labels.get("worker_hostname") == hostname +# and labels.get("ee2_endpoint") == ee2_endpoint_url +# ): +# labels["image"] = cnt.image +# job_containers[cnt_id] = labels +# except Exception as e: +# logging.error(f"Container {cnt_id} doesn't exist anymore") +# logging.error(e) +# +# return job_containers +# +# +# def find_running_jobs(): +# """ +# Return a list of job ids from running job processes. +# Since python procs have multiple entries, keep only 1 version +# """ +# +# # send_slack_message(f"Job CONTAINER_REAPER is FINDING RUNNING JOBS at {datetime.datetime.now()}") +# ls = [] +# for p in psutil.process_iter(attrs=["name", "cmdline"]): +# if ( +# "/miniconda/bin/python" in p.info["cmdline"] +# and "./jobrunner.py" in p.info["cmdline"] +# ): +# ls.append(p.info["cmdline"][-2]) +# return list(set(ls)) +# +# +# def send_slack_message(message: str): +# """ +# +# :param message: Escaped Message to send to slack +# :return: +# """ +# +# slack_data = {"text": message} +# requests.post( +# webhook_url, +# data=json.dumps(slack_data), +# headers={"Content-Type": "application/json"}, +# ) +# +# +# def notify_slack(cnt_id: str, labels: dict(), running_job_ids: List): +# now = datetime.datetime.now() +# +# job_id = labels.get("job_id", None) +# # app_id = labels['app_id'] +# app_name = labels.get("app_name", None) +# method_name = labels.get("method_name", None) +# condor_id = labels.get("condor_id", None) +# username = labels.get("user_name", None) +# +# msg = f"cnt_id:{cnt_id} job_id:{job_id} condor_id:{condor_id} for {username} not in running_job_ids {running_job_ids} ({now}) hostname:({hostname}) app:{app_name} method:{method_name} (kill = {kill}) " +# send_slack_message(msg) +# +# +# def kill_docker_container(cnt_id: str): +# """ +# Kill a docker container. The job finish script should clean up after itself. 
+# :param cnt_id: The container to kill/remove +# """ +# if kill is True: +# cnt = dc.containers.get(cnt_id) # type: Container +# try: +# cnt.kill() +# except Exception: +# try: +# cnt.remove(force=True) +# except Exception: +# send_slack_message(f"Couldn't delete {cnt_id} on {hostname}") +# +# +# def kill_dead_jobs(running_jobs: List, docker_processes: Dict): +# """ +# Check whether there are runaway docker containers +# :param running_jobs: A list of condor jobs gathered from the starter scripts +# :param docker_processes: A list of docker containers +# """ +# # send_slack_message(f"Job CONTAINER_REAPER is KILLING DEAD JOBS at {datetime.datetime.now()}") +# for cnt_id in docker_processes: +# labels = docker_processes[cnt_id] +# job_id = labels.get("job_id", None) +# if job_id not in running_jobs: +# notify_slack(cnt_id, labels, running_jobs) +# if kill is True: +# kill_docker_container(cnt_id) +# +# +# if __name__ == "__main__": +# try: +# # send_slack_message(f"Job CONTAINER_REAPER is beginning at {datetime.datetime.now()}") +# locally_running_jobrunners = find_running_jobs() +# docker_jobs = find_dockerhub_jobs() +# kill_dead_jobs(locally_running_jobrunners, docker_jobs) +# # send_slack_message(f"Job CONTAINER_REAPER is ENDING at {datetime.datetime.now()}") +# except Exception as ev: +# send_slack_message(f"FAILURE on {hostname}" + str(ev)) +# logging.error(str(ev)) diff --git a/deployment/bin/cron/delete_exited_containers.py b/deployment/bin/cron/delete_exited_containers.py index 60c9ca5..fe9640a 100755 --- a/deployment/bin/cron/delete_exited_containers.py +++ b/deployment/bin/cron/delete_exited_containers.py @@ -1,17 +1,18 @@ #!/miniconda/bin/python -import os +# This script is automatically run by the condor cronjob periodically +# in order to clean up exited docker containers. 
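+# As a sketch, the cleanup below reduces to a few docker-py calls (this assumes
+# a local Docker daemon and a SLACK_WEBHOOK_URL in the environment):
+#
+#   dc = docker.from_env()
+#   exited = dc.containers.list(filters={"status": "exited"})
+#   if exited:
+#       dc.containers.prune()  # removes all stopped containers in one pass
+#       send_slack_message(f"Deleted {len(exited)} exited containers")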
import json -import requests -import docker +import os import socket -import datetime + +import docker +import requests def send_slack_message(message: str): """ :param message: Escaped Message to send to slack """ - # ee_notifications_channel webhook_url = os.environ.get("SLACK_WEBHOOK_URL", None) slack_data = {"text": message} requests.post( @@ -22,14 +23,10 @@ def send_slack_message(message: str): if __name__ == "__main__": - # send_slack_message(f"Job DELETE_EXITED is beginning at {datetime.datetime.now()}") hostname = socket.gethostname() dc = docker.from_env() ec = dc.containers.list(filters={"status": "exited"}) - count = len(ec) - - if count > 0: + container_image_names = [c.attrs["Config"]["Image"] for c in ec] + if container_image_names: dc.containers.prune() - send_slack_message(f"Deleted {count} stopped containers on {hostname}") - - # send_slack_message(f"Job DELETE_EXITED is ENDING at {datetime.datetime.now()}") + send_slack_message(f"Deleted {len(ec)} `exited` containers on {hostname} {container_image_names}") diff --git a/deployment/bin/cron/health_check.py b/deployment/bin/cron/health_check.py index 80cebc4..69d1c1c 100755 --- a/deployment/bin/cron/health_check.py +++ b/deployment/bin/cron/health_check.py @@ -17,44 +17,38 @@ import psutil import requests - -def send_slack_message(message: str): - """ - :param message: Escaped Message to send to slack - """ - # ee_notifications_channel - webhook_url = os.environ.get("SLACK_WEBHOOK_URL", None) - slack_data = {"text": message} - requests.post( - webhook_url, - data=json.dumps(slack_data), - headers={"Content-Type": "application/json"}, - ) - - -debug = False +# Optional environment variables +var_lib_docker = os.environ.get("DOCKER_CACHE", "/var/lib/docker/") scratch = os.environ.get("CONDOR_SUBMIT_WORKDIR", "/cdr") scratch += os.environ.get("EXECUTE_SUFFIX", "") -check_condor_starter_health = ( - os.environ.get("CHECK_CONDOR_STARTER_HEALTH", "true").lower() == "true" -) - -# Endpoint +check_condor_starter_health = (os.environ.get("CHECK_CONDOR_STARTER_HEALTH", "true").lower() == "true") +debug = (os.environ.get("DEBUG", "false").lower() == "true") +# Required environment variables endpoint = os.environ.get("SERVICE_ENDPOINT", None) - if endpoint is None: exit("SERVICE_ENDPOINT is not defined") -# Docker Cache -var_lib_docker = os.environ.get("DOCKER_CACHE", "/var/lib/docker/") +webhook_url = os.environ.get("SLACK_WEBHOOK_URL", None) +if webhook_url is None: + exit("SLACK_WEBHOOK_URL is not defined") + + user = "nobody" pid = pwd.getpwnam(user).pw_uid gid = pwd.getpwnam(user).pw_gid - -# TODO Report to nagios +def send_slack_message(message: str): + """ + :param message: Escaped Message to send to slack + """ + slack_data = {"text": message} + requests.post( + webhook_url, + data=json.dumps(slack_data), + headers={"Content-Type": "application/json"}, + ) def exit_unsuccessfully(message: str, send_to_slack=True): @@ -66,7 +60,7 @@ def exit_unsuccessfully(message: str, send_to_slack=True): print("- update:true") now = datetime.datetime.now() - if send_to_slack is True: + if send_to_slack: send_slack_message( f"POSSIBLE BLACK HOLE: Ran healthcheck at {now} on {socket.gethostname()} with failure: {message}" ) @@ -136,8 +130,8 @@ def test_docker_socket(): """ Check to see if the nobody user has access to the docker socket """ - socket = "/var/run/docker.sock" - socket_gid = os.stat(socket).st_gid + socket_location = "/var/run/docker.sock" + socket_gid = os.stat(socket_location).st_gid # TODO FIX THIS TEST.. 
GROUPS ARE NOT BEING CORRECTLY SET INSIDE THE DOCKER CONTAINER gids = [999, 996, 995, 987] @@ -145,7 +139,7 @@ def test_docker_socket(): return message = ( - f"Cannot access docker socket, check to make sure permissions of user in {gids}" + f"test_docker_socket: Cannot access docker socket, check to make sure permissions of user in {gids}" ) exit_unsuccessfully(message) @@ -156,13 +150,13 @@ def test_docker_socket2(): """ dc = docker.from_env() if len(dc.containers.list()) < 1: - message = f"Cannot access docker socket" + message = f"Nobody User cannot access docker socket" exit_unsuccessfully(message) -def test_world_writeable(): +def test_scratch_world_writeable(): """ - Check to see if /mnt/awe/condor is writeable + Check to see if /cdr/scratch is writeable """ # Strip out octal 0o perms = str(oct(stat.S_IMODE(os.stat(scratch).st_mode))).lstrip("0").lstrip("o") @@ -170,7 +164,7 @@ def test_world_writeable(): if perms == "01777" or perms == "1777" or perms == "0o1777": return else: - message = f"Cannot access {scratch} gid={os.stat(scratch).st_gid} perms={perms}" + message = f"Scratch not world writeable: Cannot access {scratch} gid={os.stat(scratch).st_gid} perms={perms}" exit_unsuccessfully(message) @@ -184,21 +178,19 @@ def test_enough_space(mount_point, nickname, percentage): try: usage = subprocess.check_output(cmd, shell=True).decode().strip() if int(usage) < percentage: - # send_slack_message( - # f"The amount of usage {usage} for {mount_point} ({nickname}) which is less than {percentage}") return else: message = f"Can't access {mount_point} ({nickname}) or not enough space ({usage}% > {percentage}%)" exit_unsuccessfully(message) except Exception as e: message = ( - f"Can't access {mount_point} ({nickname}) or not enough space {usage}" - + str(e) + f"Can't access {mount_point} ({nickname}) or not enough space {usage}" + + str(e) ) exit_unsuccessfully(message) -def checkEndpoints(): +def check_kbase_endpoints(): """ Check auth/njs/catalog/ws """ @@ -232,21 +224,18 @@ def checkEndpoints(): message = f"Couldn't reach {service}. {e}" exit_unsuccessfully(message) - - def main(): try: # send_slack_message(f"Job HEALTH_CHECK is beginning at {datetime.datetime.now()}") test_docker_socket() test_docker_socket2() - test_world_writeable() + test_scratch_world_writeable() test_enough_space(scratch, "scratch", 95) test_enough_space(var_lib_docker, "docker", 95) test_free_memory() test_condor_starter() - checkEndpoints() - # send_slack_message(f"Job HEALTH_CHECK is ENDING at {datetime.datetime.now()}") + check_kbase_endpoints() except Exception as e: exit_unsuccessfully(str(e)) exit_successfully() diff --git a/deployment/bin/cruft/check_abandoned_containers.py b/deployment/bin/cruft/check_abandoned_containers.py deleted file mode 100644 index 156d60a..0000000 --- a/deployment/bin/cruft/check_abandoned_containers.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env python -# This script is used to find abandoned containers running on a condor worker. 
-# It requires a webhook URL environmental variable in order to send a notification to a slack channel -import datetime -import json -import logging -import os -import subprocess -import time - -import requests - -logging.basicConfig(level=logging.DEBUG) - -# Improvements: Use a library - -while True: - delete = os.environ.get("DELETE_ABANDONED_CONTAINERS") - webhook_url = os.environ.get("SLACK_WEBHOOK_URL") - - hostname = subprocess.check_output("hostname").strip() - logging.info("About to check for jobs on" + str(hostname)) - - try: - cmd = "docker ps | grep dockerhub | cut -f1 -d' '" - running_containers = subprocess.check_output(cmd, shell=True) - - container_ids = running_containers.split("\n") - container_ids = filter(None, container_ids) - - cmd = 'ps -ax -o command | egrep "java -cp /mnt/awe/condor/.+/NJSWrapper-all.jar us.kbase.narrativejobservice.sdkjobs.SDKLocalMethodRunner" | grep -v grep | cut -f5 -d" "' - java_procs = str(subprocess.check_output(cmd, shell=True)) - running_job_ids = java_procs.split("\n") - - running_job_ids = filter(None, running_job_ids) - - logging.info(running_job_ids) - - now = datetime.datetime.now() - - for container_id in container_ids: - - # Try catch here so the script can keep going - try: - cmd = ( - "docker inspect --format '{{ index .Config.Labels \"job_id\"}}' " - + str(container_id) - ) - ujs_id = str(subprocess.check_output(cmd, shell=True).strip()) - cmd = ( - "docker inspect --format '{{ index .Config.Labels \"condor_id\"}}' " - + str(container_id) - ) - condor_id = str(subprocess.check_output(cmd, shell=True).strip()) - - # Skip containers without a condor or worker id - if len(ujs_id) == 0 and len(condor_id) == 0: - continue - - if ujs_id not in running_job_ids: - message = "container:[{}] job_id:[{}] condor_id:[{}] is dead ({}) {} ".format( - container_id, ujs_id, condor_id, hostname, now - ) - - slack_data = {"text": message} - - response = requests.post( - webhook_url, - data=json.dumps(slack_data), - headers={"Content-Type": "application/json"}, - ) - - if delete == "true": - cmd = "docker stop {} && docker container rm -v {}".format( - container_id, container_id - ) - logging.error(message) - logging.error(cmd) - output = subprocess.check_output(cmd, shell=True) - - elif ujs_id in running_job_ids: - logging.info("Job still running: " + ujs_id) - - except Exception as e: - print(e) - - except Exception as e: - print(e) - - time.sleep(60) diff --git a/deployment/bin/cruft/check_abandoned_containers.sh b/deployment/bin/cruft/check_abandoned_containers.sh deleted file mode 100644 index 579fbe3..0000000 --- a/deployment/bin/cruft/check_abandoned_containers.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env bash -#This script is used to find abandoned containers running on a condor worker. 
-#It requires a webhook URL environmental variable in order to send a notification to a slack channel - -delete=${DELETE_ABANDONDED_CONTAINERS} -webhook_url=${SLACK_WEBHOOK_URL} -hostname=`hostname` -running="2" - -while true -do - running_containers=`docker ps | grep dockerhub | cut -f1 -d' '` - for container_id in ${running_containers} - do - condor_id=`docker inspect ${container_id} | grep condor_id | egrep -o "[0-9]+\.[0-9]"` - last_job_status=`condor_q ${condor_id} -attributes JobStatus -long | egrep -o "[0-9]"` - remote_host=`condor_q ${condor_id} -attributes RemoteHost -long | cut -f2 -d'='` - last_remote_host=`condor_q ${condor_id} -attributes LastRemoteHost -long | cut -f2 -d'='` - - if [[ ${last_job_status} = 2 ]]; - then - message="container_id ${condor_id} ${last_job_status} ${remote_host} ${last_remote_host} running" - else - message="DOCKER_ID:${container_id} CONDOR_ID:${condor_id} STATUS:${last_job_status} HOST:${remote_host} ${last_remote_host} (${hostname}) container is abandoned" - curl -X POST -H 'Content-type: application/json' --data "{'text':'${message}'}" $webhook_url - if [[ ${delete} = true ]]; - then - docker stop ${container_id} && docker container rm -v ${container_id} - fi - fi - done -sleep 60 -done diff --git a/deployment/bin/cruft/delete_exited_containers.sh b/deployment/bin/cruft/delete_exited_containers.sh deleted file mode 100755 index e143119..0000000 --- a/deployment/bin/cruft/delete_exited_containers.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash -# Usage - chmod +x -# ./remove_exited_containers.sh -# Sends a message about the containers you are going to delete on the host you run this on - - -#ee_notifications channel -webhook_url=${SLACK_WEBHOOK_URL} - -hostname=`hostname` - -# This will avoid inadvertently removing any containers which happen to have -# the word Exit in the name or command, and won't stop working if the output format of "docker ps -a" ever changes. - -exited_containers=`docker ps -a --filter status=exited --format {{.ID}}` -n=`docker ps -a --filter status=exited --format {{.ID}} | wc -l` -message="Deleting $n exiting containers from $hostname" - -# This cannot remove running containers -if [[ ${n} > 0 ]]; -then - `echo $exited_containers | xargs docker rm` - curl -X POST -H 'Content-type: application/json' --data "{'text':'${message}'}" $webhook_url -fi diff --git a/deployment/conf/.templates/cronjobs.config.templ b/deployment/conf/.templates/cronjobs.config.templ index b0cbb03..f3c6057 100644 --- a/deployment/conf/.templates/cronjobs.config.templ +++ b/deployment/conf/.templates/cronjobs.config.templ @@ -1,32 +1,26 @@ -# SLACK_WEBHOOK_KEY={{ .Env.SLACK_WEBHOOK_KEY }} - -# startd hook to check if node is healthy +# This checks if the node is healthy and reports to slack if it is not. 
Sets NODE_IS_HEALTHY to True or False STARTD_CRON_NodeHealth_EXECUTABLE = /kb/deployment/bin/cron/health_check.py STARTD_CRON_NodeHealth_PERIOD = 6m STARTD_CRON_NodeHealth_MODE = Periodic STARTD_CRON_NodeHealth_RECONFIG_RERUN = True -STARTD_CRON_NodeHealth_ENV = "SLACK_WEBHOOK_URL={{ .Env.SLACK_WEBHOOK_URL }} SERVICE_ENDPOINT={{ .Env.SERVICE_ENDPOINT }} CONDOR_SUBMIT_WORKDIR={{ .Env.CONDOR_SUBMIT_WORKDIR }} DOCKER_CACHE={{ .Env.DOCKER_CACHE }} DELETE_ABANDONED_CONTAINERS={{ .Env.DELETE_ABANDONED_CONTAINERS }}" +STARTD_CRON_NodeHealth_ENV = "SLACK_WEBHOOK_URL={{ .Env.SLACK_WEBHOOK_URL }} SERVICE_ENDPOINT={{ .Env.SERVICE_ENDPOINT }} CONDOR_SUBMIT_WORKDIR={{ .Env.CONDOR_SUBMIT_WORKDIR }} DOCKER_CACHE={{ .Env.DOCKER_CACHE }} EXECUTE_SUFFIX={{ .Env.EXECUTE_SUFFIX }} CHECK_CONDOR_STARTER_HEALTH={{ .Env.CHECK_CONDOR_STARTER_HEALTH }} " + -# startd hook to delete exited containers +# startd hook to delete exited containers (Might want to leave this longer for debugging) STARTD_CRON_DeleteExitedContainers_EXECUTABLE = /kb/deployment/bin/cron/delete_exited_containers.py -STARTD_CRON_DeleteExitedContainers_PERIOD = 10m +STARTD_CRON_DeleteExitedContainers_PERIOD = 30m STARTD_CRON_DeleteExitedContainers_MODE = Periodic STARTD_CRON_DeleteExitedContainers_RECONFIG_RERUN = True STARTD_CRON_DeleteExitedContainers_ENV = "SLACK_WEBHOOK_URL={{ .Env.SLACK_WEBHOOK_URL }}" -# startd hook to delete abandoned containers -STARTD_CRON_ReapAbandondedContainers_EXECUTABLE = /kb/deployment/bin/cron/container_reaper.py -STARTD_CRON_ReapAbandondedContainers_PERIOD = 6m -STARTD_CRON_ReapAbandondedContainers_MODE = Periodic -STARTD_CRON_ReapAbandondedContainers_RECONFIG_RERUN = True -STARTD_CRON_ReapAbandondedContainers_ENV = "SLACK_WEBHOOK_URL={{ .Env.SLACK_WEBHOOK_URL }} SERVICE_ENDPOINT={{ .Env.SERVICE_ENDPOINT }} CONDOR_SUBMIT_WORKDIR={{ .Env.CONDOR_SUBMIT_WORKDIR }} DOCKER_CACHE={{ .Env.DOCKER_CACHE }} DELETE_ABANDONED_CONTAINERS={{ .Env.DELETE_ABANDONED_CONTAINERS }}" -# startd hook to delete abandoned containers -STARTD_CRON_ReapAbandondedContainersEE2_EXECUTABLE = /kb/deployment/bin/cron/container_reaper_ee2.py -STARTD_CRON_ReapAbandondedContainersEE2_PERIOD = 6m -STARTD_CRON_ReapAbandondedContainersEE2_MODE = Periodic -STARTD_CRON_ReapAbandondedContainersEE2_RECONFIG_RERUN = True -STARTD_CRON_ReapAbandondedContainersEE2_ENV = "EE2_ENDPOINT={{ .Env.EE2_ENDPOINT }} SLACK_WEBHOOK_URL={{ .Env.SLACK_WEBHOOK_URL }} SERVICE_ENDPOINT={{ .Env.SERVICE_ENDPOINT }} CONDOR_SUBMIT_WORKDIR={{ .Env.CONDOR_SUBMIT_WORKDIR }} DOCKER_CACHE={{ .Env.DOCKER_CACHE }} DELETE_ABANDONED_CONTAINERS={{ .Env.DELETE_ABANDONED_CONTAINERS }}" +# Container Reaper Version 2024 +STARTD_CRON_ContainerReaper_EXECUTABLE = /kb/deployment/bin/cron/container_reaper.py +STARTD_CRON_ContainerReaper_PERIOD = 6m +STARTD_CRON_ContainerReaper_MODE = Periodic +STARTD_CRON_ContainerReaper_RECONFIG_RERUN = True +STARTD_CRON_ContainerReaper_ENV = "SLACK_WEBHOOK_URL={{ .Env.SLACK_WEBHOOK_URL }} CONTAINER_REAPER_ENDPOINTS={{ .Env.CONTAINER_REAPER_ENDPOINTS }} DELETE_ABANDONED_CONTAINERS={{ .Env.DELETE_ABANDONED_CONTAINERS }}" + # Tmpwatch $CONDOR_SUBMIT_WORKDIR STARTD_CRON_ManageCondorSubmitWorkdir_EXECUTABLE = /usr/sbin/tmpwatch @@ -36,12 +30,12 @@ STARTD_CRON_ManageCondorSubmitWorkdir_MODE = Periodic STARTD_CRON_ManageCondorSubmitWorkdir_RECONFIG_RERUN = True STARTD_CRON_ManageCondorSubmitWorkdir_ENV = "CONDOR_SUBMIT_WORKDIR={{ .Env.CONDOR_SUBMIT_WORKDIR }} " -# Prune docker every 14 days.. 
This works right now, but need to redirect to a script +# Prune docker every 14 days STARTD_CRON_ManageVarLibDocker_EXECUTABLE = /usr/bin/docker STARTD_CRON_ManageVarLibDocker_ARGS = system prune -a -f STARTD_CRON_ManageVarLibDocker_PERIOD = 336h STARTD_CRON_ManageVarLibDocker_MODE = Periodic -STARTD_CRON_ManageCondorSubmitWorkdir_RECONFIG_RERUN = True +STARTD_CRON_ManageVarLibDocker_RECONFIG_RERUN = True -STARTD_CRON_JOBLIST = NodeHealth ReapAbandondedContainersEE2 ManageVarLibDocker ManageCondorSubmitWorkdir +STARTD_CRON_JOBLIST = NodeHealth ContainerReaper ManageVarLibDocker ManageCondorSubmitWorkdir # STARTD_CRON_AUTOPUBLISH = If_Changed From db01164112bf90108795e2a1604e2f410f368224 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Wed, 24 Jan 2024 22:20:35 -0600 Subject: [PATCH 13/18] Update README.md (#62) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a855d50..cbf8ec3 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Condor-worker requirements +# Custom DockerFile, Configurations, Helper Scripts, and CronJobs for KBase Condor Worker The condor workers require From f363d533d1b89bee8d4642e27cd2b2b13b70f0a3 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Wed, 24 Jan 2024 22:25:03 -0600 Subject: [PATCH 14/18] Update RELEASE_NOTES.txt --- RELEASE_NOTES.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE_NOTES.txt b/RELEASE_NOTES.txt index 68f6411..f70fbed 100644 --- a/RELEASE_NOTES.txt +++ b/RELEASE_NOTES.txt @@ -1,4 +1,4 @@ -1.0.10 +1.0.11 ======== * Modify Cronjobs that look for runaway containers * Update Documentation From 939a5c1e04a8c5e741f34293775e6bd392bb08b2 Mon Sep 17 00:00:00 2001 From: Boris Date: Wed, 24 Jan 2024 23:30:19 -0600 Subject: [PATCH 15/18] Fix conflicts --- RELEASE_NOTES.txt | 9 -- deployment/bin/cron/container_reaper_ee2.py | 152 -------------------- 2 files changed, 161 deletions(-) delete mode 100755 deployment/bin/cron/container_reaper_ee2.py diff --git a/RELEASE_NOTES.txt b/RELEASE_NOTES.txt index e7ea601..75b7879 100644 --- a/RELEASE_NOTES.txt +++ b/RELEASE_NOTES.txt @@ -11,15 +11,6 @@ * Pin versions due for Python * Update base image to `htcondor/execute:lts-el8` - -1.0.10 -======== -* Fix health check -* Fix dependencies for JobRunner -* Pin versions due for Python -* Update base image to `htcondor/execute:lts-el8` - - 1.0.9 ======= * Add GHA Actions diff --git a/deployment/bin/cron/container_reaper_ee2.py b/deployment/bin/cron/container_reaper_ee2.py deleted file mode 100755 index 56d936d..0000000 --- a/deployment/bin/cron/container_reaper_ee2.py +++ /dev/null @@ -1,152 +0,0 @@ -# #!/miniconda/bin/python -# import datetime -# import json -# import logging -# import os -# import socket -# from typing import List, Dict -# -# import docker -# import psutil -# import requests -# from docker.models.containers import Container -# -# # REQUIRED ENVIRONMENT VARIABLES -# ee2_endpoint_url = os.environ.get("EE2_ENDPOINT") -# if not ee2_endpoint_url: -# raise Exception("EE2 Endpoint not set") -# -# webhook_url = os.environ.get("SLACK_WEBHOOK_URL") -# if not webhook_url: -# raise Exception("SLACK_WEBHOOK_URL is not defined") -# -# # OPTIONAL ENVIRONMENT VARIABLES -# kill = os.environ.get("DELETE_ABANDONED_CONTAINERS", "false").lower() == "true" -# -# logging.basicConfig(level=logging.INFO) -# hostname = socket.gethostname() -# dc = docker.from_env() -# -# -# def find_dockerhub_jobs() -> Dict: -# try: -# all_containers = dc.containers -# container_list = all_containers.list() -# 
except Exception as e: -# send_slack_message(str(e) + hostname) -# raise e -# -# job_containers = {} -# -# for container in container_list: -# cnt_id = container.id -# try: -# cnt = all_containers.get(cnt_id) -# labels = cnt.labels -# label_keys = labels.keys() -# if ( -# "condor_id" in label_keys -# and "ee2_endpoint" in label_keys -# and "worker_hostname" in label_keys -# ): -# if ( -# labels.get("worker_hostname") == hostname -# and labels.get("ee2_endpoint") == ee2_endpoint_url -# ): -# labels["image"] = cnt.image -# job_containers[cnt_id] = labels -# except Exception as e: -# logging.error(f"Container {cnt_id} doesn't exist anymore") -# logging.error(e) -# -# return job_containers -# -# -# def find_running_jobs(): -# """ -# Return a list of job ids from running job processes. -# Since python procs have multiple entries, keep only 1 version -# """ -# -# # send_slack_message(f"Job CONTAINER_REAPER is FINDING RUNNING JOBS at {datetime.datetime.now()}") -# ls = [] -# for p in psutil.process_iter(attrs=["name", "cmdline"]): -# if ( -# "/miniconda/bin/python" in p.info["cmdline"] -# and "./jobrunner.py" in p.info["cmdline"] -# ): -# ls.append(p.info["cmdline"][-2]) -# return list(set(ls)) -# -# -# def send_slack_message(message: str): -# """ -# -# :param message: Escaped Message to send to slack -# :return: -# """ -# -# slack_data = {"text": message} -# requests.post( -# webhook_url, -# data=json.dumps(slack_data), -# headers={"Content-Type": "application/json"}, -# ) -# -# -# def notify_slack(cnt_id: str, labels: dict(), running_job_ids: List): -# now = datetime.datetime.now() -# -# job_id = labels.get("job_id", None) -# # app_id = labels['app_id'] -# app_name = labels.get("app_name", None) -# method_name = labels.get("method_name", None) -# condor_id = labels.get("condor_id", None) -# username = labels.get("user_name", None) -# -# msg = f"cnt_id:{cnt_id} job_id:{job_id} condor_id:{condor_id} for {username} not in running_job_ids {running_job_ids} ({now}) hostname:({hostname}) app:{app_name} method:{method_name} (kill = {kill}) " -# send_slack_message(msg) -# -# -# def kill_docker_container(cnt_id: str): -# """ -# Kill a docker container. The job finish script should clean up after itself. 
-# :param cnt_id: The container to kill/remove -# """ -# if kill is True: -# cnt = dc.containers.get(cnt_id) # type: Container -# try: -# cnt.kill() -# except Exception: -# try: -# cnt.remove(force=True) -# except Exception: -# send_slack_message(f"Couldn't delete {cnt_id} on {hostname}") -# -# -# def kill_dead_jobs(running_jobs: List, docker_processes: Dict): -# """ -# Check whether there are runaway docker containers -# :param running_jobs: A list of condor jobs gathered from the starter scripts -# :param docker_processes: A list of docker containers -# """ -# # send_slack_message(f"Job CONTAINER_REAPER is KILLING DEAD JOBS at {datetime.datetime.now()}") -# for cnt_id in docker_processes: -# labels = docker_processes[cnt_id] -# job_id = labels.get("job_id", None) -# if job_id not in running_jobs: -# notify_slack(cnt_id, labels, running_jobs) -# if kill is True: -# kill_docker_container(cnt_id) -# -# -# if __name__ == "__main__": -# try: -# # send_slack_message(f"Job CONTAINER_REAPER is beginning at {datetime.datetime.now()}") -# locally_running_jobrunners = find_running_jobs() -# docker_jobs = find_dockerhub_jobs() -# kill_dead_jobs(locally_running_jobrunners, docker_jobs) -# # send_slack_message(f"Job CONTAINER_REAPER is ENDING at {datetime.datetime.now()}") -# except Exception as ev: -# send_slack_message(f"FAILURE on {hostname}" + str(ev)) -# logging.error(str(ev)) From 2a452c9a2265d63a11ace99fb173acc3ea1c1009 Mon Sep 17 00:00:00 2001 From: Boris Date: Thu, 25 Jan 2024 18:18:38 -0600 Subject: [PATCH 16/18] Add tini --- Dockerfile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Dockerfile b/Dockerfile index 848dcf4..3e2a7be 100644 --- a/Dockerfile +++ b/Dockerfile @@ -61,3 +61,10 @@ CMD [ "-template", "/kb/deployment/conf/.templates/deployment.cfg.templ:/kb/depl "/kb/deployment/bin/start_server.sh" ] WORKDIR /kb/deployment/jettybase + +ENV TINI_VERSION v0.19.0 +ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini +ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini.asc /tini.asc +RUN gpg --batch --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 595E85A6B1B4779EA4DAAEC70B588DFF0527A9B7 \ + && gpg --batch --verify /tini.asc /tini +RUN chmod +x /tini && cp /tini /usr/bin/docker-init From 82bc93f3cd16717984bf2403478977bfd0d219e5 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Thu, 25 Jan 2024 18:26:59 -0600 Subject: [PATCH 17/18] Update RELEASE_NOTES.txt --- RELEASE_NOTES.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE_NOTES.txt b/RELEASE_NOTES.txt index 75b7879..0f04db3 100644 --- a/RELEASE_NOTES.txt +++ b/RELEASE_NOTES.txt @@ -3,6 +3,7 @@ * Modify Cronjobs that look for runaway containers * Update Documentation * Deprecate container_reaper_ee2.py in favor of container_reaper.py +* Update htcondor image to add TINI back in until we use supervisor.d 1.0.10 ======== From 81d7bc2b111adbfbcb2465cef58936cc4cb018ea Mon Sep 17 00:00:00 2001 From: bio-boris Date: Fri, 26 Jan 2024 18:29:19 -0600 Subject: [PATCH 18/18] DEVOPS-1593 Condor Cronjob Cleanup (#63) * Delete un-needed configs * Remove Shared Port Demon * Fix cronjob scripts --------- Co-authored-by: Boris --- Dockerfile | 31 ++++--- RELEASE_NOTES.txt | 1 + deployment/bin/README.md | 3 - .../bin/cron/delete_exited_containers.py | 13 ++- deployment/bin/cron/health_check.py | 13 +-- deployment/bin/docker-init.sh | 11 +++ deployment/bin/misc/java_stats.sh | 10 --- deployment/bin/misc/jshell-wrapper | 7 -- deployment/bin/start-condor.sh | 8 +- 
.../.templates/condor_config_worker.templ | 80 ++++++++----------- .../conf/.templates/cronjobs.config.templ | 2 +- .../conf/.templates/deployment.cfg.templ | 61 -------------- .../conf/.templates/shared_port_config.templ | 4 - .../conf/legacy/condor_config_worker2.templ | 71 ++++++++++++++++ .../limitBigMemSlots.templ | 0 .../start_server.sh.templ | 0 16 files changed, 157 insertions(+), 158 deletions(-) delete mode 100644 deployment/bin/README.md create mode 100755 deployment/bin/docker-init.sh delete mode 100755 deployment/bin/misc/java_stats.sh delete mode 100755 deployment/bin/misc/jshell-wrapper delete mode 100644 deployment/conf/.templates/deployment.cfg.templ delete mode 100644 deployment/conf/.templates/shared_port_config.templ create mode 100644 deployment/conf/legacy/condor_config_worker2.templ rename deployment/conf/{.templates => legacy}/limitBigMemSlots.templ (100%) rename deployment/conf/{.templates => legacy}/start_server.sh.templ (100%) diff --git a/Dockerfile b/Dockerfile index 3e2a7be..3b358a1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ FROM htcondor/execute:lts-el8 ENV container docker -# Ge$t commonly used utilities +# Get commonly used utilities RUN yum -y update && yum upgrade -y RUN yum install -y drpm RUN yum -y install -y epel-release wget which git gcc libcgroup libcgroup-tools stress-ng tmpwatch procps @@ -14,7 +14,7 @@ RUN yum install -y yum-utils device-mapper-persistent-data lvm2 && yum-config-ma #Install Python3 and Libraries (source /root/miniconda/bin/activate) RUN yum install -y bzip2 \ && wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh \ -&& bash ~/miniconda.sh -b -p /miniconda \ +&& bash ~/miniconda.sh -b -p /miniconda ENV PATH="/miniconda/bin:${PATH}" @@ -22,8 +22,6 @@ ENV PATH="/miniconda/bin:${PATH}" # Add kbase user and set up directories RUN useradd -c "KBase user" -rd /kb/deployment/ -u 998 -s /bin/bash kbase && \ mkdir -p /kb/deployment/bin && \ - mkdir -p /kb/deployment/jettybase/logs/ && \ - touch /kb/deployment/jettybase/logs/request.log && \ chown -R kbase /kb/deployment #INSTALL DOCKERIZE @@ -38,9 +36,7 @@ RUN mkdir -p /var/run/condor && mkdir -p /var/log/condor && mkdir -p /var/lock/c # Maybe you want: rm -rf /var/cache/yum, to also free up space taken by orphaned data from disabled or removed repos RUN rm -rf /var/cache/yum -COPY --chown=kbase deployment/ /kb/deployment/ -RUN /kb/deployment/bin/install_python_dependencies.sh # The BUILD_DATE value seem to bust the docker cache when the timestamp changes, move to # the end @@ -51,16 +47,6 @@ LABEL org.label-schema.build-date=$BUILD_DATE \ us.kbase.vcs-branch=$BRANCH \ maintainer="Steve Chan sychan@lbl.gov" -ENTRYPOINT [ "/kb/deployment/bin/dockerize" ] -CMD [ "-template", "/kb/deployment/conf/.templates/deployment.cfg.templ:/kb/deployment/conf/deployment.cfg", \ - "-template", "/kb/deployment/conf/.templates/http.ini.templ:/kb/deployment/jettybase/start.d/http.ini", \ - "-template", "/kb/deployment/conf/.templates/server.ini.templ:/kb/deployment/jettybase/start.d/server.ini", \ - "-template", "/kb/deployment/conf/.templates/start_server.sh.templ:/kb/deployment/bin/start_server.sh", \ - "-template", "/kb/deployment/conf/.templates/condor_config.templ:/etc/condor/condor_config.local", \ - "-stdout", "/kb/deployment/jettybase/logs/request.log", \ - "/kb/deployment/bin/start_server.sh" ] - -WORKDIR /kb/deployment/jettybase ENV TINI_VERSION v0.19.0 ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini @@ -68,3 
+54,16 @@ ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini.asc / RUN gpg --batch --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 595E85A6B1B4779EA4DAAEC70B588DFF0527A9B7 \ && gpg --batch --verify /tini.asc /tini RUN chmod +x /tini && cp /tini /usr/bin/docker-init + +# Delete un-needed-configs from htcondor/execute:lts-el8 +# Revisit this when we change dockerize and token auth +RUN rm -f /etc/condor/config.d/00-htcondor-9.0.config +RUN rm -f /etc/condor/config.d/01-* + + +COPY --chown=kbase deployment/ /kb/deployment/ +RUN /kb/deployment/bin/install_python_dependencies.sh + +ENTRYPOINT [ "/usr/bin/docker-init" ] +CMD ["/kb/deployment/bin/docker-init.sh"] +WORKDIR /kb/deployment diff --git a/RELEASE_NOTES.txt b/RELEASE_NOTES.txt index 0f04db3..145569d 100644 --- a/RELEASE_NOTES.txt +++ b/RELEASE_NOTES.txt @@ -3,6 +3,7 @@ * Modify Cronjobs that look for runaway containers * Update Documentation * Deprecate container_reaper_ee2.py in favor of container_reaper.py +* Remove un-needed configs * Update htcondor image to add TINI back in until we use supervisor.d 1.0.10 diff --git a/deployment/bin/README.md b/deployment/bin/README.md deleted file mode 100644 index 318b193..0000000 --- a/deployment/bin/README.md +++ /dev/null @@ -1,3 +0,0 @@ -This directory is intended to be mounted in a running docker image under -/kb/deployment/bin so that it's contents can be used to build and store helper -binaries diff --git a/deployment/bin/cron/delete_exited_containers.py b/deployment/bin/cron/delete_exited_containers.py index fe9640a..7dfb2e3 100755 --- a/deployment/bin/cron/delete_exited_containers.py +++ b/deployment/bin/cron/delete_exited_containers.py @@ -26,7 +26,12 @@ def send_slack_message(message: str): hostname = socket.gethostname() dc = docker.from_env() ec = dc.containers.list(filters={"status": "exited"}) - container_image_names = [c.attrs["Config"]["Image"] for c in ec] - if container_image_names: - dc.containers.prune() - send_slack_message(f"Deleted {len(ec)} `exited` containers on {hostname} {container_image_names}") + kbase_containers = [c for c in ec if "kbase" in c.attrs["Config"]["Image"]] + container_image_names = [c.attrs["Config"]["Image"] for c in kbase_containers] + if kbase_containers: + for container in kbase_containers: + container.remove() + debug_mode = os.environ.get("DEBUG", "false").lower() == "true" + if debug_mode: + send_slack_message( + f"Deleted {len(kbase_containers)} `exited` containers with 'kbase' in image name on {hostname}: {container_image_names}") diff --git a/deployment/bin/cron/health_check.py b/deployment/bin/cron/health_check.py index a5fc8dc..7fcb008 100755 --- a/deployment/bin/cron/health_check.py +++ b/deployment/bin/cron/health_check.py @@ -4,6 +4,7 @@ """ import datetime +import inspect import json import logging import os @@ -33,12 +34,11 @@ if webhook_url is None: exit("SLACK_WEBHOOK_URL is not defined") - - user = "nobody" pid = pwd.getpwnam(user).pw_uid gid = pwd.getpwnam(user).pw_gid + def send_slack_message(message: str): """ :param message: Escaped Message to send to slack @@ -61,8 +61,13 @@ def exit_unsuccessfully(message: str, send_to_slack=True): now = datetime.datetime.now() if send_to_slack: + try: + function_name = lambda: inspect.stack()[1][3] + except Exception: + function_name = "" + send_slack_message( - f"POSSIBLE BLACK HOLE: Ran healthcheck at {now} on {socket.gethostname()} with failure: {message}" + f"POSSIBLE BLACK HOLE: {function_name} Ran healthcheck at {now} on {socket.gethostname()} with 
failure: {message}" ) sys.exit(1) @@ -224,8 +229,6 @@ def check_kbase_endpoints(): message = f"Couldn't reach {service}. {e}" exit_unsuccessfully(message) - - def main(): try: diff --git a/deployment/bin/docker-init.sh b/deployment/bin/docker-init.sh new file mode 100755 index 0000000..41973c9 --- /dev/null +++ b/deployment/bin/docker-init.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +# Meant to be called by /usr/bin/docker-init -- + +/kb/deployment/bin/dockerize \ +-template /kb/deployment/conf/.templates/condor_config_worker.templ:/etc/condor/condor_config.local \ +-template /kb/deployment/conf/.templates/cronjobs.config.templ:/etc/condor/config.d/cronjobs.config \ +-timeout 120s \ +-stdout /var/log/condor/ProcLog \ +-stdout /var/log/condor/StartLog \ +/kb/deployment/bin/start-condor.sh diff --git a/deployment/bin/misc/java_stats.sh b/deployment/bin/misc/java_stats.sh deleted file mode 100755 index aed096e..0000000 --- a/deployment/bin/misc/java_stats.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env jshell-wrapper - -System.out.println("Available Processors"); -System.out.println(Runtime.getRuntime().availableProcessors()); - -System.out.println("Free Memory"); -System.out.println(Runtime.getRuntime().freeMemory() + " " + Runtime.getRuntime().freeMemory() / 1000000000 + "G"); - -System.out.println("Max Memory"); -System.out.println(Runtime.getRuntime().maxMemory() + " " + Runtime.getRuntime().maxMemory() / 1000000000 + "G"); diff --git a/deployment/bin/misc/jshell-wrapper b/deployment/bin/misc/jshell-wrapper deleted file mode 100755 index ab5ad0f..0000000 --- a/deployment/bin/misc/jshell-wrapper +++ /dev/null @@ -1,7 +0,0 @@ -TMP=`mktemp` -tail -n +2 $@ >> $TMP -echo "/exit" >> $TMP -$JAVA_HOME/bin/jshell -q --execution local $TMP -rm $TMP - -#put this file in /usr/local/bin/ or somewhere in your $PATH diff --git a/deployment/bin/start-condor.sh b/deployment/bin/start-condor.sh index d2d7ac2..bd6bbb8 100755 --- a/deployment/bin/start-condor.sh +++ b/deployment/bin/start-condor.sh @@ -4,7 +4,7 @@ # condor pool password if [ "$GROUPMOD_DOCKER" ] ; then - groupmod -g $GROUPMOD_DOCKER docker + groupmod -g "$GROUPMOD_DOCKER" docker fi if [ "$POOL_PASSWORD" ] ; then @@ -42,11 +42,15 @@ fi # Ensure condor user can write to logs, since this is now mounted from host # Ensure condor user can modify the lock files and run files as of 8.9.10 -chown condor $(condor_config_val log) $(condor_config_val lock) $(condor_config_val run) +chown condor "$(condor_config_val log)" "$(condor_config_val lock)" "$(condor_config_val run)" docker system prune -a -f + +# Required for htcondor docker image to pick up changes from configs +/update-config + exec "$(condor_config_val MASTER)" -f -t 2>&1 diff --git a/deployment/conf/.templates/condor_config_worker.templ b/deployment/conf/.templates/condor_config_worker.templ index 10a34a1..c487377 100644 --- a/deployment/conf/.templates/condor_config_worker.templ +++ b/deployment/conf/.templates/condor_config_worker.templ @@ -1,26 +1,26 @@ -## What machine is your central manager? 
+# Central Manager Configuration CONDOR_HOST = {{ default .Env.CONDOR_HOST "condor" }} SCHEDD_HOST = {{ default .Env.SCHEDD_HOST "kbase@condor" }} CCB_ADDRESS = {{ default .Env.CCB_ADDRESS "condor" }} PRIVATE_NETWORK_NAME = {{ default .Env.HOSTNAME "condor" }} -## Allow commands to execute from this machine +# Access Control +# Allow commands to execute from this machine ALLOW_WRITE = $(ALLOW_WRITE) *.$(UID_DOMAIN) $(HOSTNAME) -ALLOW_ADMINISTRATOR = *.$(UID_DOMAIN) $(HOSTNAME) -ALLOW_NEGOTIATOR = *.$(UID_DOMAIN) $(HOSTNAME) +ALLOW_ADMINISTRATOR = *.$(UID_DOMAIN) $(HOSTNAME) +ALLOW_NEGOTIATOR = *.$(UID_DOMAIN) $(HOSTNAME) -# Set COLLECTOR_HOST if collector's internal hostname doesn't match DNS name -# and also set sock=COLLECTOR if using shared port -# example: +# Collector Configuration +# Set COLLECTOR_HOST to match the internal hostname, if different from DNS name. +# Use 'sock=collector' for shared port configuration if using shared port # COLLECTOR_HOST = ci.kbase.us:9618?sock=collector {{ if .Env.COLLECTOR_HOST -}} COLLECTOR_HOST = {{ .Env.COLLECTOR_HOST }} {{- end }} -# If the environment variable USE_TCP is set to true, the template will enable +# TCP Communication (optional) +# If the environment variable USE_TCP is set to True, the template will enable # this group of directives that convert communications to TCP # per https://lists.cs.wisc.edu/archive/htcondor-users/2011-August/msg00085.shtml -# UPDATE_COLLECTOR_WITH_TCP = True -# WANT_UDP_COMMAND_SOCKET = False -# COLLECTOR_MAX_FILE_DESCRIPTORS = 3000 + {{ if .Env.USE_TCP -}} UPDATE_COLLECTOR_WITH_TCP = True UPDATE_VIEW_COLLECTOR_WITH_TCP = True @@ -28,60 +28,50 @@ WANT_UDP_COMMAND_SOCKET = False COLLECTOR_MAX_FILE_DESCRIPTORS = 3000 {{- end }} +# Security Settings SEC_DEFAULT_AUTHENTICATION = {{ default .Env.SEC_DEFAULT_AUTHENTICATION "PASSWORD" }} -SEC_DEFAULT_AUTHENTICATION_METHODS = {{ default .Env.SEC_AUTHENTICATION_METHODS "PASSWORD" }} -SEC_DEFAULT_NEGOTIATION = {{ default .Env.SEC_DEFAULT_NEGOTIATION "REQUIRED" }} +SEC_DEFAULT_AUTHENTICATION_METHODS = {{ default .Env.SEC_AUTHENTICATION_METHODS "PASSWORD" }} +SEC_DEFAULT_NEGOTIATION = {{ default .Env.SEC_DEFAULT_NEGOTIATION "REQUIRED" }} SEC_PASSWORD_FILE = {{ default .Env.SEC_PASSWORD_FILE "/etc/condor/password" }} +# Daemon and Domain Settings UID_DOMAIN = {{ default .Env.UID_DOMAIN "condor" }} DAEMON_LIST = MASTER, STARTD DISCARD_SESSION_KEYRING_ON_STARTUP = False - - - -# When is this node willing to run jobs? 
-#StartJobs = True -#NODE_IS_HEALTHY = False -#START = (NODE_IS_HEALTHY =?= True) && (StartJobs =?= True) - +## Node Health and Job Management NODE_IS_HEALTHY = False -START = (NODE_IS_HEALTHY =?= True) - -# Check this with condor_config_val START -evaluate - - -#START = TRUE +# Expression Conditions to start jobs +START = (NODE_IS_HEALTHY =?= True) SUSPEND = False PREEMPT = False KILL = False - -# Set 12 hours maximum wait time for jobs to finish for using condor_drain +# Max time (in seconds) to retire jobs # Set 12 hours maximum wait time for jobs to finish for using condor_drain MAXJOBRETIREMENTTIME = 43200 +CGROUP_MEMORY_LIMIT_POLICY = {{ default .Env.CGROUP_MEMORY_LIMIT_POLICY "soft" }} -# Machine resource settings -# {{ if .Env.NUM_SLOTS -}} NUM_SLOTS = {{ .Env.NUM_SLOTS }} {{- end }} - -# Dynamic Slots +# Partitionable slot configuration NUM_SLOTS = 1 NUM_SLOTS_TYPE_1 = 1 SLOT_TYPE_1 = 100% -SLOT_TYPE_1_PARTITIONABLE = TRUE -CGROUP_MEMORY_LIMIT_POLICY = {{ default .Env.CGROUP_MEMORY_LIMIT_POLICY "soft" }} - -CLIENTGROUP = {{ default .Env.CLIENTGROUP "\"njs\"" }} - -#LEGACY : /mnt/awe/condor/condor_job_execute/$(HOSTNAME)/ to +SLOT_TYPE_1_PARTITIONABLE = True +# Client Group Setting +CLIENTGROUP = {{ default .Env.CLIENTGROUP "\"njs\"" }} +# Execution and Resource Management EXECUTE = {{ default .Env.condor_submit_workdir "/cdr/" }}{{ .Env.EXECUTE_SUFFIX }} - -STARTD_RECOMPUTE_DISK_FREE = true -#STARTD_ATTRS = StartJobs, CLIENTGROUP, $(STARTD_ATTRS) +STARTD_RECOMPUTE_DISK_FREE = True STARTD_ATTRS = CLIENTGROUP, $(STARTD_ATTRS) -# We don't need core files, but can enable them for persistent crashes -CREATE_CORE_FILES = false +# Miscellaneous Settings +CREATE_CORE_FILES = False +USE_SHARED_PORT = False +AUTO_INCLUDE_SHARED_PORT_IN_DAEMON_LIST=False +# Log File Management +# Configuration for log file rotation and size management. +# Max log size is calculated based on the number of cores and daemons. # $(MAX_DEFAULT_LOG) is set to 10MB, before it is moved to .old. The .old gets over-written after 1 rotation -# That means for 32 cores + 5 daemons, the max size = 37procs * 10mb = 370 * 2 = 740MB +# That means for 32 cores + 5 daemons, the max size = 37procs * 10mb = 370 * 2 = 740MB + diff --git a/deployment/conf/.templates/cronjobs.config.templ b/deployment/conf/.templates/cronjobs.config.templ index f3c6057..f490f93 100644 --- a/deployment/conf/.templates/cronjobs.config.templ +++ b/deployment/conf/.templates/cronjobs.config.templ @@ -37,5 +37,5 @@ STARTD_CRON_ManageVarLibDocker_PERIOD = 336h STARTD_CRON_ManageVarLibDocker_MODE = Periodic STARTD_CRON_ManageVarLibDocker_RECONFIG_RERUN = True -STARTD_CRON_JOBLIST = NodeHealth ContainerReaper ManageVarLibDocker ManageCondorSubmitWorkdir +STARTD_CRON_JOBLIST = NodeHealth ContainerReaper ManageVarLibDocker ManageCondorSubmitWorkdir DeleteExitedContainers # STARTD_CRON_AUTOPUBLISH = If_Changed diff --git a/deployment/conf/.templates/deployment.cfg.templ b/deployment/conf/.templates/deployment.cfg.templ deleted file mode 100644 index e01e8c1..0000000 --- a/deployment/conf/.templates/deployment.cfg.templ +++ /dev/null @@ -1,61 +0,0 @@ -[NarrativeJobService] -port = {{ default .Env.port "8200" }} -# server thread count - this determines the number of requests that can be -# processed simultaneously. -server-threads = {{ default .Env.server_threads "20" }} -# Minimum memory size in MB. -min-memory = {{ default .Env.min_memory "1000" }} -# Maximum memory size in MB. 
-max-memory = {{ default .Env.max_memory "1500" }} - -queue.db.dir={{ default .Env.queue.db.dir "/tmp/njs/queue" }} -basedir={{ default .Env.basedir "njs_wrapper" }} -scratch={{ default .Env.scratch "/tmp" }} -ref.data.base={{ default .Env.ref_data_base "/kb/data" }} - -self.external.url={{ default .Env.self_external_url "https://ci.kbase.us/services/njs_wrapper" }} -kbase.endpoint={{ default .Env.kbase_endpoint "https://ci.kbase.us/services" }} -workspace.srv.url={{ default .Env.workspace_srv_url "https://ci.kbase.us/services/ws" }} -jobstatus.srv.url={{ default .Env.jobstatus_srv_url "https://ci.kbase.us/services/userandjobstate" }} -shock.url={{ default .Env.shock_url "https://ci.kbase.us/services/shock-api" }} -awe.srv.url={{ default .Env.awe_srv_url "http://ci.kbase.us/services/awe-api" }} -docker.registry.url={{ default .Env.docker_registry_url "dockerhub-ci.kbase.us" }} -awe.client.docker.uri={{ default .Env.awe_client_docker_uri "unix:///var/run/docker.sock" }} -catalog.srv.url={{ default .Env.catalog_srv_url "https://ci.kbase.us/services/catalog" }} -handle.url={{ default .Env.handle_url "https://ci.kbase.us/services/handle_service" }} -srv.wiz.url={{ default .Env.srv_wiz_url "https://ci.kbase.us/services/service_wizard" }} -auth-service-url = {{ default .Env.auth_service_url "https://ci.kbase.us/services/auth/api/legacy/KBase/Sessions/Login" }} -auth.service.url.v2 = {{ default .Env.auth_service_url_v2 "https://ci.kbase.us/services/auth/api/V2/token" }} -auth-service-url-allow-insecure={{ default .Env.auth_service_url_allow_insecure "false" }} - - -## This user can run list_running_apps method to get states -## of all running apps (running internally on wrapper side). -admin.user={{default .Env.admin_user "" }} - -# Following parameters define Catalog admin creds for pushing exec-stats: -catalog.admin.token={{ default .Env.catalog_token "" }} - -default.awe.client.groups={{ default .Env.default_awe_client_groups "ci" }} -awe.readonly.admin.token={{ default .Env.awe_token "" }} -awe.client.callback.networks={{ default .Env.awe_client_callback_networks "docker0,eth0" }} -running.tasks.per.user={{ default .Env.running_tasks_per_user "5" }} - -mongodb-host = {{ default .Env.mongodb_host "localhost:27017" }} -mongodb-database = {{ default .Env.mongodb_database "exec_engine" }} -mongodb-user = {{ default .Env.mongodb_user "" }} -mongodb-pwd = {{ default .Env.mongodb_pwd "" }} - -ujs-mongodb-host = {{ default .Env.ujs_mongodb_host "localhost:27017" }} -ujs-mongodb-database = {{ default .Env.ujs_mongodb_database "userjobstate" }} -ujs-mongodb-user = {{ default .Env.ujs_mongodb_user "" }} -ujs-mongodb-pwd = {{ default .Env.ujs_mongodb_pwd "" }} - -narrative.proxy.sharing.user={{ default .Env.narrative_proxy_sharing_user "narrativejoblistener" }} - -condor.mode={{ default .Env.condor_mode "1" }} -condor.submit.desc.file.path={{ default .Env.condor_submit_desc_file_path "/kb/deployment/misc/" }} -condor-submit-workdir={{ default .Env.condor_submit_workdir "/mnt/condor" }} - -## Formula = Token Expiration in ms - (Time_Before_Expiration / 60 ) * 1000 -time.before.expiration = {{ default .Env.time_before_expiration "10" }} \ No newline at end of file diff --git a/deployment/conf/.templates/shared_port_config.templ b/deployment/conf/.templates/shared_port_config.templ deleted file mode 100644 index 1ff2405..0000000 --- a/deployment/conf/.templates/shared_port_config.templ +++ /dev/null @@ -1,4 +0,0 @@ -SHARED_PORT_ARGS = -p {{ default .Env.SHARED_PORT_CONFIG "9618" }} -DAEMON_LIST = 
$(DAEMON_LIST), SHARED_PORT -COLLECTOR_HOST = $(CONDOR_HOST)?sock=collector -USE_SHARED_PORT = TRUE diff --git a/deployment/conf/legacy/condor_config_worker2.templ b/deployment/conf/legacy/condor_config_worker2.templ new file mode 100644 index 0000000..c7182eb --- /dev/null +++ b/deployment/conf/legacy/condor_config_worker2.templ @@ -0,0 +1,71 @@ +## What machine is your central manager? +CONDOR_HOST = {{ default .Env.CONDOR_HOST "condor" }} +SCHEDD_HOST = {{ default .Env.SCHEDD_HOST "kbase@condor" }} +CCB_ADDRESS = {{ default .Env.CCB_ADDRESS "condor" }} +PRIVATE_NETWORK_NAME = {{ default .Env.HOSTNAME "condor" }} + +## Allow commands to execute from this machine +ALLOW_WRITE = $(ALLOW_WRITE) *.$(UID_DOMAIN) $(HOSTNAME) +ALLOW_ADMINISTRATOR = *.$(UID_DOMAIN) $(HOSTNAME) +ALLOW_NEGOTIATOR = *.$(UID_DOMAIN) $(HOSTNAME) + +# Set COLLECTOR_HOST if collector's internal hostname doesn't match DNS name +# and also set sock=COLLECTOR if using shared port +# example: +# COLLECTOR_HOST = ci.kbase.us:9618?sock=collector +{{ if .Env.COLLECTOR_HOST -}} COLLECTOR_HOST = {{ .Env.COLLECTOR_HOST }} {{- end }} + +# If the environment variable USE_TCP is set to true, the template will enable +# this group of directives that convert communications to TCP +# per https://lists.cs.wisc.edu/archive/htcondor-users/2011-August/msg00085.shtml +# UPDATE_COLLECTOR_WITH_TCP = True +# WANT_UDP_COMMAND_SOCKET = False +# COLLECTOR_MAX_FILE_DESCRIPTORS = 3000 +{{ if .Env.USE_TCP -}} +UPDATE_COLLECTOR_WITH_TCP = True +UPDATE_VIEW_COLLECTOR_WITH_TCP = True +WANT_UDP_COMMAND_SOCKET = False +COLLECTOR_MAX_FILE_DESCRIPTORS = 3000 +{{- end }} + +SEC_DEFAULT_AUTHENTICATION = {{ default .Env.SEC_DEFAULT_AUTHENTICATION "PASSWORD" }} +SEC_DEFAULT_AUTHENTICATION_METHODS = {{ default .Env.SEC_AUTHENTICATION_METHODS "PASSWORD" }} +SEC_DEFAULT_NEGOTIATION = {{ default .Env.SEC_DEFAULT_NEGOTIATION "REQUIRED" }} +SEC_PASSWORD_FILE = {{ default .Env.SEC_PASSWORD_FILE "/etc/condor/password" }} + +UID_DOMAIN = {{ default .Env.UID_DOMAIN "condor" }} +DAEMON_LIST = MASTER, STARTD +DISCARD_SESSION_KEYRING_ON_STARTUP = False + +NODE_IS_HEALTHY = False +START = (NODE_IS_HEALTHY =?= True) +SUSPEND = False +PREEMPT = False +KILL = False + +# Set 12 hours maximum wait time for jobs to finish for using condor_drain +MAXJOBRETIREMENTTIME = 43200 + + +# Dynamic Slots +NUM_SLOTS = 1 +NUM_SLOTS_TYPE_1 = 1 +SLOT_TYPE_1 = 100% +SLOT_TYPE_1_PARTITIONABLE = TRUE +CGROUP_MEMORY_LIMIT_POLICY = {{ default .Env.CGROUP_MEMORY_LIMIT_POLICY "soft" }} + +CLIENTGROUP = {{ default .Env.CLIENTGROUP "\"njs\"" }} + + +EXECUTE = {{ default .Env.condor_submit_workdir "/cdr/" }}{{ .Env.EXECUTE_SUFFIX }} + +STARTD_RECOMPUTE_DISK_FREE = true +STARTD_ATTRS = CLIENTGROUP, $(STARTD_ATTRS) + +# We don't need core files, but can enable them for persistent crashes +CREATE_CORE_FILES = false +AUTO_INCLUDE_SHARED_PORT_IN_DAEMON_LIST=False + +# $(MAX_DEFAULT_LOG) is set to 10MB, before it is moved to .old. 
The .old gets over-written after 1 rotation +# That means for 32 cores + 5 daemons, the max size = 37procs * 10mb = 370 * 2 = 740MB + diff --git a/deployment/conf/.templates/limitBigMemSlots.templ b/deployment/conf/legacy/limitBigMemSlots.templ similarity index 100% rename from deployment/conf/.templates/limitBigMemSlots.templ rename to deployment/conf/legacy/limitBigMemSlots.templ diff --git a/deployment/conf/.templates/start_server.sh.templ b/deployment/conf/legacy/start_server.sh.templ similarity index 100% rename from deployment/conf/.templates/start_server.sh.templ rename to deployment/conf/legacy/start_server.sh.templ
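
The log-sizing comment carried into both worker templates above gives a 740MB ceiling; that figure is simply the product of the numbers the comment itself states. A minimal worked sketch of that arithmetic, assuming (as the comment does, not independently verified here) one log file per slot/daemon process, a 10MB $(MAX_DEFAULT_LOG), and a single rotated .old copy per log:

# Upper-bound estimate of HTCondor log usage, using only the values quoted
# in the template comment (32 cores + 5 daemons, 10 MB per log, log + .old).
procs = 32 + 5        # one log file per slot/daemon process (assumed)
max_log_mb = 10       # $(MAX_DEFAULT_LOG)
copies = 2            # active log plus one rotated .old copy
print(procs * max_log_mb * copies, "MB")  # -> 740 MB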