From 18ca71dbe6fd106939d7b3c5041777eba7b356fc Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Wed, 1 Jul 2020 16:40:58 -0700 Subject: [PATCH 01/25] Support podman or docker --- jupyter-nersc/app-monitoring/build.sh | 16 +++++++++++++++- jupyter-nersc/app-notebooks/build.sh | 16 +++++++++++++++- jupyter-nersc/web-announcement/build.sh | 16 +++++++++++++++- jupyter-nersc/web-nbviewer/build.sh | 16 +++++++++++++++- jupyter-nersc/web-offline/build.sh | 16 +++++++++++++++- 5 files changed, 75 insertions(+), 5 deletions(-) diff --git a/jupyter-nersc/app-monitoring/build.sh b/jupyter-nersc/app-monitoring/build.sh index a3a4efc..8ebd337 100644 --- a/jupyter-nersc/app-monitoring/build.sh +++ b/jupyter-nersc/app-monitoring/build.sh @@ -1,7 +1,21 @@ #!/bin/bash +imcmd="" +for command in docker podman; do + if [ $(command -v $command) ]; then + imcmd=$command + break + fi +done +if [ -n "$imcmd" ]; then + echo "Using $imcmd" +else + echo "No image command defined" + exit 1 +fi + branch=$(git symbolic-ref --short HEAD) -docker build \ +$imcmd build \ "$@" \ --tag registry.spin.nersc.gov/das/app-monitoring.jupyter-nersc-$branch:latest . diff --git a/jupyter-nersc/app-notebooks/build.sh b/jupyter-nersc/app-notebooks/build.sh index 8fec9e1..846f44d 100644 --- a/jupyter-nersc/app-notebooks/build.sh +++ b/jupyter-nersc/app-notebooks/build.sh @@ -1,8 +1,22 @@ #!/bin/bash +imcmd="" +for command in docker podman; do + if [ $(command -v $command) ]; then + imcmd=$command + break + fi +done +if [ -n "$imcmd" ]; then + echo "Using $imcmd" +else + echo "No image command defined" + exit 1 +fi + branch=$(git symbolic-ref --short HEAD) -docker build \ +podman build \ --build-arg branch=$branch \ "$@" \ --tag registry.spin.nersc.gov/das/app-notebooks.jupyter-nersc-$branch:latest . diff --git a/jupyter-nersc/web-announcement/build.sh b/jupyter-nersc/web-announcement/build.sh index 0505f8a..1ba8810 100644 --- a/jupyter-nersc/web-announcement/build.sh +++ b/jupyter-nersc/web-announcement/build.sh @@ -1,8 +1,22 @@ #!/bin/bash +imcmd="" +for command in docker podman; do + if [ $(command -v $command) ]; then + imcmd=$command + break + fi +done +if [ -n "$imcmd" ]; then + echo "Using $imcmd" +else + echo "No image command defined" + exit 1 +fi + branch=$(git symbolic-ref --short HEAD) -docker build \ +$imcmd build \ --build-arg branch=$branch \ "$@" \ --tag registry.spin.nersc.gov/das/web-announcement.jupyter-nersc-$branch:latest . diff --git a/jupyter-nersc/web-nbviewer/build.sh b/jupyter-nersc/web-nbviewer/build.sh index 2cf2cff..c5a3424 100644 --- a/jupyter-nersc/web-nbviewer/build.sh +++ b/jupyter-nersc/web-nbviewer/build.sh @@ -1,8 +1,22 @@ #!/bin/bash +imcmd="" +for command in docker podman; do + if [ $(command -v $command) ]; then + imcmd=$command + break + fi +done +if [ -n "$imcmd" ]; then + echo "Using $imcmd" +else + echo "No image command defined" + exit 1 +fi + branch=$(git symbolic-ref --short HEAD) -docker build \ +$imcmd build \ --build-arg branch=$branch \ "$@" \ --tag registry.spin.nersc.gov/das/web-nbviewer.jupyter-nersc-$branch:latest . diff --git a/jupyter-nersc/web-offline/build.sh b/jupyter-nersc/web-offline/build.sh index b427ff9..2a1e1f2 100644 --- a/jupyter-nersc/web-offline/build.sh +++ b/jupyter-nersc/web-offline/build.sh @@ -1,8 +1,22 @@ #!/bin/bash +imcmd="" +for command in docker podman; do + if [ $(command -v $command) ]; then + imcmd=$command + break + fi +done +if [ -n "$imcmd" ]; then + echo "Using $imcmd" +else + echo "No image command defined" + exit 1 +fi + branch=$(git symbolic-ref --short HEAD) -docker build \ +$imcmd build \ --build-arg branch=$branch \ "$@" \ --tag registry.spin.nersc.gov/das/web-offline.jupyter-nersc-$branch:latest . From ea03ebd3289014c2e15c65fe5efb549cf68b974c Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Thu, 2 Jul 2020 14:09:37 -0700 Subject: [PATCH 02/25] Unpin and user conda-forge --- jupyter-base/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-base/Dockerfile b/jupyter-base/Dockerfile index b711cc2..6e86c9f 100644 --- a/jupyter-base/Dockerfile +++ b/jupyter-base/Dockerfile @@ -32,9 +32,9 @@ RUN \ curl -s -o /tmp/miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ bash /tmp/miniconda3.sh -b -p /opt/anaconda3 && \ rm -rf /tmp/miniconda3.sh && \ - echo "python 3.7.3" >> /opt/anaconda3/conda-meta/pinned && \ /opt/anaconda3/bin/conda update --yes conda && \ /opt/anaconda3/bin/conda install --yes \ + --channel conda-forge \ alembic \ attrs \ certipy \ From 1e736d3f9422c1f7c37d278f366232b8e0571d70 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Thu, 2 Jul 2020 14:10:12 -0700 Subject: [PATCH 03/25] Expect the CPU info to come from Iris --- jupyter-nersc/web-jupyterhub/nerscspawner.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/nerscspawner.py b/jupyter-nersc/web-jupyterhub/nerscspawner.py index 32cd8a9..4938dae 100644 --- a/jupyter-nersc/web-jupyterhub/nerscspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscspawner.py @@ -1,6 +1,4 @@ -import os - from jupyterhub.spawner import LocalProcessSpawner from tornado import httpclient, web @@ -44,15 +42,18 @@ def check_role(self, auth_state, role): if role == "staff": return self.check_role_staff(auth_state) if role == "cori-exclusive-node-cpu": - return self.check_role_cori_exclusive_node_cpu() + return self.check_role_cori_exclusive_node_cpu(auth_state) return False - def check_role_cori_exclusive_node_cpu(self): - users = os.environ.get("CORI_EXCLUSIVE_NODE_CPU_USERS") - if users: - return self.user.name in users.split(",") - else: - return True + def check_role_cori_exclusive_node_cpu(self, auth_state): + return self.default_jupyter_repo(auth_state) is not None + + def default_jupyter_repo(self, auth_state): + for allocation in self.user_allocations(auth_state): + for qos in allocation["userAllocationQos"]: + if qos["qos"]["qos"] in ["jupyter"]: + return allocation["computeAllocation"]["repoName"] + return None def check_role_gpu(self, auth_state): return self.default_gpu_repo(auth_state) is not None @@ -63,7 +64,6 @@ def check_role_staff(self, auth_state): return False def default_gpu_repo(self, auth_state): -# for allocation in self.user_allocations(auth_state, ["nstaff", "m1759", "dasrepo", "gpu4sci"]): for allocation in self.user_allocations(auth_state): for qos in allocation["userAllocationQos"]: if qos["qos"]["qos"] in ["gpu", "gpu_special_m1759"]: From ef13ff29960e24ef8ebad121a27ea220c47b62c7 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Sat, 4 Jul 2020 11:58:37 -0700 Subject: [PATCH 04/25] Now with bigmem --- .../web-jupyterhub/jupyterhub_config.py | 30 +++++++++++++++++++ .../web-jupyterhub/nerscslurmspawner.py | 29 ++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index ffac7b4..fe43426 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1017,6 +1017,7 @@ def comma_split(string): { "name": "cori2-shared-node-cpu" }, { "name": "cori-shared-node-gpu" }, { "name": "cori-exclusive-node-cpu" }, + { "name": "cori-bigmem-node-cpu" }, { "name": "cori-configurable-gpu" }, { "name": "spin-shared-node-cpu" }, ] @@ -1051,6 +1052,18 @@ def comma_split(string): "resources": "Use your own node within a job allocation using defaults.", "use_cases": "Visualization, analytics, machine learning that is compute or memory intensive but can be done on a single node." }, + { + "name": "bigmem-node", + "architectures": [ + { + "name": "cpu", + "description": "Big Memory CPU Node", + "roles": ["cori-exclusive-node-cpu"], + } + ], + "resources": "Use your own node within a job allocation using defaults.", + "use_cases": "Big memory stuff" + }, # { # "name": "configurable", # "architectures": [ @@ -1176,6 +1189,23 @@ def comma_split(string): ]) } ), + "cori-bigmem-node-cpu": ( + "nerscslurmspawner.NERSCBigmemSlurmSpawner", { + "cmd": ["/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], + "args": ["--transport=ipc"], + "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", + "startup_poll_interval": 30.0, + "req_remote_host": "cori19-224.nersc.gov", + "req_homedir": "/tmp", + "req_runtime": "240", + "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", + "path": "/usr/common/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "batchspawner_singleuser_cmd" : " ".join([ + "/global/common/cori/das/jupyterhub/jupyter-launcher.sh", + "/global/common/cori_cle7/software/jupyter/20-06/bin/batchspawner-singleuser", + ]) + } + ), "cori-configurable-gpu": ( "nerscslurmspawner.NERSCConfigurableGPUSlurmSpawner", { "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", diff --git a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py index a7730ca..7d387e3 100644 --- a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py @@ -172,6 +172,35 @@ async def _get_batch_script(self, **subvars): subvars["cookie"] = int(time.time()) ^ (uid ** 2) return format_template(self.batch_script, **subvars) +class NERSCBigmemSlurmSpawner(NERSCSlurmSpawner): + + batch_script = Unicode("""#!/bin/bash +#SBATCH --clusters=escori +#SBATCH --comment={{ cookie }} +#SBATCH --exclusive +#SBATCH --job-name=jupyter +#SBATCH --nodes={{ nodes }} +#SBATCH --qos=bigmem +#SBATCH --time={{ runtime }} +{{ env_text }} +unset XDG_RUNTIME_DIR +{{ cmd }}""").tag(config=True) + + batch_query_cmd = Unicode("/bin/bash -l /global/common/cori/das/jupyterhub/esslurm-wrapper.sh squeue -h -j {job_id} -o '%T\ %B-224.nersc.gov'").tag(config=True) + batch_cancel_cmd = Unicode("/bin/bash -l /global/common/cori/das/jupyterhub/esslurm-wrapper.sh scancel {job_id}").tag(config=True) + + # Have to override this to call get_auth_state() I think + async def _get_batch_script(self, **subvars): + """Format batch script from vars""" + auth_state = await self.user.get_auth_state() + self.userdata = auth_state["userdata"] + uid = self.userdata["uid"] + subvars["cookie"] = int(time.time()) ^ (uid ** 2) + return format_template(self.batch_script, **subvars) + + def parse_job_id(self, output): + output = output.replace(" on cluster escori", "") + return super().parse_job_id(output) class NERSCExclusiveGPUSlurmSpawner(NERSCSlurmSpawner): From 29f18339dcb207fe5c950a4364ca3f22e609dfee Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Sat, 4 Jul 2020 16:27:36 -0700 Subject: [PATCH 05/25] Changes for later jupyterhub --- jupyter-compose/web-jupyterhub/jupyterhub_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-compose/web-jupyterhub/jupyterhub_config.py b/jupyter-compose/web-jupyterhub/jupyterhub_config.py index e931e6f..142c712 100644 --- a/jupyter-compose/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-compose/web-jupyterhub/jupyterhub_config.py @@ -590,7 +590,6 @@ # environment variables. Most, including the default, do not. Consult the # documentation for your spawner to verify! #c.Spawner.cmd = ['jupyterhub-singleuser'] -c.Spawner.cmd = ['jupyter-labhub'] ## Maximum number of consecutive failures to allow before shutting down # JupyterHub. @@ -643,6 +642,7 @@ # - Start with `/notebooks` instead of `/tree` if `default_url` points to a notebook instead of a directory. # - You can set this to `/lab` to have JupyterLab start by default, rather than Jupyter Notebook. #c.Spawner.default_url = '' +c.Spawner.default_url = '/lab' ## Disable per-user configuration of single-user servers. # From 2529eadf3387b7585611725336e6d13c351f584d Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Sat, 4 Jul 2020 16:27:52 -0700 Subject: [PATCH 06/25] Newer version --- jupyter-compose/web-announcement/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-compose/web-announcement/Dockerfile b/jupyter-compose/web-announcement/Dockerfile index 1d05094..b50af16 100644 --- a/jupyter-compose/web-announcement/Dockerfile +++ b/jupyter-compose/web-announcement/Dockerfile @@ -4,7 +4,7 @@ FROM registry.spin.nersc.gov/das/jupyter-base-${branch}:latest LABEL maintainer="Rollin Thomas " RUN \ - pip install git+https://github.com/rcthomas/jupyterhub-announcement.git@0.3.1 + pip install git+https://github.com/rcthomas/jupyterhub-announcement.git@0.4.1 WORKDIR /srv From 81d7c1329f2ef7ab78c45a9fdca69e138e05ac3a Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Sat, 4 Jul 2020 16:28:13 -0700 Subject: [PATCH 07/25] Use master --- jupyter-compose/app-notebooks/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-compose/app-notebooks/Dockerfile b/jupyter-compose/app-notebooks/Dockerfile index e2fd869..790a9c6 100644 --- a/jupyter-compose/app-notebooks/Dockerfile +++ b/jupyter-compose/app-notebooks/Dockerfile @@ -41,7 +41,7 @@ RUN \ RUN \ pip install --no-cache-dir \ - git+https://github.com/rcthomas/jupyter-server-proxy.git@allow-remote-proxy + git+https://github.com/jupyterhub/jupyter-server-proxy.git ADD jupyter_notebook_config.py /opt/anaconda3/etc/jupyter/. From bd44a437b6171d1db0622290342b090959598677 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Mon, 13 Jul 2020 13:56:10 -0700 Subject: [PATCH 08/25] Fully convert to JupyterLab2 --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index fe43426..baf2a86 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1208,8 +1208,7 @@ def comma_split(string): ), "cori-configurable-gpu": ( "nerscslurmspawner.NERSCConfigurableGPUSlurmSpawner", { - "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/usr/common/software/jupyter/19-11/bin/jupyter-labhub"], + "cmd": ["/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], "args": ["--transport=ipc"], "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", "startup_poll_interval": 30.0, @@ -1218,19 +1217,23 @@ def comma_split(string): "req_ngpus": "1", "req_runtime": "240", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", - "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "path": "/usr/common/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "batchspawner_singleuser_cmd" : " ".join([ + "/global/common/cori/das/jupyterhub/jupyter-launcher.sh", + "/global/common/cori_cle7/software/jupyter/20-06/bin/batchspawner-singleuser", + ]) } ), "spin-shared-node-cpu": ( "sshspawner.sshspawner.SSHSpawner", { - "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/global/common/cori_cle7/software/jupyter/19-11/bin/jupyter-labhub"], + "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", + "/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], "args": ["--transport=ipc"], "environment": {"OMP_NUM_THREADS" : "2"}, "remote_hosts": ["app-notebooks"], "remote_port_command": "/usr/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", - "path": "/global/common/cori_cle7/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "path": "/global/common/cori_cle7/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "ssh_keyfile": '/certs/{username}.key' } ) From 3d867e2bc641d5fe653b1fcdafce78936ac797f0 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Wed, 15 Jul 2020 17:31:41 -0700 Subject: [PATCH 09/25] JupyterLab 2 and configurable --- .../web-jupyterhub/jupyterhub_config.py | 41 +++++++------------ 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index baf2a86..04bf0b3 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1064,18 +1064,18 @@ def comma_split(string): "resources": "Use your own node within a job allocation using defaults.", "use_cases": "Big memory stuff" }, -# { -# "name": "configurable", -# "architectures": [ -# { -# "name": "gpu", -# "description": "Configurable GPU", -# "roles": ["gpu"], -# } -# ], -# "resources": "Use multiple compute nodes with specialized settings.", -# "use_cases": "Multi-node analytics jobs, jobs in reservations, custom project charging, and more." -# }, + { + "name": "configurable", + "architectures": [ + { + "name": "gpu", + "description": "Configurable GPU", + "roles": ["gpu"], + } + ], + "resources": "Use multiple compute nodes with specialized settings.", + "use_cases": "Multi-node analytics jobs, jobs in reservations, custom project charging, and more." + }, ] c.NERSCSpawner.systems = [ @@ -1132,26 +1132,13 @@ def comma_split(string): "cori-shared-node-cpu": ( "sshspawner.sshspawner.SSHSpawner", { "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/usr/common/software/jupyter/19-11/bin/jupyter-labhub"], - "args": ["--transport=ipc"], - "environment": {"OMP_NUM_THREADS" : "2", "PYTHONFAULTHANDLER": "1"}, - "remote_hosts": ["corijupyter.nersc.gov"], - "remote_port_command": "/usr/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip", - "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", - "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", - "ssh_keyfile": '/certs/{username}.key' - } - ), - "cori2-shared-node-cpu": ( - "sshspawner.sshspawner.SSHSpawner", { - "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], + "/usr/common/software/jupyter/20-06/bin/jupyter-labhub"], "args": ["--transport=ipc"], "environment": {"OMP_NUM_THREADS" : "2", "PYTHONFAULTHANDLER": "1"}, "remote_hosts": ["corijupyter.nersc.gov"], "remote_port_command": "/usr/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", - "path": "/global/common/cori_cle7/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "path": "/usr/common/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "ssh_keyfile": '/certs/{username}.key' } ), From f47abeb487e558408f288cc26b4d90c98f8f4d75 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Mon, 13 Jul 2020 08:34:38 -0700 Subject: [PATCH 10/25] Easy way to deny users access --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 04bf0b3..c7ea6df 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -860,6 +860,7 @@ def comma_split(string): # # .. versionadded: 0.9 #c.Authenticator.blacklist = set() +c.Authenticator.blacklist = set(comma_split(os.environ.get("BLACKLIST"))) ## Enable persisting auth_state (if available). # From a1a411025eefbac5e82775646790a094512e44e6 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Wed, 15 Jul 2020 17:35:31 -0700 Subject: [PATCH 11/25] Change to how accounts work --- jupyter-nersc/web-jupyterhub/nerscslurmspawner.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py index 7d387e3..0f31b3f 100644 --- a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py @@ -302,11 +302,8 @@ async def options_form(self, spawner): """) + # QOS, would be nice to constrain from qos + + form += dedent(""" + + + """) + # # GPUs per node, should come from model # form += dedent(""" @@ -359,6 +369,7 @@ async def options_form(self, spawner): def options_from_form(self, formdata): options = dict() options["account"] = formdata["account"][0] + options["qos"] = formdata["qos"][0] # options["ngpus"] = formdata["ngpus"][0] options["ntasks_per_node"] = formdata["ntasks-per-node"][0] options["cpus_per_task"] = formdata["cpus-per-task"][0] From e7c63115a604044810af2c79922cb271730a02f7 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Wed, 29 Jul 2020 17:41:37 -0700 Subject: [PATCH 15/25] Help the user a little --- jupyter-nersc/web-jupyterhub/templates/login.html | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/jupyter-nersc/web-jupyterhub/templates/login.html b/jupyter-nersc/web-jupyterhub/templates/login.html index c087f7d..70984ff 100644 --- a/jupyter-nersc/web-jupyterhub/templates/login.html +++ b/jupyter-nersc/web-jupyterhub/templates/login.html @@ -19,7 +19,7 @@ {% else %} -
+
Sign in
@@ -73,6 +73,11 @@ value='Sign In' tabindex="4" /> +

+ Forgot password? | + Forgot username? | + MFA not working? +

{% endif %} From c846e56791ec3242c915b6b73a6131312a0bcd25 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Mon, 10 Aug 2020 12:39:01 -0700 Subject: [PATCH 16/25] Make time limit work properly --- jupyter-nersc/web-jupyterhub/nerscslurmspawner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py index f04394e..705eb33 100644 --- a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py @@ -360,8 +360,8 @@ async def options_form(self, spawner): # Time, should come from model form += dedent(""" - - + + """) return form @@ -374,7 +374,7 @@ def options_from_form(self, formdata): options["ntasks_per_node"] = formdata["ntasks-per-node"][0] options["cpus_per_task"] = formdata["cpus-per-task"][0] options["gpus_per_task"] = formdata["gpus-per-task"][0] - options["time"] = formdata["time"][0] + options["runtime"] = formdata["runtime"][0] return options # # Have to override this to call get_auth_state() I think From 106541fd78024589e62d021bb2c41e7baca47d40 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Mon, 10 Aug 2020 12:39:52 -0700 Subject: [PATCH 17/25] Correct labhub to singleuser here --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index bd444c7..1d57275 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1128,7 +1128,7 @@ def comma_split(string): "cori-shared-node-cpu": ( "sshspawner.sshspawner.SSHSpawner", { "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/usr/common/software/jupyter/20-06/bin/jupyter-labhub"], + "/usr/common/software/jupyter/20-06/bin/jupyterhub-singleuser"], "args": ["--transport=ipc"], "environment": {"OMP_NUM_THREADS" : "2", "PYTHONFAULTHANDLER": "1"}, "remote_hosts": ["corijupyter.nersc.gov"], From 941972728f67360b5709a6922e769b7f5ce99e7a Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Mon, 10 Aug 2020 13:11:37 -0700 Subject: [PATCH 18/25] Format option needed for new harbor --- jupyter-base/build.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/jupyter-base/build.sh b/jupyter-base/build.sh index 6b34fda..d8a42fe 100644 --- a/jupyter-base/build.sh +++ b/jupyter-base/build.sh @@ -14,8 +14,14 @@ else exit 1 fi +format="" +if [ "$imcmd" == "podman" ]; then + format="--format docker" +fi + branch=$(git symbolic-ref --short HEAD) $imcmd build \ + $format \ "$@" \ --tag registry.spin.nersc.gov/das/jupyter-base-$branch:latest . From 231dc1608b2c0a933e5b4cb7504a435d1d6813ea Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Mon, 10 Aug 2020 13:13:56 -0700 Subject: [PATCH 19/25] Changes needed for new registry --- jupyter-nersc/app-monitoring/build.sh | 6 ++++++ jupyter-nersc/web-announcement/build.sh | 6 ++++++ jupyter-nersc/web-nbviewer/build.sh | 6 ++++++ 3 files changed, 18 insertions(+) diff --git a/jupyter-nersc/app-monitoring/build.sh b/jupyter-nersc/app-monitoring/build.sh index 8ebd337..43e79ab 100644 --- a/jupyter-nersc/app-monitoring/build.sh +++ b/jupyter-nersc/app-monitoring/build.sh @@ -14,8 +14,14 @@ else exit 1 fi +format="" +if [ "$imcmd" == "podman" ]; then + format="--format docker" +fi + branch=$(git symbolic-ref --short HEAD) $imcmd build \ + $format \ "$@" \ --tag registry.spin.nersc.gov/das/app-monitoring.jupyter-nersc-$branch:latest . diff --git a/jupyter-nersc/web-announcement/build.sh b/jupyter-nersc/web-announcement/build.sh index 1ba8810..dd75d49 100644 --- a/jupyter-nersc/web-announcement/build.sh +++ b/jupyter-nersc/web-announcement/build.sh @@ -14,9 +14,15 @@ else exit 1 fi +format="" +if [ "$imcmd" == "podman" ]; then + format="--format docker" +fi + branch=$(git symbolic-ref --short HEAD) $imcmd build \ --build-arg branch=$branch \ + $format \ "$@" \ --tag registry.spin.nersc.gov/das/web-announcement.jupyter-nersc-$branch:latest . diff --git a/jupyter-nersc/web-nbviewer/build.sh b/jupyter-nersc/web-nbviewer/build.sh index c5a3424..029b976 100644 --- a/jupyter-nersc/web-nbviewer/build.sh +++ b/jupyter-nersc/web-nbviewer/build.sh @@ -14,9 +14,15 @@ else exit 1 fi +format="" +if [ "$imcmd" == "podman" ]; then + format="--format docker" +fi + branch=$(git symbolic-ref --short HEAD) $imcmd build \ --build-arg branch=$branch \ + $format \ "$@" \ --tag registry.spin.nersc.gov/das/web-nbviewer.jupyter-nersc-$branch:latest . From c2a36462a0121a5bbd3942c6a7c29f437e1184c1 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Mon, 10 Aug 2020 13:14:22 -0700 Subject: [PATCH 20/25] Upgrade to 0.5.0 --- jupyter-nersc/web-announcement/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-nersc/web-announcement/Dockerfile b/jupyter-nersc/web-announcement/Dockerfile index b50af16..3a35ac3 100644 --- a/jupyter-nersc/web-announcement/Dockerfile +++ b/jupyter-nersc/web-announcement/Dockerfile @@ -4,7 +4,7 @@ FROM registry.spin.nersc.gov/das/jupyter-base-${branch}:latest LABEL maintainer="Rollin Thomas " RUN \ - pip install git+https://github.com/rcthomas/jupyterhub-announcement.git@0.4.1 + pip install git+https://github.com/rcthomas/jupyterhub-announcement.git@0.5.0 WORKDIR /srv From 04ba9118cfff3fbe29fca554157f22d7264c3b36 Mon Sep 17 00:00:00 2001 From: shreddd Date: Tue, 11 Aug 2020 15:40:17 -0700 Subject: [PATCH 21/25] add pylibmc Completely pointless dependency that seems to fix the build --- jupyter-nersc/web-nbviewer/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/jupyter-nersc/web-nbviewer/Dockerfile b/jupyter-nersc/web-nbviewer/Dockerfile index 956ac98..86629c0 100644 --- a/jupyter-nersc/web-nbviewer/Dockerfile +++ b/jupyter-nersc/web-nbviewer/Dockerfile @@ -17,6 +17,7 @@ RUN \ nbconvert \ nbformat \ notebook \ + pylibmc \ pycurl && \ pip install --no-cache-dir \ statsd From fae16ba1bc5c3e7bc55ddc9b493a078056151f87 Mon Sep 17 00:00:00 2001 From: Shreyas Cholia Date: Tue, 11 Aug 2020 16:30:55 -0700 Subject: [PATCH 22/25] add conda-forge --- jupyter-nersc/web-nbviewer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-nersc/web-nbviewer/Dockerfile b/jupyter-nersc/web-nbviewer/Dockerfile index 86629c0..39c8a1a 100644 --- a/jupyter-nersc/web-nbviewer/Dockerfile +++ b/jupyter-nersc/web-nbviewer/Dockerfile @@ -11,7 +11,7 @@ LABEL maintainer="Rollin Thomas " # libevent-dev RUN \ - conda install --yes --all \ + conda install -c conda-forge --yes --all \ invoke \ markdown \ nbconvert \ From 681d393cd8fcc9738826651936326cc562c2875f Mon Sep 17 00:00:00 2001 From: Shreyas Cholia Date: Tue, 25 Aug 2020 09:09:06 -0700 Subject: [PATCH 23/25] disable caching --- jupyter-nersc/web-nbviewer/Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/jupyter-nersc/web-nbviewer/Dockerfile b/jupyter-nersc/web-nbviewer/Dockerfile index 39c8a1a..6a73b39 100644 --- a/jupyter-nersc/web-nbviewer/Dockerfile +++ b/jupyter-nersc/web-nbviewer/Dockerfile @@ -27,15 +27,15 @@ WORKDIR /repos RUN \ git clone https://github.com/jupyter/nbviewer.git && \ cd nbviewer && \ -# --no-dependencies flag because we don't actually need pylibmc or elasticsearch to run this (without -# elasticsearch or memcached) and everything else in requirements.txt is already installed + # --no-dependencies flag because we don't actually need pylibmc or elasticsearch to run this (without + # elasticsearch or memcached) and everything else in requirements.txt is already installed pip install -e . --no-cache-dir --no-dependencies && \ npm install && \ invoke bower && \ invoke less && \ cd .. -RUN echo 1 +RUN echo "Building clonenotebooks" RUN \ git clone https://github.com/NERSC/clonenotebooks.git && \ cd clonenotebooks && \ @@ -49,4 +49,4 @@ ADD frontpage.json ./ ADD docker-entrypoint.sh nbviewer_config.py ./ RUN chmod +x docker-entrypoint.sh ENTRYPOINT ["./docker-entrypoint.sh"] -CMD ["python", "-m", "nbviewer"] +CMD ["python", "-m", "nbviewer", "--no-cache"] From a4e1a787eef5fbbbe77e0b55e37fc37733f44853 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Tue, 25 Aug 2020 13:00:40 -0700 Subject: [PATCH 24/25] Large mem considerations --- .../web-jupyterhub/jupyterhub_config.py | 41 ++++++++----------- .../web-jupyterhub/nerscslurmspawner.py | 29 +++++++++---- jupyter-nersc/web-jupyterhub/nerscspawner.py | 12 ++++++ 3 files changed, 50 insertions(+), 32 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 1d57275..b401966 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1012,14 +1012,14 @@ def comma_split(string): #------------------------------------------------------------------------------ c.NERSCSpawner.profiles = [ - { "name": "gerty-shared-node-cpu" }, - { "name": "gerty-exclusive-node-cpu" }, - { "name": "cori-shared-node-cpu" }, - { "name": "cori-shared-node-gpu" }, - { "name": "cori-exclusive-node-cpu" }, - { "name": "cori-bigmem-node-cpu" }, - { "name": "cori-configurable-gpu" }, - { "name": "spin-shared-node-cpu" }, + { "name": "gerty-shared-node-cpu" }, + { "name": "gerty-exclusive-node-cpu" }, + { "name": "cori-shared-node-cpu" }, + { "name": "cori-shared-node-gpu" }, + { "name": "cori-exclusive-node-cpu" }, + { "name": "cori-exclusive-node-largemem" }, + { "name": "cori-configurable-gpu" }, + { "name": "spin-shared-node-cpu" }, ] c.NERSCSpawner.setups = [ @@ -1047,22 +1047,15 @@ def comma_split(string): "name": "cpu", "description": "Exclusive CPU Node", "roles": ["cori-exclusive-node-cpu"], - } - ], - "resources": "Use your own node within a job allocation using defaults.", - "use_cases": "Visualization, analytics, machine learning that is compute or memory intensive but can be done on a single node." - }, - { - "name": "bigmem-node", - "architectures": [ + }, { - "name": "cpu", - "description": "Big Memory CPU Node", - "roles": ["cori-exclusive-node-cpu"], + "name": "largemem", + "description": "Exclusive Large Memory Node", + "roles": ["cmem"], } ], "resources": "Use your own node within a job allocation using defaults.", - "use_cases": "Big memory stuff" + "use_cases": "Visualization, analytics, machine learning that is compute or memory intensive but can be done on a single node." }, { "name": "configurable", @@ -1128,7 +1121,7 @@ def comma_split(string): "cori-shared-node-cpu": ( "sshspawner.sshspawner.SSHSpawner", { "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/usr/common/software/jupyter/20-06/bin/jupyterhub-singleuser"], + "/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], "args": ["--transport=ipc"], "environment": {"OMP_NUM_THREADS" : "2", "PYTHONFAULTHANDLER": "1"}, "remote_hosts": ["corijupyter.nersc.gov"], @@ -1172,15 +1165,15 @@ def comma_split(string): ]) } ), - "cori-bigmem-node-cpu": ( - "nerscslurmspawner.NERSCBigmemSlurmSpawner", { + "cori-exclusive-node-largemem": ( + "nerscslurmspawner.NERSCExclusiveLargeMemSlurmSpawner", { "cmd": ["/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], "args": ["--transport=ipc"], "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", "startup_poll_interval": 30.0, "req_remote_host": "cori19-224.nersc.gov", "req_homedir": "/tmp", - "req_runtime": "240", + "req_runtime": "480", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", "path": "/usr/common/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "batchspawner_singleuser_cmd" : " ".join([ diff --git a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py index 705eb33..6b88afa 100644 --- a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py @@ -172,11 +172,11 @@ async def _get_batch_script(self, **subvars): subvars["cookie"] = int(time.time()) ^ (uid ** 2) return format_template(self.batch_script, **subvars) -class NERSCBigmemSlurmSpawner(NERSCSlurmSpawner): +class NERSCExclusiveLargeMemSlurmSpawner(NERSCSlurmSpawner): batch_script = Unicode("""#!/bin/bash -#SBATCH --clusters=escori -#SBATCH --comment={{ cookie }} +#SBATCH --account={{ account }} +#SBATCH --constraint=amd #SBATCH --job-name=jupyter #SBATCH --nodes={{ nodes }} #SBATCH --qos=bigmem @@ -185,6 +185,7 @@ class NERSCBigmemSlurmSpawner(NERSCSlurmSpawner): unset XDG_RUNTIME_DIR {{ cmd }}""").tag(config=True) + batch_submit_cmd = Unicode("/bin/bash -l /global/common/cori/das/jupyterhub/esslurm-wrapper.sh sbatch").tag(config=True) batch_query_cmd = Unicode("/bin/bash -l /global/common/cori/das/jupyterhub/esslurm-wrapper.sh squeue -h -j {job_id} -o '%T\ %B-224.nersc.gov'").tag(config=True) batch_cancel_cmd = Unicode("/bin/bash -l /global/common/cori/das/jupyterhub/esslurm-wrapper.sh scancel {job_id}").tag(config=True) @@ -193,13 +194,25 @@ async def _get_batch_script(self, **subvars): """Format batch script from vars""" auth_state = await self.user.get_auth_state() self.userdata = auth_state["userdata"] - uid = self.userdata["uid"] - subvars["cookie"] = int(time.time()) ^ (uid ** 2) + subvars["account"] = self.default_cmem_repo() return format_template(self.batch_script, **subvars) - def parse_job_id(self, output): - output = output.replace(" on cluster escori", "") - return super().parse_job_id(output) + def default_cmem_repo(self): + for allocation in self.user_allocations(): + for qos in allocation["userAllocationQos"]: + if qos["qos"]["qos"] in ["cmem"]: + return allocation["computeAllocation"]["repoName"] + return None + + def user_allocations(self, repos=[]): + for allocation in self.userdata["userAllocations"]: + if repos and allocation["computeAllocation"]["repoName"] not in repos: + continue + yield allocation + +# def parse_job_id(self, output): +# output = output.replace(" on cluster escori", "") +# return super().parse_job_id(output) class NERSCExclusiveGPUSlurmSpawner(NERSCSlurmSpawner): diff --git a/jupyter-nersc/web-jupyterhub/nerscspawner.py b/jupyter-nersc/web-jupyterhub/nerscspawner.py index 4938dae..8049baf 100644 --- a/jupyter-nersc/web-jupyterhub/nerscspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscspawner.py @@ -43,6 +43,8 @@ def check_role(self, auth_state, role): return self.check_role_staff(auth_state) if role == "cori-exclusive-node-cpu": return self.check_role_cori_exclusive_node_cpu(auth_state) + if role == "cmem": + return self.check_role_cmem(auth_state) return False def check_role_cori_exclusive_node_cpu(self, auth_state): @@ -55,6 +57,16 @@ def default_jupyter_repo(self, auth_state): return allocation["computeAllocation"]["repoName"] return None + def check_role_cmem(self, auth_state): + return self.default_cmem_repo(auth_state) is not None + + def default_cmem_repo(self, auth_state): + for allocation in self.user_allocations(auth_state): + for qos in allocation["userAllocationQos"]: + if qos["qos"]["qos"] in ["cmem"]: + return allocation["computeAllocation"]["repoName"] + return None + def check_role_gpu(self, auth_state): return self.default_gpu_repo(auth_state) is not None From 5a507ca0f5185dc9f198ed6de60f77e30552b261 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 11 Sep 2020 07:22:06 -0700 Subject: [PATCH 25/25] Upgrade to 0.6 --- jupyter-nersc/web-announcement/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/jupyter-nersc/web-announcement/Dockerfile b/jupyter-nersc/web-announcement/Dockerfile index 3a35ac3..e4782dd 100644 --- a/jupyter-nersc/web-announcement/Dockerfile +++ b/jupyter-nersc/web-announcement/Dockerfile @@ -4,7 +4,8 @@ FROM registry.spin.nersc.gov/das/jupyter-base-${branch}:latest LABEL maintainer="Rollin Thomas " RUN \ - pip install git+https://github.com/rcthomas/jupyterhub-announcement.git@0.5.0 + pip install --no-cache-dir html_sanitizer && \ + pip install git+https://github.com/rcthomas/jupyterhub-announcement.git@0.6.0 WORKDIR /srv