Skip to content
This repository has been archived by the owner on Apr 29, 2021. It is now read-only.

Commit

Permalink
Merge pull request #79 from NERSC/20-08
Browse files Browse the repository at this point in the history
20 08
  • Loading branch information
rcthomas authored Sep 11, 2020
2 parents 25adb11 + 7e37007 commit 6b7edfa
Show file tree
Hide file tree
Showing 17 changed files with 246 additions and 48 deletions.
2 changes: 1 addition & 1 deletion jupyter-base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ RUN \
curl -s -o /tmp/miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash /tmp/miniconda3.sh -b -p /opt/anaconda3 && \
rm -rf /tmp/miniconda3.sh && \
echo "python 3.7.3" >> /opt/anaconda3/conda-meta/pinned && \
/opt/anaconda3/bin/conda update --yes conda && \
/opt/anaconda3/bin/conda install --yes \
--channel conda-forge \
alembic \
attrs \
certipy \
Expand Down
6 changes: 6 additions & 0 deletions jupyter-base/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,14 @@ else
exit 1
fi

format=""
if [ "$imcmd" == "podman" ]; then
format="--format docker"
fi

branch=$(git symbolic-ref --short HEAD)

$imcmd build \
$format \
"$@" \
--tag registry.spin.nersc.gov/das/jupyter-base-$branch:latest .
2 changes: 1 addition & 1 deletion jupyter-compose/app-notebooks/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ RUN \

RUN \
pip install --no-cache-dir \
git+https://github.com/rcthomas/jupyter-server-proxy.git@allow-remote-proxy
git+https://github.com/jupyterhub/jupyter-server-proxy.git

ADD jupyter_notebook_config.py /opt/anaconda3/etc/jupyter/.

Expand Down
2 changes: 1 addition & 1 deletion jupyter-compose/web-announcement/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ FROM registry.spin.nersc.gov/das/jupyter-base-${branch}:latest
LABEL maintainer="Rollin Thomas <[email protected]>"

RUN \
pip install git+https://github.com/rcthomas/jupyterhub-announcement.git@0.3.1
pip install git+https://github.com/rcthomas/jupyterhub-announcement.git@0.4.1

WORKDIR /srv

Expand Down
2 changes: 1 addition & 1 deletion jupyter-compose/web-jupyterhub/jupyterhub_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,6 @@
# environment variables. Most, including the default, do not. Consult the
# documentation for your spawner to verify!
#c.Spawner.cmd = ['jupyterhub-singleuser']
c.Spawner.cmd = ['jupyter-labhub']

## Maximum number of consecutive failures to allow before shutting down
# JupyterHub.
Expand Down Expand Up @@ -643,6 +642,7 @@
# - Start with `/notebooks` instead of `/tree` if `default_url` points to a notebook instead of a directory.
# - You can set this to `/lab` to have JupyterLab start by default, rather than Jupyter Notebook.
#c.Spawner.default_url = ''
c.Spawner.default_url = '/lab'

## Disable per-user configuration of single-user servers.
#
Expand Down
22 changes: 21 additions & 1 deletion jupyter-nersc/app-monitoring/build.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,27 @@
#!/bin/bash

imcmd=""
for command in docker podman; do
if [ $(command -v $command) ]; then
imcmd=$command
break
fi
done
if [ -n "$imcmd" ]; then
echo "Using $imcmd"
else
echo "No image command defined"
exit 1
fi

format=""
if [ "$imcmd" == "podman" ]; then
format="--format docker"
fi

branch=$(git symbolic-ref --short HEAD)

docker build \
$imcmd build \
$format \
"$@" \
--tag registry.spin.nersc.gov/das/app-monitoring.jupyter-nersc-$branch:latest .
16 changes: 15 additions & 1 deletion jupyter-nersc/app-notebooks/build.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,22 @@
#!/bin/bash
#
# Build the app-notebooks image for the currently checked-out git branch.
# Any extra command-line arguments are passed through to the build command.

# Pick the first available container CLI, preferring docker over podman.
imcmd=""
for command in docker podman; do
    # Test the exit status of `command -v` directly; the unquoted
    # `[ $(command -v ...) ]` form breaks if the resolved path contains spaces.
    if command -v "$command" > /dev/null 2>&1; then
        imcmd=$command
        break
    fi
done
if [ -n "$imcmd" ]; then
    echo "Using $imcmd"
else
    echo "No image command defined"
    exit 1
fi

# The branch name selects both the base image (via --build-arg) and the tag.
branch=$(git symbolic-ref --short HEAD)

# Use the CLI detected above. Hard-coding one tool here would ignore the
# detection and fail on hosts where only the other tool is installed.
"$imcmd" build \
    --build-arg branch="$branch" \
    "$@" \
    --tag "registry.spin.nersc.gov/das/app-notebooks.jupyter-nersc-$branch:latest" .
3 changes: 2 additions & 1 deletion jupyter-nersc/web-announcement/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ FROM registry.spin.nersc.gov/das/jupyter-base-${branch}:latest
LABEL maintainer="Rollin Thomas <[email protected]>"

RUN \
pip install git+https://github.com/rcthomas/[email protected]
pip install --no-cache-dir html_sanitizer && \
pip install git+https://github.com/rcthomas/[email protected]

WORKDIR /srv

Expand Down
22 changes: 21 additions & 1 deletion jupyter-nersc/web-announcement/build.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,28 @@
#!/bin/bash

imcmd=""
for command in docker podman; do
if [ $(command -v $command) ]; then
imcmd=$command
break
fi
done
if [ -n "$imcmd" ]; then
echo "Using $imcmd"
else
echo "No image command defined"
exit 1
fi

format=""
if [ "$imcmd" == "podman" ]; then
format="--format docker"
fi

branch=$(git symbolic-ref --short HEAD)

docker build \
$imcmd build \
--build-arg branch=$branch \
$format \
"$@" \
--tag registry.spin.nersc.gov/das/web-announcement.jupyter-nersc-$branch:latest .
22 changes: 17 additions & 5 deletions jupyter-nersc/web-jupyterhub/hub-scripts/scram-user.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,27 @@ if [ ! -f $cert ]; then
echo " ... SKIPPED no cert for $username"
continue
fi
for i in 1 2 3
do

if [ "$#" -lt 3 ]; then
for i in 1 2 3
do
/usr/bin/ssh \
-i $cert \
-l $username \
-o PreferredAuthentications=publickey \
-o StrictHostKeyChecking=no \
-p 22 \
$hostname \
killall -u $username
sleep 1
done
elif [ "$#" -ge 3 ]; then
/usr/bin/ssh \
-i $cert \
-l $username \
-o PreferredAuthentications=publickey \
-o StrictHostKeyChecking=no \
-p 22 \
$hostname \
killall -u $username
sleep 1
done
kill -9 "${@:3}"
fi
63 changes: 43 additions & 20 deletions jupyter-nersc/web-jupyterhub/jupyterhub_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1012,13 +1012,14 @@ def comma_split(string):
#------------------------------------------------------------------------------

c.NERSCSpawner.profiles = [
{ "name": "gerty-shared-node-cpu" },
{ "name": "gerty-exclusive-node-cpu" },
{ "name": "cori-shared-node-cpu" },
{ "name": "cori-shared-node-gpu" },
{ "name": "cori-exclusive-node-cpu" },
{ "name": "cori-configurable-gpu" },
{ "name": "spin-shared-node-cpu" },
{ "name": "gerty-shared-node-cpu" },
{ "name": "gerty-exclusive-node-cpu" },
{ "name": "cori-shared-node-cpu" },
{ "name": "cori-shared-node-gpu" },
{ "name": "cori-exclusive-node-cpu" },
{ "name": "cori-exclusive-node-largemem" },
{ "name": "cori-configurable-gpu" },
{ "name": "spin-shared-node-cpu" },
]

c.NERSCSpawner.setups = [
Expand Down Expand Up @@ -1046,23 +1047,28 @@ def comma_split(string):
"name": "cpu",
"description": "Exclusive CPU Node",
"roles": ["cori-exclusive-node-cpu"],
},
{
"name": "largemem",
"description": "Exclusive Large Memory Node",
"roles": ["cmem"],
}
],
"resources": "Use your own node within a job allocation using defaults.",
"use_cases": "Visualization, analytics, machine learning that is compute or memory intensive but can be done on a single node."
},
# {
# "name": "configurable",
# "architectures": [
# {
# "name": "gpu",
# "description": "Configurable GPU",
# "roles": ["gpu"],
# }
# ],
# "resources": "Use multiple compute nodes with specialized settings.",
# "use_cases": "Multi-node analytics jobs, jobs in reservations, custom project charging, and more."
# },
{
"name": "configurable",
"architectures": [
{
"name": "gpu",
"description": "Configurable GPU",
"roles": ["gpu"],
}
],
"resources": "Use multiple compute nodes with specialized settings.",
"use_cases": "Multi-node analytics jobs, jobs in reservations, custom project charging, and more."
},
]

c.NERSCSpawner.systems = [
Expand Down Expand Up @@ -1121,7 +1127,7 @@ def comma_split(string):
"remote_hosts": ["corijupyter.nersc.gov"],
"remote_port_command": "/usr/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip",
"hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api",
"path": "/global/common/cori_cle7/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin",
"path": "/usr/common/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin",
"ssh_keyfile": '/certs/{username}.key'
}
),
Expand Down Expand Up @@ -1159,6 +1165,23 @@ def comma_split(string):
])
}
),
"cori-exclusive-node-largemem": (
"nerscslurmspawner.NERSCExclusiveLargeMemSlurmSpawner", {
"cmd": ["/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"],
"args": ["--transport=ipc"],
"exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}",
"startup_poll_interval": 30.0,
"req_remote_host": "cori19-224.nersc.gov",
"req_homedir": "/tmp",
"req_runtime": "480",
"hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api",
"path": "/usr/common/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin",
"batchspawner_singleuser_cmd" : " ".join([
"/global/common/cori/das/jupyterhub/jupyter-launcher.sh",
"/global/common/cori_cle7/software/jupyter/20-06/bin/batchspawner-singleuser",
])
}
),
"cori-configurable-gpu": (
"nerscslurmspawner.NERSCConfigurableGPUSlurmSpawner", {
"cmd": ["/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"],
Expand Down
64 changes: 57 additions & 7 deletions jupyter-nersc/web-jupyterhub/nerscslurmspawner.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,47 @@ async def _get_batch_script(self, **subvars):
subvars["cookie"] = int(time.time()) ^ (uid ** 2)
return format_template(self.batch_script, **subvars)

class NERSCExclusiveLargeMemSlurmSpawner(NERSCSlurmSpawner):
    """Slurm spawner that requests a node with ``--constraint=amd`` and
    ``--qos=bigmem``.

    The job is charged to the first of the user's allocations that carries
    the ``cmem`` QOS (see :meth:`default_cmem_repo`). Submit, query and
    cancel commands are all routed through ``esslurm-wrapper.sh``.
    """

    batch_script = Unicode("""#!/bin/bash
#SBATCH --account={{ account }}
#SBATCH --constraint=amd
#SBATCH --job-name=jupyter
#SBATCH --nodes={{ nodes }}
#SBATCH --qos=bigmem
#SBATCH --time={{ runtime }}
{{ env_text }}
unset XDG_RUNTIME_DIR
{{ cmd }}""").tag(config=True)

    batch_submit_cmd = Unicode(
        "/bin/bash -l /global/common/cori/das/jupyterhub/esslurm-wrapper.sh sbatch"
    ).tag(config=True)
    # The backslash before the space is written as an explicit "\\" so the
    # command line still receives a literal backslash-space, without relying
    # on Python's deprecated handling of the unrecognized escape "\ ".
    batch_query_cmd = Unicode(
        "/bin/bash -l /global/common/cori/das/jupyterhub/esslurm-wrapper.sh "
        "squeue -h -j {job_id} -o '%T\\ %B-224.nersc.gov'"
    ).tag(config=True)
    batch_cancel_cmd = Unicode(
        "/bin/bash -l /global/common/cori/das/jupyterhub/esslurm-wrapper.sh scancel {job_id}"
    ).tag(config=True)

    # Overridden so that get_auth_state() is called before the script is
    # rendered; the account is derived from the user's auth state.
    async def _get_batch_script(self, **subvars):
        """Format the batch script from ``subvars``.

        Loads ``userdata`` from the user's auth state, stores it on
        ``self.userdata`` and fills in ``account`` with the result of
        :meth:`default_cmem_repo` before rendering the template.
        """
        auth_state = await self.user.get_auth_state()
        self.userdata = auth_state["userdata"]
        subvars["account"] = self.default_cmem_repo()
        return format_template(self.batch_script, **subvars)

    def default_cmem_repo(self):
        """Return the repo name of the first allocation with a ``cmem`` QOS.

        Returns ``None`` when none of the user's allocations has that QOS.
        """
        for allocation in self.user_allocations():
            for qos in allocation["userAllocationQos"]:
                if qos["qos"]["qos"] == "cmem":
                    return allocation["computeAllocation"]["repoName"]
        return None

    def user_allocations(self, repos=None):
        """Yield the user's allocations from ``self.userdata``.

        ``repos`` is an optional collection of repo names; when it is
        non-empty only allocations whose repo name is in it are yielded.
        ``None`` (the default) or an empty collection yields everything.
        """
        for allocation in self.userdata["userAllocations"]:
            if repos and allocation["computeAllocation"]["repoName"] not in repos:
                continue
            yield allocation

#   def parse_job_id(self, output):
#       output = output.replace(" on cluster escori", "")
#       return super().parse_job_id(output)

class NERSCExclusiveGPUSlurmSpawner(NERSCSlurmSpawner):

Expand Down Expand Up @@ -252,6 +293,7 @@ class NERSCConfigurableGPUSlurmSpawner(NERSCSlurmSpawner):

batch_script = Unicode("""#!/bin/bash
#SBATCH --account={{ account }}
#SBATCH --qos={{ qos }}
#SBATCH --constraint=gpu
#SBATCH --job-name=jupyter
#SBATCH --nodes={{ nodes }}
Expand All @@ -273,19 +315,26 @@ async def options_form(self, spawner):
<select class="form-control" name="account" required autofocus>
""")

gpu_accounts = ["nstaff", "m1759", "dasrepo"]
for allocation in spawner.userdata["userAllocations"]:
account = allocation["computeAllocation"]["repoName"]
if account not in gpu_accounts:
continue
for qos in allocation["userAllocationQos"]:
if qos["qos"]["qos"] == "gpu":
if qos["qos"]["qos"] in ["gpu", "gpu_special_m1759"]:
form += """<option value="{}">{}</option>""".format(account, account)

form += dedent("""
</select>
""")

# QOS, would be nice to constrain from qos

form += dedent("""
<label for="qos">QOS:</label>
<select class="form-control" name="qos" required autofocus>
<option value="gpu">gpu</option>
<option value="special">special (m1759 only)</option>
</select>
""")

# # GPUs per node, should come from model

# form += dedent("""
Expand Down Expand Up @@ -324,20 +373,21 @@ async def options_form(self, spawner):
# Time, should come from model

form += dedent("""
<label for="time">time (time limit in minutes):</label>
<input class="form-control" type="number" name="time" min="10" max="240" value="240" step="10" required autofocus>
<label for="runtime">time (time limit in minutes):</label>
<input class="form-control" type="number" name="runtime" min="10" max="240" value="240" step="10" required autofocus>
""")

return form

def options_from_form(self, formdata):
options = dict()
options["account"] = formdata["account"][0]
options["qos"] = formdata["qos"][0]
# options["ngpus"] = formdata["ngpus"][0]
options["ntasks_per_node"] = formdata["ntasks-per-node"][0]
options["cpus_per_task"] = formdata["cpus-per-task"][0]
options["gpus_per_task"] = formdata["gpus-per-task"][0]
options["time"] = formdata["time"][0]
options["runtime"] = formdata["runtime"][0]
return options

# # Have to override this to call get_auth_state() I think
Expand Down
Loading

0 comments on commit 6b7edfa

Please sign in to comment.