Skip to content

Commit

Permalink
Support Jupyter saving to S3 (#860)
Browse files Browse the repository at this point in the history
* Add S3 support, configured by setting the JUPYTER_S3_BUCKET and JUPYTER_S3_PREFIX variables
* Add support for resetting the jupyter configuration via JUPYTER_CONFIG_RESET (can also
  delete the file)
* Fix bug in startup script that could end up racing with the jupyter log file being written
* Switch to jupyter lab by default
* Add cache to gitignore (it's generated by a notebook)
* Add package-mode=false to pyproject -- I'm on python3.12 and getting warnings
* Add pip as an explicit dependency -- I'm on python3.12 and it's not installed by default, leading
  to mypy errors when installing type info dependencies.
  • Loading branch information
eric-anderson authored Oct 1, 2024
1 parent 4c5725f commit 51a4e2e
Show file tree
Hide file tree
Showing 6 changed files with 310 additions and 9 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,5 @@ traces/
apps/jupyter/bind_dir/poetry_cache
apps/query-ui/cache_dir
docs/build
cache
notebooks/default-prep-data
1 change: 0 additions & 1 deletion apps/jupyter/Dockerfile.buildx
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ FROM arynai/sycamore-base:$TAG
ARG GIT_BRANCH="unknown"
ARG GIT_COMMIT="unknown"
ARG GIT_DIFF="unknown"
ARG ERIC_TEST="foo"

ENV GIT_BRANCH=${GIT_BRANCH}
ENV GIT_COMMIT=${GIT_COMMIT}
Expand Down
41 changes: 36 additions & 5 deletions apps/jupyter/run-jupyter.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,42 @@ if [[ -x "${BIND_DIR}/setup.sh" ]]; then
SETUP_FAILED=true
fi
fi

if [[ "${JUPYTER_CONFIG_RESET}" == yes ]]; then
echo "Resetting jupyter configuration"
rm -f "${JUPYTER_CONFIG_DOCKER}"
fi

if [[ ! -f "${JUPYTER_CONFIG_DOCKER}" ]]; then
TOKEN=$(openssl rand -hex 24)
cat >"${JUPYTER_CONFIG_DOCKER}" <<EOF
cat >"${JUPYTER_CONFIG_DOCKER}".tmp <<EOF
# Configuration file for notebook.
c = get_config() #noqa
c.IdentityProvider.token = '${TOKEN}'
EOF

if [[ "${JUPYTER_S3_BUCKET}" ]]; then
echo "Enabling S3 contents manager with bucket ${JUPYTER_S3_BUCKET}"
cat >>"${JUPYTER_CONFIG_DOCKER}".tmp <<EOF
from s3contents import S3ContentsManager
c.ServerApp.contents_manager_class = S3ContentsManager
c.S3ContentsManager.bucket = "${JUPYTER_S3_BUCKET}"
c.ServerApp.root_dir = ""
EOF
case "${JUPYTER_S3_PREFIX}" in
"") : ;;
*/)
echo "ERROR: JUPYTER_S3_PREFIX ${JUPYTER_S3_PREFIX} must not end in / or no file will be accessible"
exit 1
;;
*)
echo "Using S3 Prefix ${JUPYTER_S3_PREFIX}"
echo "c.S3ContentsManager.prefix = \"${JUPYTER_S3_PREFIX}\"" >>"${JUPYTER_CONFIG_DOCKER}".tmp ;;
esac
fi
mv "${JUPYTER_CONFIG_DOCKER}".tmp "${JUPYTER_CONFIG_DOCKER}"
fi
ln -snf "${JUPYTER_CONFIG_DOCKER}" $HOME/.jupyter

Expand Down Expand Up @@ -77,10 +104,13 @@ fi
exit 1
fi

sleep 1 # reduce race with file being written
REDIRECT="${BIND_DIR}/redirect.html"
perl -ne 's,://\S+:8888/tree,://localhost:8888/tree,;print' < "${FILE}" >"${REDIRECT}"
URL=$(perl -ne 'print $1 if m,url=(https?://localhost:8888/tree\S+)",;' <"${REDIRECT}")
while [[ "${URL}" == "" ]]; do
echo "Waiting to find the URL in ${FILE}..."
sleep 1
perl -ne 's,://\S+:8888/lab,://localhost:8888/lab,;print' < "${FILE}" >"${REDIRECT}"
URL=$(perl -ne 'print $1 if m,url=(https?://localhost:8888/lab\S+)",;' <"${REDIRECT}")
done

for i in {1..10}; do
echo
Expand All @@ -93,11 +123,12 @@ fi
echo " c) docker compose cp jupyter:${BIND_DIR}/redirect.html ."
echo " and open redirect.html in a browser"
echo " Note: the token is stable unless you delete docker_volume/jupyter_notebook_config.py"
echo " or you set JUPYTER_CONFIG_RESET=yes when starting the container"
sleep 30
done
) &

trap "kill $!" EXIT

cd "${WORK_DIR}"
poetry run jupyter notebook "${SSLARG[@]}" --no-browser --ip 0.0.0.0 "$@"
poetry run jupyter lab "${SSLARG[@]}" --no-browser --ip 0.0.0.0 "$@"
3 changes: 3 additions & 0 deletions compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ services:
- AWS_SECRET_ACCESS_KEY
- AWS_SESSION_TOKEN
- AWS_CREDENTIAL_EXPIRATION
- JUPYTER_S3_BUCKET
- JUPYTER_S3_PREFIX
- JUPYTER_CONFIG_RESET # set this to 'yes' to reset the configuration
- SSL # Set to 1 if you want self-signed certificates

### Optional profiles below here
Expand Down
Loading

0 comments on commit 51a4e2e

Please sign in to comment.