Skip to content

Commit

Permalink
Functions for python3 virtualenv and using nfs mounts (#2)
Browse files Browse the repository at this point in the history
  • Loading branch information
hiyer authored Jul 3, 2018
1 parent 982cf4e commit 103d48a
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 0 deletions.
44 changes: 44 additions & 0 deletions examples/nfs_venv.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash -x

#
# Install a python virtualenv on an NFS mount. If the mount fails,
# fall back to a local install
#
# Installing python libraries in an NFS mount has the following advantages over
# installing them locally on each node:
# 1. It allows for faster cluster startup and upscaling since the libraries only
# need to be installed once. This is especially pertinent with libraries that have
# compiled components, like numpy, scipy, etc.
# 2. One can install new libraries or upgrade existing ones at runtime, and the
# changes are immediately available to all of the cluster's nodes
#

source /usr/lib/hustler/bin/qubole-bash-lib.sh
source /usr/lib/bootstrap-functions/misc/mount_nfs.sh
source /usr/lib/bootstrap-functions/misc/python_venv.sh

#
# Point the local virtualenv path ($2) at the shared install ($1).
# The parent directory is created first because nothing else in the NFS
# code path creates it, and -f/-n let a re-run of this script replace an
# existing link instead of failing.
#
link_shared_venv() {
  mkdir -p -- "${2%/*}" && ln -sfn -- "$1" "$2"
}

# Test the mount directly in the condition rather than via a later $? check,
# so no statement can slip in between and clobber the status.
if mount_nfs_volume "fs-7abdefa3.efs.us-east-1.amazonaws.com:/" /mnt/efs; then
  is_master=$(nodeinfo is_master)
  cluster_id=$(nodeinfo cluster_id)
  # Use the cluster id so we can install different virtualenvs for
  # different clusters
  install_location="/mnt/efs/${cluster_id}/py36"

  # symlink to same path as local install so we can
  # use in zeppelin
  symlink=/usr/lib/virtualenv/py36

  if [[ "$is_master" != "1" ]]; then
    link_shared_venv "$install_location" "$symlink"
    hadoop_use_venv "$install_location"
    # Install only from master. On worker nodes we just
    # need the change to use the new virtualenv
    exit 0
  fi

  install_python_venv "36" "$install_location"
  link_shared_venv "$install_location" "$symlink"
else
  # NFS mount failed: fall back to a node-local virtualenv using the
  # defaults of install_python_venv
  install_python_venv
fi
28 changes: 28 additions & 0 deletions misc/mount_nfs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash

source /usr/lib/hustler/bin/qubole-bash-lib.sh

#
# Instructions for AWS EFS mount:
# 1. After creating the EFS file system, create a security group
# 2. Create an inbound traffic rule for this security group that allows traffic on
# port 2049 (NFS) from this security group as described here:
# https://docs.aws.amazon.com/efs/latest/ug/accessing-fs-create-security-groups.html
# 3. Add this security group as a persistent security group for the cluster from which
# you want to mount the EFS store, as described here:
# http://docs.qubole.com/en/latest/admin-guide/how-to-topics/persistent-security-group.html
#
# TODO: add instructions for Azure file share
#

#######################################
# Mount an NFS (v4.1) export on a local directory.
# The master node (nodeinfo is_master == "1") mounts without the "ro"
# option; every other node mounts the export read-only.
# Arguments:
#   $1 - NFS export, e.g. "fs-12345678.efs.us-east-1.amazonaws.com:/"
#   $2 - local directory to mount on; created if it does not exist
# Returns:
#   2 if an argument is missing; otherwise the status of mkdir or mount
#######################################
function mount_nfs_volume() {
  local nfs_export=$1
  local mountpoint=$2
  local is_master

  if [[ -z "$nfs_export" || -z "$mountpoint" ]]; then
    echo "usage: mount_nfs_volume <nfs_export> <mountpoint>" >&2
    return 2
  fi

  # mount fails if the target directory is missing, which is the normal
  # state on a freshly started node.
  mkdir -p -- "$mountpoint" || return

  is_master=$(nodeinfo is_master)
  if [[ "$is_master" == "1" ]]; then
    mount -v -t nfs4 -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2 "$nfs_export" "$mountpoint"
  else
    mount -v -t nfs4 -o nfsvers=4.1,ro,rsize=1048576,hard,timeo=600,retrans=2 "$nfs_export" "$mountpoint"
  fi
}
30 changes: 30 additions & 0 deletions misc/python_venv.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash -x

#
# This function activates the new virtualenv, so install
# any libraries you want after calling this with "pip install"
#
# Alternatively you can also use a requirements file. For example
# to use a requirements file stored in S3 or Azure Blob Store, run
#
# /usr/lib/hadoop2/bin/hadoop dfs -get {s3|wasb}://path/to/requirements/file /tmp/requirements.txt
# pip install -r /tmp/requirements.txt
#

#######################################
# Install a python interpreter with yum, create a virtualenv for it,
# register the virtualenv with hadoop (hadoop_use_venv) and activate it
# in the calling shell.
# Arguments:
#   $1 - python version suffix used for the yum package and for
#        /usr/bin/python<version> (default: 36)
#   $2 - directory to create the virtualenv in
#        (default: /usr/lib/virtualenv/py<version>)
# Returns:
#   non-zero if the directory or the virtualenv cannot be created;
#   otherwise the status of sourcing the activate script
#######################################
function install_python_venv() {
  # The positional parameters are expanded as ${1...}/${2...}; the former
  # ${$1...} form is a "bad substitution" error in bash.
  local version=${1:-36}
  local location=${2:-/usr/lib/virtualenv/py${version}}

  # yum's status is deliberately not checked here: if the interpreter is
  # missing afterwards, virtualenv below fails and we stop there.
  yum install -y "python${version}"
  mkdir -p -- "$location" || return

  # Stop before touching the hadoop environment if no virtualenv exists,
  # so hadoop-env.sh never sources a missing activate script.
  virtualenv -p "/usr/bin/python${version}" "$location" || return
  hadoop_use_venv "$location"

  source "${location}/bin/activate"
}

#######################################
# Make hadoop use a virtualenv by appending a line that sources the
# virtualenv's activate script to the hadoop environment file.
# Calling it again with the same virtualenv is a no-op, so re-running a
# bootstrap script does not stack duplicate lines.
# Arguments:
#   $1 - virtualenv directory (must contain bin/activate)
#   $2 - environment file to append to
#        (default: /usr/lib/hadoop2/etc/hadoop/hadoop-env.sh)
# Returns:
#   2 if the virtualenv directory is missing; otherwise the status of
#   the append
#######################################
function hadoop_use_venv() {
  local location="$1"
  local env_file="${2:-/usr/lib/hadoop2/etc/hadoop/hadoop-env.sh}"
  local activate_line="VIRTUAL_ENV_DISABLE_PROMPT=1 source ${location}/bin/activate ${location}"

  if [[ -z "$location" ]]; then
    echo "usage: hadoop_use_venv <virtualenv_dir> [env_file]" >&2
    return 2
  fi

  # -x/-F: match the whole line literally, so a path that is a prefix of
  # another virtualenv path is not mistaken for an existing entry.
  if [[ -f "$env_file" ]] && grep -qxF -- "$activate_line" "$env_file"; then
    return 0
  fi

  printf '%s\n' "$activate_line" >> "$env_file"
}

0 comments on commit 103d48a

Please sign in to comment.