diff --git a/README.md b/README.md index f578f09..541d6f2 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,17 @@ -# VM for profiling -This packer configuration creates a VM provisioned with tools need for profiling analysis. +# VM for profiling +This packer configuration creates a VM provisioned with tools needed for +profiling analysis. - Python and R packages -- cytominer and dependencies +- cytominer, cytotools, cytominer-database, and dependencies - Distributed-CellProfiler and dependencies - Docker +- Mount S3 (s3fs) so that CellProfiler can access files +- Mount EFS -In addition, it mounts an S3 bucket and an Amazon Elastic File System (EFS). +In addition, it mounts +- an S3 bucket so that CellProfiler can access files +- an NFS (Amazon Elastic File System (EFS)) ## Install packer @@ -29,20 +34,31 @@ aws configure ### Edit EFS and S3 configurations -Edit `EFS_ID` in `efs.sh`, and `S3_ROLE` and `BUCKET_ID` appropriately. +Edit `EFS_ID` in `efs.sh`, and `S3_ROLE` and `BUCKET_ID` in `s3.sh` +appropriately. ### Validate and build -Note that although instance specifics are included in `cytominer_ami.json` (`c4.large`), and an instance will be created, the instance only exists to create the AMI and will be destroyed upon completion, `delete_on_termination: true`, leaving only the newly created AMI. +Note that although instance specifics are included in `cytominer_ami.json` +(`c4.xlarge`), and an instance will be created, the instance only exists to +create the AMI and will be destroyed upon completion, +`delete_on_termination: true`, leaving only the newly created AMI. 
``` packer validate cytominer_ami.json ``` -Specify `security_group_ids`, `subnet_id` and `vpc_id` on which the builder should be run (get these from AWS console) +Specify `security_group_ids`, `subnet_id` and `vpc_id` on which the builder +should be run (get these from AWS console) ``` packer build -var 'security_group_ids=sg-NNNNNNNN,sg-NNNNNNNN' -var 'subnet_id=subnet-NNNNNNNN' -var 'vpc_id=vpc-NNNNNNNN' cytominer_ami.json ``` +The security groups should be such that together they allow +- SSH access on port 22 +- NFS access on port 2049 (for EFS) + + + diff --git a/cytominer_ami.json b/cytominer_ami.json index dfe4031..dd05d98 100644 --- a/cytominer_ami.json +++ b/cytominer_ami.json @@ -8,7 +8,7 @@ }, "builders": [{ "access_key": "{{user `aws_access_key`}}", - "ami_name": "cytomining/images/hvm-ssd/cytominer-ubuntu-trusty-14.04-amd64-server-{{timestamp}}", + "ami_name": "cytomining/images/hvm-ssd/cytominer-ubuntu-bionic-18.04-amd64-server-{{timestamp}}", "associate_public_ip_address" : true, "instance_type": "c4.xlarge", "region": "us-east-1", @@ -16,7 +16,7 @@ "security_group_ids": "{{user `security_group_ids`}}", "source_ami_filter": { "filters": { - "name": "ubuntu/images/hvm-ssd/ubuntu-trusty-14.04-amd64-server-*", + "name": "ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-amd64-server-*", "root-device-type": "ebs", "virtualization-type": "hvm" }, diff --git a/docker.sh b/docker.sh index ea9e0f7..6d1c8c1 100644 --- a/docker.sh +++ b/docker.sh @@ -2,16 +2,39 @@ #----------------------------- # docker -# https://docs.docker.com/engine/installation/linux/ubuntulinux/ +# https://docs.docker.com/engine/install/ubuntu/#prerequisites #----------------------------- -sudo apt-get install -y apt-transport-https ca-certificates -sudo apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 --recv-keys 58118E89F3A912897C070ADBF76221572C52609D -sudo sh -c 'echo "deb https://apt.dockerproject.org/repo ubuntu-trusty main" > /etc/apt/sources.list.d/docker.list' + +sudo 
apt-get remove -y \ + docker \ + docker-engine \ + docker.io \ + containerd \ + runc + sudo apt-get update -sudo apt-get purge lxc-docker -apt-cache policy docker-engine # Verify that right repository. -sudo apt-get install -y linux-image-extra-$(uname -r) -sudo apt-get install -y docker-engine + +sudo apt-get install -y \ + apt-transport-https \ + ca-certificates \ + curl \ + gnupg-agent \ + software-properties-common + +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - + +sudo add-apt-repository \ + "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ + $(lsb_release -cs) \ + stable" + +sudo apt-get update + +sudo apt-get install -y \ + docker-ce \ + docker-ce-cli \ + containerd.io sudo groupadd docker + sudo usermod -aG docker ubuntu diff --git a/efs.sh b/efs.sh index b50c6da..5e70a10 100644 --- a/efs.sh +++ b/efs.sh @@ -1,6 +1,6 @@ #!/bin/bash -EFS_ID="fs-d1824199" +EFS_ID="fs-3609f37f" #----------------------------- # EFS @@ -8,6 +8,9 @@ EFS_ID="fs-d1824199" #----------------------------- sudo apt-get install -y nfs-common + sudo mkdir ~/efs + sudo chown ubuntu ~/efs/ + echo "sudo mount -t nfs4 -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2 $(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone).${EFS_ID}.efs.us-east-1.amazonaws.com:/ ~/efs" >> ~/.bashrc diff --git a/init.sh b/init.sh index ccc4375..b0f3c5f 100644 --- a/init.sh +++ b/init.sh @@ -3,4 +3,5 @@ sleep 90 sudo apt-get update + sudo apt-get upgrade -y diff --git a/python.sh b/python.sh index 8a2bb87..1274e18 100644 --- a/python.sh +++ b/python.sh @@ -2,56 +2,105 @@ #----------------------------- # Python +#----------------------------- + +sudo apt-get install -y \ + python \ + python-dev \ + python-pip \ + python-setuptools + +#----------------------------- +# pyenv # https://github.com/yyuu/pyenv #----------------------------- -sudo easy_install pip mkdir -p ~/work/software/archives + cd ~/work/software/archives + git 
clone https://github.com/yyuu/pyenv.git ~/.pyenv -echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc -echo 'export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bashrc -echo 'eval "$(pyenv init -)"' >> ~/.bashrc + +echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bash_profile + +echo 'export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bash_profile + +echo -e 'if command -v pyenv 1>/dev/null 2>&1; then\n eval "$(pyenv init -)"\nfi' >> ~/.bash_profile + +# In a non-interactive install, you can't do this: +# exec "$SHELL" +# and so you need to mock this: + export PYENV_ROOT="$HOME/.pyenv" + export PATH="$PYENV_ROOT/bin:$PATH" + eval "$(pyenv init -)" -pyenv install 3.5.1 -pyenv install 2.7.12 +pyenv install 3.8.2 + +pyenv shell 3.8.2 -pyenv shell 3.5.1 -pip install --upgrade pip -pip install --upgrade setuptools -pip install IPython pyyaml +pip install --upgrade \ + pip \ + setuptools \ + wheel -pyenv shell 2.7.12 -pip install --upgrade pip -pip install --upgrade setuptools -pip install IPython==5.0 -pip install pyyaml +#----------------------------- +# pe2loaddata +#----------------------------- -# DCP and cytominer are now configured within this script because +# This section will go once pe2loaddata is packaged + +pip install --upgrade \ + IPython \ + pyyaml + +#----------------------------- +# NOTE: +# DCP and cytominer are configured within this script because # pyenv wouldn't load when they were moved into their own scripts +#----------------------------- #----------------------------- -# DCP +# DCP Control Node +# https://github.com/CellProfiler/Distributed-CellProfiler/wiki/Before-you-get-started%3A-setting-up #----------------------------- -cd + +# This is done in tools.sh +# sudo apt install build-essential checkinstall +# sudo apt-get install parallel + +# This is done above +# sudo apt install python python-dev python-setuptools python-dev build-essential +# pip install --upgrade setuptools + +# This is no longer needed +# See https://pip.pypa.io/en/stable/installing/ +# 
sudo easy_install pip + +sudo apt-get install -y \ + cloud-image-utils \ + fabric + +cd ~ + git clone https://github.com/CellProfiler/Distributed-CellProfiler.git + cd Distributed-CellProfiler/files -pyenv local 2.7.12 -pyenv shell 2.7.12 + +pyenv local 3.8.2 + +pyenv shell 3.8.2 + pip install -r requirements.txt -#optional packages to be able to run ancillary DCP functions -sudo apt-get install -y cloud-image-utils parallel python-pandas +pip install --upgrade \ + awscli #----------------------------- # cytominer #----------------------------- -pyenv shell 3.5.1 -pip install --upgrade pip -pip install git+git://github.com/cytomining/cytominer-database.git -pip install IPython -Rscript -e 'devtools::install_github("cytomining/cytominer", dependencies=TRUE)' +pip install cytominer-database==0.3.3 + diff --git a/r.sh b/r.sh index 18d0368..f11deb6 100644 --- a/r.sh +++ b/r.sh @@ -2,13 +2,29 @@ #----------------------------- # R -# https://www.digitalocean.com/community/tutorials/how-to-set-up-r-on-ubuntu-14-04 +# https://www.digitalocean.com/community/tutorials/how-to-install-r-on-ubuntu-18-04-quickstart #----------------------------- -sudo sh -c 'echo "deb http://cran.rstudio.com/bin/linux/ubuntu trusty/" >> /etc/apt/sources.list' -gpg --keyserver keyserver.ubuntu.com --recv-key E084DAB9 -gpg -a --export E084DAB9 | sudo apt-key add - -sudo apt-get update -sudo apt-get -y install r-base + +sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 + +sudo add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/' + +sudo apt-get update -y + +sudo apt-get install -y \ + r-base=3.6.3-1bionic + mkdir -p ~/R/library + echo "R_LIBS=~/R/library" > ~/.Renviron -Rscript -e 'install.packages(c("devtools", "docopt", "dplyr", "feather", "ggplot2", "knitr", "magrittr", "readr", "rmarkdown", "RSQLite", "stringr", "testthat", "tidyr", "tidyverse"), Ncpus=4, repos=c("http://cran.us.r-project.org", 
"https://cran.cnr.berkeley.edu/", "https://cran.revolutionanalytics.com/"), lib="~/R/library/")' + +Rscript -e 'install.packages(c("devtools"), repos = "http://cran.us.r-project.org", Ncpus=4)' + +Rscript -e 'devtools::install_version("cytominer", version = "0.2.1", repos = "http://cran.us.r-project.org", dependencies=TRUE, Ncpus=4)' + +# cytotools doesn't current have tidyr as a dep but will eventually +# (but cytominer_script needs it) +Rscript -e 'install.packages(c("tidyr"), repos = "http://cran.us.r-project.org", Ncpus=4)' + +Rscript -e 'devtools::install_github("cytomining/cytotools", ref = "2fc846c", dependencies=TRUE)' + diff --git a/s3.sh b/s3.sh index bb4b7d8..4f0aa96 100644 --- a/s3.sh +++ b/s3.sh @@ -1,25 +1,47 @@ #!/bin/bash # IAM role that allows access to S3 bucket -S3_ROLE="ec2-iam-role-s3" +S3_ROLE="s3-imaging-platform-role" # Name of S3 bucket to be mounted -BUCKET_ID="imaging-platform-dev" +BUCKET_ID="imaging-platform" #----------------------------- # s3fs # https://github.com/s3fs-fuse/s3fs-fuse #----------------------------- -sudo apt-get install -y automake autotools-dev g++ git libcurl4-gnutls-dev libfuse-dev libssl-dev libxml2-dev make pkg-config +sudo apt-get install -y \ + automake \ + autotools-dev \ + g++ \ + git \ + libcurl4-gnutls-dev \ + libfuse-dev \ + libssl-dev \ + libxml2-dev \ + make \ + pkg-config + mkdir -p ~/work/software/archives + cd ~/work/software/archives + git clone https://github.com/s3fs-fuse/s3fs-fuse.git + cd s3fs-fuse + ./autogen.sh -./configure + +./configure --prefix=/usr --with-openssl + make + sudo make install + cd ~ + mkdir ~/bucket + echo "${BUCKET_ID} /home/ubuntu/bucket fuse.s3fs _netdev,allow_other,iam_role=${S3_ROLE},uid=1000,gid=1000,umask=0022 0 0" | sudo tee --append /etc/fstab + sudo mount -a diff --git a/tools.sh b/tools.sh index dc53b34..ffe32c3 100644 --- a/tools.sh +++ b/tools.sh @@ -3,8 +3,40 @@ #----------------------------- # Tools #----------------------------- -sudo apt-get install -y 
build-essential bzip2 checkinstall emacs htop jq mysql-client-core-5.5 pandoc parallel postgresql postgresql-client-common sqlite sqlite3 tree zip libbz2-dev -sudo apt-get install -y libffi-dev libfftw3-dev libldap2-dev libpq-dev libreadline6 libreadline6-dev libsasl2-dev libsqlite3-dev libssh2-1-dev libtiff5 libtiff5-dev libxslt1-dev -sudo apt-get install -y python python-dev python-pip python-pip python-setuptools -sudo pip install awscli +# Do this first +# See https://askubuntu.com/questions/909277/avoiding-user-interaction-with-tzdata-when-installing-certbot-in-a-docker-contai +sudo apt-get install -y \ + tzdata + +sudo apt-get install -y \ + build-essential \ + bzip2 \ + checkinstall \ + emacs \ + htop \ + jq \ + mysql-client-core-5.7 \ + pandoc \ + parallel \ + postgresql \ + postgresql-client-common \ + sqlite \ + sqlite3 \ + tree \ + zip + +sudo apt-get install -y \ + libbz2-dev \ + libffi-dev \ + libfftw3-dev \ + libldap2-dev \ + libpq-dev \ + libreadline7 \ + libreadline-dev \ + libsasl2-dev \ + libsqlite3-dev \ + libssh2-1-dev \ + libtiff5 \ + libtiff5-dev \ + libxslt1-dev