Update build docker images (apache-spark-on-k8s#458)
robert3005 authored and Robert Kruszewski committed Jan 6, 2019
1 parent 77c70df commit 0d3f5cb
Showing 7 changed files with 25 additions and 29 deletions.
14 changes: 7 additions & 7 deletions .circleci/config.yml
@@ -2,7 +2,7 @@ version: 2
 
 defaults: &defaults
   docker:
-    - image: palantirtechnologies/circle-spark-base
+    - image: palantirtechnologies/circle-spark-base:0.1.0
   resource_class: xlarge
   environment: &defaults-environment
     TERM: dumb
@@ -128,7 +128,7 @@ jobs:
     <<: *defaults
     # Some part of the maven setup fails if there's no R, so we need to use the R image here
     docker:
-      - image: palantirtechnologies/circle-spark-r
+      - image: palantirtechnologies/circle-spark-r:0.1.0
     steps:
       # Saves us from recompiling every time...
       - restore_cache:
@@ -300,7 +300,7 @@ jobs:
     # depends on build-sbt, but we only need the assembly jars
     <<: *defaults
     docker:
-      - image: palantirtechnologies/circle-spark-python
+      - image: palantirtechnologies/circle-spark-python:0.1.0
     parallelism: 2
     steps:
       - *checkout-code
@@ -325,7 +325,7 @@ jobs:
     # depends on build-sbt, but we only need the assembly jars
     <<: *defaults
     docker:
-      - image: palantirtechnologies/circle-spark-r
+      - image: palantirtechnologies/circle-spark-r:0.1.0
     steps:
       - *checkout-code
       - attach_workspace:
@@ -438,7 +438,7 @@ jobs:
     <<: *defaults
     # Some part of the maven setup fails if there's no R, so we need to use the R image here
     docker:
-      - image: palantirtechnologies/circle-spark-r
+      - image: palantirtechnologies/circle-spark-r:0.1.0
     steps:
      - *checkout-code
       - restore_cache:
@@ -458,7 +458,7 @@ jobs:
   deploy-gradle:
     <<: *defaults
     docker:
-      - image: palantirtechnologies/circle-spark-r
+      - image: palantirtechnologies/circle-spark-r:0.1.0
     steps:
       - *checkout-code
       - *restore-gradle-wrapper-cache
@@ -470,7 +470,7 @@ jobs:
     <<: *defaults
     # Some part of the maven setup fails if there's no R, so we need to use the R image here
     docker:
-      - image: palantirtechnologies/circle-spark-r
+      - image: palantirtechnologies/circle-spark-r:0.1.0
     steps:
       # This cache contains the whole project after version was set and mvn package was called
       # Restoring first (and instead of checkout) as mvn versions:set mutates real source code...
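Editor's note: every CI job previously tracked the images' implicit latest tag; pinning 0.1.0 makes builds reproducible and lets image updates roll out together with config changes. A hypothetical pre-flight check that the pinned tags are pullable, using the docker-py client (the image names come from this diff; everything else is an assumption, not part of the commit):

# Hypothetical sketch: verify the pinned CI images exist and can be pulled.
# Requires the docker-py package and a running Docker daemon.
import docker

client = docker.from_env()
for name in ("circle-spark-base", "circle-spark-python", "circle-spark-r"):
    image = client.images.pull("palantirtechnologies/" + name, tag="0.1.0")
    print(image.tags)  # e.g. ['palantirtechnologies/circle-spark-base:0.1.0']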
6 changes: 3 additions & 3 deletions dev/docker-images/Makefile
@@ -17,9 +17,9 @@
 
 .PHONY: all publish base python r
 
-BASE_IMAGE_NAME = palantirtechnologies/circle-spark-base
-PYTHON_IMAGE_NAME = palantirtechnologies/circle-spark-python
-R_IMAGE_NAME = palantirtechnologies/circle-spark-r
+BASE_IMAGE_NAME = palantirtechnologies/circle-spark-base:0.1.0
+PYTHON_IMAGE_NAME = palantirtechnologies/circle-spark-python:0.1.0
+R_IMAGE_NAME = palantirtechnologies/circle-spark-r:0.1.0
 
 all: base python r
 
5 changes: 3 additions & 2 deletions dev/docker-images/base/Dockerfile
@@ -15,7 +15,7 @@
 # limitations under the License.
 #
 
-FROM buildpack-deps:xenial
+FROM buildpack-deps:cosmic
 
 # make Apt non-interactive
 RUN echo 'APT::Get::Assume-Yes "true";' > /etc/apt/apt.conf.d/90circleci \
@@ -107,10 +107,11 @@ WORKDIR $CIRCLE_HOME
 # Install miniconda, we are using it to test conda support and a bunch of tests expect CONDA_BIN to be set
 ENV CONDA_ROOT=$CIRCLE_HOME/miniconda
 ENV CONDA_BIN=$CIRCLE_HOME/miniconda/bin/conda
-ENV MINICONDA2_VERSION=4.3.31
+ENV MINICONDA2_VERSION=4.5.11
 RUN curl -sO https://repo.continuum.io/miniconda/Miniconda2-${MINICONDA2_VERSION}-Linux-x86_64.sh \
   && bash Miniconda2-${MINICONDA2_VERSION}-Linux-x86_64.sh -b -p ${CONDA_ROOT} \
   && $CONDA_BIN clean --all \
+  && sudo mkdir -m 777 /home/.conda \
   && rm -f Miniconda2-${MINICONDA2_VERSION}-Linux-x86_64.sh
 
 # END IMAGE CUSTOMIZATIONS
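Editor's note: a quick smoke test for the resulting base image could look like the following sketch. CONDA_BIN is set by the Dockerfile above; the check itself is hypothetical, not part of the commit:

# Hypothetical smoke test: confirm the bumped miniconda install is functional.
# Assumes CONDA_BIN points at the conda binary, as the Dockerfile sets it.
import os
import subprocess

conda_bin = os.environ["CONDA_BIN"]
version = subprocess.check_output([conda_bin, "--version"],
                                  stderr=subprocess.STDOUT).decode().strip()
print(version)  # expected output along the lines of: conda 4.5.11
assert "4.5.11" in version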
7 changes: 3 additions & 4 deletions dev/docker-images/python/Dockerfile
@@ -21,15 +21,14 @@ FROM palantirtechnologies/circle-spark-base
 ENV PATH="$CIRCLE_HOME/.pyenv/bin:$PATH"
 RUN curl -L https://github.com/pyenv/pyenv-installer/raw/master/bin/pyenv-installer | bash \
   && cat >>.bashrc <<<'eval "$($HOME/.pyenv/bin/pyenv init -)"' \
-  && cat >>.bashrc <<<'eval "$($HOME/.pyenv/bin/pyenv virtualenv-init -)"' \
-  && pyenv doctor
+  && cat >>.bashrc <<<'eval "$($HOME/.pyenv/bin/pyenv virtualenv-init -)"'
 
 # Must install numpy 1.11 or else a bunch of tests break due to different output formatting on e.g. nparray
 # A version I've tested earlier that I know it breaks with is 1.14.1
 RUN mkdir -p $(pyenv root)/versions \
   && ln -s $CONDA_ROOT $(pyenv root)/versions/our-miniconda \
-  && $CONDA_BIN create -y -n python2 -c anaconda -c conda-forge python==2.7.11 numpy=1.11.2 pyarrow==0.8.0 pandas nomkl \
-  && $CONDA_BIN create -y -n python3 -c anaconda -c conda-forge python=3.6 numpy=1.11.2 pyarrow==0.8.0 pandas nomkl \
+  && $CONDA_BIN create -y -n python2 -c anaconda -c conda-forge python==2.7.15 numpy=1.14.0 pyarrow==0.8.0 pandas nomkl \
+  && $CONDA_BIN create -y -n python3 -c anaconda -c conda-forge python=3.6 numpy=1.14.0 pyarrow==0.8.0 pandas nomkl \
   && $CONDA_BIN clean --all
 
 RUN pyenv global our-miniconda/envs/python2 our-miniconda/envs/python3 \
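Editor's note: both conda environments now pin numpy 1.14.0 (alongside Python 2.7.15 and 3.6). A sanity check along these lines could confirm the pins took effect; the env names python2/python3 and the CONDA_ROOT layout come from the Dockerfile above, the rest of the script is an assumption:

# Hypothetical check that each env created above carries the pinned numpy.
import os
import subprocess

conda_root = os.environ["CONDA_ROOT"]
for env in ("python2", "python3"):
    python_bin = os.path.join(conda_root, "envs", env, "bin", "python")
    out = subprocess.check_output(
        [python_bin, "-c", "import numpy; print(numpy.version.version)"]
    ).decode().strip()
    assert out == "1.14.0", "unexpected numpy %s in env %s" % (out, env)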
8 changes: 2 additions & 6 deletions dev/docker-images/r/Dockerfile
@@ -20,12 +20,8 @@ FROM palantirtechnologies/circle-spark-base
 USER root
 
 ### Install R
-RUN echo "deb http://cran.rstudio.com/bin/linux/ubuntu trusty/" >> /etc/apt/sources.list \
-  && apt-get update \
-  && gpg --keyserver keyserver.ubuntu.com --recv-key E084DAB9 \
-  && gpg -a --export E084DAB9 | sudo apt-key add - \
-  && apt-get update \
-  && apt-get --assume-yes install r-base r-base-dev qpdf \
+RUN apt-get update \
+  && apt-get install r-base r-base-dev qpdf \
   && rm -rf /var/lib/apt/lists/* \
   && chmod 777 /usr/local/lib/R/site-library \
   && /usr/lib/R/bin/R -e "install.packages(c('devtools'), repos='http://cran.us.r-project.org', lib='/usr/local/lib/R/site-library'); devtools::install_github('r-lib/[email protected]', lib='/usr/local/lib/R/site-library'); install.packages(c('knitr', 'rmarkdown', 'e1071', 'survival', 'roxygen2', 'lintr'), repos='http://cran.us.r-project.org', lib='/usr/local/lib/R/site-library')"
6 changes: 3 additions & 3 deletions python/pyspark/tests.py
@@ -2279,20 +2279,20 @@ def test_conda(self):
             |from pyspark import SparkContext
             |
             |sc = SparkContext()
-            |sc.addCondaPackages('numpy=1.11.1')
+            |sc.addCondaPackages('numpy=1.14.0')
             |
             |# Ensure numpy is accessible on the driver
             |import numpy
             |arr = [1, 2, 3]
             |def mul2(x):
             |    # Also ensure numpy accessible from executor
-            |    assert numpy.version.version == "1.11.1"
+            |    assert numpy.version.version == "1.14.0"
             |    return x * 2
             |print(sc.parallelize(arr).map(mul2).collect())
             """)
         props = self.createTempFile("properties", """
             |spark.conda.binaryPath {}
-            |spark.conda.channelUrls https://repo.continuum.io/pkgs/free
+            |spark.conda.channelUrls https://repo.continuum.io/pkgs/main
             |spark.conda.bootstrapPackages python=3.5
             """.format(os.environ["CONDA_BIN"]))
         env = dict(os.environ)
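Editor's note: the channel URL moves from pkgs/free to pkgs/main because Anaconda moved current package builds to main, leaving free as a frozen legacy channel. Standalone, the same conda bootstrap the test drives through a properties file could be set directly on a SparkConf. A sketch of this fork's conda support, assuming CONDA_BIN in the environment; addCondaPackages and the spark.conda.* properties exist in palantir/spark only, not upstream:

# Sketch: configuring the conda-backed Python environment by hand instead of
# through the properties file used in the test above (palantir/spark only).
import os
from pyspark import SparkConf, SparkContext

conf = (SparkConf()
        .set("spark.conda.binaryPath", os.environ["CONDA_BIN"])
        .set("spark.conda.channelUrls", "https://repo.continuum.io/pkgs/main")
        .set("spark.conda.bootstrapPackages", "python=3.5"))
sc = SparkContext(conf=conf)
sc.addCondaPackages('numpy=1.14.0')  # resolved at runtime on driver and executors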
8 changes: 4 additions & 4 deletions resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
@@ -84,14 +84,14 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
        |    exit(-1)
        |  sc = SparkContext(conf=SparkConf())
        |
-       |  sc.addCondaPackages('numpy=1.11.1')
+       |  sc.addCondaPackages('numpy=1.14.0')
        |  import numpy
        |
        |  status = open(sys.argv[1],'w')
        |
        |  # Addict exists only in external-conda-forge, not anaconda
        |  sc.addCondaChannel("https://conda.anaconda.org/conda-forge")
-       |  sc.addCondaPackages('addict=1.0.0')
+       |  sc.addCondaPackages('addict=2.2.0')
        |
        |  def numpy_multiply(x):
        |    # Ensure package from non-base channel is installed
@@ -376,8 +376,8 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
 
     val extraConf: Map[String, String] = Map(
       "spark.conda.binaryPath" -> sys.env("CONDA_BIN"),
-      "spark.conda.channelUrls" -> "https://repo.continuum.io/pkgs/free",
-      "spark.conda.bootstrapPackages" -> "python=3.5"
+      "spark.conda.channelUrls" -> "https://repo.continuum.io/pkgs/main",
+      "spark.conda.bootstrapPackages" -> "python=3.6"
     )
 
     val moduleDir =
