Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 23 additions & 25 deletions docker/Dockerfile_base
Original file line number Diff line number Diff line change
@@ -1,51 +1,49 @@
# This base image comes shipped with java 8 (needed for scala)
FROM openjdk:8-jdk-alpine
COPY --from=python:3.6 / /
# This base image comes shipped with java 11 (needed for scala)
FROM openjdk:11-jre-slim
# Overlay the entire python:3.7 image filesystem onto the JDK image so both
# the JVM and a CPython interpreter are present in one image.
# NOTE(review): python:3.7 is past end-of-life, and copying `/` from another
# image clobbers any overlapping system files in the base — confirm this is
# intentional; installing python via apt would be a safer alternative.
COPY --from=python:3.7 / /

# Set env variables (key=value form; legacy space-separated `ENV key value`
# is deprecated and flagged by BuildKit's LegacyKeyValueFormat check)
ENV DAEMON_RUN=true
ENV SPARK_VERSION=3.3.0
ENV HADOOP_VERSION=3
ENV SCALA_VERSION=2.12.16
ENV SCALA_HOME=/usr/share/scala
ENV SPARK_HOME=/spark
# Driver JVM sizing / log level picked up by the Spark launch scripts
ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info"
# The py4j zip version must match the one bundled under $SPARK_HOME/python/lib
# for the chosen SPARK_VERSION (0.10.9.5 ships with Spark 3.3.0)
ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9.5-src.zip


# NOTE(review): the previous step echoed Alpine mirror URLs into
# /etc/apt/repositories — a path apt never reads (apt configuration lives in
# /etc/apt/sources.list and /etc/apt/sources.list.d/), and Alpine package
# repositories are unusable on this Debian-based image anyway. The base
# image's default Debian sources are sufficient, so the step is removed.

# Install base utilities. `apt upgrade <pkg>` only upgrades packages that are
# already present — it does not install wget/procps/etc. on a slim image —
# so use install. update+install share one layer to avoid a stale apt cache,
# and the package lists are removed in the same layer to keep the image small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends wget tar bash coreutils procps openssl && \
    rm -rf /var/lib/apt/lists/*

# Install Scala from the upstream tarball into ${SCALA_HOME}.
# `-y` is required: a plain `apt install` prompts for confirmation and aborts
# a non-interactive docker build. update/install/cleanup share one layer.
# NOTE(review): downloads.typesafe.com is a legacy host — confirm it still
# serves ${SCALA_VERSION}; downloads.lightbend.com / scala-lang.org host the
# current archives.
RUN apt-get update && \
    apt-get install -y --no-install-recommends wget ca-certificates bash && \
    cd "/tmp" && \
    wget "https://downloads.typesafe.com/scala/${SCALA_VERSION}/scala-${SCALA_VERSION}.tgz" && \
    tar xzf "scala-${SCALA_VERSION}.tgz" && \
    mkdir "${SCALA_HOME}" && \
    rm "/tmp/scala-${SCALA_VERSION}/bin/"*.bat && \
    mv "/tmp/scala-${SCALA_VERSION}/bin" "/tmp/scala-${SCALA_VERSION}/lib" "${SCALA_HOME}" && \
    ln -s "${SCALA_HOME}/bin/"* "/usr/bin/" && \
    rm -rf /var/lib/apt/lists/* && \
    rm -rf "/tmp/"*


# Put the (future) sbt bin dir on PATH. `export PATH=…` inside a RUN only
# lasts for that single layer's shell, so it is set via ENV instead.
ENV PATH=/usr/local/sbt/bin:$PATH
RUN apt-get update && \
    apt-get install -y --no-install-recommends ca-certificates wget tar && \
    rm -rf /var/lib/apt/lists/* && \
    mkdir -p "/usr/local/sbt"
# NOTE(review): nothing in this file actually downloads sbt into
# /usr/local/sbt — confirm whether a derived image installs it or this
# directory is vestigial.

# Install Apache Spark and move it to the folder "/spark", then put its bin
# dir on PATH via ENV — a trailing `export PATH` inside RUN is discarded when
# the layer's shell exits and never reaches the runtime image.
# archive.apache.org hosts every historical release, unlike rotating mirrors
# which drop old versions once a newer one ships.
RUN wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \
    tar -xzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \
    mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} /spark && \
    rm spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
ENV PATH=$SPARK_HOME/bin:$PATH

# Install additional jars: swap Spark's bundled guava 14.0.1 for a modern
# guava (plus its failureaccess companion jar, which guava 31 requires), and
# add the Hadoop/Azure storage connectors.
# NOTE(review): with HADOOP_VERSION=3 the expansion below fetches
# hadoop-azure-3.0.0.jar, while the hadoop3 build of Spark 3.3.0 bundles
# Hadoop 3.3.x client jars — confirm the intended connector version to avoid
# a client/connector mismatch.
RUN rm $SPARK_HOME/jars/guava-14.0.1.jar && \
    wget https://repo1.maven.org/maven2/com/google/guava/guava/31.1-jre/guava-31.1-jre.jar -P $SPARK_HOME/jars/ && \
    wget https://repo1.maven.org/maven2/com/google/guava/failureaccess/1.0.1/failureaccess-1.0.1.jar -P $SPARK_HOME/jars/ && \
    wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-azure/${HADOOP_VERSION}.0.0/hadoop-azure-${HADOOP_VERSION}.0.0.jar -P $SPARK_HOME/jars/ && \
    wget https://repo1.maven.org/maven2/com/microsoft/azure/azure-storage/8.6.3/azure-storage-8.6.3.jar -P $SPARK_HOME/jars/

2 changes: 1 addition & 1 deletion docker/Dockerfile_master
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM sdesilva26/spark_base
# NOTE(review): `:latest` is mutable — pin spark_base to an immutable version
# tag or digest for reproducible builds (hadolint DL3007).
FROM logistser/spark_base:latest

ENV SPARK_MASTER_PORT 7077
ENV SPARK_MASTER_WEBUI_PORT 8080
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile_submit
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM sdesilva26/spark_base
# NOTE(review): `:latest` is mutable — pin spark_base to an immutable version
# tag or digest for reproducible builds (hadolint DL3007).
FROM logistser/spark_base:latest

# Set default environment variables. These can also be set at the command line when invoking /bin/spark-submit
ENV MASTER_CONTAINER_NAME=spark-master
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile_worker
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM sdesilva26/spark_base
# NOTE(review): `:latest` is mutable — pin spark_base to an immutable version
# tag or digest for reproducible builds (hadolint DL3007).
FROM logistser/spark_base:latest

ENV MASTER_CONTAINER_NAME=spark-master
ENV CORES=3
Expand Down
10 changes: 4 additions & 6 deletions docker/push_docker_images.cmd
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
docker push sdesilva26/spark_master:latest
docker push sdesilva26/spark_master:0.0.2
docker push sdesilva26/spark_worker:latest
docker push sdesilva26/spark_worker:0.0.2
docker push sdesilva26/spark_submit:latest
docker push sdesilva26/spark_submit:0.0.2
:: Push all project images to Docker Hub under the logistser namespace.
:: Assumes `docker login` has already been run and the :latest tags exist
:: locally (built beforehand by the corresponding Dockerfiles).
docker push logistser/spark_base:latest
docker push logistser/spark_master:latest
docker push logistser/spark_worker:latest
docker push logistser/spark_submit:latest
10 changes: 3 additions & 7 deletions docker/resources/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
pandas
numpy
matplotlib
pyarrow
koalas
keras
elephas
# Python dependencies installed into the Spark base image.
# NOTE(review): these are lower bounds only — consider exact pins (==) for
# reproducible image builds.
pandas>=1.0.5
numpy>=1.14
pyarrow>=4.0.0