From 53453087bec2b5ec2a46217cbec708b9a7d5dd6b Mon Sep 17 00:00:00 2001 From: Sergey Date: Sat, 5 Nov 2022 02:47:34 +0300 Subject: [PATCH] versions up --- docker/Dockerfile_base | 48 +++++++++++++++---------------- docker/Dockerfile_master | 2 +- docker/Dockerfile_submit | 2 +- docker/Dockerfile_worker | 2 +- docker/push_docker_images.cmd | 10 +++---- docker/resources/requirements.txt | 10 ++----- 6 files changed, 33 insertions(+), 41 deletions(-) diff --git a/docker/Dockerfile_base b/docker/Dockerfile_base index 6eed108..c64b336 100644 --- a/docker/Dockerfile_base +++ b/docker/Dockerfile_base @@ -1,28 +1,27 @@ -# This base image comes shipped with java 8 (needed for scala) -FROM openjdk:8-jdk-alpine -COPY --from=python:3.6 / / +# This base image comes shipped with java 11 (needed for scala) +FROM openjdk:11-jre-slim +COPY --from=python:3.7 / / # Set env variables ENV DAEMON_RUN=true -ENV SPARK_VERSION=2.4.5 -ENV HADOOP_VERSION=2.7 -ENV SCALA_VERSION=2.12.3 +ENV SPARK_VERSION=3.3.0 +ENV HADOOP_VERSION=3 +ENV SCALA_VERSION=2.12.16 ENV SCALA_HOME=/usr/share/scala ENV SPARK_HOME=/spark ENV SPARK_OPTS --driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info -ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip +ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9.5-src.zip - -# Add additional repo's for apk to use -RUN echo http://mirror.yandex.ru/mirrors/alpine/v3.3/main > /etc/apk/repositories; \ - echo http://mirror.yandex.ru/mirrors/alpine/v3.3/community >> /etc/apk/repositories +# NOTE: apt on Debian reads sources from /etc/apt/sources.list shipped with the +# base image; it never reads /etc/apt/repositories, and the old Alpine mirror +# URLs have no apt equivalent, so that configuration is dropped, not translated. # Update commands -RUN apk --update add wget tar bash coreutils procps openssl +RUN apt-get update && apt-get install -y --no-install-recommends wget tar bash coreutils procps openssl # Install Scala -RUN apk add --no-cache 
--virtual=.build-dependencies wget ca-certificates && \ - apk add --no-cache bash && \ +RUN apt-get install -y wget ca-certificates && \ + apt-get install -y bash && \ cd "/tmp" && \ wget "https://downloads.typesafe.com/scala/${SCALA_VERSION}/scala-${SCALA_VERSION}.tgz" && \ tar xzf "scala-${SCALA_VERSION}.tgz" && \ @@ -30,22 +29,21 @@ RUN apk add --no-cache --virtual=.build-dependencies wget ca-certificates && \ rm "/tmp/scala-${SCALA_VERSION}/bin/"*.bat && \ mv "/tmp/scala-${SCALA_VERSION}/bin" "/tmp/scala-${SCALA_VERSION}/lib" "${SCALA_HOME}" && \ ln -s "${SCALA_HOME}/bin/"* "/usr/bin/" && \ - apk del .build-dependencies && \ + apt autoclean && \ rm -rf "/tmp/"* - - -RUN export PATH="/usr/local/sbt/bin:$PATH" && apk update && apk add ca-certificates wget tar && mkdir -p "/usr/local/sbt" -# Get Apache Spark -RUN wget http://mirror.ox.ac.uk/sites/rsync.apache.org/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz +RUN export PATH="/usr/local/sbt/bin:$PATH" && apt-get update && apt-get install -y ca-certificates wget tar && mkdir -p "/usr/local/sbt" -# Install Spark and move it to the folder "/spark" and then add this location to the PATH env variable -RUN tar -xzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \ +# Install Apache Spark (archive.apache.org keeps all releases; rotating mirrors drop old ones) +RUN wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \ + tar -xzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \ mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} /spark && \ rm spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \ export PATH=$SPARK_HOME/bin:$PATH -# Install jars needed for communication with Azure -RUN wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-azure/${HADOOP_VERSION}.0/hadoop-azure-${HADOOP_VERSION}.0.jar -P $SPARK_HOME/jars/ && \ +# Install additional jars +RUN rm $SPARK_HOME/jars/guava-14.0.1.jar && \ + wget 
https://repo1.maven.org/maven2/com/google/guava/guava/31.1-jre/guava-31.1-jre.jar -P $SPARK_HOME/jars/ && \ + wget https://repo1.maven.org/maven2/com/google/guava/failureaccess/1.0.1/failureaccess-1.0.1.jar -P $SPARK_HOME/jars/ && \ + wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-azure/${HADOOP_VERSION}.0.0/hadoop-azure-${HADOOP_VERSION}.0.0.jar -P $SPARK_HOME/jars/ && \ wget https://repo1.maven.org/maven2/com/microsoft/azure/azure-storage/8.6.3/azure-storage-8.6.3.jar -P $SPARK_HOME/jars/ - diff --git a/docker/Dockerfile_master b/docker/Dockerfile_master index 6cb5cf7..b943abb 100644 --- a/docker/Dockerfile_master +++ b/docker/Dockerfile_master @@ -1,4 +1,4 @@ -FROM sdesilva26/spark_base +FROM logistser/spark_base:latest ENV SPARK_MASTER_PORT 7077 ENV SPARK_MASTER_WEBUI_PORT 8080 diff --git a/docker/Dockerfile_submit b/docker/Dockerfile_submit index 07b2df3..1370ec6 100644 --- a/docker/Dockerfile_submit +++ b/docker/Dockerfile_submit @@ -1,4 +1,4 @@ -FROM sdesilva26/spark_base +FROM logistser/spark_base:latest # Set default environment variables. 
These can also be set at the command line when invoking /bin/spark-submit ENV MASTER_CONTAINER_NAME=spark-master diff --git a/docker/Dockerfile_worker b/docker/Dockerfile_worker index 04832fe..6abc51a 100644 --- a/docker/Dockerfile_worker +++ b/docker/Dockerfile_worker @@ -1,4 +1,4 @@ -FROM sdesilva26/spark_base +FROM logistser/spark_base:latest ENV MASTER_CONTAINER_NAME=spark-master ENV CORES=3 diff --git a/docker/push_docker_images.cmd b/docker/push_docker_images.cmd index d69a8eb..477961d 100644 --- a/docker/push_docker_images.cmd +++ b/docker/push_docker_images.cmd @@ -1,6 +1,4 @@ -docker push sdesilva26/spark_master:latest -docker push sdesilva26/spark_master:0.0.2 -docker push sdesilva26/spark_worker:latest -docker push sdesilva26/spark_worker:0.0.2 -docker push sdesilva26/spark_submit:latest -docker push sdesilva26/spark_submit:0.0.2 \ No newline at end of file +docker push logistser/spark_base:latest +docker push logistser/spark_master:latest +docker push logistser/spark_worker:latest +docker push logistser/spark_submit:latest \ No newline at end of file diff --git a/docker/resources/requirements.txt b/docker/resources/requirements.txt index 7de0780..6f7611d 100644 --- a/docker/resources/requirements.txt +++ b/docker/resources/requirements.txt @@ -1,8 +1,4 @@ -pandas -numpy -matplotlib -pyarrow -koalas -keras -elephas +pandas>=1.0.5 +numpy>=1.14 +pyarrow>=4.0.0