Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
.DS_Store

Dockerfile

*.tar.gz
!/DBR*/Dockerfile
*.tar.gz
.idea
105 changes: 105 additions & 0 deletions DBR12.2LTS/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
FROM databricksruntime/standard:12.2-LTS

# ROOTDIR is the install prefix used throughout this file, LD_LIBRARY_PATH
# points the loader at libraries under that prefix, and SPARK_VERSION is the
# release that pyspark is pinned to further down.
# (key=value form; the space-separated `ENV key value` form is deprecated.)
ENV ROOTDIR=/usr/local \
    LD_LIBRARY_PATH=/usr/local/lib \
    SPARK_VERSION=3.3.2

WORKDIR $ROOTDIR/
RUN mkdir -p $ROOTDIR/src

# Install dependencies
# deps are runtime libraries, buildDeps are build tools. Both are plain shell
# variables, so they exist only inside this RUN instruction.
# Not installed here (commented out of the package list): python-dev,
# python3-dev, python3-numpy, python3-pip, python3-venv.
# The apt package lists are removed in the same layer so they do not end up
# in the image; later apt steps run their own `apt-get update`.
RUN set -ex \
    && deps=" \
        bash-completion \
        libcurl3-gnutls \
        libopenjp2-7-dev \
        libsnappy-dev \
        unixodbc \
    " \
    && buildDeps=" \
        ant \
        build-essential \
        cmake \
        pkg-config \
        swig \
    " \
    && apt-get update -y \
    && apt-get install -y --no-install-recommends $buildDeps $deps \
    && rm -rf /var/lib/apt/lists/*

# Add Maven
# MAVEN_VERSION / SHA pin the binary distribution and its SHA-512 checksum.
# BASE_URL defaults to archive.apache.org, which keeps every Maven release;
# dlcdn.apache.org only serves current releases, so a pinned version stops
# being downloadable from there once it is superseded.
ARG MAVEN_VERSION=3.9.4
ARG USER_HOME_DIR="/root"
ARG BASE_URL=https://archive.apache.org/dist/maven/maven-3/${MAVEN_VERSION}/binaries
ARG SHA=deaa39e16b2cf20f8cd7d232a1306344f04020e1f0fb28d35492606f647a60fe729cc40d3cba33e093a17aed41bd161fe1240556d0f1b80e773abd408686217e

# Download, verify, unpack into $ROOTDIR/share/maven and expose `mvn` on PATH
# through a symlink in $ROOTDIR/bin. The tarball is deleted in the same layer.
# The checksum line uses the "<hash><two spaces><file>" format read by
# `sha512sum -c`.
RUN mkdir -p $ROOTDIR/share/maven $ROOTDIR/share/maven/ref \
    && echo "Downlaoding maven" \
    && curl -fsSL -o /tmp/apache-maven.tar.gz ${BASE_URL}/apache-maven-${MAVEN_VERSION}-bin.tar.gz \
    \
    && echo "Checking download hash" \
    && echo "${SHA}  /tmp/apache-maven.tar.gz" | sha512sum -c - \
    \
    && echo "Unziping maven" \
    && tar -xzf /tmp/apache-maven.tar.gz -C $ROOTDIR/share/maven --strip-components=1 \
    \
    && echo "Cleaning and setting links" \
    && rm -f /tmp/apache-maven.tar.gz \
    && ln -s $ROOTDIR/share/maven/bin/mvn $ROOTDIR/bin/mvn

ENV MAVEN_HOME=$ROOTDIR/share/maven \
    MAVEN_CONFIG="$USER_HOME_DIR/.m2"

# Configure Python environment
# Upgrade pip first so the package install below runs with the new pip, then
# install the build tooling and the pyspark release matching SPARK_VERSION.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir build wheel pyspark==$SPARK_VERSION

# Temporarily add the CRAN apt source (the "cran40" suite, i.e. R 4.x) to
# install r-base and r-base-dev, then remove the source and its key again.
# The archive signing key (Michael Rutter's) is referenced by its full
# fingerprint rather than a short key ID, following
# https://cran.rstudio.com/bin/linux/ubuntu/#secure-apt
# It is fetched from the keyserver over port 80 to avoid firewall issues for
# some users.
# No sudo: this RUN already calls apt-get directly, so it executes as root.
# CRAN_REPO is assigned after the apt-get install so that lsb_release is
# evaluated at the same point as before (once the tooling is installed).
RUN CRAN_KEY=E298A3A825C0D65DFD57CBB651716619E084DAB9 \
    && apt-get update \
    && apt-get install --yes software-properties-common apt-transport-https \
    && CRAN_REPO="deb [arch=amd64,i386] https://cran.rstudio.com/bin/linux/ubuntu $(lsb_release -cs)-cran40/" \
    && gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys "$CRAN_KEY" \
    && gpg -a --export "$CRAN_KEY" | apt-key add - \
    && add-apt-repository -y "$CRAN_REPO" \
    && apt-get update \
    && apt-get install --yes \
        libssl-dev \
        r-base \
        r-base-dev \
    && add-apt-repository -r "$CRAN_REPO" \
    && apt-key del "$CRAN_KEY" \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# R packages needed by the Databricks notebook integration:
#  - hwriterPlus: used by Databricks to display output in notebook cells. It
#    has been removed for newer versions of R, so the archived 1.0-3 source
#    tarball is installed directly; hwriter, TeachingDemos and htmltools are
#    installed from CRAN beforehand.
#  - Rserve: lets Spark communicate with a local R process to run R code.
# `set -e` aborts the step as soon as one of the R invocations exits non-zero.
RUN set -e; \
    R -e "options(repos = list(CRAN = 'https://cloud.r-project.org/')); install.packages(c('hwriter', 'TeachingDemos', 'htmltools'))"; \
    R -e "install.packages('https://cran.r-project.org/src/contrib/Archive/hwriterPlus/hwriterPlus_1.0-3.tar.gz', repos=NULL, type='source')"; \
    R -e "install.packages('Rserve', repos='http://rforge.net/')"

# Additional instructions to set up RStudio. If you don't need RStudio, you
# can omit the commands below from your Dockerfile. Even with them in place
# you still need an init script to start the RStudio daemon (see README.md
# for details).

# Databricks configuration for RStudio sessions.
COPY Rprofile.site /usr/lib/R/etc/Rprofile.site

# RStudio Server installation.
# Installs gdebi-core and wget, downloads the rstudio-server 2022.02.1-461
# amd64 package (the "bionic" build), installs it with gdebi and prints the
# installed version as a sanity check. The downloaded .deb and the apt lists
# are deleted in this same layer so they do not persist in the image.
RUN apt-get update \
    && apt-get install -y gdebi-core wget \
    && wget https://download2.rstudio.org/server/bionic/amd64/rstudio-server-2022.02.1-461-amd64.deb \
    && gdebi -n rstudio-server-2022.02.1-461-amd64.deb \
    && rstudio-server version \
    && rm -f rstudio-server-2022.02.1-461-amd64.deb \
    && rm -rf /var/lib/apt/lists/*

# Clean up
# NOTE(review): buildDeps is assigned as a shell variable inside the earlier
# dependency-install RUN. Shell variables do not carry over between RUN
# instructions, so unless the base image exports buildDeps this expands to
# `apt-get purge -y --auto-remove` with no package names and the build tools
# stay installed. Confirm the intent before listing packages here: r-base-dev
# and other tooling may rely on them.
# Deleting the apt lists here only affects this layer; files written by
# earlier layers remain part of the image.
RUN apt-get purge -y --auto-remove $buildDeps \
&& rm -rf /var/lib/apt/lists/*
12 changes: 12 additions & 0 deletions DBR12.2LTS/Rprofile.site
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Databricks configuration for RStudio sessions.
# Please do not remove this file or modify following lines.
#
# Site-wide R startup profile. Everything below runs only when the command
# line of the current R process (read from /proc/<pid>/cmdline) contains
# "rstudio"; other R sessions are left untouched.
if (grepl("rstudio", system(paste0("cat /proc/", Sys.getpid(), "/cmdline"), intern = T))) {
# Point SPARK_HOME at the Databricks Spark installation.
Sys.setenv("SPARK_HOME" = "/databricks/spark")
# Put the R library directory shipped with Spark first on the library path.
.libPaths(c("/databricks/spark/R/lib", .libPaths()))
# Request GetRStudioBackendGUID from the endpoint at localhost:6061 via wget,
# strip the double quotes from the reply with `tr`, and store the result as
# DATABRICKS_GUID in the global environment.
assign("DATABRICKS_GUID", system('wget -qO - \'http://localhost:6061/?type="com.databricks.backend.common.rpc.DriverMessages$GetRStudioBackendGUID"\' --post-data=\'{"@class":"com.databricks.backend.common.rpc.DriverMessages$GetRStudioBackendGUID"}\' --no-check-certificate | tr -d \\" ', intern = TRUE), envir = .GlobalEnv)
# Send StartRStudioSparkRBackend with that GUID and keep the reply in the
# EXISTING_SPARKR_BACKEND_PORT environment variable (presumably the port of
# the started SparkR backend, going by the variable name).
Sys.setenv("EXISTING_SPARKR_BACKEND_PORT" = system(paste0('wget -qO - \'http://localhost:6061/?type="com.databricks.backend.common.rpc.DriverMessages$StartRStudioSparkRBackend"\' --post-data=\'{"@class":"com.databricks.backend.common.rpc.DriverMessages$StartRStudioSparkRBackend", "guid": "', DATABRICKS_GUID, '"}\' --no-check-certificate'), intern = TRUE))
# Send GetRStudioRAuthSecret for that port, strip the double quotes from the
# reply, and store it in SPARKR_BACKEND_AUTH_SECRET.
Sys.setenv("SPARKR_BACKEND_AUTH_SECRET" = system(paste0('wget -qO - \'http://localhost:6061/?type="com.databricks.backend.common.rpc.DriverMessages$GetRStudioRAuthSecret"\' --post-data=\'{"@class":"com.databricks.backend.common.rpc.DriverMessages$GetRStudioRAuthSecret", "port": "', Sys.getenv("EXISTING_SPARKR_BACKEND_PORT"), '"}\' --no-check-certificate | tr -d \\" '), intern = TRUE))
# .Last is the hook R calls when the session ends; it sends
# StopRStudioSparkRBackend for the port recorded above.
.Last <- function() {
system(paste0('wget -qO - \'http://localhost:6061/?type="com.databricks.backend.common.rpc.DriverMessages$StopRStudioSparkRBackend"\' --post-data=\'{"@class":"com.databricks.backend.common.rpc.DriverMessages$StopRStudioSparkRBackend", "port": "', Sys.getenv("EXISTING_SPARKR_BACKEND_PORT") , '"}\' --no-check-certificate'), intern = TRUE)
}
}
6 changes: 6 additions & 0 deletions DBR12.2LTS/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash

set -e

# Build the image once per target architecture (amd64 first, then arm64) and
# load each result into the local Docker image store under its own tag.
for arch in amd64 arm64; do
  docker buildx build -t "mosaic-dev:dbr12.2LTS-spark3.3.2-${arch}" --progress=plain --platform "linux/${arch}" --load .
done
5 changes: 3 additions & 2 deletions Dockerfile.template
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,10 @@ RUN mkdir -p /usr/lib/jni && ln -s $ROOTDIR/lib/libgdalalljni.so /usr/lib/jni/li
RUN mkdir -p /usr/lib/ogdi && ln -s $ROOTDIR/lib/libgdal.so /usr/lib/ogdi/libgdal.so

# Add Maven
ARG MAVEN_VERSION=3.9.1
ARG MAVEN_VERSION=3.9.4
ARG USER_HOME_DIR="/root"
ARG BASE_URL=https://dlcdn.apache.org/maven/maven-3/${MAVEN_VERSION}/binaries
ARG SHA=d3be5956712d1c2cf7a6e4c3a2db1841aa971c6097c7a67f59493a5873ccf8c8b889cf988e4e9801390a2b1ae5a0669de07673acb090a083232dbd3faf82f3e3
ARG SHA=deaa39e16b2cf20f8cd7d232a1306344f04020e1f0fb28d35492606f647a60fe729cc40d3cba33e093a17aed41bd161fe1240556d0f1b80e773abd408686217e

RUN mkdir -p $ROOTDIR/share/maven $ROOTDIR/share/maven/ref \
&& echo "Downlaoding maven" \
Expand All @@ -91,6 +91,7 @@ ENV MAVEN_HOME $ROOTDIR/share/maven
ENV MAVEN_CONFIG "$USER_HOME_DIR/.m2"

# Configure Python virtual environment
RUN pip3 install pip --upgrade
RUN pip3 install build wheel keplergl ipython pyspark==$SPARK_VERSION

# Clean up
Expand Down
4 changes: 2 additions & 2 deletions ubuntu-22-spark-3.4/Dockerfile.template
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,10 @@ RUN mkdir -p /usr/lib/ogdi && ln -s $ROOTDIR/lib/libgdal.so /usr/lib/ogdi/libgda


# Add Maven
ARG MAVEN_VERSION=3.9.1
ARG MAVEN_VERSION=3.9.4
ARG USER_HOME_DIR="/root"
ARG BASE_URL=https://dlcdn.apache.org/maven/maven-3/${MAVEN_VERSION}/binaries
ARG SHA=d3be5956712d1c2cf7a6e4c3a2db1841aa971c6097c7a67f59493a5873ccf8c8b889cf988e4e9801390a2b1ae5a0669de07673acb090a083232dbd3faf82f3e3
ARG SHA=deaa39e16b2cf20f8cd7d232a1306344f04020e1f0fb28d35492606f647a60fe729cc40d3cba33e093a17aed41bd161fe1240556d0f1b80e773abd408686217e

RUN mkdir -p $ROOTDIR/share/maven $ROOTDIR/share/maven/ref \
&& echo "Downlaoding maven" \
Expand Down
2 changes: 1 addition & 1 deletion ubuntu-22-spark-3.4/build
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ sed -e "s/%%GDAL_VERSION%%/$GDAL_VERSION/" \
-e "s/%%SPARK_VERSION%%/$SPARK_VERSION/" \
-e "s/%%CORES%%/$CORES/" "Dockerfile.template" > Dockerfile

docker build -t "mosaic-dev:ubuntu22-gdal$GDAL_VERSION-spark$SPARK_VERSION" .
# Build the image from the generated Dockerfile, tagged with the GDAL and Spark versions.
docker build -t "mosaic-dev:ubuntu22-gdal$GDAL_VERSION-spark$SPARK_VERSION" .