Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TOREE-557] Bump Spark 3.5 #224

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 10 additions & 12 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ jobs:
fail-fast: false
matrix:
java: [ '8', '11' ]
scala: [ '2.12', '2.13' ]
env:
# define Java options for both official sbt and sbt-extras
JAVA_OPTS: -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M -Dfile.encoding=UTF-8
Expand All @@ -41,28 +42,25 @@ jobs:
TEST_DILATION: 3
steps:
- name: Checkout
uses: actions/checkout@v2
uses: actions/checkout@v4
with:
clean: true
- name: Checkout
uses: actions/setup-java@v2
- name: Install JDK/SBT
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: ${{ matrix.java }}
- name: Add SBT launcher
run: |
mkdir -p $HOME/.sbt/launchers/1.3.12
curl -L -o $HOME/.sbt/launchers/1.3.12/sbt-launch.jar https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/1.3.12/sbt-launch.jar
cache: 'sbt'
- name: Build
run: |
make clean release
make SCALA_VERSION=${{ matrix.scala }} clean release
- name: Run tests
run: |
make test
make SCALA_VERSION=${{ matrix.scala }} test
# See https://issues.apache.org/jira/browse/TOREE-526
# - name: Run system tests
# run: |
# make system-test
- name: Run license eudit
# make SCALA_VERSION=${{ matrix.scala }} system-test
- name: Run license audit
run: |
make audit-licenses
make SCALA_VERSION=${{ matrix.scala }} audit-licenses
46 changes: 23 additions & 23 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,32 +26,32 @@ RUN curl -sL https://deb.nodesource.com/setup_0.12 | bash - && \
npm install -g bower

# for Apache Spark demos
ENV APACHE_SPARK_VERSION 3.3.2
ENV APACHE_SPARK_CUSTOM_NAME=hadoop3
ARG APACHE_SPARK_VERSION=3.5.2
ARG SCALA_VERSION=2.13

RUN apt-get -y update && \
apt-get -y install software-properties-common

RUN \
echo "===> add webupd8 repository..." && \
echo "deb http://ppa.launchpad.net/webupd8team/java/ubuntu trusty main" | tee /etc/apt/sources.list.d/webupd8team-java.list && \
echo "deb-src http://ppa.launchpad.net/webupd8team/java/ubuntu trusty main" | tee -a /etc/apt/sources.list.d/webupd8team-java.list && \
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys EEA14886 && \
apt-get update

RUN echo "===> install Java" && \
echo debconf shared/accepted-oracle-license-v1-1 select true | debconf-set-selections && \
echo debconf shared/accepted-oracle-license-v1-1 seen true | debconf-set-selections && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --force-yes oracle-java8-installer oracle-java8-set-default && \
apt-get clean && \
update-java-alternatives -s java-8-oracle

RUN cd /tmp && \
wget -q https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz && \
tar xzf spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz -C /usr/local && \
rm spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz

RUN cd /usr/local && ln -s spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME} spark
RUN JAVA_8=`update-alternatives --list java | grep java-8-openjdk` || echo $JAVA_8 && \
if [ "x$JAVA_8" = "x" ]; then \
apt-get -y update ; \
apt-get install -y --no-install-recommends openjdk-8-jdk ca-certificates-java ; \
apt-get clean ; \
rm -rf /var/lib/apt/lists/* ; \
update-ca-certificates -f ; \
JAVA_8=`update-alternatives --list java | grep java-8-openjdk` ; \
update-alternatives --set java $JAVA_8 ; \
fi

RUN if [ "$SCALA_VERSION" = "2.13" ]; then APACHE_SPARK_CUSTOM_NAME=hadoop3-scala2.13; else APACHE_SPARK_CUSTOM_NAME=hadoop3; fi && \
SPARK_TGZ_NAME=spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME} && \
if [ ! -d "/usr/local/$SPARK_TGZ_NAME" ]; then \
cd /tmp ; \
wget -q https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/${SPARK_TGZ_NAME}.tgz ; \
tar -xzf ${SPARK_TGZ_NAME}.tgz -C /usr/local ; \
rm ${SPARK_TGZ_NAME}.tgz ; \
ln -snf /usr/local/$SPARK_TGZ_NAME /usr/local/spark ; \
fi

# R support
RUN apt-get update && \
Expand All @@ -62,7 +62,7 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/*

ENV SPARK_HOME /usr/local/spark
ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9.5-src.zip
ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9.7-src.zip
ENV PYSPARK_PYTHON /home/main/anaconda2/envs/python3/bin/python
ENV R_LIBS_USER $SPARK_HOME/R/lib

Expand Down
41 changes: 22 additions & 19 deletions Dockerfile.toree-dev
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,30 @@ FROM jupyter/all-spark-notebook
USER root

# Spark dependencies
ENV APACHE_SPARK_VERSION 3.3.2
ENV APACHE_SPARK_CUSTOM_NAME=hadoop3

RUN apt-get -y update && \
apt-get install -y --no-install-recommends openjdk-8-jdk ca-certificates-java && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
update-ca-certificates -f && \
JAVA_8=`update-alternatives --list java | grep java-8-openjdk` && \
update-alternatives --set java $JAVA_8
ARG APACHE_SPARK_VERSION=3.5.2
ARG SCALA_VERSION=2.13

RUN JAVA_8=`update-alternatives --list java | grep java-8-openjdk` || echo $JAVA_8 && \
if [ "x$JAVA_8" = "x" ]; then \
apt-get -y update ; \
apt-get install -y --no-install-recommends openjdk-8-jdk ca-certificates-java ; \
apt-get clean ; \
rm -rf /var/lib/apt/lists/* ; \
update-ca-certificates -f ; \
JAVA_8=`update-alternatives --list java | grep java-8-openjdk` ; \
update-alternatives --set java $JAVA_8 ; \
fi

# Installing Spark3
RUN cd /tmp && \
wget -q https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz && \
tar xzf spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz -C /usr/local && \
rm spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz

# Overwrite symlink
RUN cd /usr/local && \
rm spark && \
ln -s spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME} spark
RUN if [ "$SCALA_VERSION" = "2.13" ]; then APACHE_SPARK_CUSTOM_NAME=hadoop3-scala2.13; else APACHE_SPARK_CUSTOM_NAME=hadoop3; fi && \
SPARK_TGZ_NAME=spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME} && \
if [ ! -d "/usr/local/$SPARK_TGZ_NAME" ]; then \
cd /tmp ; \
wget -q https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/${SPARK_TGZ_NAME}.tgz ; \
tar -xzf ${SPARK_TGZ_NAME}.tgz -C /usr/local ; \
rm ${SPARK_TGZ_NAME}.tgz ; \
ln -snf /usr/local/$SPARK_TGZ_NAME /usr/local/spark ; \
fi

# Remove other scala kernels
RUN cd /opt/conda/share/jupyter/kernels/ && \
Expand Down
11 changes: 7 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ IS_SNAPSHOT?=true
SNAPSHOT:=-SNAPSHOT
endif

APACHE_SPARK_VERSION?=3.3.2
SCALA_VERSION?=2.12
APACHE_SPARK_VERSION?=3.5.2
SCALA_VERSION?=2.13
IMAGE?=jupyter/all-spark-notebook:latest
EXAMPLE_IMAGE?=apache/toree-examples
TOREE_DEV_IMAGE?=apache/toree-dev
Expand All @@ -49,7 +49,7 @@ endef

RUN=$(RUN_PREFIX)$(1)$(RUN_SUFFIX)

ENV_OPTS:=APACHE_SPARK_VERSION=$(APACHE_SPARK_VERSION) VERSION=$(VERSION) IS_SNAPSHOT=$(IS_SNAPSHOT)
ENV_OPTS:=APACHE_SPARK_VERSION=$(APACHE_SPARK_VERSION) SCALA_VERSION=$(SCALA_VERSION) VERSION=$(VERSION) IS_SNAPSHOT=$(IS_SNAPSHOT)

ASSEMBLY_JAR:=toree-assembly-$(VERSION)$(SNAPSHOT).jar

Expand Down Expand Up @@ -83,7 +83,10 @@ clean: clean-dist
@-docker rmi -f $(TOREE_DEV_IMAGE)

.toree-dev-image:
@docker build -t $(TOREE_DEV_IMAGE) -f Dockerfile.toree-dev .
@docker build -t $(TOREE_DEV_IMAGE) -f Dockerfile.toree-dev \
--build-arg APACHE_SPARK_VERSION=$(APACHE_SPARK_VERSION) \
--build-arg SCALA_VERSION=$(SCALA_VERSION) \
.
touch $@

.clean-binder-image:
Expand Down
21 changes: 12 additions & 9 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,23 @@

import scala.util.Properties

lazy val scala212 = "2.12.18"
lazy val scala213 = "2.13.8"
lazy val defaultScalaVersion = sys.env.get("SCALA_VERSION") match {
case Some("2.12") => scala212
case _ => scala213
}

// Version settings
ThisBuild / version := Properties.envOrElse("VERSION", "0.0.0-dev") +
(if ((ThisBuild / isSnapshot ).value) "-SNAPSHOT" else "")
ThisBuild / isSnapshot := Properties.envOrElse("IS_SNAPSHOT","true").toBoolean
ThisBuild / organization := "org.apache.toree.kernel"
ThisBuild / crossScalaVersions := Seq("2.12.15")
ThisBuild / scalaVersion := (ThisBuild / crossScalaVersions ).value.head
ThisBuild / crossScalaVersions := Seq(scala212, scala213)
ThisBuild / scalaVersion := defaultScalaVersion
ThisBuild / Dependencies.sparkVersion := {
val envVar = "APACHE_SPARK_VERSION"
val defaultVersion = "3.3.2"
val defaultVersion = "3.5.2"

Properties.envOrNone(envVar) match {
case None =>
Expand All @@ -43,7 +50,6 @@ ThisBuild / scalacOptions ++= Seq(
"-deprecation",
"-unchecked",
"-feature",
"-Xfatal-warnings",
"-language:reflectiveCalls",
"-target:jvm-1.8"
)
Expand Down Expand Up @@ -117,7 +123,7 @@ ThisBuild / credentials += Credentials(Path.userHome / ".ivy2" / ".credentials")

/** Root Toree project. */
lazy val root = (project in file("."))
.settings(name := "toree", crossScalaVersions := Nil)
.settings(name := "toree")
.aggregate(
macros,protocol,plugins,communication,kernelApi,client,scalaInterpreter,sqlInterpreter,kernel
)
Expand Down Expand Up @@ -202,10 +208,7 @@ lazy val kernel = (project in file("kernel"))
enablePlugins(ScalaUnidocPlugin)
(ScalaUnidoc / unidoc / scalacOptions) ++= Seq(
"-Ymacro-expand:none",
"-skip-packages", Seq(
"org.apache.pekko",
"scala"
).mkString(":"),
"-skip-packages", "org.apache.pekko:scala",
"-no-link-warnings" // Suppresses problems with Scaladoc @throws links
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import org.apache.toree.kernel.protocol.v5.content.ExecuteRequest
import org.apache.toree.utils.LogLike
import play.api.libs.json.{JsPath, Json, JsonValidationError, Reads}

import scala.collection.mutable
import scala.concurrent.duration._

object Utilities extends LogLike {
Expand Down Expand Up @@ -64,7 +65,7 @@ object Utilities extends LogLike {
val header = Json.parse(message.frames(delimiterIndex + 2)).as[Header]
val parentHeader = Json.parse(message.frames(delimiterIndex + 3)).validate[ParentHeader].fold[ParentHeader](
// TODO: Investigate better solution than setting parentHeader to null for {}
(invalid: Seq[(JsPath, Seq[JsonValidationError])]) => null, //HeaderBuilder.empty,
(invalid: collection.Seq[(JsPath, collection.Seq[JsonValidationError])]) => null, //HeaderBuilder.empty,
(valid: ParentHeader) => valid
)
val metadata = Json.parse(message.frames(delimiterIndex + 4)).as[Metadata]
Expand All @@ -78,20 +79,20 @@ object Utilities extends LogLike {
}

implicit def KernelMessageToZMQMessage(kernelMessage : KernelMessage) : ZMQMessage = {
val frames: scala.collection.mutable.ListBuffer[ByteString] = scala.collection.mutable.ListBuffer()
kernelMessage.ids.map((id : Array[Byte]) => frames += ByteString.apply(id) )
val frames: mutable.ListBuffer[ByteString] = mutable.ListBuffer()
kernelMessage.ids.map((id: Array[Byte]) => frames += ByteString.apply(id))
frames += "<IDS|MSG>"
frames += kernelMessage.signature
frames += Json.toJson(kernelMessage.header).toString()
frames += Json.toJson(kernelMessage.parentHeader).toString()
frames += Json.toJson(kernelMessage.metadata).toString
frames += kernelMessage.contentString
ZMQMessage(frames : _*)
ZMQMessage(frames.toSeq : _*)
}

def parseAndHandle[T](json: String, reads: Reads[T], handler: T => Unit) : Unit = {
Json.parse(json).validate[T](reads).fold(
(invalid: Seq[(JsPath, Seq[JsonValidationError])]) =>
(invalid: collection.Seq[(JsPath, collection.Seq[JsonValidationError])]) =>
logger.error(s"Could not parse JSON, ${json}"),
(content: T) => handler(content)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class HeartbeatClient(
case HeartbeatMessage =>
import scala.concurrent.ExecutionContext.Implicits.global
val id = java.util.UUID.randomUUID().toString
futureMap += (id -> sender)
futureMap += (id -> sender())
logger.info(s"Heartbeat client send: $id")
val future = socket ? ZMQMessage(ByteString(id.getBytes))
future.onComplete {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ class IOPubClient(
} else {
logger.warn("Received message with null parent header.")
logger.debug(s"Kernel message is: $kernelMessage")
sender.forward(Failure(new RuntimeException(PARENT_HEADER_NULL_MESSAGE)))
sender().forward(Failure(new RuntimeException(PARENT_HEADER_NULL_MESSAGE)))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ import org.mockito.ArgumentMatchers.{eq => mockEq, _}
import org.mockito.Mockito._
import org.scalatest.concurrent.{Eventually, ScalaFutures}
import org.scalatestplus.mockito.MockitoSugar
import org.scalatest.time.{Milliseconds, Span}
import org.scalatest.time.{Milliseconds, Seconds, Span}
import org.scalatest.funspec.AnyFunSpecLike
import org.scalatest.matchers.should.Matchers
import org.scalatest.BeforeAndAfterEach
Expand All @@ -60,7 +60,7 @@ class IOPubClientSpec extends TestKit(ActorSystem(
{
private val TestTimeout = Timeout(10.seconds)
implicit override val patienceConfig = PatienceConfig(
timeout = scaled(Span(200, Milliseconds)),
timeout = scaled(Span(1, Seconds)),
interval = scaled(Span(5, Milliseconds))
)
private val SignatureEnabled = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class SignatureCheckerActor(
val isValidSignature = hmacString == signature
logger.trace(s"Signature ${signature} validity checked against " +
s"hmac ${hmacString} with outcome ${isValidSignature}")
sender ! isValidSignature
sender() ! isValidSignature
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class SignatureProducerActor(
Json.stringify(Json.toJson(message.metadata)),
message.contentString
)
sender ! signature
sender() ! signature
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ class ZeroMQSocketRunnable(
socket.close()
} catch {
case ex: Exception =>
logger.error("Failed to close socket!", _: Throwable)
logger.error("Failed to close socket!", ex: Throwable)
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ trait OrderedSupport extends Actor with Stash with LogLike {
*/
def startProcessing(): Unit = {
logger.debug("Actor is in processing state and will stash messages of " +
s"types: ${orderedTypes.mkString(" ")}")
s"types: ${orderedTypes().mkString(" ")}")
context.become(waiting, discardOld = false)
}

Expand Down
2 changes: 1 addition & 1 deletion etc/kernel.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
},
"display_name": "Apache Toree (development)",
"env": {
"PYTHONPATH": "/usr/local/spark/python:/usr/local/spark/python/lib/py4j-0.10.9.5-src.zip",
"PYTHONPATH": "/usr/local/spark/python:/usr/local/spark/python/lib/py4j-0.10.9.7-src.zip",
"SPARK_HOME": "/usr/local/spark",
"CAPTURE_STANDARD_ERR": "true",
"MAX_INTERPRETER_THREADS": "16",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class CoursierDependencyDownloader extends DependencyDownloader {
artifactClassifier.map(c => Classifier(c)).getOrElse(Classifier.empty)
)
}
))
).toSeq)

printStream.println(s"Marking $groupId:$artifactId:$version for download")

Expand Down
Loading