forked from apache/spark
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SPARK-42657][CONNECT] Support to find and transfer client-side REPL classfiles to server as artifacts

### What changes were proposed in this pull request?

This PR introduces the concept of a `ClassFinder` that is able to scrape the REPL output (either file-based or in-memory based) for generated class files. The `ClassFinder` is registered during initialization of the REPL and aids in uploading the generated class files as artifacts to the Spark Connect server.

### Why are the changes needed?

To run UDFs which are defined on the client side REPL, we require a mechanism that can find the local REPL classfiles and then utilise the mechanism from https://issues.apache.org/jira/browse/SPARK-42653 to transfer them to the server as artifacts.

### Does this PR introduce _any_ user-facing change?

Yes, users can now run UDFs on the default (ammonite) REPL with spark connect.

Input (in REPL):
```
class A(x: Int) { def get = x * 5 + 19 }
def dummyUdf(x: Int): Int = new A(x).get
val myUdf = udf(dummyUdf _)
spark.range(5).select(myUdf(col("id"))).as[Int].collect()
```

Output:
```
Array[Int] = Array(19, 24, 29, 34, 39)
```

### How was this patch tested?

Unit tests + E2E tests.

Closes apache#40675 from vicennial/SPARK-42657.

Lead-authored-by: vicennial <[email protected]>
Co-authored-by: Venkata Sai Akhil Gudesa <[email protected]>
Signed-off-by: Herman van Hovell <[email protected]>
- Loading branch information
1 parent
7a5b6c8
commit 3941369
Showing
11 changed files
with
349 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
80 changes: 80 additions & 0 deletions
80
...r/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/ClassFinder.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.connect.client | ||
|
||
import java.net.URL | ||
import java.nio.file.{Files, LinkOption, Path, Paths} | ||
|
||
import scala.collection.JavaConverters._ | ||
|
||
import ammonite.repl.api.Session | ||
import ammonite.runtime.SpecialClassLoader | ||
|
||
import org.apache.spark.sql.connect.client.Artifact.{InMemory, LocalFile} | ||
|
||
/**
 * A source of class files that can be turned into [[Artifact]]s.
 *
 * Implementations decide where the class files come from (for example a directory on disk or
 * in-memory REPL output).
 */
trait ClassFinder {

  /**
   * Locates the class files known to this finder.
   *
   * @return
   *   an iterator with one [[Artifact]] per class file found
   */
  def findClasses(): Iterator[Artifact]
}
|
||
/**
 * A generic [[ClassFinder]] implementation that traverses a specific REPL output directory.
 *
 * @param _rootDir
 *   Absolute path of the directory to scan for `.class` files. Construction fails with an
 *   `IllegalArgumentException` if the path is not absolute or is not an existing directory.
 */
class REPLClassDirMonitor(_rootDir: String) extends ClassFinder {
  private val rootDir = Paths.get(_rootDir)
  require(rootDir.isAbsolute, s"REPL class directory must be an absolute path: ${_rootDir}")
  require(Files.isDirectory(rootDir), s"REPL class directory is not a directory: ${_rootDir}")

  /**
   * Walks `rootDir` recursively and returns one artifact per regular `.class` file found.
   *
   * The directory walk is fully consumed before this method returns, so the returned iterator
   * is a snapshot of the directory contents at call time.
   *
   * @return
   *   an iterator over the class-file artifacts found under `rootDir`
   */
  override def findClasses(): Iterator[Artifact] = {
    // Files.walk keeps directory handles open until the stream is closed, so the results are
    // materialised here and the stream is released before handing anything back to the caller.
    val walk = Files.walk(rootDir)
    try {
      walk
        // Ignore symbolic links
        .filter(path => Files.isRegularFile(path, LinkOption.NOFOLLOW_LINKS) && isClass(path))
        .map[Artifact](path => toArtifact(path))
        .iterator()
        .asScala
        .toVector
        .iterator
    } finally {
      walk.close()
    }
  }

  /** Wraps a class file on disk as an artifact keyed by its path relative to `rootDir`. */
  private def toArtifact(path: Path): Artifact = {
    // Persist the relative path of the classfile
    Artifact.newClassArtifact(rootDir.relativize(path), new LocalFile(path))
  }

  /** Returns true when the path's string form ends with the `.class` extension. */
  private def isClass(path: Path): Boolean = path.toString.endsWith(".class")
}
|
||
/**
 * A special [[ClassFinder]] for the Ammonite REPL to handle in-memory class files.
 *
 * @param session
 *   The Ammonite session whose frames are inspected for class files.
 */
class AmmoniteClassFinder(session: Session) extends ClassFinder {

  /**
   * Collects the in-memory class files from every frame of the session.
   *
   * @return
   *   a lazily evaluated iterator with one in-memory artifact per named classpath entry
   */
  override def findClasses(): Iterator[Artifact] =
    session.frames.iterator.flatMap { frame =>
      inMemoryClasses(frame.classloader.asInstanceOf[SpecialClassLoader])
    }

  /** Builds artifacts for the classpath signature entries of `loader` that carry a name. */
  private def inMemoryClasses(loader: SpecialClassLoader): Iterator[Artifact] = {
    val entries: Seq[(Either[String, URL], Long)] = loader.classpathSignature
    // Only `Left` entries are looked up in the loader's file dictionary; `Right` (URL) entries
    // are skipped.
    entries.iterator.collect { case (Left(className), _) =>
      val classFile = classFilePath(className)
      val classBytes = loader.newFileDict(className)
      Artifact.newClassArtifact(classFile, new InMemory(classBytes))
    }
  }

  /** Turns a dotted name such as `a.b.C` into the relative path `a/b/C.class`. */
  private def classFilePath(className: String): Path = {
    val segments = className.split('.')
    val lastIdx = segments.length - 1
    // Append the extension to the final segment only.
    val withSuffix = segments.updated(lastIdx, segments(lastIdx) + ".class")
    Paths.get(withSuffix.head, withSuffix.tail: _*)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.