Skip to content

Commit 79d02e2

Browse files
rdblue authored and lresende committed
[TOREE-408] Add support for hdfs and s3 to AddJar
Closes #125
1 parent baf8391 commit 79d02e2

File tree

2 files changed

+34
-15
lines changed

2 files changed

+34
-15
lines changed

kernel/src/main/scala/org/apache/toree/magic/builtin/AddJar.scala

+32-14
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,18 @@
1818
package org.apache.toree.magic.builtin
1919

2020
import java.io.{File, PrintStream}
21-
import java.net.URL
21+
import java.net.{URL, URI}
2222
import java.nio.file.{Files, Paths}
23-
2423
import org.apache.toree.magic._
2524
import org.apache.toree.magic.builtin.AddJar._
2625
import org.apache.toree.magic.dependencies._
2726
import org.apache.toree.utils.{ArgumentParsingSupport, DownloadSupport, LogLike, FileUtils}
2827
import com.typesafe.config.Config
28+
import org.apache.hadoop.fs.Path
2929
import org.apache.toree.plugins.annotations.Event
3030

3131
object AddJar {
32+
val HADOOP_FS_SCHEMES = Set("hdfs", "s3", "s3n", "file")
3233

3334
private var jarDir:Option[String] = None
3435

@@ -63,18 +64,18 @@ class AddJar
6364
private def printStream = new PrintStream(outputStream)
6465

6566
/**
66-
* Retrieves file name from URL.
67+
* Retrieves file name from a URI.
6768
*
68-
* @param location The remote location (URL)
69-
* @return The name of the remote URL, or an empty string if one does not exist
69+
* @param location a URI
70+
* @return The file name of the remote URI, or an empty string if one does not exist
7071
*/
7172
def getFileFromLocation(location: String): String = {
72-
val url = new URL(location)
73-
val file = url.getFile.split("/")
74-
if (file.length > 0) {
75-
file.last
73+
val uri = new URI(location)
74+
val pathParts = uri.getPath.split("/")
75+
if (pathParts.nonEmpty) {
76+
pathParts.last
7677
} else {
77-
""
78+
""
7879
}
7980
}
8081

@@ -122,10 +123,27 @@ class AddJar
122123
// Report beginning of download
123124
printStream.println(s"Starting download from $jarRemoteLocation")
124125

125-
downloadFile(
126-
new URL(jarRemoteLocation),
127-
new File(downloadLocation).toURI.toURL
128-
)
126+
val jar = URI.create(jarRemoteLocation)
127+
if (HADOOP_FS_SCHEMES.contains(jar.getScheme)) {
128+
val conf = kernel.sparkContext.hadoopConfiguration
129+
val jarPath = new Path(jarRemoteLocation)
130+
val fs = jarPath.getFileSystem(conf)
131+
val destPath = if (downloadLocation.startsWith("file:")) {
132+
new Path(downloadLocation)
133+
} else {
134+
new Path("file:" + downloadLocation)
135+
}
136+
137+
fs.copyToLocalFile(
138+
false /* keep original file */,
139+
jarPath, destPath,
140+
true /* don't create checksum files */)
141+
} else {
142+
downloadFile(
143+
new URL(jarRemoteLocation),
144+
new File(downloadLocation).toURI.toURL
145+
)
146+
}
129147

130148
// Report download finished
131149
printStream.println(s"Finished download of $jarName")

kernel/src/test/scala/org/apache/toree/magic/builtin/AddJarSpec.scala

+2-1
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,8 @@ class AddJarSpec extends FunSpec with Matchers with MockitoSugar {
9191

9292
url = """http://www.example.com/remotecontent?filepath=/path/to/someJar.jar"""
9393
jarName = addJarMagic.getFileFromLocation(url)
94-
assert(jarName == "someJar.jar")
94+
// File names come from the path, not from the query string
95+
assert(jarName == "remotecontent")
9596

9697
url = """http://www.example.com/"""
9798
jarName = addJarMagic.getFileFromLocation(url)

0 commit comments

Comments
 (0)