From 020e8d8dbb63edae751b93df8cbe0bb74c44d984 Mon Sep 17 00:00:00 2001 From: ccleva Date: Mon, 23 Dec 2024 13:03:47 +0100 Subject: [PATCH 1/6] Avoid read(URL) throwing NPE for invalid URLs --- .../main/java/tech/tablesaw/io/DataFrameReader.java | 11 ++++++----- .../java/tech/tablesaw/io/DataFrameReaderTest.java | 13 +++++++++++++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/core/src/main/java/tech/tablesaw/io/DataFrameReader.java b/core/src/main/java/tech/tablesaw/io/DataFrameReader.java index ce06dd41a..733d8e463 100644 --- a/core/src/main/java/tech/tablesaw/io/DataFrameReader.java +++ b/core/src/main/java/tech/tablesaw/io/DataFrameReader.java @@ -56,14 +56,13 @@ public Table url(String url) { * mime-type Use {@link #usingOptions(ReadOptions) usingOptions} to use non-default options */ public Table url(URL url) { - URLConnection connection = null; try { - connection = url.openConnection(); + URLConnection connection = url.openConnection(); + String contentType = connection.getContentType(); + return url(url, getCharset(contentType), getMimeType(contentType)); } catch (IOException e) { - e.printStackTrace(); + throw new RuntimeIOException(e); } - String contentType = connection.getContentType(); - return url(url, getCharset(contentType), getMimeType(contentType)); } private Table url(URL url, Charset charset, String mimeType) { @@ -87,11 +86,13 @@ private Table readUrl(URL url, Charset charset, DataReader reader) { } private String getMimeType(String contentType) { + if(contentType == null) return null; String[] pair = contentType.split(";"); return pair[0].trim(); } private Charset getCharset(String contentType) { + if(contentType == null) return Charset.defaultCharset(); String[] pair = contentType.split(";"); return pair.length == 1 ? Charset.defaultCharset() diff --git a/core/src/test/java/tech/tablesaw/io/DataFrameReaderTest.java b/core/src/test/java/tech/tablesaw/io/DataFrameReaderTest.java index 374b11af2..6e961bd5a 100644 --- a/core/src/test/java/tech/tablesaw/io/DataFrameReaderTest.java +++ b/core/src/test/java/tech/tablesaw/io/DataFrameReaderTest.java @@ -6,6 +6,7 @@ import com.google.common.jimfs.Configuration; import com.google.common.jimfs.Jimfs; import java.io.IOException; +import java.net.MalformedURLException; import java.net.URL; import java.nio.file.FileSystem; import java.nio.file.Files; @@ -81,4 +82,16 @@ public void readUrlUnknownMimeTypeNoExtension() throws Exception { .getMessage() .contains("No reader registered for mime-type application/octet-stream")); } + + @Test + void readInvalidURL() throws MalformedURLException { + final URL url = new URL("ftp://not-a-host/data.csv"); + assertThrows(RuntimeIOException.class, () -> Table.read().url(url)); + } + + @Test + void readInvalidURLNoExtension() throws MalformedURLException { + final URL url = new URL("ftp://not-a-host/data/csv"); + assertThrows(IllegalArgumentException.class, () -> Table.read().url(url)); + } } From 7be3bff51d134da2e0183eabb31dc6a9fc2cf94e Mon Sep 17 00:00:00 2001 From: ccleva Date: Fri, 3 Jan 2025 15:56:57 +0100 Subject: [PATCH 2/6] Update core/src/main/java/tech/tablesaw/io/DataFrameReader.java Co-authored-by: Ben McCann <322311+benmccann@users.noreply.github.com> --- core/src/main/java/tech/tablesaw/io/DataFrameReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/tech/tablesaw/io/DataFrameReader.java b/core/src/main/java/tech/tablesaw/io/DataFrameReader.java index 733d8e463..a289b7d40 100644 --- a/core/src/main/java/tech/tablesaw/io/DataFrameReader.java +++ b/core/src/main/java/tech/tablesaw/io/DataFrameReader.java @@ -61,7 +61,7 @@ public Table url(URL url) { String contentType = connection.getContentType(); return url(url, getCharset(contentType), getMimeType(contentType)); } catch (IOException e) { - throw new RuntimeIOException(e); + throw new RuntimeIOException(e); } } From 5d57aa7cfb33787ff488648d2ff8e00e9eed65d9 Mon Sep 17 00:00:00 2001 From: ccleva Date: Fri, 3 Jan 2025 15:57:50 +0100 Subject: [PATCH 3/6] Update core/src/main/java/tech/tablesaw/io/DataFrameReader.java Co-authored-by: Ben McCann <322311+benmccann@users.noreply.github.com> --- core/src/main/java/tech/tablesaw/io/DataFrameReader.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/tech/tablesaw/io/DataFrameReader.java b/core/src/main/java/tech/tablesaw/io/DataFrameReader.java index a289b7d40..42be4d10b 100644 --- a/core/src/main/java/tech/tablesaw/io/DataFrameReader.java +++ b/core/src/main/java/tech/tablesaw/io/DataFrameReader.java @@ -86,7 +86,9 @@ private Table readUrl(URL url, Charset charset, DataReader reader) { } private String getMimeType(String contentType) { - if(contentType == null) return null; + if (contentType == null) { + return null; + } String[] pair = contentType.split(";"); return pair[0].trim(); } From b2c348215ac35c00270dbb3d74a459d1fb7ddca7 Mon Sep 17 00:00:00 2001 From: ccleva Date: Fri, 3 Jan 2025 15:58:35 +0100 Subject: [PATCH 4/6] Update core/src/main/java/tech/tablesaw/io/DataFrameReader.java Co-authored-by: Ben McCann <322311+benmccann@users.noreply.github.com> --- core/src/main/java/tech/tablesaw/io/DataFrameReader.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/tech/tablesaw/io/DataFrameReader.java b/core/src/main/java/tech/tablesaw/io/DataFrameReader.java index 42be4d10b..7e1ae8785 100644 --- a/core/src/main/java/tech/tablesaw/io/DataFrameReader.java +++ b/core/src/main/java/tech/tablesaw/io/DataFrameReader.java @@ -94,7 +94,9 @@ private String getMimeType(String contentType) { } private Charset getCharset(String contentType) { - if(contentType == null) return Charset.defaultCharset(); + if (contentType == null) { + return Charset.defaultCharset(); + } String[] pair = contentType.split(";"); return pair.length == 1 ? Charset.defaultCharset() From c315b817788bd4b39510052db4f539c6153bc900 Mon Sep 17 00:00:00 2001 From: ccleva Date: Fri, 3 Jan 2025 15:58:44 +0100 Subject: [PATCH 5/6] Update core/src/test/java/tech/tablesaw/io/DataFrameReaderTest.java Co-authored-by: Ben McCann <322311+benmccann@users.noreply.github.com> --- core/src/test/java/tech/tablesaw/io/DataFrameReaderTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/java/tech/tablesaw/io/DataFrameReaderTest.java b/core/src/test/java/tech/tablesaw/io/DataFrameReaderTest.java index 6e961bd5a..27e36bd2d 100644 --- a/core/src/test/java/tech/tablesaw/io/DataFrameReaderTest.java +++ b/core/src/test/java/tech/tablesaw/io/DataFrameReaderTest.java @@ -85,7 +85,7 @@ public void readUrlUnknownMimeTypeNoExtension() throws Exception { @Test void readInvalidURL() throws MalformedURLException { - final URL url = new URL("ftp://not-a-host/data.csv"); + URL url = new URL("ftp://not-a-host/data.csv"); assertThrows(RuntimeIOException.class, () -> Table.read().url(url)); } From 36a4bca72eaa9007fbd93224f2ebbe8100f7fe30 Mon Sep 17 00:00:00 2001 From: ccleva Date: Fri, 3 Jan 2025 15:58:52 +0100 Subject: [PATCH 6/6] Update core/src/test/java/tech/tablesaw/io/DataFrameReaderTest.java Co-authored-by: Ben McCann <322311+benmccann@users.noreply.github.com> --- core/src/test/java/tech/tablesaw/io/DataFrameReaderTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/java/tech/tablesaw/io/DataFrameReaderTest.java b/core/src/test/java/tech/tablesaw/io/DataFrameReaderTest.java index 27e36bd2d..313be26f8 100644 --- a/core/src/test/java/tech/tablesaw/io/DataFrameReaderTest.java +++ b/core/src/test/java/tech/tablesaw/io/DataFrameReaderTest.java @@ -91,7 +91,7 @@ void readInvalidURL() throws MalformedURLException { @Test void readInvalidURLNoExtension() throws MalformedURLException { - final URL url = new URL("ftp://not-a-host/data/csv"); + URL url = new URL("ftp://not-a-host/data/csv"); assertThrows(IllegalArgumentException.class, () -> Table.read().url(url)); } }