From 424876ffac7c99ca0c454cffd809c2e692e672e3 Mon Sep 17 00:00:00 2001 From: Anthony Goubard Date: Wed, 29 May 2024 14:18:31 +0200 Subject: [PATCH 1/7] Improved performance of UriParser.normalisePath --- commons-vfs2/pom.xml | 69 +++++++++++++++++++ .../commons/vfs2/provider/UriParser.java | 24 ++++--- .../vfs2/provider/UriParserBenchmark.java | 34 +++++++++ 3 files changed, 116 insertions(+), 11 deletions(-) create mode 100644 commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserBenchmark.java diff --git a/commons-vfs2/pom.xml b/commons-vfs2/pom.xml index 3750a95fef..3630552d1d 100644 --- a/commons-vfs2/pom.xml +++ b/commons-vfs2/pom.xml @@ -352,6 +352,75 @@ + + + benchmark + + + true + org.apache + 1.37 + + + + + org.openjdk.jmh + jmh-core + ${jmh.version} + test + + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + test + + + + + + + + maven-compiler-plugin + ${commons.compiler.version} + + + **/* + + + + + + org.codehaus.mojo + exec-maven-plugin + + + benchmark + test + + exec + + + test + java + + -classpath + + org.openjdk.jmh.Main + -rf + json + -rff + target/jmh-result.json + ${benchmark} + + + + + + + + diff --git a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java index 9b23df3cbd..569ba069db 100644 --- a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java +++ b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java @@ -70,8 +70,8 @@ private boolean readDot() { if (cursor + 2 >= end) { return false; } - final String sub = path.substring(cursor, cursor + 3); - if (sub.equals("%2e") || sub.equals("%2E")) { + if (path.charAt(cursor) == '%' && path.charAt(cursor + 1) == '2' && + (path.charAt(cursor + 2) == 'E' || path.charAt(cursor + 2) == 'e')) { cursor += 3; return true; } @@ -89,8 +89,7 @@ private boolean readNonSeparator() { cursor++; return true; } - final String sub = 
path.substring(cursor + 1, cursor + 3); - if (sub.equals(URLENCODED_SLASH_UC) || sub.equals(URLENCODED_SLASH_LC)) { + if (isCursorAtUrlEncodedSlash()) { return false; } cursor++; @@ -115,8 +114,7 @@ private boolean readSeparator() { if (cursor + 2 >= end) { return false; } - final String sub = path.substring(cursor, cursor + 3); - if (sub.equals(URLENCODED_SLASH_LC) || sub.equals(URLENCODED_SLASH_UC)) { + if (isCursorAtUrlEncodedSlash()) { cursor += 3; return true; } @@ -130,6 +128,11 @@ private void readToNextSeparator() { } } + private boolean isCursorAtUrlEncodedSlash() { + return path.charAt(cursor) == '%' && path.charAt(cursor + 1) == '2' && + (path.charAt(cursor + 2) == 'F' || path.charAt(cursor + 2) == 'f'); + } + private void removePreviousElement(final int to) throws FileSystemException { if (lastSeparator == 0) { // Previous element is missing @@ -669,11 +672,10 @@ public static FileType normalisePath(final StringBuilder path) throws FileSystem if (maxlen > 1 && path.charAt(maxlen - 1) == SEPARATOR_CHAR) { path.delete(maxlen - 1, maxlen); } - if (maxlen > 3) { - final String sub = path.substring(maxlen - 3); - if (sub.equals(URLENCODED_SLASH_UC) || sub.equals(URLENCODED_SLASH_LC)) { - path.delete(maxlen - 3, maxlen); - } + if (maxlen > 3 && + path.charAt(maxlen - 3) == '%' && path.charAt(maxlen - 2) == '2' && + (path.charAt(maxlen - 1) == 'F' || path.charAt(maxlen - 1) == 'f')) { + path.delete(maxlen - 3, maxlen); } } diff --git a/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserBenchmark.java b/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserBenchmark.java new file mode 100644 index 0000000000..babe24d1f0 --- /dev/null +++ b/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserBenchmark.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.vfs2.provider; + +import org.apache.commons.vfs2.FileSystemException; +import org.openjdk.jmh.annotations.*; + +@BenchmarkMode(Mode.Throughput) +@Warmup(iterations = 2) +@Measurement(iterations = 5) +public class UriParserBenchmark { + + private static final String PATH_TO_NORMALIZE = "file:///this/../is/a%2flong%2Fpath/./for testing/normlisePath%2fmethod.txt"; + + @Benchmark + public void normalisePath() throws FileSystemException { + StringBuilder path = new StringBuilder(PATH_TO_NORMALIZE); + UriParser.normalisePath(path); + } +} From 52d091a011cbdafce7b990d086dd502afc16ee86 Mon Sep 17 00:00:00 2001 From: Anthony Goubard Date: Fri, 31 May 2024 12:07:58 +0200 Subject: [PATCH 2/7] Improved performance of UriParser.extractScheme --- .../java/org/apache/commons/vfs2/provider/UriParser.java | 2 +- .../apache/commons/vfs2/provider/UriParserBenchmark.java | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java index 569ba069db..df9a05e598 100644 --- a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java +++ 
b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java @@ -599,7 +599,7 @@ public static String extractScheme(final String[] schemes, final String uri, fin buffer.append(uri); } for (final String scheme : schemes) { - if (uri.startsWith(scheme + ":")) { + if (uri.startsWith(scheme) && uri.length() > scheme.length() && uri.charAt(scheme.length()) == ':') { if (buffer != null) { buffer.delete(0, uri.indexOf(':') + 1); } diff --git a/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserBenchmark.java b/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserBenchmark.java index babe24d1f0..64a436e12c 100644 --- a/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserBenchmark.java +++ b/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserBenchmark.java @@ -21,14 +21,21 @@ @BenchmarkMode(Mode.Throughput) @Warmup(iterations = 2) -@Measurement(iterations = 5) +@Measurement(iterations = 2) +@Fork(2) public class UriParserBenchmark { private static final String PATH_TO_NORMALIZE = "file:///this/../is/a%2flong%2Fpath/./for testing/normlisePath%2fmethod.txt"; + private static final String[] SCHEMES = {"file", "ftp", "ftps", "webdav", "temp", "ram", "http", "https", "sftp", "zip", "jar", "tgz", "gz"}; @Benchmark public void normalisePath() throws FileSystemException { StringBuilder path = new StringBuilder(PATH_TO_NORMALIZE); UriParser.normalisePath(path); } + + @Benchmark + public void extractScheme() throws FileSystemException { + UriParser.extractScheme(SCHEMES, PATH_TO_NORMALIZE); + } } From 8ddf3f0fbc4b1013ab1752aae36cdc0af410df28 Mon Sep 17 00:00:00 2001 From: Anthony Goubard Date: Sat, 5 Oct 2024 17:27:56 +0200 Subject: [PATCH 3/7] Aligned measurement with master branch --- .../org/apache/commons/vfs2/provider/UriParserBenchmark.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserBenchmark.java 
b/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserBenchmark.java index 2ba6c469d6..b03c59fca2 100644 --- a/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserBenchmark.java +++ b/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserBenchmark.java @@ -25,8 +25,7 @@ @BenchmarkMode(Mode.Throughput) @Warmup(iterations = 2) -@Measurement(iterations = 2) -@Fork(2) +@Measurement(iterations = 5) public class UriParserBenchmark { private static final String PATH_TO_NORMALIZE = "file:///this/../is/a%2flong%2Fpath/./for testing/normlisePath%2fmethod.txt"; From 3d22ccf69c47a9e9e4f0d97f0fb7c3e9b1f4da92 Mon Sep 17 00:00:00 2001 From: Anthony Goubard Date: Tue, 4 Feb 2025 09:38:21 +0100 Subject: [PATCH 4/7] Keep the same line feed --- .../main/java/org/apache/commons/vfs2/provider/UriParser.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java index 5a751cf267..1863903d87 100644 --- a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java +++ b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java @@ -431,7 +431,7 @@ public static String extractScheme(final String[] schemes, final String uri, fin buffer.append(uri); } for (final String scheme : schemes) { - if (uri.startsWith(scheme + ":")) { + if (uri.startsWith(scheme) && uri.length() > scheme.length() && uri.charAt(scheme.length()) == ':') { if (buffer != null) { buffer.delete(0, uri.indexOf(':') + 1); } From 737f9bae4e8892d7330753ccd9609bbc0892b1c9 Mon Sep 17 00:00:00 2001 From: Anthony Goubard Date: Tue, 4 Feb 2025 09:43:02 +0100 Subject: [PATCH 5/7] Revert "Keep the same line feed" This reverts commit 3d22ccf69c47a9e9e4f0d97f0fb7c3e9b1f4da92. 
--- .../main/java/org/apache/commons/vfs2/provider/UriParser.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java index 1863903d87..5a751cf267 100644 --- a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java +++ b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java @@ -431,7 +431,7 @@ public static String extractScheme(final String[] schemes, final String uri, fin buffer.append(uri); } for (final String scheme : schemes) { - if (uri.startsWith(scheme) && uri.length() > scheme.length() && uri.charAt(scheme.length()) == ':') { + if (uri.startsWith(scheme + ":")) { if (buffer != null) { buffer.delete(0, uri.indexOf(':') + 1); } From fd9a40883d01151a6c9a46ce82686992f3b0cc45 Mon Sep 17 00:00:00 2001 From: Anthony Goubard Date: Tue, 4 Feb 2025 09:50:37 +0100 Subject: [PATCH 6/7] Improved performance of UriParser#extractScheme --- .../main/java/org/apache/commons/vfs2/provider/UriParser.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java index 5a751cf267..1863903d87 100644 --- a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java +++ b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java @@ -431,7 +431,7 @@ public static String extractScheme(final String[] schemes, final String uri, fin buffer.append(uri); } for (final String scheme : schemes) { - if (uri.startsWith(scheme + ":")) { + if (uri.startsWith(scheme) && uri.length() > scheme.length() && uri.charAt(scheme.length()) == ':') { if (buffer != null) { buffer.delete(0, uri.indexOf(':') + 1); } From 5ed6ae82963e6a15ec5d13a72b88b0f99f00b7b6 Mon Sep 17 00:00:00 2001 From: Anthony Goubard Date: 
Tue, 4 Feb 2025 10:15:03 +0100 Subject: [PATCH 7/7] Improved performance of UriParser#extractScheme --- .../commons/vfs2/provider/UriParser.java | 1150 ++++++++--------- 1 file changed, 575 insertions(+), 575 deletions(-) diff --git a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java index 1863903d87..169c5575fa 100644 --- a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java +++ b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java @@ -1,575 +1,575 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.vfs2.provider; - -import java.util.Arrays; - -import org.apache.commons.lang3.SystemUtils; -import org.apache.commons.vfs2.FileName; -import org.apache.commons.vfs2.FileSystemException; -import org.apache.commons.vfs2.FileType; -import org.apache.commons.vfs2.VFS; - -/** - * Utilities for dealing with URIs. See RFC 2396 for details. - */ -public final class UriParser { - - /** - * The set of valid separators. These are all converted to the normalized one. 
Does not contain the - * normalized separator - */ - // public static final char[] separators = {'\\'}; - public static final char TRANS_SEPARATOR = '\\'; - - /** - * The normalized separator to use. - */ - private static final char SEPARATOR_CHAR = FileName.SEPARATOR_CHAR; - - private static final int HEX_BASE = 16; - - private static final int BITS_IN_HALF_BYTE = 4; - - private static final char LOW_MASK = 0x0F; - - /** - * Encodes and appends a string to a StringBuilder. - * - * @param buffer The StringBuilder to append to. - * @param unencodedValue The String to encode and append. - * @param reserved characters to encode. - */ - public static void appendEncoded(final StringBuilder buffer, final String unencodedValue, final char[] reserved) { - final int offset = buffer.length(); - buffer.append(unencodedValue); - encode(buffer, offset, unencodedValue.length(), reserved); - } - - static void appendEncodedRfc2396(final StringBuilder buffer, final String unencodedValue, final char[] allowed) { - final int offset = buffer.length(); - buffer.append(unencodedValue); - encodeRfc2396(buffer, offset, unencodedValue.length(), allowed); - } - - /** - * Canonicalizes a path. - * - * @param buffer Source data. - * @param offset Where to start reading. - * @param length How much to read. - * @param fileNameParser Now to encode and decode. - * @throws FileSystemException If an I/O error occurs. 
- */ - public static void canonicalizePath(final StringBuilder buffer, final int offset, final int length, - final FileNameParser fileNameParser) throws FileSystemException { - int index = offset; - int count = length; - for (; count > 0; count--, index++) { - final char ch = buffer.charAt(index); - if (ch == '%') { - if (count < 3) { - throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", - buffer.substring(index, index + count)); - } - - // Decode - final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE); - final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE); - if (dig1 == -1 || dig2 == -1) { - throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", - buffer.substring(index, index + 3)); - } - final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2); - - final boolean match = value == '%' || fileNameParser.encodeCharacter(value); - - if (match) { - // this is a reserved character, not allowed to decode - index += 2; - count -= 2; - continue; - } - - // Replace - buffer.setCharAt(index, value); - buffer.delete(index + 1, index + 3); - count -= 2; - } else if (fileNameParser.encodeCharacter(ch)) { - // Encode - final char[] digits = {Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE), Character.forDigit(ch & LOW_MASK, HEX_BASE)}; - buffer.setCharAt(index, '%'); - buffer.insert(index + 1, digits); - index += 2; - } - } - } - - /** - * Decodes the String. - * - * @param uri The String to decode. - * @throws FileSystemException if an error occurs. - */ - public static void checkUriEncoding(final String uri) throws FileSystemException { - decode(uri); - } - - /** - * Removes %nn encodings from a string. - * - * @param encodedStr The encoded String. - * @return The decoded String. - * @throws FileSystemException if an error occurs. 
- */ - public static String decode(final String encodedStr) throws FileSystemException { - if (encodedStr == null) { - return null; - } - if (encodedStr.indexOf('%') < 0) { - return encodedStr; - } - final StringBuilder buffer = new StringBuilder(encodedStr); - decode(buffer, 0, buffer.length()); - return buffer.toString(); - } - - /** - * Removes %nn encodings from a string. - * - * @param buffer StringBuilder containing the string to decode. - * @param offset The position in the string to start decoding. - * @param length The number of characters to decode. - * @throws FileSystemException if an error occurs. - */ - public static void decode(final StringBuilder buffer, final int offset, final int length) - throws FileSystemException { - int index = offset; - int count = length; - boolean ipv6Host = false; - for (; count > 0; count--, index++) { - final char ch = buffer.charAt(index); - if (ch == '[') { - ipv6Host = true; - } - if (ch == ']') { - ipv6Host = false; - } - if (ch != '%' || ipv6Host) { - continue; - } - - if (count < 3) { - throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", - buffer.substring(index, index + count)); - } - - // Decode - final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE); - final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE); - if (dig1 == -1 || dig2 == -1) { - throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", - buffer.substring(index, index + 3)); - } - final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2); - - // Replace - buffer.setCharAt(index, value); - buffer.delete(index + 1, index + 3); - count -= 2; - } - } - - /** - * Converts "special" characters to their %nn value. - * - * @param decodedStr The decoded String. - * @return The encoded String. - */ - public static String encode(final String decodedStr) { - return encode(decodedStr, null); - } - - /** - * Converts "special" characters to their %nn value. 
- * - * @param decodedStr The decoded String. - * @param reserved Characters to encode. - * @return The encoded String - */ - public static String encode(final String decodedStr, final char[] reserved) { - if (decodedStr == null) { - return null; - } - final StringBuilder buffer = new StringBuilder(decodedStr); - encode(buffer, 0, buffer.length(), reserved); - return buffer.toString(); - } - - /** - * Encode an array of Strings. - * - * @param strings The array of Strings to encode. - * @return An array of encoded Strings. - */ - public static String[] encode(final String[] strings) { - if (strings == null) { - return null; - } - Arrays.setAll(strings, i -> encode(strings[i])); - return strings; - } - - /** - * Encodes a set of reserved characters in a StringBuilder, using the URI %nn encoding. Always encodes % characters. - * - * @param buffer The StringBuilder to append to. - * @param offset The position in the buffer to start encoding at. - * @param length The number of characters to encode. - * @param reserved characters to encode. 
- */ - public static void encode(final StringBuilder buffer, final int offset, final int length, final char[] reserved) { - int index = offset; - int count = length; - for (; count > 0; index++, count--) { - final char ch = buffer.charAt(index); - boolean match = ch == '%'; - if (reserved != null) { - for (int i = 0; !match && i < reserved.length; i++) { - if (ch == reserved[i]) { - match = true; - break; - } - } - } - if (match) { - // Encode - final char[] digits = {Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE), Character.forDigit(ch & LOW_MASK, HEX_BASE)}; - buffer.setCharAt(index, '%'); - buffer.insert(index + 1, digits); - index += 2; - } - } - } - - static void encodeRfc2396(final StringBuilder buffer, final int offset, final int length, final char[] allowed) { - int index = offset; - int count = length; - for (; count > 0; index++, count--) { - final char ch = buffer.charAt(index); - if (Arrays.binarySearch(allowed, ch) < 0) { - // Encode - final char[] digits = {Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE), Character.forDigit(ch & LOW_MASK, HEX_BASE)}; - buffer.setCharAt(index, '%'); - buffer.insert(index + 1, digits); - index += 2; - } - } - } - - /** - * Extracts the first element of a path. - * - * @param name StringBuilder containing the path. - * @return The first element of the path. - */ - public static String extractFirstElement(final StringBuilder name) { - final int len = name.length(); - if (len < 1) { - return null; - } - int startPos = 0; - if (name.charAt(0) == SEPARATOR_CHAR) { - startPos = 1; - } - for (int pos = startPos; pos < len; pos++) { - if (name.charAt(pos) == SEPARATOR_CHAR) { - // Found a separator - final String elem = name.substring(startPos, pos); - name.delete(startPos, pos + 1); - return elem; - } - } - - // No separator - final String elem = name.substring(startPos); - name.setLength(0); - return elem; - } - - /** - * Extract the query String from the URI. 
- * - * @param name StringBuilder containing the URI. - * @return The query string, if any. null otherwise. - */ - public static String extractQueryString(final StringBuilder name) { - for (int pos = 0; pos < name.length(); pos++) { - if (name.charAt(pos) == '?') { - final String queryString = name.substring(pos + 1); - name.delete(pos, name.length()); - return queryString; - } - } - - return null; - } - - /** - * Extracts the scheme from a URI. - * - * @param uri The URI. - * @return The scheme name. Returns null if there is no scheme. - * @deprecated Use instead {@link #extractScheme}. Will be removed in 3.0. - */ - @Deprecated - public static String extractScheme(final String uri) { - return extractScheme(uri, null); - } - - /** - * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI. - * - * @param uri The URI. - * @param buffer Returns the remainder of the URI. - * @return The scheme name. Returns null if there is no scheme. - * @deprecated Use instead {@link #extractScheme}. Will be removed in 3.0. 
- */ - @Deprecated - public static String extractScheme(final String uri, final StringBuilder buffer) { - if (buffer != null) { - buffer.setLength(0); - buffer.append(uri); - } - - final int maxPos = uri.length(); - for (int pos = 0; pos < maxPos; pos++) { - final char ch = uri.charAt(pos); - - if (ch == ':') { - // Found the end of the scheme - final String scheme = uri.substring(0, pos); - if (scheme.length() <= 1 && SystemUtils.IS_OS_WINDOWS) { - // This is not a scheme, but a Windows drive letter - return null; - } - if (buffer != null) { - buffer.delete(0, pos + 1); - } - return scheme.intern(); - } - - if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z') { - // A scheme character - continue; - } - if (!(pos > 0 && (ch >= '0' && ch <= '9' || ch == '+' || ch == '-' || ch == '.'))) { - // Not a scheme character - break; - } - // A scheme character (these are not allowed as the first - // character of the scheme), but can be used as subsequent - // characters. - } - - // No scheme in URI - return null; - } - - /** - * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI. - *

- * The scheme is extracted based on the currently supported schemes in the system. That is to say the schemes - * supported by the registered providers. - *

- *

- * This allows us to handle varying scheme's without making assumptions based on the ':' character. Specifically - * handle scheme extraction calls for URI parameters that are not actually uri's, but may be names with ':' in them. - *

- * @param schemes The schemes to check. - * @param uri The potential URI. May also be a name. - * @return The scheme name. Returns null if there is no scheme. - * @since 2.3 - */ - public static String extractScheme(final String[] schemes, final String uri) { - return extractScheme(schemes, uri, null); - } - - /** - * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI. - *

- * The scheme is extracted based on the given set of schemes. Normally, that is to say the schemes - * supported by the registered providers. - *

- *

- * This allows us to handle varying scheme's without making assumptions based on the ':' character. Specifically - * handle scheme extraction calls for URI parameters that are not actually URI's, but may be names with ':' in them. - *

- * @param schemes The schemes to check. - * @param uri The potential URI. May also just be a name. - * @param buffer Returns the remainder of the URI. - * @return The scheme name. Returns null if there is no scheme. - * @since 2.3 - */ - public static String extractScheme(final String[] schemes, final String uri, final StringBuilder buffer) { - if (buffer != null) { - buffer.setLength(0); - buffer.append(uri); - } - for (final String scheme : schemes) { - if (uri.startsWith(scheme) && uri.length() > scheme.length() && uri.charAt(scheme.length()) == ':') { - if (buffer != null) { - buffer.delete(0, uri.indexOf(':') + 1); - } - return scheme; - } - } - return null; - } - - /** - * Normalises the separators in a name. - * - * @param name The StringBuilder containing the name - * @return true if the StringBuilder was modified. - */ - public static boolean fixSeparators(final StringBuilder name) { - boolean changed = false; - int maxlen = name.length(); - for (int i = 0; i < maxlen; i++) { - final char ch = name.charAt(i); - if (ch == TRANS_SEPARATOR) { - name.setCharAt(i, SEPARATOR_CHAR); - changed = true; - } - if (i < maxlen - 2 && name.charAt(i) == '%' && name.charAt(i + 1) == '2') { - if (name.charAt(i + 2) == 'f' || name.charAt(i + 2) == 'F') { - name.setCharAt(i, SEPARATOR_CHAR); - name.delete(i + 1, i + 3); - maxlen -= 2; - changed = true; - } else if (name.charAt(i + 2) == 'e' || name.charAt(i + 2) == 'E') { - name.setCharAt(i, '.'); - name.delete(i + 1, i + 3); - maxlen -= 2; - changed = true; - } - } - } - return changed; - } - - /** - * Normalises a path. Does the following: - *
    - *
  • Removes empty path elements. - *
  • Handles '.' and '..' elements. - *
  • Removes trailing separator. - *
- * - * Its assumed that the separators are already fixed. - * - * @param path The path to normalize. - * @return The FileType. - * @throws FileSystemException if an error occurs. - * @see #fixSeparators - */ - public static FileType normalisePath(final StringBuilder path) throws FileSystemException { - FileType fileType = FileType.FOLDER; - if (path.length() == 0) { - return fileType; - } - - // '/' or '.' or '..' or anyPath/..' or 'anyPath/.' should always be a path - if (path.charAt(path.length() - 1) != '/' - && path.lastIndexOf("/..") != path.length() - 3 - && path.lastIndexOf("/.") != path.length() - 2 - && path.lastIndexOf("..") != 0 - && path.lastIndexOf(".") != 0 - ) { - fileType = FileType.FILE; - } - - // Adjust separators - // fixSeparators(path); - - // Determine the start of the first element - int startFirstElem = 0; - if (path.charAt(0) == SEPARATOR_CHAR) { - if (path.length() == 1) { - return fileType; - } - startFirstElem = 1; - } - - // Iterate over each element - int startElem = startFirstElem; - int maxlen = path.length(); - while (startElem < maxlen) { - // Find the end of the element - int endElem = startElem; - while (endElem < maxlen && path.charAt(endElem) != SEPARATOR_CHAR) { - endElem++; - } - - final int elemLen = endElem - startElem; - if (elemLen == 0) { - // An empty element - axe it - path.deleteCharAt(endElem); - maxlen = path.length(); - continue; - } - if (elemLen == 1 && path.charAt(startElem) == '.') { - // A '.' element - axe it - path.deleteCharAt(startElem); - maxlen = path.length(); - continue; - } - if (elemLen == 2 && path.charAt(startElem) == '.' && path.charAt(startElem + 1) == '.') { - // A '..' 
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.vfs2.provider;

import java.util.Arrays;

import org.apache.commons.lang3.SystemUtils;
import org.apache.commons.vfs2.FileName;
import org.apache.commons.vfs2.FileSystemException;
import org.apache.commons.vfs2.FileType;
import org.apache.commons.vfs2.VFS;

/**
 * Utilities for dealing with URIs. See RFC 2396 for details.
 * <p>
 * All methods are static. The methods taking a {@link StringBuilder} modify that builder in place.
 * </p>
 */
public final class UriParser {

    /**
     * The separator character that is converted to the normalized separator by {@link #fixSeparators}.
     * This is not the normalized separator itself.
     */
    public static final char TRANS_SEPARATOR = '\\';

    /**
     * The normalized separator to use.
     */
    private static final char SEPARATOR_CHAR = FileName.SEPARATOR_CHAR;

    private static final int HEX_BASE = 16;

    private static final int BITS_IN_HALF_BYTE = 4;

    private static final char LOW_MASK = 0x0F;

    /** Message key used for every malformed {@code %nn} sequence. */
    private static final String INVALID_ESCAPE_KEY = "vfs.provider/invalid-escape-sequence.error";

    /** Number of characters in a {@code %nn} escape sequence. */
    private static final int ESCAPE_LENGTH = 3;

    /**
     * Encodes and appends a string to a StringBuilder.
     *
     * @param buffer The StringBuilder to append to.
     * @param unencodedValue The String to encode and append.
     * @param reserved characters to encode.
     */
    public static void appendEncoded(final StringBuilder buffer, final String unencodedValue, final char[] reserved) {
        final int offset = buffer.length();
        buffer.append(unencodedValue);
        encode(buffer, offset, unencodedValue.length(), reserved);
    }

    /**
     * Appends a string to a StringBuilder, then escapes every appended character that is not in {@code allowed}.
     *
     * @param buffer The StringBuilder to append to.
     * @param unencodedValue The String to encode and append.
     * @param allowed The characters left unescaped; searched with a binary search, so it must be sorted.
     */
    static void appendEncodedRfc2396(final StringBuilder buffer, final String unencodedValue, final char[] allowed) {
        final int offset = buffer.length();
        buffer.append(unencodedValue);
        encodeRfc2396(buffer, offset, unencodedValue.length(), allowed);
    }

    /**
     * Canonicalizes a path in place: escape sequences of characters that do not need encoding are decoded, and
     * plain characters that do need encoding are escaped.
     *
     * @param buffer Source data.
     * @param offset Where to start reading.
     * @param length How much to read.
     * @param fileNameParser Decides (through {@code encodeCharacter}) which characters have to stay encoded.
     * @throws FileSystemException If an escape sequence is truncated or contains non-hexadecimal digits.
     */
    public static void canonicalizePath(final StringBuilder buffer, final int offset, final int length,
            final FileNameParser fileNameParser) throws FileSystemException {
        int index = offset;
        int count = length;
        for (; count > 0; count--, index++) {
            final char ch = buffer.charAt(index);
            if (ch == '%') {
                final char value = decodeEscape(buffer, index, count);
                if (value == '%' || fileNameParser.encodeCharacter(value)) {
                    // This is a reserved character, not allowed to decode: skip over the two digits.
                    index += 2;
                    count -= 2;
                    continue;
                }

                // Replace the three-character escape by the single decoded character.
                buffer.setCharAt(index, value);
                buffer.delete(index + 1, index + ESCAPE_LENGTH);
                count -= 2;
            } else if (fileNameParser.encodeCharacter(ch)) {
                // Encode; the buffer grows by two, but these extra characters were not part of the input range.
                insertEscape(buffer, index, ch);
                index += 2;
            }
        }
    }

    /**
     * Checks that all {@code %nn} sequences of the given URI are well-formed by decoding it.
     *
     * @param uri The String to check.
     * @throws FileSystemException if an escape sequence is malformed.
     */
    public static void checkUriEncoding(final String uri) throws FileSystemException {
        decode(uri);
    }

    /**
     * Removes %nn encodings from a string.
     *
     * @param encodedStr The encoded String, may be {@code null}.
     * @return The decoded String; {@code null} for {@code null} input, the same instance when it contains no '%'.
     * @throws FileSystemException if an escape sequence is malformed.
     */
    public static String decode(final String encodedStr) throws FileSystemException {
        if (encodedStr == null) {
            return null;
        }
        if (encodedStr.indexOf('%') < 0) {
            return encodedStr;
        }
        final StringBuilder buffer = new StringBuilder(encodedStr);
        decode(buffer, 0, buffer.length());
        return buffer.toString();
    }

    /**
     * Removes %nn encodings from a string. Text between '[' and ']' is left untouched.
     *
     * @param buffer StringBuilder containing the string to decode.
     * @param offset The position in the string to start decoding.
     * @param length The number of characters to decode.
     * @throws FileSystemException if an escape sequence is truncated or contains non-hexadecimal digits.
     */
    public static void decode(final StringBuilder buffer, final int offset, final int length)
            throws FileSystemException {
        int index = offset;
        int count = length;
        boolean ipv6Host = false;
        for (; count > 0; count--, index++) {
            final char ch = buffer.charAt(index);
            if (ch == '[') {
                ipv6Host = true;
            }
            if (ch == ']') {
                ipv6Host = false;
            }
            if (ch != '%' || ipv6Host) {
                continue;
            }

            final char value = decodeEscape(buffer, index, count);

            // Replace the three-character escape by the single decoded character.
            buffer.setCharAt(index, value);
            buffer.delete(index + 1, index + ESCAPE_LENGTH);
            count -= 2;
        }
    }

    /**
     * Reads the {@code %nn} escape sequence starting at {@code index} and returns the character it denotes.
     *
     * @param buffer The buffer holding the escape sequence.
     * @param index The position of the '%' character.
     * @param remaining The number of characters, counted from {@code index}, that may still be read.
     * @return The decoded character.
     * @throws FileSystemException if fewer than three characters remain or a digit is not hexadecimal.
     */
    private static char decodeEscape(final StringBuilder buffer, final int index, final int remaining)
            throws FileSystemException {
        if (remaining < ESCAPE_LENGTH) {
            throw new FileSystemException(INVALID_ESCAPE_KEY, buffer.substring(index, index + remaining));
        }
        final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE);
        final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE);
        if (dig1 == -1 || dig2 == -1) {
            throw new FileSystemException(INVALID_ESCAPE_KEY, buffer.substring(index, index + ESCAPE_LENGTH));
        }
        return (char) (dig1 << BITS_IN_HALF_BYTE | dig2);
    }

    /**
     * Converts "special" characters to their %nn value. Only '%' is treated as special.
     *
     * @param decodedStr The decoded String.
     * @return The encoded String.
     */
    public static String encode(final String decodedStr) {
        return encode(decodedStr, null);
    }

    /**
     * Converts "special" characters to their %nn value.
     *
     * @param decodedStr The decoded String, may be {@code null}.
     * @param reserved Characters to encode in addition to '%', may be {@code null}.
     * @return The encoded String, {@code null} for {@code null} input.
     */
    public static String encode(final String decodedStr, final char[] reserved) {
        if (decodedStr == null) {
            return null;
        }
        final StringBuilder buffer = new StringBuilder(decodedStr);
        encode(buffer, 0, buffer.length(), reserved);
        return buffer.toString();
    }

    /**
     * Encode an array of Strings. The given array is modified in place and returned.
     *
     * @param strings The array of Strings to encode, may be {@code null}.
     * @return The same array, now holding the encoded Strings; {@code null} for {@code null} input.
     */
    public static String[] encode(final String[] strings) {
        if (strings == null) {
            return null;
        }
        Arrays.setAll(strings, i -> encode(strings[i]));
        return strings;
    }

    /**
     * Encodes a set of reserved characters in a StringBuilder, using the URI %nn encoding. Always encodes % characters.
     *
     * @param buffer The StringBuilder to encode in place.
     * @param offset The position in the buffer to start encoding at.
     * @param length The number of characters to encode.
     * @param reserved characters to encode, may be {@code null}.
     */
    public static void encode(final StringBuilder buffer, final int offset, final int length, final char[] reserved) {
        int index = offset;
        int count = length;
        for (; count > 0; index++, count--) {
            final char ch = buffer.charAt(index);
            if (mustEncode(ch, reserved)) {
                insertEscape(buffer, index, ch);
                // Skip the two digits that were just inserted.
                index += 2;
            }
        }
    }

    /**
     * Escapes, in place, every character of the given range that is not in {@code allowed}.
     *
     * @param buffer The StringBuilder to encode in place.
     * @param offset The position in the buffer to start encoding at.
     * @param length The number of characters to encode.
     * @param allowed The characters left unescaped; searched with a binary search, so it must be sorted.
     */
    static void encodeRfc2396(final StringBuilder buffer, final int offset, final int length, final char[] allowed) {
        int index = offset;
        int count = length;
        for (; count > 0; index++, count--) {
            final char ch = buffer.charAt(index);
            if (Arrays.binarySearch(allowed, ch) < 0) {
                insertEscape(buffer, index, ch);
                // Skip the two digits that were just inserted.
                index += 2;
            }
        }
    }

    /**
     * Tells whether {@code ch} is '%' or one of the reserved characters.
     *
     * @param ch The character to test.
     * @param reserved The reserved characters, may be {@code null}.
     * @return {@code true} if the character has to be escaped.
     */
    private static boolean mustEncode(final char ch, final char[] reserved) {
        if (ch == '%') {
            return true;
        }
        if (reserved != null) {
            for (final char candidate : reserved) {
                if (ch == candidate) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Replaces the character at {@code index} by its {@code %nn} escape, built from the low eight bits of
     * {@code ch} with lower-case hexadecimal digits. The buffer grows by two characters.
     *
     * @param buffer The buffer to modify.
     * @param index The position of the character to replace.
     * @param ch The character to escape.
     */
    private static void insertEscape(final StringBuilder buffer, final int index, final char ch) {
        final char[] digits = {
            Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE),
            Character.forDigit(ch & LOW_MASK, HEX_BASE)
        };
        buffer.setCharAt(index, '%');
        buffer.insert(index + 1, digits);
    }

    /**
     * Extracts the first element of a path and removes it (with its trailing separator) from the buffer.
     * A single leading separator is skipped and stays in the buffer, unless the path holds only one element, in
     * which case the buffer is emptied.
     *
     * @param name StringBuilder containing the path.
     * @return The first element of the path, {@code null} if the buffer is empty.
     */
    public static String extractFirstElement(final StringBuilder name) {
        final int len = name.length();
        if (len < 1) {
            return null;
        }
        int startPos = 0;
        if (name.charAt(0) == SEPARATOR_CHAR) {
            startPos = 1;
        }
        for (int pos = startPos; pos < len; pos++) {
            if (name.charAt(pos) == SEPARATOR_CHAR) {
                // Found a separator
                final String elem = name.substring(startPos, pos);
                name.delete(startPos, pos + 1);
                return elem;
            }
        }

        // No separator
        final String elem = name.substring(startPos);
        name.setLength(0);
        return elem;
    }

    /**
     * Extract the query String from the URI. The first '?' and everything after it is removed from the buffer.
     *
     * @param name StringBuilder containing the URI.
     * @return The query string (without the '?'), if any. null otherwise.
     */
    public static String extractQueryString(final StringBuilder name) {
        final int pos = name.indexOf("?");
        if (pos < 0) {
            return null;
        }
        final String queryString = name.substring(pos + 1);
        name.delete(pos, name.length());
        return queryString;
    }

    /**
     * Extracts the scheme from a URI.
     *
     * @param uri The URI.
     * @return The scheme name. Returns null if there is no scheme.
     * @deprecated Use instead {@link #extractScheme(String[], String)}. Will be removed in 3.0.
     */
    @Deprecated
    public static String extractScheme(final String uri) {
        return extractScheme(uri, null);
    }

    /**
     * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI.
     *
     * @param uri The URI.
     * @param buffer Returns the remainder of the URI, may be {@code null}.
     * @return The scheme name. Returns null if there is no scheme.
     * @deprecated Use instead {@link #extractScheme(String[], String, StringBuilder)}. Will be removed in 3.0.
     */
    @Deprecated
    public static String extractScheme(final String uri, final StringBuilder buffer) {
        if (buffer != null) {
            buffer.setLength(0);
            buffer.append(uri);
        }

        final int maxPos = uri.length();
        for (int pos = 0; pos < maxPos; pos++) {
            final char ch = uri.charAt(pos);

            if (ch == ':') {
                // Found the end of the scheme
                final String scheme = uri.substring(0, pos);
                if (scheme.length() <= 1 && SystemUtils.IS_OS_WINDOWS) {
                    // This is not a scheme, but a Windows drive letter
                    return null;
                }
                if (buffer != null) {
                    buffer.delete(0, pos + 1);
                }
                return scheme.intern();
            }

            if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z') {
                // A scheme character
                continue;
            }
            if (!(pos > 0 && (ch >= '0' && ch <= '9' || ch == '+' || ch == '-' || ch == '.'))) {
                // Not a scheme character
                break;
            }
            // A scheme character (these are not allowed as the first
            // character of the scheme), but can be used as subsequent
            // characters.
        }

        // No scheme in URI
        return null;
    }

    /**
     * Extracts the scheme from a URI.
     * <p>
     * The scheme is extracted based on the given set of schemes. Normally, that is to say the schemes
     * supported by the registered providers.
     * </p>
     * <p>
     * This allows us to handle varying schemes without making assumptions based on the ':' character. Specifically
     * handle scheme extraction calls for URI parameters that are not actually URIs, but may be names with ':' in them.
     * </p>
     *
     * @param schemes The schemes to check.
     * @param uri The potential URI. May also be a name.
     * @return The scheme name. Returns null if there is no scheme.
     * @since 2.3
     */
    public static String extractScheme(final String[] schemes, final String uri) {
        return extractScheme(schemes, uri, null);
    }

    /**
     * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI.
     * <p>
     * The scheme is extracted based on the given set of schemes. Normally, that is to say the schemes
     * supported by the registered providers.
     * </p>
     * <p>
     * This allows us to handle varying schemes without making assumptions based on the ':' character. Specifically
     * handle scheme extraction calls for URI parameters that are not actually URIs, but may be names with ':' in them.
     * </p>
     *
     * @param schemes The schemes to check.
     * @param uri The potential URI. May also just be a name.
     * @param buffer Returns the remainder of the URI, may be {@code null}.
     * @return The scheme name. Returns null if there is no scheme.
     * @since 2.3
     */
    public static String extractScheme(final String[] schemes, final String uri, final StringBuilder buffer) {
        if (buffer != null) {
            buffer.setLength(0);
            buffer.append(uri);
        }
        for (final String scheme : schemes) {
            final int delimiterPos = scheme.length();
            if (uri.startsWith(scheme) && uri.length() > delimiterPos && uri.charAt(delimiterPos) == ':') {
                if (buffer != null) {
                    // The delimiter is known to sit right behind the scheme; no need to search for it again.
                    buffer.delete(0, delimiterPos + 1);
                }
                return scheme;
            }
        }
        return null;
    }

    /**
     * Normalises the separators in a name: {@link #TRANS_SEPARATOR} and the escapes {@code %2f}/{@code %2F} become
     * the normalized separator, the escapes {@code %2e}/{@code %2E} become '.'.
     *
     * @param name The StringBuilder containing the name
     * @return true if the StringBuilder was modified.
     */
    public static boolean fixSeparators(final StringBuilder name) {
        boolean changed = false;
        int maxlen = name.length();
        for (int i = 0; i < maxlen; i++) {
            final char ch = name.charAt(i);
            if (ch == TRANS_SEPARATOR) {
                name.setCharAt(i, SEPARATOR_CHAR);
                changed = true;
            } else if (ch == '%' && i < maxlen - 2 && name.charAt(i + 1) == '2') {
                final char third = name.charAt(i + 2);
                final char replacement;
                if (third == 'f' || third == 'F') {
                    replacement = SEPARATOR_CHAR;
                } else if (third == 'e' || third == 'E') {
                    replacement = '.';
                } else {
                    // Some other %2x escape: leave it alone.
                    continue;
                }
                name.setCharAt(i, replacement);
                name.delete(i + 1, i + ESCAPE_LENGTH);
                maxlen -= 2;
                changed = true;
            }
        }
        return changed;
    }

    /**
     * Tells whether the content of the buffer ends with the given suffix.
     *
     * @param buffer The buffer to inspect.
     * @param suffix The suffix to look for.
     * @return {@code true} if the buffer is at least as long as the suffix and ends with it.
     */
    private static boolean endsWith(final StringBuilder buffer, final String suffix) {
        final int start = buffer.length() - suffix.length();
        // Guard against start < 0: comparing lastIndexOf() with a negative expected position would wrongly
        // match its -1 "not found" result.
        return start >= 0 && buffer.indexOf(suffix, start) == start;
    }

    /**
     * Normalises a path. Does the following:
     * <ul>
     * <li>Removes empty path elements.
     * <li>Handles '.' and '..' elements.
     * <li>Removes trailing separator (unless {@link VFS#isUriStyle()} is set).
     * </ul>
     *
     * It is assumed that the separators are already fixed.
     *
     * @param path The path to normalize; modified in place.
     * @return {@link FileType#FOLDER} if the path is empty, ends with a separator or ends with a '.' or '..'
     *         element, otherwise {@link FileType#FILE}.
     * @throws FileSystemException if a '..' element has no previous element to remove.
     * @see #fixSeparators
     */
    public static FileType normalisePath(final StringBuilder path) throws FileSystemException {
        FileType fileType = FileType.FOLDER;
        if (path.length() == 0) {
            return fileType;
        }

        // '/' or '.' or '..' or 'anyPath/..' or 'anyPath/.' should always be a path.
        // The two lastIndexOf() checks are the original ones: they are false whenever the last "." (or "..")
        // of the path is found at index 0, which covers "." and "..".
        if (path.charAt(path.length() - 1) != '/'
                && !endsWith(path, "/..")
                && !endsWith(path, "/.")
                && path.lastIndexOf("..") != 0
                && path.lastIndexOf(".") != 0) {
            fileType = FileType.FILE;
        }

        // Determine the start of the first element
        int startFirstElem = 0;
        if (path.charAt(0) == SEPARATOR_CHAR) {
            if (path.length() == 1) {
                return fileType;
            }
            startFirstElem = 1;
        }

        // Iterate over each element
        int startElem = startFirstElem;
        int maxlen = path.length();
        while (startElem < maxlen) {
            // Find the end of the element
            int endElem = startElem;
            while (endElem < maxlen && path.charAt(endElem) != SEPARATOR_CHAR) {
                endElem++;
            }

            final int elemLen = endElem - startElem;
            if (elemLen == 0) {
                // An empty element - axe it
                path.deleteCharAt(endElem);
                maxlen = path.length();
                continue;
            }
            if (elemLen == 1 && path.charAt(startElem) == '.') {
                // A '.' element - axe it; the separator behind it is removed as an empty element next round
                path.deleteCharAt(startElem);
                maxlen = path.length();
                continue;
            }
            if (elemLen == 2 && path.charAt(startElem) == '.' && path.charAt(startElem + 1) == '.') {
                // A '..' element - remove the previous element
                if (startElem == startFirstElem) {
                    // Previous element is missing
                    throw new FileSystemException("vfs.provider/invalid-relative-path.error");
                }

                // Find start of previous element
                int pos = startElem - 2;
                while (pos >= 0 && path.charAt(pos) != SEPARATOR_CHAR) {
                    pos--;
                }
                startElem = pos + 1;

                path.delete(startElem, endElem + 1);
                maxlen = path.length();
                continue;
            }

            // A regular element
            startElem = endElem + 1;
        }

        // Remove trailing separator
        if (!VFS.isUriStyle() && maxlen > 1 && path.charAt(maxlen - 1) == SEPARATOR_CHAR) {
            path.deleteCharAt(maxlen - 1);
        }

        return fileType;
    }

    private UriParser() {
    }
}