Skip to content

Commit

Permalink
improve support for relative, absolute and urls with missing protocol
Browse files Browse the repository at this point in the history
  • Loading branch information
David-Development committed Jul 10, 2022
1 parent c43199c commit 262c98e
Show file tree
Hide file tree
Showing 8 changed files with 199 additions and 21 deletions.
5 changes: 3 additions & 2 deletions News-Android-App/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ android {

vectorDrawables.useSupportLibrary = true

versionCode 177
versionName "0.9.9.74"
versionCode 178
versionName "0.9.9.75"
}

buildFeatures {
Expand All @@ -37,6 +37,7 @@ android {

testOptions {
execution 'ANDROIDX_TEST_ORCHESTRATOR'
unitTests.returnDefaultValues = true
}

compileOptions {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import android.webkit.WebSettings;
import android.webkit.WebView;
import android.webkit.WebViewClient;
import android.widget.Toast;

import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
Expand Down Expand Up @@ -79,6 +80,7 @@ public class NewsDetailFragment extends Fragment implements RssItemToHtmlTask.Li
private int section_number;
protected String html;
private String title = "";
private String baseUrl = null;
// private GestureDetector mGestureDetector;


Expand Down Expand Up @@ -408,7 +410,11 @@ public void loadURL(String url) {
.setStartAnimations(activity, R.anim.slide_in_right, R.anim.slide_out_left)
.setExitAnimations(activity, R.anim.slide_in_left, R.anim.slide_out_right)
.addDefaultShareMenuItem();
builder.build().launchUrl(activity, Uri.parse(url));
try {
builder.build().launchUrl(activity, Uri.parse(url));
} catch(Exception ex) {
Toast.makeText(NewsDetailFragment.this.getContext(), "Invalid URL: " + url, Toast.LENGTH_LONG).show();
}
break;
case 1: // External Browser
Intent browserIntent = new Intent(Intent.ACTION_VIEW, Uri.parse(url));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@

import com.nostra13.universalimageloader.core.ImageLoader;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
Expand All @@ -36,6 +38,8 @@ public class ImageHandler {
private static final String TAG = "[ImageHandler]";
private static final Pattern patternImg = Pattern.compile("<img[^>]*>");
private static final Pattern patternImgSrcLink = Pattern.compile("src=\"(.*?)\"");
private static final Pattern patternHref = Pattern.compile("<a[^>]*>");
private static final Pattern patternHrefLink = Pattern.compile("href=\"(.*?)\"");

public static List<String> getImageLinksFromText(String articleUrl, String text)
{
Expand All @@ -62,25 +66,35 @@ public static List<String> getImageLinksFromText(String articleUrl, String text)
return links;
}

public static String fixBrokenImageLinksInArticle(String articleUrl, String text)
{
Matcher matcher = patternImg.matcher(text);
public static String fixBrokenImageLinksInArticle(String articleUrl, String text) {
return fixBrokenLinkInArticle(articleUrl, text, patternImg, patternImgSrcLink, "src");
}

public static String fixBrokenHrefInArticle(String articleUrl, String text) {
return fixBrokenLinkInArticle(articleUrl, text, patternHref, patternHrefLink, "href");
}

public static String fixBrokenLinkInArticle(String articleUrl, String text, Pattern matcherElement, Pattern matcherLink, String htmlAttribut) {
Matcher matcher = matcherElement.matcher(text);
// Check all occurrences
while (matcher.find()) {
Matcher matcherSrcLink = patternImgSrcLink.matcher(matcher.group());
Matcher matcherSrcLink = matcherLink.matcher(matcher.group());
if(matcherSrcLink.find()) {
String link = matcherSrcLink.group(1);
String originalLink = link;
String originalArticleUrl = articleUrl;
if(link != null) {
if(link.startsWith("//")) { //Maybe the text contains image urls without http or https prefix.
// System.out.println("CASE_MISSING_PROTOCOL");
link = "https:" + link;
} else if (link.startsWith("/")) { // detected absolute url
// System.out.println("CASE_ABSOLUTE_URL");
try {
URI uri = new URI(articleUrl);
String domain = uri.getHost();
link = uri.getScheme() + "://" + domain + link;
} catch (URISyntaxException e) {
URL uri = new URL(articleUrl);
String protocol = uri.getProtocol();
String authority = uri.getAuthority();
link = String.format("%s://%s", protocol, authority) + link;
} catch (MalformedURLException e) {
e.printStackTrace();
Log.e(TAG, e.toString());
}
Expand All @@ -89,21 +103,42 @@ public static String fixBrokenImageLinksInArticle(String articleUrl, String text
// ./abc.jpeg or ./../abc.jpeg, ../abc.jpeg or ../../abc.jpeg
boolean linkNeedsHost = false;
if(link.startsWith("./")) {
//Log.d(TAG, "fix relative url (remove ./ in front)");
link = link.substring(2); // remove ./ from link
linkNeedsHost = true;
}

// if link is relative without anything else in front (e.g. pix/wow.svg)
if(!link.startsWith("http") && !link.startsWith(".")) {
linkNeedsHost = true;
if(!articleUrl.endsWith("/")) {
// remove last part of article url to get a relative url
articleUrl = sliceLastPathOfUrl(articleUrl);
if(!link.startsWith("http") && !link.startsWith(".") && !"about:blank".equals(articleUrl)) {
if(!link.contains("/")) {
// could be just a domain name or a reference to a file in the same directory (either way we should leave it as it is)
//System.out.println("CASE_RELATIVE_DOMAIN_OR_FILE");
} else {
String lastPartOfUrl = getFileName(link);

// the link ends with a filname (e.g. "test.jpg") - therefore we can assume that it is a relative url
if(lastPartOfUrl.contains(".")) {
if(!articleUrl.endsWith("/")) {
// the article contains a file in the end (doesn't end with "/") - therefore we need to remove the last part of the article URL
// System.out.println("CASE_RELATIVE_FILE_END");
// remove last part of article url to get a relative url
articleUrl = sliceLastPathOfUrl(articleUrl);
linkNeedsHost = true;
} else {
// article URL ends with a "/" so we can just append it
// System.out.println("CASE_RELATIVE_ADD_HOST");
linkNeedsHost = true;
}
} else {
// in case we have an url such as astralcodexten.substack.com/subscribe we assume that it is a path and we should not modify it
// System.out.println("CASE_RELATIVE_DOMAIN_SUBPATH");
}
}
}

// in case the article url is of type articles/matrix-vs-xmpp.html we need to remove the file plus the first part of the url
if(link.startsWith("../") && !articleUrl.endsWith("/")) {
// System.out.println("CASE_RELATIVE_PARENT");
linkNeedsHost = true;
articleUrl = sliceLastPathOfUrl(articleUrl);
articleUrl = sliceLastPathOfUrl(articleUrl);
Expand All @@ -112,24 +147,32 @@ public static String fixBrokenImageLinksInArticle(String articleUrl, String text

// if the article urls ends with an / we can just remove it piece by piece
while(link.startsWith("../")) {
// System.out.println("CASE_RELATIVE_PARENT");
linkNeedsHost = true;
articleUrl = sliceLastPathOfUrl(articleUrl);
link = link.substring(3); // remove ../ from link
}

if(linkNeedsHost) {
link = articleUrl + "/" + link;
// concat article url + link (and make sure that we have only one /)
if(articleUrl.endsWith("/")) {
link = articleUrl + link;
} else {
link = articleUrl + "/" + link;
}
}
}
}

if(!originalLink.equals(link)) {
Log.d(TAG, "Fixed link from: " + originalArticleUrl + " and " + originalLink + " -> " + link);
String l = "Fixed link in article: " + originalArticleUrl + ": " + originalLink + " -> " + link;
// System.out.println(l);
Log.d(TAG, l);
// text = text.replaceAll(originalLink, link); // this causes OutOfMemoryExceptions (https://github.com/nextcloud/news-android/issues/1055)

Pattern URL_PATTERN = Pattern.compile(originalLink);
Pattern URL_PATTERN = Pattern.compile(String.format("%s=\"%s\"", htmlAttribut, originalLink));
Matcher urlMatcher = URL_PATTERN.matcher(text);
return urlMatcher.replaceAll(link);
text = urlMatcher.replaceAll(String.format("%s=\"%s\"", htmlAttribut, link));

}
}
Expand All @@ -149,6 +192,16 @@ private static String sliceLastPathOfUrl(String url) {
}
}

private static String getFileName(String url) {
int idx = url.lastIndexOf("/");
int countOfSlashes = url.split("/").length - 1;
if(idx > 0) {
return url.substring(idx);
} else {
return url;
}
}

public static void clearCache()
{
if(ImageLoader.getInstance().isInited()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ static RssItem parseItem(JsonObject e) {
try {
// try fixing relative image links
content = ImageHandler.fixBrokenImageLinksInArticle(url, content);

// try fixing relative href links
content = ImageHandler.fixBrokenHrefInArticle(url, content);
} catch (Exception ex) {
ex.printStackTrace();
Log.e(TAG, "Error while fixing broken image links in article" + ex);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package de.luhmer.owncloudnewsreader.junit_tests;

import static org.junit.Assert.assertEquals;
import org.junit.Test;

import de.luhmer.owncloudnewsreader.helper.ImageHandler;

public class ImageHandlerTest {

@Test
public void testHref_CASE_MISSING_PROTOCOL() {
String articleUrl = "https://www.reddit.com/";
String content =
"<p><a rel=\"noreferrer\" href=\"//abc.de\">Test</a></p>" +
"<p><a rel=\"noreferrer\" href=\"//abcd.de\">Test</a></p>";
String expectedResult =
"<p><a rel=\"noreferrer\" href=\"https://abc.de\">Test</a></p>" +
"<p><a rel=\"noreferrer\" href=\"https://abcd.de\">Test</a></p>";
String result = ImageHandler.fixBrokenHrefInArticle(articleUrl, content);
assertEquals(expectedResult, result);
}

@Test
public void testHref_CASE_ABSOLUTE_URL() {
String articleUrl = "https://www.reddit.com/r/MsMarvelShow/comments/vp6qrp/continuing_the_discussion/";
String content =
"<p><a rel=\"noreferrer\" href=\"/r/LokiTV\">r/LokiTV</a></p>" +
"<p><a rel=\"noreferrer\" href=\"/r/shehulk\">r/shehulk</a></p>";
String expectedResult =
"<p><a rel=\"noreferrer\" href=\"https://www.reddit.com/r/LokiTV\">r/LokiTV</a></p>" +
"<p><a rel=\"noreferrer\" href=\"https://www.reddit.com/r/shehulk\">r/shehulk</a></p>";
String result = ImageHandler.fixBrokenHrefInArticle(articleUrl, content);
assertEquals(expectedResult, result);
}

@Test
public void testHref_CASE_RELATIVE_FILE_END() {
String articleUrl = "https://www.reddit.com/subdir";
String content =
"<p><a rel=\"noreferrer\" href=\"articles/matrix-vs-xmpp.html\">Test</a></p>" +
"<p><a rel=\"noreferrer\" href=\"articles/matrix-vs-xmpp2.html\">Test</a></p>";
String expectedResult =
"<p><a rel=\"noreferrer\" href=\"https://www.reddit.com/articles/matrix-vs-xmpp.html\">Test</a></p>" +
"<p><a rel=\"noreferrer\" href=\"https://www.reddit.com/articles/matrix-vs-xmpp2.html\">Test</a></p>";
String result = ImageHandler.fixBrokenHrefInArticle(articleUrl, content);
assertEquals(expectedResult, result);
}

@Test
public void testHref_CASE_RELATIVE_PARENT() {
String articleUrl = "https://www.reddit.com/subdir";
String content =
"<p><a rel=\"noreferrer\" href=\"../articles/matrix-vs-xmpp.html\">Test</a></p>"+
"<p><a rel=\"noreferrer\" href=\"../articles/matrix-vs-xmpp.html2\">Test</a></p>";
String expectedResult =
"<p><a rel=\"noreferrer\" href=\"https://www.reddit.com/articles/matrix-vs-xmpp.html\">Test</a></p>" +
"<p><a rel=\"noreferrer\" href=\"https://www.reddit.com/articles/matrix-vs-xmpp.html2\">Test</a></p>";
String result = ImageHandler.fixBrokenHrefInArticle(articleUrl, content);
assertEquals(expectedResult, result);
}

@Test
public void testHref_CASE_RELATIVE_ADD_HOST() {
String articleUrl = "https://www.reddit.com/subdir/";
String content =
"<p><a rel=\"noreferrer\" href=\"subsubdir/articles.html\">Test</a></p>" +
"<p><a rel=\"noreferrer\" href=\"subsubdir/articles2.html\">Test</a></p>";
String expectedResult =
"<p><a rel=\"noreferrer\" href=\"https://www.reddit.com/subdir/subsubdir/articles.html\">Test</a></p>" +
"<p><a rel=\"noreferrer\" href=\"https://www.reddit.com/subdir/subsubdir/articles2.html\">Test</a></p>";
String result = ImageHandler.fixBrokenHrefInArticle(articleUrl, content);
assertEquals(expectedResult, result);
}

@Test
public void testHref_CASE_RELATIVE_DOMAIN_OR_FILE() {
String articleUrl = "https://sscpodcast.libsyn.com/eight-hundred-slightly-poisoned-word-games";
String content =
"<p><a rel=\"noreferrer\" href=\"astralcodexten.substack.com\">astralcodexten.substack.com</a></p>" +
"<p><a rel=\"noreferrer\" href=\"astralcodexten.substack2.com\">astralcodexten.substack2.com</a></p>";
String expectedResult =
"<p><a rel=\"noreferrer\" href=\"astralcodexten.substack.com\">astralcodexten.substack.com</a></p>" +
"<p><a rel=\"noreferrer\" href=\"astralcodexten.substack2.com\">astralcodexten.substack2.com</a></p>";
String result = ImageHandler.fixBrokenHrefInArticle(articleUrl, content);
assertEquals(expectedResult, result);
}

@Test
public void testHref_CASE_RELATIVE_DOMAIN_SUBPATH() {
String articleUrl = "https://sscpodcast.libsyn.com/model-city-monday";
String content =
"<p><a rel=\"noreferrer\" href=\"astralcodexten.substack.com/subscribe\">astralcodexten.substack.com/subscribe</a></p>" +
"<p><a rel=\"noreferrer\" href=\"astralcodexten.substack2.com/subscribe\">astralcodexten.substack2.com/subscribe</a></p>";
String expectedResult =
"<p><a rel=\"noreferrer\" href=\"astralcodexten.substack.com/subscribe\">astralcodexten.substack.com/subscribe</a></p>"+
"<p><a rel=\"noreferrer\" href=\"astralcodexten.substack2.com/subscribe\">astralcodexten.substack2.com/subscribe</a></p>";
String result = ImageHandler.fixBrokenHrefInArticle(articleUrl, content);
assertEquals(expectedResult, result);
}

@Test
public void testImg_CASE_MISSING_PROTOCOL() {
String articleUrl = "http://blog.cleancoder.com/uncle-bob/2021/03/06/ifElseSwitch.html";
String content =
"<p><img src=\"//blog.cleancoder.com/assets/ifElseSwitch.jpg\" alt=\"ifElseSwitch.jpg\" /></p>" +
"<p><img src=\"//blog.cleancoder.com/assets/ifElseSwitchPolymorphism.jpg\" alt=\"ifElseSwitchPolymorphism.jpg\" /></p>";
String expectedResult =
"<p><img src=\"https://blog.cleancoder.com/assets/ifElseSwitch.jpg\" alt=\"ifElseSwitch.jpg\" /></p><p>" +
"<img src=\"https://blog.cleancoder.com/assets/ifElseSwitchPolymorphism.jpg\" alt=\"ifElseSwitchPolymorphism.jpg\" /></p>";
String result = ImageHandler.fixBrokenImageLinksInArticle(articleUrl, content);
assertEquals(expectedResult, result);
}
}
2 changes: 1 addition & 1 deletion docker-nextcloud-test-instances/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ version: '3'

services:
app:
image: nextcloud
image: nextcloud:24.0.2-apache
volumes:
- ./nextcloud-data/:/var/www/html
#restart: no
Expand Down
1 change: 1 addition & 0 deletions fastlane/metadata/android/de-DE/changelogs/178.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed crash when relative links in articles are clicked
1 change: 1 addition & 0 deletions fastlane/metadata/android/en-US/changelogs/178.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed crash when relative links in articles are clicked

0 comments on commit 262c98e

Please sign in to comment.