From bd395d0d94306d35601fa4382e9544251439aa73 Mon Sep 17 00:00:00 2001
From: Josiah Campbell <9521010+jocmp@users.noreply.github.com>
Date: Thu, 2 Jan 2025 21:43:58 -0600
Subject: [PATCH] Improve FreshRSS image preview parsing (#669)

* Unescape &amp; in image URLs

  The item image href is now passed through unescapingHTMLCharacters,
  which replaces &lt;, &gt; and &amp; with the characters they stand
  for. &amp; is replaced last so that a double-escaped entity such as
  &amp;lt; yields &lt; rather than collapsing all the way to <.

* Parse FreshRSS content for first image

  When an item has no image href, parsedImageURL falls back to the src
  attribute of the first <img> element in the item's summary content,
  and returns null when the content contains no <img> element.
---
 .../capyreader/app/ui/articles/ArticleRow.kt    |  1 +
 .../capy/accounts/reader/ItemParsedImageUrl.kt  | 17 +++++++++++++++++
 .../accounts/reader/ReaderAccountDelegate.kt    |  2 +-
 ...LCharactersExt.kt => StringCharactersExt.kt} |  8 ++++++++
 4 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 capy/src/main/java/com/jocmp/capy/accounts/reader/ItemParsedImageUrl.kt
 rename capy/src/main/java/com/jocmp/capy/common/{StringXMLCharactersExt.kt => StringCharactersExt.kt} (76%)

diff --git a/app/src/main/java/com/capyreader/app/ui/articles/ArticleRow.kt b/app/src/main/java/com/capyreader/app/ui/articles/ArticleRow.kt
index 92583650..2c2e7b9b 100644
--- a/app/src/main/java/com/capyreader/app/ui/articles/ArticleRow.kt
+++ b/app/src/main/java/com/capyreader/app/ui/articles/ArticleRow.kt
@@ -58,6 +58,7 @@ import com.capyreader.app.ui.fixtures.ArticleSample
 import com.capyreader.app.ui.theme.CapyTheme
 import com.jocmp.capy.Article
 import com.jocmp.capy.MarkRead
+import com.jocmp.capy.common.escapingSpecialXMLCharacters
 import java.net.URL
 import java.time.LocalDateTime
 import java.time.ZoneOffset
diff --git a/capy/src/main/java/com/jocmp/capy/accounts/reader/ItemParsedImageUrl.kt b/capy/src/main/java/com/jocmp/capy/accounts/reader/ItemParsedImageUrl.kt
new file mode 100644
index 00000000..d4af768b
--- /dev/null
+++ b/capy/src/main/java/com/jocmp/capy/accounts/reader/ItemParsedImageUrl.kt
@@ -0,0 +1,17 @@
+package com.jocmp.capy.accounts.reader
+
+import com.jocmp.capy.common.unescapingHTMLCharacters
+import com.jocmp.readerclient.Item
+import org.jsoup.Jsoup
+
+internal fun parsedImageURL(item: Item): String? {
+    val imageHref = item.image?.href
+
+    if (imageHref != null) {
+        return imageHref.unescapingHTMLCharacters
+    }
+
+    val content = item.summary.content
+
+    return Jsoup.parse(content).selectFirst("img")?.attr("src")
+}
diff --git a/capy/src/main/java/com/jocmp/capy/accounts/reader/ReaderAccountDelegate.kt b/capy/src/main/java/com/jocmp/capy/accounts/reader/ReaderAccountDelegate.kt
index eef5142f..858d5fad 100644
--- a/capy/src/main/java/com/jocmp/capy/accounts/reader/ReaderAccountDelegate.kt
+++ b/capy/src/main/java/com/jocmp/capy/accounts/reader/ReaderAccountDelegate.kt
@@ -348,7 +348,7 @@ internal class ReaderAccountDelegate(
                     extracted_content_url = null,
                     summary = Jsoup.parse(item.summary.content).text(),
                     url = item.canonical.firstOrNull()?.href,
-                    image_url = item.image?.href,
+                    image_url = parsedImageURL(item),
                     published_at = item.published
                 )
 
diff --git a/capy/src/main/java/com/jocmp/capy/common/StringXMLCharactersExt.kt b/capy/src/main/java/com/jocmp/capy/common/StringCharactersExt.kt
similarity index 76%
rename from capy/src/main/java/com/jocmp/capy/common/StringXMLCharactersExt.kt
rename to capy/src/main/java/com/jocmp/capy/common/StringCharactersExt.kt
index dfe0af02..898b3103 100644
--- a/capy/src/main/java/com/jocmp/capy/common/StringXMLCharactersExt.kt
+++ b/capy/src/main/java/com/jocmp/capy/common/StringCharactersExt.kt
@@ -21,3 +21,11 @@ val String.escapingSpecialXMLCharacters: String
 
         return escaped
     }
+
+val String.unescapingHTMLCharacters: String
+    get() {
+        return this
+            .replace("&lt;", "<")
+            .replace("&gt;", ">")
+            .replace("&amp;", "&")
+    }