From 9addb12f09109e4244613686aef84766a2ebaa34 Mon Sep 17 00:00:00 2001 From: Josiah Campbell <9521010+jocmp@users.noreply.github.com> Date: Thu, 2 Jan 2025 20:43:59 -0600 Subject: [PATCH 1/2] Unescape & in image URLs --- .../java/com/capyreader/app/ui/articles/ArticleRow.kt | 1 + .../jocmp/capy/accounts/reader/ReaderAccountDelegate.kt | 3 ++- .../{StringXMLCharactersExt.kt => StringCharactersExt.kt} | 8 ++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) rename capy/src/main/java/com/jocmp/capy/common/{StringXMLCharactersExt.kt => StringCharactersExt.kt} (76%) diff --git a/app/src/main/java/com/capyreader/app/ui/articles/ArticleRow.kt b/app/src/main/java/com/capyreader/app/ui/articles/ArticleRow.kt index 92583650..2c2e7b9b 100644 --- a/app/src/main/java/com/capyreader/app/ui/articles/ArticleRow.kt +++ b/app/src/main/java/com/capyreader/app/ui/articles/ArticleRow.kt @@ -58,6 +58,7 @@ import com.capyreader.app.ui.fixtures.ArticleSample import com.capyreader.app.ui.theme.CapyTheme import com.jocmp.capy.Article import com.jocmp.capy.MarkRead +import com.jocmp.capy.common.escapingSpecialXMLCharacters import java.net.URL import java.time.LocalDateTime import java.time.ZoneOffset diff --git a/capy/src/main/java/com/jocmp/capy/accounts/reader/ReaderAccountDelegate.kt b/capy/src/main/java/com/jocmp/capy/accounts/reader/ReaderAccountDelegate.kt index eef5142f..d253bd6c 100644 --- a/capy/src/main/java/com/jocmp/capy/accounts/reader/ReaderAccountDelegate.kt +++ b/capy/src/main/java/com/jocmp/capy/accounts/reader/ReaderAccountDelegate.kt @@ -8,6 +8,7 @@ import com.jocmp.capy.accounts.feedbin.FeedbinAccountDelegate.Companion.MAX_CREA import com.jocmp.capy.accounts.withErrorHandling import com.jocmp.capy.common.TimeHelpers import com.jocmp.capy.common.transactionWithErrorHandling +import com.jocmp.capy.common.unescapingHTMLCharacters import com.jocmp.capy.common.withResult import com.jocmp.capy.db.Database import com.jocmp.capy.logging.CapyLog @@ -348,7 +349,7 @@ internal 
class ReaderAccountDelegate( extracted_content_url = null, summary = Jsoup.parse(item.summary.content).text(), url = item.canonical.firstOrNull()?.href, - image_url = item.image?.href, + image_url = item.image?.href?.unescapingHTMLCharacters, published_at = item.published ) diff --git a/capy/src/main/java/com/jocmp/capy/common/StringXMLCharactersExt.kt b/capy/src/main/java/com/jocmp/capy/common/StringCharactersExt.kt similarity index 76% rename from capy/src/main/java/com/jocmp/capy/common/StringXMLCharactersExt.kt rename to capy/src/main/java/com/jocmp/capy/common/StringCharactersExt.kt index dfe0af02..898b3103 100644 --- a/capy/src/main/java/com/jocmp/capy/common/StringXMLCharactersExt.kt +++ b/capy/src/main/java/com/jocmp/capy/common/StringCharactersExt.kt @@ -21,3 +21,11 @@ val String.escapingSpecialXMLCharacters: String return escaped } + +val String.unescapingHTMLCharacters: String + get() { + return this + .replace("&amp;", "&") + .replace("&lt;", "<") + .replace("&gt;", ">") + } From 5ef20cc4bb385b4523a2cadadfd1918ade41724f Mon Sep 17 00:00:00 2001 From: Josiah Campbell <9521010+jocmp@users.noreply.github.com> Date: Thu, 2 Jan 2025 21:29:01 -0600 Subject: [PATCH 2/2] Parse FreshRSS content for first image --- .../capy/accounts/reader/ItemParsedImageUrl.kt | 17 +++++++++++++++++ .../accounts/reader/ReaderAccountDelegate.kt | 3 +-- 2 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 capy/src/main/java/com/jocmp/capy/accounts/reader/ItemParsedImageUrl.kt diff --git a/capy/src/main/java/com/jocmp/capy/accounts/reader/ItemParsedImageUrl.kt b/capy/src/main/java/com/jocmp/capy/accounts/reader/ItemParsedImageUrl.kt new file mode 100644 index 00000000..d4af768b --- /dev/null +++ b/capy/src/main/java/com/jocmp/capy/accounts/reader/ItemParsedImageUrl.kt @@ -0,0 +1,17 @@ +package com.jocmp.capy.accounts.reader + +import com.jocmp.capy.common.unescapingHTMLCharacters +import com.jocmp.readerclient.Item +import org.jsoup.Jsoup + +internal fun parsedImageURL(item: 
Item): String? { + val imageHref = item.image?.href + + if (imageHref != null) { + return imageHref.unescapingHTMLCharacters + } + + val content = item.summary.content + + return Jsoup.parse(content).selectFirst("img")?.attr("src") +} diff --git a/capy/src/main/java/com/jocmp/capy/accounts/reader/ReaderAccountDelegate.kt b/capy/src/main/java/com/jocmp/capy/accounts/reader/ReaderAccountDelegate.kt index d253bd6c..858d5fad 100644 --- a/capy/src/main/java/com/jocmp/capy/accounts/reader/ReaderAccountDelegate.kt +++ b/capy/src/main/java/com/jocmp/capy/accounts/reader/ReaderAccountDelegate.kt @@ -8,7 +8,6 @@ import com.jocmp.capy.accounts.feedbin.FeedbinAccountDelegate.Companion.MAX_CREA import com.jocmp.capy.accounts.withErrorHandling import com.jocmp.capy.common.TimeHelpers import com.jocmp.capy.common.transactionWithErrorHandling -import com.jocmp.capy.common.unescapingHTMLCharacters import com.jocmp.capy.common.withResult import com.jocmp.capy.db.Database import com.jocmp.capy.logging.CapyLog @@ -349,7 +348,7 @@ internal class ReaderAccountDelegate( extracted_content_url = null, summary = Jsoup.parse(item.summary.content).text(), url = item.canonical.firstOrNull()?.href, - image_url = item.image?.href?.unescapingHTMLCharacters, + image_url = parsedImageURL(item), published_at = item.published )