From 5086ee5c55dcb9cb0c21fc98da07001cf143f39a Mon Sep 17 00:00:00 2001
From: mat <4396128+yawks@users.noreply.github.com>
Date: Mon, 8 Feb 2021 21:21:37 +0100
Subject: [PATCH] Improve parsing of enclosure and media namespace in feeds
---
.../java/net/frju/flym/data/entities/Entry.kt | 38 +++++++++++++++++--
.../java/net/frju/flym/utils/HtmlUtils.kt | 5 +++
2 files changed, 40 insertions(+), 3 deletions(-)
diff --git a/app/src/main/java/net/frju/flym/data/entities/Entry.kt b/app/src/main/java/net/frju/flym/data/entities/Entry.kt
index 30384c0a4..3ab6fa100 100644
--- a/app/src/main/java/net/frju/flym/data/entities/Entry.kt
+++ b/app/src/main/java/net/frju/flym/data/entities/Entry.kt
@@ -29,9 +29,10 @@ import androidx.room.PrimaryKey
import com.rometools.rome.feed.synd.SyndEntry
import kotlinx.android.parcel.Parcelize
import net.fred.feedex.R
+import net.frju.flym.utils.HtmlUtils
import net.frju.flym.utils.sha1
-import java.util.Date
-import java.util.UUID
+import org.jdom2.Element
+import java.util.*
@Parcelize
@@ -75,8 +76,39 @@ fun SyndEntry.toDbFormat(context: Context, feed: Feed): Entry {
item.title = context.getString(R.string.entry_default_title)
}
item.description = contents.getOrNull(0)?.value ?: description?.value
+
+ if (item.description == null) {
+ foreignMarkup?.forEach {
+ if (it.namespace?.prefix == "media" && it.name == "group") {
+ it.children.forEach { mc ->
+ if (mc.name == "description") item.description = mc.value
+ if (mc.name == "thumbnail") {
+ mc.attributes.forEach { tb ->
+ if (tb.name == "url") item.imageLink = tb.value
+ }
+ }
+ }
+ }
+ }
+ }
+
item.link = link
- //TODO item.imageLink = null
+
+ enclosures?.forEach {
+ if ((it.type != null && it.type.contains("image")) || HtmlUtils.isImageInUrl(it.url)) {
+ item.imageLink = it.url
+ }
+ }
+ if (item.imageLink == null) {
+ foreignMarkup?.forEach {
+ if (it.namespace?.prefix == "media" && (it.name == "thumbnail" || it.name == "content")) {
+ it.attributes.forEach { mc ->
+ if (mc.name == "url" && HtmlUtils.isImageInUrl(mc.value)) item.imageLink = mc.value
+ }
+ }
+ }
+ }
+
item.author = author
val date = publishedDate ?: updatedDate
diff --git a/app/src/main/java/net/frju/flym/utils/HtmlUtils.kt b/app/src/main/java/net/frju/flym/utils/HtmlUtils.kt
index 37ea6e77e..a3e178017 100644
--- a/app/src/main/java/net/frju/flym/utils/HtmlUtils.kt
+++ b/app/src/main/java/net/frju/flym/utils/HtmlUtils.kt
@@ -53,6 +53,7 @@ object HtmlUtils {
private val END_BR_PATTERN = Pattern.compile("(\\s*
\\s*)*$", Pattern.CASE_INSENSITIVE)
private val MULTIPLE_BR_PATTERN = Pattern.compile("(\\s*
\\s*){3,}", Pattern.CASE_INSENSITIVE)
private val EMPTY_LINK_PATTERN = Pattern.compile("]*>", Pattern.CASE_INSENSITIVE)
+    // URL shape (optional scheme, optional //authority, path, optional ?query, optional #fragment)
+    // whose path ends in a known image extension: jpg, jpeg, gif, png or webp. Case-insensitive.
+    // The extension must close the path, so "image.jpg.html" is not accepted on a full match.
+    private val URL_WITH_IMAGE_PATTERN = Pattern.compile("(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*\\.(?:jpe?g|gif|png|webp))(?:\\?([^#]*))?(?:#(.*))?", Pattern.CASE_INSENSITIVE)
fun improveHtmlContent(content: String, baseUri: String): String {
@Suppress("NAME_SHADOWING")
@@ -160,4 +161,8 @@ object HtmlUtils {
return false
}
+
+    /**
+     * Tells whether [url] looks like a link to an image, i.e. the whole string matches
+     * [URL_WITH_IMAGE_PATTERN].
+     *
+     * @param url the URL to inspect. Nullable because callers pass values read from Java feed
+     * objects, which Kotlin cannot prove non-null; a null value is treated as "not an image"
+     * rather than failing the parameter null-check.
+     * @return true only when [url] is non-null and fully matches the pattern
+     */
+    fun isImageInUrl(url: String?): Boolean =
+        url != null && URL_WITH_IMAGE_PATTERN.matcher(url).matches()
}