From 5086ee5c55dcb9cb0c21fc98da07001cf143f39a Mon Sep 17 00:00:00 2001
From: mat <4396128+yawks@users.noreply.github.com>
Date: Mon, 8 Feb 2021 21:21:37 +0100
Subject: [PATCH] Improve parsing of enclosure and media namespace in feeds

Entry.kt (SyndEntry.toDbFormat):
- When an entry has neither content nor description, take the description
  (and the thumbnail url, as image link) from the children of a
  "media:group" foreign-markup element.
- Use an enclosure as image link when its type contains "image" or its url
  looks like an image; the last matching enclosure wins and replaces a
  thumbnail taken from "media:group".
- If there is still no image link, fall back to the url attribute of a
  top-level "media:thumbnail" / "media:content" element, provided that url
  looks like an image.

HtmlUtils.kt:
- Add isImageInUrl(), which matches urls whose path ends in .jpg, .jpeg,
  .gif, .png or .webp (case-insensitive), optionally followed by a query
  string and/or a fragment. It accepts null and returns false for it, so an
  enclosure without a url cannot make feed parsing throw.
---
 .../java/net/frju/flym/data/entities/Entry.kt | 41 +++++++++++++++++--
 .../java/net/frju/flym/utils/HtmlUtils.kt     | 10 +++++
 2 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/app/src/main/java/net/frju/flym/data/entities/Entry.kt b/app/src/main/java/net/frju/flym/data/entities/Entry.kt
index 30384c0a4..3ab6fa100 100644
--- a/app/src/main/java/net/frju/flym/data/entities/Entry.kt
+++ b/app/src/main/java/net/frju/flym/data/entities/Entry.kt
@@ -29,9 +29,10 @@ import androidx.room.PrimaryKey
 import com.rometools.rome.feed.synd.SyndEntry
 import kotlinx.android.parcel.Parcelize
 import net.fred.feedex.R
+import net.frju.flym.utils.HtmlUtils
 import net.frju.flym.utils.sha1
-import java.util.Date
-import java.util.UUID
+import org.jdom2.Element
+import java.util.*
 
 
 @Parcelize
@@ -75,8 +76,42 @@ fun SyndEntry.toDbFormat(context: Context, feed: Feed): Entry {
         item.title = context.getString(R.string.entry_default_title)
     }
     item.description = contents.getOrNull(0)?.value ?: description?.value
+
+    // Neither content nor description: fall back to the children of <media:group>
+    if (item.description == null) {
+        foreignMarkup?.forEach {
+            if (it.namespace?.prefix == "media" && it.name == "group") {
+                it.children.forEach { mc ->
+                    if (mc.name == "description") item.description = mc.value
+                    if (mc.name == "thumbnail") {
+                        mc.attributes.forEach { tb ->
+                            if (tb.name == "url") item.imageLink = tb.value
+                        }
+                    }
+                }
+            }
+        }
+    }
+
     item.link = link
-    //TODO item.imageLink = null
+
+    // An image enclosure replaces any thumbnail found above; the last matching one wins
+    enclosures?.forEach {
+        if (it.type?.contains("image") == true || HtmlUtils.isImageInUrl(it.url)) {
+            item.imageLink = it.url
+        }
+    }
+    // Last resort: a top-level <media:thumbnail> or <media:content> whose url looks like an image
+    if (item.imageLink == null) {
+        foreignMarkup?.forEach {
+            if (it.namespace?.prefix == "media" && (it.name == "thumbnail" || it.name == "content")) {
+                it.attributes.forEach { mc ->
+                    if (mc.name == "url" && HtmlUtils.isImageInUrl(mc.value)) item.imageLink = mc.value
+                }
+            }
+        }
+    }
+
     item.author = author
 
     val date = publishedDate ?: updatedDate
diff --git a/app/src/main/java/net/frju/flym/utils/HtmlUtils.kt b/app/src/main/java/net/frju/flym/utils/HtmlUtils.kt
index 37ea6e77e..a3e178017 100644
--- a/app/src/main/java/net/frju/flym/utils/HtmlUtils.kt
+++ b/app/src/main/java/net/frju/flym/utils/HtmlUtils.kt
@@ -53,6 +53,7 @@ object HtmlUtils {
     private val END_BR_PATTERN = Pattern.compile("(\\s*\\s*)*$", Pattern.CASE_INSENSITIVE)
     private val MULTIPLE_BR_PATTERN = Pattern.compile("(\\s*\\s*){3,}", Pattern.CASE_INSENSITIVE)
     private val EMPTY_LINK_PATTERN = Pattern.compile("]*>", Pattern.CASE_INSENSITIVE)
+    private val URL_WITH_IMAGE_PATTERN = Pattern.compile("(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*\\.(?:jpe?g|gif|png|webp))(?:\\?([^#]*))?(?:#(.*))?", Pattern.CASE_INSENSITIVE)
 
     fun improveHtmlContent(content: String, baseUri: String): String {
         @Suppress("NAME_SHADOWING")
@@ -160,4 +161,13 @@ object HtmlUtils {
 
         return false
     }
+
+    /**
+     * Returns true when the path of [url] ends with an image extension (jpg, jpeg, gif, png
+     * or webp, ignoring case), optionally followed by a query string and/or a fragment.
+     * A null [url] yields false.
+     */
+    fun isImageInUrl(url: String?): Boolean {
+        return url != null && URL_WITH_IMAGE_PATTERN.matcher(url).matches()
+    }
 }