Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(rss): move RSS reader to its own folder #188

Merged
merged 16 commits into from
Aug 5, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions qtribu/gui/dlg_contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
from qtribu.constants import ICON_ARTICLE, ICON_GEORDP
from qtribu.gui.form_article import ArticleForm
from qtribu.gui.form_rdp_news import RdpNewsForm
from qtribu.logic import RssItem
from qtribu.logic.json_feed import JsonFeedClient
from qtribu.logic.news_feed.json_feed import JsonFeedClient
from qtribu.logic.news_feed.mdl_rss_item import RssItem
from qtribu.toolbelt import PlgLogger, PlgOptionsManager
from qtribu.toolbelt.commons import open_url_in_browser, open_url_in_webviewer

Expand Down Expand Up @@ -257,7 +257,7 @@ def _build_tree_widget_item_from_content(content: RssItem) -> QTreeWidgetItem:
[
content.date_pub.strftime("%d %B"),
content.title,
", ".join(content.author),
", ".join(content.authors),
tags,
content.url,
]
Expand Down
2 changes: 0 additions & 2 deletions qtribu/logic/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
#! python3 # noqa: E265
from .custom_datatypes import RssItem # noqa: F401
from .rss_reader import RssMiniReader # noqa: F401
from .splash_changer import SplashChanger # noqa: F401
22 changes: 0 additions & 22 deletions qtribu/logic/custom_datatypes.py

This file was deleted.

Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

# plugin
from qtribu.__about__ import __title__, __version__
from qtribu.logic import RssItem
from qtribu.logic.news_feed.mdl_rss_item import RssItem
from qtribu.toolbelt import NetworkRequestsManager, PlgLogger, PlgOptionsManager

# -- GLOBALS --
Expand All @@ -30,7 +30,7 @@
FETCH_UPDATE_INTERVAL_SECONDS = 7200


## -- CLASSES --
# -- CLASSES --


class JsonFeedClient:
Expand Down Expand Up @@ -89,7 +89,7 @@ def authors(self) -> list[str]:
"""
authors = []
for content in self.fetch():
for ca in content.author:
for ca in content.authors:
authors.append(" ".join([a.title() for a in ca.split(" ")]))
return sorted(set(authors))

Expand All @@ -115,7 +115,7 @@ def _map_item(item: dict[str, Any]) -> RssItem:
"""
return RssItem(
abstract=item.get("content_html"),
author=[i["name"] for i in item.get("authors")],
authors=[i["name"] for i in item.get("authors")],
categories=item.get("tags", []),
date_pub=datetime.fromisoformat(item.get("date_published")),
guid=item.get("id"),
Expand All @@ -142,7 +142,7 @@ def _matches(query: str, item: RssItem) -> bool:
return all([JsonFeedClient._matches(w, item) for w in words])
return (
query.upper() in item.abstract.upper()
or query.upper() in ",".join(item.author).upper()
or query.upper() in ",".join(item.authors).upper()
or query.upper() in ",".join(item.categories).upper()
or query.upper() in item.date_pub.isoformat().upper()
or query.upper() in item.image_url.upper()
Expand Down
21 changes: 21 additions & 0 deletions qtribu/logic/news_feed/mdl_rss_item.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#! python3 # noqa: E265

# Standard library
from dataclasses import dataclass
from datetime import datetime
from typing import Optional, Union


@dataclass
class RssItem:
    """Dataclass describing a RSS channel item.

    Every field is optional and defaults to ``None`` so that an item can be
    built from a partial feed entry and completed afterwards (the RSS reader
    fills the ``image_*`` fields only when an enclosure is present).

    The field order is part of the public constructor signature: do not
    reorder fields.
    """

    # Item description (RSS ``description`` text or JSON feed ``content_html``).
    abstract: Optional[str] = None
    # Author names; ``None`` when the feed entry declares no author.
    authors: Optional[list[Optional[str]]] = None
    # Categories/tags; ``None`` when the feed entry declares none.
    categories: Optional[list[Optional[str]]] = None
    # Publication date. The RSS reader stores the time tuple returned by
    # ``email.utils.parsedate`` while the JSON feed client stores a
    # ``datetime`` (its consumers call ``strftime``/``isoformat`` on it),
    # hence both types are accepted.
    date_pub: Optional[Union[tuple[int, ...], datetime]] = None
    # Unique identifier of the item (RSS ``guid`` or JSON feed ``id``).
    guid: Optional[str] = None
    # ``length`` attribute of the enclosure, kept as the raw string.
    image_length: Optional[str] = None
    # ``type`` attribute of the enclosure.
    image_type: Optional[str] = None
    # ``url`` attribute of the enclosure.
    image_url: Optional[str] = None
    # Item title.
    title: Optional[str] = None
    # Link to the published content.
    url: Optional[str] = None
181 changes: 138 additions & 43 deletions qtribu/logic/rss_reader.py → qtribu/logic/news_feed/rss_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,21 @@
import logging
import xml.etree.ElementTree as ET
from email.utils import parsedate
from typing import List, Optional
from pathlib import Path
from typing import Callable, Optional

# QGIS
from qgis.core import Qgis, QgsSettings
from qgis.PyQt.QtCore import QCoreApplication
from qgis.PyQt.QtGui import QIcon
from qgis.PyQt.QtWidgets import QAction

# project
from qtribu.__about__ import __title__, __version__
from qtribu.logic.custom_datatypes import RssItem
from qtribu.__about__ import DIR_PLUGIN_ROOT, __title__, __version__
from qtribu.logic.news_feed.mdl_rss_item import RssItem
from qtribu.toolbelt import PlgLogger, PlgOptionsManager
from qtribu.toolbelt.file_stats import is_file_older_than
from qtribu.toolbelt.network_manager import NetworkRequestsManager

# ############################################################################
# ########## Globals ###############
Expand All @@ -38,71 +43,158 @@
class RssMiniReader:
"""Minimalist RSS feed parser."""

FEED_ITEMS: Optional[tuple] = None
FEED_ITEMS: Optional[list[RssItem]] = None
HEADERS: dict = {
b"Accept": b"application/xml",
b"User-Agent": bytes(f"{__title__}/{__version__}", "utf8"),
}
PATTERN_INCLUDE: list = ["articles/", "rdp/"]

def __init__(self):
def __init__(
self,
action_read: Optional[QAction] = None,
on_read_button: Optional[Callable] = None,
):
"""Class initialization."""
self.log = PlgLogger().log
self.ntwk_manager = NetworkRequestsManager()
self.plg_settings = PlgOptionsManager.get_plg_settings()
self.local_feed_filepath: Path = self.plg_settings.local_app_folder.joinpath(
"rss.xml"
)
self.action_read = action_read
self.on_read_button = on_read_button

def process(self):
"""Download, parse and read RSS feed, then store items as attribute."""
# download remote RSS feed to cache folder
self.download_feed()
if not self.local_feed_filepath.exists():
self.log(
message=self.tr(
"The RSS feed is not available locally. "
"Disabling RSS reader related features."
Guts marked this conversation as resolved.
Show resolved Hide resolved
),
log_level=1,
)
return

# parse the local RSS feed
self.read_feed()

# check if a new item has been published since last check
if not self.has_new_content:
self.log(message="No new item found in RSS feed.", log_level=4)
return
# notify
if isinstance(self.latest_item, RssItem):
latest_item = self.latest_item
self.log(
message="{} {}".format(
self.tr("New content published:"),
latest_item.title,
),
log_level=3,
Guts marked this conversation as resolved.
Show resolved Hide resolved
push=PlgOptionsManager().get_plg_settings().notify_push_info,
duration=PlgOptionsManager().get_plg_settings().notify_push_duration,
button=True,
button_label=self.tr("Read it!"),
button_connect=self.on_read_button,
)

def read_feed(self, in_xml: str) -> tuple[RssItem]:
"""Parse the feed XML as string and store items into an ordered tuple of tuples.
# change action icon
if isinstance(self.action_read, QAction):
self.action_read.setIcon(
QIcon(
str(
DIR_PLUGIN_ROOT / "resources/images/logo_orange_no_text.svg"
)
),
)

:param in_xml: XML as string. Must be RSS compliant.
:type in_xml: str
def download_feed(self) -> bool:
"""Download RSS feed locally if the local copy is older than the configured poll frequency (in hours).

:return: RSS items loaded as namedtuples
:rtype: Tuple[RssItem]
:return: True if a new file has been downloaded.
:rtype: bool
"""
feed_items = []
tree = ET.ElementTree(ET.fromstring(in_xml))
root = tree.getroot()
items = root.findall("channel/item")
if is_file_older_than(
local_file_path=self.local_feed_filepath,
expiration_rotating_hours=self.plg_settings.rss_poll_frequency_hours,
):
self.ntwk_manager.download_file_to(
remote_url=self.plg_settings.rss_source,
local_path=self.local_feed_filepath,
)
self.log(
message=f"The remote RSS feed ({self.plg_settings.rss_source}) has been "
f"downloaded to {self.local_feed_filepath}",
log_level=0,
Guts marked this conversation as resolved.
Show resolved Hide resolved
)
return True
self.log(
message=f"A fresh local RSS feed already exists: {self.local_feed_filepath}",
log_level=0,
)
return False

def read_feed(self) -> list[RssItem]:
"""Parse the local feed XML file and store items into an ordered list of RSS items.

:return: list of RSS items dataclasses
:rtype: list[RssItem]
"""
feed_items: list[RssItem] = []
tree = ET.parse(self.local_feed_filepath)
items = tree.findall("channel/item")
for item in items:
try:
# filter on included pattern
if not any([i in item.find("link").text for i in self.PATTERN_INCLUDE]):
logging.debug(
"Item ignored because unmatches the include pattern: {}".format(
item.find("title")
)
self.log(
message="Item ignored because unmatches the include pattern: {}".format(
item.find("title").text
),
log_level=4,
)
continue

# add items to the feed
feed_items.append(
RssItem(
abstract=item.find("description").text,
author=[author.text for author in item.findall("author")]
or None,
categories=[
category.text for category in item.findall("category")
]
or None,
date_pub=parsedate(item.find("pubDate").text),
guid=item.find("guid").text,
image_length=item.find("enclosure").attrib.get("length"),
image_type=item.find("enclosure").attrib.get("type"),
image_url=item.find("enclosure").attrib.get("url"),
title=item.find("title").text,
url=item.find("link").text,
)
# feed item object
feed_item_obj = RssItem(
abstract=item.find("description").text,
authors=[author.text for author in item.findall("author")] or None,
categories=[category.text for category in item.findall("category")]
or None,
date_pub=parsedate(item.find("pubDate").text),
guid=item.find("guid").text,
image_length=item.find("enclosure").attrib.get("length"),
image_type=item.find("enclosure").attrib.get("type"),
image_url=item.find("enclosure").attrib.get("url"),
title=item.find("title").text,
url=item.find("link").text,
)
if item.find("enclosure") is not None:
item_enclosure = item.find("enclosure")
feed_item_obj.image_length = item_enclosure.attrib.get("length")
feed_item_obj.image_type = item_enclosure.attrib.get("type")
feed_item_obj.image_url = item_enclosure.attrib.get("url")

# add items to the feed
feed_items.append(feed_item_obj)
except Exception as err:
err_msg = f"Feed item triggers an error. Trace: {err}"
logger.error(err_msg)
item_idx: Optional[int] = None
if hasattr(items, "index"):
item_idx = items.index(item)

err_msg = f"Feed item {item_idx} triggers an error. Trace: {err}"
self.log(message=err_msg, log_level=2)

# store feed items as attribute and return it
self.FEED_ITEMS = feed_items
return feed_items

@property
def latest_item(self) -> RssItem:
def latest_item(self) -> Optional[RssItem]:
"""Returns the latest feed item, based on index 0.

:return: latest feed item.
Expand All @@ -117,7 +209,7 @@ def latest_item(self) -> RssItem:

return self.FEED_ITEMS[0]

def latest_items(self, count: int = 36) -> List[RssItem]:
def latest_items(self, count: int = 36) -> list[RssItem]:
"""Returns the latest feed items.
:param count: number of items to fetch
:type count: int
Expand All @@ -143,7 +235,10 @@ def has_new_content(self) -> bool:
:rtype: bool
"""
settings = PlgOptionsManager.get_plg_settings()
if self.latest_item.guid != settings.latest_content_guid:
if (
isinstance(self.latest_item, RssItem)
and self.latest_item.guid != settings.latest_content_guid
):
return True
else:
return False
Expand Down Expand Up @@ -187,7 +282,7 @@ def add_latest_item_to_news_feed(self) -> bool:
key=f"news-feed/items/httpsfeedqgisorg/entries/items/{item_id}/content",
value=f"<p>{latest_geotribu_article.abstract}</p><p>"
+ self.tr("Author(s): ")
+ f"{', '.join(latest_geotribu_article.author)}</p><p><small>"
+ f"{', '.join(latest_geotribu_article.authors)}</p><p><small>"
+ self.tr("Keywords: ")
+ f"{', '.join(latest_geotribu_article.categories)}</small></p>",
section=QgsSettings.App,
Expand Down
Loading
Loading