Skip to content

Commit

Permalink
Add parameter to control whether to unescape HTML entities
Browse files (browse the repository at this point in the history)
Some plugins need the raw data (with HTML entities left escaped) for further processing.
Related: #22074.

PR #22106.
  • Loading branch information
Chocobo1 authored Jan 6, 2025
1 parent d911928 commit 4f3d779
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions src/searchengine/nova3/helpers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#VERSION: 1.50
#VERSION: 1.51

# Author:
# Christophe DUMEZ (chris@qbittorrent.org)
Expand Down Expand Up @@ -77,7 +77,7 @@ def getBrowserUserAgent() -> str:
htmlentitydecode = html.unescape


def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data: Optional[Any] = None, ssl_context: Optional[ssl.SSLContext] = None) -> str:
def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data: Optional[Any] = None, ssl_context: Optional[ssl.SSLContext] = None, unescape_html_entities: bool = True) -> str:
""" Return the content of the url page as a string """

request = urllib.request.Request(url, request_data, {**headers, **custom_headers})
Expand All @@ -101,7 +101,10 @@ def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data:
pass

dataStr = data.decode(charset, 'replace')
dataStr = htmlentitydecode(dataStr)

if unescape_html_entities:
dataStr = html.unescape(dataStr)

return dataStr


Expand Down

0 comments on commit 4f3d779

Please sign in to comment.