Skip to content

Commit

Permalink
Merge pull request #227 from openzim/metadata_and_beartype
Browse files: browse the repository at this point in the history
Protecting our API with beartype
  • Loading branch information
benoit74 authored Dec 17, 2024
2 parents a0a225b + 6f93ffe commit f68d568
Show file tree
Hide file tree
Showing 35 changed files with 194 additions and 185 deletions.
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ ban-relative-imports = "all"
# _libkiwix mimics libkiwix C++ code, names obey C++ conventions
"src/zimscraperlib/zim/_libkiwix.py" = ["N802", "N803", "N806"]
# beartype must be first
"src/zimscraperlib/zim/__init__.py" = ["E402"]
"src/zimscraperlib/__init__.py" = ["E402"]

[tool.pytest.ini_options]
minversion = "7.3"
Expand All @@ -278,6 +278,7 @@ exclude_lines = [
"no cov",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
"class .*Protocol.*",
]

[tool.pyright]
Expand Down
4 changes: 4 additions & 0 deletions src/zimscraperlib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
import logging as stdlogging
import os

from beartype.claw import beartype_this_package

beartype_this_package()

from zimscraperlib.constants import NAME
from zimscraperlib.logging import getLogger

Expand Down
35 changes: 18 additions & 17 deletions src/zimscraperlib/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pathlib
import subprocess
from concurrent.futures import Future, ThreadPoolExecutor
from typing import IO, ClassVar
from typing import ClassVar

import requests
import requests.adapters
Expand All @@ -16,6 +16,7 @@

from zimscraperlib import logger
from zimscraperlib.constants import DEFAULT_WEB_REQUESTS_TIMEOUT
from zimscraperlib.typing import SupportsSeekableWrite, SupportsWrite


class YoutubeDownloader:
Expand Down Expand Up @@ -59,11 +60,10 @@ def download(
future = self.executor.submit(self._run_youtube_dl, url, options or {})
if not wait:
return future
if not future.exception():
# return the result
return future.result() # pyright: ignore
# raise the exception
raise future.exception() # pyright: ignore
exc = future.exception()
if isinstance(exc, BaseException):
raise exc
return True


class YoutubeConfig(dict):
Expand Down Expand Up @@ -176,7 +176,7 @@ def get_session(max_retries: int | None = 5) -> requests.Session:
def stream_file(
url: str,
fpath: pathlib.Path | None = None,
byte_stream: IO[bytes] | None = None,
byte_stream: SupportsWrite[bytes] | SupportsSeekableWrite[bytes] | None = None,
block_size: int | None = 1024,
proxies: dict[str, str] | None = None,
max_retries: int | None = 5,
Expand Down Expand Up @@ -216,24 +216,25 @@ def stream_file(

total_downloaded = 0
if fpath is not None:
fp = open(fpath, "wb")
elif (
byte_stream is not None
): # pragma: no branch (we use a precise condition to help type checker)
fp = byte_stream
fpath_handler = open(fpath, "wb")
else:
fpath_handler = None

for data in resp.iter_content(block_size):
total_downloaded += len(data)
fp.write(data)
if fpath_handler:
fpath_handler.write(data)
if byte_stream:
byte_stream.write(data)

# stop downloading/reading if we're just testing first block
if only_first_block:
break

logger.debug(f"Downloaded {total_downloaded} bytes from {url}")

if fpath:
fp.close()
else:
fp.seek(0)
if fpath_handler:
fpath_handler.close()
elif isinstance(byte_stream, SupportsSeekableWrite) and byte_stream.seekable():
byte_stream.seek(0)
return total_downloaded, resp.headers
4 changes: 2 additions & 2 deletions src/zimscraperlib/i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,12 @@ def iso_types(self) -> list[str]:
return self["iso_types"]

@property
def query(self) -> list[str]:
def query(self) -> str:
"""Query issued for these language details"""
return self["query"]

@property
def querytype(self) -> list[str]:
def querytype(self) -> str:
"""Type of query issued to retrieve language details"""
return self["querytype"]

Expand Down
13 changes: 7 additions & 6 deletions src/zimscraperlib/image/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import io
import pathlib
from typing import IO

import cairosvg.svg
from PIL.Image import open as pilopen
Expand All @@ -17,9 +16,9 @@


def convert_image(
src: pathlib.Path | IO[bytes],
dst: pathlib.Path | IO[bytes],
**params: str,
src: pathlib.Path | io.BytesIO,
dst: pathlib.Path | io.BytesIO,
**params: str | None,
) -> None:
"""convert an image file from one format to another
params: Image.save() parameters. Depends on dest format.
Expand All @@ -31,7 +30,9 @@ def convert_image(
to RGB. ex: RGB, ARGB, CMYK (and other PIL colorspaces)"""

colorspace = params.get("colorspace") # requested colorspace
fmt = params.pop("fmt").upper() if "fmt" in params else None # requested format
fmt = (
str(params.pop("fmt")).upper() if params.get("fmt") else None
) # requested format
if not fmt:
fmt = format_for(dst)
if not fmt:
Expand All @@ -44,7 +45,7 @@ def convert_image(

def convert_svg2png(
src: str | pathlib.Path | io.BytesIO,
dst: pathlib.Path | IO[bytes],
dst: pathlib.Path | io.BytesIO,
width: int | None = None,
height: int | None = None,
):
Expand Down
4 changes: 2 additions & 2 deletions src/zimscraperlib/image/optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,14 +210,14 @@ def optimize_webp(
else:
try:
save_image(webp_image, dst, fmt="WEBP", **params)
except Exception as exc:
except Exception as exc: # pragma: no cover
if (
isinstance(src, pathlib.Path)
and isinstance(dst, pathlib.Path)
and src.resolve() != dst.resolve()
and dst.exists()
):
dst.unlink() # pragma: no cover
dst.unlink()
raise exc
return dst

Expand Down
5 changes: 2 additions & 3 deletions src/zimscraperlib/image/probing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import io
import pathlib
import re
from typing import IO

import colorthief
import PIL.Image
Expand Down Expand Up @@ -55,7 +54,7 @@ def is_hex_color(text: str) -> bool:


def format_for(
src: pathlib.Path | IO[bytes],
src: pathlib.Path | io.BytesIO,
*,
from_suffix: bool = True,
) -> str | None:
Expand Down Expand Up @@ -95,7 +94,7 @@ def format_for(


def is_valid_image(
image: pathlib.Path | IO[bytes] | bytes,
image: pathlib.Path | bytes | io.BytesIO,
imformat: str,
size: tuple[int, int] | None = None,
) -> bool:
Expand Down
7 changes: 4 additions & 3 deletions src/zimscraperlib/image/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,19 @@
# vim: ai ts=4 sts=4 et sw=4 nu
from __future__ import annotations

import io
import pathlib
from typing import IO
from typing import IO, Any

from PIL.Image import Image
from PIL.ImageFile import ImageFile


def save_image(
src: Image | ImageFile,
dst: pathlib.Path | IO[bytes],
dst: pathlib.Path | IO[bytes] | io.BytesIO,
fmt: str,
**params: str,
**params: Any,
) -> None:
"""PIL.Image.save() wrapper setting default parameters"""
args = {"JPEG": {"quality": 100}, "PNG": {}}.get(fmt, {})
Expand Down
3 changes: 2 additions & 1 deletion src/zimscraperlib/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from __future__ import annotations

import io
import logging
import pathlib
import sys
Expand All @@ -22,7 +23,7 @@
def getLogger( # noqa: N802 (intentionally matches the stdlib getLogger name)
name: str,
level: int = logging.INFO,
console: TextIO | None = sys.stdout,
console: TextIO | io.StringIO | None = sys.stdout,
log_format: str | None = DEFAULT_FORMAT,
file: pathlib.Path | None = None,
file_level: int | None = None,
Expand Down
22 changes: 10 additions & 12 deletions src/zimscraperlib/rewriting/css.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,29 +186,27 @@ def _process_node(self, node: ast.Node):
)
elif isinstance(node, ast.FunctionBlock):
if node.lower_name == "url": # pyright: ignore[reportUnknownMemberType]
url_node: ast.Node = node.arguments[0] # pyright: ignore
url_node: ast.Node = node.arguments[0]
new_url = self.url_rewriter(
url_node.value, # pyright: ignore
getattr(url_node, "value", ""),
self.base_href,
).rewriten_url
url_node.value = str(new_url) # pyright: ignore
url_node.representation = ( # pyright: ignore
f'"{serialize_url(str(new_url))}"'
setattr(url_node, "value", str(new_url)) # noqa: B010
setattr( # noqa: B010
url_node, "representation", f'"{serialize_url(str(new_url))}"'
)

else:
self._process_list(
node.arguments, # pyright: ignore
getattr(node, "arguments", []),
)
elif isinstance(node, ast.AtRule):
self._process_list(node.prelude) # pyright: ignore
self._process_list(node.content) # pyright: ignore
self._process_list(node.prelude)
self._process_list(node.content)
elif isinstance(node, ast.Declaration):
self._process_list(node.value) # pyright: ignore
self._process_list(node.value)
elif isinstance(node, ast.URLToken):
new_url = self.url_rewriter(
node.value, self.base_href
).rewriten_url # pyright: ignore
new_url = self.url_rewriter(node.value, self.base_href).rewriten_url
node.value = new_url
node.representation = f"url({serialize_url(new_url)})"

Expand Down
8 changes: 5 additions & 3 deletions src/zimscraperlib/rewriting/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ def rewrite_meta_charset_content(
return
if attr_name == "charset":
return (attr_name, "UTF-8")
if attr_name == "content" and any(
if attr_name == "content" and any( # pragma: no coverage (coverage bug)
attr_name.lower() == "http-equiv"
and attr_value
and attr_value.lower() == "content-type"
Expand All @@ -574,7 +574,9 @@ def rewrite_onxxx_tags(
attr_name: str, attr_value: str | None, js_rewriter: JsRewriter
) -> AttrNameAndValue | None:
"""Rewrite onxxx script attributes"""
if attr_value and attr_name.startswith("on") and not attr_name.startswith("on-"):
if (
attr_value and attr_name.startswith("on") and not attr_name.startswith("on-")
): # pragma: no coverage (coverage bug)
return (attr_name, js_rewriter.rewrite(attr_value))


Expand All @@ -583,7 +585,7 @@ def rewrite_style_tags(
attr_name: str, attr_value: str | None, css_rewriter: CssRewriter
) -> AttrNameAndValue | None:
"""Rewrite style attributes"""
if attr_value and attr_name == "style":
if attr_value and attr_name == "style": # pragma: no coverage (coverage bug)
return (attr_name, css_rewriter.rewrite_inline(attr_value))


Expand Down
12 changes: 6 additions & 6 deletions src/zimscraperlib/rewriting/url_rewriting.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@
from __future__ import annotations

import re
from dataclasses import dataclass
from pathlib import PurePosixPath
from typing import ClassVar, NamedTuple
from typing import ClassVar
from urllib.parse import quote, unquote, urljoin, urlsplit

import idna
Expand All @@ -51,7 +52,8 @@
from zimscraperlib.rewriting.rules import FUZZY_RULES


class AdditionalRule(NamedTuple):
@dataclass
class AdditionalRule:
match: re.Pattern[str]
replace: str

Expand Down Expand Up @@ -147,7 +149,8 @@ def check_validity(cls, value: str) -> None:
raise ValueError(f"Unexpected password in value: {value} {parts.password}")


class RewriteResult(NamedTuple):
@dataclass
class RewriteResult:
absolute_url: str
rewriten_url: str
zim_path: ZimPath | None
Expand Down Expand Up @@ -382,9 +385,6 @@ def normalize(cls, url: HttpUrl) -> ZimPath:
passed to python-libzim for UTF-8 encoding.
"""

if not isinstance(url, HttpUrl):
raise ValueError("Bad argument type passed, HttpUrl expected")

url_parts = urlsplit(url.value)

if not url_parts.hostname:
Expand Down
11 changes: 4 additions & 7 deletions src/zimscraperlib/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,10 @@ def get_mime_for_name(
MIME only guessed from file extension and not actual content.
Filename with no extension are mapped to `no_ext_to`"""
try:
filename = pathlib.Path(filename)
if not filename.suffix:
return no_ext_to
return mimetypes.guess_type(f"{filename.stem}{filename.suffix}")[0] or fallback
except Exception:
return fallback
filename = pathlib.Path(filename)
if not filename.suffix:
return no_ext_to
return mimetypes.guess_type(f"{filename.stem}{filename.suffix}")[0] or fallback


def init_types():
Expand Down
Loading

0 comments on commit f68d568

Please sign in to comment.