diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 075fb8c..a2b7099 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -34,7 +34,7 @@ dynamic = ["version"] Homepage = "https://github.com/kiwix/mirrors-qa" [project.scripts] -mirrors-qa-backend = "mirrors_qa_backend.entrypoint:main" +update-mirrors = "mirrors_qa_backend.entrypoint:main" [project.optional-dependencies] scripts = [ diff --git a/backend/src/mirrors_qa_backend/db/__init__.py b/backend/src/mirrors_qa_backend/db/__init__.py index 20fdf15..f163494 100644 --- a/backend/src/mirrors_qa_backend/db/__init__.py +++ b/backend/src/mirrors_qa_backend/db/__init__.py @@ -39,21 +39,21 @@ def count_from_stmt(session: OrmSession, stmt: SelectBase) -> int: def initialize_mirrors() -> None: with Session.begin() as session: - nb_mirrors = count_from_stmt(session, select(models.Mirror)) current_mirrors = get_current_mirrors() + nb_mirrors = count_from_stmt(session, select(models.Mirror)) if nb_mirrors == 0: logger.info("No mirrors exist in database.") if not current_mirrors: logger.info(f"No mirrors were found on {Settings.mirrors_url!r}") return - results = mirrors.update_mirrors(session, current_mirrors) + result = mirrors.create_or_update_status(session, current_mirrors) logger.info( - f"Registered {results.nb_mirrors_added} mirrors " + f"Registered {result.nb_mirrors_added} mirrors " f"from {Settings.mirrors_url!r}" ) else: logger.info(f"Found {nb_mirrors} mirrors in database.") - result = mirrors.update_mirrors(session, current_mirrors) + result = mirrors.create_or_update_status(session, current_mirrors) logger.info( f"Added {result.nb_mirrors_added} mirrors. " f"Disabled {result.nb_mirrors_disabled} mirrors." diff --git a/backend/src/mirrors_qa_backend/db/mirrors.py b/backend/src/mirrors_qa_backend/db/mirrors.py index 21e4c74..8e5e8a0 100644 --- a/backend/src/mirrors_qa_backend/db/mirrors.py +++ b/backend/src/mirrors_qa_backend/db/mirrors.py @@ -10,22 +10,19 @@ @dataclass -class UpdateMirrorsResult: - """Represents the results of an update to the list of mirrors in the database""" +class MirrorsUpdateResult: + """Results of an update to the list of mirrors in the database""" nb_mirrors_added: int = 0 nb_mirrors_disabled: int = 0 def create_mirrors(session: OrmSession, mirrors: list[schemas.Mirror]) -> int: - """ - Saves all the mirrors to the database. - - Returns the total number of mirrors created. + """Number of mirrors created in the database. Assumes that each mirror does not exist on the database. """ - total = 0 + nb_created = 0 for mirror in mirrors: db_mirror = models.Mirror( id=mirror.id, @@ -55,26 +52,23 @@ def create_mirrors(session: OrmSession, mirrors: list[schemas.Mirror]) -> int: logger.debug( f"Registered new mirror: {db_mirror.id!r} for country: {country.name!r}" ) - total += 1 - return total + nb_created += 1 + return nb_created -def update_mirrors( +def create_or_update_status( session: OrmSession, mirrors: list[schemas.Mirror] -) -> UpdateMirrorsResult: - """ - Disables mirrors in the database that are not in the provided list of mirrors. - New mirrors in the database are saved accordingly. +) -> MirrorsUpdateResult: + """Updates the status of mirrors in the database and creates any new mirrors. - Raises an EmptyMirrorsError if the provided list of mirrors is empty. + Raises: + EmptyMirrorsError if the provided list of mirrors is empty. - Returns an UpdateMirrorsResult showing the number of mirrors added and updated. """ - result = UpdateMirrorsResult() - # If there are no countries, disable all mirrors if not mirrors: raise EmptyMirrorsError("mirrors list must not be empty") + result = MirrorsUpdateResult() # Map the id (hostname) of each mirror from the mirrors list for comparison # against the id of mirrors from the database. To be used in determining # if this mirror is a new mirror, in which case it should be added @@ -97,7 +91,7 @@ def update_mirrors( result.nb_mirrors_added += create_mirrors(session, [mirror]) # Disable any mirror in the database that doesn't exist on the current - # list of mirrors. However, if a mirror is diabled in the database and + # list of mirrors. However, if a mirror is disabled in the database and # exists in the list, re-enable it for db_mirror_id, db_mirror in db_mirrors.items(): if db_mirror_id not in current_mirrors: diff --git a/backend/src/mirrors_qa_backend/db/models.py b/backend/src/mirrors_qa_backend/db/models.py index db41a66..1cd69e9 100644 --- a/backend/src/mirrors_qa_backend/db/models.py +++ b/backend/src/mirrors_qa_backend/db/models.py @@ -1,6 +1,6 @@ from __future__ import annotations -from datetime import datetime +import datetime from ipaddress import IPv4Address from uuid import UUID @@ -27,7 +27,7 @@ class Base(MappedAsDataclass, DeclarativeBase): list[str]: ARRAY( item_type=String ), # transform Python list[str] into PostgreSQL Array of strings - datetime: DateTime( + datetime.datetime: DateTime( timezone=False ), # transform Python datetime into PostgreSQL Datetime without timezone IPv4Address: INET, # transform Python IPV4Address into PostgreSQL INET @@ -99,7 +99,9 @@ class Worker(Base): pubkey_pkcs8: Mapped[str] pubkey_fingerprint: Mapped[str] - last_seen_on: Mapped[datetime] = mapped_column(default_factory=datetime.now) + last_seen_on: Mapped[datetime.datetime] = mapped_column( + default_factory=datetime.datetime.now + ) countries: Mapped[list[Country]] = relationship(back_populates="worker", init=False) @@ -108,8 +110,10 @@ class Test(Base): id: Mapped[UUID] = mapped_column( init=False, primary_key=True, server_default=text("uuid_generate_v4()") ) - requested_on: Mapped[datetime] = mapped_column(default_factory=datetime.now) - started_on: Mapped[datetime | None] = mapped_column(default=None) + requested_on: Mapped[datetime.datetime] = mapped_column( + default_factory=datetime.datetime.now + ) + started_on: Mapped[datetime.datetime | None] = mapped_column(default=None) status: Mapped[StatusEnum] = mapped_column( Enum( StatusEnum, diff --git a/backend/src/mirrors_qa_backend/entrypoint.py b/backend/src/mirrors_qa_backend/entrypoint.py index 673cae6..c8699c7 100644 --- a/backend/src/mirrors_qa_backend/entrypoint.py +++ b/backend/src/mirrors_qa_backend/entrypoint.py @@ -1,18 +1,13 @@ import argparse import logging -from mirrors_qa_backend import Settings, db, logger +from mirrors_qa_backend import db, logger from mirrors_qa_backend.db import mirrors from mirrors_qa_backend.extract import get_current_mirrors def main(): parser = argparse.ArgumentParser() - parser.add_argument( - "--update-mirrors", - action="store_true", - help=f"Update the list of mirrors from {Settings.mirrors_url}", - ) parser.add_argument( "--verbose", "-v", help="Show verbose output", action="store_true" ) @@ -22,9 +17,8 @@ def main(): if args.verbose: logger.setLevel(logging.DEBUG) - if args.update_mirrors: - with db.Session.begin() as session: - mirrors.update_mirrors(session, get_current_mirrors()) + with db.Session.begin() as session: + mirrors.create_or_update_status(session, get_current_mirrors()) if __name__ == "__main__": diff --git a/backend/src/mirrors_qa_backend/exceptions.py b/backend/src/mirrors_qa_backend/exceptions.py index 116b922..da14c6e 100644 --- a/backend/src/mirrors_qa_backend/exceptions.py +++ b/backend/src/mirrors_qa_backend/exceptions.py @@ -1,6 +1,13 @@ +from requests import RequestException + + class EmptyMirrorsError(Exception): - pass + """An empty list was used to update the mirrors in the database.""" class MirrorsExtractError(Exception): - pass + """An error occurred while extracting mirror data from page DOM""" + + +class MirrorsRequestError(RequestException): + """A network error occurred while fetching mirrors from the mirrors URL""" diff --git a/backend/src/mirrors_qa_backend/extract.py b/backend/src/mirrors_qa_backend/extract.py index c069dfa..427f1f0 100644 --- a/backend/src/mirrors_qa_backend/extract.py +++ b/backend/src/mirrors_qa_backend/extract.py @@ -7,28 +7,31 @@ from bs4.element import Tag from mirrors_qa_backend import logger, schemas -from mirrors_qa_backend.exceptions import MirrorsExtractError +from mirrors_qa_backend.exceptions import MirrorsExtractError, MirrorsRequestError from mirrors_qa_backend.settings import Settings def get_current_mirrors() -> list[schemas.Mirror]: """ - Returns list of current mirrors from the mrirors url. + Current mirrors from the mirrors url. - Raises MirrorsExtractError if the parser is unable to extract the mirrors - from the page. This is most likely as a result of the page being updated - indicating that the parsing logic should be updated + Raises: + MirrorsExtractError: parser unable to extract the mirrors from the page. + Page DOM has been updated and parser needs an update as well. + MirrorsRequestError: a network error occured while fetching mirrors """ - def find_country_rows(tag: Tag) -> bool: + def is_country_row(tag: Tag) -> bool: """ - Filters out table rows that do not contain mirror - data from the table body. + Filters out table rows that do not contain mirror data. """ return tag.name == "tr" and tag.findChild("td", class_="newregion") is None - resp = requests.get(Settings.mirrors_url, timeout=Settings.requests_timeout) - resp.raise_for_status() + try: + resp = requests.get(Settings.mirrors_url, timeout=Settings.requests_timeout) + resp.raise_for_status() + except requests.RequestException as exc: + raise MirrorsRequestError from exc soup = BeautifulSoup(resp.text, features="html.parser") body = soup.find("tbody") @@ -40,7 +43,7 @@ def find_country_rows(tag: Tag) -> bool: mirrors: list[schemas.Mirror] = [] - for row in body.find_all(find_country_rows): + for row in body.find_all(is_country_row): base_url = row.find("a", string="HTTP")["href"] hostname: Any = urlsplit( base_url @@ -51,7 +54,7 @@ def find_country_rows(tag: Tag) -> bool: try: country: Any = pycountry.countries.search_fuzzy(country_name)[0] except LookupError: - logger.warning(f"Could not get information for country: {country_name!r}") + logger.error(f"Could not get information for country: {country_name!r}") continue else: mirrors.append( diff --git a/backend/tests/db/test_mirrors.py b/backend/tests/db/test_mirrors.py index 287a0d1..5f4d53e 100644 --- a/backend/tests/db/test_mirrors.py +++ b/backend/tests/db/test_mirrors.py @@ -30,7 +30,23 @@ def db_mirror() -> models.Mirror: @pytest.fixture(scope="session") def schema_mirror(db_mirror: models.Mirror) -> schemas.Mirror: - return schemas.Mirror.model_validate(db_mirror) + return schemas.Mirror( + id=db_mirror.id, + base_url=db_mirror.base_url, + enabled=db_mirror.enabled, + region=db_mirror.region, + asn=db_mirror.asn, + score=db_mirror.score, + latitude=db_mirror.latitude, + longitude=db_mirror.longitude, + country_only=db_mirror.country_only, + region_only=db_mirror.region_only, + as_only=db_mirror.as_only, + other_countries=db_mirror.other_countries, + country=schemas.Country( + code=db_mirror.country.code, name=db_mirror.country.name + ), + ) @pytest.fixture(scope="session") @@ -69,7 +85,7 @@ def test_create_mirrors(dbsession: OrmSession, schema_mirror: schemas.Mirror): def test_raises_empty_mirrors_error(dbsession: OrmSession): with pytest.raises(EmptyMirrorsError): - mirrors.update_mirrors(dbsession, []) + mirrors.create_or_update_status(dbsession, []) def test_register_new_country_mirror( @@ -79,7 +95,9 @@ def test_register_new_country_mirror( new_schema_mirror: schemas.Mirror, ): dbsession.add(db_mirror) - result = mirrors.update_mirrors(dbsession, [schema_mirror, new_schema_mirror]) + result = mirrors.create_or_update_status( + dbsession, [schema_mirror, new_schema_mirror] + ) assert result.nb_mirrors_added == 1 @@ -89,7 +107,7 @@ def test_disable_old_mirror( new_schema_mirror: schemas.Mirror, ): dbsession.add(db_mirror) - result = mirrors.update_mirrors(dbsession, [new_schema_mirror]) + result = mirrors.create_or_update_status(dbsession, [new_schema_mirror]) assert result.nb_mirrors_disabled == 1 @@ -97,7 +115,7 @@ def test_no_mirrors_disabled( dbsession: OrmSession, db_mirror: models.Mirror, schema_mirror: schemas.Mirror ): dbsession.add(db_mirror) - result = mirrors.update_mirrors(dbsession, [schema_mirror]) + result = mirrors.create_or_update_status(dbsession, [schema_mirror]) assert result.nb_mirrors_disabled == 0 @@ -105,7 +123,7 @@ def test_no_mirrors_added( dbsession: OrmSession, db_mirror: models.Mirror, schema_mirror: schemas.Mirror ): dbsession.add(db_mirror) - result = mirrors.update_mirrors(dbsession, [schema_mirror]) + result = mirrors.create_or_update_status(dbsession, [schema_mirror]) assert result.nb_mirrors_added == 0 @@ -134,5 +152,5 @@ def test_re_enable_existing_mirror( schema_mirror = schemas.Mirror.model_validate(db_mirror) schema_mirror.enabled = True - result = mirrors.update_mirrors(dbsession, [schema_mirror]) + result = mirrors.create_or_update_status(dbsession, [schema_mirror]) assert result.nb_mirrors_added == 1