Skip to content

Commit

Permalink
feat: Allow downloading republished episodes (#208)
Browse files Browse the repository at this point in the history
[no-bump]
  • Loading branch information
janw authored Dec 4, 2024
1 parent c8a7457 commit 591209c
Show file tree
Hide file tree
Showing 15 changed files with 1,047 additions and 843 deletions.
276 changes: 138 additions & 138 deletions .assets/podcast-archiver-help.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ jobs:
pytest:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version:
- "3.10"
Expand Down
8 changes: 8 additions & 0 deletions podcast_archiver/compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import sys

# ``datetime.UTC`` was only added in Python 3.11; on older interpreters the
# same name is bound to ``timezone.utc`` so importers never need to care.
if sys.version_info < (3, 11):
    from datetime import timezone

    UTC = timezone.utc
else:
    from datetime import UTC
85 changes: 68 additions & 17 deletions podcast_archiver/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,51 @@
import sqlite3
from abc import abstractmethod
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime
from threading import Lock
from typing import TYPE_CHECKING, Iterator

from podcast_archiver.logging import logger

if TYPE_CHECKING:
from podcast_archiver.models import Episode
from podcast_archiver.models import EpisodeSkeleton


def adapt_datetime_iso(val: datetime) -> str:
    """Serialize ``val`` to its ISO 8601 text representation."""
    iso_text = val.isoformat()
    return iso_text


def convert_datetime_iso(val: bytes) -> datetime:
    """Decode ``val`` and parse it as an ISO 8601 timestamp."""
    iso_text = val.decode()
    return datetime.fromisoformat(iso_text)


# Module-level sqlite3 configuration: ``datetime`` parameters are written as
# ISO 8601 text, and values from columns declared as TIMESTAMP are parsed back
# into ``datetime`` objects (on connections that enable declared-type detection).
sqlite3.register_adapter(datetime, adapt_datetime_iso)
sqlite3.register_converter("TIMESTAMP", convert_datetime_iso)


@dataclass(frozen=True, slots=True)
class EpisodeInDb:
length: int | None = None
published_time: datetime | None = None


class BaseDatabase:
    """Interface implemented by the episode database backends."""

    @abstractmethod
    def add(self, episode: EpisodeSkeleton) -> None:
        """Record ``episode`` in the database."""
        pass  # pragma: no cover

    @abstractmethod
    def exists(self, episode: EpisodeSkeleton) -> EpisodeInDb | None:
        """Return the stored record for ``episode``, or ``None`` if there is none."""
        pass  # pragma: no cover


class DummyDatabase(BaseDatabase):
    """No-op backend: stores nothing and never reports an episode as known."""

    def add(self, episode: EpisodeSkeleton) -> None:
        """Discard ``episode``; nothing is persisted."""
        pass

    def exists(self, episode: EpisodeSkeleton) -> EpisodeInDb | None:
        """Always return ``None``."""
        return None


class Database(BaseDatabase):
Expand All @@ -43,7 +63,8 @@ def __init__(self, filename: str, ignore_existing: bool) -> None:

@contextmanager
def get_conn(self) -> Iterator[sqlite3.Connection]:
    """Yield a connection to ``self.filename`` while holding ``self.lock``.

    The connection parses declared column types (so registered converters
    apply) and returns rows as ``sqlite3.Row``. On leaving the context the
    pending transaction is committed, or rolled back if an exception
    escaped, and the connection is then closed.
    """
    with self.lock:
        conn = sqlite3.connect(self.filename, detect_types=sqlite3.PARSE_DECLTYPES)
        try:
            conn.row_factory = sqlite3.Row
            # A connection used as a context manager only commits/rolls back;
            # it does not close itself, hence the explicit close() below.
            with conn:
                yield conn
        finally:
            conn.close()

def migrate(self) -> None:
Expand All @@ -53,26 +74,56 @@ def migrate(self) -> None:
"""\
CREATE TABLE IF NOT EXISTS episodes(
guid TEXT UNIQUE NOT NULL,
title TEXT
title TEXT,
length UNSIGNED BIG INT,
published_time TIMESTAMP
)"""
)

def add(self, episode: Episode) -> None:
self._add_column_if_missing(
"length",
"ALTER TABLE episodes ADD COLUMN length UNSIGNED BIG INT",
)
self._add_column_if_missing(
"published_time",
"ALTER TABLE episodes ADD COLUMN published_time TIMESTAMP",
)

def _add_column_if_missing(self, name: str, alter_stmt: str) -> None:
    """Execute ``alter_stmt`` unless ``self._has_column`` already reports column ``name``."""
    with self.get_conn() as conn:
        if self._has_column(conn, name):
            return
        logger.debug(f"Adding missing DB column {name}")
        conn.execute(alter_stmt)

def _has_column(self, conn: sqlite3.Connection, name: str) -> bool:
    """Report whether the ``episodes`` table has a column called ``name``."""
    query = "SELECT EXISTS(SELECT 1 FROM pragma_table_info('episodes') WHERE name = ?)"
    row = conn.execute(query, (name,)).fetchone()
    return bool(row[0])

def add(self, episode: EpisodeSkeleton) -> None:
    """Store ``episode``, replacing any existing row that has the same guid.

    Writes the guid, title, enclosure length and published time. Database
    errors are logged at debug level and otherwise ignored, so a failed
    write never propagates to the caller.
    """
    params = (
        episode.guid,
        episode.title,
        episode.enclosure.length,
        episode.published_time,
    )
    with self.get_conn() as conn:
        try:
            conn.execute(
                "INSERT OR REPLACE INTO episodes(guid, title, length, published_time) VALUES (?, ?, ?, ?)",
                params,
            )
        except sqlite3.DatabaseError as exc:
            # IntegrityError is a DatabaseError subclass, so this one handler
            # also covers constraint failures (e.g. a NULL guid).
            logger.debug("Error adding %s to db", episode, exc_info=exc)

def exists(self, episode: EpisodeSkeleton) -> EpisodeInDb | None:
    """Look up ``episode`` by guid and return its stored metadata.

    Returns ``None`` when ``self.ignore_existing`` is set (the database is
    not queried at all) or when no row has the episode's guid; otherwise an
    ``EpisodeInDb`` built from the row's ``length`` and ``published_time``.
    """
    if self.ignore_existing:
        return None
    with self.get_conn() as conn:
        row = conn.execute(
            "SELECT length, published_time FROM episodes WHERE guid = ?",
            (episode.guid,),
        ).fetchone()
        if row is None:
            return None
        # Relies on name-addressable rows (sqlite3.Row) so the selected
        # column names map directly onto the EpisodeInDb fields.
        return EpisodeInDb(**row)
9 changes: 3 additions & 6 deletions podcast_archiver/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@

from requests import Response

from podcast_archiver.models import Episode, FeedInfo
from podcast_archiver.models import EpisodeSkeleton, FeedInfo


class DownloadJob:
episode: Episode
episode: EpisodeSkeleton
feed_info: FeedInfo
target: Path
stop_event: Event
Expand All @@ -31,7 +31,7 @@ class DownloadJob:

def __init__(
self,
episode: Episode,
episode: EpisodeSkeleton,
*,
target: Path,
max_download_bytes: int | None = None,
Expand All @@ -55,9 +55,6 @@ def __call__(self) -> EpisodeResult:
return EpisodeResult(self.episode, DownloadResult.FAILED)

def run(self) -> EpisodeResult:
if self.target.exists():
return EpisodeResult(self.episode, DownloadResult.ALREADY_EXISTS)

self.target.parent.mkdir(parents=True, exist_ok=True)
logger.info("Downloading: %s", self.episode)
response = session.get_and_raise(self.episode.enclosure.href, stream=True)
Expand Down
10 changes: 10 additions & 0 deletions podcast_archiver/enums.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

from enum import Enum


Expand All @@ -10,6 +12,7 @@ class QueueCompletionType(StrEnum):
COMPLETED = "Archived all episodes"
FOUND_EXISTING = "Archive is up to date"
MAX_EPISODES = "Maximum episode count reached"
FAILED = "Failed"


class DownloadResult(StrEnum):
Expand All @@ -18,5 +21,12 @@ class DownloadResult(StrEnum):
FAILED = "Failed"
ABORTED = "Aborted"

@classmethod
def successful(cls) -> set[DownloadResult]:
    """Return the members ``COMPLETED_SUCCESSFULLY`` and ``ALREADY_EXISTS`` as a set."""
    return {cls.COMPLETED_SUCCESSFULLY, cls.ALREADY_EXISTS}

def __str__(self) -> str:
    """Render the member as its underlying ``value``."""
    text = self.value
    return text
Loading

0 comments on commit 591209c

Please sign in to comment.