Skip to content

Commit

Permalink
source.browser: implement fallbacks onto old browser module
Browse files Browse the repository at this point in the history
- if my.browser.export is available try to hack HPI config and use it
- if not, fallback to promnesia.sources.browser_old
  • Loading branch information
karlicoss committed Feb 10, 2023
1 parent 3dd864f commit 3c10b6e
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 34 deletions.
8 changes: 8 additions & 0 deletions src/promnesia/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,3 +586,11 @@ def measure(tag: str='', *, logger, unit: str='ms'):
mult = {'s': 1, 'ms': 10**3, 'us': 10**6}[unit]
xx = secs * mult
logger.debug(f'[{tag}]: {xx:.1f}{unit} elapsed')


def is_sqlite_db(x: Path) -> bool:
    """
    Tell whether ``x`` is a regular file whose detected mime type is one of the sqlite ones.

    Anything that is not a regular file (directories, missing paths) is rejected
    before the mime type is looked up.
    """
    if not x.is_file():
        return False
    sqlite_mimes = {
        'application/x-sqlite3',
        'application/vnd.sqlite3',
        # TODO this mime can also match wal files/journals, not sure
    }
    return mime(x) in sqlite_mimes
84 changes: 84 additions & 0 deletions src/promnesia/sources/browser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import re
from typing import Optional, Iterator, Any, TYPE_CHECKING
import warnings

from promnesia.common import Results, Visit, Loc, Second, PathIsh, logger, is_sqlite_db


def index(p: Optional[PathIsh]=None) -> Results:
    """
    Index browser history visits.

    :param p: optional path to look for browser databases in (deprecated).
        When omitted, visits are taken from ``my.browser.all.history``.
        When given, ``my.browser.export`` is tried first with an ad-hoc config
        pointing at ``p``; if that raises, the legacy ``browser_old`` indexer is used.
    :return: generator of visits
    """
    # NOTE(review): imported but not referenced below -- presumably kept for its import-time effects; confirm
    from . import hpi

    if p is None:
        from my.browser.all import history
        yield from _index_new(history())
        return

    warnings.warn('Passing paths to promnesia.sources.browser is deprecated. You should switch to HPI for that. See https://github.com/seanbreckenridge/browserexport#hpi')

    # even if the user doesn't have HPI config for my.browser set up,
    # try to use my.browser.export with a temporary config pointing at the passed path
    try:
        yield from _index_new_with_adhoc_config(path=p)
    except Exception as e:
        logger.exception(e)
        warnings.warn("Setting my.config.browser.export didn't work. You probably need to update HPI.")
    else:
        # HPI-based indexing succeeded, nothing else to do
        return

    # previously an unconditional 'raise RuntimeError' here made the fallback below unreachable
    logger.warning("Falling back onto legacy promnesia.sources.browser_old")
    yield from _index_old(path=p)


def _index_old(*, path: PathIsh) -> Results:
    """Delegate indexing of ``path`` to the legacy ``browser_old`` source."""
    # imported lazily, only when the legacy code path is actually taken
    from .browser_old import index as legacy_index
    yield from legacy_index(path)


def _index_new_with_adhoc_config(*, path: PathIsh) -> Results:
    """
    Index visits via ``my.browser.export`` using a config built on the fly for ``path``.

    The config's ``export_path`` resolves to every file under ``path`` accepted by
    ``is_sqlite_db``, and the cache directory is overridden with a per-path subdirectory.
    """
    ## previously, it was possible for index to be called with multiple different db search paths
    ## this would result in each subsequent call to my.browser.export.history invalidating the cache every time
    ## so we hack cachew path so it's different for each call
    from my.core.core_config import config as hpi_core_config
    # every non-word character of the path is replaced so it can serve as a directory name
    path_slug = re.sub(r'\W', '_', str(path))
    cache_override = hpi_core_config.get_cache_dir() / path_slug
    ##

    from my.core.common import classproperty, Paths, get_files

    def _find_databases() -> Paths:
        # evaluated lazily, whenever the config's export_path is accessed
        return tuple(f for f in get_files(path, glob='**/*') if is_sqlite_db(f))

    class config:
        class core:
            cache_dir = cache_override

        class browser:
            class export:
                @classproperty
                def export_path(cls) -> Paths:
                    return _find_databases()

    from my.core.cfg import tmp_config
    # NOTE(review): tmp_config presumably swaps in the config above for the listed modules
    # for the duration of the block -- confirm against HPI docs
    with tmp_config(modules='my.browser.export|my.core.core_config', config=config):
        # must be imported inside the context manager, after the config has been set up
        from my.browser.export import history
        yield from _index_new(history())


# browserexport is only imported for static type checking;
# at runtime the name is just an alias for Any
if not TYPE_CHECKING:
    BrowserMergeVisit = Any
else:
    from browserexport.merge import Visit as BrowserMergeVisit


def _index_new(history: Iterator[BrowserMergeVisit]) -> Results:
    """
    Convert browser history entries into promnesia visits.

    For each entry, the title and duration are taken from its ``metadata`` when
    that is present; the locator title falls back to the url if there is no
    (non-empty) title.
    """
    for entry in history:
        meta = entry.metadata
        title: Optional[str] = None if meta is None else meta.title
        spent: Optional[Second] = None if meta is None else meta.duration
        link = entry.url
        yield Visit(
            url=link,
            dt=entry.dt,
            locator=Loc(title=title or link, href=link),
            duration=spent,
        )
22 changes: 0 additions & 22 deletions src/promnesia/sources/browser_new.py

This file was deleted.

14 changes: 2 additions & 12 deletions src/promnesia/sources/browser_old.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,21 @@

import pytz

from ..common import PathIsh, Results, Visit, Loc, get_logger, Second, mime
from ..common import PathIsh, Results, Visit, Loc, logger, Second, is_sqlite_db
from .. import config

# todo mcachew?
from cachew import cachew

logger = get_logger()


def index(p: PathIsh) -> Results:
pp = Path(p)
assert pp.exists(), pp # just in case of broken symlinks

# is_file check because it also returns dirs
# TODO hmm, not sure what I meant here -- which dirs? behind symlinks?
is_db = lambda x: x.is_file() and mime(x) in {
'application/x-sqlite3',
'application/vnd.sqlite3',
# TODO this mime can also match wal files/journals, not sure
}

# todo warn if filtered out too many?
# todo wonder how quickly mimes can be computed?
# todo ugh, dunno, maybe this really belongs to hpi?? need get_files etc...
dbs = [p for p in sorted(pp.rglob('*')) if is_db(p)]
dbs = [p for p in sorted(pp.rglob('*')) if is_sqlite_db(p)]

assert len(dbs) > 0, pp
logger.info('processing %d databases', len(dbs))
Expand Down
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ commands =
hpi module install my.reddit
hpi module install my.fbmessenger
hpi module install my.google.takeout.parser
hpi module install my.browser.export

{envpython} -m mypy --install-types --non-interactive \
-p promnesia.sources \
Expand Down

0 comments on commit 3c10b6e

Please sign in to comment.