Skip to content

Commit

Permalink
style: apply flake8, black, isort
Browse files (browse the repository at this point in the history)
  • Loading branch information
jsstevenson committed Mar 28, 2024
1 parent 06355c1 commit 9c597be
Show file tree
Hide file tree
Showing 13 changed files with 54 additions and 56 deletions.
3 changes: 0 additions & 3 deletions src/biocommons/seqrepo/_internal/logging_support.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
import logging


class DuplicateFilter:
"""
Filters away duplicate log messages.
Expand Down
13 changes: 3 additions & 10 deletions src/biocommons/seqrepo/_versionwarning.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,13 @@
"""emits a warning when imported under Python < 3.6
This module may be used by other biocommons packages
"""
"""emits a warning when imported under Python < 3.9"""

import logging
import sys

__all__ = []

version_warning = (
"biocommons packages are tested and supported only on Python >= 3.6"
" (https://github.com/biocommons/org/wiki/Migrating-to-Python-3.6)"
)
version_warning = "This package is tested and supported only on Python >= 3.9."

_logger = logging.getLogger(__package__)

if sys.version_info < (3, 6):
if sys.version_info < (3, 9):
_logger.warning(version_warning)
30 changes: 16 additions & 14 deletions src/biocommons/seqrepo/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import itertools
import logging
import os
import pprint
import re
import shutil
import stat
Expand All @@ -30,7 +29,6 @@

import bioutils.assemblies
import bioutils.seqfetcher
import six
import tqdm

from . import SeqRepo, __version__
Expand Down Expand Up @@ -62,7 +60,7 @@ def _get_remote_instances(opts):
]
_logger.debug("Executing `" + " ".join(rsync_cmd) + "`")
lines = subprocess.check_output(rsync_cmd).decode().splitlines()[1:]
dirs = (m.group(1) for m in (line_re.match(l) for l in lines) if m)
dirs = (m.group(1) for m in (line_re.match(line) for line in lines) if m)
return sorted(list(filter(instance_name_new_re.match, dirs)))


Expand All @@ -81,12 +79,14 @@ def _latest_instance_path(opts):


def parse_arguments():
epilog = (
f"seqrepo {__version__}"
"See https://github.com/biocommons/biocommons.seqrepo for more information"
)
top_p = argparse.ArgumentParser(
description=__doc__.split("\n\n")[0],
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
epilog="seqrepo "
+ __version__
+ ". See https://github.com/biocommons/biocommons.seqrepo for more information",
epilog=epilog
)
top_p.add_argument("--dry-run", "-n", default=False, action="store_true")
top_p.add_argument("--remote-host", default="dl.biocommons.org", help="rsync server host")
Expand Down Expand Up @@ -352,12 +352,12 @@ def add_assembly_names(opts):
]
if not_in_seqrepo:
_logger.warning(
"Assembly {an} references {n} accessions not in SeqRepo instance {opts.instance_name} (e.g., {acs})".format(
"Assembly {an} references {n} accessions not in SeqRepo instance "
"{opts.instance_name} (e.g., {acs})".format(
an=assy_name,
n=len(not_in_seqrepo),
opts=opts,
acs=", ".join(not_in_seqrepo[:5] + ["..."]),
seqrepo_dir=seqrepo_dir,
)
)
if not opts.partial_load:
Expand Down Expand Up @@ -432,8 +432,8 @@ def _rec_iterator():
"{ns}:{a}".format(ns=ns, a=a) for ns, aliases in sorted(nsad.items()) for a in aliases
]
print(">" + " ".join(aliases))
for l in _wrap_lines(srec["seq"], 100):
print(l)
for line in _wrap_lines(srec["seq"], 100):
print(line)


def export_aliases(opts):
Expand Down Expand Up @@ -477,7 +477,7 @@ def init(opts):
seqrepo_dir = os.path.join(opts.root_directory, opts.instance_name)
if os.path.exists(seqrepo_dir) and len(os.listdir(seqrepo_dir)) > 0:
raise IOError("{seqrepo_dir} exists and is not empty".format(seqrepo_dir=seqrepo_dir))
sr = SeqRepo(seqrepo_dir, writeable=True) # flake8: noqa
sr = SeqRepo(seqrepo_dir, writeable=True) # noqa: F841


def list_local_instances(opts):
Expand Down Expand Up @@ -586,12 +586,14 @@ def show_status(opts):
)
)
print(
"sequences: {ss[n_sequences]} sequences, {ss[tot_length]} residues, {ss[n_files]} files".format(
"sequences: {ss[n_sequences]} sequences, {ss[tot_length]} residues, "
"{ss[n_files]} files".format(
ss=sr.sequences.stats()
)
)
print(
"aliases: {sa[n_aliases]} aliases, {sa[n_current]} current, {sa[n_namespaces]} namespaces, {sa[n_sequences]} sequences".format(
"aliases: {sa[n_aliases]} aliases, {sa[n_current]} current, "
"{sa[n_namespaces]} namespaces, {sa[n_sequences]} sequences".format(
sa=sr.aliases.stats()
)
)
Expand Down Expand Up @@ -679,7 +681,7 @@ def _drop_write(p):

def start_shell(opts):
seqrepo_dir = os.path.join(opts.root_directory, opts.instance_name)
sr = SeqRepo(seqrepo_dir)
sr = SeqRepo(seqrepo_dir) # noqa: 682
import IPython

IPython.embed(
Expand Down
3 changes: 2 additions & 1 deletion src/biocommons/seqrepo/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@ def parse_caching_env_var(env_name: str, env_default: str) -> Optional[int]:


SEQREPO_LRU_CACHE_MAXSIZE = parse_caching_env_var("SEQREPO_LRU_CACHE_MAXSIZE", "1000000")
# Using a default value here of -1 to differentiate not setting this env var and an explicit None (unbounded cache)
# Using a default value here of -1 to differentiate not setting this env var and an
# explicit None (unbounded cache)
SEQREPO_FD_CACHE_MAXSIZE = parse_caching_env_var("SEQREPO_FD_CACHE_MAXSIZE", "-1")
1 change: 0 additions & 1 deletion src/biocommons/seqrepo/dataproxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import logging
import os
from abc import ABC, abstractmethod
from collections.abc import Sequence
from urllib.parse import urlparse

import requests
Expand Down
2 changes: 1 addition & 1 deletion src/biocommons/seqrepo/fastadir/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .fastadir import FastaDir
from .fastadir import FastaDir # noqa: F401
10 changes: 6 additions & 4 deletions src/biocommons/seqrepo/fastadir/fabgz.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def _find_bgzip():
raise RuntimeError("Didn't find version string in bgzip executable ({exe})".format(exe=exe))
except missing_file_exception:
raise RuntimeError(
"{exe} doesn't exist; you need to install htslib and tabix (See https://github.com/biocommons/biocommons.seqrepo#requirements)".format(
"{exe} doesn't exist; you need to install htslib and tabix "
"(See https://github.com/biocommons/biocommons.seqrepo#requirements)".format(
exe=exe
)
)
Expand All @@ -74,7 +75,8 @@ class FabgzReader(object):
"""
Class that implements ContextManager and wraps a FabgzReader.
The FabgzReader is returned when acquired in a contextmanager with statement.
"""
"""

def __init__(self, filename):
self.lock = threading.Lock()
self._fh = FastaFile(filename)
Expand Down Expand Up @@ -141,8 +143,8 @@ def wrap_lines(seq, line_width):

if seq_id not in self._added:
self._fh.write(">" + seq_id + "\n")
for l in wrap_lines(seq, line_width):
self._fh.write(l + "\n")
for line in wrap_lines(seq, line_width):
self._fh.write(line + "\n")
self._added.add(seq_id)
_logger.debug("added seq_id {i}; length {l}".format(i=seq_id, l=len(seq)))
return seq_id
Expand Down
4 changes: 1 addition & 3 deletions src/biocommons/seqrepo/fastadir/fastadir.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import contextlib
import threading
import datetime
import functools
import importlib.resources
Expand Down Expand Up @@ -83,7 +81,7 @@ def __init__(self, root_dir, writeable=False, check_same_thread=True, fd_cache_s
)

if fd_cache_size == 0:
_logger.info(f"File descriptor caching disabled")
_logger.info("File descriptor caching disabled")
else:
_logger.warning(f"File descriptor caching enabled (size={fd_cache_size})")

Expand Down
2 changes: 1 addition & 1 deletion src/biocommons/seqrepo/fastaiter/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .fastaiter import FastaIter
from .fastaiter import FastaIter # noqa: F401
6 changes: 3 additions & 3 deletions src/biocommons/seqrepo/fastaiter/fastaiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ def FastaIter(handle):
for line in handle:
if line.startswith(">"):
if header is not None: # not the first record
yield header, "".join(seq_lines)
yield header, "".join(seq_lines) # noqa: F821
seq_lines = list()
header = line[1:].rstrip()
else:
if header is not None: # not the first record
seq_lines.append(line.strip())
seq_lines.append(line.strip()) # noqa: F821

if header is not None:
yield header, "".join(seq_lines)
yield header, "".join(seq_lines) # noqa: F821
else: # no FASTA records in file
return
2 changes: 1 addition & 1 deletion src/biocommons/seqrepo/seqaliasdb/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .seqaliasdb import SeqAliasDB
from .seqaliasdb import SeqAliasDB # noqa: F401
11 changes: 6 additions & 5 deletions src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import itertools
import logging
import sqlite3

import pkg_resources
import yoyo

from .._internal.logging_support import DuplicateFilter
from .._internal.translate import translate_alias_records, translate_api2db

_logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -39,7 +37,8 @@ def __init__(

if translate_ncbi_namespace is not None:
_logger.warning(
"translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed"
"translate_ncbi_namespace is obsolete; translation is now automatic; "
"this flag will be removed"
)

if self._writeable:
Expand Down Expand Up @@ -87,7 +86,8 @@ def fetch_aliases(self, seq_id, current_only=True, translate_ncbi_namespace=None
)
if translate_ncbi_namespace is not None:
_logger.warning(
"translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed"
"translate_ncbi_namespace is obsolete; translation is now automatic; "
"this flag will be removed"
)
return [dict(r) for r in self.find_aliases(seq_id=seq_id, current_only=current_only)]

Expand Down Expand Up @@ -119,7 +119,8 @@ def eq_or_like(s):

if translate_ncbi_namespace is not None:
_logger.warning(
"translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed"
"translate_ncbi_namespace is obsolete; translation is now automatic; "
"this flag will be removed"
)

if namespace is not None:
Expand Down
23 changes: 14 additions & 9 deletions src/biocommons/seqrepo/seqrepo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import bioutils.digests
from bioutils.digests import seq_seqhash as sha512t24u

from .config import SEQREPO_LRU_CACHE_MAXSIZE, SEQREPO_FD_CACHE_MAXSIZE
from .config import SEQREPO_FD_CACHE_MAXSIZE, SEQREPO_LRU_CACHE_MAXSIZE
from .fastadir import FastaDir
from .seqaliasdb import SeqAliasDB

Expand Down Expand Up @@ -100,7 +100,7 @@ def __init__(
translate_ncbi_namespace=None,
check_same_thread=False,
use_sequenceproxy=True,
fd_cache_size=0
fd_cache_size=0,
):
self._root_dir = root_dir
self._upcase = upcase
Expand All @@ -123,7 +123,9 @@ def __init__(
self._seq_path,
writeable=self._writeable,
check_same_thread=self._check_same_thread,
fd_cache_size=SEQREPO_FD_CACHE_MAXSIZE if SEQREPO_FD_CACHE_MAXSIZE != -1 else fd_cache_size
fd_cache_size=(
SEQREPO_FD_CACHE_MAXSIZE if SEQREPO_FD_CACHE_MAXSIZE != -1 else fd_cache_size
),
)
self.aliases = SeqAliasDB(
self._db_path,
Expand All @@ -133,7 +135,8 @@ def __init__(

if translate_ncbi_namespace is not None:
_logger.warn(
"translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed"
"translate_ncbi_namespace is obsolete; translation is now automatic; "
"this flag will be removed"
)

def __contains__(self, nsa):
Expand Down Expand Up @@ -210,7 +213,7 @@ def store(self, seq, nsaliases):

try:
seqhash = sha512t24u(seq)
except Exception as e:
except Exception:
import pprint

_logger.critical("Exception raised for " + pprint.pformat(nsaliases))
Expand Down Expand Up @@ -253,8 +256,8 @@ def store(self, seq, nsaliases):
n_aliases_added += len(upd_tuples)
if (
self._pending_sequences > ct_n_seqs
or self._pending_aliases > ct_n_aliases
or self._pending_sequences_len > ct_n_residues
or self._pending_aliases > ct_n_aliases # noqa: W503
or self._pending_sequences_len > ct_n_residues # noqa: W503
): # pragma: no cover
_logger.info(
"Hit commit thresholds ({self._pending_sequences} sequences, "
Expand All @@ -279,7 +282,8 @@ def translate_alias(

if translate_ncbi_namespace is not None:
_logger.warn(
"translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed"
"translate_ncbi_namespace is obsolete; translation is now automatic; "
"this flag will be removed"
)
seq_id = self._get_unique_seqid(alias=alias, namespace=namespace)
aliases = self.aliases.find_aliases(seq_id=seq_id)
Expand All @@ -297,7 +301,8 @@ def translate_identifier(
"""
if translate_ncbi_namespace is not None:
_logger.warn(
"translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed"
"translate_ncbi_namespace is obsolete; translation is now automatic; "
"this flag will be removed"
)

namespace, alias = (
Expand Down

0 comments on commit 9c597be

Please sign in to comment.