From da4ca241666700b68481c779646d20bceba92707 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Thu, 13 Jun 2024 11:54:09 -0400 Subject: [PATCH 1/2] fix: properly initialize file handler (#160) --- src/biocommons/seqrepo/fastadir/fabgz.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/biocommons/seqrepo/fastadir/fabgz.py b/src/biocommons/seqrepo/fastadir/fabgz.py index cfa20a6..1948dd1 100644 --- a/src/biocommons/seqrepo/fastadir/fabgz.py +++ b/src/biocommons/seqrepo/fastadir/fabgz.py @@ -116,7 +116,7 @@ def __init__(self, filename: str) -> None: super(FabgzWriter, self).__init__() self.filename = filename - self.fh = None + self._fh = None self._basepath, suffix = os.path.splitext(self.filename) if suffix != ".bgz": raise RuntimeError("Path must end with .bgz") From 0730c629efd665c4a5d07380ead89af7ae7ef8a8 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Thu, 13 Jun 2024 11:54:50 -0400 Subject: [PATCH 2/2] refactor: use Python 3 stdlib instead of six (#164) --- misc/docker/seqrepo.df | 1 - pyproject.toml | 1 - src/biocommons/seqrepo/fastadir/bases.py | 8 ++------ src/biocommons/seqrepo/fastadir/fabgz.py | 4 +--- tests/test_fabgz.py | 9 ++++----- tests/test_fastaiter.py | 24 ++++++++++++------------ 6 files changed, 19 insertions(+), 28 deletions(-) diff --git a/misc/docker/seqrepo.df b/misc/docker/seqrepo.df index 08f8a0f..8e10ede 100644 --- a/misc/docker/seqrepo.df +++ b/misc/docker/seqrepo.df @@ -37,7 +37,6 @@ RUN pip3 install --upgrade \ pygments \ pysam \ simplegeneric \ - six \ tqdm \ traitlets \ wcwidth \ diff --git a/pyproject.toml b/pyproject.toml index 863f2ff..cc89bd0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,6 @@ dependencies = [ "ipython ~= 8.4", "pysam ~= 0.22", "requests ~= 2.31", - "six ~= 1.16", "tqdm ~= 4.66", "yoyo-migrations ~= 8.2", ] diff --git a/src/biocommons/seqrepo/fastadir/bases.py b/src/biocommons/seqrepo/fastadir/bases.py index 5804a1f..3ee2a27 100644 --- a/src/biocommons/seqrepo/fastadir/bases.py +++ b/src/biocommons/seqrepo/fastadir/bases.py @@ -1,11 +1,8 @@ import abc from typing import Optional -import six - -@six.add_metaclass(abc.ABCMeta) -class BaseReader: +class BaseReader(metaclass=abc.ABCMeta): @abc.abstractmethod def fetch(self, seq_id: str, start: Optional[int] = None, end: Optional[int] = None) -> str: raise NotImplementedError @@ -14,8 +11,7 @@ def __getitem__(self, ac: str) -> str: return self.fetch(ac) -@six.add_metaclass(abc.ABCMeta) -class BaseWriter: +class BaseWriter(metaclass=abc.ABCMeta): @abc.abstractmethod def store(self, seq_id: str, seq: str) -> str: pass # pragma: no cover diff --git a/src/biocommons/seqrepo/fastadir/fabgz.py b/src/biocommons/seqrepo/fastadir/fabgz.py index 1948dd1..987a131 100644 --- a/src/biocommons/seqrepo/fastadir/fabgz.py +++ b/src/biocommons/seqrepo/fastadir/fabgz.py @@ -16,7 +16,6 @@ from types import TracebackType from typing import Optional, Type -import six from pysam import FastaFile from typing_extensions import Self @@ -45,7 +44,6 @@ def _get_bgzip_version(exe: str) -> str: def _find_bgzip() -> str: """return path to bgzip if found and meets version requirements, else exception""" - missing_file_exception = OSError if six.PY2 else FileNotFoundError min_bgzip_version = ".".join(map(str, min_bgzip_version_info)) exe = os.environ.get("SEQREPO_BGZIP_PATH", shutil.which("bgzip") or "/usr/bin/bgzip") @@ -53,7 +51,7 @@ def _find_bgzip() -> str: bgzip_version = _get_bgzip_version(exe) except AttributeError: raise RuntimeError("Didn't find version string in bgzip executable ({exe})".format(exe=exe)) - except missing_file_exception: + except FileNotFoundError: raise RuntimeError( "{exe} doesn't exist; you need to install htslib and tabix " "(See https://github.com/biocommons/biocommons.seqrepo#requirements)".format(exe=exe) diff --git a/tests/test_fabgz.py b/tests/test_fabgz.py index 56cfd3b..89d8214 100644 --- a/tests/test_fabgz.py +++ b/tests/test_fabgz.py @@ -3,7 +3,6 @@ import tempfile import pytest -import six from biocommons.seqrepo.fastadir.fabgz import FabgzReader, FabgzWriter @@ -19,18 +18,18 @@ def test_write_reread(): # write sequences faw = FabgzWriter(fabgz_fn) - for seq_id, seq in six.iteritems(sequences): + for seq_id, seq in sequences.items(): faw.store(seq_id, seq) # add twice to demonstrate non-redundancy - for seq_id, seq in six.iteritems(sequences): + for seq_id, seq in sequences.items(): faw.store(seq_id, seq) faw.close() # now read them back far = FabgzReader(fabgz_fn) - assert far.filename.startswith(tmpdir.encode()) + assert far.filename.startswith(tmpdir.encode()) # type: ignore assert set(far.keys()) == set(sequences.keys()) - assert 5 == len(far), "expected 5 sequences" + assert 5 == len(far), "expected 5 sequences" # type: ignore assert "l10" in far.keys() assert far["l10"] == seed * 10 for seq_id in far.keys(): diff --git a/tests/test_fastaiter.py b/tests/test_fastaiter.py index 0d31310..93a0659 100644 --- a/tests/test_fastaiter.py +++ b/tests/test_fastaiter.py @@ -1,6 +1,6 @@ +from io import StringIO + import pytest -import six -from six.moves import StringIO from biocommons.seqrepo.fastaiter import FastaIter @@ -12,7 +12,7 @@ def test_empty(): # should return an empty generator with pytest.raises(StopIteration): - six.next(iterator) + next(iterator) def test_noheader(): @@ -22,7 +22,7 @@ def test_noheader(): # should return an empty generator with pytest.raises(StopIteration): - six.next(iterator) + next(iterator) def test_single(): @@ -30,13 +30,13 @@ def test_single(): iterator = FastaIter(data) - header, seq = six.next(iterator) + header, seq = next(iterator) assert header == "seq1" assert seq == "ACGT" # should be empty now with pytest.raises(StopIteration): - six.next(iterator) + next(iterator) def test_multiple(): @@ -44,21 +44,21 @@ def test_multiple(): iterator = FastaIter(data) - header, seq = six.next(iterator) + header, seq = next(iterator) assert header == "seq1" assert seq == "ACGT" - header, seq = six.next(iterator) + header, seq = next(iterator) assert header == "seq2" assert seq == "TGCA" - header, seq = six.next(iterator) + header, seq = next(iterator) assert header == "seq3" assert seq == "TTTT" # should be empty now with pytest.raises(StopIteration): - six.next(iterator) + next(iterator) def test_multiline(): @@ -66,10 +66,10 @@ def test_multiline(): iterator = FastaIter(data) - header, seq = six.next(iterator) + header, seq = next(iterator) assert header == "seq1" assert seq == "ACGTTGCA" # should be empty now with pytest.raises(StopIteration): - six.next(iterator) + next(iterator)