Skip to content

Commit

Permalink
Normalize metadata on parse (#853)
Browse files Browse the repository at this point in the history
* TST: verify bytes -> utf-8 on parse

* Force general parser to return utf-8 if presented with bytes

* REL/DOC: note bugfix, demark version changes for release
  • Loading branch information
wasade authored Nov 16, 2020
1 parent e683e4e commit 0f470cd
Show file tree
Hide file tree
Showing 10 changed files with 42 additions and 18 deletions.
9 changes: 9 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
BIOM-Format ChangeLog
=====================

biom 2.1.10
-----------

Bug fix, released on 16 November 2020.

Bug fixes:

* During deployment testing for QIIME 2 2020.11, it was observed that certain combinations of hdf5 or h5py dependencies can result in metadata strings parsing as ASCII rather than UTF-8. Parsing of BIOM-Format 2.1.0 files now normalizes metadata strings as UTF-8; see [PR #853](https://github.com/biocore/biom-format/pull/853).

biom 2.1.9
----------

Expand Down
4 changes: 2 additions & 2 deletions biom/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
"""
# ----------------------------------------------------------------------------
# Copyright (c) 2011-2017, The BIOM Format Development Team.
# Copyright (c) 2011-2020, The BIOM Format Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
Expand All @@ -53,7 +53,7 @@
from .util import __format_version__, __version__

__author__ = "Daniel McDonald"
__copyright__ = "Copyright 2011-2017, The BIOM Format Development Team"
__copyright__ = "Copyright 2011-2020, The BIOM Format Development Team"
__credits__ = ["Daniel McDonald", "Jai Ram Rideout", "Greg Caporaso",
"Jose Clemente", "Justin Kuczynski", "Antonio Gonzalez",
"Yoshiki Vazquez Baeza", "Jose Navas", "Adam Robbins-Pianka",
Expand Down
2 changes: 1 addition & 1 deletion biom/err.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
"""

# -----------------------------------------------------------------------------
# Copyright (c) 2011-2017, The BIOM Format Development Team.
# Copyright (c) 2011-2020, The BIOM Format Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
Expand Down
4 changes: 2 additions & 2 deletions biom/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
"""Define BIOM exceptions"""

# -----------------------------------------------------------------------------
# Copyright (c) 2011-2017, The BIOM Format Development Team.
# Copyright (c) 2011-2020, The BIOM Format Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
# -----------------------------------------------------------------------------

__author__ = "Daniel McDonald"
__copyright__ = "Copyright 2011-2017, The BIOM Format Development Team"
__copyright__ = "Copyright 2011-2020, The BIOM Format Development Team"
__credits__ = ["Daniel McDonald", "Jai Ram Rideout", "Greg Caporaso",
"Jose Clemente", "Justin Kuczynski"]
__license__ = "BSD"
Expand Down
4 changes: 2 additions & 2 deletions biom/parse.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python

# ----------------------------------------------------------------------------
# Copyright (c) 2011-2017, The BIOM Format Development Team.
# Copyright (c) 2011-2020, The BIOM Format Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
Expand All @@ -23,7 +23,7 @@


__author__ = "Justin Kuczynski"
__copyright__ = "Copyright 2011-2017, The BIOM Format Development Team"
__copyright__ = "Copyright 2011-2020, The BIOM Format Development Team"
__credits__ = ["Justin Kuczynski", "Daniel McDonald", "Greg Caporaso",
"Jose Carlos Clemente Litran", "Adam Robbins-Pianka",
"Jose Antonio Navas Molina"]
Expand Down
6 changes: 4 additions & 2 deletions biom/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@
"""

# -----------------------------------------------------------------------------
# Copyright (c) 2011-2017, The BIOM Format Development Team.
# Copyright (c) 2011-2020, The BIOM Format Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
Expand Down Expand Up @@ -212,7 +212,7 @@


__author__ = "Daniel McDonald"
__copyright__ = "Copyright 2011-2017, The BIOM Format Development Team"
__copyright__ = "Copyright 2011-2020, The BIOM Format Development Team"
__credits__ = ["Daniel McDonald", "Jai Ram Rideout", "Greg Caporaso",
"Jose Clemente", "Justin Kuczynski", "Adam Robbins-Pianka",
"Joshua Shorenstein", "Jose Antonio Navas Molina",
Expand Down Expand Up @@ -257,6 +257,8 @@ def _identify_bad_value(dtype, fields):


def general_parser(x):
    """Return ``x`` as text, decoding it as UTF-8 when it is ``bytes``.

    Any value that is not a ``bytes`` instance is returned unchanged.
    """
    return x.decode('utf8') if isinstance(x, bytes) else x


Expand Down
15 changes: 14 additions & 1 deletion biom/tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
list_dict_to_sparse, dict_to_sparse,
coo_arrays_to_sparse, list_list_to_sparse,
nparray_to_sparse, list_sparse_to_sparse,
_identify_bad_value)
_identify_bad_value, general_parser)
from biom.parse import parse_biom_table
from biom.err import errstate

Expand Down Expand Up @@ -615,6 +615,19 @@ def test_max_whole(self):
obs = self.simple_derived.max('whole')
npt.assert_equal(obs, exp)

def test_general_parser(self):
    """general_parser returns the same text for bytes and str inputs."""
    texts = ['foo', '', '10', '3.14']
    # Each text is checked first in its bytes form, then as a plain str;
    # both forms are expected to come back as the str itself.
    cases = []
    for text in texts:
        cases.append((text.encode('utf8'), text))
        cases.append((text, text))
    for given, expected in cases:
        self.assertEqual(general_parser(given), expected)

@npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
def test_from_hdf5_non_hdf5_file_or_group(self):
with self.assertRaises(ValueError):
Expand Down
6 changes: 3 additions & 3 deletions biom/util.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ----------------------------------------------------------------------------
# Copyright (c) 2011-2017, The BIOM Format Development Team.
# Copyright (c) 2011-2020, The BIOM Format Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
Expand Down Expand Up @@ -37,15 +37,15 @@
from numpy import mean, median, min, max

__author__ = "Daniel McDonald"
__copyright__ = "Copyright 2011-2017, The BIOM Format Development Team"
__copyright__ = "Copyright 2011-2020, The BIOM Format Development Team"
__credits__ = ["Daniel McDonald", "Jai Ram Rideout", "Greg Caporaso",
"Jose Clemente", "Justin Kuczynski", "Jorge Cañardo Alastuey"]
__license__ = "BSD"
__url__ = "http://biom-format.org"
__maintainer__ = "Daniel McDonald"
__email__ = "daniel.mcdonald@colorado.edu"
__format_version__ = (2, 1)
__version__ = "2.1.9"
__version__ = "2.1.10"


def generate_subsamples(table, n, axis='sample', by_id=False):
Expand Down
4 changes: 2 additions & 2 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@
# built documents.
#
# The full version, including alpha/beta/rc tags.
version = "2.1.9"
release = "2.1.9"
version = "2.1.10"
release = "2.1.10"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-

# ----------------------------------------------------------------------------
# Copyright (c) 2011-2017, The BIOM Format Development Team.
# Copyright (c) 2011-2020, The BIOM Format Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
Expand All @@ -29,11 +29,11 @@
pass

__author__ = "Daniel McDonald"
__copyright__ = "Copyright 2011-2017, The BIOM Format Development Team"
__copyright__ = "Copyright 2011-2020, The BIOM Format Development Team"
__credits__ = ["Greg Caporaso", "Daniel McDonald", "Jose Clemente",
"Jai Ram Rideout", "Jorge Cañardo Alastuey", "Michael Hall"]
__license__ = "BSD"
__version__ = "2.1.9"
__version__ = "2.1.10"
__maintainer__ = "Daniel McDonald"
__email__ = "mcdonadt@colorado.edu"

Expand Down

0 comments on commit 0f470cd

Please sign in to comment.