Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: Support semicolon-separated lists in Factor Value fields #95

Merged
merged 15 commits into from
Jan 19, 2024
2 changes: 1 addition & 1 deletion altamisa/isatab/parse_assay_study.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ def build(self, line: List[str]) -> models.Material:
models.Comment(hdr.label, line[hdr.col_no]) for hdr in self.comment_headers
)
factor_values = tuple(
self._build_complex(hdr, line, models.build_factor_value)
self._build_complex(hdr, line, models.build_factor_value, allow_list=True)
for hdr in self.factor_value_headers
)
material_type = self._build_freetext_or_term_ref(self.material_type_header, line)
Expand Down
2 changes: 1 addition & 1 deletion altamisa/isatab/validate_assay_study.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def has_content(value):
[any(has_content(v) for v in char.value) for char in material.characteristics]
)
any_comm = any([comm.value for comm in material.comments])
any_fact = any([fact.value for fact in material.factor_values])
any_fact = any([any(has_content(v) for v in fact.value) for fact in material.factor_values])
if not material.name and any(
(
any_char,
Expand Down
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Special Extensions
In addition to the original ISA-Tab format specifications, AltamISA supports
the following special modifications to improve specific use cases:

- **List of values** in ``Characterics`` or ``Parameter Value`` fields by using
- **List of values** in ``Characteristics``, ``Parameter Value``, or ``Factor Value`` fields by using
semicolon-separators (";"). Note, for ontology terms the same number of
splits is expected in the associated fields ``Term Source REF`` and
``Term Accession Number``.
Expand Down
86 changes: 86 additions & 0 deletions tests/__snapshots__/test_parse_study.ambr
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# serializer version: 1
# name: test_study_reader_minimal_study
list([
'''
Investigation with only one study contains metadata:
ID: i_minimal
Title: Minimal Investigation
Path: i_minimal.txt
Submission Date:
Public Release Date: None
Prefer recording metadata in the study section.
''',
'''
Assay without platform:
Path: a_minimal.txt
Measurement Type: exome sequencing assay
Technology Type: nucleotide sequencing
Technology Platform:
''',
])
# ---
# name: test_study_reader_minimal_study_iostring
list([
'''
Investigation with only one study contains metadata:
ID: i_minimal
Title: Minimal Investigation
Path: <no file>
Submission Date:
Public Release Date: None
Prefer recording metadata in the study section.
''',
'''
Assay without platform:
Path: a_minimal.txt
Measurement Type: exome sequencing assay
Technology Type: nucleotide sequencing
Technology Platform:
''',
])
# ---
# name: test_study_reader_minimal_study_iostring2
list([
'''
Investigation with only one study contains metadata:
ID: i_minimal
Title: Minimal Investigation
Path: <no file>
Submission Date:
Public Release Date: None
Prefer recording metadata in the study section.
''',
'''
Assay without platform:
Path: a_minimal.txt
Measurement Type: exome sequencing assay
Technology Type: nucleotide sequencing
Technology Platform:
''',
])
# ---
# name: test_study_reader_small_study
list([
'''
Investigation with only one study contains metadata:
ID: i_small
Title: Small Investigation
Path: i_small.txt
Submission Date:
Public Release Date: None
Prefer recording metadata in the study section.
''',
'''
Assay without platform:
Path: a_small.txt
Measurement Type: exome sequencing assay
Technology Type: nucleotide sequencing
Technology Platform:
''',
])
# ---
# name: test_study_reader_small_study.1
list([
"Found annotated material/file without name: Material(type='Sample Name', unique_name='S1-Empty Sample Name-13-5', name='', extract_label=None, characteristics=(Characteristics(name='status', value=['1'], unit=None),), comments=(), factor_values=(FactorValue(name='treatment', value=[''], unit=None),), material_type=None, headers=['Sample Name', 'Characteristics[status]', 'Factor Value[treatment]'])",
])
# ---
3 changes: 2 additions & 1 deletion tests/data/i_small/s_small.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ Source Name Characteristics[organism] Term Source REF Term Accession Number Char
0815 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON/10090 90 day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel John Doe 2018-02-02 0815-N1 0 yes
0815 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON/10090 90 day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel type A;scalpel type B John Doe 2018-02-02 0815-T1 2
0816 Mus musculus day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel John Doe 2018-02-02 0816-T1 1 yes
0817 150 day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel John Doe 2018-02-02
0817 150 day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel John Doe 2018-02-02 1
0818 150 day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel John Doe 2018-02-02
51 changes: 37 additions & 14 deletions tests/test_parse_study.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os

import pytest
from syrupy.assertion import SnapshotAssertion

from altamisa.constants import table_headers
from altamisa.exceptions import IsaWarning
Expand Down Expand Up @@ -69,7 +70,9 @@ def test_study_row_reader_minimal_study(minimal_investigation_file, minimal_stud
assert expected == first_row[2]


def test_study_reader_minimal_study(minimal_investigation_file, minimal_study_file):
def test_study_reader_minimal_study(
minimal_investigation_file, minimal_study_file, snapshot: SnapshotAssertion
):
"""Use ``StudyReader`` to read in minimal study file.

Using the ``StudyReader`` instead of the ``StudyRowReader`` gives us
Expand All @@ -81,7 +84,7 @@ def test_study_reader_minimal_study(minimal_investigation_file, minimal_study_fi
InvestigationValidator(investigation).validate()

# Check warnings
assert 2 == len(record)
assert snapshot == [str(r.message) for r in record]

# Create new row reader and check read headers
reader = StudyReader.from_stream("S1", minimal_study_file)
Expand Down Expand Up @@ -155,7 +158,7 @@ def test_study_row_reader_small_study(small_investigation_file, small_study_file
rows = list(row_reader.read())

# Check results
assert 5 == len(rows)
assert 6 == len(rows)
first_row = rows[0]
second_row = rows[1]
third_row = rows[2]
Expand Down Expand Up @@ -318,15 +321,17 @@ def test_study_row_reader_small_study(small_investigation_file, small_study_file
assert expected == third_row[2]


def test_study_reader_small_study(small_investigation_file, small_study_file):
def test_study_reader_small_study(
small_investigation_file, small_study_file, snapshot: SnapshotAssertion
):
"""Use ``StudyReader`` to read in small study file."""
# Load investigation (tested elsewhere)
with pytest.warns(IsaWarning) as record:
investigation = InvestigationReader.from_stream(small_investigation_file).read()
InvestigationValidator(investigation).validate()

# Check warnings
assert 2 == len(record)
assert snapshot == [str(r.message) for r in record]

# Create new row reader and check read headers
reader = StudyReader.from_stream("S1", small_study_file)
Expand All @@ -337,14 +342,14 @@ def test_study_reader_small_study(small_investigation_file, small_study_file):
with pytest.warns(IsaWarning) as record:
StudyValidator(investigation, investigation.studies[0], study).validate()
# Check warnings
assert 1 == len(record)
assert snapshot == [str(r.message) for r in record]

# Check results
assert os.path.normpath(str(study.file)).endswith(os.path.normpath("data/i_small/s_small.txt"))
assert 13 == len(study.header)
assert 9 == len(study.materials)
assert 5 == len(study.processes)
assert 10 == len(study.arcs)
assert 11 == len(study.materials)
assert 6 == len(study.processes)
assert 12 == len(study.arcs)

headers_source = [
table_headers.SOURCE_NAME,
Expand Down Expand Up @@ -476,13 +481,25 @@ def test_study_reader_small_study(small_investigation_file, small_study_file):
"S1-Empty Sample Name-13-5",
"",
None,
(models.Characteristics("status", [""], None),),
(models.Characteristics("status", ["1"], None),),
(),
(models.FactorValue("treatment", [""], None),),
None,
headers_sample,
)
assert expected == study.materials["S1-Empty Sample Name-13-5"]
expected = models.Material(
"Sample Name",
"S1-Empty Sample Name-13-6",
"",
None,
(models.Characteristics("status", [""], None),),
(),
(models.FactorValue("treatment", [""], None),),
None,
headers_sample,
)
assert expected == study.materials["S1-Empty Sample Name-13-6"]

expected = models.Process(
"sample collection",
Expand Down Expand Up @@ -541,19 +558,23 @@ def test_study_reader_small_study(small_investigation_file, small_study_file):
models.Arc("S1-sample collection-9-4", "S1-sample-0816-T1"),
models.Arc("S1-source-0817", "S1-sample collection-9-5"),
models.Arc("S1-sample collection-9-5", "S1-Empty Sample Name-13-5"),
models.Arc("S1-source-0818", "S1-sample collection-9-6"),
models.Arc("S1-sample collection-9-6", "S1-Empty Sample Name-13-6"),
)
assert expected == study.arcs


def test_study_reader_minimal_study_iostring(minimal_investigation_file, minimal_study_file):
def test_study_reader_minimal_study_iostring(
minimal_investigation_file, minimal_study_file, snapshot: SnapshotAssertion
):
# Load investigation (tested elsewhere)
stringio = io.StringIO(minimal_investigation_file.read())
investigation = InvestigationReader.from_stream(stringio).read()
with pytest.warns(IsaWarning) as record:
InvestigationValidator(investigation).validate()

# Check warnings
assert 2 == len(record)
assert snapshot == [str(r.message) for r in record]

# Create new study reader and read from StringIO with original filename indicated
stringio = io.StringIO(minimal_study_file.read())
Expand All @@ -574,15 +595,17 @@ def test_study_reader_minimal_study_iostring(minimal_investigation_file, minimal
assert 2 == len(study.arcs)


def test_study_reader_minimal_study_iostring2(minimal_investigation_file, minimal_study_file):
def test_study_reader_minimal_study_iostring2(
minimal_investigation_file, minimal_study_file, snapshot: SnapshotAssertion
):
# Load investigation (tested elsewhere)
stringio = io.StringIO(minimal_investigation_file.read())
investigation = InvestigationReader.from_stream(stringio).read()
with pytest.warns(IsaWarning) as record:
InvestigationValidator(investigation).validate()

# Check warnings
assert 2 == len(record)
assert snapshot == [str(r.message) for r in record]

# Create new study reader and read from StringIO with no filename indicated
stringio = io.StringIO(minimal_study_file.read())
Expand Down
Loading