Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: Support semicolon-separated lists in Factor Value fields #95

Merged
merged 15 commits into from
Jan 19, 2024
2 changes: 1 addition & 1 deletion altamisa/isatab/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ class FactorValue:
#: Factor name
name: str
#: Factor value
value: FreeTextOrTermRef
value: List[FreeTextOrTermRef]
#: Factor value unit
unit: FreeTextOrTermRef

Expand Down
3 changes: 2 additions & 1 deletion altamisa/isatab/parse_assay_study.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,8 @@ def build(self, line: List[str]) -> models.Material:
models.Comment(hdr.label, line[hdr.col_no]) for hdr in self.comment_headers
)
factor_values = tuple(
self._build_complex(hdr, line, models.FactorValue) for hdr in self.factor_value_headers
self._build_complex(hdr, line, models.FactorValue, allow_list=True)
for hdr in self.factor_value_headers
)
material_type = self._build_freetext_or_term_ref(self.material_type_header, line)
# Then, constructing ``Material`` is easy
Expand Down
2 changes: 1 addition & 1 deletion altamisa/isatab/validate_assay_study.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def has_content(value):
[any(has_content(v) for v in char.value) for char in material.characteristics]
)
any_comm = any([comm.value for comm in material.comments])
any_fact = any([fact.value for fact in material.factor_values])
any_fact = any([any(has_content(v) for v in fact.value) for fact in material.factor_values])
if not material.name and any(
(
any_char,
Expand Down
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Special Extensions
In addition to the original ISA-Tab format specifications, AltamISA supports
the following special modifications to improve specific use cases:

- **List of values** in ``Characteristics`` or ``Parameter Value`` fields by using
- **List of values** in ``Characteristics``, ``Parameter Value``, or ``Factor Value`` fields by using
semicolon separators (";"). Note that for ontology terms, the same number of
splits is expected in the associated fields ``Term Source REF`` and
``Term Accession Number``.
Expand Down
12 changes: 6 additions & 6 deletions tests/data/i_small/s_small.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Source Name Characteristics[organism] Term Source REF Term Accession Number Characteristics[age] Unit Term Source REF Term Accession Number Protocol REF Parameter Value[instrument] Performer Date Sample Name Characteristics[status] Factor Value[treatment]
0814 Mus musculus;Homo sapiens NCBITAXON;NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON/10090;http://purl.bioontology.org/ontology/NCBITAXON/9606 90 day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel John Doe 2018-02-02 0814-N1 0 yes
0815 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON/10090 90 day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel John Doe 2018-02-02 0815-N1 0 yes
0815 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON/10090 90 day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel type A;scalpel type B John Doe 2018-02-02 0815-T1 2
0816 Mus musculus day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel John Doe 2018-02-02 0816-T1 1 yes
0817 150 day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel John Doe 2018-02-02
Source Name Characteristics[organism] Term Source REF Term Accession Number Characteristics[age] Unit Term Source REF Term Accession Number Protocol REF Parameter Value[instrument] Performer Date Sample Name Characteristics[status] Factor Value[treatment] Term Source REF Term Accession Number
0814 Mus musculus;Homo sapiens NCBITAXON;NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON/10090;http://purl.bioontology.org/ontology/NCBITAXON/9606 90 day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel John Doe 2018-02-02 0814-N1 0 vaccine;yes OBI; http://purl.obolibrary.org/obo/VO_0000001;
0815 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON/10090 90 day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel John Doe 2018-02-02 0815-N1 0 yes
0815 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON/10090 90 day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel type A;scalpel type B John Doe 2018-02-02 0815-T1 2
0816 Mus musculus day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel John Doe 2018-02-02 0816-T1 1 yes
0817 150 day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel John Doe 2018-02-02
78 changes: 69 additions & 9 deletions tests/test_parse_study.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def test_study_row_reader_small_study(small_investigation_file, small_study_file

# Create new row reader and check read headers (+ string representation)
row_reader = StudyRowReader.from_stream("S1", small_study_file)
assert 13 == len(row_reader.header)
assert 14 == len(row_reader.header)
rep0 = "ColumnHeader(column_type='Source Name', col_no=0, span=1)"
rep1 = "LabeledColumnHeader(column_type='Characteristics', col_no=1, span=1, label='organism')"
assert rep0 == repr(row_reader.header[0])
Expand Down Expand Up @@ -181,6 +181,8 @@ def test_study_row_reader_small_study(small_investigation_file, small_study_file
table_headers.SAMPLE_NAME,
table_headers.CHARACTERISTICS + "[status]",
table_headers.FACTOR_VALUE + "[treatment]",
table_headers.TERM_SOURCE_REF,
table_headers.TERM_ACCESSION_NUMBER,
]

unit = models.OntologyTermRef(
Expand Down Expand Up @@ -262,14 +264,30 @@ def test_study_row_reader_small_study(small_investigation_file, small_study_file
headers_collection,
)
assert expected == second_row[1]

factor_treatment2 = (
models.FactorValue(
name="treatment",
value=[models.OntologyTermRef("yes", None, None)],
unit=None,
),
)
factor_treatment3 = (
models.FactorValue(
name="treatment",
value=[models.OntologyTermRef(None, None, None)],
unit=None,
),
)

expected = models.Material(
"Sample Name",
"S1-sample-0815-N1",
"0815-N1",
None,
(models.Characteristics("status", ["0"], None),),
(),
(models.FactorValue("treatment", "yes", None),),
factor_treatment2,
None,
headers_sample,
)
Expand Down Expand Up @@ -310,7 +328,7 @@ def test_study_row_reader_small_study(small_investigation_file, small_study_file
None,
(models.Characteristics("status", ["2"], None),),
(),
(models.FactorValue("treatment", "", None),),
factor_treatment3,
None,
headers_sample,
)
Expand All @@ -329,15 +347,15 @@ def test_study_reader_small_study(small_investigation_file, small_study_file):

# Create new row reader and check read headers
reader = StudyReader.from_stream("S1", small_study_file)
assert 13 == len(reader.header)
assert 14 == len(reader.header)

# Read study
study = reader.read()
StudyValidator(investigation, investigation.studies[0], study).validate()

# Check results
assert os.path.normpath(str(study.file)).endswith(os.path.normpath("data/i_small/s_small.txt"))
assert 13 == len(study.header)
assert 14 == len(study.header)
assert 9 == len(study.materials)
assert 5 == len(study.processes)
assert 10 == len(study.arcs)
Expand All @@ -362,6 +380,8 @@ def test_study_reader_small_study(small_investigation_file, small_study_file):
table_headers.SAMPLE_NAME,
table_headers.CHARACTERISTICS + "[status]",
table_headers.FACTOR_VALUE + "[treatment]",
table_headers.TERM_SOURCE_REF,
table_headers.TERM_ACCESSION_NUMBER,
]

unit = models.OntologyTermRef(
Expand Down Expand Up @@ -394,6 +414,34 @@ def test_study_reader_small_study(small_investigation_file, small_study_file):
),
models.Characteristics(name="age", value=["150"], unit=unit),
)
factor_treatment1 = (
models.FactorValue(
name="treatment",
value=[
models.OntologyTermRef(
name="vaccine",
accession="http://purl.obolibrary.org/obo/VO_0000001",
ontology_name="OBI",
),
models.OntologyTermRef("yes", None, None),
],
unit=None,
),
)
factor_treatment2 = (
models.FactorValue(
name="treatment",
value=[models.OntologyTermRef("yes", None, None)],
unit=None,
),
)
factor_treatment3 = (
models.FactorValue(
name="treatment",
value=[models.OntologyTermRef(None, None, None)],
unit=None,
),
)

expected = models.Material(
"Source Name",
Expand Down Expand Up @@ -431,14 +479,26 @@ def test_study_reader_small_study(small_investigation_file, small_study_file):
headers_source,
)
assert expected == study.materials["S1-source-0817"]
expected = models.Material(
"Sample Name",
"S1-sample-0814-N1",
"0814-N1",
None,
(models.Characteristics("status", ["0"], None),),
(),
factor_treatment1,
None,
headers_sample,
)
assert expected == study.materials["S1-sample-0814-N1"]
expected = models.Material(
"Sample Name",
"S1-sample-0815-N1",
"0815-N1",
None,
(models.Characteristics("status", ["0"], None),),
(),
(models.FactorValue("treatment", "yes", None),),
factor_treatment2,
None,
headers_sample,
)
Expand All @@ -450,7 +510,7 @@ def test_study_reader_small_study(small_investigation_file, small_study_file):
None,
(models.Characteristics("status", ["2"], None),),
(),
(models.FactorValue("treatment", "", None),),
factor_treatment3,
None,
headers_sample,
)
Expand All @@ -462,7 +522,7 @@ def test_study_reader_small_study(small_investigation_file, small_study_file):
None,
(models.Characteristics("status", ["1"], None),),
(),
(models.FactorValue("treatment", "yes", None),),
factor_treatment2,
None,
headers_sample,
)
Expand All @@ -474,7 +534,7 @@ def test_study_reader_small_study(small_investigation_file, small_study_file):
None,
(models.Characteristics("status", [""], None),),
(),
(models.FactorValue("treatment", "", None),),
factor_treatment3,
None,
headers_sample,
)
Expand Down
Loading