From 9497e7030b1a9fe32acf8d93e215447401e851fa Mon Sep 17 00:00:00 2001 From: Thomas Sell Date: Fri, 3 Nov 2023 17:31:53 +0100 Subject: [PATCH 1/4] feat: More useful inconsistent entry length errors --- altamisa/isatab/parse_investigation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/altamisa/isatab/parse_investigation.py b/altamisa/isatab/parse_investigation.py index 5ac8aff..284cad2 100644 --- a/altamisa/isatab/parse_investigation.py +++ b/altamisa/isatab/parse_investigation.py @@ -193,8 +193,8 @@ def _read_multi_column_section(self, prefix, ref_keys, section_name): msg = tpl.format(section_name, list(sorted(section))) raise ParseIsatabException(msg) # TODO: should be warning? if not len(set([len(v) for v in section.values()])) == 1: # pragma: no cover - tpl = "Inconsistent entry lengths in section {}" - msg = tpl.format(section_name) + lengths = "\n".join(map(str, [f"{key}: {len(value)}" for key, value in section.items()])) + msg = f"Inconsistent entry lengths in section {section_name}:\n{lengths}" raise ParseIsatabException(msg) return section, comment_keys From 6644590e24592b45908cc97eb9f0313a00033835 Mon Sep 17 00:00:00 2001 From: Thomas Sell Date: Fri, 3 Nov 2023 17:54:38 +0100 Subject: [PATCH 2/4] more useful errors for parameter/component splits --- altamisa/isatab/parse_investigation.py | 29 ++++++++++++++++++-------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/altamisa/isatab/parse_investigation.py b/altamisa/isatab/parse_investigation.py index 284cad2..46049e4 100644 --- a/altamisa/isatab/parse_investigation.py +++ b/altamisa/isatab/parse_investigation.py @@ -43,14 +43,18 @@ def _parse_comment_header(val): # Helper function to extract protocol parameters def _split_study_protocols_parameters( - names, name_term_accs, name_term_srcs + protocol_name, names, name_term_accs, name_term_srcs ) -> Iterator[models.FreeTextOrTermRef]: names = names.split(";") name_term_accs = name_term_accs.split(";") name_term_srcs = 
name_term_srcs.split(";") if not (len(names) == len(name_term_accs) == len(name_term_srcs)): # pragma: no cover - tpl = 'Unequal protocol parameter splits; found: "{}", "{}", "{}"' - msg = tpl.format(names, name_term_accs, name_term_srcs) + msg = ( + f"Unequal parameter splits in protocol '{protocol_name}':\n" + f"Parameter Names: {len(names)}\n" + f"Term Accession Numbers: {len(name_term_accs)}\n" + f"Term Source REFs: {len(name_term_srcs)}" + ) raise ParseIsatabException(msg) if len(names) > len(set(names)): # pragma: no cover tpl = "Repeated protocol parameter; found: {}" @@ -63,7 +67,7 @@ def _split_study_protocols_parameters( # Helper function to extract protocol components def _split_study_protocols_components( - names, types, type_term_accs, type_term_srcs + protocol_name, names, types, type_term_accs, type_term_srcs ) -> Iterator[models.ProtocolComponentInfo]: names = names.split(";") types = types.split(";") @@ -72,8 +76,13 @@ def _split_study_protocols_components( if not ( len(names) == len(types) == len(type_term_accs) == len(type_term_srcs) ): # pragma: no cover - tpl = "Unequal protocol component splits; " 'found: "{}", "{}", "{}", "{}"' - msg = tpl.format(names, types, type_term_accs, type_term_srcs) + msg = ( + f"Unequal component splits in protocol '{protocol_name}':\n" + f"Components Names: {len(names)}\n" + f"Components Types: {len(types)}\n" + f"Type Term Accession Numbers: {len(type_term_accs)}\n" + f"Type Term Source REFs: {len(type_term_srcs)}" + ) raise ParseIsatabException(msg) if len(names) > len(set(names)): # pragma: no cover tpl = "Repeated protocol components; found: {}" @@ -193,7 +202,9 @@ def _read_multi_column_section(self, prefix, ref_keys, section_name): msg = tpl.format(section_name, list(sorted(section))) raise ParseIsatabException(msg) # TODO: should be warning?
if not len(set([len(v) for v in section.values()])) == 1: # pragma: no cover - lengths = "\n".join(map(str, [f"{key}: {len(value)}" for key, value in section.items()])) + lengths = "\n".join( + map(str, [f"{key}: {len(value)}" for key, value in section.items()]) + ) msg = f"Inconsistent entry lengths in section {section_name}:\n{lengths}" raise ParseIsatabException(msg) return section, comment_keys @@ -556,13 +567,13 @@ def _read_study_protocols(self) -> Iterator[models.ProtocolInfo]: paras = { p.name if hasattr(p, "name") else p: p for p in _split_study_protocols_parameters( - para_names, para_name_term_accs, para_name_term_srcs + name, para_names, para_name_term_accs, para_name_term_srcs ) } comps = { c.name: c for c in _split_study_protocols_components( - comp_names, comp_types, comp_type_term_accs, comp_type_term_srcs + name, comp_names, comp_types, comp_type_term_accs, comp_type_term_srcs ) } comments = _parse_comments(section, comment_keys, i) From 0652e22bb69fb4a9f2610ecaa118a7c163fc5aef Mon Sep 17 00:00:00 2001 From: Thomas Sell Date: Tue, 16 Jan 2024 14:20:33 +0100 Subject: [PATCH 3/4] added type hints for touched functions --- altamisa/isatab/parse_investigation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/altamisa/isatab/parse_investigation.py b/altamisa/isatab/parse_investigation.py index 0df4cf9..9ad8102 100644 --- a/altamisa/isatab/parse_investigation.py +++ b/altamisa/isatab/parse_investigation.py @@ -42,7 +42,7 @@ def _parse_comment_header(val): # Helper function to extract protocol parameters def _split_study_protocols_parameters( - protocol_name, names, name_term_accs, name_term_srcs + protocol_name: str, names: str, name_term_accs: str, name_term_srcs: str ) -> Iterator[models.FreeTextOrTermRef]: names = names.split(";") name_term_accs = name_term_accs.split(";") @@ -66,7 +66,7 @@ def _split_study_protocols_parameters( # Helper function to extract protocol components def _split_study_protocols_components( - 
protocol_name, names, types, type_term_accs, type_term_srcs + protocol_name: str, names: str, types: str, type_term_accs: str, type_term_srcs: str ) -> Iterator[models.ProtocolComponentInfo]: names = names.split(";") types = types.split(";") From efb1156cfc90b01659407c3574745160e294a745 Mon Sep 17 00:00:00 2001 From: Thomas Sell Date: Tue, 16 Jan 2024 14:32:02 +0100 Subject: [PATCH 4/4] conform to declared types --- altamisa/isatab/parse_investigation.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/altamisa/isatab/parse_investigation.py b/altamisa/isatab/parse_investigation.py index 9ad8102..e7a95fc 100644 --- a/altamisa/isatab/parse_investigation.py +++ b/altamisa/isatab/parse_investigation.py @@ -42,11 +42,11 @@ def _parse_comment_header(val): # Helper function to extract protocol parameters def _split_study_protocols_parameters( - protocol_name: str, names: str, name_term_accs: str, name_term_srcs: str + protocol_name: str, names_str: str, name_term_accs_str: str, name_term_srcs_str: str ) -> Iterator[models.FreeTextOrTermRef]: - names = names.split(";") - name_term_accs = name_term_accs.split(";") - name_term_srcs = name_term_srcs.split(";") + names = names_str.split(";") + name_term_accs = name_term_accs_str.split(";") + name_term_srcs = name_term_srcs_str.split(";") if not (len(names) == len(name_term_accs) == len(name_term_srcs)): # pragma: no cover msg = ( f"Unequal parameter splits in protocol '{protocol_name}':\n" @@ -66,12 +66,16 @@ def _split_study_protocols_parameters( # Helper function to extract protocol components def _split_study_protocols_components( - protocol_name: str, names: str, types: str, type_term_accs: str, type_term_srcs: str + protocol_name: str, + names_str: str, + types_str: str, + type_term_accs_str: str, + type_term_srcs_str: str, ) -> Iterator[models.ProtocolComponentInfo]: - names = names.split(";") - types = types.split(";") - type_term_accs = type_term_accs.split(";") - type_term_srcs 
= type_term_srcs.split(";") + names = names_str.split(";") + types = types_str.split(";") + type_term_accs = type_term_accs_str.split(";") + type_term_srcs = type_term_srcs_str.split(";") if not ( len(names) == len(types) == len(type_term_accs) == len(type_term_srcs) ): # pragma: no cover