From 79d8716b16cf1bb7bf75c3f4f4c5270060aa4ba9 Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Sat, 13 Jan 2024 18:37:40 -0500 Subject: [PATCH] Extended prefix maps usage updates - Bugfix: clean_prefix_map(): Was ignoring prefix aliases. This was causing an error, as there was a perceived mismatch between the prefixes of the mapping set, and the prefix_map. - Bugfix: get_metadata_and_prefix_map(): Was not utilizing extended prefix maps. This manifested in an issue where prefix aliases were not incorporated. This meant that (a) if we tried to fix by removing the alias from the plain prefix_map, these CURIEs could not be resolved, (b) if we included the alias in the plain prefix_map, there would be a duplicate URI prefix, which would result in an error. - Add: convert_plain_prefix_map_to_extended() --- src/sssom/io.py | 21 ++++++++++++++++++++- src/sssom/util.py | 10 +++++++++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/sssom/io.py b/src/sssom/io.py index 8f4c947e..9b7882b8 100644 --- a/src/sssom/io.py +++ b/src/sssom/io.py @@ -130,6 +130,23 @@ def split_file(input_path: str, output_directory: Union[str, Path]) -> None: write_tables(splitted, output_directory) +def convert_plain_prefix_map_to_extended(prefix_map): + """Convert a standard key/val prefix map to extended prefix map format""" + by_uri_prefix = {} + for prefix, uri_prefix in prefix_map.items(): + if uri_prefix in by_uri_prefix: + by_uri_prefix[uri_prefix]["prefix_synonyms"].append(prefix) + continue + by_uri_prefix[uri_prefix] = { + "prefix": prefix, + "prefix_synonyms": [], + "uri_prefix": uri_prefix, + "uri_prefix_synonyms": [], + } + epm = list(by_uri_prefix.values()) + return epm + + def get_metadata_and_prefix_map( metadata_path: Union[None, str, Path] = None, *, prefix_map_mode: Optional[MergeMode] = None ) -> Tuple[Converter, MetadataType]: @@ -147,7 +164,9 @@ def get_metadata_and_prefix_map( metadata = yaml.safe_load(file) metadata = dict(ChainMap(metadata, 
get_default_metadata())) - converter = Converter.from_prefix_map(metadata.pop(CURIE_MAP, {})) + prefix_map = metadata.pop(CURIE_MAP, {}) + epm = convert_plain_prefix_map_to_extended(prefix_map) + converter = Converter.from_extended_prefix_map(epm) converter = _merge_converter(converter, prefix_map_mode=prefix_map_mode) return converter, metadata diff --git a/src/sssom/util.py b/src/sssom/util.py index 64db60ed..8d917105 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -246,7 +246,15 @@ def clean_prefix_map(self, strict: bool = True) -> None: if self.metadata: prefixes_in_table.update(get_prefixes_used_in_metadata(self.metadata)) - missing_prefixes = prefixes_in_table - self.converter.get_prefixes() + prefixes = {record.prefix for record in self.converter.records} + aliases = { + p + for p in itt.chain(*[record.prefix_synonyms for record in self.converter.records]) + if p + } + converter_prefixes = prefixes | aliases + missing_prefixes = prefixes_in_table - converter_prefixes + if missing_prefixes and strict: raise ValueError( f"{missing_prefixes} are used in the SSSOM mapping set but it does not exist in the prefix map"