From a47427fb0645bc5ecf1c712b3df3a4c963556fb7 Mon Sep 17 00:00:00 2001
From: Joe Flack
Date: Sun, 5 May 2024 00:27:00 -0400
Subject: [PATCH] FHIR Feature

- Add: _test_to_fhir_json()
- Update: test_write_sssom_fhir(): More assertions
- Update: Fixed some typos
- Update: model mappings: exporter: title --> title (was previously name --> title)
- Update: Deprecated these functions instead of deleting: write_fhir_json(), write_ontoportal_json()
---
 src/sssom/writers.py     | 139 +++++++++++++++++++++++++--------------
 tests/test_conversion.py |  10 +++
 tests/test_writers.py    |  35 +++++++++-
 3 files changed, 131 insertions(+), 53 deletions(-)

diff --git a/src/sssom/writers.py b/src/sssom/writers.py
index 6486b49d..37293497 100644
--- a/src/sssom/writers.py
+++ b/src/sssom/writers.py
@@ -8,6 +8,7 @@
 import pandas as pd
 import yaml
 from curies import Converter
+from deprecation import deprecated
 from jsonasobj2 import JsonObj
 from linkml_runtime.dumpers import JSONDumper, rdflib_dumper
 from linkml_runtime.utils.schemaview import SchemaView
@@ -100,7 +101,13 @@ def write_rdf(
 
 
 def write_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="json") -> None:
-    """Write a mapping set dataframe to the file as JSON."""
+    """Write a mapping set dataframe to the file as JSON.
+
+    :param serialisation: The JSON format to use.
Supported formats are: + - fhir_json: https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_fhir_json + - json: https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_json + - ontoportal_json: https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_ontoportal_json + """ func_map: Dict[str, Callable] = { "fhir_json": to_fhir_json, "json": to_json, @@ -115,6 +122,28 @@ def write_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="json") json.dump(data, output, indent=2) +@deprecated(deprecated_in="0.4.7", details="Use write_json() instead") +def write_fhir_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="fhir_json") -> None: + """Write a mapping set dataframe to the file as FHIR ConceptMap JSON.""" + if serialisation != "fhir_json": + raise ValueError( + f"Unknown json format: {serialisation}, currently only fhir_json supported" + ) + write_json(msdf, output, serialisation="fhir_json") + + +@deprecated(deprecated_in="0.4.7", details="Use write_json() instead") +def write_ontoportal_json( + msdf: MappingSetDataFrame, output: TextIO, serialisation: str = "ontoportal_json" +) -> None: + """Write a mapping set dataframe to the file as the ontoportal mapping JSON model.""" + if serialisation != "ontoportal_json": + raise ValueError( + f"Unknown json format: {serialisation}, currently only ontoportal_json supported" + ) + write_json(msdf, output, serialisation="ontoportal_json") + + def write_owl( msdf: MappingSetDataFrame, file: TextIO, @@ -259,18 +288,30 @@ def to_fhir_json(msdf: MappingSetDataFrame) -> Dict: :return: Dict: A Dictionary serializable as JSON. 
Resources: - - ConcpetMap::SSSOM mapping spreadsheet: https://docs.google.com/spreadsheets/d/1J19foBAYO8PCHwOfksaIGjNu-q5ILUKFh2HpOCgYle0/edit#gid=1389897118 + - ConceptMap::SSSOM mapping spreadsheet: + https://docs.google.com/spreadsheets/d/1J19foBAYO8PCHwOfksaIGjNu-q5ILUKFh2HpOCgYle0/edit#gid=1389897118 TODO: add to CLI & to these functions: r4 vs r5 param TODO: What if the msdf doesn't have everything we need? (i) metadata, e.g. yml, (ii) what if we need to override? - - todo: later: allow any nested aribtrary override: (get in kwargs, else metadata.get(key, None)) + - todo: later: allow any nested arbitrary override: (get in kwargs, else metadata.get(key, None)) Minor todos - todo: `mapping_justification` consider `ValueString` -> `ValueCoding` https://github.com/timsbiomed/issues/issues/152 + todo: mapping_justification: consider `ValueString` -> `ValueCoding` https://github.com/timsbiomed/issues/issues/152 todo: when/how to conform to R5 instead of R4?: https://build.fhir.org/conceptmap.html """ # Constants df: pd.DataFrame = msdf.df + # TODO: R4 (try this first) + # relatedto | equivalent | equal | wider | subsumes | narrower | specializes | inexact | unmatched | disjoint + # https://www.hl7.org/fhir/r4/conceptmap.html + # todo: r4: if not found, should likely be `null` or something. check docs to see if nullable, else ask on Zulip + # TODO: R5 Needs to be one of: + # related-to | equivalent | source-is-narrower-than-target | source-is-broader-than-target | not-related-to + # https://www.hl7.org/fhir/r4/valueset-concept-map-equivalence.html + # ill update that next time. 
i can map SSSOM SKOS/etc mappings to FHIR ones + # and then add the original SSSOM mapping CURIE fields somewhere else + # https://www.hl7.org/fhir/valueset-concept-map-equivalence.html + # https://github.com/mapping-commons/sssom-py/issues/258 equivalence_map = { # relateedto: The concepts are related to each other, and have at least some overlap in meaning, but the exact # relationship is not known. @@ -315,9 +356,10 @@ def to_fhir_json(msdf: MappingSetDataFrame) -> Dict: name: str = mapping_set_id.split("/")[-1].replace(".sssom.tsv", "") # Construct JSON - json_obj = { + json_obj: Dict[str, Any] = { "resourceType": "ConceptMap", "url": mapping_set_id, + # Assumes mapping_set_id is a URI w/ artefact name at end. System becomes URI stem, value becomes artefact name "identifier": [ { "system": "/".join(mapping_set_id.split("/")[:-1]) + "/", @@ -326,7 +368,6 @@ def to_fhir_json(msdf: MappingSetDataFrame) -> Dict: ], "version": metadata.get("mapping_set_version", ""), "name": name, - "title": name, # TODO -> mapping_set_description? "status": "draft", # todo: when done: draft | active | retired | unknown "experimental": True, # todo: False when converter finished # todo: should this be date of last converted to FHIR json instead? @@ -353,53 +394,11 @@ def to_fhir_json(msdf: MappingSetDataFrame) -> Dict: # }], # "purpose": "", # todo: conceptmap "copyright": metadata.get("license", ""), - "sourceUri": metadata.get("subject_source", ""), - "targetUri": metadata.get("object_source", ""), # TODO: Might want to make each "group" first, if there is more than 1 set of ontology1::ontology2 # ...within a given MappingSet / set of SSSOM TSV rows. "group": [ { - # TODO: Override? but how? 
- "source": metadata.get("subject_source", ""), - "target": metadata.get("object_source", ""), - "element": [ - { - "code": row["subject_id"], - "display": row.get("subject_label", ""), - "target": [ - { - "code": row["object_id"], - "display": row.get("object_label", ""), - # TODO: R4 (try this first) - # relatedto | equivalent | equal | wider | subsumes | narrower | specializes | inexact | unmatched | disjoint - # https://www.hl7.org/fhir/r4/conceptmap.html - # todo: r4: if not found, eventually needs to be `null` or something. check docs to see if nullable, else ask on Zulip - # TODO: R5 Needs to be one of: - # related-to | equivalent | source-is-narrower-than-target | source-is-broader-than-target | not-related-to - # https://www.hl7.org/fhir/r4/valueset-concept-map-equivalence.html - # ill update that next time. i can map SSSOM SKOS/etc mappings to FHIR ones - # and then add the original SSSOM mapping CURIE fields somewhere else - # https://www.hl7.org/fhir/valueset-concept-map-equivalence.html - # https://github.com/mapping-commons/sssom-py/issues/258 - "equivalence": equivalence_map.get( - row["predicate_id"], row["predicate_id"] - ), # r4 - # "relationship": row['predicate_id'], # r5 - # "comment": '', - "extension": [ - { - "url": "http://example.org/fhir/StructureDefinition/mapping_justification", - "valueString": row.get( - "mapping_justification", - row.get("mapping_justification", ""), - ), - } - ], - } - ], - } - for i, row in df.iterrows() - ], + "element": [] # "unmapped": { # todo: conceptmap # "mode": "fixed", # "code": "temp", @@ -408,9 +407,49 @@ def to_fhir_json(msdf: MappingSetDataFrame) -> Dict: } ], } + if "mapping_set_title" in metadata: + json_obj["title"] = metadata["mapping_set_title"] + + # todo: Override? but how? 
(2024/04/05 Joe: idr what I was trying to override) + if "subject_source" in metadata: + json_obj["sourceUri"] = metadata["subject_source"] + json_obj["group"][0]["source"] = metadata["subject_source"] + if "object_source" in metadata: + json_obj["targetUri"] = metadata["object_source"] + json_obj["group"][0]["target"] = metadata["object_source"] + + for _i, row in df.iterrows(): + entry = { + "code": row["subject_id"], + "display": row.get("subject_label", ""), # todo: if empty, don't add this key + "target": [ + { + "code": row["object_id"], + "display": row.get("object_label", ""), # todo: if empty, don't add this key + "equivalence": equivalence_map.get( + row["predicate_id"], row["predicate_id"] + ), # r4 + # "relationship": row['predicate_id'], # r5 + # "comment": '', + "extension": [ + { + "url": "http://example.org/fhir/StructureDefinition/mapping_justification", + "valueString": row.get( + "mapping_justification", + row.get( + "mapping_justification", "" + ), # todo: if empty, don't add this key + ), + } + ], + } + ], + } + json_obj["group"][0]["element"].append(entry) # Delete empty fields - # todo: This should be recursive? + # todo: This should be recursive? 
yes + # - it catches empty 'sourceUri' and 'targetUri', but not 'source' and 'target' keys_to_delete: List[str] = [] for k, v in json_obj.items(): if v in [ diff --git a/tests/test_conversion.py b/tests/test_conversion.py index faeae644..c4feccd1 100644 --- a/tests/test_conversion.py +++ b/tests/test_conversion.py @@ -15,6 +15,7 @@ from sssom.sssom_document import MappingSetDocument from sssom.util import MappingSetDataFrame, to_mapping_set_dataframe from sssom.writers import ( + to_fhir_json, to_json, to_ontoportal_json, to_owl_graph, @@ -62,6 +63,8 @@ def test_conversion(self): self._test_to_json(mdoc, test) logging.info("Testing ontoportal JSON export") self._test_to_ontoportal_json(mdoc, test) + logging.info("Testing fhir_json JSON export") + self._test_to_fhir_json(mdoc, test) def _test_to_owl_graph(self, mdoc, test): msdf = to_mapping_set_dataframe(mdoc) @@ -85,6 +88,13 @@ def _test_to_json(self, mdoc, test: SSSOMTestCase): with open(test.get_out_file("json"), "w") as file: write_json(msdf, file, serialisation="json") + def _test_to_fhir_json(self, mdoc, test: SSSOMTestCase): + msdf = to_mapping_set_dataframe(mdoc) + d = to_fhir_json(msdf) + self.assertEqual( + len(d["group"][0]["element"]), test.ct_data_frame_rows, "wrong number of mappings." 
+ ) + def _test_to_ontoportal_json(self, mdoc, test: SSSOMTestCase): msdf = to_mapping_set_dataframe(mdoc) jsonob = to_ontoportal_json(msdf) diff --git a/tests/test_writers.py b/tests/test_writers.py index 4d94110d..ad27e565 100644 --- a/tests/test_writers.py +++ b/tests/test_writers.py @@ -3,6 +3,7 @@ import json import os import unittest +from typing import Any, Dict import pandas as pd from curies import Converter @@ -128,19 +129,47 @@ def test_update_sssom_context_with_prefixmap(self): def test_write_sssom_fhir(self): """Test writing as FHIR ConceptMap JSON.""" + # Vars path = os.path.join(test_out_dir, "test_write_sssom_fhir.json") + msdf: MappingSetDataFrame = self.msdf + metadata: Dict[str, Any] = msdf.metadata + mapping_set_id: str = metadata["mapping_set_id"] + + # Write with open(path, "w") as file: write_json(self.msdf, file, "fhir_json") - # todo: @Joe: after implementing reader/importer, change this to `msdf = parse_sssom_fhir_json()` + # Read + # todo: after implementing reader/importer, change this to `msdf = parse_sssom_fhir_json()` with open(path, "r") as file: d = json.load(file) - # todo: @Joe: What else is worth checking? 
+        # Test
+        # - metadata
+        self.assertEqual(d["resourceType"], "ConceptMap")
+        self.assertIn(d["identifier"][0]["system"], mapping_set_id)
+        self.assertEqual(len(d["identifier"]), 1)
+        self.assertEqual(
+            len({d["identifier"][0]["value"], mapping_set_id, d["url"]}), 1
+        )  # assert all same
+        # todo: if/when more test cases, shan't be just 'basic.tsv'
+        self.assertEqual(d["name"], "basic.tsv")
+        # self.assertEqual(d["title"], "todo")  # missing from basic.tsv
+        self.assertEqual(d["status"], "draft")
+        self.assertEqual(d["experimental"], True)
+        self.assertEqual(len(d["date"]), len("YYYY-MM-DD"))
+        self.assertEqual(d["copyright"], "https://creativecommons.org/publicdomain/zero/1.0/")
+        # - n mappings
         self.assertEqual(
             len(d["group"][0]["element"]),
             self.mapping_count,
             f"{path} has the wrong number of mappings.",
         )
+        # - more
+        self.assertEqual(len(d["group"]), 1)
+        # todo: code
+        # todo: display
+        # todo: equivalence
+        #  - I'm getting: subsumes, owl:equivalentClass (and see what else in basic.tsv)
+        # todo: mapping_justification extension
-        print()  # TODO: temp
 
     def test_write_sssom_owl(self):
         """Test writing as OWL."""