Skip to content

Commit

Permalink
FHIR Feature
Browse files Browse the repository at this point in the history
- Add: _test_to_fhir_json()
- Update: test_write_sssom_fhir(): More assertions
- Update: Fixed some typos
- Update: model mappings: exporter: title --> title (was previously name --> title)
- Update: Drepecated these functions instead of deleting: write_fhir_json(), write_ontoportal_json()
  • Loading branch information
joeflack4 committed May 13, 2024
1 parent ea2a407 commit 591dc75
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 54 deletions.
142 changes: 91 additions & 51 deletions src/sssom/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pandas as pd
import yaml
from curies import Converter
from deprecation import deprecated
from jsonasobj2 import JsonObj
from linkml_runtime.dumpers import JSONDumper, rdflib_dumper
from linkml_runtime.utils.schemaview import SchemaView
Expand Down Expand Up @@ -100,7 +101,15 @@ def write_rdf(


def write_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="json") -> None:
"""Write a mapping set dataframe to the file as JSON."""
"""Write a mapping set dataframe to the file as JSON.
:param serialisation: The JSON format to use. Supported formats are:
- fhir_json: Outputs JSON in FHIR ConceptMap format (https://fhir-ru.github.io/conceptmap.html)
https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_fhir_json
- json: Outputs to SSSOM JSON https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_json
- ontoportal_json: Outputs JSON in Ontoportal format (https://ontoportal.org/)
https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_ontoportal_json
"""
func_map: Dict[str, Callable] = {
"fhir_json": to_fhir_json,
"json": to_json,
Expand All @@ -115,6 +124,28 @@ def write_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="json")
json.dump(data, output, indent=2)


@deprecated(deprecated_in="0.4.7", details="Use write_json() instead")
def write_fhir_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="fhir_json") -> None:
"""Write a mapping set dataframe to the file as FHIR ConceptMap JSON."""
if serialisation != "fhir_json":
raise ValueError(
f"Unknown json format: {serialisation}, currently only fhir_json supported"
)
write_json(msdf, output, serialisation="fhir_json")


@deprecated(deprecated_in="0.4.7", details="Use write_json() instead")
def write_ontoportal_json(
msdf: MappingSetDataFrame, output: TextIO, serialisation: str = "ontoportal_json"
) -> None:
"""Write a mapping set dataframe to the file as the ontoportal mapping JSON model."""
if serialisation != "ontoportal_json":
raise ValueError(
f"Unknown json format: {serialisation}, currently only ontoportal_json supported"
)
write_json(msdf, output, serialisation="ontoportal_json")


def write_owl(
msdf: MappingSetDataFrame,
file: TextIO,
Expand Down Expand Up @@ -259,18 +290,30 @@ def to_fhir_json(msdf: MappingSetDataFrame) -> Dict:
:return: Dict: A Dictionary serializable as JSON.
Resources:
- ConcpetMap::SSSOM mapping spreadsheet: https://docs.google.com/spreadsheets/d/1J19foBAYO8PCHwOfksaIGjNu-q5ILUKFh2HpOCgYle0/edit#gid=1389897118
- ConceptMap::SSSOM mapping spreadsheet:
https://docs.google.com/spreadsheets/d/1J19foBAYO8PCHwOfksaIGjNu-q5ILUKFh2HpOCgYle0/edit#gid=1389897118
TODO: add to CLI & to these functions: r4 vs r5 param
TODO: What if the msdf doesn't have everything we need? (i) metadata, e.g. yml, (ii) what if we need to override?
- todo: later: allow any nested aribtrary override: (get in kwargs, else metadata.get(key, None))
- todo: later: allow any nested arbitrary override: (get in kwargs, else metadata.get(key, None))
Minor todos
todo: `mapping_justification` consider `ValueString` -> `ValueCoding` https://github.com/timsbiomed/issues/issues/152
todo: mapping_justification: consider `ValueString` -> `ValueCoding` https://github.com/timsbiomed/issues/issues/152
todo: when/how to conform to R5 instead of R4?: https://build.fhir.org/conceptmap.html
"""
# Constants
df: pd.DataFrame = msdf.df
# TODO: R4 (try this first)
# relatedto | equivalent | equal | wider | subsumes | narrower | specializes | inexact | unmatched | disjoint
# https://www.hl7.org/fhir/r4/conceptmap.html
# todo: r4: if not found, should likely be `null` or something. check docs to see if nullable, else ask on Zulip
# TODO: R5 Needs to be one of:
# related-to | equivalent | source-is-narrower-than-target | source-is-broader-than-target | not-related-to
# https://www.hl7.org/fhir/r4/valueset-concept-map-equivalence.html
# ill update that next time. i can map SSSOM SKOS/etc mappings to FHIR ones
# and then add the original SSSOM mapping CURIE fields somewhere else
# https://www.hl7.org/fhir/valueset-concept-map-equivalence.html
# https://github.com/mapping-commons/sssom-py/issues/258
equivalence_map = {
# relateedto: The concepts are related to each other, and have at least some overlap in meaning, but the exact
# relationship is not known.
Expand All @@ -287,7 +330,6 @@ def to_fhir_json(msdf: MappingSetDataFrame) -> Dict:
"skos:broadMatch": "wider", # canonical
# subsumes: The target mapping subsumes the meaning of the source concept (e.g. the source is-a target).
"rdfs:subClassOf": "subsumes",
"owl:subClassOf": "subsumes",
# narrower: The target mapping is narrower in meaning than the source concept. The sense in which the mapping is
# narrower SHALL be described in the comments in this case, and applications should be careful when attempting
# to use these mappings operationally.
Expand Down Expand Up @@ -315,9 +357,10 @@ def to_fhir_json(msdf: MappingSetDataFrame) -> Dict:
name: str = mapping_set_id.split("/")[-1].replace(".sssom.tsv", "")

# Construct JSON
json_obj = {
json_obj: Dict[str, Any] = {
"resourceType": "ConceptMap",
"url": mapping_set_id,
# Assumes mapping_set_id is a URI w/ artefact name at end. System becomes URI stem, value becomes artefact name
"identifier": [
{
"system": "/".join(mapping_set_id.split("/")[:-1]) + "/",
Expand All @@ -326,7 +369,6 @@ def to_fhir_json(msdf: MappingSetDataFrame) -> Dict:
],
"version": metadata.get("mapping_set_version", ""),
"name": name,
"title": name, # TODO -> mapping_set_description?
"status": "draft", # todo: when done: draft | active | retired | unknown
"experimental": True, # todo: False when converter finished
# todo: should this be date of last converted to FHIR json instead?
Expand All @@ -353,53 +395,11 @@ def to_fhir_json(msdf: MappingSetDataFrame) -> Dict:
# }],
# "purpose": "", # todo: conceptmap
"copyright": metadata.get("license", ""),
"sourceUri": metadata.get("subject_source", ""),
"targetUri": metadata.get("object_source", ""),
# TODO: Might want to make each "group" first, if there is more than 1 set of ontology1::ontology2
# ...within a given MappingSet / set of SSSOM TSV rows.
"group": [
{
# TODO: Override? but how?
"source": metadata.get("subject_source", ""),
"target": metadata.get("object_source", ""),
"element": [
{
"code": row["subject_id"],
"display": row.get("subject_label", ""),
"target": [
{
"code": row["object_id"],
"display": row.get("object_label", ""),
# TODO: R4 (try this first)
# relatedto | equivalent | equal | wider | subsumes | narrower | specializes | inexact | unmatched | disjoint
# https://www.hl7.org/fhir/r4/conceptmap.html
# todo: r4: if not found, eventually needs to be `null` or something. check docs to see if nullable, else ask on Zulip
# TODO: R5 Needs to be one of:
# related-to | equivalent | source-is-narrower-than-target | source-is-broader-than-target | not-related-to
# https://www.hl7.org/fhir/r4/valueset-concept-map-equivalence.html
# ill update that next time. i can map SSSOM SKOS/etc mappings to FHIR ones
# and then add the original SSSOM mapping CURIE fields somewhere else
# https://www.hl7.org/fhir/valueset-concept-map-equivalence.html
# https://github.com/mapping-commons/sssom-py/issues/258
"equivalence": equivalence_map.get(
row["predicate_id"], row["predicate_id"]
), # r4
# "relationship": row['predicate_id'], # r5
# "comment": '',
"extension": [
{
"url": "http://example.org/fhir/StructureDefinition/mapping_justification",
"valueString": row.get(
"mapping_justification",
row.get("mapping_justification", ""),
),
}
],
}
],
}
for i, row in df.iterrows()
],
"element": []
# "unmapped": { # todo: conceptmap
# "mode": "fixed",
# "code": "temp",
Expand All @@ -408,9 +408,49 @@ def to_fhir_json(msdf: MappingSetDataFrame) -> Dict:
}
],
}
if "mapping_set_title" in metadata:
json_obj["title"] = metadata["mapping_set_title"]

# todo: Override? but how? (2024/04/05 Joe: idr what I was trying to override)
if "subject_source" in metadata:
json_obj["sourceUri"] = metadata["subject_source"]
json_obj["group"][0]["source"] = metadata["subject_source"]
if "object_source" in metadata:
json_obj["targetUri"] = metadata["object_source"]
json_obj["group"][0]["target"] = metadata["object_source"]

for _i, row in df.iterrows():
entry = {
"code": row["subject_id"],
"display": row.get("subject_label", ""), # todo: if empty, don't add this key
"target": [
{
"code": row["object_id"],
"display": row.get("object_label", ""), # todo: if empty, don't add this key
"equivalence": equivalence_map.get(
row["predicate_id"], row["predicate_id"]
), # r4
# "relationship": row['predicate_id'], # r5
# "comment": '',
"extension": [
{
"url": "http://example.org/fhir/StructureDefinition/mapping_justification",
"valueString": row.get(
"mapping_justification",
row.get(
"mapping_justification", ""
), # todo: if empty, don't add this key
),
}
],
}
],
}
json_obj["group"][0]["element"].append(entry)

# Delete empty fields
# todo: This should be recursive?
# todo: This should be recursive? yes
# - it catches empty 'sourceUri' and 'targetUri', but not 'source' and 'target'
keys_to_delete: List[str] = []
for k, v in json_obj.items():
if v in [
Expand Down
10 changes: 10 additions & 0 deletions tests/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from sssom.sssom_document import MappingSetDocument
from sssom.util import MappingSetDataFrame, to_mapping_set_dataframe
from sssom.writers import (
to_fhir_json,
to_json,
to_ontoportal_json,
to_owl_graph,
Expand Down Expand Up @@ -62,6 +63,8 @@ def test_conversion(self):
self._test_to_json(mdoc, test)
logging.info("Testing ontoportal JSON export")
self._test_to_ontoportal_json(mdoc, test)
logging.info("Testing fhir_json JSON export")
self._test_to_fhir_json(mdoc, test)

def _test_to_owl_graph(self, mdoc, test):
msdf = to_mapping_set_dataframe(mdoc)
Expand All @@ -85,6 +88,13 @@ def _test_to_json(self, mdoc, test: SSSOMTestCase):
with open(test.get_out_file("json"), "w") as file:
write_json(msdf, file, serialisation="json")

def _test_to_fhir_json(self, mdoc, test: SSSOMTestCase):
msdf = to_mapping_set_dataframe(mdoc)
d = to_fhir_json(msdf)
self.assertEqual(
len(d["group"][0]["element"]), test.ct_data_frame_rows, "wrong number of mappings."
)

def _test_to_ontoportal_json(self, mdoc, test: SSSOMTestCase):
msdf = to_mapping_set_dataframe(mdoc)
jsonob = to_ontoportal_json(msdf)
Expand Down
35 changes: 32 additions & 3 deletions tests/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
import os
import unittest
from typing import Any, Dict

import pandas as pd
from curies import Converter
Expand Down Expand Up @@ -128,19 +129,47 @@ def test_update_sssom_context_with_prefixmap(self):

def test_write_sssom_fhir(self):
"""Test writing as FHIR ConceptMap JSON."""
# Vars
path = os.path.join(test_out_dir, "test_write_sssom_fhir.json")
msdf: MappingSetDataFrame = self.msdf
metadata: Dict[str, Any] = msdf.metadata
mapping_set_id: str = metadata["mapping_set_id"]

# Write
with open(path, "w") as file:
write_json(self.msdf, file, "fhir_json")
# todo: @Joe: after implementing reader/importer, change this to `msdf = parse_sssom_fhir_json()`
# Read
# todo: after implementing reader/importer, change this to `msdf = parse_sssom_fhir_json()`
with open(path, "r") as file:
d = json.load(file)
# todo: @Joe: What else is worth checking?
# Test
# - metadata
self.assertEqual(d["resourceType"], "ConceptMap")
self.assertIn(d["identifier"][0]["system"], mapping_set_id)
self.assertEqual(len(d["identifier"]), 1)
self.assertEqual(
len({d["identifier"][0]["value"], mapping_set_id, d["url"]}), 1
) # assert all same
# todo: if/when more test cases, shan't be just 'basic.tsv'
self.assertEqual(d["name"], "basic.tsv")
# self.assertEqual(d["title"], "todo") # missing from basic.tsv
self.assertEqual(d["status"], "draft")
self.assertEqual(d["experimental"], True)
self.assertEqual(len(d["date"]), len("YYYY-MM-DD"))
self.assertEqual(d["copyright"], "https://creativecommons.org/publicdomain/zero/1.0/")
# - n mappings
self.assertEqual(
len(d["group"][0]["element"]),
self.mapping_count,
f"{path} has the wrong number of mappings.",
)
print() # TODO: temp
# - more
self.assertEqual(len(d["group"]), 1)
# todo: code
# todo: display
# todo: equivalence
# - I'm getting: subsumes, owl:equivalentClass (and see what else in basic.tsv)
# todo: mapping_justification extensionprint() # TODO: temp

def test_write_sssom_owl(self):
"""Test writing as OWL."""
Expand Down

0 comments on commit 591dc75

Please sign in to comment.