Add New Endpoint: /metakg/parse issue#271 #280

Open
wants to merge 10 commits into main
1 change: 1 addition & 0 deletions src/config.py
@@ -96,6 +96,7 @@
(r"/api/metakg/consolidated/?", "handlers.api.MetaKGQueryHandler", {"biothing_type": "metakg_consolidated"}),
(r"/api/metakg/consolidated/fields/?", "biothings.web.handlers.MetadataFieldHandler", {"biothing_type": "metakg_consolidated"}),
(r"/api/metakg/paths/?", "handlers.api.MetaKGPathFinderHandler", {"biothing_type": "metakgpathfinder"}),
(r"/api/metakg/parse/?", "handlers.api.MetaKGParserHandler"),
]

# biothings web tester will read this
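The new route maps /api/metakg/parse onto the MetaKGParserHandler added below. A minimal usage sketch, assuming a locally running smartAPI instance on http://localhost:8000 (the host/port used by the tests in this PR) and the mygene.info spec URL those tests also use:

import requests

SPEC_URL = (
    "https://raw.githubusercontent.com/NCATS-Tangerine/translator-api-registry/"
    "master/mygene.info/openapi_full.yml"
)

# GET: let the endpoint fetch the metadata from a URL and parse it into MetaKG edges
resp = requests.get(
    "http://localhost:8000/api/metakg/parse",
    params={"url": SPEC_URL, "api_details": 0, "bte": 0},
    timeout=10,
)
resp.raise_for_status()
result = resp.json()
first_hit = result["hits"][0]
print(result["total"], first_hit["subject"], first_hit["predicate"], first_hit["object"])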
10 changes: 8 additions & 2 deletions src/controller/smartapi.py
@@ -369,8 +369,14 @@ def is_trapi(self):
"""return True if a TRAPI"""
return self.has_tags("trapi", "translator")

def get_metakg(self, include_trapi=True):
raw_metadata = decoder.to_dict(decoder.decompress(self._doc._raw))
def get_metakg(self, include_trapi=True, metadata_url=False):
if metadata_url:
data_id = decoder.get_id(self.url)
doc = self.get(data_id)
self._doc = doc._doc
raw_metadata = decoder.to_dict(decoder.decompress(doc._doc._raw))
else:
raw_metadata = decoder.to_dict(decoder.decompress(self._doc._raw))
mkg_parser = MetaKGParser()
extra_data = {"id": self._id, "url": self.url}
self.metakg_errors = None # reset metakg_errors
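The new metadata_url flag lets get_metakg() re-resolve the document through its registered URL (via decoder.get_id) instead of reusing the already-loaded raw document. A hypothetical call sketch; the SmartAPI controller class name and its get() accessor are assumptions inferred from this diff, not verified against the full module:

from controller.smartapi import SmartAPI

entry = SmartAPI.get("some_smartapi_id")            # placeholder _id
edges = entry.get_metakg()                          # parse from the stored raw metadata (default)
edges_again = entry.get_metakg(metadata_url=True)   # re-load the doc via its URL-derived id first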
161 changes: 155 additions & 6 deletions src/handlers/api.py
@@ -1,7 +1,6 @@
import asyncio
import json
import logging
from typing import List, Union
import os
import bmt
from biothings.utils import serializer
@@ -22,10 +21,10 @@
from utils.metakg.cytoscape_formatter import CytoscapeDataFormatter
from utils.metakg.biolink_helpers import get_expanded_values
from utils.notification import SlackNewAPIMessage, SlackNewTranslatorAPIMessage
from utils.metakg.parser import MetaKGParser

logger = logging.getLogger("smartAPI")


def github_authenticated(func):
"""
RegistryHandler Decorator
@@ -495,7 +494,7 @@ def process_apis(self, apis):
api_dict = apis["api"]
filtered_api= self.get_filtered_api(api_dict)
apis["api"] = filtered_api

def write(self, chunk):
"""
Overwrite the biothings query handler to ...
@@ -522,7 +521,7 @@ def write(self, chunk):
self.set_header("Content-Disposition", 'attachment; filename="smartapi_metakg.graphml"')

return super(BaseAPIHandler, self).write(chunk)

if self.format == "html":
# setup template
template_path = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'templates'))
@@ -680,9 +679,159 @@ async def get(self, *args, **kwargs):
raw_query_output = self.setup_pathfinder_rawquery(expanded_fields)
self.write(raw_query_output)
return
res = {
"total": len(paths_with_edges),
res = {
"total": len(paths_with_edges),
"paths": paths_with_edges,
}
await asyncio.sleep(0.01)
self.finish(res)

class MetaKGParserHandler(BaseHandler):
    kwargs = {
        "GET": {
            "url": {
                "type": str,
                "required": True,
                "max": 1000,
                "description": "URL of the SmartAPI metadata to parse",
            },
            "api_details": {"type": bool, "default": 0},
            "bte": {"type": bool, "default": 0},
        },
        "POST": {
            "api_details": {"type": bool, "default": 0},
            "bte": {"type": bool, "default": 0},
        },
    }

    def initialize(self, *args, **kwargs):
        super().initialize(*args, **kwargs)
        # change the default query pipeline from self.biothings.pipeline
        self.pipeline = MetaKGQueryPipeline(ns=self.biothings)

    def get_filtered_api(self, api_dict):
        """Extract and return filtered API information."""
        api_info = api_dict["api"]

        # Default structure to preserve top-level keys
        filtered_dict = {
            "subject": api_dict.get("subject"),
            "object": api_dict.get("object"),
            "predicate": api_dict.get("predicate"),
            "subject_prefix": api_dict.get("subject_prefix"),
            "object_prefix": api_dict.get("object_prefix"),
        }

        # case: bte=1, api_details=0
        if self.args.bte == "1" and self.args.api_details == "0":
            filtered_api = {
                **({"name": api_info["name"]} if "name" in api_info else {}),
                **(
                    {"smartapi": {"id": api_info["smartapi"]["id"]}}
                    if "smartapi" in api_info and "id" in api_info["smartapi"]
                    else {}
                ),
                "bte": api_info.get("bte", {}),
            }

        # case: bte=0, api_details=1
        elif self.args.bte == "0" and self.args.api_details == "1":
            api_info.pop("bte", None)
            filtered_api = api_info

        # case: api_details=1, bte=1
        elif self.args.bte == "1" and self.args.api_details == "1":
            filtered_api = api_info

        # case: bte=0, api_details=0
        else:
            filtered_api = {
                **({"name": api_info["name"]} if "name" in api_info else {}),
                **(
                    {"smartapi": {"id": api_info["smartapi"]["id"]}}
                    if "smartapi" in api_info and "id" in api_info["smartapi"]
                    else {}
                ),
            }

        # Add the filtered 'api' key to the preserved top-level structure
        filtered_dict["api"] = filtered_api

        # Remove 'bte' from 'api' if it exists
        if "bte" in filtered_dict["api"]:
            filtered_dict["bte"] = filtered_dict["api"].pop("bte", None)

        return filtered_dict

    def process_apis(self, apis):
        """Process each API dict based on provided args."""
        if isinstance(apis, list):
            for i, api_dict in enumerate(apis):
                filtered_api = self.get_filtered_api(api_dict)
                apis[i] = filtered_api
        elif isinstance(apis, dict):
            if "bte" in apis:
                # Update dict for new format
                apis["api"]["bte"] = apis.pop("bte")
            api_dict = apis["api"]
            filtered_api = self.get_filtered_api(api_dict)
            apis["api"] = filtered_api
        return apis

    async def get(self, *args, **kwargs):
        if not self.get_argument("url", None):
            self.set_status(400)
            self.write({"error": "Missing 'url' argument"})
            return

        parser = MetaKGParser()
        url = self.get_argument("url")
        # default to "0" so the string comparisons in get_filtered_api behave the same
        # whether or not the flags are supplied
        self.args.api_details = self.get_argument("api_details", "0")
        self.args.bte = self.get_argument("bte", "0")

        trapi_data = parser.get_TRAPI_metadatas(data=None, url=url)
        nontrapi_data = parser.get_non_TRAPI_metadatas(data=None, url=url)
        combined_data = trapi_data + nontrapi_data

        for i, api_dict in enumerate(combined_data):
            filtered_api = self.get_filtered_api(api_dict)
            combined_data[i] = filtered_api

        response = {
            "took": 1,
            "total": len(combined_data),
            "max_score": 1,
            "hits": combined_data,
        }

        self.set_header("Content-Type", "application/json")
        self.write(response)

    async def post(self, *args, **kwargs):
        try:
            # Read the raw request body
            body = self.request.body
            # Parse the JSON content
            data = json.loads(body)
            parser = MetaKGParser()
            self.args.api_details = self.get_argument("api_details", "0")
            self.args.bte = self.get_argument("bte", "0")
            trapi_data = parser.get_TRAPI_metadatas(data=data)
            nontrapi_data = parser.get_non_TRAPI_metadatas(data=data)
            combined_data = trapi_data + nontrapi_data

            for i, api_dict in enumerate(combined_data):
                filtered_api = self.get_filtered_api(api_dict)
                combined_data[i] = filtered_api

            response = {
                "took": 1,
                "total": len(combined_data),
                "max_score": 1,
                "hits": combined_data,
            }

            self.set_header("Content-Type", "application/json")
            self.write(response)

        except json.JSONDecodeError:
            # return a 400 instead of letting the exception surface as a 500
            self.set_status(400)
            self.write({"error": "Invalid JSON content in request body."})
81 changes: 81 additions & 0 deletions src/tests/_utils/metakg/integration/parser/parse.py
@@ -0,0 +1,81 @@
import json
import os
import unittest

import requests


class TestAPI(unittest.TestCase):
    URL_EXAMPLE = "https://raw.githubusercontent.com/NCATS-Tangerine/translator-api-registry/master/mygene.info/openapi_full.yml"
    TIMEOUT = 10  # Timeout in seconds

    def setUp(self):
        self.headers = {"Content-Type": "application/json"}
        # NOTE: assumes a metadata_content.json fixture located next to this test module
        fixture_path = os.path.join(os.path.dirname(__file__), "metadata_content.json")
        with open(fixture_path, "r") as file:
            self.data = json.load(file)

    # POST Tests
    def test_post_metakg_parse_api_details_1_bte_1(self):
        url = "http://localhost:8000/api/metakg/parse?api_details=1&bte=1"
        response = requests.post(url, headers=self.headers, json=self.data, timeout=self.TIMEOUT)
        json_response = response.json()
        self.assertEqual(response.status_code, 200)
        self.assertIn('api', json_response['hits'][0].keys())
        self.assertIn('bte', json_response['hits'][0].keys())

    def test_post_metakg_parse_api_details_0_bte_1(self):
        url = "http://localhost:8000/api/metakg/parse?api_details=0&bte=1"
        response = requests.post(url, headers=self.headers, json=self.data, timeout=self.TIMEOUT)
        json_response = response.json()
        self.assertEqual(response.status_code, 200)
        self.assertIn('bte', json_response['hits'][0].keys())

    def test_post_metakg_parse_api_details_1_bte_0(self):
        url = "http://localhost:8000/api/metakg/parse?api_details=1&bte=0"
        response = requests.post(url, headers=self.headers, json=self.data, timeout=self.TIMEOUT)
        json_response = response.json()
        self.assertEqual(response.status_code, 200)
        self.assertIn('api', json_response['hits'][0].keys())
        self.assertNotIn('bte', json_response['hits'][0].keys())

    def test_post_metakg_parse_api_details_0_bte_0(self):
        url = "http://localhost:8000/api/metakg/parse?api_details=0&bte=0"
        response = requests.post(url, headers=self.headers, json=self.data, timeout=self.TIMEOUT)
        json_response = response.json()
        self.assertEqual(response.status_code, 200)
        self.assertNotIn('bte', json_response['hits'][0].keys())
        self.assertIn('subject', json_response['hits'][0].keys())

    # GET Tests
    def test_get_metakg_parse_api_details_1_bte_1(self):
        url = f"http://localhost:8000/api/metakg/parse?url={self.URL_EXAMPLE}&api_details=1&bte=1"
        response = requests.get(url, timeout=self.TIMEOUT)
        json_response = response.json()
        self.assertEqual(response.status_code, 200)
        self.assertIn('api', json_response['hits'][0].keys())
        self.assertIn('bte', json_response['hits'][0].keys())

    def test_get_metakg_parse_api_details_0_bte_1(self):
        url = f"http://localhost:8000/api/metakg/parse?url={self.URL_EXAMPLE}&api_details=0&bte=1"
        response = requests.get(url, timeout=self.TIMEOUT)
        json_response = response.json()
        self.assertEqual(response.status_code, 200)
        self.assertIn('bte', json_response['hits'][0].keys())

    def test_get_metakg_parse_api_details_1_bte_0(self):
        url = f"http://localhost:8000/api/metakg/parse?url={self.URL_EXAMPLE}&api_details=1&bte=0"
        response = requests.get(url, timeout=self.TIMEOUT)
        json_response = response.json()
        self.assertEqual(response.status_code, 200)
        self.assertIn('api', json_response['hits'][0].keys())
        self.assertNotIn('bte', json_response['hits'][0].keys())

    def test_get_metakg_parse_api_details_0_bte_0(self):
        url = f"http://localhost:8000/api/metakg/parse?url={self.URL_EXAMPLE}&api_details=0&bte=0"
        response = requests.get(url, timeout=self.TIMEOUT)
        json_response = response.json()
        self.assertEqual(response.status_code, 200)
        self.assertNotIn('bte', json_response['hits'][0].keys())
        self.assertIn('subject', json_response['hits'][0].keys())


if __name__ == "__main__":
    unittest.main()
52 changes: 39 additions & 13 deletions src/utils/metakg/parser.py
@@ -13,17 +13,34 @@ class MetaKGParser:
    get_url_timeout = 60
    metakg_errors = None

    def get_non_TRAPI_metadatas(self, data, extra_data=None):
        parser = API(data)
    def get_non_TRAPI_metadatas(self, data=None, extra_data=None, url=None):
        # Error Handling
        if not data and not url:
            raise ValueError("Either data or url must be provided.")
        if data:
            parser = API(smartapi_doc=data)
        elif url:
            parser = API(url=url)
        else:
            raise ValueError("Error getting metadata from provided data or url.")

        mkg = self.extract_metakgedges(parser.metadata["operations"], extra_data=extra_data)
        no_nodes = len({x["subject"] for x in mkg} | {x["object"] for x in mkg})
        no_edges = len({x["predicate"] for x in mkg})
        logger.info("Done [%s nodes, %s edges]", no_nodes, no_edges)
        return mkg

    def get_TRAPI_metadatas(self, data, extra_data=None):
    def get_TRAPI_metadatas(self, data=None, extra_data=None, url=None):
        ops = []
        metadata_list = self.get_TRAPI_with_metakg_endpoint(data)
        if not data and not url:
            raise ValueError("Either data or url must be provided.")
        if data:
            metadata_list = self.get_TRAPI_with_metakg_endpoint(data=data)
        elif url:
            metadata_list = self.get_TRAPI_with_metakg_endpoint(url=url)
        else:
            raise ValueError("Error getting metadata from provided data or url.")

        count_metadata_list = len(metadata_list)
        self.metakg_errors = {}
        for i, metadata in enumerate(metadata_list):
@@ -34,15 +51,24 @@ def get_TRAPI_metadatas(self, data, extra_data=None):

        return self.extract_metakgedges(ops, extra_data=extra_data)

    def get_TRAPI_with_metakg_endpoint(self, data):
        metadatas = []
        parser = API(data)
        metadata = parser.metadata
        _paths = metadata.get("paths", {})
        _team = metadata.get("x-translator", {}).get("team")
        if "/meta_knowledge_graph" in _paths and "/query" in _paths and _team:
            metadatas.append(metadata)
        return metadatas
    def get_TRAPI_with_metakg_endpoint(self, data=None, url=None):
        if not data and not url:
            raise ValueError("Either data or url must be provided.")
        try:
            # Initialize API with either data or URL
            parser = API(smartapi_doc=data) if data else API(url=url)
            metadata = parser.metadata
            _paths = metadata.get("paths", {})
            _team = metadata.get("x-translator", {}).get("team")

            # Check for required TRAPI paths
            if "/meta_knowledge_graph" in _paths and "/query" in _paths and _team:
                logger.info("TRAPI metadata found.")
                return [metadata]
            else:
                return []
        except Exception as e:
            raise ValueError(f"Error getting TRAPI metadata: {e}")

    def construct_query_url(self, server_url):
        if server_url.endswith("/"):
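For direct use outside the handler, the updated parser methods can now be driven by a URL alone, mirroring what MetaKGParserHandler.get() does. A small sketch using the same example spec URL as the tests; it assumes execution from src/ so that utils.metakg.parser is importable:

from utils.metakg.parser import MetaKGParser

SPEC_URL = (
    "https://raw.githubusercontent.com/NCATS-Tangerine/translator-api-registry/"
    "master/mygene.info/openapi_full.yml"
)

parser = MetaKGParser()
# TRAPI and non-TRAPI edges are parsed separately and concatenated, as in the handler
edges = parser.get_TRAPI_metadatas(url=SPEC_URL) + parser.get_non_TRAPI_metadatas(url=SPEC_URL)
for edge in edges[:5]:
    print(edge["subject"], edge["predicate"], edge["object"])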