Skip to content

Commit

Permalink
Add normalize_hla utility
Browse files Browse the repository at this point in the history
Signed-off-by: Mihai Todor <todormihai@gmail.com>
  • Loading branch information
mihaitodor committed Nov 27, 2024
1 parent 797ce2d commit 4f17a4b
Show file tree
Hide file tree
Showing 15 changed files with 141 additions and 7 deletions.
2 changes: 2 additions & 0 deletions .env
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
UTILITIES_DATA_VERSION=113c119
PYARD_DATABASE_VERSION=3580
2 changes: 1 addition & 1 deletion .github/workflows/cicd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
args: --extend-ignore E501,E741

- name: Run Tests
run: python -m pytest
run: ./fetch_utilities_data.sh && python -m pytest

deploy:
name: Deploy to dev
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@
__pycache__
.venv
utilities/FASTA
utilities/mongo_utilities.py
utilities/mongo_utilities.py
/data
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.11.6
3 changes: 2 additions & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
"console": "integratedTerminal",
"justMyCode": false
}
]
}
2 changes: 1 addition & 1 deletion Procfile
Original file line number Diff line number Diff line change
@@ -1 +1 @@
web: gunicorn run:app
web: ./fetch_utilities_data.sh && gunicorn run:app
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,11 @@ run `python3 -m pytest` from the terminal to execute them all.
Additionally, since the tests run against the Mongo DB database, if you need to update the test data in this repo, you
can run `OVERWRITE_TEST_EXPECTED_DATA=true python3 -m pytest` from the terminal and then create a pull request with the
changes.

## Update py-ard database

- In a Python shell started from the repository root, run `import pyard` followed by `pyard.init(data_dir='./data/pyard', imgt_version=<new version>)` to download the new version of the database
- Run `cd data/pyard && tar -czf pyard.sqlite3.tar.gz pyard-<new version>.sqlite3`
- Upload `pyard.sqlite3.tar.gz` as an asset of a new release on GitHub
- Update `PYARD_DATABASE_VERSION` in `.env`
- Update `UTILITIES_DATA_VERSION` in `.env` with the new tag ID (short git sha)
22 changes: 22 additions & 0 deletions app/api_spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1306,6 +1306,28 @@ paths:
pattern: '^\s*[Nn][Cc]_\d{4,10}(\.)(\d{1,2}):\d{1,10}-\d{1,10}\s*$'
example: "NC_000001.11:11794399-11794400"

/utilities/normalize-hla:
get:
summary: "Normalize HLA"
operationId: "app.utilities_endpoints.normalize_hla"
tags:
- "Operations Utilities (not part of balloted HL7 Operations)"
responses:
"200":
description: "Returns a normalized HLA ARD allele."
content:
application/json:
schema:
type: object
parameters:
- name: allele
in: query
required: true
description: "Allele."
schema:
type: string
example: "B14"

/utilities/terminology-translation:
get:
description: |-
Expand Down
35 changes: 33 additions & 2 deletions app/utilities_endpoints.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,19 @@
from flask import abort, jsonify
import os
from collections import OrderedDict
from app import common
from os.path import isdir

import pyard
import requests
from flask import abort, jsonify

from app import common

# Location of the py-ard database files, relative to the working directory the
# app is started from. fetch_utilities_data.sh populates this folder.
PYARD_DATA_DIR = './data/pyard'

# Fail fast at import time when the data has not been fetched yet. SystemExit
# is raised directly instead of calling the `exit` helper: that helper is only
# injected by the `site` module for interactive use and is not guaranteed to
# exist (e.g. under `python -S`). The resulting exit message is unchanged.
if not isdir(PYARD_DATA_DIR):
    raise SystemExit("Missing pyard folder. Please run fetch_utilities_data.sh!")

# Database version to load; the fallback mirrors the value shipped in `.env`.
pyard_database_version = os.getenv('PYARD_DATABASE_VERSION', '3580')

# Module-level py-ard instance shared by the HLA normalization endpoint.
ard = pyard.init(data_dir=PYARD_DATA_DIR, cache_size=1, imgt_version=pyard_database_version)


def fetch_concept_map(mapID):
Expand Down Expand Up @@ -290,3 +302,22 @@ def translate_terminology(codeSystem, code):
else:
abort(500, "HAPI server error")
return response


def normalize_hla(allele):
    """Reduce an HLA allele with py-ard for each supported reduction type.

    Returns a dict with a single entry keyed by the given `allele`; its value
    maps every reduction type ("G", "P", "lg", "lgx", "W", "exon", "U2", "S")
    to the corresponding `ard.redux` result.

    Any exception raised while reducing is printed and reported to the client
    as HTTP 422.
    """
    reduction_types = ("G", "P", "lg", "lgx", "W", "exon", "U2", "S")
    try:
        reductions = {}
        for reduction_type in reduction_types:
            reductions[reduction_type] = ard.redux(allele, reduction_type)
        return {allele: reductions}
    except Exception as err:
        print(f"Unexpected {err=}, {type(err)=}")
        abort(422, 'Failed HLA normalization')
25 changes: 25 additions & 0 deletions fetch_utilities_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/sh
#
# Download and unpack the data needed by the utilities endpoints (currently
# the py-ard SQLite database) into ./data/pyard.
#
# Must be run from the repository root: both ./.env and ./data are resolved
# relative to the current directory.

set -eu

# Load UTILITIES_DATA_VERSION. The explicit "./" matters: POSIX shells look up
# a slash-less operand of "." via PATH rather than in the current directory.
. ./.env

if [ -d ./data ]; then
    echo "files already fetched."
    exit 0
fi

# From here on ./data is created by this run. If anything below fails, remove
# it again so the next invocation retries instead of reporting
# "files already fetched." for a missing or partial download.
trap 'status=$?; [ "$status" -eq 0 ] || rm -rf ./data' EXIT

mkdir -p ./data/pyard
(
    cd ./data/pyard

    echo "Downloading py-ard database..."

    # -f: treat HTTP errors (e.g. a wrong release tag) as failures instead of
    # saving the error page as the archive; -S: still print errors despite -s.
    curl -fsSLO "https://github.com/FHIR/genomics-operations/releases/download/${UTILITIES_DATA_VERSION}/pyard.sqlite3.tar.gz"

    echo "Extracting py-ard database..."

    tar -xzf pyard.sqlite3.tar.gz

    echo "Finished extracting py-ard database."
)
6 changes: 5 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ dnspython==2.2.1
Flask==2.2.2
flask_cors==4.0.0
gunicorn==20.1.0
pandas==1.3.5
pandas==2.2.2
pyfastx==0.9.1
pyliftover==0.4
pymongo==4.2.0
Expand All @@ -13,3 +13,7 @@ python_dateutil==2.8.2
requests==2.28.1
streamlit==1.19.0
deepdiff==6.7.1
python-dotenv==1.0.1
py-ard==1.5.3
toml==0.10.2
numpy==2.0.2
4 changes: 4 additions & 0 deletions run.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from dotenv import load_dotenv

from app import create_app

load_dotenv()

app = create_app()

if __name__ == '__main__':
Expand Down
12 changes: 12 additions & 0 deletions tests/expected_outputs/normalize_hla/1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"B14": {
"G": "B*14:01:01G/B*14:01:02/B*14:01:03/B*14:01:04/B*14:01:05/B*14:01:06/B*14:01:07/B*14:01:08/B*14:01:12/B*14:01:13/B*14:01:14/B*14:01:15/B*14:02:01G/B*14:02:02/B*14:02:03/B*14:02:04/B*14:02:05/B*14:02:06/B*14:02:07/B*14:02:08/B*14:02:09/B*14:02:10/B*14:02:11/B*14:02:12/B*14:02:13/B*14:02:14/B*14:02:15/B*14:02:16/B*14:02:17/B*14:02:18/B*14:02:19/B*14:02:20/B*14:02:21/B*14:02:22/B*14:02:24/B*14:02:27/B*14:02:28/B*14:02:29/B*14:02:30/B*14:03/B*14:04/B*14:05/B*14:06:01/B*14:06:02/B*14:08:01/B*14:08:02/B*14:09/B*14:10/B*14:11/B*14:12/B*14:13/B*14:14/B*14:15/B*14:16/B*14:17/B*14:18/B*14:19/B*14:20/B*14:21:01/B*14:21:02/B*14:22/B*14:23/B*14:24/B*14:25/B*14:26/B*14:27/B*14:28/B*14:29/B*14:30/B*14:31/B*14:32/B*14:33/B*14:34/B*14:35/B*14:36/B*14:37/B*14:38/B*14:39/B*14:40/B*14:42:01/B*14:42:02/B*14:43/B*14:44/B*14:45/B*14:46/B*14:47/B*14:48/B*14:49/B*14:50/B*14:51/B*14:52/B*14:53/B*14:54/B*14:55/B*14:56/B*14:57/B*14:58/B*14:59/B*14:60/B*14:62/B*14:63/B*14:65/B*14:66/B*14:68/B*14:70Q/B*14:71/B*14:73/B*14:74/B*14:75/B*14:77/B*14:82/B*14:83/B*14:86/B*14:87/B*14:88/B*14:90/B*14:93/B*14:94/B*14:95/B*14:96/B*14:97/B*14:99/B*14:102/B*14:105/B*14:106/B*14:107/B*14:108Q/B*14:110/B*14:111/B*14:112/B*14:114/B*14:116/B*14:117/B*14:118/B*14:119/B*14:120/B*14:122/B*14:123/B*14:124/B*14:125/B*14:127/B*14:128/B*14:129/B*14:130/B*14:133/B*14:134",
"P": "B*14:01P/B*14:02P/B*14:03/B*14:04/B*14:05/B*14:06P/B*14:08P/B*14:09/B*14:10/B*14:11/B*14:12/B*14:13/B*14:14/B*14:15/B*14:16/B*14:17/B*14:18/B*14:19/B*14:20/B*14:21P/B*14:22/B*14:23/B*14:24/B*14:25/B*14:26/B*14:27/B*14:28/B*14:29/B*14:30/B*14:31/B*14:32/B*14:33/B*14:34/B*14:35/B*14:36/B*14:37/B*14:38/B*14:39/B*14:40/B*14:42P/B*14:43/B*14:44/B*14:45/B*14:46/B*14:47/B*14:48/B*14:49/B*14:50/B*14:51/B*14:52/B*14:53/B*14:54/B*14:55/B*14:56/B*14:57/B*14:58/B*14:59/B*14:60/B*14:62/B*14:63/B*14:65/B*14:66/B*14:68/B*14:70Q/B*14:71/B*14:73/B*14:74/B*14:75/B*14:77/B*14:82/B*14:83/B*14:86/B*14:87/B*14:88/B*14:90/B*14:93/B*14:94/B*14:95/B*14:96/B*14:97/B*14:99/B*14:102/B*14:105/B*14:106/B*14:107/B*14:108Q/B*14:110/B*14:111/B*14:112/B*14:114/B*14:116/B*14:117/B*14:118/B*14:119/B*14:120/B*14:122/B*14:123/B*14:124/B*14:125/B*14:127/B*14:128/B*14:129/B*14:130/B*14:133/B*14:134",
"lg": "B*14:01g/B*14:02g/B*14:03g/B*14:04g/B*14:05g/B*14:06g/B*14:08g/B*14:09g/B*14:10g/B*14:11g/B*14:12g/B*14:13g/B*14:14g/B*14:15g/B*14:16g/B*14:17g/B*14:18g/B*14:19g/B*14:20g/B*14:21g/B*14:22g/B*14:23g/B*14:24g/B*14:25g/B*14:26g/B*14:27g/B*14:28g/B*14:29g/B*14:30g/B*14:31g/B*14:32g/B*14:33g/B*14:34g/B*14:35g/B*14:36g/B*14:37g/B*14:38g/B*14:39g/B*14:40g/B*14:42g/B*14:43g/B*14:44g/B*14:45g/B*14:46g/B*14:47g/B*14:48g/B*14:49g/B*14:50g/B*14:51g/B*14:52g/B*14:53g/B*14:54g/B*14:55g/B*14:56g/B*14:57g/B*14:58g/B*14:59g/B*14:60g/B*14:62g/B*14:63g/B*14:65g/B*14:66g/B*14:68g/B*14:70Qg/B*14:71g/B*14:73g/B*14:74g/B*14:75g/B*14:77g/B*14:82g/B*14:83g/B*14:86g/B*14:87g/B*14:88g/B*14:90g/B*14:93g/B*14:94g/B*14:95g/B*14:96g/B*14:97g/B*14:99g/B*14:102g/B*14:105g/B*14:106g/B*14:107g/B*14:108Qg/B*14:110g/B*14:111g/B*14:112g/B*14:114g/B*14:116g/B*14:117g/B*14:118g/B*14:119g/B*14:120g/B*14:122g/B*14:123g/B*14:124g/B*14:125g/B*14:127g/B*14:128g/B*14:129g/B*14:130g/B*14:133g/B*14:134g",
"lgx": "B*14:01/B*14:02/B*14:03/B*14:04/B*14:05/B*14:06/B*14:08/B*14:09/B*14:10/B*14:11/B*14:12/B*14:13/B*14:14/B*14:15/B*14:16/B*14:17/B*14:18/B*14:19/B*14:20/B*14:21/B*14:22/B*14:23/B*14:24/B*14:25/B*14:26/B*14:27/B*14:28/B*14:29/B*14:30/B*14:31/B*14:32/B*14:33/B*14:34/B*14:35/B*14:36/B*14:37/B*14:38/B*14:39/B*14:40/B*14:42/B*14:43/B*14:44/B*14:45/B*14:46/B*14:47/B*14:48/B*14:49/B*14:50/B*14:51/B*14:52/B*14:53/B*14:54/B*14:55/B*14:56/B*14:57/B*14:58/B*14:59/B*14:60/B*14:62/B*14:63/B*14:65/B*14:66/B*14:68/B*14:70Q/B*14:71/B*14:73/B*14:74/B*14:75/B*14:77/B*14:82/B*14:83/B*14:86/B*14:87/B*14:88/B*14:90/B*14:93/B*14:94/B*14:95/B*14:96/B*14:97/B*14:99/B*14:102/B*14:105/B*14:106/B*14:107/B*14:108Q/B*14:110/B*14:111/B*14:112/B*14:114/B*14:116/B*14:117/B*14:118/B*14:119/B*14:120/B*14:122/B*14:123/B*14:124/B*14:125/B*14:127/B*14:128/B*14:129/B*14:130/B*14:133/B*14:134",
"W": "B*14:01:01:01/B*14:01:01:02/B*14:01:01:03/B*14:01:01:04/B*14:01:01:05/B*14:01:01:06/B*14:01:01:07/B*14:01:01:08/B*14:01:01:09/B*14:01:01:10/B*14:01:01:11/B*14:01:01:12/B*14:01:02/B*14:01:03/B*14:01:04/B*14:01:05/B*14:01:06/B*14:01:07/B*14:01:08/B*14:01:09/B*14:01:10/B*14:01:11/B*14:01:12/B*14:01:13/B*14:01:14/B*14:01:15/B*14:02:01:01/B*14:02:01:02/B*14:02:01:03/B*14:02:01:04/B*14:02:01:05/B*14:02:01:06/B*14:02:01:07/B*14:02:01:08/B*14:02:01:09/B*14:02:01:10/B*14:02:01:11/B*14:02:01:12/B*14:02:01:13/B*14:02:01:14/B*14:02:01:15/B*14:02:01:16/B*14:02:01:17/B*14:02:01:18/B*14:02:01:19/B*14:02:01:20/B*14:02:01:21/B*14:02:01:22/B*14:02:01:23/B*14:02:01:24/B*14:02:01:25Q/B*14:02:01:26/B*14:02:01:27/B*14:02:01:28/B*14:02:01:29/B*14:02:01:30/B*14:02:02/B*14:02:03/B*14:02:04/B*14:02:05/B*14:02:06/B*14:02:07/B*14:02:08/B*14:02:09/B*14:02:10/B*14:02:11/B*14:02:12/B*14:02:13/B*14:02:14/B*14:02:15/B*14:02:16/B*14:02:17/B*14:02:18/B*14:02:19/B*14:02:20/B*14:02:21/B*14:02:22/B*14:02:23/B*14:02:24/B*14:02:25/B*14:02:26/B*14:02:27/B*14:02:28/B*14:02:29/B*14:02:30/B*14:02:31:01/B*14:02:31:02/B*14:03/B*14:04/B*14:05/B*14:06:01/B*14:06:02/B*14:08:01/B*14:08:02/B*14:09/B*14:10/B*14:11/B*14:12/B*14:13/B*14:14/B*14:15/B*14:16/B*14:17/B*14:18/B*14:19/B*14:20/B*14:21:01/B*14:21:02/B*14:22/B*14:23/B*14:24/B*14:25/B*14:26/B*14:27/B*14:28/B*14:29/B*14:30/B*14:31/B*14:32/B*14:33/B*14:34/B*14:35/B*14:36/B*14:37/B*14:38/B*14:39/B*14:40/B*14:42:01/B*14:42:02/B*14:43/B*14:44/B*14:45/B*14:46/B*14:47/B*14:48/B*14:49/B*14:50/B*14:51/B*14:52/B*14:53/B*14:54/B*14:55/B*14:56/B*14:57/B*14:58/B*14:59/B*14:60/B*14:61/B*14:62/B*14:63/B*14:64/B*14:65/B*14:66/B*14:67/B*14:68/B*14:70Q/B*14:71/B*14:73/B*14:74/B*14:75/B*14:77/B*14:78/B*14:80/B*14:81/B*14:82/B*14:83/B*14:84/B*14:86/B*14:87/B*14:88/B*14:89/B*14:90/B*14:91/B*14:92/B*14:93/B*14:94/B*14:95/B*14:96/B*14:97/B*14:98/B*14:99/B*14:102/B*14:103/B*14:104/B*14:105/B*14:106/B*14:107/B*14:108Q/B*14:109/B*14:110/B*14:111/B*14:112/B*14:114/B*14:115/B*14:116/
B*14:117/B*14:118/B*14:119/B*14:120/B*14:121/B*14:122/B*14:123/B*14:124/B*14:125/B*14:126/B*14:127/B*14:128/B*14:129/B*14:130/B*14:132/B*14:133/B*14:134",
"exon": "B*14:01:01/B*14:01:02/B*14:01:03/B*14:01:04/B*14:01:05/B*14:01:06/B*14:01:07/B*14:01:08/B*14:01:09/B*14:01:10/B*14:01:11/B*14:01:12/B*14:01:13/B*14:01:14/B*14:01:15/B*14:02:01/B*14:02:01Q/B*14:02:02/B*14:02:03/B*14:02:04/B*14:02:05/B*14:02:06/B*14:02:07/B*14:02:08/B*14:02:09/B*14:02:10/B*14:02:11/B*14:02:12/B*14:02:13/B*14:02:14/B*14:02:15/B*14:02:16/B*14:02:17/B*14:02:18/B*14:02:19/B*14:02:20/B*14:02:21/B*14:02:22/B*14:02:23/B*14:02:24/B*14:02:25/B*14:02:26/B*14:02:27/B*14:02:28/B*14:02:29/B*14:02:30/B*14:02:31/B*14:03/B*14:04/B*14:05/B*14:06:01/B*14:06:02/B*14:08:01/B*14:08:02/B*14:09/B*14:10/B*14:11/B*14:12/B*14:13/B*14:14/B*14:15/B*14:16/B*14:17/B*14:18/B*14:19/B*14:20/B*14:21:01/B*14:21:02/B*14:22/B*14:23/B*14:24/B*14:25/B*14:26/B*14:27/B*14:28/B*14:29/B*14:30/B*14:31/B*14:32/B*14:33/B*14:34/B*14:35/B*14:36/B*14:37/B*14:38/B*14:39/B*14:40/B*14:42:01/B*14:42:02/B*14:43/B*14:44/B*14:45/B*14:46/B*14:47/B*14:48/B*14:49/B*14:50/B*14:51/B*14:52/B*14:53/B*14:54/B*14:55/B*14:56/B*14:57/B*14:58/B*14:59/B*14:60/B*14:61/B*14:62/B*14:63/B*14:64/B*14:65/B*14:66/B*14:67/B*14:68/B*14:70Q/B*14:71/B*14:73/B*14:74/B*14:75/B*14:77/B*14:78/B*14:80/B*14:81/B*14:82/B*14:83/B*14:84/B*14:86/B*14:87/B*14:88/B*14:89/B*14:90/B*14:91/B*14:92/B*14:93/B*14:94/B*14:95/B*14:96/B*14:97/B*14:98/B*14:99/B*14:102/B*14:103/B*14:104/B*14:105/B*14:106/B*14:107/B*14:108Q/B*14:109/B*14:110/B*14:111/B*14:112/B*14:114/B*14:115/B*14:116/B*14:117/B*14:118/B*14:119/B*14:120/B*14:121/B*14:122/B*14:123/B*14:124/B*14:125/B*14:126/B*14:127/B*14:128/B*14:129/B*14:130/B*14:132/B*14:133/B*14:134",
"U2": "B*14:01/B*14:02/B*14:02Q/B*14:03/B*14:04/B*14:05/B*14:06/B*14:08/B*14:09/B*14:10/B*14:11/B*14:12/B*14:13/B*14:14/B*14:15/B*14:16/B*14:17/B*14:18/B*14:19/B*14:20/B*14:21/B*14:22/B*14:23/B*14:24/B*14:25/B*14:26/B*14:27/B*14:28/B*14:29/B*14:30/B*14:31/B*14:32/B*14:33/B*14:34/B*14:35/B*14:36/B*14:37/B*14:38/B*14:39/B*14:40/B*14:42/B*14:43/B*14:44/B*14:45/B*14:46/B*14:47/B*14:48/B*14:49/B*14:50/B*14:51/B*14:52/B*14:53/B*14:54/B*14:55/B*14:56/B*14:57/B*14:58/B*14:59/B*14:60/B*14:61/B*14:62/B*14:63/B*14:64/B*14:65/B*14:66/B*14:67/B*14:68/B*14:70Q/B*14:71/B*14:73/B*14:74/B*14:75/B*14:77/B*14:78/B*14:80/B*14:81/B*14:82/B*14:83/B*14:84/B*14:86/B*14:87/B*14:88/B*14:89/B*14:90/B*14:91/B*14:92/B*14:93/B*14:94/B*14:95/B*14:96/B*14:97/B*14:98/B*14:99/B*14:102/B*14:103/B*14:104/B*14:105/B*14:106/B*14:107/B*14:108Q/B*14:109/B*14:110/B*14:111/B*14:112/B*14:114/B*14:115/B*14:116/B*14:117/B*14:118/B*14:119/B*14:120/B*14:121/B*14:122/B*14:123/B*14:124/B*14:125/B*14:126/B*14:127/B*14:128/B*14:129/B*14:130/B*14:132/B*14:133/B*14:134",
"S": "B14/B64/B65"
}
}
16 changes: 16 additions & 0 deletions tests/integration_tests/test_normalize_hla_utility.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import tests.utilities as tu

"""
Normalize HLA Utility Tests
--------------------------------
"""


def test_normalize_hla_utility(client):
    """Smoke-test the normalize-hla utility: a known allele must yield HTTP 200."""
    response = client.get(tu.normalize_hla_utility_query('allele=B14'))

    assert response.status_code == 200

    # The response body is deliberately not compared against the stored
    # expected output: this utility is not deterministic for a few output
    # items such as `exon` and `U2`.
    # tu.compare_actual_and_expected_output(f'{tu.NORMALIZE_HLA_OUTPUT_DIR}1.json', response.json)
7 changes: 7 additions & 0 deletions tests/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@
FIND_THE_GENE_URL = "/utilities/find-the-gene"
FIND_THE_GENE_OUTPUT_DIR = "tests/expected_outputs/find_the_gene/"

NORMALIZE_HLA_URL = "/utilities/normalize-hla"
NORMALIZE_HLA_OUTPUT_DIR = "tests/expected_outputs/normalize_hla/"


def find_subject_variants_query(query):
    """Return FIND_SUBJECT_VARIANTS_URL with `query` appended as its query string."""
    endpoint = FIND_SUBJECT_VARIANTS_URL
    return f"{endpoint}?{query}"
Expand Down Expand Up @@ -137,6 +140,10 @@ def find_the_gene_query(query):
return f"{FIND_THE_GENE_URL}?{query}"


def normalize_hla_utility_query(query):
    """Return NORMALIZE_HLA_URL with `query` appended as its query string."""
    endpoint = NORMALIZE_HLA_URL
    return f"{endpoint}?{query}"


def compare_actual_and_expected_output(filename, actual_json):
with open(filename) as expected_output_file:
expected_json = json.load(expected_output_file)
Expand Down

0 comments on commit 4f17a4b

Please sign in to comment.