From 4d2f984c3050e58b407426c16a41f889cb220c26 Mon Sep 17 00:00:00 2001 From: rajdeepmondal-el Date: Mon, 26 Feb 2024 19:56:28 +0530 Subject: [PATCH] uberon mappings and labels --- src/fetch_tissue_data/run_breast.py | 18 +++-- .../cda/mapper/op_uberon_mapper.py | 75 ++++++++++++++++++- 2 files changed, 83 insertions(+), 10 deletions(-) diff --git a/src/fetch_tissue_data/run_breast.py b/src/fetch_tissue_data/run_breast.py index 7514754..6d67d84 100644 --- a/src/fetch_tissue_data/run_breast.py +++ b/src/fetch_tissue_data/run_breast.py @@ -6,16 +6,18 @@ table_importer: CdaTableImporter = configure_cda_table_importer() -Tsite = Q('primary_diagnosis_site = "%breast%"', ) -cohort_name = 'Breast' -p = table_importer.get_ga4gh_phenopackets(Tsite, cohort_name=cohort_name) +Tsite_breast = Q('primary_diagnosis_site = "%breast%"') +cohort_name_breast = 'Breast' +p_breast = table_importer.get_ga4gh_phenopackets(Tsite_breast, cohort_name=cohort_name_breast) -result_dir = os.path.abspath(os.path.join('phenopackets', cohort_name)) -os.makedirs(result_dir, exist_ok=True) -print(f'Writing {len(p)} phenopackets to {result_dir}') -for pp in p: - file_path = os.path.join(result_dir, f'{pp.id}.json') +result_dir_breast = os.path.abspath(os.path.join('phenopackets', cohort_name_breast)) +os.makedirs(result_dir_breast, exist_ok=True) + +# Writing phenopackets for Breast tissue +print(f'Writing {len(p_breast)} phenopackets to {result_dir_breast}') +for pp in p_breast: + file_path = os.path.join(result_dir_breast, f'{pp.id}.json') with open(file_path, 'w') as fh: json = MessageToJson(pp) fh.write(json) diff --git a/src/oncoexporter/cda/mapper/op_uberon_mapper.py b/src/oncoexporter/cda/mapper/op_uberon_mapper.py index e6ff8d5..b2fa4f7 100644 --- a/src/oncoexporter/cda/mapper/op_uberon_mapper.py +++ b/src/oncoexporter/cda/mapper/op_uberon_mapper.py @@ -22,7 +22,19 @@ def __init__(self): "uterine cervix": "UBERON:0000002", "uterus": "UBERON:0000995", "body of uterus": "UBERON:0009853", - "lower respiratory tract": "UBERON:0001558" + "lower respiratory tract": "UBERON:0001558", + 'breast': 'UBERON:0000310', + 'bone marrow': 'UBERON:0002371', + 'bone': 'UBERON:0002481', + 'brain': 'UBERON:0000955', + 'colon': 'UBERON:0001155', + 'heart': 'UBERON:0000948', + 'kidney': 'UBERON:0002113', + 'adrenal gland': 'UBERON:0002369', + 'liver': 'UBERON:0002107', + 'pancreas': 'UBERON:0001264', + 'skin': 'UBERON:0002097', + 'thyroid gland': 'UBERON:0002046' } self._site_to_uberon_label_d = { "Lung": "lung", @@ -39,6 +51,66 @@ def __init__(self): "Bronchus and lung": "lower respiratory tract", "Lung/Bronchus": "lower respiratory tract", "Lung/Bronchus, Unknown": "lower respiratory tract", + "Breast": "breast", + "Breast, NOS": "breast", + "Breast, Unknown": "breast", + "Bone marrow": "bone marrow", + "Bone Marrow": "bone marrow", + "Bones, joints and articular cartilage of other and unspecified sites": "bone", + "Bones, joints and articular cartilage of limbs": "bone", + "Bones of skull and face and associated joints (excludes mandible C41.1)": "bone", + "Long bones of lower limb and associated joints": "bone", + "Long bones of upper limb, scapula and associated joints": 'bone', + "Pelvic bones, sacrum, coccyx and associated joints": "bone", + "Bone, NOS" : 'bone', + "Bone": "bone", + "Bones": "bone", + "Brain": "brain", + "Brain, NOS": "brain", + "Brain, Unknown": "brain", + "Overlapping lesion of brain and central nervous system": "brain", + "Overlapping lesion of brain": "brain", + "Brain stem": "brain", + "Colon": "colon", + "Colon, NOS": "colon", + "Colon, Unknown": "colon", + "Heart, mediastinum, and pleura": "heart", + "Connective, subcutaneous and other soft tissues of thorax (excludes thymus C37.9, heart and mediastinum C38._)": "heart", + "Kidney": "kidney", + "Kidney, NOS": "kidney", + "Kidney, Unknown": "kidney", + "Renal pelvis": "kidney", + "Renal Pelvis": "kidney", + "Adrenal gland": "adrenal gland", + "Adrenal Gland": "adrenal gland", + "Adrenal gland, NOS": "adrenal gland", + "Adrenal gland, Unknown": "adrenal gland", + "Liver and intrahepatic bile ducts": "liver", + "Liver": "liver", + "Intrahepatic bile ducts": "liver", + "Pancreas": "pancreas", + "Pancreas, NOS": "pancreas", + "Pancreas, Unknown": "pancreas", + "Pancreatic duct": "pancreas", + "Skin": "skin", + "Skin, NOS": "skin", + "Skin, Unknown": "skin", + "Connective, subcutaneous and other soft tissues": "skin", + "Connective, subcutaneous and other soft tissues of pelvis": "skin", + "Connective, subcutaneous and other soft tissues of lower limb and hip": "skin", + "Connective, subcutaneous and other soft tissues, NOS": "skin", + "Connective, subcutaneous and other soft tissues of upper limb and shoulder": "skin", + "Connective, subcutaneous and other soft tissues of head, face, and neck (excludes connective tissue of orbit C69.6 and nasal cartilage C30.0)": "skin", + "Skin of scalp and neck": "skin", + "Skin of lower limb and hip": "skin", + "Connective, subcutaneous and other soft tissues of abdomen": "skin", + "Connective, subcutaneous and other soft tissues of trunk, NOS": "skin", + "Skin, NOS (excludes skin of labia majora C51.0, skin of vulva C51.9, skin of penis C60.9 and skin of scrotum C63.2)": "skin", + "Thyroid gland": "thyroid gland", + "Thyroid Gland": "thyroid gland", + "Thyroid gland, NOS": "thyroid gland", + "Thyroid gland, Unknown": "thyroid gland", + "Thyroid Gland, Unknown": "thyroid gland", } def get_ontology_term(self, row: pd.Series) -> Optional[PPkt.OntologyClass]: @@ -54,4 +126,3 @@ def get_ontology_term(self, row: pd.Series) -> Optional[PPkt.OntologyClass]: else: # TODO -- more robust error handling in final release, but for development fail early raise ValueError(f"Could not find UBERON term for primary_site=\"{primary_site}\"") -