Skip to content

Commit

Permalink
uberon mappings and labels
Browse files: browse the repository at this point in the history
  • Loading branch information
rajdeepmondaldotcom committed Feb 26, 2024
1 parent 293563b commit 4d2f984
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 10 deletions.
18 changes: 10 additions & 8 deletions src/fetch_tissue_data/run_breast.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@

table_importer: CdaTableImporter = configure_cda_table_importer()

Tsite = Q('primary_diagnosis_site = "%breast%"', )
cohort_name = 'Breast'
p = table_importer.get_ga4gh_phenopackets(Tsite, cohort_name=cohort_name)
Tsite_breast = Q('primary_diagnosis_site = "%breast%"')
cohort_name_breast = 'Breast'
p_breast = table_importer.get_ga4gh_phenopackets(Tsite_breast, cohort_name=cohort_name_breast)

result_dir = os.path.abspath(os.path.join('phenopackets', cohort_name))
os.makedirs(result_dir, exist_ok=True)

print(f'Writing {len(p)} phenopackets to {result_dir}')
for pp in p:
file_path = os.path.join(result_dir, f'{pp.id}.json')
result_dir_breast = os.path.abspath(os.path.join('phenopackets', cohort_name_breast))
os.makedirs(result_dir_breast, exist_ok=True)

# Writing phenopackets for Breast tissue
print(f'Writing {len(p_breast)} phenopackets to {result_dir_breast}')
for pp in p_breast:
file_path = os.path.join(result_dir_breast, f'{pp.id}.json')
with open(file_path, 'w') as fh:
json = MessageToJson(pp)
fh.write(json)
75 changes: 73 additions & 2 deletions src/oncoexporter/cda/mapper/op_uberon_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,19 @@ def __init__(self):
"uterine cervix": "UBERON:0000002",
"uterus": "UBERON:0000995",
"body of uterus": "UBERON:0009853",
"lower respiratory tract": "UBERON:0001558"
"lower respiratory tract": "UBERON:0001558",
'breast': 'UBERON:0000310',
'bone marrow': 'UBERON:0002371',
'bone': 'UBERON:0002481',
'brain': 'UBERON:0000955',
'colon': 'UBERON:0001155',
'heart': 'UBERON:0000948',
'kidney': 'UBERON:0002113',
'adrenal gland': 'UBERON:0002369',
'liver': 'UBERON:0002107',
'pancreas': 'UBERON:0001264',
'skin': 'UBERON:0002097',
'thyroid gland': 'UBERON:0002046'
}
self._site_to_uberon_label_d = {
"Lung": "lung",
Expand All @@ -39,6 +51,66 @@ def __init__(self):
"Bronchus and lung": "lower respiratory tract",
"Lung/Bronchus": "lower respiratory tract",
"Lung/Bronchus, Unknown": "lower respiratory tract",
"Breast": "breast",
"Breast, NOS": "breast",
"Breast, Unknown": "breast",
"Bone marrow": "bone marrow",
"Bone Marrow": "bone marrow",
"Bones, joints and articular cartilage of other and unspecified sites": "bone",
"Bones, joints and articular cartilage of limbs": "bone",
"Bones of skull and face and associated joints (excludes mandible C41.1)": "bone",
"Long bones of lower limb and associated joints": "bone",
"Long bones of upper limb, scapula and associated joints": 'bone',
"Pelvic bones, sacrum, coccyx and associated joints": "bone",
"Bone, NOS" : 'bone',
"Bone": "bone",
"Bones": "bone",
"Brain": "brain",
"Brain, NOS": "brain",
"Brain, Unknown": "brain",
"Overlapping lesion of brain and central nervous system": "brain",
"Overlapping lesion of brain": "brain",
"Brain stem": "brain",
"Colon": "colon",
"Colon, NOS": "colon",
"Colon, Unknown": "colon",
"Heart, mediastinum, and pleura": "heart",
"Connective, subcutaneous and other soft tissues of thorax (excludes thymus C37.9, heart and mediastinum C38._)": "heart",
"Kidney": "kidney",
"Kidney, NOS": "kidney",
"Kidney, Unknown": "kidney",
"Renal pelvis": "kidney",
"Renal Pelvis": "kidney",
"Adrenal gland": "adrenal gland",
"Adrenal Gland": "adrenal gland",
"Adrenal gland, NOS": "adrenal gland",
"Adrenal gland, Unknown": "adrenal gland",
"Liver and intrahepatic bile ducts": "liver",
"Liver": "liver",
"Intrahepatic bile ducts": "liver",
"Pancreas": "pancreas",
"Pancreas, NOS": "pancreas",
"Pancreas, Unknown": "pancreas",
"Pancreatic duct": "pancreas",
"Skin": "skin",
"Skin, NOS": "skin",
"Skin, Unknown": "skin",
"Connective, subcutaneous and other soft tissues": "skin",
"Connective, subcutaneous and other soft tissues of pelvis": "skin",
"Connective, subcutaneous and other soft tissues of lower limb and hip": "skin",
"Connective, subcutaneous and other soft tissues, NOS": "skin",
"Connective, subcutaneous and other soft tissues of upper limb and shoulder": "skin",
"Connective, subcutaneous and other soft tissues of head, face, and neck (excludes connective tissue of orbit C69.6 and nasal cartilage C30.0)": "skin",
"Skin of scalp and neck": "skin",
"Skin of lower limb and hip": "skin",
"Connective, subcutaneous and other soft tissues of abdomen": "skin",
"Connective, subcutaneous and other soft tissues of trunk, NOS": "skin",
"Skin, NOS (excludes skin of labia majora C51.0, skin of vulva C51.9, skin of penis C60.9 and skin of scrotum C63.2)": "skin",
"Thyroid gland": "thyroid gland",
"Thyroid Gland": "thyroid gland",
"Thyroid gland, NOS": "thyroid gland",
"Thyroid gland, Unknown": "thyroid gland",
"Thyroid Gland, Unknown": "thyroid gland",
}

def get_ontology_term(self, row: pd.Series) -> Optional[PPkt.OntologyClass]:
Expand All @@ -54,4 +126,3 @@ def get_ontology_term(self, row: pd.Series) -> Optional[PPkt.OntologyClass]:
else:
# TODO -- more robust error handling in final release, but for development fail early
raise ValueError(f"Could not find UBERON term for primary_site=\"{primary_site}\"")

0 comments on commit 4d2f984

Please sign in to comment.