-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmeshFromDescXML.py
48 lines (39 loc) · 1.85 KB
/
meshFromDescXML.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# RUN BY TYPING BELOW IN A TERMINAL WITH PYTHON SET UP (takes ~20sec to run)
# 2 FILES WILL BE CREATED: "output.csv" & "output.txt"
#> python3 meshFromDescXML.py
import xml.etree.ElementTree as ET
import csv
# MODIFY TO WHATEVER MESH XML FILE YOU ARE PARSING BELOW, HAVE IN SAME DIRECTORY
XML_PATH = 'desc2020.xml'
# OUTPUT FILES: THE CSV IS OVERWRITTEN ON EVERY RUN, THE TEXT DUMP IS APPENDED TO
CSV_PATH = "output.csv"
TXT_PATH = "output.txt"
# SET TO True TO ADD ONE HEADER ROW TO THE OUTPUT, BUT UNDESIRED IF IMPORTING INTO
# AN ALREADY CREATED EMPTY SQL TABLE
INCLUDE_HEADER_ROW = False


def collect_rows(root, include_header=False):
    """Build the tall two-column table of [concept name, term] rows.

    For every ``DescriptorRecord`` below *root*, the text of the first
    ``ConceptList/Concept/ConceptName/String`` element is paired with the
    text of every ``Term/String`` found in any of that record's
    ``ConceptList/Concept/TermList`` elements.

    Args:
        root: ElementTree element to search for ``DescriptorRecord`` nodes.
        include_header: when true, ``['meshTerm', 'synonym']`` is emitted as
            the first row.

    Returns:
        A list of 2-element lists ``[concept_name, term]`` in document order.

    Raises:
        AttributeError: if a record has no
            ``ConceptList/Concept/ConceptName/String`` element (``find``
            returns ``None`` in that case).
    """
    rows = []
    if include_header:
        rows.append(['meshTerm', 'synonym'])
    for record in root.findall('.//DescriptorRecord'):
        concept_name = record.find('ConceptList/Concept/ConceptName/String').text
        for term_list in record.findall('ConceptList/Concept/TermList'):
            # An empty TermList simply contributes no rows, so no separate
            # "has a Term child" check is needed.
            # Rows stay lists (not tuples) so the repr dumped to the text
            # file keeps its [[...], [...]] shape.
            rows.extend(
                [concept_name, term.text]
                for term in term_list.findall('Term/String')
            )
    return rows


def main(xml_path=XML_PATH, csv_path=CSV_PATH, txt_path=TXT_PATH,
         include_header=INCLUDE_HEADER_ROW):
    """Parse the MeSH descriptor XML and write the term table to disk.

    WRITES THE MESH TERMS AS A TALL TABLE WITH 2 COLUMNS: concept name, term.
    THESE CAN BE BULK INSERTED INTO AN EMPTY SQL TABLE (e.g. with column
    names meshTerm, synonym). THE OFFICIAL MESH TERM HAS A ROW WITH ITSELF AS
    SYNONYM SO THE SYNONYM COLUMN CAN BE SEARCHED TO FIND ALL TERMS.

    Args:
        xml_path: MeSH descriptor XML file to read.
        csv_path: CSV file to create or overwrite with one row per term.
        txt_path: text file to which the repr of all rows is appended.
        include_header: forwarded to ``collect_rows``.
    """
    root = ET.parse(xml_path).getroot()
    rows = collect_rows(root, include_header)

    # PRINT TO TERMINAL HOW MANY ROWS ARE BEING WRITTEN
    print("itemArrayS len = " , len(rows))

    # Use context managers so both output files are flushed and closed
    # deterministically instead of leaking an open handle.
    with open(txt_path, "a") as dump:
        print("itemArrayS = " , rows , file=dump)
    with open(csv_path, "w", newline="") as f:
        csv.writer(f).writerows(rows)


if __name__ == "__main__":
    main()