-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain_odeuropa.py
210 lines (157 loc) · 7.15 KB
/
main_odeuropa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
import streamlit as st
from annotated_text import annotated_text
import os
from functions import *
import tempfile
import pathlib
import os
from PIL import Image
from machamp.predictor.predict import predict_with_paths
from convert_excel import make_excel,to_excel
import logging
import torch
import sys
import machamp
import pandas as pd
# Module-wide logging: records at INFO and above are written to stdout,
# prefixed with timestamp, level and logger name.
logging.basicConfig(
    level=logging.INFO,
    handlers=[logging.StreamHandler(sys.stdout)],
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
)
logger = logging.getLogger(__name__)
def _run_classifier(converted_text, model_file, device):
    """Run the smell classifier on already-converted text.

    The classifier works on file paths, so the input is written to a named
    temporary file and the predictions are read back from a second one.
    Both files are removed before returning, even if prediction fails.

    Args:
        converted_text: Text in the format produced by ``convertText``.
        model_file: Model file name handed to ``loadModel`` (e.g. ``"en.pt"``).
        device: Device string handed to ``loadModel`` and the predictor
            (e.g. ``"cuda:0"`` or ``"cpu"``).

    Returns:
        A ``(tuples, raw_output, df_out)`` triple: the result of
        ``predictions_to_tuples``, the raw text of the prediction file, and
        the result of ``make_excel`` for that file.
    """
    input_path = None
    output_path = None
    try:
        # delete=False: the files must outlive their handles because the
        # predictor reopens them by path.
        with tempfile.NamedTemporaryFile(delete=False, mode="w") as f_input:
            input_path = f_input.name
            f_input.write(converted_text)
        with tempfile.NamedTemporaryFile(delete=False, mode="w") as f_output:
            output_path = f_output.name

        model = loadModel(model_file, device)
        logger.info('predicting on %s, saving on %s', input_path, output_path)
        # Positional arguments are passed through exactly as before.
        # NOTE(review): 32 is presumably the batch size - confirm against
        # machamp's predict_with_paths signature.
        predict_with_paths(model, input_path, output_path, None, 32, False, device, "=", "|")

        tuples = predictions_to_tuples(output_path)
        with open(output_path) as f:
            raw_output = f.read()
        df_out = make_excel(output_path)
    finally:
        # Always clean up the scratch files, also when something above raised.
        for path in (input_path, output_path):
            if path is not None and os.path.exists(path):
                os.remove(path)
    return tuples, raw_output, df_out


def main():
    """Render the Odeuropa smells-extraction demo page.

    Builds the Streamlit page (header, input form, footer). When the form is
    submitted with a language and a text (typed in or picked from the
    examples), the text is converted, run through the classifier for that
    language, shown as annotated text, and offered as an Excel download.
    """
    st.set_page_config(
        page_title="Odeuropa Smells Extraction",
        page_icon="",
        # layout="wide",
        initial_sidebar_state="expanded",
        menu_items={
            "Get Help": 'https://github.com',
            "Report a bug": "https://github.com",
            "About": "Odeuropa demonstrator for the smells extraction tools."})

    ### model to be used for every language
    # NOTE(review): Latin reuses the German model file - confirm this is intended.
    langdict = {
        "English": "en.pt",
        "Italian": "it.pt",
        "French": "fr.pt",
        "German": "de.pt",
        "Dutch": "nl.pt",
        "Slovene": "sl.pt",
        "Latin": "de.pt",
    }

    ### select where to load the models, could be gpu (e.g. "cuda:0") or cpu (e.g "cpu")
    ### to run everything on cpu, set every value below to "cpu"
    langdevice = {
        "English": "cuda:0",
        "Italian": "cuda:1",
        "French": "cuda:0",
        "German": "cuda:1",
        "Dutch": "cuda:0",
        "Slovene": "cuda:1",
        "Latin": "cuda:1"
    }

    ### preload the models for the classifier
    # NOTE(review): this runs on every script rerun; presumably loadModel
    # caches its result - confirm in functions.py.
    for lang_name in langdict:
        loadModel(langdict[lang_name], langdevice[lang_name])

    ### Header
    col1, _spacer, mid, col2 = st.columns([2, 2, 10, 2])
    with col1:
        st.image('logo.png', use_column_width="always")
    with mid:
        st.title("Smells Extraction")
    with col2:
        st.image('fbk.png', use_column_width="always")

    ### main form
    form = st.form("main_form")

    # File upload is currently disabled: the text comes either from the text
    # area or from one of the examples below.
    txt = form.text_area('Insert a text:', height=300)

    ### list of examples
    example = form.selectbox(
        'Or... you might want to try these examples',
        ['',
         '[ENGLISH] It\'s 1787, you are newly arrived in London, and you are walking the short distance from the Saracen\'s Head Inn to the nearby Newgate prison. As you pass the Old Bailey courthouse you catch a terrible smell in the air. Uncertain of its origins, you ask a lawyer as they hurry past on their way to a trial. They tell you that the smell arose from the burning of a woman who had been found guilty of coining farthings. The public burning of women in England only ended in 1790, Catherine Hayes being the last such individual to be thus punished. Up until 1789 the scent of burnt flesh also appeared in the courtroom itself, where some malefactors might be branded with a hot iron - "T" for theft, "F" for felon, or "M" for murder. The smell of burning was a warning to others. But smell could also feature as part of the humilitation of legal or, in some cases, extra-judicial punishment.',
         '[DUTCH] Evenwel was het eene goede zaak; er werd nu een verbod uitgevaardigd, om elders in de stad visch te verkoopen en de walgelijke overblijfsels van den visch, die vroeger hier en daar werden nedergeworpen, verpesten niet langer de lucht door onaangename reuk; terwijl nu tevens een beter toezigt op de hoedanigheid van den aangeboden visch kon worden uitgeoefend'
         ]
    )

    ### language selection. The names need to match the ones in langdict and langdevice
    language = form.selectbox(
        'Select the language of the text:',
        ['', 'English', 'Italian', 'French', 'German', 'Dutch', 'Slovene', 'Latin'])

    raw_output = ""
    df_out = None

    form.text("For this demo only the first 1000 words of the text will be processed")
    if form.form_submit_button("Extract the smells"):
        # st.stop() is Streamlit's way to end the current run early; the
        # builtin exit() is meant for the interactive shell only.
        if not language:
            st.warning('Please select a language', icon="⚠️")
            st.stop()

        ### the text from the input box takes precedence over the example
        text = txt or example
        if not text:
            st.warning('Please enter a text', icon="⚠️")
            st.stop()

        ### convert the text into the format required by the classifier
        ### only convert the first N tokens defined by "limit"
        # convertText expects a readable file object, so the text goes through
        # a scratch file. UTF-8 keeps the round trip independent of the locale.
        with tempfile.TemporaryFile(mode="w+", encoding="utf-8") as fp:
            fp.write(text)
            fp.seek(0)
            converted_text = convertText(fp, limit=1000)

        ### Run predictions
        tuples, raw_output, df_out = _run_classifier(
            converted_text, langdict[language], langdevice[language])

        ### convert predictions in the tuples needed for the annotated_text module
        ### and add them the colors to display
        ### you can change colors in functions.py -> add_colors
        # Single-element tuples are unwrapped to their only element.
        tuples_fixed = [t[0] if len(t) == 1 else t for t in tuples]
        tuples_colors = add_colors(tuples_fixed)

        ### print the predictions
        annotated_text(tuples_colors)

    ### download predictions as excel file
    if raw_output:
        st.markdown("""---""")
        df_xlsx = to_excel(df_out)
        st.download_button(label='Download Output', data=df_xlsx, file_name= 'df_test.xlsx')

    # footer
    st.markdown('#')
    col1, mid = st.columns([2, 12])
    with col1:
        st.image('eu-logo.png', use_column_width="always")
    with mid:
        st.caption("This work has been realised in the context of Odeuropa, a research project that has received funding from the European Union’s Horizon 2020 research and innovation programme under grant agreement No. 101004469.")


# Run the app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()