-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbot.py
283 lines (222 loc) · 14.3 KB
/
bot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# coding=utf8
##############################################################
# #
# Pars Ex Toto #
# #
# Created on 23.01.2015 for #clunc15 #
# by Anett Diesner and Esther Seyffarth #
# #
# (see Twitter Account @parsextoto for output) #
# #
##############################################################
import tweepy
import sqlite3
import config
import random
import time
def login():
    """Return a tweepy API client authenticated with the keys from config.py.

    For info on the tweepy module, see
    http://tweepy.readthedocs.org/en/v3.1.0/
    """
    # the consumer key pair identifies the application ...
    handler = tweepy.OAuthHandler(config.consumer_key, config.consumer_secret)
    # ... and the access token pair identifies the account that tweets
    handler.set_access_token(config.access_token, config.access_token_secret)
    return tweepy.API(handler)
def errorAlert(api, info, recipient="ojahnn"):
    """Report a problem to the maintainer via Twitter direct message.

    api       -- client object providing send_direct_message(screen_name, text)
    info      -- description of the problem
    recipient -- screen name that receives the message (defaults to the
                 account that was previously hard-coded here)

    The current local time (HH:MM:SS) is appended to the text, which also
    keeps repeated alerts from being textually identical.
    """
    stamped = info + " " + time.strftime("%H:%M:%S")
    api.send_direct_message(screen_name=recipient, text=stamped)
def _fetchRequiredRow(cursor, query, wordID):
    """Run a one-parameter query and return its first row, or raise LookupError."""
    row = cursor.execute(query, (wordID,)).fetchone()
    if row is None:
        raise LookupError("no result for ID %r in query: %s" % (wordID, query))
    return row


def getInfoFromDB(longID, shortID, c, c1, c2):
    """Look up both words of a pair together with their morphological features.

    longID, shortID -- WortID of the longer word and of the shorter word
    c, c1, c2       -- database cursors: c1 reads the long word and c2 the
                       short word from table Wort, c reads both feature
                       strings from table Morph

    Returns the tuple (longword, shortword, long_features, short_features).
    The word is the second column of its Wort row; if Morph holds several
    rows for one WortID, the first row returned is used.

    Raises LookupError if one of the IDs has no row in Wort or Morph.
    """
    word_query = 'select * from Wort where WortID = ?'
    features_query = 'select features from Morph where WortID = ?'

    longword = _fetchRequiredRow(c1, word_query, longID)[1]
    shortword = _fetchRequiredRow(c2, word_query, shortID)[1]
    # retrieving morphological info for each word
    long_features = _fetchRequiredRow(c, features_query, longID)[0]
    short_features = _fetchRequiredRow(c, features_query, shortID)[0]
    return (longword, shortword, long_features, short_features)
def makeNNSentence(longword, shortword, long_features, short_features):
    """Build a sentence about a noun and a shorter noun contained in it.

    longword, shortword           -- the containing noun and the contained noun
    long_features, short_features -- their feature strings, e.g. u"Masc_Nom_Sg"

    Returns the finished sentence, or "" if no pattern fits the pair.

    NOTE(review): the nesting of the branches below was reconstructed from a
    copy of the file that had lost its indentation -- confirm against the
    original layout.
    """
    # defining sentences, according to morphological info of longer word
    noun_patterns = {u"Masc_Nom_Sg": ["Kein %s ohne %s!", "Kein %s ohne %s."],
                     u"Fem_Nom_Sg": ["Undenkbar: Eine %s ohne %s!", u"Was wäre eine %s ohne %s?"],
                     u"Neut_Nom_Sg": ["Es gibt kein %s ohne %s.", "Kein %s ohne %s!", "Kein %s ohne %s..."],
                     u"Masc_Nom_Pl": ["Alle %s enthalten %s."],
                     u"Fem_Nom_Pl": ["Alle %s enthalten %s."],
                     u"Neut_Nom_Pl": ["Alle %s enthalten %s."]}
    # TODO: better rev_noun_patterns for plural
    rev_noun_patterns = {u"Masc_Nom_Sg": ["Wie kommt der %s in den %s?"],
                         u"Neut_Nom_Sg": ["Wie kommt das %s ins %s?"],
                         u"Fem_Nom_Sg": ["Wie kommt die %s in die %s?"],
                         u"Masc_Nom_Pl": ["Was macht der %s in den %s?"],
                         u"Neut_Nom_Pl": ["Was macht das %s in den %s?"],
                         u"Fem_Nom_Pl": ["Was macht die %s in den %s?"],
                         }
    # patterns for the "long word ends with short word" case, by shared gender
    suffix_patterns = ((u"Masc", "Jeder %s ist auch nur ein %s."),
                       (u"Fem", "Jede %s ist auch nur eine %s."),
                       (u"Neut", "Jedes %s ist auch nur ein %s."))

    # high priority output pattern
    if longword.lower().endswith(shortword.lower()):
        if short_features.endswith(u"Sg"):
            for gender, pattern in suffix_patterns:
                if long_features.startswith(gender) and short_features.startswith(gender):
                    return pattern % (longword, shortword)
        # such pairs never fall back to the lower priority patterns
        return ""

    # lower priority output patterns: pick one of the two families at random
    if random.randint(1, 4) > 2:
        if long_features in noun_patterns:
            # select random sentence pattern from above
            return random.choice(noun_patterns[long_features]) % (longword, shortword)
        return ""

    # reversed pattern: the short word must be nominative singular and share
    # the gender of the long word, because the article is chosen from the
    # long word's features
    same_gender = short_features.split("_", 1)[0] == long_features.split("_", 1)[0]
    short_is_nom_sg = short_features.endswith("_Sg") and short_features.split("_", 2)[1] == "Nom"
    # the membership test avoids a KeyError for long words whose features
    # (e.g. accusative) have no reversed pattern
    if short_is_nom_sg and same_gender and long_features in rev_noun_patterns:
        return random.choice(rev_noun_patterns[long_features]) % (shortword, longword)
    return ""
def makeADJSentence(longword, shortword, long_features, short_features):
    """Build a statement about a noun and an adjective contained in it.

    longword, shortword           -- the noun and the adjective
    long_features, short_features -- their feature strings

    Returns the finished sentence, or "" if the noun's features have no
    pattern or the adjective's features are not accepted.
    """
    # question variants, currently not used for output: too many questions
    question_patterns = {u"Masc_Nom_Sg": ["Ist jeder %s %s?"],
                         u"Fem_Nom_Sg": ["Welche %s ist nicht %s?"],
                         u"Neut_Nom_Sg": ["Wann ist ein %s %s?"],
                         u"Masc_Nom_Pl": ["Sind alle %s %s?", "Gibt es %s, die nicht %s sind?"],
                         u"Fem_Nom_Pl": ["Sind alle %s %s?", "Gibt es %s, die nicht %s sind?"],
                         u"Neut_Nom_Pl": ["Sind alle %s %s?", "Gibt es %s, die nicht %s sind?"]}
    statement_patterns = {u"Masc_Nom_Sg": ["Jeder %s ist %s."],
                          u"Fem_Nom_Sg": ["Jede %s ist %s!"],
                          u"Neut_Nom_Sg": ["Ein %s ist immer %s!"],
                          u"Masc_Nom_Pl": ["Nicht alle %s sind %s.", "Alle %s sind %s!"],
                          u"Fem_Nom_Pl": ["Nicht alle %s sind %s.", "Alle %s sind %s!"],
                          u"Neut_Nom_Pl": ["Nicht alle %s sind %s.", "Alle %s sind %s!"]}

    if long_features not in statement_patterns:
        return ""

    # the adjective is accepted if it is invariant, predicative, adverbial,
    # or a nominative form of the noun's gender
    gender_nominative = long_features.split("_", 1)[0] + "_Nom"
    accepted = (short_features.endswith("Invar")
                or short_features in ("Pos_Pred", "Pos_Adv")
                or gender_nominative in short_features)
    if not accepted:
        return ""

    candidates = statement_patterns[long_features]
    chosen = candidates[random.randint(0, len(candidates) - 1)]
    return chosen % (longword, shortword)
def makeVSentence(longword, shortword, long_features, short_features):
    """Build a sentence from a noun and a verb form contained in it.

    longword, shortword           -- the noun (subject) and the verb form
    long_features, short_features -- their feature strings

    A sentence is only produced when the verb is third person present
    indicative and agrees in number with the noun. Returns "" otherwise, and
    also when the noun's features have no pattern.
    """
    sentence = ""
    # reverse sentences to make more interesting
    verb_patterns = {u"Masc_Nom_Sg": ["Jeder %s %s."],
                     u"Fem_Nom_Sg": ["Jede %s %s."],
                     u"Neut_Nom_Sg": ["Jedes %s %s."],
                     u"Fem_Nom_Pl": ["Alle %s %s."],
                     u"Masc_Nom_Pl": ["Alle %s %s."],
                     u"Neut_Nom_Pl": ["Alle %s %s."]}
    # A u"Masc_Dat_Pl" pattern (u"Das wichtigste am %s ist das %s.") was tried
    # here and disabled: something is wrong with that sentence.

    number_agrees = ((long_features.endswith("_Pl") and short_features == "3_Pl_Pres_Ind")
                     or (long_features.endswith("_Sg") and short_features == "3_Sg_Pres_Ind"))
    # nouns in other cases (e.g. accusative) pass the number check but have
    # no pattern, so the table is consulted only after a membership test
    if number_agrees and long_features in verb_patterns:
        sentence = random.choice(verb_patterns[long_features]) % (longword, shortword)
    return sentence
def makeNPSentence(longword, shortword, long_features, shortword_gender):
    """Build a sentence about a noun and a proper name contained in it.

    longword         -- the noun
    shortword        -- the proper name
    long_features    -- feature string of the noun
    shortword_gender -- "w" selects the female wording, any other value the
                        male wording

    Returns the finished sentence, or "" if the noun's features have no
    pattern.
    """
    female_patterns = {"Masc_Nom_Sg": [u"Der %s ist eine Erfindung von %s."],
                       "Fem_Nom_Sg": [u"Die %s ist eine Erfindung von %s."],
                       "Neut_Nom_Sg": [u"Das %s ist eine Erfindung von %s."],
                       "Masc_Nom_Pl": [u"%s wurden von %s erfunden."],
                       "Fem_Nom_Pl": [u"%s wurden von %s erfunden."],
                       "Neut_Nom_Pl": [u"%s wurden von %s erfunden."],
                       "Masc_Acc_Pl": [u"Die beste Expertin für %s ist %s."],
                       "Fem_Acc_Pl": [u"Die beste Expertin für %s ist %s."],
                       "Neut_Acc_Pl": [u"Die beste Expertin für %s ist %s."]}
    male_patterns = {"Masc_Nom_Sg": [u"Der %s ist eine Erfindung von %s."],
                     "Fem_Nom_Sg": [u"Die %s ist eine Erfindung von %s."],
                     "Neut_Nom_Sg": [u"Das %s ist eine Erfindung von %s."],
                     "Masc_Nom_Pl": [u"%s wurden von %s erfunden."],
                     "Fem_Nom_Pl": [u"%s wurden von %s erfunden."],
                     "Neut_Nom_Pl": [u"%s wurden von %s erfunden."],
                     "Masc_Acc_Pl": [u"Der beste Experte für %s ist %s."],
                     "Fem_Acc_Pl": [u"Der beste Experte für %s ist %s."],
                     "Neut_Acc_Pl": [u"Der beste Experte für %s ist %s."]}

    # the two tables differ only in the wording of the accusative patterns
    if shortword_gender == "w":
        table = female_patterns
    else:
        table = male_patterns

    if long_features not in table:
        return ""
    candidates = table[long_features]
    chosen = candidates[random.randint(0, len(candidates) - 1)]
    return chosen % (longword, shortword)
# Random, not yet posted word pair whose contained word has the given part of
# speech and whose Score reaches the given minimum.
_SUBSTRING_QUERY = ('select * from Substring where SubstringID in '
                    '(select WortID from Wort where POS = ?) '
                    'and Score >= ? and posted = 0 order by random() limit 1')


def _chooseCategory(posChoice):
    """Map the random draw to (POS tag, minimum Score, sentence builder).

    1 in 20 draws is NN-NN, 4 in 20 are NN-ADJ, 4 in 20 are NN-V. Returns
    None for all other draws, which are currently proper names.
    """
    if posChoice == 0:
        # note: good NN-NN pairs are scarce, so there is no required Score
        # here as in the other cases
        return ("NN", 0, makeNNSentence)
    if 1 <= posChoice <= 4:
        return ("ADJ", 0.5, makeADJSentence)
    if 5 <= posChoice <= 8:
        return ("V", 0.5, makeVSentence)
    return None


def post(path, debug=False):
    """Generate one sentence from the vocabulary database and tweet it.

    path  -- path of the SQLite database (tables Wort, Morph, Substring,
             NameSubstring and Name are queried)
    debug -- if true, the sentence is only printed: nothing is tweeted and
             the word pair is not marked as posted

    Random word pairs are drawn until a sentence builder returns a non-empty
    sentence. When a category has no unposted pairs left, a direct message
    is sent via errorAlert and another pair is drawn.

    NOTE(review): the nesting of the final "tweet and mark as posted" part
    was reconstructed from a copy of the file that had lost its indentation;
    the debug behaviour described above is the reading chosen here.

    TODO:
    - fetch all possible features for each word so that everything becomes
      more versatile (handling of the genitive, further features that are
      not considered so far)
    - what does the feature Masc_Dat_Sg_OLD mean?
    - solve the case and gender problems that occur e.g. here:
      https://twitter.com/parsextoto/status/562604744649490432
      https://twitter.com/parsextoto/status/562958850224312321
    - add additional sentences for proper names
    """
    api = login()
    # establishing connection to database that contains our vocabulary etc.
    conn = sqlite3.connect(path)
    try:
        c = conn.cursor()
        c1 = conn.cursor()
        c2 = conn.cursor()
        # use "while True" instead if you want to tweet without pauses
        # (floods timeline!)
        tweeted = False  # set tweeted to an integer if you want to tweet e.g. 3x in a row
        while not tweeted:
            # decide what kind of POS pair we want to tweet
            posChoice = random.randint(0, 19)
            category = _chooseCategory(posChoice)
            output = ""  # default output

            if category is not None:
                pos, minScore, makeSentence = category
                row = c.execute(_SUBSTRING_QUERY, (pos, minScore)).fetchone()
                if row is None:
                    errorAlert(api, u"Keine NN-%s-Paare mit den erforderlichen Score- und Posted-Werten mehr übrig! :(" % pos)
                    continue
                longID = row[0]
                shortID = row[1]
                (longword, shortword, long_features, short_features) = getInfoFromDB(longID, shortID, c, c1, c2)
                try:
                    output = makeSentence(longword, shortword, long_features, short_features)
                except (KeyError, UnboundLocalError):
                    # a builder that has no pattern for this feature
                    # combination must not kill the bot: treat it as
                    # "no sentence" and draw another pair
                    output = ""
            else:  # Proper Name choice! \o/
                row = c.execute('select * from NameSubstring where posted = 0 order by random() limit 1').fetchone()
                if row is None:
                    errorAlert(api, u"Ein Problem mit Eigennamen ist aufgetreten. :(")
                    continue
                longID = row[0]
                shortID = row[1]
                longword = c1.execute('select * from Wort where WortID = ?', (longID,)).fetchone()[1]
                shortword = c2.execute('select * from Name where NameID = ?', (shortID,)).fetchone()[1]
                # retrieving morphological info for long word (not relevant
                # for short word, because it's a proper name)
                long_features = c.execute('select features from Morph where WortID = ?', (longID,)).fetchone()[0]
                shortword_gender = c.execute('select Gender from Name where NameID = ?', (shortID,)).fetchone()[0]
                output = makeNPSentence(longword, shortword, long_features, shortword_gender)

            if output == "":
                # no pattern fitted this pair; it stays unposted
                continue

            print(output)
            if not debug:
                api.update_status(output)
                # mark combination as already posted
                if category is not None:
                    update = 'UPDATE Substring set posted=1 where WortID = ? and SubstringID = ?'
                else:
                    update = 'UPDATE NameSubstring set posted=1 where WortID = ? and NameID = ?'
                c.execute(update, (longID, shortID))
                conn.commit()
            tweeted = True
    finally:
        conn.close()
# Script entry point: this call is not guarded by "if __name__ == '__main__'",
# so it also runs when the module is imported. The database file name is
# relative, i.e. resolved against the current working directory.
post("parsextoto.sqlite")