This repository has been archived by the owner on Jan 3, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathidentity.py
executable file
·56 lines (40 loc) · 2 KB
/
identity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env python3
import argparse
import csv
import sys
from gensim.models.word2vec import Word2Vec
# --- Command-line interface --------------------------------------------------
# Both options may be omitted, in which case the defaults below are used.
parser = argparse.ArgumentParser(description='Identity Evaluation.')
parser.add_argument('--w2v', default='all.norm-sz100-w10-cb0-it1-min100.w2v', nargs='?',
                    help='Path to the word2vec model.')
parser.add_argument('--subsumptions', default='subsumptions-test.txt', nargs='?',
                    help='Path to the test subsumptions.')
args = vars(parser.parse_args())

# --- Embedding model ---------------------------------------------------------
# The model is read in the binary word2vec format; vocabulary bytes that cannot
# be decoded are ignored instead of aborting the load.
w2v = Word2Vec.load_word2vec_format(args['w2v'], binary=True, unicode_errors='ignore')
# NOTE(review): per the gensim docs, replace=True keeps only the normalized
# vectors to save memory -- confirm against the installed gensim version.
w2v.init_sims(replace=True)

# --- Test pairs --------------------------------------------------------------
# Each row of the tab-separated file contributes its first two columns as a
# (hyponym, hypernym) pair; any further columns are ignored.
subsumptions_test = []

# newline='' is what the csv module asks for so that it can do its own line
# splitting.
# NOTE(review): the file is decoded with the platform default encoding --
# confirm this matches the data.
with open(args['subsumptions'], newline='') as f:
    reader = csv.reader(f, delimiter='\t', quoting=csv.QUOTE_NONE)

    for row in reader:
        # csv.reader yields an empty list for a blank line; skip it rather
        # than failing with IndexError. Rows with a single column are still
        # treated as malformed and raise.
        if not row:
            continue
        subsumptions_test.append((row[0], row[1]))
def compute_ats(measures, total=None):
    """Return one averaged score per cut-off in *measures*.

    Args:
        measures: Sequence of dicts, one per cut-off; the values of each dict
            are summed.
        total: Denominator applied to every sum. When omitted, the number of
            pairs in the module-level ``subsumptions_test`` is used, so
            existing one-argument calls behave as before.

    Returns:
        A list holding ``sum(d.values()) / total`` for every dict ``d`` in
        *measures*, in the same order.

    Raises:
        ZeroDivisionError: If the denominator is zero and *measures* is
            non-empty.
    """
    if total is None:
        total = len(subsumptions_test)
    return [sum(hits.values()) / total for hits in measures]
def compute_auc(ats):
    """Return the area under the score curve *ats* via the trapezoidal rule.

    Every pair of adjacent scores is added together, the grand total is
    halved (unit spacing between points) and then divided by the fixed
    constant 10, independent of ``len(ats)``. With ten scores of 1.0 the
    result is therefore 0.9, not 1.0.

    Args:
        ats: Sequence of numeric scores (must support slicing).

    Returns:
        The scaled area; 0 when *ats* has fewer than two elements.
    """
    # zip stops at the shorter argument, so this walks exactly the
    # len(ats) - 1 adjacent pairs.
    return sum(left + right for left, right in zip(ats, ats[1:])) / 2 / 10
# One dict per cut-off k = 1..10: measures[k - 1] maps a (hyponym, hypernym)
# pair to 1.0 when the hypernym is among the first k retrieved neighbours,
# and to 0.0 otherwise.
measures = [{} for _ in range(10)]

# Without any test pair the averages below would divide by zero; stop with an
# explicit message instead of a bare ZeroDivisionError traceback.
if not subsumptions_test:
    sys.exit('No test subsumptions to evaluate.')

for i, (hyponym, hypernym) in enumerate(subsumptions_test):
    # The query is the hyponym's own vector rather than the word itself.
    # NOTE(review): presumably this lets the hyponym appear among its own
    # neighbours (the "identity" baseline) -- confirm against gensim docs.
    actual = [w for w, _ in w2v.most_similar(positive=[w2v[hyponym]], topn=10)]

    # Position of the first occurrence of the hypernym, if it was retrieved.
    # "hypernym in actual[:j + 1]" holds exactly when that position is at
    # most j, so the rank is computed once instead of re-slicing per cut-off.
    rank = actual.index(hypernym) if hypernym in actual else None

    for j, hits in enumerate(measures):
        hits[(hyponym, hypernym)] = 1. if rank is not None and j >= rank else 0.

    # Progress report on stderr every 100 examples. compute_ats divides by
    # the size of the whole test set, so these intermediate scores only
    # count the pairs processed so far.
    if (i + 1) % 100 == 0:
        ats = compute_ats(measures)
        auc = compute_auc(ats)
        ats_string = ', '.join('A@%d=%.6f' % (k, score) for k, score in enumerate(ats, start=1))
        print('%d examples out of %d done for identity: %s. AUC=%.6f.' % (
            i + 1,
            len(subsumptions_test),
            ats_string,
            auc),
            file=sys.stderr, flush=True)

# Final report on stdout, using the same helpers as the progress report so
# the two cannot drift apart.
ats = compute_ats(measures)
auc = compute_auc(ats)
ats_string = ', '.join('A@%d=%.4f' % (k, score) for k, score in enumerate(ats, start=1))
print('For identity: overall %s. AUC=%.6f.' % (ats_string, auc), flush=True)