evaluations.py
# -*- coding: utf-8 -*-
"""
Scripts for running various evaluation tasks for large-scale cover song detection.
[NOTE]: All logs are written to LOG_FILE.
Albin Andrew Correya
R&D Intern
@Deezer
"""
from joblib import Parallel, delayed
from es_search import SearchModule
from experiments import Experiments
from utils import log
import templates as presets
import argparse
# Logging handlers
LOG_FILE = './logs/evaluations.log'
LOGGER = log(LOG_FILE)
def shs_train_set_evals(size, method="msd_title", with_duplicates=True, mode="msd"):
"""
:param size: Required prune size of the results
    :param method: (string) {default: "msd_title"}
                choose the experiment method; available methods are
                ["msd_title", "pre-msd_title", "mxm_lyrics", "title_mxm_lyrics", "pre-title_mxm_lyrics"]
    :param with_duplicates: (boolean) {default: True} include (True)
                or exclude (False) MSD official duplicate tracks from the experiments
:param mode: 'msd' or 'shs'
"""
es = SearchModule(presets.uri_config)
if mode == "msd":
if with_duplicates:
exp = Experiments(es, './data/train_shs.csv', presets.shs_msd)
else:
exp = Experiments(es, './data/train_shs.csv', presets.shs_msd_no_dup)
elif mode == "shs":
exp = Experiments(es, './data/train_shs.csv', presets.shs_shs)
else:
        raise ValueError("Invalid 'mode' parameter: %s" % mode)
if method == "msd_title":
LOGGER.info("\n%s with size %s, duplicates=%s and msd_mode=%s" %
(method, size, with_duplicates, mode))
results = exp.run_song_title_match_task(size=size)
elif method == "pre-msd_title":
LOGGER.info("\n%s with size %s, duplicates=%s and msd_mode=%s" %
(method, size, with_duplicates, mode))
results = exp.run_cleaned_song_title_task(size=size)
elif method == "mxm_lyrics":
LOGGER.info("\n%s with size %s, duplicates=%s and msd_mode=%s" %
(method, size, with_duplicates, mode))
results = exp.run_mxm_lyrics_search_task(presets.more_like_this, size=size)
elif method == "title_mxm_lyrics":
LOGGER.info("\n%s with size %s, duplicates=%s and msd_mode=%s" %
(method, size, with_duplicates, mode))
results = exp.run_rerank_title_with_mxm_lyrics_task(size=size, with_cleaned=False)
elif method == "pre-title_mxm_lyrics":
LOGGER.info("\n%s with size %s, duplicates=%s and msd_mode=%s" %
(method, size, with_duplicates, mode))
results = exp.run_rerank_title_with_mxm_lyrics_task(size=size, with_cleaned=True)
else:
        raise ValueError("Invalid 'method' parameter: %s" % method)
mean_avg_precision = exp.mean_average_precision(results)
LOGGER.info("\n Mean Average Precision (MAP) = %s" % mean_avg_precision)
return
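# A minimal usage sketch (hypothetical values; assumes the Elasticsearch index referenced by
# presets.uri_config and the './data/train_shs.csv' file are available):
#   shs_train_set_evals(size=100, method="msd_title", with_duplicates=False, mode="msd")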
def shs_test_set_evals(size, method="msd_title", with_duplicates=True):
"""
:param size: Required prune size of the results
    :param method: (string) {default: "msd_title"}
                choose the experiment method; available methods are
                ["msd_title", "pre-msd_title", "mxm_lyrics", "title_mxm_lyrics", "pre-title_mxm_lyrics"]
    :param with_duplicates: (boolean) {default: True} include (True)
                or exclude (False) MSD official duplicate tracks from the experiments
:return:
"""
es = SearchModule(presets.uri_config)
if with_duplicates:
exp = Experiments(es, './data/test_shs.csv', presets.shs_msd)
else:
exp = Experiments(es, './data/test_shs.csv', presets.shs_msd_no_dup)
if method == "msd_title":
LOGGER.info("\n%s with size %s and duplicates=%s " % (method, size, with_duplicates))
results = exp.run_song_title_match_task(size=size)
elif method == "pre-msd_title":
LOGGER.info("\n%s with size %s and duplicates=%s" % (method, size, with_duplicates))
results = exp.run_cleaned_song_title_task(size=size)
elif method == "mxm_lyrics":
LOGGER.info("\n%s with size %s and duplicates=%s" % (method, size, with_duplicates))
results = exp.run_mxm_lyrics_search_task(presets.more_like_this, size=size)
elif method == "title_mxm_lyrics":
LOGGER.info("\n%s with size %s and duplicates=%s" % (method, size, with_duplicates))
results = exp.run_rerank_title_with_mxm_lyrics_task(size=size, with_cleaned=False)
elif method == "pre-title_mxm_lyrics":
LOGGER.info("\n%s with size %s and duplicates=%s" % (method, size, with_duplicates))
results = exp.run_rerank_title_with_mxm_lyrics_task(size=size, with_cleaned=True)
else:
        raise ValueError("Invalid 'method' parameter for the experiment: %s" % method)
mean_avg_precision = exp.mean_average_precision(results)
    LOGGER.info("\n Mean Average Precision (MAP) = %s" % mean_avg_precision)
return
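# A minimal usage sketch (hypothetical values; assumes the Elasticsearch index referenced by
# presets.uri_config and the './data/test_shs.csv' file are available):
#   shs_test_set_evals(size=100, method="mxm_lyrics", with_duplicates=True)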
def automate_online_evals(mode, n_threads=-1, exp_mode="msd", is_duplicates=False, size=100,
methods=["msd_title", "pre-msd_title", "mxm_lyrics",
"title_mxm_lyrics", "pre-title_mxm_lyrics"]):
"""
    Run the parallelized automated evaluation tasks according to the chosen parameters
    :param mode: (string) choose 'train' or 'test' mode from the list ["train", "test"]
    :param n_threads: number of threads to parallelize with (-1 uses all available cores)
    :param exp_mode: (string) choose the experiment mode from the list ["msd", "shs"]
    :param is_duplicates: (boolean) whether to include MSD official duplicate tracks in the experiments
    :param size: (int) required size of the pruned response
    :param methods: list of methods to compute in the automated process;
                available methods are ["msd_title", "pre-msd_title",
                "mxm_lyrics", "title_mxm_lyrics", "pre-title_mxm_lyrics"]
"""
    LOGGER.info("\n ======== Automated online experiments on shs_%s "
                "with exp_mode %s, duplicates=%s and size=%s ======= "
                % (mode, exp_mode, is_duplicates, size))
sizes = [size for i in range(len(methods))]
duplicates = [is_duplicates for i in range(len(methods))]
    if mode == "test":
        # Unpack each (size, method, with_duplicates) tuple into positional arguments
        Parallel(n_jobs=n_threads, verbose=1)(
            delayed(shs_test_set_evals)(s, m, d)
            for s, m, d in zip(sizes, methods, duplicates))
    elif mode == "train":
        exp_modes = [exp_mode for i in range(len(methods))]
        # Unpack each (size, method, with_duplicates, mode) tuple into positional arguments
        Parallel(n_jobs=n_threads, verbose=1)(
            delayed(shs_train_set_evals)(s, m, d, e)
            for s, m, d, e in zip(sizes, methods, duplicates, exp_modes))
LOGGER.info("\n ===== Process finished successfully... ===== ")
return
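# A minimal usage sketch (hypothetical values; runs all five default methods on the SHS train
# set in parallel, excluding MSD official duplicates):
#   automate_online_evals(mode="train", n_threads=4, exp_mode="msd", is_duplicates=False, size=100)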
if __name__ == '__main__':
parser = argparse.ArgumentParser(
        description="Run the automated evaluations for the cover song detection task described in the paper",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-m", action="store", default='test',
                        help="choose whether 'train' or 'test' mode")
    parser.add_argument("-t", action="store", type=int, default=-1,
                        help="number of threads required (-1 uses all available cores)")
    parser.add_argument("-e", action="store", default='msd',
                        help="choose between 'msd' or 'shs'")
    parser.add_argument("-d", action="store", type=int, default=0,
                        help="include (1) or exclude (0) MSD official duplicate songs from the experiments")
    parser.add_argument("-s", action="store", type=int, default=100,
                        help="required prune size for the results")
args = parser.parse_args()
d = bool(args.d)
methods = ["msd_title", "pre-msd_title", "mxm_lyrics", "title_mxm_lyrics", "pre-title_mxm_lyrics"]
automate_online_evals(mode=args.m, n_threads=args.t, exp_mode=args.e, is_duplicates=d, size=args.s, methods=methods)
    print("\n ...Done...")
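# Example invocation (hypothetical; assumes the Elasticsearch index and the ./data CSV files are in place):
#   python evaluations.py -m train -t -1 -e msd -d 0 -s 100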