-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlowlvl.py
429 lines (348 loc) · 19.8 KB
/
lowlvl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
import partitura as pt
import mido
import math
import numpy as np
import copy
import pretty_midi
import os
#MIDI Locations for the labels so that we can decipher what's being output.
# Mid-level operations: one MIDI pitch per label (note name in the comment).
MID_FWDBCKWD = 21       # A0
MID_ROLLBACK = 25       # C#1
MID_MISTOUCH = 28       # E1
MID_WRNG_PRED_INS = 30  # F#1
MID_DRAG = 33           # A1

# Low-level operations: one MIDI pitch per label, in the lowest octave.
LOW_INSERT = 1      # C#-1
LOW_DELETE = 3      # D#-1
LOW_GOBACK = 5      # F-1
LOW_SHIFT = 7       # G-1
LOW_CHG_OFFSET = 9  # A-1
LOW_CHG_ONSET = 11  # B-1

# Fallback pitches for labels that are not in the maps below.
DEFAULT_MID = 20  # G#0
DEFAULT_LOW = 0   # C-1

# Constant name -> pitch, for the mid-level operations.
mid_lvl_opers = {
    'MID_FWDBCKWD': MID_FWDBCKWD,
    'MID_ROLLBACK': MID_ROLLBACK,
    'MID_MISTOUCH': MID_MISTOUCH,
    'MID_WRNG_PRED_INS': MID_WRNG_PRED_INS,
    'MID_DRAG': MID_DRAG,
    'DEFAULT_MID': DEFAULT_MID,
}

# Constant name -> pitch, for the low-level operations.
low_lvl_opers = {
    'LOW_INSERT': LOW_INSERT,
    'LOW_DELETE': LOW_DELETE,
    'LOW_GOBACK': LOW_GOBACK,
    'LOW_SHIFT': LOW_SHIFT,
    'LOW_CHG_OFFSET': LOW_CHG_OFFSET,
    'LOW_CHG_ONSET': LOW_CHG_ONSET,
    'DEFAULT_LOW': DEFAULT_LOW,
}

# Mid-level label string -> pitch of the note that encodes it.
mid_label_pitch_map = {
    'fwdbackwd': MID_FWDBCKWD,
    'rollback': MID_ROLLBACK,
    'wrong_pred': MID_WRNG_PRED_INS,
    'mistouch': MID_MISTOUCH,
    'drag': MID_DRAG,
}

# Low-level label string -> pitch of the note that encodes it.
low_label_pitch_map = {
    'pitch_insert': LOW_INSERT,
    'pitch_delete': LOW_DELETE,
    'time_shift': LOW_SHIFT,
    'go_back': LOW_GOBACK,
    'change_offset': LOW_CHG_OFFSET,
    'change_onset': LOW_CHG_ONSET,
}

# Velocity given to every label note.
LABEL_VELOCITY = 10
#We cannot have enough control as in the case of m-san's code to the temporal shifts first.
#In Akira's logic, the 'scheduling' happens with the timeshifts at the same time, making things
#significantly easier.
# Structured dtype of the label note array: timing, the pitch/velocity of the
# encoding note, and the two label strings (mid-level first, low-level second).
label_na_fields = [
    ('onset_sec', '<f4'),
    ('duration_sec', '<f4'),
    ('pitch', '<i4'),
    ('velocity', '<i4'),
    ('midlvl_label', '<U256'),
    ('lowlvl_label', '<U256'),
]

# Structured dtype expected for the regular (source / target) note arrays.
regular_na_fields = [
    ('onset_sec', '<f4'),
    ('duration_sec', '<f4'),
    ('onset_tick', '<i4'),
    ('duration_tick', '<i4'),
    ('pitch', '<i4'),
    ('velocity', '<i4'),
    ('track', '<i4'),
    ('channel', '<i4'),
    ('id', '<U256'),
]
#order of applying the mistakes:
#time based
#then, if several at the same time: insert, delete, goback, offset,
#actually it might not matter so much.. try without sorting first.
#btw if we want to delete after an insertion, and we give the same 'time' as the insertion, we should
#make the extra step to check that we won't delete what we just inserted. (or not?). Anyway, ids would work
#in this case.
#food for thought, should the label be the same for low level and mid level stuff?
#TODO: have a function that merges mid_level labels done by the same 'event'.
# group_mid (start, end), and would merge the notes of the same mid level event into 1.
class lowlvl:
    """Apply low-level edits to a copy of a source note array and label them.

    The instance keeps three structured arrays:

    * ``src_na``   -- the untouched input notes,
    * ``tgt_na``   -- a deep copy of ``src_na`` that the edit methods modify,
    * ``label_na`` -- label notes (two per edit: one at the mid-level pitch,
      one at the low-level pitch) expressed in target time.

    ``time_from`` / ``time_to`` form a sampled map from source time to target
    time; edits that insert time shift ``time_to`` while ``time_from`` stays
    fixed.
    """

    def __init__(self, src_na, ts_annot=None):
        """Set up the target copy, the empty label array and the time map.

        Args:
            src_na: structured note array with at least ``onset_sec`` and
                ``duration_sec`` fields; must not be empty.
            ts_annot: optional 1-D sequence of annotation times in source
                time (see ``get_adjusted_gt``). Defaults to no annotations.

        Raises:
            ValueError: if ``src_na`` has no notes.
        """
        if len(src_na) == 0:
            raise ValueError('src_na must contain at least one note')

        start = float(src_na['onset_sec'][0])
        # The grid has to reach the end of the last sounding note.
        end = float(np.max(src_na['onset_sec'] + src_na['duration_sec']))
        # Roughly 40 grid points per second (not sure of resolution..); at
        # least 2 so that nearest-point lookups and interpolation always work.
        n_points = max(2, int(math.ceil(end) - math.ceil(start)) * 40)
        self.time_from = np.linspace(start, end, n_points)
        self.time_to = self.time_from.copy()

        # Time maps of performance regions that a go_back overwrites.
        # Format: (tgt interval start, tgt interval end)
        #             -> ([target time points], [src time points])
        self.repeat_tracker = {}

        self.ts_annot = [] if ts_annot is None else ts_annot
        self.onsets = src_na['onset_sec']
        self.src_na = src_na
        self.tgt_na = copy.deepcopy(self.src_na)
        self.label_na = np.zeros(0, dtype=label_na_fields)

        # Indexes of the note currently pointed at in each array.
        self.src_idx = 0
        self.tgt_idx = 0

    def _label_note(self, start, end, lowlvl_label, midlvl_label):
        """Add the pair of label notes for one edit spanning ``[start, end]``.

        One row gets the mid-level pitch and one the low-level pitch; unknown
        labels fall back to ``DEFAULT_MID`` / ``DEFAULT_LOW``. Both rows carry
        both label strings. ``label_na`` is re-sorted by onset afterwards.
        """
        mid_pitch = mid_label_pitch_map.get(midlvl_label, DEFAULT_MID)
        low_pitch = low_label_pitch_map.get(lowlvl_label, DEFAULT_LOW)

        # Fill by field name so the values cannot end up in the wrong column,
        # whatever the field order of the label dtype is.
        new_labels = np.zeros(2, dtype=self.label_na.dtype)
        new_labels['onset_sec'] = start
        new_labels['duration_sec'] = end - start
        new_labels['pitch'] = (mid_pitch, low_pitch)
        new_labels['velocity'] = LABEL_VELOCITY
        new_labels['midlvl_label'] = midlvl_label
        new_labels['lowlvl_label'] = lowlvl_label

        self.label_na = np.concatenate((new_labels, self.label_na))
        self.label_na.sort(order='onset_sec')

    def _shift_labels(self, src_time, offset):
        """Shift the onsets of the labels at/after ``src_time`` by ``offset``.

        ``src_time`` is mapped to target time through the current time map;
        the label whose onset is nearest to that target time and every label
        after it are moved. ``offset`` may be positive or negative. Does
        nothing when there are no labels yet.
        """
        if len(self.label_na) == 0:
            return
        grid_idx = np.fabs(self.time_from - src_time).argmin()
        tgt_time = self.time_to[grid_idx]
        # Nearest label, not strictly the first one after tgt_time.
        first_label = np.fabs(self.label_na['onset_sec'] - tgt_time).argmin()
        self.label_na['onset_sec'][first_label:] += offset

    def pitch_insert(self, src_time, pitch, duration, velocity, midlvl_label):
        """Insert a note into ``tgt_na`` at the target time of ``src_time``.

        Following notes are not shifted. The new note is built as a 9-field
        row (ticks, track and channel set to 0, id ``'none'``) cast to the
        dtype of ``tgt_na``, and is labelled ``'pitch_insert'``.
        """
        # TODO: find a calculate ticks option from partitura..
        # TODO: from the match file, find what id is placed for 'extra' notes
        grid_idx = np.fabs(self.time_from - src_time).argmin()
        insert_time = self.time_to[grid_idx]
        new_note = np.array(
            [(insert_time, duration, 0, 0, pitch, velocity, 0, 0, 'none')],
            dtype=self.tgt_na.dtype)
        self.tgt_na = np.concatenate((self.tgt_na, new_note))
        self.tgt_na.sort(order='onset_sec')
        self._label_note(insert_time, insert_time + duration,
                         'pitch_insert', midlvl_label)

    def _find_note_in_tgt(self, src_time, pitch):
        """Locate the note of ``pitch`` in ``tgt_na`` closest to ``src_time``.

        ``src_time`` is mapped to target time, then the notes between the
        onset nearest to (target time - 50 ms) and the last onset nearest to
        (target time + 50 ms) are scanned for ``pitch``.

        Returns:
            ``(True, index, onset)`` for the candidate whose onset is closest
            to the mapped target time, or ``(False, None, None)`` (after
            printing a message) when no candidate exists.
        """
        window = 0.05  # 50 ms tolerance before and after the target time
        candidates = []
        if len(self.tgt_na):
            grid_idx = np.fabs(self.time_from - src_time).argmin()
            tgt_time = self.time_to[grid_idx]
            onsets = self.tgt_na['onset_sec']

            lower = np.fabs(onsets - (tgt_time - window)).argmin()
            # For the upper bound take the LAST index at minimal distance, so
            # every note of a chord at that onset is included.
            upper_dist = np.fabs(onsets - (tgt_time + window))
            upper = np.flatnonzero(upper_dist == upper_dist.min())[-1]

            pitches = self.tgt_na['pitch']
            candidates = [i for i in range(lower, upper + 1)
                          if pitches[i] == pitch]

        if not candidates:
            print('pitch {} not found at time {}'.format(pitch, src_time))
            return False, None, None

        # Candidate onsets are target times, so rank them against the target
        # time rather than the source time.
        note_idx = min(candidates, key=lambda i: np.fabs(onsets[i] - tgt_time))
        return True, note_idx, onsets[note_idx]

    def pitch_delete(self, src_time, pitch, midlvl_label):
        """Remove the note of ``pitch`` found near ``src_time`` from ``tgt_na``.

        Every row sharing the found note's onset and pitch is removed. The
        removed note's span is labelled ``'pitch_delete'``. Does nothing when
        the note is not found.
        """
        found, note_idx, note_start = self._find_note_in_tgt(src_time, pitch)
        if not found:
            return
        note_end = note_start + self.tgt_na['duration_sec'][note_idx]
        same_onset = self.tgt_na['onset_sec'] == self.tgt_na['onset_sec'][note_idx]
        same_pitch = self.tgt_na['pitch'] == self.tgt_na['pitch'][note_idx]
        self.tgt_na = self.tgt_na[~(same_onset & same_pitch)]
        self._label_note(note_start, note_end, 'pitch_delete', midlvl_label)

    def change_note_offset(self, src_time, pitch, offset_shift, midlvl_label):
        """Lengthen (or shorten, if negative) a note by ``offset_shift`` seconds.

        Returns:
            ``True`` when the note was found and changed, ``False`` otherwise
            (e.g. the note was deleted by an earlier edit).
        """
        found, note_idx, note_start = self._find_note_in_tgt(src_time, pitch)
        if not found:
            return False
        self.tgt_na['duration_sec'][note_idx] += offset_shift
        note_end = note_start + self.tgt_na['duration_sec'][note_idx]
        self._label_note(note_start, note_end, "change_offset", midlvl_label)
        return True

    def change_note_onset(self, src_time, pitch, onset_shift, midlvl_label, lowlvl_label):
        """Placeholder: not implemented, does nothing and returns ``None``.

        Meant for small onset wiggles that need no change to the time map; a
        large move should instead be a deletion plus an insertion.
        """
        return

    def _construct_note_na(self, notes):
        """Convert a list of notes to an onset-sorted array of ``src_na``'s dtype."""
        notes_na = np.array(notes, dtype=self.src_na.dtype)
        notes_na.sort(order='onset_sec')
        return notes_na

    def get_notes(self, src_time, num_events_back):
        """Collect source notes walking backwards from the note at ``src_time``.

        Starting at the source note whose onset is nearest to ``src_time``,
        step back one note at a time; each change of onset counts as one
        event (a chord is one event). Stops after ``num_events_back`` events
        or at the start of the array. ``num_events_back == 0`` returns just
        the starting note.

        Returns:
            ``(index, notes)`` -- the index in ``src_na`` of the last note
            reached, and the collected notes as an onset-sorted array.
        """
        idx = np.fabs(self.src_na['onset_sec'] - src_time).argmin()
        current = self.src_na[idx]
        collected = [current]
        remaining = num_events_back
        while remaining:
            if idx == 0:
                break  # not enough notes before this one
            previous = self.src_na[idx - 1]
            if previous['onset_sec'] != current['onset_sec']:
                # A new onset means we have stepped into an earlier event.
                current = previous
                remaining -= 1
            collected.append(copy.deepcopy(previous))
            idx -= 1
        return idx, self._construct_note_na(collected)

    def go_back(self, src_time_to, src_time_from, notes=None, midlvl_label="rollback"):
        """Roll back from ``src_time_from`` to the earlier ``src_time_to``.

        The rollback is treated like a silence of length
        ``src_time_from - src_time_to`` inserted in the target at the
        position of ``src_time_from``: target notes and labels from there on
        are shifted, a ``'time_shift'`` label covers the gap, and ``time_to``
        is shifted from ``src_time_to`` onwards while ``time_from`` is left
        alone. The time map of the overwritten region is saved in
        ``repeat_tracker``.

        Args:
            src_time_to: source time to return to.
            src_time_from: source time at which the rollback happens.
            notes: optional notes to play in the gap, with onsets relative
                to 0. They are copied; the caller's data is not modified.
            midlvl_label: mid-level label recorded for the edit.

        Raises:
            ValueError: if ``src_time_to`` is later than ``src_time_from``.
        """
        if src_time_to > src_time_from:
            raise ValueError('src_time_to must not be later than src_time_from')
        shift = src_time_from - src_time_to

        from_idx = np.fabs(self.time_from - src_time_from).argmin()
        tgt_time = self.time_to[from_idx]

        if len(self.tgt_na):
            # Nearest note, not strictly the first one after tgt_time.
            first_note = np.fabs(self.tgt_na['onset_sec'] - tgt_time).argmin()
            self.tgt_na['onset_sec'][first_note:] += shift
        self._shift_labels(src_time_from, shift)
        self._label_note(tgt_time, tgt_time + shift, "time_shift", midlvl_label)

        # Re-point the grid so that src_time_to maps to the new starting
        # point. Save copies of the old mapping first: plain slices would be
        # views and would be changed by the in-place shift below.
        to_idx = np.fabs(self.time_from - src_time_to).argmin()
        interval = (self.time_to[to_idx], self.time_to[from_idx])
        self.repeat_tracker[interval] = (
            self.time_to[to_idx:from_idx + 1].copy(),
            self.time_from[to_idx:from_idx + 1].copy())
        self.time_to[to_idx:] += shift

        if notes is not None and len(notes):
            replayed = np.array(notes)  # copy, so the caller's notes survive
            replayed['onset_sec'] += tgt_time
            self.tgt_na = np.concatenate((self.tgt_na, replayed))
        self.tgt_na.sort(order='onset_sec')

    def go_fwd(self):
        """Placeholder for skipping forward: not implemented, does nothing."""
        return

    def time_offset(self, src_time, offset_time, midlvl_label):
        """Insert ``offset_time`` seconds of silence at ``src_time``.

        Shifts the target notes, the labels and ``time_to`` from the position
        of ``src_time`` onwards, then records a ``'time_shift'`` label
        covering the inserted span.
        """
        grid_idx = np.fabs(self.time_from - src_time).argmin()
        tgt_time = self.time_to[grid_idx]

        if len(self.tgt_na):
            # Nearest note, not strictly the first one after tgt_time.
            first_note = np.fabs(self.tgt_na['onset_sec'] - tgt_time).argmin()
            self.tgt_na['onset_sec'][first_note:] += offset_time
        # Labels are located through the time map, so move them while the map
        # still holds the pre-shift target time.
        self._shift_labels(src_time, offset_time)
        self.time_to[grid_idx:] += offset_time
        self._label_note(tgt_time, tgt_time + offset_time, "time_shift", midlvl_label)

    def inspect_tgt(self):
        """Placeholder for label inspection helpers: not implemented."""
        return

    def get_label_miditrack(self, loc):
        """Write all label notes as a MIDI file at ``loc``."""
        self._na_to_miditrack(self.label_na).write(loc)

    def _filter_by_label(self, name, tier='mid'):
        """Return the label rows whose ``tier`` label (``'mid'``/``'low'``) equals ``name``.

        Returns an empty list for any other ``tier``.
        """
        if tier == 'mid':
            return self.label_na[self.label_na['midlvl_label'] == name]
        if tier == 'low':
            return self.label_na[self.label_na['lowlvl_label'] == name]
        return []

    def get_midlevel_label_miditracks(self, folder):
        """Write one MIDI file per mid-level label into ``folder``.

        ``folder`` is created if it does not exist. Each file is named
        ``<label>.mid`` and holds the label notes recorded for that
        mid-level label.
        """
        os.makedirs(folder, exist_ok=True)
        for midlvl_label in np.unique(self.label_na['midlvl_label']):
            label_notes = self._filter_by_label(name=midlvl_label, tier='mid')
            out_path = os.path.join(folder, '{}.mid'.format(midlvl_label))
            self._na_to_miditrack(label_notes).write(out_path)

    def _na_to_miditrack(self, na):
        """Build a single-instrument piano ``PrettyMIDI`` object from a note array."""
        midiobj = pretty_midi.PrettyMIDI()
        piano_program = pretty_midi.instrument_name_to_program('Acoustic Grand Piano')
        inst = pretty_midi.Instrument(program=piano_program)
        for na_note in na:
            onset = float(na_note['onset_sec'])
            # Hand plain Python numbers to pretty_midi rather than numpy scalars.
            inst.notes.append(pretty_midi.Note(
                velocity=int(na_note['velocity']),
                pitch=int(na_note['pitch']),
                start=onset,
                end=onset + float(na_note['duration_sec'])))
        midiobj.instruments.append(inst)
        return midiobj

    def get_target_miditrack(self, loc):
        """Write the edited (target) notes as a MIDI file at ``loc``."""
        self._na_to_miditrack(self.tgt_na).write(loc)

    def get_src_miditrack(self, loc):
        """Write the original (source) notes as a MIDI file at ``loc``."""
        self._na_to_miditrack(self.src_na).write(loc)

    def get_timemap(self):
        """Return an iterator of ``(source time, target time)`` grid pairs."""
        return zip(self.time_from, self.time_to)

    def get_repeats(self):
        """Return the saved rollback regions.

        Format: (interval for tgt time region) -> ([target time points],
        [src time points]) -- the opposite orientation to ``get_timemap``.
        """
        return self.repeat_tracker

    def get_adjusted_gt(self):
        """Map the annotation times in ``ts_annot`` onto target time.

        Every annotation is interpolated through the current time map. For
        each saved rollback region, the annotations between the ones nearest
        to the region's first and last source times are additionally mapped
        through the region's saved (pre-rollback) time map, so repeated
        passages show up once per pass.

        Returns:
            A sorted 1-D float array with all mapped annotation times.
        """
        annots = np.asarray(self.ts_annot, dtype=float)
        interpol_t = np.interp(annots, self.time_from, self.time_to)

        interpol_repeats = []
        if annots.size:
            for tgt_time, src_time in self.repeat_tracker.values():
                if len(src_time) == 0:
                    continue
                # Nearest annotations; one just outside the region may be
                # included, which only yields a repeated value.
                annot_from = np.fabs(annots - src_time[0]).argmin()
                annot_to = np.fabs(annots - src_time[-1]).argmin()
                interpol_repeats.extend(
                    np.interp(annots[annot_from:annot_to + 1], src_time, tgt_time))

        # The repeats were collected at the end, so merge and sort.
        adjusted = np.concatenate(
            (interpol_t, np.asarray(interpol_repeats, dtype=float)))
        adjusted.sort()
        return adjusted
#prob. not needed: if this index is within the resolution from the original time (which probably we should save in the class params)