-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathraw_compress.py
55 lines (37 loc) · 1.28 KB
/
raw_compress.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python
"""CDL encoding PCA fitter
CREATED:2013-05-08 16:15:55 by Brian McFee <brm2132@columbia.edu>
Usage:
./raw_compress.py n_jobs pca_model.pickle /path/to/octarines
Once we have a PCA model and a set of -CL.npy files, use PCA to compress them.
Saves output alongside as -raw-compressed.npy
"""
import os
import sys
import glob
import cPickle as pickle
import numpy as np
from joblib import Parallel, delayed
# Target fraction of cumulative explained variance: process_data compares the
# running sum of PCA.explained_variance_ratio_ against this threshold to pick
# how many leading PCA dimensions are kept.
RETAIN = 0.95
def process_song(PCA, d, song):
    """Project one song's feature matrix onto the leading PCA dimensions.

    Loads the array stored at ``song``, applies ``PCA.transform`` to it,
    keeps the first ``d`` columns of the result and saves that next to the
    input file as ``<name>-raw-compressed.npy``, where ``<name>`` is the
    input's base name up to the ``-CL.npy`` marker.

    Parameters
    ----------
    PCA : object
        Fitted model exposing ``transform(X)``; the returned value must
        support 2-D slicing.
    d : int
        Number of leading transformed dimensions to keep.
    song : str
        Path to a ``*-CL.npy`` file.

    Raises
    ------
    ValueError
        If the base name of ``song`` does not contain ``-CL.npy``.
    """
    songname = os.path.basename(song)
    songname = songname[:songname.index('-CL.npy')]
    # Parenthesised so the progress line is valid on both Python 2 and 3.
    print(songname)

    X = np.load(song)

    # Transform the data, then keep only the first d projected dimensions.
    Xhat = PCA.transform(X)[:, :d]

    # Use os.path.join rather than '%s/%s': for a song given without a
    # directory part, dirname is '' and string formatting would produce
    # '/<name>-raw-compressed.npy', i.e. a path at the filesystem root.
    outname = os.path.join(os.path.dirname(song),
                           '%s-raw-compressed.npy' % songname)
    np.save(outname, Xhat)
def process_data(n_jobs, PCA, file_glob):
    """Compress every file matching ``file_glob`` with a fitted PCA model.

    The number of dimensions to keep is the smallest count of leading
    components whose cumulative ``explained_variance_ratio_`` reaches
    ``RETAIN``.  Each matching file is then handed to ``process_song``,
    with the work distributed over ``n_jobs`` joblib workers.

    Parameters
    ----------
    n_jobs : int
        Passed to ``joblib.Parallel`` as ``n_jobs``.
    PCA : object
        Fitted model exposing ``explained_variance_ratio_`` (and whatever
        ``process_song`` needs from it).
    file_glob : str
        Glob pattern selecting the input files.
    """
    files = sorted(glob.glob(file_glob))

    # cumulative[k] is the variance fraction explained by components 0..k.
    cumulative = np.cumsum(PCA.explained_variance_ratio_)
    reached = cumulative >= RETAIN

    if reached.any():
        # argmax yields the 0-based index of the first component at which
        # the threshold is met.  That component has to be kept as well, so
        # the number of dimensions is index + 1 (using the bare index would
        # drop it, and would keep zero dimensions when the first component
        # alone is sufficient).
        d = int(np.argmax(reached)) + 1
    else:
        # Threshold never reached (argmax would misleadingly return 0):
        # keep every available component.
        d = len(cumulative)

    Parallel(n_jobs=n_jobs)(
        delayed(process_song)(PCA, d, song) for song in files)
if __name__ == '__main__':
    # Expected arguments: n_jobs, path to the pickled PCA model, data root.
    if len(sys.argv) < 4:
        sys.exit('Usage: %s n_jobs pca_model.pickle /path/to/data'
                 % sys.argv[0])

    n_jobs = int(sys.argv[1])

    # Pickles are binary data, so open in 'rb': text mode can corrupt the
    # stream on platforms that translate newlines and is rejected outright
    # by Python 3's pickle.
    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # file -- only point this at model files from a trusted source.
    with open(sys.argv[2], 'rb') as f:
        PCA = pickle.load(f)

    # Inputs are expected one directory level below the given root.
    file_glob = '%s/*/*-CL.npy' % sys.argv[3]
    process_data(n_jobs, PCA, file_glob)