-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathfft2melmx.m
171 lines (138 loc) · 5.7 KB
/
fft2melmx.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
function [wts,binfrqs] = fft2melmx(nfft, sr, nfilts, width, minfrq, maxfrq, htkmel, constamp)
% [wts,frqs] = fft2melmx(nfft, sr, nfilts, width, minfrq, maxfrq, htkmel, constamp)
% Generate a matrix of weights to combine FFT bins into Mel
% bins. nfft defines the source FFT size at sampling rate sr.
% Optional nfilts specifies the number of output bands required
% (else one per "mel/width"), and width is the constant width of each
% band relative to standard Mel (default 1).
% While wts has nfft columns, the second half are all zero.
% Hence, Mel spectrum is fft2melmx(nfft,sr)*abs(fft(xincols,nfft));
% minfrq is the frequency (in Hz) of the lowest band edge;
% default is 0, but 133.33 is a common standard (to skip LF).
% maxfrq is frequency in Hz of upper edge; default sr/2.
% You can exactly duplicate the mel matrix in Slaney's mfcc.m
% as fft2melmx(512, 8000, 40, 1, 133.33, 6855.5, 0);
% htkmel=1 means use HTK's version of the mel curve, not Slaney's.
% constamp=1 means make integration windows peak at 1, not
% pseudo-ortgonal (for reconstruction too).
% frqs returns bin center frqs.
% 2004-09-05 dpwe@ee.columbia.edu based on fft2barkmx
if nargin < 2; sr = 8000; end
if nargin < 3; nfilts = 0; end
if nargin < 4; width = 1.0; end
if nargin < 5; minfrq = 0; end % default bottom edge at 0
if nargin < 6; maxfrq = sr/2; end % default top edge at nyquist
if nargin < 7; htkmel = 0; end
if nargin < 8; constamp = 0; end
if nfilts == 0
nfilts = ceil(hz2mel(maxfrq, htkmel)/2);
end
wts = zeros(nfilts, nfft);
% Center freqs of each FFT bin
fftfrqs = [0:(nfft/2)]/nfft*sr;
% 'Center freqs' of mel bands - uniformly spaced between limits
minmel = hz2mel(minfrq, htkmel);
maxmel = hz2mel(maxfrq, htkmel);
binfrqs = mel2hz(minmel+[0:(nfilts+1)]/(nfilts+1)*(maxmel-minmel), htkmel);
% HTK ignores bins 0 and 1 - lowest bin is 2
% 2013-02-26
%binfrqs = max(binfrqs, 0.0*sr/nfft);
%binbin = round(binfrqs/sr*(nfft-1));
% marginally closer to HTK to calculate curves in Mel space
fftfrqs = hz2mel(fftfrqs, htkmel);
for i = 1:nfilts
% fs = mel2hz(i + [-1 0 1], htkmel);
%fs = binfrqs(i+[0 1 2]);
fs = hz2mel(binfrqs(i+[0 1 2]),htkmel);
% scale by width
fs = fs(2)+width*(fs - fs(2));
% lower and upper slopes for all bins
loslope = (fftfrqs - fs(1))/(fs(2) - fs(1));
hislope = (fs(3) - fftfrqs)/(fs(3) - fs(2));
% .. then intersect them with each other and zero
% wts(i,:) = 2/(fs(3)-fs(1))*max(0,min(loslope, hislope));
wts(i,1+[0:(nfft/2)]) = max(0,min(loslope, hislope));
% actual algo and weighting in feacalc (more or less)
% wts(i,:) = 0;
% ww = binbin(i+2)-binbin(i);
% usl = binbin(i+1)-binbin(i);
% wts(i,1+binbin(i)+[1:usl]) = 2/ww * [1:usl]/usl;
% dsl = binbin(i+2)-binbin(i+1);
% wts(i,1+binbin(i+1)+[1:(dsl-1)]) = 2/ww * [(dsl-1):-1:1]/dsl;
% need to disable weighting below if you use this one
end
% band 0 is special??
% these values are read from HSigP.c:533 fb.loWt[]
if 0
wts(1,1) = 0;
wts(1,2) = 1 - 0.532052517;
wts(1,3) = 1 - 0.0836902633;
wts(1,4) = 0.653339505;
wts(1,5) = 0.23960878;
wts(1,6) = 0.0;
wts(2,4) = 1-wts(1,4);
wts(2,5) = 1-wts(1,5);
end
if (constamp == 0)
% Slaney-style mel is scaled to be approx constant E per channel
wts = diag(2./(binfrqs(2+[1:nfilts])-binfrqs(1:nfilts)))*wts;
else
% "ortho" weights back-ported from Python (2014-05-15)
wts = diag(sqrt(( (24000/nfft) ./ (binfrqs(2+[1:nfilts]) - binfrqs(1:nfilts)))))*wts;
end
% Make sure 2nd half of FFT is zero
%wts(:,(nfft/2+2):nfft) = 0;
% seems like a good idea to avoid aliasing
% No, just don't even return it (was always a dumb idea)
wts = wts(:, 1:(nfft/2+1));
% Trim binfrqs to remove the extrema
binfrqs = binfrqs(2:end-1);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function f = mel2hz(z, htk)
% f = mel2hz(z, htk)
% Convert 'mel scale' frequencies into Hz
% Optional htk = 1 means use the HTK formula
% else use the formula from Slaney's mfcc.m
% 2005-04-19 dpwe@ee.columbia.edu
if nargin < 2
htk = 0;
end
if htk == 1
f = 700*(10.^(z/2595)-1);
else
f_0 = 0; % 133.33333;
f_sp = 200/3; % 66.66667;
brkfrq = 1000;
brkpt = (brkfrq - f_0)/f_sp; % starting mel value for log region
logstep = exp(log(6.4)/27); % the magic 1.0711703 which is the ratio needed to get from 1000 Hz to 6400 Hz in 27 steps, and is *almost* the ratio between 1000 Hz and the preceding linear filter center at 933.33333 Hz (actually 1000/933.33333 = 1.07142857142857 and exp(log(6.4)/27) = 1.07117028749447)
linpts = (z < brkpt);
f = 0*z;
% fill in parts separately
f(linpts) = f_0 + f_sp*z(linpts);
f(~linpts) = brkfrq*exp(log(logstep)*(z(~linpts)-brkpt));
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function z = hz2mel(f,htk)
% z = hz2mel(f,htk)
% Convert frequencies f (in Hz) to mel 'scale'.
% Optional htk = 1 uses the mel axis defined in the HTKBook
% otherwise use Slaney's formula
% 2005-04-19 dpwe@ee.columbia.edu
if nargin < 2
htk = 0;
end
if htk == 1
z = 2595 * log10(1+f/700);
else
% Mel fn to match Slaney's Auditory Toolbox mfcc.m
f_0 = 0; % 133.33333;
f_sp = 200/3; % 66.66667;
brkfrq = 1000;
brkpt = (brkfrq - f_0)/f_sp; % starting mel value for log region
logstep = exp(log(6.4)/27); % the magic 1.0711703 which is the ratio needed to get from 1000 Hz to 6400 Hz in 27 steps, and is *almost* the ratio between 1000 Hz and the preceding linear filter center at 933.33333 Hz (actually 1000/933.33333 = 1.07142857142857 and exp(log(6.4)/27) = 1.07117028749447)
linpts = (f < brkfrq);
z = 0*f;
% fill in parts separately
z(linpts) = (f(linpts) - f_0)/f_sp;
z(~linpts) = brkpt+(log(f(~linpts)/brkfrq))./log(logstep);
end