forked from almazan/watts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtool_learnAtts.m
155 lines (137 loc) · 5.92 KB
/
tool_learnAtts.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
function tool_learnAtts( optsfile, sp, ep)
%TOOL_LEARNATTS Learn attribute models for a range of PHOC attributes.
%   tool_learnAtts(optsfile, sp, ep) calls the options function named by
%   OPTSFILE (a function name or a function handle, invoked with no
%   arguments) to obtain the OPTS struct, loads the features and PHOCs of
%   the training and validation samples, and calls learn_att for every
%   attribute index in sp:ep.
%
%   Inputs:
%     optsfile - name (char) or handle of a zero-argument function that
%                returns the options struct. The fields read here are
%                fileSets, filePHOCs, fileFeatures and sgdparams.
%     sp       - first attribute index to learn (default: 1).
%     ep       - last attribute index to learn (default: number of rows
%                of the PHOC matrix).
%
%   This function returns nothing; learn_att stores each model on disk.

% feval instead of eval(sprintf(...)): same result for a function name,
% without executing an arbitrary string as code.
opts = feval(optsfile);

% Load the split masks into a struct so nothing is injected implicitly
% into this workspace.
sets = load(opts.fileSets,'idxTrain','idxValidation');

% Training samples first, then validation samples (same order as before).
% The (:) makes this work for row as well as column masks.
keep = [find(sets.idxTrain(:)); find(sets.idxValidation(:))];

% NOTE(review): readMat is defined elsewhere; both matrices are indexed
% below with one column per sample.
phocs = readMat(opts.filePHOCs);
features = readMat(opts.fileFeatures);
features = features(:, keep);
phocs = phocs(:, keep);

params = opts.sgdparams;
[numAtt,numSamples] = size(phocs);
dimFeats = size(features,1);

% Backward-compatible defaults: learn every attribute when no range given.
if nargin < 2 || isempty(sp)
    sp = 1;
end
if nargin < 3 || isempty(ep)
    ep = numAtt;
end

for idxAtt = sp:ep
    % Outputs are not needed here; learn_att saves its results to disk.
    learn_att(idxAtt,features, phocs,dimFeats, numSamples, opts, params);
end
end
function [model, attFeatsBag] = learn_att(idxAtt,feats, phocs,dimFeats, numSamples, opts, params)
%LEARN_ATT Learn, or load from disk, the linear model of one attribute.
%   [model, attFeatsBag] = learn_att(idxAtt, feats, phocs, dimFeats,
%   numSamples, opts, params)
%
%   Inputs:
%     idxAtt     - row of PHOCS (attribute index) to learn.
%     feats      - feature matrix, one column per sample.
%     phocs      - attribute matrix, one column per sample; a nonzero
%                  entry in row idxAtt marks a positive sample.
%     dimFeats   - number of rows of FEATS.
%     numSamples - number of columns of FEATS / PHOCS.
%     opts       - options struct; folderModels (prefix of the model file
%                  name) and modelsLog (log file path) are read here.
%     params     - parameters forwarded to cvSVM.
%
%   Outputs:
%     model       - struct with fields W (dimFeats x 1 averaged weights),
%                   B (averaged bias) and numPosSamples (average number
%                   of positive training samples per trained SVM).
%     attFeatsBag - 1 x numSamples scores; each sample's score is the
%                   average of W'*x over the SVMs for which that sample
%                   was held out of training.
%
%   The result is cached in <folderModels>model_<idxAtt>.mat; if that
%   file already exists it is loaded instead of retraining.

fileModel = sprintf('%smodel_%.3d.mat',opts.folderModels,idxAtt);

% Cached result: read exactly the two variables we return instead of
% loading whatever the file contains into this workspace.
if exist(fileModel,'file')
    fprintf('\nAttribute %d already computed. Loaded.\n',idxAtt);
    cached = load(fileModel,'model','attFeatsBag');
    model = cached.model;
    attFeatsBag = cached.attFeatsBag;
    return;
end

% Separate positives and negatives. The label row is computed once so the
% full phocs matrix does not have to be copied on every iteration.
isPos = phocs(idxAtt,:)~=0;
idxPos = find(isPos);
idxNeg = find(~isPos);
nPos = length(idxPos);
nNeg = length(idxNeg);

% If too few positives, discard attribute: store an all-zero model.
if nPos < 2
    logModelMessage(opts.modelsLog, ...
        sprintf('Model for attribute %d discarded. Not enough data\n',idxAtt));
    model.W = single(zeros(dimFeats,1));
    model.B = 0;
    model.numPosSamples = 0;
    attFeatsBag = single(zeros(1, numSamples));
    save(fileModel,'model','attFeatsBag');
    return;
end

% Accumulators for the averaged classifier and bias
W = single(zeros(dimFeats,1));
B = 0;
attFeatsBag = single(zeros(1, numSamples));
% Counts of how many updates, global (N) and per sample (Np)
Np = zeros(numSamples,1);
N = 0;
numPosSamples = 0;

% Each pass reshuffles the data and trains numIters SVMs. Every SVM holds
% out at least 20% of each group and the held-out window is shifted after
% every iteration, so 5 iterations hold out every sample at least once
% per pass.
numPasses = 2;
numIters = 5;

% Split sizes do not change between passes. With floor and at least two
% positives there is always at least one positive in train and in val.
nTrainPos = floor(0.8*nPos);
nValPos = nPos - nTrainPos;
nTrainNeg = floor(0.8*nNeg);
nValNeg = nNeg - nTrainNeg;

for cpass = 1:numPasses
    % Randomize data
    idxPos = idxPos(randperm(nPos));
    idxNeg = idxNeg(randperm(nNeg));

    for it = 1:numIters
        % The first nTrain of each group are train, the rest are val
        idxTrain = [idxPos(1:nTrainPos) idxNeg(1:nTrainNeg)];
        idxVal = [idxPos(nTrainPos+1:end) idxNeg(nTrainNeg+1:end)];

        % Get actual data
        featsTrain = feats(:,idxTrain);
        featsVal = feats(:,idxVal);
        labelsTrain = int32(isPos(idxTrain));
        labelsVal = int32(isPos(idxVal));
        numPosSamples = numPosSamples + nTrainPos;

        % Learn model
        tStart = tic;
        modelAtt = cvSVM(featsTrain,labelsTrain,featsVal,labelsVal,params);
        t = toc(tStart);
        logModelMessage(opts.modelsLog, ...
            sprintf('Model for attribute %d it %d pass %d (%.2f map) learned in %.0f seconds using %d positive samples\n',idxAtt, it,cpass, modelAtt.info.acc, t, nTrainPos));

        % Update the scores of the samples not used for training, as
        % well as the global model.
        N = N+1;
        Np(idxVal) = Np(idxVal)+1;
        sc = modelAtt.W'*featsVal;
        attFeatsBag(idxVal) = attFeatsBag(idxVal) + sc;
        W = W + modelAtt.W;
        B = B + modelAtt.B;

        % Shift the indices so the next iteration holds out new samples
        idxPos = circshift(idxPos, [0,nValPos]);
        idxNeg = circshift(idxNeg, [0,nValNeg]);
    end
end

% Average and save
model.W = W;
model.B = B;
model.numPosSamples = 0;
if N~=0
    model.W = model.W/N;
    model.B = model.B/N;
    % Average only samples that were scored at least once; a sample that
    % was never held out keeps a score of 0 instead of becoming NaN.
    counts = Np';
    scored = counts > 0;
    attFeatsBag(scored) = attFeatsBag(scored) ./ counts(scored);
    model.numPosSamples = ceil(numPosSamples / N);
end
save(fileModel,'model','attFeatsBag');
end

function logModelMessage(logFile, msg)
%LOGMODELMESSAGE Print MSG to the console and append it to LOGFILE.
%   A log file that cannot be opened produces a warning rather than an
%   error, so a logging problem does not abort training.
fprintf('%s', msg);
fid = fopen(logFile,'a');
if fid < 0
    warning('learn_att:logOpenFailed', ...
        'Could not open log file "%s" for appending.', logFile);
    return;
end
fprintf(fid,'%s',msg);
fclose(fid);
end
function map = modelMap(scores, labels)
%MODELMAP Average precision of the ranking induced by SCORES.
%   map = modelMap(scores, labels)
%
%   Inputs:
%     scores - vector with one score per sample (row or column).
%     labels - vector of the same length with 1 for positive samples and
%              0 for negative samples (row or column).
%
%   Output:
%     map - single-precision average precision: the mean, over the
%           positive samples, of the precision at the rank of each
%           positive when samples are sorted by decreasing score.
%           Returns 0 when LABELS contains no positive sample.

% Work on row vectors so the element-wise division by the rank vector
% below never expands into a matrix.
scores = scores(:)';
labels = single(labels(:)');

[~, order] = sort(scores, 'descend');
ranked = labels(order);
numPos = sum(ranked);

% No positives: average precision is undefined (0/0); report 0.
if numPos == 0
    map = single(0);
    return;
end

% hits(k) is the number of positives in the top k, kept only at the
% positions that hold a positive.
hits = cumsum(ranked) .* ranked;
map = sum(hits ./ (1:numel(ranked))) / numPos;
end
function model = cvSVM(featsTrain, labelsTrain, featsVal, labelsVal, params)
%CVSVM Train one linear SVM per regularisation value and keep the best.
%   model = cvSVM(featsTrain, labelsTrain, featsVal, labelsVal, params)
%
%   Inputs:
%     featsTrain  - training features, one column per sample.
%     labelsTrain - 0/1 training labels, mapped to -1/+1 for vl_svmtrain.
%     featsVal    - validation features, one column per sample.
%     labelsVal   - 0/1 validation labels.
%     params      - struct; params.lbds holds the candidate lambda values.
%
%   Output:
%     model - struct with fields
%               W        weights of the selected SVM,
%               B        bias of the selected SVM,
%               info.acc 100 * validation average precision (modelMap)
%                        of the selected SVM,
%               info.lbd the lambda value that was selected.
%
%   The candidate with the highest validation average precision wins. The
%   first candidate is always kept as a fallback, so W and B are never
%   empty even if no candidate scores above 0.

bestmap = 0;
bestlbd = NaN;
W = [];
B = [];

% Loop-invariant: labels in the -1/+1 double form vl_svmtrain is given.
targets = double(2*labelsTrain-1);

% (:)' so a column-shaped list of lambdas is iterated element by element.
for lbd = params.lbds(:)'
    [Wv,Bv] = vl_svmtrain(featsTrain, targets, double(lbd),'BiasMultiplier', 0.1);
    % The bias is omitted: it does not change the ranking of the samples.
    cmap = modelMap(Wv'*featsVal, labelsVal);
    if isnan(cmap)
        % Undefined average precision counts as the worst score.
        cmap = 0;
    end
    if isempty(W) || cmap > bestmap
        bestmap = cmap;
        bestlbd = double(lbd);
        W = Wv;
        B = Bv;
    end
end

if isempty(W)
    error('cvSVM:noCandidates', ...
        'params.lbds is empty: no SVM could be trained.');
end

model.W = W;
model.B = B;
model.info.acc = 100*bestmap;
model.info.lbd = bestlbd;
end