-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathdataset.py
136 lines (106 loc) · 4.15 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torch.nn.functional as F
from torch.utils.data.dataset import Dataset
import torch.utils.data as data
import scipy.io
import numpy as np
from tqdm import tqdm
class dataloader(Dataset):
    """Per-sample features, labels and class signatures for one data split.

    Two MATLAB files are read from ``root``:

    * ``res101.mat`` -- uses the ``'features'`` and ``'labels'`` entries.
      ``features`` is indexed by sample along its second axis.
    * ``att_splits.mat`` -- uses ``'att'`` (indexed by class along its second
      axis) and the ``*_loc`` entries, which hold the sample indices of each
      split. One is subtracted from every ``*_loc`` value and from every label
      before indexing, i.e. both are treated as 1-based.

    The selected split is kept twice: as NumPy arrays (``feats``, ``labels``,
    ``sig``) and as tensors on ``device`` (``feats_``, ``labels_``, ``sig_``).
    """

    # ``split`` argument -> key in ``att_splits.mat``. Any value not listed
    # here selects ``_DEFAULT_SPLIT_KEY``.
    _SPLIT_KEYS = {
        'trainval': 'trainval_loc',
        'train': 'train_loc',
        'val': 'val_loc',
        'test_seen': 'test_seen_loc',
    }
    _DEFAULT_SPLIT_KEY = 'test_unseen_loc'

    def __init__(self, transform, root='/content/drive/My Drive/computer_vision/datasets/CUB/', split='train', device='cpu'):
        """Load both ``.mat`` files and materialise the requested split.

        Args:
            transform: Object exposing ``fit_transform(array)``; it is applied
                to the feature matrix of the selected split.
            root: Directory prefix. The file names are appended by plain
                string concatenation, so it must end with a path separator.
            split: One of ``'trainval'``, ``'train'``, ``'val'``,
                ``'test_seen'``; any other value selects the unseen test split.
            device: Device the tensor copies are moved to.

        Raises:
            RuntimeError: If the selected split contains no samples.
        """
        path_features = root + 'res101.mat'
        path_att_splits = root + 'att_splits.mat'
        self.res101 = scipy.io.loadmat(path_features)
        att_splits = scipy.io.loadmat(path_att_splits)
        self.scaler = transform

        self.labels, self.feats, self.sig = self.get_data(att_splits, split)
        # Internal invariant: all three arrays are built from the same indices.
        assert len(self.labels) == len(self.feats) == len(self.sig)
        if len(self.feats) == 0:
            raise RuntimeError("Found zero feats in the directory: " + root)

        self.feats_ = torch.from_numpy(self.feats).float().to(device)
        self.labels_ = torch.from_numpy(self.labels).long().to(device)
        self.sig_ = torch.from_numpy(self.sig).float().to(device)

    @staticmethod
    def _zero_based(loc):
        """Flatten a ``*_loc`` array and shift it to 0-based indices."""
        # reshape(-1) instead of squeeze(): a single-sample split must stay
        # 1-D, otherwise indexing with it drops an axis.
        return np.asarray(loc).reshape(-1) - 1

    def _labels_numpy(self):
        """Return ``labels_`` as a host NumPy array, whatever its device."""
        # NumPy cannot read a tensor that lives on a non-CPU device directly.
        return self.labels_.cpu().numpy()

    def __getitem__(self, index):
        """Return ``(features, label, signature)`` of the sample at ``index``."""
        x = self.feats_[index, :]
        sig = self.sig_[index, :]
        y = self.labels_[index]
        return x, y, sig

    def __get_perclass_feats__(self, index):
        """Return the feature rows whose label equals ``index``.

        Returns ``None`` when no sample of this split carries that label.
        """
        if isinstance(index, torch.Tensor):
            index = index.item()
        rows = np.where(self._labels_numpy() == index)[0]
        if rows.size == 0:
            return None
        row_idx = torch.as_tensor(rows, dtype=torch.long, device=self.feats_.device)
        return self.feats_[row_idx, :]

    def __NumClasses__(self):
        """Return the sorted unique labels of this split (an array, not a count)."""
        return np.unique(self._labels_numpy())

    def __get_attlen__(self):
        """Return the number of columns of the per-sample signature matrix."""
        return self.sig.shape[1]

    def __getlen__(self):
        """Return the number of columns of the feature matrix."""
        return self.feats.shape[1]

    def __totalClasses__(self):
        """Return how many distinct labels ``res101['labels']`` contains."""
        return len(np.unique(self.res101['labels']))

    def __attributeVector__(self):
        """Return ``(signatures, classes)`` for the classes of this split.

        ``classes`` holds the sorted unique labels and ``signatures`` has one
        row per entry of ``classes``.
        """
        classes = np.unique(self._labels_numpy())
        return self.signature[:, classes - 1].transpose(), classes

    def __Test_Features_Labels__(self):
        """Return the feature and label tensors of this split."""
        return self.feats_, self.labels_

    def check_unique_labels(self, labels, att_splits):
        """Store the labels, and the unique labels, of the four main splits.

        Sets ``labels_train``, ``labels_val``, ``labels_trainval`` and
        ``labels_test`` plus the matching unique-label arrays. Nothing is
        validated here; the stored arrays can be intersected by the caller to
        check for class overlap between splits.
        """
        self.labels_train = labels[self._zero_based(att_splits['train_loc'])]
        self.labels_val = labels[self._zero_based(att_splits['val_loc'])]
        self.labels_trainval = labels[self._zero_based(att_splits['trainval_loc'])]
        self.labels_test = labels[self._zero_based(att_splits['test_unseen_loc'])]

        self.train_labels_seen = np.unique(self.labels_train)
        self.val_labels_unseen = np.unique(self.labels_val)
        self.trainval_labels_seen = np.unique(self.labels_trainval)
        self.test_labels_unseen = np.unique(self.labels_test)

    def __len__(self):
        """Return the number of samples in this split."""
        return self.feats.shape[0]

    def get_data(self, att_splits, split):
        """Extract labels, scaled features and signatures for ``split``.

        Also sets ``self.signature`` and the attributes written by
        ``check_unique_labels``.

        Args:
            att_splits: Mapping with ``'att'`` and the ``*_loc`` index arrays.
            split: Split name; unknown names select the unseen test split.

        Returns:
            ``(labels, features, signatures)``: ``labels`` as int64,
            ``features`` with one row per sample, and ``signatures`` with the
            class signature of each sample as a row.
        """
        labels = self.res101['labels']
        features = self.res101['features']
        self.signature = att_splits['att']
        self.check_unique_labels(labels, att_splits)

        loc = self._SPLIT_KEYS.get(split, self._DEFAULT_SPLIT_KEY)
        sample_idx = self._zero_based(att_splits[loc])

        labels_loc = labels[sample_idx]
        feat_vec = np.transpose(features[:, sample_idx])

        # Look up every sample's class signature in one indexing operation.
        class_idx = np.asarray(labels_loc, dtype=np.int64).reshape(-1) - 1
        sig_vec = np.zeros((labels_loc.shape[0], self.signature.shape[0]))
        sig_vec[:] = self.signature[:, class_idx].T

        # fit_transform() may return a new array instead of scaling in place,
        # so its result has to be kept or the scaling is silently lost. A
        # transform that works purely in place and returns None is tolerated.
        scaled = self.scaler.fit_transform(feat_vec)
        if scaled is not None:
            feat_vec = np.asarray(scaled)

        return np.asarray(labels_loc, dtype=np.int64), feat_vec, sig_vec
class classifier_dataloader(Dataset):
    """Dataset of feature/label tensor pairs held on a single device.

    Labels are stored as given and served shifted down by one.
    """

    def __init__(self, features_img, labels, device):
        """Store both tensors on ``device``.

        Args:
            features_img: Feature tensor indexed by sample along its first
                axis; cast to float.
            labels: Label tensor with one entry per sample; cast to long.
            device: Device both tensors are moved to.
        """
        self.labels = labels.long().to(device)
        self.feats = features_img.float().to(device)

    def __getitem__(self, index):
        """Return ``(features, label - 1)`` for the sample at ``index``."""
        X = self.feats[index, :]
        # Shift by one for the NLL loss, as noted by the original author.
        y = self.labels[index] - 1
        return X, y

    def __len__(self):
        """Return the number of stored labels."""
        return len(self.labels)

    def __targetClasses__(self):
        """Return the sorted unique stored (unshifted) labels as a NumPy array."""
        # Copy to host memory first: NumPy cannot read a tensor that lives on
        # a non-CPU device.
        return np.unique(self.labels.cpu().numpy())