-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDataLoader.py
83 lines (59 loc) · 2.23 KB
/
DataLoader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# coding: utf-8
# In[3]:
import numpy as np
import random
import os
# In[2]:
# Directory that the Data class scans for prepared ``.npy`` files. The path is
# relative, so it is resolved against the current working directory.
DATA_PATH = 'data/prepared_data/'
# In[37]:
class Data:
    """Serve shuffled samples from the ``.npy`` files found in ``DATA_PATH``.

    Files are visited one at a time in a random order and the rows of each
    file are shuffled when it is loaded. ``get_data`` hands the rows out in
    batches, crossing file boundaries when a batch needs more rows than the
    current file has left.

    Attributes:
        files: Names of the ``.npy`` files in ``DATA_PATH``, shuffled.
        file_counter: Index into ``files`` of the next file to load.
        X: Array holding the shuffled rows of the currently loaded file.
        X_counter: Index into ``X`` of the next row to serve.
    """

    def __init__(self):
        """List the data files and load the first one into ``self.X``."""
        self._reset()

    def _reset(self):
        """Start a fresh pass: re-list and reshuffle files, load the first."""
        self.X_counter = 0
        self.file_counter = 0
        self.files = [
            name for name in os.listdir(DATA_PATH) if name.endswith('.npy')
        ]
        if not self.files:
            # Same exception type the bare ``self.files[0]`` lookup would
            # raise, but with a message that names the actual problem.
            raise IndexError("no .npy files found in %r" % (DATA_PATH,))
        random.shuffle(self.files)
        self._load_data()

    def _load_data(self):
        """Load the next file into ``self.X`` with its rows shuffled."""
        path = os.path.join(DATA_PATH, self.files[self.file_counter])
        rows = list(np.load(path))
        # Shuffle with the ``random`` module (not NumPy) so that seeding
        # ``random`` keeps controlling both file order and row order.
        random.shuffle(rows)
        self.X = np.asarray(rows)
        self.file_counter += 1

    def get_data(self, batch_size):
        """Return the next ``batch_size`` rows as a NumPy array.

        Supports ``batch_size`` larger than a single file: rows are gathered
        from consecutive files until the batch is full.

        Args:
            batch_size: Number of rows requested; must be non-negative.

        Returns:
            An array of exactly ``batch_size`` rows, or ``None`` when the
            remaining data cannot fill the batch. In the ``None`` case any
            rows gathered for the unfinished batch are discarded,
            "Data exhausted, Re Initialize" is printed, and the loader is
            reset (files re-listed and reshuffled) so the next call starts a
            new pass.

        Raises:
            ValueError: If ``batch_size`` is negative.
        """
        if batch_size < 0:
            # A negative size would otherwise move X_counter backwards.
            raise ValueError(
                "batch_size must be non-negative, got %r" % (batch_size,)
            )

        batch = []
        remaining = batch_size
        while remaining > 0:
            if self.X_counter >= len(self.X):
                # Current file is used up: move on, or end the pass.
                if self.file_counter >= len(self.files):
                    print("Data exhausted, Re Initialize")
                    self._reset()
                    return None
                self._load_data()
                self.X_counter = 0
            # Slicing clamps at the end of the file, so this takes either the
            # rest of the batch or whatever the file still has.
            chunk = self.X[self.X_counter:self.X_counter + remaining]
            batch.extend(chunk)
            self.X_counter += len(chunk)
            remaining -= len(chunk)
        return np.asarray(batch)