forked from Lolnarr/ml4qs_bitburger
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDataGenerator.py
51 lines (41 loc) · 1.56 KB
/
DataGenerator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from typing import Tuple
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that assembles batches from per-sample CSV files.

    Every row of ``df`` describes one sample: the ``data_path`` column holds
    the path of a CSV file with the sample's values, and the ``label`` column
    holds a single letter that is mapped to a class index
    (``'A'`` -> 0, ``'B'`` -> 1, ...; case-insensitive).

    Args:
        df: Frame with a ``data_path`` and a ``label`` column, one row per
            sample.
        shape: Shape of one sample, i.e. of the CSV contents after the
            ``'Unnamed: 0'`` column has been dropped.
        num_classes: Depth of the one-hot encoded label vectors.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the sample order on construction and
            at the end of every epoch.
    """

    def __init__(self,
                 df: pd.DataFrame,
                 shape: Tuple[int, int] = (50, 13),
                 num_classes: int = 26,
                 batch_size: int = 32,
                 shuffle: bool = True):
        # Newer Keras versions expect Sequence subclasses to run the base
        # initializer; on older versions this is a harmless no-op.
        super().__init__()
        self.df = df
        self.shape = shape
        # Index labels of ``df``. Only their count drives batching; rows are
        # always addressed by position so any index (non-contiguous, shuffled,
        # non-integer) works.
        self.list_ids = self.df.index.tolist()
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def on_epoch_end(self):
        """Reset the row order, reshuffling it when ``shuffle`` is enabled."""
        self.index = np.arange(len(self.list_ids))
        if self.shuffle:
            np.random.shuffle(self.index)

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, y)`` pair."""
        start = index * self.batch_size
        positions = self.index[start:start + self.batch_size]
        return self.__get_data(positions)

    def __len__(self):
        """Return the number of full batches per epoch.

        Floor division drops a trailing partial batch, so every batch has
        exactly ``batch_size`` samples.
        """
        return len(self.list_ids) // self.batch_size

    def __get_data(self, positions):
        """Load the samples stored at the given row positions of ``self.df``.

        Args:
            positions: Integer row positions (not index labels) to load.

        Returns:
            A tuple ``(X, y)``: ``X`` is an array of shape
            ``(len(positions), *self.shape)`` with the sample values and
            ``y`` is the one-hot encoding (depth ``self.num_classes``) of the
            sample labels.
        """
        paths = self.df['data_path']
        labels = self.df['label']
        # Size the buffers by the actual batch length so a short batch can
        # never expose uninitialised rows of ``np.empty``.
        count = len(positions)
        X = np.empty((count, *self.shape))
        y = np.empty(count, dtype=int)
        for i, pos in enumerate(positions):
            data = pd.read_csv(paths.iloc[pos])
            # Drop the unnamed first column of the CSV file so that only the
            # sample values remain.
            data = data.drop(columns=['Unnamed: 0'])
            X[i,] = data.values
            # Map the label letter to a zero-based class index.
            y[i] = ord(labels.iloc[pos].upper()) - ord('A')
        # Honour the configured number of classes instead of a fixed 26.
        return X, tf.one_hot(y, depth=self.num_classes)