-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcnn.py
96 lines (83 loc) · 2.68 KB
/
cnn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# This was modified from Valerio Velardo's brilliant tutorial
# https://youtu.be/SQ1iIKs190Q
from torch import nn
from torchsummary import summary
class CNNNetwork(nn.Module):
    """Four-block 2-D CNN producing one sigmoid probability per input item.

    Architecture: 4 convolutional blocks -> flatten -> linear -> sigmoid.
    Every block is ``Conv2d -> ReLU -> MaxPool2d`` and the channel count
    grows ``in -> 16 -> 32 -> 64 -> 128``.

    Background notes on the layer hyper-parameters:

    * Kernel (grid) size: number of pixels for height/width (an odd value).
    * Stride: step size used when sliding the kernel across the matrix.
    * Depth: how many independent channels the kernel spans, e.g. an RGB
      image uses a 3 x 3 x 3 kernel because of its three colour channels.
    * Number of kernels: a layer outputs as many 2D arrays as it has kernels.
    * Pooling shrinks the data; its parameters are grid size, stride and
      type (max, average).

    Calculating the data shape for MFCC input (original author's example):
    13 MFCCs with a 512-sample hop length over a 51200-sample audio file
    give 100 frames (total samples / hop length), i.e. a 100 x 13 x 1
    feature map. Pass the matching ``input_shape`` for such data, because
    the default (1, 64, 44) yields a linear layer sized for 64 x 44 inputs.
    """

    # Hyper-parameters shared by all convolutional blocks.
    _KERNEL_SIZE = 3
    _STRIDE = 1
    _PADDING = 2
    _POOL_SIZE = 2
    _NUM_BLOCKS = 4
    _FINAL_CHANNELS = 128

    def __init__(self, input_shape=(1, 64, 44)):
        """Build the layers for inputs of the given per-item shape.

        Args:
            input_shape: ``(channels, height, width)`` of one input item,
                without the batch dimension. The default keeps the original
                hard-coded configuration (1 channel, 128 * 5 * 4 flattened
                features).

        Raises:
            ValueError: If ``input_shape`` does not have exactly three
                entries or any entry is smaller than 1.
        """
        super().__init__()

        channels, height, width = input_shape
        if min(channels, height, width) < 1:
            raise ValueError(
                f"input_shape entries must be >= 1, got {tuple(input_shape)}"
            )

        # Submodule names and creation order are kept stable so state-dict
        # keys and seeded weight initialisation do not change.
        self.conv1 = self._conv_block(channels, 16)
        self.conv2 = self._conv_block(16, 32)
        self.conv3 = self._conv_block(32, 64)
        self.conv4 = self._conv_block(64, self._FINAL_CHANNELS)
        self.flatten = nn.Flatten()

        # Size the linear layer from the input shape instead of hard-coding
        # the value that is only valid for 64 x 44 inputs.
        flat_features = (
            self._FINAL_CHANNELS
            * self._spatial_size_after_blocks(height)
            * self._spatial_size_after_blocks(width)
        )
        self.linear = nn.Linear(flat_features, 1)

        # Not used by forward(): softmax over a single logit is always 1.0.
        # Kept only so existing code that reads this attribute keeps working.
        self.softmax = nn.Softmax(dim=1)
        self.sigmoid = nn.Sigmoid()

    @classmethod
    def _conv_block(cls, in_channels, out_channels):
        """Return one ``Conv2d -> ReLU -> MaxPool2d`` block."""
        return nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=cls._KERNEL_SIZE,
                stride=cls._STRIDE,
                padding=cls._PADDING,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=cls._POOL_SIZE),
        )

    @classmethod
    def _spatial_size_after_blocks(cls, size):
        """Return the length of one spatial axis after all conv blocks."""
        for _ in range(cls._NUM_BLOCKS):
            # Conv2d output length (dilation 1).
            size = (size + 2 * cls._PADDING - cls._KERNEL_SIZE) // cls._STRIDE + 1
            # MaxPool2d output length (stride defaults to the kernel size,
            # no padding, floor rounding).
            size = (size - cls._POOL_SIZE) // cls._POOL_SIZE + 1
        return size

    def forward(self, input_data):
        """Run the network on a batch.

        Args:
            input_data: Batch of inputs; expected layout is
                ``(batch, channels, height, width)`` matching the
                ``input_shape`` given at construction.

        Returns:
            Tensor of shape ``(batch, 1)`` with sigmoid outputs in [0, 1].
        """
        x = self.conv1(input_data)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.flatten(x)
        logits = self.linear(x)
        predictions = self.sigmoid(logits)
        return predictions
if __name__ == "__main__":
    # Script entry point: print a layer-by-layer summary of the network
    # for a single (channels, height, width) = (1, 64, 44) input.
    summary_input_shape = (1, 64, 44)
    network = CNNNetwork()
    summary(network, summary_input_shape)