Skip to content

CorvusCorax/Pytorch-LSTM-Mini-Tutorial

Repository files navigation

PyTorch LSTM Example on Sin(t) Waveform Prediction

  • Network & Solver definition
  • Mini-Batch Training & Testing
  • Inifnite time sequence inference
  • Iterative Finetuning

Example equivalent to Caffe example https://github.com/CorvusCorax/Caffe-LSTM-Mini-Tutorial
which is based on http://www.xiaoliangbai.com/2018/01/30/Caffe-LSTM-SinWaveform-Batch by Xiaoliang Bai.

import numpy as np
import math
import os
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import torch
use_cuda = True

torch.manual_seed(0x1010)
np.random.seed(0x1010)
device = torch.device("cuda" if use_cuda else "cpu")
# Use the sample generator from Tensorflow Sin(t) online
def generate_sample(f = 1.0, t0 = None, batch_size = 1, predict = 50, samples = 100):
    """
    Generates data samples.

    :param f: The frequency to use for all time series or None to randomize.
    :param t0: The time offset to use for all time series or None to randomize.
    :param batch_size: The number of time series to generate.
    :param predict: The number of future samples to generate.
    :param samples: The number of past (and current) samples to generate.
    :return: Tuple that contains the past times and values as well as the future times and values. In all outputs,
             each row represents one time series of the batch.
    """
    Fs = 100.0

    T = np.empty((batch_size, samples))
    Y = np.empty((batch_size, samples))
    FT = np.empty((batch_size, predict))
    FY = np.empty((batch_size, predict))

    _t0 = t0
    for i in range(batch_size):
        t = np.arange(0, samples + predict) / Fs
        if _t0 is None:
            t0 = np.random.rand() * 2 * np.pi
        else:
            t0 = _t0 + i/float(batch_size)

        freq = f
        if freq is None:
            freq = np.random.rand() * 3.5 + 0.5

        y = np.sin(2 * np.pi * freq * (t + t0))

        T[i, :] = t[0:samples]
        Y[i, :] = y[0:samples]

        FT[i, :] = t[samples:samples + predict]
        FY[i, :] = y[samples:samples + predict]

    return T, Y, FT, FY
snapshot_prefix = 'lstm_demo_snapshot'
# Network Parameters
n_input = 1 # single input stream
n_steps = 100 # timesteps
n_hidden = 15 # hidden units in LSTM
n_outputs = 50 # predictions in future time
batch_size = 20 # batch of data
NO_INPUT_DATA = -2 # defined numeric value for network if no input data is available
# Training Parameters
n_train = 6000
n_display = 200
n_adamAlpha = 0.001
n_adamEpsilon = 0.02

Training Network definition

class LSTMDemoNetwork(torch.nn.Module):

    n_hidden = None
    hx = (None,None)
    
    # n_input is input timesteps, n_hidden is LSTM units, n_output is output timesteps
    def __init__(self, n_hidden):
        super(LSTMDemoNetwork, self).__init__()
        self.n_hidden=n_hidden
        
        self.lstm = torch.nn.LSTM(1,self.n_hidden,num_layers=1)
        self.ip1 = torch.nn.Linear(self.n_hidden,1)
        
        self.init_weights()
        
    def forward(self, x, hx=None):
        
        out, thx = self.lstm(x,hx=hx)
        self.hx = (thx[0].detach(),thx[1].detach())
        out = self.ip1(out)
        return out
    
    def init_weights(self):
        # Initialize forget gate bias to 1
        #
        # from https://pytorch.org/docs/stable/_modules/torch/nn/modules/rnn.html#LSTM
        #    .. math::
        #    \begin{array}{ll} \\
        #        i_t = \sigma(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\
        #        f_t = \sigma(W_{if} x_t + b_{if} + W_{hf} h_{(t-1)} + b_{hf}) \\
        #        g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{(t-1)} + b_{hg}) \\
        #        o_t = \sigma(W_{io} x_t + b_{io} + W_{ho} h_{(t-1)} + b_{ho}) \\
        #        c_t = f_t * c_{(t-1)} + i_t * g_t \\
        #        h_t = o_t * \tanh(c_t) \\
        #    \end{array}
        #...
        #    Attributes:
        #    weight_ih_l[k] : the learnable input-hidden weights of the :math:`\text{k}^{th}` layer
        #      `(W_ii|W_if|W_ig|W_io)`, of shape `(4*hidden_size, input_size)` for `k = 0`.
        #       Otherwise, the shape is `(4*hidden_size, num_directions * hidden_size)`
        #    weight_hh_l[k] : the learnable hidden-hidden weights of the :math:`\text{k}^{th}` layer
        #      `(W_hi|W_hf|W_hg|W_ho)`, of shape `(4*hidden_size, hidden_size)`
        #    bias_ih_l[k] : the learnable input-hidden bias of the :math:`\text{k}^{th}` layer
        #      `(b_ii|b_if|b_ig|b_io)`, of shape `(4*hidden_size)`
        #    bias_hh_l[k] : the learnable hidden-hidden bias of the :math:`\text{k}^{th}` layer
        #      `(b_hi|b_hf|b_hg|b_ho)`, of shape `(4*hidden_size)`
        #
        #
        # This is a common trick to speed up LSTM training.
        # Unfortunately Pytorch does not offer selective weight initialisation in the layer definition.
        
        #print(self.lstm.state_dict().keys())
        param = getattr(self.lstm, 'bias_hh_l0')
        param.data[self.n_hidden:2*self.n_hidden] = 1
        # IMPORTANT! For some reason Pytorch uses two bias terms in each term of the LSTM cell
        # this is not only unnecessary and increases the number of learnable parameters
        # it also makes training less stable when compared to caffe1
        # we force bias_ih to 0 and set learning rate to 0 as well
        param = getattr(self.lstm, 'bias_ih_l0')
        param.data[:] = 0
        param.requires_grad = False

        
lstmModel = LSTMDemoNetwork(n_hidden).to(device)
lossFunction = torch.nn.MSELoss().to(device)
optimizer = torch.optim.Adam(lstmModel.parameters(),lr=n_adamAlpha,eps=n_adamEpsilon)

Training

# Train network
def train_single(model, lf, solver, niter, disp_step):
    train_loss = np.zeros(niter) # this is for plotting, later
    
    
    unknown = torch.tensor(np.ones((n_outputs,batch_size), dtype=np.float32) * NO_INPUT_DATA).to(device)
    for i in range(niter):
        _, batch_x, _, batch_y = generate_sample(f=None,
                                         t0=None,
                                         batch_size=batch_size,
                                         samples=n_steps,
                                         predict=n_outputs)
        # IMPORTANT: Caffe LSTM has time in first dimension and batch in second, so
        # batched training data needs to be transposed
        # (This is from caffe examples, but unless batch_first has been passed to LSTM initializer, pytorch does the same thing)
        batch_x = torch.tensor(batch_x.transpose(),dtype=torch.float32).to(device)
        batch_y = torch.tensor(batch_y.transpose(),dtype=torch.float32).to(device)
        
        combined_x = torch.cat((batch_x,unknown)).view([n_steps+n_outputs,batch_size,1])
        combined_y = torch.cat((batch_x,batch_y)).view([n_steps+n_outputs,batch_size,1])
        
        result = model(combined_x)
        
        loss = lf(combined_y,result)
        
        train_loss[i] = loss.detach().cpu().numpy()
        
        solver.zero_grad()
        loss.backward()
        solver.step()
        
        if i % disp_step == 0:
            if i==0:
                print("step ", i, ", loss = ", train_loss[i])
            else:
                print("step ", i, ", loss = ", train_loss[i], ", avg loss = ", np.average(train_loss[i-disp_step:i]))
    print("Finished training, iteration reached ", niter, " final loss = ", train_loss[niter-1],
        " final avg = ", np.average(train_loss[niter-disp_step:niter-1]))
    return train_loss

train_loss = train_single(lstmModel,lossFunction,optimizer,n_train,n_display)

#explicitly save snapshot if it has not been done yet
filename='%s_iter_%i.pt' % (snapshot_prefix,n_train)
print('saving snapshot to "%s"' % (filename))
torch.save(lstmModel.state_dict(),filename)

# plot loss value during training
plt.plot(np.arange(n_train), train_loss)
plt.show()
step  0 , loss =  0.6100238561630249
step  200 , loss =  0.46383559703826904 , avg loss =  0.5117676916718483
step  400 , loss =  0.35188817977905273 , avg loss =  0.4015021505951881
step  600 , loss =  0.27478936314582825 , avg loss =  0.29233527250587943
step  800 , loss =  0.21736185252666473 , avg loss =  0.23865433923900128
step  1000 , loss =  0.20815210044384003 , avg loss =  0.21169447161257268
step  1200 , loss =  0.18245530128479004 , avg loss =  0.19248730778694154
step  1400 , loss =  0.17951136827468872 , avg loss =  0.17836878329515457
step  1600 , loss =  0.1621991991996765 , avg loss =  0.1723385511338711
step  1800 , loss =  0.1653403639793396 , avg loss =  0.16596302539110183
step  2000 , loss =  0.1583380550146103 , avg loss =  0.162551117092371
step  2200 , loss =  0.1612466424703598 , avg loss =  0.1598108634352684
step  2400 , loss =  0.16245587170124054 , avg loss =  0.15708779029548167
step  2600 , loss =  0.15433980524539948 , avg loss =  0.1543604374676943
step  2800 , loss =  0.14505650103092194 , avg loss =  0.1518767724186182
step  3000 , loss =  0.13852189481258392 , avg loss =  0.1487720350176096
step  3200 , loss =  0.12218562513589859 , avg loss =  0.14528122432529927
step  3400 , loss =  0.14171256124973297 , avg loss =  0.14186786215752364
step  3600 , loss =  0.1320994347333908 , avg loss =  0.13702102996408938
step  3800 , loss =  0.13346511125564575 , avg loss =  0.14042050909250975
step  4000 , loss =  0.1315401941537857 , avg loss =  0.1420031564310193
step  4200 , loss =  0.13872137665748596 , avg loss =  0.1294499559700489
step  4400 , loss =  0.10143540054559708 , avg loss =  0.12134559355676174
step  4600 , loss =  0.16664937138557434 , avg loss =  0.12554681070148946
step  4800 , loss =  0.12061886489391327 , avg loss =  0.12734253853559493
step  5000 , loss =  0.08962979912757874 , avg loss =  0.11184824105352163
step  5200 , loss =  0.0747600719332695 , avg loss =  0.1083226903155446
step  5400 , loss =  0.1375703066587448 , avg loss =  0.10364189278334379
step  5600 , loss =  0.08898063004016876 , avg loss =  0.10170481231063605
step  5800 , loss =  0.08319579809904099 , avg loss =  0.09477913180366158
Finished training, iteration reached  6000  final loss =  0.11366216838359833  final avg =  0.09371602690624233
saving snapshot to "lstm_demo_snapshot_iter_6000.pt"

png

Testing

# Test the prediction with trained (unrolled) net
# we can change the batch size on the network at runtime, but not the number of timesteps (depth of unrolling)
def test_net(net,n_tests):
    batch_size = 1

    unknown = torch.tensor(np.ones((n_outputs,batch_size), dtype=np.float32) * NO_INPUT_DATA).to(device)
    for i in range(1, n_tests + 1):
        plt.subplot(n_tests, 1, i)
        t, y, next_t, expected_y = generate_sample(f=i+0.1337, t0=None, samples=n_steps, predict=n_outputs)
        test_input = torch.tensor(y.transpose(),dtype=torch.float32).to(device)
        combined_x = torch.cat((test_input,unknown)).view([n_steps+n_outputs,batch_size,1])
        expected_y = expected_y.reshape(n_outputs)
        
        prediction = net(combined_x).detach().cpu().numpy()
        
        # remove the batch size dimensions
        t = t.squeeze()
        y = y.squeeze()
        next_t = next_t.squeeze()
        t2 = np.append(t,next_t)
        prediction = prediction.squeeze()
        
        plt.plot(t, y, color='black')
        plt.plot(np.append(t[-1], next_t), np.append(y[-1], expected_y), color='green', linestyle=":")
        plt.plot(t2, prediction, color='red')
        plt.ylim([-1, 1])
        plt.xlabel('time [t]')
        plt.ylabel('signal')
    plt.show()
test_net(lstmModel,3)

png

Inference / Validation

# the single step network can process infinite time series in a loop,
# as such we can increate n_outputs safely to have a glance at long term behaviour

def test_net_iterative(net,n_tests,n_outputs):
    for i in range(1, n_tests + 1):
        plt.subplot(n_tests, 1, i)
        t, y, next_t, expected_y = generate_sample(f=i+0.1337, t0=None, samples=n_steps, predict=n_outputs)
        expected_y = expected_y.reshape(n_outputs)

        prediction = []
        iA = np.zeros((1,1,1),dtype=np.float32)
        hx = None
        for T in range(n_steps):
            iA[0,0,0] = y[0,T].copy()
            x = torch.tensor(iA).to(device)
            yp = net(x,hx=hx).detach().cpu().numpy()
            hx = net.hx
            prediction.append(yp[0,0,0])
        
        for T in range(n_outputs):
            # in this case we have to manually indicate to the network
            # that there is no more input data at the current time step
            iA[0,0,0] = NO_INPUT_DATA
            x = torch.tensor(iA).to(device)
            yp = net(x,hx=hx).detach().cpu().numpy()
            hx = net.hx
            prediction.append(yp[0,0,0])
        
        # remove the batch size dimensions
        t = t.squeeze()
        y = y.squeeze()
        next_t = next_t.squeeze()
        t2 = np.append(t,next_t)
        prediction = np.array(prediction)
        
        plt.plot(t, y, color='black')
        plt.plot(np.append(t[-1], next_t), np.append(y[-1], expected_y), color='green', linestyle=":")
        plt.plot(t2, prediction, color='red')
        plt.ylim([-1, 1])
        plt.xlabel('time [t]')
        plt.ylabel('signal')
    plt.show()

test_net_iterative(lstmModel,3,600)

png

Observation:

The network drifts towards a generic sine wave at constant frequency when left running for longer than the training sample size. What happens if we train with a longer training window?

n_outputs = 200
n_train = 20000
snapshot_prefix = 'lstm_demo2_snapshot'

lstmModel = LSTMDemoNetwork(n_hidden).to(device)
optimizer = torch.optim.Adam(lstmModel.parameters(),lr=n_adamAlpha,eps=n_adamEpsilon)
train_loss = train_single(lstmModel,lossFunction,optimizer,n_train,800)

#explicitly save snapshot if it has not been done yet
filename='%s_iter_%i.pt' % (snapshot_prefix,n_train)
print('saving snapshot to "%s"' % (filename))
torch.save(lstmModel.state_dict(),filename)


plt.plot(np.arange(n_train), train_loss)
plt.show()
step  0 , loss =  0.5993098020553589
step  800 , loss =  0.36738312244415283 , avg loss =  0.4462436730414629
step  1600 , loss =  0.34903058409690857 , avg loss =  0.3582467787340283
step  2400 , loss =  0.3405740559101105 , avg loss =  0.33886114452034233
step  3200 , loss =  0.32634323835372925 , avg loss =  0.33354020312428473
step  4000 , loss =  0.3298587501049042 , avg loss =  0.33072057999670507
step  4800 , loss =  0.3283030688762665 , avg loss =  0.3287493133544922
step  5600 , loss =  0.3225407004356384 , avg loss =  0.3270166611671448
step  6400 , loss =  0.3237491548061371 , avg loss =  0.32480167396366594
step  7200 , loss =  0.3187989890575409 , avg loss =  0.3212342968955636
step  8000 , loss =  0.3114610016345978 , avg loss =  0.31326253194361925
step  8800 , loss =  0.30659493803977966 , avg loss =  0.3122813962772489
step  9600 , loss =  0.31901195645332336 , avg loss =  0.3151634018868208
step  10400 , loss =  0.29218778014183044 , avg loss =  0.30723592575639486
step  11200 , loss =  0.2900225818157196 , avg loss =  0.3011150689050555
step  12000 , loss =  0.28400006890296936 , avg loss =  0.29929715652018785
step  12800 , loss =  0.3201054334640503 , avg loss =  0.3114289540797472
step  13600 , loss =  0.28136447072029114 , avg loss =  0.3051535439863801
step  14400 , loss =  0.29169967770576477 , avg loss =  0.29688007034361363
step  15200 , loss =  0.32572659850120544 , avg loss =  0.32527332320809366
step  16000 , loss =  0.32227620482444763 , avg loss =  0.32703731760382654
step  16800 , loss =  0.3101913034915924 , avg loss =  0.3207817366719246
step  17600 , loss =  0.30284586548805237 , avg loss =  0.3142517460882664
step  18400 , loss =  0.3241356611251831 , avg loss =  0.31600836109369995
step  19200 , loss =  0.3086712956428528 , avg loss =  0.3166497207805514
Finished training, iteration reached  20000  final loss =  0.3029806613922119  final avg =  0.308760990748865
saving snapshot to "lstm_demo2_snapshot_iter_20000.pt"

png

test_net_iterative(lstmModel,3,600)

png

Observation:

With the longer unrolling window, the training converges much slower. The loss accumulated at the end of the window needs to backpropagate many steps until it reaches a timestep in which there is still a useful memory in the LSTM layer. This makes training potentially unstable.

A better option is to attempt iterative fine tuning with slowly increasing time windows. As a bonus, this allows doing most of the training with shorter windows, which means smaller networks and faster computation.

Iterative Finetuning

n_display = 400
n_outputs = 50
n_train = 10000

snapshot_prefix = 'lstm_demo3_0_snapshot'

print("initial training ", n_outputs," ouput timesteps for ",n_train," training cycles")

lstmModel = LSTMDemoNetwork(n_hidden).to(device)
optimizer = torch.optim.Adam(lstmModel.parameters(),lr=n_adamAlpha,eps=n_adamEpsilon)

train_loss = train_single(lstmModel,lossFunction,optimizer,n_train,n_display)

#explicitly save snapshot if it has not been done yet
filename='%s_iter_%i.pt' % (snapshot_prefix,n_train)
print('saving snapshot to "%s"' % (filename))
torch.save(lstmModel.state_dict(),filename)
net0=filename

plt.plot(np.arange(n_train), train_loss)
plt.show()

test_net_iterative(lstmModel,3,600)
initial training  50  ouput timesteps for  10000  training cycles
step  0 , loss =  0.5674645304679871
step  400 , loss =  0.30983877182006836 , avg loss =  0.42093255557119846
step  800 , loss =  0.24238353967666626 , avg loss =  0.25268871303647755
step  1200 , loss =  0.18918123841285706 , avg loss =  0.20987924225628377
step  1600 , loss =  0.18205870687961578 , avg loss =  0.18941344663500786
step  2000 , loss =  0.16308528184890747 , avg loss =  0.1753477780148387
step  2400 , loss =  0.1559123545885086 , avg loss =  0.16605412032455205
step  2800 , loss =  0.14058952033519745 , avg loss =  0.15445086445659398
step  3200 , loss =  0.13714367151260376 , avg loss =  0.14846248131245374
step  3600 , loss =  0.1421949714422226 , avg loss =  0.14334236338734627
step  4000 , loss =  0.13915038108825684 , avg loss =  0.1399128881841898
step  4400 , loss =  0.13092833757400513 , avg loss =  0.13678157048299908
step  4800 , loss =  0.11198166757822037 , avg loss =  0.13241701954975724
step  5200 , loss =  0.11275465041399002 , avg loss =  0.12056515583768487
step  5600 , loss =  0.10991053283214569 , avg loss =  0.1133300599269569
step  6000 , loss =  0.1106618270277977 , avg loss =  0.11166221337392926
step  6400 , loss =  0.10361015051603317 , avg loss =  0.10439997596666217
step  6800 , loss =  0.08229666948318481 , avg loss =  0.09271162228658796
step  7200 , loss =  0.07472217082977295 , avg loss =  0.07909428033977747
step  7600 , loss =  0.06766573339700699 , avg loss =  0.0681962291803211
step  8000 , loss =  0.0323370136320591 , avg loss =  0.05369881788268685
step  8400 , loss =  0.027782050892710686 , avg loss =  0.035783933475613594
step  8800 , loss =  0.022237971425056458 , avg loss =  0.02789714374113828
step  9200 , loss =  0.027625788003206253 , avg loss =  0.02477482489310205
step  9600 , loss =  0.025524307042360306 , avg loss =  0.01947603355627507
Finished training, iteration reached  10000  final loss =  0.018504885956645012  final avg =  0.01802341028917254
saving snapshot to "lstm_demo3_0_snapshot_iter_10000.pt"

png

png

n_outputs = 100
n_train = 6000

snapshot_prefix = 'lstm_demo3_1_snapshot'

print("initial training ", n_outputs," ouput timesteps for ",n_train," training cycles")

lstmModel = LSTMDemoNetwork(n_hidden).to(device)
# load weights:
lstmModel.load_state_dict(torch.load(net0))
optimizer = torch.optim.Adam(lstmModel.parameters(),lr=n_adamAlpha,eps=n_adamEpsilon)

train_loss = train_single(lstmModel,lossFunction,optimizer,n_train,n_display)

#explicitly save snapshot if it has not been done yet
filename='%s_iter_%i.pt' % (snapshot_prefix,n_train)
print('saving snapshot to "%s"' % (filename))
torch.save(lstmModel.state_dict(),filename)
net1=filename

plt.plot(np.arange(n_train), train_loss)
plt.show()

test_net_iterative(lstmModel,3,600)
initial training  100  ouput timesteps for  6000  training cycles
step  0 , loss =  0.07179751992225647
step  400 , loss =  0.04386515915393829 , avg loss =  0.061182753699831664
step  800 , loss =  0.09873643517494202 , avg loss =  0.04699650762602687
step  1200 , loss =  0.023680172860622406 , avg loss =  0.04237248070538044
step  1600 , loss =  0.04869146645069122 , avg loss =  0.037694152360782024
step  2000 , loss =  0.019881540909409523 , avg loss =  0.030602476422209292
step  2400 , loss =  0.024556143209338188 , avg loss =  0.030611779880709945
step  2800 , loss =  0.04189608246088028 , avg loss =  0.030109703775960953
step  3200 , loss =  0.01703086495399475 , avg loss =  0.030579789385665208
step  3600 , loss =  0.015673721209168434 , avg loss =  0.027010483865160494
step  4000 , loss =  0.05981170013546944 , avg loss =  0.025126901678740977
step  4400 , loss =  0.014695246703922749 , avg loss =  0.022143100197426976
step  4800 , loss =  0.013369301334023476 , avg loss =  0.02178293542470783
step  5200 , loss =  0.018563488498330116 , avg loss =  0.024162344792857768
step  5600 , loss =  0.022386537864804268 , avg loss =  0.0228674180386588
Finished training, iteration reached  6000  final loss =  0.021470926702022552  final avg =  0.019623865252057265
saving snapshot to "lstm_demo3_1_snapshot_iter_6000.pt"

png

png

n_outputs = 200
n_train = 8000

snapshot_prefix = 'lstm_demo3_2_snapshot'

print("initial training ", n_outputs," ouput timesteps for ",n_train," training cycles")

lstmModel = LSTMDemoNetwork(n_hidden).to(device)
# load weights:
lstmModel.load_state_dict(torch.load(net1))
optimizer = torch.optim.Adam(lstmModel.parameters(),lr=n_adamAlpha,eps=n_adamEpsilon)

train_loss = train_single(lstmModel,lossFunction,optimizer,n_train,n_display)

#explicitly save snapshot if it has not been done yet
filename='%s_iter_%i.pt' % (snapshot_prefix,n_train)
print('saving snapshot to "%s"' % (filename))
torch.save(lstmModel.state_dict(),filename)
net2=filename

plt.plot(np.arange(n_train), train_loss)
plt.show()

test_net_iterative(lstmModel,3,600)
initial training  200  ouput timesteps for  8000  training cycles
step  0 , loss =  0.07823554426431656
step  400 , loss =  0.3082226812839508 , avg loss =  0.31507726760581134
step  800 , loss =  0.28302016854286194 , avg loss =  0.28704889595508576
step  1200 , loss =  0.23874340951442719 , avg loss =  0.2718718259036541
step  1600 , loss =  0.22829364240169525 , avg loss =  0.2529639072716236
step  2000 , loss =  0.18757495284080505 , avg loss =  0.24449665740132331
step  2400 , loss =  0.3054700791835785 , avg loss =  0.24931358870118855
step  2800 , loss =  0.2238008826971054 , avg loss =  0.2374013230577111
step  3200 , loss =  0.11111718416213989 , avg loss =  0.19704528357833623
step  3600 , loss =  0.09020867943763733 , avg loss =  0.15532408263534309
step  4000 , loss =  0.15085208415985107 , avg loss =  0.10852161114104092
step  4400 , loss =  0.06994427740573883 , avg loss =  0.09443443451076745
step  4800 , loss =  0.0706186443567276 , avg loss =  0.0783108841907233
step  5200 , loss =  0.04296185076236725 , avg loss =  0.06777180855162442
step  5600 , loss =  0.04341905564069748 , avg loss =  0.04988356251269579
step  6000 , loss =  0.060596633702516556 , avg loss =  0.05368827513884753
step  6400 , loss =  0.031181370839476585 , avg loss =  0.05029259566683322
step  6800 , loss =  0.06263475120067596 , avg loss =  0.052052143262699246
step  7200 , loss =  0.034290213137865067 , avg loss =  0.044606765871867536
step  7600 , loss =  0.043462689965963364 , avg loss =  0.046963601331226526
Finished training, iteration reached  8000  final loss =  0.016892900690436363  final avg =  0.04482044198113799
saving snapshot to "lstm_demo3_2_snapshot_iter_8000.pt"

png

png

n_outputs = 400
n_train = 20000

snapshot_prefix = 'lstm_demo3_2_snapshot'

print("initial training ", n_outputs," ouput timesteps for ",n_train," training cycles")

lstmModel = LSTMDemoNetwork(n_hidden).to(device)
# load weights:
lstmModel.load_state_dict(torch.load(net2))
optimizer = torch.optim.Adam(lstmModel.parameters(),lr=n_adamAlpha/2,eps=n_adamEpsilon*2)

train_loss = train_single(lstmModel,lossFunction,optimizer,n_train,n_display)

#explicitly save snapshot if it has not been done yet
filename='%s_iter_%i.pt' % (snapshot_prefix,n_train)
print('saving snapshot to "%s"' % (filename))
torch.save(lstmModel.state_dict(),filename)
net3=filename

plt.plot(np.arange(n_train), train_loss)
plt.show()

test_net_iterative(lstmModel,3,600)
initial training  400  ouput timesteps for  20000  training cycles
step  0 , loss =  0.2417384684085846
step  400 , loss =  0.3802364766597748 , avg loss =  0.3961190339922905
step  800 , loss =  0.36070266366004944 , avg loss =  0.34978813380002977
step  1200 , loss =  0.31221869587898254 , avg loss =  0.3209123172610998
step  1600 , loss =  0.31287479400634766 , avg loss =  0.3358392792195082
step  2000 , loss =  0.37020471692085266 , avg loss =  0.3133617028594017
step  2400 , loss =  0.318511426448822 , avg loss =  0.3158454853668809
step  2800 , loss =  0.3617900311946869 , avg loss =  0.3181963883712888
step  3200 , loss =  0.23838287591934204 , avg loss =  0.3042260016128421
step  3600 , loss =  0.24795377254486084 , avg loss =  0.29421926647424695
step  4000 , loss =  0.21419012546539307 , avg loss =  0.2833383076637983
step  4400 , loss =  0.37373048067092896 , avg loss =  0.2854553144797683
step  4800 , loss =  0.3574375510215759 , avg loss =  0.2726440889015794
step  5200 , loss =  0.24906352162361145 , avg loss =  0.24820176795125007
step  5600 , loss =  0.3088309168815613 , avg loss =  0.28066089980304243
step  6000 , loss =  0.3630617558956146 , avg loss =  0.2487983927503228
step  6400 , loss =  0.15489281713962555 , avg loss =  0.2558498025685549
step  6800 , loss =  0.1724589318037033 , avg loss =  0.2361430866457522
step  7200 , loss =  0.17813941836357117 , avg loss =  0.22636637752875685
step  7600 , loss =  0.22419112920761108 , avg loss =  0.19472642172127963
step  8000 , loss =  0.12985140085220337 , avg loss =  0.19410479166544975
step  8400 , loss =  0.1421784907579422 , avg loss =  0.17879845025017857
step  8800 , loss =  0.05857166647911072 , avg loss =  0.11427636014297604
step  9200 , loss =  0.07518190890550613 , avg loss =  0.1337810768187046
step  9600 , loss =  0.12090877443552017 , avg loss =  0.123783427067101
step  10000 , loss =  0.07604173570871353 , avg loss =  0.10479012469761073
step  10400 , loss =  0.055052317678928375 , avg loss =  0.09160819497890771
step  10800 , loss =  0.24722011387348175 , avg loss =  0.10011224077548832
step  11200 , loss =  0.1114102303981781 , avg loss =  0.08324208219535649
step  11600 , loss =  0.14068636298179626 , avg loss =  0.0915181951224804
step  12000 , loss =  0.06901068985462189 , avg loss =  0.08120087944436818
step  12400 , loss =  0.2151074856519699 , avg loss =  0.10351389294490218
step  12800 , loss =  0.04228517785668373 , avg loss =  0.09186296372208744
step  13200 , loss =  0.06884298473596573 , avg loss =  0.07728165757376701
step  13600 , loss =  0.07363653182983398 , avg loss =  0.07830392704810947
step  14000 , loss =  0.07472634315490723 , avg loss =  0.08215399933978915
step  14400 , loss =  0.1202554926276207 , avg loss =  0.08858864304609597
step  14800 , loss =  0.04567205160856247 , avg loss =  0.09998644658830017
step  15200 , loss =  0.07285066694021225 , avg loss =  0.07876378685235977
step  15600 , loss =  0.048514705151319504 , avg loss =  0.07967117227613926
step  16000 , loss =  0.1159677803516388 , avg loss =  0.08072371372487396
step  16400 , loss =  0.16969966888427734 , avg loss =  0.07685292581096291
step  16800 , loss =  0.03979143127799034 , avg loss =  0.07849125532899053
step  17200 , loss =  0.051311057060956955 , avg loss =  0.08302943719550968
step  17600 , loss =  0.09293260425329208 , avg loss =  0.0804610605398193
step  18000 , loss =  0.06861778348684311 , avg loss =  0.08652417317964137
step  18400 , loss =  0.10969476401805878 , avg loss =  0.0762527915276587
step  18800 , loss =  0.08178454637527466 , avg loss =  0.06858136689290405
step  19200 , loss =  0.18140973150730133 , avg loss =  0.06844571948517114
step  19600 , loss =  0.06450293213129044 , avg loss =  0.06725999435409903
Finished training, iteration reached  20000  final loss =  0.06648300588130951  final avg =  0.0775011823300207
saving snapshot to "lstm_demo3_2_snapshot_iter_20000.pt"

png

png

Long term test

test_net_iterative(lstmModel,3,1500)

png

Trained with sufficient long unrolled time window, the resulting network is capable of identifying frequency and phase of the sin() wave with high accuracy and generate a time-stable reproduction.

About

A mini tutorial to get started with Pytorch LSTM layers

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published