From eb4003b0eeff981d1e7b2a7a7cdf8e72c4b54bbe Mon Sep 17 00:00:00 2001 From: mortonjt Date: Fri, 14 Aug 2020 17:52:16 -0600 Subject: [PATCH 01/30] adding methods for prediction / evaluation --- deepblast/dataset/dataset.py | 5 +++++ deepblast/dataset/utils.py | 8 +++++++- deepblast/score.py | 22 +++++++++++++++++++++- deepblast/trainer.py | 31 +++++++++++++++++++++++++------ 4 files changed, 58 insertions(+), 8 deletions(-) diff --git a/deepblast/dataset/dataset.py b/deepblast/dataset/dataset.py index 45a0e4b..e991721 100644 --- a/deepblast/dataset/dataset.py +++ b/deepblast/dataset/dataset.py @@ -37,6 +37,11 @@ def __iter__(self): for i in range(iter_start, iter_end): yield self.__getitem__(i) +class FastaDataset(AlignmentDataset): + """ Dataset for searching. """ + def __init__(self, query_path, db_path, tokenizer=UniprotTokenizer()): + pass + class TMAlignDataset(AlignmentDataset): """ Dataset for training and testing. diff --git a/deepblast/dataset/utils.py b/deepblast/dataset/utils.py index 14a35f3..0e0cbb6 100644 --- a/deepblast/dataset/utils.py +++ b/deepblast/dataset/utils.py @@ -24,6 +24,13 @@ def tmstate_f(z): else: return m +def revstate_f(z): + if z == x: + return '1' + if z == y: + return '2' + if z == m: + return ':' def clip_boundaries(X, Y, A): """ Remove xs and ys from ends. """ @@ -138,7 +145,6 @@ def states2alignment(states: np.array, X: str, Y: str): f'The state string length {sy} does not match ' f'the length of sequence {len(X)}.\n' f'SequenceX: {X}\nSequenceY: {Y}\nStates: {states}\n' - ) i, j = 0, 0 diff --git a/deepblast/score.py b/deepblast/score.py index 2397ec1..74b242a 100644 --- a/deepblast/score.py +++ b/deepblast/score.py @@ -1,6 +1,7 @@ import numpy as np import matplotlib.pyplot as plt -from deepblast.dataset.utils import states2alignment +from deepblast.dataset.utils import states2alignment, states2edges, tmstate_f + def roc_edges(true_edges, pred_edges): @@ -16,6 +17,25 @@ def roc_edges(true_edges, pred_edges): return tp, fp, fn, perc_id, ppv, fnr, fdr +def alignment_score(true_states : str, pred_states : str): + """ + Computes ROC statistics on alignment + + Parameters + ---------- + true_states : str + Ground truth state string + pred_states : str + Predicted state string + """ + pred_states = list(map(tmstate_f, pred_states)) + true_states = list(map(tmstate_f, true_states)) + pred_edges = states2edges(pred_states) + true_edges = states2edges(true_states) + stats = roc_edges(true_edges, pred_edges) + return stats + + def alignment_visualization(truth, pred, match, gap, xlen, ylen): """ Visualize alignment matrix diff --git a/deepblast/trainer.py b/deepblast/trainer.py index ead2fc7..122d862 100644 --- a/deepblast/trainer.py +++ b/deepblast/trainer.py @@ -11,9 +11,9 @@ import pytorch_lightning as pl from deepblast.alignment import NeedlemanWunschAligner from deepblast.dataset.alphabet import UniprotTokenizer -from deepblast.dataset import TMAlignDataset +from deepblast.dataset import TMAlignDataset, MaliAlignmentDataset from deepblast.dataset.utils import ( - decode, states2edges, collate_f, unpack_sequences, pack_sequences) + decode, states2edges, collate_f, unpack_sequences, pack_sequences, revstate_f) from deepblast.losses import ( SoftAlignmentLoss, SoftPathLoss, MatrixCrossEntropy) from deepblast.score import roc_edges, alignment_visualization, alignment_text @@ -49,7 +49,25 @@ def initialize_aligner(self): f'Aligner {self.hparams.aligner_type} not implemented.') def forward(self, x, y): - return self.aligner.forward(x, y) + x_code = torch.Tensor(self.tokenizer(str.encode(x))).long() + y_code = torch.Tensor(self.tokenizer(str.encode(y))).long() + x_code = x_code.to(self.device) + y_code = y_code.to(self.device) + seq, order = pack_sequences([x_code], [y_code]) + A, theta, gap = self.aligner(seq, order) + return A, theta, gap + + def align(self, x, y): + x_code = torch.Tensor(self.tokenizer(str.encode(x))).long() + y_code = torch.Tensor(self.tokenizer(str.encode(y))).long() + x_code = x_code.to(self.device) + y_code = y_code.to(self.device) + seq, order = pack_sequences([x_code], [y_code]) + gen = self.aligner.traceback(seq, order) + decoded, _ = next(gen) + pred_x, pred_y, pred_states = list(zip(*decoded)) + s = ''.join(list(map(revstate_f, pred_states))) + return s def initialize_logging(self, root_dir='./', logging_path=None): if logging_path is None: @@ -81,6 +99,7 @@ def val_dataloader(self): return valid_dataloader def test_dataloader(self): + # Held-out TM-align dataset test_dataset = TMAlignDataset( self.hparams.test_pairs, construct_paths=isinstance(self.loss_func, SoftPathLoss)) @@ -194,9 +213,6 @@ def validation_step(self, batch, batch_idx): return {'validation_loss': loss, 'log': tensorboard_logs} - def test_step(self, batch, batch_idx): - pass - def validation_epoch_end(self, outputs): loss_f = lambda x: x['validation_loss'] losses = list(map(loss_f, outputs)) @@ -217,6 +233,9 @@ def validation_epoch_end(self, outputs): ) return {'val_loss': loss, 'log': tensorboard_logs} + def test_step(self, batch, batch_idx): + pass + def test_epoch_end(self, outputs): pass From 087aa0b3268ff66dc7849ee867b4761ef4cfa326 Mon Sep 17 00:00:00 2001 From: mortonjt Date: Tue, 18 Aug 2020 14:10:44 -0600 Subject: [PATCH 02/30] Addressing issue #65 --- deepblast/dataset/utils.py | 13 ++++- deepblast/tests/test_score.py | 90 +++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 2 deletions(-) create mode 100644 deepblast/tests/test_score.py diff --git a/deepblast/dataset/utils.py b/deepblast/dataset/utils.py index 0e0cbb6..ebd9bc6 100644 --- a/deepblast/dataset/utils.py +++ b/deepblast/dataset/utils.py @@ -105,8 +105,17 @@ def states2edges(states): prev_s, next_s = states[:-1], states[1:] transitions = list(zip(prev_s, next_s)) state_diffs = np.array(list(map(state_diff_f, transitions))) - coords = np.cumsum(state_diffs, axis=0).tolist() - coords = [(0, 0)] + list(map(tuple, coords)) + coords = np.cumsum(state_diffs, axis=0) + if states[0] == 1: + coords = [(0, 0)] + list(map(tuple, coords.tolist())) + elif states[0] == 2: + coords[:, 0] = coords[:, 0] - 1 + coords = [(0, 0)] + list(map(tuple, coords.tolist())) + elif states[0] == 0: + coords[:, 1] = coords[:, 1] - 1 + coords = [(0, 0)] + list(map(tuple, coords.tolist())) + else: + raise ValueError('Unrecognized state: `{states[2]}`') return coords diff --git a/deepblast/tests/test_score.py b/deepblast/tests/test_score.py new file mode 100644 index 0000000..e1c8a25 --- /dev/null +++ b/deepblast/tests/test_score.py @@ -0,0 +1,90 @@ +from deepblast.score import roc_edges, alignment_text +from deepblast.dataset.utils import states2edges, tmstate_f +import pandas as pd +import numpy as np +import unittest + + +class TestScore(unittest.TestCase): + + def setUp(self): + pass + + def test_alignment_text(self): + gene = 'YACSGGCGQNFRTMSEFNEHMIRLVH' + other = 'LICPKHTRDCGKVFKRNSSLRVHEKTH' + pred = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1]) + truth = np.array([1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1]) + stats = np.array([1, 1, 1, 1, 1, 1, 1]) + alignment_text(gene, other, pred, truth, stats) + + def test_roc_edges(self): + cols = ['tp', 'fp', 'fn', 'perc_id', 'ppv', 'fnr', 'fdr'] + + exp_alignment = ( + 'FRCPRPAGCE--KLYSTSSHVNKHLLL', + 'YDCE---ICQSFKDFSPYMKLRKHRAT', + '::::111:::22:::::::::::::::' + ) + res_alignment = ( + 'FRCPRPAGCEKLYSTSSHVNKHLL', + 'YDCEICQSFKDFSPYMKLRKHRAT', + '::::::::::::::::::::::::' + ) + # TODO: there are still parts of the alignment + # that are being clipped erronously + exp_edges = states2edges( + list(map(tmstate_f, exp_alignment[2]))) + res_edges = states2edges( + list(map(tmstate_f, res_alignment[2]))) + + res = pd.Series(roc_edges(exp_edges, res_edges), index=cols) + + self.assertGreater(res.perc_id, 0.1) + + def test_roc_edges_2(self): + cols = ['tp', 'fp', 'fn', 'perc_id', 'ppv', 'fnr', 'fdr'] + exp_alignment = ( + 'SVHTLLDEKHETLDSEWEKLVRDAMTSGVSKKQFREFLDYQKWRKSQ', + ':1111111111111111111111111111::::::::::::::::::', + 'I----------------------------FTYGELQRMQEKERNKGQ' + ) + res_alignment = ( + 'SVHTLLDEKHETLDSEWEKLVRDAMTSGVSKKQFREFLDYQKWRKSQ', + '1:1111111:111111111111111111:11::::::::::::::::', + '-I-------F------------------T--YGELQRMQEKERNKGQ' + ) + + exp_edges = states2edges( + list(map(tmstate_f, exp_alignment[2]))) + res_edges = states2edges( + list(map(tmstate_f, res_alignment[2]))) + res = pd.Series(roc_edges(exp_edges, res_edges), index=cols) + self.assertGreater(res.tp, 20) + self.assertGreater(res.perc_id, 0.5) + + def test_roc_edges_3(self): + cols = ['tp', 'fp', 'fn', 'perc_id', 'ppv', 'fnr', 'fdr'] + exp_alignment = ( + 'F--GD--D--------QN-PYTESVDILEDLVIEFITEMTHKAMSI', + 'ISHLVIMHEEGEVDGKAIPDLTAPVSAVQAAVSNLVRVGKETVQTT', + ':22::22:22222222::2:::::::::::::::::::::::::::' + ) + res_alignment = ( + '-FG---D------D--QN-PYTESVDILEDLVIEFITEMTHKAMSI', + 'ISHLVIMHEEGEVDGKAIPDLTAPVSAVQAAVSNLVRVGKETVQTT', + '2::222:222222:22::2:::::::::::::::::::::::::::' + ) + exp_edges = states2edges( + list(map(tmstate_f, exp_alignment[2]))) + res_edges = states2edges( + list(map(tmstate_f, res_alignment[2]))) + res = pd.Series(roc_edges(exp_edges, res_edges), index=cols) + self.assertGreater(res.tp, 20) + self.assertGreater(res.perc_id, 0.5) + + +if __name__ == '__main__': + unittest.main() From a37278416fb9e0e35d2762f99e0981a90d0f80e3 Mon Sep 17 00:00:00 2001 From: mortonjt Date: Tue, 18 Aug 2020 18:52:19 -0600 Subject: [PATCH 03/30] WIP: adding priors, multilinear layer and mask --- deepblast/alignment.py | 50 ++++++++++++++++++--------- deepblast/dataset/tests/test_utils.py | 5 +++ deepblast/dataset/utils.py | 13 ++++--- deepblast/embedding.py | 17 +++++++++ deepblast/losses.py | 38 ++++++++++++++++++++ deepblast/trainer.py | 2 +- 6 files changed, 101 insertions(+), 24 deletions(-) diff --git a/deepblast/alignment.py b/deepblast/alignment.py index 25a222d..fae84de 100644 --- a/deepblast/alignment.py +++ b/deepblast/alignment.py @@ -2,15 +2,16 @@ import torch.nn as nn from deepblast.language_model import BiLM, pretrained_language_models from deepblast.nw_cuda import NeedlemanWunschDecoder as NWDecoderCUDA -from deepblast.embedding import StackedRNN, EmbedLinear +from deepblast.embedding import StackedRNN, EmbedLinear, MultiLinear from deepblast.dataset.utils import unpack_sequences import torch.nn.functional as F +import math class NeedlemanWunschAligner(nn.Module): def __init__(self, n_alpha, n_input, n_units, n_embed, - n_layers=2, lm=None, device='gpu'): + n_layers=2, n_heads=16, lm=None, device='gpu', local=True): """ NeedlemanWunsch Alignment model Parameters @@ -25,6 +26,8 @@ def __init__(self, n_alpha, n_input, n_units, n_embed, Embedding dimension n_layers : int Number of RNN layers. + n_heads : int + Number of heads in multilinear layer. lm : BiLM Pretrained language model (optional) padding_idx : int @@ -32,6 +35,8 @@ def __init__(self, n_alpha, n_input, n_units, n_embed, transform : function Activation function (default relu) sparse : False? + local : bool + Specifies if local alignment should be performed on the traceback """ super(NeedlemanWunschAligner, self).__init__() if lm is None: @@ -49,12 +54,15 @@ def __init__(self, n_alpha, n_input, n_units, n_embed, n_alpha, n_input, n_embed, lm=lm) self.gap_embedding = EmbedLinear( n_alpha, n_input, n_embed, lm=lm) - + self.match_mixture = MultiLinear(n_input, n_input, n_heads) + self.gap_mixture = MultiLinear(n_input, n_input, n_heads) # TODO: make cpu compatible version # if device == 'cpu': # self.nw = NWDecoderCPU(operator='softmax') # else: self.nw = NWDecoderCUDA(operator='softmax') + self.local = local + def forward(self, x, order): """ Generate alignment matrix. @@ -70,27 +78,37 @@ def forward(self, x, order): Alignment Matrix (dim B x N x M) """ with torch.enable_grad(): - zx, _, zy, _ = unpack_sequences(self.match_embedding(x), order) - gx, _, gy, _ = unpack_sequences(self.gap_embedding(x), order) + zx_, _, zy_, _ = unpack_sequences(self.match_embedding(x), order) + gx_, _, gy_, _ = unpack_sequences(self.gap_embedding(x), order) + zx, zy = match_mixture(zx_), match_mixture(zy_) + gx, gy = gap_mixture(zx_), gap_mixture(zy_) # Obtain theta through an inner product across latent dimensions - theta = F.softplus(torch.einsum('bid,bjd->bij', zx, zy)) - A = F.logsigmoid(torch.einsum('bid,bjd->bij', gx, gy)) + theta = torch.einsum('bid,bjd->bij', zx, zy) + A = torch.einsum('bid,bjd->bij', gx, gy) aln = self.nw.decode(theta, A) return aln, theta, A def traceback(self, x, order): # dim B x N x D with torch.enable_grad(): - zx, _, zy, _ = unpack_sequences(self.match_embedding(x), order) - gx, xlen, gy, ylen = unpack_sequences(self.gap_embedding(x), order) - match = F.softplus(torch.einsum('bid,bjd->bij', zx, zy)) - gap = F.logsigmoid(torch.einsum('bid,bjd->bij', gx, gy)) + zx_, _, zy_, _ = unpack_sequences(self.match_embedding(x), order) + gx_, xlen, gy_, ylen = unpack_sequences(self.gap_embedding(x), order) + zx, zy = match_mixture(zx_), match_mixture(zy_) + gx, gy = gap_mixture(zx_), gap_mixture(zy_) + + match = torch.einsum('bid,bjd->bij', zx, zy) + gap = torch.einsum('bid,bjd->bij', gx, gy) B, _, _ = match.shape + for b in range(B): - aln = self.nw.decode( - match[b, :xlen[b], :ylen[b]].unsqueeze(0), - gap[b, :xlen[b], :ylen[b]].unsqueeze(0) - ) + M = match[b, :xlen[b], :ylen[b]].unsqueeze(0) + G = gap[b, :xlen[b], :ylen[b]].unsqueeze(0) + val = math.log(1 - (1/50)) # based on average insertion length + if self.local: + G[0, 0, :] = val + G[0, :, 0] = val + aln = self.nw.decode(M, G) decoded = self.nw.traceback(aln.squeeze()) - yield decoded, aln + del G, aln + yield decoded diff --git a/deepblast/dataset/tests/test_utils.py b/deepblast/dataset/tests/test_utils.py index 7c0db9b..245dc18 100644 --- a/deepblast/dataset/tests/test_utils.py +++ b/deepblast/dataset/tests/test_utils.py @@ -30,6 +30,11 @@ def test_states2matrix_zinc(self): s = np.array(list(map(tmstate_f, s))) states2matrix(s, sparse=True) + def test_states2matrix_insert(self): + # Test how this is constructed if there are + # gaps in the beginning of the alignment + pass + def test_states2matrix_only_matches(self): s = ":11::11:" s = np.array(list(map(tmstate_f, s))) diff --git a/deepblast/dataset/utils.py b/deepblast/dataset/utils.py index ebd9bc6..9d06a72 100644 --- a/deepblast/dataset/utils.py +++ b/deepblast/dataset/utils.py @@ -106,19 +106,18 @@ def states2edges(states): transitions = list(zip(prev_s, next_s)) state_diffs = np.array(list(map(state_diff_f, transitions))) coords = np.cumsum(state_diffs, axis=0) - if states[0] == 1: + if states[0] == m: coords = [(0, 0)] + list(map(tuple, coords.tolist())) - elif states[0] == 2: - coords[:, 0] = coords[:, 0] - 1 + elif states[0] == y: + coords[:, 0] = np.maximum(coords[:, 0] - 1, 0) coords = [(0, 0)] + list(map(tuple, coords.tolist())) - elif states[0] == 0: - coords[:, 1] = coords[:, 1] - 1 + elif states[0] == x: + coords[:, 1] = np.maximum(coords[:, 1] - 1, 0) coords = [(0, 0)] + list(map(tuple, coords.tolist())) else: - raise ValueError('Unrecognized state: `{states[2]}`') + raise ValueError(f'Unrecognized state {states[0]}') return coords - def states2matrix(states, sparse=False): """ Converts state string to alignment matrix. diff --git a/deepblast/embedding.py b/deepblast/embedding.py index 510e572..4d69820 100644 --- a/deepblast/embedding.py +++ b/deepblast/embedding.py @@ -2,6 +2,23 @@ from torch.nn.utils.rnn import PackedSequence +class MultiLinear(nn.Module): + """ Multiple linear layers concatenated together""" + def __init__(self, n_input, n_output, n_heads=16): + super(M, self).__init__() + self.multi_output = torch.nn.ModuleList( + [nn.Linear(n_input, n_output) + for i in range(num_heads)] + ) + self.mixture = nn.Linear(num_heads, 1) + + def forward(self, x): + attn_outputs = torch.stack( + [head(x) for head in self.multi_output], dim=-1) + outputs = self.mixture(attn_outputs) + return outputs + + class LMEmbed(nn.Module): def __init__(self, nin, nout, lm, padding_idx=-1, transform=nn.ReLU(), sparse=False): diff --git a/deepblast/losses.py b/deepblast/losses.py index 7d5b509..6328950 100644 --- a/deepblast/losses.py +++ b/deepblast/losses.py @@ -6,6 +6,44 @@ def __call__(self, true_edges, pred_edges): pass +class PenalizedMatrixCrossEntropy: + def __call__(self, Ytrue, Ypred, M, G, + match_prior, gap_prior, x_mask, y_mask): + """ Computes binary cross entropy on the matrix with regularizers. + + The matrix cross entropy loss is given by + + d(ypred, ytrue) = - (mean(ytrue x log(ypred)) + + mean((1 - ytrue) x log(1 - ypred))) + + Parameters + ---------- + Ytrue : torch.Tensor + Ground truth alignment matrix of dimension N x M. + All entries are marked by 0 and 1. + Ypred : torch.Tensor + Predicted alignment matrix of dimension N x M. + + """ + score = 0 + eps = 3e-8 # unfortunately, this is the smallest eps we can have :( + Ypred = torch.clamp(Ypred, min=eps, max=1 - eps) + for b in range(len(x_len)): + pos = torch.mean( + Ytrue[b, x_mask[b], y_mask[b]] * torch.log( + Ypred[b, x_mask[b], y_mask[b]]) + ) + neg = torch.mean( + (1 - Ytrue[b, x_mask[b], y_mask[b]]) * torch.log( + 1 - Ypred[b, x_mask[b], y_mask[b]]) + ) + score += -(pos + neg) + log_like = score / len(x_len) + match_log = match_prior.log_prob(M).mean() + gap_log = gap_prior.log_prob(G).mean() + score = log_like + match_log + gap_log + return score + class MatrixCrossEntropy: def __call__(self, Ytrue, Ypred, x_len, y_len): """ Computes binary cross entropy on the matrix diff --git a/deepblast/trainer.py b/deepblast/trainer.py index 122d862..f4b2d1e 100644 --- a/deepblast/trainer.py +++ b/deepblast/trainer.py @@ -64,7 +64,7 @@ def align(self, x, y): y_code = y_code.to(self.device) seq, order = pack_sequences([x_code], [y_code]) gen = self.aligner.traceback(seq, order) - decoded, _ = next(gen) + decoded = next(gen) pred_x, pred_y, pred_states = list(zip(*decoded)) s = ''.join(list(map(revstate_f, pred_states))) return s From 2ac1d61937683ac1724d17ee5569b5ccd4fd1b5d Mon Sep 17 00:00:00 2001 From: mortonjt Date: Wed, 19 Aug 2020 07:55:39 -0600 Subject: [PATCH 04/30] adding embedding tests --- deepblast/alignment.py | 29 ++++++++++++----------------- deepblast/embedding.py | 25 +++++++++++++++++++------ deepblast/tests/test_embedding.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 23 deletions(-) create mode 100644 deepblast/tests/test_embedding.py diff --git a/deepblast/alignment.py b/deepblast/alignment.py index fae84de..404e931 100644 --- a/deepblast/alignment.py +++ b/deepblast/alignment.py @@ -2,7 +2,7 @@ import torch.nn as nn from deepblast.language_model import BiLM, pretrained_language_models from deepblast.nw_cuda import NeedlemanWunschDecoder as NWDecoderCUDA -from deepblast.embedding import StackedRNN, EmbedLinear, MultiLinear +from deepblast.embedding import StackedRNN, EmbedLinear, MultiheadProduct from deepblast.dataset.utils import unpack_sequences import torch.nn.functional as F import math @@ -54,8 +54,8 @@ def __init__(self, n_alpha, n_input, n_units, n_embed, n_alpha, n_input, n_embed, lm=lm) self.gap_embedding = EmbedLinear( n_alpha, n_input, n_embed, lm=lm) - self.match_mixture = MultiLinear(n_input, n_input, n_heads) - self.gap_mixture = MultiLinear(n_input, n_input, n_heads) + self.match_mixture = MultiheadProduct(n_input, n_input, n_heads) + self.gap_mixture = MultiheadProduct(n_input, n_input, n_heads) # TODO: make cpu compatible version # if device == 'cpu': # self.nw = NWDecoderCPU(operator='softmax') @@ -78,27 +78,22 @@ def forward(self, x, order): Alignment Matrix (dim B x N x M) """ with torch.enable_grad(): - zx_, _, zy_, _ = unpack_sequences(self.match_embedding(x), order) - gx_, _, gy_, _ = unpack_sequences(self.gap_embedding(x), order) - zx, zy = match_mixture(zx_), match_mixture(zy_) - gx, gy = gap_mixture(zx_), gap_mixture(zy_) - + zx, _, zy, _ = unpack_sequences(self.match_embedding(x), order) + gx, _, gy, _ = unpack_sequences(self.gap_embedding(x), order) # Obtain theta through an inner product across latent dimensions - theta = torch.einsum('bid,bjd->bij', zx, zy) - A = torch.einsum('bid,bjd->bij', gx, gy) + theta = self.match_mixture(zx, zy) + A = self.gap_mixture(gx, gy) + aln = self.nw.decode(theta, A) return aln, theta, A def traceback(self, x, order): # dim B x N x D with torch.enable_grad(): - zx_, _, zy_, _ = unpack_sequences(self.match_embedding(x), order) - gx_, xlen, gy_, ylen = unpack_sequences(self.gap_embedding(x), order) - zx, zy = match_mixture(zx_), match_mixture(zy_) - gx, gy = gap_mixture(zx_), gap_mixture(zy_) - - match = torch.einsum('bid,bjd->bij', zx, zy) - gap = torch.einsum('bid,bjd->bij', gx, gy) + zx, _, zy, _ = unpack_sequences(self.match_embedding(x), order) + gx, xlen, gy, ylen = unpack_sequences(self.gap_embedding(x), order) + match = self.match_mixture(zx, zy) + gap = self.gap_mixture(gx, gy) B, _, _ = match.shape for b in range(B): diff --git a/deepblast/embedding.py b/deepblast/embedding.py index 4d69820..a4bd97e 100644 --- a/deepblast/embedding.py +++ b/deepblast/embedding.py @@ -1,3 +1,4 @@ +import torch import torch.nn as nn from torch.nn.utils.rnn import PackedSequence @@ -5,20 +6,32 @@ class MultiLinear(nn.Module): """ Multiple linear layers concatenated together""" def __init__(self, n_input, n_output, n_heads=16): - super(M, self).__init__() - self.multi_output = torch.nn.ModuleList( + super(MultiLinear, self).__init__() + self.multi_output = nn.ModuleList( [nn.Linear(n_input, n_output) - for i in range(num_heads)] + for i in range(n_heads)] ) - self.mixture = nn.Linear(num_heads, 1) def forward(self, x): - attn_outputs = torch.stack( + outputs = torch.stack( [head(x) for head in self.multi_output], dim=-1) - outputs = self.mixture(attn_outputs) return outputs +class MultiheadProduct(nn.Module): + def __init__(self, n_input, n_output, n_heads=16): + super(MultiheadProduct, self).__init__() + self.multilinear = MultiLinear(n_input, n_output, n_heads) + self.linear = nn.Linear(n_heads, 1) + + def forward(self, x, y): + zx = self.multilinear(x) + zy = self.multilinear(y) + dists = torch.einsum('bidh,bjdh->bijh', zx, zy) + output = self.linear(dists) + return output.squeeze() + + class LMEmbed(nn.Module): def __init__(self, nin, nout, lm, padding_idx=-1, transform=nn.ReLU(), sparse=False): diff --git a/deepblast/tests/test_embedding.py b/deepblast/tests/test_embedding.py new file mode 100644 index 0000000..fe6c898 --- /dev/null +++ b/deepblast/tests/test_embedding.py @@ -0,0 +1,28 @@ +import torch +from deepblast.embedding import MultiLinear, MultiheadProduct +import unittest + + +class TestEmbedding(unittest.TestCase): + def setUp(self): + b, l, d, h = 3, 100, 50, 8 + self.x = torch.randn(b, l, d) + self.y = torch.randn(b, l, d) + self.b = b + self.l = l + self.d = d + self.h = h + + def test_multilinear(self): + model = MultiLinear(self.d, self.d, self.h) + res = model(self.x) + self.assertEqual(tuple(res.shape), (self.b, self.l, self.d, self.h)) + + def test_multihead_product(self): + model = MultiheadProduct(self.d, self.d, self.h) + res = model(self.x, self.y) + self.assertEqual(tuple(res.shape), (self.b, self.l, self.l)) + + +if __name__ == '__main__': + unittest.main() From 1aaa91ecfd85fd8c07ec1faba118a1dec739955d Mon Sep 17 00:00:00 2001 From: mortonjt Date: Wed, 19 Aug 2020 08:20:52 -0600 Subject: [PATCH 05/30] adding multihead product. zeroing out first gap row/column for local alignments. condensing gap mask --- deepblast/alignment.py | 16 +++-- deepblast/dataset/dataset.py | 12 +++- deepblast/dataset/tests/test_utils.py | 58 +++++++++++++++++- deepblast/dataset/utils.py | 88 +++++++++++++++++++++++++++ 4 files changed, 168 insertions(+), 6 deletions(-) diff --git a/deepblast/alignment.py b/deepblast/alignment.py index 404e931..6580fa1 100644 --- a/deepblast/alignment.py +++ b/deepblast/alignment.py @@ -54,8 +54,8 @@ def __init__(self, n_alpha, n_input, n_units, n_embed, n_alpha, n_input, n_embed, lm=lm) self.gap_embedding = EmbedLinear( n_alpha, n_input, n_embed, lm=lm) - self.match_mixture = MultiheadProduct(n_input, n_input, n_heads) - self.gap_mixture = MultiheadProduct(n_input, n_input, n_heads) + self.match_mixture = MultiheadProduct(n_embed, n_embed, n_heads) + self.gap_mixture = MultiheadProduct(n_embed, n_embed, n_heads) # TODO: make cpu compatible version # if device == 'cpu': # self.nw = NWDecoderCPU(operator='softmax') @@ -82,7 +82,10 @@ def forward(self, x, order): gx, _, gy, _ = unpack_sequences(self.gap_embedding(x), order) # Obtain theta through an inner product across latent dimensions theta = self.match_mixture(zx, zy) - A = self.gap_mixture(gx, gy) + # zero out first row and first column for local alignments + L = gx.shape[1] + A = torch.zeros((L, L)) + A[1:, 1:] = self.gap_mixture(gx[:, 1:, :], gy[:, 1:, :]) aln = self.nw.decode(theta, A) return aln, theta, A @@ -93,7 +96,12 @@ def traceback(self, x, order): zx, _, zy, _ = unpack_sequences(self.match_embedding(x), order) gx, xlen, gy, ylen = unpack_sequences(self.gap_embedding(x), order) match = self.match_mixture(zx, zy) - gap = self.gap_mixture(gx, gy) + + # zero out first row and first column for local alignments + L = gx.shape[1] + gap = torch.zeros((L, L)) + gap[1:, 1:] = self.gap_mixture(gx[:, 1:, :], gy[:, 1:, :]) + B, _, _ = match.shape for b in range(B): diff --git a/deepblast/dataset/dataset.py b/deepblast/dataset/dataset.py index e991721..89bd55d 100644 --- a/deepblast/dataset/dataset.py +++ b/deepblast/dataset/dataset.py @@ -8,10 +8,20 @@ from deepblast.dataset.utils import ( state_f, tmstate_f, clip_boundaries, states2matrix, states2edges, - path_distance_matrix + path_distance_matrix, remove_orphans, gap_mask ) +def reshape(x, N, M): + if x.shape != (N, M) and x.shape != (M, N): + raise ValueError(f'The shape of `x` {x.shape} ' + f'does not agree with ({N}, {M})') + if tuple(x.shape) != (N, M): + return x.t() + else: + return x + + class AlignmentDataset(Dataset): def __init__(self, pairs, tokenizer=UniprotTokenizer()): self.tokenizer = tokenizer diff --git a/deepblast/dataset/tests/test_utils.py b/deepblast/dataset/tests/test_utils.py index 245dc18..5f6c4f2 100644 --- a/deepblast/dataset/tests/test_utils.py +++ b/deepblast/dataset/tests/test_utils.py @@ -2,7 +2,8 @@ from deepblast.dataset.utils import ( tmstate_f, states2matrix, states2alignment, path_distance_matrix, clip_boundaries, - pack_sequences, unpack_sequences) + pack_sequences, unpack_sequences, gap_mask, + remove_orphans) from math import sqrt import numpy as np import numpy.testing as npt @@ -278,5 +279,60 @@ def test_unpack_sequences(self): tt.assert_allclose(expY, resY) + +class TestPreprocess(unittest.TestCase): + + def test_gap_mask(self): + s = ":11::22:" + N, M = 6, 6 + res = gap_mask(s, N, M) + exp_x = np.array([3, 4]) + exp_y = np.array([1, 2]) + + npt.assert_equal(res[0], exp_x) + npt.assert_equal(res[1], exp_y) + + s = ":11:.:22:" + N, M = 7, 7 + res = gap_mask(s, N, M) + exp_x = np.array([2, 4, 5]) + exp_y = np.array([1, 2, 4]) + npt.assert_equal(res[0], exp_x) + npt.assert_equal(res[1], exp_y) + + def test_gap_mask2(self): + s = ( + '222222222222222222.11112222222222222222222222222' + '222222222222222222222222222222222222222222222222' + '22222222...::::::..:2:22::2:::::::..11.111...::.' + '::::::::::.::::......:::::::::::222:.::::::::.11' + '.:::::::::.:22.::::::::::::2:::::::::::::::1::..' + '.::::::::::::::::::::::22:2:2::::::::::1::::::::' + '::::22222::::::::::1::::::.' + ) + # N, M = 197, 283 + res = gap_mask(s) + + def test_replace_orphans_small(self): + s = ":11:11:" + e = ":111211:" + r = remove_orphans(s, threshold=3) + self.assertEqual(r, e) + + def test_replace_orphans(self): + s = ":1111111111:11111111111111:" + e = ":11111111111211111111111111:" + r = remove_orphans(s, threshold=9) + self.assertEqual(r, e) + + s = ":2222222222:22222222222222:" + e = ":22222222221222222222222222:" + r = remove_orphans(s, threshold=9) + self.assertEqual(r, e) + + s = ":1111111111:22222222222222:" + r = remove_orphans(s, threshold=9) + self.assertEqual(r, s) + if __name__ == '__main__': unittest.main() diff --git a/deepblast/dataset/utils.py b/deepblast/dataset/utils.py index 9d06a72..cba88b9 100644 --- a/deepblast/dataset/utils.py +++ b/deepblast/dataset/utils.py @@ -4,6 +4,8 @@ from scipy.sparse import coo_matrix from scipy.spatial import cKDTree from deepblast.constants import x, m, y +from itertools import islice +from functools import reduce def state_f(z): @@ -287,3 +289,89 @@ def path_distance_matrix(pi): d, i = model.query(coords) Pdist = np.array(coo_matrix((d, (coords[:, 0], coords[:, 1]))).todense()) return Pdist + + +# Preprocessing functions +def gap_mask(states: np.array, N : int, M : int): + """ Builds a mask for all gaps. + + Reports rows and columns that should be completely masked. + + Parameters + ---------- + states : np.array + List of alignment states + Returns + ------- + mask : np.array + Masked array. + """ + i, j = 0, 0 + rows, cols = [], [] + for k in range(len(states)): + if states[k] == '1': + cols.append(i) + i += 1 + elif states[k] == '2': + rows.append(j) + j += 1 + elif states[k] == ':': + i += 1 + j += 1 + elif states[k] == '.': + cols.append(i) + rows.append(j) + i += 1 + j += 1 + else: + raise ValueError(f'{states[k]} is not recognized') + return np.array(rows), np.array(cols) + + +def window(seq, n=2): + "Returns a sliding window (of width n) over data from the iterable" + " s -> (s0,s1,...s[n-1]), (s1,s2,...,sn), ... " + it = iter(seq) + result = tuple(islice(it, n)) + if len(result) == n: + yield result + for elem in it: + result = result[1:] + (elem,) + yield result + + +def replace_orphan(w, s=5): + i = len(w) // 2 + # identify orphans and replace with gaps + sw = ''.join(w) + if (w[i] == ':') and((('1' * s) in sw[:i] and ('1' * s) in sw[i:]) or + (('2' * s) in sw[:i] and ('2' * s) in sw[i:])): + return ['1', '2'] + else: + return [w[i]] + + +def remove_orphans(states, threshold : int=11): + """ Removes singletons and doubletons that are orphaned. + A match is considered orphaned if it exceeds the `threshold` gap. + Parameters + ---------- + states : np.array + List of alignment states + threshold : int + Number of consecutive gaps surrounding a matched required for it + to be considered an orphan. + Returns + ------- + new_states : np.array + States string with orphans removed. + Notes + ----- + The threshold *must* be an odd number. This determines the window size. + """ + wins = list(window(states, threshold)) + rwins = list(map(lambda x: replace_orphan(x, threshold // 2), list(wins))) + new_states = list(reduce(lambda x, y: x + y, rwins)) + new_states = list(states[:threshold//2]) + new_states + \ + list(states[-threshold//2 + 1:]) + return ''.join(new_states) From efdcbd3e45cc0ff9be74a3a9f8f3c2369e964c8e Mon Sep 17 00:00:00 2001 From: mortonjt Date: Wed, 19 Aug 2020 08:32:34 -0600 Subject: [PATCH 06/30] updating penalized cross entropy --- deepblast/constants.py | 6 ++++++ deepblast/losses.py | 15 +++++++++++---- deepblast/trainer.py | 19 +++++++++++++------ 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/deepblast/constants.py b/deepblast/constants.py index 633a8c5..3eb770c 100644 --- a/deepblast/constants.py +++ b/deepblast/constants.py @@ -1 +1,7 @@ x, m, y = 0, 1, 2 # state numberings + +match_mean = 0 +match_std = 3 + +gap_mean = -4 +gap_std = 3 diff --git a/deepblast/losses.py b/deepblast/losses.py index 6328950..7ab21e6 100644 --- a/deepblast/losses.py +++ b/deepblast/losses.py @@ -1,4 +1,6 @@ import torch +from torch.distributions import Normal +from deepblast.constants import match_mean, match_std, gap_mean, gap_std class AlignmentAccuracy: @@ -6,9 +8,8 @@ def __call__(self, true_edges, pred_edges): pass -class PenalizedMatrixCrossEntropy: - def __call__(self, Ytrue, Ypred, M, G, - match_prior, gap_prior, x_mask, y_mask): +class L2MatrixCrossEntropy: + def __call__(self, Ytrue, Ypred, M, G, x_mask, y_mask): """ Computes binary cross entropy on the matrix with regularizers. The matrix cross entropy loss is given by @@ -23,7 +24,10 @@ def __call__(self, Ytrue, Ypred, M, G, All entries are marked by 0 and 1. Ypred : torch.Tensor Predicted alignment matrix of dimension N x M. - + M : torch.Tensor + Match score matrix + G : torch.Tensor + Gap score matrix """ score = 0 eps = 3e-8 # unfortunately, this is the smallest eps we can have :( @@ -38,6 +42,9 @@ def __call__(self, Ytrue, Ypred, M, G, 1 - Ypred[b, x_mask[b], y_mask[b]]) ) score += -(pos + neg) + + match_prior = Normal(match_mean, match_prior) + gap_prior = Normal(gap_mean, gap_prior) log_like = score / len(x_len) match_log = match_prior.log_prob(M).mean() gap_log = gap_prior.log_prob(G).mean() diff --git a/deepblast/trainer.py b/deepblast/trainer.py index f4b2d1e..90b327f 100644 --- a/deepblast/trainer.py +++ b/deepblast/trainer.py @@ -13,9 +13,11 @@ from deepblast.dataset.alphabet import UniprotTokenizer from deepblast.dataset import TMAlignDataset, MaliAlignmentDataset from deepblast.dataset.utils import ( - decode, states2edges, collate_f, unpack_sequences, pack_sequences, revstate_f) + decode, states2edges, collate_f, unpack_sequences, + pack_sequences, revstate_f) from deepblast.losses import ( - SoftAlignmentLoss, SoftPathLoss, MatrixCrossEntropy) + SoftAlignmentLoss, SoftPathLoss, MatrixCrossEntropy, + L2MatrixCrossEntropy) from deepblast.score import roc_edges, alignment_visualization, alignment_text @@ -28,6 +30,8 @@ def __init__(self, args): self.initialize_aligner() if self.hparams.loss == 'sse': self.loss_func = SoftAlignmentLoss() + elif self.hparams.loss == 'l2_cross_entropy': + self.loss_func = L2MatrixCrossEntropy() elif self.hparams.loss == 'cross_entropy': self.loss_func = MatrixCrossEntropy() elif self.hparams.loss == 'path': @@ -109,12 +113,14 @@ def test_dataloader(self): pin_memory=True) return test_dataloader - def compute_loss(self, x, y, predA, A, P, theta): + def compute_loss(self, x, y, predA, A, P, theta, gap): if isinstance(self.loss_func, SoftAlignmentLoss): loss = self.loss_func(A, predA, x, y) elif isinstance(self.loss_func, MatrixCrossEntropy): loss = self.loss_func(A, predA, x, y) + elif isinstance(self.loss_func, L2MatrixCrossEntropy): + loss = self.loss_func(A, predA, theta, gap, x, y) elif isinstance(self.loss_func, SoftPathLoss): loss = self.loss_func(P, predA, x, y) if self.hparams.multitask: @@ -134,7 +140,7 @@ def training_step(self, batch, batch_idx): seq, order = pack_sequences(genes, others) predA, theta, gap = self.aligner(seq, order) _, xlen, _, ylen = unpack_sequences(seq, order) - loss = self.compute_loss(xlen, ylen, predA, A, P, theta) + loss = self.compute_loss(xlen, ylen, predA, A, P, theta, gap) assert torch.isnan(loss).item() is False if len(self.trainer.lr_schedulers) >= 1: current_lr = self.trainer.lr_schedulers[0]['scheduler'] @@ -194,7 +200,7 @@ def validation_step(self, batch, batch_idx): seq, order = pack_sequences(genes, others) predA, theta, gap = self.aligner(seq, order) x, xlen, y, ylen = unpack_sequences(seq, order) - loss = self.compute_loss(xlen, ylen, predA, A, P, theta) + loss = self.compute_loss(xlen, ylen, predA, A, P, theta, gap) assert torch.isnan(loss).item() is False # Obtain alignment statistics + visualizations gen = self.aligner.traceback(seq, order) @@ -299,7 +305,8 @@ def add_model_specific_args(parent_parser): required=False, type=int, default=2) parser.add_argument( '--loss', - help=('Loss function. Options include {sse, path, cross_entropy} ' + help=('Loss function. Options include ' + '{sse, path, cross_entropy, l2_cross_entropy} ' '(default cross_entropy)'), default='cross_entropy', required=False, type=str) parser.add_argument( From dae23c48f8b41285389d5b229872d43c46d96c74 Mon Sep 17 00:00:00 2001 From: mortonjt Date: Wed, 19 Aug 2020 10:03:48 -0600 Subject: [PATCH 07/30] adjusting gap mask --- deepblast/dataset/dataset.py | 8 ++++++- deepblast/dataset/tests/test_dataset.py | 2 +- deepblast/dataset/tests/test_utils.py | 9 +++----- deepblast/dataset/utils.py | 25 ++++++++++++++++++---- deepblast/trainer.py | 28 ++++++++++++------------- 5 files changed, 45 insertions(+), 27 deletions(-) diff --git a/deepblast/dataset/dataset.py b/deepblast/dataset/dataset.py index 89bd55d..5e53923 100644 --- a/deepblast/dataset/dataset.py +++ b/deepblast/dataset/dataset.py @@ -163,7 +163,13 @@ def __getitem__(self, i): path_matrix = path_matrix.t() if tuple(alignment_matrix.shape) != (len(gene), len(pos)): alignment_matrix = alignment_matrix.t() - return gene, pos, states, alignment_matrix, path_matrix + + gene_mask, pos_mask = gap_mask(states) + gene_mask = torch.Tensor(gene_mask) + pos_mask = torch.Tensor(pos_mask) + + return (gene, pos, states, alignment_matrix, path_matrix, + gene_mask, pos_mask) class MaliAlignmentDataset(AlignmentDataset): diff --git a/deepblast/dataset/tests/test_dataset.py b/deepblast/dataset/tests/test_dataset.py index 5270189..6320775 100644 --- a/deepblast/dataset/tests/test_dataset.py +++ b/deepblast/dataset/tests/test_dataset.py @@ -19,7 +19,7 @@ def test_getitem(self): pad_ends=False, clip_ends=False) res = x[0] self.assertEqual(len(res), 5) - gene, pos, states, alignment_matrix, _ = res + gene, pos, states, alignment_matrix, _, _, _ = res # test the lengths self.assertEqual(len(gene), 103) self.assertEqual(len(pos), 21) diff --git a/deepblast/dataset/tests/test_utils.py b/deepblast/dataset/tests/test_utils.py index 5f6c4f2..236d642 100644 --- a/deepblast/dataset/tests/test_utils.py +++ b/deepblast/dataset/tests/test_utils.py @@ -2,7 +2,7 @@ from deepblast.dataset.utils import ( tmstate_f, states2matrix, states2alignment, path_distance_matrix, clip_boundaries, - pack_sequences, unpack_sequences, gap_mask, + collate_f, pack_sequences, unpack_sequences, gap_mask, remove_orphans) from math import sqrt import numpy as np @@ -279,13 +279,11 @@ def test_unpack_sequences(self): tt.assert_allclose(expY, resY) - class TestPreprocess(unittest.TestCase): def test_gap_mask(self): s = ":11::22:" - N, M = 6, 6 - res = gap_mask(s, N, M) + res = gap_mask(s) exp_x = np.array([3, 4]) exp_y = np.array([1, 2]) @@ -293,8 +291,7 @@ def test_gap_mask(self): npt.assert_equal(res[1], exp_y) s = ":11:.:22:" - N, M = 7, 7 - res = gap_mask(s, N, M) + res = gap_mask(s) exp_x = np.array([2, 4, 5]) exp_y = np.array([1, 2, 4]) npt.assert_equal(res[0], exp_x) diff --git a/deepblast/dataset/utils.py b/deepblast/dataset/utils.py index cba88b9..5fef3f3 100644 --- a/deepblast/dataset/utils.py +++ b/deepblast/dataset/utils.py @@ -252,8 +252,17 @@ def collate_f(batch): states = [x[2] for x in batch] alignments = [x[3] for x in batch] paths = [x[4] for x in batch] - max_x = max(map(len, genes)) - max_y = max(map(len, others)) + g_mask = [x[5] for x in batch] + p_mask = [x[6] for x in batch] + + x_len = list(map(len, genes)) + y_len = list(map(len, others)) + + max_x = max(x_len) + max_y = max(y_len) + x_mask = [] + y_mask = [] + B = len(genes) dm = torch.zeros((B, max_x, max_y)) p = torch.zeros((B, max_x, max_y)) @@ -261,7 +270,9 @@ def collate_f(batch): n, m = len(genes[b]), len(others[b]) dm[b, :n, :m] = alignments[b] p[b, :n, :m] = paths[b] - return genes, others, states, dm, p + x_mask.append(merge_mask(g_mask[b], n, max_x)) + y_mask.append(merge_mask(p_mask[b], m, max_y)) + return genes, others, states, dm, p, (x_mask, y_mask) def path_distance_matrix(pi): @@ -291,8 +302,14 @@ def path_distance_matrix(pi): return Pdist +def merge_mask(idx, length, mask_length): + pads = torch.Tensor(list(range(length, mask_length))) + idx = torch.stack(idx, pads) + return idx + + # Preprocessing functions -def gap_mask(states: np.array, N : int, M : int): +def gap_mask(states: np.array): """ Builds a mask for all gaps. Reports rows and columns that should be completely masked. diff --git a/deepblast/trainer.py b/deepblast/trainer.py index 90b327f..ce5925c 100644 --- a/deepblast/trainer.py +++ b/deepblast/trainer.py @@ -113,22 +113,23 @@ def test_dataloader(self): pin_memory=True) return test_dataloader - def compute_loss(self, x, y, predA, A, P, theta, gap): - + def compute_loss(self, mask, predA, A, P, theta, gap): + x_mask, y_mask = mask if isinstance(self.loss_func, SoftAlignmentLoss): - loss = self.loss_func(A, predA, x, y) + loss = self.loss_func(A, predA, x_mask, y_mask) elif isinstance(self.loss_func, MatrixCrossEntropy): - loss = self.loss_func(A, predA, x, y) + loss = self.loss_func(A, predA, x_mask, y_mask) elif isinstance(self.loss_func, L2MatrixCrossEntropy): - loss = self.loss_func(A, predA, theta, gap, x, y) + loss = self.loss_func(A, predA, theta, gap, x_mask, y_mask) elif isinstance(self.loss_func, SoftPathLoss): - loss = self.loss_func(P, predA, x, y) + loss = self.loss_func(P, predA, x_mask, y_mask) if self.hparams.multitask: current_lr = self.trainer.lr_schedulers[0]['scheduler'] current_lr = current_lr.get_last_lr()[0] max_lr = self.hparams.learning_rate lam = current_lr / max_lr - match_loss = self.loss_func(torch.sigmoid(theta), predA, x, y) + match_loss = self.loss_func(torch.sigmoid(theta), predA, + x_mask, y_mask) # when learning rate is large, weight match loss # otherwise, weight towards DP loss = lam * match_loss + (1 - lam) * loss @@ -136,11 +137,11 @@ def compute_loss(self, x, y, predA, A, P, theta, gap): def training_step(self, batch, batch_idx): self.aligner.train() - genes, others, s, A, P = batch + genes, others, s, A, P, mask = batch seq, order = pack_sequences(genes, others) predA, theta, gap = self.aligner(seq, order) - _, xlen, _, ylen = unpack_sequences(seq, order) - loss = self.compute_loss(xlen, ylen, predA, A, P, theta, gap) + x_mask, y_mask = mask + loss = self.compute_loss(mask, predA, A, P, theta, gap) assert torch.isnan(loss).item() is False if len(self.trainer.lr_schedulers) >= 1: current_lr = self.trainer.lr_schedulers[0]['scheduler'] @@ -194,17 +195,14 @@ def validation_stats(self, x, y, xlen, ylen, gen, return statistics def validation_step(self, batch, batch_idx): - # TODO: something weird is going on with the lengths - # Need to make sure that they are being sorted properly - genes, others, s, A, P = batch + genes, others, s, A, P, mask = batch seq, order = pack_sequences(genes, others) predA, theta, gap = self.aligner(seq, order) x, xlen, y, ylen = unpack_sequences(seq, order) - loss = self.compute_loss(xlen, ylen, predA, A, P, theta, gap) + loss = self.compute_loss(mask, predA, A, P, theta, gap) assert torch.isnan(loss).item() is False # Obtain alignment statistics + visualizations gen = self.aligner.traceback(seq, order) - # TODO; compare the traceback and the forward statistics = self.validation_stats( x, y, xlen, ylen, gen, s, A, predA, theta, gap, batch_idx) statistics = pd.DataFrame( From e8c8a483f12102900a740b5ca81c943208eca862 Mon Sep 17 00:00:00 2001 From: mortonjt Date: Wed, 19 Aug 2020 10:10:49 -0600 Subject: [PATCH 08/30] flake8 --- deepblast/dataset/dataset.py | 3 ++- deepblast/dataset/tests/test_utils.py | 5 +++-- deepblast/dataset/utils.py | 13 ++++++++----- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/deepblast/dataset/dataset.py b/deepblast/dataset/dataset.py index 5e53923..27af0b1 100644 --- a/deepblast/dataset/dataset.py +++ b/deepblast/dataset/dataset.py @@ -8,7 +8,7 @@ from deepblast.dataset.utils import ( state_f, tmstate_f, clip_boundaries, states2matrix, states2edges, - path_distance_matrix, remove_orphans, gap_mask + path_distance_matrix, gap_mask ) @@ -47,6 +47,7 @@ def __iter__(self): for i in range(iter_start, iter_end): yield self.__getitem__(i) + class FastaDataset(AlignmentDataset): """ Dataset for searching. """ def __init__(self, query_path, db_path, tokenizer=UniprotTokenizer()): diff --git a/deepblast/dataset/tests/test_utils.py b/deepblast/dataset/tests/test_utils.py index 236d642..146a4d2 100644 --- a/deepblast/dataset/tests/test_utils.py +++ b/deepblast/dataset/tests/test_utils.py @@ -2,7 +2,7 @@ from deepblast.dataset.utils import ( tmstate_f, states2matrix, states2alignment, path_distance_matrix, clip_boundaries, - collate_f, pack_sequences, unpack_sequences, gap_mask, + pack_sequences, unpack_sequences, gap_mask, remove_orphans) from math import sqrt import numpy as np @@ -308,7 +308,7 @@ def test_gap_mask2(self): '::::22222::::::::::1::::::.' ) # N, M = 197, 283 - res = gap_mask(s) + gap_mask(s) def test_replace_orphans_small(self): s = ":11:11:" @@ -331,5 +331,6 @@ def test_replace_orphans(self): r = remove_orphans(s, threshold=9) self.assertEqual(r, s) + if __name__ == '__main__': unittest.main() diff --git a/deepblast/dataset/utils.py b/deepblast/dataset/utils.py index 5fef3f3..e29a7cc 100644 --- a/deepblast/dataset/utils.py +++ b/deepblast/dataset/utils.py @@ -26,6 +26,7 @@ def tmstate_f(z): else: return m + def revstate_f(z): if z == x: return '1' @@ -34,6 +35,7 @@ def revstate_f(z): if z == m: return ':' + def clip_boundaries(X, Y, A): """ Remove xs and ys from ends. """ if A[0] == m: @@ -120,6 +122,7 @@ def states2edges(states): raise ValueError(f'Unrecognized state {states[0]}') return coords + def states2matrix(states, sparse=False): """ Converts state string to alignment matrix. @@ -361,14 +364,14 @@ def replace_orphan(w, s=5): i = len(w) // 2 # identify orphans and replace with gaps sw = ''.join(w) - if (w[i] == ':') and((('1' * s) in sw[:i] and ('1' * s) in sw[i:]) or - (('2' * s) in sw[:i] and ('2' * s) in sw[i:])): + if (w[i] == ':') and ((('1' * s) in sw[:i] and ('1' * s) in sw[i:]) or + (('2' * s) in sw[:i] and ('2' * s) in sw[i:])): return ['1', '2'] else: return [w[i]] -def remove_orphans(states, threshold : int=11): +def remove_orphans(states, threshold: int = 11): """ Removes singletons and doubletons that are orphaned. A match is considered orphaned if it exceeds the `threshold` gap. Parameters @@ -389,6 +392,6 @@ def remove_orphans(states, threshold : int=11): wins = list(window(states, threshold)) rwins = list(map(lambda x: replace_orphan(x, threshold // 2), list(wins))) new_states = list(reduce(lambda x, y: x + y, rwins)) - new_states = list(states[:threshold//2]) + new_states + \ - list(states[-threshold//2 + 1:]) + new_states = list(states[:threshold // 2]) + new_states + new_states += list(states[-threshold // 2 + 1:]) return ''.join(new_states) From 6b38107da2bd36e38219d5163e5e5f112090e198 Mon Sep 17 00:00:00 2001 From: mortonjt Date: Wed, 19 Aug 2020 10:15:45 -0600 Subject: [PATCH 09/30] more flake8 --- deepblast/alignment.py | 2 -- deepblast/losses.py | 39 ++++++++++++++++--------------- deepblast/score.py | 3 +-- deepblast/tests/test_embedding.py | 12 +++++----- deepblast/trainer.py | 2 +- 5 files changed, 28 insertions(+), 30 deletions(-) diff --git a/deepblast/alignment.py b/deepblast/alignment.py index 6580fa1..3e620eb 100644 --- a/deepblast/alignment.py +++ b/deepblast/alignment.py @@ -4,7 +4,6 @@ from deepblast.nw_cuda import NeedlemanWunschDecoder as NWDecoderCUDA from deepblast.embedding import StackedRNN, EmbedLinear, MultiheadProduct from deepblast.dataset.utils import unpack_sequences -import torch.nn.functional as F import math @@ -63,7 +62,6 @@ def __init__(self, n_alpha, n_input, n_units, n_embed, self.nw = NWDecoderCUDA(operator='softmax') self.local = local - def forward(self, x, order): """ Generate alignment matrix. diff --git a/deepblast/losses.py b/deepblast/losses.py index 7ab21e6..8b603ae 100644 --- a/deepblast/losses.py +++ b/deepblast/losses.py @@ -32,7 +32,7 @@ def __call__(self, Ytrue, Ypred, M, G, x_mask, y_mask): score = 0 eps = 3e-8 # unfortunately, this is the smallest eps we can have :( Ypred = torch.clamp(Ypred, min=eps, max=1 - eps) - for b in range(len(x_len)): + for b in range(len(x_mask)): pos = torch.mean( Ytrue[b, x_mask[b], y_mask[b]] * torch.log( Ypred[b, x_mask[b], y_mask[b]]) @@ -43,16 +43,17 @@ def __call__(self, Ytrue, Ypred, M, G, x_mask, y_mask): ) score += -(pos + neg) - match_prior = Normal(match_mean, match_prior) - gap_prior = Normal(gap_mean, gap_prior) - log_like = score / len(x_len) + match_prior = Normal(match_mean, match_std) + gap_prior = Normal(gap_mean, gap_std) + log_like = score / len(x_mask) match_log = match_prior.log_prob(M).mean() gap_log = gap_prior.log_prob(G).mean() score = log_like + match_log + gap_log return score + class MatrixCrossEntropy: - def __call__(self, Ytrue, Ypred, x_len, y_len): + def __call__(self, Ytrue, Ypred, x_mask, y_mask): """ Computes binary cross entropy on the matrix The matrix cross entropy loss is given by @@ -71,21 +72,21 @@ def __call__(self, Ytrue, Ypred, x_len, y_len): score = 0 eps = 3e-8 # unfortunately, this is the smallest eps we can have :( Ypred = torch.clamp(Ypred, min=eps, max=1 - eps) - for b in range(len(x_len)): + for b in range(len(x_mask)): pos = torch.mean( - Ytrue[b, :x_len[b], :y_len[b]] * torch.log( - Ypred[b, :x_len[b], :y_len[b]]) + Ytrue[b, x_mask[b], y_mask[b]] * torch.log( + Ypred[b, x_mask[b], y_mask[b]]) ) neg = torch.mean( - (1 - Ytrue[b, :x_len[b], :y_len[b]]) * torch.log( - 1 - Ypred[b, :x_len[b], :y_len[b]]) + (1 - Ytrue[b, x_mask[b], y_mask[b]]) * torch.log( + 1 - Ypred[b, x_mask[b], y_mask[b]]) ) score += -(pos + neg) - return score / len(x_len) + return score / len(x_mask) class SoftPathLoss: - def __call__(self, Pdist, Ypred, x_len, y_len): + def __call__(self, Pdist, Ypred, x_mask, y_mask): """ Computes a soft path loss The soft path loss is given by @@ -105,15 +106,15 @@ def __call__(self, Pdist, Ypred, x_len, y_len): Predicted alignment matrix of dimension N x M. """ score = 0 - for b in range(len(x_len)): + for b in range(len(x_mask)): score += torch.norm( - Pdist[b, :x_len[b], :y_len[b]] * Ypred[b, :x_len[b], :y_len[b]] + Pdist[b, x_mask[b], y_mask[b]] * Ypred[b, x_mask[b], y_mask[b]] ) - return score / len(x_len) + return score / len(x_mask) class SoftAlignmentLoss: - def __call__(self, Ytrue, Ypred, x_len, y_len): + def __call__(self, Ytrue, Ypred, x_mask, y_mask): """ Computes soft alignment loss as proposed in Mensch et al. The soft alignment loss is given by @@ -141,8 +142,8 @@ def __call__(self, Ytrue, Ypred, x_len, y_len): since it is possible to leave out important parts of the alignment. """ score = 0 - for b in range(len(x_len)): + for b in range(len(x_mask)): score += torch.norm( - Ytrue[b, :x_len[b], :y_len[b]] - Ypred[b, :x_len[b], :y_len[b]] + Ytrue[b, x_mask[b], y_mask[b]] - Ypred[b, x_mask[b], y_mask[b]] ) - return score / len(x_len) + return score / len(x_mask) diff --git a/deepblast/score.py b/deepblast/score.py index 74b242a..81b920d 100644 --- a/deepblast/score.py +++ b/deepblast/score.py @@ -3,7 +3,6 @@ from deepblast.dataset.utils import states2alignment, states2edges, tmstate_f - def roc_edges(true_edges, pred_edges): truth = set(true_edges) pred = set(pred_edges) @@ -17,7 +16,7 @@ def roc_edges(true_edges, pred_edges): return tp, fp, fn, perc_id, ppv, fnr, fdr -def alignment_score(true_states : str, pred_states : str): +def alignment_score(true_states: str, pred_states: str): """ Computes ROC statistics on alignment diff --git a/deepblast/tests/test_embedding.py b/deepblast/tests/test_embedding.py index fe6c898..371db34 100644 --- a/deepblast/tests/test_embedding.py +++ b/deepblast/tests/test_embedding.py @@ -5,23 +5,23 @@ class TestEmbedding(unittest.TestCase): def setUp(self): - b, l, d, h = 3, 100, 50, 8 - self.x = torch.randn(b, l, d) - self.y = torch.randn(b, l, d) + b, L, d, h = 3, 100, 50, 8 + self.x = torch.randn(b, L, d) + self.y = torch.randn(b, L, d) self.b = b - self.l = l + self.L = L self.d = d self.h = h def test_multilinear(self): model = MultiLinear(self.d, self.d, self.h) res = model(self.x) - self.assertEqual(tuple(res.shape), (self.b, self.l, self.d, self.h)) + self.assertEqual(tuple(res.shape), (self.b, self.L, self.d, self.h)) def test_multihead_product(self): model = MultiheadProduct(self.d, self.d, self.h) res = model(self.x, self.y) - self.assertEqual(tuple(res.shape), (self.b, self.l, self.l)) + self.assertEqual(tuple(res.shape), (self.b, self.L, self.L)) if __name__ == '__main__': diff --git a/deepblast/trainer.py b/deepblast/trainer.py index ce5925c..d00518f 100644 --- a/deepblast/trainer.py +++ b/deepblast/trainer.py @@ -11,7 +11,7 @@ import pytorch_lightning as pl from deepblast.alignment import NeedlemanWunschAligner from deepblast.dataset.alphabet import UniprotTokenizer -from deepblast.dataset import TMAlignDataset, MaliAlignmentDataset +from deepblast.dataset import TMAlignDataset from deepblast.dataset.utils import ( decode, states2edges, collate_f, unpack_sequences, pack_sequences, revstate_f) From 81b7ee10758da7c8490d16c91575ede9da6188c7 Mon Sep 17 00:00:00 2001 From: mortonjt Date: Wed, 19 Aug 2020 10:42:06 -0600 Subject: [PATCH 10/30] fix dataset tests --- deepblast/dataset/dataset.py | 3 ++- deepblast/dataset/tests/test_dataset.py | 7 +++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/deepblast/dataset/dataset.py b/deepblast/dataset/dataset.py index 27af0b1..136f353 100644 --- a/deepblast/dataset/dataset.py +++ b/deepblast/dataset/dataset.py @@ -141,6 +141,8 @@ def __getitem__(self, i): gene = self.pairs.iloc[i]['chain1'] pos = self.pairs.iloc[i]['chain2'] states = self.pairs.iloc[i]['alignment'] + + gene_mask, pos_mask = gap_mask(states) states = list(map(tmstate_f, states)) if self.clip_ends: gene, pos, states = clip_boundaries(gene, pos, states) @@ -165,7 +167,6 @@ def __getitem__(self, i): if tuple(alignment_matrix.shape) != (len(gene), len(pos)): alignment_matrix = alignment_matrix.t() - gene_mask, pos_mask = gap_mask(states) gene_mask = torch.Tensor(gene_mask) pos_mask = torch.Tensor(pos_mask) diff --git a/deepblast/dataset/tests/test_dataset.py b/deepblast/dataset/tests/test_dataset.py index 6320775..d314fa9 100644 --- a/deepblast/dataset/tests/test_dataset.py +++ b/deepblast/dataset/tests/test_dataset.py @@ -18,14 +18,13 @@ def test_getitem(self): x = TMAlignDataset(self.data_path, tm_threshold=0, pad_ends=False, clip_ends=False) res = x[0] - self.assertEqual(len(res), 5) + self.assertEqual(len(res), 7) gene, pos, states, alignment_matrix, _, _, _ = res # test the lengths self.assertEqual(len(gene), 103) self.assertEqual(len(pos), 21) self.assertEqual(len(states), 103) - # wtf is going on here?? - self.assertEqual(alignment_matrix.shape, (22, 103)) + self.assertEqual(alignment_matrix.shape, (103, 21)) class TestMaliDataset(unittest.TestCase): @@ -47,7 +46,7 @@ def test_getitem(self): self.assertEqual(len(gene), 81) self.assertEqual(len(pos), 81) self.assertEqual(len(states), 100) - self.assertEqual(alignment_matrix.shape, (81, 82)) + self.assertEqual(alignment_matrix.shape, (81, 81)) if __name__ == '__main__': From 2d828def87c2c1cfc4b84b4a8cf91816fc5d1032 Mon Sep 17 00:00:00 2001 From: mortonjt Date: Wed, 19 Aug 2020 11:14:09 -0600 Subject: [PATCH 11/30] fixing tests in alignment --- deepblast/alignment.py | 14 ++++++++------ deepblast/dataset/utils.py | 2 +- deepblast/tests/test_alignment.py | 18 ++++++++++-------- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/deepblast/alignment.py b/deepblast/alignment.py index 3e620eb..92c713e 100644 --- a/deepblast/alignment.py +++ b/deepblast/alignment.py @@ -80,10 +80,11 @@ def forward(self, x, order): gx, _, gy, _ = unpack_sequences(self.gap_embedding(x), order) # Obtain theta through an inner product across latent dimensions theta = self.match_mixture(zx, zy) + A = self.gap_mixture(gx, gy) # zero out first row and first column for local alignments - L = gx.shape[1] - A = torch.zeros((L, L)) - A[1:, 1:] = self.gap_mixture(gx[:, 1:, :], gy[:, 1:, :]) + # L = gx.shape[1] + # A = torch.zeros((L, L)) + # A[1:, 1:] = self.gap_mixture(gx[:, 1:, :], gy[:, 1:, :]) aln = self.nw.decode(theta, A) return aln, theta, A @@ -94,11 +95,12 @@ def traceback(self, x, order): zx, _, zy, _ = unpack_sequences(self.match_embedding(x), order) gx, xlen, gy, ylen = unpack_sequences(self.gap_embedding(x), order) match = self.match_mixture(zx, zy) + gap = self.gap_mixture(gx, gy) # zero out first row and first column for local alignments - L = gx.shape[1] - gap = torch.zeros((L, L)) - gap[1:, 1:] = self.gap_mixture(gx[:, 1:, :], gy[:, 1:, :]) + # L = gx.shape[1] + # gap = torch.zeros((L, L)) + # gap[1:, 1:] = self.gap_mixture(gx[:, 1:, :], gy[:, 1:, :]) B, _, _ = match.shape diff --git a/deepblast/dataset/utils.py b/deepblast/dataset/utils.py index e29a7cc..66bfd89 100644 --- a/deepblast/dataset/utils.py +++ b/deepblast/dataset/utils.py @@ -307,7 +307,7 @@ def path_distance_matrix(pi): def merge_mask(idx, length, mask_length): pads = torch.Tensor(list(range(length, mask_length))) - idx = torch.stack(idx, pads) + idx = torch.cat((idx, pads)) return idx diff --git a/deepblast/tests/test_alignment.py b/deepblast/tests/test_alignment.py index f713ab7..a539768 100644 --- a/deepblast/tests/test_alignment.py +++ b/deepblast/tests/test_alignment.py @@ -18,20 +18,20 @@ def setUp(self): nalpha, ninput, nunits, nembed = 22, 1024, 1024, 1024 self.aligner = NeedlemanWunschAligner(nalpha, ninput, nunits, nembed) - @unittest.skip + @unittest.skipUnless(torch.cuda.is_available(), 'No GPU was detected') def test_alignment(self): self.embedding = self.embedding.cuda() self.aligner = self.aligner.cuda() x = torch.Tensor( self.tokenizer(b'ARNDCQEGHILKMFPSTWYVXOUBZ') - ).unsqueeze(0).long().cuda() + ).long().cuda() y = torch.Tensor( self.tokenizer(b'ARNDCQEGHILKARNDCQMFPSTWYVXOUBZ') - ).unsqueeze(0).long().cuda() - N, M = x.shape[1], y.shape[1] - seq, order = pack_sequences([x], [y]) + ).long().cuda() + M = max(x.shape[0], y.shape[0]) + seq, order = pack_sequences([x, x], [y, y]) aln, theta, A = self.aligner(seq, order) - self.assertEqual(aln.shape, (1, N, M)) + self.assertEqual(aln.shape, (2, M, M)) @unittest.skipUnless(torch.cuda.is_available(), "No GPU detected") def test_batch_alignment(self): @@ -64,8 +64,10 @@ def test_collate_alignment(self): A2 = torch.ones((len(x2), len(y2))).long() P1 = torch.ones((len(x1), len(y1))).long() P2 = torch.ones((len(x2), len(y2))).long() - batch = [(x1, y1, s1, A1, P1), (x2, y2, s2, A2, P2)] - gene_codes, other_codes, states, dm, p = collate_f(batch) + mask = [torch.Tensor([0, 1]), torch.Tensor([0, 1])] + batch = [(x1, y1, s1, A1, P1, mask[0], mask[1]), + (x2, y2, s2, A2, P2, mask[0], mask[1])] + gene_codes, other_codes, states, dm, p, mask = collate_f(batch) self.embedding = self.embedding.cuda() self.aligner = self.aligner.cuda() seq, order = pack_sequences(gene_codes, other_codes) From 037442288d61bc4ded35820f05c73ebf229bc8de Mon Sep 17 00:00:00 2001 From: mortonjt Date: Wed, 19 Aug 2020 13:05:00 -0600 Subject: [PATCH 12/30] fixing dimensionality issues --- deepblast/alignment.py | 3 +- deepblast/dataset/dataset.py | 4 +- deepblast/dataset/utils.py | 6 +- deepblast/losses.py | 28 +++++--- deepblast/trainer.py | 17 ++--- ipynb/simulation-benchmark.ipynb | 116 ++++++++++++++++++++----------- 6 files changed, 111 insertions(+), 63 deletions(-) diff --git a/deepblast/alignment.py b/deepblast/alignment.py index 92c713e..eaf1978 100644 --- a/deepblast/alignment.py +++ b/deepblast/alignment.py @@ -113,5 +113,4 @@ def traceback(self, x, order): G[0, :, 0] = val aln = self.nw.decode(M, G) decoded = self.nw.traceback(aln.squeeze()) - del G, aln - yield decoded + yield decoded, aln diff --git a/deepblast/dataset/dataset.py b/deepblast/dataset/dataset.py index 136f353..815cb3e 100644 --- a/deepblast/dataset/dataset.py +++ b/deepblast/dataset/dataset.py @@ -167,8 +167,8 @@ def __getitem__(self, i): if tuple(alignment_matrix.shape) != (len(gene), len(pos)): alignment_matrix = alignment_matrix.t() - gene_mask = torch.Tensor(gene_mask) - pos_mask = torch.Tensor(pos_mask) + # gene_mask = torch.Tensor(gene_mask).long() + # pos_mask = torch.Tensor(pos_mask).long() return (gene, pos, states, alignment_matrix, path_matrix, gene_mask, pos_mask) diff --git a/deepblast/dataset/utils.py b/deepblast/dataset/utils.py index 66bfd89..06a100e 100644 --- a/deepblast/dataset/utils.py +++ b/deepblast/dataset/utils.py @@ -306,8 +306,10 @@ def path_distance_matrix(pi): def merge_mask(idx, length, mask_length): - pads = torch.Tensor(list(range(length, mask_length))) - idx = torch.cat((idx, pads)) + pads = set(list(range(length, mask_length))) + idx = set(idx) | pads + allx = set(list(range(0, mask_length))) + idx = torch.Tensor(list(allx - idx)).long() return idx diff --git a/deepblast/losses.py b/deepblast/losses.py index 8b603ae..480c7ed 100644 --- a/deepblast/losses.py +++ b/deepblast/losses.py @@ -3,6 +3,10 @@ from deepblast.constants import match_mean, match_std, gap_mean, gap_std +def mask_tensor(A, x_mask, y_mask): + return A[x_mask][:, y_mask] + + class AlignmentAccuracy: def __call__(self, true_edges, pred_edges): pass @@ -34,12 +38,12 @@ def __call__(self, Ytrue, Ypred, M, G, x_mask, y_mask): Ypred = torch.clamp(Ypred, min=eps, max=1 - eps) for b in range(len(x_mask)): pos = torch.mean( - Ytrue[b, x_mask[b], y_mask[b]] * torch.log( - Ypred[b, x_mask[b], y_mask[b]]) + mask_tensor(Ytrue[b], x_mask[b], y_mask[b]) * torch.log( + mask_tensor(Ypred[b], x_mask[b], y_mask[b])) ) neg = torch.mean( - (1 - Ytrue[b, x_mask[b], y_mask[b]]) * torch.log( - 1 - Ypred[b, x_mask[b], y_mask[b]]) + (1 - mask_tensor(Ytrue[b], x_mask[b], y_mask[b])) * torch.log( + 1 - mask_tensor(Ypred[b], x_mask[b], y_mask[b])) ) score += -(pos + neg) @@ -74,13 +78,21 @@ def __call__(self, Ytrue, Ypred, x_mask, y_mask): Ypred = torch.clamp(Ypred, min=eps, max=1 - eps) for b in range(len(x_mask)): pos = torch.mean( - Ytrue[b, x_mask[b], y_mask[b]] * torch.log( - Ypred[b, x_mask[b], y_mask[b]]) + mask_tensor(Ytrue[b], x_mask[b], y_mask[b]) * torch.log( + mask_tensor(Ypred[b], x_mask[b], y_mask[b])) ) neg = torch.mean( - (1 - Ytrue[b, x_mask[b], y_mask[b]]) * torch.log( - 1 - Ypred[b, x_mask[b], y_mask[b]]) + (1 - mask_tensor(Ytrue[b], x_mask[b], y_mask[b])) * torch.log( + 1 - mask_tensor(Ypred[b], x_mask[b], y_mask[b])) ) + # pos = torch.mean( + # Ytrue[b, x_mask[b], y_mask[b]] * torch.log( + # Ypred[b, x_mask[b], y_mask[b]]) + # ) + # neg = torch.mean( + # (1 - Ytrue[b, x_mask[b], y_mask[b]]) * torch.log( + # 1 - Ypred[b, x_mask[b], y_mask[b]]) + # f) score += -(pos + neg) return score / len(x_mask) diff --git a/deepblast/trainer.py b/deepblast/trainer.py index d00518f..70706b5 100644 --- a/deepblast/trainer.py +++ b/deepblast/trainer.py @@ -45,12 +45,10 @@ def initialize_aligner(self): n_input = self.hparams.rnn_input_dim n_units = self.hparams.rnn_dim n_layers = self.hparams.layers - if self.hparams.aligner == 'nw': - self.aligner = NeedlemanWunschAligner( - n_alpha, n_input, n_units, n_embed, n_layers) - else: - raise NotImplementedError( - f'Aligner {self.hparams.aligner_type} not implemented.') + n_heads = self.hparams.heads + self.aligner = NeedlemanWunschAligner( + n_alpha, n_input, n_units, n_embed, n_layers, n_heads) + def forward(self, x, y): x_code = torch.Tensor(self.tokenizer(str.encode(x))).long() @@ -285,10 +283,6 @@ def add_model_specific_args(parent_parser): '--test-pairs', help='Testing pairs file', required=True) parser.add_argument( '--valid-pairs', help='Validation pairs file', required=True) - parser.add_argument( - '-a', '--aligner', - help='Aligner type. Choices include (nw, hmm).', - required=False, type=str, default='nw') parser.add_argument( '--embedding-dim', help='Embedding dimension (default 512).', required=False, type=int, default=512) @@ -301,6 +295,9 @@ def add_model_specific_args(parent_parser): parser.add_argument( '--layers', help='Number of RNN layers (default 2).', required=False, type=int, default=2) + parser.add_argument( + '--heads', help='Number heads in attention layer (default 8).', + required=False, type=int, default=8) parser.add_argument( '--loss', help=('Loss function. Options include ' diff --git a/ipynb/simulation-benchmark.ipynb b/ipynb/simulation-benchmark.ipynb index 9b986e8..75e76a4 100644 --- a/ipynb/simulation-benchmark.ipynb +++ b/ipynb/simulation-benchmark.ipynb @@ -122,38 +122,7 @@ "cell_type": "code", "execution_count": 6, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "usage: ipykernel_launcher.py [-h] --train-pairs TRAIN_PAIRS --test-pairs TEST_PAIRS --valid-pairs VALID_PAIRS [-a ALIGNER]\n", - " [--embedding-dim EMBEDDING_DIM] [--rnn-input-dim RNN_INPUT_DIM] [--rnn-dim RNN_DIM] [--layers LAYERS]\n", - " [--loss LOSS] [--learning-rate LEARNING_RATE] [--batch-size BATCH_SIZE] [--multitask MULTITASK]\n", - " [--finetune FINETUNE] [--clip-ends CLIP_ENDS] [--scheduler SCHEDULER] [--epochs EPOCHS]\n", - " [--visualization-fraction VISUALIZATION_FRACTION] -o OUTPUT_DIRECTORY [--num-workers NUM_WORKERS]\n", - " [--gpus GPUS]\n", - "ipykernel_launcher.py: error: unrecognized arguments: --load-from-checkpoint lightning_logs/version_5/checkpoints\n" - ] - }, - { - "ename": "SystemExit", - "evalue": "2", - "output_type": "error", - "traceback": [ - "An exception has occurred, use %tb to see the full traceback.\n", - "\u001b[0;31mSystemExit\u001b[0m\u001b[0;31m:\u001b[0m 2\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3339: UserWarning: To exit: use 'exit', 'quit', or Ctrl-D.\n", - " warn(\"To exit: use 'exit', 'quit', or Ctrl-D.\", stacklevel=1)\n" - ] - } - ], + "outputs": [], "source": [ "args = [\n", " '--train-pairs', f'{os.getcwd()}/data/train.txt',\n", @@ -168,8 +137,7 @@ " '--visualization-fraction', '1',\n", " '--loss', 'cross_entropy',\n", " '--scheduler', 'none',\n", - " '--gpus', '1',\n", - " '--load-from-checkpoint', 'lightning_logs/version_5/checkpoints'\n", + " '--gpus', '1'\n", "]\n", "parser = argparse.ArgumentParser(add_help=False)\n", "parser = LightningAligner.add_model_specific_args(parser)\n", @@ -180,9 +148,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No traceback available to show.\n" + ] + } + ], "source": [ "%tb" ] @@ -196,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -212,11 +188,73 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "scrolled": false }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "CUDA_VISIBLE_DEVICES: [0]\n", + "\n", + " | Name | Type | Params\n", + "---------------------------------------------------\n", + "0 | aligner | NeedlemanWunschAligner | 38 M \n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "23b2538f21804cd287e3b698f87a3853", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor(-0.0554, device='cuda:0') tensor(-0.0655, device='cuda:0')\n", + "tensor(-0.0566, device='cuda:0') tensor(-0.0642, device='cuda:0')\n", + "tensor(-0.0597, device='cuda:0') tensor(-0.0663, device='cuda:0')\n", + "tensor(-0.0598, device='cuda:0') tensor(-0.0664, device='cuda:0')\n", + "tensor(-0.0578, device='cuda:0') tensor(-0.0665, device='cuda:0')\n", + "tensor(-0.0624, device='cuda:0') tensor(-0.0637, device='cuda:0')\n", + "tensor(-0.0586, device='cuda:0') tensor(-0.0677, device='cuda:0')\n", + "tensor(-0.0602, device='cuda:0') tensor(-0.0664, device='cuda:0')\n", + "tensor(-0.0575, device='cuda:0') tensor(-0.0678, device='cuda:0')\n", + "tensor(-0.0579, device='cuda:0') tensor(-0.0677, device='cuda:0')\n" + ] + }, + { + "ename": "ValueError", + "evalue": "too many values to unpack (expected 2)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 8\u001b[0m )\n\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, model, train_dataloader, val_dataloaders)\u001b[0m\n\u001b[1;32m 977\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 978\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msingle_gpu\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 979\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msingle_gpu_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 980\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 981\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_tpu\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# pragma: no-cover\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/distrib_parts.py\u001b[0m in \u001b[0;36msingle_gpu_train\u001b[0;34m(self, model)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreinit_scheduler_properties\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptimizers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlr_schedulers\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 184\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 185\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_pretrain_routine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 186\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mtpu_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtpu_core_idx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py\u001b[0m in \u001b[0;36mrun_pretrain_routine\u001b[0;34m(self, model)\u001b[0m\n\u001b[1;32m 1134\u001b[0m \u001b[0mnum_loaders\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mval_dataloaders\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1135\u001b[0m \u001b[0mmax_batches\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_sanity_val_steps\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mnum_loaders\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1136\u001b[0;31m eval_results = self._evaluate(model,\n\u001b[0m\u001b[1;32m 1137\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mval_dataloaders\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1138\u001b[0m \u001b[0mmax_batches\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/evaluation_loop.py\u001b[0m in \u001b[0;36m_evaluate\u001b[0;34m(self, model, dataloaders, max_batches, test_mode)\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluation_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_idx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdataloader_idx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_mode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 293\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluation_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_idx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdataloader_idx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_mode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 294\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[0;31m# on dp / ddp2 might still want to do something with the batch parts\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/evaluation_loop.py\u001b[0m in \u001b[0;36mevaluation_forward\u001b[0;34m(self, model, batch, batch_idx, dataloader_idx, test_mode)\u001b[0m\n\u001b[1;32m 483\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtest_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 484\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 485\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalidation_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 486\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Documents/research/garfunkel/deepblast/trainer.py\u001b[0m in \u001b[0;36mvalidation_step\u001b[0;34m(self, batch, batch_idx)\u001b[0m\n\u001b[1;32m 202\u001b[0m \u001b[0;31m# Obtain alignment statistics + visualizations\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[0mgen\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maligner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraceback\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseq\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 204\u001b[0;31m statistics = self.validation_stats(\n\u001b[0m\u001b[1;32m 205\u001b[0m x, y, xlen, ylen, gen, s, A, predA, theta, gap, batch_idx)\n\u001b[1;32m 206\u001b[0m statistics = pd.DataFrame(\n", + "\u001b[0;32m~/Documents/research/garfunkel/deepblast/trainer.py\u001b[0m in \u001b[0;36mvalidation_stats\u001b[0;34m(self, x, y, xlen, ylen, gen, states, A, predA, theta, gap, batch_idx)\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0mylen\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msqueeze\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 163\u001b[0m self.tokenizer.alphabet)\n\u001b[0;32m--> 164\u001b[0;31m \u001b[0mdecoded\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 165\u001b[0m \u001b[0mpred_x\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred_y\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred_states\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mdecoded\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0mpred_states\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpred_states\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: too many values to unpack (expected 2)" + ] + } + ], "source": [ "trainer = Trainer(\n", " max_epochs=args.epochs,\n", From 9ae419d7905a7a8f504cdfa8db23fbf25cb13485 Mon Sep 17 00:00:00 2001 From: mortonjt Date: Wed, 19 Aug 2020 14:33:58 -0600 Subject: [PATCH 13/30] fixing gap indexing error --- deepblast/constants.py | 6 +- deepblast/dataset/dataset.py | 4 +- deepblast/dataset/tests/test_utils.py | 21 +- deepblast/dataset/utils.py | 30 +-- deepblast/losses.py | 2 +- ipynb/simulation-benchmark.ipynb | 305 ++++++++++++++++++++++---- ipynb/struct-benchmark.ipynb | 269 +++++++++++++++++++---- 7 files changed, 531 insertions(+), 106 deletions(-) diff --git a/deepblast/constants.py b/deepblast/constants.py index 3eb770c..05cf67e 100644 --- a/deepblast/constants.py +++ b/deepblast/constants.py @@ -1,7 +1,7 @@ x, m, y = 0, 1, 2 # state numberings -match_mean = 0 -match_std = 3 +match_mean = 1 +match_std = 5 gap_mean = -4 -gap_std = 3 +gap_std = 5 diff --git a/deepblast/dataset/dataset.py b/deepblast/dataset/dataset.py index 815cb3e..aee48d7 100644 --- a/deepblast/dataset/dataset.py +++ b/deepblast/dataset/dataset.py @@ -142,14 +142,14 @@ def __getitem__(self, i): pos = self.pairs.iloc[i]['chain2'] states = self.pairs.iloc[i]['alignment'] - gene_mask, pos_mask = gap_mask(states) states = list(map(tmstate_f, states)) if self.clip_ends: gene, pos, states = clip_boundaries(gene, pos, states) + gene_mask, pos_mask = gap_mask(states) + if self.pad_ends: states = [m] + states + [m] - states = torch.Tensor(states).long() gene = self.tokenizer(str.encode(gene)) pos = self.tokenizer(str.encode(pos)) diff --git a/deepblast/dataset/tests/test_utils.py b/deepblast/dataset/tests/test_utils.py index 146a4d2..599b8dc 100644 --- a/deepblast/dataset/tests/test_utils.py +++ b/deepblast/dataset/tests/test_utils.py @@ -2,7 +2,8 @@ from deepblast.dataset.utils import ( tmstate_f, states2matrix, states2alignment, path_distance_matrix, clip_boundaries, - pack_sequences, unpack_sequences, gap_mask, + pack_sequences, unpack_sequences, + gap_mask, merge_mask, remove_orphans) from math import sqrt import numpy as np @@ -310,6 +311,24 @@ def test_gap_mask2(self): # N, M = 197, 283 gap_mask(s) + def test_gap_mask3(self): + seq = ('TSKINKELITTANDKKYTIATVVKVDGIAWFDRRDGVDQFKADTGNDVWVGPSQA' + 'DAAAQVQIVENLIAQGVDAIAIVPFSVEAVEPVLKKARERGIVVISHEASNIQNV' + 'DYDIEAFDNKAYGANLKELGKSGGKGKYVTTVGSLTSKSQEWIDGAVEYQKANFP' + 'ESEATGRLETYDDANTDYNKLKEATAYPDITGILGAPPTSAGAGRLIAEGGLKGK' + 'VFFAGTGLVSVAGEYIKNDDVQYIQFWDPAVAGYANLAVAALEKKNDQIKAGLNL' + 'GLPGYESLLAPDAAKPNLLYGAGWVGVTKEND') + st = ('222222222222222:::::::::::2::::::1:::::::::::1::::::::.:' + ':::::::::::::::::::::::::::::::::::::::::::::::::::1.:::' + ':::::::2:::::::::1:::::::1:::::::::::::::::1::::::::::::' + ':::::::::2:::::::::::::::::1:::::::::::::1::::::::::::::' + ':::::::::::1:::11:::::::::::::::::::2::.:::1:::::::::22:' + '::222222222222222222222::::::::::::11::11:11.11111111111' + '1111111') + xmask, ymask = gap_mask(st) + xidx = merge_mask(xmask, len(seq), len(seq)) + yidx = merge_mask(ymask, len(seq), len(seq)) + def test_replace_orphans_small(self): s = ":11:11:" e = ":111211:" diff --git a/deepblast/dataset/utils.py b/deepblast/dataset/utils.py index 06a100e..ba2c93b 100644 --- a/deepblast/dataset/utils.py +++ b/deepblast/dataset/utils.py @@ -273,8 +273,12 @@ def collate_f(batch): n, m = len(genes[b]), len(others[b]) dm[b, :n, :m] = alignments[b] p[b, :n, :m] = paths[b] - x_mask.append(merge_mask(g_mask[b], n, max_x)) - y_mask.append(merge_mask(p_mask[b], m, max_y)) + gm = merge_mask(g_mask[b], n, max_x) + pm = merge_mask(p_mask[b], m, max_y) + assert len(gm) > 0 + assert len(pm) > 0 + x_mask.append(gm) + y_mask.append(pm) return genes, others, states, dm, p, (x_mask, y_mask) @@ -307,21 +311,21 @@ def path_distance_matrix(pi): def merge_mask(idx, length, mask_length): pads = set(list(range(length, mask_length))) - idx = set(idx) | pads + idx = set(idx.tolist()) | pads allx = set(list(range(0, mask_length))) idx = torch.Tensor(list(allx - idx)).long() return idx # Preprocessing functions -def gap_mask(states: np.array): +def gap_mask(states: str): """ Builds a mask for all gaps. Reports rows and columns that should be completely masked. Parameters ---------- - states : np.array + states : str List of alignment states Returns ------- @@ -331,20 +335,20 @@ def gap_mask(states: np.array): i, j = 0, 0 rows, cols = [], [] for k in range(len(states)): - if states[k] == '1': + if states[k] == x: cols.append(i) i += 1 - elif states[k] == '2': + elif states[k] == y: rows.append(j) j += 1 - elif states[k] == ':': - i += 1 - j += 1 - elif states[k] == '.': - cols.append(i) - rows.append(j) + elif states[k] == m: i += 1 j += 1 + # elif states[k] == '.': + # cols.append(i) + # rows.append(j) + # i += 1 + # j += 1 else: raise ValueError(f'{states[k]} is not recognized') return np.array(rows), np.array(cols) diff --git a/deepblast/losses.py b/deepblast/losses.py index 480c7ed..4cb727a 100644 --- a/deepblast/losses.py +++ b/deepblast/losses.py @@ -52,7 +52,7 @@ def __call__(self, Ytrue, Ypred, M, G, x_mask, y_mask): log_like = score / len(x_mask) match_log = match_prior.log_prob(M).mean() gap_log = gap_prior.log_prob(G).mean() - score = log_like + match_log + gap_log + score = log_like - match_log - gap_log return score diff --git a/ipynb/simulation-benchmark.ipynb b/ipynb/simulation-benchmark.ipynb index 75e76a4..d60390c 100644 --- a/ipynb/simulation-benchmark.ipynb +++ b/ipynb/simulation-benchmark.ipynb @@ -129,13 +129,13 @@ " '--test-pairs', f'{os.getcwd()}/data/test.txt',\n", " '--valid-pairs', f'{os.getcwd()}/data/valid.txt',\n", " '--output-directory', output_dir,\n", - " '--epochs', '32',\n", + " '--epochs', '64',\n", " '--batch-size', '20', \n", " '--num-workers', '30',\n", - " '--learning-rate', '1e-4',\n", + " '--learning-rate', '1e-3', #1e-4\n", " '--layers', '2',\n", " '--visualization-fraction', '1',\n", - " '--loss', 'cross_entropy',\n", + " '--loss', 'l2_cross_entropy',\n", " '--scheduler', 'none',\n", " '--gpus', '1'\n", "]\n", @@ -209,7 +209,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "23b2538f21804cd287e3b698f87a3853", + "model_id": "ed887e26019242cb804a5abd421e30f8", "version_major": 2, "version_minor": 0 }, @@ -220,39 +220,120 @@ "metadata": {}, "output_type": "display_data" }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "87f0ce80d8e44a118cece96c51ab6f93", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stdout", "output_type": "stream", "text": [ - "tensor(-0.0554, device='cuda:0') tensor(-0.0655, device='cuda:0')\n", - "tensor(-0.0566, device='cuda:0') tensor(-0.0642, device='cuda:0')\n", - "tensor(-0.0597, device='cuda:0') tensor(-0.0663, device='cuda:0')\n", - "tensor(-0.0598, device='cuda:0') tensor(-0.0664, device='cuda:0')\n", - "tensor(-0.0578, device='cuda:0') tensor(-0.0665, device='cuda:0')\n", - "tensor(-0.0624, device='cuda:0') tensor(-0.0637, device='cuda:0')\n", - "tensor(-0.0586, device='cuda:0') tensor(-0.0677, device='cuda:0')\n", - "tensor(-0.0602, device='cuda:0') tensor(-0.0664, device='cuda:0')\n", - "tensor(-0.0575, device='cuda:0') tensor(-0.0678, device='cuda:0')\n", - "tensor(-0.0579, device='cuda:0') tensor(-0.0677, device='cuda:0')\n" + "\n" ] }, { - "ename": "ValueError", - "evalue": "too many values to unpack (expected 2)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 8\u001b[0m )\n\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, model, train_dataloader, val_dataloaders)\u001b[0m\n\u001b[1;32m 977\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 978\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msingle_gpu\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 979\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msingle_gpu_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 980\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 981\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_tpu\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# pragma: no-cover\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/distrib_parts.py\u001b[0m in \u001b[0;36msingle_gpu_train\u001b[0;34m(self, model)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreinit_scheduler_properties\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptimizers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlr_schedulers\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 184\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 185\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_pretrain_routine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 186\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mtpu_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtpu_core_idx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py\u001b[0m in \u001b[0;36mrun_pretrain_routine\u001b[0;34m(self, model)\u001b[0m\n\u001b[1;32m 1134\u001b[0m \u001b[0mnum_loaders\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mval_dataloaders\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1135\u001b[0m \u001b[0mmax_batches\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_sanity_val_steps\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mnum_loaders\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1136\u001b[0;31m eval_results = self._evaluate(model,\n\u001b[0m\u001b[1;32m 1137\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mval_dataloaders\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1138\u001b[0m \u001b[0mmax_batches\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/evaluation_loop.py\u001b[0m in \u001b[0;36m_evaluate\u001b[0;34m(self, model, dataloaders, max_batches, test_mode)\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluation_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_idx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdataloader_idx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_mode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 293\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluation_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_idx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdataloader_idx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_mode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 294\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[0;31m# on dp / ddp2 might still want to do something with the batch parts\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/evaluation_loop.py\u001b[0m in \u001b[0;36mevaluation_forward\u001b[0;34m(self, model, batch, batch_idx, dataloader_idx, test_mode)\u001b[0m\n\u001b[1;32m 483\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtest_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 484\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 485\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalidation_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 486\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/research/garfunkel/deepblast/trainer.py\u001b[0m in \u001b[0;36mvalidation_step\u001b[0;34m(self, batch, batch_idx)\u001b[0m\n\u001b[1;32m 202\u001b[0m \u001b[0;31m# Obtain alignment statistics + visualizations\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[0mgen\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maligner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraceback\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseq\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 204\u001b[0;31m statistics = self.validation_stats(\n\u001b[0m\u001b[1;32m 205\u001b[0m x, y, xlen, ylen, gen, s, A, predA, theta, gap, batch_idx)\n\u001b[1;32m 206\u001b[0m statistics = pd.DataFrame(\n", - "\u001b[0;32m~/Documents/research/garfunkel/deepblast/trainer.py\u001b[0m in \u001b[0;36mvalidation_stats\u001b[0;34m(self, x, y, xlen, ylen, gen, states, A, predA, theta, gap, batch_idx)\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0mylen\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msqueeze\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 163\u001b[0m self.tokenizer.alphabet)\n\u001b[0;32m--> 164\u001b[0;31m \u001b[0mdecoded\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 165\u001b[0m \u001b[0mpred_x\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred_y\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred_states\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mdecoded\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0mpred_states\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpred_states\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: too many values to unpack (expected 2)" - ] + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -277,16 +358,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "version_0 version_20\tversion_32 version_44\tversion_56 version_68\r\n", + "version_1 version_21\tversion_33 version_45\tversion_57 version_69\r\n", + "version_10 version_22\tversion_34 version_46\tversion_58 version_7\r\n", + "version_11 version_23\tversion_35 version_47\tversion_59 version_70\r\n", + "version_12 version_24\tversion_36 version_48\tversion_6 version_71\r\n", + "version_13 version_25\tversion_37 version_49\tversion_60 version_72\r\n", + "version_14 version_26\tversion_38 version_5\tversion_61 version_73\r\n", + "version_15 version_27\tversion_39 version_50\tversion_62 version_74\r\n", + "version_16 version_28\tversion_4 version_51\tversion_63 version_75\r\n", + "version_17 version_29\tversion_40 version_52\tversion_64 version_76\r\n", + "version_18 version_3\tversion_41 version_53\tversion_65 version_77\r\n", + "version_19 version_30\tversion_42 version_54\tversion_66 version_8\r\n", + "version_2 version_31\tversion_43 version_55\tversion_67 version_9\r\n" + ] + } + ], "source": [ "!ls lightning_logs" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -295,11 +396,44 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "scrolled": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Reusing TensorBoard on port 6006 (pid 14351), started 0:35:32 ago. (Use '!kill 14351' to kill it.)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "%tensorboard --logdir lightning_logs" ] @@ -313,27 +447,116 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "NeedlemanWunschAligner(\n", + " (lm): BiLM(\n", + " (embed): Embedding(22, 21, padding_idx=21)\n", + " (dropout): Dropout(p=0, inplace=False)\n", + " (rnn): ModuleList(\n", + " (0): LSTM(21, 1024, batch_first=True)\n", + " (1): LSTM(1024, 1024, batch_first=True)\n", + " )\n", + " (linear): Linear(in_features=1024, out_features=21, bias=True)\n", + " )\n", + " (match_embedding): StackedRNN(\n", + " (embed): Embedding(21, 512, padding_idx=20)\n", + " (dropout): Dropout(p=0, inplace=False)\n", + " (rnn): LSTM(512, 512, num_layers=2, batch_first=True, bidirectional=True)\n", + " (proj): Linear(in_features=1024, out_features=512, bias=True)\n", + " )\n", + " (gap_embedding): StackedRNN(\n", + " (embed): Embedding(21, 512, padding_idx=20)\n", + " (dropout): Dropout(p=0, inplace=False)\n", + " (rnn): LSTM(512, 512, num_layers=2, batch_first=True, bidirectional=True)\n", + " (proj): Linear(in_features=1024, out_features=512, bias=True)\n", + " )\n", + " (match_mixture): MultiheadProduct(\n", + " (multilinear): MultiLinear(\n", + " (multi_output): ModuleList(\n", + " (0): Linear(in_features=512, out_features=512, bias=True)\n", + " (1): Linear(in_features=512, out_features=512, bias=True)\n", + " (2): Linear(in_features=512, out_features=512, bias=True)\n", + " (3): Linear(in_features=512, out_features=512, bias=True)\n", + " (4): Linear(in_features=512, out_features=512, bias=True)\n", + " (5): Linear(in_features=512, out_features=512, bias=True)\n", + " (6): Linear(in_features=512, out_features=512, bias=True)\n", + " (7): Linear(in_features=512, out_features=512, bias=True)\n", + " )\n", + " )\n", + " (linear): Linear(in_features=8, out_features=1, bias=True)\n", + " )\n", + " (gap_mixture): MultiheadProduct(\n", + " (multilinear): MultiLinear(\n", + " (multi_output): ModuleList(\n", + " (0): Linear(in_features=512, out_features=512, bias=True)\n", + " (1): Linear(in_features=512, out_features=512, bias=True)\n", + " (2): Linear(in_features=512, out_features=512, bias=True)\n", + " (3): Linear(in_features=512, out_features=512, bias=True)\n", + " (4): Linear(in_features=512, out_features=512, bias=True)\n", + " (5): Linear(in_features=512, out_features=512, bias=True)\n", + " (6): Linear(in_features=512, out_features=512, bias=True)\n", + " (7): Linear(in_features=512, out_features=512, bias=True)\n", + " )\n", + " )\n", + " (linear): Linear(in_features=8, out_features=1, bias=True)\n", + " )\n", + " (nw): NeedlemanWunschDecoder()\n", + ")" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "model.aligner" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'epoch=29.ckpt'\r\n" + ] + } + ], "source": [ "!ls lightning_logs/version_5/checkpoints" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'lightning_logs/version_70/checkpoints/epoch=59.ckpt'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mcheckpoint_dir\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'lightning_logs/version_70/checkpoints'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf'{checkpoint_dir}/epoch=59.ckpt'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLightningAligner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_from_checkpoint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/core/saving.py\u001b[0m in \u001b[0;36mload_from_checkpoint\u001b[0;34m(cls, checkpoint_path, map_location, hparams_file, tags_csv, *args, **kwargs)\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0mcheckpoint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpl_load\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 142\u001b[0;31m \u001b[0mcheckpoint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpl_load\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloc\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 143\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;31m# add the hparams from csv file to checkpoint\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/cloud_io.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(path_or_url, map_location)\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0murlparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscheme\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m''\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mPath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrive\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# no scheme or with a drive letter\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhub\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_state_dict_from_url\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(f, map_location, pickle_module, **pickle_load_args)\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0mpickle_load_args\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'encoding'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'utf-8'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 524\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 525\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_file\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 526\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_zipfile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopened_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 527\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0m_open_zipfile_reader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_zipfile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36m_open_file_like\u001b[0;34m(name_or_buffer, mode)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 212\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 213\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m'w'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, name, mode)\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_opener\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 193\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_open_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 194\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__exit__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'lightning_logs/version_70/checkpoints/epoch=59.ckpt'" + ] + } + ], "source": [ "from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint\n", "checkpoint_dir = 'lightning_logs/version_70/checkpoints'\n", diff --git a/ipynb/struct-benchmark.ipynb b/ipynb/struct-benchmark.ipynb index dea9251..75e1b3e 100644 --- a/ipynb/struct-benchmark.ipynb +++ b/ipynb/struct-benchmark.ipynb @@ -170,7 +170,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "scrolled": false }, @@ -185,7 +185,9 @@ "\n", " | Name | Type | Params\n", "---------------------------------------------------\n", - "0 | aligner | NeedlemanWunschAligner | 34 M \n" + "0 | aligner | NeedlemanWunschAligner | 38 M \n", + "/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/distributed.py:25: UserWarning: The dataloader, val dataloader 0, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 32 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n", + " warnings.warn(*args, **kwargs)\n" ] }, { @@ -202,10 +204,58 @@ "metadata": {}, "output_type": "display_data" }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[tensor([1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1,\n", + " 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2,\n", + " 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1,\n", + " 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 2, 2, 1],\n", + " device='cuda:0'), tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 2, 1],\n", + " device='cuda:0')]\n", + "[tensor([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 2, 1, 1, 0,\n", + " 0, 2, 2, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 1], device='cuda:0'), tensor([1, 0, 0, 0, 2, 2, 2, 1, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 2,\n", + " 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1,\n", + " 1, 0, 2, 1, 1, 0, 0, 1, 1, 0, 1, 1], device='cuda:0')]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/distributed.py:25: UserWarning: The dataloader, train dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 32 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n", + " warnings.warn(*args, **kwargs)\n", + "/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/distributed.py:25: UserWarning: The dataloader, val dataloader 0, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 32 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n", + " warnings.warn(*args, **kwargs)\n" + ] + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ce3249fdf417451c896ecfa630cf1943", + "model_id": "eb644d8e916b449bb32e5cb0f0786c5f", "version_major": 2, "version_minor": 0 }, @@ -217,46 +267,29 @@ "output_type": "display_data" }, { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" - ] - }, - "metadata": {}, - "output_type": "display_data" + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] }, { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" - ] - }, - "metadata": {}, - "output_type": "display_data" + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/distributed.py:25: UserWarning: Detected KeyboardInterrupt, attempting graceful shutdown...\n", + " warnings.warn(*args, **kwargs)\n" + ] }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "cc3be93a41df413195269996a5009591", - "version_major": 2, - "version_minor": 0 - }, "text/plain": [ - "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + "1" ] }, + "execution_count": 8, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ @@ -281,16 +314,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "version_0 version_24\tversion_4 version_55\tversion_70 version_86\r\n", + "version_1 version_25\tversion_40 version_56\tversion_71 version_87\r\n", + "version_10 version_26\tversion_41 version_57\tversion_72 version_88\r\n", + "version_11 version_27\tversion_42 version_58\tversion_73 version_89\r\n", + "version_12 version_28\tversion_43 version_59\tversion_74 version_9\r\n", + "version_13 version_29\tversion_44 version_6\tversion_75 version_90\r\n", + "version_14 version_3\tversion_45 version_60\tversion_76 version_91\r\n", + "version_15 version_30\tversion_46 version_61\tversion_77 version_92\r\n", + "version_16 version_31\tversion_47 version_62\tversion_78 version_93\r\n", + "version_17 version_32\tversion_48 version_63\tversion_79 version_94\r\n", + "version_18 version_33\tversion_49 version_64\tversion_8 version_95\r\n", + "version_19 version_34\tversion_5 version_65\tversion_80 version_96\r\n", + "version_2 version_35\tversion_50 version_66\tversion_81\r\n", + "version_20 version_36\tversion_51 version_67\tversion_82\r\n", + "version_21 version_37\tversion_52 version_68\tversion_83\r\n", + "version_22 version_38\tversion_53 version_69\tversion_84\r\n", + "version_23 version_39\tversion_54 version_7\tversion_85\r\n" + ] + } + ], "source": [ "!ls lightning_logs" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -299,11 +356,44 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "scrolled": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Reusing TensorBoard on port 6006 (pid 14351), started 1:27:34 ago. (Use '!kill 14351' to kill it.)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "%tensorboard --logdir lightning_logs" ] @@ -317,9 +407,73 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "NeedlemanWunschAligner(\n", + " (lm): BiLM(\n", + " (embed): Embedding(22, 21, padding_idx=21)\n", + " (dropout): Dropout(p=0, inplace=False)\n", + " (rnn): ModuleList(\n", + " (0): LSTM(21, 1024, batch_first=True)\n", + " (1): LSTM(1024, 1024, batch_first=True)\n", + " )\n", + " (linear): Linear(in_features=1024, out_features=21, bias=True)\n", + " )\n", + " (match_embedding): StackedRNN(\n", + " (embed): Embedding(21, 512, padding_idx=20)\n", + " (dropout): Dropout(p=0, inplace=False)\n", + " (rnn): LSTM(512, 512, num_layers=2, batch_first=True, bidirectional=True)\n", + " (proj): Linear(in_features=1024, out_features=512, bias=True)\n", + " )\n", + " (gap_embedding): StackedRNN(\n", + " (embed): Embedding(21, 512, padding_idx=20)\n", + " (dropout): Dropout(p=0, inplace=False)\n", + " (rnn): LSTM(512, 512, num_layers=2, batch_first=True, bidirectional=True)\n", + " (proj): Linear(in_features=1024, out_features=512, bias=True)\n", + " )\n", + " (match_mixture): MultiheadProduct(\n", + " (multilinear): MultiLinear(\n", + " (multi_output): ModuleList(\n", + " (0): Linear(in_features=512, out_features=512, bias=True)\n", + " (1): Linear(in_features=512, out_features=512, bias=True)\n", + " (2): Linear(in_features=512, out_features=512, bias=True)\n", + " (3): Linear(in_features=512, out_features=512, bias=True)\n", + " (4): Linear(in_features=512, out_features=512, bias=True)\n", + " (5): Linear(in_features=512, out_features=512, bias=True)\n", + " (6): Linear(in_features=512, out_features=512, bias=True)\n", + " (7): Linear(in_features=512, out_features=512, bias=True)\n", + " )\n", + " )\n", + " (linear): Linear(in_features=8, out_features=1, bias=True)\n", + " )\n", + " (gap_mixture): MultiheadProduct(\n", + " (multilinear): MultiLinear(\n", + " (multi_output): ModuleList(\n", + " (0): Linear(in_features=512, out_features=512, bias=True)\n", + " (1): Linear(in_features=512, out_features=512, bias=True)\n", + " (2): Linear(in_features=512, out_features=512, bias=True)\n", + " (3): Linear(in_features=512, out_features=512, bias=True)\n", + " (4): Linear(in_features=512, out_features=512, bias=True)\n", + " (5): Linear(in_features=512, out_features=512, bias=True)\n", + " (6): Linear(in_features=512, out_features=512, bias=True)\n", + " (7): Linear(in_features=512, out_features=512, bias=True)\n", + " )\n", + " )\n", + " (linear): Linear(in_features=8, out_features=1, bias=True)\n", + " )\n", + " (nw): NeedlemanWunschDecoder()\n", + ")" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "model.aligner" ] @@ -333,18 +487,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'epoch=103.ckpt'\r\n" + ] + } + ], "source": [ "!ls lightning_logs/version_3/checkpoints" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'lightning_logs/version_3/checkpoints/epoch=49.ckpt'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mcheckpoint_dir\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'lightning_logs/version_3/checkpoints'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf'{checkpoint_dir}/epoch=49.ckpt'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLightningAligner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_from_checkpoint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/core/saving.py\u001b[0m in \u001b[0;36mload_from_checkpoint\u001b[0;34m(cls, checkpoint_path, map_location, hparams_file, tags_csv, *args, **kwargs)\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0mcheckpoint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpl_load\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 142\u001b[0;31m \u001b[0mcheckpoint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpl_load\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloc\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 143\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;31m# add the hparams from csv file to checkpoint\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/cloud_io.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(path_or_url, map_location)\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0murlparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscheme\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m''\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mPath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrive\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# no scheme or with a drive letter\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhub\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_state_dict_from_url\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(f, map_location, pickle_module, **pickle_load_args)\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0mpickle_load_args\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'encoding'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'utf-8'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 524\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 525\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_file\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 526\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_zipfile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopened_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 527\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0m_open_zipfile_reader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_zipfile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36m_open_file_like\u001b[0;34m(name_or_buffer, mode)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 212\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 213\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m'w'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, name, mode)\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_opener\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 193\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_open_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 194\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__exit__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'lightning_logs/version_3/checkpoints/epoch=49.ckpt'" + ] + } + ], "source": [ "from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint\n", "checkpoint_dir = 'lightning_logs/version_3/checkpoints'\n", From 82ca19734a191b8e2108f74ab5cb3183089425cc Mon Sep 17 00:00:00 2001 From: mortonjt Date: Wed, 19 Aug 2020 15:58:55 -0600 Subject: [PATCH 14/30] making priors looser --- deepblast/constants.py | 6 +- ipynb/simulation-benchmark.ipynb | 80 ++------- ipynb/struct-benchmark.ipynb | 288 +++++-------------------------- 3 files changed, 64 insertions(+), 310 deletions(-) diff --git a/deepblast/constants.py b/deepblast/constants.py index 05cf67e..bf7c572 100644 --- a/deepblast/constants.py +++ b/deepblast/constants.py @@ -1,7 +1,7 @@ x, m, y = 0, 1, 2 # state numberings -match_mean = 1 -match_std = 5 +match_mean = 2 +match_std = 10 gap_mean = -4 -gap_std = 5 +gap_std = 10 diff --git a/ipynb/simulation-benchmark.ipynb b/ipynb/simulation-benchmark.ipynb index d60390c..b48ce3f 100644 --- a/ipynb/simulation-benchmark.ipynb +++ b/ipynb/simulation-benchmark.ipynb @@ -129,14 +129,15 @@ " '--test-pairs', f'{os.getcwd()}/data/test.txt',\n", " '--valid-pairs', f'{os.getcwd()}/data/valid.txt',\n", " '--output-directory', output_dir,\n", - " '--epochs', '64',\n", + " '--epochs', '32',\n", " '--batch-size', '20', \n", " '--num-workers', '30',\n", - " '--learning-rate', '1e-3', #1e-4\n", + " '--learning-rate', '1e-3', \n", " '--layers', '2',\n", + " '--heads', '8',\n", " '--visualization-fraction', '1',\n", " '--loss', 'l2_cross_entropy',\n", - " '--scheduler', 'none',\n", + " '--scheduler', 'cosine',\n", " '--gpus', '1'\n", "]\n", "parser = argparse.ArgumentParser(add_help=False)\n", @@ -209,7 +210,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ed887e26019242cb804a5abd421e30f8", + "model_id": "b143966e555b46bd9ada1db2fc13e453", "version_major": 2, "version_minor": 0 }, @@ -223,7 +224,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "87f0ce80d8e44a118cece96c51ab6f93", + "model_id": "bbe1327e962f436c88dd62a3e255810a", "version_major": 2, "version_minor": 0 }, @@ -276,48 +277,6 @@ "metadata": {}, "output_type": "display_data" }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "name": "stdout", "output_type": "stream", @@ -337,12 +296,14 @@ } ], "source": [ + "from pytorch_lightning.profiler import AdvancedProfiler\n", + "profiler=AdvancedProfiler()\n", "trainer = Trainer(\n", " max_epochs=args.epochs,\n", " gpus=args.gpus,\n", " check_val_every_n_epoch=10,\n", " # profiler=profiler,\n", - " fast_dev_run=False,\n", + " # fast_dev_run=True,\n", " # auto_scale_batch_size='power'\n", ")\n", "\n", @@ -365,19 +326,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "version_0 version_20\tversion_32 version_44\tversion_56 version_68\r\n", - "version_1 version_21\tversion_33 version_45\tversion_57 version_69\r\n", - "version_10 version_22\tversion_34 version_46\tversion_58 version_7\r\n", - "version_11 version_23\tversion_35 version_47\tversion_59 version_70\r\n", - "version_12 version_24\tversion_36 version_48\tversion_6 version_71\r\n", - "version_13 version_25\tversion_37 version_49\tversion_60 version_72\r\n", - "version_14 version_26\tversion_38 version_5\tversion_61 version_73\r\n", - "version_15 version_27\tversion_39 version_50\tversion_62 version_74\r\n", - "version_16 version_28\tversion_4 version_51\tversion_63 version_75\r\n", - "version_17 version_29\tversion_40 version_52\tversion_64 version_76\r\n", - "version_18 version_3\tversion_41 version_53\tversion_65 version_77\r\n", - "version_19 version_30\tversion_42 version_54\tversion_66 version_8\r\n", - "version_2 version_31\tversion_43 version_55\tversion_67 version_9\r\n" + "version_0 version_10 version_12 version_3 version_5 version_7 version_9\r\n", + "version_1 version_11 version_2 version_4 version_6 version_8\r\n" ] } ], @@ -404,7 +354,7 @@ { "data": { "text/plain": [ - "Reusing TensorBoard on port 6006 (pid 14351), started 0:35:32 ago. (Use '!kill 14351' to kill it.)" + "Reusing TensorBoard on port 6006 (pid 14351), started 2:51:54 ago. (Use '!kill 14351' to kill it.)" ] }, "metadata": {}, @@ -414,11 +364,11 @@ "data": { "text/html": [ "\n", - " \n", " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "%tensorboard --logdir lightning_logs" ] @@ -407,73 +300,9 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "NeedlemanWunschAligner(\n", - " (lm): BiLM(\n", - " (embed): Embedding(22, 21, padding_idx=21)\n", - " (dropout): Dropout(p=0, inplace=False)\n", - " (rnn): ModuleList(\n", - " (0): LSTM(21, 1024, batch_first=True)\n", - " (1): LSTM(1024, 1024, batch_first=True)\n", - " )\n", - " (linear): Linear(in_features=1024, out_features=21, bias=True)\n", - " )\n", - " (match_embedding): StackedRNN(\n", - " (embed): Embedding(21, 512, padding_idx=20)\n", - " (dropout): Dropout(p=0, inplace=False)\n", - " (rnn): LSTM(512, 512, num_layers=2, batch_first=True, bidirectional=True)\n", - " (proj): Linear(in_features=1024, out_features=512, bias=True)\n", - " )\n", - " (gap_embedding): StackedRNN(\n", - " (embed): Embedding(21, 512, padding_idx=20)\n", - " (dropout): Dropout(p=0, inplace=False)\n", - " (rnn): LSTM(512, 512, num_layers=2, batch_first=True, bidirectional=True)\n", - " (proj): Linear(in_features=1024, out_features=512, bias=True)\n", - " )\n", - " (match_mixture): MultiheadProduct(\n", - " (multilinear): MultiLinear(\n", - " (multi_output): ModuleList(\n", - " (0): Linear(in_features=512, out_features=512, bias=True)\n", - " (1): Linear(in_features=512, out_features=512, bias=True)\n", - " (2): Linear(in_features=512, out_features=512, bias=True)\n", - " (3): Linear(in_features=512, out_features=512, bias=True)\n", - " (4): Linear(in_features=512, out_features=512, bias=True)\n", - " (5): Linear(in_features=512, out_features=512, bias=True)\n", - " (6): Linear(in_features=512, out_features=512, bias=True)\n", - " (7): Linear(in_features=512, out_features=512, bias=True)\n", - " )\n", - " )\n", - " (linear): Linear(in_features=8, out_features=1, bias=True)\n", - " )\n", - " (gap_mixture): MultiheadProduct(\n", - " (multilinear): MultiLinear(\n", - " (multi_output): ModuleList(\n", - " (0): Linear(in_features=512, out_features=512, bias=True)\n", - " (1): Linear(in_features=512, out_features=512, bias=True)\n", - " (2): Linear(in_features=512, out_features=512, bias=True)\n", - " (3): Linear(in_features=512, out_features=512, bias=True)\n", - " (4): Linear(in_features=512, out_features=512, bias=True)\n", - " (5): Linear(in_features=512, out_features=512, bias=True)\n", - " (6): Linear(in_features=512, out_features=512, bias=True)\n", - " (7): Linear(in_features=512, out_features=512, bias=True)\n", - " )\n", - " )\n", - " (linear): Linear(in_features=8, out_features=1, bias=True)\n", - " )\n", - " (nw): NeedlemanWunschDecoder()\n", - ")" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "model.aligner" ] @@ -487,43 +316,18 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'epoch=103.ckpt'\r\n" - ] - } - ], + "outputs": [], "source": [ "!ls lightning_logs/version_3/checkpoints" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: 'lightning_logs/version_3/checkpoints/epoch=49.ckpt'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mcheckpoint_dir\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'lightning_logs/version_3/checkpoints'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf'{checkpoint_dir}/epoch=49.ckpt'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLightningAligner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_from_checkpoint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/core/saving.py\u001b[0m in \u001b[0;36mload_from_checkpoint\u001b[0;34m(cls, checkpoint_path, map_location, hparams_file, tags_csv, *args, **kwargs)\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0mcheckpoint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpl_load\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 142\u001b[0;31m \u001b[0mcheckpoint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpl_load\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloc\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 143\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;31m# add the hparams from csv file to checkpoint\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/cloud_io.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(path_or_url, map_location)\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0murlparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscheme\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m''\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mPath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrive\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# no scheme or with a drive letter\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhub\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_state_dict_from_url\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(f, map_location, pickle_module, **pickle_load_args)\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0mpickle_load_args\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'encoding'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'utf-8'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 524\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 525\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_file\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 526\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_zipfile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopened_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 527\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0m_open_zipfile_reader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_zipfile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36m_open_file_like\u001b[0;34m(name_or_buffer, mode)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 212\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 213\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m'w'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, name, mode)\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_opener\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 193\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_open_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 194\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__exit__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'lightning_logs/version_3/checkpoints/epoch=49.ckpt'" - ] - } - ], + "outputs": [], "source": [ "from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint\n", "checkpoint_dir = 'lightning_logs/version_3/checkpoints'\n", From e752757421e0d322ab18608f7e24f5f9d56dcc33 Mon Sep 17 00:00:00 2001 From: mortonjt Date: Fri, 21 Aug 2020 21:00:26 -0700 Subject: [PATCH 15/30] remove local alignment in traceback. adding clip-ends option. --- deepblast/alignment.py | 8 ++++---- deepblast/trainer.py | 10 ++++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/deepblast/alignment.py b/deepblast/alignment.py index eaf1978..def1c2e 100644 --- a/deepblast/alignment.py +++ b/deepblast/alignment.py @@ -107,10 +107,10 @@ def traceback(self, x, order): for b in range(B): M = match[b, :xlen[b], :ylen[b]].unsqueeze(0) G = gap[b, :xlen[b], :ylen[b]].unsqueeze(0) - val = math.log(1 - (1/50)) # based on average insertion length - if self.local: - G[0, 0, :] = val - G[0, :, 0] = val + # val = math.log(1 - (1/50)) # based on average insertion length + # if self.local: + # G[0, 0, :] = val + # G[0, :, 0] = val aln = self.nw.decode(M, G) decoded = self.nw.traceback(aln.squeeze()) yield decoded, aln diff --git a/deepblast/trainer.py b/deepblast/trainer.py index 70706b5..935e837 100644 --- a/deepblast/trainer.py +++ b/deepblast/trainer.py @@ -82,7 +82,7 @@ def initialize_logging(self, root_dir='./', logging_path=None): def train_dataloader(self): train_dataset = TMAlignDataset( - self.hparams.train_pairs, + self.hparams.train_pairs, clip_ends=self.hparams.clip_ends, construct_paths=isinstance(self.loss_func, SoftPathLoss)) train_dataloader = DataLoader( train_dataset, self.hparams.batch_size, collate_fn=collate_f, @@ -92,7 +92,7 @@ def train_dataloader(self): def val_dataloader(self): valid_dataset = TMAlignDataset( - self.hparams.valid_pairs, + self.hparams.valid_pairs, clip_ends=self.hparams.clip_ends, construct_paths=isinstance(self.loss_func, SoftPathLoss)) valid_dataloader = DataLoader( valid_dataset, self.hparams.batch_size, collate_fn=collate_f, @@ -103,7 +103,7 @@ def val_dataloader(self): def test_dataloader(self): # Held-out TM-align dataset test_dataset = TMAlignDataset( - self.hparams.test_pairs, + self.hparams.test_pairs, clip_ends=self.hparams.clip_ends, construct_paths=isinstance(self.loss_func, SoftPathLoss)) test_dataloader = DataLoader( test_dataset, self.hparams.batch_size, shuffle=False, @@ -226,7 +226,9 @@ def validation_epoch_end(self, outputs): for i, m in enumerate(metrics): loss_f = lambda x: x['log'][m] losses = list(map(loss_f, outputs)) - scalar = sum(losses) / len(losses) + losses = losses[np.logical_not(np.isnan(losses))] + # scalar = sum(losses) / len(losses) + scalar = np.asscalar(np.mean(losses)) scores.append(scalar) self.logger.experiment.add_scalar(m, scalar, self.global_step) From 1475d60b82066adf5713b7f58dd2fa6d2564e5d3 Mon Sep 17 00:00:00 2001 From: mortonjt Date: Sat, 22 Aug 2020 08:34:11 -0700 Subject: [PATCH 16/30] fixing scalar issue in validation --- deepblast/trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepblast/trainer.py b/deepblast/trainer.py index 935e837..b86f400 100644 --- a/deepblast/trainer.py +++ b/deepblast/trainer.py @@ -225,7 +225,7 @@ def validation_epoch_end(self, outputs): scores = [] for i, m in enumerate(metrics): loss_f = lambda x: x['log'][m] - losses = list(map(loss_f, outputs)) + losses = np.array(list(map(loss_f, outputs))) losses = losses[np.logical_not(np.isnan(losses))] # scalar = sum(losses) / len(losses) scalar = np.asscalar(np.mean(losses)) From f23e513f4c26b3608ae266f2dcead19d092da83e Mon Sep 17 00:00:00 2001 From: mortonjt Date: Tue, 25 Aug 2020 21:50:07 -0600 Subject: [PATCH 17/30] fixed bug with gap_mask - the outputs were swapped before --- deepblast/dataset/dataset.py | 8 +- deepblast/dataset/tests/test_utils.py | 48 ++++- deepblast/dataset/utils.py | 52 +++--- ipynb/struct-benchmark.ipynb | 248 +++++++++++++++++++++----- 4 files changed, 275 insertions(+), 81 deletions(-) diff --git a/deepblast/dataset/dataset.py b/deepblast/dataset/dataset.py index aee48d7..f91ffa2 100644 --- a/deepblast/dataset/dataset.py +++ b/deepblast/dataset/dataset.py @@ -7,7 +7,7 @@ from deepblast.constants import m from deepblast.dataset.utils import ( state_f, tmstate_f, - clip_boundaries, states2matrix, states2edges, + remove_gaps, states2matrix, states2edges, path_distance_matrix, gap_mask ) @@ -141,11 +141,8 @@ def __getitem__(self, i): gene = self.pairs.iloc[i]['chain1'] pos = self.pairs.iloc[i]['chain2'] states = self.pairs.iloc[i]['alignment'] - states = list(map(tmstate_f, states)) - if self.clip_ends: - gene, pos, states = clip_boundaries(gene, pos, states) - + gene, pos, states = remove_gaps(gene, pos, states, self.clip_ends) gene_mask, pos_mask = gap_mask(states) if self.pad_ends: @@ -169,7 +166,6 @@ def __getitem__(self, i): # gene_mask = torch.Tensor(gene_mask).long() # pos_mask = torch.Tensor(pos_mask).long() - return (gene, pos, states, alignment_matrix, path_matrix, gene_mask, pos_mask) diff --git a/deepblast/dataset/tests/test_utils.py b/deepblast/dataset/tests/test_utils.py index 599b8dc..b1139cf 100644 --- a/deepblast/dataset/tests/test_utils.py +++ b/deepblast/dataset/tests/test_utils.py @@ -1,7 +1,7 @@ import unittest from deepblast.dataset.utils import ( tmstate_f, states2matrix, states2alignment, - path_distance_matrix, clip_boundaries, + path_distance_matrix, remove_gaps, pack_sequences, unpack_sequences, gap_mask, merge_mask, remove_orphans) @@ -230,7 +230,7 @@ def test_clip_ends_none(self): s_ = [m, m, m, m] x_ = 'GSSG' y_ = 'GEIR' - rx, ry, rs = clip_boundaries(x_, y_, s_) + rx, ry, rs = remove_gaps(x_, y_, s_) self.assertEqual(x_, rx) self.assertEqual(y_, ry) self.assertEqual(s_, rs) @@ -240,7 +240,7 @@ def test_clip_ends(self): s = [x, m, m, m, y] x = 'GSSG' y = 'GEIR' - rx, ry, rs = clip_boundaries(x, y, s) + rx, ry, rs = remove_gaps(x, y, s) ex, ey, es = 'SSG', 'GEI', [m, m, m] self.assertEqual(ex, rx) self.assertEqual(ey, ry) @@ -252,7 +252,7 @@ def test_clip_ends_2(self): st = np.array([1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1]) - rx, ry, rs = clip_boundaries(gen, oth, st) + rx, ry, rs = remove_gaps(gen, oth, st) self.assertTrue(1) def test_pack_sequences(self): @@ -329,6 +329,46 @@ def test_gap_mask3(self): xidx = merge_mask(xmask, len(seq), len(seq)) yidx = merge_mask(ymask, len(seq), len(seq)) + + def test_gap_mask4(self): + + st1 = ('2222222222222222222222222222222222222222222222222222222222222.' + '22222222222222222222222222222222222222222222222222222222222222' + '22222222222222222222222222222222222222222222222222222222222222' + '2::.:.22222222222222222222222222222..2..:.::::::::::::11...::.' + '..:::.111111111..:..::::::1:.11.111.::::::.:::::::::::::::::2:' + ':..11111111111111122222222222222222222...:::::::::::::::::::2:' + '::..:::::::::::::::::::::::::2.:.:::::::::::::::2:::::::::::::' + '1::::::::::1:......:::::::::::::::1111.1.11:11:11111111.111111' + '1111111111111111111111111111.:1:::::::.2222.22.:...:..::..::.:' + '::.::::::.11.....::::::::222.22222222222222222222222222 ') + st1 = list(map(tmstate_f, st1)) + L = 205 + xmask, ymask = gap_mask(st1) + xidx = merge_mask(xmask, L, L) + yidx = merge_mask(ymask, L, L) + self.assertGreater(len(xidx), 0) + self.assertGreater(len(yidx), 0) + + seq = ('PKYQIIDAAVEVIAENGYHQSQVSKIAKQAGVADGTIYLYFKNKEDILISLFKEKGQFI' + 'EREEDIKEKATAKEKLALVISKHFSLLAGDHNLAIVTQLELRQSNLELRQKINEILKGY' + 'LNILDGILTEGIQSGEIKEGLDVRLARQIFGTIDETVTTWVNDQKYDLVALSNSVLELL' + 'VSGIHNK') + states = [2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, + 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, + 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 2, 2, 1] + xmask, ymask = gap_mask(states) + + self.assertLess(max(xmask), len(seq)) + def test_replace_orphans_small(self): s = ":11:11:" e = ":111211:" diff --git a/deepblast/dataset/utils.py b/deepblast/dataset/utils.py index ba2c93b..7cac1c5 100644 --- a/deepblast/dataset/utils.py +++ b/deepblast/dataset/utils.py @@ -36,17 +36,20 @@ def revstate_f(z): return ':' -def clip_boundaries(X, Y, A): +def remove_gaps(X, Y, A, clip_ends=True): """ Remove xs and ys from ends. """ - if A[0] == m: - first = 0 - else: - first = A.index(m) + first = 0 + last = len(A) + if clip_ends: + if A[0] == m: + first = 0 + else: + first = A.index(m) - if A[-1] == m: - last = len(A) - else: - last = len(A) - A[::-1].index(m) + if A[-1] == m: + last = len(A) + else: + last = len(A) - A[::-1].index(m) X, Y = states2alignment(np.array(A), X, Y) X_ = X[first:last].replace('-', '') Y_ = Y[first:last].replace('-', '') @@ -260,23 +263,22 @@ def collate_f(batch): x_len = list(map(len, genes)) y_len = list(map(len, others)) - max_x = max(x_len) max_y = max(y_len) + max_l = max(max_x, max_y) x_mask = [] y_mask = [] - B = len(genes) - dm = torch.zeros((B, max_x, max_y)) - p = torch.zeros((B, max_x, max_y)) + dm = torch.zeros((B, max_l, max_l)) + p = torch.zeros((B, max_l, max_l)) for b in range(B): n, m = len(genes[b]), len(others[b]) dm[b, :n, :m] = alignments[b] p[b, :n, :m] = paths[b] - gm = merge_mask(g_mask[b], n, max_x) - pm = merge_mask(p_mask[b], m, max_y) - assert len(gm) > 0 - assert len(pm) > 0 + gm = merge_mask(g_mask[b], n, max_l) + pm = merge_mask(p_mask[b], m, max_l) + assert len(gm) > 0, (len(g_mask[b]), max(g_mask[b]), n, max_l) + assert len(pm) > 0, (len(p_mask[b]), max(p_mask[b]), m, max_l) x_mask.append(gm) y_mask.append(pm) return genes, others, states, dm, p, (x_mask, y_mask) @@ -309,16 +311,16 @@ def path_distance_matrix(pi): return Pdist -def merge_mask(idx, length, mask_length): - pads = set(list(range(length, mask_length))) +def merge_mask(idx, length, max_len): + pads = set(list(range(length, max_len))) idx = set(idx.tolist()) | pads - allx = set(list(range(0, mask_length))) + allx = set(list(range(0, max_len))) idx = torch.Tensor(list(allx - idx)).long() return idx # Preprocessing functions -def gap_mask(states: str): +def gap_mask(states): """ Builds a mask for all gaps. Reports rows and columns that should be completely masked. @@ -327,6 +329,7 @@ def gap_mask(states: str): ---------- states : str List of alignment states + Returns ------- mask : np.array @@ -344,14 +347,9 @@ def gap_mask(states: str): elif states[k] == m: i += 1 j += 1 - # elif states[k] == '.': - # cols.append(i) - # rows.append(j) - # i += 1 - # j += 1 else: raise ValueError(f'{states[k]} is not recognized') - return np.array(rows), np.array(cols) + return np.array(cols), np.array(rows) def window(seq, n=2): diff --git a/ipynb/struct-benchmark.ipynb b/ipynb/struct-benchmark.ipynb index db3c7f5..548ffef 100644 --- a/ipynb/struct-benchmark.ipynb +++ b/ipynb/struct-benchmark.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -52,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -76,7 +76,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -90,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -99,7 +99,7 @@ "'/home/juermieboop/Documents/research/garfunkel/ipynb'" ] }, - "execution_count": 34, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -118,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -128,15 +128,14 @@ " '--valid-pairs', f'{os.getcwd()}/data/valid.txt',\n", " '--output-directory', output_dir,\n", " '--epochs', '128',\n", - " '--batch-size', '30', \n", - " '--num-workers', '4',\n", - " '--layers', '2',\n", - " '--heads', '8',\n", + " '--batch-size', '20', \n", + " '--num-workers', '16',\n", + " '--layers', '1',\n", + " '--heads', '1',\n", " '--learning-rate', '5e-5',\n", " '--visualization-fraction', '1',\n", " '--loss', 'cross_entropy',\n", " '--scheduler', 'steplr', \n", - " '--clip-ends', 'False',\n", " '--gpus', '1'\n", "]" ] @@ -150,7 +149,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -162,6 +161,26 @@ "model = LightningAligner(args)" ] }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Namespace(batch_size=2, clip_ends=False, embedding_dim=512, epochs=128, finetune=False, gpus=1, heads=1, layers=1, learning_rate=5e-05, loss='cross_entropy', multitask=False, num_workers=1, output_directory='struct_results', rnn_dim=512, rnn_input_dim=512, scheduler='steplr', test_pairs='/home/juermieboop/Documents/research/garfunkel/ipynb/data/test.txt', train_pairs='/home/juermieboop/Documents/research/garfunkel/ipynb/data/train.txt', valid_pairs='/home/juermieboop/Documents/research/garfunkel/ipynb/data/valid.txt', visualization_fraction=1.0)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "args" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -171,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "scrolled": false }, @@ -186,7 +205,9 @@ "\n", " | Name | Type | Params\n", "---------------------------------------------------\n", - "0 | aligner | NeedlemanWunschAligner | 38 M \n" + "0 | aligner | NeedlemanWunschAligner | 13 M \n", + "/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/distributed.py:25: UserWarning: The dataloader, val dataloader 0, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 32 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n", + " warnings.warn(*args, **kwargs)\n" ] }, { @@ -203,10 +224,20 @@ "metadata": {}, "output_type": "display_data" }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/distributed.py:25: UserWarning: The dataloader, train dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 32 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n", + " warnings.warn(*args, **kwargs)\n", + "/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/distributed.py:25: UserWarning: The dataloader, val dataloader 0, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 32 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n", + " warnings.warn(*args, **kwargs)\n" + ] + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d6835b2781f24213a9ceff5983ff3dc8", + "model_id": "3966592c4ca34163a654bd7032361657", "version_major": 2, "version_minor": 0 }, @@ -216,6 +247,47 @@ }, "metadata": {}, "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/distributed.py:25: UserWarning: Detected KeyboardInterrupt, attempting graceful shutdown...\n", + " warnings.warn(*args, **kwargs)\n" + ] + }, + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/queues.py\", line 245, in _feed\n", + " send_bytes(obj)\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/connection.py\", line 200, in send_bytes\n", + " self._send_bytes(m[offset:offset + size])\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/connection.py\", line 411, in _send_bytes\n", + " self._send(header + buf)\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/connection.py\", line 368, in _send\n", + " n = write(self._handle, buf)\n", + "BrokenPipeError: [Errno 32] Broken pipe\n" + ] } ], "source": [ @@ -242,28 +314,21 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { - "ename": "UnboundLocalError", - "evalue": "local variable 'child' referenced before assignment", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/IPython/utils/_process_posix.py\u001b[0m in \u001b[0;36msystem\u001b[0;34m(self, cmd)\u001b[0m\n\u001b[1;32m 155\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 156\u001b[0;31m \u001b[0mchild\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpexpect\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mspawn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msh\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'-c'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcmd\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# Vanilla Pexpect\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 157\u001b[0m \u001b[0mflush\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstdout\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mflush\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pexpect/pty_spawn.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, command, args, timeout, maxread, searchwindowsize, logfile, cwd, env, ignore_sighup, echo, preexec_fn, encoding, codec_errors, dimensions, use_poll)\u001b[0m\n\u001b[1;32m 204\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 205\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_spawn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpreexec_fn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdimensions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 206\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_poll\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0muse_poll\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pexpect/pty_spawn.py\u001b[0m in \u001b[0;36m_spawn\u001b[0;34m(self, command, args, preexec_fn, dimensions)\u001b[0m\n\u001b[1;32m 302\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 303\u001b[0;31m self.ptyproc = self._spawnpty(self.args, env=self.env,\n\u001b[0m\u001b[1;32m 304\u001b[0m cwd=self.cwd, **kwargs)\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pexpect/pty_spawn.py\u001b[0m in \u001b[0;36m_spawnpty\u001b[0;34m(self, args, **kwargs)\u001b[0m\n\u001b[1;32m 314\u001b[0m \u001b[0;34m'''Spawn a pty and return an instance of PtyProcess.'''\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 315\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mptyprocess\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPtyProcess\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mspawn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 316\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/ptyprocess/ptyprocess.py\u001b[0m in \u001b[0;36mspawn\u001b[0;34m(cls, argv, cwd, env, echo, preexec_fn, dimensions)\u001b[0m\n\u001b[1;32m 308\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexec_err_pipe_write\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 309\u001b[0;31m \u001b[0mexec_err_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexec_err_pipe_read\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m4096\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 310\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexec_err_pipe_read\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: ", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mUnboundLocalError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msystem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'ls lightning_logs'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/ipykernel/zmqshell.py\u001b[0m in \u001b[0;36msystem_piped\u001b[0;34m(self, cmd)\u001b[0m\n\u001b[1;32m 633\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muser_ns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'_exit_code'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msystem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcmd\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 634\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 635\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muser_ns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'_exit_code'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msystem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvar_expand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcmd\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdepth\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 636\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 637\u001b[0m \u001b[0;31m# Ensure new system_piped implementation is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/IPython/utils/_process_posix.py\u001b[0m in \u001b[0;36msystem\u001b[0;34m(self, cmd)\u001b[0m\n\u001b[1;32m 170\u001b[0m \u001b[0;31m# (the character is known as ETX for 'End of Text', see\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 171\u001b[0m \u001b[0;31m# curses.ascii.ETX).\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 172\u001b[0;31m \u001b[0mchild\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msendline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 173\u001b[0m \u001b[0;31m# Read and print any more output the program might produce on its\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[0;31m# way out.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mUnboundLocalError\u001b[0m: local variable 'child' referenced before assignment" + "name": "stdout", + "output_type": "stream", + "text": [ + "version_0 version_16\tversion_23 version_30\tversion_38 version_5\r\n", + "version_1 version_17\tversion_24 version_31\tversion_39 version_6\r\n", + "version_10 version_18\tversion_25 version_32\tversion_4 version_7\r\n", + "version_11 version_19\tversion_26 version_33\tversion_40 version_8\r\n", + "version_12 version_2\tversion_27 version_34\tversion_41 version_9\r\n", + "version_13 version_20\tversion_28 version_35\tversion_42\r\n", + "version_14 version_21\tversion_29 version_36\tversion_43\r\n", + "version_15 version_22\tversion_3 version_37\tversion_44\r\n" ] } ], @@ -273,7 +338,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -282,11 +347,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "scrolled": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "%tensorboard --logdir lightning_logs" ] @@ -300,9 +389,55 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "NeedlemanWunschAligner(\n", + " (lm): BiLM(\n", + " (embed): Embedding(22, 21, padding_idx=21)\n", + " (dropout): Dropout(p=0, inplace=False)\n", + " (rnn): ModuleList(\n", + " (0): LSTM(21, 1024, batch_first=True)\n", + " (1): LSTM(1024, 1024, batch_first=True)\n", + " )\n", + " (linear): Linear(in_features=1024, out_features=21, bias=True)\n", + " )\n", + " (match_embedding): EmbedLinear(\n", + " (embed): Embedding(21, 512, padding_idx=20)\n", + " (proj): Linear(in_features=512, out_features=512, bias=True)\n", + " )\n", + " (gap_embedding): EmbedLinear(\n", + " (embed): Embedding(21, 512, padding_idx=20)\n", + " (proj): Linear(in_features=512, out_features=512, bias=True)\n", + " )\n", + " (match_mixture): MultiheadProduct(\n", + " (multilinear): MultiLinear(\n", + " (multi_output): ModuleList(\n", + " (0): Linear(in_features=512, out_features=512, bias=True)\n", + " )\n", + " )\n", + " (linear): Linear(in_features=1, out_features=1, bias=True)\n", + " )\n", + " (gap_mixture): MultiheadProduct(\n", + " (multilinear): MultiLinear(\n", + " (multi_output): ModuleList(\n", + " (0): Linear(in_features=512, out_features=512, bias=True)\n", + " )\n", + " )\n", + " (linear): Linear(in_features=1, out_features=1, bias=True)\n", + " )\n", + " (nw): NeedlemanWunschDecoder()\n", + ")" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "model.aligner" ] @@ -316,18 +451,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'epoch=7.ckpt'\r\n" + ] + } + ], "source": [ "!ls lightning_logs/version_3/checkpoints" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'lightning_logs/version_3/checkpoints/epoch=49.ckpt'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mcheckpoint_dir\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'lightning_logs/version_3/checkpoints'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf'{checkpoint_dir}/epoch=49.ckpt'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLightningAligner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_from_checkpoint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/core/saving.py\u001b[0m in \u001b[0;36mload_from_checkpoint\u001b[0;34m(cls, checkpoint_path, map_location, hparams_file, tags_csv, *args, **kwargs)\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0mcheckpoint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpl_load\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 142\u001b[0;31m \u001b[0mcheckpoint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpl_load\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloc\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 143\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;31m# add the hparams from csv file to checkpoint\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/cloud_io.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(path_or_url, map_location)\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0murlparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscheme\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m''\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mPath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrive\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# no scheme or with a drive letter\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhub\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_state_dict_from_url\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(f, map_location, pickle_module, **pickle_load_args)\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0mpickle_load_args\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'encoding'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'utf-8'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 524\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 525\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_file\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 526\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_zipfile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopened_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 527\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0m_open_zipfile_reader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_zipfile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36m_open_file_like\u001b[0;34m(name_or_buffer, mode)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 212\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 213\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m'w'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, name, mode)\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_opener\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 193\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_open_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 194\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__exit__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'lightning_logs/version_3/checkpoints/epoch=49.ckpt'" + ] + } + ], "source": [ "from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint\n", "checkpoint_dir = 'lightning_logs/version_3/checkpoints'\n", From 837082a9d86b88c555fbaf7dbd1018346437767e Mon Sep 17 00:00:00 2001 From: mortonjt Date: Thu, 27 Aug 2020 20:28:39 -0700 Subject: [PATCH 18/30] adding more asserts to hunt down validation TB issue --- deepblast/trainer.py | 22 +++++++++++++++++++--- scripts/deepblast-train | 2 +- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/deepblast/trainer.py b/deepblast/trainer.py index b86f400..0d07188 100644 --- a/deepblast/trainer.py +++ b/deepblast/trainer.py @@ -21,6 +21,7 @@ from deepblast.score import roc_edges, alignment_visualization, alignment_text + class LightningAligner(pl.LightningModule): def __init__(self, args): @@ -167,7 +168,12 @@ def validation_stats(self, x, y, xlen, ylen, gen, truth_states = states[b].cpu().detach().numpy() pred_edges = states2edges(pred_states) true_edges = states2edges(truth_states) - stats = roc_edges(true_edges, pred_edges) + if len(pred_edges) == 0: + raise ValueError('No predicted edges', pred_states) + if len(true_edges) == 0: + raise ValueError('No truth edges', truth_states) + + stats = roc_edges(true_edges, pred_edges) if random.random() < self.hparams.visualization_fraction: Av = A[b].cpu().detach().numpy().squeeze() pv = predA[b].cpu().detach().numpy().squeeze() @@ -215,6 +221,15 @@ def validation_step(self, batch, batch_idx): return {'validation_loss': loss, 'log': tensorboard_logs} + def custom_parameter_histogram(self): + # iterating through all parameters + for name, params in self.named_parameters(): + self.logger.experiment.add_histogram( + f'{name}/value', params, self.global_step) + self.logger.experiment.add_histogram( + f'{name}/grad', parm.grad.data.cpu().numpy(), + self.global_step) + def validation_epoch_end(self, outputs): loss_f = lambda x: x['validation_loss'] losses = list(map(loss_f, outputs)) @@ -228,10 +243,11 @@ def validation_epoch_end(self, outputs): losses = np.array(list(map(loss_f, outputs))) losses = losses[np.logical_not(np.isnan(losses))] # scalar = sum(losses) / len(losses) - scalar = np.asscalar(np.mean(losses)) + scalar = sum(losses) / len(losses) scores.append(scalar) self.logger.experiment.add_scalar(m, scalar, self.global_step) - + + self.custom_parameter_histogram() tensorboard_logs = dict( [('val_loss', loss)] + list(zip(metrics, scores)) ) diff --git a/scripts/deepblast-train b/scripts/deepblast-train index 85c7b06..dd74d79 100644 --- a/scripts/deepblast-train +++ b/scripts/deepblast-train @@ -28,7 +28,7 @@ def main(args): distributed_backend=args.backend, precision=args.precision, # check_val_every_n_epoch=1, - val_check_interval=0.25, + val_check_interval=3000, fast_dev_run=False, # auto_scale_batch_size='power', # profiler=profiler, From ec392c3f375599428d013bb53724e0177c69029d Mon Sep 17 00:00:00 2001 From: mortonjt Date: Sat, 29 Aug 2020 09:14:20 -0700 Subject: [PATCH 19/30] adding more logging options --- deepblast/trainer.py | 34 +++++++++++++++++++++++----------- scripts/deepblast-train | 6 ++++-- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/deepblast/trainer.py b/deepblast/trainer.py index 0d07188..04fe20d 100644 --- a/deepblast/trainer.py +++ b/deepblast/trainer.py @@ -19,7 +19,7 @@ SoftAlignmentLoss, SoftPathLoss, MatrixCrossEntropy, L2MatrixCrossEntropy) from deepblast.score import roc_edges, alignment_visualization, alignment_text - +import warnings class LightningAligner(pl.LightningModule): @@ -224,15 +224,17 @@ def validation_step(self, batch, batch_idx): def custom_parameter_histogram(self): # iterating through all parameters for name, params in self.named_parameters(): - self.logger.experiment.add_histogram( - f'{name}/value', params, self.global_step) - self.logger.experiment.add_histogram( - f'{name}/grad', parm.grad.data.cpu().numpy(), - self.global_step) + if params.requires_grad and (params.grad is not None): + self.logger.experiment.add_histogram( + f'{name}/value', params, self.global_step) + self.logger.experiment.add_histogram( + f'{name}/grad', params.grad, self.global_step) def validation_epoch_end(self, outputs): loss_f = lambda x: x['validation_loss'] losses = list(map(loss_f, outputs)) + if len(losses) == 0: + raise ValueError('No losses reported', output) loss = sum(losses) / len(losses) self.logger.experiment.add_scalar('val_loss', loss, self.global_step) metrics = ['val_tp', 'val_fp', 'val_fn', 'val_perc_id', @@ -241,11 +243,14 @@ def validation_epoch_end(self, outputs): for i, m in enumerate(metrics): loss_f = lambda x: x['log'][m] losses = np.array(list(map(loss_f, outputs))) - losses = losses[np.logical_not(np.isnan(losses))] - # scalar = sum(losses) / len(losses) - scalar = sum(losses) / len(losses) - scores.append(scalar) - self.logger.experiment.add_scalar(m, scalar, self.global_step) + losses = losses[np.logical_not(np.isnan(losses))] + if len(losses) > 0: + # scalar = sum(losses) / len(losses) + scalar = sum(losses) / len(losses) + scores.append(scalar) + self.logger.experiment.add_scalar(m, scalar, self.global_step) + else: + warnings.warn(f'No losses reported for {m}.', RuntimeWarning) self.custom_parameter_histogram() tensorboard_logs = dict( @@ -285,6 +290,13 @@ def configure_optimizers(self): steps = int(np.log2(self.hparams.learning_rate / m)) steps = self.hparams.epochs // steps scheduler = StepLR(optimizer, step_size=steps, gamma=0.5) + elif self.hparams.scheduler == 'inv_steplr': + m = 1e-3 # maximum learning rate + optimizer = torch.optim.Adam( + self.model.parameters(), lr=m) + steps = int(np.log2(m / self.hparams.learning_rate)) + steps = self.hparams.epochs // steps + scheduler = StepLR(optimizer, step_size=steps, gamma=0.5) elif self.hparams.scheduler == 'none': return [optimizer] else: diff --git a/scripts/deepblast-train b/scripts/deepblast-train index dd74d79..b992f22 100644 --- a/scripts/deepblast-train +++ b/scripts/deepblast-train @@ -24,12 +24,14 @@ def main(args): num_nodes=args.nodes, accumulate_grad_batches=args.grad_accum, gradient_clip_val=args.grad_clip, - + track_grad_norm=2, distributed_backend=args.backend, precision=args.precision, # check_val_every_n_epoch=1, - val_check_interval=3000, + val_check_interval=0.25, fast_dev_run=False, + # overfit the data + overfit_pct=0.01, # auto_scale_batch_size='power', # profiler=profiler, ) From 62376226c9f48a0c01a32b08aac1499017624c83 Mon Sep 17 00:00:00 2001 From: mortonjt Date: Sat, 29 Aug 2020 22:41:37 -0600 Subject: [PATCH 20/30] adding batch normalization --- deepblast/alignment.py | 19 ++- deepblast/embedding.py | 38 +++++- deepblast/trainer.py | 27 ++-- ipynb/simulation-benchmark.ipynb | 81 +++++------ ipynb/struct-benchmark.ipynb | 223 +++++++------------------------ scripts/deepblast-train | 5 +- 6 files changed, 156 insertions(+), 237 deletions(-) diff --git a/deepblast/alignment.py b/deepblast/alignment.py index def1c2e..e76d9f2 100644 --- a/deepblast/alignment.py +++ b/deepblast/alignment.py @@ -1,5 +1,6 @@ import torch import torch.nn as nn +import torch.nn.functional as F from deepblast.language_model import BiLM, pretrained_language_models from deepblast.nw_cuda import NeedlemanWunschDecoder as NWDecoderCUDA from deepblast.embedding import StackedRNN, EmbedLinear, MultiheadProduct @@ -7,6 +8,10 @@ import math +def swish(x): + return x * F.sigmoid(x) + + class NeedlemanWunschAligner(nn.Module): def __init__(self, n_alpha, n_input, n_units, n_embed, @@ -43,16 +48,22 @@ def __init__(self, n_alpha, n_input, n_units, n_embed, self.lm = BiLM() self.lm.load_state_dict(torch.load(path)) self.lm.eval() + transform = swish if n_layers > 1: self.match_embedding = StackedRNN( - n_alpha, n_input, n_units, n_embed, n_layers, lm=lm) + n_alpha, n_input, n_units, n_embed, n_layers, lm=lm, + transform=swish, rnn_type='gru') self.gap_embedding = StackedRNN( - n_alpha, n_input, n_units, n_embed, n_layers, lm=lm) + n_alpha, n_input, n_units, n_embed, n_layers, lm=lm, + transform=swish, rnn_type='gru') else: self.match_embedding = EmbedLinear( - n_alpha, n_input, n_embed, lm=lm) + n_alpha, n_input, n_embed, lm=lm, + transform=swish) self.gap_embedding = EmbedLinear( - n_alpha, n_input, n_embed, lm=lm) + n_alpha, n_input, n_embed, lm=lm, + transform=swish) + self.match_mixture = MultiheadProduct(n_embed, n_embed, n_heads) self.gap_mixture = MultiheadProduct(n_embed, n_embed, n_heads) # TODO: make cpu compatible version diff --git a/deepblast/embedding.py b/deepblast/embedding.py index a4bd97e..d62ca02 100644 --- a/deepblast/embedding.py +++ b/deepblast/embedding.py @@ -3,14 +3,36 @@ from torch.nn.utils.rnn import PackedSequence +def init_weights(m): + # https://stackoverflow.com/a/49433937/1167475 + if type(m) == nn.Linear: + nn.init.xavier_uniform(m.weight) + m.bias.data.fill_(0.01) + + +class BatchNorm(nn.Module): + """ Batch normalization for RNN outputs. """ + def __init__(self, num_features): + super(BatchNorm, self).__init__() + self.bn = nn.BatchNorm1d(num_features=num_features) + def forward(self, x): + return self.bn(x.permute(0, 2, 1)).permute(0, 2, 1) + + class MultiLinear(nn.Module): """ Multiple linear layers concatenated together""" def __init__(self, n_input, n_output, n_heads=16): super(MultiLinear, self).__init__() self.multi_output = nn.ModuleList( - [nn.Linear(n_input, n_output) - for i in range(n_heads)] + [ + nn.Sequential( + BatchNorm(n_input), + nn.Linear(n_input, n_output) + ) + for i in range(n_heads) + ] ) + # self.multi_output.apply(init_weights) def forward(self, x): outputs = torch.stack( @@ -23,6 +45,7 @@ def __init__(self, n_input, n_output, n_heads=16): super(MultiheadProduct, self).__init__() self.multilinear = MultiLinear(n_input, n_output, n_heads) self.linear = nn.Linear(n_heads, 1) + nn.init.xavier_uniform(self.linear.weight) def forward(self, x, y): zx = self.multilinear(x) @@ -71,7 +94,7 @@ def forward(self, x): class EmbedLinear(nn.Module): def __init__(self, nin, nhidden, nout, padding_idx=-1, - sparse=False, lm=None): + sparse=False, lm=None, transform=nn.ReLU()): super(EmbedLinear, self).__init__() if padding_idx == -1: @@ -79,7 +102,8 @@ def __init__(self, nin, nhidden, nout, padding_idx=-1, if lm is not None: self.embed = LMEmbed( - nin, nhidden, lm, padding_idx=padding_idx, sparse=sparse) + nin, nhidden, lm, padding_idx=padding_idx, sparse=sparse, + transform=transform) self.proj = nn.Linear(self.embed.nout, nout) self.lm = True else: @@ -88,6 +112,7 @@ def __init__(self, nin, nhidden, nout, padding_idx=-1, self.proj = nn.Linear(nout, nout) self.lm = False + init_weights(self.proj) self.nout = nout def forward(self, x): @@ -115,7 +140,7 @@ def forward(self, x): class StackedRNN(nn.Module): def __init__(self, nin, nembed, nunits, nout, nlayers=2, padding_idx=-1, dropout=0, rnn_type='lstm', - sparse=False, lm=None): + sparse=False, lm=None, transform=nn.ReLU()): super(StackedRNN, self).__init__() if padding_idx == -1: @@ -123,7 +148,8 @@ def __init__(self, nin, nembed, nunits, nout, nlayers=2, if lm is not None: self.embed = LMEmbed( - nin, nembed, lm, padding_idx=padding_idx, sparse=sparse) + nin, nembed, lm, padding_idx=padding_idx, sparse=sparse, + transform=transform) nembed = self.embed.nout self.lm = True else: diff --git a/deepblast/trainer.py b/deepblast/trainer.py index 04fe20d..c697b5f 100644 --- a/deepblast/trainer.py +++ b/deepblast/trainer.py @@ -147,6 +147,10 @@ def training_step(self, batch, batch_idx): current_lr = current_lr.get_last_lr()[0] else: current_lr = self.hparams.learning_rate + + if batch_idx % 100 == 0: + self.custom_parameter_histogram() + tensorboard_logs = {'train_loss': loss, 'lr': current_lr} # log the learning rate return {'loss': loss, 'log': tensorboard_logs} @@ -173,7 +177,7 @@ def validation_stats(self, x, y, xlen, ylen, gen, if len(true_edges) == 0: raise ValueError('No truth edges', truth_states) - stats = roc_edges(true_edges, pred_edges) + stats = roc_edges(true_edges, pred_edges) if random.random() < self.hparams.visualization_fraction: Av = A[b].cpu().detach().numpy().squeeze() pv = predA[b].cpu().detach().numpy().squeeze() @@ -221,14 +225,20 @@ def validation_step(self, batch, batch_idx): return {'validation_loss': loss, 'log': tensorboard_logs} - def custom_parameter_histogram(self): + def custom_parameter_histogram(self): # iterating through all parameters - for name, params in self.named_parameters(): + for name, params in self.named_parameters(): if params.requires_grad and (params.grad is not None): self.logger.experiment.add_histogram( f'{name}/value', params, self.global_step) - self.logger.experiment.add_histogram( - f'{name}/grad', params.grad, self.global_step) + + def on_after_backward(self): + # example to inspect gradient information in tensorboard + if self.trainer.global_step % 20 == 0: # don't make the tf file huge + for name, params in self.named_parameters(): + if params.requires_grad and (params.grad is not None): + self.logger.experiment.add_histogram( + f'{name}/grad', params.grad, self.global_step) def validation_epoch_end(self, outputs): loss_f = lambda x: x['validation_loss'] @@ -251,8 +261,7 @@ def validation_epoch_end(self, outputs): self.logger.experiment.add_scalar(m, scalar, self.global_step) else: warnings.warn(f'No losses reported for {m}.', RuntimeWarning) - - self.custom_parameter_histogram() + tensorboard_logs = dict( [('val_loss', loss)] + list(zip(metrics, scores)) ) @@ -273,7 +282,7 @@ def configure_optimizers(self): grad_params, lr=self.hparams.learning_rate) if self.hparams.scheduler == 'cosine_restarts': scheduler = CosineAnnealingWarmRestarts( - optimizer, T_0=1, T_mult=2) + optimizer, T_0=1, T_mult=1) elif self.hparams.scheduler == 'cosine': scheduler = CosineAnnealingLR(optimizer, T_max=self.hparams.epochs) elif self.hparams.scheduler == 'triangular': @@ -293,7 +302,7 @@ def configure_optimizers(self): elif self.hparams.scheduler == 'inv_steplr': m = 1e-3 # maximum learning rate optimizer = torch.optim.Adam( - self.model.parameters(), lr=m) + grad_params, lr=m) steps = int(np.log2(m / self.hparams.learning_rate)) steps = self.hparams.epochs // steps scheduler = StepLR(optimizer, step_size=steps, gamma=0.5) diff --git a/ipynb/simulation-benchmark.ipynb b/ipynb/simulation-benchmark.ipynb index b48ce3f..05d3ac1 100644 --- a/ipynb/simulation-benchmark.ipynb +++ b/ipynb/simulation-benchmark.ipynb @@ -133,8 +133,8 @@ " '--batch-size', '20', \n", " '--num-workers', '30',\n", " '--learning-rate', '1e-3', \n", - " '--layers', '2',\n", - " '--heads', '8',\n", + " '--layers', '1',\n", + " '--heads', '1',\n", " '--visualization-fraction', '1',\n", " '--loss', 'l2_cross_entropy',\n", " '--scheduler', 'cosine',\n", @@ -175,7 +175,18 @@ "cell_type": "code", "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/juermieboop/Documents/research/garfunkel/deepblast/embedding.py:9: UserWarning: nn.init.xavier_uniform is now deprecated in favor of nn.init.xavier_uniform_.\n", + " nn.init.xavier_uniform(m.weight)\n", + "/home/juermieboop/Documents/research/garfunkel/deepblast/embedding.py:48: UserWarning: nn.init.xavier_uniform is now deprecated in favor of nn.init.xavier_uniform_.\n", + " nn.init.xavier_uniform(self.linear.weight)\n" + ] + } + ], "source": [ "model = LightningAligner(args)" ] @@ -204,13 +215,13 @@ "\n", " | Name | Type | Params\n", "---------------------------------------------------\n", - "0 | aligner | NeedlemanWunschAligner | 38 M \n" + "0 | aligner | NeedlemanWunschAligner | 13 M \n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b143966e555b46bd9ada1db2fc13e453", + "model_id": "56adf638851a4db4a1ebd462faef37cf", "version_major": 2, "version_minor": 0 }, @@ -224,7 +235,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "bbe1327e962f436c88dd62a3e255810a", + "model_id": "a727c468cbfc4ad88223d7701020d90b", "version_major": 2, "version_minor": 0 }, @@ -302,6 +313,7 @@ " max_epochs=args.epochs,\n", " gpus=args.gpus,\n", " check_val_every_n_epoch=10,\n", + " gradient_clip_val=10\n", " # profiler=profiler,\n", " # fast_dev_run=True,\n", " # auto_scale_batch_size='power'\n", @@ -326,8 +338,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "version_0 version_10 version_12 version_3 version_5 version_7 version_9\r\n", - "version_1 version_11 version_2 version_4 version_6 version_8\r\n" + "version_0 version_11\tversion_14 version_17\tversion_2 version_5 version_8\r\n", + "version_1 version_12\tversion_15 version_18\tversion_3 version_6 version_9\r\n", + "version_10 version_13\tversion_16 version_19\tversion_4 version_7\r\n" ] } ], @@ -354,7 +367,7 @@ { "data": { "text/plain": [ - "Reusing TensorBoard on port 6006 (pid 14351), started 2:51:54 ago. (Use '!kill 14351' to kill it.)" + "Reusing TensorBoard on port 6006 (pid 9310), started 4:45:19 ago. (Use '!kill 9310' to kill it.)" ] }, "metadata": {}, @@ -364,11 +377,11 @@ "data": { "text/html": [ "\n", - " \n", " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "%tensorboard --logdir lightning_logs" ] @@ -389,55 +329,9 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "NeedlemanWunschAligner(\n", - " (lm): BiLM(\n", - " (embed): Embedding(22, 21, padding_idx=21)\n", - " (dropout): Dropout(p=0, inplace=False)\n", - " (rnn): ModuleList(\n", - " (0): LSTM(21, 1024, batch_first=True)\n", - " (1): LSTM(1024, 1024, batch_first=True)\n", - " )\n", - " (linear): Linear(in_features=1024, out_features=21, bias=True)\n", - " )\n", - " (match_embedding): EmbedLinear(\n", - " (embed): Embedding(21, 512, padding_idx=20)\n", - " (proj): Linear(in_features=512, out_features=512, bias=True)\n", - " )\n", - " (gap_embedding): EmbedLinear(\n", - " (embed): Embedding(21, 512, padding_idx=20)\n", - " (proj): Linear(in_features=512, out_features=512, bias=True)\n", - " )\n", - " (match_mixture): MultiheadProduct(\n", - " (multilinear): MultiLinear(\n", - " (multi_output): ModuleList(\n", - " (0): Linear(in_features=512, out_features=512, bias=True)\n", - " )\n", - " )\n", - " (linear): Linear(in_features=1, out_features=1, bias=True)\n", - " )\n", - " (gap_mixture): MultiheadProduct(\n", - " (multilinear): MultiLinear(\n", - " (multi_output): ModuleList(\n", - " (0): Linear(in_features=512, out_features=512, bias=True)\n", - " )\n", - " )\n", - " (linear): Linear(in_features=1, out_features=1, bias=True)\n", - " )\n", - " (nw): NeedlemanWunschDecoder()\n", - ")" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "model.aligner" ] @@ -451,43 +345,18 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'epoch=7.ckpt'\r\n" - ] - } - ], + "outputs": [], "source": [ "!ls lightning_logs/version_3/checkpoints" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: 'lightning_logs/version_3/checkpoints/epoch=49.ckpt'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mcheckpoint_dir\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'lightning_logs/version_3/checkpoints'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf'{checkpoint_dir}/epoch=49.ckpt'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLightningAligner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_from_checkpoint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/core/saving.py\u001b[0m in \u001b[0;36mload_from_checkpoint\u001b[0;34m(cls, checkpoint_path, map_location, hparams_file, tags_csv, *args, **kwargs)\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0mcheckpoint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpl_load\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 142\u001b[0;31m \u001b[0mcheckpoint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpl_load\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloc\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 143\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;31m# add the hparams from csv file to checkpoint\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/cloud_io.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(path_or_url, map_location)\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0murlparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscheme\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m''\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mPath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrive\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# no scheme or with a drive letter\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhub\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_state_dict_from_url\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(f, map_location, pickle_module, **pickle_load_args)\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0mpickle_load_args\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'encoding'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'utf-8'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 524\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 525\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_file\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 526\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_zipfile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopened_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 527\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0m_open_zipfile_reader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_zipfile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36m_open_file_like\u001b[0;34m(name_or_buffer, mode)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 212\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 213\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m'w'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, name, mode)\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_opener\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 193\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_open_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 194\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__exit__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'lightning_logs/version_3/checkpoints/epoch=49.ckpt'" - ] - } - ], + "outputs": [], "source": [ "from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint\n", "checkpoint_dir = 'lightning_logs/version_3/checkpoints'\n", diff --git a/scripts/deepblast-train b/scripts/deepblast-train index b992f22..1f55276 100644 --- a/scripts/deepblast-train +++ b/scripts/deepblast-train @@ -24,14 +24,13 @@ def main(args): num_nodes=args.nodes, accumulate_grad_batches=args.grad_accum, gradient_clip_val=args.grad_clip, - track_grad_norm=2, distributed_backend=args.backend, precision=args.precision, # check_val_every_n_epoch=1, val_check_interval=0.25, fast_dev_run=False, # overfit the data - overfit_pct=0.01, + # overfit_pct=0.01, # auto_scale_batch_size='power', # profiler=profiler, ) @@ -65,7 +64,7 @@ if __name__ == '__main__': parser = argparse.ArgumentParser(add_help=False) parser.add_argument('--gpus', type=int, default=None) parser.add_argument('--grad-accum', type=int, default=1) - parser.add_argument('--grad-clip', type=int, default=0) + parser.add_argument('--grad-clip', type=int, default=10) parser.add_argument('--nodes', type=int, default=1) parser.add_argument('--num-workers', type=int, default=1) parser.add_argument('--precision', type=int, default=32) From 9439ccc71f80bb62a0edbc626ec566ae31abe98c Mon Sep 17 00:00:00 2001 From: mortonjt Date: Sat, 29 Aug 2020 22:42:24 -0600 Subject: [PATCH 21/30] adding weight initialization --- deepblast/embedding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepblast/embedding.py b/deepblast/embedding.py index d62ca02..ccc22e2 100644 --- a/deepblast/embedding.py +++ b/deepblast/embedding.py @@ -32,7 +32,7 @@ def __init__(self, n_input, n_output, n_heads=16): for i in range(n_heads) ] ) - # self.multi_output.apply(init_weights) + self.multi_output.apply(init_weights) def forward(self, x): outputs = torch.stack( From 514d0a7966e381459abb5c060ff21759c2e3df1b Mon Sep 17 00:00:00 2001 From: mortonjt Date: Sat, 29 Aug 2020 22:47:26 -0600 Subject: [PATCH 22/30] removing batch norm --- deepblast/embedding.py | 2 +- ipynb/simulation-benchmark.ipynb | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/deepblast/embedding.py b/deepblast/embedding.py index ccc22e2..7bb1ff0 100644 --- a/deepblast/embedding.py +++ b/deepblast/embedding.py @@ -26,7 +26,7 @@ def __init__(self, n_input, n_output, n_heads=16): self.multi_output = nn.ModuleList( [ nn.Sequential( - BatchNorm(n_input), + # BatchNorm(n_input), nn.Linear(n_input, n_output) ) for i in range(n_heads) diff --git a/ipynb/simulation-benchmark.ipynb b/ipynb/simulation-benchmark.ipynb index 05d3ac1..98f07eb 100644 --- a/ipynb/simulation-benchmark.ipynb +++ b/ipynb/simulation-benchmark.ipynb @@ -221,7 +221,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "56adf638851a4db4a1ebd462faef37cf", + "model_id": "b24b98afd3784e68a2a8ea2fc5ec86fd", "version_major": 2, "version_minor": 0 }, @@ -235,7 +235,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a727c468cbfc4ad88223d7701020d90b", + "model_id": "aa0536cc560144e296ae395f2455cc82", "version_major": 2, "version_minor": 0 }, @@ -338,9 +338,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "version_0 version_11\tversion_14 version_17\tversion_2 version_5 version_8\r\n", - "version_1 version_12\tversion_15 version_18\tversion_3 version_6 version_9\r\n", - "version_10 version_13\tversion_16 version_19\tversion_4 version_7\r\n" + "version_0\r\n" ] } ], @@ -367,7 +365,7 @@ { "data": { "text/plain": [ - "Reusing TensorBoard on port 6006 (pid 9310), started 4:45:19 ago. (Use '!kill 9310' to kill it.)" + "Reusing TensorBoard on port 6006 (pid 9310), started 4:53:54 ago. (Use '!kill 9310' to kill it.)" ] }, "metadata": {}, @@ -377,11 +375,11 @@ "data": { "text/html": [ "\n", - " \n", " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "%tensorboard --logdir lightning_logs" ] @@ -408,102 +336,27 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "NeedlemanWunschAligner(\n", - " (lm): BiLM(\n", - " (embed): Embedding(22, 21, padding_idx=21)\n", - " (dropout): Dropout(p=0, inplace=False)\n", - " (rnn): ModuleList(\n", - " (0): LSTM(21, 1024, batch_first=True)\n", - " (1): LSTM(1024, 1024, batch_first=True)\n", - " )\n", - " (linear): Linear(in_features=1024, out_features=21, bias=True)\n", - " )\n", - " (match_embedding): EmbedLinear(\n", - " (embed): Embedding(21, 512, padding_idx=20)\n", - " (proj): Linear(in_features=512, out_features=512, bias=True)\n", - " )\n", - " (gap_embedding): EmbedLinear(\n", - " (embed): Embedding(21, 512, padding_idx=20)\n", - " (proj): Linear(in_features=512, out_features=512, bias=True)\n", - " )\n", - " (match_mixture): MultiheadProduct(\n", - " (multilinear): MultiLinear(\n", - " (multi_output): ModuleList(\n", - " (0): Sequential(\n", - " (0): Linear(in_features=512, out_features=512, bias=True)\n", - " )\n", - " )\n", - " )\n", - " (linear): Linear(in_features=1, out_features=1, bias=True)\n", - " )\n", - " (gap_mixture): MultiheadProduct(\n", - " (multilinear): MultiLinear(\n", - " (multi_output): ModuleList(\n", - " (0): Sequential(\n", - " (0): Linear(in_features=512, out_features=512, bias=True)\n", - " )\n", - " )\n", - " )\n", - " (linear): Linear(in_features=1, out_features=1, bias=True)\n", - " )\n", - " (nw): NeedlemanWunschDecoder()\n", - ")" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "model.aligner" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ls: cannot access 'lightning_logs/version_5/checkpoints': No such file or directory\r\n" - ] - } - ], + "outputs": [], "source": [ "!ls lightning_logs/version_5/checkpoints" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: 'lightning_logs/version_70/checkpoints/epoch=59.ckpt'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mcheckpoint_dir\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'lightning_logs/version_70/checkpoints'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf'{checkpoint_dir}/epoch=59.ckpt'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLightningAligner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_from_checkpoint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/core/saving.py\u001b[0m in \u001b[0;36mload_from_checkpoint\u001b[0;34m(cls, checkpoint_path, map_location, hparams_file, tags_csv, *args, **kwargs)\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0mcheckpoint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpl_load\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 142\u001b[0;31m \u001b[0mcheckpoint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpl_load\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloc\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 143\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;31m# add the hparams from csv file to checkpoint\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/cloud_io.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(path_or_url, map_location)\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0murlparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscheme\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m''\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mPath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrive\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# no scheme or with a drive letter\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhub\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_state_dict_from_url\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(f, map_location, pickle_module, **pickle_load_args)\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0mpickle_load_args\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'encoding'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'utf-8'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 524\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 525\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_file\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 526\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_zipfile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopened_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 527\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0m_open_zipfile_reader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_zipfile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36m_open_file_like\u001b[0;34m(name_or_buffer, mode)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 212\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 213\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m'w'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, name, mode)\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_opener\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 193\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_open_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 194\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__exit__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'lightning_logs/version_70/checkpoints/epoch=59.ckpt'" - ] - } - ], + "outputs": [], "source": [ "from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint\n", "checkpoint_dir = 'lightning_logs/version_70/checkpoints'\n", From 9190411aa34349445c938473ac2da518c05c78ac Mon Sep 17 00:00:00 2001 From: mortonjt Date: Sat, 29 Aug 2020 22:57:32 -0600 Subject: [PATCH 25/30] adding more layers --- ipynb/simulation-benchmark.ipynb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ipynb/simulation-benchmark.ipynb b/ipynb/simulation-benchmark.ipynb index bbd0a92..6daff7d 100644 --- a/ipynb/simulation-benchmark.ipynb +++ b/ipynb/simulation-benchmark.ipynb @@ -133,8 +133,8 @@ " '--batch-size', '20', \n", " '--num-workers', '30',\n", " '--learning-rate', '1e-3', \n", - " '--layers', '1',\n", - " '--heads', '4',\n", + " '--layers', '2',\n", + " '--heads', '1',\n", " '--visualization-fraction', '1',\n", " '--loss', 'l2_cross_entropy',\n", " '--scheduler', 'cosine',\n", @@ -215,13 +215,13 @@ "\n", " | Name | Type | Params\n", "---------------------------------------------------\n", - "0 | aligner | NeedlemanWunschAligner | 15 M \n" + "0 | aligner | NeedlemanWunschAligner | 30 M \n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8c2397a36a1c4aed9bb0aa91e9079dc4", + "model_id": "d3dc59b28da34b6180a4daeddc7781cc", "version_major": 2, "version_minor": 0 }, @@ -235,7 +235,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9066a094d3034078b0c2569eb43f458b", + "model_id": "f6ec09780281499a87dddc4ee64d24b3", "version_major": 2, "version_minor": 0 }, @@ -263,7 +263,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "cf7a49856b5f4fd2ab3808bbb2d92e22", + "model_id": "", "version_major": 2, "version_minor": 0 }, From 60cc74a0ec09dcfc9fc75b59fa5ef1012ce8d9a9 Mon Sep 17 00:00:00 2001 From: mortonjt Date: Sun, 30 Aug 2020 11:06:34 -0700 Subject: [PATCH 26/30] simplifying validation --- deepblast/trainer.py | 19 ++++++++----------- scripts/deepblast-train | 4 ++-- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/deepblast/trainer.py b/deepblast/trainer.py index c697b5f..ce5fcd5 100644 --- a/deepblast/trainer.py +++ b/deepblast/trainer.py @@ -148,9 +148,6 @@ def training_step(self, batch, batch_idx): else: current_lr = self.hparams.learning_rate - if batch_idx % 100 == 0: - self.custom_parameter_histogram() - tensorboard_logs = {'train_loss': loss, 'lr': current_lr} # log the learning rate return {'loss': loss, 'log': tensorboard_logs} @@ -232,13 +229,13 @@ def custom_parameter_histogram(self): self.logger.experiment.add_histogram( f'{name}/value', params, self.global_step) - def on_after_backward(self): - # example to inspect gradient information in tensorboard - if self.trainer.global_step % 20 == 0: # don't make the tf file huge - for name, params in self.named_parameters(): - if params.requires_grad and (params.grad is not None): - self.logger.experiment.add_histogram( - f'{name}/grad', params.grad, self.global_step) + # def on_after_backward(self): + # # example to inspect gradient information in tensorboard + # if self.trainer.global_step % 200 == 0: # don't make the tf file huge + # for name, params in self.named_parameters(): + # if params.requires_grad and (params.grad is not None): + # self.logger.experiment.add_histogram( + # f'{name}/grad', params.grad, self.global_step) def validation_epoch_end(self, outputs): loss_f = lambda x: x['validation_loss'] @@ -255,13 +252,13 @@ def validation_epoch_end(self, outputs): losses = np.array(list(map(loss_f, outputs))) losses = losses[np.logical_not(np.isnan(losses))] if len(losses) > 0: - # scalar = sum(losses) / len(losses) scalar = sum(losses) / len(losses) scores.append(scalar) self.logger.experiment.add_scalar(m, scalar, self.global_step) else: warnings.warn(f'No losses reported for {m}.', RuntimeWarning) + self.custom_parameter_histogram() tensorboard_logs = dict( [('val_loss', loss)] + list(zip(metrics, scores)) ) diff --git a/scripts/deepblast-train b/scripts/deepblast-train index 1f55276..73fb68e 100644 --- a/scripts/deepblast-train +++ b/scripts/deepblast-train @@ -26,8 +26,8 @@ def main(args): gradient_clip_val=args.grad_clip, distributed_backend=args.backend, precision=args.precision, - # check_val_every_n_epoch=1, - val_check_interval=0.25, + check_val_every_n_epoch=1, + #val_check_interval=0.25, fast_dev_run=False, # overfit the data # overfit_pct=0.01, From 41e28e42019c71055b37906890328f7ab2f90137 Mon Sep 17 00:00:00 2001 From: mortonjt Date: Sun, 30 Aug 2020 11:10:20 -0700 Subject: [PATCH 27/30] zeroing out gap edges --- deepblast/alignment.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/deepblast/alignment.py b/deepblast/alignment.py index e76d9f2..3d474dc 100644 --- a/deepblast/alignment.py +++ b/deepblast/alignment.py @@ -91,11 +91,11 @@ def forward(self, x, order): gx, _, gy, _ = unpack_sequences(self.gap_embedding(x), order) # Obtain theta through an inner product across latent dimensions theta = self.match_mixture(zx, zy) - A = self.gap_mixture(gx, gy) + # A = self.gap_mixture(gx, gy) # zero out first row and first column for local alignments - # L = gx.shape[1] - # A = torch.zeros((L, L)) - # A[1:, 1:] = self.gap_mixture(gx[:, 1:, :], gy[:, 1:, :]) + L = gx.shape[1] + A = torch.zeros((L, L)) + A[1:, 1:] = self.gap_mixture(gx[:, 1:, :], gy[:, 1:, :]) aln = self.nw.decode(theta, A) return aln, theta, A From b5d3e7a402520919d99863831e2674d868259700 Mon Sep 17 00:00:00 2001 From: mortonjt Date: Sun, 30 Aug 2020 14:59:54 -0600 Subject: [PATCH 28/30] removing end gap zeroing --- deepblast/alignment.py | 11 +- ipynb/simulation-benchmark.ipynb | 217 +++++++++++++++++++--- ipynb/struct-benchmark.ipynb | 310 ++++++++++++++++++++++++++----- 3 files changed, 461 insertions(+), 77 deletions(-) diff --git a/deepblast/alignment.py b/deepblast/alignment.py index 3d474dc..34d8129 100644 --- a/deepblast/alignment.py +++ b/deepblast/alignment.py @@ -91,11 +91,11 @@ def forward(self, x, order): gx, _, gy, _ = unpack_sequences(self.gap_embedding(x), order) # Obtain theta through an inner product across latent dimensions theta = self.match_mixture(zx, zy) - # A = self.gap_mixture(gx, gy) + gap = self.gap_mixture(gx, gy) + #G = self.gap_mixture(gx, gy) # zero out first row and first column for local alignments - L = gx.shape[1] - A = torch.zeros((L, L)) - A[1:, 1:] = self.gap_mixture(gx[:, 1:, :], gy[:, 1:, :]) + #A = torch.zeros(G.shape).to(G.device) + #A[:, 1:, 1:] = G[:, 1:, 1:] aln = self.nw.decode(theta, A) return aln, theta, A @@ -107,6 +107,9 @@ def traceback(self, x, order): gx, xlen, gy, ylen = unpack_sequences(self.gap_embedding(x), order) match = self.match_mixture(zx, zy) gap = self.gap_mixture(gx, gy) + # G = self.gap_mixture(gx, gy) + # gap = torch.zeros(G.shape).to(G.device) + # gap[:, 1:, 1:] = G[:, 1:, 1:] # zero out first row and first column for local alignments # L = gx.shape[1] diff --git a/ipynb/simulation-benchmark.ipynb b/ipynb/simulation-benchmark.ipynb index 6daff7d..2ee6929 100644 --- a/ipynb/simulation-benchmark.ipynb +++ b/ipynb/simulation-benchmark.ipynb @@ -19,6 +19,26 @@ "import numpy as np" ] }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Beta-lactamase.hmm tm-align-0.9-30.tab tm_align_output_10k.ali\r\n", + "I-set.hmm\t tm-align-0.9-50.tab tm_align_output_10k.tab\r\n", + "PPR_2.hmm\t tm-align-0.9.tab\t zf-C2H2.hmm\r\n", + "tm-align-0.9-10.tab tm-align-0.9.txt\r\n" + ] + } + ], + "source": [ + "!ls ../data" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -29,11 +49,11 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "hmm = '../data/zf-C2H2.hmm'\n", + "hmm = '../data/Beta-lactamase.hmm'\n", "n_alignments = 100\n", "np.random.seed(0)\n", "align_df = hmm_alignments(n=40, seed=0, n_alignments=n_alignments, hmmfile=hmm)\n", @@ -54,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -78,7 +98,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -92,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -101,7 +121,7 @@ "'/home/juermieboop/Documents/research/garfunkel/ipynb'" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -120,7 +140,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -133,8 +153,8 @@ " '--batch-size', '20', \n", " '--num-workers', '30',\n", " '--learning-rate', '1e-3', \n", - " '--layers', '2',\n", - " '--heads', '1',\n", + " '--layers', '4',\n", + " '--heads', '8',\n", " '--visualization-fraction', '1',\n", " '--loss', 'l2_cross_entropy',\n", " '--scheduler', 'cosine',\n", @@ -149,7 +169,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -173,7 +193,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -200,7 +220,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "scrolled": false }, @@ -221,7 +241,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d3dc59b28da34b6180a4daeddc7781cc", + "model_id": "e1da30727bf64b89adb0dd3f39659e83", "version_major": 2, "version_minor": 0 }, @@ -235,7 +255,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f6ec09780281499a87dddc4ee64d24b3", + "model_id": "20522bb71e8a4f4d933232b7aa6387dc", "version_major": 2, "version_minor": 0 }, @@ -273,6 +293,37 @@ }, "metadata": {}, "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -300,16 +351,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "version_0 version_1 version_2\r\n" + ] + } + ], "source": [ "!ls lightning_logs" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -318,11 +377,44 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { "scrolled": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Reusing TensorBoard on port 6006 (pid 3827), started 3:28:24 ago. (Use '!kill 3827' to kill it.)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "%tensorboard --logdir lightning_logs" ] @@ -336,27 +428,102 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "NeedlemanWunschAligner(\n", + " (lm): BiLM(\n", + " (embed): Embedding(22, 21, padding_idx=21)\n", + " (dropout): Dropout(p=0, inplace=False)\n", + " (rnn): ModuleList(\n", + " (0): LSTM(21, 1024, batch_first=True)\n", + " (1): LSTM(1024, 1024, batch_first=True)\n", + " )\n", + " (linear): Linear(in_features=1024, out_features=21, bias=True)\n", + " )\n", + " (match_embedding): StackedRNN(\n", + " (embed): Embedding(21, 512, padding_idx=20)\n", + " (dropout): Dropout(p=0, inplace=False)\n", + " (rnn): GRU(512, 512, num_layers=2, batch_first=True, bidirectional=True)\n", + " (proj): Linear(in_features=1024, out_features=512, bias=True)\n", + " )\n", + " (gap_embedding): StackedRNN(\n", + " (embed): Embedding(21, 512, padding_idx=20)\n", + " (dropout): Dropout(p=0, inplace=False)\n", + " (rnn): GRU(512, 512, num_layers=2, batch_first=True, bidirectional=True)\n", + " (proj): Linear(in_features=1024, out_features=512, bias=True)\n", + " )\n", + " (match_mixture): MultiheadProduct(\n", + " (multilinear): MultiLinear(\n", + " (multi_output): ModuleList(\n", + " (0): Linear(in_features=512, out_features=512, bias=True)\n", + " )\n", + " )\n", + " (linear): Linear(in_features=1, out_features=1, bias=True)\n", + " )\n", + " (gap_mixture): MultiheadProduct(\n", + " (multilinear): MultiLinear(\n", + " (multi_output): ModuleList(\n", + " (0): Linear(in_features=512, out_features=512, bias=True)\n", + " )\n", + " )\n", + " (linear): Linear(in_features=1, out_features=1, bias=True)\n", + " )\n", + " (nw): NeedlemanWunschDecoder()\n", + ")" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "model.aligner" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access 'lightning_logs/version_5/checkpoints': No such file or directory\r\n" + ] + } + ], "source": [ "!ls lightning_logs/version_5/checkpoints" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'lightning_logs/version_70/checkpoints/epoch=59.ckpt'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mcheckpoint_dir\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'lightning_logs/version_70/checkpoints'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf'{checkpoint_dir}/epoch=59.ckpt'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLightningAligner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_from_checkpoint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/core/saving.py\u001b[0m in \u001b[0;36mload_from_checkpoint\u001b[0;34m(cls, checkpoint_path, map_location, hparams_file, tags_csv, *args, **kwargs)\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0mcheckpoint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpl_load\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 142\u001b[0;31m \u001b[0mcheckpoint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpl_load\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloc\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 143\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;31m# add the hparams from csv file to checkpoint\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/cloud_io.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(path_or_url, map_location)\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0murlparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscheme\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m''\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mPath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrive\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# no scheme or with a drive letter\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhub\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_state_dict_from_url\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(f, map_location, pickle_module, **pickle_load_args)\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0mpickle_load_args\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'encoding'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'utf-8'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 524\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 525\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_file\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 526\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_zipfile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopened_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 527\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0m_open_zipfile_reader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_zipfile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36m_open_file_like\u001b[0;34m(name_or_buffer, mode)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 212\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 213\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m'w'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, name, mode)\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_opener\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 193\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_open_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 194\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__exit__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'lightning_logs/version_70/checkpoints/epoch=59.ckpt'" + ] + } + ], "source": [ "from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint\n", "checkpoint_dir = 'lightning_logs/version_70/checkpoints'\n", diff --git a/ipynb/struct-benchmark.ipynb b/ipynb/struct-benchmark.ipynb index b1e19f3..91bb476 100644 --- a/ipynb/struct-benchmark.ipynb +++ b/ipynb/struct-benchmark.ipynb @@ -127,16 +127,16 @@ " '--test-pairs', f'{os.getcwd()}/data/test.txt',\n", " '--valid-pairs', f'{os.getcwd()}/data/valid.txt',\n", " '--output-directory', output_dir,\n", - " '--epochs', '128',\n", + " '--epochs', '32',\n", " '--batch-size', '20', \n", - " '--num-workers', '16',\n", + " '--num-workers', '30',\n", " '--layers', '1',\n", " '--heads', '1',\n", " '--learning-rate', '5e-5',\n", " '--visualization-fraction', '1',\n", " '--loss', 'cross_entropy',\n", " '--scheduler', 'inv_steplr', \n", - " '--clip-ends', 'True',\n", + " # '--clip-ends', 'True',\n", " '--gpus', '1'\n", "]" ] @@ -152,7 +152,18 @@ "cell_type": "code", "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/juermieboop/Documents/research/garfunkel/deepblast/embedding.py:9: UserWarning: nn.init.xavier_uniform is now deprecated in favor of nn.init.xavier_uniform_.\n", + " nn.init.xavier_uniform(m.weight)\n", + "/home/juermieboop/Documents/research/garfunkel/deepblast/embedding.py:36: UserWarning: nn.init.xavier_uniform is now deprecated in favor of nn.init.xavier_uniform_.\n", + " nn.init.xavier_uniform(self.linear.weight)\n" + ] + } + ], "source": [ "parser = argparse.ArgumentParser(add_help=False)\n", "parser = LightningAligner.add_model_specific_args(parser)\n", @@ -170,7 +181,7 @@ { "data": { "text/plain": [ - "Namespace(batch_size=20, clip_ends=True, embedding_dim=512, epochs=128, finetune=False, gpus=1, heads=1, layers=1, learning_rate=5e-05, loss='cross_entropy', multitask=False, num_workers=16, output_directory='struct_results', rnn_dim=512, rnn_input_dim=512, scheduler='inv_steplr', test_pairs='/home/juermieboop/Documents/research/garfunkel/ipynb/data/test.txt', train_pairs='/home/juermieboop/Documents/research/garfunkel/ipynb/data/train.txt', valid_pairs='/home/juermieboop/Documents/research/garfunkel/ipynb/data/valid.txt', visualization_fraction=1.0)" + "Namespace(batch_size=20, clip_ends=False, embedding_dim=512, epochs=32, finetune=False, gpus=1, heads=1, layers=1, learning_rate=5e-05, loss='cross_entropy', multitask=False, num_workers=30, output_directory='struct_results', rnn_dim=512, rnn_input_dim=512, scheduler='inv_steplr', test_pairs='/home/juermieboop/Documents/research/garfunkel/ipynb/data/test.txt', train_pairs='/home/juermieboop/Documents/research/garfunkel/ipynb/data/train.txt', valid_pairs='/home/juermieboop/Documents/research/garfunkel/ipynb/data/valid.txt', visualization_fraction=1.0)" ] }, "execution_count": 8, @@ -212,7 +223,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2f66fac652c64b4e8527dc0d0bac116e", + "model_id": "", "version_major": 2, "version_minor": 0 }, @@ -223,50 +234,140 @@ "metadata": {}, "output_type": "display_data" }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ce72ec9896ec492f8d7814561f0995ac", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stdout", "output_type": "stream", "text": [ - "tensor(0.5726, device='cuda:0', grad_fn=) tensor(43.8023, device='cuda:0', grad_fn=)\n", - "tensor(0.9757, device='cuda:0', grad_fn=) tensor(88.0248, device='cuda:0', grad_fn=)\n" + "\n" ] }, { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 10\u001b[0m )\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, model, train_dataloader, val_dataloaders)\u001b[0m\n\u001b[1;32m 977\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 978\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msingle_gpu\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 979\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msingle_gpu_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 980\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 981\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_tpu\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# pragma: no-cover\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/distrib_parts.py\u001b[0m in \u001b[0;36msingle_gpu_train\u001b[0;34m(self, model)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreinit_scheduler_properties\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptimizers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlr_schedulers\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 184\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 185\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_pretrain_routine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 186\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mtpu_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtpu_core_idx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py\u001b[0m in \u001b[0;36mrun_pretrain_routine\u001b[0;34m(self, model)\u001b[0m\n\u001b[1;32m 1134\u001b[0m \u001b[0mnum_loaders\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mval_dataloaders\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1135\u001b[0m \u001b[0mmax_batches\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_sanity_val_steps\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mnum_loaders\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1136\u001b[0;31m eval_results = self._evaluate(model,\n\u001b[0m\u001b[1;32m 1137\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mval_dataloaders\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1138\u001b[0m \u001b[0mmax_batches\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/evaluation_loop.py\u001b[0m in \u001b[0;36m_evaluate\u001b[0;34m(self, model, dataloaders, max_batches, test_mode)\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluation_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_idx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdataloader_idx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_mode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 293\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluation_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_idx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdataloader_idx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_mode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 294\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[0;31m# on dp / ddp2 might still want to do something with the batch parts\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/evaluation_loop.py\u001b[0m in \u001b[0;36mevaluation_forward\u001b[0;34m(self, model, batch, batch_idx, dataloader_idx, test_mode)\u001b[0m\n\u001b[1;32m 483\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtest_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 484\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 485\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalidation_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 486\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/research/garfunkel/deepblast/trainer.py\u001b[0m in \u001b[0;36mvalidation_step\u001b[0;34m(self, batch, batch_idx)\u001b[0m\n\u001b[1;32m 212\u001b[0m \u001b[0;31m# Obtain alignment statistics + visualizations\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 213\u001b[0m \u001b[0mgen\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maligner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraceback\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseq\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 214\u001b[0;31m statistics = self.validation_stats(\n\u001b[0m\u001b[1;32m 215\u001b[0m x, y, xlen, ylen, gen, s, A, predA, theta, gap, batch_idx)\n\u001b[1;32m 216\u001b[0m statistics = pd.DataFrame(\n", - "\u001b[0;32m~/Documents/research/garfunkel/deepblast/trainer.py\u001b[0m in \u001b[0;36mvalidation_stats\u001b[0;34m(self, x, y, xlen, ylen, gen, states, A, predA, theta, gap, batch_idx)\u001b[0m\n\u001b[1;32m 184\u001b[0m \u001b[0mtv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtheta\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msqueeze\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0mgv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgap\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msqueeze\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 186\u001b[0;31m fig, _ = alignment_visualization(\n\u001b[0m\u001b[1;32m 187\u001b[0m Av, pv, tv, gv, xlen[b], ylen[b])\n\u001b[1;32m 188\u001b[0m self.logger.experiment.add_figure(\n", - "\u001b[0;32m~/Documents/research/garfunkel/deepblast/score.py\u001b[0m in \u001b[0;36malignment_visualization\u001b[0;34m(truth, pred, match, gap, xlen, ylen)\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_title\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Predicted alignment'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0mfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolorbar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mim1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0max\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 72\u001b[0;31m \u001b[0mim2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mimshow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmatch\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mxlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0mylen\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maspect\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'auto'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 73\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_xlabel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Positions'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_title\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Match scoring matrix'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/matplotlib/__init__.py\u001b[0m in \u001b[0;36minner\u001b[0;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1563\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0minner\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0max\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1564\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1565\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0max\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msanitize_sequence\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1566\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1567\u001b[0m \u001b[0mbound\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_sig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0max\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/matplotlib/cbook/deprecation.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 356\u001b[0m \u001b[0;34mf\"%(removal)s. If any parameter follows {name!r}, they \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 357\u001b[0m f\"should be pass as keyword, not positionally.\")\n\u001b[0;32m--> 358\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 359\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 360\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/matplotlib/cbook/deprecation.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 356\u001b[0m \u001b[0;34mf\"%(removal)s. If any parameter follows {name!r}, they \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 357\u001b[0m f\"should be pass as keyword, not positionally.\")\n\u001b[0;32m--> 358\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 359\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 360\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/matplotlib/axes/_axes.py\u001b[0m in \u001b[0;36mimshow\u001b[0;34m(self, X, cmap, norm, aspect, interpolation, alpha, vmin, vmax, origin, extent, shape, filternorm, filterrad, imlim, resample, url, **kwargs)\u001b[0m\n\u001b[1;32m 5620\u001b[0m \u001b[0maspect\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrcParams\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'image.aspect'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5621\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_aspect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maspect\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5622\u001b[0;31m im = mimage.AxesImage(self, cmap, norm, interpolation, origin, extent,\n\u001b[0m\u001b[1;32m 5623\u001b[0m \u001b[0mfilternorm\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfilternorm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilterrad\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfilterrad\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5624\u001b[0m resample=resample, **kwargs)\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/matplotlib/image.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, ax, cmap, norm, interpolation, origin, extent, filternorm, filterrad, resample, **kwargs)\u001b[0m\n\u001b[1;32m 888\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_extent\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mextent\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 889\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 890\u001b[0;31m super().__init__(\n\u001b[0m\u001b[1;32m 891\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 892\u001b[0m \u001b[0mcmap\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcmap\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/matplotlib/image.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, ax, cmap, norm, interpolation, origin, filternorm, filterrad, resample, **kwargs)\u001b[0m\n\u001b[1;32m 245\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 246\u001b[0m ):\n\u001b[0;32m--> 247\u001b[0;31m \u001b[0mmartist\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mArtist\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 248\u001b[0m \u001b[0mcm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mScalarMappable\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnorm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcmap\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_mouseover\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + "Traceback (most recent call last):\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/queues.py\", line 245, in _feed\n", + " send_bytes(obj)\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/connection.py\", line 200, in send_bytes\n", + " self._send_bytes(m[offset:offset + size])\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/connection.py\", line 411, in _send_bytes\n", + " self._send(header + buf)\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/connection.py\", line 368, in _send\n", + " n = write(self._handle, buf)\n", + "BrokenPipeError: [Errno 32] Broken pipe\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/queues.py\", line 245, in _feed\n", + " send_bytes(obj)\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/connection.py\", line 200, in send_bytes\n", + " self._send_bytes(m[offset:offset + size])\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/connection.py\", line 411, in _send_bytes\n", + " self._send(header + buf)\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/connection.py\", line 368, in _send\n", + " n = write(self._handle, buf)\n", + "BrokenPipeError: [Errno 32] Broken pipe\n", + "Traceback (most recent call last):\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/queues.py\", line 245, in _feed\n", + " send_bytes(obj)\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/connection.py\", line 200, in send_bytes\n", + " self._send_bytes(m[offset:offset + size])\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/connection.py\", line 411, in _send_bytes\n", + " self._send(header + buf)\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/connection.py\", line 368, in _send\n", + " n = write(self._handle, buf)\n", + "BrokenPipeError: [Errno 32] Broken pipe\n", + "Traceback (most recent call last):\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/queues.py\", line 245, in _feed\n", + " send_bytes(obj)\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/connection.py\", line 200, in send_bytes\n", + " self._send_bytes(m[offset:offset + size])\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/connection.py\", line 411, in _send_bytes\n", + " self._send(header + buf)\n", + " File \"/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/multiprocessing/connection.py\", line 368, in _send\n", + " n = write(self._handle, buf)\n", + "BrokenPipeError: [Errno 32] Broken pipe\n", + "/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/distributed.py:25: UserWarning: Detected KeyboardInterrupt, attempting graceful shutdown...\n", + " warnings.warn(*args, **kwargs)\n" ] }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAt8AAADgCAYAAADbuQqVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdeZxkZXn3/8/3nKquXmefYYaZYR9EICIySlyiGFQQF4xxwS0uJITENfGJ0STPo3l+MTEmrycxUWNwA6MRUTGgIoggbgiyo8M6DDDTzL73dE93V51z/f64T3VX9/RSVV3Vy8z1fr3Oq7tOnTrnruq7u6+667qvW2aGc84555xzrvmimW6Ac84555xzRwoPvp1zzjnnnJsmHnw755xzzjk3TTz4ds4555xzbpp48O2cc84559w08eDbOeecc865aeLB9zSRdJwkk5Sb5uveIukPG30uSW+R9MNGnNc1jqTLJf1d9v3vSHp4mq5rkk6q43HnSOquuL1O0jkNbZxzzjk3ixxWwbekiyTdLqlX0vbs+z+VpJlu22QkPSHpJVM8x8ckfbVRbZqImX3NzF42HddqpHqDxAa34QlJByUdkLRN0pcldTb6Omb2MzN7WhXteYeknzf6+vUws9PM7JaZbkctpvP3zjnn3Nx32ATfkj4IfAr4J2A5cBRwKfB8oGWcx8TT1sApmu4Rc9d0rzKzTuBZwLOBvxl9gP/MnXPOucPPYRF8S5oP/F/gT83sW2bWY8E9ZvYWMxvIjrtc0n9Iuk5SL/BiSU/P0in2Zh95v7rivCNSNkaPEGajqJdKelTSHkmfKY+yS4ol/bOknZI2AK+YoP3/BRwDfDcbDf1QRZrKxZI2AjeP/og+e+wTkl4i6Xzgr4A3Zue4r+KwYyX9QlKPpB9KWjJOOxZK+p6kHdnz+Z6kVeMcO/q1eJmkhyXtk/RZST+pSFF5h6SfZ6/HHkmPS3r5qNf57yTdmrX9u5IWS/qapP2S7pB0XMXxp0i6UdLu7JpvqLjv8uzn8P3s+d4u6cTsvp9mh92XXeeN4/1MpouZPQX8ADgdhvrUuyU9Cjya7XulpHuzPnqrpGeUHy/pTEl3Z8/1G0BrxX2jUzpWS7o6+/nukvRpSU8HPgc8N3tN9mbHFrKf18ZsdP5zktoqzvUXkrZI2izpXRM9R0nvlPRg1sYNkv54gmOHPgGS1CbpiqzPPJj9XnSPOvZ/Sbo/63ffkNRa+dyzx2zP2voaSRdIeiTrO39Vca5I0oclPZa9NldJWpTdV/5dfHv2euyU9NfZfRP93jnnnHOHOCyCb+C5QAG4popj3wx8HOgCbge+C/wQWAa8F/iapEk/qq/wSsLI5RnAG4Dzsv1/lN13JrAWeN14JzCztwEbyUZDzeyTFXe/CHh6xXnHO8f1wN8D38jOcUbF3W8G3kl4ji3A/xrnNBHwZeBYwpuBg8CnJ7ouQBbMfwv4CLAYeBh43qjDzs72LwE+CXxRGpEOdBHwNmAlcCLwy6wti4AHgY9m1+oAbgT+O3s+bwI+K+m0inO9CfhbYCGwnvDzxsxemN1/RvYafWOy59ZsklYDFwD3VOx+DeH1OlXSs4AvAX9MeG3/E7g2C45bgP8B/ovwOn0T+P1xrhMD3wOeBI4jvM5XmtmDhE+Ifpm9Jguyh/wjcDLwTOCk7Pj/k53rfEIfeimwBpgsXWo74XdhHqEf/kv2vCbz0aytJ2TXeusYx7wBOB84HngG8I6K+5YT3oyU2/757BxnAb8D/B9JJ2THvo/wur8IOBrYA3xm1LVeADwNODd77NMn+b1zzjnnDnG4BN9LgJ1mVirvyEYI9yrk1r6w4thrzOwXZpYSAotO4BNmNmhmNxMClDfVcO1PmNleM9sI/Dg7J4Sg4F/NbJOZ7Qb+oc7n9jEz6zWzg3U+HuDLZvZIdo6rKto4gpntMrNvm1mfmfUQgtYXVXH+C4B1ZnZ19jP4N2DrqGOeNLPPm1kCXAGsIKQGVbbxMTPbRxgJfszMfpSd75uENzEQgrgnzOzLZlYys7uBbzPyzc3VZvar7LFfG+/5zrD/yUaZfw78hBDAlf2Dme3Ofl5/BPynmd1uZomZXQEMAL+dbXlCPyua2beAO8a53nMIQeVfZP2p38zGzPPO3hT9EfBnWTt6svZdlB3yBsLP6zdm1gt8bKInambfz362ZmY/IbzZ/Z0JX53h6/y9me0xs25Cvxrt38xsc/Y79l1G/qyLwMfNrAhcSfg78ansk7F1wDpCwA7hzc1fm1l39knZx4DXaWTqz9+a2UEzuw+4j/CG2znnnKvJ4ZJTugtYIilXDsDN7HkA2cfUlW8yNlV8fzSwKQvEy54kjJRVqzLI7CME80PnHnXeemya/JBJjdfGESS1A/9CGElcmO3ukhRnQfN4RjxXMzONSo+pbIOZ9WWD3pXt2Fbx/cExbpePPRY4u5wekckRRn8PuRYTPN8Z9hoz+9E491X+zI8F3i7pvRX7WgivuQFPmZlV3DdeP1tNeANUGuf+SkuBduCuig8nBJTnSBwN3FXFNcMDQ4rRRwkj6VF27l9X0Y7Rv0Nj/S6M/lkfXXF7V0W/Lb95nahffUdS5d+ChJFvEOdCv3LOOTfLHS4j378kjAZeWMWxlYHKZmC1pMrX4Rjgqez7XkKgULa8hjZtIQQ8leettl3j7R/RniyVYGkV56jWBwkfq59tZvOA8icGk1WL2QIM5YZnI6dj5oo3wCbgJ2a2oGLrNLM/adL1ZkLlz3ETYfS28vm2m9nXCa/7ylHpO+P1s03AMRp7EufofrOTEJieVnHN+dkEUaihb0sqED6Z+GfgqCyt5Tom71Pl61T2o9XjHdgAm4CXj3qdW7Oc/MlM9ffOOefcEeSwCL7NbC8hx/ezkl4nqTObQPVMoGOCh95OCGg/JCmvUF/4VYSPqAHuBV4rqV2hPN3FNTTrKuB9klZJWgh8eJLjtxFyWyfyCNAq6RWS8oQKGYVR5zhu1JuJWnQRgq692WSzj1b5uO8Dv5VNaMsB76a2Nyq1+B5wsqS3ZT+zvKRnZxMHq1HN6zybfB64VNLZCjqyn38X4U1nidDPcpJeS0gvGcuvCMHsJ7JztEp6fnbfNmBVlkNO9knQ5wm52csAJK2UVJ53cBXwDkmnZp+WTNRPWgh9dAdQykbBqy1ReRXwEYWJwCuB91T5uHp8Dvi4pGMBJC2VVM2beZj6751zzrkjyGHzzyKbpPjnwIcIE7y2ESan/SVw6ziPGQReDbycMNr3WeAPzOyh7JB/AQazc11ByB+u1ueBGwi5oXcDV09y/D8Af5PlqY85ITLLh/5T4AuE0fleoDK945vZ112S7q6hrWX/CrQRXovbgOureZCZ7QReT5hIuQs4FbiT8GlEQ2X5xy8j5B9vJqQC/CMj34RM5GPAFdnr/IbJDp5pZnYnIf/604RJgOvJJhVm/fe12e09wBsZp59l6RevIkye3EjoN+VqLzcT8p+3StqZ7fvL7Fq3SdoP/IjwqQhm9gNCX7k5O+bmCdrfQ5jMeFXWxjcD11b59P9v1s7Hs+t/iyb0qcynsnb9UFIPof+fXeVjp/p755xz7giikemizk1dNgLYDbzFzH480+1xhwdJfwJcZGbVTAJ2zjnnZqXDZuTbzSxJ50lakOX4/hUhp/e2GW6Wm8MkrZD0/CyF7GmEOQnfmel2Oeecc1PhwbdrlOcCjxFSVl5FqOYxlfKIzrUQUsd6CKkt1xBSw1wVJH1JYYGh34xzvyT9m6T1CgsVVVN73bmm8n7rjgSzLviWdL7CqoXrJU02SdHNEmb2MTNbbGZdZna2md0+022aTt5vG8/MnjSz082sw8xWmtkHszx3V53LCWVDx/NywiJJa4BLgP+YhjY5N5nL8X7rDnOzKvjOSud9hvDLdSrwJkmnzmyrnJuY91s3G5nZT4HdExxyIfCVbPGj24AFklZMT+ucG5v3W3ckmFXBN6FM2noz25CNcF1JdbW7nZtJ3m/dXLSSkQsXdVPbAmPOzQTvt27Om20rXI71SzWi3JekSwgfNdHRrrNOOakFgL1pRHffQuJ9Ebl9A1ipmoX8nIMe9uw0s6WTHzmuSfstjOy77e06a81J4devaFC0iISY/jTPzgOdtOwB9fRNoUluOvXTy6ANjLtw0Hkv7rBdu8dfJPau+wduMLOJPmpvhrHaO2b5qxF/dzs6zjrllFOa2S53mLvrrrum8jfX+62bMVPsu0NmW/A96S+VmV0GXAbQNW+VdR4V1t3ILc4TnxLB6T08b/XjLCkc4BfbTmDHHUex6pZBWm57iLS3t+lPwM09P7JvTbg8ehWq+mdQ2Xc7F662jpPeT6lN7D8Lzv2d+3jfslAue93gcu7tPZYtA/N5dO9Sdt26nOOu3Ut67wNTbKZrltvtpgnv37m7xK3Xjz8413r040sa3aYqdDNy1dBVhNr5h6jsu2vXrrU777yz+a1zhy1JU/mb6/3WzZgp9t0hsy34rvqXCsLIYP5HdwHQumA+8x5ZRc8Dndy97BkkBUhaIV1gbHizaLnkRCRjcEsHi34tlt6xF3vgMazo87fclNXUbwHiA/10/Ww9yueZt2EZ9919Bm9ecialDhg4vY93nf5LPrLiem5bcCwffeK1lDpbZl2OmKueAensW4X+WuA9kq4kfFKzz8y2zHCbnJuM91s358224PsOYI2k4wkrOF5EWBFvUsnefXDPPjrvgc5sX271KnqedTQ7T2/h4OoIWlJoTdhzWsz+ExYQD6xFCbTuNBasH6Ttoa2UntoMvvCQq03t/dYMBovYYJFo3eMs/HXKQjOiZUvY+LpVPHDcCloXGCe2bOes0zZw95tOpPWc55E/AAsfLtJxz0ZKW7dNw1NzjWAYRRs/7aQZJH0dOAdYIqkb+CiQBzCzzwHXARcQVintA945rQ10bgzeb92RYFYF32ZWkvQewrLsMfAlM1tX7/lKm7pp29TN6muyHVFMdPoatr5wEfvPPsgzj99ITikP7jyKjY8uYN5Jx9CxfRVR0WjZWyS/dR+2dQdpT08jnp47TNXVb1PDBoc/dRlaaXZgkPZtxi8ePpEPlH6P+S39vGDRej5ywXWszhW5rX8pH7n/99h7y4nMf+LYQ06b60sobD0A23ZiB/sPvW6SYKWSz4mYAdM98m1mb5rkfgPePU3Nca4q3m/dkWBWBd8AZnYd4Z1t46UJ6f0Psex+WPZp2AeoUGDJb7fRe5447s3receKX9CXFvhi9wt47NZVrPjlYtrX70alBAYGsZ4DpH19Hry4EabSby1JsVIRzCht2cqC/9rKgv+CXmBg1UqueM0FfOuVz+JNx9xBTMpfnHYjracXiZTSqiLLc/tYGg3QKvjy3rP44k0v5tgfdNH6VHjTaLFAIS1dfQNo916SPfsgnd6R2COZAUXSmW6Gc865WWDWBd/TzQYGiH5yD8f/JAQ7n+Fkoq4uBn53BYOvKPHqT97ExQvW8WRJfGjD79N9w+msvn4PPLQBUgNLsSTxVBXXFKXup1j26afg03Atiw+5Pz71ZJ74vSWc/LLHOHfJQ9y47RQ6N0a0PbKd0pObiNrbiRYtJJ3fCbloKAh308uAxP9GOOecw4PvMaU9PbRd8ytOvgZuYB438FziJYvZ97LVDL6qlz+8+Pv8fud+7hoY5H0PX0Tfd5dz9A82U9rwxEw33R3h5rf0073C2Ld2Ba3HLWGgLWZwXsxgpyCCtl0pXQ/F6EAvNuAj39PFMIqzb8Klc865GeDBd5WSnbuY/80eFny3wBdb1vLFKIZlizj4wkUMvmQfr3/vrTyv7XFu6VvDJ+95GUuua2Xxjx73SXGuqXRwgPbtxn2PrWbjvgXs2TyfZb+G+bd1k2zdRmH5UcQnL6fU2kLaMtOtPXKZhXruzjnnnAffNbDi4IjShNqzh+XbdsLNC/n6/PP473xM/5IWCqfl2HdhD8/5wG5WFnr5xe4TeeBXx7Pq5hLtv3wkVGZxrhHMIAUSUUojyKX0L8lx8JTl5I9eRO+SVvadkKd3pZHmjcHNMYXdnbRsbiVNEixLnfK0qWYTyZjl4J1zzh1pPPieAiuVSHbugp27gLDSSkdHB53rV9F7/3x+uWwtSQFK7SJaamx8Y0rnHx9NLlrOnm3zmH9/nuW/3I9+s560f4zKFM5NpliidW9KYXOeA33zad0V0dmd0LppH9rXAywjXpnDIkhbjDRfnoAZgSIUAxaCQkuzANwnYjacAUXz4Ns555wH3w2X9vbCuodpXQet2b7c8qPoO2M1u04v0LsqT1pIUSp6TkjpXd2FXncmSqCwSyx8pETnum2Unuz2IMhNykolcn0pLftjIKKwF1p3lWD7Lkp79hB3dRD3dyIDpUIpKLVQcjBJUJQFhEOBeIqZfCS8wQx85Ns55xzgwfe0KG3dRsvWbay4IdshEZ98IttfsJRdzylx3AnbKcQluvcuoHt1F50nraRt5wqiIhT2JbR296CntpHs2TOjz8PNYTb8VamFCj1pgplQHM9o044EYeTb1yh1zjnnwffMMCN5eD2LH17P4i9m+/ItrDxrMRtfFrHkld28bdVtDFqOb29+FuvvWM3yX82n87H9qJiggwPY/h7S/QdG5KA7ZxGkeaH2NqJSCWsvUGqDpNVIW1OS1ohSW458eztREj5ZMbNQNtM1jSESPPh2zjnnwfesYcVBuO1+jrkt3P46RxO1tzP4whUkr0x5yUd/xiULf8W2JM/HNr6aR248hdU/7CFatwEsG8nMVi90c5AiqHP58ZBSErakFfYfl2dgwTFEJePA0TH7zhjkzJOfZGnrAW496ni2JwtYkjuefG+JXM8g8ba9pLv3hP5jQhD6U5mnoEyZj3w755wr8+B7Fkv7+mi5/g7WXA+30sKtvIB44UL2n3sc/a/u5w/edh1v6drFusGDfOCxN7D9e6tZ+f1tJI88NtNNd1WyKQa2tm8/7Q9to7BzHmkhR9RXJN6xl2TbDqw4yLynryEeWMK9Aydg7SVyO1ro2mMoMUxicEEBW7wctOKQc6uU0rKrn2jTVpJduz0InxKRePDtnHMOD77nnGTPHjquvpOnfS/PV+NT+Focw7LF7H3hcvrPO8BFf/wTXtr+BLf2H81Hf/MqWr8/j6Nu2Eip+6mZbrqbiNW59HiUBXQSJrBCTLqoi6itgJKUvmPm07dcFJb10dXez86eRXRsh/bb1pPs2z/iVIpj4iWLSFYuYXBBASIxuLQNlh4PHH/opQdTWrYfgO6tJPv3H3K/GxaWl/fceueccx58z01pQtpfkRZwoJelO3ez7McL+HrXS/jvfMzg4jb0jBYOnNfDue95ipNbt/KTvSfz8zufzsqboeunj4bRTDen2eAgtq+HOEmJczEUS9jBg1hvH2mSUljYQTzQQgJEykauDcgmXI44V5pQ2rYD7d5DIXfonwa1tMCShZSWzaPYlcci+XL1VTITRfPg2znnnAffh4c0CQv3VCzeU2hvZ/Xjyxm4ZyE/Xfbb/Lggih0if7Tx1GsGWPZHi2jLd7Fp+yLa7m1jxa19xPc8QtrXN4NPxNUlSaBYDGkhpRI2WCQdLEKaEJVSVMsaOmmCDSTYwMAhd2mwSDyv0ydn1iGUGvS0E+ecczMQfEtaDXwFWE5Ym+8yM/uUpEXAN4DjgCeAN5iZ19arU9rXB49uIPcozMv2xUsWM3j6sex+eoHdq5eSFkIQ1bci5bHXtcLrnwEGrTsiFj6SMO/+naQbnvRJnBnvu65ehiiaj3U455ybmZHvEvBBM7tbUhdwl6QbgXcAN5nZJyR9GPgw8Jcz0L7DVrJzF/Etu1h6CyzN9uVOOI5dz1vOjrUw/4Q9tLUU2bWyg81HdbD3xKNo3b0MlaB1b0rHpl6iJ7eR7Nx5pE6+m/19V0K5HFFrAYC0LU/SAoWWEm35Ipa3kC4SNTBdpJHnOowlvsKlc845ZiD4NrMtwJbs+x5JDwIrgQuBc7LDrgBuwYPvpitteIL5G55g/lezHVFM15mn0P2SiMILdvL64+4hr4QfbDuN9XevYuk9nXQ9fjRRMSHqHUB79pPu3Ufa3z+jz2M6zMa+a+UFc5IUET6hUKEFWvJIYqAzT6nTWNHZy1FtPTzZtoQ0n5+OprkKPvLtnHOubEb/G0g6DjgTuB04KgtuMLMtkpbNYNOOXGmC3bWOlXcB/wg304EKBdLnL4NXGM/+87t4z9Jb6Enz/EP3Bfzm5pNZ/aOD5B94Mkz2SxJssIiViof16Pis6btpQtrbC729Y97dsmop8UCBtDzqGhlJi1BnJ1GxBEkylB8+KUuH872zn61lI+3Kt4Sl6VOr7lxHGM/5ds45VzZjwbekTuDbwAfMbL+qrJog6RLgEoBW2pvXQDfEBgbI3XwXJ94MDwPv5flEXV30nnsCA68a5PWX3cDF87p5otTH+x9/PU9edzyrv7+LZN3DM930pmhI31VHeR/EQnFIEzGzsNhNkjT0zYuZKMQlXrBmPelJEVwKfaV27nngeFZfDx03PUja01Pd8zBDFlbS7F/ZCSs7Acj1J+S39mCbt1V9riNFGPn2aifOOedmKPiWlCcEL18zs6uz3dskrchGDlcA28d6rJldBlwGME+LDt+h1Vku7emh7Zo7eNr3Yq6OV/MdHUO0ZDE7X3wM/S/v4w3v+jEXdjzB3YNdfOiB38euW8yK6zdTevzJmW76lDSy7w6l6kQximOIFILxXA7iikBtCsG43bWOY+/Lozhiaz5Pce1JPHlegeOe3c3S1gOc/VvrKZ0Wkf55Kz3FBWy4byXH3FCicMuvD6l4YqVSqBf/1BbyQNRaIFq+jOKKBRQ7PaViIl5q0DnnXNlMVDsR8EXgQTP7fxV3XQu8HfhE9vWa6W6bq5FZqIRSKmFA+tRmFl29lyU/6uKqzhdwVf4ciovaKZ3VTv85B1h76XrObH+SX/Ss4Vt3n8WKG3MsuGUDybYxY9VZp2l9N02wLFXDJFCE4hjF0ZRXwAw/oyKWRDBYJH/7Q6x5oBO1tbK3ZRF7z1zKlhelnHbqJtpyRSxnE9fuNiMUegHSihqGUmi7T74ck4GvcOmccw6YmZHv5wNvA34t6d5s318RAperJF0MbARePwNtc1NhRtrbG3KQMy0rjyZ/yrFErYO8uPNBzmlL6Yru5/rFT6fYPh+NsZjLLNb8vmsGjY5fzcBCcJ8ePAgHD4IiopY8rcctJBrMEcnIKQHBpEU5yvneo98YeNw9Lk87cc45VzYT1U5+zvj/ps+dzra45kv397Dknv3s7V/Au2+7lFKnYRGkeaP3WSk7z14FrCK/N2beo7Dk3v3owcdn5WI/h0XfHQqY0xA8j14wZ6oBtITiCKI4TNAccc0jm0+4dM45B77CpWuytKcH7lrH/LtgfrYvPul4Nl+wggWv3My/rvkGzywUuOrAfP7qjtfScqCLBU+0wiwMvmeMIlA2It6ogLYy8M6+tMQJucX97HhmB/Pnn0k8YLRt6ye/YSulbdvHvqZZxTbcXsUxmEKuuvORb+ecc0M8+HZutrE0C1wJOdSRUJRD0shqKOM+vsrA3FJyvSVatxdYt3EF+UKJzo5+lr5kF/MLB9k/2MrDD63kqFuPZ8GDS1Ay8rwmUWzNkbbERCVDZlguImpvI4oU2tk/UH0pw8OYAekUc74lnQ98CoiBL5jZJ0bdPx/4KnAM4W/79cArazj+n83sy1NqpHMNUEdf977r5hQPvp2bbcoTWcuyaihDsVucVUfJVJY6NDOsWKoq2LVSCf3yPlb9MrtMayvF557Kky9eBKccIJ9PWHLMXlpPGsTiEshojUu05wYpxCW6exfw2LqjWXpnRNemASwSg0s6sGUdEIm4r0R+ew/Rlu3YwYNTf1lSy96YzL00lqlWO5EUA58BXgp0A3dIutbMHqg47N3AA2b2KklHAZuBU4HHqzh+KfCwpK+Z2WDdDXVuiuro69533Zzjwbdzs11FNZQxZaUJq603Pu5l+vuJf3w3x/547PujxYvYev7JbD9vkOedtIGDxTyFXTEL1+2D+x5GhQJasYzSsnkkbTlkhIooSTLyzUQtohhVVlCZg4E3hJHvKaadPAdYb2YbACRdSVhZtTIgMaArq8rzfGAAeNTM0iqO7wR2A3X+oJxrmFr7uvddN+d48O2cq440YkJmpOFA2FIbussiYbGwSJDPobbW+qYaShBFQ28qrFTCBgfrD+RnlCYrNbhE0p0Vty/L6sKXrQQ2VdzuBs4edY5PE8pebgYWAD81K08SmPT4LuCNFcePbH3FAlHHHHPMRM/Duamqta+P23e937rZyoNv5+Y4Sw3FjF29pKEXyiZVGqQmUhNKCWW/0zC6HRVLxAMJSEQDJdQ/SHqwv67qNcrlUEtLGNmf46oY+d5pZmsnuH+sjzVG/7DPA+4Ffhd4D/D3kuaZ2f4qjj8RuFHSzyqOH75QxQJRa9eunZsfP7i5ota+Pm7f9X7rZiuvfeWcq9tQTXBpzLQXiyPUkg8pKTVsUWvr0Pdkm1ryI1f+nEMMDb1hGWurQjewuuL2KsKoX6V3AldbKMB+JzAInFLN8Wa2npAbfgrOzaya+rr3XTcXefDt3OGgmSPe4zBCBcSh22aQpKiYoFKKiikqJWECaJLUtFmpNJTfrTgKtcM1zp+rKea6TwezMPI93laFO4A1ko6X1AJcRPjYvdJGhuvNP0mo7lmq5vhsgubTgA31PD/nGqimvu59181Fc//zXOfc5MoBaoMmLKbjrcZjBmmKkhSlKSShOonV8eZAAHE0PNodh/xvqwy2ywF5uQb6LJ6QWeUI95jMrCTpPcANhPJrXzKzdZIuze7/HPD/AZdL+jXhFfkX4Moajv9LM9tZdyOda4A6+7r3XTenePDt3OFIGg5ERwSrmnqAWkUQOTQiHilUYsnX/qdGuRzK5SCXBd+leKjMoqU2XAUlC8AtAZidAXgjFtkxs+uA60bt+1zF95uBl4162F/UeLxzM67Ovu7cnOHBt3OHi3IdbGhaACorL44pkjQCIyyuk032tFIJFcOESw2WYLAIxSI2WHv5XYvjkMZSHjVPKxYYMsNMI+qdz2aGKKVzo63OOeeay4Nv544E5QcrypAAACAASURBVPSMsSvJTYlJWVnAMOnSAKQQPHe0EcUxUT3lAVsLWHsrSSGPzFBvP7KUiJBfLoVRdaIo1BMvKeSX2+xcTXPcVB3nnHNHFA++nTscWNrUAHvktSqqdFSMfFuxRNrbh8yIcrlQl7vvIElvX13Ly0ddXUS2YGhWuAaLpAODpAMDYfg9l0PR3JgzbgZFH/l2zjmHB9/OHX4UTcvobyQDZSPfUcjFnuoqmxMqlzNUBGRvNsr74jikpDTv6lNSLjXonHPOzdiwkaRY0j2SvpfdXiTpRkmPZl8XzlTbnJvIrO+7zRz5zhbZKaVxyPkGiEPuteIoTLBslDTN8tiHNzOb9VVNxmJAyaJxN+ecc0eOmfyr/37gwYrbHwZuMrM1wE3Zbedmo9nZd5udbkJWxSQVJYtIUg2vO1cRdDdt9Ls88bIceE/D822k1KJxN+ecc0eOGfmrL2kV8ArgCxW7LwSuyL6/AnjNdLfLuckc0X23olZ3agrVRoxxR6GbmoIyx5jJR76dc84BM5fz/a/Ah4Cuin1HmdkWADPbImnZjLTMuYnN7b7b7HzwLA+7XIFEEopU+yD1WIH7UF3vrFZ5eYJppBlZ4bMWBpRSD7Kdc87NwMi3pFcC283srjoff4mkOyXdWWSgwa1zbnyztu+Ojmwrl2GvCGJVGbw2Wjn4zRbVGVqZMo6HA/JaNggBfDRy39BoeuUx5evO8sonaUWVmNGbc865I8dMjHw/H3i1pAuAVmCepK8C2yStyEYOVwDbx3qwmV0GXAYwT4tm93CXO9zM2r5bXvHRKhfaCRdtbLCdTbisLDVIClga0lBGTZDE0qFFcWpWOeGyfHkblfedpiHQn+UMeXqJc845YAZGvs3sI2a2ysyOAy4CbjaztwLXAm/PDns7cM10t825icypvls5QqxoaNRbjahGYiGHORcnDM43DpzYiT3r6XD6SXD0MqytMHK0ulEVUKRsyfk8yregljxqLUC+JWxx3LhrNZr5yLdzzrlgNtX5/gRwlaSLgY3A62e4Pc5Va1b0XavMex4xQpxiaTQ8Ml6vJKGwL6F1c55HcstRf0zHTpE7mC0B3+jiI2ZQKkGxFAL5YgkbHMRKxXB/JJS2zGzNpip5zrdzzrmyGQ2+zewW4Jbs+13AuTPZHueqNWf67qiR74bIyg2OWNFGwvKi2NlOUujEYhEPpOR3dhHv3IP199d8GbW1YfM6SdsLEEEURURJMjyZs7WA2tognwvpJ5Zig4MhYJ9lfJEd55xzZbNp5Ns5V6/y8vKjJ19aWIWycdepiLgTke81WrcdJHpiCyoUsAVdsKidpBChUooGi1h/P2nvwdquE4kojlGxhIrhz5SKJWxgEOsPk1WVz/58lauraHaPLCee8+2ccw4Pvp2b+8qTKseq51c52VIRitKGVRqUQTQI0f6DJLt2ExUKRPkcUUcLFudRUjFZss6Rd4sEcXisxRHkcsNBd74FWvJYawskacgBj+NZucS8GcMrgjrnnDuiefDt3JGggaPCKke3Y0W5Zig1lBhK0qFrK45rvIggl4MowrISgooilIvDfkBZ4J22hUBfA4PNKaPYEJ524pxzLvDg27nDjY2aeNmMmC+7ROUKl2YGg0XUXyIG1F9Cff0kfX3YQO11zSMgyuWIspF99fWT7u8h7e0FiTifg872hj2lZjJ85Ns551zgwbdzzjXbyHLlzjnnjmAefDvnGqe8yqQ0J0oAThfDJ1w655wLPPh27nAmTc/CM2MM61ocY/M7iQstkNQ4y1PC2lspzWslacthErkDg8TtreR6D4b753dSXNJOsT1HlBgFM9RzAHp7G/SkGslzvp1zzgUefDt3OJggp8FSQzXOd5z8ehVbeVexRLq/B5VKRPk8lEpYfz/Jwf46gu+IqLODXP98oq62sKu3H3btJenpgTgmjiM0v7VhT6nZ0tSDb+eccx58O3fYGxr5jgSNKDM4YnGd8tfsmzSFJAmHJAlMZUXNSKHUYPncEsRRWEY+jiGOsXxE2iIogeUiNEurnXipQeecc2UefDvnqmJmw2UGK1WuolleEt4MUsOyALzWZe0VhSBeSYqloWShkjRUVEkNFPaZQoBuEbN+oR2fcOmccw58SpRzRxY1IEA1wMKEysH5ou/ERejMU9Bpa4iWLwuL3xRLWKm8FSFNatqsVMKKJSglqJigUgjGKZawJMGSdLiMosAisFhhoZ1CAeVbIIpnTd1vQ6RpNO5WDUnnS3pY0npJHx7nmHMk3StpnaT7azz+J1N4is41TB193fuum1OqGvmW9HzgXjPrlfRW4FnAp8zsyaa2zjnXHOWgtJbh2DQl7ivRsr+F0p4YBAeXGv1LcsA8cgfEwkfnMe/+nbB1R2PbW17FMwoTSBVHo1bvhDQfY+2tRAvmh0B9sEg6MBCCeAgrgM7g8PNUriwpBj4DvBToBu6QdK2ZPVBxzALgs8D5wFPAY8DvVnO8mW2UtGwKTXSuIWrt69533VxUbdrJfwBnSDoD+BDwReArwIua1TDnXANYSPlQlEIaHboE/eiR4QmC07Svj/z9G1j5RFdYWbKrnb2ndrHnVDG4OCHNxwx2RJBr0OxOs5BDXsoS1UsJVh5RT43oYD+5nkEsEkqNuG8QlZKQ4qIICgWiQiHLdU+wg/2kB/vD6Pp0M7CpTbh8DrDezDYASLoSuBB4oOKYNwNXZ8HIc4GHqz0ewMy2T6WBzjVITX0dvO+6uafaz59LZmaEX4BPmdmngK7mNcs51zTlEeDKrarHGcn+/ZSe2kzy2BNEm7ZS2JcSFUNQOaOV9Ayi/hK2dz/Jjh0kO3divb0zE2iPw0zjblVYCWyquN2d7at0MrBQ0i3AV4FCtcdLukvSH9TyfJxrkpr6uvddNxdVG3z3SPoI8Fbg+9nHQvl6LyppgaRvSXpI0oOSnitpkaQbJT2afV1Y7/mdaxbvu5MQQ+khbpgRSg2OtwFLJN1ZsV0y6hRjvaCj3zXlgLOAVwAfB86UdHKVx58H/O9Rxw9fXLqk3LYdOxqcUuTcSLX29XH7rvdbN1tVG3y/ERgALjazrYR3of80het+CrjezE4BzgAeBD4M3GRma4CbstvOzTbed8dRroRiUlYOcPaW/pt2WdrJeBuw08zWVmyXjTpDN7C64vYqYPMYx1xvZr2EftlD6KOTHm9mO4GfVhw/svlml5XbtnTp0hqfvHM1qamvT9R3vd+62aqq4NvMtprZ/zOzn2W3N5rZV+q5oKR5wAsJeeOY2aCZ7SWktFyRHXYF8Jp6zu9cs3jfPZTMhhbbSfNG33Kx+8yF9LxwDQfPXoNOOIaovX2mmzk72ATb5O4A1kg6XlILcBFw7ahjrgF+R1IO+A2wDNhfzfGS2oGzCUG7czOppr7ufdfNRdVWO3kt8I+EP+ZZcS/MzObVcc0TgB3Al7MJnHcB7weOMrMthBNvGW/2cvZx7CUArfg/dTetvO9WGixS2DVA15M58gdyWARJC+xbAxZHtOyNWRzPp33Hnqkv+Z6NoCuOUS4HcQy5HJaLSHMiIis1OPVn1SSa0oRLMytJeg9wAxADXzKzdZIuze7/nJk9KOl64H4gBb4M/HsNx3/BzH4zhSfp3JTV2de977o5pdpqJ58EXmVmjXhnmSOUKnyvmd0u6VPU8DF99nHsZQDztMiXrXDTyftuhXRggNymnSw+WCTpaKE4r4U9a/L0nGjYgiJJIc9Ad0xHS93TQyBNw+I65QV8ICzok+WVW3mLmd255ka1EyvHP4XZdcB1o/Z9btTtf2KClMBaj3duJjSirzs3m1UbfG9rUOANIVer28xuz25/ixDAbJO0Ihs5XAF46SA323jfrWCDg5S2bEPbthMB7cuPYmDBag4cIywy0jwMdonk6MXk8jUupithrS0kHQWS1vDYuL9E1NlO3D8AUURp6TwOHtXKwLyIqGRExTYKezuIevvCKXK5WbPIDjDD5WCcc87NFtX+R7xT0jeA/yFMvATAzK6u9YJmtlXSJklPM7OHgXMJ9TsfAN4OfCL7ek2t53aumeZk3y0Hn5WrWioCKhacmUqAailWruZXKmVBsEgGYiik7DktYv+JnSjtrLHdIYc8aU+hJQUDHSyQ399F3BdW1xxYlBId1c+CeX3s729hz4NdHDV/BZ2Ptof64MUSGihCqRTSqqMIRTqk1Pm0mfOfdTjnnGuEaoPveUAf8LKKfQbUHHxn3gt8LZtMsQF4J2Hy51WSLgY2Aq+v89zONdPc6rvlJdiziNNSG7nQziHBeR2rQGbHl7bvZN4PDzL/F+2Qy4Wgt7eP5EB99bbjefPQkkWk80J+vA4chJ27SfbtJ2pro/Tsp7Hlue3sPK4FFUXnHhH321C1FesokBZyWE5EAwm57ftJt27HSqWa2zJlBkxtkR3nnHOHiaqCbzN7ZyMvamb3AmvHuOvcRl7HuUab631XkQhzmLJAvCEnDfnYGp1vHUWQzxG1FrCktuBbEhQKWCFP2hJjcYRaYjS/nSgxkrY8u5/WSu8JRVYes4tIxr7VrWxaWyAZ6ID+mPaNORY+mtC2Y7Axz3OKZmzE3Tnn3KxSbbWTVYRZ888njOH8HHi/mXU3sW3OuUZIk+HATxoa5R4KlitTUkwhUJ5o9HusdJXse0sSKBZD2keSYP0DpP0DNY98G6BSCfX2EsUxUmiXlUpYkhDFMcsfX8CyWxeSdHWRtOXIn9iC1oCWDaKuInp2L9E5feRbBnhy90JyPz+aldfl4OH1NbWlYTzn2znnHNWnnXwZ+G+GP05/a7bvpc1olHOuScwoJ2lbCkgoZmQAXsu54NCc8VrTVsY7fZKEvO00DeWwkzTsS5MQ2PcdJNqXR4NFchLLtsUs/VWERRHFRa1sfc58en474ekLt7J/oJU9rfMhFzekbbU/GZCPfDvnnKP64HupmX254vblkj7QjAY552ZIJKg9NXvGmFlY5CdNUWqwrwfrOQADA7SuWE7rSavpL8XklRJppmc7yke+nXPOAdUvL79T0lslxdn2VmBXMxvmnJtGjayP3ezyfhWpMyPk86i9nairC+tsI2kRcZwSKSWOZsGwczrB5pxz7ohRbfD9LuANwFZgC/C6bJ9zzk0bRSMDbysWQznBJIFcjLo6YPECSgvaKXVAR2GQebl+2nJFLEeohDJTpra8vHPOucNEtdVONgKvbnJbnHMzoZ5872kyokqKpSMmg6Y9PdDTMzQSHnd2oK5OlK2ombTA6q69PKfjMQ4kBdZ3HIvlZzLn29NOnHPOTRJ8S/qQmX1S0r8zxviMmb2vaS1zzk0PSymXH5yzLCXZvx/27wcgX0po6emgPwmBeEyYtEksiOKhxzRqcmh1bZy+SznnnJu9Jhv5Li8pf2ezG+KcmwXiGMUVgXiSTF56cAySpi/WHKttkYbmN8ZK+d35D/C7r3uA5PfDKP93dp3FbT/4LY6/agfJg49OSzNnfM6nc865WWHC4NvMvpt922dm36y8T9LsWcXPOVcfs7DiY3nVxygLvssTMEcH46NVBOeWGoLhNBZFKJ8jSvJYUsfIeqRw7XKedpaCMmKFysoc7oogvLSxm5Wf20XpK218NvciSscsY+MFXZx27iO8eum9vGjBw6x94xP0vb6F1Eam3SQW8a0nngnXLWLFNY9T2rrt0LbVvAoovsKlc845oPpSgx8BvlnFPufcXJYmWJqMW7EkBMPR2NVRLCysQ3EQsxwkKTZYJB0s1r68vELgbYBy4c+UZQF+VcxI+/qgry+cbs9eTtiylAPXHMVX218ZDik/BYl9J7Sy45xBLj7rFzyjbSM/6VjDtpbFYZXOykB7KhM2feTbOecck+d8vxy4AFgp6d8q7poHlMZ+lHNuzhtnZNdSC4vyVPN4S8NKl1O4viqC3UlTWbJl7sc83cAApU3dsKmboZi74v5FB09nz2ld7Cu1kWZFoEzZOUes5FkeJa89X9wX2XHOOQeTj3xvJuR7vxq4q2J/D/BnzWqUc24OS61pCc6KFOaHjtzJoTunJkXhKYwY9Y4q2hBRcwDuI9/OOeeYPOf7PuA+SV8zMx/pds5NyMyGR5TLKSJ1BsZDOeSj9jWFGRikJgYtJjUNB8vlAFtZnfE6yEsNOuecy0xY4FfSVdm390i6v2L7taT7672opD+TtE7SbyR9XVKrpEWSbpT0aPZ1Yb3nd65ZvO/WQdFw+kYN21CgG0VhY4Lgtxm1yisv1YDFeZSOvznnnDtyTPYf6/3Z11cCr6rYyrdrJmkl8D5grZmdTigwfBHwYeAmM1sD3JTddm7W8L5bHcvyvc1s6PswslzbNtYod9NGvjNJ9idxaOS7nLPeiHrgvsKlc845Jgm+zWxL9u1OYJOZPQkUgDMI+eD1ygFtknJAe3auC4ErsvuvAF4zhfM71yzed48AUTlnPSqXTcxGvhWFEoq1pp+Yj3w755wLqv2s9qdAazbydxPwTuDyei5oZk8B/wxsBLYA+8zsh8BR5WA/+7qsnvM71yzed0dp8ij0TEktCiPfUH+1lrH4yLdzzjmqD75lZn3Aa4F/N7PfA06t54JZPuyFwPHA0UCHpLfW8PhLJN0p6c4iA/U0wbm6eN+tjRqQJ33IOaOK0n9SfaPQtYiG/0QO56BPYdLlOJtzzrkjR9XBt6TnAm8Bvp/tq3aBntFeAjxuZjvMrAhcDTwP2CZpRXaxFcD2sR5sZpeZ2VozW5unUGcTnKuL910YUb2k2TnYQBh9zkagLbUx869DUD7FSZdZtZOm8ZFv55xzVB98f4CwouV3zGydpBOAH9d5zY3Ab0tqVxgaOxd4ELgWeHt2zNuBa+o8v3PN4n13VNnApo46k03enGYJEWZZne9GpZ14zrdzzrlMVcG3mf3EzF4NfFZSp5ltMLP31XNBM7sd+BZwN/DrrA2XAZ8AXirpUeCl2W3nZg3vuxUO03zvZhFTD74lnS/pYUnrJY1bUUfSsyUlkv6uxuNfV/MTc64J6ujr3nfdnFJV6oik3wK+AiwKN7UD+AMzW1fPRc3so8BHR+0eIIwkOjdred8daUTaSTNqbVdeawZGwRtqCs2XFAOfIby56wbukHStmT0wxnH/CNwA/CEhLara452bcXX2defmlGr/W/4n8OdmdqyZHQN8EPh885rlnJv1Gryk+2Ft6mknzwHWZ586DgJXEib/jvZe4Nvhimyt4fgx5yk4NwNq7eved92cU23w3WFmQzneZnYL0NGUFjnn5ixJocpJk0fB56SpTbhcCWyquN2d7RuSlYL9PeBzhBr0O2s43rnZota+7tycU+1/yA2S/rek47Ltb4DHm9kw59zc18hyg80oXTgREyNKDQLDZQbreHMxycj3knIZymy7ZPTDx2riqNv/CvylmSXjNKHW44cvXlEmc8eOHZMd7txUNKKvhxN5v3WzVLXlAt8F/C2htBqERXfe2ZQWOefmlnIudrakfLNC5OnO+W50tZNJRrh3mtnaCe7vBlZX3F7FoasMrwWuzN6kLANiSa8xs/+p4vglwAWSStnxI5tvdhlhcjFr166d48n3bparta+P23e937rZasLgW1IrcClwEqG6wwez+sbOOXeIMUenFcHkg6tjPEwjzicJIg2nmpcX2BkahU7CwjsNCtJNQBwPLeZDHGdtiDCSmp/XFEsK3gGskXQ88BRwEfDmEe01O37oWtIVwPnAfZJaqjj+cuB7YwXezk2zWvv65XjfdXPMZCPfVwBF4GfAy4GnE2p+O+eOZONMtmzkyLelzRtFH4sIS8snJtJyne/RgfzoNJRazj+F9wRmVpL0HkJlhxj4UrbmwqXZ/aNzXw34Qg3HOzcr1NHXnZtzJgu+TzWz3wKQ9EXgV81vknNuNrPUJlxcx8xQFpyb2ZSqolQG82Y2srShpYT/zY2TIlKisNJlOe3EbOws1FoYMMUMFjO7Drhu1L4xAxEze0f27V/XeLxzM67Ovu7cnDFZ8D2UYpK9G21yc5xzrj6KVE92ywhJ5fLy4/25yyq61DKQLaY28u2cc+7wMVnwfYak/dn3Atqy2wLMzOY1tXXOOTcdKkalE8tSSxocLHvw7ZxzDiYJvs2ssZ/pOucOP6MmOY6eJGn1TLiUhidcZnnWY024BEL5v2Ysdz+q1GCoX17x3CrbUg1fk8g55xzVlxp0zrnxVSysY2YozXK06835zvK7BSPK/TU159uyCZdEYdB7VKnBKU0mNR/5ds45F3jw7Zyr2VAQ7EvMV22KpQadc84dJnwNaOdc9aodyZ6gGsoRa2rLyzvnnDtM+Mi3c65hjoiKSFEESY057OYj384554KmjXxL+pKk7ZJ+U7FvkaQbJT2afV1Ycd9HJK2X9LCk85rVLucm4323DqMnXCpCUvOD8cpJl01WOfmz5scSgu/xNuecc0eOZqadXE5Y3rjSh4GbzGwNcFN2G0mnEpaQPS17zGcleaUVN1Mux/tu9Ww4/9satLT7rFdPEO5pJ84552hi8G1mPwV2j9p9IWHJerKvr6nYf6WZDZjZ48B64DnNaptzE/G+O0doGqasRNFQ2UOiKYzmGyi1cTfnnHNHjumecHmUmW0ByL4uy/avBDZVHNed7XNutvC+O9pMVzpp9PUNUtPQVrm8vKU2ouxgPWTjb845544cs2XC5VhDSWP+S5J0CXAJQCvtzWyTc9Xwvluh2TneioSVl4DX8OI7TUl3iYA4hihGcQz5fLgNIdWmxpF3z+12zjkH0z/yvU3SCoDs6/ZsfzewuuK4VcDmsU5gZpeZ2VozW5un0NTGOlfB+26lcUadGx0Eh0V6bOh7S7PbTQi2ZVC0iKLlSNIIJRaqmqRJeL5THPn2nG/nnHMw/cH3tcDbs+/fDlxTsf8iSQVJxwNrgF9Nc9ucm4j33bFUEwRPNTc7G02faFRdFStsTlWSfZhho68XRaGqSlRH3rd5tRPnnHNB09JOJH0dOAdYIqkb+CjwCeAqSRcDG4HXA5jZOklXAQ8AJeDdZlZjIV3nGsP7boM1KDd7zFH1BpUYVHbu1KJD9h16cO3XDKUGfYjbOedcE4NvM3vTOHedO87xHwc+3qz2OFct77uzk6TmZmhkJ08tGs4rP7QRdZ/eJ1Y655yD2TPh0jnnDjErVsysWDQo3B5OP6magfzzEOecc3jw7ZxrMDNDWarJXFt0p2QRCSIdmgxZnuDZgNSZufVSOOecaxIPvp1zTaM4hly+ruBVuRzk8+ErYKUSSlIsO5fKEx/juDkVUEQ26q3h0e5yqcFaJ3ea53w755wLPPh2zjWGZaX5ikUsjSFJsFIJS5K6A2NlK0wC4dyWDpceTG3MIut1G1pkJ8v5rhz5BkgtbHVO8vScb+ecc+DBt3OuWaII4rjuAFm5XDb6Hf5MCUIgn40gK47D+eM4pLqkKaYIGllspjzyXS4xGIcRb6sxAA/VThrXLOecc3OXB9/OuYax1CDJRqfTkCtd98i3IkiSEGRTEXinWXAdqaGpJuWR6SQb+Y4qR77HSBmpqfqKmaedOOecAzz4ds41mqWQZiPE6RRysSfLE9dwSkqjyhCmFSUGrZzzXXk9Ru2rhcfezjnn8ODbOdcsDVpcB0uHA99GnXMCaZYoMyJHu5xmMoXAW4lH384556Z/eXnn3Fw3E+UDJwq6G7TKJYSR78Si4RHwysmWU2UTbFWQdL6khyWtl/ThMe5/i6T7s+1WSX9S4/FnTOXpOdcodfR177tuTvGRb+fc3DFR6kc0TWMJlRVYajCVnG9JMfAZ4KVAN3CHpGvN7IGKwx4HXmRmeyS9Avg2cGqVx78cuAw4u+5GOtcAdfR177tuzvGRb+dc44weoZ5qmsjoUfby+ebY4j0Q0ljG26rwHGC9mW0ws0HgSuDCygPM7FYz25PdLGb7qj3+NmBV3U/Oucapta9733VzjgffzrnmUX2jxMOPr1jSvfJ85f1RNLwATjOMKjVY7+i6skV2xtuAJZLurNguGXWKlcCmitvd2b7xvAV4oobjLwZ+MG77pUvKbduxY8cEp3Fuymrt6+P2Xe+3brbytBPnXENZaihKs0on6dRGqcuPHRrxnuL5JrpOtsBOecLliEV2ygvsTMEkEy53mtnaiR4+xr4xTyjpxcC5wI9rOP5i4AXjXdzMLiN8tM/atWvn3scObi6pta+P23e937rZyke+nXNzR3nkO4rDojflUek4Bgk1cgRchPNGcVhcJy5fO8r21zCqP9Fky+pCgm5gdcXtVcDmQ5osPQP4AvDnwLIajr/QzHZV1RLnmqvWvu591805TQu+JX1J0nZJv6nY90+SHspmKH9H0oKK+z6SzWx+WNJ5zWqXc5Pxvjs7WHmkuTzqbBUj6ZW55FNJa6kgC2UGh6qdGJCmh16rrgB//JSTKidi3gGskXS8pBbgIuDaEe2XjgGuBt6Wfa36eDN7pI4n5Vwz1NTXve+6uaiZI9+XA+eP2ncjcLqZPQN4BPgIgKRTCb9gp2WP+Ww249m5mXA53nenTJHqz/keaxRb2fmiGBSFUe84Ht7XgGonqWnEQjvltBNrRKpLOYVlrG3Sh1oJeA9wA/AgcJWZrZN0qaRLs8P+D7AY+CxwJ5BUe7ykeyXdOfUn6dzU1NrXve+6uahpOd9m9lNJx43a98OKm7cBr8u+vxC40swGgMclrSfMeP5ls9rn3Hi8706djR7NbdDoNJFQOTiOY4gjlIuxNEVxhKnB4wnZhEsNBf7Dt2sKxxuwyI6ZXQdcN2rf5yq+/0PgDyc5R03HOzcTGtHXnZvNZjLn+10Mz1CuenZz5ezlIgNNbqJzY/K+O56h1JBsS5OJR3wn2Cw1LEmwUml4K2ZfkwSSZGjUWI0I7i1bZIcIM4USgA0d+Z5gc865/7+9u4+xrK7vOP7+nHvvDMuCwO4WXFZkF0vaQEWkhPqEYpu2gApN20SsiQ+xNSSoNY1WWhOkbUh9aGxDmXjEmAAAEdBJREFU1BoqFDGIT23tttH4nKLQRVZdHlwpoKWwSlxAWFjYnZl7zrd/nN+duTM7D/fOzLn33J3PKzncc8/5nXO/9+x3Z3/85vdga8ZQKt+S3gu0gRs7h+YpNu8/SRFxTUScHRFntxivKkSzeTl3h0vZ3GkGlz/936CpKBbczMxs7Rj4VIOS3gi8GvitmGlO6ml0s9kwOXcHLIpyoKVS5bSINH1hZyDm6FRaFbHibidmZnZ4GGiTkaTzgfcAF0XEM12ntgOXSBqXtA04FfjuIGMzW4xzt2bmtnh3+mRX9nHz/IKj364uKxhwaWZmh4/KWr4l3QScR7ly2x7gfZQzRIwDX0v/mO2IiEvTSObPAbspf6V/WUTkVcVmthjnbo11T/tXUct3ETp0VfuIefsX9SwAt3ybmRnVznbyunkOX7tI+auAq6qKx6xXzt1VJC2/ZXduS3Ym1GikFTQFrdbsxW6aTbJ1RxCNrBwgmeflwMxlfL4EkQHNJmo20dhYOatKq1nOdtLO++5rLrdwm5kZXl7ezJaj14rkipaWLyBEdKYWnO7zXRBFhvIc2u2Z7h9RlFMBtlrlIMZGo5wRpXNtnpezrywQpwrIi4ypaFAUKruap1lXVBQzlXyp/y4nxEj1UTczs+q48m1mq2sVW3ijCEQ+vT+zwmVOTLWhMVUWLFJLd7s9U8nNMjQ2ls4X0BYxWSwcX8Bk0WCyaJLnGcpJ90vTGq6k8tyZttDMzNY8V75t4DQxxdi+4MGHN3D9xpdy6rqfs/PJreQHGxzcICbP2Epz/xY0MUW272mKXzxBsX+/Ky+2qMhzYnKyrDADao2V/bQb9Vhw1LOdmJkZuPJtQ9B+aA/HfXIPx32yXDv4R2ykcdpmjrpwjOP+YA9XnLKdkxr7uXHf2Vz73+ey5asncsyOh4innybyoqxgTbUX7kJga8aCi+uswqI73avMHzKzSt83A3J3OzEzM1e+rSby3fdy4u574e/gKs6ErIF+/TTGLmpy+nt28Tebv85RavGhx87khm+9nK3bp2jdcjcxcZiuFGlAZ1GdLO2Xfb07046okaUBlw3IyrJqNFZlNUopynuOjaHxcTQ+DuNjMNYiJNRqlp/fM08paGZmJVe+zayeIg2SVKq0xuz+2sUkKC9mKsFdK15KglaznK2k02rdvUx8u00cOEhxcKL8DUoBymEyb3Agb5G3G6hN+VuWiQmi2UQrbbn2gEszM8OVbzOrq7ndO5QBqQIula3i2UyZmGqnqQWL6fLTLeeZylbssVZqKW+g9etpHLkOgKmN65g6Jthy1D62rnuMXeu3kB+xbqbl+4hxYrxFMdaEDGKy0V9f8qAcFGpmZmueK99WT0VOdt+DPO+mE7jvv07j9489g7wlDmzKYFvB3rcfYNv7jmOi3eTeB57NhttanPCdx8jv+bH7gh8uUiV71sI6nZbrzhSADcoW706r8qzW8YLIKVvOc8rZUA4cmHclzMbTU2STRzCZN5mKBnmIVkHZQj45CVPjqFN5XvZUg85LMzNz5dtqLH9iHzyxj9ZuaFHOXrH+pb/G/q1jXLztLi7fdDsHI+fKZ/0mX/35WWy8a93KViG00ZVl5VzdaRGe+Sy24E42laNctKMzz3dWtlZ3X58JMlLXln6Xlsct32ZmBrjybWZ11mn9XkgRlKvhJMrK1vBZ9yjSS+q20n3vQ243z2elbiud/uTTRfpt/XafbzMzw5VvM6u7BRfFSd1KYoFKbafSTFkbl6Lcn66Mx6GDOEMUobLOn1a4LAdkzomh34p3Wu7ezMzMlW8zOyxEMadftTRv/+7p010V82hmRCMYa+SMZ22yLIhOF5NONxOJEERfUwx2B+huJ2Zm5sq3ma0Vc1q5uxvMs3aB8rLVeyoaRKic4TDS/NzFSufpDi+yY2ZmQDl8qBKSrpO0V9Ld85x7l6SQtKnr2F9Iul/S/0j63ariMluKc3d0TE8lqAw1GuWS8p2t0Uit28u4r6Ls291ZxKfRgGajbPXORGTZ9Of2JCCiWHAzM7O1o7LKN3A9cP7cg5JOAn4beLDr2GnAJcDp6ZqPSYcMmzIblOtx7tZfBNFuE1OT5dY9xzdAo4GaTbKxVrmllSo1Po5aY2WluuteBLSLjHaRkedZ2ec7z8uuLFGsfLaSvFh4MzOzNaOyyndE3Az8Yp5Tfw/8ObMn8roY+ExETETE/wL3A+dUFZvZYpy7I6rIy8p4e6rcJicpJqemt2i3Z8pmaYn41EoeY02KVnBEo81RzQmazXx2n+90DaJsEe/3J2dEOdvJQpuZma0ZA+3zLeki4KcRcYdmzxawBdjR9X5POmZWC87dEdLdNzvy6cpz5CzYeq0IFJClaQvV+c8qDpIMz3ZiZmZU2+1kFklHAu8Frpjv9DzH5v1XT9JbJe2UtHOKidUM0Wxezt0R1xk02Zk2MIo0feDMpjwgRBFL/Ehc1uqWKYYVdjuRdH4aV3C/pMvnOS9JV6fzd0q6rM/yZ/X/xcxW3zJy3blrI2WQLd/PA7YBnZbD5wDfl3QOZWvhSV1lnwP8bL6bRMQ1wDUAz9IGz921xrWUcfK6R2k/Z4JHXrieY449a2YZ8C7ZVNDad5DG3n0UjzxKcfBgPx/j3D2cLNSarSBTQdb9/06diray6akGp4/3vcrl8ruXpHEEH6Ucc7AHuF3S9ojY3VXsAuDUtL0Y+CZwWo/lfwP4x/RqNjTLyHXnro2cgVW+I+Iu4PjOe0kPAGdHxKOStgOflvRh4ETKv1DfHVRsNjqyqYLGM+Le/cdz29HrOTo7yCvW38Mrzr0Hzl34unsmNvORH7+SZ25+Lid+eyOthx6bOfngwteBc3dNiJmW7wKV1e/uqQa7pylcxu8LI2Kl3U7OAe6PiJ8ASPoM5XiD7grJxcANEREqVxTKgQMRMblUeWCHpGMlbY6Ih1cSqNkK9ZXrOHdtBFVW+ZZ0E3AesEnSHuB9EXHtfGUj4oeSPkf5l6sNXBYR7iBps8TUJLplF1tvgX3AB3l+z9c2n30C+p1NFBc9yZ/9yec5/8iZbh+NzbPLOncNKAdcphlTaDaJRgMaItJUg1mf3U9iZbOlbAEe6nq/h0Nb+rrLbAGeSK8P91C+c89OebNh6TfXO2WcuzYyKqt8R8Trlji/dc77q4Cr+vmMp3j80a/HF/4vvd0EPNrP9TUz6vFDnb/Dw8Any+1Vh549ufvNgHO3vs9sLcW2q9zuA/4DgH8H4I7O+UfStrCTFzv5FI9/5evF5zYtUuQISTu73l+Tuil19DK2QPPsd5dZrPxCZcqC0luBt6a3E/PNgT9EdcvTusUD9YvpVxY512+uL1Sm7nkL9ftzqVs8UL+YFsvdno30CpcR8UudfUk7I+LsYcazEqMePxwe32FQOrlb52fm2FZPRBwyb3yfehlb0F1mD3BsV5mlyi9UBpg9XqFuz97xLK1uMc35H825+s31hcrUOm+hfjHVLR6oX0xL5G7PBjbbiZmZLdvtwKmStkkao1zYafucMtuBN6gcFZwBDcoW9SXLS3oRsM99Zq0G+sp1566NopFu+TYzWwsioi3pbcBXKCvV16XxBpem8x8HvgRcSLnQ0zOUC0L1U/7Ng/1WZodaZq47d22kHE6V72uWLlJrox4/HB7fYdDq/MwcW41ExJcoKx3dxz7etR/AZXMuu7rP8r2o27N3PEurW0yLxrPMXF/RZw5J3WKqWzxQv5hWJR7FKq7gZmZmZmZmC3OfbzMzMzOzARn5yvdSy9DWkaTrJO3tnvZI0gZJX5N0X3o9bpgxLkbSSZK+JelHkn4o6U/T8ZH5DsNWt7yta0461wZnqZxMg9vmXdK7qnzuIabXp1julHSrpBd0nXtA0l2Sdq3WDAU9xHOepH3pM3dJuqLXayuK591dsdwtKZe0IZ2r4vkc8nNkzvlKcqhuuVu3vO0xJufuIHM3IkZ2oxyM8WPgFGCMclre04YdVw9xvxw4C7i769gHgcvT/uXAB4Yd5yLxbwbOSvtHA/dSLmM9Mt9hyM+vdnlb15x0rg3sOS+Zk5QD3L5MOcfyi4Dber22wpheAhyX9i/oxJTePwBsGvAzOg/4z+VcW0U8c8q/BvhmVc8n3fOQnyNV51DdcrdueevcrWfujnrL9/QytBExCXSWoa21iLgZ+MWcwxdTLgNDev29gQbVh4h4OCK+n/afAn5EubrYyHyHIatd3tY1J51rA9NLTk4v6R0RO4BjJW3u8dpKYoqIWyPi8fR2B+V8z1VZyfes4hn1e8/XATet8DMXtcDPkW5V5FDdcrduedtTTBVdu1r3POxyd9Qr3wstMTuKTog0T2l6PX7I8fRE0lbghcBtjOh3GIJRydta/Xk61yrVS04uVKaqfO73vm+hbJnqCOCrkr6ncqXDQcXzYkl3SPqypNP7vLaKeJB0JHA+8C9dh1f7+fSiihyqW+7WLW/7icm5u7BVzaFRn2qw5+WRbfVJOoryL8Q7I+JJab4/DpuH87ZPzrXKrWRJ76ryuef7SnolZSXmZV2HXxoRP5N0PPA1Sfek1q0q4/k+cHJE7Jd0IfBF4NQer60ino7XALdERHfL3mo/n15UkUN1y9265W2vMTl3F7eqOTTqLd89L488An6efoVBet075HgWJalFWRm6MSL+NR0eqe8wRKOSt7X483SuDcRKlvSuKp97uq+kM4BPABdHxGOd4xHxs/S6F/g3yl8PVxpPRDwZEfvT/peAlqRNvX6X1Y6nyyXM+bV9Bc+nF1XkUN1yt25521NMzt0lrW4OLdUpvM4bZcv9T4BtzHR0P33YcfUY+1ZmD277ELMHkH1w2DEuEruAG4B/mHN8ZL7DkJ9fLfO2jjnpXBvYc14yJ4FXMXvA0Xd7vbbCmJ5LucrhS+YcXw8c3bV/K3D+AOJ5NjPrZ5wDPJie16o/o17vCRxD2Zd1fZXPp+ves36OVJ1DdcvduuWtc7eeubviYIe9UY5AvZdytOl7hx1PjzHfBDwMTFH+X9NbgI3AN4D70uuGYce5SPwvo/y1yp3ArrRdOErfYdhb3fK2rjnpXBvosz4kJ4FLgUvTvoCPpvN3AWcvdu2AYvoE8HhXbuxMx09J/wjeAfxwtWLqIZ63pc+7g3Ig3UsWu7bqeNL7NwGfmXNdVc9nvp8jledQ3XK3bnnr3K1f7nqFSzMzMzOzARn1Pt9mZmZmZiPDlW8zMzMzswFx5dvMzMzMbEBc+TYzMzMzGxBXvs3MzMzMBsSV7xqRlEvaJeluSZ9Py6r2c/2Jkr6Q9s9Mq1R1zl0k6fLVjtkOH84/MzOz6nmqwRqRtD8ijkr7NwLfi4gPL/Neb6Kch/JtqxiiHcacf2ZmZtVzy3d9fRv4ZUkbJH1R0p2SdqQlaZH0itRKuUvSDyQdLWlrarUcA/4aeG06/1pJb5L0kXTtyZK+ke75DUnPTcevl3S1pFsl/UTSH6bjmyXd3NUqeu6QnokNjvPPzMysAq5815CkJnAB5SpKfwX8ICLOAP6ScqltgHcBl0XEmcC5wIHO9RExCVwBfDYizoyIz875iI8AN6R73ghc3XVuM+Wqgq8G3p+O/RHwlfRZL6BckcsOU84/MzOz6rjyXS/rJO0CdgIPAtdSVkQ+BRAR3wQ2SjoGuAX4sKR3AMdGRLuPz3kx8Om0/6n0GR1fjIgiInYDJ6RjtwNvlnQl8PyIeGpZ387qzvlnZmZWMVe+6+VAaik8MyLenloQNU+5iIj3A38MrAN2SPrVFXxud8f/ia59pQ+7GXg58FPgU5LesILPsvpy/pmZmVXMle/6uxl4PYCk84BHI+JJSc+LiLsi4gOULZVzKz9PAUcvcM9bgUvS/uuB7ywWgKSTgb0R8U+UraFnLeeL2Ehy/pmZma2i5rADsCVdCfyzpDuBZ4A3puPvlPRKIAd2A1+m7C/b8S3g8tSN4G/n3PMdwHWS3g08Arx5iRjOA94taQrYD7jlce24EuefmZnZqvFUg2ZmZmZmA+JuJ2ZmZmZmA+LKt5mZmZnZgLjybWZmZmY2IK58m5mZmZkNiCvfZmZmZmYD4sq3mZmZmdmAuPJtZmZmZjYgrnybmZmZmQ3I/wMVbtmE0Wr0OAAAAABJRU5ErkJggg==\n", "text/plain": [ - "
" + "1" ] }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -275,7 +376,8 @@ "trainer = Trainer(\n", " max_epochs=args.epochs,\n", " gpus=args.gpus,\n", - " check_val_every_n_epoch=8,\n", + " check_val_every_n_epoch=1,\n", + " val_percent_check=0.1\n", " #profiler=profiler,\n", " #fast_dev_run=True,\n", " # auto_scale_batch_size='power'\n", @@ -293,16 +395,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "version_0\r\n" + ] + } + ], "source": [ "!ls lightning_logs" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -311,11 +421,44 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "scrolled": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Reusing TensorBoard on port 6006 (pid 3827), started 3:15:02 ago. (Use '!kill 3827' to kill it.)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "%tensorboard --logdir lightning_logs" ] @@ -329,9 +472,55 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "NeedlemanWunschAligner(\n", + " (lm): BiLM(\n", + " (embed): Embedding(22, 21, padding_idx=21)\n", + " (dropout): Dropout(p=0, inplace=False)\n", + " (rnn): ModuleList(\n", + " (0): LSTM(21, 1024, batch_first=True)\n", + " (1): LSTM(1024, 1024, batch_first=True)\n", + " )\n", + " (linear): Linear(in_features=1024, out_features=21, bias=True)\n", + " )\n", + " (match_embedding): EmbedLinear(\n", + " (embed): Embedding(21, 512, padding_idx=20)\n", + " (proj): Linear(in_features=512, out_features=512, bias=True)\n", + " )\n", + " (gap_embedding): EmbedLinear(\n", + " (embed): Embedding(21, 512, padding_idx=20)\n", + " (proj): Linear(in_features=512, out_features=512, bias=True)\n", + " )\n", + " (match_mixture): MultiheadProduct(\n", + " (multilinear): MultiLinear(\n", + " (multi_output): ModuleList(\n", + " (0): Linear(in_features=512, out_features=512, bias=True)\n", + " )\n", + " )\n", + " (linear): Linear(in_features=1, out_features=1, bias=True)\n", + " )\n", + " (gap_mixture): MultiheadProduct(\n", + " (multilinear): MultiLinear(\n", + " (multi_output): ModuleList(\n", + " (0): Linear(in_features=512, out_features=512, bias=True)\n", + " )\n", + " )\n", + " (linear): Linear(in_features=1, out_features=1, bias=True)\n", + " )\n", + " (nw): NeedlemanWunschDecoder()\n", + ")" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "model.aligner" ] @@ -345,18 +534,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access 'lightning_logs/version_3/checkpoints': No such file or directory\r\n" + ] + } + ], "source": [ "!ls lightning_logs/version_3/checkpoints" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'lightning_logs/version_3/checkpoints/epoch=49.ckpt'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mcheckpoint_dir\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'lightning_logs/version_3/checkpoints'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf'{checkpoint_dir}/epoch=49.ckpt'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLightningAligner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_from_checkpoint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/core/saving.py\u001b[0m in \u001b[0;36mload_from_checkpoint\u001b[0;34m(cls, checkpoint_path, map_location, hparams_file, tags_csv, *args, **kwargs)\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0mcheckpoint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpl_load\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 142\u001b[0;31m \u001b[0mcheckpoint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpl_load\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloc\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 143\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;31m# add the hparams from csv file to checkpoint\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/cloud_io.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(path_or_url, map_location)\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0murlparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscheme\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m''\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mPath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrive\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# no scheme or with a drive letter\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhub\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_state_dict_from_url\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(f, map_location, pickle_module, **pickle_load_args)\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0mpickle_load_args\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'encoding'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'utf-8'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 524\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 525\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_file\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 526\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_zipfile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopened_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 527\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0m_open_zipfile_reader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_zipfile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36m_open_file_like\u001b[0;34m(name_or_buffer, mode)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 212\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 213\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m'w'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, name, mode)\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_opener\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 193\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_open_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 194\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__exit__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'lightning_logs/version_3/checkpoints/epoch=49.ckpt'" + ] + } + ], "source": [ "from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint\n", "checkpoint_dir = 'lightning_logs/version_3/checkpoints'\n", From 2e5f326c656e829409e1532328d4bba3e0212ef5 Mon Sep 17 00:00:00 2001 From: mortonjt Date: Sun, 30 Aug 2020 17:48:24 -0600 Subject: [PATCH 29/30] fixing local alignments --- deepblast/alignment.py | 17 +- ipynb/simulation-benchmark.ipynb | 127 +++++++++++-- ipynb/struct-benchmark.ipynb | 303 +++++-------------------------- 3 files changed, 169 insertions(+), 278 deletions(-) diff --git a/deepblast/alignment.py b/deepblast/alignment.py index 34d8129..168cf5b 100644 --- a/deepblast/alignment.py +++ b/deepblast/alignment.py @@ -91,12 +91,11 @@ def forward(self, x, order): gx, _, gy, _ = unpack_sequences(self.gap_embedding(x), order) # Obtain theta through an inner product across latent dimensions theta = self.match_mixture(zx, zy) - gap = self.gap_mixture(gx, gy) - #G = self.gap_mixture(gx, gy) + #A = self.gap_mixture(gx, gy) # zero out first row and first column for local alignments - #A = torch.zeros(G.shape).to(G.device) - #A[:, 1:, 1:] = G[:, 1:, 1:] - + G = self.gap_mixture(gx, gy) + A = torch.zeros(G.shape).to(G.device) + A[:, 1:, 1:] += G[:, 1:, 1:] aln = self.nw.decode(theta, A) return aln, theta, A @@ -106,10 +105,10 @@ def traceback(self, x, order): zx, _, zy, _ = unpack_sequences(self.match_embedding(x), order) gx, xlen, gy, ylen = unpack_sequences(self.gap_embedding(x), order) match = self.match_mixture(zx, zy) - gap = self.gap_mixture(gx, gy) - # G = self.gap_mixture(gx, gy) - # gap = torch.zeros(G.shape).to(G.device) - # gap[:, 1:, 1:] = G[:, 1:, 1:] + # gap = self.gap_mixture(gx, gy) + A = self.gap_mixture(gx, gy) + gap = torch.zeros(A.shape).to(A.device) + gap[:, 1:, 1:] += A[:, 1:, 1:] # zero out first row and first column for local alignments # L = gx.shape[1] diff --git a/ipynb/simulation-benchmark.ipynb b/ipynb/simulation-benchmark.ipynb index 2ee6929..583db3f 100644 --- a/ipynb/simulation-benchmark.ipynb +++ b/ipynb/simulation-benchmark.ipynb @@ -147,17 +147,17 @@ "args = [\n", " '--train-pairs', f'{os.getcwd()}/data/train.txt',\n", " '--test-pairs', f'{os.getcwd()}/data/test.txt',\n", - " '--valid-pairs', f'{os.getcwd()}/data/valid.txt',\n", + " '--valid-pairs', f'{os.getcwd()}/data/train.txt',\n", " '--output-directory', output_dir,\n", - " '--epochs', '32',\n", + " '--epochs', '128',\n", " '--batch-size', '20', \n", " '--num-workers', '30',\n", - " '--learning-rate', '1e-3', \n", + " '--learning-rate', '1e-4', \n", " '--layers', '4',\n", - " '--heads', '8',\n", + " '--heads', '4',\n", " '--visualization-fraction', '1',\n", " '--loss', 'l2_cross_entropy',\n", - " '--scheduler', 'cosine',\n", + " '--scheduler', 'steplr',\n", " '--gpus', '1'\n", "]\n", "parser = argparse.ArgumentParser(add_help=False)\n", @@ -235,13 +235,13 @@ "\n", " | Name | Type | Params\n", "---------------------------------------------------\n", - "0 | aligner | NeedlemanWunschAligner | 30 M \n" + "0 | aligner | NeedlemanWunschAligner | 48 M \n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e1da30727bf64b89adb0dd3f39659e83", + "model_id": "", "version_major": 2, "version_minor": 0 }, @@ -255,7 +255,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "20522bb71e8a4f4d933232b7aa6387dc", + "model_id": "a15504f7cc40479f86df76fd73aed1e2", "version_major": 2, "version_minor": 0 }, @@ -308,6 +308,90 @@ "metadata": {}, "output_type": "display_data" }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stdout", "output_type": "stream", @@ -315,6 +399,14 @@ "\n" ] }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/juermieboop/miniconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/distributed.py:25: UserWarning: Detected KeyboardInterrupt, attempting graceful shutdown...\n", + " warnings.warn(*args, **kwargs)\n" + ] + }, { "data": { "text/plain": [ @@ -333,7 +425,8 @@ " max_epochs=args.epochs,\n", " gpus=args.gpus,\n", " check_val_every_n_epoch=10,\n", - " gradient_clip_val=10\n", + " gradient_clip_val=10,\n", + " # val_percent_check=0.25 \n", " # profiler=profiler,\n", " # fast_dev_run=True,\n", " # auto_scale_batch_size='power'\n", @@ -358,7 +451,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "version_0 version_1 version_2\r\n" + "version_0 version_11\tversion_14 version_4 version_7\r\n", + "version_1 version_12\tversion_2 version_5 version_8\r\n", + "version_10 version_13\tversion_3 version_6 version_9\r\n" ] } ], @@ -385,7 +480,7 @@ { "data": { "text/plain": [ - "Reusing TensorBoard on port 6006 (pid 3827), started 3:28:24 ago. (Use '!kill 3827' to kill it.)" + "Reusing TensorBoard on port 6006 (pid 3827), started 7:41:35 ago. (Use '!kill 3827' to kill it.)" ] }, "metadata": {}, @@ -395,11 +490,11 @@ "data": { "text/html": [ "\n", - " \n", "