diff --git a/opensoundscape/ml/cnn.py b/opensoundscape/ml/cnn.py
index 7d545ca0..49b8e97e 100644
--- a/opensoundscape/ml/cnn.py
+++ b/opensoundscape/ml/cnn.py
@@ -1819,9 +1819,9 @@ def hook(module, input, output):
         # aggregate across batches
         # note that shapes of elements in intermediate_outputs may vary
         # (so we don't make one combined np.array)
+        # careful with squeezing: if we have a batch size of 1, we don't want to squeeze out the batch dimension
         intermediate_outputs = [
-            torch.vstack(x).squeeze().detach().cpu().numpy()
-            for x in intermediate_outputs
+            torch.vstack(x).detach().cpu().numpy() for x in intermediate_outputs
         ]
 
         # replace scores with nan for samples that failed in preprocessing
@@ -2073,6 +2073,7 @@ def embed(
         progress_bar=True,
         return_preds=False,
         avgpool=True,
+        return_dfs=True,
         **kwargs,
     ):
         """
@@ -2087,6 +2088,10 @@
             progress_bar: bool, if True, shows a progress bar with tqdm [default: True]
             avgpool: bool, if True, applies global average pooling to embeddings [default: True]
                 i.e. averages across all dimensions except first to get a 1D vector per sample
+            return_dfs: bool, if True, returns embeddings as pd.DataFrame with multi-index like .predict()
+                if False, returns np.array of embeddings [default: True]. If avg_pool=False, overrides
+                to return np.array since we can't have a df with >2 dimensions
+
             kwargs are passed to self.predict_dataloader()
 
         Returns:
@@ -2095,6 +2100,9 @@
             `(embeddings, preds)` where `preds` is the raw model output
             (e.g. logits, no activation layer)
         """
+        if not avgpool:  # cannot create a DataFrame with >2 dimensions
+            return_dfs = False
+
         # if target_layer is None, attempt to retrieve default target layers of network
         if target_layer is None:
             try:
@@ -2125,16 +2133,20 @@
             avgpool_intermediates=avgpool,
         )
 
-        # put embeddings
-        embeddings = pd.DataFrame(
-            data=embeddings[0], index=dataloader.dataset.dataset.label_df.index
-        )
+        if return_dfs:
+            # put embeddings in DataFrame with multi-index like .predict()
+            embeddings = pd.DataFrame(
+                data=embeddings[0], index=dataloader.dataset.dataset.label_df.index
+            )
+        else:
+            embeddings = embeddings[0]
 
         if return_preds:
-            # put predictions in a DataFrame with same index as embeddings
-            preds = pd.DataFrame(
-                data=preds, index=dataloader.dataset.dataset.label_df.index
-            )
+            if return_dfs:
+                # put predictions in a DataFrame with same index as embeddings
+                preds = pd.DataFrame(
+                    data=preds, index=dataloader.dataset.dataset.label_df.index
+                )
             return embeddings, preds
         return embeddings
 
diff --git a/tests/test_cnn.py b/tests/test_cnn.py
index 644f1a6c..4e391421 100644
--- a/tests/test_cnn.py
+++ b/tests/test_cnn.py
@@ -836,6 +836,52 @@ def test_embed(test_df):
             raise Exception(f"{arch} failed") from e
 
 
+def test_embed_no_avgpool(test_df):
+    # returns arrays rather than dataframes
+    m = cnn.SpectrogramClassifier(
+        classes=[0, 1],
+        single_target=False,
+        architecture="resnet18",
+        sample_duration=5,
+    )
+    embeddings = m.embed(
+        samples=test_df,
+        avgpool=False,
+        progress_bar=False,
+        target_layer=m.network.layer4,
+    )
+    assert embeddings.shape == (2, 512, 7, 7)
+
+
+def test_embed_return_array(test_df):
+    # returns arrays rather than dataframes
+    m = cnn.SpectrogramClassifier(
+        classes=[0, 1],
+        single_target=False,
+        architecture="resnet18",
+        sample_duration=5,
+    )
+    embeddings = m.embed(
+        samples=test_df,
+        progress_bar=False,
+        target_layer=m.network.layer4,
+        return_dfs=False,
+    )
+    assert embeddings.shape == (2, 512)
+    assert isinstance(embeddings, np.ndarray)
+
+
+def test_embed_one_sample(train_df):
+    m = cnn.SpectrogramClassifier(
+        classes=[0, 1, 2],
+        single_target=False,
+        architecture="resnet18",
+        sample_duration=10,
+    )
+    embeddings = m.embed(samples=train_df.head(1), avgpool=True, progress_bar=False)
+    assert embeddings.shape == (1, 512)
+
+
 def test_call_with_intermediate_layers(test_df):
     """test that passing intermediate_layers to SpectrogramClassifier.__call__
     returns tensors of expected shape"""
     model = cnn.SpectrogramClassifier(
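A minimal usage sketch (not part of the patch) showing how the new embed() options interact, modeled on the tests added above; the audio file paths and class labels are placeholders, not files from this PR.

from opensoundscape.ml import cnn

model = cnn.SpectrogramClassifier(
    classes=[0, 1], single_target=False, architecture="resnet18", sample_duration=5
)
samples = ["audio1.wav", "audio2.wav"]  # hypothetical audio files, not from this PR

# default: pooled embeddings returned as a pd.DataFrame indexed like .predict()
emb_df = model.embed(samples, progress_bar=False)

# return_dfs=False: same pooled embeddings, but as a np.ndarray of shape (n_samples, n_features)
emb_arr = model.embed(samples, progress_bar=False, return_dfs=False)

# avgpool=False: unpooled feature maps (n_samples, channels, h, w); return_dfs is
# overridden to False because a DataFrame cannot hold >2 dimensions
feature_maps = model.embed(samples, progress_bar=False, avgpool=False)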