Skip to content

Commit

Permalink
add switch for embed() to return dfs or arrays
Browse files Browse the repository at this point in the history
We need to return arrays rather than DataFrames when the result has more than 2 dimensions, for instance when the spatial dimensions are preserved. However, returning a DataFrame is a useful default for scenarios where global average pooling is enabled and we have a single 1-D feature vector per sample. So embed() now takes a return_dfs flag: if False, it returns a np.array; the default is True. If avgpool is set to False, return_dfs is overridden to False so that we can return >2-D output.
  • Loading branch information
sammlapp committed Sep 6, 2024
1 parent 51263bc commit d39833e
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 10 deletions.
32 changes: 22 additions & 10 deletions opensoundscape/ml/cnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -1819,9 +1819,9 @@ def hook(module, input, output):
# aggregate across batches
# note that shapes of elements in intermediate_outputs may vary
# (so we don't make one combined np.array)
# careful with squeezing: if we have a batch size of 1, we don't want to squeeze out the batch dimension
intermediate_outputs = [
torch.vstack(x).squeeze().detach().cpu().numpy()
for x in intermediate_outputs
torch.vstack(x).detach().cpu().numpy() for x in intermediate_outputs
]

# replace scores with nan for samples that failed in preprocessing
Expand Down Expand Up @@ -2073,6 +2073,7 @@ def embed(
progress_bar=True,
return_preds=False,
avgpool=True,
return_dfs=True,
**kwargs,
):
"""
Expand All @@ -2087,6 +2088,10 @@ def embed(
progress_bar: bool, if True, shows a progress bar with tqdm [default: True]
avgpool: bool, if True, applies global average pooling to embeddings [default: True]
i.e. averages across all dimensions except first to get a 1D vector per sample
return_dfs: bool, if True, returns embeddings as pd.DataFrame with multi-index like .predict()
if False, returns np.array of embeddings [default: True]. If avgpool=False, return_dfs is
overridden to False (np.array is returned) since a DataFrame cannot have >2 dimensions
kwargs are passed to self.predict_dataloader()
Returns:
Expand All @@ -2095,6 +2100,9 @@ def embed(
`(embeddings, preds)` where `preds` is the raw model output (e.g. logits, no activation layer)
"""
if not avgpool: # cannot create a DataFrame with >2 dimensions
return_dfs = False

# if target_layer is None, attempt to retrieve default target layers of network
if target_layer is None:
try:
Expand Down Expand Up @@ -2125,16 +2133,20 @@ def embed(
avgpool_intermediates=avgpool,
)

# put embeddings
embeddings = pd.DataFrame(
data=embeddings[0], index=dataloader.dataset.dataset.label_df.index
)
if return_dfs:
# put embeddings in DataFrame with multi-index like .predict()
embeddings = pd.DataFrame(
data=embeddings[0], index=dataloader.dataset.dataset.label_df.index
)
else:
embeddings = embeddings[0]

if return_preds:
# put predictions in a DataFrame with same index as embeddings
preds = pd.DataFrame(
data=preds, index=dataloader.dataset.dataset.label_df.index
)
if return_dfs:
# put predictions in a DataFrame with same index as embeddings
preds = pd.DataFrame(
data=preds, index=dataloader.dataset.dataset.label_df.index
)
return embeddings, preds
return embeddings

Expand Down
46 changes: 46 additions & 0 deletions tests/test_cnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,52 @@ def test_embed(test_df):
raise Exception(f"{arch} failed") from e


def test_embed_no_avgpool(test_df):
    """embed(avgpool=False) returns an un-pooled array rather than a DataFrame.

    Without global average pooling the embedding of each sample has more than
    one dimension, which cannot be stored in a DataFrame, so embed() must
    return an array even though return_dfs is left at its default.
    """
    m = cnn.SpectrogramClassifier(
        classes=[0, 1],
        single_target=False,
        architecture="resnet18",
        sample_duration=5,
    )
    embeddings = m.embed(
        samples=test_df,
        avgpool=False,
        progress_bar=False,
        target_layer=m.network.layer4,
    )
    # check the type explicitly: a shape assertion alone does not establish
    # that an array (and not some other object exposing .shape) was returned
    assert isinstance(embeddings, np.ndarray)
    # all four dimensions of the un-pooled layer4 output are retained
    assert embeddings.shape == (2, 512, 7, 7)


def test_embed_return_array(test_df):
    """With return_dfs=False, embed() yields a pooled 2-D numpy array.

    avgpool is left at its default, so each sample is expected to be
    reduced to a single 512-element feature vector.
    """
    model = cnn.SpectrogramClassifier(
        architecture="resnet18",
        classes=[0, 1],
        sample_duration=5,
        single_target=False,
    )
    embed_kwargs = dict(
        samples=test_df,
        target_layer=model.network.layer4,
        return_dfs=False,
        progress_bar=False,
    )
    result = model.embed(**embed_kwargs)

    # one row per sample, one column per feature
    assert result.shape == (2, 512)
    # explicitly an array, not a DataFrame
    assert isinstance(result, np.ndarray)


def test_embed_one_sample(train_df):
    """Embedding a single sample keeps the leading (per-sample) dimension.

    Guards against the one-sample result collapsing to a 1-D vector: the
    output for one input row must still have shape (1, 512).
    """
    model = cnn.SpectrogramClassifier(
        architecture="resnet18",
        classes=[0, 1, 2],
        sample_duration=10,
        single_target=False,
    )
    result = model.embed(
        samples=train_df.head(1),
        avgpool=True,
        progress_bar=False,
    )
    assert result.shape == (1, 512)


def test_call_with_intermediate_layers(test_df):
"""test that passing intermediate_layers to SpectrogramClassifier.__call__ returns tensors of expected shape"""
model = cnn.SpectrogramClassifier(
Expand Down

0 comments on commit d39833e

Please sign in to comment.