Skip to content

Commit

Permalink
update docstrings and move samples=[samples] to predict_dataloader()
Browse files Browse the repository at this point in the history
Checks for a str/pathlib.Path file path only once, in predict_dataloader(), rather than in several places.
  • Loading branch information
sammlapp committed Sep 10, 2024
1 parent 3f7af4a commit 70301d5
Showing 1 changed file with 41 additions and 39 deletions.
80 changes: 41 additions & 39 deletions opensoundscape/ml/cnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,12 +455,11 @@ def train_dataloader(
train_loader samples batches of images + labels from training set
Args:
samples: list of files or pd.DataFrame with multi-index ['file','start_time','end_time']
Args: see self.train_dataloader_cls docstring for arguments
**kwargs: any arguments to pass to the DataLoader __init__
Note: some arguments are fixed and should not be passed in kwargs:
- shuffle=True: shuffle samples for training
- bypass_augmentations=False: apply augmentations to training samples
Note: some arguments are fixed and should not be passed in kwargs:
- shuffle=True: shuffle samples for training
- bypass_augmentations=False: apply augmentations to training samples
"""
return self.train_dataloader_cls(
Expand All @@ -478,7 +477,17 @@ def train_dataloader(
)

def predict_dataloader(self, samples, collate_fn=collate_audio_samples, **kwargs):
"""generate dataloader for inference (predict/validate/test)"""
"""generate dataloader for inference (predict/validate/test)
Args: see self.inference_dataloader_cls docstring for arguments
**kwargs: any arguments to pass to the DataLoader __init__
Note: these arguments are fixed and should not be passed in kwargs:
- shuffle=False: retain original sample order
"""
# for convenience, convert str/pathlib.Path to list of length 1
if isinstance(samples, (str, Path)):
samples = [samples]

return self.inference_dataloader_cls(
samples=samples,
preprocessor=self.preprocessor,
Expand Down Expand Up @@ -652,7 +661,6 @@ def __init__(
# These metrics are a good starting point
# for single and multi-target classification
# User can add/remove metrics as desired.
# TODO: should re-initialize if number of classes changes
self._init_torch_metrics()

### Logging ###
Expand Down Expand Up @@ -847,18 +855,12 @@ def __init__(self, *args, **kwargs):
"""track number of trained epochs"""

### metrics ###
self.prediction_threshold = 0.5 # used for threshold-specific metrics
self.loss_hist = {}
"""dictionary of epoch:mean batch loss during training"""
self.train_metrics = {}
self.valid_metrics = {}

### network device ###
# automatically gpu (default is 'cuda:0') if available
# can set after init, eg model.device='cuda:1'
# network and samples are moved to device during training/inference
# devices could be 'cuda:0', torch.device('cuda'), torch.device('cpu'), torch.device('mps') etc
self.device = _gpu_if_available()
self.device = _gpu_if_available() # device to use for training and inference

def _log(self, message, level=1):
txt = str(message)
Expand Down Expand Up @@ -960,10 +962,6 @@ def predict(
for that sample will be np.nan
"""
# for convenience, convert str/pathlib.Path to list of length 1
if isinstance(samples, (str, Path)):
samples = [samples]

# create dataloader to generate batches of AudioSamples
dataloader = self.predict_dataloader(
samples,
Expand Down Expand Up @@ -1099,12 +1097,8 @@ def generate_samples(
show_tensor_grid(tensors,columns=3)
```
"""
# allow passing a single file path (str or pathlib.Path) for convenience
if isinstance(samples, (str, Path)):
samples = [samples]

# create dataloader to generate batches of AudioSamples
dataloader = self.inference_dataloader_cls(samples, self.preprocessor, **kwargs)
dataloader = self.predict_dataloader(samples, **kwargs)

# move model to device
try:
Expand Down Expand Up @@ -2080,24 +2074,31 @@ def embed(
Generate embeddings (intermediate layer outputs) for audio files/clips
Note: to capture embeddings on multiple layers, use self.__call__ with intermediate_layers
argument directly. This wrapper only allows one target_layer.
Note: Output can be n-dimensional array (return_dfs=False) or pd.DataFrame with multi-index
like .predict() (return_dfs=True). If avgpool=False, return_dfs is forced to False since we
can't create a DataFrame with >2 dimensions.
Args:
samples: (same as CNN.predict())
target_layers: layers from self.model._modules to extract outputs from
- if None, attempts to use self.model.embedding_layer as default
samples: same as CNN.predict(): list of file paths, OR pd.DataFrame with index
containing audio file paths, OR a pd.DataFrame with multi-index (file, start_time,
end_time)
target_layers: layers from self.model._modules to
extract outputs from - if None, attempts to use self.model.embedding_layer as
default
progress_bar: bool, if True, shows a progress bar with tqdm [default: True]
avgpool: bool, if True, applies global average pooling to embeddings [default: True]
return_preds: bool, if True, returns two outputs (embeddings, logits)
avgpool: bool, if True, applies global average pooling to intermediate outputs
i.e. averages across all dimensions except first to get a 1D vector per sample
return_dfs: bool, if True, returns embeddings as pd.DataFrame with multi-index like .predict()
if False, returns np.array of embeddings [default: True]. If avg_pool=False, overrides
to return np.array since we can't have a df with >2 dimensions
return_dfs: bool, if True, returns embeddings as pd.DataFrame with multi-index like
.predict(). If False, returns np.array of embeddings [default: True]. If
avgpool=False, return_dfs is overridden to False (np.array is returned) since a
DataFrame cannot hold >2 dimensions
kwargs are passed to self.predict_dataloader()
Returns:
if return_preds is False, returns `embeddings` , and np.array of shape [n_samples, ...]
if return_preds is True, returns a tuple:
`(embeddings, preds)` where `preds` is the raw model output (e.g. logits, no activation layer)
Returns: (embeddings, preds) if return_preds=True or embeddings if return_preds=False
types are pd.DataFrame if return_dfs=True, or np.array if return_dfs=False
"""
if not avgpool: # cannot create a DataFrame with >2 dimensions
Expand All @@ -2118,10 +2119,6 @@ def embed(
target_layer in self.network.modules()
), f"target_layers must be in self.model.modules(), but {target_layer} is not."

# for convenience, convert `samples` str/pathlib.Path to list of length 1
if isinstance(samples, (str, Path)):
samples = [samples]

# create dataloader to generate batches of AudioSamples
dataloader = self.predict_dataloader(samples, **kwargs)

Expand Down Expand Up @@ -2159,6 +2156,11 @@ def device(self, device):
"""
Set the device to use in train/predict, casting strings to torch.device datatype
Defaults to a GPU ('cuda:0' or 'mps') if one is available. Can be set after init, e.g.
model.device='cuda:1'. Network and samples are moved to the device during training/inference.
Valid devices include 'cuda:0', torch.device('cuda'), torch.device('cpu'), torch.device('mps'),
etc.
Args:
device: a torch.device object or str such as 'cuda:0', 'mps', 'cpu'
"""
Expand Down

0 comments on commit 70301d5

Please sign in to comment.