Commit f5e784e

Merge branch 'develop' into issue_749_crowsetta

sammlapp committed May 23, 2024
2 parents: ab7e754 + cf0de77

Showing 19 changed files with 327 additions and 167 deletions.
25 changes: 5 additions & 20 deletions opensoundscape/annotations.py
@@ -882,13 +882,12 @@ def one_hot_labels_like(
def one_hot_clip_labels(
self,
clip_duration,
clip_overlap,
min_label_overlap,
min_label_fraction=1,
full_duration=None,
class_subset=None,
final_clip=None,
audio_files=None,
**kwargs,
):
"""Generate one-hot labels for clips of fixed duration
@@ -898,7 +897,6 @@ def one_hot_clip_labels(
Args:
clip_duration (float): The duration in seconds of the clips
clip_overlap (float): The overlap of the clips in seconds [default: 0]
min_label_overlap: minimum duration (seconds) of annotation within the
time interval for it to count as a label. Note that any annotation
of length less than this value will be discarded.
@@ -919,19 +917,10 @@ def one_hot_clip_labels(
of `audio` for each row of self.df
class_subset: list of classes for one-hot labels. If None, classes will
be all unique values of self.df['annotation']
final_clip (str): Behavior if final_clip is less than clip_duration
seconds long. By default, discards remaining time if less than
clip_duration seconds long [default: None].
Options:
- None: Discard the remainder (do not make a clip)
- "extend": Extend the final clip beyond full_duration to reach
clip_duration length
- "remainder": Use only remainder of full_duration
(final clip will be shorter than clip_duration)
- "full": Increase overlap with previous clip to yield a
clip with clip_duration length
audio_files: list of audio file paths (as str or pathlib.Path)
to create clips for. If None, uses self.audio_files. [default: None]
**kwargs (such as clip_overlap, final_clip) are passed to
opensoundscape.utils.generate_clip_times_df() via make_clip_df()
Returns:
dataframe with index ['file','start_time','end_time'] and columns=classes
"""
@@ -960,9 +949,8 @@ def one_hot_clip_labels(
clip_df = make_clip_df(
files=[f for f in audio_files if f == f], # remove NaN if present
clip_duration=clip_duration,
clip_overlap=clip_overlap,
final_clip=final_clip,
raise_exceptions=True, # raise exceptions from librosa.duration(f)
**kwargs,
)
except GetDurationError as exc:
raise GetDurationError(
@@ -975,10 +963,7 @@ def one_hot_clip_labels(
else: # use fixed full_duration for all files
# make a clip df, will be re-used for each file
clip_df_template = generate_clip_times_df(
full_duration=full_duration,
clip_duration=clip_duration,
clip_overlap=clip_overlap,
final_clip=final_clip,
full_duration=full_duration, clip_duration=clip_duration, **kwargs
)
# make a clip df for all files
clip_df = pd.concat([clip_df_template] * len(audio_files))
35 changes: 9 additions & 26 deletions opensoundscape/audio.py
@@ -873,41 +873,24 @@ def save(
else: # we can write metadata for WAV and AIFF
_write_metadata(self.metadata, metadata_format, path)

def split(self, clip_duration, clip_overlap=0, final_clip=None):
def split(self, clip_duration, **kwargs):
"""Split Audio into even-lengthed clips
The Audio object is split into clips of a specified duration and overlap
Args:
clip_duration (float): The duration in seconds of the clips
clip_overlap (float): The overlap of the clips in seconds [default: 0]
final_clip (str): Behavior if final_clip is less than clip_duration
seconds long. By default, discards remaining audio if less than
clip_duration seconds long [default: None].
Options:
- None: Discard the remainder (do not make a clip)
- "extend": Extend the final clip with silence to reach
clip_duration length
- "remainder": Use only remainder of Audio (final clip will be
shorter than clip_duration)
- "full": Increase overlap with previous clip to yield a clip with
clip_duration length
**kwargs (such as clip_overlap_fraction, final_clip) are passed to
opensoundscape.utils.generate_clip_times_df()
- extends last Audio object if user passes final_clip == "extend"
Returns:
- audio_clips: list of audio objects
- dataframe w/columns for start_time and end_time of each clip
"""
if not final_clip in ["remainder", "full", "extend", None]:
raise ValueError(
f"final_clip must be 'remainder', 'full', 'extend',"
f"or None. Got {final_clip}."
)

duration = self.duration
clip_df = generate_clip_times_df(
full_duration=duration,
clip_duration=clip_duration,
clip_overlap=clip_overlap,
final_clip=final_clip,
full_duration=duration, clip_duration=clip_duration, **kwargs
)

clips = [None] * len(clip_df)
@@ -918,17 +901,17 @@ def split(self, clip_duration, clip_overlap=0, final_clip=None):
audio_clip = self.trim(start, end)

# Extend the final clip if necessary
if end > duration and final_clip == "extend":
audio_clip = audio_clip.extend_to(clip_duration)
if "final_clip" in kwargs.keys():
if end > duration and kwargs["final_clip"] == "extend":
audio_clip = audio_clip.extend_to(clip_duration)

# Add clip to list of clips
clips[idx] = audio_clip

if len(clips) == 0:
warnings.warn(
f"Given Audio object with duration of `{duration}` "
f"seconds and `clip_duration={clip_duration}` but "
f" `final_clip={final_clip}` produces no clips. "
f"seconds and `clip_duration={clip_duration}`, produces no clips. "
f"Returning empty list."
)

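A usage sketch of the simplified split() signature (the file path is hypothetical; clip timing options now travel through **kwargs):

from opensoundscape import Audio

audio = Audio.from_file("recording.wav")  # hypothetical path
clips, clip_df = audio.split(
    clip_duration=5.0,
    clip_overlap_fraction=0.5,  # forwarded to generate_clip_times_df()
    final_clip="extend",        # final partial clip is padded with silence
)
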
7 changes: 7 additions & 0 deletions opensoundscape/ml/__init__.py
@@ -7,3 +7,10 @@
from . import sampling
from . import utils
from . import bioacoustics_model_zoo
import torch.multiprocessing

# using 'file_system' avoids errors with "Too many open files",
# "Pin memory thread exited unexpectedly", and RuntimeError('received %d items of ancdata')
# when using parallelized DataLoader. This is the recommended solution according to
# https://github.com/pytorch/pytorch/issues/11201#issuecomment-421146936
torch.multiprocessing.set_sharing_strategy("file_system")
50 changes: 33 additions & 17 deletions opensoundscape/ml/cnn.py
@@ -20,7 +20,7 @@

import opensoundscape
from opensoundscape.ml import cnn_architectures
from opensoundscape.ml.utils import apply_activation_layer
from opensoundscape.ml.utils import apply_activation_layer, check_labels
from opensoundscape.preprocess.preprocessors import SpectrogramPreprocessor
from opensoundscape.ml.loss import (
BCEWithLogitsLoss_hot,
Expand All @@ -43,6 +43,8 @@
multi_target_metrics,
)

import warnings


class BaseClassifier(torch.nn.Module):
"""
@@ -105,7 +107,10 @@ def predict(
num_workers=0,
activation_layer=None,
split_files_into_clips=True,
overlap_fraction=0,
clip_overlap=None,
clip_overlap_fraction=None,
clip_step=None,
overlap_fraction=None,
final_clip=None,
bypass_augmentations=True,
invalid_samples_log=None,
@@ -145,10 +150,9 @@
split_files_into_clips:
If True, internally splits and predicts on clips from longer audio files
Otherwise, assumes each row of `samples` corresponds to one complete sample
overlap_fraction: fraction of overlap between consecutive clips when
predicting on clips of longer audio files. For instance, 0.5
gives 50% overlap between consecutive clips.
final_clip: see `opensoundscape.utils.generate_clip_times_df`
clip_overlap_fraction, clip_overlap, clip_step, final_clip:
see `opensoundscape.utils.generate_clip_times_df`
overlap_fraction: deprecated alias for clip_overlap_fraction
bypass_augmentations: If False, Actions with
is_augmentation==True are performed. Default True.
invalid_samples_log: if not None, samples that failed to preprocess
@@ -188,7 +192,7 @@
for that sample will be np.nan
"""
# for convenience, convert str/pathlib.Path to list
# for convenience, convert str/pathlib.Path to list of length 1
if isinstance(samples, (str, Path)):
samples = [samples]

@@ -198,6 +202,9 @@
self.preprocessor,
split_files_into_clips=split_files_into_clips,
overlap_fraction=overlap_fraction,
clip_overlap=clip_overlap,
clip_overlap_fraction=clip_overlap_fraction,
clip_step=clip_step,
final_clip=final_clip,
bypass_augmentations=bypass_augmentations,
batch_size=batch_size,
@@ -231,7 +238,7 @@ def predict(
# Log a table of preprocessed samples to wandb
wandb_session.log(
{
"Samples / Preprocessed samples": wandb_table(
"Peprocessed_samples": wandb_table(
dataloader.dataset.dataset,
self.wandb_logging["n_preview_samples"],
)
@@ -281,9 +288,8 @@ def predict(
classes_to_extract=[c],
drop_labels=True,
gradcam_model=self if self.wandb_logging["gradcam"] else None,
raise_exceptions=True, # TODO back to false when done debugging
)
wandb_session.log({f"Samples / Top scoring [{c}]": table})
wandb_session.log({f"Top_scoring_{c.replace(' ','_')}": table})

if return_invalid_samples:
return score_df, invalid_samples
@@ -365,8 +371,16 @@ def eval(self, targets, scores, logging_offset=0):
scores: continuous values in 0/1 for each sample and class
logging_offset: modify verbosity - for example, -1 will reduce
the amount of printing/logging by 1 level
Raises:
AssertionError: if targets are outside of range [0,1]
"""

# check for invalid label values
assert (
targets.max(axis=None) <= 1 and targets.min(axis=None) >= 0
), "Labels must in range [0,1], but found values outside range"

# remove all samples with NaN for a prediction
targets = targets[~np.isnan(scores).any(axis=1), :]
scores = scores[~np.isnan(scores).any(axis=1), :]
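
To illustrate the new check, labels outside [0,1] now fail fast (a sketch; `model` is an assumed CNN instance):

import numpy as np

targets = np.array([[0, 1], [2, 0]])  # 2 is outside [0,1]
scores = np.array([[0.1, 0.9], [0.8, 0.2]])
try:
    model.eval(targets, scores)
except AssertionError as err:
    print(err)  # "Labels must be in range [0,1], ..."
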
@@ -577,7 +591,7 @@ def _init_train_dataloader(self, train_df, batch_size, num_workers, raise_errors
train_df,
self.preprocessor,
split_files_into_clips=True,
overlap_fraction=0,
clip_overlap=0,
final_clip=None,
bypass_augmentations=False,
batch_size=batch_size,
@@ -802,9 +816,9 @@ def train(
`train_df=train_df[cnn.classes]` or `cnn.classes=train_df.columns`
before training.
"""
assert list(self.classes) == list(train_df.columns), class_err
check_labels(train_df, self.classes)
if validation_df is not None:
assert list(self.classes) == list(validation_df.columns), class_err
check_labels(validation_df, self.classes)

# Validation: warn user if no validation set
if validation_df is None:
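
The inline assertions above are replaced by the check_labels helper imported from opensoundscape.ml.utils. Its implementation is not shown in this diff; a hedged sketch of what it presumably verifies:

def check_labels(label_df, classes):
    """Raise if the label dataframe's columns do not match the model's classes."""
    # illustrative assumption, not the actual implementation
    assert list(label_df.columns) == list(classes), (
        "Columns of the label dataframe must match model classes. "
        "Try `train_df=train_df[cnn.classes]` or `cnn.classes=train_df.columns`."
    )
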
@@ -850,21 +864,23 @@
# log tables of preprocessed samples
wandb_session.log(
{
"Samples / training samples": wandb_table(
"training_samples": wandb_table(
AudioFileDataset(
train_df, self.preprocessor, bypass_augmentations=False
),
self.wandb_logging["n_preview_samples"],
),
"Samples / training samples no aug": wandb_table(
"training_samples_no_aug": wandb_table(
AudioFileDataset(
train_df, self.preprocessor, bypass_augmentations=True
),
self.wandb_logging["n_preview_samples"],
),
"Samples / validation samples": wandb_table(
"validation_samples": wandb_table(
AudioFileDataset(
validation_df, self.preprocessor, bypass_augmentations=True
validation_df,
self.preprocessor,
bypass_augmentations=True,
),
self.wandb_logging["n_preview_samples"],
),
27 changes: 20 additions & 7 deletions opensoundscape/ml/dataloaders.py
@@ -14,7 +14,10 @@ def __init__(
samples,
preprocessor,
split_files_into_clips=True,
overlap_fraction=0,
clip_overlap=None,
clip_overlap_fraction=None,
clip_step=None,
overlap_fraction=None,
final_clip=None,
bypass_augmentations=True,
raise_errors=False,
@@ -42,11 +45,9 @@ def __init__(
preprocessor: preprocessor object, eg AudioPreprocessor or SpectrogramPreprocessor
split_files_into_clips=True: use AudioSplittingDataset to automatically split
audio files into appropriate-lengthed clips
overlap_fraction: overlap fraction between consecutive clips, ignored if
split_files_into_clips is False [default: 0]
final_clip: how to handle the final incomplete clip in a file
options:['extend','remainder','full',None] [default: None]
see opensoundscape.utils.generate_clip_times_df for details
clip_overlap_fraction, clip_overlap, clip_step, final_clip:
see `opensoundscape.utils.generate_clip_times_df`
overlap_fraction: deprecated alias for clip_overlap_fraction
bypass_augmentations: if True, don't apply any augmentations [default: True]
raise_errors: if True, raise errors during preprocessing [default: False]
collate_fn: function to collate samples into batches [default: identity]
@@ -62,6 +63,16 @@ def __init__(
"(c) (file,start_time,end_time) as MultiIndex"
)

if overlap_fraction is not None:
warnings.warn(
"`overlap_fraction` argument is deprecated. Use `clip_overlap_fraction` instead.",
DeprecationWarning,
)
assert (
clip_overlap_fraction is None
), "Cannot specify both overlap_fraction and clip_overlap_fraction"
clip_overlap_fraction = overlap_fraction

# set up prediction Dataset, considering three possible cases:
# (c1) user provided multi-index df with file,start_time,end_time of clips
# (c2) user provided file list and wants clips to be split out automatically
@@ -75,7 +86,9 @@ def __init__(
dataset = AudioSplittingDataset(
samples=samples,
preprocessor=preprocessor,
overlap_fraction=overlap_fraction,
clip_overlap=clip_overlap,
clip_overlap_fraction=clip_overlap_fraction,
clip_step=clip_step,
final_clip=final_clip,
)
else: # c3 split_files_into_clips=False -> one sample & one prediction per file provided
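A sketch of the deprecation path above; the dataloader class name (SafeAudioDataLoader) and the `samples`/`preprocessor` objects are assumptions, since this hunk does not show the class definition line:

import warnings
from opensoundscape.ml.dataloaders import SafeAudioDataLoader

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    loader = SafeAudioDataLoader(samples, preprocessor, overlap_fraction=0.5)
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
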
9 changes: 5 additions & 4 deletions opensoundscape/ml/datasets.py
@@ -1,4 +1,5 @@
"""Preprocessors: pd.Series child with an action sequence & forward method"""

import warnings
import copy
from pathlib import Path
@@ -161,10 +162,11 @@ class AudioSplittingDataset(AudioFileDataset):
automatically split longer files into clips (providing only the file paths).
Args:
see AudioFileDataset and make_clip_df
samples and preprocessor are passed to AudioFileDataset.__init__
**kwargs are passed to opensoundscape.utils.make_clip_df
"""

def __init__(self, samples, preprocessor, overlap_fraction=0, final_clip=None):
def __init__(self, samples, preprocessor, **kwargs):
super(AudioSplittingDataset, self).__init__(
samples=samples, preprocessor=preprocessor
)
@@ -177,7 +179,6 @@ def __init__(self, samples, preprocessor, overlap_fraction=0, final_clip=None):
self.label_df, self.invalid_samples = make_clip_df(
files=samples,
clip_duration=preprocessor.sample_duration,
clip_overlap=overlap_fraction * preprocessor.sample_duration,
final_clip=final_clip,
return_invalid_samples=True,
**kwargs,
)
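
A usage sketch of the generalized constructor; `files` (a list of audio paths) and `pre` (a preprocessor) are assumed to exist:

from opensoundscape.ml.datasets import AudioSplittingDataset

dataset = AudioSplittingDataset(
    samples=files,
    preprocessor=pre,
    clip_overlap_fraction=0.25,  # forwarded via **kwargs to make_clip_df()
    final_clip="remainder",
)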