From 35fd0802fde90b34c291e03c1a47ece56409f946 Mon Sep 17 00:00:00 2001 From: sammlapp Date: Wed, 24 Jan 2024 00:00:07 -0500 Subject: [PATCH 01/16] resolve 911 change labels of Spectrogram.plot() and add kHz arg --- opensoundscape/spectrogram.py | 14 ++++++++++---- tests/test_spectrogram.py | 6 ++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/opensoundscape/spectrogram.py b/opensoundscape/spectrogram.py index ac19fea6..d3f36a62 100644 --- a/opensoundscape/spectrogram.py +++ b/opensoundscape/spectrogram.py @@ -416,7 +416,9 @@ def trim(self, start_time, end_time): times=self.times[lowest_index : highest_index + 1], ) - def plot(self, inline=True, fname=None, show_colorbar=False, range=(-100, -20)): + def plot( + self, inline=True, fname=None, show_colorbar=False, range=(-100, -20), kHz=False + ): """Plot the spectrogram with matplotlib.pyplot Args: @@ -425,19 +427,23 @@ def plot(self, inline=True, fname=None, show_colorbar=False, range=(-100, -20)): show_colorbar: include image legend colorbar from pyplot range: tuple of (min,max) values of .spectrogram to map to the lowest/highest pixel values. Values outside this range will be clipped to the min/max values + kHz: bool [default:False] if True, y axis is plotted in units of kHz rather than Hz """ norm = matplotlib.colors.Normalize(vmin=range[0], vmax=range[1]) + + # if user specifies kHz=True, use kHz units rather than Hz on y axis + y = self.frequencies / 1000 if kHz else self.frequencies plt.pcolormesh( self.times, - self.frequencies, + y, self.spectrogram, shading="auto", cmap="Greys", norm=norm, ) - plt.xlabel("time (sec)") - plt.ylabel("frequency (Hz)") + plt.xlabel("Time (sec)") + plt.ylabel(f"Frequency ({'kHz' if kHz else 'Hz'})") if show_colorbar: plt.colorbar() diff --git a/tests/test_spectrogram.py b/tests/test_spectrogram.py index e1398c56..b1c4ab22 100644 --- a/tests/test_spectrogram.py +++ b/tests/test_spectrogram.py @@ -158,6 +158,12 @@ def test_plot_spectrogram(): Spectrogram(np.zeros((5, 10)), np.zeros((5)), np.zeros((10)), (-100, -20)).plot() +def test_plot_spectrogram_kHz(): + Spectrogram(np.zeros((5, 10)), np.zeros((5)), np.zeros((10)), (-100, -20)).plot( + kHz=True + ) + + def test_amplitude_spectrogram(): Spectrogram( np.zeros((5, 10)), np.zeros((5)), np.zeros((10)), (-100, -20) From 458e4efc4d62b0b2c4b82f02919dfc4d1a958c5d Mon Sep 17 00:00:00 2001 From: sammlapp Date: Fri, 23 Feb 2024 09:03:58 -0500 Subject: [PATCH 02/16] remove special characters from wandb.log keys Windows users are getting errors logging to wandb. We can't reproduce them, but Louis suspected it may be because of " / " in wandb logging keys ending up in file paths. 
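For reference, the new `kHz` argument added in PATCH 01 above can be used like this (a sketch, assuming any local audio file; `Audio` and `Spectrogram` are the package's top-level classes):

    # hypothetical usage of Spectrogram.plot(kHz=True); "recording.wav" is a placeholder path
    from opensoundscape import Audio, Spectrogram

    audio = Audio.from_file("recording.wav")
    spec = Spectrogram.from_audio(audio)
    spec.plot(kHz=True)  # y axis reads "Frequency (kHz)"; the default kHz=False keeps Hz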
This commit removes slashes, spaces, and other special characters ([]) from wandb.log() string keys --- opensoundscape/ml/cnn.py | 55 +++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/opensoundscape/ml/cnn.py b/opensoundscape/ml/cnn.py index 4d6d0fc8..c6737439 100644 --- a/opensoundscape/ml/cnn.py +++ b/opensoundscape/ml/cnn.py @@ -2,6 +2,7 @@ For tutorials, see notebooks on opensoundscape.org """ + from pathlib import Path import warnings import copy @@ -219,10 +220,12 @@ def predict( # Log a table of preprocessed samples to wandb wandb_session.log( { - "Samples / Preprocessed samples": wandb_table( - dataloader.dataset.dataset, - self.wandb_logging["n_preview_samples"], - ) + "Samples": { + "Peprocessed_samples": wandb_table( + dataloader.dataset.dataset, + self.wandb_logging["n_preview_samples"], + ) + } } ) @@ -266,7 +269,9 @@ def predict( gradcam_model=self if self.wandb_logging["gradcam"] else None, raise_exceptions=True, # TODO back to false when done debugging ) - wandb_session.log({f"Samples / Top scoring [{c}]": table}) + wandb_session.log( + {"Samples": {f"Top_scoring_{c.replace(' ','_')}": table}} + ) if return_invalid_samples: return score_df, invalid_samples @@ -840,24 +845,28 @@ def train( # log tables of preprocessed samples wandb_session.log( { - "Samples / training samples": wandb_table( - AudioFileDataset( - train_df, self.preprocessor, bypass_augmentations=False + "Samples": { + "training_samples": wandb_table( + AudioFileDataset( + train_df, self.preprocessor, bypass_augmentations=False + ), + self.wandb_logging["n_preview_samples"], ), - self.wandb_logging["n_preview_samples"], - ), - "Samples / training samples no aug": wandb_table( - AudioFileDataset( - train_df, self.preprocessor, bypass_augmentations=True + "training_samples_no_aug": wandb_table( + AudioFileDataset( + train_df, self.preprocessor, bypass_augmentations=True + ), + self.wandb_logging["n_preview_samples"], ), - self.wandb_logging["n_preview_samples"], - ), - "Samples / validation samples": wandb_table( - AudioFileDataset( - validation_df, self.preprocessor, bypass_augmentations=True + "validation_samples": wandb_table( + AudioFileDataset( + validation_df, + self.preprocessor, + bypass_augmentations=True, + ), + self.wandb_logging["n_preview_samples"], ), - self.wandb_logging["n_preview_samples"], - ), + } } ) @@ -925,9 +934,9 @@ def train( validation_df, batch_size=batch_size, num_workers=num_workers, - activation_layer="softmax_and_logit" - if self.single_target - else None, + activation_layer=( + "softmax_and_logit" if self.single_target else None + ), split_files_into_clips=False, ) # returns a dataframe matching validation_df validation_targets = validation_df.values From 98704d41f16c3059927901abd803dcfe5667b487 Mon Sep 17 00:00:00 2001 From: sammlapp Date: Mon, 8 Apr 2024 13:19:15 -0400 Subject: [PATCH 03/16] fix extend_to resolves #972 and #948 changes the behavior of extend_to() so that it doesn't trim audio --- opensoundscape/audio.py | 41 +++++++++++++++++++++-------------------- tests/test_audio.py | 17 ++++++++++++++--- 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/opensoundscape/audio.py b/opensoundscape/audio.py index 926bdf4f..c3faed55 100644 --- a/opensoundscape/audio.py +++ b/opensoundscape/audio.py @@ -587,41 +587,42 @@ def loop(self, length=None, n=None): def extend_to(self, duration): """Extend audio file to desired duration by adding silence to the end - If duration is less than the Audio's .duration, the Audio 
object is trimmed. + If `duration` is less than or equal to the Audio's self.duration, the Audio remains unchanged. + Otherwise, silence is added to the end of the Audio object to achieve the desired - duration. + `duration`. Args: - duration: the final duration in seconds of the audio object + duration: the minimum final duration in seconds of the audio object Returns: a new Audio object of the desired duration """ - target_n_samples = round(duration * self.sample_rate) + minimum_n_samples = round(duration * self.sample_rate) current_n_samples = len(self.samples) - if target_n_samples > current_n_samples: + if minimum_n_samples <= current_n_samples: + return self._spawn() + + else: # add 0's to the end of the sample array new_samples = np.pad( - self.samples, pad_width=(0, target_n_samples - current_n_samples) + self.samples, pad_width=(0, minimum_n_samples - current_n_samples) ) - elif target_n_samples < current_n_samples: - # trim to desired samples (similar to self.trim()) - new_samples = self.samples[0:target_n_samples] - # update metadata to reflect new duration - if self.metadata is None: - metadata = None - else: - metadata = self.metadata.copy() - if "duration" in metadata: - metadata["duration"] = len(new_samples) / self.sample_rate + # update metadata to reflect new duration + if self.metadata is None: + metadata = None + else: + metadata = self.metadata.copy() + if "duration" in metadata: + metadata["duration"] = len(new_samples) / self.sample_rate - return self._spawn( - samples=new_samples, - metadata=metadata, - ) + return self._spawn( + samples=new_samples, + metadata=metadata, + ) def extend_by(self, duration): """Extend audio file by adding `duration` seconds of silence to the end diff --git a/tests/test_audio.py b/tests/test_audio.py index 39dff9d8..5853d25d 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -523,12 +523,23 @@ def test_extend_to_correct_metadata(silence_10s_mp3_str): def test_extend_to_shorter_duration(silence_10s_mp3_str): - # extending 10s to 6s should simply trim the audio + # extending 10s to 6s should retain 10s audio = Audio.from_file(silence_10s_mp3_str, sample_rate=10000) a2 = audio.extend_to(6) - assert math.isclose(a2.duration, 6) + assert math.isclose(a2.duration, 10) # duration in metadata should be updated: - assert math.isclose(a2.metadata["duration"], 6) + assert math.isclose(a2.metadata["duration"], 10) + # other metadata should be retained: + assert a2.metadata["subtype"] == audio.metadata["subtype"] + + +def test_extend_to_correct_duration_ok(silence_10s_mp3_str): + # extending 10s to 10 shouldn't raise error (#972) + audio = Audio.from_file(silence_10s_mp3_str, sample_rate=10000) + a2 = audio.extend_to(10) + assert math.isclose(a2.duration, 10) + # duration in metadata should be updated: + assert math.isclose(a2.metadata["duration"], 10) # other metadata should be retained: assert a2.metadata["subtype"] == audio.metadata["subtype"] From ebb4c6f48be8930791e82833a3405004bd3e5e0e Mon Sep 17 00:00:00 2001 From: sammlapp Date: Thu, 9 May 2024 16:38:38 -0400 Subject: [PATCH 04/16] improve AudioTrim action resolves random_trim_audio in default SpectrogramPreprocessor doesn't work #904 --- opensoundscape/preprocess/actions.py | 94 ++++++++++++---------- opensoundscape/preprocess/preprocessors.py | 12 ++- tests/test_actions.py | 28 +++---- 3 files changed, 75 insertions(+), 59 deletions(-) diff --git a/opensoundscape/preprocess/actions.py b/opensoundscape/preprocess/actions.py index 2e236481..1b4c1c4c 100644 --- 
a/opensoundscape/preprocess/actions.py +++ b/opensoundscape/preprocess/actions.py @@ -59,7 +59,11 @@ def set(self, **kwargs): f"unexpected arguments: {unmatched_args}. " f"The valid arguments and current values are: \n{self.params}" ) - self.params.update(pd.Series(kwargs, dtype=object)) + # Series.update ignores nan/None values, so we use dictionary.update method + new_params = dict(self.params) + new_params.update(kwargs) + self.params = pd.Series(new_params, dtype=object) + # self.params.update(pd.Series(kwargs, dtype=object)) def get(self, arg): return self.params[arg] @@ -159,7 +163,7 @@ class AudioTrim(Action): """Action to trim/extend audio to desired length Args: - see actions.trim_audio + see actions.audio_trim() """ def __init__(self, **kwargs): @@ -169,61 +173,69 @@ def go(self, sample, **kwargs): self.action_fn(sample, **dict(self.params, **kwargs)) -def trim_audio(sample, extend=True, random_trim=False, tol=1e-5): - """trim audio clips (Audio -> Audio) +def trim_audio(sample, target_duration, extend=True, random_trim=False, tol=1e-6): + """trim audio clips from t=0 or random position (Audio -> Audio) + + Trims an audio file to desired length. - Trims an audio file to desired length Allows audio to be trimmed from start or from a random time - Optionally extends audio shorter than clip_length with silence + + Optionally extends audio shorter than clip_length to sample.duration by + appending silence. Args: sample: AudioSample with .data=Audio object, .duration as sample duration + target_duration: length of resulting clip in seconds. If None, + no trimming is performed. extend: if True, clips shorter than sample.duration are - extended with silence to required length + extended with silence to required length [Default: True] random_trim: if True, chooses a random segment of length sample.duration from the input audio. If False, the file is trimmed from 0 seconds - to sample.duration seconds. - tol: tolerance for considering a clip to be of the correct length (sec) + to sample.duration seconds. [Default: False] + tol: tolerance for considering a clip to be long enough (sec), + when raising an error for short clips [Default: 1e-6] - Returns: - trimmed audio + Effects: + Updates the sample's .data, .start_time, and .duration attributes """ + + if target_duration is None: + return + audio = sample.data if len(audio.samples) == 0: raise ValueError("recieved zero-length audio") - if sample.target_duration is not None: - if audio.duration + tol <= sample.target_duration: - # input audio is not as long as desired length - if extend: # extend clip sith silence - audio = audio.extend_to(sample.target_duration) - else: - raise ValueError( - f"the length of the original file ({audio.duration} " - f"sec) was less than the length to extract " - f"({sample.target_duration} sec). 
To extend short " - f"clips, use extend=True" - ) - if random_trim: - # uniformly randomly choose clip time from full audio - extra_time = audio.duration - sample.target_duration - start_time = np.random.uniform() * extra_time - else: - start_time = 0 - - end_time = start_time + sample.target_duration - audio = audio.trim(start_time, end_time) - - # update the sample - sample.data = audio - if sample.start_time is None: - sample.start_time = start_time - else: - sample.start_time += start_time - sample.duration = sample.target_duration + # input audio is not as long as desired length + if extend: # extend clip sith silence + audio = audio.extend_to(target_duration) + else: + if audio.duration + tol < target_duration: + raise ValueError( + f"the length of the original file ({audio.duration} " + f"sec) was less than the length to extract " + f"({target_duration} sec). To extend short " + f"clips, use extend=True" + ) + if random_trim: + # uniformly randomly choose clip time from full audio + # such that a full-length clip can be extracted + extra_time = audio.duration - target_duration + start_time = np.random.uniform() * extra_time + else: + start_time = 0 - return sample + end_time = start_time + target_duration + audio = audio.trim(start_time, end_time) + + # update the sample in-place + sample.data = audio + if sample.start_time is None: + sample.start_time = start_time + else: + sample.start_time += start_time + sample.duration = target_duration class SpectrogramToTensor(Action): diff --git a/opensoundscape/preprocess/preprocessors.py b/opensoundscape/preprocess/preprocessors.py index 7bf1288a..07e5ee9a 100644 --- a/opensoundscape/preprocess/preprocessors.py +++ b/opensoundscape/preprocess/preprocessors.py @@ -196,6 +196,7 @@ def _generate_sample(self, sample): """ # handle paths or pd.Series as input for `sample` if isinstance(sample, tuple): + # assume duration should be self.sample_duration path, start = sample assert isinstance( path, (str, Path) @@ -277,9 +278,16 @@ def __init__( # references AudioSample attributes: start_time and duration "load_audio": AudioClipLoader(), # if we are augmenting and get a long file, take a random trim from it - "random_trim_audio": AudioTrim(is_augmentation=True, random_trim=True), + "random_trim_audio": AudioTrim( + target_duration=sample_duration, + is_augmentation=True, + random_trim=True, + ), # otherwise, we expect to get the correct duration. 
no random trim - "trim_audio": AudioTrim(), # trim or extend (w/silence) clips to correct length + # trim or extend (w/silence) clips to correct length + "trim_audio": AudioTrim( + target_duration=sample_duration, random_trim=False + ), # convert Audio object to Spectrogram "to_spec": Action(Spectrogram.from_audio), # bandpass to 0-11.025 kHz (to ensure all outputs have same scale in y-axis) diff --git a/tests/test_actions.py b/tests/test_actions.py index 76264de0..bb6417f0 100644 --- a/tests/test_actions.py +++ b/tests/test_actions.py @@ -88,44 +88,40 @@ def test_audio_clip_loader_clip(sample_clip): def test_action_trim(sample_audio): - action = actions.AudioTrim() - sample_audio.target_duration = 1.0 + action = actions.AudioTrim(target_duration=1) + sample_audio.target_duration = 2 # should be ignored action.go(sample_audio) assert math.isclose(sample_audio.data.duration, 1.0, abs_tol=1e-4) def test_action_random_trim(sample_audio): sample2 = copy.deepcopy(sample_audio) - action = actions.AudioTrim(random_trim=True) - original_duration = sample_audio.data.duration - sample_audio.target_duration = sample2.target_duration = 0.01 + action = actions.AudioTrim(target_duration=0.001, random_trim=True) action.go(sample_audio) action.go(sample2) - assert math.isclose(sample_audio.data.duration, 0.01, abs_tol=1e-4) + assert math.isclose(sample_audio.data.duration, 0.001, abs_tol=1e-4) # random trim should result in 2 different samples + assert not math.isclose(sample_audio.start_time, sample2.start_time, abs_tol=1e-9) assert not np.array_equal(sample_audio.data.samples, sample2.data.samples) -def test_audio_trimmer_default(sample_audio): - """should not trim if no extra args""" - action = actions.AudioTrim() - sample_audio.target_duration = None +def test_audio_trimmer_duration_None(sample_audio): + """should not trim if target_duration=None""" + action = actions.AudioTrim(target_duration=None) action.go(sample_audio) assert math.isclose(sample_audio.data.duration, 0.142086167800, abs_tol=1e-4) def test_audio_trimmer_raises_error_on_short_clip(sample_audio): - action = actions.AudioTrim() - sample_audio.target_duration = 10 + action = actions.AudioTrim(target_duration=10, extend=False) with pytest.raises(ValueError): - action.go(sample_audio, extend=False) + action.go(sample_audio) def test_audio_trimmer_extend_short_clip(sample_audio): - action = actions.AudioTrim() - sample_audio.target_duration = 1 + action = actions.AudioTrim(target_duration=10) action.go(sample_audio) # extend=True is default - assert math.isclose(sample_audio.data.duration, 1.0, abs_tol=1e-4) + assert math.isclose(sample_audio.data.duration, 10, abs_tol=1e-4) def test_audio_random_gain(sample_audio): From 5f67259d5edb1e093a91c6a6c253c0c46dda83a1 Mon Sep 17 00:00:00 2001 From: sammlapp Date: Thu, 9 May 2024 17:46:44 -0400 Subject: [PATCH 05/16] update test lengths match --- tests/test_annotations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_annotations.py b/tests/test_annotations.py index 5e84d076..bdf04d5d 100644 --- a/tests/test_annotations.py +++ b/tests/test_annotations.py @@ -524,7 +524,7 @@ def test_warn_if_file_wont_get_raven_output(raven_file, saved_raven_file): def test_assert_audio_files_annotation_files_match(): with pytest.raises(AssertionError): - BoxedAnnotations.from_raven_files(["path"], []) + BoxedAnnotations.from_raven_files(["path"], ['a','b']) def test_from_raven_files(raven_file): From 13edc52b5a9df61c7e9549c46f92dab2ba966c8a Mon Sep 17 00:00:00 2001 From: sammlapp 
Date: Mon, 13 May 2024 08:02:35 -0400 Subject: [PATCH 06/16] black --- tests/test_annotations.py | 2 +- tests/test_cnn.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/test_annotations.py b/tests/test_annotations.py index bdf04d5d..1bb38a7e 100644 --- a/tests/test_annotations.py +++ b/tests/test_annotations.py @@ -524,7 +524,7 @@ def test_warn_if_file_wont_get_raven_output(raven_file, saved_raven_file): def test_assert_audio_files_annotation_files_match(): with pytest.raises(AssertionError): - BoxedAnnotations.from_raven_files(["path"], ['a','b']) + BoxedAnnotations.from_raven_files(["path"], ["a", "b"]) def test_from_raven_files(raven_file): diff --git a/tests/test_cnn.py b/tests/test_cnn.py index dde91786..a5a8a2ee 100644 --- a/tests/test_cnn.py +++ b/tests/test_cnn.py @@ -461,9 +461,8 @@ def test_predict_raise_errors(short_file_df, onemin_wav_df): ) # use 2 files. 1 file wrong is manually caught and userwarning raised model = cnn.CNN("resnet18", classes=["class"], sample_duration=30) model.preprocessor.pipeline.bandpass.bypass = False # ensure bandpass happens - model.preprocessor.pipeline.bandpass.params["low"] = ( - 1 # add a bad param. this should be min_f - ) + # add a bad param. this should be min_f + model.preprocessor.pipeline.bandpass.params["low"] = 1 with pytest.raises(PreprocessingError): model.predict(files_df, raise_errors=True) From c1bf20dbd9e3648772f150aca6f22a9d9f0a18e0 Mon Sep 17 00:00:00 2001 From: sammlapp Date: Mon, 13 May 2024 08:08:05 -0400 Subject: [PATCH 07/16] fix merge --- tests/test_annotations.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_annotations.py b/tests/test_annotations.py index e2cf54a5..26bf8563 100644 --- a/tests/test_annotations.py +++ b/tests/test_annotations.py @@ -525,14 +525,11 @@ def test_warn_if_file_wont_get_raven_output(raven_file, saved_raven_file): def test_assert_audio_files_annotation_files_match(): with pytest.raises(AssertionError): BoxedAnnotations.from_raven_files(["path"], ["a", "b"]) -======= - BoxedAnnotations.from_raven_files(["path"], ["a","b"]) def test_assert_audio_files_annotation_files_empty(): with pytest.raises(AssertionError): BoxedAnnotations.from_raven_files([], []) ->>>>>>> develop def test_from_raven_files(raven_file): From d6a115933b96db78b8c55aea4ee95b64d1e1b1e0 Mon Sep 17 00:00:00 2001 From: sammlapp Date: Mon, 13 May 2024 11:44:17 -0400 Subject: [PATCH 08/16] black --- opensoundscape/annotations.py | 8 +++++--- opensoundscape/ml/cnn.py | 5 +++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/opensoundscape/annotations.py b/opensoundscape/annotations.py index 096eb457..584e7b25 100644 --- a/opensoundscape/annotations.py +++ b/opensoundscape/annotations.py @@ -163,11 +163,13 @@ def from_raven_files( if isinstance(raven_files, (str, Path)): raven_files = [raven_files] else: - assert len(raven_files)>0, "raven_files must be a non-empty list or iterable" + assert ( + len(raven_files) > 0 + ), "raven_files must be a non-empty list or iterable" assert isinstance( raven_files[0], (str, Path) ), f"raven_files must be an iterable of string or pathlib.Path, or a single string or pathlib.Path. Got type: {type(raven_files)}" - + if isinstance(audio_files, (str, Path)): audio_files = [audio_files] else: @@ -183,7 +185,7 @@ def from_raven_files( `audio_files` and `raven_files` lists must have one-to-one correspondence, but their lengths did not match. 
""" - + all_file_dfs = [] # mapping of Raven file columns to standard opensoundscape names diff --git a/opensoundscape/ml/cnn.py b/opensoundscape/ml/cnn.py index 26a1d4ac..bb0ce47b 100644 --- a/opensoundscape/ml/cnn.py +++ b/opensoundscape/ml/cnn.py @@ -1404,17 +1404,18 @@ def avg_over_channels(img): @property def device(self): return self._device - + @device.setter def device(self, device): """ Set the device to use in train/predict, casting strings to torch.device datatype - Args: + Args: device: a torch.device object or str such as 'cuda:0', 'mps', 'cpu' """ self._device = torch.device(device) + def use_resample_loss( model, train_df ): # TODO revisit how this work. Should be able to set loss_cls=ResampleLoss() From c35d712cb99700ccf374717f6d0f4fe289a3a40c Mon Sep 17 00:00:00 2001 From: LeonardoViotti Date: Thu, 16 May 2024 15:04:58 -0400 Subject: [PATCH 09/16] Cleaned dtypes and assignemnt function in preprocessor.foward(). --- opensoundscape/preprocess/preprocessors.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/opensoundscape/preprocess/preprocessors.py b/opensoundscape/preprocess/preprocessors.py index 7bf1288a..19b59f9d 100644 --- a/opensoundscape/preprocess/preprocessors.py +++ b/opensoundscape/preprocess/preprocessors.py @@ -132,7 +132,7 @@ def forward( # create AudioSample from input path sample = self._generate_sample(sample) if trace: - sample.trace = pd.Series(index=self.pipeline.index) + sample.trace = pd.Series(index=self.pipeline.index, dtype=str) if profile: sample.runtime = pd.Series(index=self.pipeline.index) @@ -146,13 +146,13 @@ def forward( if type(action) == break_on_type or k == break_on_key: if trace: # saved "output" of this step informs user pipeline was stopped - sample.trace[k] = f"## Pipeline terminated ## {sample.trace[k]}" + sample.trace.loc[k] = f"## Pipeline terminated ## {sample.trace[k]}" break if action.bypass: continue if action.is_augmentation and bypass_augmentations: if trace: - sample.trace[k] = f"## Bypassed ## {sample.trace[k]}" + sample.trace.loc[k] = f"## Bypassed ## {sample.trace[k]}" continue # perform the action (modifies the AudioSample in-place) From 556a7ad6055b7e955fb45e13caa5bc3ad1336bad Mon Sep 17 00:00:00 2001 From: LeonardoViotti Date: Thu, 16 May 2024 15:06:18 -0400 Subject: [PATCH 10/16] Black formatted. 
---
 opensoundscape/preprocess/preprocessors.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/opensoundscape/preprocess/preprocessors.py b/opensoundscape/preprocess/preprocessors.py
index 19b59f9d..bd7115d1 100644
--- a/opensoundscape/preprocess/preprocessors.py
+++ b/opensoundscape/preprocess/preprocessors.py
@@ -146,7 +146,9 @@ def forward(
             if type(action) == break_on_type or k == break_on_key:
                 if trace:
                     # saved "output" of this step informs user pipeline was stopped
-                    sample.trace.loc[k] = f"## Pipeline terminated ## {sample.trace[k]}"
+                    sample.trace.loc[
+                        k
+                    ] = f"## Pipeline terminated ## {sample.trace[k]}"
                 break
             if action.bypass:
                 continue

From 5fb567a1b87aa733650f1a38a2e7845040ee7abf Mon Sep 17 00:00:00 2001
From: sammlapp
Date: Mon, 20 May 2024 14:35:25 -0400
Subject: [PATCH 11/16] remove output dir

this was added accidentally at some point
---
 output/great_plains_toad.wav_prdf.csv | 6 ------
 output/silence_10s.mp3_prdf.csv       | 6 ------
 2 files changed, 12 deletions(-)
 delete mode 100644 output/great_plains_toad.wav_prdf.csv
 delete mode 100644 output/silence_10s.mp3_prdf.csv

diff --git a/output/great_plains_toad.wav_prdf.csv b/output/great_plains_toad.wav_prdf.csv
deleted file mode 100644
index d473a035..00000000
--- a/output/great_plains_toad.wav_prdf.csv
+++ /dev/null
@@ -1,6 +0,0 @@
-species,pulse_rate_low,pulse_rate_high,low_f,high_f,reject_low,reject_high,window_length,score,t,max_score,time_of_max_score
-[5 rows of machine-generated RIBBIT per-clip scores for Chorus frog, great plains toad, northern leopard frog, spadefoot toad, and spadefoot toad (81Hz) omitted]

diff --git a/output/silence_10s.mp3_prdf.csv b/output/silence_10s.mp3_prdf.csv
deleted file mode 100644
index fe6d85bd..00000000
--- a/output/silence_10s.mp3_prdf.csv
+++ /dev/null
@@ -1,6 +0,0 @@
-species,pulse_rate_low,pulse_rate_high,low_f,high_f,reject_low,reject_high,window_length,score,t,max_score,time_of_max_score
-[5 rows of machine-generated RIBBIT per-clip scores for the same five species omitted]

From 35db4fcfe87b70b4e220705a10a9e346c578a9a9 Mon Sep 17 00:00:00 2001
From: sammlapp
Date: Mon, 20 May 2024 15:07:39 -0400
Subject: [PATCH 12/16] resolves #726

automatically sets `torch.multiprocessing.set_sharing_strategy("file_system")`
during opensoundscape import.
We may want to revisit this decision, but it seems that this is the recommended setting for avoiding issues seen when using parallelized DataLoader see discussion and recommended solution here https://github.com/pytorch/pytorch/issues/11201#issuecomment-421146936 --- opensoundscape/ml/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/opensoundscape/ml/__init__.py b/opensoundscape/ml/__init__.py index d585c336..4cab8e8b 100644 --- a/opensoundscape/ml/__init__.py +++ b/opensoundscape/ml/__init__.py @@ -7,3 +7,10 @@ from . import sampling from . import utils from . import bioacoustics_model_zoo +import torch.multiprocessing + +# using 'file_system' avoids errors with "Too many open files", +# "Pin memory thread exited unexpectedly", and RuntimeError('received %d items of ancdata') +# when using parallelized DataLoader. This is the recommended solution according to +# https://github.com/pytorch/pytorch/issues/11201#issuecomment-421146936 +torch.multiprocessing.set_sharing_strategy("file_system") From 59906acefd7b1eeda1abae69e30df99693804319 Mon Sep 17 00:00:00 2001 From: sammlapp Date: Mon, 20 May 2024 15:57:30 -0400 Subject: [PATCH 13/16] check for labels outside range [0,1] resolves check for values other than 0/1 in labels #891 now asserts that label values are >=0 and <=1 during CNN.train() and CNN.eval(). Adds tests for both. Also adds a missing test for input validation check of wrong class list during CNN.train() --- opensoundscape/ml/cnn.py | 14 +++++++++++--- opensoundscape/ml/utils.py | 15 +++++++++++++++ tests/test_cnn.py | 24 ++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/opensoundscape/ml/cnn.py b/opensoundscape/ml/cnn.py index bb0ce47b..925ca6bb 100644 --- a/opensoundscape/ml/cnn.py +++ b/opensoundscape/ml/cnn.py @@ -20,7 +20,7 @@ import opensoundscape from opensoundscape.ml import cnn_architectures -from opensoundscape.ml.utils import apply_activation_layer +from opensoundscape.ml.utils import apply_activation_layer, check_labels from opensoundscape.preprocess.preprocessors import SpectrogramPreprocessor from opensoundscape.ml.loss import ( BCEWithLogitsLoss_hot, @@ -365,8 +365,16 @@ def eval(self, targets, scores, logging_offset=0): scores: continuous values in 0/1 for each sample and class logging_offset: modify verbosity - for example, -1 will reduce the amount of printing/logging by 1 level + + Raises: + AssertionError: if targets are outside of range [0,1] """ + # check for invalid label values + assert ( + targets.max(axis=None) <= 1 and targets.min(axis=None) >= 0 + ), "Labels must in range [0,1], but found values outside range" + # remove all samples with NaN for a prediction targets = targets[~np.isnan(scores).any(axis=1), :] scores = scores[~np.isnan(scores).any(axis=1), :] @@ -802,9 +810,9 @@ def train( `train_df=train_df[cnn.classes]` or `cnn.classes=train_df.columns` before training. 
""" - assert list(self.classes) == list(train_df.columns), class_err + check_labels(train_df, self.classes) if validation_df is not None: - assert list(self.classes) == list(validation_df.columns), class_err + check_labels(validation_df, self.classes) # Validation: warn user if no validation set if validation_df is None: diff --git a/opensoundscape/ml/utils.py b/opensoundscape/ml/utils.py index 1321ccca..9e464232 100644 --- a/opensoundscape/ml/utils.py +++ b/opensoundscape/ml/utils.py @@ -1,4 +1,5 @@ """Utilties for .ml""" + import warnings import pandas as pd import numpy as np @@ -215,3 +216,17 @@ def collate_audio_samples_to_tensors(batch): tensors = torch.stack([i.data for i in batch]) labels = torch.tensor([i.labels.tolist() for i in batch]) return tensors, labels + + +def check_labels(label_df, classes): + class_err = """ + Train and validation datasets must have same classes + and class order as model object. Consider using + `train_df=train_df[cnn.classes]` or `cnn.classes=train_df.columns` + before training. + """ + assert list(classes) == list(label_df.columns), class_err + + assert ( + label_df.max(axis=None) <= 1 and label_df.min(axis=None) >= 0 + ), "Labels must in range [0,1], but found values outside range" diff --git a/tests/test_cnn.py b/tests/test_cnn.py index 4c0bf799..14969a66 100644 --- a/tests/test_cnn.py +++ b/tests/test_cnn.py @@ -402,6 +402,21 @@ def test_prediction_warns_different_classes(train_df): assert "classes" in all_warnings +def test_train_raises_wrong_class_list(train_df): + model = cnn.CNN("resnet18", classes=["different"], sample_duration=5.0) + with pytest.raises(AssertionError): + # raises AssertionError bc test_df columns != model.classes + model.train(train_df) + + +def test_train_raises_labels_outside_range(train_df): + model = cnn.CNN("resnet18", classes=[0, 1], sample_duration=5.0) + train_df.iat[0, 0] = 2 + with pytest.raises(AssertionError): + # raises AssertionError bc values outside [0,1] not allowed + model.train(train_df) + + def test_prediction_returns_consistent_values(train_df): model = cnn.CNN("resnet18", classes=["a", "b"], sample_duration=5.0) a = model.predict(train_df) @@ -427,6 +442,15 @@ def test_eval(train_df): model.eval(train_df.values, scores.values) +def test_eval_raises_bad_labels(train_df): + model = cnn.CNN("resnet18", classes=[0, 1], sample_duration=2) + scores = model.predict(train_df, split_files_into_clips=False) + train_df.iat[0, 0] = 2 + with pytest.raises(AssertionError): + # raises AssertionError bc values outside [0,1] not allowed + model.eval(train_df.values, scores.values) + + def test_split_resnet_feat_clf(train_df): model = cnn.CNN("resnet18", classes=[0, 1], sample_duration=2) cnn.separate_resnet_feat_clf(model) From 92bd4a915d668e17b5348a2cd7a5de2267a2a632 Mon Sep 17 00:00:00 2001 From: sammlapp Date: Tue, 21 May 2024 13:08:54 -0400 Subject: [PATCH 14/16] more flexible specification of overlap add consistent and flexible specification of consecutive clip overlap throughout the code: clip step seconds, clip overlap seconds, or clip overlap fraction. 
Updates ribbit, CNN.predict, SafeAudioDataLoader, make_clip_df,
generate_clip_times_df, AudioSplittingDataset,
BoxedAnnotations.one_hot_clip_labels, AudioFileDataset

resolves overlap_time or step_time argument for predicting on overlapping
clips #876

note: this deprecates the "overlap_fraction" kwarg to CNN.predict in favor of
any of these: "clip_overlap", "clip_overlap_fraction", "clip_step"
---
 opensoundscape/annotations.py    | 25 ++++-------------
 opensoundscape/audio.py          | 35 ++++++------------------
 opensoundscape/ml/cnn.py         | 21 +++++++++-----
 opensoundscape/ml/dataloaders.py | 27 +++++++++++++-----
 opensoundscape/ml/datasets.py    |  9 +++---
 opensoundscape/ribbit.py         | 16 +++++++++--
 opensoundscape/utils.py          | 47 ++++++++++++++++++++++++++------
 tests/test_cnn.py                | 16 +++++++++++
 tests/test_utils.py              | 42 ++++++++++++++++++++++++++++
 9 files changed, 163 insertions(+), 75 deletions(-)

diff --git a/opensoundscape/annotations.py b/opensoundscape/annotations.py
index 584e7b25..45e339fd 100644
--- a/opensoundscape/annotations.py
+++ b/opensoundscape/annotations.py
@@ -640,13 +640,12 @@ def one_hot_labels_like(
     def one_hot_clip_labels(
         self,
         clip_duration,
-        clip_overlap,
         min_label_overlap,
         min_label_fraction=1,
         full_duration=None,
         class_subset=None,
-        final_clip=None,
         audio_files=None,
+        **kwargs,
     ):
         """Generate one-hot labels for clips of fixed duration

@@ -656,7 +655,6 @@ def one_hot_clip_labels(

         Args:
             clip_duration (float): The duration in seconds of the clips
-            clip_overlap (float): The overlap of the clips in seconds [default: 0]
             min_label_overlap: minimum duration (seconds) of annotation within the
                 time interval for it to count as a label. Note that any annotation
                 of length less than this value will be discarded.
@@ -677,19 +675,10 @@ def one_hot_clip_labels(
                 of `audio` for each row of self.df
             class_subset: list of classes for one-hot labels. If None, classes will
                 be all unique values of self.df['annotation']
-            final_clip (str): Behavior if final_clip is less than clip_duration
-                seconds long. By default, discards remaining time if less than
-                clip_duration seconds long [default: None].
-                Options:
-                - None: Discard the remainder (do not make a clip)
-                - "extend": Extend the final clip beyond full_duration to reach
-                    clip_duration length
-                - "remainder": Use only remainder of full_duration
-                    (final clip will be shorter than clip_duration)
-                - "full": Increase overlap with previous clip to yield a
-                    clip with clip_duration length
             audio_files: list of audio file paths (as str or pathlib.Path)
                 to create clips for. If None, uses self.audio_files.
[default: None] + **kwargs (such as overlap_fraction, final_clip) are passed to + opensoundscape.utils.generate_clip_times_df() via make_clip_df() Returns: dataframe with index ['file','start_time','end_time'] and columns=classes """ @@ -718,9 +707,8 @@ def one_hot_clip_labels( clip_df = make_clip_df( files=[f for f in audio_files if f == f], # remove NaN if present clip_duration=clip_duration, - clip_overlap=clip_overlap, - final_clip=final_clip, raise_exceptions=True, # raise exceptions from librosa.duration(f) + **kwargs, ) except GetDurationError as exc: raise GetDurationError( @@ -733,10 +721,7 @@ def one_hot_clip_labels( else: # use fixed full_duration for all files # make a clip df, will be re-used for each file clip_df_template = generate_clip_times_df( - full_duration=full_duration, - clip_duration=clip_duration, - clip_overlap=clip_overlap, - final_clip=final_clip, + full_duration=full_duration, clip_duration=clip_duration, **kwargs ) # make a clip df for all files clip_df = pd.concat([clip_df_template] * len(audio_files)) diff --git a/opensoundscape/audio.py b/opensoundscape/audio.py index 98b5717e..b9fd1ac1 100644 --- a/opensoundscape/audio.py +++ b/opensoundscape/audio.py @@ -873,41 +873,24 @@ def save( else: # we can write metadata for WAV and AIFF _write_metadata(self.metadata, metadata_format, path) - def split(self, clip_duration, clip_overlap=0, final_clip=None): + def split(self, clip_duration, **kwargs): """Split Audio into even-lengthed clips The Audio object is split into clips of a specified duration and overlap Args: clip_duration (float): The duration in seconds of the clips - clip_overlap (float): The overlap of the clips in seconds [default: 0] - final_clip (str): Behavior if final_clip is less than clip_duration - seconds long. By default, discards remaining audio if less than - clip_duration seconds long [default: None]. - Options: - - None: Discard the remainder (do not make a clip) - - "extend": Extend the final clip with silence to reach - clip_duration length - - "remainder": Use only remainder of Audio (final clip will be - shorter than clip_duration) - - "full": Increase overlap with previous clip to yield a clip with - clip_duration length + **kwargs (such as clip_overlap_fraction, final_clip) are passed to + opensoundscape.utils.generate_clip_times_df() + - extends last Audio object if user passes final_clip == "extend" Returns: - audio_clips: list of audio objects - dataframe w/columns for start_time and end_time of each clip """ - if not final_clip in ["remainder", "full", "extend", None]: - raise ValueError( - f"final_clip must be 'remainder', 'full', 'extend'," - f"or None. Got {final_clip}." 
- ) duration = self.duration clip_df = generate_clip_times_df( - full_duration=duration, - clip_duration=clip_duration, - clip_overlap=clip_overlap, - final_clip=final_clip, + full_duration=duration, clip_duration=clip_duration, **kwargs ) clips = [None] * len(clip_df) @@ -918,8 +901,9 @@ def split(self, clip_duration, clip_overlap=0, final_clip=None): audio_clip = self.trim(start, end) # Extend the final clip if necessary - if end > duration and final_clip == "extend": - audio_clip = audio_clip.extend_to(clip_duration) + if "final_clip" in kwargs.keys(): + if end > duration and kwargs["final_clip"] == "extend": + audio_clip = audio_clip.extend_to(clip_duration) # Add clip to list of clips clips[idx] = audio_clip @@ -927,8 +911,7 @@ def split(self, clip_duration, clip_overlap=0, final_clip=None): if len(clips) == 0: warnings.warn( f"Given Audio object with duration of `{duration}` " - f"seconds and `clip_duration={clip_duration}` but " - f" `final_clip={final_clip}` produces no clips. " + f"seconds and `clip_duration={clip_duration}`, produces no clips. " f"Returning empty list." ) diff --git a/opensoundscape/ml/cnn.py b/opensoundscape/ml/cnn.py index bb0ce47b..568c036e 100644 --- a/opensoundscape/ml/cnn.py +++ b/opensoundscape/ml/cnn.py @@ -43,6 +43,8 @@ multi_target_metrics, ) +import warnings + class BaseClassifier(torch.nn.Module): """ @@ -105,7 +107,10 @@ def predict( num_workers=0, activation_layer=None, split_files_into_clips=True, - overlap_fraction=0, + clip_overlap=None, + clip_overlap_fraction=None, + clip_step=None, + overlap_fraction=None, final_clip=None, bypass_augmentations=True, invalid_samples_log=None, @@ -145,10 +150,9 @@ def predict( split_files_into_clips: If True, internally splits and predicts on clips from longer audio files Otherwise, assumes each row of `samples` corresponds to one complete sample - overlap_fraction: fraction of overlap between consecutive clips when - predicting on clips of longer audio files. For instance, 0.5 - gives 50% overlap between consecutive clips. - final_clip: see `opensoundscape.utils.generate_clip_times_df` + clip_overlap_fraction, clip_overlap, clip_step, final_clip: + see `opensoundscape.utils.generate_clip_times_df` + overlap_fraction: deprecated alias for clip_overlap_fraction bypass_augmentations: If False, Actions with is_augmentation==True are performed. Default True. 
invalid_samples_log: if not None, samples that failed to preprocess @@ -188,7 +192,7 @@ def predict( for that sample will be np.nan """ - # for convenience, convert str/pathlib.Path to list + # for convenience, convert str/pathlib.Path to list of length 1 if isinstance(samples, (str, Path)): samples = [samples] @@ -198,6 +202,9 @@ def predict( self.preprocessor, split_files_into_clips=split_files_into_clips, overlap_fraction=overlap_fraction, + clip_overlap=clip_overlap, + clip_overlap_fraction=clip_overlap_fraction, + clip_step=clip_step, final_clip=final_clip, bypass_augmentations=bypass_augmentations, batch_size=batch_size, @@ -577,7 +584,7 @@ def _init_train_dataloader(self, train_df, batch_size, num_workers, raise_errors train_df, self.preprocessor, split_files_into_clips=True, - overlap_fraction=0, + clip_overlap=0, final_clip=None, bypass_augmentations=False, batch_size=batch_size, diff --git a/opensoundscape/ml/dataloaders.py b/opensoundscape/ml/dataloaders.py index 35e57cbe..b7a048a2 100644 --- a/opensoundscape/ml/dataloaders.py +++ b/opensoundscape/ml/dataloaders.py @@ -14,7 +14,10 @@ def __init__( samples, preprocessor, split_files_into_clips=True, - overlap_fraction=0, + clip_overlap=None, + clip_overlap_fraction=None, + clip_step=None, + overlap_fraction=None, final_clip=None, bypass_augmentations=True, raise_errors=False, @@ -42,11 +45,9 @@ def __init__( preprocessor: preprocessor object, eg AudioPreprocessor or SpectrogramPreprocessor split_files_into_clips=True: use AudioSplittingDataset to automatically split audio files into appropriate-lengthed clips - overlap_fraction: overlap fraction between consecutive clips, ignroed if - split_files_into_clips is False [default: 0] - final_clip: how to handle the final incomplete clip in a file - options:['extend','remainder','full',None] [default: None] - see opensoundscape.utils.generate_clip_times_df for details + clip_overlap_fraction, clip_overlap, clip_step, final_clip: + see `opensoundscape.utils.generate_clip_times_df` + overlap_fraction: deprecated alias for clip_overlap_fraction bypass_augmentations: if True, don't apply any augmentations [default: True] raise_errors: if True, raise errors during preprocessing [default: False] collate_fn: function to collate samples into batches [default: identity] @@ -62,6 +63,16 @@ def __init__( "(c) (file,start_time,end_time) as MultiIndex" ) + if overlap_fraction is not None: + warnings.warn( + "`overlap_fraction` argument is deprecated. 
Use `clip_overlap_fraction` instead.", + DeprecationWarning, + ) + assert ( + clip_overlap_fraction is None + ), "Cannot specify both overlap_fraction and clip_overlap_fraction" + clip_overlap_fraction = overlap_fraction + # set up prediction Dataset, considering three possible cases: # (c1) user provided multi-index df with file,start_time,end_time of clips # (c2) user provided file list and wants clips to be split out automatically @@ -75,7 +86,9 @@ def __init__( dataset = AudioSplittingDataset( samples=samples, preprocessor=preprocessor, - overlap_fraction=overlap_fraction, + clip_overlap=clip_overlap, + clip_overlap_fraction=clip_overlap_fraction, + clip_step=clip_step, final_clip=final_clip, ) else: # c3 split_files_into_clips=False -> one sample & one prediction per file provided diff --git a/opensoundscape/ml/datasets.py b/opensoundscape/ml/datasets.py index 0f7f3c3a..58785095 100644 --- a/opensoundscape/ml/datasets.py +++ b/opensoundscape/ml/datasets.py @@ -1,4 +1,5 @@ """Preprocessors: pd.Series child with an action sequence & forward method""" + import warnings import copy from pathlib import Path @@ -161,10 +162,11 @@ class AudioSplittingDataset(AudioFileDataset): automatically split longer files into clips (providing only the file paths). Args: - see AudioFileDataset and make_clip_df + samples and preprocessor are passed to AudioFileDataset.__init__ + **kwargs are passed to opensoundscape.utils.make_clip_df """ - def __init__(self, samples, preprocessor, overlap_fraction=0, final_clip=None): + def __init__(self, samples, preprocessor, **kwargs): super(AudioSplittingDataset, self).__init__( samples=samples, preprocessor=preprocessor ) @@ -177,7 +179,6 @@ def __init__(self, samples, preprocessor, overlap_fraction=0, final_clip=None): self.label_df, self.invalid_samples = make_clip_df( files=samples, clip_duration=preprocessor.sample_duration, - clip_overlap=overlap_fraction * preprocessor.sample_duration, - final_clip=final_clip, return_invalid_samples=True, + **kwargs, ) diff --git a/opensoundscape/ribbit.py b/opensoundscape/ribbit.py index 8b7c1273..905b8c0a 100644 --- a/opensoundscape/ribbit.py +++ b/opensoundscape/ribbit.py @@ -2,6 +2,7 @@ This module provides functionality to search audio for periodically fluctuating vocalizations. """ + import os import warnings @@ -76,7 +77,9 @@ def ribbit( signal_band, pulse_rate_range, clip_duration, - clip_overlap=0, + clip_overlap=None, + clip_overlap_fraction=None, + clip_step=None, final_clip=None, noise_bands=None, spec_clip_range=(-100, -20), @@ -93,8 +96,13 @@ def ribbit( pulse_rate_range: [min,max] pulses per second for the target species clip_duration: the length of audio (in seconds) to analyze at one time - each clip is analyzed independently and recieves a ribbit score - clip_overlap (float): overlap between consecutive clips (sec) - final_clip (str): behavior if final clip is less than clip_duration + clip_overlap (float): overlap between consecutive clips (sec) + clip_overlap_fraction (float): overlap between consecutive clips as a fraction of + clip_duration + clip_step (float): step size between consecutive clips (sec) + - only one of clip_overlap, clip_overlap_fraction, or clip_step should be provided + - if all are None, defaults to clip_overlap=0 + final_clip (str): behavior if final clip is less than clip_duration seconds long. By default, discards remaining audio if less than clip_duration seconds long [default: None]. 
Options: @@ -189,6 +197,8 @@ def ribbit( full_duration=spectrogram.duration, clip_duration=clip_duration, clip_overlap=clip_overlap, + clip_overlap_fraction=clip_overlap_fraction, + clip_step=clip_step, final_clip=final_clip, ) clip_df["score"] = np.nan diff --git a/opensoundscape/utils.py b/opensoundscape/utils.py index 1eee5902..58106e50 100644 --- a/opensoundscape/utils.py +++ b/opensoundscape/utils.py @@ -3,6 +3,7 @@ import datetime import warnings +from pathlib import Path import numpy as np import pandas as pd import pytz @@ -127,7 +128,9 @@ def jitter(x, width, distribution="gaussian"): def generate_clip_times_df( full_duration, clip_duration, - clip_overlap=0, + clip_overlap=None, + clip_overlap_fraction=None, + clip_step=None, final_clip=None, rounding_precision=10, ): @@ -142,7 +145,11 @@ def generate_clip_times_df( Args: full_duration: The amount of time (seconds) to split into clips clip_duration (float): The duration in seconds of the clips - clip_overlap (float): The overlap of the clips in seconds [default: 0] + clip_overlap (float): The overlap of the clips in seconds + clip_overlap_fraction (float): The overlap of the clips as a fraction of clip_duration + clip_step (float): The increment in seconds between starts of consecutive clips + - must only specify one of clip_overlap, clip_overlap_fraction, or clip_step + - if all are None, overlap is set to 0 final_clip (str): Behavior if final_clip is less than clip_duration seconds long. By default, discards remaining time if less than clip_duration seconds long [default: None]. @@ -167,7 +174,27 @@ def generate_clip_times_df( f"or None. Got {final_clip}." ) - assert clip_overlap < clip_duration, "clip_overlap must be less than clip_duration" + overspecified_overlap_err = ( + "only one of clip_overlap, clip_overlap_fraction, or clip_step can be specified" + ) + if clip_overlap is not None: + if clip_overlap_fraction is not None or clip_step is not None: + raise ValueError(overspecified_overlap_err) + assert ( + clip_overlap < clip_duration + ), "clip_overlap must be less than clip_duration" + elif clip_overlap_fraction is not None: + if clip_overlap is not None or clip_step is not None: + raise ValueError(overspecified_overlap_err) + assert 0 <= clip_overlap_fraction < 1, "clip_overlap_fraction must be in [0, 1)" + clip_overlap = clip_overlap_fraction * clip_duration + elif clip_step is not None: + # allow values outside of [0, clip_duration] + if clip_overlap is not None or clip_overlap_fraction is not None: + raise ValueError(overspecified_overlap_err) + clip_overlap = clip_duration - clip_step + else: + clip_overlap = 0 # Lists of start and end times for clips increment = clip_duration - clip_overlap @@ -218,7 +245,9 @@ def cast_np_to_native(x): def make_clip_df( files, clip_duration, - clip_overlap=0, + clip_overlap=None, + clip_overlap_fraction=None, + clip_step=None, final_clip=None, return_invalid_samples=False, raise_exceptions=False, @@ -243,6 +272,8 @@ class labels. Labels for a file will be copied to all clips belonging to that file in the returned clip dataframe. clip_duration (float): see generate_clip_times_df clip_overlap (float): see generate_clip_times_df + clip_overlap_fraction (float): see generate_clip_times_df + clip_step (float): see generate_clip_times_df final_clip (str): see generate_clip_times_df return_invalid_samples (bool): if True, returns additional value, a list of samples that caused exceptions @@ -263,10 +294,6 @@ class labels. 
Labels for a file will be copied to all clips the dataframe will have one row with np.nan for 'start_time' and 'end_time' for that file path. """ - if isinstance(files, str): - raise TypeError( - "make_clip_df expects a list of files, it looks like you passed it a string" - ) label_df = None # assume no labels to begin with, just a list of paths if isinstance(files, pd.DataFrame): @@ -274,6 +301,8 @@ class labels. Labels for a file will be copied to all clips # use the dataframe as labels, keeping each column as a class # if paths are duplicated in index, keep only the first of each label_df = files[~files.index.duplicated(keep="first")] + elif isinstance(files, (str, Path)): + files = [files] # be lenient, turn single path into list else: assert hasattr(files, "__iter__"), ( f"`files` should be a dataframe with paths as " @@ -291,6 +320,8 @@ class labels. Labels for a file will be copied to all clips full_duration=t, clip_duration=clip_duration, clip_overlap=clip_overlap, + clip_overlap_fraction=clip_overlap_fraction, + clip_step=clip_step, final_clip=final_clip, ) clips["file"] = f diff --git a/tests/test_cnn.py b/tests/test_cnn.py index 4c0bf799..3dbe101e 100644 --- a/tests/test_cnn.py +++ b/tests/test_cnn.py @@ -580,3 +580,19 @@ def test_predict_posixpath_missing_files(missing_file_df, test_df): assert np.all([isnan(score) for score in scores.iloc[0].values]) assert len(invalid_samples) == 1 assert missing_file_df.index.values[0] in invalid_samples + + +def test_predict_overlap_fraction_deprecated(test_df): + """ + should give deprecation error if clip_overlap_fraction is passed. + + Future version will remove this argument in favor of clip_overlap_fraction + + also, should raise AssertionError if both args are passed (over-specified) + """ + model = cnn.CNN("resnet18", classes=[0, 1], sample_duration=5.0) + with pytest.warns(DeprecationWarning): + scores = model.predict(test_df, overlap_fraction=0.5) + assert len(scores) == 3 + with pytest.raises(AssertionError): + model.predict(test_df, overlap_fraction=0.5, clip_overlap_fraction=0.5) diff --git a/tests/test_utils.py b/tests/test_utils.py index 5ed2eb0f..2b1bacf4 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -134,6 +134,48 @@ def test_generate_clip_times_df_overlap(): assert clip_df.iloc[1]["start_time"] == 2.5 assert clip_df.iloc[1]["end_time"] == 7.5 + clip_df = utils.generate_clip_times_df( + full_duration=10, clip_duration=5, clip_overlap_fraction=0.5 + ) + assert clip_df.shape[0] == 3 + assert clip_df.iloc[0]["start_time"] == 0.0 + assert clip_df.iloc[0]["end_time"] == 5.0 + assert clip_df.iloc[1]["start_time"] == 2.5 + assert clip_df.iloc[1]["end_time"] == 7.5 + + clip_df = utils.generate_clip_times_df( + full_duration=10, clip_duration=5, clip_step=2.5 + ) + assert clip_df.shape[0] == 3 + assert clip_df.iloc[0]["start_time"] == 0.0 + assert clip_df.iloc[0]["end_time"] == 5.0 + assert clip_df.iloc[1]["start_time"] == 2.5 + assert clip_df.iloc[1]["end_time"] == 7.5 + + +def test_generate_clip_times_df_overlap_raises_overspecified(): + with pytest.raises(ValueError): + utils.generate_clip_times_df( + full_duration=10, + clip_duration=5, + clip_overlap=2.5, + clip_overlap_fraction=0.5, + ) + with pytest.raises(ValueError): + utils.generate_clip_times_df( + full_duration=10, + clip_duration=5, + clip_overlap=2.5, + clip_step=0.5, + ) + with pytest.raises(ValueError): + utils.generate_clip_times_df( + full_duration=10, + clip_duration=5, + clip_overlap_fraction=0.5, + clip_step=0.5, + ) + def 
test_make_clip_df(silence_10s_mp3_str): """many corner cases / alternatives are tested for audio.split() From 2ceb6a9ad9fbc67c742e60b7f3ad1b14b4edd30f Mon Sep 17 00:00:00 2001 From: sammlapp Date: Tue, 21 May 2024 15:34:59 -0400 Subject: [PATCH 15/16] update arg name in test --- tests/test_datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index d43da7e4..e2822f54 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -219,7 +219,7 @@ def test_audio_splitting_dataset(dataset_df, pre): def test_audio_splitting_dataset_overlap(dataset_df, pre): - dataset = AudioSplittingDataset(dataset_df, pre, overlap_fraction=0.5) + dataset = AudioSplittingDataset(dataset_df, pre, clip_overlap_fraction=0.5) assert len(dataset) == 18 # load a sample From e4723f9f960107b7fa2f9e535384c9619b6267b5 Mon Sep 17 00:00:00 2001 From: sammlapp Date: Wed, 22 May 2024 13:37:12 -0400 Subject: [PATCH 16/16] don't nest wandb tables nesting using / character is not behaving as expected, causing errors on windows machines. Nesting with dictionary doesn't work. So we just log all tables to default section, "Tables". Tested on a mac by checking wandb tables during train() and predict(). All look good. --- opensoundscape/ml/cnn.py | 51 +++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/opensoundscape/ml/cnn.py b/opensoundscape/ml/cnn.py index cef2b56e..569d90b5 100644 --- a/opensoundscape/ml/cnn.py +++ b/opensoundscape/ml/cnn.py @@ -231,12 +231,10 @@ def predict( # Log a table of preprocessed samples to wandb wandb_session.log( { - "Samples": { - "Peprocessed_samples": wandb_table( - dataloader.dataset.dataset, - self.wandb_logging["n_preview_samples"], - ) - } + "Peprocessed_samples": wandb_table( + dataloader.dataset.dataset, + self.wandb_logging["n_preview_samples"], + ) } ) @@ -283,11 +281,8 @@ def predict( classes_to_extract=[c], drop_labels=True, gradcam_model=self if self.wandb_logging["gradcam"] else None, - raise_exceptions=True, # TODO back to false when done debugging - ) - wandb_session.log( - {"Samples": {f"Top_scoring_{c.replace(' ','_')}": table}} ) + wandb_session.log({f"Top_scoring_{c.replace(' ','_')}": table}) if return_invalid_samples: return score_df, invalid_samples @@ -854,28 +849,26 @@ def train( # log tables of preprocessed samples wandb_session.log( { - "Samples": { - "training_samples": wandb_table( - AudioFileDataset( - train_df, self.preprocessor, bypass_augmentations=False - ), - self.wandb_logging["n_preview_samples"], + "training_samples": wandb_table( + AudioFileDataset( + train_df, self.preprocessor, bypass_augmentations=False ), - "training_samples_no_aug": wandb_table( - AudioFileDataset( - train_df, self.preprocessor, bypass_augmentations=True - ), - self.wandb_logging["n_preview_samples"], + self.wandb_logging["n_preview_samples"], + ), + "training_samples_no_aug": wandb_table( + AudioFileDataset( + train_df, self.preprocessor, bypass_augmentations=True ), - "validation_samples": wandb_table( - AudioFileDataset( - validation_df, - self.preprocessor, - bypass_augmentations=True, - ), - self.wandb_logging["n_preview_samples"], + self.wandb_logging["n_preview_samples"], + ), + "validation_samples": wandb_table( + AudioFileDataset( + validation_df, + self.preprocessor, + bypass_augmentations=True, ), - } + self.wandb_logging["n_preview_samples"], + ), } )
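Two usage sketches for behavior changes in this series, written from the diffs above rather than run against the package (file names are placeholders):

    import math
    import pandas as pd
    from opensoundscape import Audio
    from opensoundscape.ml.utils import check_labels

    # PATCH 03: extend_to() no longer trims; it only pads short audio with silence
    audio = Audio.from_file("silence_10s.mp3")             # a 10 s file
    assert math.isclose(audio.extend_to(6).duration, 10)   # returned unchanged, not cut to 6 s
    assert math.isclose(audio.extend_to(12).duration, 12)  # padded to 12 s

    # PATCH 13: label dataframes are validated for class order and values in [0,1]
    labels = pd.DataFrame({"a": [0, 1], "b": [0, 2]}, index=["f1.wav", "f2.wav"])
    check_labels(labels, classes=["a", "b"])  # raises AssertionError (a label of 2)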