From fc43b5cad167b013af9e377bcbfa5a7bc671aea3 Mon Sep 17 00:00:00 2001
From: LeonardoViotti <leonardoviotti@gmail.com>
Date: Tue, 2 Jan 2024 16:39:24 -0500
Subject: [PATCH 01/43] First version of opensoundscape.seed() function.

Draft function that worked in my initial testing using OpSo 0.9.1, but still needs testing on 0.10.1
---
 opensoundscape/seed.py | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 opensoundscape/seed.py

diff --git a/opensoundscape/seed.py b/opensoundscape/seed.py
new file mode 100644
index 00000000..31b9e55e
--- /dev/null
+++ b/opensoundscape/seed.py
@@ -0,0 +1,11 @@
+""" seed.py: Set random state across different libraries for reproducibility
+"""
+import numpy as np
+import torch
+import random
+
+def seed(seed):
+    np.random.seed(seed)
+    random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
\ No newline at end of file

From a7c8f07d34a708363c8615e5a77cf197a7e6fbdc Mon Sep 17 00:00:00 2001
From: LeonardoViotti <leonardoviotti@gmail.com>
Date: Wed, 3 Jan 2024 13:53:00 -0500
Subject: [PATCH 02/43] First version of opensoundscape.seed().

---
 opensoundscape/__init__.py | 1 +
 opensoundscape/seed.py     | 8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/opensoundscape/__init__.py b/opensoundscape/__init__.py
index 55e8ecbf..bd5bed6a 100644
--- a/opensoundscape/__init__.py
+++ b/opensoundscape/__init__.py
@@ -26,6 +26,7 @@
 from .preprocess.actions import Action
 from .preprocess.preprocessors import SpectrogramPreprocessor, AudioPreprocessor
 from .sample import AudioSample
+from .seed import seed
 from .annotations import BoxedAnnotations
 from .preprocess.utils import show_tensor, show_tensor_grid
 from .localization import SpatialEvent, SynchronizedRecorderArray
diff --git a/opensoundscape/seed.py b/opensoundscape/seed.py
index 31b9e55e..0099f4f9 100644
--- a/opensoundscape/seed.py
+++ b/opensoundscape/seed.py
@@ -4,8 +4,12 @@
 import torch
 import random
 
-def seed(seed):
+
+def seed(seed, verbose=True):
+    print(f"Random state set with seed {seed}")
+
+    torch.backends.cudnn.deterministic = True
     np.random.seed(seed)
     random.seed(seed)
     torch.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)
\ No newline at end of file
+    torch.cuda.manual_seed_all(seed)

From d88524f7f66a021ccf5602068a36ee79915b9e18 Mon Sep 17 00:00:00 2001
From: LeonardoViotti <leonardoviotti@gmail.com>
Date: Wed, 3 Jan 2024 13:53:32 -0500
Subject: [PATCH 03/43] Unit testing for opensoundscape.seed().

---
 tests/test_seed.py | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 tests/test_seed.py

diff --git a/tests/test_seed.py b/tests/test_seed.py
new file mode 100644
index 00000000..fe872cc1
--- /dev/null
+++ b/tests/test_seed.py
@@ -0,0 +1,66 @@
+from opensoundscape import seed
+import numpy as np
+import torch
+import random
+from opensoundscape.ml import cnn, cnn_architectures
+
+import pytest
+
+pytestmark = pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234])
+
+
+def test_torch_rand(input):
+    seed(input)
+    tr1 = torch.rand(100)
+
+    seed(input)
+    tr2 = torch.rand(100)
+
+    seed(input + 1)
+    tr3 = torch.rand(100)
+
+    assert all(tr1 == tr2) & any(tr1 != tr3)
+
+
+def test_numpy_random_rand(input):
+    seed(input)
+    nr1 = np.random.rand(100)
+
+    seed(input)
+    nr2 = np.random.rand(100)
+
+    seed(input + 1)
+    nr3 = np.random.rand(100)
+
+    assert all(nr1 == nr2) & any(nr1 != nr3)
+
+
+def test_radom_sample(input):
+    list1000 = list(range(1, 1000))
+
+    seed(input)
+    rs1 = random.sample(list1000, 100)
+
+    seed(input)
+    rs2 = random.sample(list1000, 100)
+
+    seed(input + 1)
+    rs3 = random.sample(list1000, 100)
+
+    assert (rs1 == rs2) & (rs1 != rs3)
+
+
+def test_cnn(input):
+    seed(input)
+    model_resnet1 = cnn_architectures.resnet18(num_classes=10, weights=None)
+    lw1 = model_resnet1.layer1[0].conv1.weight
+
+    seed(input)
+    model_resnet2 = cnn_architectures.resnet18(num_classes=10, weights=None)
+    lw2 = model_resnet2.layer1[0].conv1.weight
+
+    seed(input + 1)
+    model_resnet3 = cnn_architectures.resnet18(num_classes=10, weights=None)
+    lw3 = model_resnet3.layer1[0].conv1.weight
+
+    assert torch.all(lw1 == lw2) & torch.any(lw1 != lw3)

From 261851f12ddf0bc04829b616270005a427be0121 Mon Sep 17 00:00:00 2001
From: LeonardoViotti <leonardoviotti@gmail.com>
Date: Wed, 3 Jan 2024 15:48:16 -0500
Subject: [PATCH 04/43] Resolved comments for PR #929

---
 opensoundscape/__init__.py               |  1 -
 opensoundscape/seed.py                   | 15 --------------
 opensoundscape/utils.py                  | 25 ++++++++++++++++++++++-
 tests/{test_seed.py => test_set_seed.py} | 26 ++++++++++++------------
 4 files changed, 37 insertions(+), 30 deletions(-)
 delete mode 100644 opensoundscape/seed.py
 rename tests/{test_seed.py => test_set_seed.py} (80%)

diff --git a/opensoundscape/__init__.py b/opensoundscape/__init__.py
index bd5bed6a..55e8ecbf 100644
--- a/opensoundscape/__init__.py
+++ b/opensoundscape/__init__.py
@@ -26,7 +26,6 @@
 from .preprocess.actions import Action
 from .preprocess.preprocessors import SpectrogramPreprocessor, AudioPreprocessor
 from .sample import AudioSample
-from .seed import seed
 from .annotations import BoxedAnnotations
 from .preprocess.utils import show_tensor, show_tensor_grid
 from .localization import SpatialEvent, SynchronizedRecorderArray
diff --git a/opensoundscape/seed.py b/opensoundscape/seed.py
deleted file mode 100644
index 0099f4f9..00000000
--- a/opensoundscape/seed.py
+++ /dev/null
@@ -1,15 +0,0 @@
-""" seed.py: Set random state across different libraries for reproducibility
-"""
-import numpy as np
-import torch
-import random
-
-
-def seed(seed, verbose=True):
-    print(f"Random state set with seed {seed}")
-
-    torch.backends.cudnn.deterministic = True
-    np.random.seed(seed)
-    random.seed(seed)
-    torch.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)
diff --git a/opensoundscape/utils.py b/opensoundscape/utils.py
index a28b9ae1..f29447c1 100644
--- a/opensoundscape/utils.py
+++ b/opensoundscape/utils.py
@@ -9,7 +9,8 @@
 import soundfile
 import librosa
 from matplotlib.colors import LinearSegmentedColormap
-
+import torch
+import random
 
 class GetDurationError(ValueError):
     """raised if librosa.get_duration(path=f) causes an error"""
@@ -329,3 +330,25 @@ def generate_opacity_colormaps(
         colormaps.append(cmap)
 
     return colormaps
+
+
+import numpy as np
+import torch
+import random
+
+
+def set_seed(seed, verbose=True):
+    """Set random state across different libraries for reproducibility
+
+    Args:
+        seed (int): Number to fix random number generators to a specific start.
+        verbose (bool, optional): Print set seed. Defaults to True.
+    """
+    if verbose:
+        print(f"Random state set with seed {seed}")
+
+    torch.backends.cudnn.deterministic = True
+    np.random.seed(seed)
+    random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
\ No newline at end of file
diff --git a/tests/test_seed.py b/tests/test_set_seed.py
similarity index 80%
rename from tests/test_seed.py
rename to tests/test_set_seed.py
index fe872cc1..f135ca9e 100644
--- a/tests/test_seed.py
+++ b/tests/test_set_seed.py
@@ -1,4 +1,4 @@
-from opensoundscape import seed
+from opensoundscape.utils import set_seed
 import numpy as np
 import torch
 import random
@@ -10,26 +10,26 @@
 
 
 def test_torch_rand(input):
-    seed(input)
+    set_seed(input)
     tr1 = torch.rand(100)
 
-    seed(input)
+    set_seed(input)
     tr2 = torch.rand(100)
 
-    seed(input + 1)
+    set_seed(input + 1)
     tr3 = torch.rand(100)
 
     assert all(tr1 == tr2) & any(tr1 != tr3)
 
 
 def test_numpy_random_rand(input):
-    seed(input)
+    set_seed(input)
     nr1 = np.random.rand(100)
 
-    seed(input)
+    set_seed(input)
     nr2 = np.random.rand(100)
 
-    seed(input + 1)
+    set_seed(input + 1)
     nr3 = np.random.rand(100)
 
     assert all(nr1 == nr2) & any(nr1 != nr3)
@@ -38,28 +38,28 @@ def test_numpy_random_rand(input):
 def test_radom_sample(input):
     list1000 = list(range(1, 1000))
 
-    seed(input)
+    set_seed(input)
     rs1 = random.sample(list1000, 100)
 
-    seed(input)
+    set_seed(input)
     rs2 = random.sample(list1000, 100)
 
-    seed(input + 1)
+    set_seed(input + 1)
     rs3 = random.sample(list1000, 100)
 
     assert (rs1 == rs2) & (rs1 != rs3)
 
 
 def test_cnn(input):
-    seed(input)
+    set_seed(input)
     model_resnet1 = cnn_architectures.resnet18(num_classes=10, weights=None)
     lw1 = model_resnet1.layer1[0].conv1.weight
 
-    seed(input)
+    set_seed(input)
     model_resnet2 = cnn_architectures.resnet18(num_classes=10, weights=None)
     lw2 = model_resnet2.layer1[0].conv1.weight
 
-    seed(input + 1)
+    set_seed(input + 1)
     model_resnet3 = cnn_architectures.resnet18(num_classes=10, weights=None)
     lw3 = model_resnet3.layer1[0].conv1.weight
 

From 87cc881ce8d1c5672fa1ffe23c820906ac9ebfdd Mon Sep 17 00:00:00 2001
From: LeonardoViotti <leonardoviotti@gmail.com>
Date: Thu, 4 Jan 2024 09:51:12 -0500
Subject: [PATCH 05/43] Reorganized utils unit testing.

---
 tests/test_set_seed.py | 66 ------------------------------------------
 tests/test_utils.py    | 62 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 66 deletions(-)
 delete mode 100644 tests/test_set_seed.py

diff --git a/tests/test_set_seed.py b/tests/test_set_seed.py
deleted file mode 100644
index f135ca9e..00000000
--- a/tests/test_set_seed.py
+++ /dev/null
@@ -1,66 +0,0 @@
-from opensoundscape.utils import set_seed
-import numpy as np
-import torch
-import random
-from opensoundscape.ml import cnn, cnn_architectures
-
-import pytest
-
-pytestmark = pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234])
-
-
-def test_torch_rand(input):
-    set_seed(input)
-    tr1 = torch.rand(100)
-
-    set_seed(input)
-    tr2 = torch.rand(100)
-
-    set_seed(input + 1)
-    tr3 = torch.rand(100)
-
-    assert all(tr1 == tr2) & any(tr1 != tr3)
-
-
-def test_numpy_random_rand(input):
-    set_seed(input)
-    nr1 = np.random.rand(100)
-
-    set_seed(input)
-    nr2 = np.random.rand(100)
-
-    set_seed(input + 1)
-    nr3 = np.random.rand(100)
-
-    assert all(nr1 == nr2) & any(nr1 != nr3)
-
-
-def test_radom_sample(input):
-    list1000 = list(range(1, 1000))
-
-    set_seed(input)
-    rs1 = random.sample(list1000, 100)
-
-    set_seed(input)
-    rs2 = random.sample(list1000, 100)
-
-    set_seed(input + 1)
-    rs3 = random.sample(list1000, 100)
-
-    assert (rs1 == rs2) & (rs1 != rs3)
-
-
-def test_cnn(input):
-    set_seed(input)
-    model_resnet1 = cnn_architectures.resnet18(num_classes=10, weights=None)
-    lw1 = model_resnet1.layer1[0].conv1.weight
-
-    set_seed(input)
-    model_resnet2 = cnn_architectures.resnet18(num_classes=10, weights=None)
-    lw2 = model_resnet2.layer1[0].conv1.weight
-
-    set_seed(input + 1)
-    model_resnet3 = cnn_architectures.resnet18(num_classes=10, weights=None)
-    lw3 = model_resnet3.layer1[0].conv1.weight
-
-    assert torch.all(lw1 == lw2) & torch.any(lw1 != lw3)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index bfea9cf3..f175852a 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -3,6 +3,9 @@
 import pandas as pd
 import pytz
 import datetime
+import torch
+import random
+from opensoundscape.ml import cnn, cnn_architectures
 
 from opensoundscape import utils
 
@@ -168,3 +171,62 @@ def test_make_clip_df_from_label_df(silence_10s_mp3_str, metadata_wav_str):
     # should copy labels for each file to all clips of that file
     # duplicate file should have labels from _first_ occurrence in label_df
     assert np.array_equal(clip_df["a"].values, [0, 0, 0, 0, 2, 2])
+    
+    
+
+@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234])
+def test_torch_rand(input):
+    utils.set_seed(input)
+    tr1 = torch.rand(100)
+
+    utils.set_seed(input)
+    tr2 = torch.rand(100)
+
+    utils.set_seed(input + 1)
+    tr3 = torch.rand(100)
+
+    assert all(tr1 == tr2) & any(tr1 != tr3)
+
+@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234])
+def test_numpy_random_rand(input):
+    utils.set_seed(input)
+    nr1 = np.random.rand(100)
+
+    utils.set_seed(input)
+    nr2 = np.random.rand(100)
+
+    utils.set_seed(input + 1)
+    nr3 = np.random.rand(100)
+
+    assert all(nr1 == nr2) & any(nr1 != nr3)
+
+@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234])
+def test_radom_sample(input):
+    list1000 = list(range(1, 1000))
+
+    utils.set_seed(input)
+    rs1 = random.sample(list1000, 100)
+
+    utils.set_seed(input)
+    rs2 = random.sample(list1000, 100)
+
+    utils.set_seed(input + 1)
+    rs3 = random.sample(list1000, 100)
+
+    assert (rs1 == rs2) & (rs1 != rs3)
+
+@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234])
+def test_cnn(input):
+    utils.set_seed(input)
+    model_resnet1 = cnn_architectures.resnet18(num_classes=10, weights=None)
+    lw1 = model_resnet1.layer1[0].conv1.weight
+
+    utils.set_seed(input)
+    model_resnet2 = cnn_architectures.resnet18(num_classes=10, weights=None)
+    lw2 = model_resnet2.layer1[0].conv1.weight
+
+    utils.set_seed(input + 1)
+    model_resnet3 = cnn_architectures.resnet18(num_classes=10, weights=None)
+    lw3 = model_resnet3.layer1[0].conv1.weight
+
+    assert torch.all(lw1 == lw2) & torch.any(lw1 != lw3)

From 75cf950b72aa8f07bc37f97f465ed3a1c0fe9e2f Mon Sep 17 00:00:00 2001
From: LeonardoViotti <leonardoviotti@gmail.com>
Date: Thu, 4 Jan 2024 09:55:06 -0500
Subject: [PATCH 06/43] Added decorator comments.

---
 tests/test_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index f175852a..26b8f2cd 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -174,7 +174,7 @@ def test_make_clip_df_from_label_df(silence_10s_mp3_str, metadata_wav_str):
     
     
 
-@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234])
+@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) # Loops trough each value in list when running pytest
 def test_torch_rand(input):
     utils.set_seed(input)
     tr1 = torch.rand(100)

From 22f48d936286000d4dc2c538c2c52551a82649ee Mon Sep 17 00:00:00 2001
From: LeonardoViotti <leonardoviotti@gmail.com>
Date: Thu, 4 Jan 2024 10:17:24 -0500
Subject: [PATCH 07/43] Updated decorator description in comments.

---
 tests/test_utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index 26b8f2cd..62d4a4c9 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -174,7 +174,11 @@ def test_make_clip_df_from_label_df(silence_10s_mp3_str, metadata_wav_str):
     
     
 
-@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) # Loops trough each value in list when running pytest
+# The @pytest.mark.parametrize decorator loops trough each value in list when running pytest. 
+# If you add --verbose, it also prints if it passed for each value in the list for each function 
+# that takes it as input.
+
+@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) 
 def test_torch_rand(input):
     utils.set_seed(input)
     tr1 = torch.rand(100)

From 1fc6d3a00197f9141ec5d92efdbf4d5bb4fe81bd Mon Sep 17 00:00:00 2001
From: LeonardoViotti <leonardoviotti@gmail.com>
Date: Thu, 4 Jan 2024 12:05:26 -0500
Subject: [PATCH 08/43] Resolved PR's comments.

---
 opensoundscape/utils.py | 6 ------
 tests/test_utils.py     | 3 +++
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/opensoundscape/utils.py b/opensoundscape/utils.py
index f29447c1..d377bacd 100644
--- a/opensoundscape/utils.py
+++ b/opensoundscape/utils.py
@@ -331,12 +331,6 @@ def generate_opacity_colormaps(
 
     return colormaps
 
-
-import numpy as np
-import torch
-import random
-
-
 def set_seed(seed, verbose=True):
     """Set random state across different libraries for reproducibility
 
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 62d4a4c9..3b9dbd87 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -178,6 +178,9 @@ def test_make_clip_df_from_label_df(silence_10s_mp3_str, metadata_wav_str):
 # If you add --verbose, it also prints if it passed for each value in the list for each function 
 # that takes it as input.
 
+# For all utils.set_seed() tests, assert that results are determistic for the the same seed AND
+# for different seeds, in a tensor/array at least one element is different.
+
 @pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) 
 def test_torch_rand(input):
     utils.set_seed(input)

From ab858caf6333d5087e7644ab4ced8e4baa3b0e19 Mon Sep 17 00:00:00 2001
From: LeonardoViotti <leonardoviotti@gmail.com>
Date: Mon, 15 Jan 2024 12:59:16 -0500
Subject: [PATCH 09/43] Black formatted opensoundscape/utils.py and
 tests/test_utils.py.

---
 opensoundscape/utils.py |  4 +++-
 tests/test_utils.py     | 13 ++++++++-----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/opensoundscape/utils.py b/opensoundscape/utils.py
index d377bacd..30a78122 100644
--- a/opensoundscape/utils.py
+++ b/opensoundscape/utils.py
@@ -12,6 +12,7 @@
 import torch
 import random
 
+
 class GetDurationError(ValueError):
     """raised if librosa.get_duration(path=f) causes an error"""
 
@@ -331,6 +332,7 @@ def generate_opacity_colormaps(
 
     return colormaps
 
+
 def set_seed(seed, verbose=True):
     """Set random state across different libraries for reproducibility
 
@@ -345,4 +347,4 @@ def set_seed(seed, verbose=True):
     np.random.seed(seed)
     random.seed(seed)
     torch.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)
\ No newline at end of file
+    torch.cuda.manual_seed_all(seed)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 3b9dbd87..5ed2eb0f 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -171,17 +171,17 @@ def test_make_clip_df_from_label_df(silence_10s_mp3_str, metadata_wav_str):
     # should copy labels for each file to all clips of that file
     # duplicate file should have labels from _first_ occurrence in label_df
     assert np.array_equal(clip_df["a"].values, [0, 0, 0, 0, 2, 2])
-    
-    
 
-# The @pytest.mark.parametrize decorator loops trough each value in list when running pytest. 
-# If you add --verbose, it also prints if it passed for each value in the list for each function 
+
+# The @pytest.mark.parametrize decorator loops trough each value in list when running pytest.
+# If you add --verbose, it also prints if it passed for each value in the list for each function
 # that takes it as input.
 
 # For all utils.set_seed() tests, assert that results are determistic for the the same seed AND
 # for different seeds, in a tensor/array at least one element is different.
 
-@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) 
+
+@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234])
 def test_torch_rand(input):
     utils.set_seed(input)
     tr1 = torch.rand(100)
@@ -194,6 +194,7 @@ def test_torch_rand(input):
 
     assert all(tr1 == tr2) & any(tr1 != tr3)
 
+
 @pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234])
 def test_numpy_random_rand(input):
     utils.set_seed(input)
@@ -207,6 +208,7 @@ def test_numpy_random_rand(input):
 
     assert all(nr1 == nr2) & any(nr1 != nr3)
 
+
 @pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234])
 def test_radom_sample(input):
     list1000 = list(range(1, 1000))
@@ -222,6 +224,7 @@ def test_radom_sample(input):
 
     assert (rs1 == rs2) & (rs1 != rs3)
 
+
 @pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234])
 def test_cnn(input):
     utils.set_seed(input)

From 2333ee7691a2bc941f416b3b600df8ea7144d4c8 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Tue, 16 Jan 2024 23:48:32 -0500
Subject: [PATCH 10/43] use profile=True in forward to time-profile
 Preprocessor

the BasePreprocessor class now saves the time taken by each preprocessing step in the sample's `.runtime` attribute, a pandas Series indexed like the preprocessor's `.pipeline`

needs tests
---
 opensoundscape/preprocess/preprocessors.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/opensoundscape/preprocess/preprocessors.py b/opensoundscape/preprocess/preprocessors.py
index 25fb6f94..82772432 100644
--- a/opensoundscape/preprocess/preprocessors.py
+++ b/opensoundscape/preprocess/preprocessors.py
@@ -2,6 +2,7 @@
 from pathlib import Path
 import pandas as pd
 import copy
+import time
 
 from opensoundscape.preprocess import actions
 from opensoundscape.preprocess.actions import (
@@ -89,6 +90,7 @@ def forward(
         break_on_key=None,
         bypass_augmentations=False,
         trace=False,
+        profile=False,
     ):
         """perform actions in self.pipeline on a sample (until a break point)
 
@@ -113,8 +115,10 @@ def forward(
                     the start and end time of clip in audio
             bypass_augmentations: if True, actions with .is_augmentatino=True
                 are skipped
-            trace (boolean - default False): if True, saves the output of each pipeline step in the `sample_info` output argument - should be utilized for analysis/debugging on samples of interest
-
+            trace (boolean - default False): if True, saves the output of each pipeline step in the `sample_info` output argument
+                Can be used for analysis/debugging of intermediate values of the sample during preprocessing
+            profile (boolean - default False): if True, saves the runtime of each pipeline step in `.runtime`
+                (a series indexed like .pipeline)
         Returns:
             sample (instance of AudioSample class)
 
@@ -129,10 +133,15 @@ def forward(
         if trace:
             sample.trace = pd.Series(index=self.pipeline.index)
 
+        if profile:
+            sample.runtime = pd.Series(index=self.pipeline.index)
+
         # run the pipeline by performing each Action on the AudioSample
         try:
             # perform each action in the pipeline
             for k, action in self.pipeline.items():
+                time0 = time.time()
+
                 if type(action) == break_on_type or k == break_on_key:
                     if trace:
                         # saved "output" of this step informs user pipeline was stopped
@@ -148,6 +157,9 @@ def forward(
                 # perform the action (modifies the AudioSample in-place)
                 action.go(sample)
 
+                if profile:
+                    sample.runtime[k] = time.time() - time0
+
                 if trace:  # user requested record of preprocessing steps
                     # save the current state of the sample's data
                     # (trace is a Series with index matching self.pipeline)

From f3e2650dacf92c16cdf88870718c9120c6169429 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Tue, 16 Jan 2024 23:55:04 -0500
Subject: [PATCH 11/43] add test for preprocessor.forward `profile=True`

---
 tests/test_preprocessors.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/test_preprocessors.py b/tests/test_preprocessors.py
index 18d195e1..452e9cf9 100644
--- a/tests/test_preprocessors.py
+++ b/tests/test_preprocessors.py
@@ -41,6 +41,14 @@ def test_interrupt_get_item(preprocessor, sample):
     assert audio.samples.shape == (44100 * 10,)
 
 
+def test_profile(preprocessor, sample):
+    """sample should have .runtime attribute with index matching preprocessor.pipeline, and float values"""
+    sample = preprocessor.forward(sample, profile=True)
+    # should report the time to load the audio
+    assert sample.runtime[preprocessor.pipeline.index.values[0]] > 0
+    assert (sample.runtime.index == preprocessor.pipeline.index).all()
+
+
 def test_audio_resample(preprocessor, sample):
     """should retain original sample rate"""
     preprocessor.pipeline.load_audio.set(sample_rate=16000)

From 35fd0802fde90b34c291e03c1a47ece56409f946 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Wed, 24 Jan 2024 00:00:07 -0500
Subject: [PATCH 12/43] resolve 911 change labels of Spectrogram.plot() and add
 kHz arg

---
 opensoundscape/spectrogram.py | 14 ++++++++++----
 tests/test_spectrogram.py     |  6 ++++++
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/opensoundscape/spectrogram.py b/opensoundscape/spectrogram.py
index ac19fea6..d3f36a62 100644
--- a/opensoundscape/spectrogram.py
+++ b/opensoundscape/spectrogram.py
@@ -416,7 +416,9 @@ def trim(self, start_time, end_time):
             times=self.times[lowest_index : highest_index + 1],
         )
 
-    def plot(self, inline=True, fname=None, show_colorbar=False, range=(-100, -20)):
+    def plot(
+        self, inline=True, fname=None, show_colorbar=False, range=(-100, -20), kHz=False
+    ):
         """Plot the spectrogram with matplotlib.pyplot
 
         Args:
@@ -425,19 +427,23 @@ def plot(self, inline=True, fname=None, show_colorbar=False, range=(-100, -20)):
             show_colorbar: include image legend colorbar from pyplot
             range: tuple of (min,max) values of .spectrogram to map to the lowest/highest
                 pixel values. Values outside this range will be clipped to the min/max values
+            kHz: bool [default:False] if True, y axis is plotted in units of kHz rather than Hz
         """
         norm = matplotlib.colors.Normalize(vmin=range[0], vmax=range[1])
+
+        # if user specifies kHz=True, use kHz units rather than Hz on y axis
+        y = self.frequencies / 1000 if kHz else self.frequencies
         plt.pcolormesh(
             self.times,
-            self.frequencies,
+            y,
             self.spectrogram,
             shading="auto",
             cmap="Greys",
             norm=norm,
         )
 
-        plt.xlabel("time (sec)")
-        plt.ylabel("frequency (Hz)")
+        plt.xlabel("Time (sec)")
+        plt.ylabel(f"Frequency ({'kHz' if kHz else 'Hz'})")
         if show_colorbar:
             plt.colorbar()
 
diff --git a/tests/test_spectrogram.py b/tests/test_spectrogram.py
index e1398c56..b1c4ab22 100644
--- a/tests/test_spectrogram.py
+++ b/tests/test_spectrogram.py
@@ -158,6 +158,12 @@ def test_plot_spectrogram():
     Spectrogram(np.zeros((5, 10)), np.zeros((5)), np.zeros((10)), (-100, -20)).plot()
 
 
+def test_plot_spectrogram_kHz():
+    Spectrogram(np.zeros((5, 10)), np.zeros((5)), np.zeros((10)), (-100, -20)).plot(
+        kHz=True
+    )
+
+
 def test_amplitude_spectrogram():
     Spectrogram(
         np.zeros((5, 10)), np.zeros((5)), np.zeros((10)), (-100, -20)

From 458e4efc4d62b0b2c4b82f02919dfc4d1a958c5d Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Fri, 23 Feb 2024 09:03:58 -0500
Subject: [PATCH 13/43] remove special characters from wandb.log keys

Windows users are getting errors logging to wandb. We can't reproduce them, but Louis suspected it may be because of " / " in wandb logging keys ending up in file paths. This commit removes slashes, spaces, and other special characters ([]) from wandb.log() string keys
---
 opensoundscape/ml/cnn.py | 55 +++++++++++++++++++++++-----------------
 1 file changed, 32 insertions(+), 23 deletions(-)

diff --git a/opensoundscape/ml/cnn.py b/opensoundscape/ml/cnn.py
index 4d6d0fc8..c6737439 100644
--- a/opensoundscape/ml/cnn.py
+++ b/opensoundscape/ml/cnn.py
@@ -2,6 +2,7 @@
 
 For tutorials, see notebooks on opensoundscape.org
 """
+
 from pathlib import Path
 import warnings
 import copy
@@ -219,10 +220,12 @@ def predict(
             # Log a table of preprocessed samples to wandb
             wandb_session.log(
                 {
-                    "Samples / Preprocessed samples": wandb_table(
-                        dataloader.dataset.dataset,
-                        self.wandb_logging["n_preview_samples"],
-                    )
+                    "Samples": {
+                        "Peprocessed_samples": wandb_table(
+                            dataloader.dataset.dataset,
+                            self.wandb_logging["n_preview_samples"],
+                        )
+                    }
                 }
             )
 
@@ -266,7 +269,9 @@ def predict(
                     gradcam_model=self if self.wandb_logging["gradcam"] else None,
                     raise_exceptions=True,  # TODO back to false when done debugging
                 )
-                wandb_session.log({f"Samples / Top scoring [{c}]": table})
+                wandb_session.log(
+                    {"Samples": {f"Top_scoring_{c.replace(' ','_')}": table}}
+                )
 
         if return_invalid_samples:
             return score_df, invalid_samples
@@ -840,24 +845,28 @@ def train(
             # log tables of preprocessed samples
             wandb_session.log(
                 {
-                    "Samples / training samples": wandb_table(
-                        AudioFileDataset(
-                            train_df, self.preprocessor, bypass_augmentations=False
+                    "Samples": {
+                        "training_samples": wandb_table(
+                            AudioFileDataset(
+                                train_df, self.preprocessor, bypass_augmentations=False
+                            ),
+                            self.wandb_logging["n_preview_samples"],
                         ),
-                        self.wandb_logging["n_preview_samples"],
-                    ),
-                    "Samples / training samples no aug": wandb_table(
-                        AudioFileDataset(
-                            train_df, self.preprocessor, bypass_augmentations=True
+                        "training_samples_no_aug": wandb_table(
+                            AudioFileDataset(
+                                train_df, self.preprocessor, bypass_augmentations=True
+                            ),
+                            self.wandb_logging["n_preview_samples"],
                         ),
-                        self.wandb_logging["n_preview_samples"],
-                    ),
-                    "Samples / validation samples": wandb_table(
-                        AudioFileDataset(
-                            validation_df, self.preprocessor, bypass_augmentations=True
+                        "validation_samples": wandb_table(
+                            AudioFileDataset(
+                                validation_df,
+                                self.preprocessor,
+                                bypass_augmentations=True,
+                            ),
+                            self.wandb_logging["n_preview_samples"],
                         ),
-                        self.wandb_logging["n_preview_samples"],
-                    ),
+                    }
                 }
             )
 
@@ -925,9 +934,9 @@ def train(
                     validation_df,
                     batch_size=batch_size,
                     num_workers=num_workers,
-                    activation_layer="softmax_and_logit"
-                    if self.single_target
-                    else None,
+                    activation_layer=(
+                        "softmax_and_logit" if self.single_target else None
+                    ),
                     split_files_into_clips=False,
                 )  # returns a dataframe matching validation_df
                 validation_targets = validation_df.values

From 98704d41f16c3059927901abd803dcfe5667b487 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Mon, 8 Apr 2024 13:19:15 -0400
Subject: [PATCH 14/43] fix extend_to resolves #972 and #948

changes the behavior of extend_to() so that it doesn't trim audio
---
 opensoundscape/audio.py | 41 +++++++++++++++++++++--------------------
 tests/test_audio.py     | 17 ++++++++++++++---
 2 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/opensoundscape/audio.py b/opensoundscape/audio.py
index 926bdf4f..c3faed55 100644
--- a/opensoundscape/audio.py
+++ b/opensoundscape/audio.py
@@ -587,41 +587,42 @@ def loop(self, length=None, n=None):
     def extend_to(self, duration):
         """Extend audio file to desired duration by adding silence to the end
 
-        If duration is less than the Audio's .duration, the Audio object is trimmed.
+        If `duration` is less than or equal to the Audio's self.duration, the Audio remains unchanged.
+
         Otherwise, silence is added to the end of the Audio object to achieve the desired
-        duration.
+        `duration`.
 
         Args:
-            duration: the final duration in seconds of the audio object
+            duration: the minimum final duration in seconds of the audio object
 
         Returns:
             a new Audio object of the desired duration
         """
 
-        target_n_samples = round(duration * self.sample_rate)
+        minimum_n_samples = round(duration * self.sample_rate)
         current_n_samples = len(self.samples)
 
-        if target_n_samples > current_n_samples:
+        if minimum_n_samples <= current_n_samples:
+            return self._spawn()
+
+        else:
             # add 0's to the end of the sample array
             new_samples = np.pad(
-                self.samples, pad_width=(0, target_n_samples - current_n_samples)
+                self.samples, pad_width=(0, minimum_n_samples - current_n_samples)
             )
-        elif target_n_samples < current_n_samples:
-            # trim to desired samples (similar to self.trim())
-            new_samples = self.samples[0:target_n_samples]
 
-        # update metadata to reflect new duration
-        if self.metadata is None:
-            metadata = None
-        else:
-            metadata = self.metadata.copy()
-            if "duration" in metadata:
-                metadata["duration"] = len(new_samples) / self.sample_rate
+            # update metadata to reflect new duration
+            if self.metadata is None:
+                metadata = None
+            else:
+                metadata = self.metadata.copy()
+                if "duration" in metadata:
+                    metadata["duration"] = len(new_samples) / self.sample_rate
 
-        return self._spawn(
-            samples=new_samples,
-            metadata=metadata,
-        )
+            return self._spawn(
+                samples=new_samples,
+                metadata=metadata,
+            )
 
     def extend_by(self, duration):
         """Extend audio file by adding `duration` seconds of silence to the end
diff --git a/tests/test_audio.py b/tests/test_audio.py
index 39dff9d8..5853d25d 100644
--- a/tests/test_audio.py
+++ b/tests/test_audio.py
@@ -523,12 +523,23 @@ def test_extend_to_correct_metadata(silence_10s_mp3_str):
 
 
 def test_extend_to_shorter_duration(silence_10s_mp3_str):
-    # extending 10s to 6s should simply trim the audio
+    # extending 10s to 6s should retain 10s
     audio = Audio.from_file(silence_10s_mp3_str, sample_rate=10000)
     a2 = audio.extend_to(6)
-    assert math.isclose(a2.duration, 6)
+    assert math.isclose(a2.duration, 10)
     # duration in metadata should be updated:
-    assert math.isclose(a2.metadata["duration"], 6)
+    assert math.isclose(a2.metadata["duration"], 10)
+    # other metadata should be retained:
+    assert a2.metadata["subtype"] == audio.metadata["subtype"]
+
+
+def test_extend_to_correct_duration_ok(silence_10s_mp3_str):
+    # extending 10s to 10 shouldn't raise error (#972)
+    audio = Audio.from_file(silence_10s_mp3_str, sample_rate=10000)
+    a2 = audio.extend_to(10)
+    assert math.isclose(a2.duration, 10)
+    # duration in metadata should be updated:
+    assert math.isclose(a2.metadata["duration"], 10)
     # other metadata should be retained:
     assert a2.metadata["subtype"] == audio.metadata["subtype"]
 

From f1dfaed57f2faf8bbd96e16c7354e0ab1095ae1d Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Mon, 8 Apr 2024 13:19:15 -0400
Subject: [PATCH 15/43] fix extend_to resolves #972 and #948

changes the behavior of extend_to() so that it doesn't trim audio
---
 opensoundscape/audio.py | 41 +++++++++++++++++++++--------------------
 tests/test_audio.py     | 17 ++++++++++++++---
 2 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/opensoundscape/audio.py b/opensoundscape/audio.py
index 926bdf4f..c3faed55 100644
--- a/opensoundscape/audio.py
+++ b/opensoundscape/audio.py
@@ -587,41 +587,42 @@ def loop(self, length=None, n=None):
     def extend_to(self, duration):
         """Extend audio file to desired duration by adding silence to the end
 
-        If duration is less than the Audio's .duration, the Audio object is trimmed.
+        If `duration` is less than or equal to the Audio's self.duration, the Audio remains unchanged.
+
         Otherwise, silence is added to the end of the Audio object to achieve the desired
-        duration.
+        `duration`.
 
         Args:
-            duration: the final duration in seconds of the audio object
+            duration: the minimum final duration in seconds of the audio object
 
         Returns:
             a new Audio object of the desired duration
         """
 
-        target_n_samples = round(duration * self.sample_rate)
+        minimum_n_samples = round(duration * self.sample_rate)
         current_n_samples = len(self.samples)
 
-        if target_n_samples > current_n_samples:
+        if minimum_n_samples <= current_n_samples:
+            return self._spawn()
+
+        else:
             # add 0's to the end of the sample array
             new_samples = np.pad(
-                self.samples, pad_width=(0, target_n_samples - current_n_samples)
+                self.samples, pad_width=(0, minimum_n_samples - current_n_samples)
             )
-        elif target_n_samples < current_n_samples:
-            # trim to desired samples (similar to self.trim())
-            new_samples = self.samples[0:target_n_samples]
 
-        # update metadata to reflect new duration
-        if self.metadata is None:
-            metadata = None
-        else:
-            metadata = self.metadata.copy()
-            if "duration" in metadata:
-                metadata["duration"] = len(new_samples) / self.sample_rate
+            # update metadata to reflect new duration
+            if self.metadata is None:
+                metadata = None
+            else:
+                metadata = self.metadata.copy()
+                if "duration" in metadata:
+                    metadata["duration"] = len(new_samples) / self.sample_rate
 
-        return self._spawn(
-            samples=new_samples,
-            metadata=metadata,
-        )
+            return self._spawn(
+                samples=new_samples,
+                metadata=metadata,
+            )
 
     def extend_by(self, duration):
         """Extend audio file by adding `duration` seconds of silence to the end
diff --git a/tests/test_audio.py b/tests/test_audio.py
index 39dff9d8..5853d25d 100644
--- a/tests/test_audio.py
+++ b/tests/test_audio.py
@@ -523,12 +523,23 @@ def test_extend_to_correct_metadata(silence_10s_mp3_str):
 
 
 def test_extend_to_shorter_duration(silence_10s_mp3_str):
-    # extending 10s to 6s should simply trim the audio
+    # extending 10s to 6s should retain 10s
     audio = Audio.from_file(silence_10s_mp3_str, sample_rate=10000)
     a2 = audio.extend_to(6)
-    assert math.isclose(a2.duration, 6)
+    assert math.isclose(a2.duration, 10)
     # duration in metadata should be updated:
-    assert math.isclose(a2.metadata["duration"], 6)
+    assert math.isclose(a2.metadata["duration"], 10)
+    # other metadata should be retained:
+    assert a2.metadata["subtype"] == audio.metadata["subtype"]
+
+
+def test_extend_to_correct_duration_ok(silence_10s_mp3_str):
+    # extending 10s to 10 shouldn't raise error (#972)
+    audio = Audio.from_file(silence_10s_mp3_str, sample_rate=10000)
+    a2 = audio.extend_to(10)
+    assert math.isclose(a2.duration, 10)
+    # duration in metadata should be updated:
+    assert math.isclose(a2.metadata["duration"], 10)
     # other metadata should be retained:
     assert a2.metadata["subtype"] == audio.metadata["subtype"]
 

From 520488c200c0e1fa34e393d5084ecde913549fe3 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Mon, 15 Apr 2024 13:41:51 -0400
Subject: [PATCH 16/43] use AudioSample.from_series to retain labels

resolves #961

BasePreprocessor._generate_sample, if passed pd.Series, should use the class method so that the resulting sample contains the labels rather than just path/start/end time
---
 opensoundscape/preprocess/preprocessors.py | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/opensoundscape/preprocess/preprocessors.py b/opensoundscape/preprocess/preprocessors.py
index 82772432..7bf1288a 100644
--- a/opensoundscape/preprocess/preprocessors.py
+++ b/opensoundscape/preprocess/preprocessors.py
@@ -1,4 +1,5 @@
 """Preprocessor classes: tools for preparing and augmenting audio samples"""
+
 from pathlib import Path
 import pandas as pd
 import copy
@@ -201,13 +202,7 @@ def _generate_sample(self, sample):
             ), "if passing tuple, first element must be str or pathlib.Path"
             sample = AudioSample(path, start_time=start, duration=self.sample_duration)
         elif isinstance(sample, pd.Series):
-            # .name should contain (path, start_time, end_time)
-            # note: end is not used, uses start_time self.sample_duration
-            path, start, _ = sample.name
-            assert isinstance(
-                path, (str, Path)
-            ), "if passing a series, series.name must contain (path, start_time, end_time)"
-            sample = AudioSample(path, start_time=start, duration=self.sample_duration)
+            sample = AudioSample.from_series(sample)
         else:
             assert isinstance(sample, AudioSample), (
                 "sample must be AudioSample, tuple of (path, start_time), "
@@ -297,11 +292,13 @@ def __init__(
                 ##  augmentations ##
                 # Overlay is a version of "mixup" that draws samples from a user-specified dataframe
                 # and overlays them on the current sample
-                "overlay": Overlay(
-                    is_augmentation=True, overlay_df=overlay_df, update_labels=False
-                )
-                if overlay_df is not None
-                else None,
+                "overlay": (
+                    Overlay(
+                        is_augmentation=True, overlay_df=overlay_df, update_labels=False
+                    )
+                    if overlay_df is not None
+                    else None
+                ),
                 # add vertical (time) and horizontal (frequency) masking bars
                 "time_mask": Action(actions.time_mask, is_augmentation=True),
                 "frequency_mask": Action(actions.frequency_mask, is_augmentation=True),

From 7b7d71b88a4bc76d8a914fa524d32cb9c022cf31 Mon Sep 17 00:00:00 2001
From: Louis Freeland-Haynes <66101835+louisfh@users.noreply.github.com>
Date: Thu, 2 May 2024 13:43:54 -0400
Subject: [PATCH 17/43] resolves issue 924 futurewarning pandas

---
 opensoundscape/sample.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/opensoundscape/sample.py b/opensoundscape/sample.py
index d852e2d1..3a440cb6 100644
--- a/opensoundscape/sample.py
+++ b/opensoundscape/sample.py
@@ -1,4 +1,5 @@
 """Class for holding information on a single sample"""
+
 import copy
 from pathlib import Path
 import torch
@@ -129,12 +130,12 @@ def end_time(self):
 
 # TODO: move this to dataloaders.py? or preprocessing.utils?
 def collate_audio_samples_to_dict(samples):
-    """generate batched tensors of data and labels (in a dictionary)
-
+    """
+    generate batched tensors of data and labels (in a dictionary).
     returns collated samples: a dictionary with keys "samples" and "labels"
 
     assumes that s.data is a Tensor and s.labels is a list/array
-    for each sample S
+    for each sample S, and that every sample has labels for the same classes.
 
     Args:
 
@@ -149,5 +150,5 @@ def collate_audio_samples_to_dict(samples):
     """
     return {
         "samples": torch.stack([s.data for s in samples]),
-        "labels": torch.Tensor([s.labels for s in samples]),
+        "labels": torch.Tensor([s.labels.iloc[0] for s in samples]),
     }

From 7e793c5d66fbabe18d31df1883742b7e3c16f1ab Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Thu, 2 May 2024 13:49:45 -0400
Subject: [PATCH 18/43] pass kwargs to torch.hub.load

in bioacoustics_model_zoo.load()
---
 opensoundscape/ml/bioacoustics_model_zoo.py | 6 ++++--
 opensoundscape/ml/cnn.py                    | 7 ++++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/opensoundscape/ml/bioacoustics_model_zoo.py b/opensoundscape/ml/bioacoustics_model_zoo.py
index d7b1bddd..9a5a86f3 100644
--- a/opensoundscape/ml/bioacoustics_model_zoo.py
+++ b/opensoundscape/ml/bioacoustics_model_zoo.py
@@ -1,4 +1,5 @@
 """lightweight wrapper to list and get models from bioacoustics model zoo with torch.hub"""
+
 import torch
 
 
@@ -14,7 +15,7 @@ def list_models():
     return torch.hub.list("kitzeslab/bioacoustics-model-zoo")
 
 
-def load(model):
+def load(model, **kwargs):
     """
     load a model from the [bioacoustics model zoo](https://github.com/kitzeslab/bioacoustics-model-zoo)
 
@@ -22,6 +23,7 @@ def load(model):
 
     Args:
         model: name of model to load, i.e. one listed by list_models()
+        **kwargs are passed to torch.hub.load()
 
     Returns:
         ready-to-use model object
@@ -35,4 +37,4 @@ def load(model):
     detailed instructions)
 
     """
-    return torch.hub.load("kitzeslab/bioacoustics-model-zoo", model)
+    return torch.hub.load("kitzeslab/bioacoustics-model-zoo", model, **kwargs)
diff --git a/opensoundscape/ml/cnn.py b/opensoundscape/ml/cnn.py
index 9659e004..59ffeb4d 100644
--- a/opensoundscape/ml/cnn.py
+++ b/opensoundscape/ml/cnn.py
@@ -2,6 +2,7 @@
 
 For tutorials, see notebooks on opensoundscape.org
 """
+
 from pathlib import Path
 import warnings
 import copy
@@ -925,9 +926,9 @@ def train(
                     validation_df,
                     batch_size=batch_size,
                     num_workers=num_workers,
-                    activation_layer="softmax_and_logit"
-                    if self.single_target
-                    else None,
+                    activation_layer=(
+                        "softmax_and_logit" if self.single_target else None
+                    ),
                     split_files_into_clips=False,
                 )  # returns a dataframe matching validation_df
                 validation_targets = validation_df.values

From 2d2e3d254225f092b3b6559bd2887e285caf4164 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Thu, 2 May 2024 13:59:36 -0400
Subject: [PATCH 19/43] add plt.close()

resolves matplotlib not being released when saving spectrogram to file #987
---
 opensoundscape/spectrogram.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/opensoundscape/spectrogram.py b/opensoundscape/spectrogram.py
index ac19fea6..d13f7420 100644
--- a/opensoundscape/spectrogram.py
+++ b/opensoundscape/spectrogram.py
@@ -452,6 +452,8 @@ def plot(self, inline=True, fname=None, show_colorbar=False, range=(-100, -20)):
             else:
                 plt.show()
 
+        plt.close()  # close the plot to avoid memory leaks
+
     def amplitude(self, freq_range=None):
         """create an amplitude vs time signal from spectrogram
 

From 8f383dfe12bbc9128e5dbacd5b365d6cac99f214 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Thu, 2 May 2024 14:09:26 -0400
Subject: [PATCH 20/43] copy overlay_df resolves modifying original label df in
 overlay init #953

avoid modifying original overlay_df object passed to Overlay.__init__()
---
 opensoundscape/preprocess/actions.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/opensoundscape/preprocess/actions.py b/opensoundscape/preprocess/actions.py
index 43ff0715..2e236481 100644
--- a/opensoundscape/preprocess/actions.py
+++ b/opensoundscape/preprocess/actions.py
@@ -8,6 +8,7 @@
 See the preprocessor module and Preprocessing tutorial
 for details on how to use and create your own actions.
 """
+
 import random
 import warnings
 import numpy as np
@@ -506,7 +507,7 @@ def __init__(self, is_augmentation=True, **kwargs):
 
         self.returns_labels = True
 
-        overlay_df = kwargs["overlay_df"]
+        overlay_df = kwargs["overlay_df"].copy()  # copy to avoid modifying original
         overlay_df = overlay_df[~overlay_df.index.duplicated()]  # remove duplicates
 
         # warn the user if using "different" as overlay_class

From a10be71ab8f2624cf12abe60cf36c5b0dac31fd0 Mon Sep 17 00:00:00 2001
From: Louis Freeland-Haynes <66101835+louisfh@users.noreply.github.com>
Date: Thu, 2 May 2024 16:01:48 -0400
Subject: [PATCH 21/43] Use np.vstack for collating batched samples

---
 opensoundscape/sample.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/opensoundscape/sample.py b/opensoundscape/sample.py
index 3a440cb6..7a51f57f 100644
--- a/opensoundscape/sample.py
+++ b/opensoundscape/sample.py
@@ -3,6 +3,7 @@
 import copy
 from pathlib import Path
 import torch
+import numpy as np
 
 
 class Sample:
@@ -150,5 +151,5 @@ def collate_audio_samples_to_dict(samples):
     """
     return {
         "samples": torch.stack([s.data for s in samples]),
-        "labels": torch.Tensor([s.labels.iloc[0] for s in samples]),
+        "labels": torch.Tensor(np.vstack([s.labels.values for s in samples])),
     }

From a9cdf88594ae2c6c51a5b9afd59758218fb4a8ca Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Thu, 2 May 2024 16:14:43 -0400
Subject: [PATCH 22/43] resolve spectrum estimation 'wrong' #947

scale DC and Nyquist frequency by just /N not 2/N
---
 opensoundscape/audio.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/opensoundscape/audio.py b/opensoundscape/audio.py
index c3faed55..556ad1aa 100644
--- a/opensoundscape/audio.py
+++ b/opensoundscape/audio.py
@@ -732,17 +732,20 @@ def spectrum(self):
 
         # Compute the fft (fast fourier transform) of the selected clip
         N = len(self.samples)
-        fft = scipy.fft.fft(self.samples)
+        fft = scipy.fft.rfft(self.samples)
+        fft = np.abs(fft)  # get the magnitude of the fft
 
         # create the frequencies corresponding to fft bins
-        freq = scipy.fft.fftfreq(N, d=1 / self.sample_rate)
+        freq = scipy.fft.rfftfreq(N, d=1 / self.sample_rate)
 
-        # remove negative frequencies and scale magnitude by 2.0/N:
-        fft = 2.0 / N * fft[0 : int(N / 2)]
-        frequencies = freq[0 : int(N / 2)]
-        fft = np.abs(fft)
+        # scale magnitude by 2.0/N,
+        # except for the DC and sr/2 (Nyquist frequency) components
+        fft *= 2.0 / N
+        fft[0] *= 0.5
+        if N % 2 == 0:
+            fft[-1] *= 0.5
 
-        return fft, frequencies
+        return fft, freq
 
     def normalize(self, peak_level=None, peak_dBFS=None):
         """Return audio object with normalized waveform

From 4f8e24c5ef59896583948d44242b6438267fe6ed Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Thu, 2 May 2024 18:13:45 -0400
Subject: [PATCH 23/43] resolve Audio.from_file complains if offset is np.int64
 #928

cast np int/float types to native types for timedelta

also fixed in two other places, one in audio.py and one in localization.py
---
 opensoundscape/audio.py        | 10 +++++++---
 opensoundscape/localization.py |  5 ++++-
 opensoundscape/utils.py        |  9 +++++++++
 tests/test_audio.py            |  6 ++++++
 4 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/opensoundscape/audio.py b/opensoundscape/audio.py
index c3faed55..49e20838 100644
--- a/opensoundscape/audio.py
+++ b/opensoundscape/audio.py
@@ -38,6 +38,7 @@
 import opensoundscape
 from opensoundscape.utils import generate_clip_times_df
 from opensoundscape.signal_processing import tdoa
+from opensoundscape.utils import cast_np_to_native
 
 DEFAULT_RESAMPLE_TYPE = "soxr_hq"  # changed from kaiser_fast in v0.9.0
 
@@ -354,6 +355,8 @@ def from_file(
 
             # if the offset > 0, we need to update the timestamp
             if "recording_start_time" in metadata and offset > 0:
+                # timedelta doesn't like np types, fix issue #928
+                offset = cast_np_to_native(offset)
                 metadata["recording_start_time"] += datetime.timedelta(seconds=offset)
 
         return cls(samples, sr, resample_type=resample_type, metadata=metadata)
@@ -533,9 +536,10 @@ def trim_samples(self, start_sample, end_sample):
         else:
             metadata = self.metadata.copy()
             if "recording_start_time" in metadata:
-                metadata["recording_start_time"] += datetime.timedelta(
-                    seconds=start_sample / self.sample_rate
-                )
+                # timedelta doesn't like np types, fix issue #928
+                seconds = start_sample / self.sample_rate
+                seconds = cast_np_to_native(seconds)
+                metadata["recording_start_time"] += datetime.timedelta(seconds=seconds)
 
             if "duration" in metadata:
                 metadata["duration"] = len(samples_trimmed) / self.sample_rate
diff --git a/opensoundscape/localization.py b/opensoundscape/localization.py
index 875cd092..c53353c7 100644
--- a/opensoundscape/localization.py
+++ b/opensoundscape/localization.py
@@ -1,10 +1,12 @@
 """Tools for localizing audio events from synchronized recording arrays"""
+
 import warnings
 import numpy as np
 import datetime
 
 from opensoundscape.audio import Audio
 from opensoundscape import audio
+from opensoundscape.utils import cast_np_to_native
 
 # define defaults for physical constants
 SPEED_OF_SOUND = 343  # default value in meters per second
@@ -635,8 +637,9 @@ def create_candidate_events(
                         if self.start_timestamp is None:
                             start_timestamp = None
                         else:
+                            # timedelta doesn't like np types, fix issue #928
                             start_timestamp = self.start_timestamp + datetime.timedelta(
-                                seconds=time_i
+                                seconds=cast_np_to_native(time_i)
                             )
                         # create a SpatialEvent for this cluster of simultaneous detections
                         candidate_events.append(
diff --git a/opensoundscape/utils.py b/opensoundscape/utils.py
index 30a78122..1eee5902 100644
--- a/opensoundscape/utils.py
+++ b/opensoundscape/utils.py
@@ -206,6 +206,15 @@ def generate_clip_times_df(
     return pd.DataFrame({"start_time": starts, "end_time": ends}).drop_duplicates()
 
 
+def cast_np_to_native(x):
+    # timedelta doesn't like np types, fix issue #928
+    if isinstance(x, np.integer):
+        return int(x)
+    elif isinstance(x, np.floating):
+        return float(x)
+    return x
+
+
 def make_clip_df(
     files,
     clip_duration,
diff --git a/tests/test_audio.py b/tests/test_audio.py
index 5853d25d..a2f11d03 100644
--- a/tests/test_audio.py
+++ b/tests/test_audio.py
@@ -376,6 +376,12 @@ def test_load_metadata(veryshort_wav_str):
     assert a.metadata["samplerate"] == 44100
 
 
+def test_load_metadata_int_offset(metadata_wav_str):
+    # addresses issue #928
+    Audio.from_file(metadata_wav_str, offset=np.int32(3), duration=0.1)
+    Audio.from_file(metadata_wav_str, offset=np.float32(3), duration=0.1)
+
+
 # currently don't know how to create a file with bad / no metadata
 # def test_load_metadata_warning(path_with_no_metadata):
 #     with pytest.raises(UserWarning)

From 5547e582eb1f65fbb6dd1092388ff86edcf3d820 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Thu, 9 May 2024 15:18:57 -0400
Subject: [PATCH 24/43] allow single path to from_raven_files resolves #993

BoxedAnnotations.from_raven_files raven_files and audio_files arguments can now be a path (str or pathlib.Path) rather than list of paths.
---
 opensoundscape/annotations.py | 25 +++++++++++++++++++++----
 tests/test_annotations.py     | 30 ++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/opensoundscape/annotations.py b/opensoundscape/annotations.py
index c661d6cf..6935a5b6 100644
--- a/opensoundscape/annotations.py
+++ b/opensoundscape/annotations.py
@@ -3,6 +3,7 @@
 includes BoxedAnnotations class and utilities to combine or "diff" annotations,
 etc.
 """
+
 from pathlib import Path
 import itertools
 import pandas as pd
@@ -115,9 +116,10 @@ def from_raven_files(
         """load annotations from Raven .txt files
 
         Args:
-            raven_files: list of raven .txt file paths (as str or pathlib.Path)
+            raven_files: list or iterable of raven .txt file paths (as str or pathlib.Path),
+                or a single file path (str or pathlib.Path). Eg ['path1.txt','path2.txt']
             audio_files: (list) optionally specify audio files corresponding to each
-                raven file (length should match raven_files)
+                raven file (length should match raven_files) Eg ['path1.txt','path2.txt']
                 - if None (default), .one_hot_clip_labels() will not be able to
                 check the duration of each audio file, and will raise an error
                 unless `full_duration` is passed as an argument
@@ -127,7 +129,7 @@ def from_raven_files(
                 - pass `None` to load the raven file without explicitly
                 assigning a column as the annotation column. The resulting
                 object's `.df` will have an `annotation` column with nan values!
-                NOTE: If `annotatino_column_name` is passed, this argument is ignored.
+                NOTE: If `annotation_column_name` is passed, this argument is ignored.
             annotation_column_name: (str) name of the column containing annotations
                 - default: None will use annotation-column_idx to find the annotation column
                 - if not None, this value overrides annotation_column_idx, and the column with
@@ -156,6 +158,21 @@ def from_raven_files(
             BoxedAnnotations object containing annotations from the Raven files
             (the .df attribute is a dataframe containing each annotation)
         """
+        # check input type of raven_files and audio_files
+        # if a single path is passed, convert to list
+        if isinstance(raven_files, (str, Path)):
+            raven_files = [raven_files]
+        else:
+            assert isinstance(
+                raven_files[0], (str, Path)
+            ), f"raven_files must be an iterable of string or pathlib.Path, or a single string or pathlib.Path. Got type: {type(raven_files)}"
+        if isinstance(audio_files, (str, Path)):
+            audio_files = [audio_files]
+        elif audio_files is not None:
+            assert isinstance(
+                audio_files[0], (str, Path)
+            ), f"audio_files must be an iterable of string or pathlib.Path, or a single string or pathlib.Path. Got type: {type(audio_files)}"
+
         all_file_dfs = []
 
         # mapping of Raven file columns to standard opensoundscape names
@@ -200,7 +217,7 @@ def from_raven_files(
                         df.columns[annotation_column_idx - 1]: "annotation",
                     }
                 )
-            else:  # None was passed to annotatino_column_idx
+            else:  # None was passed to annotation_column_idx
                 # we'll create an empty `annotation` column
                 df["annotation"] = np.nan
 
diff --git a/tests/test_annotations.py b/tests/test_annotations.py
index b88fc1bd..bf798012 100644
--- a/tests/test_annotations.py
+++ b/tests/test_annotations.py
@@ -523,3 +523,33 @@ def test_warn_if_file_wont_get_raven_output(raven_file, saved_raven_file):
 def test_assert_audio_files_annotation_files_match():
     with pytest.raises(AssertionError):
         BoxedAnnotations.from_raven_files(["path"], [])
+
+
+def test_from_raven_files(raven_file):
+    ba = BoxedAnnotations.from_raven_files([raven_file], ["path1"])
+    assert ba.annotation_files[0] == raven_file
+
+
+def test_from_raven_files_pathlib(raven_file):
+    ba = BoxedAnnotations.from_raven_files([Path(raven_file)], [Path("path1")])
+    assert str(ba.annotation_files[0]) == raven_file
+
+
+def test_from_raven_files_one_path(raven_file):
+    """now works passing str or Path rather than list"""
+    ba = BoxedAnnotations.from_raven_files(raven_file, ["path1"])
+    assert ba.annotation_files[0] == raven_file
+    assert len(ba.annotation_files) == 1
+    ba = BoxedAnnotations.from_raven_files(Path(raven_file), ["path1"])
+    assert str(ba.annotation_files[0]) == raven_file
+    assert len(ba.annotation_files) == 1
+
+
+def test_from_raven_files_one_audio_file(raven_file):
+    """now works passing str or Path rather than list"""
+    ba = BoxedAnnotations.from_raven_files(raven_file, "path1")
+    assert ba.audio_files[0] == "path1"
+    assert len(ba.audio_files) == 1
+    ba = BoxedAnnotations.from_raven_files(Path(raven_file), Path("path1"))
+    assert str(ba.audio_files[0]) == "path1"
+    assert len(ba.audio_files) == 1

From 9018e2adf9dca6e6e81c126202edf7b97d0ae428 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Thu, 9 May 2024 15:28:59 -0400
Subject: [PATCH 25/43] allow CNN.predict(file) resolves allow
 CNN.predict(path) #983 for convenience, predict(samples) wraps str or path
 into a 1-item list

---
 opensoundscape/ml/cnn.py |  4 ++++
 tests/test_cnn.py        | 15 ++++++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/opensoundscape/ml/cnn.py b/opensoundscape/ml/cnn.py
index 59ffeb4d..c2138c0f 100644
--- a/opensoundscape/ml/cnn.py
+++ b/opensoundscape/ml/cnn.py
@@ -120,6 +120,7 @@ def predict(
                 - a dataframe with index containing audio paths, OR
                 - a dataframe with multi-index (file, start_time, end_time), OR
                 - a list (or np.ndarray) of audio file paths
+                - a single file path (str or pathlib.Path)
             batch_size:
                 Number of files to load simultaneously [default: 1]
             num_workers:
@@ -180,6 +181,9 @@ def predict(
             for that sample will be np.nan
 
         """
+        # for convenience, convert str/pathlib.Path to list
+        if isinstance(samples, (str, Path)):
+            samples = [samples]
 
         # create dataloader to generate batches of AudioSamples
         dataloader = self.inference_dataloader_cls(
diff --git a/tests/test_cnn.py b/tests/test_cnn.py
index 8305ddc8..dde91786 100644
--- a/tests/test_cnn.py
+++ b/tests/test_cnn.py
@@ -214,6 +214,15 @@ def test_prediction_overlap(test_df):
     assert len(scores) == 3
 
 
+def test_predict_on_one_file(test_df):
+    model = cnn.CNN("resnet18", classes=[0, 1], sample_duration=10)
+    p = test_df.index.values[0]
+    scores = model.predict(p)
+    assert len(scores) == 1
+    scores = model.predict(Path(p))
+    assert len(scores) == 1
+
+
 def test_multi_target_prediction(train_df, test_df):
     model = cnn.CNN("resnet18", classes=[0, 1], sample_duration=5.0)
     scores = model.predict(test_df)
@@ -452,9 +461,9 @@ def test_predict_raise_errors(short_file_df, onemin_wav_df):
     )  # use 2 files. 1 file wrong is manually caught and userwarning raised
     model = cnn.CNN("resnet18", classes=["class"], sample_duration=30)
     model.preprocessor.pipeline.bandpass.bypass = False  # ensure bandpass happens
-    model.preprocessor.pipeline.bandpass.params[
-        "low"
-    ] = 1  # add a bad param. this should be min_f
+    model.preprocessor.pipeline.bandpass.params["low"] = (
+        1  # add a bad param. this should be min_f
+    )
 
     with pytest.raises(PreprocessingError):
         model.predict(files_df, raise_errors=True)

From 9c88f7003c50345f71f33d896a7495feabe40575 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Thu, 9 May 2024 15:39:13 -0400
Subject: [PATCH 26/43] use _spawn in convert_labels

resolves one_hot_clip_labels breaks after using convert_labels on BoxedAnnotations object #916

BoxedAnnotations had a _spawn method but was calling its init method instead with just the df argument at the end of convert_labels. This caused loss of other attributes. Now calls _spawn to retain all attributes not explicitly passed. Added check within test_convert_labels.
---
 opensoundscape/annotations.py | 2 +-
 tests/test_annotations.py     | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/opensoundscape/annotations.py b/opensoundscape/annotations.py
index 6935a5b6..d677ed4e 100644
--- a/opensoundscape/annotations.py
+++ b/opensoundscape/annotations.py
@@ -799,7 +799,7 @@ def convert_labels(self, conversion_table):
             for k in df["annotation"]
         ]
 
-        return BoxedAnnotations(df)
+        return self._spawn(df=df)
 
 
 def diff(base_annotations, comparison_annotations):
diff --git a/tests/test_annotations.py b/tests/test_annotations.py
index bf798012..5e84d076 100644
--- a/tests/test_annotations.py
+++ b/tests/test_annotations.py
@@ -302,8 +302,10 @@ def test_one_hot_clip_labels_overlap(boxed_annotations):
 
 
 def test_convert_labels(boxed_annotations):
-    boxed_annotations = boxed_annotations.convert_labels({"a": "c"})
-    assert set(boxed_annotations.df["annotation"]) == {"b", "c", None}
+    boxed_annotations1 = boxed_annotations.convert_labels({"a": "c"})
+    assert set(boxed_annotations1.df["annotation"]) == {"b", "c", None}
+    # should retain properties, issue #916
+    assert boxed_annotations1.audio_files == boxed_annotations.audio_files
 
 
 def test_convert_labels_df(boxed_annotations):

From ebb4c6f48be8930791e82833a3405004bd3e5e0e Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Thu, 9 May 2024 16:38:38 -0400
Subject: [PATCH 27/43] improve AudioTrim action

resolves random_trim_audio in default SpectrogramPreprocessor doesn't work #904
---
 opensoundscape/preprocess/actions.py       | 94 ++++++++++++----------
 opensoundscape/preprocess/preprocessors.py | 12 ++-
 tests/test_actions.py                      | 28 +++----
 3 files changed, 75 insertions(+), 59 deletions(-)

diff --git a/opensoundscape/preprocess/actions.py b/opensoundscape/preprocess/actions.py
index 2e236481..1b4c1c4c 100644
--- a/opensoundscape/preprocess/actions.py
+++ b/opensoundscape/preprocess/actions.py
@@ -59,7 +59,11 @@ def set(self, **kwargs):
             f"unexpected arguments: {unmatched_args}. "
             f"The valid arguments and current values are: \n{self.params}"
         )
-        self.params.update(pd.Series(kwargs, dtype=object))
+        # Series.update ignores nan/None values, so we use dictionary.update method
+        new_params = dict(self.params)
+        new_params.update(kwargs)
+        self.params = pd.Series(new_params, dtype=object)
+        # self.params.update(pd.Series(kwargs, dtype=object))
 
     def get(self, arg):
         return self.params[arg]
@@ -159,7 +163,7 @@ class AudioTrim(Action):
     """Action to trim/extend audio to desired length
 
     Args:
-        see actions.trim_audio
+        see actions.audio_trim()
     """
 
     def __init__(self, **kwargs):
@@ -169,61 +173,69 @@ def go(self, sample, **kwargs):
         self.action_fn(sample, **dict(self.params, **kwargs))
 
 
-def trim_audio(sample, extend=True, random_trim=False, tol=1e-5):
-    """trim audio clips (Audio -> Audio)
+def trim_audio(sample, target_duration, extend=True, random_trim=False, tol=1e-6):
+    """trim audio clips from t=0 or random position (Audio -> Audio)
+
+    Trims an audio file to desired length.
 
-    Trims an audio file to desired length
     Allows audio to be trimmed from start or from a random time
-    Optionally extends audio shorter than clip_length with silence
+
+    Optionally extends audio shorter than clip_length to sample.duration by
+    appending silence.
 
     Args:
         sample: AudioSample with .data=Audio object, .duration as sample duration
+        target_duration: length of resulting clip in seconds. If None,
+            no trimming is performed.
         extend: if True, clips shorter than sample.duration are
-            extended with silence to required length
+            extended with silence to required length [Default: True]
         random_trim: if True, chooses a random segment of length sample.duration
             from the input audio. If False, the file is trimmed from 0 seconds
-            to sample.duration seconds.
-        tol: tolerance for considering a clip to be of the correct length (sec)
+            to sample.duration seconds. [Default: False]
+        tol: tolerance for considering a clip to be long enough (sec),
+            when raising an error for short clips [Default: 1e-6]
 
-    Returns:
-        trimmed audio
+    Effects:
+        Updates the sample's .data, .start_time, and .duration attributes
     """
+
+    if target_duration is None:
+        return
+
     audio = sample.data
 
     if len(audio.samples) == 0:
         raise ValueError("recieved zero-length audio")
 
-    if sample.target_duration is not None:
-        if audio.duration + tol <= sample.target_duration:
-            # input audio is not as long as desired length
-            if extend:  # extend clip sith silence
-                audio = audio.extend_to(sample.target_duration)
-            else:
-                raise ValueError(
-                    f"the length of the original file ({audio.duration} "
-                    f"sec) was less than the length to extract "
-                    f"({sample.target_duration} sec). To extend short "
-                    f"clips, use extend=True"
-                )
-        if random_trim:
-            # uniformly randomly choose clip time from full audio
-            extra_time = audio.duration - sample.target_duration
-            start_time = np.random.uniform() * extra_time
-        else:
-            start_time = 0
-
-        end_time = start_time + sample.target_duration
-        audio = audio.trim(start_time, end_time)
-
-        # update the sample
-        sample.data = audio
-        if sample.start_time is None:
-            sample.start_time = start_time
-        else:
-            sample.start_time += start_time
-        sample.duration = sample.target_duration
+    # input audio is not as long as desired length
+    if extend:  # extend clip sith silence
+        audio = audio.extend_to(target_duration)
+    else:
+        if audio.duration + tol < target_duration:
+            raise ValueError(
+                f"the length of the original file ({audio.duration} "
+                f"sec) was less than the length to extract "
+                f"({target_duration} sec). To extend short "
+                f"clips, use extend=True"
+            )
+    if random_trim:
+        # uniformly randomly choose clip time from full audio
+        # such that a full-length clip can be extracted
+        extra_time = audio.duration - target_duration
+        start_time = np.random.uniform() * extra_time
+    else:
+        start_time = 0
 
-    return sample
+    end_time = start_time + target_duration
+    audio = audio.trim(start_time, end_time)
+
+    # update the sample in-place
+    sample.data = audio
+    if sample.start_time is None:
+        sample.start_time = start_time
+    else:
+        sample.start_time += start_time
+    sample.duration = target_duration
 
 
 class SpectrogramToTensor(Action):
diff --git a/opensoundscape/preprocess/preprocessors.py b/opensoundscape/preprocess/preprocessors.py
index 7bf1288a..07e5ee9a 100644
--- a/opensoundscape/preprocess/preprocessors.py
+++ b/opensoundscape/preprocess/preprocessors.py
@@ -196,6 +196,7 @@ def _generate_sample(self, sample):
         """
         # handle paths or pd.Series as input for `sample`
         if isinstance(sample, tuple):
+            # assume duration should be self.sample_duration
             path, start = sample
             assert isinstance(
                 path, (str, Path)
@@ -277,9 +278,16 @@ def __init__(
                 # references AudioSample attributes: start_time and duration
                 "load_audio": AudioClipLoader(),
                 # if we are augmenting and get a long file, take a random trim from it
-                "random_trim_audio": AudioTrim(is_augmentation=True, random_trim=True),
+                "random_trim_audio": AudioTrim(
+                    target_duration=sample_duration,
+                    is_augmentation=True,
+                    random_trim=True,
+                ),
                 # otherwise, we expect to get the correct duration. no random trim
-                "trim_audio": AudioTrim(),  # trim or extend (w/silence) clips to correct length
+                # trim or extend (w/silence) clips to correct length
+                "trim_audio": AudioTrim(
+                    target_duration=sample_duration, random_trim=False
+                ),
                 # convert Audio object to Spectrogram
                 "to_spec": Action(Spectrogram.from_audio),
                 # bandpass to 0-11.025 kHz (to ensure all outputs have same scale in y-axis)
diff --git a/tests/test_actions.py b/tests/test_actions.py
index 76264de0..bb6417f0 100644
--- a/tests/test_actions.py
+++ b/tests/test_actions.py
@@ -88,44 +88,40 @@ def test_audio_clip_loader_clip(sample_clip):
 
 
 def test_action_trim(sample_audio):
-    action = actions.AudioTrim()
-    sample_audio.target_duration = 1.0
+    action = actions.AudioTrim(target_duration=1)
+    sample_audio.target_duration = 2  # should be ignored
     action.go(sample_audio)
     assert math.isclose(sample_audio.data.duration, 1.0, abs_tol=1e-4)
 
 
 def test_action_random_trim(sample_audio):
     sample2 = copy.deepcopy(sample_audio)
-    action = actions.AudioTrim(random_trim=True)
-    original_duration = sample_audio.data.duration
-    sample_audio.target_duration = sample2.target_duration = 0.01
+    action = actions.AudioTrim(target_duration=0.001, random_trim=True)
     action.go(sample_audio)
     action.go(sample2)
-    assert math.isclose(sample_audio.data.duration, 0.01, abs_tol=1e-4)
+    assert math.isclose(sample_audio.data.duration, 0.001, abs_tol=1e-4)
     # random trim should result in 2 different samples
+    assert not math.isclose(sample_audio.start_time, sample2.start_time, abs_tol=1e-9)
     assert not np.array_equal(sample_audio.data.samples, sample2.data.samples)
 
 
-def test_audio_trimmer_default(sample_audio):
-    """should not trim if no extra args"""
-    action = actions.AudioTrim()
-    sample_audio.target_duration = None
+def test_audio_trimmer_duration_None(sample_audio):
+    """should not trim if target_duration=None"""
+    action = actions.AudioTrim(target_duration=None)
     action.go(sample_audio)
     assert math.isclose(sample_audio.data.duration, 0.142086167800, abs_tol=1e-4)
 
 
 def test_audio_trimmer_raises_error_on_short_clip(sample_audio):
-    action = actions.AudioTrim()
-    sample_audio.target_duration = 10
+    action = actions.AudioTrim(target_duration=10, extend=False)
     with pytest.raises(ValueError):
-        action.go(sample_audio, extend=False)
+        action.go(sample_audio)
 
 
 def test_audio_trimmer_extend_short_clip(sample_audio):
-    action = actions.AudioTrim()
-    sample_audio.target_duration = 1
+    action = actions.AudioTrim(target_duration=10)
     action.go(sample_audio)  # extend=True is default
-    assert math.isclose(sample_audio.data.duration, 1.0, abs_tol=1e-4)
+    assert math.isclose(sample_audio.data.duration, 10, abs_tol=1e-4)
 
 
 def test_audio_random_gain(sample_audio):

From 052bf7c8d25fa641a04199ef9c796372d7f9012c Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Thu, 9 May 2024 16:43:18 -0400
Subject: [PATCH 28/43] resolve use kwargs when calling .__call__ #927

doesn't implement **kwargs option, but resolves the immediate bad-practice code of specifying dataloader, wandb_session, and progress_bar as positional arguments rather than keyword arguments. This will avoid silent failure if a user implements a different __call__ method with different arguments
---
 opensoundscape/ml/cnn.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/opensoundscape/ml/cnn.py b/opensoundscape/ml/cnn.py
index c2138c0f..3a0858d8 100644
--- a/opensoundscape/ml/cnn.py
+++ b/opensoundscape/ml/cnn.py
@@ -233,7 +233,12 @@ def predict(
 
         ### Prediction/Inference ###
         # iterate dataloader and run inference (forward pass) to generate scores
-        pred_scores = self.__call__(dataloader, wandb_session, progress_bar)
+        # TODO: allow arbitrary **kwargs to be passed to __call__?
+        pred_scores = self.__call__(
+            dataloader=dataloader,
+            wandb_session=wandb_session,
+            progress_bar=progress_bar,
+        )
 
         ### Apply activation layer ### #TODO: test speed vs. doing it in __call__ on batches
         pred_scores = apply_activation_layer(pred_scores, activation_layer)

From 0f41d3b4aac80305dc4512faf33f34d7e9edb1cd Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Thu, 9 May 2024 17:26:55 -0400
Subject: [PATCH 29/43] make CNN.device a property

resolves avoid mismatch between model.device='string' and torch.device #888

allows us to convert string to torch.device when user changes the value eg m.device='cpu'
---
 opensoundscape/ml/cnn.py | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/opensoundscape/ml/cnn.py b/opensoundscape/ml/cnn.py
index 3a0858d8..26a1d4ac 100644
--- a/opensoundscape/ml/cnn.py
+++ b/opensoundscape/ml/cnn.py
@@ -83,6 +83,13 @@ def __init__(self, classes):
         ### metrics ###
         self.prediction_threshold = 0.5  # used for threshold-specific metrics
 
+        ### network device ###
+        # automatically gpu (default is 'cuda:0') if available
+        # can set after init, eg model.device='cuda:1'
+        # network and samples are moved to device during training/inference
+        # devices could be 'cuda:0', torch.device('cuda'), torch.device('cpu'), torch.device('mps') etc
+        self.device = _gpu_if_available()
+
     def _log(self, message, level=1):
         txt = str(message)
         if self.logging_level >= level and self.log_file is not None:
@@ -486,13 +493,6 @@ def __init__(
             self.architecture_name = str(type(architecture))
         self.network = architecture
 
-        ### network device ###
-        # automatically gpu (default is 'cuda:0') if available
-        # can override after init, eg model.device='cuda:1'
-        # network and samples are moved to gpu during training/inference
-        # devices could be 'cuda:0', torch.device('cuda'), torch.device('cpu'), torch.device('mps') etc
-        self.device = _gpu_if_available()
-
         ### sample loading/preprocessing ###
         # preprocessor will have attributes .sample_duration (seconds)
         # and height, width, channels for output shape
@@ -1401,6 +1401,19 @@ def avg_over_channels(img):
         # return list of AudioSamples containing .cam attributes
         return generated_samples
 
+    @property
+    def device(self):
+        return self._device
+    
+    @device.setter
+    def device(self, device):
+        """
+        Set the device to use in train/predict, casting strings to torch.device datatype
+
+        Args: 
+            device: a torch.device object or str such as 'cuda:0', 'mps', 'cpu'
+        """
+        self._device = torch.device(device)
 
 def use_resample_loss(
     model, train_df

From 5f67259d5edb1e093a91c6a6c253c0c46dda83a1 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Thu, 9 May 2024 17:46:44 -0400
Subject: [PATCH 30/43] update test lengths match

---
 tests/test_annotations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_annotations.py b/tests/test_annotations.py
index 5e84d076..bdf04d5d 100644
--- a/tests/test_annotations.py
+++ b/tests/test_annotations.py
@@ -524,7 +524,7 @@ def test_warn_if_file_wont_get_raven_output(raven_file, saved_raven_file):
 
 def test_assert_audio_files_annotation_files_match():
     with pytest.raises(AssertionError):
-        BoxedAnnotations.from_raven_files(["path"], [])
+        BoxedAnnotations.from_raven_files(["path"], ['a','b'])
 
 
 def test_from_raven_files(raven_file):

From 3b7827a1815b01cd32fcbe33bf0ff2ea9ab2acd3 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Mon, 13 May 2024 07:59:14 -0400
Subject: [PATCH 31/43] check for empty lists and fix validation logic

---
 opensoundscape/annotations.py | 26 +++++++++++++++-----------
 tests/test_annotations.py     |  7 ++++++-
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/opensoundscape/annotations.py b/opensoundscape/annotations.py
index d677ed4e..096eb457 100644
--- a/opensoundscape/annotations.py
+++ b/opensoundscape/annotations.py
@@ -163,16 +163,27 @@ def from_raven_files(
         if isinstance(raven_files, (str, Path)):
             raven_files = [raven_files]
         else:
+            assert len(raven_files)>0, "raven_files must be a non-empty list or iterable"
             assert isinstance(
                 raven_files[0], (str, Path)
             ), f"raven_files must be an iterable of string or pathlib.Path, or a single string or pathlib.Path. Got type: {type(raven_files)}"
+            
         if isinstance(audio_files, (str, Path)):
             audio_files = [audio_files]
-        elif audio_files is not None:
-            assert isinstance(
-                audio_files[0], (str, Path)
-            ), f"audio_files must be an iterable of string or pathlib.Path, or a single string or pathlib.Path. Got type: {type(audio_files)}"
+        else:
+            if audio_files is not None:
+                assert isinstance(
+                    audio_files[0], (str, Path)
+                ), f"audio_files must be an iterable of string or pathlib.Path, or a single string or pathlib.Path. Got type: {type(audio_files)}"
 
+        if audio_files is not None:
+            assert len(audio_files) == len(
+                raven_files
+            ), """
+            `audio_files` and `raven_files` lists must have one-to-one correspondence,
+            but their lengths did not match.
+            """
+                    
         all_file_dfs = []
 
         # mapping of Raven file columns to standard opensoundscape names
@@ -186,13 +197,6 @@ def from_raven_files(
         # update defaults with any user-specified mappings
         column_mapping_dict.update(column_mapping_dict or {})
 
-        if audio_files is not None:
-            assert len(audio_files) == len(
-                raven_files
-            ), """
-            `audio_files` and `raven_files` lists must have one-to-one correspondence,
-            but their lengths did not match.
-            """
         for i, raven_file in enumerate(raven_files):
             df = pd.read_csv(raven_file, delimiter="\t")
             if annotation_column_name is not None:
diff --git a/tests/test_annotations.py b/tests/test_annotations.py
index 5e84d076..3d26d532 100644
--- a/tests/test_annotations.py
+++ b/tests/test_annotations.py
@@ -524,7 +524,12 @@ def test_warn_if_file_wont_get_raven_output(raven_file, saved_raven_file):
 
 def test_assert_audio_files_annotation_files_match():
     with pytest.raises(AssertionError):
-        BoxedAnnotations.from_raven_files(["path"], [])
+        BoxedAnnotations.from_raven_files(["path"], ["a","b"])
+
+
+def test_assert_audio_files_annotation_files_empty():
+    with pytest.raises(AssertionError):
+        BoxedAnnotations.from_raven_files([], [])
 
 
 def test_from_raven_files(raven_file):

From 13edc52b5a9df61c7e9549c46f92dab2ba966c8a Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Mon, 13 May 2024 08:02:35 -0400
Subject: [PATCH 32/43] black

---
 tests/test_annotations.py | 2 +-
 tests/test_cnn.py         | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/test_annotations.py b/tests/test_annotations.py
index bdf04d5d..1bb38a7e 100644
--- a/tests/test_annotations.py
+++ b/tests/test_annotations.py
@@ -524,7 +524,7 @@ def test_warn_if_file_wont_get_raven_output(raven_file, saved_raven_file):
 
 def test_assert_audio_files_annotation_files_match():
     with pytest.raises(AssertionError):
-        BoxedAnnotations.from_raven_files(["path"], ['a','b'])
+        BoxedAnnotations.from_raven_files(["path"], ["a", "b"])
 
 
 def test_from_raven_files(raven_file):
diff --git a/tests/test_cnn.py b/tests/test_cnn.py
index dde91786..a5a8a2ee 100644
--- a/tests/test_cnn.py
+++ b/tests/test_cnn.py
@@ -461,9 +461,8 @@ def test_predict_raise_errors(short_file_df, onemin_wav_df):
     )  # use 2 files. 1 file wrong is manually caught and userwarning raised
     model = cnn.CNN("resnet18", classes=["class"], sample_duration=30)
     model.preprocessor.pipeline.bandpass.bypass = False  # ensure bandpass happens
-    model.preprocessor.pipeline.bandpass.params["low"] = (
-        1  # add a bad param. this should be min_f
-    )
+    # add a bad param. this should be min_f
+    model.preprocessor.pipeline.bandpass.params["low"] = 1
 
     with pytest.raises(PreprocessingError):
         model.predict(files_df, raise_errors=True)

From c1bf20dbd9e3648772f150aca6f22a9d9f0a18e0 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Mon, 13 May 2024 08:08:05 -0400
Subject: [PATCH 33/43] fix merge

---
 tests/test_annotations.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/test_annotations.py b/tests/test_annotations.py
index e2cf54a5..26bf8563 100644
--- a/tests/test_annotations.py
+++ b/tests/test_annotations.py
@@ -525,14 +525,11 @@ def test_warn_if_file_wont_get_raven_output(raven_file, saved_raven_file):
 def test_assert_audio_files_annotation_files_match():
     with pytest.raises(AssertionError):
         BoxedAnnotations.from_raven_files(["path"], ["a", "b"])
-=======
-        BoxedAnnotations.from_raven_files(["path"], ["a","b"])
 
 
 def test_assert_audio_files_annotation_files_empty():
     with pytest.raises(AssertionError):
         BoxedAnnotations.from_raven_files([], [])
->>>>>>> develop
 
 
 def test_from_raven_files(raven_file):

From d6a115933b96db78b8c55aea4ee95b64d1e1b1e0 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Mon, 13 May 2024 11:44:17 -0400
Subject: [PATCH 34/43] black

---
 opensoundscape/annotations.py | 8 +++++---
 opensoundscape/ml/cnn.py      | 5 +++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/opensoundscape/annotations.py b/opensoundscape/annotations.py
index 096eb457..584e7b25 100644
--- a/opensoundscape/annotations.py
+++ b/opensoundscape/annotations.py
@@ -163,11 +163,13 @@ def from_raven_files(
         if isinstance(raven_files, (str, Path)):
             raven_files = [raven_files]
         else:
-            assert len(raven_files)>0, "raven_files must be a non-empty list or iterable"
+            assert (
+                len(raven_files) > 0
+            ), "raven_files must be a non-empty list or iterable"
             assert isinstance(
                 raven_files[0], (str, Path)
             ), f"raven_files must be an iterable of string or pathlib.Path, or a single string or pathlib.Path. Got type: {type(raven_files)}"
-            
+
         if isinstance(audio_files, (str, Path)):
             audio_files = [audio_files]
         else:
@@ -183,7 +185,7 @@ def from_raven_files(
             `audio_files` and `raven_files` lists must have one-to-one correspondence,
             but their lengths did not match.
             """
-                    
+
         all_file_dfs = []
 
         # mapping of Raven file columns to standard opensoundscape names
diff --git a/opensoundscape/ml/cnn.py b/opensoundscape/ml/cnn.py
index 26a1d4ac..bb0ce47b 100644
--- a/opensoundscape/ml/cnn.py
+++ b/opensoundscape/ml/cnn.py
@@ -1404,17 +1404,18 @@ def avg_over_channels(img):
     @property
     def device(self):
         return self._device
-    
+
     @device.setter
     def device(self, device):
         """
         Set the device to use in train/predict, casting strings to torch.device datatype
 
-        Args: 
+        Args:
             device: a torch.device object or str such as 'cuda:0', 'mps', 'cpu'
         """
         self._device = torch.device(device)
 
+
 def use_resample_loss(
     model, train_df
 ):  # TODO revisit how this work. Should be able to set loss_cls=ResampleLoss()

From 3affc8bd771f5bb3ecaf2afd7305f456c53e09d6 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Mon, 13 May 2024 11:44:32 -0400
Subject: [PATCH 35/43] black

---
 opensoundscape/annotations.py | 8 +++++---
 opensoundscape/ml/cnn.py      | 5 +++--
 tests/test_annotations.py     | 2 +-
 tests/test_cnn.py             | 6 +++---
 4 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/opensoundscape/annotations.py b/opensoundscape/annotations.py
index 096eb457..584e7b25 100644
--- a/opensoundscape/annotations.py
+++ b/opensoundscape/annotations.py
@@ -163,11 +163,13 @@ def from_raven_files(
         if isinstance(raven_files, (str, Path)):
             raven_files = [raven_files]
         else:
-            assert len(raven_files)>0, "raven_files must be a non-empty list or iterable"
+            assert (
+                len(raven_files) > 0
+            ), "raven_files must be a non-empty list or iterable"
             assert isinstance(
                 raven_files[0], (str, Path)
             ), f"raven_files must be an iterable of string or pathlib.Path, or a single string or pathlib.Path. Got type: {type(raven_files)}"
-            
+
         if isinstance(audio_files, (str, Path)):
             audio_files = [audio_files]
         else:
@@ -183,7 +185,7 @@ def from_raven_files(
             `audio_files` and `raven_files` lists must have one-to-one correspondence,
             but their lengths did not match.
             """
-                    
+
         all_file_dfs = []
 
         # mapping of Raven file columns to standard opensoundscape names
diff --git a/opensoundscape/ml/cnn.py b/opensoundscape/ml/cnn.py
index 26a1d4ac..bb0ce47b 100644
--- a/opensoundscape/ml/cnn.py
+++ b/opensoundscape/ml/cnn.py
@@ -1404,17 +1404,18 @@ def avg_over_channels(img):
     @property
     def device(self):
         return self._device
-    
+
     @device.setter
     def device(self, device):
         """
         Set the device to use in train/predict, casting strings to torch.device datatype
 
-        Args: 
+        Args:
             device: a torch.device object or str such as 'cuda:0', 'mps', 'cpu'
         """
         self._device = torch.device(device)
 
+
 def use_resample_loss(
     model, train_df
 ):  # TODO revisit how this work. Should be able to set loss_cls=ResampleLoss()
diff --git a/tests/test_annotations.py b/tests/test_annotations.py
index 3d26d532..26bf8563 100644
--- a/tests/test_annotations.py
+++ b/tests/test_annotations.py
@@ -524,7 +524,7 @@ def test_warn_if_file_wont_get_raven_output(raven_file, saved_raven_file):
 
 def test_assert_audio_files_annotation_files_match():
     with pytest.raises(AssertionError):
-        BoxedAnnotations.from_raven_files(["path"], ["a","b"])
+        BoxedAnnotations.from_raven_files(["path"], ["a", "b"])
 
 
 def test_assert_audio_files_annotation_files_empty():
diff --git a/tests/test_cnn.py b/tests/test_cnn.py
index dde91786..4c0bf799 100644
--- a/tests/test_cnn.py
+++ b/tests/test_cnn.py
@@ -461,9 +461,9 @@ def test_predict_raise_errors(short_file_df, onemin_wav_df):
     )  # use 2 files. 1 file wrong is manually caught and userwarning raised
     model = cnn.CNN("resnet18", classes=["class"], sample_duration=30)
     model.preprocessor.pipeline.bandpass.bypass = False  # ensure bandpass happens
-    model.preprocessor.pipeline.bandpass.params["low"] = (
-        1  # add a bad param. this should be min_f
-    )
+    model.preprocessor.pipeline.bandpass.params[
+        "low"
+    ] = 1  # add a bad param. this should be min_f
 
     with pytest.raises(PreprocessingError):
         model.predict(files_df, raise_errors=True)

From c35d712cb99700ccf374717f6d0f4fe289a3a40c Mon Sep 17 00:00:00 2001
From: LeonardoViotti <leonardoviotti@gmail.com>
Date: Thu, 16 May 2024 15:04:58 -0400
Subject: [PATCH 36/43] Cleaned dtypes and assignment function in
 preprocessor.forward().

---
 opensoundscape/preprocess/preprocessors.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/opensoundscape/preprocess/preprocessors.py b/opensoundscape/preprocess/preprocessors.py
index 7bf1288a..19b59f9d 100644
--- a/opensoundscape/preprocess/preprocessors.py
+++ b/opensoundscape/preprocess/preprocessors.py
@@ -132,7 +132,7 @@ def forward(
         # create AudioSample from input path
         sample = self._generate_sample(sample)
         if trace:
-            sample.trace = pd.Series(index=self.pipeline.index)
+            sample.trace = pd.Series(index=self.pipeline.index, dtype=str)
 
         if profile:
             sample.runtime = pd.Series(index=self.pipeline.index)
@@ -146,13 +146,13 @@ def forward(
                 if type(action) == break_on_type or k == break_on_key:
                     if trace:
                         # saved "output" of this step informs user pipeline was stopped
-                        sample.trace[k] = f"## Pipeline terminated ## {sample.trace[k]}"
+                        sample.trace.loc[k] = f"## Pipeline terminated ## {sample.trace[k]}"
                     break
                 if action.bypass:
                     continue
                 if action.is_augmentation and bypass_augmentations:
                     if trace:
-                        sample.trace[k] = f"## Bypassed ## {sample.trace[k]}"
+                        sample.trace.loc[k] = f"## Bypassed ## {sample.trace[k]}"
                     continue
 
                 # perform the action (modifies the AudioSample in-place)

From 556a7ad6055b7e955fb45e13caa5bc3ad1336bad Mon Sep 17 00:00:00 2001
From: LeonardoViotti <leonardoviotti@gmail.com>
Date: Thu, 16 May 2024 15:06:18 -0400
Subject: [PATCH 37/43] Black formatted.

---
 opensoundscape/preprocess/preprocessors.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/opensoundscape/preprocess/preprocessors.py b/opensoundscape/preprocess/preprocessors.py
index 19b59f9d..bd7115d1 100644
--- a/opensoundscape/preprocess/preprocessors.py
+++ b/opensoundscape/preprocess/preprocessors.py
@@ -146,7 +146,9 @@ def forward(
                 if type(action) == break_on_type or k == break_on_key:
                     if trace:
                         # saved "output" of this step informs user pipeline was stopped
-                        sample.trace.loc[k] = f"## Pipeline terminated ## {sample.trace[k]}"
+                        sample.trace.loc[
+                            k
+                        ] = f"## Pipeline terminated ## {sample.trace[k]}"
                     break
                 if action.bypass:
                     continue

From 5fb567a1b87aa733650f1a38a2e7845040ee7abf Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Mon, 20 May 2024 14:35:25 -0400
Subject: [PATCH 38/43] remove output dir

this was added accidentally at some point
---
 output/great_plains_toad.wav_prdf.csv | 6 ------
 output/silence_10s.mp3_prdf.csv       | 6 ------
 2 files changed, 12 deletions(-)
 delete mode 100644 output/great_plains_toad.wav_prdf.csv
 delete mode 100644 output/silence_10s.mp3_prdf.csv

diff --git a/output/great_plains_toad.wav_prdf.csv b/output/great_plains_toad.wav_prdf.csv
deleted file mode 100644
index d473a035..00000000
--- a/output/great_plains_toad.wav_prdf.csv
+++ /dev/null
@@ -1,6 +0,0 @@
-species,pulse_rate_low,pulse_rate_high,low_f,high_f,reject_low,reject_high,window_length,score,t,max_score,time_of_max_score
-Chorus frog,16,40,2500,4000,0,2000,2.0,"[3.2500390144493193e-06, 5.801447082919901e-06, 9.551969077025937e-06, 1.4846515109012422e-05, 7.20137742853964e-06, 5.479920164111909e-06, 7.678316376652447e-06, 8.12659428177074e-06, 5.672717116487688e-06, 6.653043750823905e-06, 4.195701290636413e-06, 3.7357589889274674e-06, 6.6037131566419975e-06, 1.0906428101669344e-05, 1.5674260578693147e-05, 1.0072739566848058e-05, 1.1340738797339192e-05, 1.068871284672846e-05, 8.506884345200445e-06, 1.1104020564912416e-05, 1.120065809557751e-05, 1.4966454596842588e-05, 1.8958975278076696e-05]","[0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0, 30.0, 32.0, 34.0, 36.0, 38.0, 40.0, 42.0, 44.0]",1.8958975278076696e-05,44.0
-great plains toad,7,18,2000,3000,0,1000,15.0,"[0.0002718100048208053, 0.0005690390622569361, 0.00047514692, 0.00040189223, 0.00057994, 0.00034554987, 0.00039256216, 0.00043337233, 0.00048808544, 0.00051435985, 0.0007436124, 0.0007989852889683302, 0.0008075010697968804, 0.0005887583053883069, 0.00043281727, 0.0003687196, 0.00045033346, 0.0004648417, 0.0006286908829177401, 0.000908370064970198, 0.0001388154024578076, 0.0001552719403460541, 0.00022877016916655616]","[0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0, 30.0, 32.0, 34.0, 36.0, 38.0, 40.0, 42.0, 44.0]",0.000908370064970198,38.0
-northern leopard frog,15,30,1000,1500,0,500,2.0,"[1.6637013860045666e-06, 3.1423272743529193e-06, 2.9155657794036147e-06, 4.701847271423706e-06, 3.6979163281179884e-06, 2.628235173919404e-06, 6.111995214169149e-06, 4.076330914856003e-06, 3.4581424063852097e-06, 7.116332895736921e-06, 3.7639912573265763e-06, 2.032287813134638e-06, 3.7557337204445752e-06, 5.892896770157595e-06, 7.470352687984107e-06, 6.670304596597368e-06, 7.206266780995212e-06, 6.29987338439374e-06, 5.08101927392769e-06, 5.585508731252252e-06, 6.422839082373554e-07, 5.817734516917052e-07, 1.1029788201200245e-06]","[0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0, 30.0, 32.0, 34.0, 36.0, 38.0, 40.0, 42.0, 44.0]",7.470352687984107e-06,28.0
-spadefoot toad,1,2,1000,2000,0,500,5.0,"[1.0594443312105714e-05, 2.4291446978269906e-05, 1.027778204489321e-05, 2.669954856170439e-05, 1.0010965250848444e-05, 2.9950880303490986e-06, 0.00010319260276200067, 2.362683986683449e-05, 4.634107276346073e-05, 7.130055482154363e-05, 3.34230484691724e-05, 4.9024073148100306e-05, 2.667075409194549e-05, 4.139503785398175e-05, 2.2421849948192466e-05, 4.7741522350134754e-05, 2.272280182926626e-05, 2.484097596097967e-05, 3.0864623961210006e-05, 2.0884407081110413e-05, 2.954356264469807e-05, 2.5428362564894202e-05, 3.864825752053816e-05]","[0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0, 30.0, 32.0, 34.0, 36.0, 38.0, 40.0, 42.0, 44.0]",0.00010319260276200067,12.0
-spadefoot toad (81Hz),70,90,1000,2000,0,500,0.5,"[3.0814845420079957e-06, 4.306986380019167e-06, 5.415654628289309e-06, 6.054611316641504e-06, 9.106696253076954e-06, 8.776539713632198e-06, 7.850333336890567e-06, 1.0805945797457844e-05, 1.3579018869263782e-05, 1.4185579691681417e-05, 6.628462615562992e-06, 7.68835627553044e-06, 7.279189188754188e-06, 1.069030912595832e-05, 1.18920172932865e-05, 1.2878259247705957e-05, 1.1367913911868312e-05, 9.535337598859539e-06, 8.566347406329984e-06, 1.1300234244738395e-05, 6.645809103973028e-06, 1.0086483189600105e-05, 1.206968388863879e-05]","[0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0, 30.0, 32.0, 34.0, 36.0, 38.0, 40.0, 42.0, 44.0]",1.4185579691681417e-05,18.0
diff --git a/output/silence_10s.mp3_prdf.csv b/output/silence_10s.mp3_prdf.csv
deleted file mode 100644
index fe6d85bd..00000000
--- a/output/silence_10s.mp3_prdf.csv
+++ /dev/null
@@ -1,6 +0,0 @@
-species,pulse_rate_low,pulse_rate_high,low_f,high_f,reject_low,reject_high,window_length,score,t,max_score,time_of_max_score
-Chorus frog,16,40,2500,4000,0,2000,2.0,"[3.7678293192170487e-06, 3.7673481082101374e-06, 3.454049999746183e-06, 3.5254580099288085e-06, 2.9589592943656763e-06]","[0.0, 2.0, 4.0, 6.0, 8.0]",3.7678293192170487e-06,0.0
-great plains toad,7,18,2000,3000,0,1000,15.0,"[6.1310035018454136e-06, 4.067743333437454e-06, 4.216261440695183e-06, 4.714590624428302e-06, 4.268840606906055e-06]","[0.0, 2.0, 4.0, 6.0, 8.0]",6.1310035018454136e-06,0.0
-northern leopard frog,15,30,1000,1500,0,500,2.0,"[1.2832093738897077e-05, 1.3101839905406943e-05, 1.7973985551618525e-05, 1.3127703817943879e-05, 1.1394665880998894e-05]","[0.0, 2.0, 4.0, 6.0, 8.0]",1.7973985551618525e-05,4.0
-spadefoot toad,1,2,1000,2000,0,500,5.0,"[9.49055071971458e-06, 1.0222386372799245e-05, 6.886865649119779e-06, 5.276267993007903e-06, 6.3572764006441305e-06]","[0.0, 2.0, 4.0, 6.0, 8.0]",1.0222386372799245e-05,2.0
-spadefoot toad (81Hz),70,90,1000,2000,0,500,0.5,"[5.813727907147359e-06, 7.044397169718664e-06, 1.1273120465130139e-05, 5.966222054954781e-06, 1.1090558441103307e-05]","[0.0, 2.0, 4.0, 6.0, 8.0]",1.1273120465130139e-05,4.0

From 35db4fcfe87b70b4e220705a10a9e346c578a9a9 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Mon, 20 May 2024 15:07:39 -0400
Subject: [PATCH 39/43] resolves #726

automatically sets `torch.multiprocessing.set_sharing_strategy("file_system")` during opensoundscape import. We may want to revisit this decision, but it seems that this is the recommended setting for avoiding issues seen when using parallelized DataLoader

see discussion and recommended solution here https://github.com/pytorch/pytorch/issues/11201#issuecomment-421146936
---
 opensoundscape/ml/__init__.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/opensoundscape/ml/__init__.py b/opensoundscape/ml/__init__.py
index d585c336..4cab8e8b 100644
--- a/opensoundscape/ml/__init__.py
+++ b/opensoundscape/ml/__init__.py
@@ -7,3 +7,10 @@
 from . import sampling
 from . import utils
 from . import bioacoustics_model_zoo
+import torch.multiprocessing
+
+# using 'file_system' avoids errors with "Too many open files",
+# "Pin memory thread exited unexpectedly", and RuntimeError('received %d items of ancdata')
+# when using parallelized DataLoader. This is the recommended solution according to
+# https://github.com/pytorch/pytorch/issues/11201#issuecomment-421146936
+torch.multiprocessing.set_sharing_strategy("file_system")

From 59906acefd7b1eeda1abae69e30df99693804319 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Mon, 20 May 2024 15:57:30 -0400
Subject: [PATCH 40/43] check for labels outside range [0,1]

resolves check for values other than 0/1 in labels #891

now asserts that label values are >=0 and <=1 during CNN.train() and CNN.eval(). Adds tests for both. Also adds a missing test for input validation check of wrong class list during CNN.train()
---
 opensoundscape/ml/cnn.py   | 14 +++++++++++---
 opensoundscape/ml/utils.py | 15 +++++++++++++++
 tests/test_cnn.py          | 24 ++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/opensoundscape/ml/cnn.py b/opensoundscape/ml/cnn.py
index bb0ce47b..925ca6bb 100644
--- a/opensoundscape/ml/cnn.py
+++ b/opensoundscape/ml/cnn.py
@@ -20,7 +20,7 @@
 
 import opensoundscape
 from opensoundscape.ml import cnn_architectures
-from opensoundscape.ml.utils import apply_activation_layer
+from opensoundscape.ml.utils import apply_activation_layer, check_labels
 from opensoundscape.preprocess.preprocessors import SpectrogramPreprocessor
 from opensoundscape.ml.loss import (
     BCEWithLogitsLoss_hot,
@@ -365,8 +365,16 @@ def eval(self, targets, scores, logging_offset=0):
             scores: continuous values in 0/1 for each sample and class
             logging_offset: modify verbosity - for example, -1 will reduce
                 the amount of printing/logging by 1 level
+
+        Raises:
+            AssertionError: if targets are outside of range [0,1]
         """
 
+        # check for invalid label values
+        assert (
+            targets.max(axis=None) <= 1 and targets.min(axis=None) >= 0
+        ), "Labels must in range [0,1], but found values outside range"
+
         # remove all samples with NaN for a prediction
         targets = targets[~np.isnan(scores).any(axis=1), :]
         scores = scores[~np.isnan(scores).any(axis=1), :]
@@ -802,9 +810,9 @@ def train(
             `train_df=train_df[cnn.classes]` or `cnn.classes=train_df.columns` 
             before training.
             """
-        assert list(self.classes) == list(train_df.columns), class_err
+        check_labels(train_df, self.classes)
         if validation_df is not None:
-            assert list(self.classes) == list(validation_df.columns), class_err
+            check_labels(validation_df, self.classes)
 
         # Validation: warn user if no validation set
         if validation_df is None:
diff --git a/opensoundscape/ml/utils.py b/opensoundscape/ml/utils.py
index 1321ccca..9e464232 100644
--- a/opensoundscape/ml/utils.py
+++ b/opensoundscape/ml/utils.py
@@ -1,4 +1,5 @@
 """Utilties for .ml"""
+
 import warnings
 import pandas as pd
 import numpy as np
@@ -215,3 +216,17 @@ def collate_audio_samples_to_tensors(batch):
     tensors = torch.stack([i.data for i in batch])
     labels = torch.tensor([i.labels.tolist() for i in batch])
     return tensors, labels
+
+
+def check_labels(label_df, classes):
+    class_err = """
+            Train and validation datasets must have same classes
+            and class order as model object. Consider using
+            `train_df=train_df[cnn.classes]` or `cnn.classes=train_df.columns` 
+            before training.
+            """
+    assert list(classes) == list(label_df.columns), class_err
+
+    assert (
+        label_df.max(axis=None) <= 1 and label_df.min(axis=None) >= 0
+    ), "Labels must in range [0,1], but found values outside range"
diff --git a/tests/test_cnn.py b/tests/test_cnn.py
index 4c0bf799..14969a66 100644
--- a/tests/test_cnn.py
+++ b/tests/test_cnn.py
@@ -402,6 +402,21 @@ def test_prediction_warns_different_classes(train_df):
         assert "classes" in all_warnings
 
 
+def test_train_raises_wrong_class_list(train_df):
+    model = cnn.CNN("resnet18", classes=["different"], sample_duration=5.0)
+    with pytest.raises(AssertionError):
+        # raises AssertionError bc test_df columns != model.classes
+        model.train(train_df)
+
+
+def test_train_raises_labels_outside_range(train_df):
+    model = cnn.CNN("resnet18", classes=[0, 1], sample_duration=5.0)
+    train_df.iat[0, 0] = 2
+    with pytest.raises(AssertionError):
+        # raises AssertionError bc values outside [0,1] not allowed
+        model.train(train_df)
+
+
 def test_prediction_returns_consistent_values(train_df):
     model = cnn.CNN("resnet18", classes=["a", "b"], sample_duration=5.0)
     a = model.predict(train_df)
@@ -427,6 +442,15 @@ def test_eval(train_df):
     model.eval(train_df.values, scores.values)
 
 
+def test_eval_raises_bad_labels(train_df):
+    model = cnn.CNN("resnet18", classes=[0, 1], sample_duration=2)
+    scores = model.predict(train_df, split_files_into_clips=False)
+    train_df.iat[0, 0] = 2
+    with pytest.raises(AssertionError):
+        # raises AssertionError bc values outside [0,1] not allowed
+        model.eval(train_df.values, scores.values)
+
+
 def test_split_resnet_feat_clf(train_df):
     model = cnn.CNN("resnet18", classes=[0, 1], sample_duration=2)
     cnn.separate_resnet_feat_clf(model)

From 92bd4a915d668e17b5348a2cd7a5de2267a2a632 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Tue, 21 May 2024 13:08:54 -0400
Subject: [PATCH 41/43] more flexible specification of overlap

add consistent and flexible specification of consecutive clip overlap throughout the code: clip step seconds, clip overlap seconds, or clip overlap fraction. Updates ribbit, CNN.predict, SafeAudioDataLoader, make_clip_df, generate_clip_times_df, AudioSplittingDataset, BoxedAnnotations.one_hot_clip_labels,  AudioFileDataset

resolves overlap_time or step_time argument for predicting on overlapping clips #876

note: this deprecates the "overlap_fraction" kwarg to CNN.predict in favor of any of these: "clip_overlap", "clip_overlap_fraction", "clip_step"
---
 opensoundscape/annotations.py    | 25 ++++-------------
 opensoundscape/audio.py          | 35 ++++++------------------
 opensoundscape/ml/cnn.py         | 21 +++++++++-----
 opensoundscape/ml/dataloaders.py | 27 +++++++++++++-----
 opensoundscape/ml/datasets.py    |  9 +++---
 opensoundscape/ribbit.py         | 16 +++++++++--
 opensoundscape/utils.py          | 47 ++++++++++++++++++++++++++------
 tests/test_cnn.py                | 16 +++++++++++
 tests/test_utils.py              | 42 ++++++++++++++++++++++++++++
 9 files changed, 163 insertions(+), 75 deletions(-)

diff --git a/opensoundscape/annotations.py b/opensoundscape/annotations.py
index 584e7b25..45e339fd 100644
--- a/opensoundscape/annotations.py
+++ b/opensoundscape/annotations.py
@@ -640,13 +640,12 @@ def one_hot_labels_like(
     def one_hot_clip_labels(
         self,
         clip_duration,
-        clip_overlap,
         min_label_overlap,
         min_label_fraction=1,
         full_duration=None,
         class_subset=None,
-        final_clip=None,
         audio_files=None,
+        **kwargs,
     ):
         """Generate one-hot labels for clips of fixed duration
 
@@ -656,7 +655,6 @@ def one_hot_clip_labels(
 
         Args:
             clip_duration (float):  The duration in seconds of the clips
-            clip_overlap (float):   The overlap of the clips in seconds [default: 0]
             min_label_overlap: minimum duration (seconds) of annotation within the
                 time interval for it to count as a label. Note that any annotation
                 of length less than this value will be discarded.
@@ -677,19 +675,10 @@ def one_hot_clip_labels(
                 of `audio` for each row of self.df
             class_subset: list of classes for one-hot labels. If None, classes will
                 be all unique values of self.df['annotation']
-            final_clip (str): Behavior if final_clip is less than clip_duration
-                seconds long. By default, discards remaining time if less than
-                clip_duration seconds long [default: None].
-                Options:
-                - None: Discard the remainder (do not make a clip)
-                - "extend": Extend the final clip beyond full_duration to reach
-                    clip_duration length
-                - "remainder": Use only remainder of full_duration
-                    (final clip will be shorter than clip_duration)
-                - "full": Increase overlap with previous clip to yield a
-                    clip with clip_duration length
             audio_files: list of audio file paths (as str or pathlib.Path)
                 to create clips for. If None, uses self.audio_files. [default: None]
+            **kwargs (such as clip_overlap_fraction, final_clip) are passed to
+                opensoundscape.utils.generate_clip_times_df() via make_clip_df()
         Returns:
             dataframe with index ['file','start_time','end_time'] and columns=classes
         """
@@ -718,9 +707,8 @@ def one_hot_clip_labels(
                 clip_df = make_clip_df(
                     files=[f for f in audio_files if f == f],  # remove NaN if present
                     clip_duration=clip_duration,
-                    clip_overlap=clip_overlap,
-                    final_clip=final_clip,
                     raise_exceptions=True,  # raise exceptions from librosa.duration(f)
+                    **kwargs,
                 )
             except GetDurationError as exc:
                 raise GetDurationError(
@@ -733,10 +721,7 @@ def one_hot_clip_labels(
         else:  # use fixed full_duration for all files
             # make a clip df, will be re-used for each file
             clip_df_template = generate_clip_times_df(
-                full_duration=full_duration,
-                clip_duration=clip_duration,
-                clip_overlap=clip_overlap,
-                final_clip=final_clip,
+                full_duration=full_duration, clip_duration=clip_duration, **kwargs
             )
             # make a clip df for all files
             clip_df = pd.concat([clip_df_template] * len(audio_files))
diff --git a/opensoundscape/audio.py b/opensoundscape/audio.py
index 98b5717e..b9fd1ac1 100644
--- a/opensoundscape/audio.py
+++ b/opensoundscape/audio.py
@@ -873,41 +873,24 @@ def save(
             else:  # we can write metadata for WAV and AIFF
                 _write_metadata(self.metadata, metadata_format, path)
 
-    def split(self, clip_duration, clip_overlap=0, final_clip=None):
+    def split(self, clip_duration, **kwargs):
         """Split Audio into even-lengthed clips
 
         The Audio object is split into clips of a specified duration and overlap
 
         Args:
             clip_duration (float):  The duration in seconds of the clips
-            clip_overlap (float):   The overlap of the clips in seconds [default: 0]
-            final_clip (str):       Behavior if final_clip is less than clip_duration
-                seconds long. By default, discards remaining audio if less than
-                clip_duration seconds long [default: None].
-                Options:
-                - None: Discard the remainder (do not make a clip)
-                - "extend": Extend the final clip with silence to reach
-                    clip_duration length
-                - "remainder": Use only remainder of Audio (final clip will be
-                    shorter than clip_duration)
-                - "full": Increase overlap with previous clip to yield a clip with
-                    clip_duration length
+            **kwargs (such as clip_overlap_fraction, final_clip) are passed to
+                opensoundscape.utils.generate_clip_times_df()
+                - extends last Audio object if user passes final_clip == "extend"
         Returns:
             - audio_clips: list of audio objects
             - dataframe w/columns for start_time and end_time of each clip
         """
-        if not final_clip in ["remainder", "full", "extend", None]:
-            raise ValueError(
-                f"final_clip must be 'remainder', 'full', 'extend',"
-                f"or None. Got {final_clip}."
-            )
 
         duration = self.duration
         clip_df = generate_clip_times_df(
-            full_duration=duration,
-            clip_duration=clip_duration,
-            clip_overlap=clip_overlap,
-            final_clip=final_clip,
+            full_duration=duration, clip_duration=clip_duration, **kwargs
         )
 
         clips = [None] * len(clip_df)
@@ -918,8 +901,9 @@ def split(self, clip_duration, clip_overlap=0, final_clip=None):
             audio_clip = self.trim(start, end)
 
             # Extend the final clip if necessary
-            if end > duration and final_clip == "extend":
-                audio_clip = audio_clip.extend_to(clip_duration)
+            if "final_clip" in kwargs.keys():
+                if end > duration and kwargs["final_clip"] == "extend":
+                    audio_clip = audio_clip.extend_to(clip_duration)
 
             # Add clip to list of clips
             clips[idx] = audio_clip
@@ -927,8 +911,7 @@ def split(self, clip_duration, clip_overlap=0, final_clip=None):
         if len(clips) == 0:
             warnings.warn(
                 f"Given Audio object with duration of `{duration}` "
-                f"seconds and `clip_duration={clip_duration}` but "
-                f" `final_clip={final_clip}` produces no clips. "
+                f"seconds and `clip_duration={clip_duration}`, produces no clips. "
                 f"Returning empty list."
             )
 
diff --git a/opensoundscape/ml/cnn.py b/opensoundscape/ml/cnn.py
index bb0ce47b..568c036e 100644
--- a/opensoundscape/ml/cnn.py
+++ b/opensoundscape/ml/cnn.py
@@ -43,6 +43,8 @@
     multi_target_metrics,
 )
 
+import warnings
+
 
 class BaseClassifier(torch.nn.Module):
     """
@@ -105,7 +107,10 @@ def predict(
         num_workers=0,
         activation_layer=None,
         split_files_into_clips=True,
-        overlap_fraction=0,
+        clip_overlap=None,
+        clip_overlap_fraction=None,
+        clip_step=None,
+        overlap_fraction=None,
         final_clip=None,
         bypass_augmentations=True,
         invalid_samples_log=None,
@@ -145,10 +150,9 @@ def predict(
             split_files_into_clips:
                 If True, internally splits and predicts on clips from longer audio files
                 Otherwise, assumes each row of `samples` corresponds to one complete sample
-            overlap_fraction: fraction of overlap between consecutive clips when
-                predicting on clips of longer audio files. For instance, 0.5
-                gives 50% overlap between consecutive clips.
-            final_clip: see `opensoundscape.utils.generate_clip_times_df`
+            clip_overlap_fraction, clip_overlap, clip_step, final_clip:
+                see `opensoundscape.utils.generate_clip_times_df`
+            overlap_fraction: deprecated alias for clip_overlap_fraction
             bypass_augmentations: If False, Actions with
                 is_augmentation==True are performed. Default True.
             invalid_samples_log: if not None, samples that failed to preprocess
@@ -188,7 +192,7 @@ def predict(
             for that sample will be np.nan
 
         """
-        # for convenience, convert str/pathlib.Path to list
+        # for convenience, convert str/pathlib.Path to list of length 1
         if isinstance(samples, (str, Path)):
             samples = [samples]
 
@@ -198,6 +202,9 @@ def predict(
             self.preprocessor,
             split_files_into_clips=split_files_into_clips,
             overlap_fraction=overlap_fraction,
+            clip_overlap=clip_overlap,
+            clip_overlap_fraction=clip_overlap_fraction,
+            clip_step=clip_step,
             final_clip=final_clip,
             bypass_augmentations=bypass_augmentations,
             batch_size=batch_size,
@@ -577,7 +584,7 @@ def _init_train_dataloader(self, train_df, batch_size, num_workers, raise_errors
             train_df,
             self.preprocessor,
             split_files_into_clips=True,
-            overlap_fraction=0,
+            clip_overlap=0,
             final_clip=None,
             bypass_augmentations=False,
             batch_size=batch_size,
diff --git a/opensoundscape/ml/dataloaders.py b/opensoundscape/ml/dataloaders.py
index 35e57cbe..b7a048a2 100644
--- a/opensoundscape/ml/dataloaders.py
+++ b/opensoundscape/ml/dataloaders.py
@@ -14,7 +14,10 @@ def __init__(
         samples,
         preprocessor,
         split_files_into_clips=True,
-        overlap_fraction=0,
+        clip_overlap=None,
+        clip_overlap_fraction=None,
+        clip_step=None,
+        overlap_fraction=None,
         final_clip=None,
         bypass_augmentations=True,
         raise_errors=False,
@@ -42,11 +45,9 @@ def __init__(
             preprocessor: preprocessor object, eg AudioPreprocessor or SpectrogramPreprocessor
             split_files_into_clips=True: use AudioSplittingDataset to automatically split
                 audio files into appropriate-lengthed clips
-            overlap_fraction: overlap fraction between consecutive clips, ignroed if
-                split_files_into_clips is False [default: 0]
-            final_clip: how to handle the final incomplete clip in a file
-                options:['extend','remainder','full',None] [default: None]
-                see opensoundscape.utils.generate_clip_times_df for details
+            clip_overlap_fraction, clip_overlap, clip_step, final_clip:
+                see `opensoundscape.utils.generate_clip_times_df`
+            overlap_fraction: deprecated alias for clip_overlap_fraction
             bypass_augmentations: if True, don't apply any augmentations [default: True]
             raise_errors: if True, raise errors during preprocessing [default: False]
             collate_fn: function to collate samples into batches [default: identity]
@@ -62,6 +63,16 @@ def __init__(
             "(c) (file,start_time,end_time) as MultiIndex"
         )
 
+        if overlap_fraction is not None:
+            warnings.warn(
+                "`overlap_fraction` argument is deprecated. Use `clip_overlap_fraction` instead.",
+                DeprecationWarning,
+            )
+            assert (
+                clip_overlap_fraction is None
+            ), "Cannot specify both overlap_fraction and clip_overlap_fraction"
+            clip_overlap_fraction = overlap_fraction
+
         # set up prediction Dataset, considering three possible cases:
         # (c1) user provided multi-index df with file,start_time,end_time of clips
         # (c2) user provided file list and wants clips to be split out automatically
@@ -75,7 +86,9 @@ def __init__(
             dataset = AudioSplittingDataset(
                 samples=samples,
                 preprocessor=preprocessor,
-                overlap_fraction=overlap_fraction,
+                clip_overlap=clip_overlap,
+                clip_overlap_fraction=clip_overlap_fraction,
+                clip_step=clip_step,
                 final_clip=final_clip,
             )
         else:  # c3 split_files_into_clips=False -> one sample & one prediction per file provided
diff --git a/opensoundscape/ml/datasets.py b/opensoundscape/ml/datasets.py
index 0f7f3c3a..58785095 100644
--- a/opensoundscape/ml/datasets.py
+++ b/opensoundscape/ml/datasets.py
@@ -1,4 +1,5 @@
 """Preprocessors: pd.Series child with an action sequence & forward method"""
+
 import warnings
 import copy
 from pathlib import Path
@@ -161,10 +162,11 @@ class AudioSplittingDataset(AudioFileDataset):
     automatically split longer files into clips (providing only the file paths).
 
     Args:
-        see AudioFileDataset and make_clip_df
+        samples and preprocessor are passed to AudioFileDataset.__init__
+        **kwargs are passed to opensoundscape.utils.make_clip_df
     """
 
-    def __init__(self, samples, preprocessor, overlap_fraction=0, final_clip=None):
+    def __init__(self, samples, preprocessor, **kwargs):
         super(AudioSplittingDataset, self).__init__(
             samples=samples, preprocessor=preprocessor
         )
@@ -177,7 +179,6 @@ def __init__(self, samples, preprocessor, overlap_fraction=0, final_clip=None):
         self.label_df, self.invalid_samples = make_clip_df(
             files=samples,
             clip_duration=preprocessor.sample_duration,
-            clip_overlap=overlap_fraction * preprocessor.sample_duration,
-            final_clip=final_clip,
             return_invalid_samples=True,
+            **kwargs,
         )
diff --git a/opensoundscape/ribbit.py b/opensoundscape/ribbit.py
index 8b7c1273..905b8c0a 100644
--- a/opensoundscape/ribbit.py
+++ b/opensoundscape/ribbit.py
@@ -2,6 +2,7 @@
 
 This module provides functionality to search audio for periodically fluctuating vocalizations.
 """
+
 import os
 import warnings
 
@@ -76,7 +77,9 @@ def ribbit(
     signal_band,
     pulse_rate_range,
     clip_duration,
-    clip_overlap=0,
+    clip_overlap=None,
+    clip_overlap_fraction=None,
+    clip_step=None,
     final_clip=None,
     noise_bands=None,
     spec_clip_range=(-100, -20),
@@ -93,8 +96,13 @@ def ribbit(
         pulse_rate_range: [min,max] pulses per second for the target species
         clip_duration: the length of audio (in seconds) to analyze at one time
             - each clip is analyzed independently and recieves a ribbit score
-        clip_overlap (float):   overlap between consecutive clips (sec)
-        final_clip (str):       behavior if final clip is less than clip_duration
+        clip_overlap (float): overlap between consecutive clips (sec)
+        clip_overlap_fraction (float): overlap between consecutive clips as a fraction of
+            clip_duration
+        clip_step (float): step size between consecutive clips (sec)
+            - only one of clip_overlap, clip_overlap_fraction, or clip_step should be provided
+            - if all are None, defaults to clip_overlap=0
+        final_clip (str): behavior if final clip is less than clip_duration
             seconds long. By default, discards remaining audio if less than
             clip_duration seconds long [default: None].
             Options:
@@ -189,6 +197,8 @@ def ribbit(
         full_duration=spectrogram.duration,
         clip_duration=clip_duration,
         clip_overlap=clip_overlap,
+        clip_overlap_fraction=clip_overlap_fraction,
+        clip_step=clip_step,
         final_clip=final_clip,
     )
     clip_df["score"] = np.nan
diff --git a/opensoundscape/utils.py b/opensoundscape/utils.py
index 1eee5902..58106e50 100644
--- a/opensoundscape/utils.py
+++ b/opensoundscape/utils.py
@@ -3,6 +3,7 @@
 import datetime
 import warnings
 
+from pathlib import Path
 import numpy as np
 import pandas as pd
 import pytz
@@ -127,7 +128,9 @@ def jitter(x, width, distribution="gaussian"):
 def generate_clip_times_df(
     full_duration,
     clip_duration,
-    clip_overlap=0,
+    clip_overlap=None,
+    clip_overlap_fraction=None,
+    clip_step=None,
     final_clip=None,
     rounding_precision=10,
 ):
@@ -142,7 +145,11 @@ def generate_clip_times_df(
     Args:
         full_duration: The amount of time (seconds) to split into clips
         clip_duration (float):  The duration in seconds of the clips
-        clip_overlap (float):   The overlap of the clips in seconds [default: 0]
+        clip_overlap (float):   The overlap of the clips in seconds
+        clip_overlap_fraction (float): The overlap of the clips as a fraction of clip_duration
+        clip_step (float):      The increment in seconds between starts of consecutive clips
+            - must only specify one of clip_overlap, clip_overlap_fraction, or clip_step
+            - if all are None, overlap is set to 0
         final_clip (str):       Behavior if final_clip is less than clip_duration
             seconds long. By default, discards remaining time if less than
             clip_duration seconds long [default: None].
@@ -167,7 +174,27 @@ def generate_clip_times_df(
             f"or None. Got {final_clip}."
         )
 
-    assert clip_overlap < clip_duration, "clip_overlap must be less than clip_duration"
+    overspecified_overlap_err = (
+        "only one of clip_overlap, clip_overlap_fraction, or clip_step can be specified"
+    )
+    if clip_overlap is not None:
+        if clip_overlap_fraction is not None or clip_step is not None:
+            raise ValueError(overspecified_overlap_err)
+        assert (
+            clip_overlap < clip_duration
+        ), "clip_overlap must be less than clip_duration"
+    elif clip_overlap_fraction is not None:
+        if clip_overlap is not None or clip_step is not None:
+            raise ValueError(overspecified_overlap_err)
+        assert 0 <= clip_overlap_fraction < 1, "clip_overlap_fraction must be in [0, 1)"
+        clip_overlap = clip_overlap_fraction * clip_duration
+    elif clip_step is not None:
+        # allow values outside of [0, clip_duration]
+        if clip_overlap is not None or clip_overlap_fraction is not None:
+            raise ValueError(overspecified_overlap_err)
+        clip_overlap = clip_duration - clip_step
+    else:
+        clip_overlap = 0
 
     # Lists of start and end times for clips
     increment = clip_duration - clip_overlap
@@ -218,7 +245,9 @@ def cast_np_to_native(x):
 def make_clip_df(
     files,
     clip_duration,
-    clip_overlap=0,
+    clip_overlap=None,
+    clip_overlap_fraction=None,
+    clip_step=None,
     final_clip=None,
     return_invalid_samples=False,
     raise_exceptions=False,
@@ -243,6 +272,8 @@ class labels. Labels for a file will be copied to all clips
             belonging to that file in the returned clip dataframe.
         clip_duration (float): see generate_clip_times_df
         clip_overlap (float): see generate_clip_times_df
+        clip_overlap_fraction (float): see generate_clip_times_df
+        clip_step (float): see generate_clip_times_df
         final_clip (str): see generate_clip_times_df
         return_invalid_samples (bool): if True, returns additional value,
             a list of samples that caused exceptions
@@ -263,10 +294,6 @@ class labels. Labels for a file will be copied to all clips
         the dataframe will have one row with np.nan for 'start_time' and 'end_time' for that
         file path.
     """
-    if isinstance(files, str):
-        raise TypeError(
-            "make_clip_df expects a list of files, it looks like you passed it a string"
-        )
 
     label_df = None  # assume no labels to begin with, just a list of paths
     if isinstance(files, pd.DataFrame):
@@ -274,6 +301,8 @@ class labels. Labels for a file will be copied to all clips
         # use the dataframe as labels, keeping each column as a class
         # if paths are duplicated in index, keep only the first of each
         label_df = files[~files.index.duplicated(keep="first")]
+    elif isinstance(files, (str, Path)):
+        files = [files]  # be lenient, turn single path into list
     else:
         assert hasattr(files, "__iter__"), (
             f"`files` should be a dataframe with paths as "
@@ -291,6 +320,8 @@ class labels. Labels for a file will be copied to all clips
                 full_duration=t,
                 clip_duration=clip_duration,
                 clip_overlap=clip_overlap,
+                clip_overlap_fraction=clip_overlap_fraction,
+                clip_step=clip_step,
                 final_clip=final_clip,
             )
             clips["file"] = f
diff --git a/tests/test_cnn.py b/tests/test_cnn.py
index 4c0bf799..3dbe101e 100644
--- a/tests/test_cnn.py
+++ b/tests/test_cnn.py
@@ -580,3 +580,19 @@ def test_predict_posixpath_missing_files(missing_file_df, test_df):
     assert np.all([isnan(score) for score in scores.iloc[0].values])
     assert len(invalid_samples) == 1
     assert missing_file_df.index.values[0] in invalid_samples
+
+
+def test_predict_overlap_fraction_deprecated(test_df):
+    """
+    should give a DeprecationWarning if the deprecated overlap_fraction is passed.
+
+    A future version will remove overlap_fraction in favor of clip_overlap_fraction
+
+    also, should raise AssertionError if both args are passed (over-specified)
+    """
+    model = cnn.CNN("resnet18", classes=[0, 1], sample_duration=5.0)
+    with pytest.warns(DeprecationWarning):
+        scores = model.predict(test_df, overlap_fraction=0.5)
+        assert len(scores) == 3
+    with pytest.raises(AssertionError):
+        model.predict(test_df, overlap_fraction=0.5, clip_overlap_fraction=0.5)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 5ed2eb0f..2b1bacf4 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -134,6 +134,48 @@ def test_generate_clip_times_df_overlap():
     assert clip_df.iloc[1]["start_time"] == 2.5
     assert clip_df.iloc[1]["end_time"] == 7.5
 
+    clip_df = utils.generate_clip_times_df(
+        full_duration=10, clip_duration=5, clip_overlap_fraction=0.5
+    )
+    assert clip_df.shape[0] == 3
+    assert clip_df.iloc[0]["start_time"] == 0.0
+    assert clip_df.iloc[0]["end_time"] == 5.0
+    assert clip_df.iloc[1]["start_time"] == 2.5
+    assert clip_df.iloc[1]["end_time"] == 7.5
+
+    clip_df = utils.generate_clip_times_df(
+        full_duration=10, clip_duration=5, clip_step=2.5
+    )
+    assert clip_df.shape[0] == 3
+    assert clip_df.iloc[0]["start_time"] == 0.0
+    assert clip_df.iloc[0]["end_time"] == 5.0
+    assert clip_df.iloc[1]["start_time"] == 2.5
+    assert clip_df.iloc[1]["end_time"] == 7.5
+
+
+def test_generate_clip_times_df_overlap_raises_overspecified():
+    with pytest.raises(ValueError):
+        utils.generate_clip_times_df(
+            full_duration=10,
+            clip_duration=5,
+            clip_overlap=2.5,
+            clip_overlap_fraction=0.5,
+        )
+    with pytest.raises(ValueError):
+        utils.generate_clip_times_df(
+            full_duration=10,
+            clip_duration=5,
+            clip_overlap=2.5,
+            clip_step=0.5,
+        )
+    with pytest.raises(ValueError):
+        utils.generate_clip_times_df(
+            full_duration=10,
+            clip_duration=5,
+            clip_overlap_fraction=0.5,
+            clip_step=0.5,
+        )
+
 
 def test_make_clip_df(silence_10s_mp3_str):
     """many corner cases / alternatives are tested for audio.split()

From 2ceb6a9ad9fbc67c742e60b7f3ad1b14b4edd30f Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Tue, 21 May 2024 15:34:59 -0400
Subject: [PATCH 42/43] update arg name in test

---
 tests/test_datasets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index d43da7e4..e2822f54 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -219,7 +219,7 @@ def test_audio_splitting_dataset(dataset_df, pre):
 
 
 def test_audio_splitting_dataset_overlap(dataset_df, pre):
-    dataset = AudioSplittingDataset(dataset_df, pre, overlap_fraction=0.5)
+    dataset = AudioSplittingDataset(dataset_df, pre, clip_overlap_fraction=0.5)
     assert len(dataset) == 18
 
     # load a sample

From e4723f9f960107b7fa2f9e535384c9619b6267b5 Mon Sep 17 00:00:00 2001
From: sammlapp <sammlapp@gmail.com>
Date: Wed, 22 May 2024 13:37:12 -0400
Subject: [PATCH 43/43] don't nest wandb tables

Nesting using the / character does not behave as expected and causes errors on Windows machines. Nesting with a dictionary doesn't work either, so we just log all tables to the default section, "Tables".

Tested on a mac by checking wandb tables during train() and predict(). All look good.
---
 opensoundscape/ml/cnn.py | 51 +++++++++++++++++-----------------------
 1 file changed, 22 insertions(+), 29 deletions(-)

diff --git a/opensoundscape/ml/cnn.py b/opensoundscape/ml/cnn.py
index cef2b56e..569d90b5 100644
--- a/opensoundscape/ml/cnn.py
+++ b/opensoundscape/ml/cnn.py
@@ -231,12 +231,10 @@ def predict(
             # Log a table of preprocessed samples to wandb
             wandb_session.log(
                 {
-                    "Samples": {
-                        "Peprocessed_samples": wandb_table(
-                            dataloader.dataset.dataset,
-                            self.wandb_logging["n_preview_samples"],
-                        )
-                    }
+                    "Peprocessed_samples": wandb_table(
+                        dataloader.dataset.dataset,
+                        self.wandb_logging["n_preview_samples"],
+                    )
                 }
             )
 
@@ -283,11 +281,8 @@ def predict(
                     classes_to_extract=[c],
                     drop_labels=True,
                     gradcam_model=self if self.wandb_logging["gradcam"] else None,
-                    raise_exceptions=True,  # TODO back to false when done debugging
-                )
-                wandb_session.log(
-                    {"Samples": {f"Top_scoring_{c.replace(' ','_')}": table}}
                 )
+                wandb_session.log({f"Top_scoring_{c.replace(' ','_')}": table})
 
         if return_invalid_samples:
             return score_df, invalid_samples
@@ -854,28 +849,26 @@ def train(
             # log tables of preprocessed samples
             wandb_session.log(
                 {
-                    "Samples": {
-                        "training_samples": wandb_table(
-                            AudioFileDataset(
-                                train_df, self.preprocessor, bypass_augmentations=False
-                            ),
-                            self.wandb_logging["n_preview_samples"],
+                    "training_samples": wandb_table(
+                        AudioFileDataset(
+                            train_df, self.preprocessor, bypass_augmentations=False
                         ),
-                        "training_samples_no_aug": wandb_table(
-                            AudioFileDataset(
-                                train_df, self.preprocessor, bypass_augmentations=True
-                            ),
-                            self.wandb_logging["n_preview_samples"],
+                        self.wandb_logging["n_preview_samples"],
+                    ),
+                    "training_samples_no_aug": wandb_table(
+                        AudioFileDataset(
+                            train_df, self.preprocessor, bypass_augmentations=True
                         ),
-                        "validation_samples": wandb_table(
-                            AudioFileDataset(
-                                validation_df,
-                                self.preprocessor,
-                                bypass_augmentations=True,
-                            ),
-                            self.wandb_logging["n_preview_samples"],
+                        self.wandb_logging["n_preview_samples"],
+                    ),
+                    "validation_samples": wandb_table(
+                        AudioFileDataset(
+                            validation_df,
+                            self.preprocessor,
+                            bypass_augmentations=True,
                         ),
-                    }
+                        self.wandb_logging["n_preview_samples"],
+                    ),
                 }
             )