implement Audio.trim_with_timestamps()

Resolves "Audio.trim with datetime" (#926) and adds a test. Also updates the .trim() and .trim_samples() API to accept an out_of_bounds_mode argument ('ignore' (default), 'warn', or 'raise') that controls what happens when the requested time range is outside the audio extent, and updates the tests accordingly.
kitzeslab · Oct 6, 2024 · 92db5a9 · 92db5a9
1 parent afa3f21
commit 92db5a9
Show file tree

Hide file tree

Showing 2 changed files with 103 additions and 8 deletions.
diff --git a/opensoundscape/audio.py b/opensoundscape/audio.py
@@ -396,7 +396,7 @@ def resample(self, sample_rate, resample_type=None):
             resample_type=resample_type,
         )
 
-    def trim(self, start_time, end_time):
+    def trim(self, start_time, end_time, out_of_bounds_mode="ignore"):
         """Trim Audio object in time
 
         If start_time is less than zero, output starts from time 0
@@ -405,18 +405,26 @@ def trim(self, start_time, end_time):
         Args:
             start_time: time in seconds for start of extracted clip
             end_time: time in seconds for end of extracted clip
+            out_of_bounds_mode: behavior if requested time period is not fully contained
+                within the audio file. Options:
+                - 'ignore': return any available audio with no warning/error [default]
+                - 'warn': generate a warning
+                - 'raise': raise an AudioOutOfBoundsError
 
         Returns:
             a new Audio object containing samples from start_time to end_time
             - metadata is updated to reflect new start time and duration
 
         see also: trim_samples() to trim using sample positions instead of times
+        and trim_with_timestamps() to trim using localized datetime.datetime objects
         """
-        start_sample = max(0, self._get_sample_index(start_time))
+        start_sample = self._get_sample_index(start_time)
         end_sample = self._get_sample_index(end_time)
-        return self.trim_samples(start_sample, end_sample)
+        return self.trim_samples(
+            start_sample, end_sample, out_of_bounds_mode=out_of_bounds_mode
+        )
 
-    def trim_samples(self, start_sample, end_sample):
+    def trim_samples(self, start_sample, end_sample, out_of_bounds_mode="ignore"):
         """Trim Audio object by sample indices
 
         resulting sample array contains self.samples[start_sample:end_sample]
@@ -427,18 +435,36 @@ def trim_samples(self, start_sample, end_sample):
         Args:
             start_sample: sample index for start of extracted clip, inclusive
             end_sample: sample index for end of extracted clip, exlusive
+            out_of_bounds_mode: behavior if requested time period is not fully contained
+                within the audio file. Options:
+                - 'ignore': return any available audio with no warning/error [default]
+                - 'warn': generate a warning
+                - 'raise': raise an AudioOutOfBoundsError
 
         Returns:
             a new Audio object containing samples from start_sample to end_sample
             - metadata is updated to reflect new start time and duration
 
         see also: trim() to trim using time in seconds instead of sample positions
+        and trim_with_timestamps() to trim using localized datetime.datetime objects
         """
         assert (
             end_sample >= start_sample
         ), f"end_sample ({end_sample}) must be >= start_sample ({start_sample})"
 
-        start_sample = max(0, start_sample)
+        error_msg = f"Requested sample range [{start_sample},{end_sample}] is not fully contained within the audio file"
+        if end_sample > len(self.samples):
+            if out_of_bounds_mode == "raise":
+                raise AudioOutOfBoundsError(error_msg)
+            elif out_of_bounds_mode == "warn":
+                warnings.warn(error_msg)
+            # end_sample = len(self.samples) not needed, ok to slice beyond end of list
+        if start_sample < 0:
+            if out_of_bounds_mode == "raise":
+                raise AudioOutOfBoundsError(error_msg)
+            elif out_of_bounds_mode == "warn":
+                warnings.warn(error_msg)
+            start_sample = 0
 
         # list slicing is exclusive of the end index but inclusive of the start index
         # if end_sample is beyond the end of the sample, does not raise error just
@@ -464,6 +490,50 @@ def trim_samples(self, start_sample, end_sample):
             metadata=metadata,
         )
 
+    def trim_with_timestamps(
+        self, start_timestamp, end_timestamp, out_of_bounds_mode="warn"
+    ):
+        """Trim Audio object by localized datetime.datetime timestamps
+
+        requires that .metadata['recording_start_time'] is a localized datetime.datetime object
+
+        Args:
+            start_timestamp: localized datetime.datetime object for start of extracted clip
+            end_timestamp: localized datetime.datetime object for end of extracted clip
+            out_of_bounds_mode: behavior if requested time period is not fully contained
+                within the audio file. Options:
+                - 'ignore': return any available audio with no warning/error
+                - 'warn': generate a warning [default]
+                - 'raise': raise an AudioOutOfBoundsError
+
+        Returns:
+            a new Audio object containing samples from start_timestamp to end_timestamp
+            - metadata is updated to reflect new start time and duration
+        """
+        if "recording_start_time" not in self.metadata:
+            raise ValueError(
+                "metadata must contain 'recording_start_time' to use trim_with_timestamps"
+            )
+
+        assert isinstance(
+            self.metadata["recording_start_time"], datetime.datetime
+        ), "metadata['recording_start_time'] must be a datetime.datetime object"
+        assert isinstance(start_timestamp, datetime.datetime) and isinstance(
+            end_timestamp, datetime.datetime
+        ), "start_timestamp and end_timestamp must be localized datetime.datetime objects"
+        assert (
+            start_timestamp.tzinfo is not None and end_timestamp.tzinfo is not None
+        ), "start_timestamp and end_timestamp must be localized datetime.datetime objects, but tzinfo is None"
+
+        start_time = (
+            start_timestamp - self.metadata["recording_start_time"]
+        ).total_seconds()
+        end_time = (
+            end_timestamp - self.metadata["recording_start_time"]
+        ).total_seconds()
+
+        return self.trim(start_time, end_time, out_of_bounds_mode=out_of_bounds_mode)
+
     def loop(self, length=None, n=None):
         """Extend audio file by looping it
 

diff --git a/tests/test_audio.py b/tests/test_audio.py
@@ -434,8 +434,15 @@ def test_trim_updates_metadata(metadata_wav_str):
 
 def test_trim_from_negative_time(silence_10s_mp3_str):
     """correct behavior is to trim from time zero"""
-    audio = Audio.from_file(silence_10s_mp3_str, sample_rate=10000).trim(-1, 5)
-    assert math.isclose(audio.duration, 5, abs_tol=1e-5)
+    audio = Audio.from_file(silence_10s_mp3_str, sample_rate=10000)
+    a = audio.trim(-1, 5)
+    assert math.isclose(a.duration, 5, abs_tol=1e-5)
+
+    with pytest.warns(UserWarning):
+        audio.trim(-1, 5, out_of_bounds_mode="warn")
+
+    with pytest.raises(AudioOutOfBoundsError):
+        audio.trim(-1, 5, out_of_bounds_mode="raise")
 
 
 def test_trim_samples(silence_10s_mp3_str):
@@ -456,7 +463,25 @@ def test_trim_samples(silence_10s_mp3_str):
 
 def test_trim_past_end_of_clip(silence_10s_mp3_str):
     """correct behavior is to trim to the end of the clip"""
-    audio = Audio.from_file(silence_10s_mp3_str, sample_rate=10000).trim(9, 11)
+    a = Audio.from_file(silence_10s_mp3_str, sample_rate=10000)
+    audio = a.trim(9, 11)
+    assert math.isclose(audio.duration, 1, abs_tol=1e-5)
+
+    with pytest.warns(UserWarning):
+        a.trim(9, 11, out_of_bounds_mode="warn")
+
+    with pytest.raises(AudioOutOfBoundsError):
+        a.trim(9, 11, out_of_bounds_mode="raise")
+
+
+def test_trim_with_datetime(silence_10s_mp3_str):
+    a = Audio.from_file(silence_10s_mp3_str, sample_rate=10000)
+    a.metadata["recording_start_time"] = datetime.datetime(
+        2022, 1, 1, 0, 0, 0, tzinfo=pytz.utc
+    )
+    start = datetime.datetime(2022, 1, 1, 0, 0, 1, tzinfo=pytz.utc)
+    end = datetime.datetime(2022, 1, 1, 0, 0, 2, tzinfo=pytz.utc)
+    audio = a.trim_with_timestamps(start, end)
     assert math.isclose(audio.duration, 1, abs_tol=1e-5)