Commit
Merge branch 'develop' into issue_942_wrong_index
sammlapp committed May 23, 2024
2 parents b89d4aa + cf0de77 commit acdd7a3
Showing 25 changed files with 655 additions and 248 deletions.
72 changes: 40 additions & 32 deletions opensoundscape/annotations.py
@@ -3,6 +3,7 @@
includes BoxedAnnotations class and utilities to combine or "diff" annotations,
etc.
"""

from pathlib import Path
import itertools
import pandas as pd
@@ -115,9 +116,10 @@ def from_raven_files(
"""load annotations from Raven .txt files
Args:
raven_files: list of raven .txt file paths (as str or pathlib.Path)
raven_files: list or iterable of raven .txt file paths (as str or pathlib.Path),
or a single file path (str or pathlib.Path). E.g. ['path1.txt','path2.txt']
audio_files: (list) optionally specify audio files corresponding to each
raven file (length should match raven_files)
raven file (length should match raven_files). E.g. ['audio1.wav','audio2.wav']
- if None (default), .one_hot_clip_labels() will not be able to
check the duration of each audio file, and will raise an error
unless `full_duration` is passed as an argument
@@ -127,7 +129,7 @@
- pass `None` to load the raven file without explicitly
assigning a column as the annotation column. The resulting
object's `.df` will have an `annotation` column with nan values!
NOTE: If `annotatino_column_name` is passed, this argument is ignored.
NOTE: If `annotation_column_name` is passed, this argument is ignored.
annotation_column_name: (str) name of the column containing annotations
- default: None will use annotation_column_idx to find the annotation column
- if not None, this value overrides annotation_column_idx, and the column with
@@ -156,6 +158,34 @@
BoxedAnnotations object containing annotations from the Raven files
(the .df attribute is a dataframe containing each annotation)
"""
# check input type of raven_files and audio_files
# if a single path is passed, convert to list
if isinstance(raven_files, (str, Path)):
raven_files = [raven_files]
else:
assert (
len(raven_files) > 0
), "raven_files must be a non-empty list or iterable"
assert isinstance(
raven_files[0], (str, Path)
), f"raven_files must be an iterable of string or pathlib.Path, or a single string or pathlib.Path. Got type: {type(raven_files)}"

if isinstance(audio_files, (str, Path)):
audio_files = [audio_files]
else:
if audio_files is not None:
assert isinstance(
audio_files[0], (str, Path)
), f"audio_files must be an iterable of string or pathlib.Path, or a single string or pathlib.Path. Got type: {type(audio_files)}"

if audio_files is not None:
assert len(audio_files) == len(
raven_files
), """
`audio_files` and `raven_files` lists must have one-to-one correspondence,
but their lengths did not match.
"""

all_file_dfs = []

# mapping of Raven file columns to standard opensoundscape names
@@ -169,13 +199,6 @@
# update defaults with any user-specified mappings
column_mapping_dict.update(column_mapping_dict or {})

if audio_files is not None:
assert len(audio_files) == len(
raven_files
), """
`audio_files` and `raven_files` lists must have one-to-one correspondence,
but their lengths did not match.
"""
for i, raven_file in enumerate(raven_files):
df = pd.read_csv(raven_file, delimiter="\t")
if annotation_column_name is not None:
@@ -200,7 +223,7 @@
df.columns[annotation_column_idx - 1]: "annotation",
}
)
else: # None was passed to annotatino_column_idx
else: # None was passed to annotation_column_idx
# we'll create an empty `annotation` column
df["annotation"] = np.nan

@@ -617,13 +640,12 @@ def one_hot_labels_like(
def one_hot_clip_labels(
self,
clip_duration,
clip_overlap,
min_label_overlap,
min_label_fraction=1,
full_duration=None,
class_subset=None,
final_clip=None,
audio_files=None,
**kwargs,
):
"""Generate one-hot labels for clips of fixed duration
@@ -633,7 +655,6 @@
Args:
clip_duration (float): The duration in seconds of the clips
clip_overlap (float): The overlap of the clips in seconds [default: 0]
min_label_overlap: minimum duration (seconds) of annotation within the
time interval for it to count as a label. Note that any annotation
of length less than this value will be discarded.
@@ -654,19 +675,10 @@
of `audio` for each row of self.df
class_subset: list of classes for one-hot labels. If None, classes will
be all unique values of self.df['annotation']
final_clip (str): Behavior if final_clip is less than clip_duration
seconds long. By default, discards remaining time if less than
clip_duration seconds long [default: None].
Options:
- None: Discard the remainder (do not make a clip)
- "extend": Extend the final clip beyond full_duration to reach
clip_duration length
- "remainder": Use only remainder of full_duration
(final clip will be shorter than clip_duration)
- "full": Increase overlap with previous clip to yield a
clip with clip_duration length
audio_files: list of audio file paths (as str or pathlib.Path)
to create clips for. If None, uses self.audio_files. [default: None]
**kwargs (such as overlap_fraction, final_clip) are passed to
opensoundscape.utils.generate_clip_times_df() via make_clip_df()
Returns:
dataframe with index ['file','start_time','end_time'] and columns=classes
"""
@@ -695,9 +707,8 @@
clip_df = make_clip_df(
files=[f for f in audio_files if f == f], # remove NaN if present
clip_duration=clip_duration,
clip_overlap=clip_overlap,
final_clip=final_clip,
raise_exceptions=True, # raise exceptions from librosa.duration(f)
**kwargs,
)
except GetDurationError as exc:
raise GetDurationError(
@@ -710,10 +721,7 @@
else: # use fixed full_duration for all files
# make a clip df, will be re-used for each file
clip_df_template = generate_clip_times_df(
full_duration=full_duration,
clip_duration=clip_duration,
clip_overlap=clip_overlap,
final_clip=final_clip,
full_duration=full_duration, clip_duration=clip_duration, **kwargs
)
# make a clip df for all files
clip_df = pd.concat([clip_df_template] * len(audio_files))
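As a worked sketch of what the clip template above contains (assuming generate_clip_times_df retains its clip_overlap keyword):

    from opensoundscape.utils import generate_clip_times_df

    # a 10 s duration split into 4 s clips with 2 s overlap
    clip_df_template = generate_clip_times_df(
        full_duration=10, clip_duration=4, clip_overlap=2
    )
    # expected (start_time, end_time) rows: (0, 4), (2, 6), (4, 8), (6, 10);
    # the default final_clip=None discards any shorter remainder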
@@ -782,7 +790,7 @@ def convert_labels(self, conversion_table):
for k in df["annotation"]
]

return BoxedAnnotations(df)
return self._spawn(df=df)


def diff(base_annotations, comparison_annotations):
103 changes: 47 additions & 56 deletions opensoundscape/audio.py
@@ -38,6 +38,7 @@
import opensoundscape
from opensoundscape.utils import generate_clip_times_df
from opensoundscape.signal_processing import tdoa
from opensoundscape.utils import cast_np_to_native

DEFAULT_RESAMPLE_TYPE = "soxr_hq" # changed from kaiser_fast in v0.9.0

@@ -354,6 +355,8 @@ def from_file(

# if the offset > 0, we need to update the timestamp
if "recording_start_time" in metadata and offset > 0:
# timedelta doesn't like np types, fix issue #928
offset = cast_np_to_native(offset)
metadata["recording_start_time"] += datetime.timedelta(seconds=offset)

return cls(samples, sr, resample_type=resample_type, metadata=metadata)
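The cast exists because datetime.timedelta rejects some NumPy scalar types. A minimal sketch of the idea behind the helper (the actual implementation lives in opensoundscape.utils):

    import numpy as np

    def cast_np_to_native(x):
        # NumPy scalars (np.float32, np.int64, ...) expose .item(), which
        # returns the equivalent native Python value; pass others through
        return x.item() if isinstance(x, np.generic) else x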
@@ -533,9 +536,10 @@ def trim_samples(self, start_sample, end_sample):
else:
metadata = self.metadata.copy()
if "recording_start_time" in metadata:
metadata["recording_start_time"] += datetime.timedelta(
seconds=start_sample / self.sample_rate
)
# timedelta doesn't like np types, fix issue #928
seconds = start_sample / self.sample_rate
seconds = cast_np_to_native(seconds)
metadata["recording_start_time"] += datetime.timedelta(seconds=seconds)

if "duration" in metadata:
metadata["duration"] = len(samples_trimmed) / self.sample_rate
@@ -587,41 +591,42 @@ def loop(self, length=None, n=None):
def extend_to(self, duration):
"""Extend audio file to desired duration by adding silence to the end
If duration is less than the Audio's .duration, the Audio object is trimmed.
If `duration` is less than or equal to the Audio's .duration, the Audio remains unchanged.
Otherwise, silence is added to the end of the Audio object to achieve the desired
duration.
`duration`.
Args:
duration: the final duration in seconds of the audio object
duration: the minimum final duration in seconds of the audio object
Returns:
a new Audio object of the desired duration
"""

target_n_samples = round(duration * self.sample_rate)
minimum_n_samples = round(duration * self.sample_rate)
current_n_samples = len(self.samples)

if target_n_samples > current_n_samples:
if minimum_n_samples <= current_n_samples:
return self._spawn()

else:
# add 0's to the end of the sample array
new_samples = np.pad(
self.samples, pad_width=(0, target_n_samples - current_n_samples)
self.samples, pad_width=(0, minimum_n_samples - current_n_samples)
)
elif target_n_samples < current_n_samples:
# trim to desired samples (similar to self.trim())
new_samples = self.samples[0:target_n_samples]

# update metadata to reflect new duration
if self.metadata is None:
metadata = None
else:
metadata = self.metadata.copy()
if "duration" in metadata:
metadata["duration"] = len(new_samples) / self.sample_rate
# update metadata to reflect new duration
if self.metadata is None:
metadata = None
else:
metadata = self.metadata.copy()
if "duration" in metadata:
metadata["duration"] = len(new_samples) / self.sample_rate

return self._spawn(
samples=new_samples,
metadata=metadata,
)
return self._spawn(
samples=new_samples,
metadata=metadata,
)
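A hedged example of the revised semantics (path hypothetical): extend_to now only pads with silence and never trims:

    audio = Audio.from_file("rec1.wav").trim(0, 3)  # 3 s clip
    padded = audio.extend_to(5)  # 5 s: original samples plus 2 s of silence
    same = audio.extend_to(2)    # returned unchanged; already longer than 2 s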

def extend_by(self, duration):
"""Extend audio file by adding `duration` seconds of silence to the end
@@ -731,17 +736,20 @@ def spectrum(self):

# Compute the fft (fast fourier transform) of the selected clip
N = len(self.samples)
fft = scipy.fft.fft(self.samples)
fft = scipy.fft.rfft(self.samples)
fft = np.abs(fft) # get the magnitude of the fft

# create the frequencies corresponding to fft bins
freq = scipy.fft.fftfreq(N, d=1 / self.sample_rate)
freq = scipy.fft.rfftfreq(N, d=1 / self.sample_rate)

# remove negative frequencies and scale magnitude by 2.0/N:
fft = 2.0 / N * fft[0 : int(N / 2)]
frequencies = freq[0 : int(N / 2)]
fft = np.abs(fft)
# scale magnitude by 2.0/N,
# except for the DC and sr/2 (Nyquist frequency) components
fft *= 2.0 / N
fft[0] *= 0.5
if N % 2 == 0:
fft[-1] *= 0.5

return fft, frequencies
return fft, freq
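A quick numeric check of the one-sided scaling above (a sketch): after scaling by 2/N and halving the DC and Nyquist bins, a unit-amplitude sine should produce a peak magnitude near 1.0:

    import numpy as np
    import scipy.fft

    sr = N = 8000
    t = np.arange(N) / sr
    x = np.sin(2 * np.pi * 440 * t)  # unit-amplitude 440 Hz tone
    mag = np.abs(scipy.fft.rfft(x)) * 2.0 / N
    mag[0] *= 0.5                    # DC component is not doubled
    if N % 2 == 0:
        mag[-1] *= 0.5               # Nyquist component is not doubled
    print(round(mag.max(), 3))       # ~1.0, at the 440 Hz bin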

def normalize(self, peak_level=None, peak_dBFS=None):
"""Return audio object with normalized waveform
@@ -865,41 +873,24 @@ def save(
else: # we can write metadata for WAV and AIFF
_write_metadata(self.metadata, metadata_format, path)

def split(self, clip_duration, clip_overlap=0, final_clip=None):
def split(self, clip_duration, **kwargs):
"""Split Audio into even-lengthed clips
The Audio object is split into clips of a specified duration and overlap
Args:
clip_duration (float): The duration in seconds of the clips
clip_overlap (float): The overlap of the clips in seconds [default: 0]
final_clip (str): Behavior if final_clip is less than clip_duration
seconds long. By default, discards remaining audio if less than
clip_duration seconds long [default: None].
Options:
- None: Discard the remainder (do not make a clip)
- "extend": Extend the final clip with silence to reach
clip_duration length
- "remainder": Use only remainder of Audio (final clip will be
shorter than clip_duration)
- "full": Increase overlap with previous clip to yield a clip with
clip_duration length
**kwargs (such as clip_overlap_fraction, final_clip) are passed to
opensoundscape.utils.generate_clip_times_df()
- extends last Audio object if user passes final_clip == "extend"
Returns:
- audio_clips: list of audio objects
- dataframe w/columns for start_time and end_time of each clip
"""
if not final_clip in ["remainder", "full", "extend", None]:
raise ValueError(
f"final_clip must be 'remainder', 'full', 'extend',"
f"or None. Got {final_clip}."
)

duration = self.duration
clip_df = generate_clip_times_df(
full_duration=duration,
clip_duration=clip_duration,
clip_overlap=clip_overlap,
final_clip=final_clip,
full_duration=duration, clip_duration=clip_duration, **kwargs
)

clips = [None] * len(clip_df)
@@ -910,17 +901,17 @@ def split(self, clip_duration, clip_overlap=0, final_clip=None):
audio_clip = self.trim(start, end)

# Extend the final clip if necessary
if end > duration and final_clip == "extend":
audio_clip = audio_clip.extend_to(clip_duration)
if "final_clip" in kwargs.keys():
if end > duration and kwargs["final_clip"] == "extend":
audio_clip = audio_clip.extend_to(clip_duration)

# Add clip to list of clips
clips[idx] = audio_clip

if len(clips) == 0:
warnings.warn(
f"Given Audio object with duration of `{duration}` "
f"seconds and `clip_duration={clip_duration}` but "
f" `final_clip={final_clip}` produces no clips. "
f"seconds and `clip_duration={clip_duration}`, produces no clips. "
f"Returning empty list."
)

5 changes: 4 additions & 1 deletion opensoundscape/localization.py
@@ -1,10 +1,12 @@
"""Tools for localizing audio events from synchronized recording arrays"""

import warnings
import numpy as np
import datetime

from opensoundscape.audio import Audio
from opensoundscape import audio
from opensoundscape.utils import cast_np_to_native

# define defaults for physical constants
SPEED_OF_SOUND = 343 # default value in meters per second
@@ -635,8 +637,9 @@ def create_candidate_events(
if self.start_timestamp is None:
start_timestamp = None
else:
# timedelta doesn't like np types, fix issue #928
start_timestamp = self.start_timestamp + datetime.timedelta(
seconds=time_i
seconds=cast_np_to_native(time_i)
)
# create a SpatialEvent for this cluster of simultaneous detections
candidate_events.append(
7 changes: 7 additions & 0 deletions opensoundscape/ml/__init__.py
@@ -7,3 +7,10 @@
from . import sampling
from . import utils
from . import bioacoustics_model_zoo
import torch.multiprocessing

# using 'file_system' avoids errors with "Too many open files",
# "Pin memory thread exited unexpectedly", and RuntimeError('received %d items of ancdata')
# when using parallelized DataLoader. This is the recommended solution according to
# https://github.com/pytorch/pytorch/issues/11201#issuecomment-421146936
torch.multiprocessing.set_sharing_strategy("file_system")
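Since the strategy is set globally at import time, it can be verified after importing the module (a quick check, not part of the diff):

    import opensoundscape.ml  # runs the __init__ above, applying the setting
    import torch.multiprocessing
    print(torch.multiprocessing.get_sharing_strategy())  # -> "file_system"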
(diffs for the remaining 21 changed files are not shown)
