Commit
Merge branch 'develop' into issue_942_wrong_index
sammlapp committed May 23, 2024
2 parents b89d4aa + cf0de77 commit acdd7a3
Showing 25 changed files with 655 additions and 248 deletions.
72 changes: 40 additions & 32 deletions opensoundscape/annotations.py
@@ -3,6 +3,7 @@
includes BoxedAnnotations class and utilities to combine or "diff" annotations,
etc.
"""

from pathlib import Path
import itertools
import pandas as pd
@@ -115,9 +116,10 @@ def from_raven_files(
"""load annotations from Raven .txt files
Args:
raven_files: list of raven .txt file paths (as str or pathlib.Path)
raven_files: list or iterable of raven .txt file paths (as str or pathlib.Path),
or a single file path (str or pathlib.Path). E.g. ['path1.txt','path2.txt']
audio_files: (list) optionally specify audio files corresponding to each
raven file (length should match raven_files)
raven file (length should match raven_files). E.g. ['audio1.wav','audio2.wav']
- if None (default), .one_hot_clip_labels() will not be able to
check the duration of each audio file, and will raise an error
unless `full_duration` is passed as an argument
@@ -127,7 +129,7 @@
- pass `None` to load the raven file without explicitly
assigning a column as the annotation column. The resulting
object's `.df` will have an `annotation` column with nan values!
NOTE: If `annotatino_column_name` is passed, this argument is ignored.
NOTE: If `annotation_column_name` is passed, this argument is ignored.
annotation_column_name: (str) name of the column containing annotations
- default: None will use annotation_column_idx to find the annotation column
- if not None, this value overrides annotation_column_idx, and the column with
@@ -156,6 +158,34 @@
BoxedAnnotations object containing annotations from the Raven files
(the .df attribute is a dataframe containing each annotation)
"""
# check input type of raven_files and audio_files
# if a single path is passed, convert to list
if isinstance(raven_files, (str, Path)):
raven_files = [raven_files]
else:
assert (
len(raven_files) > 0
), "raven_files must be a non-empty list or iterable"
assert isinstance(
raven_files[0], (str, Path)
), f"raven_files must be an iterable of string or pathlib.Path, or a single string or pathlib.Path. Got type: {type(raven_files)}"

if isinstance(audio_files, (str, Path)):
audio_files = [audio_files]
else:
if audio_files is not None:
assert isinstance(
audio_files[0], (str, Path)
), f"audio_files must be an iterable of string or pathlib.Path, or a single string or pathlib.Path. Got type: {type(audio_files)}"

if audio_files is not None:
assert len(audio_files) == len(
raven_files
), """
`audio_files` and `raven_files` lists must have one-to-one correspondence,
but their lengths did not match.
"""

all_file_dfs = []

# mapping of Raven file columns to standard opensoundscape names
@@ -169,13 +199,6 @@
# update defaults with any user-specified mappings
column_mapping_dict.update(column_mapping_dict or {})

if audio_files is not None:
assert len(audio_files) == len(
raven_files
), """
`audio_files` and `raven_files` lists must have one-to-one correspondence,
but their lengths did not match.
"""
for i, raven_file in enumerate(raven_files):
df = pd.read_csv(raven_file, delimiter="\t")
if annotation_column_name is not None:
@@ -200,7 +223,7 @@
df.columns[annotation_column_idx - 1]: "annotation",
}
)
else: # None was passed to annotatino_column_idx
else: # None was passed to annotation_column_idx
# we'll create an empty `annotation` column
df["annotation"] = np.nan

@@ -617,13 +640,12 @@ def one_hot_labels_like(
def one_hot_clip_labels(
self,
clip_duration,
clip_overlap,
min_label_overlap,
min_label_fraction=1,
full_duration=None,
class_subset=None,
final_clip=None,
audio_files=None,
**kwargs,
):
"""Generate one-hot labels for clips of fixed duration
@@ -633,7 +655,6 @@
Args:
clip_duration (float): The duration in seconds of the clips
clip_overlap (float): The overlap of the clips in seconds [default: 0]
min_label_overlap: minimum duration (seconds) of annotation within the
time interval for it to count as a label. Note that any annotation
of length less than this value will be discarded.
@@ -654,19 +675,10 @@
of `audio` for each row of self.df
class_subset: list of classes for one-hot labels. If None, classes will
be all unique values of self.df['annotation']
final_clip (str): Behavior if final_clip is less than clip_duration
seconds long. By default, discards remaining time if less than
clip_duration seconds long [default: None].
Options:
- None: Discard the remainder (do not make a clip)
- "extend": Extend the final clip beyond full_duration to reach
clip_duration length
- "remainder": Use only remainder of full_duration
(final clip will be shorter than clip_duration)
- "full": Increase overlap with previous clip to yield a
clip with clip_duration length
audio_files: list of audio file paths (as str or pathlib.Path)
to create clips for. If None, uses self.audio_files. [default: None]
**kwargs (such as overlap_fraction, final_clip) are passed to
opensoundscape.utils.generate_clip_times_df() via make_clip_df()
Returns:
dataframe with index ['file','start_time','end_time'] and columns=classes
"""
@@ -695,9 +707,8 @@
clip_df = make_clip_df(
files=[f for f in audio_files if f == f], # remove NaN if present
clip_duration=clip_duration,
clip_overlap=clip_overlap,
final_clip=final_clip,
raise_exceptions=True, # raise exceptions from librosa.duration(f)
**kwargs,
)
except GetDurationError as exc:
raise GetDurationError(
@@ -710,10 +721,7 @@
else: # use fixed full_duration for all files
# make a clip df, will be re-used for each file
clip_df_template = generate_clip_times_df(
full_duration=full_duration,
clip_duration=clip_duration,
clip_overlap=clip_overlap,
final_clip=final_clip,
full_duration=full_duration, clip_duration=clip_duration, **kwargs
)
# make a clip df for all files
clip_df = pd.concat([clip_df_template] * len(audio_files))
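As a worked sketch of what the clip template above contains (assuming generate_clip_times_df retains its clip_overlap keyword):

    from opensoundscape.utils import generate_clip_times_df

    # a 10 s duration split into 4 s clips with 2 s overlap
    clip_df_template = generate_clip_times_df(
        full_duration=10, clip_duration=4, clip_overlap=2
    )
    # expected (start_time, end_time) rows: (0, 4), (2, 6), (4, 8), (6, 10);
    # the default final_clip=None discards any shorter remainder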
@@ -782,7 +790,7 @@ def convert_labels(self, conversion_table):
for k in df["annotation"]
]

return BoxedAnnotations(df)
return self._spawn(df=df)


def diff(base_annotations, comparison_annotations):
103 changes: 47 additions & 56 deletions opensoundscape/audio.py
@@ -38,6 +38,7 @@
import opensoundscape
from opensoundscape.utils import generate_clip_times_df
from opensoundscape.signal_processing import tdoa
from opensoundscape.utils import cast_np_to_native

DEFAULT_RESAMPLE_TYPE = "soxr_hq" # changed from kaiser_fast in v0.9.0

@@ -354,6 +355,8 @@ def from_file(

# if the offset > 0, we need to update the timestamp
if "recording_start_time" in metadata and offset > 0:
# timedelta doesn't like np types, fix issue #928
offset = cast_np_to_native(offset)
metadata["recording_start_time"] += datetime.timedelta(seconds=offset)

return cls(samples, sr, resample_type=resample_type, metadata=metadata)
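The cast exists because datetime.timedelta rejects some NumPy scalar types. A minimal sketch of the idea behind the helper (the actual implementation lives in opensoundscape.utils):

    import numpy as np

    def cast_np_to_native(x):
        # NumPy scalars (np.float32, np.int64, ...) expose .item(), which
        # returns the equivalent native Python value; pass others through
        return x.item() if isinstance(x, np.generic) else x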
@@ -533,9 +536,10 @@ def trim_samples(self, start_sample, end_sample):
else:
metadata = self.metadata.copy()
if "recording_start_time" in metadata:
metadata["recording_start_time"] += datetime.timedelta(
seconds=start_sample / self.sample_rate
)
# timedelta doesn't like np types, fix issue #928
seconds = start_sample / self.sample_rate
seconds = cast_np_to_native(seconds)
metadata["recording_start_time"] += datetime.timedelta(seconds=seconds)

if "duration" in metadata:
metadata["duration"] = len(samples_trimmed) / self.sample_rate
@@ -587,41 +591,42 @@ def loop(self, length=None, n=None):
def extend_to(self, duration):
"""Extend audio file to desired duration by adding silence to the end
If duration is less than the Audio's .duration, the Audio object is trimmed.
If `duration` is less than or equal to the Audio's .duration, the Audio remains unchanged.
Otherwise, silence is added to the end of the Audio object to achieve the desired
duration.
`duration`.
Args:
duration: the final duration in seconds of the audio object
duration: the minimum final duration in seconds of the audio object
Returns:
a new Audio object of the desired duration
"""

target_n_samples = round(duration * self.sample_rate)
minimum_n_samples = round(duration * self.sample_rate)
current_n_samples = len(self.samples)

if target_n_samples > current_n_samples:
if minimum_n_samples <= current_n_samples:
return self._spawn()

else:
# add 0's to the end of the sample array
new_samples = np.pad(
self.samples, pad_width=(0, target_n_samples - current_n_samples)
self.samples, pad_width=(0, minimum_n_samples - current_n_samples)
)
elif target_n_samples < current_n_samples:
# trim to desired samples (similar to self.trim())
new_samples = self.samples[0:target_n_samples]

# update metadata to reflect new duration
if self.metadata is None:
metadata = None
else:
metadata = self.metadata.copy()
if "duration" in metadata:
metadata["duration"] = len(new_samples) / self.sample_rate
# update metadata to reflect new duration
if self.metadata is None:
metadata = None
else:
metadata = self.metadata.copy()
if "duration" in metadata:
metadata["duration"] = len(new_samples) / self.sample_rate

return self._spawn(
samples=new_samples,
metadata=metadata,
)
return self._spawn(
samples=new_samples,
metadata=metadata,
)
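A hedged example of the revised semantics (path hypothetical): extend_to now only pads with silence and never trims:

    audio = Audio.from_file("rec1.wav").trim(0, 3)  # 3 s clip
    padded = audio.extend_to(5)  # 5 s: original samples plus 2 s of silence
    same = audio.extend_to(2)    # returned unchanged; already longer than 2 s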

def extend_by(self, duration):
"""Extend audio file by adding `duration` seconds of silence to the end
@@ -731,17 +736,20 @@ def spectrum(self):

# Compute the fft (fast fourier transform) of the selected clip
N = len(self.samples)
fft = scipy.fft.fft(self.samples)
fft = scipy.fft.rfft(self.samples)
fft = np.abs(fft) # get the magnitude of the fft

# create the frequencies corresponding to fft bins
freq = scipy.fft.fftfreq(N, d=1 / self.sample_rate)
freq = scipy.fft.rfftfreq(N, d=1 / self.sample_rate)

# remove negative frequencies and scale magnitude by 2.0/N:
fft = 2.0 / N * fft[0 : int(N / 2)]
frequencies = freq[0 : int(N / 2)]
fft = np.abs(fft)
# scale magnitude by 2.0/N,
# except for the DC and sr/2 (Nyquist frequency) components
fft *= 2.0 / N
fft[0] *= 0.5
if N % 2 == 0:
fft[-1] *= 0.5

return fft, frequencies
return fft, freq
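A quick numeric check of the one-sided scaling above (a sketch): after scaling by 2/N and halving the DC and Nyquist bins, a unit-amplitude sine should produce a peak magnitude near 1.0:

    import numpy as np
    import scipy.fft

    sr = N = 8000
    t = np.arange(N) / sr
    x = np.sin(2 * np.pi * 440 * t)  # unit-amplitude 440 Hz tone
    mag = np.abs(scipy.fft.rfft(x)) * 2.0 / N
    mag[0] *= 0.5                    # DC component is not doubled
    if N % 2 == 0:
        mag[-1] *= 0.5               # Nyquist component is not doubled
    print(round(mag.max(), 3))       # ~1.0, at the 440 Hz bin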

def normalize(self, peak_level=None, peak_dBFS=None):
"""Return audio object with normalized waveform
@@ -865,41 +873,24 @@ def save(
else: # we can write metadata for WAV and AIFF
_write_metadata(self.metadata, metadata_format, path)

def split(self, clip_duration, clip_overlap=0, final_clip=None):
def split(self, clip_duration, **kwargs):
"""Split Audio into even-lengthed clips
The Audio object is split into clips of a specified duration and overlap
Args:
clip_duration (float): The duration in seconds of the clips
clip_overlap (float): The overlap of the clips in seconds [default: 0]
final_clip (str): Behavior if final_clip is less than clip_duration
seconds long. By default, discards remaining audio if less than
clip_duration seconds long [default: None].
Options:
- None: Discard the remainder (do not make a clip)
- "extend": Extend the final clip with silence to reach
clip_duration length
- "remainder": Use only remainder of Audio (final clip will be
shorter than clip_duration)
- "full": Increase overlap with previous clip to yield a clip with
clip_duration length
**kwargs (such as clip_overlap_fraction, final_clip) are passed to
opensoundscape.utils.generate_clip_times_df()
- extends last Audio object if user passes final_clip == "extend"
Returns:
- audio_clips: list of audio objects
- dataframe w/columns for start_time and end_time of each clip
"""
if not final_clip in ["remainder", "full", "extend", None]:
raise ValueError(
f"final_clip must be 'remainder', 'full', 'extend',"
f"or None. Got {final_clip}."
)

duration = self.duration
clip_df = generate_clip_times_df(
full_duration=duration,
clip_duration=clip_duration,
clip_overlap=clip_overlap,
final_clip=final_clip,
full_duration=duration, clip_duration=clip_duration, **kwargs
)

clips = [None] * len(clip_df)
@@ -910,17 +901,17 @@ def split(self, clip_duration, clip_overlap=0, final_clip=None):
audio_clip = self.trim(start, end)

# Extend the final clip if necessary
if end > duration and final_clip == "extend":
audio_clip = audio_clip.extend_to(clip_duration)
if "final_clip" in kwargs.keys():
if end > duration and kwargs["final_clip"] == "extend":
audio_clip = audio_clip.extend_to(clip_duration)

# Add clip to list of clips
clips[idx] = audio_clip

if len(clips) == 0:
warnings.warn(
f"Given Audio object with duration of `{duration}` "
f"seconds and `clip_duration={clip_duration}` but "
f" `final_clip={final_clip}` produces no clips. "
f"seconds and `clip_duration={clip_duration}`, produces no clips. "
f"Returning empty list."
)

5 changes: 4 additions & 1 deletion opensoundscape/localization.py
@@ -1,10 +1,12 @@
"""Tools for localizing audio events from synchronized recording arrays"""

import warnings
import numpy as np
import datetime

from opensoundscape.audio import Audio
from opensoundscape import audio
from opensoundscape.utils import cast_np_to_native

# define defaults for physical constants
SPEED_OF_SOUND = 343 # default value in meters per second
@@ -635,8 +637,9 @@ def create_candidate_events(
if self.start_timestamp is None:
start_timestamp = None
else:
# timedelta doesn't like np types, fix issue #928
start_timestamp = self.start_timestamp + datetime.timedelta(
seconds=time_i
seconds=cast_np_to_native(time_i)
)
# create a SpatialEvent for this cluster of simultaneous detections
candidate_events.append(
7 changes: 7 additions & 0 deletions opensoundscape/ml/__init__.py
@@ -7,3 +7,10 @@
from . import sampling
from . import utils
from . import bioacoustics_model_zoo
import torch.multiprocessing

# using 'file_system' avoids errors with "Too many open files",
# "Pin memory thread exited unexpectedly", and RuntimeError('received %d items of ancdata')
# when using parallelized DataLoader. This is the recommended solution according to
# https://github.com/pytorch/pytorch/issues/11201#issuecomment-421146936
torch.multiprocessing.set_sharing_strategy("file_system")
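Since the strategy is set globally at import time, it can be verified after importing the module (a quick check, not part of the diff):

    import opensoundscape.ml  # runs the __init__ above, applying the setting
    import torch.multiprocessing
    print(torch.multiprocessing.get_sharing_strategy())  # -> "file_system"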
(diffs for the remaining 21 changed files are not shown)
