Skip to content

Commit

Permalink
Merge pull request #1048 from kitzeslab/feat_350_pcen
Browse files Browse the repository at this point in the history
Feat 350 pcen
  • Loading branch information
sammlapp authored Sep 9, 2024
2 parents d39833e + b89be05 commit a3d4001
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 20 deletions.
6 changes: 6 additions & 0 deletions opensoundscape/preprocess/action_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import random

import librosa
import torch
import torchvision

Expand Down Expand Up @@ -262,3 +263,8 @@ def tensor_add_noise(tensor, std=1):
"""
noise = torch.empty_like(tensor).normal_(mean=0, std=std)
return tensor + noise


@register_action_fn
def pcen(s, **kwargs):
    """Apply librosa's PCEN to the spectrogram values of `s`.

    Args:
        s: object exposing a `.spectrogram` array and a `._spawn()` method
            (presumably an opensoundscape Spectrogram - confirm against callers)
        **kwargs: forwarded unchanged to librosa.pcen (e.g. sr, hop_length, gain)

    Returns:
        the result of `s._spawn(spectrogram=...)` with the PCEN output as the
        new spectrogram values
    """
    normalized = librosa.pcen(S=s.spectrogram, **kwargs)
    return s._spawn(spectrogram=normalized)
42 changes: 42 additions & 0 deletions opensoundscape/preprocess/preprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from opensoundscape.sample import AudioSample
from opensoundscape.preprocess.actions import ACTION_CLS_DICT
from opensoundscape.preprocess import io
from opensoundscape.preprocess import preprocessors, actions


PREPROCESSOR_CLS_DICT = {}
Expand Down Expand Up @@ -579,6 +580,47 @@ def _generate_sample(self, sample):
return sample


# NOTE(review): the neighbouring AudioPreprocessor is decorated with
# @register_preprocessor_cls; confirm whether this class should be registered too.
class PCENPreprocessor(preprocessors.SpectrogramPreprocessor):
    """Spectrogram preprocessor that applies PCEN before converting to a tensor.

    PCEN is Per-Channel Energy Normalization, see https://arxiv.org/abs/1607.05666

    Takes the same arguments as SpectrogramPreprocessor. The differences are:
    - the "to_spec" action is set to dB_scale=False, because PCEN expects a
      linear-scale spectrogram
    - a "pcen" action (librosa implementation) is inserted after "to_spec"
    - a "normalize" action rescales the PCEN output to [0, 1], and the
      "to_tensor" action is set to range=[0, 1]

    Note: the user should set self.pipeline['pcen'].params['sr'] and
    'hop_length' to match the audio/spectrogram settings after instantiating
    this class. Other PCEN parameters can be set the same way via
    self.pipeline['pcen'].params.
    """

    def __init__(self, *args, **kwargs):
        """Build the SpectrogramPreprocessor pipeline, then add the PCEN steps.

        Args:
            *args, **kwargs: passed unchanged to SpectrogramPreprocessor.__init__
        """
        super().__init__(*args, **kwargs)

        # PCEN needs a linear-value spectrogram, not a dB-scaled one
        self.pipeline["to_spec"].set(dB_scale=False)

        # use the librosa implementation of PCEN (a pytorch implementation
        # could be used in the future, and made trainable)
        pcen_action = actions.Action(fn=action_functions.pcen, is_augmentation=False)
        self.insert_action(action_index="pcen", action=pcen_action, after_key="to_spec")

        def normalize_to_01(s):
            """Min-max rescale the spectrogram values of `s` to [0, 1]."""
            lowest = s.spectrogram.min()
            span = s.spectrogram.max() - lowest
            shifted = s.spectrogram - lowest
            # a constant spectrogram has zero span: return all zeros rather
            # than dividing by zero and filling the output with NaN
            new_s = shifted / span if span > 0 else shifted
            return s._spawn(spectrogram=new_s)

        self.insert_action(
            action_index="normalize",
            action=actions.Action(fn=normalize_to_01, is_augmentation=False),
            after_key="pcen",
        )

        # the normalized values already lie in [0, 1], so to_tensor uses that range
        self.pipeline["to_tensor"].set(range=[0, 1])


@register_preprocessor_cls
class AudioPreprocessor(BasePreprocessor):
"""Child of BasePreprocessor that only loads audio and resamples
Expand Down
8 changes: 8 additions & 0 deletions tests/test_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,3 +312,11 @@ def test_overlay_to_from_dict(sample_df):
# new action will have empty overlay_df and will be bypassed
assert action2.bypass == True
assert action2.overlay_df.empty


def test_pcen(sample_audio):
    """The pcen action should change the values of a spectrogram built with dB_scale=False."""
    sample_audio.data = Spectrogram.from_audio(sample_audio.data, dB_scale=False)
    pcen_action = actions.Action(action_functions.pcen)

    # keep a copy of the values so the comparison is against the pre-action state
    values_before = copy.copy(sample_audio.data.spectrogram)
    pcen_action(sample_audio)
    values_after = sample_audio.data.spectrogram

    assert not np.array_equal(values_after, values_before)
31 changes: 11 additions & 20 deletions tests/test_preprocessors.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from copy import copy
from pathlib import Path

import pytest
import math
import numpy as np
import pandas as pd
from copy import copy
from pathlib import Path
import torch

from opensoundscape.preprocess import preprocessors, actions, action_functions
from opensoundscape.preprocess.preprocessors import (
Expand Down Expand Up @@ -230,26 +232,15 @@ def test_noisereducespectrogrampreprocessor(short_sample):
assert s1.mean() < s2.mean()


def test_audiopreprocessor(audiopreprocessor, sample):
    """Forwarding a sample should give a 2 s Audio object that keeps the 22050 Hz sample rate."""
    processed = audiopreprocessor.forward(sample)
    audio = processed.data
    assert type(audio) == Audio
    assert math.isclose(audio.duration, 2.0, abs_tol=1e-9)
    assert audio.sample_rate == 22050

# NOTE(review): this span is a rendered diff hunk with indentation and +/- markers lost.
# Statements of two tests (test_pcenpreprocessor and test_audiopreprocessor_extend) are
# mixed together; presumably one of them is the removed side of the diff - confirm against
# the repository before editing either test.
def test_pcenpreprocessor(sample):

def test_audiopreprocessor_extend(audiopreprocessor, short_sample):
"""should retain original sample rate"""
s = audiopreprocessor.forward(short_sample).data
assert type(s) == Audio
assert math.isclose(s.duration, 2.0, abs_tol=1e-9)
assert s.sample_rate == 22050
# PCEN lines: a PCENPreprocessor built with sample_duration=1 should return a torch.Tensor
p1 = preprocessors.PCENPreprocessor(sample_duration=1)
s1 = p1.forward(sample, bypass_augmentations=True).data
assert isinstance(s1, torch.Tensor)

# when trim_audio.extend is False, should raise an error
# if the input is too short
audiopreprocessor.pipeline.trim_audio.set(extend=False)
with pytest.raises(PreprocessingError):
s = audiopreprocessor.forward(short_sample).data
# try using some different settings
p1.pipeline.pcen.set(gain=0.5)
s2 = p1.forward(sample, bypass_augmentations=True).data
# NOTE(review): s2 is computed but nothing is asserted about it in the visible lines


def test_preprocessor_to_from_dict(preprocessor, sample):
Expand Down

0 comments on commit a3d4001

Please sign in to comment.