
Commit

running pre-commit hooks
fabiocat93 authored and brukew committed Nov 19, 2024
1 parent ef07a02 commit e8b7e11
Showing 9 changed files with 2,559 additions and 2,565 deletions.
2 changes: 1 addition & 1 deletion src/senselab/audio/tasks/features_extraction/__init__.py
@@ -1 +1 @@
""".. include:: ./doc.md""" # noqa: D415
""".. include:: ./doc.md""" # noqa: D415
54 changes: 28 additions & 26 deletions src/senselab/audio/tasks/features_extraction/api.py
@@ -1,6 +1,5 @@
"""This module provides functions for extracting features from audio files."""


from typing import Any, Dict, List

import pydra
@@ -24,37 +23,42 @@ def extract_features_from_audios(audios: List[Audio], plugin: str = "cf") -> Lis
List[Dict[str, Any]]: The list of feature dictionaries for each audio.
"""
# opensmile
-    extract_opensmile_features_from_audios_pt = pydra.mark.task(
-        extract_opensmile_features_from_audios)
+    extract_opensmile_features_from_audios_pt = pydra.mark.task(extract_opensmile_features_from_audios)
# praat_parselmouth
-    extract_praat_parselmouth_features_from_audios_pt = pydra.mark.task(
-        extract_praat_parselmouth_features_from_audios)
+    extract_praat_parselmouth_features_from_audios_pt = pydra.mark.task(extract_praat_parselmouth_features_from_audios)
# torchaudio
extract_torchaudio_features_from_audios_pt = pydra.mark.task(extract_torchaudio_features_from_audios)
# torchaudio_squim
-    extract_objective_quality_features_from_audios_pt = pydra.mark.task(
-        extract_objective_quality_features_from_audios)
+    extract_objective_quality_features_from_audios_pt = pydra.mark.task(extract_objective_quality_features_from_audios)

formatted_audios = [[audio] for audio in audios]

wf = pydra.Workflow(name="wf", input_spec=["x"])
wf.split("x", x=formatted_audios)
-    wf.add(extract_opensmile_features_from_audios_pt(name="extract_opensmile_features_from_audios_pt",
-                                                     audios=wf.lzin.x))
-    wf.add(extract_praat_parselmouth_features_from_audios_pt(name="extract_praat_parselmouth_features_from_audios_pt",
-                                                             audios=wf.lzin.x))
-    wf.add(extract_torchaudio_features_from_audios_pt(name="extract_torchaudio_features_from_audios_pt",
-                                                      audios=wf.lzin.x))
-    wf.add(extract_objective_quality_features_from_audios_pt(name="extract_objective_quality_features_from_audios_pt",
-                                                             audio_list=wf.lzin.x))
+    wf.add(
+        extract_opensmile_features_from_audios_pt(name="extract_opensmile_features_from_audios_pt", audios=wf.lzin.x)
+    )
+    wf.add(
+        extract_praat_parselmouth_features_from_audios_pt(
+            name="extract_praat_parselmouth_features_from_audios_pt", audios=wf.lzin.x
+        )
+    )
+    wf.add(
+        extract_torchaudio_features_from_audios_pt(name="extract_torchaudio_features_from_audios_pt", audios=wf.lzin.x)
+    )
+    wf.add(
+        extract_objective_quality_features_from_audios_pt(
+            name="extract_objective_quality_features_from_audios_pt", audio_list=wf.lzin.x
+        )
+    )

# setting multiple workflow outputs
wf.set_output(
[
("opensmile_out", wf.extract_opensmile_features_from_audios_pt.lzout.out),
("praat_parselmouth_out", wf.extract_praat_parselmouth_features_from_audios_pt.lzout.out),
("opensmile_out", wf.extract_opensmile_features_from_audios_pt.lzout.out),
("praat_parselmouth_out", wf.extract_praat_parselmouth_features_from_audios_pt.lzout.out),
("torchaudio_out", wf.extract_torchaudio_features_from_audios_pt.lzout.out),
("torchaudio_squim_out", wf.extract_objective_quality_features_from_audios_pt.lzout.out)
("torchaudio_squim_out", wf.extract_objective_quality_features_from_audios_pt.lzout.out),
]
)

@@ -67,17 +71,15 @@ def extract_features_from_audios(audios: List[Audio], plugin: str = "cf") -> Lis
for output in outputs:
formatted_output_item = {
"opensmile": output.output.opensmile_out[0],
"praat_parselmouth": output.output.praat_parselmouth_out['praat_parselmouth'][0],
"torchaudio": output.output.torchaudio_out[0]['torchaudio'],
"praat_parselmouth": output.output.praat_parselmouth_out["praat_parselmouth"][0],
"torchaudio": output.output.torchaudio_out[0]["torchaudio"],
"torchaudio_squim": {
"stoi": output.output.torchaudio_squim_out['stoi'][0],
"pesq": output.output.torchaudio_squim_out['pesq'][0],
"si_sdr": output.output.torchaudio_squim_out['si_sdr'][0]
}
"stoi": output.output.torchaudio_squim_out["stoi"][0],
"pesq": output.output.torchaudio_squim_out["pesq"][0],
"si_sdr": output.output.torchaudio_squim_out["si_sdr"][0],
},
}

formatted_output.append(formatted_output_item)

return formatted_output
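As a quick orientation to the API being reformatted here, a minimal usage sketch of `extract_features_from_audios` and the structure it returns, based on the workflow and output formatting above. The `Audio.from_filepath` constructor and the `senselab.audio.data_structures.audio` import path are assumptions, not something this commit confirms:

from typing import Any, Dict, List

from senselab.audio.data_structures.audio import Audio  # assumed import path
from senselab.audio.tasks.features_extraction.api import extract_features_from_audios

# Assumed constructor; build the Audio objects however senselab expects.
audios: List[Audio] = [Audio.from_filepath("sample.wav")]

# One dictionary per input audio, combining the four extractors wired into the workflow.
features: List[Dict[str, Any]] = extract_features_from_audios(audios, plugin="cf")

first = features[0]
print(first["opensmile"])                   # openSMILE features
print(first["praat_parselmouth"])           # Praat/Parselmouth descriptors
print(first["torchaudio"])                  # torchaudio features
print(first["torchaudio_squim"]["stoi"])    # objective quality metrics: STOI
print(first["torchaudio_squim"]["pesq"])    # PESQ
print(first["torchaudio_squim"]["si_sdr"])  # SI-SDR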


2 changes: 1 addition & 1 deletion src/senselab/audio/tasks/features_extraction/doc.md
@@ -10,6 +10,6 @@ This module provides the API of the senselab audio features extraction.

Features can be extracted using `opensmile`, `praat-parselmouth`, `torchaudio`, and `torchaudio-squim`.
We are working to facilitate the way to extract features in a meaningful way.
-Also, we are working to optimize these utilities.
+Also, we are working to optimize these utilities.

**STAY TUNED**.
30 changes: 17 additions & 13 deletions src/senselab/audio/tasks/features_extraction/praat_parselmouth.py
@@ -432,12 +432,12 @@ def extract_pitch_descriptors(
if current_frame is not None:
current_function_name = current_frame.f_code.co_name
logger.error(f'Error in "{current_function_name}": \n' + str(e))
return {f"mean_f0_{unit.lower()}": float("nan"),
f"stdev_f0_{unit.lower()}": float("nan")}
return {f"mean_f0_{unit.lower()}": float("nan"), f"stdev_f0_{unit.lower()}": float("nan")}


-def extract_intensity_descriptors(snd: Union[parselmouth.Sound, Path, Audio],
-                                  floor: float, frame_shift: float) -> Dict[str, float]:
+def extract_intensity_descriptors(
+    snd: Union[parselmouth.Sound, Path, Audio], floor: float, frame_shift: float
+) -> Dict[str, float]:
"""Extract Intensity Features.
Function to extract key intensity information from a given sound object.
@@ -543,7 +543,7 @@ def extract_harmonicity_descriptors(
if current_frame is not None:
current_function_name = current_frame.f_code.co_name
logger.error(f'Error in "{current_function_name}": \n' + str(e))

return {"hnr_db_mean": float("nan"), "hnr_db_std_dev": float("nan")}


@@ -924,10 +924,12 @@ def extract_spectral_moments(
if current_frame is not None:
current_function_name = current_frame.f_code.co_name
logger.error(f'Error in "{current_function_name}": \n' + str(e))
return {"spectral_gravity": np.nan,
"spectral_std_dev": np.nan,
"spectral_skewness": np.nan,
"spectral_kurtosis": np.nan}
return {
"spectral_gravity": np.nan,
"spectral_std_dev": np.nan,
"spectral_skewness": np.nan,
"spectral_kurtosis": np.nan,
}


### More functions ###
@@ -1044,9 +1046,9 @@ def _extract_shimmer(type: str, sound: parselmouth.Sound, point_process: parselm


### Wrapper ###
-def extract_praat_parselmouth_features_from_audios(audios: List[Audio],
-                                                   cache_dir: Optional[str] = None,
-                                                   plugin: str = "cf") -> dict:
+def extract_praat_parselmouth_features_from_audios(
+    audios: List[Audio], cache_dir: Optional[str] = None, plugin: str = "cf"
+) -> dict:
"""Extract features from a list of Audio objects and return a JSON-like dictionary.
Args:
@@ -1073,10 +1075,12 @@ def extract_praat_parselmouth_features_from_audios(audios: List[Audio],

def _extract_pitch_floor(pitch_values_out: dict) -> float:
return pitch_values_out["pitch_floor"]

_extract_pitch_floor_pt = pydra.mark.task(_extract_pitch_floor)

def _extract_pitch_ceiling(pitch_values_out: dict) -> float:
return pitch_values_out["pitch_ceiling"]

_extract_pitch_ceiling_pt = pydra.mark.task(_extract_pitch_ceiling)

# Create the workflow
@@ -1101,7 +1105,7 @@ def _extract_pitch_ceiling(pitch_values_out: dict) -> float:
floor=wf._extract_pitch_floor_pt.lzout.out,
ceiling=wf._extract_pitch_ceiling_pt.lzout.out,
frame_shift=time_step,
-            unit=unit
+            unit=unit,
)
)
wf.add(
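A minimal sketch of calling the wrapper shown above directly, with the `audios`, `cache_dir`, and pydra `plugin` arguments from its new signature; the `Audio` import path and the `Audio.from_filepath` constructor are assumptions:

from senselab.audio.data_structures.audio import Audio  # assumed import path
from senselab.audio.tasks.features_extraction.praat_parselmouth import (
    extract_praat_parselmouth_features_from_audios,
)

audios = [Audio.from_filepath("sample.wav")]  # assumed constructor

# Returns a JSON-like dict; per the API module above, the per-audio results live
# under the "praat_parselmouth" key, aligned with the input list.
result = extract_praat_parselmouth_features_from_audios(audios, cache_dir=None, plugin="cf")
print(result["praat_parselmouth"][0])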
40 changes: 14 additions & 26 deletions src/senselab/audio/tasks/features_extraction/torchaudio.py
@@ -177,19 +177,14 @@ def extract_pitch_from_audios(
pitches.append(
{
"pitch": torchaudio.functional.detect_pitch_frequency(
-                    audio.waveform,
-                    sample_rate=audio.sampling_rate,
-                    freq_low=freq_low,
-                    freq_high=freq_high
+                    audio.waveform, sample_rate=audio.sampling_rate, freq_low=freq_low, freq_high=freq_high
).squeeze(0)
}
)
return pitches



-def extract_torchaudio_features_from_audios(audios: List[Audio],
-                                            plugin: str = "cf") -> List[Dict[str, Any]]:
+def extract_torchaudio_features_from_audios(audios: List[Audio], plugin: str = "cf") -> List[Dict[str, Any]]:
"""Extract torchaudio features from a list of audio objects.
Args:
@@ -200,26 +195,19 @@ def extract_torchaudio_features_from_audios(audios: List[Audio],
List[Dict[str, Any]]: The list of feature dictionaries for each audio.
"""
extract_pitch_from_audios_pt = pydra.mark.task(extract_pitch_from_audios)
-    extract_mel_filter_bank_from_audios_pt = pydra.mark.task(
-        extract_mel_filter_bank_from_audios)
+    extract_mel_filter_bank_from_audios_pt = pydra.mark.task(extract_mel_filter_bank_from_audios)
extract_mfcc_from_audios_pt = pydra.mark.task(extract_mfcc_from_audios)
-    extract_mel_spectrogram_from_audios_pt = pydra.mark.task(
-        extract_mel_spectrogram_from_audios)
+    extract_mel_spectrogram_from_audios_pt = pydra.mark.task(extract_mel_spectrogram_from_audios)
extract_spectrogram_from_audios_pt = pydra.mark.task(extract_spectrogram_from_audios)

formatted_audios = [[audio] for audio in audios]
wf = pydra.Workflow(name="wf", input_spec=["x"])
wf.split("x", x=formatted_audios)
-    wf.add(extract_pitch_from_audios_pt(name="extract_pitch_from_audios_pt",
-                                        audios=wf.lzin.x))
-    wf.add(extract_mel_filter_bank_from_audios_pt(name="extract_mel_filter_bank_from_audios_pt",
-                                                  audios=wf.lzin.x))
-    wf.add(extract_mfcc_from_audios_pt(name="extract_mfcc_from_audios_pt",
-                                       audios=wf.lzin.x))
-    wf.add(extract_mel_spectrogram_from_audios_pt(name="extract_mel_spectrogram_from_audios_pt",
-                                                  audios=wf.lzin.x))
-    wf.add(extract_spectrogram_from_audios_pt(name="extract_spectrogram_from_audios_pt",
-                                              audios=wf.lzin.x))
+    wf.add(extract_pitch_from_audios_pt(name="extract_pitch_from_audios_pt", audios=wf.lzin.x))
+    wf.add(extract_mel_filter_bank_from_audios_pt(name="extract_mel_filter_bank_from_audios_pt", audios=wf.lzin.x))
+    wf.add(extract_mfcc_from_audios_pt(name="extract_mfcc_from_audios_pt", audios=wf.lzin.x))
+    wf.add(extract_mel_spectrogram_from_audios_pt(name="extract_mel_spectrogram_from_audios_pt", audios=wf.lzin.x))
+    wf.add(extract_spectrogram_from_audios_pt(name="extract_spectrogram_from_audios_pt", audios=wf.lzin.x))

# setting multiple workflow outputs
wf.set_output(
@@ -241,11 +229,11 @@ def extract_torchaudio_features_from_audios(audios: List[Audio],
for output in outputs:
formatted_output_item = {
"torchaudio": {
"pitch": output.output.pitch_out[0]['pitch'],
"mel_filter_bank": output.output.mel_filter_bank_out[0]['mel_filter_bank'],
"mfcc": output.output.mfcc_out[0]['mfcc'],
"mel_spectrogram": output.output.mel_spectrogram_out[0]['mel_spectrogram'],
"spectrogram": output.output.spectrogram_out[0]['spectrogram'],
"pitch": output.output.pitch_out[0]["pitch"],
"mel_filter_bank": output.output.mel_filter_bank_out[0]["mel_filter_bank"],
"mfcc": output.output.mfcc_out[0]["mfcc"],
"mel_spectrogram": output.output.mel_spectrogram_out[0]["mel_spectrogram"],
"spectrogram": output.output.spectrogram_out[0]["spectrogram"],
}
}

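A minimal sketch of the module-level wrapper, reflecting the output keys assembled above (`pitch`, `mel_filter_bank`, `mfcc`, `mel_spectrogram`, `spectrogram`); the `Audio` import path and the `Audio.from_filepath` constructor are assumptions, not confirmed by this diff:

from senselab.audio.data_structures.audio import Audio  # assumed import path
from senselab.audio.tasks.features_extraction.torchaudio import extract_torchaudio_features_from_audios

audios = [Audio.from_filepath("sample.wav")]  # assumed constructor

# One dict per audio; the "torchaudio" entry bundles the five extractors combined above.
per_audio = extract_torchaudio_features_from_audios(audios, plugin="cf")
torchaudio_features = per_audio[0]["torchaudio"]
print(sorted(torchaudio_features.keys()))
# expected: ['mel_filter_bank', 'mel_spectrogram', 'mfcc', 'pitch', 'spectrogram']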
@@ -9,6 +9,7 @@
objective_model = SQUIM_OBJECTIVE.get_model()
subjective_model = SQUIM_SUBJECTIVE.get_model()


def extract_objective_quality_features_from_audios(audio_list: List[Audio]) -> Dict[str, Any]:
"""Extracts objective audio features from a list of Audio objects.
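A minimal sketch of the objective-quality entry point, based on its signature and on how the API module above reads its output; the `torchaudio_squim` module path is an assumption (the file header for this diff was not captured), as are the `Audio` import path and constructor:

from senselab.audio.data_structures.audio import Audio  # assumed import path
from senselab.audio.tasks.features_extraction.torchaudio_squim import (  # assumed module path
    extract_objective_quality_features_from_audios,
)

audio_list = [Audio.from_filepath("sample.wav")]  # assumed constructor

# Per the API module above, the result maps "stoi", "pesq", and "si_sdr" to lists aligned with audio_list.
quality = extract_objective_quality_features_from_audios(audio_list)
print(quality["stoi"][0], quality["pesq"][0], quality["si_sdr"][0])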
7 changes: 3 additions & 4 deletions src/tests/audio/tasks/features_extraction_test.py
@@ -122,10 +122,9 @@ def test_extract_spectral_moments(resampled_mono_audio_sample: Audio) -> None:
resampled_mono_audio_sample, floor=75.0, ceiling=500.0, window_size=0.025, frame_shift=0.01
)
assert isinstance(result, dict)
-    assert all(key in result for key in ["spectral_gravity",
-                                         "spectral_std_dev",
-                                         "spectral_skewness",
-                                         "spectral_kurtosis"])
+    assert all(
+        key in result for key in ["spectral_gravity", "spectral_std_dev", "spectral_skewness", "spectral_kurtosis"]
+    )
assert all(isinstance(result[key], float) for key in result)


8 changes: 4 additions & 4 deletions tutorials/audio/00_getting_started.ipynb
@@ -66,7 +66,7 @@
"metadata": {},
"source": [
"## Downmixing audio clips to mono\n",
"Want to downmix your audio to mono? It has neve been that **EASY!**! Here’s how:"
"Want to downmix your audio to mono? It has neve been that **EASY!**! Here\u2019s how:"
]
},
{
@@ -206,7 +206,7 @@
"metadata": {},
"source": [
"## Speaker Embeddings\n",
"Need to get unique speaker signatures? **EASY!** Here’s how:"
"Need to get unique speaker signatures? **EASY!** Here\u2019s how:"
]
},
{
@@ -251,7 +251,7 @@
"metadata": {},
"source": [
"## Audio Augmentation\n",
"Need to augment your audio data? **EASY!**! Here’s how:"
"Need to augment your audio data? **EASY!**! Here\u2019s how:"
]
},
{
@@ -296,7 +296,7 @@
"metadata": {},
"source": [
"## Speech Enhancement\n",
"Need to clean up your audio? **EASY!** Here’s how:"
"Need to clean up your audio? **EASY!** Here\u2019s how:"
]
},
{

