Skip to content

Commit

Permalink
Merge branch 'langchain' of https://github.com/sensein/senselab into …
Browse files Browse the repository at this point in the history
…langchain
  • Loading branch information
batwood-1 committed Jan 23, 2025
2 parents c728b95 + f9399c4 commit cad601f
Show file tree
Hide file tree
Showing 24 changed files with 324 additions and 70 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ jobs:
--verbose
shell: bash
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}

Expand Down
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: check-added-large-files
args: [--maxkb=15000]
- id: check-case-conflict
- id: end-of-file-fixer
- id: trailing-whitespace
Expand Down
35 changes: 35 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,38 @@
# 0.26.1 (Mon Dec 30 2024)

#### 🐛 Bug Fix

- Update model.py [#228](https://github.com/sensein/senselab/pull/228) ([@fabiocat93](https://github.com/fabiocat93))

#### 📝 Documentation

- Updating tutorial files [#210](https://github.com/sensein/senselab/pull/210) ([@900miles](https://github.com/900miles) [@fabiocat93](https://github.com/fabiocat93))

#### 🔩 Dependency Updates

- Bump codecov/codecov-action from 4 to 5 [#211](https://github.com/sensein/senselab/pull/211) ([@dependabot[bot]](https://github.com/dependabot[bot]) [@fabiocat93](https://github.com/fabiocat93))

#### Authors: 3

- [@900miles](https://github.com/900miles)
- [@dependabot[bot]](https://github.com/dependabot[bot])
- Fabio Catania ([@fabiocat93](https://github.com/fabiocat93))

---

# 0.26.0 (Mon Dec 23 2024)

#### 🚀 Enhancement

- Bump the production-dependencies group across 1 directory with 3 updates [#218](https://github.com/sensein/senselab/pull/218) ([@dependabot[bot]](https://github.com/dependabot[bot]) [@fabiocat93](https://github.com/fabiocat93))

#### Authors: 2

- [@dependabot[bot]](https://github.com/dependabot[bot])
- Fabio Catania ([@fabiocat93](https://github.com/fabiocat93))

---

# 0.25.0 (Tue Dec 10 2024)

#### 🚀 Enhancement
Expand Down
9 changes: 3 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ datasets = "~=3"
torch = "~=2.5"
torchvision = "~=0.20"
torchaudio = "~=2.5"
transformers = "~=4.46.2"
transformers = "~=4.47"
pydra = "~=0.25"
pydantic = "~=2.7"
accelerate = "*"
Expand All @@ -43,7 +43,7 @@ torch-audiomentations = "~=0.11"
sentence-transformers = "~=3.1"
jiwer = "~=3.0"
speechbrain = "~=1"
pyav = "~=13"
pyav = "~=14.0"
pyannote-audio = "~=3.3"
pycountry = "~=24.6"
types-requests = "~=2.32"
Expand Down Expand Up @@ -130,10 +130,7 @@ target-version = "py310"

[tool.ruff.lint]
select = ["ANN", "D", "E", "F", "I"]
ignore = [
"ANN101", # self should not be annotated.
"ANN102" # cls should not be annotated.
]
ignore = []
fixable = ["ALL"]
unfixable = []
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
Expand Down
4 changes: 2 additions & 2 deletions src/senselab/text/tasks/evaluate_conversation/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ def Rouge(*args: List, **kwargs: Dict) -> rouge_scorer.RougeScorer:
Rouge.__doc__ = rouge_scorer.RougeScorer.__doc__


def sentence_bleu_sacre(*args: List, **kwargs: Dict) -> BLEUScore:
def sentence_bleu_sacre(*args: List) -> List[BLEUScore]:
"""Wrapper for sacrebleu's sentence_bleu function."""
return sb.sentence_bleu(*args, **kwargs)
return [sb.sentence_bleu(str(item), str(ref)) for item, ref in args]


sentence_bleu_sacre.__doc__ = sb.sentence_bleu.__doc__
Expand Down
10 changes: 5 additions & 5 deletions src/senselab/text/tasks/llms/transcript_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class Transcript:
convert_json_to_scriptlines(json_obj: Dict) -> List[ScriptLine]: Converts transcript format to LLM format.
"""

def __init__(self, transcript_path: Path) -> None:
def __init__(self: "Transcript", transcript_path: Path) -> None:
"""Initializes the manager with a transcript file path.
Args:
Expand All @@ -38,12 +38,12 @@ def __init__(self, transcript_path: Path) -> None:
json_obj = self._load_transcript(transcript_path)
self.scriptlines = self.convert_json_to_scriptlines(json_obj)

def print_human_readable(self) -> None:
def print_human_readable(self: "Transcript") -> None:
"""Prints the stored scriptlines in a human-readable format."""
for message in self.scriptlines:
print(f"{message.speaker}:\t\t{message.text}\n")

def get_num_tokens(self) -> int:
def get_num_tokens(self: "Transcript") -> int:
"""Returns the total number of OpenAI tokens in the conversation.
Returns:
Expand All @@ -56,7 +56,7 @@ def get_num_tokens(self) -> int:
c += len(encoding.encode(message.text))
return c

def extract_response_opportunities(self) -> List[List[ScriptLine]]:
def extract_response_opportunities(self: "Transcript") -> List[List[ScriptLine]]:
"""Extract consecutive sublists from the messages list, ending after every 'user' response.
This is used to compare AI responses to a human's response
Expand All @@ -71,7 +71,7 @@ def extract_response_opportunities(self) -> List[List[ScriptLine]]:
sublists = []

for i, message in enumerate(self.scriptlines):
if message.speaker == "user" and i > 0:
if message.speaker == "user" and i >= 0:
sublist = self.scriptlines[0 : i + 1]
sublists.append(sublist)

Expand Down
6 changes: 3 additions & 3 deletions src/senselab/utils/data_structures/device.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
class DeviceType(Enum):
"""Device types for PyTorch operations."""

CPU: str = "cpu"
CUDA: str = "cuda"
MPS: str = "mps"
CPU = "cpu"
CUDA = "cuda"
MPS = "mps"


DTYPE_MAP = {DeviceType.CPU: torch.float32, DeviceType.CUDA: torch.float16, DeviceType.MPS: torch.float32}
Expand Down
5 changes: 4 additions & 1 deletion src/senselab/utils/data_structures/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,10 @@ def check_hf_repo_exists(repo_id: str, revision: str = "main", repo_type: str =
"""Private function to check if a Hugging Face repository exists."""
api = HfApi()
try:
api.list_repo_commits(repo_id=repo_id, revision=revision, repo_type=repo_type)
if repo_type == "model":
api.model_info(repo_id=repo_id, revision=revision)
else:
api.list_repo_commits(repo_id=repo_id, revision=revision, repo_type=repo_type)
return True
except Exception:
# raise RuntimeError(f"An error occurred: {e}")
Expand Down
2 changes: 2 additions & 0 deletions src/tests/text/tasks/transcript_manager_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,9 @@ def test_get_num_tokens(sample_transcript: Path) -> None:
def test_response_opportunities_extraction(sample_transcript: Path) -> None:
"""Test the extraction of response opportunities."""
transcript = Transcript(sample_transcript)
print(transcript)
opportunities = transcript.extract_response_opportunities()
print(opportunities)

assert len(opportunities) == 2, "Expected two response opportunities"
assert opportunities[0][-1].speaker == "user", "Expected last message to be first message from user"
Expand Down
2 changes: 1 addition & 1 deletion src/tests/utils/data_structures/model_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

def test_check_hf_repo_exists_true() -> None:
"""Test HF repo exists."""
with patch("huggingface_hub.HfApi.list_repo_commits") as mock_list_repo_commits:
with patch("huggingface_hub.HfApi.model_info") as mock_list_repo_commits:
mock_list_repo_commits.return_value = True
assert check_hf_repo_exists("valid_repo") is True

Expand Down
28 changes: 20 additions & 8 deletions tutorials/audio/00_getting_started.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"source": [
"# Getting Started with ```senselab```\n",
"\n",
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/sensein/senselab/blob/main/tutorials/audiogetting_started.ipynb)\n",
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/sensein/senselab/blob/main/tutorials/audio/00_getting_started.ipynb)\n",
"\n",
"\n",
"Welcome to the `senselab` quick start tutorial! \n",
Expand Down Expand Up @@ -35,7 +35,7 @@
},
"outputs": [],
"source": [
"pip install senselab"
"%pip install senselab"
]
},
{
Expand All @@ -48,14 +48,17 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from senselab.audio.data_structures import Audio\n",
"!mkdir -p tutorial_audio_files\n",
"!wget -O tutorial_audio_files/audio_48khz_mono_16bits.wav https://github.com/sensein/senselab/raw/main/src/tests/data_for_testing/audio_48khz_mono_16bits.wav\n",
"!wget -O tutorial_audio_files/audio_48khz_stereo_16bits.wav https://github.com/sensein/senselab/raw/main/src/tests/data_for_testing/audio_48khz_stereo_16bits.wav\n",
"\n",
"MONO_AUDIO_PATH = \"../../src/tests/data_for_testing/audio_48khz_mono_16bits.wav\"\n",
"STEREO_AUDIO_PATH = \"../../src/tests/data_for_testing/audio_48khz_stereo_16bits.wav\"\n",
"MONO_AUDIO_PATH = \"tutorial_audio_files/audio_48khz_mono_16bits.wav\"\n",
"STEREO_AUDIO_PATH = \"tutorial_audio_files/audio_48khz_stereo_16bits.wav\"\n",
"\n",
"audio1 = Audio.from_filepath(MONO_AUDIO_PATH)\n",
"audio2 = Audio.from_filepath(STEREO_AUDIO_PATH)"
Expand All @@ -71,9 +74,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The original audio has 2 channels.\n",
"The downmixed audio has 1 channels.\n"
]
}
],
"source": [
"from senselab.audio.tasks.preprocessing import downmix_audios_to_mono\n",
"\n",
Expand Down Expand Up @@ -331,7 +343,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
"version": "3.12.0"
}
},
"nbformat": 4,
Expand Down
25 changes: 22 additions & 3 deletions tutorials/audio/audio_data_augmentation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,23 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"We start by importing the modules required for the augmentation, plotting, and audio processing tasks."
"First, we should install senselab if it has not already been installed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install senselab"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now, we start by importing the modules required for the augmentation, plotting, and audio processing tasks."
]
},
{
Expand Down Expand Up @@ -78,7 +94,10 @@
"outputs": [],
"source": [
"# Load an audio file\n",
"audio = Audio.from_filepath(\"../../src/tests/data_for_testing/audio_48khz_mono_16bits.wav\")\n",
"!mkdir -p tutorial_audio_files\n",
"!wget -O tutorial_audio_files/audio_48khz_mono_16bits.wav https://github.com/sensein/senselab/raw/main/src/tests/data_for_testing/audio_48khz_mono_16bits.wav\n",
"\n",
"audio = Audio.from_filepath(\"tutorial_audio_files/audio_48khz_mono_16bits.wav\")\n",
"\n",
"# Play the audio\n",
"play_audio(audio)\n",
Expand Down Expand Up @@ -145,7 +164,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
"version": "3.12.0"
}
},
"nbformat": 4,
Expand Down
27 changes: 20 additions & 7 deletions tutorials/audio/extract_speaker_embeddings.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@
"First, let's import the necessary libraries and the function we'll be using."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install senselab"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -45,12 +54,16 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"audio1 = Audio.from_filepath(\"../../src/tests/data_for_testing/audio_48khz_mono_16bits.wav\")\n",
"audio2 = Audio.from_filepath(\"../../src/tests/data_for_testing/audio_48khz_stereo_16bits.wav\")\n",
"!mkdir -p tutorial_audio_files\n",
"!wget -O tutorial_audio_files/audio_48khz_mono_16bits.wav https://github.com/sensein/senselab/raw/main/src/tests/data_for_testing/audio_48khz_mono_16bits.wav\n",
"!wget -O tutorial_audio_files/audio_48khz_stereo_16bits.wav https://github.com/sensein/senselab/raw/main/src/tests/data_for_testing/audio_48khz_stereo_16bits.wav\n",
"\n",
"audio1 = Audio.from_filepath(\"tutorial_audio_files/audio_48khz_mono_16bits.wav\")\n",
"audio2 = Audio.from_filepath(\"tutorial_audio_files/audio_48khz_stereo_16bits.wav\")\n",
"\n",
"# Downmix to mono\n",
"audio2 = downmix_audios_to_mono([audio2])[0]\n",
Expand Down Expand Up @@ -99,7 +112,7 @@
"metadata": {},
"outputs": [],
"source": [
"from senselab.utils.tasks.cosine_similarity import cosine_similarity\n",
"from senselab.utils.tasks.cosine_similarity import compute_cosine_similarity\n",
"\n",
"\n",
"# DIRECTLY PLOT THE EMBEDDINGS FOR THE TWO FILES\n",
Expand Down Expand Up @@ -129,7 +142,7 @@
" \n",
" for i in range(n):\n",
" for j in range(n):\n",
" similarity_matrix[i, j] = cosine_similarity(embeddings[i], embeddings[j])\n",
" similarity_matrix[i, j] = compute_cosine_similarity(embeddings[i], embeddings[j])\n",
" \n",
" fig, ax = plt.subplots(figsize=(8, 6))\n",
" im = ax.imshow(similarity_matrix, cmap='coolwarm', vmin=-1, vmax=1)\n",
Expand Down Expand Up @@ -174,7 +187,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "senselab",
"display_name": "senselab-KP8v1V64-py3.10",
"language": "python",
"name": "python3"
},
Expand All @@ -188,7 +201,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
"version": "3.10.10"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit cad601f

Please sign in to comment.