From 20b6898b444c007f15b5b77c10a2cf589532db31 Mon Sep 17 00:00:00 2001
From: Sam Stoelinga <sammiestoel@gmail.com>
Date: Tue, 3 Sep 2024 21:58:35 -0700
Subject: [PATCH] add speech to text model

---
 charts/kubeai/Chart.yaml                |  2 +-
 charts/kubeai/charts/models/Chart.yaml  |  2 +-
 charts/kubeai/charts/models/values.yaml |  7 +++++++
 test/quickstart.sh                      | 13 +++++++++++++
 4 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/charts/kubeai/Chart.yaml b/charts/kubeai/Chart.yaml
index a1f007ad..3bc8a84d 100644
--- a/charts/kubeai/Chart.yaml
+++ b/charts/kubeai/Chart.yaml
@@ -31,7 +31,7 @@ dependencies:
   # These are instances of the Model custom resource.
   - name: models
     condition: models.enabled
-    version: 0.1.0
+    version: 0.1.1
 
 
 keywords: ["LLM", "AI"]
diff --git a/charts/kubeai/charts/models/Chart.yaml b/charts/kubeai/charts/models/Chart.yaml
index e2136ae7..609c4ad0 100644
--- a/charts/kubeai/charts/models/Chart.yaml
+++ b/charts/kubeai/charts/models/Chart.yaml
@@ -15,7 +15,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.0
+version: 0.1.1
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
diff --git a/charts/kubeai/charts/models/values.yaml b/charts/kubeai/charts/models/values.yaml
index 4608daa1..1a87d1cd 100644
--- a/charts/kubeai/charts/models/values.yaml
+++ b/charts/kubeai/charts/models/values.yaml
@@ -76,4 +76,11 @@ catalog:
     url: "ollama://qwen2:0.5b"
     engine: OLlama
     resourceProfile: cpu:1
+  faster-whisper-medium-en-cpu:
+    enabled: false  # off by default; set enabled: true in a values override (test/quickstart.sh does this)
+    features: ["SpeechToText"]
+    owner: Systran
+    url: "hf://Systran/faster-whisper-medium.en"
+    engine: FasterWhisper
+    resourceProfile: cpu:1
 
diff --git a/test/quickstart.sh b/test/quickstart.sh
index efcff641..2693f194 100755
--- a/test/quickstart.sh
+++ b/test/quickstart.sh
@@ -63,6 +63,8 @@ models:
       enabled: true
     nomic-embed-text-cpu:
       enabled: true
+    faster-whisper-medium-en-cpu:
+      enabled: true
 EOF
 
 wait_for_pod_ready model=gemma2-2b-cpu
@@ -73,3 +75,14 @@ curl http://localhost:8000/openai/v1/completions \
   -H "Content-Type: application/json" \
   -d '{"model": "gemma2-2b-cpu", "prompt": "Who was the first president of the United States?", "max_tokens": 40}'
 
+# Test the speech to text endpoint
+curl -L -o kubeai.mp4 https://github.com/user-attachments/assets/711d1279-6af9-4c6c-a052-e59e7730b757
+result=$(curl http://localhost:8000/openai/v1/audio/transcriptions \
+  -F "file=@kubeai.mp4" \
+  -F "language=en" | jq '.text | ascii_downcase | contains("kubernetes")')
+if [ "$result" = "true" ]; then
+  echo "The transcript contains 'kubernetes'."
+else
+  echo "The text does not contain 'kubernetes'."
+  exit 1
+fi