Skip to content

Commit

Permalink
transcribe stream
Browse files Browse the repository at this point in the history
  • Loading branch information
lluisd committed Nov 13, 2024
1 parent 080d2ff commit b11743e
Show file tree
Hide file tree
Showing 10 changed files with 149 additions and 6 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/main_twitch-mz-bot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ jobs:
envkey_TWITCH_HOSTNAME: ${{ secrets.TWITCH_HOSTNAME }}
envkey_WHISPER_ENDPOINT: ${{ secrets.WHISPER_ENDPOINT }}
envkey_AZURE_OPENAI_ASSISTANT_ID: ${{ secrets.AZURE_OPENAI_ASSISTANT_ID }}
envkey_AZURE_STORAGE_CONNECTION_STRING: ${{ secrets.AZURE_STORAGE_CONNECTION_STRING }}
envkey_AZURE_STORAGE_CONTAINER_NAME: transcriptions
- name: Set up Node.js version
uses: actions/setup-node@v4
with:
Expand Down
10 changes: 10 additions & 0 deletions app.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const TwitchService = require("./services/twitch");
const ScreenshotService = require("./services/screenshot");
const moment = require('moment-timezone')
const EventSub = require('./lib/eventSub')
const handlers = require('./handlers')

mongoose.connect(config.database).then(() => {
const messenger = new Messenger()
Expand All @@ -30,6 +31,15 @@ mongoose.connect(config.database).then(() => {
next();
});

app.get('/transcribe', async function(req, res) {
await handlers.openAI.uploadStreamToOpenai(`#${config.twitch.channels}`, bot)
const response = {
message: 'transcription started',
status: 'success'
};
res.json(response);
});

app.get('/p/:id', (req, res) => {
const spotNumber = parseInt(req.params.id)
if (typeof spotNumber === 'number') {
Expand Down
4 changes: 4 additions & 0 deletions config/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,9 @@ module.exports = {
},
whisper: {
endpoint: process.env.WHISPER_ENDPOINT,
},
blobStorage: {
connectionString: process.env.AZURE_STORAGE_CONNECTION_STRING,
containerName: process.env.AZURE_STORAGE_CONTAINER_NAME
}
}
26 changes: 25 additions & 1 deletion handlers/openAI.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
const OpenAIService = require('../services/openAI')
const LoggerService = require('../services/logger')
const TranscriptionsService = require('../services/transcriptions')
const moment = require("moment/moment");

class OpenAI {
async askOpenAI (target, text, username, bot) {
const response = await OpenAIService.askAssistant(text)
Expand All @@ -19,14 +21,36 @@ class OpenAI {
const json = JSON.stringify(response)

const formattedDate = date.format('YYYY-MM-DD');
const result = await OpenAIService.uploadFileToVectorStore(json, formattedDate)
const result = await OpenAIService.uploadFileToVectorStore(json, formattedDate, 'chat')
if (result.success) {
bot.say(target, `IA actualizada con el chat de ${formattedDate}`)
console.log('Chat uploaded to openai ' + result.filename)
} else {
console.log('Error uploading chat to openai of date ' + formattedDate)
}
}

async uploadStreamToOpenai (target, bot) {
const { mergedJsons, blobNames } = await TranscriptionsService.getBlobs()
let error = false
for (const date in mergedJsons) {
if (mergedJsons.hasOwnProperty(date)) {
const formattedDate = moment(date, 'YYYYMMDD').format('YYYY-MM-DD');
const result = await OpenAIService.uploadFileToVectorStore(mergedJsons[date], formattedDate, 'stream')
if (result.success) {
bot.say(target, `IA actualizada con el stream de ${formattedDate}`)
console.log('Stream uploaded to openai ' + result.filename)
} else {
error = true
console.log('Error uploading stream to openai of date ' + formattedDate)
}
}
}

if (!error) {
await TranscriptionsService.deleteBlobs(blobNames)
}
}
}

module.exports = OpenAI
6 changes: 5 additions & 1 deletion lib/inputParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,13 @@ class InputParser {
return text.toLowerCase().startsWith('!chat')
}

isAskingToUpdateChatToOpenAI (text) {
isAskingToUploadChatToOpenAI (text) {
return text.toLowerCase().startsWith('!updatechat')
}

isAskingToUploadStreamToOpenAI (text) {
return text.toLowerCase().startsWith('!updatestream')
}
}

module.exports = InputParser
5 changes: 4 additions & 1 deletion lib/messenger.js
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,11 @@ class Messenger {
return handlers.openAI.askOpenAI(target, textWithoutMention, context['display-name'], this.bot)
}

if (textSplit.length > 0 && inputParser.isAskingToUpdateChatToOpenAI(textSplit[0]) && isAdmin(context))
if (textSplit.length > 0 && inputParser.isAskingToUploadChatToOpenAI(textSplit[0]) && isAdmin(context))
return await handlers.openAI.createAndUploadToChat(target, this.bot, true)

if (textSplit.length > 0 && inputParser.isAskingToUploadStreamToOpenAI(textSplit[0]) && isAdmin(context))
return await handlers.openAI.uploadStreamToOpenai(target, this.bot, true)
}

handleConnect (addr, port) {
Expand Down
2 changes: 1 addition & 1 deletion lib/notifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class Notifier {
})

cron.schedule('15 4 * * *', async () => {
await handlers.openAI.createAndUploadToChat(this.twitchBot, this.target)
await handlers.openAI.createAndUploadToChat(this.target, this.twitchBot)
})

return Promise.resolve()
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"author": "Lluis <lluisdanes@gmail.com>",
"license": "MIT",
"dependencies": {
"@azure/storage-blob": "^12.25.0",
"@twurple/api": "^7.2.0",
"@twurple/auth": "^7.2.0",
"@twurple/auth-tmi": "^7.2.0",
Expand Down
6 changes: 4 additions & 2 deletions services/openAI.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ async function askOpenAI(message) {
let assistantThread = null


async function uploadFileToVectorStore(json, formattedDate) {
async function uploadFileToVectorStore(json, formattedDate, origin) {
try {
const filename = `chat_${formattedDate}.json`
const filename = `${origin}_${formattedDate}.json`
const buffer = Buffer.from(json, 'utf-8');
const newFile = new File([buffer], filename, {
type: 'application/json',
Expand All @@ -77,6 +77,8 @@ async function uploadFileToVectorStore(json, formattedDate) {
}
}



async function askAssistant(message) {
let result
try {
Expand Down
93 changes: 93 additions & 0 deletions services/transcriptions.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
const { BlobServiceClient } = require('@azure/storage-blob');
const moment = require('moment');
const config = require('../config')
const {Buffer} = require("buffer");

async function deleteBlobs (blobNames) {
const blobServiceClient = BlobServiceClient.fromConnectionString(config.blobStorage.connectionString);
const containerClient = blobServiceClient.getContainerClient(config.blobStorage.containerName);

const blobs = containerClient.listBlobsFlat();
for await (const blob of blobs) {
if (blobNames.includes(blob.name)) {
const blockBlobClient = containerClient.getBlockBlobClient(blob.name);
await blockBlobClient.delete();
}
}
}

async function getBlobs() {
const blobServiceClient = BlobServiceClient.fromConnectionString(config.blobStorage.connectionString);
const containerClient = blobServiceClient.getContainerClient(config.blobStorage.containerName);

const pattern = /whisper-live(\d{8})-(\d{6})\.text/;

let jsons = {}
let blobNames = []
const blobs = containerClient.listBlobsFlat();
for await (const blob of blobs) {
const blobName = blob.name;
blobNames.push(blobName);
const match = blobName.match(pattern);
const date = match[1]; // '20241109'
const time = match[2]; // '143907'

const blockBlobClient = containerClient.getBlockBlobClient(blobName);

const datetimeStr = blobName.split('whisper-live')[1].split('.')[0];
const dateTime = moment(datetimeStr, 'YYYYMMDD-HHmmss').toISOString();

const downloadBlockBlobResponse = await blockBlobClient.download(0);
const downloaded = (
await streamToBuffer(downloadBlockBlobResponse.readableStreamBody)
).toString();

const lines = downloaded.toString().split('\r\n');

const modifiedLines = lines.map(line => ({
nick: 'manzana_oscura',
text: line,
date: dateTime
}));

//const jsonData = JSON.stringify(modifiedLines);

jsons[date] = jsons[date] || {};
jsons[date][time] = modifiedLines;
}

let mergedJsons = {};

for (const date in jsons) {
if (jsons.hasOwnProperty(date)) {
const sortedTimes = Object.keys(jsons[date]).sort((a, b) => Number(a) - Number(b))
let mergedObject = [];
for (const time of sortedTimes) {
mergedObject= mergedObject.concat(jsons[date][time])
}
mergedJsons[date] = JSON.stringify(mergedObject)
}
}

return { mergedJsons, blobNames }
}

// [Node.js only] A helper method used to read a Node.js readable stream into a Buffer
async function streamToBuffer(readableStream) {
return new Promise((resolve, reject) => {
const chunks = [];
readableStream.on("data", (data) => {
chunks.push(data instanceof Buffer ? data : Buffer.from(data));
});
readableStream.on("end", () => {
resolve(Buffer.concat(chunks));
});
readableStream.on("error", reject);
});
}


module.exports = {
getBlobs,
deleteBlobs
}

0 comments on commit b11743e

Please sign in to comment.