From b5d8a56f248649c1c2d0bf85828e63658e644147 Mon Sep 17 00:00:00 2001 From: Tomasz Juszczak Date: Tue, 8 Oct 2024 10:17:38 +0200 Subject: [PATCH] Added image processing capabilities --- .github/workflows/docker-publish.yml | 2 - Slack-GPT-Socket/GptApi/GptClient.cs | 121 ++++++++++++++++-- Slack-GPT-Socket/GptApi/GptClientResolver.cs | 51 +++----- Slack-GPT-Socket/GptApi/WritableMessage.cs | 28 ++++ Slack-GPT-Socket/Program.cs | 1 + .../SlackMessageEventBaseHandler.cs | 37 ++++-- 6 files changed, 182 insertions(+), 58 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 87a00e9..0c63abc 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -100,8 +100,6 @@ jobs: # https://github.com/sigstore/cosign - name: Sign the published Docker image if: ${{ github.event_name != 'pull_request' }} - env: - COSIGN_EXPERIMENTAL: "true" # This step uses the identity token to provision an ephemeral certificate # against the sigstore community Fulcio instance. run: echo "${{ steps.meta.outputs.tags }}" | xargs -I {} cosign sign {}@${{ steps.build-and-push.outputs.digest }} diff --git a/Slack-GPT-Socket/GptApi/GptClient.cs b/Slack-GPT-Socket/GptApi/GptClient.cs index 5ffd7f2..e05fe2d 100644 --- a/Slack-GPT-Socket/GptApi/GptClient.cs +++ b/Slack-GPT-Socket/GptApi/GptClient.cs @@ -1,10 +1,13 @@ using System.Diagnostics; +using System.Net.Http.Headers; using Microsoft.Extensions.Options; using Newtonsoft.Json; using OpenAI; using OpenAI.Chat; using Slack_GPT_Socket.Settings; using Slack_GPT_Socket.Utilities.LiteDB; +using SlackNet.Blocks; +using SlackNet.Events; namespace Slack_GPT_Socket.GptApi; @@ -15,8 +18,10 @@ public class GptClient { private readonly OpenAIClient _api; private readonly ILogger _log; + private readonly IOptions _settings; private readonly GptDefaults _gptDefaults; private readonly GptClientResolver _resolver; + private readonly IHttpClientFactory _httpClientFactory; /// /// Initializes a new instance of the class. @@ -25,18 +30,17 @@ public class GptClient /// The logger instance. /// The API settings. public GptClient( - GptCustomCommands customCommands, + GptCustomCommands customCommands, IUserCommandDb userCommandDb, - ILogger log, + ILogger log, IOptions gptDefaults, - IOptions settings) + IOptions settings, + IHttpClientFactory httpClientFactory) { - var httpClient = new HttpClient - { - Timeout = TimeSpan.FromMinutes(10) - }; + _httpClientFactory = httpClientFactory; _api = new OpenAIClient(settings.Value.OpenAIKey); _log = log; + _settings = settings; _gptDefaults = gptDefaults.Value; _resolver = new GptClientResolver(customCommands, _gptDefaults, userCommandDb); } @@ -44,10 +48,12 @@ public GptClient( /// /// Generates a response based on the given chat prompts. /// + /// Input slack event /// The list of chat prompts. /// The user identifier. /// A task representing the asynchronous operation, with a result of the generated response. - public async Task GeneratePrompt(List chatPrompts, string userId) + public async Task GeneratePrompt(MessageEventBase slackEvent, List chatPrompts, + string userId) { // get the last prompt var userPrompt = chatPrompts.Last(chatPrompt => chatPrompt.Role == Role.User); @@ -55,7 +61,102 @@ public async Task GeneratePrompt(List chatPrompts, prompt.UserId = userId; prompt.Prompt = userPrompt.Content; - var chatRequest = _resolver.ParseRequest(chatPrompts, prompt); + // TODO: Refactor me!!! + + var files = new List(); + foreach (var file in slackEvent.Files) + { + var fileUrl = file.UrlPrivateDownload ?? file.UrlPrivate; + if (string.IsNullOrEmpty(fileUrl)) + { + return new GptResponse + { + Error = "Requested file to process with this request, but it doesn't have a download URL" + }; + } + + var httpClient = _httpClientFactory.CreateClient(); + // configure httpClient to allow images and other files + httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue(file.Mimetype)); + httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", _settings.Value.SlackBotToken); + var fileRequest = await httpClient.GetAsync(fileUrl); + if (!fileRequest.IsSuccessStatusCode) + { + return new GptResponse + { + Error = "Requested file to process with this request, but it couldn't be downloaded successfully" + }; + } + var fileContent = await fileRequest.Content.ReadAsStreamAsync(); + var headers = fileRequest.Content.Headers; + + // check if headers contain the mimetype + if (!headers.TryGetValues("Content-Type", out var contentTypes)) + { + return new GptResponse + { + Error = "Requested file to process with this request, but it doesn't have a mimetype" + }; + } + var contentType = contentTypes.FirstOrDefault(); + if (contentType == null) + { + return new GptResponse + { + Error = "Requested file to process with this request, but it doesn't have a mimetype" + }; + } + // check if the mimetype is equal to the file mimetype + if (contentType != file.Mimetype) + { + return new GptResponse + { + Error = "Requested file to process with this request, but the mimetype doesn't match the file mimetype " + + $"expected {file.Mimetype} but got {contentType}" + }; + } + + using var memoryStream = new MemoryStream(); + await fileContent.CopyToAsync(memoryStream); + memoryStream.Position = 0; + + var chatPart = ChatMessageContentPart.CreateImagePart( + await BinaryData.FromStreamAsync(memoryStream), file.Mimetype); + files.Add(chatPart); + } + + // TODO: Refactor me!!! + + if (slackEvent.Blocks != null) + { + foreach (var block in slackEvent.Blocks) + { + if (block is not RichTextBlock rtb) continue; + foreach (var element in rtb.Elements) + { + if (element is not RichTextSection rts) continue; + foreach (var innerElement in rts.Elements) + { + if (innerElement is not RichTextLink rtl) continue; + + var uri = new Uri(rtl.Url); + if (uri.Scheme == "http" || uri.Scheme == "https") + { + var httpClient = _httpClientFactory.CreateClient(); + var response = await httpClient.GetAsync(uri); + if (response.IsSuccessStatusCode && + response.Content.Headers.ContentType!.MediaType!.StartsWith("image")) + { + var chatPart = ChatMessageContentPart.CreateImagePart(uri); + files.Add(chatPart); + } + } + } + } + } + } + + var chatRequest = _resolver.ParseRequest(chatPrompts, prompt, files); try { @@ -65,6 +166,8 @@ public async Task GeneratePrompt(List chatPrompts, var chatCompletion = result.Value; _log.LogInformation("GPT response: {Response}", JsonConvert.SerializeObject(chatCompletion)); + + return new GptResponse { Message = chatCompletion.Content.Last().Text, diff --git a/Slack-GPT-Socket/GptApi/GptClientResolver.cs b/Slack-GPT-Socket/GptApi/GptClientResolver.cs index 37b1f62..7cc0e33 100644 --- a/Slack-GPT-Socket/GptApi/GptClientResolver.cs +++ b/Slack-GPT-Socket/GptApi/GptClientResolver.cs @@ -1,5 +1,4 @@ using System.Text.RegularExpressions; -using OpenAI; using OpenAI.Chat; using Slack_GPT_Socket.GptApi.ParameterResolvers; using Slack_GPT_Socket.Settings; @@ -31,8 +30,10 @@ public GptClientResolver(GptCustomCommands customCommands, GptDefaults gptDefaul /// /// The list of chat prompts. /// The GPT request. + /// List of files attached to this prompt /// A ChatRequest instance. - public (IEnumerable Messages, ChatCompletionOptions Options, string Model) ParseRequest(List chatPrompts, GptRequest request) + public (IEnumerable Messages, ChatCompletionOptions Options, string Model) ParseRequest( + List chatPrompts, GptRequest request, List? files = null) { foreach (var chatPrompt in chatPrompts) { @@ -42,12 +43,11 @@ public GptClientResolver(GptCustomCommands customCommands, GptDefaults gptDefaul ResolveModel(ref content); ResolveParameters(ref content); chatPrompt.Content = content.Prompt; - } ResolveModel(ref request); ResolveParameters(ref request); - + var requestPrompts = new List(); requestPrompts.AddRange(chatPrompts); @@ -59,27 +59,13 @@ public GptClientResolver(GptCustomCommands customCommands, GptDefaults gptDefaul TopP = request.TopP, PresencePenalty = request.PresencePenalty, FrequencyPenalty = request.FrequencyPenalty, - EndUserId = request.UserId, + EndUserId = request.UserId }; + chatPrompts.Last().Files = files ?? []; + foreach (var chatPrompt in chatPrompts) { - switch (chatPrompt.Role) - { - case Role.User: - messages.Add(new UserChatMessage(chatPrompt.Content)); - break; - case Role.Assistant: - messages.Add(new AssistantChatMessage(chatPrompt.Content)); - break; - case Role.System: - messages.Add(new SystemChatMessage(chatPrompt.Content)); - break; - case Role.Tool: - messages.Add(new ToolChatMessage(chatPrompt.Content)); - break; - default: - throw new ArgumentOutOfRangeException(); - } + messages.Add(chatPrompt.ToChatMessage()); } return (messages, options, request.Model); @@ -130,10 +116,10 @@ private void ResolveModel(ref GptRequest input) private void ResolveParameters(ref GptRequest input) { var lastIndex = 0; - Match match = ParameterRegex.Match(input.Prompt); - - if(!match.Success) return; - + var match = ParameterRegex.Match(input.Prompt); + + if (!match.Success) return; + do { var paramName = match.Groups[1].Value; @@ -190,16 +176,15 @@ private static void TrimInputFromParameter(GptRequest input, ParameterEventArgs if (args.HasValue) { // Find last index of this value args.ValueRaw - var paramValueIndex = input.Prompt.IndexOf(args.ValueRaw, StringComparison.InvariantCultureIgnoreCase) + args.ValueRaw.Length + 1; + var paramValueIndex = input.Prompt.IndexOf(args.ValueRaw, StringComparison.InvariantCultureIgnoreCase) + + args.ValueRaw.Length + 1; lastIndex = paramValueIndex; input.Prompt = input.Prompt.Substring(paramValueIndex, input.Prompt.Length - paramValueIndex).Trim(); return; } - else - { - lastIndex = paramNameIndex + args.Name.Length + 2; - searchString = args.Name + " "; - input.Prompt = input.Prompt.Replace(searchString, "").Trim(); - } + + lastIndex = paramNameIndex + args.Name.Length + 2; + searchString = args.Name + " "; + input.Prompt = input.Prompt.Replace(searchString, "").Trim(); } } \ No newline at end of file diff --git a/Slack-GPT-Socket/GptApi/WritableMessage.cs b/Slack-GPT-Socket/GptApi/WritableMessage.cs index 321f072..a3ad323 100644 --- a/Slack-GPT-Socket/GptApi/WritableMessage.cs +++ b/Slack-GPT-Socket/GptApi/WritableMessage.cs @@ -1,4 +1,5 @@ using OpenAI; +using OpenAI.Chat; namespace Slack_GPT_Socket.GptApi; @@ -42,4 +43,31 @@ public WritableMessage(Role role, string userId, string content) /// Gets or sets the content of the chat prompt. /// public string Content { get; set; } + + /// + /// Gets or sets the files attached to the chat prompt. + /// + public List Files { get; set; } + + public ChatMessage ToChatMessage() + { + var textContent = ChatMessageContentPart.CreateTextPart(Content); + var fileContent = Files ?? []; + var content = new List {textContent}; + content.AddRange(fileContent); + + switch (Role) + { + case Role.User: + return new UserChatMessage(content); + case Role.Assistant: + return new AssistantChatMessage(content); + case Role.System: + return new SystemChatMessage(content); + case Role.Tool: + return new ToolChatMessage(Content); + default: + throw new ArgumentOutOfRangeException(); + } + } } \ No newline at end of file diff --git a/Slack-GPT-Socket/Program.cs b/Slack-GPT-Socket/Program.cs index 6c8b792..3961ad9 100644 --- a/Slack-GPT-Socket/Program.cs +++ b/Slack-GPT-Socket/Program.cs @@ -9,6 +9,7 @@ var builder = WebApplication.CreateBuilder(args); var settings = builder.Configuration.GetSection("Api").Get()!; +builder.Services.AddHttpClient(); builder.Services.AddOptions().Bind(builder.Configuration.GetSection("Api")); builder.Services.Configure(builder.Configuration.GetSection("GptCommands")); builder.Services.Configure(builder.Configuration.GetSection("GptDefaults")); diff --git a/Slack-GPT-Socket/SlackHandlers/SlackMessageEventBaseHandler.cs b/Slack-GPT-Socket/SlackHandlers/SlackMessageEventBaseHandler.cs index b98cadd..75346cb 100644 --- a/Slack-GPT-Socket/SlackHandlers/SlackMessageEventBaseHandler.cs +++ b/Slack-GPT-Socket/SlackHandlers/SlackMessageEventBaseHandler.cs @@ -307,30 +307,39 @@ public async Task PostGptAvailableWarningMessage(MessageEventBase slackEvent) /// Input slack event /// The chat context to be used in generating the prompt. /// The user ID to be used in generating the prompt. + /// Files attached with the prompt /// A GPTResponse instance containing the generated prompt. private async Task GeneratePrompt(MessageEventBase slackEvent, List context, string userId) { - // Start the periodic SendMessageProcessing task var cts = new CancellationTokenSource(); - var periodicTask = PeriodicSendMessageProcessing(slackEvent, cts.Token); - - var result = await GeneratePromptRetry(slackEvent, context, userId); - - // Cancel the periodic task once the long running method returns - cts.Cancel(); - - // Ensure the periodic task has completed before proceeding try { - await periodicTask; + // Start the periodic SendMessageProcessing task + var periodicTask = PeriodicSendMessageProcessing(slackEvent, cts.Token); + + var result = await GeneratePromptRetry(slackEvent, context, userId); + + await cts.CancelAsync(); + + // Ensure the periodic task has completed before proceeding + try + { + await periodicTask; + } + catch (TaskCanceledException) + { + // Ignore CTS CancelledException + } + + return result; } - catch (TaskCanceledException) + finally { - // Ignore CTS CancelledException + if(!cts.Token.IsCancellationRequested) + await cts.CancelAsync(); } - return result; } /// @@ -346,7 +355,7 @@ private async Task GeneratePromptRetry(MessageEventBase slackEvent, var errorsCount = 0; while (true) { - var result = await _gptClient.GeneratePrompt(context, userId); + var result = await _gptClient.GeneratePrompt(slackEvent, context, userId); var repeatOnErrorsArray = new[] {