diff --git a/Package.swift b/Package.swift index 5c0eb4d..39c5427 100644 --- a/Package.swift +++ b/Package.swift @@ -24,14 +24,15 @@ let package = Package( .library(name: "SpeziLLMOpenAI", targets: ["SpeziLLMOpenAI"]) ], dependencies: [ - .package(url: "https://github.com/MacPaw/OpenAI", .upToNextMinor(from: "0.2.5")), + .package(url: "https://github.com/MacPaw/OpenAI", .upToNextMinor(from: "0.2.6")), .package(url: "https://github.com/StanfordBDHG/llama.cpp", .upToNextMinor(from: "0.1.8")), .package(url: "https://github.com/StanfordSpezi/Spezi", from: "1.1.0"), .package(url: "https://github.com/StanfordSpezi/SpeziStorage", from: "1.0.0"), .package(url: "https://github.com/StanfordSpezi/SpeziOnboarding", from: "1.0.0"), .package(url: "https://github.com/StanfordSpezi/SpeziSpeech", from: "1.0.0"), - .package(url: "https://github.com/StanfordSpezi/SpeziChat", .upToNextMinor(from: "0.1.4")), - .package(url: "https://github.com/StanfordSpezi/SpeziViews", from: "1.0.0") + .package(url: "https://github.com/StanfordSpezi/SpeziChat", .upToNextMinor(from: "0.1.8")), + .package(url: "https://github.com/StanfordSpezi/SpeziViews", from: "1.0.0"), + .package(url: "https://github.com/groue/Semaphore.git", exact: "0.0.8") ], targets: [ .target( @@ -47,6 +48,7 @@ let package = Package( dependencies: [ .target(name: "SpeziLLM"), .product(name: "llama", package: "llama.cpp"), + .product(name: "Semaphore", package: "Semaphore"), .product(name: "Spezi", package: "Spezi") ], swiftSettings: [ @@ -65,6 +67,7 @@ let package = Package( dependencies: [ .target(name: "SpeziLLM"), .product(name: "OpenAI", package: "OpenAI"), + .product(name: "Semaphore", package: "Semaphore"), .product(name: "Spezi", package: "Spezi"), .product(name: "SpeziChat", package: "SpeziChat"), .product(name: "SpeziSecureStorage", package: "SpeziStorage"), diff --git a/README.md b/README.md index e018eae..f902f59 100644 --- a/README.md +++ b/README.md @@ -91,14 +91,14 @@ The target enables developers to easily execute medium-size Language Models (LLM #### Setup You can configure the Spezi Local LLM execution within the typical `SpeziAppDelegate`. -In the example below, the `LLMRunner` from the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) target which is responsible for providing LLM functionality within the Spezi ecosystem is configured with the `LLMLocalRunnerSetupTask` from the [SpeziLLMLocal](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillmlocal) target. This prepares the `LLMRunner` to locally execute Language Models. +In the example below, the `LLMRunner` from the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) target which is responsible for providing LLM functionality within the Spezi ecosystem is configured with the `LLMLocalPlatform` from the [SpeziLLMLocal](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillmlocal) target. This prepares the `LLMRunner` to locally execute Language Models. 
-``` +```swift class TestAppDelegate: SpeziAppDelegate { override var configuration: Configuration { Configuration { LLMRunner { - LLMLocalRunnerSetupTask() + LLMLocalPlatform() } } } @@ -107,27 +107,30 @@ class TestAppDelegate: SpeziAppDelegate { #### Usage -The code example below showcases the interaction with the `LLMLocal` through the the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner), which is injected into the SwiftUI `Environment` via the `Configuration` shown above.. -Based on a `String` prompt, the `LLMGenerationTask/generate(prompt:)` method returns an `AsyncThrowingStream` which yields the inferred characters until the generation has completed. +The code example below showcases the interaction with local LLMs through the the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner), which is injected into the SwiftUI `Environment` via the `Configuration` shown above. + +The `LLMLocalSchema` defines the type and configurations of the to-be-executed `LLMLocalSession`. This transformation is done via the [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner) that uses the `LLMLocalPlatform`. The inference via `LLMLocalSession/generate()` returns an `AsyncThrowingStream` that yields all generated `String` pieces. ```swift -struct LocalLLMChatView: View { - @Environment(LLMRunner.self) var runner: LLMRunner - - // The locally executed LLM - @State var model: LLMLocal = .init( - modelPath: ... - ) - @State var responseText: String - - func executePrompt(prompt: String) { - // Execute the query on the runner, returning a stream of outputs - let stream = try await runner(with: model).generate(prompt: "Hello LLM!") - - for try await token in stream { - responseText.append(token) - } - } +struct LLMLocalDemoView: View { + @Environment(LLMRunner.self) var runner + @State var responseText = "" + + var body: some View { + Text(responseText) + .task { + // Instantiate the `LLMLocalSchema` to an `LLMLocalSession` via the `LLMRunner`. + let llmSession: LLMLocalSession = runner( + with: LLMLocalSchema( + modelPath: URL(string: "URL to the local model file")! + ) + ) + + for try await token in try await llmSession.generate() { + responseText.append(token) + } + } + } } ``` @@ -142,7 +145,7 @@ In addition, `SpeziLLMOpenAI` provides developers with a declarative Domain Spec #### Setup -In order to use `LLMOpenAI`, the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner) needs to be initialized in the Spezi `Configuration`. Only after, the `LLMRunner` can be used to execute the ``LLMOpenAI``. +In order to use OpenAI LLMs within the Spezi ecosystem, the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner) needs to be initialized in the Spezi `Configuration` with the `LLMOpenAIPlatform`. Only after, the `LLMRunner` can be used for inference of OpenAI LLMs. See the [SpeziLLM documentation](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) for more details. 
```swift @@ -150,7 +153,7 @@ class LLMOpenAIAppDelegate: SpeziAppDelegate { override var configuration: Configuration { Configuration { LLMRunner { - LLMOpenAIRunnerSetupTask() + LLMOpenAIPlatform() } } } @@ -159,29 +162,33 @@ class LLMOpenAIAppDelegate: SpeziAppDelegate { #### Usage -The code example below showcases the interaction with the `LLMOpenAI` through the the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner), which is injected into the SwiftUI `Environment` via the `Configuration` shown above. -Based on a `String` prompt, the `LLMGenerationTask/generate(prompt:)` method returns an `AsyncThrowingStream` which yields the inferred characters until the generation has completed. +The code example below showcases the interaction with an OpenAI LLM through the the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner), which is injected into the SwiftUI `Environment` via the `Configuration` shown above. + +The `LLMOpenAISchema` defines the type and configurations of the to-be-executed `LLMOpenAISession`. This transformation is done via the [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner) that uses the `LLMOpenAIPlatform`. The inference via `LLMOpenAISession/generate()` returns an `AsyncThrowingStream` that yields all generated `String` pieces. ```swift -struct LLMOpenAIChatView: View { - @Environment(LLMRunner.self) var runner: LLMRunner - - @State var model: LLMOpenAI = .init( - parameters: .init( - modelType: .gpt3_5Turbo, - systemPrompt: "You're a helpful assistant that answers questions from users.", - overwritingToken: "abc123" - ) - ) - @State var responseText: String - - func executePrompt(prompt: String) { - // Execute the query on the runner, returning a stream of outputs - let stream = try await runner(with: model).generate(prompt: "Hello LLM!") - - for try await token in stream { - responseText.append(token) - } +struct LLMOpenAIDemoView: View { + @Environment(LLMRunner.self) var runner + @State var responseText = "" + + var body: some View { + Text(responseText) + .task { + // Instantiate the `LLMOpenAISchema` to an `LLMOpenAISession` via the `LLMRunner`. + let llmSession: LLMOpenAISession = runner( + with: LLMOpenAISchema( + parameters: .init( + modelType: .gpt3_5Turbo, + systemPrompt: "You're a helpful assistant that answers questions from users.", + overwritingToken: "abc123" + ) + ) + ) + + for try await token in try await llmSession.generate() { + responseText.append(token) + } + } } } ``` diff --git a/Sources/SpeziLLM/Helpers/Chat+Append.swift b/Sources/SpeziLLM/Helpers/Chat+Append.swift index b9d7753..62d5d01 100644 --- a/Sources/SpeziLLM/Helpers/Chat+Append.swift +++ b/Sources/SpeziLLM/Helpers/Chat+Append.swift @@ -16,24 +16,30 @@ extension Chat { /// If the `overwrite` parameter is `true`, the existing message is overwritten. /// /// - Parameters: - /// - output: The `ChatEntity/Role/assistant` output `String` (part) that should be appended. - /// - overwrite: Indicates if the already present content of the assistant message should be overwritten. + /// - output: The `ChatEntity/Role/assistant` output `String` (part) that should be appended. Can contain Markdown-formatted text. 
+ /// - complete: Indicates if the `ChatEntity` is complete after appending to it one last time via the ``append(assistantOutput:complete:overwrite:)`` function. + /// - overwrite: Indicates if the already present content of the assistant message should be overwritten. @MainActor - public mutating func append(assistantOutput output: String, overwrite: Bool = false) { - if self.last?.role == .assistant { - self[self.count - 1] = .init( - role: .assistant, - content: overwrite ? output : ((self.last?.content ?? "") + output) - ) - } else { - self.append(.init(role: .assistant, content: output)) + public mutating func append(assistantOutput output: String, complete: Bool = false, overwrite: Bool = false) { + guard let lastChatEntity = self.last, + lastChatEntity.role == .assistant else { + self.append(.init(role: .assistant, content: output, complete: complete)) + return } + + self[self.count - 1] = .init( + role: .assistant, + content: overwrite ? output : (lastChatEntity.content + output), + complete: complete, + id: lastChatEntity.id, + date: lastChatEntity.date + ) } /// Append an `ChatEntity/Role/user` input to the `Chat`. /// /// - Parameters: - /// - input: The `ChatEntity/Role/user` input that should be appended. + /// - input: The `ChatEntity/Role/user` input that should be appended. Can contain Markdown-formatted text. @MainActor public mutating func append(userInput input: String) { self.append(.init(role: .user, content: input)) @@ -42,8 +48,8 @@ extension Chat { /// Append an `ChatEntity/Role/system` prompt to the `Chat`. /// /// - Parameters: - /// - systemPrompt: The `ChatEntity/Role/system` prompt of the `Chat`, inserted at the very beginning. - /// - insertAtStart: Defines if the system prompt should be inserted at the start of the conversational context, defaults to `true`. + /// - systemPrompt: The `ChatEntity/Role/system` prompt of the `Chat`, inserted at the very beginning. Can contain Markdown-formatted text. + /// - insertAtStart: Defines if the system prompt should be inserted at the start of the conversational context, defaults to `true`. @MainActor public mutating func append(systemMessage systemPrompt: String, insertAtStart: Bool = true) { if insertAtStart { @@ -62,10 +68,28 @@ extension Chat { /// Append a `ChatEntity/Role/function` response from a function call to the `Chat. /// /// - Parameters: - /// - functionName: The name of the `ChatEntity/Role/function` that is called by the LLM. - /// - functionResponse: The response `String` of the `ChatEntity/Role/function` that is called by the LLM. + /// - functionName: The name of the `ChatEntity/Role/function` that is called by the LLM. + /// - functionResponse: The response `String` of the `ChatEntity/Role/function` that is called by the LLM. @MainActor public mutating func append(forFunction functionName: String, response functionResponse: String) { self.append(.init(role: .function(name: functionName), content: functionResponse)) } + + + /// Marks the latest chat entry as `ChatEntity/completed`, if the role of the chat is `ChatEntity/Role/assistant`. 
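+    ///
+    /// A minimal usage sketch (illustrative only; assumes a mutable `Chat` value that just received streamed assistant output on the main actor):
+    /// ```swift
+    /// chat.append(assistantOutput: "Hello ")
+    /// chat.append(assistantOutput: "world!")
+    /// // Mark the streamed assistant message as complete once the stream finishes.
+    /// chat.completeAssistantStreaming()
+    /// ```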
+ @MainActor + public mutating func completeAssistantStreaming() { + guard let lastChatEntity = self.last, + lastChatEntity.role == .assistant else { + return + } + + self[self.count - 1] = .init( + role: .assistant, + content: lastChatEntity.content, + complete: true, + id: lastChatEntity.id, + date: lastChatEntity.date + ) + } } diff --git a/Sources/SpeziLLM/Helpers/Chat+Init.swift b/Sources/SpeziLLM/Helpers/Chat+Init.swift new file mode 100644 index 0000000..ab6e60e --- /dev/null +++ b/Sources/SpeziLLM/Helpers/Chat+Init.swift @@ -0,0 +1,29 @@ +// +// This source file is part of the Stanford Spezi open-source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import SpeziChat + + +extension Chat { + /// Creates a new `Chat` array with an arbitrary number of system messages. + /// + /// - Parameters: + /// - systemMessages: `String`s that should be used as system messages. + public init(systemMessages: [String]) { + self = systemMessages.map { systemMessage in + .init(role: .system, content: systemMessage) + } + } + + + /// Resets the `Chat` array, deleting all persisted content. + @MainActor + public mutating func reset() { + self = [] + } +} diff --git a/Sources/SpeziLLM/LLM.swift b/Sources/SpeziLLM/LLM.swift deleted file mode 100644 index 2db0314..0000000 --- a/Sources/SpeziLLM/LLM.swift +++ /dev/null @@ -1,78 +0,0 @@ -// -// This source file is part of the Stanford Spezi open source project -// -// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) -// -// SPDX-License-Identifier: MIT -// - -import Foundation -import SpeziChat - - -/// Abstraction layer for the usage of Large Language Models within the Spezi ecosystem -/// -/// The ``LLM`` protocol is an abstraction layer of LLMs, regardless of the execution locality (local or remote) or the specific model type. -/// Developers can use the ``LLM`` protocol to conform their LLM interface implementations to a standard which is consistent throughout the Spezi ecosystem. -/// -/// The ``LLM`` contains the ``LLM/context`` property which holds the entire history of the model interactions. -/// This includes the system prompt, user input, but also assistant responses. -/// Ensure the property always contains all necessary information, as the ``LLM/generate(continuation:)`` function executes the inference based on the ``LLM/context``. -/// -/// - Important: An ``LLM`` shouldn't be executed on it's own but always used together with the ``LLMRunner``. -/// Please refer to the ``LLMRunner`` documentation for a complete code example. -/// -/// ### Usage -/// -/// An example conformance of the ``LLM`` looks like the code sample below (lots of details were omitted for simplicity). -/// The key point is the need to implement the ``LLM/setup(runnerConfig:)`` as well as the ``LLM/generate(continuation:)`` functions, whereas the ``LLM/setup(runnerConfig:)`` has an empty default implementation as not every ``LLMHostingType`` requires the need for a setup closure. -/// -/// ```swift -/// @Observable -/// public class LLMTest: LLM { -/// public let type: LLMHostingType = .local -/// @MainActor public var state: LLMState = .uninitialized -/// @MainActor public var context: Chat = [] -/// -/// public func setup(/* */) async throws {} -/// public func generate(/* */) async {} -/// } -/// ``` -public protocol LLM: AnyObject { - /// The type of the ``LLM`` as represented by the ``LLMHostingType``. 
- var type: LLMHostingType { get } - /// The state of the ``LLM`` indicated by the ``LLMState``. - @MainActor var state: LLMState { get set } - /// The current context state of the ``LLM``, includes the entire prompt history including system prompts, user input, and model responses. - @MainActor var context: Chat { get set } - - - /// Performs any setup-related actions for the ``LLM``. - /// After this function completes, the state of the ``LLM`` should be ``LLMState/ready``. - /// - /// - Parameters: - /// - runnerConfig: The runner configuration as a ``LLMRunnerConfiguration``. - func setup(runnerConfig: LLMRunnerConfiguration) async throws - - /// Performs the actual text generation functionality of the ``LLM`` based on the ``LLM/context``. - /// The result of the text generation is streamed via a Swift `AsyncThrowingStream` that is passed as a parameter. - /// - /// - Parameters: - /// - continuation: A Swift `AsyncThrowingStream` enabling the streaming of the text generation. - func generate(continuation: AsyncThrowingStream.Continuation) async -} - - -extension LLM { - /// Finishes the continuation with an error and sets the ``LLM/state`` to the respective error (on the main actor). - /// - /// - Parameters: - /// - error: The error that occurred. - /// - continuation: The `AsyncThrowingStream` that streams the generated output. - public func finishGenerationWithError(_ error: E, on continuation: AsyncThrowingStream.Continuation) async { - continuation.finish(throwing: error) - await MainActor.run { - self.state = .error(error: error) - } - } -} diff --git a/Sources/SpeziLLM/LLMError.swift b/Sources/SpeziLLM/LLMError.swift index 71634db..498ea86 100644 --- a/Sources/SpeziLLM/LLMError.swift +++ b/Sources/SpeziLLM/LLMError.swift @@ -9,34 +9,61 @@ import Foundation -/// Defines errors that may occur during setting up the runner environment for ``LLM`` generation jobs. -public enum LLMRunnerError: LLMError { - /// Indicates an error occurred during setup of the LLM generation. - case setupError +/// Defines universally occurring `Error`s while handling LLMs with SpeziLLM. +public enum LLMDefaultError: LLMError { + /// Indicates an unknown error during LLM execution. + case unknown(Error) public var errorDescription: String? { switch self { - case .setupError: - String(localized: LocalizedStringResource("LLM_SETUP_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) + case .unknown: + String(localized: LocalizedStringResource("LLM_UNKNOWN_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) } } public var recoverySuggestion: String? { switch self { - case .setupError: - String(localized: LocalizedStringResource("LLM_SETUP_ERROR_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) + case .unknown: + String(localized: LocalizedStringResource("LLM_UNKNOWN_ERROR_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) } } public var failureReason: String? { switch self { - case .setupError: - String(localized: LocalizedStringResource("LLM_SETUP_ERROR_FAILURE_REASON", bundle: .atURL(from: .module))) + case .unknown: + String(localized: LocalizedStringResource("LLM_UNKNOWN_ERROR_FAILURE_REASON", bundle: .atURL(from: .module))) + } + } + + + public static func == (lhs: LLMDefaultError, rhs: LLMDefaultError) -> Bool { + switch (lhs, rhs) { + case (.unknown, .unknown): true } } } -/// The ``LLMError`` defines a common error protocol which should be used for defining errors within the SpeziLLM ecosystem. 
+/// Defines a common `Error` protocol which should be used for defining errors within the SpeziLLM ecosystem. +/// +/// An example conformance to the ``LLMError`` can be found in the `SpeziLLMLocal` target. +/// +/// ```swift +/// public enum LLMLocalError: LLMError { +/// case modelNotFound +/// +/// public var errorDescription: String? { "Some example error description" } +/// public var recoverySuggestion: String? { "Some example recovery suggestion" } +/// public var failureReason: String? { "Some example failure reason" } +/// } +/// ``` public protocol LLMError: LocalizedError, Equatable {} + + +/// Ensure the conformance of the Swift `CancellationError` to ``LLMError``. +extension CancellationError: LLMError { + public static func == (lhs: CancellationError, rhs: CancellationError) -> Bool { + true + } +} diff --git a/Sources/SpeziLLM/LLMHostingType.swift b/Sources/SpeziLLM/LLMHostingType.swift deleted file mode 100644 index 092ed0a..0000000 --- a/Sources/SpeziLLM/LLMHostingType.swift +++ /dev/null @@ -1,19 +0,0 @@ -// -// This source file is part of the Stanford Spezi open source project -// -// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) -// -// SPDX-License-Identifier: MIT -// - -/// Indicates the hosting platform that a Spezi ``LLM`` should run on. -public enum LLMHostingType: String, CaseIterable { - /// Local, on-device execution of the ``LLM``. - case local - /// Execution of the ``LLM`` in the fog layer. - case fog - /// Remote, cloud-based execution of the ``LLM``. - case cloud - /// Mock execution - case mock -} diff --git a/Sources/SpeziLLM/LLMPlatform.swift b/Sources/SpeziLLM/LLMPlatform.swift new file mode 100644 index 0000000..7216a3c --- /dev/null +++ b/Sources/SpeziLLM/LLMPlatform.swift @@ -0,0 +1,69 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation +import Spezi + + +/// LLM execution platform of an ``LLMSchema``. +/// +/// The ``LLMPlatform`` is responsible for turning the received ``LLMSchema`` (describing the type and configuration of the LLM) to an executable ``LLMSession``. +/// The ``LLMPlatform`` is bound to a single ``LLMSchema`` as well as a single ``LLMSession``, so a 1:1 relation of all these components. +/// +/// Use ``LLMPlatform/callAsFunction(with:)`` with an ``LLMSchema`` parameter to get an executable ``LLMSession`` that does the actual inference. +/// ``LLMPlatform/state`` indicates if the ``LLMPlatform`` is currently ``LLMPlatformState/idle`` or ``LLMPlatformState/processing``. +/// +/// - Important: ``LLMPlatform``s shouldn't be used directly but used via the ``LLMRunner`` that delegates the requests towards the specific ``LLMPlatform``. +/// The ``LLMRunner`` must be configured with all to-be-supported ``LLMPlatform``s within the Spezi `Configuration`. +/// +/// - Tip: The ``LLMPlatform`` is a Spezi `Module`, enabling to use the full power of the Spezi `Dependency` and `Module` mechanisms. +/// +/// ### Usage +/// +/// The example below demonstrates a concrete implementation of the ``LLMPlatform`` with the ``LLMMockSchema`` and ``LLMMockSession``. 
+/// +/// ```swift +/// public actor LLMMockPlatform: LLMPlatform { +/// @MainActor public let state: LLMPlatformState = .idle +/// +/// public init() {} +/// +/// public func callAsFunction(with: LLMMockSchema) async -> LLMMockSession { +/// LLMMockSession(self, schema: with) +/// } +/// } +/// ``` +public protocol LLMPlatform: Module, EnvironmentAccessible { + /// The ``LLMSchema`` that is bound to the ``LLMPlatform``. + associatedtype Schema: LLMSchema + /// The ``LLMSession`` that is created from the ``LLMSchema`` by the ``LLMPlatform``. + associatedtype Session: LLMSession + + + /// Describes the state of the ``LLMPlatform`` via the ``LLMPlatformState``. + @MainActor var state: LLMPlatformState { get } + + + /// Turns the received ``LLMSchema`` to an executable ``LLMSession``. + /// + /// The ``LLMPlatform`` uses the ``LLMSchema`` to create an ``LLMSession`` that performs the LLM inference and contains the LLM context. + /// + /// - Parameters: + /// - with: The ``LLMSchema`` that should be turned into an ``LLMSession``. + /// + /// - Returns: The ready to use ``LLMSession``. + func callAsFunction(with: Schema) -> Session +} + + +extension LLMPlatform { + /// Enables the identification of the ``LLMPlatform/Schema`` via an `ObjectIdentifier`. + var schemaId: ObjectIdentifier { + ObjectIdentifier(Schema.self) + } +} diff --git a/Sources/SpeziLLM/Tasks/LLMRunnerSetupTaskBuilder.swift b/Sources/SpeziLLM/LLMPlatformBuilder.swift similarity index 50% rename from Sources/SpeziLLM/Tasks/LLMRunnerSetupTaskBuilder.swift rename to Sources/SpeziLLM/LLMPlatformBuilder.swift index c3b9927..abccf1b 100644 --- a/Sources/SpeziLLM/Tasks/LLMRunnerSetupTaskBuilder.swift +++ b/Sources/SpeziLLM/LLMPlatformBuilder.swift @@ -11,12 +11,11 @@ import Spezi import SwiftUI -/// A result builder used to aggregate multiple Spezi ``LLMRunnerSetupTask``s within the ``LLMRunner``. +/// Result builder used to aggregate multiple Spezi ``LLMPlatform``s stated within the ``LLMRunner``. @resultBuilder -@_documentation(visibility: internal) -public enum LLMRunnerSetupTaskBuilder: DependencyCollectionBuilder { - /// An auto-closure expression, providing the default dependency value, building the ``DependencyCollection``. - public static func buildExpression(_ expression: @escaping @autoclosure () -> L) -> DependencyCollection { +public enum LLMPlatformBuilder: DependencyCollectionBuilder { + /// An auto-closure expression, providing the default dependency value, building the `DependencyCollection`. + public static func buildExpression(_ expression: @escaping @autoclosure () -> L) -> DependencyCollection { DependencyCollection(singleEntry: expression) } } diff --git a/Sources/SpeziLLM/LLMPlatformState.swift b/Sources/SpeziLLM/LLMPlatformState.swift new file mode 100644 index 0000000..0acbd3a --- /dev/null +++ b/Sources/SpeziLLM/LLMPlatformState.swift @@ -0,0 +1,18 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + + +/// Describes the current state of the ``LLMPlatform`` which is responsible for sending ``LLMSchema``s to execution via ``LLMSession``s. +/// +/// The ``LLMPlatformState`` is quite minimal with only ``LLMPlatformState/idle`` and ``LLMPlatformState/processing`` states. +public enum LLMPlatformState { + /// Indicates that the ``LLMPlatform`` is currently idle and doesn't execute any ``LLMSession``s. 
+ case idle + /// Indicates that the ``LLMPlatform`` is currently processing and executing ``LLMSession``s. + case processing +} diff --git a/Sources/SpeziLLM/LLMRunner.swift b/Sources/SpeziLLM/LLMRunner.swift index c448f05..1f3f3c1 100644 --- a/Sources/SpeziLLM/LLMRunner.swift +++ b/Sources/SpeziLLM/LLMRunner.swift @@ -8,192 +8,204 @@ import Foundation import Spezi +import SpeziChat -/// Handles the execution of Large Language Models (LLMs) in the Spezi ecosystem. +/// Manages the execution of LLMs in the Spezi ecosystem. /// -/// The ``LLMRunner`` is a Spezi `Module` that that wraps a Spezi ``LLM`` during it's execution, handling all management overhead tasks of the models execution. -/// The ``LLMRunner`` needs to be initialized in the Spezi `Configuration` with the ``LLMRunnerConfiguration`` as well as a set of ``LLMRunnerSetupTask``s as arguments. +/// The ``LLMRunner`` is a Spezi `Module` available for access through the SwiftUI `Environment` that is responsible for turning a ``LLMSchema`` towards an executable and stateful ``LLMSession``. +/// The ``LLMRunner`` delegates the creation of the ``LLMSession``s to the respective ``LLMPlatform``s, allowing for customized creation and dependency injection for each LLM type. /// -/// The runner manages a set of ``LLMGenerationTask``'s as well as the respective LLM execution backends in order to enable -/// a smooth and efficient model execution. +/// Within the Spezi ecosystem, the ``LLMRunner`` is set up via the Spezi `Configuration` by taking a trailing closure argument within ``LLMRunner/init(_:)``. +/// The closure aggregates multiple stated ``LLMPlatform``s via is the ``LLMPlatformBuilder``, enabling easy and dynamic configuration of all wanted ``LLMPlatform``s. +/// +/// The main functionality of the ``LLMRunner`` is``LLMRunner/callAsFunction(with:)``, turning a ``LLMSchema`` to an executable ``LLMSession`` via the respective ``LLMPlatform``. +/// The created ``LLMSession`` then holds the LLM context and is able to perform the actual LLM inference. +/// For one-shot LLM inference tasks, the ``LLMRunner`` provides ``LLMRunner/oneShot(with:chat:)-2a1du`` and ``LLMRunner/oneShot(with:chat:)-24coq``, enabling the ``LLMRunner`` to deal with the LLM state management and reducing the burden on developers by just returning an `AsyncThrowingStream` or `String` directly. /// /// ### Usage /// -/// The code section below showcases a complete code example on how to use the ``LLMRunner`` in combination with a `LLMLocal` (locally executed Language Model) from the [SpeziLLMLocal](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/spezillmlocal) target. +/// The code section below showcases a complete, bare-bone code example on how to use the ``LLMRunner`` with the ``LLMSchema``. +/// The example is structured as a SwiftUI `View` with a `Button` to trigger LLM inference via the ``LLMMockSchema``. The generated output stream is displayed in a `Text` field. +/// +/// - Tip: SpeziLLM provides the `@LLMSessionProvider` property wrapper (`View/LLMSessionProvider`) that drastically simplifies the state management of using the ``LLMSchema`` with the ``LLMRunner``. Refer to the docs for more information. 
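+///
+/// A one-line sketch of the property wrapper mentioned in the tip above (illustrative only; uses the mock LLM types from this target):
+/// ```swift
+/// @LLMSessionProvider(schema: LLMMockSchema()) var llm: LLMMockSession
+/// ```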
/// /// ```swift -/// class LocalLLMAppDelegate: SpeziAppDelegate { -/// override var configuration: Configuration { -/// Configuration { -/// // Configure the runner responsible for executing LLMs -/// LLMRunner( -/// runnerConfig: .init( -/// taskPriority: .medium -/// ) -/// ) { -/// // Runner setup tasks conforming to `LLMRunnerSetupTask` protocol -/// LLMLocalRunnerSetupTask() +/// struct LLMDemoView: View { +/// // The runner responsible for executing the LLM. +/// @Environment(LLMRunner.self) var runner +/// +/// // The LLM in execution, as defined by the ``LLMSchema``. +/// @State var llmSession: LLMMockSession? +/// @State var responseText = "" +/// +/// var body: some View { +/// VStack { +/// Button { +/// Task { +/// try await executePrompt(prompt: "Hello LLM!") +/// } +/// } label: { +/// Text("Start LLM inference") /// } +/// .disabled(if: llmSession) +/// +/// Text(responseText) /// } +/// .task { +/// // Instantiate the `LLMSchema` to an `LLMSession` via the `LLMRunner`. +/// self.llmSession = runner(with: LLMMockSchema()) +/// } /// } -/// } -/// -/// struct LocalLLMChatView: View { -/// // The runner responsible for executing the LLM. -/// @Environment(LLMRunner.self) var runner: LLMRunner /// -/// // The executed LLM -/// @State var model: LLMLocal = .init( -/// modelPath: ... -/// ) -/// @State var responseText: String +/// func executePrompt(prompt: String) async throws { +/// await MainActor.run { +/// llmSession?.context.append(userInput: prompt) +/// } /// -/// func executePrompt(prompt: String) { -/// // Execute the query on the runner, returning a stream of outputs -/// let stream = try await runner(with: model).generate(prompt: "Hello LLM!") +/// // Performing the LLM inference, returning a stream of outputs. +/// guard let stream = try await llmSession?.generate() else { +/// return +/// } /// /// for try await token in stream { /// responseText.append(token) -/// } +/// } /// } /// } /// ``` -public actor LLMRunner: Module, DefaultInitializable, EnvironmentAccessible { +public class LLMRunner: Module, EnvironmentAccessible, DefaultInitializable { /// The ``State`` describes the current state of the ``LLMRunner``. /// As of now, the ``State`` is quite minimal with only ``LLMRunner/State-swift.enum/idle`` and ``LLMRunner/State-swift.enum/processing`` states. public enum State { case idle case processing - case error(LocalizedError) } - - /// The configuration of the runner represented by ``LLMRunnerConfiguration``. - private let runnerConfiguration: LLMRunnerConfiguration - /// Indicates if the ``LLMRunner`` should lazily perform the passed ``LLMRunnerSetupTask``'s. - private let lazyRunnerSetup: Bool - /// Holds all dependencies of the ``LLMRunner`` as expressed by all stated ``LLMRunnerSetupTask``'s in the ``init(runnerConfig:_:)``. - /// Is required to enable the injection of `Dependency`s into the ``LLMRunnerSetupTask``'s. - @Dependency private var runnerSetupTaskModules: [any Module] - /// All to be performed ``LLMRunner``-related setup tasks, mapped to the respective ``LLMHostingType``. - /// Derived from the ``LLMRunnerSetupTask``'s passed within the ``init(runnerConfig:_:)``. - private var runnerSetupTasks: [LLMHostingType: any LLMRunnerSetupTask] = [:] - - /// Stores all currently available ``LLMGenerationTask``'s, one for each Spezi ``LLM``, identified by the ``LLMTaskIdentifier``. - private var runnerTasks: [LLMTaskIdentifier: LLMGenerationTask] = [:] - /// Indicates for which ``LLMHostingType`` the runner backend is already initialized. 
- private var runnerBackendInitialized: [LLMHostingType: Bool] = [:] - /// The ``State`` of the runner, derived from the individual ``LLMGenerationTask``'s. + /// Holds all configured ``LLMPlatform``s of the ``LLMRunner`` as expressed by all stated ``LLMPlatform``'s in the ``LLMRunner/init(_:)``. + @Dependency private var llmPlatformModules: [any Module] + /// Maps the ``LLMSchema`` (identified by the `ObjectIdentifier`) towards the respective ``LLMPlatform``. + var llmPlatforms: [ObjectIdentifier: any LLMPlatform] = [:] + + /// The ``State`` of the runner, derived from the individual ``LLMPlatform``'s. @MainActor public var state: State { - get async { - var state: State = .idle - - for runnerTask in await self.runnerTasks.values where await runnerTask.state == .generating { - state = .processing - } - - return state + var state: State = .idle + + for platform in self.llmPlatforms.values where platform.state == .processing { + state = .processing } + + return state } - /// Creates the ``LLMRunner`` which is responsible for executing the Spezi ``LLM``'s. + /// Creates the ``LLMRunner`` which is responsible for executing LLMs within the Spezi ecosystem. /// /// - Parameters: - /// - runnerConfig: The configuration of the ``LLMRunner`` represented by the ``LLMRunnerConfiguration``. - /// - dependencies: A result builder that aggregates all stated ``LLMRunnerSetupTask``'s as dependencies. + /// - dependencies: A result builder that aggregates all stated ``LLMPlatform``s. public init( - runnerConfig: LLMRunnerConfiguration = .init(), - lazyRunnerSetup: Bool = true, - @LLMRunnerSetupTaskBuilder _ dependencies: @Sendable () -> DependencyCollection + @LLMPlatformBuilder _ dependencies: @Sendable () -> DependencyCollection ) { - self.runnerConfiguration = runnerConfig - self.lazyRunnerSetup = lazyRunnerSetup - self._runnerSetupTaskModules = Dependency(using: dependencies()) - - for modelType in LLMHostingType.allCases { - self.runnerBackendInitialized[modelType] = false - } + self._llmPlatformModules = Dependency(using: dependencies()) } - /// Convenience initializer for the creation of a ``LLMRunner``. - public init() { - self.init(runnerConfig: .init()) {} + /// Convenience initializer for the creation of an ``LLMRunner`` that doesn't support any ``LLMPlatform``s + /// Helpful for stating a Spezi `Dependency` to the ``LLMRunner``. + public required convenience init() { + self.init {} } - public nonisolated func configure() { - Task { - await mapRunnerSetupTasks() + + public func configure() { + self.llmPlatforms = _llmPlatformModules.wrappedValue.compactMap { platform in + platform as? (any LLMPlatform) + } + .reduce(into: [:]) { partialResult, platform in + partialResult[platform.schemaId] = platform } } - private func mapRunnerSetupTasks() async { - for module in runnerSetupTaskModules { - guard let task = module as? any LLMRunnerSetupTask else { - preconditionFailure("SpeziLLM: Reached inconsistent state. \(type(of: module)) is not a \((any LLMRunnerSetupTask).self)") - } - - runnerSetupTasks[task.type] = task - - if !lazyRunnerSetup { - try? await task.setupRunner(runnerConfig: self.runnerConfiguration) - runnerBackendInitialized[task.type] = true - } + /// Turns the received ``LLMSchema`` to an executable ``LLMSession``. + /// + /// The ``LLMRunner`` uses the configured ``LLMPlatform``s to create an executable ``LLMSession`` from the passed ``LLMSchema`` + /// + /// - Parameters: + /// - with: The ``LLMSchema`` that should be turned into an ``LLMSession``. 
+ /// + /// - Returns: The ready to use ``LLMSession``. + public func callAsFunction(with llmSchema: L) -> L.Platform.Session { + // Searches for the respective `LLMPlatform` associated with the `LLMSchema`. + guard let platform = llmPlatforms[ObjectIdentifier(L.self)] else { + preconditionFailure(""" + The designated `LLMPlatform` \(String(describing: L.Platform.Session.self)) to run the `LLMSchema` \(String(describing: L.self)) was not configured within the Spezi `Configuration`. + Ensure that the `LLMRunner` is set up with all required `LLMPlatform`s. + """) + } + + // Checks the conformance of the related `LLMSession` to `Observable`. + guard L.Platform.Session.self is Observable.Type else { + preconditionFailure(""" + The passed `LLMSchema` \(String(describing: L.self)) corresponds to a not observable `LLMSession` type (found session was \(String(describing: L.Platform.Session.self))). + Ensure that the used `LLMSession` type (\(String(describing: L.Platform.Session.self))) conforms to the `Observable` protocol via the `@Observable` macro. + """) } + + // Delegates the creation of the `LLMSession` to the configured `LLMPlatform`s. + return platform.determinePlatform(for: llmSchema) } - - /// This call-as-a-function ``LLMRunner`` usage wraps a Spezi ``LLM`` and makes it ready for execution. - /// It manages a set of all ``LLMGenerationTask``'s, guaranteeing efficient model execution. + /// One-shot mechanism to turn the received ``LLMSchema`` into an `AsyncThrowingStream`. + /// + /// Directly returns an `AsyncThrowingStream` based on the defined ``LLMSchema`` as well as the passed `Chat` (context of the LLM). /// /// - Parameters: - /// - with: The ``LLM`` that should be executed. + /// - with: The ``LLMSchema`` that should be turned into an ``LLMSession``. + /// - chat: The context of the LLM used for the inference. /// - /// - Returns: The ready to use ``LLMGenerationTask``. - public func callAsFunction(with model: any LLM) async throws -> LLMGenerationTask { - let modelType = model.type - /// If necessary, setup of the runner backend - if runnerBackendInitialized[modelType] != true && modelType != .mock { - /// Initializes the required runner backends for the respective ``LLMHostingType``. - guard let task = self.runnerSetupTasks[modelType] else { - preconditionFailure(""" - A LLMRunnerSetupTask setting up the runner for a specific LLM environment was not found. - Please ensure that a LLMRunnerSetupTask is passed to the Spezi LLMRunner within the Spezi Configuration. - """) - } - - do { - try await task.setupRunner(runnerConfig: self.runnerConfiguration) - } catch { - // Adjust `LLM/state` to not initialized in order to allow for new errors to surface and trigger and alert - await MainActor.run { - model.state = .uninitialized - } - throw error - } - - runnerBackendInitialized[modelType] = true + /// - Returns: The ready to use `AsyncThrowingStream`. 
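+    ///
+    /// A minimal usage sketch (illustrative only; assumes an injected ``LLMRunner`` and the ``LLMMockSchema`` from this target):
+    /// ```swift
+    /// let stream: AsyncThrowingStream<String, Error> = try await runner.oneShot(
+    ///     with: LLMMockSchema(),
+    ///     chat: [.init(role: .user, content: "Hello LLM!")]
+    /// )
+    ///
+    /// for try await token in stream {
+    ///     print(token)
+    /// }
+    /// ```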
+ public func oneShot(with llmSchema: L, chat: Chat) async throws -> AsyncThrowingStream { + let llmSession = callAsFunction(with: llmSchema) + await MainActor.run { + llmSession.context = chat } - /// Check if a fitting ``LLMRunnerInferenceTask`` for that model already exists - let taskIdentifier = LLMTaskIdentifier(fromModel: model) - guard let runnerTask = runnerTasks[taskIdentifier] else { - let runnerTask = LLMGenerationTask(model: model, runnerConfig: runnerConfiguration) - runnerTasks[taskIdentifier] = runnerTask - return runnerTask + return try await llmSession.generate() + } + + /// One-shot mechanism to turn the received ``LLMSchema`` into a completed output `String`. + /// + /// Directly returns the finished output `String` based on the defined ``LLMSchema`` as well as the passed `Chat` (context of the LLM). + /// + /// - Parameters: + /// - with: The ``LLMSchema`` that should be turned into an ``LLMSession``. + /// - chat: The context of the LLM used for the inference. + /// + /// - Returns: The completed output `String`. + public func oneShot(with llmSchema: L, chat: Chat) async throws -> String { + var output = "" + + for try await stringPiece in try await oneShot(with: llmSchema, chat: chat) { + output.append(stringPiece) } - return runnerTask + return output } - +} - /// Upon deinit, cancel all ``LLMRunnerInferenceTask``'s. - deinit { - let runnerTasks = runnerTasks - Task { - for runnerTask in runnerTasks.values { - await runnerTask.task?.cancel() - } +extension LLMPlatform { + /// Determine the correct ``LLMPlatform`` for the passed ``LLMSchema``. + fileprivate func determinePlatform(for schema: L) -> L.Platform.Session { + guard let schema = schema as? Schema else { + preconditionFailure(""" + Reached inconsistent state. Ensure that the specified LLMSchema matches the schema defined within the LLMPlatform. + """) } + + guard let session = self(with: schema) as? L.Platform.Session else { + preconditionFailure(""" + Reached inconsistent state. Ensure that the specified LLMSession matches the session defined within the LLMPlatform. + """) + } + + return session } } diff --git a/Sources/SpeziLLM/LLMSchema.swift b/Sources/SpeziLLM/LLMSchema.swift new file mode 100644 index 0000000..11bd6ad --- /dev/null +++ b/Sources/SpeziLLM/LLMSchema.swift @@ -0,0 +1,36 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +/// Defines the type and configuration of the LLM. +/// +/// The ``LLMSchema`` is used as a configuration for a to-be-used LLM. It contains all information necessary for the creation of an executable ``LLMSession``. +/// It is bound to a ``LLMPlatform`` that is responsible for turning the ``LLMSchema`` to an ``LLMSession``. +/// +/// - Tip: The ``LLMSchema`` should be implemented as a Swift `struct`, immutable and easily copyable. +/// +/// ### Usage +/// +/// The example below demonstrates a concrete implementation of the ``LLMSchema`` with the ``LLMMockPlatform``. +/// +/// ```swift +/// public struct LLMMockSchema: LLMSchema { +/// public typealias Platform = LLMMockPlatform +/// +/// public let injectIntoContext = false +/// +/// public init() {} +/// } +/// ``` +public protocol LLMSchema: Sendable { + /// The ``LLMPlatform`` responsible for turning the ``LLMSchema`` towards a ``LLMSession``. 
+ associatedtype Platform: LLMPlatform + + + /// Indicates if the inference output by the ``LLMSession`` should automatically be inserted into the ``LLMSession/context``. + var injectIntoContext: Bool { get } +} diff --git a/Sources/SpeziLLM/LLMSession.swift b/Sources/SpeziLLM/LLMSession.swift new file mode 100644 index 0000000..a60fa19 --- /dev/null +++ b/Sources/SpeziLLM/LLMSession.swift @@ -0,0 +1,112 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation +import SpeziChat + + +/// Represents an LLM in execution. +/// +/// The ``LLMSession`` is the executable version of the LLM containing context and state as defined by the ``LLMSchema``. +/// The ``LLMPlatform`` is responsible for turning the ``LLMSchema`` towards the ``LLMSession`` and is able to pass arbitrary dependencies to the ``LLMSession``. +/// +/// ``LLMSession`` does the heavy lifting of actually providing the inference logic of the LLMs to generate `String`-based output on the ``LLMSession/context`` input. +/// The inference is started by ``LLMSession/generate()``, returning an `AsyncThrowingStream` and can be cancelled via ``LLMSession/cancel()``. +/// +/// The ``LLMSession`` exposes its current state via the ``LLMSession/context`` property, containing all the conversational history with the LLM. +/// In addition, the ``LLMSession/state`` indicates the current lifecycle state of the LLM, so for example ``LLMState/ready`` or ``LLMState/generating``. +/// Both of these properties should be bound to the `MainActor` in order to allow for seamless SwiftUI `View` updates. +/// +/// The actual compute-intensive inference should be performed within a `Task`. The `Task` instance should be stored within the ``LLMSession`` in order to properly cancel the task at hand if requested to do so. +/// +/// - Warning: The ``LLMSession`` shouldn't be created manually but always through an ``LLMPlatform`` which in turn is automatically chosen for a given ``LLMSchema`` via the ``LLMRunner``. +/// +/// - Important: A ``LLMSession`` is a `class`-bound `protocol` and must therefore be implemented by a Swift `class`. +/// In addition, the ``LLMSession`` must be annotated with the `@Observable` macro in order to track the ``LLMSession/context`` changes, otherwise a runtime crash will occur during inference. +/// +/// ### Usage +/// +/// The example below demonstrates a concrete implementation of the ``LLMSession`` with the ``LLMMockPlatform`` and ``LLMMockSchema``. +/// +/// ```swift +/// @Observable +/// public class LLMMockSession: LLMSession { +/// let platform: LLMMockPlatform +/// let schema: LLMMockSchema +/// private var task: Task<(), Never>? +/// +/// @MainActor public var state: LLMState = .uninitialized +/// @MainActor public var context: Chat = [] +/// +/// init(_ platform: LLMMockPlatform, schema: LLMMockSchema) { +/// self.platform = platform +/// self.schema = schema +/// } +/// +/// @discardableResult +/// public func generate() async throws -> AsyncThrowingStream { +/// let (stream, continuation) = AsyncThrowingStream.makeStream(of: String.self) +/// +/// task = Task { +/// // Yield string pieces on the continuation +/// } +/// +/// return stream +/// } +/// +/// public func cancel() { +/// task?.cancel() +/// } +/// } +/// ``` +public protocol LLMSession: AnyObject, Sendable { + /// The state of the ``LLMSession`` indicated by the ``LLMState``. 
+ @MainActor var state: LLMState { get set } + /// The current context state of the ``LLMSession``, includes the entire prompt history including system prompts, user input, and model responses. + @MainActor var context: Chat { get set } + + + /// Starts the inference of the ``LLMSession`` based on the ``LLMSession/context``. + /// + /// - Returns: An `AsyncThrowingStream` that yields the generated `String` pieces from the LLM. + @discardableResult + func generate() async throws -> AsyncThrowingStream + + /// Cancels the current inference of the ``LLMSession``. + func cancel() +} + + +extension LLMSession { + /// Finishes the continuation with an error and sets the ``LLMSession/state`` to the respective error. + /// + /// - Parameters: + /// - error: The error that occurred. + /// - continuation: The `AsyncThrowingStream` that streams the generated output. + public func finishGenerationWithError(_ error: E, on continuation: AsyncThrowingStream.Continuation) async { + continuation.finish(throwing: error) + await MainActor.run { + self.state = .error(error: error) + } + } + + /// Checks for cancellation of the current `Task` and sets the `CancellationError` error on the continuation as well as the ``LLMSession/state``. + /// + /// - Parameters: + /// - continuation: The `AsyncThrowingStream` that streams the generated output. + /// + /// - Returns: Boolean flag indicating if the `Task` has been cancelled, `true` if has been cancelled, `false` otherwise. + public func checkCancellation(on continuation: AsyncThrowingStream.Continuation) async -> Bool { + if Task.isCancelled { + await finishGenerationWithError(CancellationError(), on: continuation) + return true + } + + return false + } +} diff --git a/Sources/SpeziLLM/LLMSessionProvider.swift b/Sources/SpeziLLM/LLMSessionProvider.swift new file mode 100644 index 0000000..f28893a --- /dev/null +++ b/Sources/SpeziLLM/LLMSessionProvider.swift @@ -0,0 +1,118 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import SwiftUI + + +/// Refer to the documentation of ``View/LLMSessionProvider`` for information on how to use the `@LLMSessionProvider` property wrapper. +@propertyWrapper +public struct _LLMSessionProvider: DynamicProperty { // swiftlint:disable:this type_name + /// Internal boxing type required to wrap the ``LLMSession``. + @Observable + class Box { + var value: T + + init(_ value: T) { + self.value = value + } + } + + + /// The ``LLMRunner`` used to initialize the ``LLMSession`` + @Environment(LLMRunner.self) private var runner + /// Boxed ``LLMSession`` `State` + @State private var llmBox: Box + + /// ``LLMSchema`` that defines the to-be-initialized ``LLMSession``. + private let schema: Schema + + + /// Access the initialized ``LLMSession``. + public var wrappedValue: Schema.Platform.Session { + guard let llm = llmBox.value else { + fatalError(""" + The underlying LLMSession hasn't been initialized yet via the LLM Runner. + Ensure that the @LLMSessionProvider is used within a SwiftUI View. + """) + } + + return llm + } + + /// Creates a `Binding` to the ``LLMSession``that one can pass around. Useful for passing the ``LLMSession`` as a `Binding` to the ``LLMChatView``. + public var projectedValue: Binding { + Binding { + wrappedValue + } set: { + llmBox.value = $0 + } + } + + + /// Initialize the `_LLMSessionProvider` with the to be instantiated ``LLMSchema``. 
+ /// + /// - Parameters: + /// - schema: The ``LLMSchema`` to instantiate as an ``LLMSession``. + public init(schema: Schema) { + self.schema = schema + self._llmBox = State(wrappedValue: Box(nil)) + } + + + /// Called by SwiftUI upon `View` update, initializes the ``LLMSession`` if not done yet. + public func update() { + guard llmBox.value == nil else { + return + } + + // Initialize `LLMSession` via `LLMRunner` from the SwiftUI `Environment` + llmBox.value = runner(with: schema) + } +} + + +extension View { + /// Instantiates an ``LLMSession`` from the passed ``LLMSchema``. + /// + /// The ``LLMSessionProvider`` enables the convenient instantiation of the passed ``LLMSchema`` (defining the LLM) to a to-be-used ``LLMSession`` (LLM in execution). + /// The instantiation is done by the ``LLMRunner`` which determines the correct ``LLMPlatform`` for the ``LLMSchema`` to run on. + /// + /// - Warning: To use the ``LLMSessionProvider``, the ``LLMRunner`` must be configured within the Spezi `Configuration`. + /// + /// ### Usage + /// + /// The example below demonstrates using the ``LLMSessionProvider`` to generate LLM output. + /// + /// ```swift + /// struct LLMDemoView: View { + /// // Use the convenience property wrapper to instantiate the `LLMMockSession` + /// @LLMSessionProvider(schema: LLMMockSchema()) var llm: LLMMockSession + /// @State var responseText = "" + /// + /// var body: some View { + /// VStack { + /// Button { + /// Task { @MainActor in + /// llm.context.append(userInput: "Hello!") + /// + /// for try await token in try await llm.generate() { + /// responseText.append(token) + /// } + /// } + /// } label: { + /// Text("Start LLM inference") + /// } + /// .disabled(llm.state.representation == .processing) + /// + /// Text(responseText) + /// } + /// } + /// } + /// ``` + public typealias LLMSessionProvider = _LLMSessionProvider where Schema: LLMSchema +} diff --git a/Sources/SpeziLLM/LLMState.swift b/Sources/SpeziLLM/LLMState.swift index 8c896e5..ca2c435 100644 --- a/Sources/SpeziLLM/LLMState.swift +++ b/Sources/SpeziLLM/LLMState.swift @@ -8,19 +8,19 @@ import Foundation -/// Describes possible states that the LLM can be in. +/// Describes possible states that the ``LLMSession`` can be in. /// /// Based on the ``LLMState``, `SpeziLLM` performs proper actions on the model as well as state management. public enum LLMState: CustomStringConvertible, Equatable { - /// The Spezi ``LLM`` is allocated, but the underlying llama.cpp model has not yet been initialized. + /// The Spezi ``LLMSession`` is allocated, but the underlying model has not yet been initialized. case uninitialized - /// The Spezi ``LLM`` is in the process of being initialized, so the model file is loaded from memory. + /// The Spezi ``LLMSession`` is in the process of being initialized. case loading - /// The Spezi ``LLM`` as well as the underlying llama.cpp model is initialized and ready for use. + /// The Spezi ``LLMSession`` is initialized and ready for use. case ready - /// The Spezi ``LLM`` is currently in the process of generating an output. + /// The Spezi ``LLMSession`` is currently in the process of generating an output. case generating - /// The Spezi ``LLM`` is in an error state as described by the associated value ``LLMError``. + /// The Spezi ``LLMSession`` is in an error state as described by the associated value ``LLMError``. 
case error(error: any LLMError) diff --git a/Sources/SpeziLLM/Mock/LLMMock.swift b/Sources/SpeziLLM/Mock/LLMMock.swift deleted file mode 100644 index b3ceb03..0000000 --- a/Sources/SpeziLLM/Mock/LLMMock.swift +++ /dev/null @@ -1,42 +0,0 @@ -// -// This source file is part of the Stanford Spezi open source project -// -// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) -// -// SPDX-License-Identifier: MIT -// - -import Foundation -import SpeziChat - - -/// A mock SpeziLLM ``LLM`` that is used for testing and preview purposes. -@Observable -public class LLMMock: LLM { - public let type: LLMHostingType = .mock - @MainActor public var state: LLMState = .uninitialized - @MainActor public var context: Chat = [] - - - public init() {} - - - public func setup(runnerConfig: LLMRunnerConfiguration) async throws { - await MainActor.run { - self.state = .ready - } - } - - public func generate(continuation: AsyncThrowingStream.Continuation) async { - /// Generate mock message - try? await Task.sleep(for: .seconds(1)) - continuation.yield("Mock ") - try? await Task.sleep(for: .milliseconds(500)) - continuation.yield("Message ") - try? await Task.sleep(for: .milliseconds(500)) - continuation.yield("from ") - try? await Task.sleep(for: .milliseconds(500)) - continuation.yield("SpeziLLM!") - continuation.finish() - } -} diff --git a/Sources/SpeziLLM/Mock/LLMMockPlatform.swift b/Sources/SpeziLLM/Mock/LLMMockPlatform.swift new file mode 100644 index 0000000..01d2a28 --- /dev/null +++ b/Sources/SpeziLLM/Mock/LLMMockPlatform.swift @@ -0,0 +1,24 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + + +/// A mock ``LLMPlatform``, used for testing purposes. +/// +/// The platform is associated with the ``LLMMockSchema`` and enables the execution of the ``LLMMockSession``. +public actor LLMMockPlatform: LLMPlatform { + @MainActor public let state: LLMPlatformState = .idle + + + /// Initializer for the ``LLMMockPlatform``. + public init() {} + + + nonisolated public func callAsFunction(with: LLMMockSchema) -> LLMMockSession { + LLMMockSession(self, schema: with) + } +} diff --git a/Sources/SpeziLLM/Mock/LLMMockSchema.swift b/Sources/SpeziLLM/Mock/LLMMockSchema.swift new file mode 100644 index 0000000..4c03cb6 --- /dev/null +++ b/Sources/SpeziLLM/Mock/LLMMockSchema.swift @@ -0,0 +1,21 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + + +/// A mock ``LLMSchema``, used for testing purposes. +/// +/// The ``LLMMockSchema`` is bound to the ``LLMMockPlatform``. +public struct LLMMockSchema: LLMSchema { + public typealias Platform = LLMMockPlatform + + public let injectIntoContext = false + + + /// Initializer for the ``LLMMockSchema``. 
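+    ///
+    /// Illustrative sketch of turning this schema into an executable session (assumes a configured ``LLMRunner``):
+    /// ```swift
+    /// let llm: LLMMockSession = runner(with: LLMMockSchema())
+    /// ```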
+ public init() {} +} diff --git a/Sources/SpeziLLM/Mock/LLMMockSession.swift b/Sources/SpeziLLM/Mock/LLMMockSession.swift new file mode 100644 index 0000000..8806663 --- /dev/null +++ b/Sources/SpeziLLM/Mock/LLMMockSession.swift @@ -0,0 +1,105 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Observation +import SpeziChat + + +/// A mock ``LLMSession``, used for testing purposes. +/// +/// The ``LLMMockSession`` is created by the configuration defined in the ``LLMMockSchema``. +/// The ``LLMMockSession`` is then executed by the ``LLMMockPlatform``. +/// +/// The ``LLMMockSession`` generates an example output String ("Mock Message from SpeziLLM!") with a 1 second startup time +/// as well as 0.5 seconds between each `String` piece generation. +@Observable +public final class LLMMockSession: LLMSession, @unchecked Sendable { + let platform: LLMMockPlatform + let schema: LLMMockSchema + + @ObservationIgnored private var task: Task<(), Never>? + + @MainActor public var state: LLMState = .uninitialized + @MainActor public var context: Chat = [] + + + /// Initializer for the ``LLMMockSession``. + /// + /// - Parameters: + /// - platform: The mock LLM platform. + /// - schema: The mock LLM schema. + init(_ platform: LLMMockPlatform, schema: LLMMockSchema) { + self.platform = platform + self.schema = schema + } + + @discardableResult + public func generate() async throws -> AsyncThrowingStream { + let (stream, continuation) = AsyncThrowingStream.makeStream(of: String.self) + + task = Task { + await MainActor.run { + self.state = .loading + } + try? await Task.sleep(for: .seconds(1)) + guard await !checkCancellation(on: continuation) else { + return + } + + /// Generate mock messages + await MainActor.run { + self.state = .generating + } + await injectAndYield("Mock ", on: continuation) + + try? await Task.sleep(for: .milliseconds(500)) + guard await !checkCancellation(on: continuation) else { + return + } + await injectAndYield("Message ", on: continuation) + + try? await Task.sleep(for: .milliseconds(500)) + guard await !checkCancellation(on: continuation) else { + return + } + await injectAndYield("from ", on: continuation) + + try? 
await Task.sleep(for: .milliseconds(500)) + guard await !checkCancellation(on: continuation) else { + return + } + await injectAndYield("SpeziLLM!", on: continuation) + + continuation.finish() + await MainActor.run { + context.completeAssistantStreaming() + self.state = .ready + } + } + + return stream + } + + public func cancel() { + task?.cancel() + } + + private func injectAndYield(_ piece: String, on continuation: AsyncThrowingStream.Continuation) async { + continuation.yield(piece) + if schema.injectIntoContext { + await MainActor.run { + context.append(assistantOutput: piece) + } + } + } + + + deinit { + cancel() + } +} diff --git a/Sources/SpeziLLM/Resources/Localizable.xcstrings b/Sources/SpeziLLM/Resources/Localizable.xcstrings index 3fddccf..65f2137 100644 --- a/Sources/SpeziLLM/Resources/Localizable.xcstrings +++ b/Sources/SpeziLLM/Resources/Localizable.xcstrings @@ -2,6 +2,7 @@ "sourceLanguage" : "en", "strings" : { "LLM_SETUP_ERROR_DESCRIPTION" : { + "extractionState" : "stale", "localizations" : { "en" : { "stringUnit" : { @@ -12,6 +13,7 @@ } }, "LLM_SETUP_ERROR_FAILURE_REASON" : { + "extractionState" : "stale", "localizations" : { "en" : { "stringUnit" : { @@ -22,6 +24,7 @@ } }, "LLM_SETUP_ERROR_RECOVERY_SUGGESTION" : { + "extractionState" : "stale", "localizations" : { "en" : { "stringUnit" : { @@ -80,6 +83,15 @@ } } } + }, + "LLM_UNKNOWN_ERROR_DESCRIPTION" : { + + }, + "LLM_UNKNOWN_ERROR_FAILURE_REASON" : { + + }, + "LLM_UNKNOWN_ERROR_RECOVERY_SUGGESTION" : { + } }, "version" : "1.0" diff --git a/Sources/SpeziLLM/SpeziLLM.docc/SpeziLLM.md b/Sources/SpeziLLM/SpeziLLM.docc/SpeziLLM.md index b1042d0..6bfc59e 100644 --- a/Sources/SpeziLLM/SpeziLLM.docc/SpeziLLM.md +++ b/Sources/SpeziLLM/SpeziLLM.docc/SpeziLLM.md @@ -28,60 +28,44 @@ You need to add the SpeziLLM Swift package to ## Spezi LLM Components -The two main components of ``SpeziLLM`` are the ``LLM`` abstraction as well as the ``LLMRunner`` execution capability. The following section highlights the usage of these parts. +The two main components of ``SpeziLLM`` are the LLM abstractions which are composed of the ``LLMSchema``, ``LLMSession``, and ``LLMPlatform`` as well as the ``LLMRunner`` execution capability. The following section highlights the usage of these parts. -### LLM abstraction +### LLM abstractions + +``SpeziLLM`` provides three main parts abstracting LLMs: +- ``LLMSchema``: Configuration of the to-be-used LLM, containing all information necessary for the creation of an executable ``LLMSession``. +- ``LLMSession``: Executable version of the LLM containing context and state as defined by the ``LLMSchema``. +- ``LLMPlatform``: Responsible for turning the received ``LLMSchema`` to an executable ``LLMSession``. -The ``LLM`` protocol provides an abstraction layer for the usage of Large Language Models within the Spezi ecosystem, +These protocols provides an abstraction layer for the usage of Large Language Models within the Spezi ecosystem, regardless of the execution locality (local or remote) or the specific model type. -Developers can use the ``LLM`` protocol to conform their LLM interface implementations to a standard which is consistent throughout the Spezi ecosystem. +Developers can use these protocols to conform their LLM interface implementations to a standard which is consistent throughout the Spezi ecosystem. -The ``LLM`` contains the ``LLM/context`` property which holds the entire history of the model interactions. -This includes the system prompt, user input, but also assistant responses. 
-Ensure the property always contains all necessary information, as the ``LLM/generate(continuation:)`` function executes the inference based on the ``LLM/context``. +The actual inference logic as well as state is held within the ``LLMSession``. It requires implementation of the ``LLMSession/generate()`` as well as ``LLMSession/cancel()`` functions, which start and cancel the LLM inference, respectively. +In addition, it contains the ``LLMSession/context`` in which the entire conversational history with the LLM is held as well as the ``LLMSession/state`` describing the current execution state of the session. -> Important: An ``LLM`` shouldn't be executed on it's own but always used together with the ``LLMRunner``. +> Important: Any of the three aforementioned LLM abstractions shouldn't be used on its own but always together with the ``LLMRunner``. Please refer to the ``LLMRunner`` documentation for a complete code example. -### Usage - -An example conformance of the ``LLM`` looks like the code sample below (lots of details were omitted for simplicity). -The key point is the need to implement the ``LLM/setup(runnerConfig:)`` as well as the ``LLM/generate(continuation:)`` functions, whereas the ``LLM/setup(runnerConfig:)`` has an empty default implementation as not every ``LLMHostingType`` requires the need for a setup closure. - -```swift -@Observable -public class LLMTest: LLM { - public let type: LLMHostingType = .local - @MainActor public var state: LLMState = .uninitialized - @MainActor public var context: Chat = [] - - public func setup(/* */) async throws {} - public func generate(/* */) async {} -} -``` - ### LLM runner -The ``LLMRunner`` is a Spezi `Module` that handles the execution of Language Models in the Spezi ecosystem, regardless of their execution locality (local or remote) or the specific model type. A ``LLMRunner`` wraps a Spezi ``LLM`` during it's execution, handling all management overhead tasks of the models execution. +The ``LLMRunner`` is a Spezi `Module` accessible via the SwiftUI `Environment` that handles the execution of Language Models in the Spezi ecosystem, regardless of their execution locality (represented by the ``LLMPlatform``) or the specific model type. +A ``LLMRunner`` is responsible for turning a ``LLMSchema`` into an executable and stateful ``LLMSession`` by using the underlying ``LLMPlatform``. -The runner manages a set of ``LLMGenerationTask``'s as well as the respective LLM execution backends in order to enable a smooth and efficient model execution. +The ``LLMRunner`` is configured with the supported ``LLMPlatform``s, enabling the runner to delegate the LLM execution to the correct ``LLMPlatform``. #### Setup -The ``LLMRunner`` needs to be initialized in the Spezi `Configuration` with the ``LLMRunnerConfiguration`` as well as a set of ``LLMRunnerSetupTask``s as arguments. +Before usage, the ``LLMRunner`` needs to be initialized in the Spezi `Configuration` with the supported ``LLMPlatform``s. ```swift class LocalLLMAppDelegate: SpeziAppDelegate { override var configuration: Configuration { Configuration { - // Configure the runner responsible for executing LLMs - LLMRunner( - runnerConfig: .init( - taskPriority: .medium - ) - ) { - // Runner setup tasks conforming to `LLMRunnerSetupTask` protocol - LLMLocalRunnerSetupTask() + // Configure the runner responsible for executing LLMs. + LLMRunner { + // State the `LLMPlatform`s supported by the `LLMRunner`.
+ LLMMockPlatform() } } } @@ -90,78 +74,147 @@ class LocalLLMAppDelegate: SpeziAppDelegate { #### Usage -The code section below showcases a complete code example on how to use the ``LLMRunner`` in combination with a `LLMLocal` (locally executed Language Model) from the [SpeziLLMLocal](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillmlocal) target. +The code section below showcases a complete, bare-bones code example on how to use the ``LLMRunner`` with the ``LLMSchema``. +The example is structured as a SwiftUI `View` with a `Button` to trigger LLM inference via the ``LLMMockSchema``. The generated output stream is displayed in a `Text` field. + +- Tip: SpeziLLM provides the `@LLMSessionProvider` property wrapper (`View/LLMSessionProvider`) that drastically simplifies the state management of using the ``LLMSchema`` with the ``LLMRunner``. Refer to the docs below for more information. ```swift -import SpeziLLMLocal -// ... +struct LLMDemoView: View { + // The runner responsible for executing the LLM. + @Environment(LLMRunner.self) var runner -struct LocalLLMChatView: View { - // The runner responsible for executing the local LLM. - @Environment(LLMRunner.self) var runner: LLMRunner + // The LLM in execution, as defined by the ``LLMSchema``. + @State var llmSession: LLMMockSession? + @State var responseText = "" - // The locally executed LLM - @State var model: LLMLocal = .init( - modelPath: ... - ) - @State var responseText: String + var body: some View { + VStack { + Button { + Task { + try await executePrompt(prompt: "Hello LLM!") + } + } label: { + Text("Start LLM inference") + } + .disabled(if: llmSession) - func executePrompt(prompt: String) { - // Execute the query on the runner, returning a stream of outputs - let stream = try await runner(with: model).generate(prompt: "Hello LLM!") + Text(responseText) + } + .task { + // Instantiate the `LLMSchema` to an `LLMSession` via the `LLMRunner`. + self.llmSession = runner(with: LLMMockSchema()) + } + } + + func executePrompt(prompt: String) async throws { + await MainActor.run { + llmSession?.context.append(userInput: prompt) + } + + // Performing the LLM inference, returning a stream of outputs. + guard let stream = try await llmSession?.generate() else { + return + } for try await token in stream { responseText.append(token) - } + } } } ``` +As shown in the example above, a simple LLM inference task with the ``LLMSession`` quickly becomes complex. +That's why SpeziLLM provides the `@LLMSessionProvider` property wrapper (`View/LLMSessionProvider`) that enables the convenient instantiation of the passed ``LLMSchema`` (defining the LLM) to a to-be-used ``LLMSession`` (LLM in execution). +The instantiation is done by the ``LLMRunner`` which determines the correct ``LLMPlatform`` for the ``LLMSchema`` to run on.
+An example of using the `@LLMSessionProvider` property wrapper can be found below: + +```swift +struct LLMDemoView: View { + // Use the convenience property wrapper to instantiate the `LLMMockSession` + @LLMSessionProvider(schema: LLMMockSchema()) var llm: LLMMockSession + @State var responseText = "" + + var body: some View { + VStack { + Button { + Task { @MainActor in + llm.context.append(userInput: "Hello!") + + for try await token in try await llm.generate() { + responseText.append(token) + } + } + } label: { + Text("Start LLM inference") + } + .disabled(llm.state.representation == .processing) + + Text(responseText) + } + } +} +``` + ### LLM Chat View -The ``LLMChatView`` presents a basic chat view that enables users to chat with a Spezi ``LLM`` in a typical chat-like fashion. The input can be either typed out via the iOS keyboard or provided as voice input and transcribed into written text. -The ``LLMChatView`` takes an ``LLM`` instance as well as initial assistant prompt as arguments to configure the chat properly. +The ``LLMChatView`` and ``LLMChatViewSchema`` present basic chat views that enable users to chat with a Spezi LLM in a typical chat-like fashion. The input can be either typed out via the iOS keyboard or provided as voice input and transcribed into written text. +The ``LLMChatViewSchema`` takes an ``LLMSchema`` instance to define which LLM in what configuration should be used for the text inference. +The ``LLMChatView`` is passed an ``LLMSession`` that represents the LLM in execution, containing state and context. -> Tip: The ``LLMChatView`` builds on top of the [SpeziChat package](https://swiftpackageindex.com/stanfordspezi/spezichat/documentation). +> Tip: The ``LLMChatView`` and ``LLMChatViewSchema`` build on top of the [SpeziChat package](https://swiftpackageindex.com/stanfordspezi/spezichat/documentation). For more details, please refer to the DocC documentation of the [`ChatView`](https://swiftpackageindex.com/stanfordspezi/spezichat/documentation/spezichat/chatview). #### Usage -An example usage of the ``LLMChatView`` can be seen in the following example. -The example uses the ``LLMMock`` as the passed ``LLM`` instance in order to provide a default output generation stream. +An example usage of the ``LLMChatViewSchema`` can be seen in the following example. +The example uses the ``LLMMockSchema`` as the passed ``LLMSchema`` instance in order to provide a mock output generation stream. +Keep in mind that one cannot access the underlying context or state of the ``LLMSession`` when using the ``LLMChatViewSchema``. + +```swift +struct LLMDemoChatView: View { + var body: some View { + LLMChatViewSchema(with: LLMMockSchema()) + } +} +``` + +An example of using the lower-level ``LLMChatView`` can be seen in the following example. +Here, the user has full control over the ``LLMSession`` and can access the context or state of the LLM. +SpeziLLM provides the `@LLMSessionProvider` property wrapper (`View/LLMSessionProvider`) that enables the convenient instantiation of the passed ``LLMSchema`` (defining the LLM) to a to-be-used ``LLMSession`` (LLM in execution). ```swift -struct LLMLocalChatTestView: View { +struct LLMDemoChatView: View { + // Use the convenience property wrapper to instantiate the `LLMMockSession` + @LLMSessionProvider(schema: LLMMockSchema()) var llm: LLMMockSession + var body: some View { - LLMChatView( - model: LLMMock(), - initialAssistantPrompt: [ - .init( - role: .assistant, - content: "Hello!"
- ) - ] - ) + LLMChatView(session: $llm) } } ``` ## Topics -### Model +### LLM abstraction -- ``LLM`` +- ``LLMSchema`` +- ``LLMSession`` - ``LLMState`` - ``LLMError`` -- ``LLMHostingType`` -### Execution +### LLM Execution - ``LLMRunner`` -- ``LLMRunnerConfiguration`` -- ``LLMGenerationTask`` -- ``LLMRunnerSetupTask`` +- ``LLMPlatform`` ### Views - ``LLMChatView`` +- ``LLMChatViewSchema`` + +### Mocks + +- ``LLMMockPlatform`` +- ``LLMMockSchema`` +- ``LLMMockSession`` diff --git a/Sources/SpeziLLM/Tasks/LLMGenerationTask.swift b/Sources/SpeziLLM/Tasks/LLMGenerationTask.swift deleted file mode 100644 index 0e77d8a..0000000 --- a/Sources/SpeziLLM/Tasks/LLMGenerationTask.swift +++ /dev/null @@ -1,97 +0,0 @@ -// -// This source file is part of the Stanford Spezi open source project -// -// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) -// -// SPDX-License-Identifier: MIT -// - -import Foundation -import SpeziChat - - -/// Handles LLM generation tasks via the ``LLMRunner``. -/// -/// It wraps a Spezi ``LLM`` and performs management overhead tasks. -/// -/// A code example on how to use ``LLMGenerationTask`` in combination with the ``LLMRunner`` can be -/// found in the documentation of the ``LLMRunner``. -public actor LLMGenerationTask { - /// The ``LLM`` which is executed by the ``LLMGenerationTask``. - let model: any LLM - /// The configuration of the ``LLMRunner``. - let runnerConfig: LLMRunnerConfiguration - /// A task managing the ``LLM` output generation. - var task: Task<(), Never>? - - - /// The `LLMTaskIdentifier` of the ``LLMGenerationTask``. - var id: LLMTaskIdentifier { - .init(fromModel: model) - } - - /// Describes the state of the ``LLM`` as a ``LLMState``. - public var state: LLMState { - get async { - await self.model.state - } - } - - - /// Creates the ``LLMGenerationTask`` based on the respective ``LLM``. - /// - /// - Parameters: - /// - model: The ``LLM`` that should be executed. - /// - runnerConfig: The configuration of the ``LLMRunner``. - init(model: any LLM, runnerConfig: LLMRunnerConfiguration) { - self.model = model - self.runnerConfig = runnerConfig - } - - - /// Starts the LLM output generation based on the ``LLM/context``. - /// Handles management takes like the initial setup of the ``LLM``. - /// - /// - Returns: An asynchronous stream of the ``LLM`` generation results. - /// - /// - Important: This function takes the state present within the ``LLM/context`` to query the ``LLM``. Ensure that the ``LLM/context`` reflects the state you want to use, especially the last (user) entry of the ``LLM/context``. - public func generate() async throws -> AsyncThrowingStream { - let (stream, continuation) = AsyncThrowingStream.makeStream(of: String.self) - - /// Setup the model if necessary. - if await self.model.state == .uninitialized { - try await model.setup(runnerConfig: self.runnerConfig) - } - - /// Execute the output generation of the LLM. - self.task = Task(priority: self.runnerConfig.taskPriority) { - await model.generate(continuation: continuation) - } - - return stream - } - - - /// Starts the LLM output generation based on an input prompt. - /// Handles management takes like the initial setup of the ``LLM``. - /// - /// - Parameters: - /// - userPrompt: The `String` that should be used as an input prompt to the ``LLM`` - /// - /// - Returns: An asynchronous stream of the ``LLM`` generation results. - /// - /// - Important: This function appends to the``LLM/context``. 
Ensure that this wasn't done before by, e.g., via the ``LLMChatView``. - public func generate(prompt userPrompt: String) async throws -> AsyncThrowingStream { - await MainActor.run { - self.model.context.append(userInput: userPrompt) - } - - return try await self.generate() - } - - - /// Upon deinit, cancel the LLM `Task`. - deinit { - task?.cancel() - } -} diff --git a/Sources/SpeziLLM/Tasks/LLMRunnerSetupTask.swift b/Sources/SpeziLLM/Tasks/LLMRunnerSetupTask.swift deleted file mode 100644 index a676167..0000000 --- a/Sources/SpeziLLM/Tasks/LLMRunnerSetupTask.swift +++ /dev/null @@ -1,36 +0,0 @@ -// -// This source file is part of the Stanford Spezi open source project -// -// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) -// -// SPDX-License-Identifier: MIT -// - -import Foundation -import Spezi - - -/// Provides an abstraction of different ``LLMRunner``-related setup `Task`'s. -/// -/// These setup tasks must be stated within the Spezi ``LLMRunner`` initializer in the Spezi `Configuration` in order to -/// properly configure the local environment for executing Spezi `LLM`s. -/// -/// ```swift -/// class LLMAppDelegate: SpeziAppDelegate { -/// override var configuration: Configuration { -/// Configuration { -/// LLMRunner { -/// // Concrete `LLMRunnerSetupTask`s -/// ... -/// } -/// } -/// } -/// } -public protocol LLMRunnerSetupTask: Module { - /// The ``LLMHostingType`` that the ``LLMRunnerSetupTask`` sets up. - var type: LLMHostingType { get } - - - /// Performs runner setup-related actions for the ``LLMRunner``. - func setupRunner(runnerConfig: LLMRunnerConfiguration) async throws -} diff --git a/Sources/SpeziLLM/Tasks/LLMRunnerSetupTaskCollection.swift b/Sources/SpeziLLM/Tasks/LLMRunnerSetupTaskCollection.swift deleted file mode 100644 index a618b50..0000000 --- a/Sources/SpeziLLM/Tasks/LLMRunnerSetupTaskCollection.swift +++ /dev/null @@ -1,33 +0,0 @@ -// -// This source file is part of the Stanford Spezi open source project -// -// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) -// -// SPDX-License-Identifier: MIT -// - -import Foundation -import SwiftUI - - -/// Defines a collection of Spezi ``LLMRunnerSetupTask``s that are defined with a ``LLMRunner``. -/// -/// You can not create a `_LLMRunnerSetupTaskCollection` yourself. Please use the ``LLMRunner`` that internally creates a `_LLMRunnerSetupTaskCollection` with the passed views. -public struct _LLMRunnerSetupTaskCollection { // swiftlint:disable:this type_name - let runnerSetupTasks: [LLMHostingType: any LLMRunnerSetupTask] - - - init(runnerSetupTasks: [any LLMRunnerSetupTask]) { - self.runnerSetupTasks = runnerSetupTasks.reduce(into: [LLMHostingType: any LLMRunnerSetupTask]()) { partialResult, runnerSetupTask in - /// Check if there are no duplicate ``LLMRunnerSetupTask``'s for the same ``LLMHostingType``. - guard partialResult[runnerSetupTask.type] == nil else { - fatalError(""" - LLMRunner was initialized with LLMRunnerSetupTasks's of the same LLMHostingType type. - Ensure that only one LLMRunnerSetupTask is responsible for setting up the runner of one LLMHostingType. 
- """) - } - - partialResult[runnerSetupTask.type] = runnerSetupTask - } - } -} diff --git a/Sources/SpeziLLM/Tasks/LLMTaskIdentifier.swift b/Sources/SpeziLLM/Tasks/LLMTaskIdentifier.swift deleted file mode 100644 index db644ec..0000000 --- a/Sources/SpeziLLM/Tasks/LLMTaskIdentifier.swift +++ /dev/null @@ -1,25 +0,0 @@ -// -// This source file is part of the Stanford Spezi open source project -// -// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) -// -// SPDX-License-Identifier: MIT -// - -import Foundation - - -/// An identifier for a ``LLMGenerationTask` based on the respective ``LLM``. -struct LLMTaskIdentifier: Hashable { - /// The wrapped identifier of the `LLM``. - let taskIdentifier: String - - - /// Creates the `LLMTaskIdentifier` identifying ``LLM``'s. - /// - /// - Parameters: - /// - fromModel: The ``LLM`` that should be identified. - init(fromModel model: any LLM) { - self.taskIdentifier = String(describing: type(of: model)) - } -} diff --git a/Sources/SpeziLLM/Views/LLMChatView.swift b/Sources/SpeziLLM/Views/LLMChatView.swift index fd19e9a..ebb306e 100644 --- a/Sources/SpeziLLM/Views/LLMChatView.swift +++ b/Sources/SpeziLLM/Views/LLMChatView.swift @@ -11,83 +11,108 @@ import SpeziViews import SwiftUI -/// Basic chat view that enables users to chat with a Spezi ``LLM``. -/// +/// Chat view that enables users to interact with an LLM based on an ``LLMSession``. +/// +/// The ``LLMChatView`` takes an ``LLMSession`` instance as parameter within the ``LLMChatView/init(session:)``. The ``LLMSession`` is the executable version of the LLM containing context and state as defined by the ``LLMSchema``. +/// /// The input can be either typed out via the iOS keyboard or provided as voice input and transcribed into written text. -/// The ``LLMChatView`` takes an ``LLM`` instance as well as initial assistant prompt as arguments to configure the chat properly. /// -/// > Tip: The ``LLMChatView`` builds on top of the [SpeziChat package](https://swiftpackageindex.com/stanfordspezi/spezichat/documentation). -/// > For more details, please refer to the DocC documentation of the [`ChatView`](https://swiftpackageindex.com/stanfordspezi/spezichat/documentation/spezichat/chatview). +/// - Tip: The ``LLMChatView`` builds on top of the [SpeziChat package](https://swiftpackageindex.com/stanfordspezi/spezichat/documentation). +/// For more details, please refer to the DocC documentation of the [`ChatView`](https://swiftpackageindex.com/stanfordspezi/spezichat/documentation/spezichat/chatview). +/// +/// - Tip: To add text-to-speech capabilities to the ``LLMChatView``, use the [SpeziChat package](https://swiftpackageindex.com/stanfordspezi/spezichat/documentation) and more specifically the `View/speak(_:muted:)` and `View/speechToolbarButton(enabled:muted:)` view modifiers. +/// For more details, please refer to the DocC documentation of the [`ChatView`](https://swiftpackageindex.com/stanfordspezi/spezichat/documentation/spezichat/chatview). /// /// ### Usage /// -/// An example usage of the ``LLMChatView`` can be seen in the following example. -/// The example uses the ``LLMMock`` as the passed ``LLM`` instance in order to provide a default output generation stream. +/// The next code examples demonstrate how to use the ``LLMChatView`` with ``LLMSession``s. 
+/// +/// The ``LLMChatView`` must be passed a ``LLMSession``, meaning a ready-to-use LLM, resulting in the need for the developer to manually allocate the ``LLMSession`` via the ``LLMRunner`` and ``LLMSchema`` (which includes state management). +/// +/// In order to simplify the usage of an ``LLMSession``, SpeziLLM provides the ``LLMSessionProvider`` property wrapper that conveniently instantiates an ``LLMSchema`` to an ``LLMSession``. +/// The `@LLMSessionProvider` wrapper abstracts away the necessity to use the ``LLMRunner`` from the SwiftUI `Environment` within a `.task()` view modifier to instantiate the ``LLMSession``. +/// In addition, state handling becomes easier, as one doesn't have to deal with the optionality of the ``LLMSession`` anymore. +/// +/// In addition, one is able to use the text-to-speech capabilities of [SpeziChat package](https://swiftpackageindex.com/stanfordspezi/spezichat/documentation) via the `View/speak(_:muted:)` and `View/speechToolbarButton(enabled:muted:)` view modifiers. /// /// ```swift -/// struct LLMLocalChatTestView: View { +/// struct LLMChatTestView: View { +/// // Use the convenience property wrapper to instantiate the `LLMMockSession` +/// @LLMSessionProvider(schema: LLMMockSchema()) var llm: LLMMockSession +/// @State var muted = true +/// /// var body: some View { -/// LLMChatView( -/// model: LLMMock() -/// ) +/// LLMChatView(session: $llm) +/// .speak(llm.context, muted: muted) +/// .speechToolbarButton(muted: $muted) /// } /// } /// ``` -public struct LLMChatView: View { - /// A ``LLMRunner`` is responsible for executing the ``LLM``. Must be configured via the Spezi `Configuration`. - @Environment(LLMRunner.self) private var runner - /// A SpeziLLM ``LLM`` that is used for the text generation within the chat view - @State private var model: any LLM - +public struct LLMChatView: View { + /// The LLM in execution, as defined by the ``LLMSchema``. + @Binding private var llm: Session /// Indicates if the input field is disabled. - @MainActor var inputDisabled: Bool { - model.state.representation == .processing + @MainActor private var inputDisabled: Bool { + llm.state.representation == .processing } + public var body: some View { ChatView( - $model.context, + $llm.context, disableInput: inputDisabled, - exportFormat: .pdf + exportFormat: .pdf, + messagePendingAnimation: .automatic ) - .onChange(of: model.context) { oldValue, newValue in - /// Once the user enters a message in the chat, send a request to the local LLM. + .viewStateAlert(state: llm.state) + .onChange(of: llm.context) { oldValue, newValue in + // Once the user enters a message in the chat, send a generation request to the LLM. if oldValue.count != newValue.count, let lastChat = newValue.last, lastChat.role == .user { Task { do { - let stream = try await runner(with: model).generate() + // Trigger an output generation based on the `LLMSession/context`. + let stream = try await llm.generate() for try await token in stream { - model.context.append(assistantOutput: token) + llm.context.append(assistantOutput: token) } } catch let error as LLMError { - model.state = .error(error: error) + llm.state = .error(error: error) } catch { - model.state = .error(error: LLMRunnerError.setupError) + llm.state = .error(error: LLMDefaultError.unknown(error)) } } } } - .viewStateAlert(state: model.state) } - /// Creates a ``LLMChatView`` that provides developers with a basic chat view towards a SpeziLLM ``LLM``. 
+ /// Creates a ``LLMChatView`` with a `Binding` of a ``LLMSession`` that provides developers with a basic chat view to interact with a Spezi LLM. /// /// - Parameters: - /// - model: The SpeziLLM ``LLM`` that should be used for the text generation. - public init( - model: any LLM - ) { - self._model = State(wrappedValue: model) + /// - session: A `Binding` of a ``LLMSession`` that contains the ready-to-use LLM to generate outputs based on user input. + public init(session: Binding<Session>) { + self._llm = session } } +#if DEBUG #Preview { - LLMChatView( - model: LLMMock() - ) + @State var llm = LLMMockSession(.init(), schema: .init()) + + + return NavigationStack { + LLMChatView(session: $llm) + .speak(llm.context, muted: true) + .speechToolbarButton(muted: .constant(true)) + .previewWith { + LLMRunner { + LLMMockPlatform() + } + } + } } +#endif diff --git a/Sources/SpeziLLM/Views/LLMChatViewDisabledModifier.swift b/Sources/SpeziLLM/Views/LLMChatViewDisabledModifier.swift new file mode 100644 index 0000000..c2176ab --- /dev/null +++ b/Sources/SpeziLLM/Views/LLMChatViewDisabledModifier.swift @@ -0,0 +1,62 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import SwiftUI + + +/// The underlying `ViewModifier` of `View/disabled(if:)`. +private struct LLMChatViewDisabledModifier<L: LLMSession>: ViewModifier { + let llm: L? + + + func body(content: Content) -> some View { + content + .disabled(llm == nil) + } +} + + +extension View { + /// Disables the content block this modifier is attached to. + /// + /// Based on the optionality of the passed `LLMSession`, the content block this modifier is attached to is automatically disabled if the ``LLMSession`` is `nil`. + /// + /// ### Usage + /// + /// The code example below showcases how to use the `View/disabled(if:)` modifier to disable content based on the state of the ``LLMSession``. + /// + /// ```swift + /// struct LLMDemoView: View { + /// @Environment(LLMRunner.self) var runner + /// @State var llmSession: LLMMockSession? + /// + /// var body: some View { + /// VStack { + /// Button { + /// // ... + /// } label: { + /// Text("Start LLM inference") + /// } + /// .disabled(if: llmSession) + /// + /// Text(responseText) + /// } + /// .task { + /// self.llmSession = runner(with: LLMMockSchema()) + /// } + /// } + /// } + /// ``` + public func disabled<L: LLMSession>(if llm: L?) -> some View { + modifier( + LLMChatViewDisabledModifier( + llm: llm + ) + ) + } +} diff --git a/Sources/SpeziLLM/Views/LLMChatViewSchema.swift b/Sources/SpeziLLM/Views/LLMChatViewSchema.swift new file mode 100644 index 0000000..7ec1a6f --- /dev/null +++ b/Sources/SpeziLLM/Views/LLMChatViewSchema.swift @@ -0,0 +1,65 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import SwiftUI + + +/// Chat view that enables users to interact with an LLM based on an ``LLMSchema``. +/// +/// The ``LLMChatViewSchema`` takes an ``LLMSchema`` instance as parameter within the ``LLMChatViewSchema/init(with:)``. +/// The ``LLMSchema`` defines the type and properties of the LLM that will be used by the ``LLMChatViewSchema`` to generate responses to user prompts. +/// +/// - Tip: The ``LLMChatViewSchema`` is a convenience abstraction of the ``LLMChatView``.
Refer to ``LLMChatView`` for more details. +/// +/// - Tip: With the ``LLMChatViewSchema``, the developer doesn't have access to the underlying ``LLMSession`` that contains the ``LLMSession/context`` and ``LLMSession/state``. If access to these properties is required, please use the ``LLMChatView``. +/// +/// ### Usage +/// +/// An example usage of the ``LLMChatViewSchema`` with an ``LLMSchema`` can be seen in the following example. +/// The example uses the ``LLMMockSchema`` to generate responses to user prompts. +/// +/// ```swift +/// struct LLMLocalChatSchemaView: View { +/// var body: some View { +/// LLMChatViewSchema( +/// with: LLMMockSchema() +/// ) +/// } +/// } +/// ``` +public struct LLMChatViewSchema: View { + @LLMSessionProvider var llm: Schema.Platform.Session + + + public var body: some View { + LLMChatView(session: $llm) + } + + + /// Creates a ``LLMChatViewSchema`` with an ``LLMSchema`` that provides developers with a basic chat view to interact with a Spezi LLM. + /// + /// - Parameters: + /// - schema: The ``LLMSchema`` that defines the to-be-used LLM to generate outputs based on user input. + public init(with schema: Schema) { + self._llm = LLMSessionProvider(schema: schema) + } +} + + +#if DEBUG +#Preview { + NavigationStack { + LLMChatViewSchema(with: LLMMockSchema()) + .previewWith { + LLMRunner { + LLMMockPlatform() + } + } + } +} +#endif diff --git a/Sources/SpeziLLMLocal/Configuration/LLMLocalParameters.swift b/Sources/SpeziLLMLocal/Configuration/LLMLocalParameters.swift index 0e4be75..9c5233b 100644 --- a/Sources/SpeziLLMLocal/Configuration/LLMLocalParameters.swift +++ b/Sources/SpeziLLMLocal/Configuration/LLMLocalParameters.swift @@ -28,10 +28,10 @@ public struct LLMLocalParameters: Sendable { /// The to-be-used system prompt of the LLM - let systemPrompt: String - /// Indicates the maximum output length generated by the ``LLM``. + let systemPrompt: String? + /// Indicates the maximum output length generated by the LLM. let maxOutputLength: Int - /// Indicates whether the BOS token is added by the ``LLM``. If `nil`, the default from the model itself is taken. + /// Indicates whether the BOS token is added by the LLM. If `nil`, the default from the model itself is taken. let addBosToken: Bool @@ -131,8 +131,8 @@ public struct LLMLocalParameters: Sendable { /// /// - Parameters: /// - systemPrompt: The to-be-used system prompt of the LLM enabling fine-tuning of the LLMs behaviour. Defaults to the regular default chat-based LLM system prompt. - /// - maxOutputLength: The maximum output length generated by the Spezi `LLM`, defaults to `512`. - /// - addBosToken: Indicates wether the BOS token is added by the Spezi `LLM`, defaults to `false`. + /// - maxOutputLength: The maximum output length generated by the Spezi LLM, defaults to `512`. + /// - addBosToken: Indicates wether the BOS token is added by the Spezi LLM, defaults to `false`. /// - gpuLayerCount: Number of layers to store in VRAM, defaults to `1`, meaning Apple's `Metal` framework is enabled. /// - mainGpu: GPU that is used for scratch and small tensors, defaults to `0` representing the main GPU. /// - tensorSplit: Split layers across multiple GPUs, defaults to `nil`, meaning no split. @@ -142,7 +142,7 @@ public struct LLMLocalParameters: Sendable { /// - useMmap: Indicates if mmap should be used., defaults to `true`. /// - useMlock: Forces the system to keep model in RAM, defaults to `false`. public init( - systemPrompt: String = Defaults.defaultSystemPrompt, + systemPrompt: String? 
= Defaults.defaultSystemPrompt, maxOutputLength: Int = 512, addBosToken: Bool = false, gpuLayerCount: Int32 = 1, diff --git a/Sources/SpeziLLM/Configuration/LLMRunnerConfiguration.swift b/Sources/SpeziLLMLocal/Configuration/LLMLocalPlatformConfiguration.swift similarity index 73% rename from Sources/SpeziLLM/Configuration/LLMRunnerConfiguration.swift rename to Sources/SpeziLLMLocal/Configuration/LLMLocalPlatformConfiguration.swift index 9bfcb71..34cc02c 100644 --- a/Sources/SpeziLLM/Configuration/LLMRunnerConfiguration.swift +++ b/Sources/SpeziLLMLocal/Configuration/LLMLocalPlatformConfiguration.swift @@ -8,15 +8,16 @@ import Foundation -/// Represents the configuration of the Spezi ``LLMRunner``. -public struct LLMRunnerConfiguration: Sendable { + +/// Represents the configuration of the Spezi ``LLMLocalPlatform``. +public struct LLMLocalPlatformConfiguration: Sendable { /// The task priority of the initiated LLM inference tasks. - public let taskPriority: TaskPriority + let taskPriority: TaskPriority /// Indicates if this is a device with non-unified memory access. - public let nonUniformMemoryAccess: Bool + let nonUniformMemoryAccess: Bool - /// Creates the ``LLMRunnerConfiguration`` which configures the Spezi ``LLMRunner``. + /// Creates the ``LLMLocalPlatformConfiguration`` which configures the Spezi ``LLMLocalPlatform``. /// /// - Parameters: /// - taskPriority: The task priority of the initiated LLM inference tasks, defaults to `.userInitiated`. diff --git a/Sources/SpeziLLMLocal/LLMLocal.swift b/Sources/SpeziLLMLocal/LLMLocal.swift deleted file mode 100644 index 5e86adf..0000000 --- a/Sources/SpeziLLMLocal/LLMLocal.swift +++ /dev/null @@ -1,121 +0,0 @@ -// -// This source file is part of the Stanford Spezi open source project -// -// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) -// -// SPDX-License-Identifier: MIT -// - -import Foundation -import llama -import os -import SpeziChat -import SpeziLLM - - -/// Enables the local execution of Spezi `LLM`s. -/// -/// The ``LLMLocal`` is a Spezi `LLM` and utilizes the [llama.cpp library](https://github.com/ggerganov/llama.cpp) to locally execute a large language model on-device. -/// The main properties of the ``LLMLocal`` are ``LLMLocal/context`` and ``LLMLocal/state``. -/// Use these properties to access the conversational history of the `LLM` as well as the current generation state. -/// -/// - Important: ``LLMLocal`` shouldn't be used on it's own but always wrapped by the Spezi `LLMRunner` as the runner handles -/// all management overhead tasks. A code example on how to use ``LLMLocal`` in combination with the `LLMRunner` can be -/// found in the documentation of the `LLMRunner`. -/// -/// - Important: In order to use the LLM local target, one needs to set build parameters in the consuming Xcode project or the consuming SPM package to enable the [Swift / C++ Interop](https://www.swift.org/documentation/cxx-interop/), -/// introduced in Xcode 15 and Swift 5.9. Please refer to for more information. -@Observable -public class LLMLocal: LLM { - /// A Swift Logger that logs important information from the ``LLMLocal``. - static let logger = Logger(subsystem: "edu.stanford.spezi", category: "SpeziLLM") - public let type: LLMHostingType = .local - @MainActor public var state: LLMState = .uninitialized - @MainActor public var context: Chat = [] - - /// Parameters of the llama.cpp ``LLM``. - let parameters: LLMLocalParameters - /// Context parameters of the llama.cpp ``LLM``. 
- let contextParameters: LLMLocalContextParameters - /// Sampling parameters of the llama.cpp ``LLM``. - let samplingParameters: LLMLocalSamplingParameters - /// Closure to properly format the ``LLMLocal/context`` to a `String` which is tokenized and passed to the `LLM`. - let formatChat: ((Chat) throws -> String) - /// The on-device `URL` where the model is located. - private let modelPath: URL - /// A pointer to the allocated model via llama.cpp. - @ObservationIgnored var model: OpaquePointer? - /// A pointer to the allocated model context from llama.cpp. - @ObservationIgnored var modelContext: OpaquePointer? - - - /// Creates a ``LLMLocal`` instance that can then be passed to the `LLMRunner` for execution. - /// - /// - Parameters: - /// - modelPath: A local `URL` where the LLM file is stored. The format of the LLM must be in the llama.cpp `.gguf` format. - /// - parameters: Parameterize the ``LLMLocal`` via ``LLMLocalParameters``. - /// - contextParameters: Configure the context of the ``LLMLocal`` via ``LLMLocalContextParameters``. - /// - samplingParameters: Parameterize the sampling methods of the ``LLMLocal`` via ``LLMLocalSamplingParameters``. - /// - formatChat: Closure to properly format the ``LLMLocal/context`` to a `String` which is tokenized and passed to the `LLM`, defaults to Llama2 prompt format. - public init( - modelPath: URL, - parameters: LLMLocalParameters = .init(), - contextParameters: LLMLocalContextParameters = .init(), - samplingParameters: LLMLocalSamplingParameters = .init(), - formatChat: @escaping ((Chat) throws -> String) = PromptFormattingDefaults.llama2 - ) { - self.modelPath = modelPath - self.parameters = parameters - self.contextParameters = contextParameters - self.samplingParameters = samplingParameters - self.formatChat = formatChat - Task { @MainActor in - self.context.append(systemMessage: parameters.systemPrompt) - } - } - - - public func setup(runnerConfig: LLMRunnerConfiguration) async throws { - Self.logger.debug("SpeziLLMLocal: Local LLM is being initialized") - await MainActor.run { - self.state = .loading - } - - guard let model = llama_load_model_from_file(modelPath.path().cString(using: .utf8), parameters.llamaCppRepresentation) else { - Self.logger.error("SpeziLLMLocal: Local LLM file could not be opened, indicating that the model file doesn't exist") - await MainActor.run { - self.state = .error(error: LLMLocalError.modelNotFound) - } - throw LLMLocalError.modelNotFound - } - - /// Check if model was trained for the configured context window size - guard self.contextParameters.contextWindowSize <= llama_n_ctx_train(model) else { - Self.logger.error("SpeziLLMLocal: Model was trained on only \(llama_n_ctx_train(model), privacy: .public) context tokens, not the configured \(self.contextParameters.contextWindowSize, privacy: .public) context tokens") - await MainActor.run { - self.state = .error(error: LLMLocalError.contextSizeMismatch) - } - throw LLMLocalError.contextSizeMismatch - } - - self.model = model - - await MainActor.run { - self.state = .ready - } - Self.logger.debug("SpeziLLMLocal: Local LLM finished initializing, now ready to use") - } - - public func generate(continuation: AsyncThrowingStream.Continuation) async { - Self.logger.debug("SpeziLLMLocal: Local LLM started a new inference") - await _generate(continuation: continuation) - Self.logger.debug("SpeziLLMLocal: Local LLM completed an inference") - } - - - /// Upon deinit, free the context and the model via llama.cpp - deinit { - llama_free(self.modelContext) - 
llama_free_model(self.model) - } -} diff --git a/Sources/SpeziLLMLocal/LLMLocalError.swift b/Sources/SpeziLLMLocal/LLMLocalError.swift index 128ff41..4f3b96a 100644 --- a/Sources/SpeziLLMLocal/LLMLocalError.swift +++ b/Sources/SpeziLLMLocal/LLMLocalError.swift @@ -10,11 +10,11 @@ import Foundation import SpeziLLM -/// Describes possible errors that occur during the execution of ``LLMLocal`` via the SpeziLLM `LLMRunner`. +/// Describes possible errors that occur during the execution of ``LLMLocalSession`` via the SpeziLLM `LLMRunner`. public enum LLMLocalError: LLMError { /// Indicates that the local model file is not found. case modelNotFound - /// Indicates that the ``LLMLocal`` is not yet ready, e.g., not initialized. + /// Indicates that the ``LLMLocalSession`` is not yet ready, e.g., not initialized. case modelNotReadyYet /// Indicates that during generation an error occurred. case generationError diff --git a/Sources/SpeziLLMLocal/LLMLocalPlatform.swift b/Sources/SpeziLLMLocal/LLMLocalPlatform.swift new file mode 100644 index 0000000..51d122d --- /dev/null +++ b/Sources/SpeziLLMLocal/LLMLocalPlatform.swift @@ -0,0 +1,91 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation +import llama +import Semaphore +import Spezi +import SpeziLLM + + +/// LLM execution platform of an ``LLMLocalSchema``. +/// +/// The ``LLMLocalPlatform`` turns a received ``LLMLocalSchema`` to an executable ``LLMLocalSession``. +/// Use ``LLMLocalPlatform/callAsFunction(with:)`` with an ``LLMLocalSchema`` parameter to get an executable ``LLMLocalSession`` that does the actual inference. +/// +/// - Important: ``LLMLocalPlatform`` shouldn't be used directly but used via the `SpeziLLM` `LLMRunner` that delegates the requests towards the ``LLMLocalPlatform``. +/// The `SpeziLLM` `LLMRunner` must be configured with the ``LLMLocalPlatform`` within the Spezi `Configuration`. +/// +/// - Tip: For more information, refer to the documentation of the `LLMPlatform` from SpeziLLM. +/// +/// ### Usage +/// +/// The example below demonstrates the setup of the ``LLMLocalPlatform`` within the Spezi `Configuration`. +/// +/// ```swift +/// class TestAppDelegate: SpeziAppDelegate { +/// override var configuration: Configuration { +/// Configuration { +/// LLMRunner { +/// LLMLocalPlatform() +/// } +/// } +/// } +/// } +/// ``` +public actor LLMLocalPlatform: LLMPlatform, DefaultInitializable { + /// Enforce only one concurrent execution of a local LLM. + private let semaphore = AsyncSemaphore(value: 1) + let configuration: LLMLocalPlatformConfiguration + + @MainActor public var state: LLMPlatformState = .idle + + + /// Creates an instance of the ``LLMLocalPlatform``. + /// + /// - Parameters: + /// - configuration: The configuration of the platform. + public init(configuration: LLMLocalPlatformConfiguration) { + self.configuration = configuration + } + + /// Convenience initializer for the ``LLMLocalPlatform``. 
+ public init() { + self.init(configuration: .init()) + } + + + public nonisolated func configure() { + // Initialize the llama.cpp backend + llama_backend_init(configuration.nonUniformMemoryAccess) + } + + nonisolated public func callAsFunction(with llmSchema: LLMLocalSchema) -> LLMLocalSession { + LLMLocalSession(self, schema: llmSchema) + } + + nonisolated func exclusiveAccess() async throws { + try await semaphore.waitUnlessCancelled() + await MainActor.run { + state = .processing + } + } + + nonisolated func signal() async { + semaphore.signal() + await MainActor.run { + state = .idle + } + } + + + deinit { + // Frees the llama.cpp backend + llama_backend_free() + } +} diff --git a/Sources/SpeziLLMLocal/LLMLocalRunnerSetupTask.swift b/Sources/SpeziLLMLocal/LLMLocalRunnerSetupTask.swift deleted file mode 100644 index c650c5e..0000000 --- a/Sources/SpeziLLMLocal/LLMLocalRunnerSetupTask.swift +++ /dev/null @@ -1,46 +0,0 @@ -// -// This source file is part of the Stanford Spezi open source project -// -// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) -// -// SPDX-License-Identifier: MIT -// - -import llama -import Spezi -import SpeziLLM - - -/// Sets up the environment in order to locally execute Spezi `LLM`s. -/// -/// The ``LLMLocalRunnerSetupTask`` needs to be stated within the `LLMRunner` initializer in the Spezi `Configuration`. -/// -/// ```swift -/// class LocalLLMAppDelegate: SpeziAppDelegate { -/// override var configuration: Configuration { -/// Configuration { -/// // Configure the runner responsible for executing local LLMs -/// LLMRunner { -/// LLMLocalRunnerSetupTask() -/// } -/// } -/// } -/// } -public class LLMLocalRunnerSetupTask: LLMRunnerSetupTask, DefaultInitializable { - public let type: LLMHostingType = .local - - - public required init() { } - - - public func setupRunner(runnerConfig: LLMRunnerConfiguration) async throws { - /// Initialize the llama.cpp backend. - llama_backend_init(runnerConfig.nonUniformMemoryAccess) - } - - - deinit { - /// Frees the llama.cpp backend. - llama_backend_free() - } -} diff --git a/Sources/SpeziLLMLocal/LLMLocal+PromptFormatting.swift b/Sources/SpeziLLMLocal/LLMLocalSchema+PromptFormatting.swift similarity index 96% rename from Sources/SpeziLLMLocal/LLMLocal+PromptFormatting.swift rename to Sources/SpeziLLMLocal/LLMLocalSchema+PromptFormatting.swift index 6040495..9de9400 100644 --- a/Sources/SpeziLLMLocal/LLMLocal+PromptFormatting.swift +++ b/Sources/SpeziLLMLocal/LLMLocalSchema+PromptFormatting.swift @@ -9,11 +9,11 @@ import SpeziChat -extension LLMLocal { +extension LLMLocalSchema { /// Holds default prompt formatting strategies for [Llama2](https://ai.meta.com/llama/) as well as [Phi-2](https://www.microsoft.com/en-us/research/blog/phi-2-the-surprising-power-of-small-language-models/) models. public enum PromptFormattingDefaults { /// Prompt formatting closure for the [Llama2](https://ai.meta.com/llama/) model - public static let llama2: ((Chat) throws -> String) = { chat in + public static let llama2: (@Sendable (Chat) throws -> String) = { chat in /// BOS token of the LLM, used at the start of each prompt passage. let BOS = "" /// EOS token of the LLM, used at the end of each prompt passage. 
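Stepping back to the `LLMLocalPlatform` introduced above: its `exclusiveAccess()` and `signal()` helpers pair the `AsyncSemaphore` with the platform's `state`, so that at most one local inference runs at a time. The sketch below is a hypothetical illustration of how same-module code (such as an `LLMLocalSession`) might bracket its generation work with these helpers; the `inference` closure is an assumed placeholder and is not part of this change set.

```swift
// Hypothetical sketch: serialize local LLM inference through the `LLMLocalPlatform` semaphore.
// `inference` stands in for the actual llama.cpp generation work.
func generateExclusively(
    on platform: LLMLocalPlatform,
    inference: () async throws -> Void
) async throws {
    // Suspends until no other local inference is running, then moves the platform state to `.processing`.
    try await platform.exclusiveAccess()

    do {
        try await inference()
    } catch {
        // Release the semaphore even on failure so other sessions are not blocked indefinitely.
        await platform.signal()
        throw error
    }

    // Releases the semaphore and resets the platform state to `.idle`.
    await platform.signal()
}
```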
@@ -77,7 +77,7 @@ extension LLMLocal { } /// Prompt formatting closure for the [Phi-2](https://www.microsoft.com/en-us/research/blog/phi-2-the-surprising-power-of-small-language-models/) model - public static let phi2: ((Chat) throws -> String) = { chat in + public static let phi2: (@Sendable (Chat) throws -> String) = { chat in // Ensure that system prompt as well as a first user prompt exist guard let systemPrompt = chat.first, systemPrompt.role == .system, diff --git a/Sources/SpeziLLMLocal/LLMLocalSchema.swift b/Sources/SpeziLLMLocal/LLMLocalSchema.swift new file mode 100644 index 0000000..1909180 --- /dev/null +++ b/Sources/SpeziLLMLocal/LLMLocalSchema.swift @@ -0,0 +1,61 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation +import SpeziChat +import SpeziLLM + + +/// Defines the type and configuration of the ``LLMLocalSession``. +/// +/// The ``LLMLocalSchema`` is used as a configuration for the to-be-used local LLM. It contains all information necessary for the creation of an executable ``LLMLocalSession``. +/// It is bound to a ``LLMLocalPlatform`` that is responsible for turning the ``LLMLocalSchema`` to an ``LLMLocalSession``. +/// +/// - Tip: For more information, refer to the documentation of the `LLMSchema` from SpeziLLM. +public struct LLMLocalSchema: LLMSchema { + public typealias Platform = LLMLocalPlatform + + + /// The on-device `URL` where the model is located. + let modelPath: URL + /// Parameters of the llama.cpp LLM. + let parameters: LLMLocalParameters + /// Context parameters of the llama.cpp LLM. + let contextParameters: LLMLocalContextParameters + /// Sampling parameters of the llama.cpp LLM. + let samplingParameters: LLMLocalSamplingParameters + /// Closure to properly format the ``LLMLocal/context`` to a `String` which is tokenized and passed to the LLM. + let formatChat: (@Sendable (Chat) throws -> String) + public let injectIntoContext: Bool + + + /// Creates an instance of the ``LLMLocalSchema`` containing all necessary configuration for local LLM inference. + /// + /// - Parameters: + /// - modelPath: A local `URL` where the LLM file is stored. The format of the LLM must be in the llama.cpp `.gguf` format. + /// - parameters: Parameterize the LLM via ``LLMLocalParameters``. + /// - contextParameters: Configure the context of the LLM via ``LLMLocalContextParameters``. + /// - samplingParameters: Parameterize the sampling methods of the LLM via ``LLMLocalSamplingParameters``. + /// - injectIntoContext: Indicates if the inference output by the ``LLMLocalSession`` should automatically be inserted into the ``LLMLocalSession/context``, defaults to false. + /// - formatChat: Closure to properly format the ``LLMLocalSession/context`` to a `String` which is tokenized and passed to the LLM, defaults to Llama2 prompt format. 
+ public init( + modelPath: URL, + parameters: LLMLocalParameters = .init(), + contextParameters: LLMLocalContextParameters = .init(), + samplingParameters: LLMLocalSamplingParameters = .init(), + injectIntoContext: Bool = false, + formatChat: @escaping (@Sendable (Chat) throws -> String) = PromptFormattingDefaults.llama2 + ) { + self.modelPath = modelPath + self.parameters = parameters + self.contextParameters = contextParameters + self.samplingParameters = samplingParameters + self.injectIntoContext = injectIntoContext + self.formatChat = formatChat + } +} diff --git a/Sources/SpeziLLMLocal/LLMLocal+Generation.swift b/Sources/SpeziLLMLocal/LLMLocalSession+Generation.swift similarity index 73% rename from Sources/SpeziLLMLocal/LLMLocal+Generation.swift rename to Sources/SpeziLLMLocal/LLMLocalSession+Generation.swift index 5985c7e..2aac5ea 100644 --- a/Sources/SpeziLLMLocal/LLMLocal+Generation.swift +++ b/Sources/SpeziLLMLocal/LLMLocalSession+Generation.swift @@ -11,8 +11,8 @@ import llama import SpeziLLM -/// Extension of ``LLMLocal`` handling the text generation. -extension LLMLocal { +/// Extension of ``LLMLocalSession`` handling the text generation. +extension LLMLocalSession { /// Typealias for the llama.cpp `llama_token`. typealias LLMLocalToken = llama_token @@ -24,16 +24,18 @@ extension LLMLocal { func _generate( // swiftlint:disable:this identifier_name function_body_length cyclomatic_complexity continuation: AsyncThrowingStream.Continuation ) async { + Self.logger.debug("SpeziLLMLocal: Local LLM started a new inference") + await MainActor.run { self.state = .generating } // Log the most important parameters of the LLM - Self.logger.debug("SpeziLLMLocal: n_length = \(self.parameters.maxOutputLength, privacy: .public), n_ctx = \(self.contextParameters.contextWindowSize, privacy: .public), n_batch = \(self.contextParameters.batchSize, privacy: .public), n_kv_req = \(self.parameters.maxOutputLength, privacy: .public)") + Self.logger.debug("SpeziLLMLocal: n_length = \(self.schema.parameters.maxOutputLength, privacy: .public), n_ctx = \(self.schema.contextParameters.contextWindowSize, privacy: .public), n_batch = \(self.schema.contextParameters.batchSize, privacy: .public), n_kv_req = \(self.schema.parameters.maxOutputLength, privacy: .public)") // Allocate new model context, if not already present if self.modelContext == nil { - guard let context = llama_new_context_with_model(model, self.contextParameters.llamaCppRepresentation) else { + guard let context = llama_new_context_with_model(model, schema.contextParameters.llamaCppRepresentation) else { Self.logger.error("SpeziLLMLocal: Failed to initialize context") await finishGenerationWithError(LLMLocalError.generationError, on: continuation) return @@ -42,13 +44,13 @@ extension LLMLocal { } // Check if the maximal output generation length is smaller or equals to the context window size. 
- guard self.parameters.maxOutputLength <= self.contextParameters.contextWindowSize else { - Self.logger.error("SpeziLLMLocal: Error: n_kv_req \(self.parameters.maxOutputLength, privacy: .public) > n_ctx, the required KV cache size is not big enough") + guard schema.parameters.maxOutputLength <= schema.contextParameters.contextWindowSize else { + Self.logger.error("SpeziLLMLocal: Error: n_kv_req \(self.schema.parameters.maxOutputLength, privacy: .public) > n_ctx, the required KV cache size is not big enough") await finishGenerationWithError(LLMLocalError.generationError, on: continuation) return } - // Tokenizes the entire context of the `LLM` + // Tokenizes the entire context of the LLM guard let tokens = try? await tokenize() else { Self.logger.error(""" SpeziLLMLocal: Tokenization failed as illegal context exists. @@ -59,11 +61,15 @@ extension LLMLocal { return } + guard await !checkCancellation(on: continuation) else { + return + } + // Check if the input token count is smaller than the context window size decremented by 4 (space for end tokens). - guard tokens.count <= self.contextParameters.contextWindowSize - 4 else { + guard tokens.count <= schema.contextParameters.contextWindowSize - 4 else { Self.logger.error(""" SpeziLLMLocal: Input prompt is too long with \(tokens.count, privacy: .public) tokens for the configured - context window size of \(self.contextParameters.contextWindowSize, privacy: .public) tokens. + context window size of \(self.schema.contextParameters.contextWindowSize, privacy: .public) tokens. """) await finishGenerationWithError(LLMLocalError.generationError, on: continuation) return @@ -84,6 +90,10 @@ extension LLMLocal { // llama_decode will output logits only for the last token of the prompt batch.logits[Int(batch.n_tokens) - 1] = 1 + guard await !checkCancellation(on: continuation) else { + return + } + if llama_decode(self.modelContext, batch) != 0 { Self.logger.error(""" SpeziLLMLocal: Initial prompt decoding as failed! 
@@ -92,6 +102,10 @@ extension LLMLocal { return } + guard await !checkCancellation(on: continuation) else { + return + } + // Batch already includes tokens from the input prompt var batchTokenIndex = batch.n_tokens var decodedTokens = 0 @@ -99,13 +113,17 @@ extension LLMLocal { // Calculate the token generation rate let startTime = Date() - while decodedTokens <= self.parameters.maxOutputLength { + while decodedTokens <= schema.parameters.maxOutputLength { + guard await !checkCancellation(on: continuation) else { + return + } + let nextTokenId = sample(batchSize: batch.n_tokens) // Either finish the generation once EOS token appears, the maximum output length of the answer is reached or the context window is reached if nextTokenId == llama_token_eos(self.model) - || decodedTokens == self.parameters.maxOutputLength - || batchTokenIndex == self.contextParameters.contextWindowSize { + || decodedTokens == schema.parameters.maxOutputLength + || batchTokenIndex == schema.contextParameters.contextWindowSize { continuation.finish() await MainActor.run { self.state = .ready @@ -123,6 +141,15 @@ extension LLMLocal { Self.logger.debug(""" SpeziLLMLocal: Yielded token: \(nextStringPiece, privacy: .public) """) + + // Automatically inject the yielded string piece into the `LLMLocal/context` + if schema.injectIntoContext { + let nextStringPiece = nextStringPiece + await MainActor.run { + context.append(assistantOutput: nextStringPiece) + } + } + continuation.yield(nextStringPiece) // Prepare the next batch @@ -148,10 +175,13 @@ extension LLMLocal { Self.logger.debug("SpeziLLMLocal: Decoded \(decodedTokens, privacy: .public) tokens in \(String(format: "%.2f", elapsedTime), privacy: .public) s, speed: \(String(format: "%.2f", Double(decodedTokens) / elapsedTime), privacy: .public)) t/s") llama_print_timings(self.modelContext) - + continuation.finish() await MainActor.run { + context.completeAssistantStreaming() self.state = .ready } + + Self.logger.debug("SpeziLLMLocal: Local LLM completed an inference") } } diff --git a/Sources/SpeziLLMLocal/LLMLocalSession+PromptFormatting.swift b/Sources/SpeziLLMLocal/LLMLocalSession+PromptFormatting.swift new file mode 100644 index 0000000..f77fa16 --- /dev/null +++ b/Sources/SpeziLLMLocal/LLMLocalSession+PromptFormatting.swift @@ -0,0 +1,148 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import SpeziChat + + +extension LLMLocalSession { + /// Holds default prompt formatting strategies for [Llama2](https://ai.meta.com/llama/) as well as [Phi-2](https://www.microsoft.com/en-us/research/blog/phi-2-the-surprising-power-of-small-language-models/) models. + public enum PromptFormattingDefaults { + /// Prompt formatting closure for the [Llama2](https://ai.meta.com/llama/) model + public static let llama2: ((Chat) throws -> String) = { chat in // swiftlint:disable:this closure_body_length + /// BOS token of the LLM, used at the start of each prompt passage. + let BOS = "" + /// EOS token of the LLM, used at the end of each prompt passage. + let EOS = "" + /// BOSYS token of the LLM, used at the start of the system prompt. + let BOSYS = "<>" + /// EOSYS token of the LLM, used at the end of the system prompt. + let EOSYS = "<>" + /// BOINST token of the LLM, used at the start of the instruction part of the prompt. 
+ let BOINST = "[INST]" + /// EOINST token of the LLM, used at the end of the instruction part of the prompt. + let EOINST = "[/INST]" + + guard chat.first?.role == .system else { + throw LLMLocalError.illegalContext + } + + var systemPrompts: [String] = [] + var initialUserPrompt: String = "" + + for chatEntity in chat { + if chatEntity.role != .system { + if chatEntity.role == .user { + initialUserPrompt = chatEntity.content + break + } else { + throw LLMLocalError.illegalContext + } + } + + systemPrompts.append(chatEntity.content) + } + + /// Build the initial Llama2 prompt structure + /// + /// A template of the prompt structure looks like: + /// """ + /// [INST] <> + /// {your_system_prompt} + /// <> + /// + /// {user_message_1} [/INST] + /// """ + var prompt = """ + \(BOS)\(BOINST) \(BOSYS) + \(systemPrompts.joined(separator: " ")) + \(EOSYS) + + \(initialUserPrompt) \(EOINST) + """ + " " // Add a spacer to the generated output from the model + + for chatEntry in chat.dropFirst(2) { + if chatEntry.role == .assistant { + /// Append response from assistant to the Llama2 prompt structure + /// + /// A template for appending an assistant response to the overall prompt looks like: + /// {user_message_1} [/INST]){model_reply_1} + prompt += """ + \(chatEntry.content)\(EOS) + """ + } else if chatEntry.role == .user { + /// Append response from user to the Llama2 prompt structure + /// + /// A template for appending an assistant response to the overall prompt looks like: + /// [INST] {user_message_2} [/INST] + prompt += """ + \(BOS)\(BOINST) \(chatEntry.content) \(EOINST) + """ + " " // Add a spacer to the generated output from the model + } + } + + return prompt + } + + /// Prompt formatting closure for the [Phi-2](https://www.microsoft.com/en-us/research/blog/phi-2-the-surprising-power-of-small-language-models/) model + public static let phi2: ((Chat) throws -> String) = { chat in + guard chat.first?.role == .system else { + throw LLMLocalError.illegalContext + } + + var systemPrompts: [String] = [] + var initialUserPrompt: String = "" + + for chatEntity in chat { + if chatEntity.role != .system { + if chatEntity.role == .user { + initialUserPrompt = chatEntity.content + break + } else { + throw LLMLocalError.illegalContext + } + } + + systemPrompts.append(chatEntity.content) + } + + /// Build the initial Phi-2 prompt structure + /// + /// A template of the prompt structure looks like: + /// """ + /// System: {your_system_prompt} + /// Instruct: {model_reply_1} + /// Output: {model_reply_1} + /// """ + var prompt = """ + System: \(systemPrompts.joined(separator: " ")) + Instruct: \(initialUserPrompt)\n + """ + + for chatEntry in chat.dropFirst(2) { + if chatEntry.role == .assistant { + /// Append response from assistant to the Phi-2 prompt structure + prompt += """ + Output: \(chatEntry.content)\n + """ + } else if chatEntry.role == .user { + /// Append response from assistant to the Phi-2 prompt structure + prompt += """ + Instruct: \(chatEntry.content)\n + """ + } + } + + /// Model starts responding after + if chat.last?.role == .user { + prompt += "Output: " + } + + return prompt + } + } +} diff --git a/Sources/SpeziLLMLocal/LLMLocal+Sampling.swift b/Sources/SpeziLLMLocal/LLMLocalSession+Sampling.swift similarity index 62% rename from Sources/SpeziLLMLocal/LLMLocal+Sampling.swift rename to Sources/SpeziLLMLocal/LLMLocalSession+Sampling.swift index b1b0aaf..942e282 100644 --- a/Sources/SpeziLLMLocal/LLMLocal+Sampling.swift +++ b/Sources/SpeziLLMLocal/LLMLocalSession+Sampling.swift @@ 
-10,7 +10,7 @@ import Foundation import llama -extension LLMLocal { +extension LLMLocalSession { /// Based on the current state of the context, sample the to be inferred output via the temperature method /// /// - Parameters: @@ -33,14 +33,14 @@ extension LLMLocal { ) // Sample via the temperature method - let minKeep = Int(max(1, self.samplingParameters.outputProbabilities)) - llama_sample_top_k(self.modelContext, &candidatesP, self.samplingParameters.topK, minKeep) - llama_sample_tail_free(self.modelContext, &candidatesP, self.samplingParameters.tfs, minKeep) - llama_sample_typical(self.modelContext, &candidatesP, self.samplingParameters.typicalP, minKeep) - llama_sample_top_p(self.modelContext, &candidatesP, self.samplingParameters.topP, minKeep) - llama_sample_min_p(self.modelContext, &candidatesP, self.samplingParameters.minP, minKeep) - llama_sample_temp(self.modelContext, &candidatesP, self.samplingParameters.temperature) + let minKeep = Int(max(1, schema.samplingParameters.outputProbabilities)) + llama_sample_top_k(modelContext, &candidatesP, schema.samplingParameters.topK, minKeep) + llama_sample_tail_free(modelContext, &candidatesP, schema.samplingParameters.tfs, minKeep) + llama_sample_typical(modelContext, &candidatesP, schema.samplingParameters.typicalP, minKeep) + llama_sample_top_p(modelContext, &candidatesP, schema.samplingParameters.topP, minKeep) + llama_sample_min_p(modelContext, &candidatesP, schema.samplingParameters.minP, minKeep) + llama_sample_temp(modelContext, &candidatesP, schema.samplingParameters.temperature) - return llama_sample_token(self.modelContext, &candidatesP) + return llama_sample_token(modelContext, &candidatesP) } } diff --git a/Sources/SpeziLLMLocal/LLMLocalSession+Setup.swift b/Sources/SpeziLLMLocal/LLMLocalSession+Setup.swift new file mode 100644 index 0000000..c60d279 --- /dev/null +++ b/Sources/SpeziLLMLocal/LLMLocalSession+Setup.swift @@ -0,0 +1,48 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import llama + + +extension LLMLocalSession { + /// Set up the local LLM execution environment via llama.cpp + /// + /// - Parameters: + /// - continuation: A Swift `AsyncThrowingStream` that streams the generated output. + /// - Returns: `true` if the setup was successful, `false` otherwise. 
+ func setup(continuation: AsyncThrowingStream.Continuation) async -> Bool { + Self.logger.debug("SpeziLLMLocal: Local LLM is being initialized") + await MainActor.run { + state = .loading + } + + guard let model = llama_load_model_from_file(schema.modelPath.path().cString(using: .utf8), schema.parameters.llamaCppRepresentation) else { + await finishGenerationWithError(LLMLocalError.modelNotFound, on: continuation) + Self.logger.error("SpeziLLMLocal: Local LLM file could not be opened, indicating that the model file doesn't exist") + return false + } + + /// Check if model was trained for the configured context window size + guard schema.contextParameters.contextWindowSize <= llama_n_ctx_train(model) else { + await finishGenerationWithError(LLMLocalError.contextSizeMismatch, on: continuation) + Self.logger.error(""" + SpeziLLMLocal: Model was trained on only \(llama_n_ctx_train(model), privacy: .public) context tokens, + not the configured \(self.schema.contextParameters.contextWindowSize, privacy: .public) context tokens + """) + return false + } + + self.model = model + + await MainActor.run { + state = .ready + } + Self.logger.debug("SpeziLLMLocal: Local LLM finished initializing, now ready to use") + return true + } +} diff --git a/Sources/SpeziLLMLocal/LLMLocal+Tokenization.swift b/Sources/SpeziLLMLocal/LLMLocalSession+Tokenization.swift similarity index 81% rename from Sources/SpeziLLMLocal/LLMLocal+Tokenization.swift rename to Sources/SpeziLLMLocal/LLMLocalSession+Tokenization.swift index fcec72e..b6c36d8 100644 --- a/Sources/SpeziLLMLocal/LLMLocal+Tokenization.swift +++ b/Sources/SpeziLLMLocal/LLMLocalSession+Tokenization.swift @@ -10,23 +10,23 @@ import Foundation import llama -/// Extension of ``LLMLocal`` handling the text tokenization. -extension LLMLocal { +/// Extension of ``LLMLocalSession`` handling the text tokenization. +extension LLMLocalSession { /// Converts the current context of the model to the individual `LLMLocalToken`'s based on the model's dictionary. /// This is a required tasks as LLMs internally processes tokens. /// /// - Returns: The tokenized `String` as `LLMLocalToken`'s. func tokenize() async throws -> [LLMLocalToken] { // Format the chat into a prompt that conforms to the prompt structure of the respective LLM - let formattedChat = try await formatChat(self.context) + let formattedChat = try await schema.formatChat(self.context) var tokens: [LLMLocalToken] = .init( - llama_tokenize_with_context(self.modelContext, std.string(formattedChat), self.parameters.addBosToken, true) + llama_tokenize_with_context(self.modelContext, std.string(formattedChat), schema.parameters.addBosToken, true) ) // Truncate tokens if there wouldn't be enough context size for the generated output - if tokens.count > Int(self.contextParameters.contextWindowSize) - self.parameters.maxOutputLength { - tokens = Array(tokens.suffix(Int(self.contextParameters.contextWindowSize) - self.parameters.maxOutputLength)) + if tokens.count > Int(schema.contextParameters.contextWindowSize) - schema.parameters.maxOutputLength { + tokens = Array(tokens.suffix(Int(schema.contextParameters.contextWindowSize) - schema.parameters.maxOutputLength)) } // Output generation shouldn't run without any tokens @@ -47,7 +47,7 @@ extension LLMLocal { /// - Parameters: /// - tokens: An array of `LLMLocalToken`s that should be detokenized. /// - Returns: An array of tupels of `LLMLocalToken`s as well as their `String` representation. 
- /// + /// /// - Note: Used only for debug purposes func detokenize(tokens: [LLMLocalToken]) -> [(LLMLocalToken, String)] { tokens.reduce(into: [(LLMLocalToken, String)]()) { partialResult, token in diff --git a/Sources/SpeziLLMLocal/LLMLocalSession.swift b/Sources/SpeziLLMLocal/LLMLocalSession.swift new file mode 100644 index 0000000..2c410fc --- /dev/null +++ b/Sources/SpeziLLMLocal/LLMLocalSession.swift @@ -0,0 +1,136 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation +import os +import SpeziChat +import SpeziLLM + + +/// Represents an ``LLMLocalSchema`` in execution. +/// +/// The ``LLMLocalSession`` is the executable version of the local LLM containing context and state as defined by the ``LLMLocalSchema``. +/// It utilizes the [llama.cpp library](https://github.com/ggerganov/llama.cpp) to locally execute a large language model on-device. +/// +/// The inference is started by ``LLMLocalSession/generate()``, returning an `AsyncThrowingStream` and can be cancelled via ``LLMLocalSession/cancel()``. +/// The ``LLMLocalSession`` exposes its current state via the ``LLMLocalSession/context`` property, containing all the conversational history with the LLM. +/// +/// - Warning: The ``LLMLocalSession`` shouldn't be created manually but always through the ``LLMLocalPlatform`` via the `LLMRunner`. +/// +/// - Important: In order to use the LLM local target, one needs to set build parameters in the consuming Xcode project or the consuming SPM package to enable the [Swift / C++ Interop](https://www.swift.org/documentation/cxx-interop/), +/// introduced in Xcode 15 and Swift 5.9. Please refer to for more information. +/// +/// - Tip: For more information, refer to the documentation of the `LLMSession` from SpeziLLM. +/// +/// ### Usage +/// +/// The example below demonstrates a minimal usage of the ``LLMLocalSession`` via the `LLMRunner`. +/// +/// ```swift +/// struct LLMLocalDemoView: View { +/// @Environment(LLMRunner.self) var runner +/// @State var responseText = "" +/// +/// var body: some View { +/// Text(responseText) +/// .task { +/// // Instantiate the `LLMLocalSchema` to an `LLMLocalSession` via the `LLMRunner`. +/// let llmSession: LLMLocalSession = runner( +/// with: LLMLocalSchema( +/// modelPath: URL(string: "URL to the local model file")! +/// ) +/// ) +/// +/// for try await token in try await llmSession.generate() { +/// responseText.append(token) +/// } +/// } +/// } +/// } +/// ``` +@Observable +public final class LLMLocalSession: LLMSession, @unchecked Sendable { + /// A Swift Logger that logs important information from the ``LLMLocalSession``. + static let logger = Logger(subsystem: "edu.stanford.spezi", category: "SpeziLLMLocal") + + let platform: LLMLocalPlatform + let schema: LLMLocalSchema + + /// A task managing the ``LLMLocalSession`` output generation. + @ObservationIgnored private var task: Task<(), Never>? + + @MainActor public var state: LLMState = .uninitialized + @MainActor public var context: Chat = [] + + /// A pointer to the allocated model via llama.cpp. + @ObservationIgnored var model: OpaquePointer? + /// A pointer to the allocated model context from llama.cpp. + @ObservationIgnored var modelContext: OpaquePointer? + + + /// Creates an instance of a ``LLMLocalSession`` responsible for LLM inference. 
+ /// Only the ``LLMLocalPlatform`` should create an instance of ``LLMLocalSession``. + /// + /// - Parameters: + /// - platform: Reference to the ``LLMLocalPlatform`` where the ``LLMLocalSession`` is running on. + /// - schema: The configuration of the local LLM expressed by the ``LLMLocalSchema``. + init(_ platform: LLMLocalPlatform, schema: LLMLocalSchema) { + self.platform = platform + self.schema = schema + + // Inject system prompt into context + if let systemPrompt = schema.parameters.systemPrompt { + Task { @MainActor in + context.append(systemMessage: systemPrompt) + } + } + } + + + @discardableResult + public func generate() async throws -> AsyncThrowingStream { + try await platform.exclusiveAccess() + + let (stream, continuation) = AsyncThrowingStream.makeStream(of: String.self) + + // Execute the output generation of the LLM + task = Task(priority: platform.configuration.taskPriority) { + // Unregister as soon as `Task` finishes + defer { + Task { + await platform.signal() + } + } + + // Setup the model, if not already done + if model == nil { + guard await setup(continuation: continuation) else { + return + } + } + + guard await !checkCancellation(on: continuation) else { + return + } + + // Execute the inference + await _generate(continuation: continuation) + } + + return stream + } + + public func cancel() { + task?.cancel() + } + + + deinit { + cancel() + } +} diff --git a/Sources/SpeziLLMLocal/SpeziLLMLocal.docc/SpeziLLMLocal.md b/Sources/SpeziLLMLocal/SpeziLLMLocal.docc/SpeziLLMLocal.md index 2a3ba8c..e246f8e 100644 --- a/Sources/SpeziLLMLocal/SpeziLLMLocal.docc/SpeziLLMLocal.md +++ b/Sources/SpeziLLMLocal/SpeziLLMLocal.docc/SpeziLLMLocal.md @@ -57,32 +57,28 @@ You need to add the SpeziLLM Swift package to ## Spezi LLM Local Components -The core component of the ``SpeziLLMLocal`` target is the ``LLMLocal`` class which conforms to the [`LLM` protocol of SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llm). ``LLMLocal`` heavily utilizes the [llama.cpp library](https://github.com/ggerganov/llama.cpp) to perform the inference of the Language Model. +The core components of the ``SpeziLLMLocal`` target are ``LLMLocalSchema``, ``LLMLocalSession`` as well as ``LLMLocalPlatform``. They heavily utilize the [llama.cpp library](https://github.com/ggerganov/llama.cpp) to perform the inference of the Language Model. ``LLMLocalSchema`` defines the type and configuration of the LLM, ``LLMLocalSession`` represents the ``LLMLocalSchema`` in execution while ``LLMLocalPlatform`` is the LLM execution platform. -> Important: To execute a LLM locally, ``LLMLocal`` requires the model file being present on the local device. +> Important: To execute a LLM locally, the model file must be present on the local device. > The model must be in the popular `.gguf` format introduced by the [llama.cpp library](https://github.com/ggerganov/llama.cpp) > Tip: In order to download the model file of the Language model to the local device, SpeziLLM provides the [SpeziLLMLocalDownload](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillmlocaldownload) target which provides model download and storage functionalities. -``LLMLocal`` offers a variety of configuration possibilities, such as the used model file, the context window, the maximum output size or the batch size. 
These options can be set via the ``LLMLocal/init(modelPath:parameters:contextParameters:samplingParameters:formatChat:)`` initializer and the ``LLMLocalParameters``, ``LLMLocalContextParameters``, and ``LLMLocalSamplingParameters`` types. Keep in mind that the model file must be in the popular `.gguf` format! +``LLMLocalSchema`` offers a variety of configuration possibilities, such as the used model file, the context window, the maximum output size or the batch size. These options can be set via the ``LLMLocalSchema/init(modelPath:parameters:contextParameters:samplingParameters:injectIntoContext:formatChat:)`` initializer and the ``LLMLocalParameters``, ``LLMLocalContextParameters``, and ``LLMLocalSamplingParameters`` types. Keep in mind that the model file must be in the popular `.gguf` format! -- Important: ``LLMLocal`` shouldn't be used on it's own but always wrapped by the Spezi `LLMRunner` as the runner handles all management overhead tasks. +- Important: ``LLMLocalSchema``, ``LLMLocalSession`` as well as ``LLMLocalPlatform`` shouldn't be used on it's own but always used together with the Spezi `LLMRunner`! ### Setup -In order to use the ``LLMLocal``, the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner) needs to be initialized in the Spezi `Configuration`. Only after, the `LLMRunner` can be used to execute the ``LLMLocal`` locally. +In order to use local LLMs within Spezi, the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner) needs to be initialized in the Spezi `Configuration` with the ``LLMLocalPlatform``. Only after, the `LLMRunner` can be used to execute LLMs locally. See the [SpeziLLM documentation](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) for more details. ```swift class LocalLLMAppDelegate: SpeziAppDelegate { override var configuration: Configuration { Configuration { - LLMRunner( - runnerConfig: .init( - taskPriority: .medium - ) - ) { - LLMLocalRunnerSetupTask() + LLMRunner { + LLMLocalPlatform() } } } @@ -91,52 +87,56 @@ class LocalLLMAppDelegate: SpeziAppDelegate { ### Usage -The code example below showcases the interaction with the ``LLMLocal`` through the the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner). -Based on a `String` prompt, the `LLMGenerationTask/generate(prompt:)` method returns an `AsyncThrowingStream` which yields the inferred characters until the generation has completed. +The code example below showcases the interaction with the local LLM abstractions through the the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner), which is injected into the SwiftUI `Environment` via the `Configuration` shown above. -The ``LLMLocal`` contains the ``LLMLocal/context`` property which holds the entire history of the model interactions. -This includes the system prompt, user input, but also assistant responses. 
-Ensure the property always contains all necessary information, as the ``LLMLocal/generate(continuation:)`` function executes the inference based on the ``LLMLocal/context`` +The ``LLMLocalSchema`` defines the type and configurations of the to-be-executed ``LLMLocalSession``. This transformation is done via the [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner) that uses the ``LLMLocalPlatform``. The inference via ``LLMLocalSession/generate()`` returns an `AsyncThrowingStream` that yields all generated `String` pieces. -> Tip: The model can be queried via the `LLMGenerationTask/generate()` and `LLMGenerationTask/generate(prompt:)` calls (returned from wrapping the ``LLMLocal`` in the `LLMRunner` from the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) target). - The first method takes no input prompt at all but uses the current context of the model (so `LLM/context`) to query the model. - The second takes a `String`-based input from the user and appends it to the context of the model (so `LLM/context`) before querying the model. +The ``LLMLocalSession`` contains the ``LLMLocalSession/context`` property which holds the entire history of the model interactions. This includes the system prompt, user input, but also assistant responses. +Ensure the property always contains all necessary information, as the ``LLMLocalSession/generate()`` function executes the inference based on the ``LLMLocalSession/context``. -> Important: The ``LLMLocal`` should only be used together with the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner)! +> Important: The local LLM abstractions should only be used together with the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner)! ```swift -struct LocalLLMChatView: View { - @Environment(LLMRunner.self) var runner: LLMRunner - - // The locally executed LLM - @State var model: LLMLocal = .init( - modelPath: ... - ) - @State var responseText: String - - func executePrompt(prompt: String) { - // Execute the query on the runner, returning a stream of outputs - let stream = try await runner(with: model).generate(prompt: "Hello LLM!") - - for try await token in stream { - responseText.append(token) - } - } +struct LLMLocalDemoView: View { + @Environment(LLMRunner.self) var runner + @State var responseText = "" + + var body: some View { + Text(responseText) + .task { + // Instantiate the `LLMLocalSchema` to an `LLMLocalSession` via the `LLMRunner`. + let llmSession: LLMLocalSession = runner( + with: LLMLocalSchema( + modelPath: URL(string: "URL to the local model file")! 
+ ) + ) + + for try await token in try await llmSession.generate() { + responseText.append(token) + } + } + } } ``` ## Topics -### Model +### LLM Local abstraction + +- ``LLMLocalSchema`` +- ``LLMLocalSession`` -- ``LLMLocal`` +### LLM Execution -### Configuration +- ``LLMLocalPlatform`` +- ``LLMLocalPlatformConfiguration`` + +### LLM Configuration - ``LLMLocalParameters`` - ``LLMLocalContextParameters`` - ``LLMLocalSamplingParameters`` -### Setup +### Misc -- ``LLMLocalRunnerSetupTask`` +- ``LLMLocalError`` diff --git a/Sources/SpeziLLMLocalDownload/LLMLocalDownloadView.swift b/Sources/SpeziLLMLocalDownload/LLMLocalDownloadView.swift index 85a17f9..2a2a091 100644 --- a/Sources/SpeziLLMLocalDownload/LLMLocalDownloadView.swift +++ b/Sources/SpeziLLMLocalDownload/LLMLocalDownloadView.swift @@ -11,11 +11,11 @@ import SpeziViews import SwiftUI -/// Provides an onboarding view for downloading locally executed Spezi `LLM`s to the device. +/// Provides an onboarding view for downloading locally executed Spezi LLMs to the device. /// /// It can be combined with the SpeziOnboarding `OnboardingStack` to create an easy onboarding flow within the application. /// -/// The ``LLMLocalDownloadView/init(llmDownloadUrl:llmStorageUrl:action:)`` initializer accepts the remote download `URL` of the LLM, the local storage `URL` of the downloaded model, as well as an action closure to move onto the next (onboarding) step. +/// The ``LLMLocalDownloadView/init(downloadDescription:llmDownloadUrl:llmStorageUrl:action:)-9hraf`` initializer accepts a download description displayed in the view, the remote download `URL` of the LLM, the local storage `URL` of the downloaded model, as well as an action closure to move onto the next (onboarding) step. /// /// The heavy lifting of downloading and storing the model is done by the ``LLMLocalDownloadManager`` which exposes the current downloading state view the ``LLMLocalDownloadManager/state`` property of type ``LLMLocalDownloadManager/DownloadState``. /// @@ -37,6 +37,7 @@ import SwiftUI /// /// var body: some View { /// LLMLocalDownloadView( +/// downloadDescription: "The Llama2 7B model will be downloaded", /// llmDownloadUrl: LLMLocalDownloadManager.LLMUrlDefaults.llama2ChatModelUrl, // Download the Llama2 7B model /// llmStorageUrl: .cachesDirectory.appending(path: "llm.gguf") // Store the downloaded LLM in the caches directory /// ) { @@ -50,6 +51,8 @@ public struct LLMLocalDownloadView: View { @State private var downloadManager: LLMLocalDownloadManager /// The action that should be performed when pressing the primary button of the view. private let action: () async throws -> Void + /// Description of the to-be-downloaded model shown in the ``LLMLocalDownloadView``. + private let downloadDescription: Text /// Indicates the state of the view, get's derived from the ``LLMLocalDownloadManager/state``. @State private var viewState: ViewState = .idle @@ -109,7 +112,7 @@ public struct LLMLocalDownloadView: View { .font(.system(size: 100)) .foregroundColor(.accentColor) .accessibilityHidden(true) - Text("LLM_DOWNLOAD_DESCRIPTION", bundle: .module) + downloadDescription .multilineTextAlignment(.center) .padding(.vertical, 16) } @@ -171,10 +174,12 @@ public struct LLMLocalDownloadView: View { /// Creates a ``LLMLocalDownloadView`` that presents an onboarding view that helps with downloading the necessary LLM files from remote servers. /// /// - Parameters: + /// - downloadDescription: Localized description of the to-be-downloaded model shown in the ``LLMLocalDownloadView``. 
/// - llmDownloadUrl: The remote `URL` from where the LLM file should be downloaded. /// - llmDownloadLocation: The local `URL` where the LLM file should be stored. /// - action: The action that should be performed when pressing the primary button of the view. public init( + downloadDescription: LocalizedStringResource, llmDownloadUrl: URL = LLMLocalDownloadManager.LLMUrlDefaults.llama2ChatModelUrl, llmStorageUrl: URL = .cachesDirectory.appending(path: "llm.gguf"), action: @escaping () async throws -> Void @@ -185,11 +190,41 @@ public struct LLMLocalDownloadView: View { llmStorageUrl: llmStorageUrl ) ) + self.downloadDescription = Text(downloadDescription) + self.action = action + } + + /// Creates a ``LLMLocalDownloadView`` that presents an onboarding view that helps with downloading the necessary LLM files from remote servers. + /// + /// - Parameters: + /// - downloadDescription: Description of the to-be-downloaded model shown in the ``LLMLocalDownloadView``. + /// - llmDownloadUrl: The remote `URL` from where the LLM file should be downloaded. + /// - llmDownloadLocation: The local `URL` where the LLM file should be stored. + /// - action: The action that should be performed when pressing the primary button of the view. + @_disfavoredOverload + public init( + downloadDescription: S, + llmDownloadUrl: URL = LLMLocalDownloadManager.LLMUrlDefaults.llama2ChatModelUrl, + llmStorageUrl: URL = .cachesDirectory.appending(path: "llm.gguf"), + action: @escaping () async throws -> Void + ) { + self._downloadManager = State( + wrappedValue: LLMLocalDownloadManager( + llmDownloadUrl: llmDownloadUrl, + llmStorageUrl: llmStorageUrl + ) + ) + self.downloadDescription = Text(verbatim: String(downloadDescription)) self.action = action } } +#if DEBUG #Preview { - LLMLocalDownloadView(action: {}) + LLMLocalDownloadView( + downloadDescription: "LLM_DOWNLOAD_DESCRIPTION".localized(.module), + action: {} + ) } +#endif diff --git a/Sources/SpeziLLMLocalDownload/SpeziLLMLocalDownload.docc/SpeziLLMLocalDownload.md b/Sources/SpeziLLMLocalDownload/SpeziLLMLocalDownload.docc/SpeziLLMLocalDownload.md index 0d09531..ff2358c 100644 --- a/Sources/SpeziLLMLocalDownload/SpeziLLMLocalDownload.docc/SpeziLLMLocalDownload.md +++ b/Sources/SpeziLLMLocalDownload/SpeziLLMLocalDownload.docc/SpeziLLMLocalDownload.md @@ -32,11 +32,11 @@ The two main components of ``SpeziLLMLocalDownload`` are the ``LLMLocalDownloadV ### Download View -The ``LLMLocalDownloadView`` provides an out-of-the-box onboarding view for downloading locally executed [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLM`s](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner) to the device. +The ``LLMLocalDownloadView`` provides an out-of-the-box onboarding view for downloading locally executed [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) LLMs to the device. It can be combined with the [SpeziOnboarding](https://swiftpackageindex.com/stanfordspezi/spezionboarding/documentation) [`OnboardingStack`](https://swiftpackageindex.com/stanfordspezi/spezionboarding/documentation/spezionboarding/onboardingstack) to create an easy onboarding flow within the application. The ``LLMLocalDownloadView`` automatically checks if the model already exists on disk, and if not, offers the start of the download via a button click. 
The download process itself includes the presentation of a percentage progress view in order to give the user a better feeling for the download progress. -The ``LLMLocalDownloadView/init(llmDownloadUrl:llmStorageUrl:action:)`` initializer accepts the remote download `URL` of the LLM, the local storage `URL` of the downloaded model, as well as an action closure to move onto the next (onboarding) step. +The ``LLMLocalDownloadView/init(downloadDescription:llmDownloadUrl:llmStorageUrl:action:)-9hraf`` initializer accepts a download description displayed in the view, the remote download `URL` of the LLM, the local storage `URL` of the downloaded model, as well as an action closure to move onto the next (onboarding) step. The heavy lifting of downloading and storing the model is done by the ``LLMLocalDownloadManager`` which exposes the current downloading state view the ``LLMLocalDownloadManager/state`` property of type ``LLMLocalDownloadManager/DownloadState``. @@ -61,6 +61,7 @@ struct LLMLocalOnboardingDownloadView: View { var body: some View { LLMLocalDownloadView( + downloadDescription: "The Llama2 7B model will be downloaded", llmDownloadUrl: LLMLocalDownloadManager.LLMUrlDefaults.llama2ChatModelUrl, // Download the Llama2 7B model llmStorageUrl: .cachesDirectory.appending(path: "llm.gguf") // Store the downloaded LLM in the caches directory ) { diff --git a/Sources/SpeziLLMOpenAI/Configuration/LLMOpenAIParameters.swift b/Sources/SpeziLLMOpenAI/Configuration/LLMOpenAIParameters.swift index 8a20965..4438597 100644 --- a/Sources/SpeziLLMOpenAI/Configuration/LLMOpenAIParameters.swift +++ b/Sources/SpeziLLMOpenAI/Configuration/LLMOpenAIParameters.swift @@ -19,11 +19,14 @@ public struct LLMOpenAIParameters: Sendable { }() } + /// The to-be-used OpenAI model. let modelType: Model - /// The to-be-used system prompt of the LLM. - let systemPrompt: String - /// Separate OpenAI token that overrides the one defined within the ``LLMRemoteRunnerSetupTask``. + /// The to-be-used system prompt(s) of the LLM. + let systemPrompts: [String] + /// Indicates if a model access test should be made during LLM setup. + let modelAccessTest: Bool + /// Separate OpenAI token that overrides the one defined within the ``LLMOpenAIPlatform``. let overwritingToken: String? @@ -32,14 +35,37 @@ public struct LLMOpenAIParameters: Sendable { /// - Parameters: /// - modelType: The to-be-used OpenAI model such as GPT3.5 or GPT4. /// - systemPrompt: The to-be-used system prompt of the LLM enabling fine-tuning of the LLMs behaviour. Defaults to the regular OpenAI chat-based GPT system prompt. - /// - overwritingToken: Separate OpenAI token that overrides the one defined within the ``LLMOpenAIRunnerSetupTask``. + /// - modelAccessTest: Indicates if access to the configured OpenAI model via the specified token should be made upon LLM setup. + /// - overwritingToken: Separate OpenAI token that overrides the one defined within the ``LLMOpenAIPlatform``. public init( modelType: Model, systemPrompt: String = Defaults.defaultOpenAISystemPrompt, + modelAccessTest: Bool = false, + overwritingToken: String? = nil + ) { + self.modelType = modelType + self.systemPrompts = [systemPrompt] + self.modelAccessTest = modelAccessTest + self.overwritingToken = overwritingToken + } + + /// Creates the ``LLMOpenAIParameters``. + /// + /// - Parameters: + /// - modelType: The to-be-used OpenAI model such as GPT3.5 or GPT4. + /// - systemPrompts: The to-be-used system prompt(s) of the LLM enabling fine-tuning of the LLMs behaviour. 
Defaults to the regular OpenAI chat-based GPT system prompt. + /// - modelAccessTest: Indicates if access to the configured OpenAI model via the specified token should be made upon LLM setup. + /// - overwritingToken: Separate OpenAI token that overrides the one defined within the ``LLMOpenAIPlatform``. + @_disfavoredOverload + public init( + modelType: Model, + systemPrompts: [String] = [Defaults.defaultOpenAISystemPrompt], + modelAccessTest: Bool = false, overwritingToken: String? = nil ) { self.modelType = modelType - self.systemPrompt = systemPrompt + self.systemPrompts = systemPrompts + self.modelAccessTest = modelAccessTest self.overwritingToken = overwritingToken } } diff --git a/Sources/SpeziLLMOpenAI/Configuration/LLMOpenAIPlatformConfiguration.swift b/Sources/SpeziLLMOpenAI/Configuration/LLMOpenAIPlatformConfiguration.swift new file mode 100644 index 0000000..a528efb --- /dev/null +++ b/Sources/SpeziLLMOpenAI/Configuration/LLMOpenAIPlatformConfiguration.swift @@ -0,0 +1,39 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation + + +/// Represents the configuration of the Spezi ``LLMOpenAIPlatform``. +public struct LLMOpenAIPlatformConfiguration: Sendable { + /// The task priority of the initiated LLM inference tasks. + let taskPriority: TaskPriority + let concurrentStreams: Int + let apiToken: String? + let timeout: TimeInterval + + + /// Creates the ``LLMOpenAIPlatformConfiguration`` which configures the Spezi ``LLMOpenAIPlatform``. + /// + /// - Parameters: + /// - taskPriority: The task priority of the initiated LLM inference tasks, defaults to `.userInitiated`. + /// - concurrentStreams: Indicates the number of concurrent streams to the OpenAI API, defaults to `10`. + /// - apiToken: Specifies the OpenAI API token on a global basis, defaults to `nil`. + /// - timeout: Indicates the maximum network timeout of OpenAI requests in seconds. defaults to `60`. + public init( + taskPriority: TaskPriority = .userInitiated, + concurrentStreams: Int = 10, + apiToken: String? = nil, + timeout: TimeInterval = 60 + ) { + self.taskPriority = taskPriority + self.concurrentStreams = concurrentStreams + self.apiToken = apiToken + self.timeout = timeout + } +} diff --git a/Sources/SpeziLLMOpenAI/FunctionCalling/Helpers/LLMFunctionBuilder.swift b/Sources/SpeziLLMOpenAI/FunctionCalling/Helpers/LLMFunctionBuilder.swift index 7a5bc6a..95ff68b 100644 --- a/Sources/SpeziLLMOpenAI/FunctionCalling/Helpers/LLMFunctionBuilder.swift +++ b/Sources/SpeziLLMOpenAI/FunctionCalling/Helpers/LLMFunctionBuilder.swift @@ -9,7 +9,7 @@ import Foundation -/// A result builder used to aggregate multiple ``LLMFunction``s within the ``LLMOpenAI``. +/// A result builder used to aggregate multiple ``LLMFunction``s within the ``LLMOpenAISchema``. @resultBuilder public enum LLMFunctionBuilder { /// If declared, provides contextual type information for statement expressions to translate them into partial results. 
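For orientation, here is a minimal sketch of how the new `LLMOpenAIPlatformConfiguration` and the `systemPrompts`/`modelAccessTest` additions to `LLMOpenAIParameters` from the hunks above might be wired together. The `LLMOpenAIPlatform(configuration:)` initializer label and the `.gpt4_1106_preview` model constant are assumptions taken from the surrounding documentation in this diff, not from these hunks themselves.

```swift
import Spezi
import SpeziLLM
import SpeziLLMOpenAI

class LLMOpenAIAppDelegate: SpeziAppDelegate {
    override var configuration: Configuration {
        Configuration {
            LLMRunner {
                // Global OpenAI platform settings; the `configuration:` label is an assumption.
                LLMOpenAIPlatform(
                    configuration: LLMOpenAIPlatformConfiguration(
                        taskPriority: .userInitiated,   // priority of the initiated inference tasks
                        concurrentStreams: 10,          // number of concurrent streams to the OpenAI API
                        apiToken: nil,                  // `nil`: no global token; can be overridden per model via `overwritingToken`
                        timeout: 60                     // network timeout of OpenAI requests in seconds
                    )
                )
            }
        }
    }
}

// Per-model parameters using the new array-based `systemPrompts` overload
// and the `modelAccessTest` flag introduced in this diff.
let parameters = LLMOpenAIParameters(
    modelType: .gpt4_1106_preview,
    systemPrompts: ["You're a helpful assistant that answers questions from users."],
    modelAccessTest: true
)
```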
diff --git a/Sources/SpeziLLMOpenAI/FunctionCalling/Helpers/_LLMFunctionCollection.swift b/Sources/SpeziLLMOpenAI/FunctionCalling/Helpers/_LLMFunctionCollection.swift index 7547f02..260618f 100644 --- a/Sources/SpeziLLMOpenAI/FunctionCalling/Helpers/_LLMFunctionCollection.swift +++ b/Sources/SpeziLLMOpenAI/FunctionCalling/Helpers/_LLMFunctionCollection.swift @@ -12,7 +12,7 @@ import SwiftUI /// Defines a collection of ``SpeziLLMOpenAI`` ``LLMFunction``s. /// -/// You can not create a `_LLMFunctionCollection` yourself. Please use the ``LLMOpenAI`` that internally creates a `_LLMFunctionCollection` with the passed ``LLMFunction``s. +/// You can not create a `_LLMFunctionCollection` yourself. Please use the ``LLMOpenAISchema`` that internally creates a `_LLMFunctionCollection` with the passed ``LLMFunction``s. public struct _LLMFunctionCollection { // swiftlint:disable:this type_name var functions: [String: LLMFunction] = [:] diff --git a/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunction.swift b/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunction.swift index 485ee36..de49e41 100644 --- a/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunction.swift +++ b/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunction.swift @@ -12,7 +12,7 @@ /// The `LLMFunction` is the Spezi-based implementation of an [OpenAI LLM function](https://platform.openai.com/docs/guides/function-calling). /// It enables a structured, bidirectional, and reliable communication between the OpenAI LLMs and external tools, such as the Spezi ecosystem. /// -/// Upon initializing the ``LLMOpenAI``, developers can pass an array of ``LLMFunction``s via ``LLMOpenAI/init(parameters:modelParameters:_:)``. +/// Upon initializing the ``LLMOpenAISchema``, developers can pass an array of ``LLMFunction``s via ``LLMOpenAISchema/init(parameters:modelParameters:injectIntoContext:_:)``. /// These functions are then made available to OpenAI's GPT models and can be called if the model decides to do so, based on the current conversational context. /// An ``LLMFunction`` can have multiple ``LLMFunction/Parameter``s (`@Parameter`) to tailor the requested functionality of the LLM. /// @@ -30,7 +30,7 @@ /// /// # Usage /// -/// The code below demonstrates a short example of the base usage of ``LLMFunction``s with ``LLMOpenAI``. +/// The code below demonstrates a short example of the base usage of ``LLMFunction``s with ``LLMOpenAISchema``. /// In case the user asks the LLM about the weather in a specific location, the LLM will request to call the `WeatherFunction` to provide a (in this example dummy) weather reading. /// /// ```swift @@ -49,7 +49,7 @@ /// /// // Enclosing view to display an LLM chat /// struct LLMOpenAIChatTestView: View { -/// private let model = LLMOpenAI( +/// private let schema = LLMOpenAISchema( /// parameters: .init( /// modelType: .gpt4_1106_preview, /// systemPrompt: "You're a helpful assistant that answers questions from users." 
@@ -60,7 +60,7 @@ /// /// var body: some View { /// LLMChatView( -/// model: model +/// schema: schema /// ) /// } /// } diff --git a/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+ArrayTypes.swift b/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+ArrayTypes.swift index e7a8e6b..c9d5649 100644 --- a/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+ArrayTypes.swift +++ b/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+ArrayTypes.swift @@ -22,7 +22,7 @@ extension _LLMFunctionParameterWrapper where T: AnyArray, T.Element: BinaryInteg /// - minItems: Defines the minimum amount of values in the `array`. /// - maxItems: Defines the maximum amount of values in the `array`. /// - uniqueItems: Specifies if all `array` elements need to be unique. - public convenience init( + public convenience init( description: D, const: (any StringProtocol)? = nil, multipleOf: Int? = nil, @@ -60,7 +60,7 @@ extension _LLMFunctionParameterWrapper where T: AnyArray, T.Element: BinaryFloat /// - minItems: Defines the minimum amount of values in the `array`. /// - maxItems: Defines the maximum amount of values in the `array`. /// - uniqueItems: Specifies if all `array` elements need to be unique. - public convenience init( + public convenience init( description: D, const: (any StringProtocol)? = nil, minimum: T.Element? = nil, @@ -94,7 +94,7 @@ extension _LLMFunctionParameterWrapper where T: AnyArray, T.Element == Bool { /// - minItems: Defines the minimum amount of values in the `array`. /// - maxItems: Defines the maximum amount of values in the `array`. /// - uniqueItems: Specifies if all `array` elements need to be unique. - public convenience init( + public convenience init( description: D, const: (any StringProtocol)? = nil, minItems: Int? = nil, @@ -126,7 +126,7 @@ extension _LLMFunctionParameterWrapper where T: AnyArray, T.Element: StringProto /// - minItems: Defines the minimum amount of values in the `array`. /// - maxItems: Defines the maximum amount of values in the `array`. /// - uniqueItems: Specifies if all `array` elements need to be unique. - public convenience init( + public convenience init( description: D, pattern: (any StringProtocol)? = nil, const: (any StringProtocol)? = nil, diff --git a/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+CustomTypes.swift b/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+CustomTypes.swift index a93eb64..03c7a50 100644 --- a/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+CustomTypes.swift +++ b/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+CustomTypes.swift @@ -18,7 +18,7 @@ extension _LLMFunctionParameterWrapper where T: AnyArray, T.Element: LLMFunction /// - minItems: Defines the minimum amount of values in the `array`. /// - maxItems: Defines the maximum amount of values in the `array`. /// - uniqueItems: Specifies if all `array` elements need to be unique. - public convenience init( + public convenience init( description: D, minItems: Int? = nil, maxItems: Int? = nil, @@ -52,7 +52,7 @@ extension _LLMFunctionParameterWrapper where T: AnyOptional, T.Wrapped: AnyArray /// - minItems: Defines the minimum amount of values in the `array`. /// - maxItems: Defines the maximum amount of values in the `array`. /// - uniqueItems: Specifies if all `array` elements need to be unique. - public convenience init( + public convenience init( description: D, minItems: Int? = nil, maxItems: Int? 
= nil, diff --git a/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+Enum.swift b/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+Enum.swift index 1d87893..85a377a 100644 --- a/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+Enum.swift +++ b/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+Enum.swift @@ -16,7 +16,7 @@ extension _LLMFunctionParameterWrapper where T: LLMFunctionParameterEnum, T.RawV /// - Parameters: /// - description: Describes the purpose of the parameter, used by the LLM to grasp the purpose of the parameter. /// - const: Specifies the constant `String`-based value of a certain parameter. - public convenience init( + public convenience init( description: D, const: (any StringProtocol)? = nil ) { @@ -35,7 +35,7 @@ extension _LLMFunctionParameterWrapper where T: AnyOptional, T.Wrapped: LLMFunct /// - Parameters: /// - description: Describes the purpose of the parameter, used by the LLM to grasp the purpose of the parameter. /// - const: Specifies the constant `String`-based value of a certain parameter. - public convenience init( + public convenience init( description: D, const: (any StringProtocol)? = nil ) { @@ -57,7 +57,7 @@ extension _LLMFunctionParameterWrapper where T: AnyArray, T.Element: LLMFunction /// - minItems: Defines the minimum amount of values in the `array`. /// - maxItems: Defines the maximum amount of values in the `array`. /// - uniqueItems: Specifies if all `array` elements need to be unique. - public convenience init( + public convenience init( description: D, const: (any StringProtocol)? = nil, minItems: Int? = nil, @@ -91,7 +91,7 @@ extension _LLMFunctionParameterWrapper where T: AnyOptional, /// - minItems: Defines the minimum amount of values in the `array`. /// - maxItems: Defines the maximum amount of values in the `array`. /// - uniqueItems: Specifies if all `array` elements need to be unique. - public convenience init( + public convenience init( description: D, const: (any StringProtocol)? = nil, minItems: Int? = nil, diff --git a/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+OptionalTypes.swift b/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+OptionalTypes.swift index 544ea7c..db497fd 100644 --- a/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+OptionalTypes.swift +++ b/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+OptionalTypes.swift @@ -19,7 +19,7 @@ extension _LLMFunctionParameterWrapper where T: AnyOptional, T.Wrapped: BinaryIn /// - multipleOf: Defines that the LLM parameter needs to be a multiple of the init argument. /// - minimum: The minimum value of the parameter. /// - maximum: The maximum value of the parameter. - public convenience init( + public convenience init( description: D, const: (any StringProtocol)? = nil, multipleOf: Int? = nil, @@ -45,7 +45,7 @@ extension _LLMFunctionParameterWrapper where T: AnyOptional, T.Wrapped: BinaryFl /// - const: Specifies the constant `String`-based value of a certain parameter. /// - minimum: The minimum value of the parameter. /// - maximum: The maximum value of the parameter. - public convenience init( + public convenience init( description: D, const: (any StringProtocol)? = nil, minimum: T.Wrapped? = nil, @@ -67,7 +67,7 @@ extension _LLMFunctionParameterWrapper where T: AnyOptional, T.Wrapped == Bool { /// - Parameters: /// - description: Describes the purpose of the parameter, used by the LLM to grasp the purpose of the parameter. 
/// - const: Specifies the constant `String`-based value of a certain parameter. - public convenience init( + public convenience init( description: D, const: (any StringProtocol)? = nil ) { @@ -88,7 +88,7 @@ extension _LLMFunctionParameterWrapper where T: AnyOptional, T.Wrapped: StringPr /// - pattern: A Regular Expression that the parameter needs to conform to. /// - const: Specifies the constant `String`-based value of a certain parameter. /// - enumValues: Defines all cases of the `String` parameter. - public convenience init( + public convenience init( description: D, format: _LLMFunctionParameterWrapper.Format? = nil, pattern: (any StringProtocol)? = nil, @@ -118,7 +118,7 @@ extension _LLMFunctionParameterWrapper where T: AnyOptional, T.Wrapped: AnyArray /// - minItems: Defines the minimum amount of values in the `array`. /// - maxItems: Defines the maximum amount of values in the `array`. /// - uniqueItems: Specifies if all `array` elements need to be unique. - public convenience init( + public convenience init( description: D, const: (any StringProtocol)? = nil, multipleOf: Int? = nil, @@ -156,7 +156,7 @@ extension _LLMFunctionParameterWrapper where T: AnyOptional, T.Wrapped: AnyArray /// - minItems: Defines the minimum amount of values in the `array`. /// - maxItems: Defines the maximum amount of values in the `array`. /// - uniqueItems: Specifies if all `array` elements need to be unique. - public convenience init( + public convenience init( description: D, const: (any StringProtocol)? = nil, minimum: T.Wrapped.Element? = nil, @@ -190,7 +190,7 @@ extension _LLMFunctionParameterWrapper where T: AnyOptional, T.Wrapped: AnyArray /// - minItems: Defines the minimum amount of values in the `array`. /// - maxItems: Defines the maximum amount of values in the `array`. /// - uniqueItems: Specifies if all `array` elements need to be unique. - public convenience init( + public convenience init( description: D, const: (any StringProtocol)? = nil, minItems: Int? = nil, @@ -222,7 +222,7 @@ extension _LLMFunctionParameterWrapper where T: AnyOptional, T.Wrapped: AnyArray /// - minItems: Defines the minimum amount of values in the `array`. /// - maxItems: Defines the maximum amount of values in the `array`. /// - uniqueItems: Specifies if all `array` elements need to be unique. - public convenience init( + public convenience init( description: D, pattern: (any StringProtocol)? = nil, const: (any StringProtocol)? = nil, diff --git a/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+PrimitiveTypes.swift b/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+PrimitiveTypes.swift index 4b130fb..232447f 100644 --- a/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+PrimitiveTypes.swift +++ b/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper+PrimitiveTypes.swift @@ -17,7 +17,7 @@ extension _LLMFunctionParameterWrapper where T: BinaryInteger { /// - multipleOf: Defines that the LLM parameter needs to be a multiple of the init argument. /// - minimum: The minimum value of the parameter. /// - maximum: The maximum value of the parameter. - public convenience init( + public convenience init( description: D, const: (any StringProtocol)? = nil, multipleOf: Int? = nil, @@ -44,7 +44,7 @@ extension _LLMFunctionParameterWrapper where T: BinaryFloatingPoint { /// - const: Specifies the constant `String`-based value of a certain parameter. /// - minimum: The minimum value of the parameter. /// - maximum: The maximum value of the parameter. 
- public convenience init( + public convenience init( description: D, const: (any StringProtocol)? = nil, minimum: T? = nil, @@ -66,7 +66,7 @@ extension _LLMFunctionParameterWrapper where T == Bool { /// - Parameters: /// - description: Describes the purpose of the parameter, used by the LLM to grasp the purpose of the parameter. /// - const: Specifies the constant `String`-based value of a certain parameter. - public convenience init( + public convenience init( description: D, const: (any StringProtocol)? = nil ) { @@ -87,7 +87,7 @@ extension _LLMFunctionParameterWrapper where T: StringProtocol { /// - pattern: A Regular Expression that the parameter needs to conform to. /// - const: Specifies the constant `String`-based value of a certain parameter. /// - enumValues: Defines all cases of the `String` parameter. - public convenience init( + public convenience init( description: D, format: _LLMFunctionParameterWrapper.Format? = nil, pattern: (any StringProtocol)? = nil, diff --git a/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper.swift b/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper.swift index c6a3e48..cb63d34 100644 --- a/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper.swift +++ b/Sources/SpeziLLMOpenAI/FunctionCalling/LLMFunctionParameterWrapper.swift @@ -17,7 +17,7 @@ public typealias LLMFunctionParameterItemSchema = JSONSchema.Items /// Refer to the documentation of ``LLMFunction/Parameter`` for information on how to use the `@Parameter` property wrapper. @propertyWrapper -public class _LLMFunctionParameterWrapper: LLMFunctionParameterSchemaCollector { // swiftlint:disable:this type_name +public class _LLMFunctionParameterWrapper: LLMFunctionParameterSchemaCollector { // swiftlint:disable:this type_name private var injectedValue: T? var schema: LLMFunctionParameterPropertySchema @@ -50,7 +50,7 @@ public class _LLMFunctionParameterWrapper: LLMF /// - Parameters: /// - description: Describes the purpose of the parameter, used by the LLM to grasp the purpose of the parameter. 
@_disfavoredOverload - public convenience init(description: D) where T: LLMFunctionParameter { + public convenience init(description: D) where T: LLMFunctionParameter { self.init(schema: .init( type: T.schema.type, description: String(description), // Take description from the property wrapper, all other things from self defined schema @@ -105,6 +105,6 @@ extension LLMFunction { /// } /// } /// ``` - public typealias Parameter = - _LLMFunctionParameterWrapper where WrappedValue: Decodable, Description: StringProtocol + public typealias Parameter = + _LLMFunctionParameterWrapper where WrappedValue: Decodable } diff --git a/Sources/SpeziLLMOpenAI/Helpers/LLMOpenAIFinishReason.swift b/Sources/SpeziLLMOpenAI/Helpers/LLMOpenAIFinishReason.swift new file mode 100644 index 0000000..8ddd732 --- /dev/null +++ b/Sources/SpeziLLMOpenAI/Helpers/LLMOpenAIFinishReason.swift @@ -0,0 +1,20 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation + + +/// Represents possible OpenAI finish reasons in the inference response +/// More documentation can be found in the [OpenAI docs](https://platform.openai.com/docs/guides/text-generation/chat-completions-api) +enum LLMOpenAIFinishReason: String, Decodable { + case stop + case length + case functionCall = "function_call" + case contentFilter = "content_filter" + case null +} diff --git a/Sources/SpeziLLMOpenAI/Helpers/LLMStreamResult.swift b/Sources/SpeziLLMOpenAI/Helpers/LLMOpenAIStreamResult.swift similarity index 62% rename from Sources/SpeziLLMOpenAI/Helpers/LLMStreamResult.swift rename to Sources/SpeziLLMOpenAI/Helpers/LLMOpenAIStreamResult.swift index eebf903..558d460 100644 --- a/Sources/SpeziLLMOpenAI/Helpers/LLMStreamResult.swift +++ b/Sources/SpeziLLMOpenAI/Helpers/LLMOpenAIStreamResult.swift @@ -9,7 +9,7 @@ import OpenAI -/// Helper to process the returned stream by the LLM output generation call. +/// Helper to process the returned stream by the LLM output generation call, especially in regards to the function call and a possible stop reason struct LLMOpenAIStreamResult { struct FunctionCall { var name: String? @@ -23,26 +23,29 @@ struct LLMOpenAIStreamResult { } - let id: Int - var content: String? + var deltaContent: String? var role: Chat.Role? var functionCall: FunctionCall? - var finishReason: String? + private var finishReasonBase: String? + var finishReason: LLMOpenAIFinishReason { + guard let finishReasonBase else { + return .null + } + + return .init(rawValue: finishReasonBase) ?? .null + } - init(id: Int, content: String? = nil, role: Chat.Role? = nil, functionCall: FunctionCall? = nil, finishReason: String? = nil) { - self.id = id - self.content = content + init(deltaContent: String? = nil, role: Chat.Role? = nil, functionCall: FunctionCall? = nil, finishReason: String? = nil) { + self.deltaContent = deltaContent self.role = role self.functionCall = functionCall - self.finishReason = finishReason + self.finishReasonBase = finishReason } - mutating func append(choice: ChatStreamResult.Choice) { - if let deltaContent = choice.delta.content { - self.content = (self.content ?? 
"") + deltaContent - } + mutating func append(choice: ChatStreamResult.Choice) -> Self { + self.deltaContent = choice.delta.content if let role = choice.delta.role { self.role = role @@ -63,8 +66,10 @@ struct LLMOpenAIStreamResult { self.functionCall = newFunctionCall } - if let finishReason = choice.finishReason { - self.finishReason = (self.finishReason ?? "") + finishReason + if let finishReasonBase = choice.finishReason { + self.finishReasonBase = (self.finishReasonBase ?? "") + finishReasonBase } + + return self } } diff --git a/Sources/SpeziLLMOpenAI/Helpers/OpenAI+Export.swift b/Sources/SpeziLLMOpenAI/Helpers/OpenAI+Export.swift new file mode 100644 index 0000000..6d37a82 --- /dev/null +++ b/Sources/SpeziLLMOpenAI/Helpers/OpenAI+Export.swift @@ -0,0 +1,16 @@ +// +// This source file is part of the Stanford LLM on FHIR project +// +// SPDX-FileCopyrightText: 2023 Stanford University +// +// SPDX-License-Identifier: MIT +// + +import struct OpenAI.Model + + +/// Convenience export of the `OpenAI/Model` type. +/// +/// The ``LLMOpenAIModelType`` exports the `OpenAI/Model` describing the type of the to-be-used OpenAI Model. +/// This enables convenience access to the `OpenAI/Model` without naming conflicts resulting from the `OpenAI/Model` name. +public typealias LLMOpenAIModelType = Model diff --git a/Sources/SpeziLLMOpenAI/LLMOpenAI+Generation.swift b/Sources/SpeziLLMOpenAI/LLMOpenAI+Generation.swift deleted file mode 100644 index 4d8ddef..0000000 --- a/Sources/SpeziLLMOpenAI/LLMOpenAI+Generation.swift +++ /dev/null @@ -1,108 +0,0 @@ -// -// This source file is part of the Stanford Spezi open source project -// -// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) -// -// SPDX-License-Identifier: MIT -// - -import Foundation -import OpenAI -import SpeziChat - - -extension LLMOpenAI { - // swiftlint:disable:next identifier_name function_body_length - func _generate(continuation: AsyncThrowingStream.Continuation) async throws { - while true { - let chatStream: AsyncThrowingStream = await self.model.chatsStream(query: self.openAIChatQuery) - - var llmStreamResults: [LLMOpenAIStreamResult] = [] - - for try await chatStreamResult in chatStream { - // Important to iterate over all choices as LLM could choose to call multiple functions - for choice in chatStreamResult.choices { - // Already existing stream result - if let existingIndex = llmStreamResults.firstIndex(where: { $0.id == choice.index }) { - var existingLLMStreamResult = llmStreamResults[existingIndex] - existingLLMStreamResult.append(choice: choice) - llmStreamResults[existingIndex] = existingLLMStreamResult - // New stream result - } else { - var newLLMStreamResult = LLMOpenAIStreamResult(id: choice.index) - newLLMStreamResult.append(choice: choice) - llmStreamResults.append(newLLMStreamResult) - } - } - - // Append assistant messages during the streaming to ensure that they are visible to the user during processing - let assistantContentResults = llmStreamResults.filter { llmStreamResult in - llmStreamResult.role == .assistant && !(llmStreamResult.content?.isEmpty ?? true) - } - - // Only consider the first found assistant content result - guard let content = assistantContentResults.first?.content else { - continue - } - - await MainActor.run { - self.context.append(assistantOutput: content, overwrite: true) - } - } - - let functionCalls = llmStreamResults.compactMap { $0.functionCall } - - // Exit the while loop if we don't have any function calls. 
- guard !functionCalls.isEmpty else { - break - } - - // Parallelize function call execution - try await withThrowingTaskGroup(of: Void.self) { group in - for functionCall in functionCalls { - group.addTask { - Self.logger.debug(""" - SpeziLLMOpenAI: Function call \(functionCall.name ?? ""), Arguments: \(functionCall.arguments ?? "") - """) - - guard let functionName = functionCall.name, - let functionArgument = functionCall.arguments?.data(using: .utf8), - let function = self.functions[functionName] else { - Self.logger.debug("SpeziLLMOpenAI: Couldn't find the requested function to call") - return - } - - // Inject parameters into the @Parameters of the function call - do { - try function.injectParameters(from: functionArgument) - } catch { - throw LLMOpenAIError.invalidFunctionCallArguments(error) - } - - // Execute function - // Errors thrown by the functions are surfaced to the user as an LLM generation error - let functionCallResponse = try await function.execute() - - Self.logger.debug(""" - SpeziLLMOpenAI: Function call \(functionCall.name ?? "") \ - Arguments: \(functionCall.arguments ?? "") \ - Response: \(functionCallResponse ?? "") - """) - - await MainActor.run { - let defaultResponse = "Function call to \(functionCall.name ?? "") succeeded, function intentionally didn't respond anything." - - // Return `defaultResponse` in case of `nil` or empty return of the function call - self.context.append( - forFunction: functionName, - response: functionCallResponse?.isEmpty != false ? defaultResponse : (functionCallResponse ?? defaultResponse) - ) - } - } - } - - try await group.waitForAll() - } - } - } -} diff --git a/Sources/SpeziLLMOpenAI/LLMOpenAI.swift b/Sources/SpeziLLMOpenAI/LLMOpenAI.swift deleted file mode 100644 index 8fbc822..0000000 --- a/Sources/SpeziLLMOpenAI/LLMOpenAI.swift +++ /dev/null @@ -1,208 +0,0 @@ -// -// This source file is part of the Stanford Spezi open source project -// -// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) -// -// SPDX-License-Identifier: MIT -// - -import Foundation -import struct OpenAI.Chat -import struct OpenAI.ChatFunctionDeclaration -import struct OpenAI.ChatQuery -import class OpenAI.OpenAI -import struct OpenAI.Model -import struct OpenAI.ChatStreamResult -import struct OpenAI.APIErrorResponse -import os -import SpeziChat -import SpeziLLM - - -/// Generate output via the OpenAI GPT models. -/// -/// ``LLMOpenAI`` is a Spezi `LLM` and provides access to text-based models from OpenAI, such as GPT-3.5 or GPT-4. -/// The main properties of the ``LLMOpenAI`` are ``LLMOpenAI/context`` and ``LLMOpenAI/state``. -/// Use these properties to access the conversational history of the `LLM` as well as the current generation state. -/// -/// - Important: ``LLMOpenAI`` shouldn't be used on it's own but always wrapped by the Spezi `LLMRunner` as the runner handles -/// all management overhead tasks. -/// -/// > Tip: ``SpeziLLMOpenAI`` also enables the function calling mechanism to establish a structured, bidirectional, and reliable communication between the OpenAI LLMs and external tools. For details, refer to ``LLMFunction`` and ``LLMFunction/Parameter`` or the DocC article. -/// -/// ### Usage -/// -/// The code section below showcases a complete code example on how to use the ``LLMOpenAI`` in combination with a `LLMRunner` from the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) target. 
-/// -/// - Important: The model can be queried via the `LLMGenerationTask/generate()` and `LLMGenerationTask/generate(prompt:)` calls (returned from wrapping the ``LLMOpenAI`` in the `LLMRunner` from the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) target). -/// The first method takes no input prompt at all but uses the current context of the model (so `LLM/context`) to query the model. -/// The second takes a `String`-based input from the user and appends it to the context of the model (so `LLM/context`) before querying the model. -/// -/// ```swift -/// class LLMOpenAIAppDelegate: SpeziAppDelegate { -/// override var configuration: Configuration { -/// Configuration { -/// LLMRunner { -/// LLMOpenAIRunnerSetupTask() -/// } -/// } -/// } -/// } -/// -/// struct LLMOpenAIChatView: View { -/// // The runner responsible for executing the OpenAI LLM. -/// @Environment(LLMRunner.self) var runner: LLMRunner -/// -/// // The OpenAI LLM -/// @State var model: LLMOpenAI = .init( -/// parameters: .init( -/// modelType: .gpt3_5Turbo, -/// systemPrompt: "You're a helpful assistant that answers questions from users.", -/// overwritingToken: "abc123" -/// ) -/// ) -/// @State var responseText: String -/// -/// func executePrompt(prompt: String) { -/// // Execute the query on the runner, returning a stream of outputs -/// let stream = try await runner(with: model).generate(prompt: "Hello LLM!") -/// -/// for try await token in stream { -/// responseText.append(token) -/// } -/// } -/// } -/// ``` -@Observable -public class LLMOpenAI: LLM { - /// Default values of ``LLMOpenAI``. - public enum Defaults { - /// Empty default of passed function calls (`_LLMFunctionCollection`). - /// Reason: Cannot use internal init of `_LLMFunctionCollection` as default parameter within public ``LLMOpenAI/init(parameters:modelParameters:_:)``. - public static let emptyLLMFunctions: _LLMFunctionCollection = .init(functions: []) - } - - - /// A Swift Logger that logs important information from the ``LLMOpenAI``. - static let logger = Logger(subsystem: "edu.stanford.spezi", category: "SpeziLLM") - - @MainActor public var state: LLMState = .uninitialized - @MainActor public var context: SpeziChat.Chat = [] - - public let type: LLMHostingType = .cloud - let parameters: LLMOpenAIParameters - let modelParameters: LLMOpenAIModelParameters - let functions: [String: LLMFunction] - @ObservationIgnored private var wrappedModel: OpenAI? - - - var model: OpenAI { - guard let model = wrappedModel else { - preconditionFailure(""" - SpeziLLMOpenAI: Illegal Access - Tried to access the wrapped OpenAI model of `LLMOpenAI` before being initialized. - Ensure that the `LLMOpenAIRunnerSetupTask` is passed to the `LLMRunner` within the Spezi `Configuration`. - """) - } - return model - } - - - /// Creates a ``LLMOpenAI`` instance that can then be passed to the `LLMRunner` for execution. - /// - /// - Parameters: - /// - parameters: LLM Parameters - /// - modelParameters: LLM Model Parameters - /// - functionsCollection: LLM Functions (tools) used for the OpenAI function calling mechanism. 
- public init( - parameters: LLMOpenAIParameters, - modelParameters: LLMOpenAIModelParameters = .init(), - @LLMFunctionBuilder _ functionsCollection: @escaping () -> _LLMFunctionCollection = { Defaults.emptyLLMFunctions } - ) { - self.parameters = parameters - self.modelParameters = modelParameters - self.functions = functionsCollection().functions - - Task { @MainActor in - self.context.append(systemMessage: parameters.systemPrompt) - } - } - - - public func setup(runnerConfig: LLMRunnerConfiguration) async throws { - await MainActor.run { - self.state = .loading - } - - // Overwrite API token if passed - if let overwritingToken = self.parameters.overwritingToken { - self.wrappedModel = OpenAI( - configuration: .init( - token: overwritingToken, - organizationIdentifier: LLMOpenAIRunnerSetupTask.openAIModel.configuration.organizationIdentifier, - host: LLMOpenAIRunnerSetupTask.openAIModel.configuration.host, - timeoutInterval: LLMOpenAIRunnerSetupTask.openAIModel.configuration.timeoutInterval - ) - ) - } else { - self.wrappedModel = LLMOpenAIRunnerSetupTask.openAIModel - } - - do { - _ = try await self.model.model(query: .init(model: self.parameters.modelType)) - } catch let error as URLError { - throw LLMOpenAIError.connectivityIssues(error) - } catch { - LLMOpenAI.logger.error(""" - SpeziLLMOpenAI: Couldn't access the specified OpenAI model. - Ensure the model exists and the configured API key is able to access the model. - """) - throw LLMOpenAIError.modelAccessError(error) - } - - await MainActor.run { - self.state = .ready - } - } - - public func generate(continuation: AsyncThrowingStream.Continuation) async { - Self.logger.debug("SpeziLLMOpenAI: OpenAI GPT started a new inference") - - await MainActor.run { - self.state = .generating - } - - do { - try await _generate(continuation: continuation) - - continuation.finish() - - await MainActor.run { - self.state = .ready - } - - Self.logger.debug("SpeziLLMOpenAI: OpenAI GPT completed an inference") - } catch let error as APIErrorResponse { - if error.error.code == LLMOpenAIError.insufficientQuota.openAIErrorMessage { - LLMOpenAI.logger.error(""" - SpeziLLMOpenAI: Quota limit of OpenAI is reached. Ensure the configured API key has enough resources. - """) - await finishGenerationWithError(LLMOpenAIError.insufficientQuota, on: continuation) - } else { - LLMOpenAI.logger.error(""" - SpeziLLMOpenAI: OpenAI inference failed with a generation error. - """) - await finishGenerationWithError(LLMOpenAIError.generationError, on: continuation) - } - } catch let error as LLMOpenAIError { - LLMOpenAI.logger.error(""" - SpeziLLMOpenAI: OpenAI inference failed with the OpenAIError: \(error.localizedDescription). - """) - await finishGenerationWithError(error, on: continuation) - } catch { - LLMOpenAI.logger.error(""" - SpeziLLMOpenAI: OpenAI inference failed with a generation error. - """) - await finishGenerationWithError(LLMOpenAIError.generationError, on: continuation) - } - } -} diff --git a/Sources/SpeziLLMOpenAI/LLMOpenAIError.swift b/Sources/SpeziLLMOpenAI/LLMOpenAIError.swift index 62e0a6a..2510b3a 100644 --- a/Sources/SpeziLLMOpenAI/LLMOpenAIError.swift +++ b/Sources/SpeziLLMOpenAI/LLMOpenAIError.swift @@ -12,6 +12,8 @@ import SpeziLLM /// Errors that can occur by interacting with the OpenAI API. public enum LLMOpenAIError: LLMError { + /// OpenAI API token is missing. + case missingAPIToken /// OpenAI API token is invalid. 
case invalidAPIToken /// Connectivity error @@ -24,8 +26,12 @@ public enum LLMOpenAIError: LLMError { case generationError /// Error during accessing the OpenAI Model case modelAccessError(Error) + /// Invalid function call name + case invalidFunctionCallName /// Invalid function call parameters (mismatch between sent parameters from OpenAI and declared ones within the ``LLMFunction``), including the decoding error case invalidFunctionCallArguments(Error) + /// Exception during function call execution + case functionCallError(Error) /// Unknown error case unknownError(Error) @@ -41,6 +47,8 @@ public enum LLMOpenAIError: LLMError { public var errorDescription: String? { switch self { + case .missingAPIToken: + String(localized: LocalizedStringResource("LLM_MISSING_TOKEN_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) case .invalidAPIToken: String(localized: LocalizedStringResource("LLM_INVALID_TOKEN_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) case .connectivityIssues: @@ -53,8 +61,12 @@ public enum LLMOpenAIError: LLMError { String(localized: LocalizedStringResource("LLM_GENERATION_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) case .modelAccessError: String(localized: LocalizedStringResource("LLM_MODEL_ACCESS_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) + case .invalidFunctionCallName: + String(localized: LocalizedStringResource("LLM_INVALID_FUNCTION_CALL_NAME_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) case .invalidFunctionCallArguments: String(localized: LocalizedStringResource("LLM_INVALID_FUNCTION_ARGUMENTS_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) + case .functionCallError: + String(localized: LocalizedStringResource("LLM_FUNCTION_CALL_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) case .unknownError: String(localized: LocalizedStringResource("LLM_UNKNOWN_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) } @@ -62,6 +74,8 @@ public enum LLMOpenAIError: LLMError { public var recoverySuggestion: String? { switch self { + case .missingAPIToken: + String(localized: LocalizedStringResource("LLM_MISSING_TOKEN_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) case .invalidAPIToken: String(localized: LocalizedStringResource("LLM_INVALID_TOKEN_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) case .connectivityIssues: @@ -74,8 +88,12 @@ public enum LLMOpenAIError: LLMError { String(localized: LocalizedStringResource("LLM_GENERATION_ERROR_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) case .modelAccessError: String(localized: LocalizedStringResource("LLM_MODEL_ACCESS_ERROR_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) + case .invalidFunctionCallName: + String(localized: LocalizedStringResource("LLM_INVALID_FUNCTION_CALL_NAME_ERROR_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) case .invalidFunctionCallArguments: String(localized: LocalizedStringResource("LLM_INVALID_FUNCTION_ARGUMENTS_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) + case .functionCallError: + String(localized: LocalizedStringResource("LLM_FUNCTION_CALL_ERROR_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) case .unknownError: String(localized: LocalizedStringResource("LLM_UNKNOWN_ERROR_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) } @@ -83,6 +101,8 @@ public enum LLMOpenAIError: LLMError { public var failureReason: String? 
{ switch self { + case .missingAPIToken: + String(localized: LocalizedStringResource("LLM_MISSING_TOKEN_FAILURE_REASON", bundle: .atURL(from: .module))) case .invalidAPIToken: String(localized: LocalizedStringResource("LLM_INVALID_TOKEN_FAILURE_REASON", bundle: .atURL(from: .module))) case .connectivityIssues: @@ -95,23 +115,30 @@ public enum LLMOpenAIError: LLMError { String(localized: LocalizedStringResource("LLM_GENERATION_ERROR_FAILURE_REASON", bundle: .atURL(from: .module))) case .modelAccessError: String(localized: LocalizedStringResource("LLM_MODEL_ACCESS_ERROR_FAILURE_REASON", bundle: .atURL(from: .module))) + case .invalidFunctionCallName: + String(localized: LocalizedStringResource("LLM_INVALID_FUNCTION_CALL_NAME_ERROR_FAILURE_REASON", bundle: .atURL(from: .module))) case .invalidFunctionCallArguments: String(localized: LocalizedStringResource("LLM_INVALID_FUNCTION_ARGUMENTS_FAILURE_REASON", bundle: .atURL(from: .module))) + case .functionCallError: + String(localized: LocalizedStringResource("LLM_FUNCTION_CALL_ERROR_FAILURE_REASON", bundle: .atURL(from: .module))) case .unknownError: String(localized: LocalizedStringResource("LLM_UNKNOWN_ERROR_FAILURE_REASON", bundle: .atURL(from: .module))) } } - public static func == (lhs: LLMOpenAIError, rhs: LLMOpenAIError) -> Bool { + public static func == (lhs: LLMOpenAIError, rhs: LLMOpenAIError) -> Bool { // swiftlint:disable:this cyclomatic_complexity switch (lhs, rhs) { + case (.missingAPIToken, .missingAPIToken): true case (.invalidAPIToken, .invalidAPIToken): true case (.connectivityIssues, .connectivityIssues): true case (.storageError, .storageError): true case (.insufficientQuota, .insufficientQuota): true case (.generationError, .generationError): true case (.modelAccessError, .modelAccessError): true + case (.invalidFunctionCallName, .invalidFunctionCallName): true case (.invalidFunctionCallArguments, .invalidFunctionCallArguments): true + case (.functionCallError, .functionCallError): true case (.unknownError, .unknownError): true default: false } diff --git a/Sources/SpeziLLMOpenAI/LLMOpenAIPlatform.swift b/Sources/SpeziLLMOpenAI/LLMOpenAIPlatform.swift new file mode 100644 index 0000000..746a7c8 --- /dev/null +++ b/Sources/SpeziLLMOpenAI/LLMOpenAIPlatform.swift @@ -0,0 +1,107 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation +import os +import Semaphore +import Spezi +import SpeziLLM +import SpeziSecureStorage + +/// LLM execution platform of an ``LLMOpenAISchema``. +/// +/// The ``LLMOpenAIPlatform`` turns a received ``LLMOpenAISchema`` to an executable ``LLMOpenAISession``. +/// Use ``LLMOpenAIPlatform/callAsFunction(with:)`` with an ``LLMOpenAISchema`` parameter to get an executable ``LLMOpenAISession`` that does the actual inference. +/// +/// - Important: ``LLMOpenAIPlatform`` shouldn't be used directly but used via the `SpeziLLM` `LLMRunner` that delegates the requests towards the ``LLMOpenAIPlatform``. +/// The `SpeziLLM` `LLMRunner` must be configured with the ``LLMOpenAIPlatform`` within the Spezi `Configuration`. +/// +/// - Tip: For more information, refer to the documentation of the `LLMPlatform` from SpeziLLM. +/// +/// ### Usage +/// +/// The example below demonstrates the setup of the ``LLMOpenAIPlatform`` within the Spezi `Configuration`. 
+/// +/// ```swift +/// class TestAppDelegate: SpeziAppDelegate { +/// override var configuration: Configuration { +/// Configuration { +/// LLMRunner { +/// LLMOpenAIPlatform() +/// } +/// } +/// } +/// } +/// ``` +public class LLMOpenAIPlatform: LLMPlatform, DefaultInitializable, @unchecked Sendable { + /// A Swift Logger that logs important information from the ``LLMLocalSession``. + static let logger = Logger(subsystem: "edu.stanford.spezi", category: "SpeziLLMOpenAI") + + /// Enforce an arbitrary number of concurrent execution jobs of OpenAI LLMs. + private let semaphore: AsyncSemaphore + let configuration: LLMOpenAIPlatformConfiguration + + @MainActor public var state: LLMPlatformState = .idle + @Dependency private var tokenSaver: LLMOpenAITokenSaver + @Dependency private var secureStorage: SecureStorage + + /// Creates an instance of the ``LLMOpenAIPlatform``. + /// + /// - Parameters: + /// - configuration: The configuration of the platform. + public init(configuration: LLMOpenAIPlatformConfiguration) { + self.configuration = configuration + self.semaphore = AsyncSemaphore(value: configuration.concurrentStreams) + } + + /// Convenience initializer for the ``LLMOpenAIPlatform``. + public required convenience init() { + self.init(configuration: .init()) + } + + + public func configure() { + // If token passed via init + if let apiToken = configuration.apiToken { + do { + try secureStorage.store( + credentials: Credentials(username: LLMOpenAIConstants.credentialsUsername, password: apiToken), + server: LLMOpenAIConstants.credentialsServer + ) + } catch { + preconditionFailure(""" + SpeziLLMOpenAI: Configured OpenAI API token could not be stored within the SpeziSecureStorage. + """) + } + } + } + + public func callAsFunction(with llmSchema: LLMOpenAISchema) -> LLMOpenAISession { + LLMOpenAISession(self, schema: llmSchema, secureStorage: secureStorage) + } + + func exclusiveAccess() async throws { + try await semaphore.waitUnlessCancelled() + + if await state != .processing { + await MainActor.run { + state = .processing + } + } + } + + func signal() async { + let otherTasksWaiting = semaphore.signal() + + if !otherTasksWaiting { + await MainActor.run { + state = .idle + } + } + } +} diff --git a/Sources/SpeziLLMOpenAI/LLMOpenAIRunnerSetup.swift b/Sources/SpeziLLMOpenAI/LLMOpenAIRunnerSetup.swift deleted file mode 100644 index 3a9d5f5..0000000 --- a/Sources/SpeziLLMOpenAI/LLMOpenAIRunnerSetup.swift +++ /dev/null @@ -1,123 +0,0 @@ -// -// This source file is part of the Stanford Spezi open source project -// -// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) -// -// SPDX-License-Identifier: MIT -// - -import Foundation -import OpenAI -import Spezi -import SpeziLLM -import SpeziSecureStorage - - -/// Sets up the OpenAI environment in order to execute Spezi `LLM`s. -/// -/// The task needs to be stated within the `LLMRunner` initializer in the Spezi `Configuration`. -/// -/// One is able to specify Spezi-wide configurations for the OpenAI interaction, such as the API key or a network timeout duration (however, not a requirement!). -/// However, these configurations can be overwritten via individual ``LLMOpenAI`` instances. -/// -/// ### Usage -/// -/// A minimal example of using the ``LLMOpenAIRunnerSetupTask`` can be found below. 
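The ``LLMOpenAIPlatform`` introduced above gates concurrent OpenAI inference jobs through an `AsyncSemaphore` and persists an API token passed via its configuration. A minimal sketch of a non-default setup follows; it assumes that the memberwise labels of `LLMOpenAIPlatformConfiguration` match the `apiToken`, `concurrentStreams`, and `timeout` properties referenced in the code above, so the exact initializer signature may differ.

```swift
import Spezi
import SpeziLLM
import SpeziLLMOpenAI

class TestAppDelegate: SpeziAppDelegate {
    override var configuration: Configuration {
        Configuration {
            LLMRunner {
                // Assumed memberwise labels; the referenced properties exist on the configuration,
                // but the exact initializer signature may differ.
                LLMOpenAIPlatform(
                    configuration: .init(
                        apiToken: "<your OpenAI API token>", // persisted to SecureStorage in `configure()`
                        concurrentStreams: 2,                // at most two parallel OpenAI inference jobs
                        timeout: 60                          // request timeout in seconds
                    )
                )
            }
        }
    }
}
```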
-/// -/// ```swift -/// class LocalLLMAppDelegate: SpeziAppDelegate { -/// override var configuration: Configuration { -/// Configuration { -/// LLMRunner { -/// LLMOpenAIRunnerSetupTask(apiToken: "") -/// } -/// } -/// } -/// } -/// ``` -public class LLMOpenAIRunnerSetupTask: LLMRunnerSetupTask { - static var openAIModel: OpenAI { - guard let openAIModel = LLMOpenAIRunnerSetupTask.wrappedOpenAIModel else { - preconditionFailure(""" - Illegal Access: Tried to access the wrapped OpenAI model of the `LLMOpenAIRunnerSetupTask` before being initialized. - Ensure that the `LLMOpenAIRunnerSetupTask` is passed to the `LLMRunner` within the Spezi `Configuration`. - """) - } - return openAIModel - } - private static var wrappedOpenAIModel: OpenAI? - - - @Module.Model private var tokenSaver: LLMOpenAITokenSaver - @Dependency private var secureStorage: SecureStorage - - public let type: LLMHostingType = .cloud - private let apiToken: String? - private let timeout: TimeInterval - - - public init( - apiToken: String? = nil, - timeout: TimeInterval = 60 - ) { - self.apiToken = apiToken - self.timeout = timeout - } - - - public func configure() { - self.tokenSaver = LLMOpenAITokenSaver(secureStorage: secureStorage) - } - - public func setupRunner( - runnerConfig: LLMRunnerConfiguration - ) async throws { - // If token passed via init - if let apiToken { - LLMOpenAIRunnerSetupTask.wrappedOpenAIModel = OpenAI( - configuration: .init( - token: apiToken, - timeoutInterval: self.timeout - ) - ) - - try secureStorage.store( - credentials: Credentials(username: LLMOpenAIConstants.credentialsUsername, password: apiToken), - server: LLMOpenAIConstants.credentialsServer - ) - } else { - // If token is present within the Spezi `SecureStorage` - guard let credentials = try? secureStorage.retrieveCredentials( - LLMOpenAIConstants.credentialsUsername, - server: LLMOpenAIConstants.credentialsServer - ) else { - preconditionFailure(""" - SpeziLLM: OpenAI Token wasn't properly set, please ensure that the token is either passed directly via the Spezi `Configuration` - or stored within the `SecureStorage` via the `LLMOpenAITokenSaver` before dispatching the first inference. - """) - } - - // Initialize the OpenAI model - LLMOpenAIRunnerSetupTask.wrappedOpenAIModel = OpenAI( - configuration: .init( - token: credentials.password, - timeoutInterval: self.timeout - ) - ) - } - - // Check validity of passed token by making a request to list all models - do { - _ = try await LLMOpenAIRunnerSetupTask.openAIModel.models() - } catch let error as URLError { - throw LLMOpenAIError.connectivityIssues(error) - } catch let error as APIErrorResponse { - if error.error.code == LLMOpenAIError.invalidAPIToken.openAIErrorMessage { - throw LLMOpenAIError.invalidAPIToken - } - throw LLMOpenAIError.unknownError(error) - } catch { - throw LLMOpenAIError.unknownError(error) - } - } -} diff --git a/Sources/SpeziLLMOpenAI/LLMOpenAISchema.swift b/Sources/SpeziLLMOpenAI/LLMOpenAISchema.swift new file mode 100644 index 0000000..f3ea55d --- /dev/null +++ b/Sources/SpeziLLMOpenAI/LLMOpenAISchema.swift @@ -0,0 +1,58 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation +import SpeziChat +import SpeziLLM + + +/// Defines the type and configuration of the ``LLMOpenAISession``. +/// +/// The ``LLMOpenAISchema`` is used as a configuration for the to-be-used OpenAI LLM. 
It contains all information necessary for the creation of an executable ``LLMOpenAISession``. +/// It is bound to a ``LLMOpenAIPlatform`` that is responsible for turning the ``LLMOpenAISchema`` to an ``LLMOpenAISession``. +/// +/// - Tip: ``LLMOpenAISchema`` also enables the function calling mechanism to establish a structured, bidirectional, and reliable communication between the OpenAI LLMs and external tools. For details, refer to ``LLMFunction`` and ``LLMFunction/Parameter`` or the DocC article. +/// +/// - Tip: For more information, refer to the documentation of the `LLMSchema` from SpeziLLM. +public struct LLMOpenAISchema: LLMSchema, @unchecked Sendable { + public typealias Platform = LLMOpenAIPlatform + + + /// Default values of ``LLMOpenAISchema``. + public enum Defaults { + /// Empty default of passed function calls (`_LLMFunctionCollection`). + /// Reason: Cannot use internal init of `_LLMFunctionCollection` as default parameter within public ``LLMOpenAISchema/init(parameters:modelParameters:injectIntoContext:_:)``. + public static let emptyLLMFunctions: _LLMFunctionCollection = .init(functions: []) + } + + + let parameters: LLMOpenAIParameters + let modelParameters: LLMOpenAIModelParameters + let functions: [String: LLMFunction] + public let injectIntoContext: Bool + + + /// Creates an instance of the ``LLMOpenAISchema`` containing all necessary configuration for OpenAI LLM inference. + /// + /// - Parameters: + /// - parameters: Parameters of the OpenAI LLM client. + /// - modelParameters: Parameters of the used OpenAI LLM. + /// - injectIntoContext: Indicates if the inference output by the ``LLMOpenAISession`` should automatically be inserted into the ``LLMOpenAISession/context``, defaults to false. + /// - functionsCollection: LLM Functions (tools) used for the OpenAI function calling mechanism. + public init( + parameters: LLMOpenAIParameters, + modelParameters: LLMOpenAIModelParameters = .init(), + injectIntoContext: Bool = false, + @LLMFunctionBuilder _ functionsCollection: @escaping () -> _LLMFunctionCollection = { Defaults.emptyLLMFunctions } + ) { + self.parameters = parameters + self.modelParameters = modelParameters + self.injectIntoContext = injectIntoContext + self.functions = functionsCollection().functions + } +} diff --git a/Sources/SpeziLLMOpenAI/LLMOpenAI+Configuration.swift b/Sources/SpeziLLMOpenAI/LLMOpenAISession+Configuration.swift similarity index 65% rename from Sources/SpeziLLMOpenAI/LLMOpenAI+Configuration.swift rename to Sources/SpeziLLMOpenAI/LLMOpenAISession+Configuration.swift index 78fe6a6..834a96c 100644 --- a/Sources/SpeziLLMOpenAI/LLMOpenAI+Configuration.swift +++ b/Sources/SpeziLLMOpenAI/LLMOpenAISession+Configuration.swift @@ -8,7 +8,8 @@ import OpenAI -extension LLMOpenAI { + +extension LLMOpenAISession { /// Map the ``LLMOpenAI/context`` to the OpenAI `[Chat]` representation. private var openAIContext: [Chat] { get async { @@ -33,7 +34,7 @@ extension LLMOpenAI { /// in an OpenAI `ChatQuery` representation used for querying the OpenAI API. 
var openAIChatQuery: ChatQuery { get async { - let functions: [ChatFunctionDeclaration] = self.functions.values.compactMap { function in + let functions: [ChatFunctionDeclaration] = schema.functions.values.compactMap { function in let functionType = Swift.type(of: function) return .init( @@ -44,19 +45,19 @@ extension LLMOpenAI { } return await .init( - model: self.parameters.modelType, + model: schema.parameters.modelType, messages: self.openAIContext, - responseFormat: self.modelParameters.responseFormat, + responseFormat: schema.modelParameters.responseFormat, functions: functions.isEmpty ? nil : functions, - temperature: self.modelParameters.temperature, - topP: self.modelParameters.topP, - n: self.modelParameters.completionsPerOutput, - stop: self.modelParameters.stopSequence.isEmpty ? nil : self.modelParameters.stopSequence, - maxTokens: self.modelParameters.maxOutputLength, - presencePenalty: self.modelParameters.presencePenalty, - frequencyPenalty: self.modelParameters.frequencyPenalty, - logitBias: self.modelParameters.logitBias.isEmpty ? nil : self.modelParameters.logitBias, - user: self.modelParameters.user + temperature: schema.modelParameters.temperature, + topP: schema.modelParameters.topP, + n: schema.modelParameters.completionsPerOutput, + stop: schema.modelParameters.stopSequence.isEmpty ? nil : schema.modelParameters.stopSequence, + maxTokens: schema.modelParameters.maxOutputLength, + presencePenalty: schema.modelParameters.presencePenalty, + frequencyPenalty: schema.modelParameters.frequencyPenalty, + logitBias: schema.modelParameters.logitBias.isEmpty ? nil : schema.modelParameters.logitBias, + user: schema.modelParameters.user ) } } diff --git a/Sources/SpeziLLMOpenAI/LLMOpenAISession+Generation.swift b/Sources/SpeziLLMOpenAI/LLMOpenAISession+Generation.swift new file mode 100644 index 0000000..a5eb0d8 --- /dev/null +++ b/Sources/SpeziLLMOpenAI/LLMOpenAISession+Generation.swift @@ -0,0 +1,162 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation +import OpenAI +import SpeziChat + + +extension LLMOpenAISession { + /// Based on the input prompt, generate the output via the OpenAI API. + /// + /// - Parameters: + /// - continuation: A Swift `AsyncThrowingStream` that streams the generated output. + func _generate( // swiftlint:disable:this identifier_name function_body_length cyclomatic_complexity + continuation: AsyncThrowingStream.Continuation + ) async { + Self.logger.debug("SpeziLLMOpenAI: OpenAI GPT started a new inference") + await MainActor.run { + self.state = .generating + } + + while true { + let chatStream: AsyncThrowingStream = await self.model.chatsStream(query: self.openAIChatQuery) + + var llmStreamResults: [Int: LLMOpenAIStreamResult] = [:] + + do { + for try await chatStreamResult in chatStream { + // Important to iterate over all choices as LLM could choose to call multiple functions / generate multiple choices + for choice in chatStreamResult.choices { + llmStreamResults[choice.index] = llmStreamResults[ + choice.index, + default: .init() + ].append(choice: choice) + } + + // Append assistant messages during the streaming to ensure that they are visible to the user during processing + let assistantResults = llmStreamResults.values.filter { llmStreamResult in + llmStreamResult.role == .assistant && !(llmStreamResult.deltaContent?.isEmpty ?? 
true) + } + + // Only consider the first found assistant content result + guard let content = assistantResults.first?.deltaContent else { + continue + } + + // Automatically inject the yielded string piece into the `LLMLocal/context` + if schema.injectIntoContext { + await MainActor.run { + context.append(assistantOutput: content) + } + } + + continuation.yield(content) + } + + await MainActor.run { + context.completeAssistantStreaming() + } + } catch let error as APIErrorResponse { + switch error.error.code { + case LLMOpenAIError.invalidAPIToken.openAIErrorMessage: + Self.logger.error("SpeziLLMOpenAI: Invalid OpenAI API token - \(error)") + await finishGenerationWithError(LLMOpenAIError.invalidAPIToken, on: continuation) + case LLMOpenAIError.insufficientQuota.openAIErrorMessage: + Self.logger.error("SpeziLLMOpenAI: Insufficient OpenAI API quota - \(error)") + await finishGenerationWithError(LLMOpenAIError.insufficientQuota, on: continuation) + default: + Self.logger.error("SpeziLLMOpenAI: Generation error occurred - \(error)") + await finishGenerationWithError(LLMOpenAIError.generationError, on: continuation) + } + return + } catch { + Self.logger.error("SpeziLLMOpenAI: Generation error occurred - \(error)") + await finishGenerationWithError(LLMOpenAIError.generationError, on: continuation) + return + } + + let functionCalls = llmStreamResults.values.compactMap { $0.functionCall } + + // Exit the while loop if we don't have any function calls + guard !functionCalls.isEmpty else { + break + } + + // Parallelize function call execution + do { + try await withThrowingTaskGroup(of: Void.self) { group in // swiftlint:disable:this closure_body_length + for functionCall in functionCalls { + group.addTask { // swiftlint:disable:this closure_body_length + Self.logger.debug(""" + SpeziLLMOpenAI: Function call \(functionCall.name ?? ""), Arguments: \(functionCall.arguments ?? "") + """) + + guard let functionName = functionCall.name, + let functionArgument = functionCall.arguments?.data(using: .utf8), + let function = self.schema.functions[functionName] else { + Self.logger.debug("SpeziLLMOpenAI: Couldn't find the requested function to call") + await self.finishGenerationWithError(LLMOpenAIError.invalidFunctionCallName, on: continuation) + throw LLMOpenAIError.invalidFunctionCallName + } + + // Inject parameters into the @Parameters of the function call + do { + try function.injectParameters(from: functionArgument) + } catch { + Self.logger.error("SpeziLLMOpenAI: Invalid function call arguments - \(error)") + await self.finishGenerationWithError(LLMOpenAIError.invalidFunctionCallArguments(error), on: continuation) + throw LLMOpenAIError.invalidFunctionCallArguments(error) + } + + let functionCallResponse: String? + + do { + // Execute function + // Errors thrown by the functions are surfaced to the user as an LLM generation error + functionCallResponse = try await function.execute() + } catch { + Self.logger.error("SpeziLLMOpenAI: Function call execution error - \(error)") + await self.finishGenerationWithError(LLMOpenAIError.functionCallError(error), on: continuation) + throw LLMOpenAIError.functionCallError(error) + } + + Self.logger.debug(""" + SpeziLLMOpenAI: Function call \(functionCall.name ?? "") \ + Arguments: \(functionCall.arguments ?? "") \ + Response: \(functionCallResponse ?? "") + """) + + await MainActor.run { + let defaultResponse = "Function call to \(functionCall.name ?? "") succeeded, function intentionally didn't respond anything." 
+ + // Return `defaultResponse` in case of `nil` or empty return of the function call + self.context.append( + forFunction: functionName, + response: functionCallResponse?.isEmpty != false ? defaultResponse : (functionCallResponse ?? defaultResponse) + ) + } + } + } + + try await group.waitForAll() + } + } catch { + // Stop LLM inference in case of a function call error + return + } + } + + continuation.finish() + Self.logger.debug("SpeziLLMOpenAI: OpenAI GPT completed an inference") + + await MainActor.run { + self.state = .ready + } + } +} diff --git a/Sources/SpeziLLMOpenAI/LLMOpenAISession+Setup.swift b/Sources/SpeziLLMOpenAI/LLMOpenAISession+Setup.swift new file mode 100644 index 0000000..331c2cd --- /dev/null +++ b/Sources/SpeziLLMOpenAI/LLMOpenAISession+Setup.swift @@ -0,0 +1,95 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation +import OpenAI + + +extension LLMOpenAISession { + /// Set up the OpenAI LLM execution client. + /// + /// - Parameters: + /// - continuation: A Swift `AsyncThrowingStream` that streams the generated output. + /// - Returns: `true` if the setup was successful, `false` otherwise. + func setup(continuation: AsyncThrowingStream.Continuation) async -> Bool { + Self.logger.debug("SpeziLLMOpenAI: OpenAI LLM is being initialized") + await MainActor.run { + self.state = .loading + } + + // Overwrite API token if passed + if let overwritingToken = schema.parameters.overwritingToken { + self.wrappedModel = OpenAI( + configuration: .init( + token: overwritingToken, + timeoutInterval: platform.configuration.timeout + ) + ) + } else { + // If token is present within the Spezi `SecureStorage` + guard let credentials = try? secureStorage.retrieveCredentials( + LLMOpenAIConstants.credentialsUsername, + server: LLMOpenAIConstants.credentialsServer + ) else { + Self.logger.error(""" + SpeziLLMOpenAI: Missing OpenAI API token. + Please ensure that the token is either passed directly via the Spezi `Configuration` + or stored within the `SecureStorage` via the `LLMOpenAITokenSaver` before dispatching the first inference. + """) + await finishGenerationWithError(LLMOpenAIError.missingAPIToken, on: continuation) + return false + } + + // Initialize the OpenAI model + self.wrappedModel = OpenAI( + configuration: .init( + token: credentials.password, + timeoutInterval: platform.configuration.timeout + ) + ) + } + + // Check access to the specified OpenAI model + if schema.parameters.modelAccessTest, + await !modelAccessTest(continuation: continuation) { + return false + } + + await MainActor.run { + self.state = .ready + } + Self.logger.debug("SpeziLLMOpenAI: OpenAI LLM finished initializing, now ready to use") + return true + } + + /// Tests access to the OpenAI model. + /// + /// - Parameters: + /// - continuation: A Swift `AsyncThrowingStream` that streams the generated output. + /// - Returns: `true` if the model access test was successful, `false` otherwise. 
+ private func modelAccessTest(continuation: AsyncThrowingStream.Continuation) async -> Bool { + do { + _ = try await self.model.model(query: .init(model: schema.parameters.modelType)) + Self.logger.error("SpeziLLMOpenAI: Model access check completed") + return true + } catch let error as URLError { + Self.logger.error("SpeziLLMOpenAI: Model access check - Connectivity Issues with the OpenAI API: \(error)") + await finishGenerationWithError(LLMOpenAIError.connectivityIssues(error), on: continuation) + } catch { + if let apiError = error as? APIErrorResponse, apiError.error.code == LLMOpenAIError.invalidAPIToken.openAIErrorMessage { + Self.logger.error("SpeziLLMOpenAI: Model access check - Invalid OpenAI API token: \(apiError)") + await finishGenerationWithError(LLMOpenAIError.invalidAPIToken, on: continuation) + } else { + Self.logger.error("SpeziLLMOpenAI: Model access check - Couldn't access the specified OpenAI model: \(error)") + await finishGenerationWithError(LLMOpenAIError.modelAccessError(error), on: continuation) + } + } + + return false + } +} diff --git a/Sources/SpeziLLMOpenAI/LLMOpenAISession.swift b/Sources/SpeziLLMOpenAI/LLMOpenAISession.swift new file mode 100644 index 0000000..561d3df --- /dev/null +++ b/Sources/SpeziLLMOpenAI/LLMOpenAISession.swift @@ -0,0 +1,167 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation +import struct OpenAI.Chat +import struct OpenAI.ChatFunctionDeclaration +import struct OpenAI.ChatQuery +import class OpenAI.OpenAI +import struct OpenAI.Model +import struct OpenAI.ChatStreamResult +import struct OpenAI.APIErrorResponse +import os +import SpeziChat +import SpeziLLM +import SpeziSecureStorage + + +/// Represents an ``LLMOpenAISchema`` in execution. +/// +/// The ``LLMOpenAISession`` is the executable version of the OpenAI LLM containing context and state as defined by the ``LLMOpenAISchema``. +/// It provides access to text-based models from OpenAI, such as GPT-3.5 or GPT-4. +/// +/// The inference is started by ``LLMOpenAISession/generate()``, returning an `AsyncThrowingStream` and can be cancelled via ``LLMOpenAISession/cancel()``. +/// The ``LLMOpenAISession`` exposes its current state via the ``LLMOpenAISession/context`` property, containing all the conversational history with the LLM. +/// +/// - Warning: The ``LLMOpenAISession`` shouldn't be created manually but always through the ``LLMOpenAIPlatform`` via the `LLMRunner`. +/// +/// - Tip: ``LLMOpenAISession`` also enables the function calling mechanism to establish a structured, bidirectional, and reliable communication between the OpenAI LLMs and external tools. For details, refer to ``LLMFunction`` and ``LLMFunction/Parameter`` or the DocC article. +/// +/// - Tip: For more information, refer to the documentation of the `LLMSession` from SpeziLLM. +/// +/// ### Usage +/// +/// The example below demonstrates a minimal usage of the ``LLMOpenAISession`` via the `LLMRunner`. +/// +/// ```swift +/// struct LLMOpenAIDemoView: View { +/// @Environment(LLMRunner.self) var runner +/// @State var responseText = "" +/// +/// var body: some View { +/// Text(responseText) +/// .task { +/// // Instantiate the `LLMOpenAISchema` to an `LLMOpenAISession` via the `LLMRunner`. 
+/// let llmSession: LLMOpenAISession = runner( +/// with: LLMOpenAISchema( +/// parameters: .init( +/// modelType: .gpt3_5Turbo, +/// systemPrompt: "You're a helpful assistant that answers questions from users.", +/// overwritingToken: "abc123" +/// ) +/// ) +/// ) +/// +/// for try await token in try await llmSession.generate() { +/// responseText.append(token) +/// } +/// } +/// } +/// } +/// ``` +@Observable +public final class LLMOpenAISession: LLMSession, @unchecked Sendable { + /// A Swift Logger that logs important information from the ``LLMOpenAISession``. + static let logger = Logger(subsystem: "edu.stanford.spezi", category: "SpeziLLMOpenAI") + + + let platform: LLMOpenAIPlatform + let schema: LLMOpenAISchema + let secureStorage: SecureStorage + + /// A set of `Task`s managing the ``LLMOpenAISession`` output generation. + @ObservationIgnored private var tasks: Set> = [] + /// Ensuring thread-safe access to the `LLMOpenAISession/task`. + @ObservationIgnored private var lock = NSLock() + + @MainActor public var state: LLMState = .uninitialized + @MainActor public var context: SpeziChat.Chat = [] + @ObservationIgnored var wrappedModel: OpenAI? + + var model: OpenAI { + guard let model = wrappedModel else { + preconditionFailure(""" + SpeziLLMOpenAI: Illegal Access - Tried to access the wrapped OpenAI model of `LLMOpenAISession` before being initialized. + Ensure that the `LLMOpenAIPlatform` is passed to the `LLMRunner` within the Spezi `Configuration`. + """) + } + return model + } + + + /// Creates an instance of a ``LLMOpenAISession`` responsible for LLM inference. + /// Only the ``LLMOpenAIPlatform`` should create an instance of ``LLMOpenAISession``. + /// + /// - Parameters: + /// - platform: Reference to the ``LLMOpenAIPlatform`` where the ``LLMOpenAISession`` is running on. + /// - schema: The configuration of the OpenAI LLM expressed by the ``LLMOpenAISchema``. + /// - secureStorage: Reference to the `SecureStorage` from `SpeziStorage` in order to securely persist the token. 
+ init(_ platform: LLMOpenAIPlatform, schema: LLMOpenAISchema, secureStorage: SecureStorage) { + self.platform = platform + self.schema = schema + self.secureStorage = secureStorage + + // Inject system prompts into context + Task { @MainActor in + schema.parameters.systemPrompts.forEach { systemPrompt in + context.append(systemMessage: systemPrompt) + } + } + } + + + @discardableResult + public func generate() async throws -> AsyncThrowingStream { + try await platform.exclusiveAccess() + + let (stream, continuation) = AsyncThrowingStream.makeStream(of: String.self) + + // Execute the output generation of the LLM + let task = Task(priority: platform.configuration.taskPriority) { + // Unregister as soon as `Task` finishes + defer { + Task { + await platform.signal() + } + } + + // Setup the model, if not already done + if wrappedModel == nil { + guard await setup(continuation: continuation) else { + return + } + } + + guard await !checkCancellation(on: continuation) else { + return + } + + // Execute the inference + await _generate(continuation: continuation) + } + + _ = lock.withLock { + tasks.insert(task) + } + + return stream + } + + public func cancel() { + lock.withLock { + for task in tasks { + task.cancel() + } + } + } + + + deinit { + cancel() + } +} diff --git a/Sources/SpeziLLMOpenAI/LLMOpenAITokenSaver.swift b/Sources/SpeziLLMOpenAI/LLMOpenAITokenSaver.swift index cc5c463..ae5f9f3 100644 --- a/Sources/SpeziLLMOpenAI/LLMOpenAITokenSaver.swift +++ b/Sources/SpeziLLMOpenAI/LLMOpenAITokenSaver.swift @@ -7,6 +7,7 @@ // import Foundation +import Spezi import SpeziSecureStorage import SwiftUI @@ -16,7 +17,7 @@ import SwiftUI /// The ``LLMOpenAITokenSaver`` provides the ``LLMOpenAITokenSaver/token`` property to easily read and write to the `SecureStorage`. /// If a SwiftUI `Binding` is required (e.g., for a `TextField`), one can use the ``LLMOpenAITokenSaver/tokenBinding`` property. /// -/// One needs to specify the ``LLMOpenAIRunnerSetupTask`` within the Spezi `Configuration` to be able to access the ``LLMOpenAITokenSaver`` from within the SwiftUI `Environment`. +/// One needs to specify the ``LLMOpenAIPlatform`` within the Spezi `Configuration` to be able to access the ``LLMOpenAITokenSaver`` from within the SwiftUI `Environment`. /// /// ### Usage /// @@ -27,7 +28,7 @@ import SwiftUI /// override var configuration: Configuration { /// Configuration { /// LLMRunner { -/// LLMOpenAIRunnerSetupTask() +/// LLMOpenAIPlatform() /// } /// } /// } @@ -50,8 +51,8 @@ import SwiftUI /// } /// ``` @Observable -public class LLMOpenAITokenSaver { - private let secureStorage: SecureStorage +public class LLMOpenAITokenSaver: Module, EnvironmentAccessible, DefaultInitializable { + @Dependency @ObservationIgnored private var secureStorage: SecureStorage /// Indicates if a token is present within the Spezi `SecureStorage`. @@ -102,7 +103,5 @@ public class LLMOpenAITokenSaver { } - init(secureStorage: SecureStorage) { - self.secureStorage = secureStorage - } + public required init() {} } diff --git a/Sources/SpeziLLMOpenAI/Onboarding/LLMOpenAIAPITokenOnboardingStep.swift b/Sources/SpeziLLMOpenAI/Onboarding/LLMOpenAIAPITokenOnboardingStep.swift index 728a0a4..412cf6a 100644 --- a/Sources/SpeziLLMOpenAI/Onboarding/LLMOpenAIAPITokenOnboardingStep.swift +++ b/Sources/SpeziLLMOpenAI/Onboarding/LLMOpenAIAPITokenOnboardingStep.swift @@ -14,7 +14,7 @@ import SwiftUI /// View to display an onboarding step for the user to enter an OpenAI API Key. 
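A brief sketch of how this onboarding step might be embedded in a SpeziOnboarding flow follows. The enclosing `OpenAIAPIKey` view and the trailing action closure mirror the DocC walkthrough later in this change and are meant as an illustration under those assumptions, not as the definitive API.

```swift
import SpeziLLMOpenAI
import SpeziOnboarding
import SwiftUI

struct OpenAIAPIKey: View {
    @EnvironmentObject private var onboardingNavigationPath: OnboardingNavigationPath

    var body: some View {
        // The step stores the entered key via the `LLMOpenAITokenSaver` / `SecureStorage`;
        // the closure is assumed to be the step's "continue" action.
        LLMOpenAIAPITokenOnboardingStep {
            onboardingNavigationPath.nextStep()
        }
    }
}
```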
/// -/// > Warning: Ensure that the ``LLMOpenAIRunnerSetupTask`` is specified within the Spezi `Configuration` when using this view in the onboarding flow. +/// - Warning: Ensure that the ``LLMOpenAIPlatform`` is specified within the Spezi `Configuration` when using this view in the onboarding flow. public struct LLMOpenAIAPITokenOnboardingStep: View { @Environment(LLMOpenAITokenSaver.self) private var tokenSaver diff --git a/Sources/SpeziLLMOpenAI/Onboarding/LLMOpenAIModelOnboardingStep.swift b/Sources/SpeziLLMOpenAI/Onboarding/LLMOpenAIModelOnboardingStep.swift index 8751aba..74a28de 100644 --- a/Sources/SpeziLLMOpenAI/Onboarding/LLMOpenAIModelOnboardingStep.swift +++ b/Sources/SpeziLLMOpenAI/Onboarding/LLMOpenAIModelOnboardingStep.swift @@ -6,7 +6,6 @@ // SPDX-License-Identifier: MIT // -@_exported import struct OpenAI.Model import Spezi import SpeziOnboarding import SwiftUI @@ -15,14 +14,14 @@ import SwiftUI /// View to display an onboarding step for the user to enter change the OpenAI model. public struct LLMOpenAIModelOnboardingStep: View { public enum Default { - public static let models = [Model.gpt3_5Turbo, Model.gpt4] + public static let models: [LLMOpenAIModelType] = [.gpt3_5Turbo, .gpt4_turbo_preview] } - @State private var modelSelection: Model + @State private var modelSelection: LLMOpenAIModelType private let actionText: String - private let action: (Model) -> Void - private let models: [Model] + private let action: (LLMOpenAIModelType) -> Void + private let models: [LLMOpenAIModelType] public var body: some View { @@ -60,8 +59,8 @@ public struct LLMOpenAIModelOnboardingStep: View { /// - action: Action that should be performed after the openAI model selection has been done, selection is passed as closure argument. public init( actionText: LocalizedStringResource? = nil, - models: [Model] = Default.models, - _ action: @escaping (Model) -> Void + models: [LLMOpenAIModelType] = Default.models, + _ action: @escaping (LLMOpenAIModelType) -> Void ) { self.init( actionText: actionText?.localizedString() ?? String(localized: "OPENAI_MODEL_SELECTION_SAVE_BUTTON", bundle: .module), @@ -77,18 +76,18 @@ public struct LLMOpenAIModelOnboardingStep: View { @_disfavoredOverload public init( actionText: ActionText, - models: [Model] = Default.models, - _ action: @escaping (Model) -> Void + models: [LLMOpenAIModelType] = Default.models, + _ action: @escaping (LLMOpenAIModelType) -> Void ) { self.actionText = String(actionText) self.models = models self.action = action - self._modelSelection = State(initialValue: models.first ?? .gpt3_5Turbo_1106) + self._modelSelection = State(initialValue: models.first ?? .gpt3_5Turbo) } } -extension Model { +extension LLMOpenAIModelType { fileprivate var formattedModelDescription: String { self.replacing("-", with: " ").capitalized.replacing("Gpt", with: "GPT") } diff --git a/Sources/SpeziLLMOpenAI/Resources/Localizable.xcstrings b/Sources/SpeziLLMOpenAI/Resources/Localizable.xcstrings index 5dd7174..7160c54 100644 --- a/Sources/SpeziLLMOpenAI/Resources/Localizable.xcstrings +++ b/Sources/SpeziLLMOpenAI/Resources/Localizable.xcstrings @@ -31,6 +31,36 @@ } } }, + "LLM_FUNCTION_CALL_ERROR_DESCRIPTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "LLM Function Call has failed." + } + } + } + }, + "LLM_FUNCTION_CALL_ERROR_FAILURE_REASON" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "The LLM Function Call executed on the device has thrown an error." 
+ } + } + } + }, + "LLM_FUNCTION_CALL_ERROR_RECOVERY_SUGGESTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Please retry the input query or restart the application." + } + } + } + }, "LLM_GENERATION_ERROR_DESCRIPTION" : { "localizations" : { "en" : { @@ -121,6 +151,36 @@ } } }, + "LLM_INVALID_FUNCTION_CALL_NAME_ERROR_DESCRIPTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "LLM called an unknown function." + } + } + } + }, + "LLM_INVALID_FUNCTION_CALL_NAME_ERROR_FAILURE_REASON" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "The LLM has tried to call a function that doesn't exist." + } + } + } + }, + "LLM_INVALID_FUNCTION_CALL_NAME_ERROR_RECOVERY_SUGGESTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Please retry the input query." + } + } + } + }, "LLM_INVALID_TOKEN_ERROR_DESCRIPTION" : { "localizations" : { "en" : { @@ -146,7 +206,37 @@ "en" : { "stringUnit" : { "state" : "translated", - "value" : "Please ensure that the specified OpenAI API key is valid." + "value" : "Please ensure that the configured OpenAI API key is valid and able to access OpenAI models." + } + } + } + }, + "LLM_MISSING_TOKEN_ERROR_DESCRIPTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "OpenAI API Key missing." + } + } + } + }, + "LLM_MISSING_TOKEN_FAILURE_REASON" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "OpenAI API Key wasn't set before using the APIs generational capabilities." + } + } + } + }, + "LLM_MISSING_TOKEN_RECOVERY_SUGGESTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Ensure that the API Key is set before dispatching the first inference." } } } diff --git a/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/FunctionCalling.md b/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/FunctionCalling.md index cec9634..64edf79 100644 --- a/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/FunctionCalling.md +++ b/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/FunctionCalling.md @@ -36,8 +36,8 @@ The available ``LLMFunction``s are then declared via ``LLMOpenAI/init(parameters ### Example -A full code example of using a ``LLMFunction`` with the ``LLMOpenAI`` can be found below. -As LLMs cannot access real time information, the ``LLMOpenAI`` model is provided with a weather ``LLMFunction``, enabling the LLM to fetch up-to-date weather information for a specific location. +A full code example of using a ``LLMFunction`` using the ``LLMOpenAISchema`` (configuration of the LLM) can be found below. +As LLMs cannot access real time information, the OpenAI model is provided with a weather ``LLMFunction``, enabling the LLM to fetch up-to-date weather information for a specific location. ```swift // The defined `LLMFunction` made available to the OpenAI LLM @@ -55,7 +55,7 @@ struct WeatherFunction: LLMFunction { // Enclosing view to display an LLM chat struct LLMOpenAIChatTestView: View { - private let model = LLMOpenAI( + private let schema = LLMOpenAISchema( parameters: .init( modelType: .gpt4_1106_preview, systemPrompt: "You're a helpful assistant that answers questions from users." 
@@ -66,7 +66,7 @@ struct LLMOpenAIChatTestView: View { var body: some View { LLMChatView( - model: model + schema: schema ) } } diff --git a/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/SpeziLLMOpenAI.md b/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/SpeziLLMOpenAI.md index f7aac6f..e04e254 100644 --- a/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/SpeziLLMOpenAI.md +++ b/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/SpeziLLMOpenAI.md @@ -30,7 +30,7 @@ A module that allows you to interact with GPT-based Large Language Models (LLMs) } @Column { @Image(source: "ChatView", alt: "Screenshot displaying the usage of the LLMOpenAI with the SpeziChat Chat View."){ - ``LLMOpenAI`` + ``LLMOpenAISession`` } } } @@ -47,7 +47,7 @@ You need to add the SpeziLLM Swift package to ## Spezi LLM OpenAI Components -The core component of the ``SpeziLLMOpenAI`` target is the ``LLMOpenAI`` class which conforms to the [`LLM` protocol of SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llm). ``LLMOpenAI`` uses the OpenAI API to perform textual inference on the GPT-3.5 or GPT-4 models from OpenAI. +The core components of the ``SpeziLLMOpenAI`` target are the ``LLMOpenAISchema``, ``LLMOpenAISession``, and ``LLMOpenAIPlatform``. They rely on the OpenAI API to perform textual inference on the GPT-3.5 or GPT-4 models from OpenAI. > Important: To utilize an LLM from OpenAI, an OpenAI API Key is required. Ensure that the OpenAI account associated with the key has enough resources to access the specified model as well as enough credits to perform the actual inference. @@ -55,13 +55,13 @@ The core component of the ``SpeziLLMOpenAI`` target is the ``LLMOpenAI`` class w ### LLM OpenAI -``LLMOpenAI`` offers a variety of configuration possibilities that are supported by the OpenAI API, such as the model type, the system prompt, the temperature of the model, and many more. These options can be set via the ``LLMOpenAI/init(parameters:modelParameters:_:)`` initializer and the ``LLMOpenAIParameters`` and ``LLMOpenAIModelParameters``. +``LLMOpenAISchema`` offers a variety of configuration possibilities that are supported by the OpenAI API, such as the model type, the system prompt, the temperature of the model, and many more. These options can be set via the ``LLMOpenAISchema/init(parameters:modelParameters:injectIntoContext:_:)`` initializer and the ``LLMOpenAIParameters`` and ``LLMOpenAIModelParameters``. -- Important: ``LLMOpenAI`` shouldn't be used on it's own but always wrapped by the Spezi `LLMRunner` as the runner handles all management overhead tasks. +- Important: The OpenAI LLM abstractions shouldn't be used on their own but always together with the Spezi `LLMRunner`. #### Setup -In order to use ``LLMOpenAI``, the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner) needs to be initialized in the Spezi `Configuration`. Only after, the `LLMRunner` can be used to execute the ``LLMOpenAI``. +In order to use OpenAI LLMs, the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner) needs to be initialized in the Spezi `Configuration` with the ``LLMOpenAIPlatform``. Only then can the `LLMRunner` be used for inference with OpenAI LLMs.
See the [SpeziLLM documentation](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) for more details. ```swift @@ -69,7 +69,7 @@ class LLMOpenAIAppDelegate: SpeziAppDelegate { override var configuration: Configuration { Configuration { LLMRunner { - LLMOpenAIRunnerSetupTask() + LLMOpenAIPlatform() } } } @@ -78,41 +78,38 @@ class LLMOpenAIAppDelegate: SpeziAppDelegate { #### Usage -The code example below showcases the interaction with the ``LLMOpenAI`` through the the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner), which is injected into the SwiftUI `Environment` via the `Configuration` shown above. -Based on a `String` prompt, the `LLMGenerationTask/generate(prompt:)` method returns an `AsyncThrowingStream` which yields the inferred characters until the generation has completed. +The code example below showcases the interaction with the OpenAI LLMs within the Spezi ecosystem through the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner), which is injected into the SwiftUI `Environment` via the `Configuration` shown above. -The ``LLMOpenAI`` contains the ``LLMOpenAI/context`` property which holds the entire history of the model interactions. -This includes the system prompt, user input, but also assistant responses. -Ensure the property always contains all necessary information, as the ``LLMOpenAI/generate(continuation:)`` function executes the inference based on the ``LLMOpenAI/context`` +The ``LLMOpenAISchema`` defines the type and configurations of the to-be-executed ``LLMOpenAISession``. This transformation is done via the [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner) that uses the ``LLMOpenAIPlatform``. The inference via ``LLMOpenAISession/generate()`` returns an `AsyncThrowingStream` that yields all generated `String` pieces. -> Tip: The model can be queried via the `LLMGenerationTask/generate()` and `LLMGenerationTask/generate(prompt:)` calls (returned from wrapping the ``LLMOpenAI`` in the `LLMRunner` from the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) target). - The first method takes no input prompt at all but uses the current context of the model (so `LLM/context`) to query the model. - The second takes a `String`-based input from the user and appends it to the context of the model (so `LLM/context`) before querying the model. +The ``LLMOpenAISession`` contains the ``LLMOpenAISession/context`` property which holds the entire history of the model interactions. This includes the system prompt, user input, but also assistant responses. +Ensure the property always contains all necessary information, as the ``LLMOpenAISession/generate()`` function executes the inference based on the ``LLMOpenAISession/context``. -> Important: The ``LLMOpenAI`` should only be used together with the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner)!
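A minimal sketch of seeding the ``LLMOpenAISession/context`` with a user message before inference follows; the `append(userInput:)` helper and the `@MainActor` isolation of the context are assumptions modeled on the SpeziLLM chat context and are not part of this change:

```swift
import SpeziLLM
import SpeziLLMOpenAI

/// Illustrative only: appends a user message to the session context and streams the response.
/// `append(userInput:)` and the `@MainActor` isolation are assumptions, not confirmed by this change.
@MainActor
func generateWeatherAnswer(with llmSession: LLMOpenAISession) async throws -> String {
    // Seed the context that `generate()` will use for inference.
    llmSession.context.append(userInput: "How is the weather in Stuttgart today?")

    var response = ""
    for try await token in try await llmSession.generate() {
        response.append(token)
    }
    return response
}
```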
+> Important: The OpenAI LLM abstractions should only be used together with the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner)! ```swift -struct LLMOpenAIChatView: View { - // The runner responsible for executing the OpenAI LLM. - @Environment(LLMRunner.self) var runner: LLMRunner - - // The OpenAI LLM - @State var model: LLMOpenAI = .init( - parameters: .init( - modelType: .gpt3_5Turbo, - systemPrompt: "You're a helpful assistant that answers questions from users.", - overwritingToken: "abc123" - ) - ) - @State var responseText: String - - func executePrompt(prompt: String) { - // Execute the query on the runner, returning a stream of outputs - let stream = try await runner(with: model).generate(prompt: "Hello LLM!") - - for try await token in stream { - responseText.append(token) - } +struct LLMOpenAIDemoView: View { + @Environment(LLMRunner.self) var runner + @State var responseText = "" + + var body: some View { + Text(responseText) + .task { + // Instantiate the `LLMOpenAISchema` to an `LLMOpenAISession` via the `LLMRunner`. + let llmSession: LLMOpenAISession = runner( + with: LLMOpenAISchema( + parameters: .init( + modelType: .gpt3_5Turbo, + systemPrompt: "You're a helpful assistant that answers questions from users.", + overwritingToken: "abc123" + ) + ) + ) + + for try await token in try await llmSession.generate() { + responseText.append(token) + } + } } } ``` @@ -131,9 +128,6 @@ First, create a new view to show the onboarding step: ```swift import SpeziOnboarding -import SpeziLLMOpenAI -import SwiftUI - struct OpenAIAPIKey: View { @EnvironmentObject private var onboardingNavigationPath: OnboardingNavigationPath @@ -150,14 +144,10 @@ This view can then be added to the `OnboardingFlow` within the Spezi Template Ap ```swift import SpeziOnboarding -import SpeziLLMOpenAI -import SwiftUI - struct OnboardingFlow: View { @AppStorage(StorageKeys.onboardingFlowComplete) var completedOnboardingFlow = false - var body: some View { OnboardingStack(onboardingFlowComplete: $completedOnboardingFlow) { // ... 
other steps @@ -172,20 +162,28 @@ Now the OpenAI API Key entry view will appear within your application's onboardi ## Topics -### Model +### LLM OpenAI abstraction -- ``LLMOpenAI`` +- ``LLMOpenAISchema`` +- ``LLMOpenAISession`` -### Configuration +### LLM Execution -- ``LLMOpenAIParameters`` -- ``LLMOpenAIModelParameters`` - -### Setup - -- ``LLMOpenAIRunnerSetupTask`` +- ``LLMOpenAIPlatform`` +- ``LLMOpenAIPlatformConfiguration`` ### Onboarding - ``LLMOpenAIAPITokenOnboardingStep`` - ``LLMOpenAIModelOnboardingStep`` +- ``LLMOpenAITokenSaver`` +- ``LLMOpenAIModelType`` + +### LLM Configuration + +- ``LLMOpenAIParameters`` +- ``LLMOpenAIModelParameters`` + +### Misc + +- ``LLMOpenAIError`` diff --git a/Tests/SpeziLLMTests/LLMOpenAIParameterTests+Array.swift b/Tests/SpeziLLMTests/LLMOpenAIParameterTests+Array.swift index d4bac98..2e7fde6 100644 --- a/Tests/SpeziLLMTests/LLMOpenAIParameterTests+Array.swift +++ b/Tests/SpeziLLMTests/LLMOpenAIParameterTests+Array.swift @@ -55,8 +55,8 @@ final class LLMOpenAIParameterArrayTests: XCTestCase { } } - let llm = LLMOpenAI( - parameters: .init(modelType: .gpt4_1106_preview) + let llm = LLMOpenAISchema( + parameters: .init(modelType: .gpt4_turbo_preview) ) { LLMFunctionTest(someInitArg: "testArg") } diff --git a/Tests/SpeziLLMTests/LLMOpenAIParameterTests+CustomTypes.swift b/Tests/SpeziLLMTests/LLMOpenAIParameterTests+CustomTypes.swift index fb36816..f01bf90 100644 --- a/Tests/SpeziLLMTests/LLMOpenAIParameterTests+CustomTypes.swift +++ b/Tests/SpeziLLMTests/LLMOpenAIParameterTests+CustomTypes.swift @@ -64,8 +64,8 @@ final class LLMOpenAIParameterCustomTypesTests: XCTestCase { } } - let llm = LLMOpenAI( - parameters: .init(modelType: .gpt4_1106_preview) + let llm = LLMOpenAISchema( + parameters: .init(modelType: .gpt4_turbo_preview) ) { LLMFunctionTest(someInitArg: "testArg") } diff --git a/Tests/SpeziLLMTests/LLMOpenAIParameterTests+Enum.swift b/Tests/SpeziLLMTests/LLMOpenAIParameterTests+Enum.swift index 9771570..0b6106f 100644 --- a/Tests/SpeziLLMTests/LLMOpenAIParameterTests+Enum.swift +++ b/Tests/SpeziLLMTests/LLMOpenAIParameterTests+Enum.swift @@ -60,8 +60,8 @@ final class LLMOpenAIParameterEnumTests: XCTestCase { } } - let llm = LLMOpenAI( - parameters: .init(modelType: .gpt4_1106_preview) + let llm = LLMOpenAISchema( + parameters: .init(modelType: .gpt4_turbo_preview) ) { LLMFunctionTest(someInitArg: "testArg") } diff --git a/Tests/SpeziLLMTests/LLMOpenAIParameterTests+InvalidParameters.swift b/Tests/SpeziLLMTests/LLMOpenAIParameterTests+InvalidParameters.swift index d134bc3..c75380a 100644 --- a/Tests/SpeziLLMTests/LLMOpenAIParameterTests+InvalidParameters.swift +++ b/Tests/SpeziLLMTests/LLMOpenAIParameterTests+InvalidParameters.swift @@ -43,8 +43,8 @@ final class LLMOpenAIInvalidParametersTests: XCTestCase { } } - let llm = LLMOpenAI( - parameters: .init(modelType: .gpt4_1106_preview) + let llm = LLMOpenAISchema( + parameters: .init(modelType: .gpt4_turbo_preview) ) { LLMFunctionTest(someInitArg: "testArg") } diff --git a/Tests/SpeziLLMTests/LLMOpenAIParameterTests+OptionalTypes.swift b/Tests/SpeziLLMTests/LLMOpenAIParameterTests+OptionalTypes.swift index 74b08fa..6be2db7 100644 --- a/Tests/SpeziLLMTests/LLMOpenAIParameterTests+OptionalTypes.swift +++ b/Tests/SpeziLLMTests/LLMOpenAIParameterTests+OptionalTypes.swift @@ -74,8 +74,8 @@ final class LLMOpenAIParameterOptionalTypesTests: XCTestCase { } } - let llm = LLMOpenAI( - parameters: .init(modelType: .gpt4_1106_preview) + let llm = LLMOpenAISchema( + parameters: .init(modelType:
.gpt4_turbo_preview) ) { LLMFunctionTest(someInitArg: "testArg") } diff --git a/Tests/SpeziLLMTests/LLMOpenAIParameterTests+PrimitiveTypes.swift b/Tests/SpeziLLMTests/LLMOpenAIParameterTests+PrimitiveTypes.swift index 308704f..74a0e50 100644 --- a/Tests/SpeziLLMTests/LLMOpenAIParameterTests+PrimitiveTypes.swift +++ b/Tests/SpeziLLMTests/LLMOpenAIParameterTests+PrimitiveTypes.swift @@ -55,8 +55,8 @@ final class LLMOpenAIParameterPrimitiveTypesTests: XCTestCase { } } - let llm = LLMOpenAI( - parameters: .init(modelType: .gpt4_1106_preview) + let llm = LLMOpenAISchema( + parameters: .init(modelType: .gpt4_turbo_preview) ) { LLMFunctionTest(someInitArg: "testArg") } diff --git a/Tests/UITests/TestApp/FeatureFlags.swift b/Tests/UITests/TestApp/FeatureFlags.swift index 6965a45..28827b4 100644 --- a/Tests/UITests/TestApp/FeatureFlags.swift +++ b/Tests/UITests/TestApp/FeatureFlags.swift @@ -9,7 +9,7 @@ import Foundation -enum FeatureFlags { +enum FeatureFlags: Sendable { /// Configures the LLMs to mock all generated responses in order to simplify development and write UI Tests. static let mockMode = ProcessInfo.processInfo.arguments.contains("--mockMode") } diff --git a/Tests/UITests/TestApp/LLMLocal/LLMLocalChatTestView.swift b/Tests/UITests/TestApp/LLMLocal/LLMLocalChatTestView.swift index 6488817..3f54520 100644 --- a/Tests/UITests/TestApp/LLMLocal/LLMLocalChatTestView.swift +++ b/Tests/UITests/TestApp/LLMLocal/LLMLocalChatTestView.swift @@ -13,29 +13,44 @@ import SwiftUI /// Presents a chat view that enables user's to interact with the local LLM. struct LLMLocalChatTestView: View { - /// The Spezi `LLM` that is configured and executed on the `LLMRunner` - private var model: LLM = { - if FeatureFlags.mockMode { - LLMMock() - } else { - LLMLocal( - modelPath: .cachesDirectory.appending(path: "llm.gguf"), /// Loads the LLM from the passed cache directory - parameters: .init(maxOutputLength: 512), /// Limits the size of the generated response to 512 tokens - contextParameters: .init(contextWindowSize: 1024) /// Sets the context size of the model at 1024 tokens - ) - } - }() + let mockMode: Bool var body: some View { - LLMChatView( - model: model - ) + Group { + if FeatureFlags.mockMode || mockMode { + LLMChatViewSchema( + with: LLMMockSchema() + ) + } else { + LLMChatViewSchema( + with: LLMLocalSchema( + modelPath: .cachesDirectory.appending(path: "llm.gguf"), + parameters: .init(maxOutputLength: 512), + contextParameters: .init(contextWindowSize: 1024) + ) + ) + } + } .navigationTitle("LLM_LOCAL_CHAT_VIEW_TITLE") } + + + init(mockMode: Bool = false) { + self.mockMode = mockMode + } } +#if DEBUG #Preview { - LLMLocalChatTestView() + NavigationStack { + LLMLocalChatTestView(mockMode: true) + } + .previewWith { + LLMRunner { + LLMMockPlatform() + } + } } +#endif diff --git a/Tests/UITests/TestApp/LLMLocal/LLMLocalTestView.swift b/Tests/UITests/TestApp/LLMLocal/LLMLocalTestView.swift index accc363..e845ec2 100644 --- a/Tests/UITests/TestApp/LLMLocal/LLMLocalTestView.swift +++ b/Tests/UITests/TestApp/LLMLocal/LLMLocalTestView.swift @@ -6,23 +6,38 @@ // SPDX-License-Identifier: MIT // +import SpeziLLM import SpeziOnboarding import SwiftUI struct LLMLocalTestView: View { @AppStorage(StorageKeys.onboardingFlowComplete) private var completedOnboardingFlow = false - + let mockMode: Bool var body: some View { - LLMLocalChatTestView() + LLMLocalChatTestView(mockMode: mockMode) .sheet(isPresented: !$completedOnboardingFlow) { LLMLocalOnboardingFlow() } } + + + init(mockMode: Bool = false) { + self.mockMode = 
mockMode + } } +#if DEBUG #Preview { - LLMLocalTestView() + NavigationStack { + LLMLocalTestView(mockMode: true) + } + .previewWith { + LLMRunner { + LLMMockPlatform() + } + } } +#endif diff --git a/Tests/UITests/TestApp/LLMLocal/Onboarding/LLMLocalOnboardingDownloadView.swift b/Tests/UITests/TestApp/LLMLocal/Onboarding/LLMLocalOnboardingDownloadView.swift index 1c09fe3..ccabd5f 100644 --- a/Tests/UITests/TestApp/LLMLocal/Onboarding/LLMLocalOnboardingDownloadView.swift +++ b/Tests/UITests/TestApp/LLMLocal/Onboarding/LLMLocalOnboardingDownloadView.swift @@ -19,8 +19,8 @@ struct LLMLocalOnboardingDownloadView: View { var body: some View { LLMLocalDownloadView( - llmDownloadUrl: LLMLocalDownloadManager.LLMUrlDefaults.llama2ChatModelUrl, /// By default, download the Llama2 model - llmStorageUrl: .cachesDirectory.appending(path: "llm.gguf") /// Store the downloaded LLM in the caches directory + downloadDescription: "LLM_DOWNLOAD_DESCRIPTION", + llmDownloadUrl: LLMLocalDownloadManager.LLMUrlDefaults.llama2ChatModelUrl /// By default, download the Llama2 model ) { onboardingNavigationPath.nextStep() } @@ -28,8 +28,10 @@ struct LLMLocalOnboardingDownloadView: View { } +#if DEBUG #Preview { OnboardingStack { LLMLocalOnboardingDownloadView() } } +#endif diff --git a/Tests/UITests/TestApp/LLMLocal/Onboarding/LLMLocalOnboardingFlow.swift b/Tests/UITests/TestApp/LLMLocal/Onboarding/LLMLocalOnboardingFlow.swift index 8b3df80..104392e 100644 --- a/Tests/UITests/TestApp/LLMLocal/Onboarding/LLMLocalOnboardingFlow.swift +++ b/Tests/UITests/TestApp/LLMLocal/Onboarding/LLMLocalOnboardingFlow.swift @@ -28,6 +28,8 @@ struct LLMLocalOnboardingFlow: View { } +#if DEBUG #Preview { LLMLocalOnboardingFlow() } +#endif diff --git a/Tests/UITests/TestApp/LLMLocal/Onboarding/LLMLocalOnboardingWelcomeView.swift b/Tests/UITests/TestApp/LLMLocal/Onboarding/LLMLocalOnboardingWelcomeView.swift index 20278a2..d515f66 100644 --- a/Tests/UITests/TestApp/LLMLocal/Onboarding/LLMLocalOnboardingWelcomeView.swift +++ b/Tests/UITests/TestApp/LLMLocal/Onboarding/LLMLocalOnboardingWelcomeView.swift @@ -55,9 +55,11 @@ struct LLMLocalOnboardingWelcomeView: View { } +#if DEBUG #Preview { OnboardingStack { LLMLocalOnboardingWelcomeView() LLMLocalOnboardingDownloadView() } } +#endif diff --git a/Tests/UITests/TestApp/LLMOpenAI/LLMOpenAIChatTestView.swift b/Tests/UITests/TestApp/LLMOpenAI/LLMOpenAIChatTestView.swift index f75e654..e485386 100644 --- a/Tests/UITests/TestApp/LLMOpenAI/LLMOpenAIChatTestView.swift +++ b/Tests/UITests/TestApp/LLMOpenAI/LLMOpenAIChatTestView.swift @@ -13,31 +13,36 @@ import SwiftUI struct LLMOpenAIChatTestView: View { + static let schema = LLMOpenAISchema( + parameters: .init( + modelType: .gpt4_turbo_preview, + systemPrompt: "You're a helpful assistant that answers questions from users." + ) + ) { + LLMOpenAIFunctionWeather() + LLMOpenAIFunctionHealthData() + LLMOpenAIFunctionPerson() + } + + @LLMSessionProvider(schema: Self.schema) var llm: LLMOpenAISession @State var showOnboarding = false - - /// The Spezi `LLM` that is configured and executed on the `LLMRunner` - private var model: LLM = { - if FeatureFlags.mockMode { - LLMMock() - } else { - LLMOpenAI( - parameters: .init( - modelType: .gpt4_1106_preview, - systemPrompt: "You're a helpful assistant that answers questions from users." 
- ) - ) { - LLMOpenAIFunctionWeather() - LLMOpenAIFunctionHealthData() - LLMOpenAIFunctionPerson() - } - } - }() + @State var muted = true var body: some View { - LLMChatView( - model: model - ) + Group { + if FeatureFlags.mockMode { + LLMChatViewSchema(with: LLMMockSchema()) + } else { + // Either use the convenience LLMChatViewSchema that only gets passed the schema. No access to underlying LLMSession + // LLMChatViewSchema(with: Self.schema) + + // Otherwise use the LLMChatView and pass a LLMSession Binding in there. Use the @LLMSessionProvider wrapper to instantiate the LLMSession + LLMChatView(session: $llm) + .speak(llm.context, muted: muted) + .speechToolbarButton(muted: $muted) + } + } .navigationTitle("LLM_OPENAI_CHAT_VIEW_TITLE") .toolbar { ToolbarItem { diff --git a/Tests/UITests/TestApp/Resources/Localizable.xcstrings b/Tests/UITests/TestApp/Resources/Localizable.xcstrings index 1c949eb..a4dd085 100644 --- a/Tests/UITests/TestApp/Resources/Localizable.xcstrings +++ b/Tests/UITests/TestApp/Resources/Localizable.xcstrings @@ -1,6 +1,16 @@ { "sourceLanguage" : "en", "strings" : { + "LLM_DOWNLOAD_DESCRIPTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "By default, the application downloads the Llama 2 7B model in its chat variation. The size of the model is around 3.5GB." + } + } + } + }, "LLM_LOCAL_CHAT_VIEW_TITLE" : { "localizations" : { "en" : { diff --git a/Tests/UITests/TestApp/TestAppDelegate.swift b/Tests/UITests/TestApp/TestAppDelegate.swift index b40b276..8ce9866 100644 --- a/Tests/UITests/TestApp/TestAppDelegate.swift +++ b/Tests/UITests/TestApp/TestAppDelegate.swift @@ -16,8 +16,9 @@ class TestAppDelegate: SpeziAppDelegate { override var configuration: Configuration { Configuration { LLMRunner { - LLMLocalRunnerSetupTask() - LLMOpenAIRunnerSetupTask() + LLMMockPlatform() + LLMLocalPlatform() + LLMOpenAIPlatform() } } } diff --git a/Tests/UITests/TestAppUITests/TestAppLLMOpenAIUITests.swift b/Tests/UITests/TestAppUITests/TestAppLLMOpenAIUITests.swift index a73673d..ef32665 100644 --- a/Tests/UITests/TestAppUITests/TestAppLLMOpenAIUITests.swift +++ b/Tests/UITests/TestAppUITests/TestAppLLMOpenAIUITests.swift @@ -36,8 +36,8 @@ class TestAppLLMOpenAIUITests: XCTestCase { XCTAssert(app.buttons["Next"].waitForExistence(timeout: 2)) app.buttons["Next"].tap() - app.pickers["modelPicker"].pickerWheels.element(boundBy: 0).adjust(toPickerWheelValue: "GPT 4") - XCTAssert(app.pickerWheels["GPT 4"].waitForExistence(timeout: 2)) + app.pickers["modelPicker"].pickerWheels.element(boundBy: 0).adjust(toPickerWheelValue: "GPT 4 Turbo Preview") + XCTAssert(app.pickerWheels["GPT 4 Turbo Preview"].waitForExistence(timeout: 2)) sleep(1) XCTAssert(app.buttons["Next"].waitForExistence(timeout: 2)) diff --git a/Tests/UITests/UITests.xcodeproj/project.pbxproj b/Tests/UITests/UITests.xcodeproj/project.pbxproj index 43fa6b6..44057c4 100644 --- a/Tests/UITests/UITests.xcodeproj/project.pbxproj +++ b/Tests/UITests/UITests.xcodeproj/project.pbxproj @@ -528,6 +528,7 @@ ENABLE_TESTING_SEARCH_PATHS = YES; GENERATE_INFOPLIST_FILE = YES; INFOPLIST_KEY_NSMicrophoneUsageDescription = "The Test Application uses the micophone to test the dication functionality."; + INFOPLIST_KEY_NSSpeechRecognitionUsageDescription = "Speech recognition necessary for transcribing voice input."; INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES; INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES; INFOPLIST_KEY_UILaunchScreen_Generation = YES; @@ -564,6 +565,7 @@ 
ENABLE_TESTING_SEARCH_PATHS = YES; GENERATE_INFOPLIST_FILE = YES; INFOPLIST_KEY_NSMicrophoneUsageDescription = "The Test Application uses the micophone to test the dication functionality."; + INFOPLIST_KEY_NSSpeechRecognitionUsageDescription = "Speech recognition necessary for transcribing voice input."; INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES; INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES; INFOPLIST_KEY_UILaunchScreen_Generation = YES;