diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 12bcffd..0aa3b30 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -20,20 +20,13 @@ jobs: name: Build and Test Swift Package uses: StanfordSpezi/.github/.github/workflows/xcodebuild-or-fastlane.yml@v2 with: - artifactname: SpeziML.xcresult + artifactname: SpeziML-Package.xcresult runsonlabels: '["macOS", "self-hosted"]' - scheme: SpeziML - build: - name: Build Swift Package on Xcode 14 - uses: StanfordSpezi/.github/.github/workflows/xcodebuild-or-fastlane.yml@v2 - with: - runsonlabels: '["macos-13"]' - scheme: SpeziML + scheme: SpeziML-Package buildandtestuitests: name: Build and Test UI Tests uses: StanfordSpezi/.github/.github/workflows/xcodebuild-or-fastlane.yml@v2 with: - xcodeversion: latest artifactname: TestApp.xcresult runsonlabels: '["macOS", "self-hosted"]' path: 'Tests/UITests' @@ -43,4 +36,4 @@ jobs: needs: [buildandtest, buildandtestuitests] uses: StanfordSpezi/.github/.github/workflows/create-and-upload-coverage-report.yml@v2 with: - coveragereports: SpeziML.xcresult TestApp.xcresult + coveragereports: SpeziML-Package.xcresult TestApp.xcresult diff --git a/Package.swift b/Package.swift index 6163c82..d092a00 100644 --- a/Package.swift +++ b/Package.swift @@ -1,4 +1,4 @@ -// swift-tools-version:5.7 +// swift-tools-version:5.9 // // This source file is part of the Stanford Spezi open source project @@ -18,7 +18,8 @@ let package = Package( .iOS(.v16) ], products: [ - .library(name: "SpeziOpenAI", targets: ["SpeziOpenAI"]) + .library(name: "SpeziOpenAI", targets: ["SpeziOpenAI"]), + .library(name: "SpeziSpeechRecognizer", targets: ["SpeziSpeechRecognizer"]) ], dependencies: [ .package(url: "https://github.com/MacPaw/OpenAI", .upToNextMinor(from: "0.2.3")), @@ -30,6 +31,7 @@ let package = Package( .target( name: "SpeziOpenAI", dependencies: [ + .target(name: "SpeziSpeechRecognizer"), .product(name: "OpenAI", package: "OpenAI"), .product(name: "Spezi", package: "Spezi"), .product(name: "SpeziLocalStorage", package: "SpeziStorage"), @@ -37,6 +39,9 @@ let package = Package( .product(name: "SpeziOnboarding", package: "SpeziOnboarding") ] ), + .target( + name: "SpeziSpeechRecognizer" + ), .testTarget( name: "SpeziOpenAITests", dependencies: [ diff --git a/Sources/SpeziOpenAI/MessageInputView.swift b/Sources/SpeziOpenAI/MessageInputView.swift index 3f6e349..aa8c266 100644 --- a/Sources/SpeziOpenAI/MessageInputView.swift +++ b/Sources/SpeziOpenAI/MessageInputView.swift @@ -6,17 +6,21 @@ // SPDX-License-Identifier: MIT // +import AVFoundation import OpenAI +import Speech +import SpeziSpeechRecognizer import SwiftUI /// Displays a textfield to append a message to a chat. 
public struct MessageInputView: View { - let messagePlaceholder: String + private let messagePlaceholder: String + @StateObject private var speechRecognizer = SpeechRecognizer() - @Binding var chat: [Chat] - @State var message: String = "" - @State var messageViewHeight: CGFloat = 0 + @Binding private var chat: [Chat] + @State private var message: String = "" + @State private var messageViewHeight: CGFloat = 0 public var body: some View { @@ -33,32 +37,21 @@ public struct MessageInputView: View { RoundedRectangle(cornerRadius: 20) .fill(.white.opacity(0.2)) } - .padding(.trailing, -30) + .padding(.trailing, -42) } .lineLimit(1...5) - Button( - action: { - chat.append(Chat(role: .user, content: message)) - message = "" - }, - label: { - Image(systemName: "arrow.up.circle.fill") - .accessibilityLabel(String(localized: "SEND_MESSAGE", bundle: .module)) - .font(.title) - .padding(.horizontal, -14) - .foregroundColor( - message.isEmpty ? Color(.systemGray5) : .accentColor - ) + Group { + if speechRecognizer.isAvailable && (message.isEmpty || speechRecognizer.isRecording) { + microphoneButton + } else { + sendButton + .disabled(message.isEmpty) } - ) - .padding(.trailing, -38) - .padding(.bottom, 3) - .disabled(message.isEmpty) + } + .frame(minWidth: 33) } - .padding(.trailing, 23) .padding(.horizontal, 16) .padding(.vertical, 6) - .background(.white.opacity(0.4)) .background(.thinMaterial) .background { GeometryReader { proxy in @@ -74,6 +67,46 @@ public struct MessageInputView: View { .messageInputViewHeight(messageViewHeight) } + private var sendButton: some View { + Button( + action: { + sendMessageButtonPressed() + }, + label: { + Image(systemName: "arrow.up.circle.fill") + .accessibilityLabel(String(localized: "SEND_MESSAGE", bundle: .module)) + .font(.title) + .foregroundColor( + message.isEmpty ? Color(.systemGray5) : .accentColor + ) + } + ) + .offset(x: -2, y: -3) + } + + private var microphoneButton: some View { + Button( + action: { + microphoneButtonPressed() + }, + label: { + Image(systemName: "mic.fill") + .accessibilityLabel(String(localized: "MICROPHONE_BUTTON", bundle: .module)) + .font(.title2) + .foregroundColor( + speechRecognizer.isRecording ? .red : Color(.systemGray2) + ) + .scaleEffect(speechRecognizer.isRecording ? 1.2 : 1.0) + .opacity(speechRecognizer.isRecording ? 0.7 : 1.0) + .animation( + speechRecognizer.isRecording ? .easeInOut(duration: 0.5).repeatForever(autoreverses: true) : .default, + value: speechRecognizer.isRecording + ) + } + ) + .offset(x: -4, y: -6) + } + /// - Parameters: /// - chat: The chat that should be appended to. @@ -85,6 +118,31 @@ public struct MessageInputView: View { self._chat = chat self.messagePlaceholder = messagePlaceholder ?? 
"Message" } + + + private func sendMessageButtonPressed() { + speechRecognizer.stop() + chat.append(Chat(role: .user, content: message)) + message = "" + } + + private func microphoneButtonPressed() { + if speechRecognizer.isRecording { + speechRecognizer.stop() + } else { + Task { + do { + for try await result in speechRecognizer.start() { + if result.bestTranscription.formattedString.contains("send") { + sendMessageButtonPressed() + } else { + message = result.bestTranscription.formattedString + } + } + } + } + } + } } diff --git a/Sources/SpeziOpenAI/Resources/en.lproj/Localizable.strings b/Sources/SpeziOpenAI/Resources/en.lproj/Localizable.strings index 93a8923..84e54c0 100644 --- a/Sources/SpeziOpenAI/Resources/en.lproj/Localizable.strings +++ b/Sources/SpeziOpenAI/Resources/en.lproj/Localizable.strings @@ -23,4 +23,5 @@ // MARK: Message Views "MESSAGE_INPUT_TEXTFIELD" = "Message Input Textfield"; +"MICROPHONE_BUTTON" = "Record Message"; "SEND_MESSAGE" = "Send Message"; diff --git a/Sources/SpeziSpeechRecognizer/SpeechRecognizer.swift b/Sources/SpeziSpeechRecognizer/SpeechRecognizer.swift new file mode 100644 index 0000000..49477ad --- /dev/null +++ b/Sources/SpeziSpeechRecognizer/SpeechRecognizer.swift @@ -0,0 +1,144 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Speech + +/// Encapsulates the functionality of the `SFSpeechRecognizer`. +/// +/// It provides methods to start and stop voice recognition, and publishes the state of recognition and its availability. +public class SpeechRecognizer: NSObject, ObservableObject, SFSpeechRecognizerDelegate { + private let speechRecognizer: SFSpeechRecognizer? + private let audioEngine: AVAudioEngine? + + /// Indicates whether the speech recognition is currently in progress. + @Published public private(set) var isRecording = false + /// Indicates the availability of the speech recognition service. + @Published public private(set) var isAvailable: Bool + + private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest? + private var recognitionTask: SFSpeechRecognitionTask? + + + /// Initializes a new instance of `SpeechRecognizer`. + /// + /// - Parameter locale: The locale for the speech recognition. Defaults to the current locale. + public init(locale: Locale = .current) { + if let speechRecognizer = SFSpeechRecognizer(locale: locale) { + self.speechRecognizer = speechRecognizer + self.isAvailable = speechRecognizer.isAvailable + } else { + self.speechRecognizer = nil + self.isAvailable = false + } + + self.audioEngine = AVAudioEngine() + + super.init() + + speechRecognizer?.delegate = self + } + + + /// Starts the speech recognition process. + /// + /// - Returns: An asynchronous stream of speech recognition results. + public func start() -> AsyncThrowingStream { // swiftlint:disable:this function_body_length + // We allow a larger function and closure length as the function provides a clear encapsulated functionality and the closue is mainly the function + // wrapped in a continuation. 
+ AsyncThrowingStream { continuation in // swiftlint:disable:this closure_body_length
+ guard !isRecording else {
+ print("You already have a recording session in progress; please cancel it using `stop` before starting a new session.")
+ stop()
+ continuation.finish()
+ return
+ }
+
+ guard isAvailable, let audioEngine, let speechRecognizer else {
+ print("The speech recognizer is not available.")
+ stop()
+ continuation.finish()
+ return
+ }
+
+ do {
+ let audioSession = AVAudioSession.sharedInstance()
+ try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
+ try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
+ } catch {
+ print("Error setting up the audio session: \(error.localizedDescription)")
+ stop()
+ continuation.finish(throwing: error)
+ return
+ }
+
+ let inputNode = audioEngine.inputNode
+
+ let recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
+ recognitionRequest.shouldReportPartialResults = true
+ self.recognitionRequest = recognitionRequest
+
+ recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
+ if let error {
+ continuation.finish(throwing: error)
+ }
+
+ guard self.isRecording, let result else {
+ self.stop()
+ return
+ }
+
+ continuation.yield(result)
+ }
+
+ let recordingFormat = inputNode.outputFormat(forBus: 0)
+ inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { buffer, _ in
+ self.recognitionRequest?.append(buffer)
+ }
+
+ audioEngine.prepare()
+ do {
+ isRecording = true
+ try audioEngine.start()
+ } catch {
+ print("Error starting the audio engine: \(error.localizedDescription)")
+ stop()
+ continuation.finish(throwing: error)
+ return
+ }
+
+ continuation.onTermination = { @Sendable _ in
+ self.stop()
+ }
+ }
+ }
+
+ /// Stops the current speech recognition session.
+ public func stop() {
+ guard isRecording else {
+ return
+ }
+
+ audioEngine?.stop()
+ audioEngine?.inputNode.removeTap(onBus: 0)
+
+ recognitionRequest?.endAudio()
+ recognitionRequest = nil
+
+ recognitionTask?.cancel()
+ recognitionTask = nil
+
+ isRecording = false
+ }
+
+ @_documentation(visibility: internal)
+ public func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
+ guard self.speechRecognizer == speechRecognizer else {
+ return
+ }
+
+ self.isAvailable = available
+ }
+}
diff --git a/Tests/UITests/TestAppUITests/TestAppUITests.swift b/Tests/UITests/TestAppUITests/TestAppUITests.swift
index 9756c38..446e2c1 100644
--- a/Tests/UITests/TestAppUITests/TestAppUITests.swift
+++ b/Tests/UITests/TestAppUITests/TestAppUITests.swift
@@ -75,12 +75,12 @@ class TestAppUITests: XCTestCase {
XCTAssert(app.staticTexts["User Message!"].waitForExistence(timeout: 2))
XCTAssert(app.staticTexts["Assistant Message!"].waitForExistence(timeout: 2))
- XCTAssert(app.buttons["Send Message"].waitForExistence(timeout: 2))
+ XCTAssert(app.buttons["Record Message"].waitForExistence(timeout: 2))
XCTAssertFalse(app.staticTexts["System Message!"].waitForExistence(timeout: 2))
XCTAssertFalse(app.staticTexts["Function Message!"].waitForExistence(timeout: 2))
- XCTAssertFalse(app.buttons["Send Message"].isEnabled)
+ XCTAssert(app.buttons["Record Message"].isEnabled)
try app.textViews["Message Input Textfield"].enter(value: "New Message!", dismissKeyboard: false)
XCTAssert(app.buttons["Send Message"].isEnabled)
diff --git a/Tests/UITests/UITests.xcodeproj/project.pbxproj b/Tests/UITests/UITests.xcodeproj/project.pbxproj
index 12b419c..fdd33f2 100644
--- a/Tests/UITests/UITests.xcodeproj/project.pbxproj
+++ b/Tests/UITests/UITests.xcodeproj/project.pbxproj
@@ -7,6 +7,7 @@
objects = {
/* Begin PBXBuildFile section */
+ 2F554CB72AD2036D0062CCA1 /* SpeziSpeechRecognizer in Frameworks */ = {isa = PBXBuildFile; productRef = 2F554CB62AD2036D0062CCA1 /* SpeziSpeechRecognizer */; };
2F55FC552A1B42D20051DF48 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2F55FC542A1B42D20051DF48 /* ContentView.swift */; };
2F6D139A28F5F386007C25D6 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 2F6D139928F5F386007C25D6 /* Assets.xcassets */; };
2F8A431329130A8C005D2B8F /* TestAppUITests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2F8A431229130A8C005D2B8F /* TestAppUITests.swift */; };
@@ -47,6 +48,7 @@
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
+ 2F554CB72AD2036D0062CCA1 /* SpeziSpeechRecognizer in Frameworks */,
2FD5904B2A19E4AE00153BE4 /* XCTSpezi in Frameworks */,
2FD5904D2A19E54C00153BE4 /* SpeziOpenAI in Frameworks */,
2FD590492A19E4AE00153BE4 /* Spezi in Frameworks */,
@@ -133,6 +135,7 @@
2FD590482A19E4AE00153BE4 /* Spezi */,
2FD5904A2A19E4AE00153BE4 /* XCTSpezi */,
2FD5904C2A19E54C00153BE4 /* SpeziOpenAI */,
+ 2F554CB62AD2036D0062CCA1 /* SpeziSpeechRecognizer */,
);
productName = Example;
productReference = 2F6D139228F5F384007C25D6 /* TestApp.app */;
@@ -390,6 +393,7 @@
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+ CODE_SIGN_IDENTITY = "Apple Development";
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_ASSET_PATHS = "";
@@ -397,6 +401,7 @@
ENABLE_PREVIEWS = YES;
ENABLE_TESTING_SEARCH_PATHS = YES;
GENERATE_INFOPLIST_FILE = YES;
+ INFOPLIST_KEY_NSMicrophoneUsageDescription = "The Test
Application uses the microphone to test the dictation functionality.";
INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
INFOPLIST_KEY_UILaunchScreen_Generation = YES;
@@ -409,6 +414,7 @@
MARKETING_VERSION = 1.0;
PRODUCT_BUNDLE_IDENTIFIER = edu.stanford.speziml.testapp;
PRODUCT_NAME = "$(TARGET_NAME)";
+ PROVISIONING_PROFILE_SPECIFIER = "";
SWIFT_EMIT_LOC_STRINGS = YES;
SWIFT_STRICT_CONCURRENCY = complete;
SWIFT_VERSION = 5.0;
@@ -421,6 +427,7 @@
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+ CODE_SIGN_IDENTITY = "Apple Development";
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_ASSET_PATHS = "";
@@ -428,6 +435,7 @@
ENABLE_PREVIEWS = YES;
ENABLE_TESTING_SEARCH_PATHS = YES;
GENERATE_INFOPLIST_FILE = YES;
+ INFOPLIST_KEY_NSMicrophoneUsageDescription = "The Test Application uses the microphone to test the dictation functionality.";
INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
INFOPLIST_KEY_UILaunchScreen_Generation = YES;
@@ -440,6 +448,7 @@
MARKETING_VERSION = 1.0;
PRODUCT_BUNDLE_IDENTIFIER = edu.stanford.speziml.testapp;
PRODUCT_NAME = "$(TARGET_NAME)";
+ PROVISIONING_PROFILE_SPECIFIER = "";
SWIFT_EMIT_LOC_STRINGS = YES;
SWIFT_STRICT_CONCURRENCY = complete;
SWIFT_VERSION = 5.0;
@@ -550,6 +559,7 @@
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+ CODE_SIGN_IDENTITY = "Apple Development";
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_ASSET_PATHS = "";
@@ -557,6 +567,7 @@
ENABLE_PREVIEWS = YES;
ENABLE_TESTING_SEARCH_PATHS = YES;
GENERATE_INFOPLIST_FILE = YES;
+ INFOPLIST_KEY_NSMicrophoneUsageDescription = "The Test Application uses the microphone to test the dictation functionality.";
INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
INFOPLIST_KEY_UILaunchScreen_Generation = YES;
@@ -569,6 +580,7 @@
MARKETING_VERSION = 1.0;
PRODUCT_BUNDLE_IDENTIFIER = edu.stanford.speziml.testapp;
PRODUCT_NAME = "$(TARGET_NAME)";
+ PROVISIONING_PROFILE_SPECIFIER = "";
SWIFT_EMIT_LOC_STRINGS = YES;
SWIFT_STRICT_CONCURRENCY = complete;
SWIFT_VERSION = 5.0;
@@ -650,6 +662,10 @@
/* End XCRemoteSwiftPackageReference section */

/* Begin XCSwiftPackageProductDependency section */
+ 2F554CB62AD2036D0062CCA1 /* SpeziSpeechRecognizer */ = {
+ isa = XCSwiftPackageProductDependency;
+ productName = SpeziSpeechRecognizer;
+ };
2FD590482A19E4AE00153BE4 /* Spezi */ = {
isa = XCSwiftPackageProductDependency;
package = 2FD590442A19E40F00153BE4 /* XCRemoteSwiftPackageReference "Spezi" */;
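
Usage sketch (not part of the change set): a minimal example of how the new SpeziSpeechRecognizer product is meant to be consumed, mirroring the flow in MessageInputView.microphoneButtonPressed() above. The view name DictationExample is hypothetical, and the sketch assumes microphone and speech recognition permissions have already been granted.

import SpeziSpeechRecognizer
import SwiftUI

/// Hypothetical consumer of `SpeechRecognizer`; not part of this change set.
struct DictationExample: View {
    @StateObject private var speechRecognizer = SpeechRecognizer()
    @State private var transcript = ""

    var body: some View {
        VStack {
            Text(transcript)
            Button(speechRecognizer.isRecording ? "Stop" : "Record") {
                toggleRecording()
            }
            // `isAvailable` is false if no `SFSpeechRecognizer` exists for the locale
            // or the recognizer reported unavailability through its delegate.
            .disabled(!speechRecognizer.isAvailable)
        }
    }

    private func toggleRecording() {
        if speechRecognizer.isRecording {
            speechRecognizer.stop()
        } else {
            Task {
                do {
                    // `start()` yields partial results; each result carries the full transcription so far.
                    for try await result in speechRecognizer.start() {
                        transcript = result.bestTranscription.formattedString
                    }
                } catch {
                    // The stream finishes with an error if audio setup or recognition fails.
                    print("Speech recognition failed: \(error.localizedDescription)")
                }
            }
        }
    }
}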