Add Text-to-Speech and Speech-to-Text features
- Add TTS service using AVSpeechSynthesizer for voice output
- Add STT service using SpeechAnalyzer (macOS 26) for transcription
- Add voice input (microphone) button in chat with recording level indicator
- Add speak button on assistant messages for TTS playback
- Add language toggle (EN-CA/FR-CA) for bilingual speech recognition
- Fix Swift 6 strict concurrency issues in audio callbacks
- Update proto schema with TTS/STT message types and RPCs
- Update gRPC provider with speech service endpoints

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
parent 638656e7ca
commit b754945923
@@ -51,6 +51,113 @@ message HealthResponse {
   string model_status = 2;
 }
+
+// ============ TEXT-TO-SPEECH ============
+
+// Audio format enumeration
+enum AudioFormat {
+  AUDIO_FORMAT_UNSPECIFIED = 0;
+  AUDIO_FORMAT_WAV = 1;
+  AUDIO_FORMAT_MP3 = 2;
+}
+
+// Voice configuration for TTS
+message VoiceConfig {
+  string voice_identifier = 1;
+  optional float speaking_rate = 2;    // 0.0-1.0, default 0.5
+  optional float pitch_multiplier = 3; // 0.5-2.0, default 1.0
+  optional float volume = 4;           // 0.0-1.0, default 1.0
+}
+
+// TTS Request
+message TextToSpeechRequest {
+  string text = 1;
+  AudioFormat output_format = 2;
+  optional VoiceConfig voice_config = 3;
+}
+
+// TTS Response
+message TextToSpeechResponse {
+  bytes audio_data = 1;
+  AudioFormat format = 2;
+  int32 sample_rate = 3;
+  int32 channels = 4;
+  float duration_seconds = 5;
+}
+
+// List available voices request
+message ListVoicesRequest {
+  optional string language_code = 1;
+}
+
+// Voice information
+message VoiceInfo {
+  string identifier = 1;
+  string name = 2;
+  string language = 3;
+  bool is_premium = 4;
+  string gender = 5;
+}
+
+// List voices response
+message ListVoicesResponse {
+  repeated VoiceInfo voices = 1;
+}
+
+// ============ SPEECH-TO-TEXT ============
+
+// STT Configuration
+message TranscriptionConfig {
+  optional string language_code = 1;
+  optional bool enable_punctuation = 2; // default true
+  optional bool enable_timestamps = 3;  // default false
+}
+
+// Audio data for STT
+message AudioInput {
+  bytes data = 1;
+  string mime_type = 2; // "audio/wav", "audio/mp3", "audio/m4a"
+  optional int32 sample_rate = 3;
+  optional int32 channels = 4;
+}
+
+// File-based transcription request
+message TranscribeRequest {
+  AudioInput audio = 1;
+  optional TranscriptionConfig config = 2;
+}
+
+// Transcription segment with timing
+message TranscriptionSegment {
+  string text = 1;
+  float start_time = 2;
+  float end_time = 3;
+  float confidence = 4;
+}
+
+// Transcription response
+message TranscribeResponse {
+  string text = 1;
+  repeated TranscriptionSegment segments = 2;
+  string detected_language = 3;
+  float confidence = 4;
+}
+
+// Streaming STT request chunk
+message StreamingTranscribeRequest {
+  oneof request {
+    TranscriptionConfig config = 1; // Send first to configure
+    bytes audio_chunk = 2;          // Subsequent audio chunks
+  }
+}
+
+// Streaming STT response
+message StreamingTranscribeResponse {
+  string partial_text = 1;
+  bool is_final = 2;
+  string final_text = 3;
+  repeated TranscriptionSegment segments = 4;
+}
 
 // Apple Intelligence Service
 service AppleIntelligenceService {
   // Single completion request
@@ -61,4 +168,12 @@ service AppleIntelligenceService {
 
   // Health check
   rpc Health(HealthRequest) returns (HealthResponse);
+
+  // Text-to-Speech
+  rpc TextToSpeech(TextToSpeechRequest) returns (TextToSpeechResponse);
+  rpc ListVoices(ListVoicesRequest) returns (ListVoicesResponse);
+
+  // Speech-to-Text
+  rpc Transcribe(TranscribeRequest) returns (TranscribeResponse);
+  rpc StreamTranscribe(stream StreamingTranscribeRequest) returns (stream StreamingTranscribeResponse);
 }
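For orientation only (this is not part of the commit): with the generated Swift types that appear later in this diff, a unary call to the new TextToSpeech RPC could look roughly like the sketch below. The helper name `requestSpeech`, the sample text, and the assumption that a generated client conforming to `Appleintelligence_AppleIntelligenceService.ClientProtocol` has already been constructed are hypothetical; field and case names assume standard SwiftProtobuf naming (e.g. `output_format` becomes `outputFormat`, `AUDIO_FORMAT_WAV` becomes `.wav`).

import GRPCCore

// Hypothetical helper, not part of the commit: issues one TextToSpeech call
// through an already-constructed generated client.
func requestSpeech(
    using client: some Appleintelligence_AppleIntelligenceService.ClientProtocol
) async throws -> Appleintelligence_TextToSpeechResponse {
    var request = Appleintelligence_TextToSpeechRequest()
    request.text = "Bonjour, hello!"
    request.outputFormat = .wav

    // The generated convenience overload defaults to returning the decoded response message.
    return try await client.textToSpeech(request: GRPCCore.ClientRequest(message: request))
}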
@@ -1,5 +1,7 @@
 import Foundation
 import AppKit
+import AVFoundation
+import Speech
 import UniformTypeIdentifiers
 import AppleIntelligenceCore
 
@@ -14,9 +16,32 @@ final class ChatViewModel {
     // Image attachment state
     var pendingImages: [ImageAttachment] = []
+
+    // Voice input/output state
+    var isRecording: Bool = false
+    var isSpeaking: Bool = false
+    var speakingMessageId: UUID?
+    var recordingLevel: Float = 0
 
     private var service: AppleIntelligenceService?
+    private var ttsService: TextToSpeechService?
+    private var sttService: SpeechToTextService?
     private var currentTask: Task<Void, Never>?
 
+    // Audio recording - multi-language support
+    private var audioEngine: AVAudioEngine?
+    private var speechRecognizers: [String: SFSpeechRecognizer] = [:]
+    private var activeRecognizer: SFSpeechRecognizer?
+    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
+    private var recognitionTask: SFSpeechRecognitionTask?
+
+    // Supported speech recognition languages (Canadian English and French)
+    private static let supportedLocales = ["en-CA", "fr-CA"]
+    var detectedLanguage: String = "en-CA"
+
+    // Audio playback - use direct speech synthesis for reliability
+    private var speechSynthesizer: AVSpeechSynthesizer?
+    private var speechDelegate: SpeechSynthesizerDelegate?
+
     // Maximum images per message
     private let maxImagesPerMessage = 5
 
@@ -28,6 +53,27 @@ final class ChatViewModel {
 
     func initialize() async {
         service = await AppleIntelligenceService()
+        ttsService = TextToSpeechService()
+        sttService = await SpeechToTextService()
+
+        // Initialize speech recognizers for all supported locales
+        for localeId in Self.supportedLocales {
+            if let recognizer = SFSpeechRecognizer(locale: Locale(identifier: localeId)) {
+                speechRecognizers[localeId] = recognizer
+            }
+        }
+
+        // Default to system locale if supported, otherwise en-CA
+        let systemLocale = Locale.current.identifier
+        if speechRecognizers[systemLocale] != nil {
+            detectedLanguage = systemLocale
+        } else if systemLocale.starts(with: "fr") {
+            detectedLanguage = "fr-CA"
+        } else {
+            detectedLanguage = "en-CA"
+        }
+        activeRecognizer = speechRecognizers[detectedLanguage]
+
         loadRecentImages()
     }
 
@@ -217,4 +263,262 @@ final class ChatViewModel {
         messages.removeAll()
         errorMessage = nil
     }
+
+    // MARK: - Voice Input (Speech-to-Text)
+
+    func toggleRecording() {
+        if isRecording {
+            stopRecording()
+        } else {
+            startRecording()
+        }
+    }
+
+    func startRecording() {
+        Task {
+            // Use nonisolated helper to avoid MainActor isolation inheritance in TCC callback
+            let status = await Self.requestSpeechAuthorization()
+
+            guard status == .authorized else {
+                self.errorMessage = "Speech recognition not authorized"
+                return
+            }
+            self.beginRecording()
+        }
+    }
+
+    /// Request speech recognition authorization without MainActor isolation.
+    /// This prevents Swift 6 strict concurrency from asserting MainActor in the TCC callback.
+    private nonisolated static func requestSpeechAuthorization() async -> SFSpeechRecognizerAuthorizationStatus {
+        await withCheckedContinuation { continuation in
+            SFSpeechRecognizer.requestAuthorization { status in
+                continuation.resume(returning: status)
+            }
+        }
+    }
+
+    /// Creates audio tap handler in nonisolated context to avoid MainActor isolation inheritance.
+    /// Audio taps run on CoreAudio's RealtimeMessenger queue, not MainActor.
+    private nonisolated static func createAudioTapHandler(
+        request: SFSpeechAudioBufferRecognitionRequest,
+        levelUpdater: RecordingLevelUpdater
+    ) -> (AVAudioPCMBuffer, AVAudioTime) -> Void {
+        return { buffer, _ in
+            request.append(buffer)
+
+            // Calculate audio level for visual feedback
+            guard let channelData = buffer.floatChannelData else { return }
+            let channelDataValue = channelData.pointee
+            let channelDataValueArray = stride(from: 0, to: Int(buffer.frameLength), by: buffer.stride).map { channelDataValue[$0] }
+            let rms = sqrt(channelDataValueArray.map { $0 * $0 }.reduce(0, +) / Float(buffer.frameLength))
+            let avgPower = 20 * log10(rms)
+            let level = max(0, min(1, (avgPower + 50) / 50))
+
+            levelUpdater.updateLevel(level)
+        }
+    }
+
+    private func beginRecording() {
+        // Try to find an available recognizer
+        let recognizer = activeRecognizer ?? speechRecognizers.values.first { $0.isAvailable }
+        guard let speechRecognizer = recognizer, speechRecognizer.isAvailable else {
+            errorMessage = "Speech recognition not available"
+            return
+        }
+
+        // Stop any existing recording
+        if audioEngine != nil {
+            stopRecording()
+        }
+
+        audioEngine = AVAudioEngine()
+        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
+
+        guard let audioEngine = audioEngine,
+              let recognitionRequest = recognitionRequest else {
+            errorMessage = "Failed to initialize audio engine"
+            return
+        }
+
+        recognitionRequest.shouldReportPartialResults = true
+
+        // Enable automatic punctuation if available (macOS 14+)
+        if #available(macOS 14, *) {
+            recognitionRequest.addsPunctuation = true
+        }
+
+        let inputNode = audioEngine.inputNode
+        let recordingFormat = inputNode.outputFormat(forBus: 0)
+
+        // Use nonisolated static function to create audio tap handler
+        // This breaks MainActor isolation inheritance in the closure
+        let levelUpdater = RecordingLevelUpdater(viewModel: self)
+        let audioTapHandler = Self.createAudioTapHandler(request: recognitionRequest, levelUpdater: levelUpdater)
+        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat, block: audioTapHandler)
+
+        audioEngine.prepare()
+
+        do {
+            try audioEngine.start()
+            isRecording = true
+
+            // Use a sendable wrapper for recognition results with language detection
+            let resultHandler = RecognitionResultHandler(viewModel: self)
+
+            recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
+                resultHandler.handleResult(result: result, error: error)
+            }
+        } catch {
+            errorMessage = "Failed to start recording: \(error.localizedDescription)"
+            cleanupRecording()
+        }
+    }
+
+    /// Switch to a different language for speech recognition
+    func switchLanguage(to localeId: String) {
+        guard let recognizer = speechRecognizers[localeId] else { return }
+        activeRecognizer = recognizer
+        detectedLanguage = localeId
+    }
+
+    /// Get available languages for speech recognition
+    var availableLanguages: [(id: String, name: String)] {
+        speechRecognizers.keys.sorted().compactMap { localeId in
+            let locale = Locale(identifier: localeId)
+            let name = locale.localizedString(forIdentifier: localeId) ?? localeId
+            return (id: localeId, name: name)
+        }
+    }
+
+    func stopRecording() {
+        recognitionRequest?.endAudio()
+        cleanupRecording()
+    }
+
+    fileprivate func cleanupRecording() {
+        audioEngine?.stop()
+        audioEngine?.inputNode.removeTap(onBus: 0)
+        audioEngine = nil
+        recognitionRequest = nil
+        recognitionTask?.cancel()
+        recognitionTask = nil
+        isRecording = false
+        recordingLevel = 0
+    }
+
+    // MARK: - Voice Output (Text-to-Speech)
+
+    func speakMessage(_ message: ChatMessage) {
+        guard !message.content.isEmpty else { return }
+
+        // If already speaking this message, stop
+        if isSpeaking && speakingMessageId == message.id {
+            stopSpeaking()
+            return
+        }
+
+        // Stop any current speech
+        stopSpeaking()
+
+        speakingMessageId = message.id
+        isSpeaking = true
+
+        // Create utterance
+        let utterance = AVSpeechUtterance(string: message.content)
+        utterance.rate = AVSpeechUtteranceDefaultSpeechRate
+        utterance.pitchMultiplier = 1.0
+        utterance.volume = 1.0
+
+        // Use voice matching current speech recognition language
+        if detectedLanguage == "fr-CA" {
+            utterance.voice = AVSpeechSynthesisVoice(language: "fr-CA")
+        } else {
+            utterance.voice = AVSpeechSynthesisVoice(language: "en-CA")
+        }
+
+        // Create synthesizer and delegate
+        let synthesizer = AVSpeechSynthesizer()
+        speechDelegate = SpeechSynthesizerDelegate { [weak self] in
+            Task { @MainActor in
+                self?.isSpeaking = false
+                self?.speakingMessageId = nil
+                self?.speechDelegate = nil
+                self?.speechSynthesizer = nil
+            }
+        }
+        synthesizer.delegate = speechDelegate
+        speechSynthesizer = synthesizer
+
+        // Speak directly
+        synthesizer.speak(utterance)
+    }
+
+    func stopSpeaking() {
+        speechSynthesizer?.stopSpeaking(at: .immediate)
+        speechSynthesizer = nil
+        speechDelegate = nil
+        isSpeaking = false
+        speakingMessageId = nil
+    }
+}
+
+// MARK: - Speech Synthesizer Delegate
+
+private final class SpeechSynthesizerDelegate: NSObject, AVSpeechSynthesizerDelegate, @unchecked Sendable {
+    let onFinish: () -> Void
+
+    init(onFinish: @escaping () -> Void) {
+        self.onFinish = onFinish
+    }
+
+    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
+        onFinish()
+    }
+
+    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didCancel utterance: AVSpeechUtterance) {
+        onFinish()
+    }
+}
+
+// MARK: - Sendable Wrappers for Audio Callbacks
+
+/// Wrapper to safely update recording level from audio callback thread
+private final class RecordingLevelUpdater: @unchecked Sendable {
+    private weak var viewModel: ChatViewModel?
+
+    init(viewModel: ChatViewModel) {
+        self.viewModel = viewModel
+    }
+
+    func updateLevel(_ level: Float) {
+        Task { @MainActor [weak viewModel] in
+            viewModel?.recordingLevel = level
+        }
+    }
+}
+
+/// Wrapper to safely handle recognition results from Speech framework callback
+private final class RecognitionResultHandler: @unchecked Sendable {
+    private weak var viewModel: ChatViewModel?
+
+    init(viewModel: ChatViewModel) {
+        self.viewModel = viewModel
+    }
+
+    func handleResult(result: SFSpeechRecognitionResult?, error: Error?) {
+        // Extract data before crossing actor boundary (SFSpeechRecognitionResult is not Sendable)
+        let transcription = result?.bestTranscription.formattedString
+        let isFinal = result?.isFinal ?? false
+        let hasError = error != nil
+
+        Task { @MainActor [weak viewModel] in
+            if let transcription = transcription {
+                viewModel?.inputText = transcription
+            }
+
+            if hasError || isFinal {
+                viewModel?.cleanupRecording()
+            }
+        }
+    }
 }
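A quick sanity check on the metering math in createAudioTapHandler above (illustrative only, not part of the commit): the tap converts the buffer's RMS amplitude to dBFS and maps the range -50 dB to 0 dB linearly onto 0 to 1 for the recording indicator, clamping anything quieter than -50 dB to zero.

import Foundation

// Worked example of the level mapping (hypothetical input value):
let rms: Float = 0.1                               // moderately loud input
let avgPower = 20 * log10(rms)                     // 20 * log10(0.1) = -20 dBFS
let level = max(0, min(1, (avgPower + 50) / 50))   // (-20 + 50) / 50 = 0.6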
@@ -23,7 +23,11 @@ struct ChatView: View {
         ScrollView {
             LazyVStack(spacing: 12) {
                 ForEach(viewModel.messages) { message in
-                    MessageBubble(message: message)
+                    MessageBubble(
+                        message: message,
+                        isSpeaking: viewModel.speakingMessageId == message.id,
+                        onSpeak: { viewModel.speakMessage(message) }
+                    )
                         .id(message.id)
                 }
             }
@@ -286,6 +290,45 @@ struct ChatView: View {
                 .buttonStyle(.plain)
                 .help("Paste image from clipboard")
+
+                // Language toggle for speech recognition
+                Button {
+                    // Toggle between en-CA and fr-CA
+                    let newLang = viewModel.detectedLanguage == "en-CA" ? "fr-CA" : "en-CA"
+                    viewModel.switchLanguage(to: newLang)
+                } label: {
+                    Text(viewModel.detectedLanguage == "fr-CA" ? "FR" : "EN")
+                        .font(.caption.bold())
+                        .foregroundStyle(.secondary)
+                        .frame(width: 24, height: 24)
+                        .background(
+                            RoundedRectangle(cornerRadius: 4)
+                                .fill(Color.secondary.opacity(0.1))
+                        )
+                }
+                .buttonStyle(.plain)
+                .help("Speech language: \(viewModel.detectedLanguage) (click to toggle)")
+
+                // Microphone button for voice input
+                Button {
+                    viewModel.toggleRecording()
+                } label: {
+                    ZStack {
+                        if viewModel.isRecording {
+                            // Recording indicator with level
+                            Circle()
+                                .fill(Color.red.opacity(0.3))
+                                .frame(width: 28 + CGFloat(viewModel.recordingLevel) * 10,
+                                       height: 28 + CGFloat(viewModel.recordingLevel) * 10)
+                                .animation(.easeInOut(duration: 0.1), value: viewModel.recordingLevel)
+                        }
+                        Image(systemName: viewModel.isRecording ? "mic.fill" : "mic")
+                            .font(.title3)
+                            .foregroundStyle(viewModel.isRecording ? .red : .secondary)
+                    }
+                }
+                .buttonStyle(.plain)
+                .help(viewModel.isRecording ? "Stop recording" : "Voice input")
 
                 TextField("Message...", text: $viewModel.inputText, axis: .vertical)
                     .textFieldStyle(.plain)
                     .lineLimit(1...5)
@@ -386,6 +429,8 @@ struct RecentImageThumbnail: View {
 
 struct MessageBubble: View {
     let message: ChatMessage
+    var isSpeaking: Bool = false
+    var onSpeak: (() -> Void)? = nil
     @State private var showCopied = false
 
     var body: some View {
@@ -419,10 +464,23 @@ struct MessageBubble: View {
                 }
             }
 
-            // Copy button for assistant messages
+            // Action buttons for assistant messages
             if message.role == .assistant && !message.content.isEmpty && !message.isStreaming {
-                HStack {
-                    Spacer()
+                HStack(spacing: 12) {
+                    // Speaker button for TTS
+                    Button {
+                        onSpeak?()
+                    } label: {
+                        HStack(spacing: 4) {
+                            Image(systemName: isSpeaking ? "stop.fill" : "speaker.wave.2")
+                            Text(isSpeaking ? "Stop" : "Speak")
+                        }
+                        .font(.caption)
+                        .foregroundStyle(isSpeaking ? .red : .secondary)
+                    }
+                    .buttonStyle(.plain)
+
+                    // Copy button
                     Button {
                         NSPasteboard.general.clearContents()
                         NSPasteboard.general.setString(message.content, forType: .string)
@@ -439,6 +497,8 @@ struct MessageBubble: View {
                         .foregroundStyle(.secondary)
                     }
                     .buttonStyle(.plain)
+
+                    Spacer()
                 }
                 .padding(.top, 2)
             }
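The generated provider code below wires up the new RPCs, including the bidirectional StreamTranscribe method. For orientation only (not part of the commit), the oneof framing defined in the proto above means a client is expected to send one configuration message first and raw audio chunks afterwards. A rough client-side sketch, with hypothetical helper and variable names and standard SwiftProtobuf field naming, could look like this:

import Foundation
import GRPCCore

// Hypothetical helper, not part of the commit: builds the request stream for StreamTranscribe.
func makeTranscribeStream(chunks: [Data]) -> GRPCCore.StreamingClientRequest<Appleintelligence_StreamingTranscribeRequest> {
    GRPCCore.StreamingClientRequest(producer: { writer in
        // First message: configuration only.
        var first = Appleintelligence_StreamingTranscribeRequest()
        first.config.languageCode = "fr-CA"
        try await writer.write(first)

        // Subsequent messages: one audio chunk each.
        for chunk in chunks {
            var next = Appleintelligence_StreamingTranscribeRequest()
            next.audioChunk = chunk
            try await writer.write(next)
        }
    })
}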
@@ -56,11 +56,63 @@ public enum Appleintelligence_AppleIntelligenceService: Sendable {
                 method: "Health"
             )
         }
+        /// Namespace for "TextToSpeech" metadata.
+        public enum TextToSpeech: Sendable {
+            /// Request type for "TextToSpeech".
+            public typealias Input = Appleintelligence_TextToSpeechRequest
+            /// Response type for "TextToSpeech".
+            public typealias Output = Appleintelligence_TextToSpeechResponse
+            /// Descriptor for "TextToSpeech".
+            public static let descriptor = GRPCCore.MethodDescriptor(
+                service: GRPCCore.ServiceDescriptor(fullyQualifiedService: "appleintelligence.AppleIntelligenceService"),
+                method: "TextToSpeech"
+            )
+        }
+        /// Namespace for "ListVoices" metadata.
+        public enum ListVoices: Sendable {
+            /// Request type for "ListVoices".
+            public typealias Input = Appleintelligence_ListVoicesRequest
+            /// Response type for "ListVoices".
+            public typealias Output = Appleintelligence_ListVoicesResponse
+            /// Descriptor for "ListVoices".
+            public static let descriptor = GRPCCore.MethodDescriptor(
+                service: GRPCCore.ServiceDescriptor(fullyQualifiedService: "appleintelligence.AppleIntelligenceService"),
+                method: "ListVoices"
+            )
+        }
+        /// Namespace for "Transcribe" metadata.
+        public enum Transcribe: Sendable {
+            /// Request type for "Transcribe".
+            public typealias Input = Appleintelligence_TranscribeRequest
+            /// Response type for "Transcribe".
+            public typealias Output = Appleintelligence_TranscribeResponse
+            /// Descriptor for "Transcribe".
+            public static let descriptor = GRPCCore.MethodDescriptor(
+                service: GRPCCore.ServiceDescriptor(fullyQualifiedService: "appleintelligence.AppleIntelligenceService"),
+                method: "Transcribe"
+            )
+        }
+        /// Namespace for "StreamTranscribe" metadata.
+        public enum StreamTranscribe: Sendable {
+            /// Request type for "StreamTranscribe".
+            public typealias Input = Appleintelligence_StreamingTranscribeRequest
+            /// Response type for "StreamTranscribe".
+            public typealias Output = Appleintelligence_StreamingTranscribeResponse
+            /// Descriptor for "StreamTranscribe".
+            public static let descriptor = GRPCCore.MethodDescriptor(
+                service: GRPCCore.ServiceDescriptor(fullyQualifiedService: "appleintelligence.AppleIntelligenceService"),
+                method: "StreamTranscribe"
+            )
+        }
         /// Descriptors for all methods in the "appleintelligence.AppleIntelligenceService" service.
         public static let descriptors: [GRPCCore.MethodDescriptor] = [
             Complete.descriptor,
             StreamComplete.descriptor,
-            Health.descriptor
+            Health.descriptor,
+            TextToSpeech.descriptor,
+            ListVoices.descriptor,
+            Transcribe.descriptor,
+            StreamTranscribe.descriptor
         ]
     }
 }
@@ -143,6 +195,70 @@ extension Appleintelligence_AppleIntelligenceService {
             request: GRPCCore.StreamingServerRequest<Appleintelligence_HealthRequest>,
             context: GRPCCore.ServerContext
         ) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_HealthResponse>
+
+        /// Handle the "TextToSpeech" method.
+        ///
+        /// > Source IDL Documentation:
+        /// >
+        /// > Text-to-Speech
+        ///
+        /// - Parameters:
+        ///   - request: A streaming request of `Appleintelligence_TextToSpeechRequest` messages.
+        ///   - context: Context providing information about the RPC.
+        /// - Throws: Any error which occurred during the processing of the request. Thrown errors
+        ///     of type `RPCError` are mapped to appropriate statuses. All other errors are converted
+        ///     to an internal error.
+        /// - Returns: A streaming response of `Appleintelligence_TextToSpeechResponse` messages.
+        func textToSpeech(
+            request: GRPCCore.StreamingServerRequest<Appleintelligence_TextToSpeechRequest>,
+            context: GRPCCore.ServerContext
+        ) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_TextToSpeechResponse>
+
+        /// Handle the "ListVoices" method.
+        ///
+        /// - Parameters:
+        ///   - request: A streaming request of `Appleintelligence_ListVoicesRequest` messages.
+        ///   - context: Context providing information about the RPC.
+        /// - Throws: Any error which occurred during the processing of the request. Thrown errors
+        ///     of type `RPCError` are mapped to appropriate statuses. All other errors are converted
+        ///     to an internal error.
+        /// - Returns: A streaming response of `Appleintelligence_ListVoicesResponse` messages.
+        func listVoices(
+            request: GRPCCore.StreamingServerRequest<Appleintelligence_ListVoicesRequest>,
+            context: GRPCCore.ServerContext
+        ) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_ListVoicesResponse>
+
+        /// Handle the "Transcribe" method.
+        ///
+        /// > Source IDL Documentation:
+        /// >
+        /// > Speech-to-Text
+        ///
+        /// - Parameters:
+        ///   - request: A streaming request of `Appleintelligence_TranscribeRequest` messages.
+        ///   - context: Context providing information about the RPC.
+        /// - Throws: Any error which occurred during the processing of the request. Thrown errors
+        ///     of type `RPCError` are mapped to appropriate statuses. All other errors are converted
+        ///     to an internal error.
+        /// - Returns: A streaming response of `Appleintelligence_TranscribeResponse` messages.
+        func transcribe(
+            request: GRPCCore.StreamingServerRequest<Appleintelligence_TranscribeRequest>,
+            context: GRPCCore.ServerContext
+        ) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_TranscribeResponse>
+
+        /// Handle the "StreamTranscribe" method.
+        ///
+        /// - Parameters:
+        ///   - request: A streaming request of `Appleintelligence_StreamingTranscribeRequest` messages.
+        ///   - context: Context providing information about the RPC.
+        /// - Throws: Any error which occurred during the processing of the request. Thrown errors
+        ///     of type `RPCError` are mapped to appropriate statuses. All other errors are converted
+        ///     to an internal error.
+        /// - Returns: A streaming response of `Appleintelligence_StreamingTranscribeResponse` messages.
+        func streamTranscribe(
+            request: GRPCCore.StreamingServerRequest<Appleintelligence_StreamingTranscribeRequest>,
+            context: GRPCCore.ServerContext
+        ) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_StreamingTranscribeResponse>
     }
 
     /// Service protocol for the "appleintelligence.AppleIntelligenceService" service.
@@ -210,6 +326,70 @@ extension Appleintelligence_AppleIntelligenceService {
             request: GRPCCore.ServerRequest<Appleintelligence_HealthRequest>,
             context: GRPCCore.ServerContext
         ) async throws -> GRPCCore.ServerResponse<Appleintelligence_HealthResponse>
+
+        /// Handle the "TextToSpeech" method.
+        ///
+        /// > Source IDL Documentation:
+        /// >
+        /// > Text-to-Speech
+        ///
+        /// - Parameters:
+        ///   - request: A request containing a single `Appleintelligence_TextToSpeechRequest` message.
+        ///   - context: Context providing information about the RPC.
+        /// - Throws: Any error which occurred during the processing of the request. Thrown errors
+        ///     of type `RPCError` are mapped to appropriate statuses. All other errors are converted
+        ///     to an internal error.
+        /// - Returns: A response containing a single `Appleintelligence_TextToSpeechResponse` message.
+        func textToSpeech(
+            request: GRPCCore.ServerRequest<Appleintelligence_TextToSpeechRequest>,
+            context: GRPCCore.ServerContext
+        ) async throws -> GRPCCore.ServerResponse<Appleintelligence_TextToSpeechResponse>
+
+        /// Handle the "ListVoices" method.
+        ///
+        /// - Parameters:
+        ///   - request: A request containing a single `Appleintelligence_ListVoicesRequest` message.
+        ///   - context: Context providing information about the RPC.
+        /// - Throws: Any error which occurred during the processing of the request. Thrown errors
+        ///     of type `RPCError` are mapped to appropriate statuses. All other errors are converted
+        ///     to an internal error.
+        /// - Returns: A response containing a single `Appleintelligence_ListVoicesResponse` message.
+        func listVoices(
+            request: GRPCCore.ServerRequest<Appleintelligence_ListVoicesRequest>,
+            context: GRPCCore.ServerContext
+        ) async throws -> GRPCCore.ServerResponse<Appleintelligence_ListVoicesResponse>
+
+        /// Handle the "Transcribe" method.
+        ///
+        /// > Source IDL Documentation:
+        /// >
+        /// > Speech-to-Text
+        ///
+        /// - Parameters:
+        ///   - request: A request containing a single `Appleintelligence_TranscribeRequest` message.
+        ///   - context: Context providing information about the RPC.
+        /// - Throws: Any error which occurred during the processing of the request. Thrown errors
+        ///     of type `RPCError` are mapped to appropriate statuses. All other errors are converted
+        ///     to an internal error.
+        /// - Returns: A response containing a single `Appleintelligence_TranscribeResponse` message.
+        func transcribe(
+            request: GRPCCore.ServerRequest<Appleintelligence_TranscribeRequest>,
+            context: GRPCCore.ServerContext
+        ) async throws -> GRPCCore.ServerResponse<Appleintelligence_TranscribeResponse>
+
+        /// Handle the "StreamTranscribe" method.
+        ///
+        /// - Parameters:
+        ///   - request: A streaming request of `Appleintelligence_StreamingTranscribeRequest` messages.
+        ///   - context: Context providing information about the RPC.
+        /// - Throws: Any error which occurred during the processing of the request. Thrown errors
+        ///     of type `RPCError` are mapped to appropriate statuses. All other errors are converted
+        ///     to an internal error.
+        /// - Returns: A streaming response of `Appleintelligence_StreamingTranscribeResponse` messages.
+        func streamTranscribe(
+            request: GRPCCore.StreamingServerRequest<Appleintelligence_StreamingTranscribeRequest>,
+            context: GRPCCore.ServerContext
+        ) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_StreamingTranscribeResponse>
     }
 
     /// Simple service protocol for the "appleintelligence.AppleIntelligenceService" service.
@@ -276,6 +456,71 @@ extension Appleintelligence_AppleIntelligenceService {
             request: Appleintelligence_HealthRequest,
             context: GRPCCore.ServerContext
         ) async throws -> Appleintelligence_HealthResponse
+
+        /// Handle the "TextToSpeech" method.
+        ///
+        /// > Source IDL Documentation:
+        /// >
+        /// > Text-to-Speech
+        ///
+        /// - Parameters:
+        ///   - request: A `Appleintelligence_TextToSpeechRequest` message.
+        ///   - context: Context providing information about the RPC.
+        /// - Throws: Any error which occurred during the processing of the request. Thrown errors
+        ///     of type `RPCError` are mapped to appropriate statuses. All other errors are converted
+        ///     to an internal error.
+        /// - Returns: A `Appleintelligence_TextToSpeechResponse` to respond with.
+        func textToSpeech(
+            request: Appleintelligence_TextToSpeechRequest,
+            context: GRPCCore.ServerContext
+        ) async throws -> Appleintelligence_TextToSpeechResponse
+
+        /// Handle the "ListVoices" method.
+        ///
+        /// - Parameters:
+        ///   - request: A `Appleintelligence_ListVoicesRequest` message.
+        ///   - context: Context providing information about the RPC.
+        /// - Throws: Any error which occurred during the processing of the request. Thrown errors
+        ///     of type `RPCError` are mapped to appropriate statuses. All other errors are converted
+        ///     to an internal error.
+        /// - Returns: A `Appleintelligence_ListVoicesResponse` to respond with.
+        func listVoices(
+            request: Appleintelligence_ListVoicesRequest,
+            context: GRPCCore.ServerContext
+        ) async throws -> Appleintelligence_ListVoicesResponse
+
+        /// Handle the "Transcribe" method.
+        ///
+        /// > Source IDL Documentation:
+        /// >
+        /// > Speech-to-Text
+        ///
+        /// - Parameters:
+        ///   - request: A `Appleintelligence_TranscribeRequest` message.
+        ///   - context: Context providing information about the RPC.
+        /// - Throws: Any error which occurred during the processing of the request. Thrown errors
+        ///     of type `RPCError` are mapped to appropriate statuses. All other errors are converted
+        ///     to an internal error.
+        /// - Returns: A `Appleintelligence_TranscribeResponse` to respond with.
+        func transcribe(
+            request: Appleintelligence_TranscribeRequest,
+            context: GRPCCore.ServerContext
+        ) async throws -> Appleintelligence_TranscribeResponse
+
+        /// Handle the "StreamTranscribe" method.
+        ///
+        /// - Parameters:
+        ///   - request: A stream of `Appleintelligence_StreamingTranscribeRequest` messages.
+        ///   - response: A response stream of `Appleintelligence_StreamingTranscribeResponse` messages.
+        ///   - context: Context providing information about the RPC.
+        /// - Throws: Any error which occurred during the processing of the request. Thrown errors
+        ///     of type `RPCError` are mapped to appropriate statuses. All other errors are converted
+        ///     to an internal error.
+        func streamTranscribe(
+            request: GRPCCore.RPCAsyncSequence<Appleintelligence_StreamingTranscribeRequest, any Swift.Error>,
+            response: GRPCCore.RPCWriter<Appleintelligence_StreamingTranscribeResponse>,
+            context: GRPCCore.ServerContext
+        ) async throws
     }
 }
 
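The commit message also mentions updating the gRPC provider itself with speech endpoints; that provider implementation is outside this excerpt. Purely as an illustration of how the ListVoices requirement above could be satisfied (the helper name is hypothetical, and the mapping assumes AVFoundation's AVSpeechSynthesisVoice API and standard SwiftProtobuf property names):

import AVFoundation

// Hypothetical mapping helper, not part of the commit: builds a ListVoicesResponse
// from the voices installed on the system, optionally filtered by language prefix.
func makeListVoicesResponse(for request: Appleintelligence_ListVoicesRequest) -> Appleintelligence_ListVoicesResponse {
    var response = Appleintelligence_ListVoicesResponse()
    response.voices = AVSpeechSynthesisVoice.speechVoices()
        .filter { request.languageCode.isEmpty || $0.language.hasPrefix(request.languageCode) }
        .map { voice in
            var info = Appleintelligence_VoiceInfo()
            info.identifier = voice.identifier
            info.name = voice.name
            info.language = voice.language
            info.isPremium = voice.quality == .premium
            return info
        }
    return response
}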
@@ -316,6 +561,50 @@ extension Appleintelligence_AppleIntelligenceService.StreamingServiceProtocol {
                 )
             }
         )
+        router.registerHandler(
+            forMethod: Appleintelligence_AppleIntelligenceService.Method.TextToSpeech.descriptor,
+            deserializer: GRPCProtobuf.ProtobufDeserializer<Appleintelligence_TextToSpeechRequest>(),
+            serializer: GRPCProtobuf.ProtobufSerializer<Appleintelligence_TextToSpeechResponse>(),
+            handler: { request, context in
+                try await self.textToSpeech(
+                    request: request,
+                    context: context
+                )
+            }
+        )
+        router.registerHandler(
+            forMethod: Appleintelligence_AppleIntelligenceService.Method.ListVoices.descriptor,
+            deserializer: GRPCProtobuf.ProtobufDeserializer<Appleintelligence_ListVoicesRequest>(),
+            serializer: GRPCProtobuf.ProtobufSerializer<Appleintelligence_ListVoicesResponse>(),
+            handler: { request, context in
+                try await self.listVoices(
+                    request: request,
+                    context: context
+                )
+            }
+        )
+        router.registerHandler(
+            forMethod: Appleintelligence_AppleIntelligenceService.Method.Transcribe.descriptor,
+            deserializer: GRPCProtobuf.ProtobufDeserializer<Appleintelligence_TranscribeRequest>(),
+            serializer: GRPCProtobuf.ProtobufSerializer<Appleintelligence_TranscribeResponse>(),
+            handler: { request, context in
+                try await self.transcribe(
+                    request: request,
+                    context: context
+                )
+            }
+        )
+        router.registerHandler(
+            forMethod: Appleintelligence_AppleIntelligenceService.Method.StreamTranscribe.descriptor,
+            deserializer: GRPCProtobuf.ProtobufDeserializer<Appleintelligence_StreamingTranscribeRequest>(),
+            serializer: GRPCProtobuf.ProtobufSerializer<Appleintelligence_StreamingTranscribeResponse>(),
+            handler: { request, context in
+                try await self.streamTranscribe(
+                    request: request,
+                    context: context
+                )
+            }
+        )
     }
 }
 
@@ -354,6 +643,39 @@ extension Appleintelligence_AppleIntelligenceService.ServiceProtocol {
         )
         return GRPCCore.StreamingServerResponse(single: response)
     }
+
+    public func textToSpeech(
+        request: GRPCCore.StreamingServerRequest<Appleintelligence_TextToSpeechRequest>,
+        context: GRPCCore.ServerContext
+    ) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_TextToSpeechResponse> {
+        let response = try await self.textToSpeech(
+            request: GRPCCore.ServerRequest(stream: request),
+            context: context
+        )
+        return GRPCCore.StreamingServerResponse(single: response)
+    }
+
+    public func listVoices(
+        request: GRPCCore.StreamingServerRequest<Appleintelligence_ListVoicesRequest>,
+        context: GRPCCore.ServerContext
+    ) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_ListVoicesResponse> {
+        let response = try await self.listVoices(
+            request: GRPCCore.ServerRequest(stream: request),
+            context: context
+        )
+        return GRPCCore.StreamingServerResponse(single: response)
+    }
+
+    public func transcribe(
+        request: GRPCCore.StreamingServerRequest<Appleintelligence_TranscribeRequest>,
+        context: GRPCCore.ServerContext
+    ) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_TranscribeResponse> {
+        let response = try await self.transcribe(
+            request: GRPCCore.ServerRequest(stream: request),
+            context: context
+        )
+        return GRPCCore.StreamingServerResponse(single: response)
+    }
 }
 
 // Default implementation of methods from 'ServiceProtocol'.
@@ -401,6 +723,62 @@ extension Appleintelligence_AppleIntelligenceService.SimpleServiceProtocol {
             metadata: [:]
         )
     }
+
+    public func textToSpeech(
+        request: GRPCCore.ServerRequest<Appleintelligence_TextToSpeechRequest>,
+        context: GRPCCore.ServerContext
+    ) async throws -> GRPCCore.ServerResponse<Appleintelligence_TextToSpeechResponse> {
+        return GRPCCore.ServerResponse<Appleintelligence_TextToSpeechResponse>(
+            message: try await self.textToSpeech(
+                request: request.message,
+                context: context
+            ),
+            metadata: [:]
+        )
+    }
+
+    public func listVoices(
+        request: GRPCCore.ServerRequest<Appleintelligence_ListVoicesRequest>,
+        context: GRPCCore.ServerContext
+    ) async throws -> GRPCCore.ServerResponse<Appleintelligence_ListVoicesResponse> {
+        return GRPCCore.ServerResponse<Appleintelligence_ListVoicesResponse>(
+            message: try await self.listVoices(
+                request: request.message,
+                context: context
+            ),
+            metadata: [:]
+        )
+    }
+
+    public func transcribe(
+        request: GRPCCore.ServerRequest<Appleintelligence_TranscribeRequest>,
+        context: GRPCCore.ServerContext
+    ) async throws -> GRPCCore.ServerResponse<Appleintelligence_TranscribeResponse> {
+        return GRPCCore.ServerResponse<Appleintelligence_TranscribeResponse>(
+            message: try await self.transcribe(
+                request: request.message,
+                context: context
+            ),
+            metadata: [:]
+        )
+    }
+
+    public func streamTranscribe(
+        request: GRPCCore.StreamingServerRequest<Appleintelligence_StreamingTranscribeRequest>,
+        context: GRPCCore.ServerContext
+    ) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_StreamingTranscribeResponse> {
+        return GRPCCore.StreamingServerResponse<Appleintelligence_StreamingTranscribeResponse>(
+            metadata: [:],
+            producer: { writer in
+                try await self.streamTranscribe(
+                    request: request.messages,
+                    response: writer,
+                    context: context
+                )
+                return [:]
+            }
+        )
+    }
 }
 
 // MARK: appleintelligence.AppleIntelligenceService (client)
@@ -484,6 +862,90 @@ extension Appleintelligence_AppleIntelligenceService {
         options: GRPCCore.CallOptions,
         onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_HealthResponse>) async throws -> Result
     ) async throws -> Result where Result: Sendable
+
+    /// Call the "TextToSpeech" method.
+    ///
+    /// > Source IDL Documentation:
+    /// >
+    /// > Text-to-Speech
+    ///
+    /// - Parameters:
+    ///   - request: A request containing a single `Appleintelligence_TextToSpeechRequest` message.
+    ///   - serializer: A serializer for `Appleintelligence_TextToSpeechRequest` messages.
+    ///   - deserializer: A deserializer for `Appleintelligence_TextToSpeechResponse` messages.
+    ///   - options: Options to apply to this RPC.
+    ///   - handleResponse: A closure which handles the response, the result of which is
+    ///     returned to the caller. Returning from the closure will cancel the RPC if it
+    ///     hasn't already finished.
+    /// - Returns: The result of `handleResponse`.
+    func textToSpeech<Result>(
+        request: GRPCCore.ClientRequest<Appleintelligence_TextToSpeechRequest>,
+        serializer: some GRPCCore.MessageSerializer<Appleintelligence_TextToSpeechRequest>,
+        deserializer: some GRPCCore.MessageDeserializer<Appleintelligence_TextToSpeechResponse>,
+        options: GRPCCore.CallOptions,
+        onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_TextToSpeechResponse>) async throws -> Result
+    ) async throws -> Result where Result: Sendable
+
+    /// Call the "ListVoices" method.
+    ///
+    /// - Parameters:
+    ///   - request: A request containing a single `Appleintelligence_ListVoicesRequest` message.
+    ///   - serializer: A serializer for `Appleintelligence_ListVoicesRequest` messages.
+    ///   - deserializer: A deserializer for `Appleintelligence_ListVoicesResponse` messages.
+    ///   - options: Options to apply to this RPC.
+    ///   - handleResponse: A closure which handles the response, the result of which is
+    ///     returned to the caller. Returning from the closure will cancel the RPC if it
+    ///     hasn't already finished.
+    /// - Returns: The result of `handleResponse`.
+    func listVoices<Result>(
+        request: GRPCCore.ClientRequest<Appleintelligence_ListVoicesRequest>,
+        serializer: some GRPCCore.MessageSerializer<Appleintelligence_ListVoicesRequest>,
+        deserializer: some GRPCCore.MessageDeserializer<Appleintelligence_ListVoicesResponse>,
+        options: GRPCCore.CallOptions,
+        onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_ListVoicesResponse>) async throws -> Result
+    ) async throws -> Result where Result: Sendable
+
+    /// Call the "Transcribe" method.
+    ///
+    /// > Source IDL Documentation:
+    /// >
+    /// > Speech-to-Text
+    ///
+    /// - Parameters:
+    ///   - request: A request containing a single `Appleintelligence_TranscribeRequest` message.
+    ///   - serializer: A serializer for `Appleintelligence_TranscribeRequest` messages.
+    ///   - deserializer: A deserializer for `Appleintelligence_TranscribeResponse` messages.
+    ///   - options: Options to apply to this RPC.
+    ///   - handleResponse: A closure which handles the response, the result of which is
+    ///     returned to the caller. Returning from the closure will cancel the RPC if it
+    ///     hasn't already finished.
+    /// - Returns: The result of `handleResponse`.
+    func transcribe<Result>(
+        request: GRPCCore.ClientRequest<Appleintelligence_TranscribeRequest>,
+        serializer: some GRPCCore.MessageSerializer<Appleintelligence_TranscribeRequest>,
+        deserializer: some GRPCCore.MessageDeserializer<Appleintelligence_TranscribeResponse>,
+        options: GRPCCore.CallOptions,
+        onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_TranscribeResponse>) async throws -> Result
+    ) async throws -> Result where Result: Sendable
+
+    /// Call the "StreamTranscribe" method.
+    ///
+    /// - Parameters:
+    ///   - request: A streaming request producing `Appleintelligence_StreamingTranscribeRequest` messages.
+    ///   - serializer: A serializer for `Appleintelligence_StreamingTranscribeRequest` messages.
+    ///   - deserializer: A deserializer for `Appleintelligence_StreamingTranscribeResponse` messages.
+    ///   - options: Options to apply to this RPC.
+    ///   - handleResponse: A closure which handles the response, the result of which is
+    ///     returned to the caller. Returning from the closure will cancel the RPC if it
+    ///     hasn't already finished.
+    /// - Returns: The result of `handleResponse`.
+    func streamTranscribe<Result>(
+        request: GRPCCore.StreamingClientRequest<Appleintelligence_StreamingTranscribeRequest>,
+        serializer: some GRPCCore.MessageSerializer<Appleintelligence_StreamingTranscribeRequest>,
+        deserializer: some GRPCCore.MessageDeserializer<Appleintelligence_StreamingTranscribeResponse>,
+        options: GRPCCore.CallOptions,
+        onResponse handleResponse: @Sendable @escaping (GRPCCore.StreamingClientResponse<Appleintelligence_StreamingTranscribeResponse>) async throws -> Result
+    ) async throws -> Result where Result: Sendable
 }
 
 /// Generated client for the "appleintelligence.AppleIntelligenceService" service.
@@ -605,6 +1067,132 @@ extension Appleintelligence_AppleIntelligenceService {
                 onResponse: handleResponse
             )
         }
+
+        /// Call the "TextToSpeech" method.
+        ///
+        /// > Source IDL Documentation:
+        /// >
+        /// > Text-to-Speech
+        ///
+        /// - Parameters:
+        ///   - request: A request containing a single `Appleintelligence_TextToSpeechRequest` message.
+        ///   - serializer: A serializer for `Appleintelligence_TextToSpeechRequest` messages.
+        ///   - deserializer: A deserializer for `Appleintelligence_TextToSpeechResponse` messages.
+        ///   - options: Options to apply to this RPC.
+        ///   - handleResponse: A closure which handles the response, the result of which is
+        ///     returned to the caller. Returning from the closure will cancel the RPC if it
+        ///     hasn't already finished.
+        /// - Returns: The result of `handleResponse`.
+        public func textToSpeech<Result>(
+            request: GRPCCore.ClientRequest<Appleintelligence_TextToSpeechRequest>,
+            serializer: some GRPCCore.MessageSerializer<Appleintelligence_TextToSpeechRequest>,
+            deserializer: some GRPCCore.MessageDeserializer<Appleintelligence_TextToSpeechResponse>,
+            options: GRPCCore.CallOptions = .defaults,
+            onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_TextToSpeechResponse>) async throws -> Result = { response in
+                try response.message
+            }
+        ) async throws -> Result where Result: Sendable {
+            try await self.client.unary(
+                request: request,
+                descriptor: Appleintelligence_AppleIntelligenceService.Method.TextToSpeech.descriptor,
+                serializer: serializer,
+                deserializer: deserializer,
+                options: options,
+                onResponse: handleResponse
+            )
+        }
+
+        /// Call the "ListVoices" method.
+        ///
+        /// - Parameters:
+        ///   - request: A request containing a single `Appleintelligence_ListVoicesRequest` message.
+        ///   - serializer: A serializer for `Appleintelligence_ListVoicesRequest` messages.
+        ///   - deserializer: A deserializer for `Appleintelligence_ListVoicesResponse` messages.
+        ///   - options: Options to apply to this RPC.
+        ///   - handleResponse: A closure which handles the response, the result of which is
+        ///     returned to the caller. Returning from the closure will cancel the RPC if it
+        ///     hasn't already finished.
+        /// - Returns: The result of `handleResponse`.
+        public func listVoices<Result>(
+            request: GRPCCore.ClientRequest<Appleintelligence_ListVoicesRequest>,
+            serializer: some GRPCCore.MessageSerializer<Appleintelligence_ListVoicesRequest>,
+            deserializer: some GRPCCore.MessageDeserializer<Appleintelligence_ListVoicesResponse>,
+            options: GRPCCore.CallOptions = .defaults,
+            onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_ListVoicesResponse>) async throws -> Result = { response in
+                try response.message
+            }
+        ) async throws -> Result where Result: Sendable {
+            try await self.client.unary(
+                request: request,
+                descriptor: Appleintelligence_AppleIntelligenceService.Method.ListVoices.descriptor,
+                serializer: serializer,
+                deserializer: deserializer,
+                options: options,
+                onResponse: handleResponse
+            )
+        }
+
+        /// Call the "Transcribe" method.
+        ///
+        /// > Source IDL Documentation:
+        /// >
+        /// > Speech-to-Text
+        ///
+        /// - Parameters:
+        ///   - request: A request containing a single `Appleintelligence_TranscribeRequest` message.
+        ///   - serializer: A serializer for `Appleintelligence_TranscribeRequest` messages.
+        ///   - deserializer: A deserializer for `Appleintelligence_TranscribeResponse` messages.
+        ///   - options: Options to apply to this RPC.
+        ///   - handleResponse: A closure which handles the response, the result of which is
+        ///     returned to the caller. Returning from the closure will cancel the RPC if it
+        ///     hasn't already finished.
+        /// - Returns: The result of `handleResponse`.
+        public func transcribe<Result>(
+            request: GRPCCore.ClientRequest<Appleintelligence_TranscribeRequest>,
+            serializer: some GRPCCore.MessageSerializer<Appleintelligence_TranscribeRequest>,
+            deserializer: some GRPCCore.MessageDeserializer<Appleintelligence_TranscribeResponse>,
+            options: GRPCCore.CallOptions = .defaults,
+            onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_TranscribeResponse>) async throws -> Result = { response in
+                try response.message
+            }
+        ) async throws -> Result where Result: Sendable {
+            try await self.client.unary(
+                request: request,
+                descriptor: Appleintelligence_AppleIntelligenceService.Method.Transcribe.descriptor,
+                serializer: serializer,
+                deserializer: deserializer,
+                options: options,
+                onResponse: handleResponse
+            )
+        }
+
+        /// Call the "StreamTranscribe" method.
+        ///
+        /// - Parameters:
+        ///   - request: A streaming request producing `Appleintelligence_StreamingTranscribeRequest` messages.
+        ///   - serializer: A serializer for `Appleintelligence_StreamingTranscribeRequest` messages.
+        ///   - deserializer: A deserializer for `Appleintelligence_StreamingTranscribeResponse` messages.
+        ///   - options: Options to apply to this RPC.
+        ///   - handleResponse: A closure which handles the response, the result of which is
+        ///     returned to the caller. Returning from the closure will cancel the RPC if it
+        ///     hasn't already finished.
+        /// - Returns: The result of `handleResponse`.
+        public func streamTranscribe<Result>(
+            request: GRPCCore.StreamingClientRequest<Appleintelligence_StreamingTranscribeRequest>,
+            serializer: some GRPCCore.MessageSerializer<Appleintelligence_StreamingTranscribeRequest>,
+            deserializer: some GRPCCore.MessageDeserializer<Appleintelligence_StreamingTranscribeResponse>,
+            options: GRPCCore.CallOptions = .defaults,
+            onResponse handleResponse: @Sendable @escaping (GRPCCore.StreamingClientResponse<Appleintelligence_StreamingTranscribeResponse>) async throws -> Result
+        ) async throws -> Result where Result: Sendable {
+            try await self.client.bidirectionalStreaming(
+                request: request,
+                descriptor: Appleintelligence_AppleIntelligenceService.Method.StreamTranscribe.descriptor,
+                serializer: serializer,
+                deserializer: deserializer,
+                options: options,
+                onResponse: handleResponse
+            )
+        }
     }
 }
 
@@ -695,6 +1283,112 @@ extension Appleintelligence_AppleIntelligenceService.ClientProtocol {
            onResponse: handleResponse
        )
    }

    /// Call the "TextToSpeech" method.
    ///
    /// > Source IDL Documentation:
    /// >
    /// > Text-to-Speech
    ///
    /// - Parameters:
    ///   - request: A request containing a single `Appleintelligence_TextToSpeechRequest` message.
    ///   - options: Options to apply to this RPC.
    ///   - handleResponse: A closure which handles the response, the result of which is
    ///     returned to the caller. Returning from the closure will cancel the RPC if it
    ///     hasn't already finished.
    /// - Returns: The result of `handleResponse`.
    public func textToSpeech<Result>(
        request: GRPCCore.ClientRequest<Appleintelligence_TextToSpeechRequest>,
        options: GRPCCore.CallOptions = .defaults,
        onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_TextToSpeechResponse>) async throws -> Result = { response in
            try response.message
        }
    ) async throws -> Result where Result: Sendable {
        try await self.textToSpeech(
            request: request,
            serializer: GRPCProtobuf.ProtobufSerializer<Appleintelligence_TextToSpeechRequest>(),
            deserializer: GRPCProtobuf.ProtobufDeserializer<Appleintelligence_TextToSpeechResponse>(),
            options: options,
            onResponse: handleResponse
        )
    }

    /// Call the "ListVoices" method.
    ///
    /// - Parameters:
    ///   - request: A request containing a single `Appleintelligence_ListVoicesRequest` message.
    ///   - options: Options to apply to this RPC.
    ///   - handleResponse: A closure which handles the response, the result of which is
    ///     returned to the caller. Returning from the closure will cancel the RPC if it
    ///     hasn't already finished.
    /// - Returns: The result of `handleResponse`.
    public func listVoices<Result>(
        request: GRPCCore.ClientRequest<Appleintelligence_ListVoicesRequest>,
        options: GRPCCore.CallOptions = .defaults,
        onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_ListVoicesResponse>) async throws -> Result = { response in
            try response.message
        }
    ) async throws -> Result where Result: Sendable {
        try await self.listVoices(
            request: request,
            serializer: GRPCProtobuf.ProtobufSerializer<Appleintelligence_ListVoicesRequest>(),
            deserializer: GRPCProtobuf.ProtobufDeserializer<Appleintelligence_ListVoicesResponse>(),
            options: options,
            onResponse: handleResponse
        )
    }

    /// Call the "Transcribe" method.
    ///
    /// > Source IDL Documentation:
    /// >
    /// > Speech-to-Text
    ///
    /// - Parameters:
    ///   - request: A request containing a single `Appleintelligence_TranscribeRequest` message.
    ///   - options: Options to apply to this RPC.
    ///   - handleResponse: A closure which handles the response, the result of which is
    ///     returned to the caller. Returning from the closure will cancel the RPC if it
    ///     hasn't already finished.
    /// - Returns: The result of `handleResponse`.
    public func transcribe<Result>(
        request: GRPCCore.ClientRequest<Appleintelligence_TranscribeRequest>,
        options: GRPCCore.CallOptions = .defaults,
        onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_TranscribeResponse>) async throws -> Result = { response in
            try response.message
        }
    ) async throws -> Result where Result: Sendable {
        try await self.transcribe(
            request: request,
            serializer: GRPCProtobuf.ProtobufSerializer<Appleintelligence_TranscribeRequest>(),
            deserializer: GRPCProtobuf.ProtobufDeserializer<Appleintelligence_TranscribeResponse>(),
            options: options,
            onResponse: handleResponse
        )
    }

    /// Call the "StreamTranscribe" method.
    ///
    /// - Parameters:
    ///   - request: A streaming request producing `Appleintelligence_StreamingTranscribeRequest` messages.
    ///   - options: Options to apply to this RPC.
    ///   - handleResponse: A closure which handles the response, the result of which is
    ///     returned to the caller. Returning from the closure will cancel the RPC if it
    ///     hasn't already finished.
    /// - Returns: The result of `handleResponse`.
    public func streamTranscribe<Result>(
        request: GRPCCore.StreamingClientRequest<Appleintelligence_StreamingTranscribeRequest>,
        options: GRPCCore.CallOptions = .defaults,
        onResponse handleResponse: @Sendable @escaping (GRPCCore.StreamingClientResponse<Appleintelligence_StreamingTranscribeResponse>) async throws -> Result
    ) async throws -> Result where Result: Sendable {
        try await self.streamTranscribe(
            request: request,
            serializer: GRPCProtobuf.ProtobufSerializer<Appleintelligence_StreamingTranscribeRequest>(),
            deserializer: GRPCProtobuf.ProtobufDeserializer<Appleintelligence_StreamingTranscribeResponse>(),
            options: options,
            onResponse: handleResponse
        )
    }
}

// Helpers providing sugared APIs for 'ClientProtocol' methods.
@@ -796,4 +1490,127 @@ extension Appleintelligence_AppleIntelligenceService.ClientProtocol {
            onResponse: handleResponse
        )
    }

    /// Call the "TextToSpeech" method.
    ///
    /// > Source IDL Documentation:
    /// >
    /// > Text-to-Speech
    ///
    /// - Parameters:
    ///   - message: request message to send.
    ///   - metadata: Additional metadata to send, defaults to empty.
    ///   - options: Options to apply to this RPC, defaults to `.defaults`.
    ///   - handleResponse: A closure which handles the response, the result of which is
    ///     returned to the caller. Returning from the closure will cancel the RPC if it
    ///     hasn't already finished.
    /// - Returns: The result of `handleResponse`.
    public func textToSpeech<Result>(
        _ message: Appleintelligence_TextToSpeechRequest,
        metadata: GRPCCore.Metadata = [:],
        options: GRPCCore.CallOptions = .defaults,
        onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_TextToSpeechResponse>) async throws -> Result = { response in
            try response.message
        }
    ) async throws -> Result where Result: Sendable {
        let request = GRPCCore.ClientRequest<Appleintelligence_TextToSpeechRequest>(
            message: message,
            metadata: metadata
        )
        return try await self.textToSpeech(
            request: request,
            options: options,
            onResponse: handleResponse
        )
    }

    /// Call the "ListVoices" method.
    ///
    /// - Parameters:
    ///   - message: request message to send.
    ///   - metadata: Additional metadata to send, defaults to empty.
    ///   - options: Options to apply to this RPC, defaults to `.defaults`.
    ///   - handleResponse: A closure which handles the response, the result of which is
    ///     returned to the caller. Returning from the closure will cancel the RPC if it
    ///     hasn't already finished.
    /// - Returns: The result of `handleResponse`.
    public func listVoices<Result>(
        _ message: Appleintelligence_ListVoicesRequest,
        metadata: GRPCCore.Metadata = [:],
        options: GRPCCore.CallOptions = .defaults,
        onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_ListVoicesResponse>) async throws -> Result = { response in
            try response.message
        }
    ) async throws -> Result where Result: Sendable {
        let request = GRPCCore.ClientRequest<Appleintelligence_ListVoicesRequest>(
            message: message,
            metadata: metadata
        )
        return try await self.listVoices(
            request: request,
            options: options,
            onResponse: handleResponse
        )
    }

    /// Call the "Transcribe" method.
    ///
    /// > Source IDL Documentation:
    /// >
    /// > Speech-to-Text
    ///
    /// - Parameters:
    ///   - message: request message to send.
    ///   - metadata: Additional metadata to send, defaults to empty.
    ///   - options: Options to apply to this RPC, defaults to `.defaults`.
    ///   - handleResponse: A closure which handles the response, the result of which is
    ///     returned to the caller. Returning from the closure will cancel the RPC if it
    ///     hasn't already finished.
    /// - Returns: The result of `handleResponse`.
    public func transcribe<Result>(
        _ message: Appleintelligence_TranscribeRequest,
        metadata: GRPCCore.Metadata = [:],
        options: GRPCCore.CallOptions = .defaults,
        onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_TranscribeResponse>) async throws -> Result = { response in
            try response.message
        }
    ) async throws -> Result where Result: Sendable {
        let request = GRPCCore.ClientRequest<Appleintelligence_TranscribeRequest>(
            message: message,
            metadata: metadata
        )
        return try await self.transcribe(
            request: request,
            options: options,
            onResponse: handleResponse
        )
    }

    /// Call the "StreamTranscribe" method.
    ///
    /// - Parameters:
    ///   - metadata: Additional metadata to send, defaults to empty.
    ///   - options: Options to apply to this RPC, defaults to `.defaults`.
    ///   - producer: A closure producing request messages to send to the server. The request
    ///     stream is closed when the closure returns.
    ///   - handleResponse: A closure which handles the response, the result of which is
    ///     returned to the caller. Returning from the closure will cancel the RPC if it
    ///     hasn't already finished.
    /// - Returns: The result of `handleResponse`.
    public func streamTranscribe<Result>(
        metadata: GRPCCore.Metadata = [:],
        options: GRPCCore.CallOptions = .defaults,
        requestProducer producer: @Sendable @escaping (GRPCCore.RPCWriter<Appleintelligence_StreamingTranscribeRequest>) async throws -> Void,
        onResponse handleResponse: @Sendable @escaping (GRPCCore.StreamingClientResponse<Appleintelligence_StreamingTranscribeResponse>) async throws -> Result
    ) async throws -> Result where Result: Sendable {
        let request = GRPCCore.StreamingClientRequest<Appleintelligence_StreamingTranscribeRequest>(
            metadata: metadata,
            producer: producer
        )
        return try await self.streamTranscribe(
            request: request,
            options: options,
            onResponse: handleResponse
        )
    }
}
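For reference, a minimal usage sketch of the sugared client API generated above. This is not part of the commit: it assumes a `GRPCCore.GRPCClient` (called `grpcClient` here) is already connected to the server, and that the generated `Client` wrapper exposes the usual `init(wrapping:)` initializer; variable names are illustrative only.

// Hypothetical usage sketch: call the sugared helpers from an existing client connection.
let speech = Appleintelligence_AppleIntelligenceService.Client(wrapping: grpcClient)

var ttsRequest = Appleintelligence_TextToSpeechRequest()
ttsRequest.text = "Hello from Apple Intelligence."
ttsRequest.outputFormat = .wav

// The default `onResponse` closure returns the single response message.
let tts = try await speech.textToSpeech(ttsRequest)
print("Received \(tts.audioData.count) bytes (\(tts.durationSeconds)s of audio)")

// List the voices the server can synthesize with.
let voices = try await speech.listVoices(Appleintelligence_ListVoicesRequest())
for voice in voices.voices {
    print("\(voice.identifier) [\(voice.language)]")
}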
(File diff suppressed because it is too large.)
@@ -8,11 +8,24 @@ public struct AppleIntelligenceProvider: Appleintelligence_AppleIntelligenceServ
    /// The underlying AI service
    private let service: AppleIntelligenceService

    /// Text-to-Speech service
    private let ttsService: TextToSpeechService?

    /// Speech-to-Text service
    private let sttService: SpeechToTextService?

    /// Optional API key for authentication
    private let apiKey: String?

    public init(
        service: AppleIntelligenceService,
        ttsService: TextToSpeechService? = nil,
        sttService: SpeechToTextService? = nil,
        apiKey: String? = nil
    ) {
        self.service = service
        self.ttsService = ttsService
        self.sttService = sttService
        self.apiKey = apiKey
    }
@@ -139,6 +152,213 @@ public struct AppleIntelligenceProvider: Appleintelligence_AppleIntelligenceServ
        return ServerResponse(message: response)
    }

    // MARK: - Text-to-Speech

    public func textToSpeech(
        request: GRPCCore.ServerRequest<Appleintelligence_TextToSpeechRequest>,
        context: GRPCCore.ServerContext
    ) async throws -> GRPCCore.ServerResponse<Appleintelligence_TextToSpeechResponse> {
        try validateApiKey(metadata: request.metadata)

        guard let ttsService = ttsService else {
            throw RPCError(code: .unavailable, message: "Text-to-Speech service not available")
        }

        let message = request.message

        // Convert proto config to service config
        var config = SpeechConfig.default
        if message.hasVoiceConfig {
            let voiceConfig = message.voiceConfig
            config = SpeechConfig(
                voiceIdentifier: voiceConfig.voiceIdentifier.isEmpty ? nil : voiceConfig.voiceIdentifier,
                speakingRate: voiceConfig.hasSpeakingRate ? voiceConfig.speakingRate : 0.5,
                pitchMultiplier: voiceConfig.hasPitchMultiplier ? voiceConfig.pitchMultiplier : 1.0,
                volume: voiceConfig.hasVolume ? voiceConfig.volume : 1.0
            )
        }

        // Convert proto format to service format
        let outputFormat: AudioOutputFormat
        switch message.outputFormat {
        case .wav, .unspecified:
            outputFormat = .wav
        case .mp3:
            outputFormat = .mp3
        case .UNRECOGNIZED:
            outputFormat = .wav
        }

        do {
            let result = try await ttsService.synthesize(
                text: message.text,
                config: config,
                outputFormat: outputFormat
            )

            var response = Appleintelligence_TextToSpeechResponse()
            response.audioData = result.audioData
            response.format = outputFormat == .wav ? .wav : .mp3
            response.sampleRate = Int32(result.sampleRate)
            response.channels = Int32(result.channels)
            response.durationSeconds = result.durationSeconds

            return ServerResponse(message: response)
        } catch let error as TextToSpeechError {
            throw RPCError(code: .internalError, message: error.description)
        }
    }

    public func listVoices(
        request: GRPCCore.ServerRequest<Appleintelligence_ListVoicesRequest>,
        context: GRPCCore.ServerContext
    ) async throws -> GRPCCore.ServerResponse<Appleintelligence_ListVoicesResponse> {
        try validateApiKey(metadata: request.metadata)

        guard let ttsService = ttsService else {
            throw RPCError(code: .unavailable, message: "Text-to-Speech service not available")
        }

        let message = request.message
        let languageCode = message.hasLanguageCode ? message.languageCode : nil

        let voices = await ttsService.listVoices(languageCode: languageCode)

        var response = Appleintelligence_ListVoicesResponse()
        response.voices = voices.map { voice in
            var protoVoice = Appleintelligence_VoiceInfo()
            protoVoice.identifier = voice.identifier
            protoVoice.name = voice.name
            protoVoice.language = voice.language
            protoVoice.isPremium = voice.isPremium
            protoVoice.gender = voice.gender
            return protoVoice
        }

        return ServerResponse(message: response)
    }

    // MARK: - Speech-to-Text

    public func transcribe(
        request: GRPCCore.ServerRequest<Appleintelligence_TranscribeRequest>,
        context: GRPCCore.ServerContext
    ) async throws -> GRPCCore.ServerResponse<Appleintelligence_TranscribeResponse> {
        try validateApiKey(metadata: request.metadata)

        guard let sttService = sttService else {
            throw RPCError(code: .unavailable, message: "Speech-to-Text service not available")
        }

        let message = request.message

        guard message.hasAudio else {
            throw RPCError(code: .invalidArgument, message: "Audio data is required")
        }

        // Convert proto config to service config
        var config = TranscriptionConfig.default
        if message.hasConfig {
            let protoConfig = message.config
            config = TranscriptionConfig(
                languageCode: protoConfig.hasLanguageCode ? protoConfig.languageCode : nil,
                enablePunctuation: protoConfig.hasEnablePunctuation ? protoConfig.enablePunctuation : true,
                enableTimestamps: protoConfig.hasEnableTimestamps ? protoConfig.enableTimestamps : false
            )
        }

        do {
            let result = try await sttService.transcribe(
                audioData: message.audio.data,
                mimeType: message.audio.mimeType,
                config: config
            )

            var response = Appleintelligence_TranscribeResponse()
            response.text = result.text
            response.detectedLanguage = result.detectedLanguage
            response.confidence = result.confidence
            response.segments = result.segments.map { segment in
                var protoSegment = Appleintelligence_TranscriptionSegment()
                protoSegment.text = segment.text
                protoSegment.startTime = segment.startTime
                protoSegment.endTime = segment.endTime
                protoSegment.confidence = segment.confidence
                return protoSegment
            }

            return ServerResponse(message: response)
        } catch let error as SpeechToTextError {
            throw RPCError(code: .internalError, message: error.description)
        }
    }

    public func streamTranscribe(
        request: GRPCCore.StreamingServerRequest<Appleintelligence_StreamingTranscribeRequest>,
        context: GRPCCore.ServerContext
    ) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_StreamingTranscribeResponse> {
        try validateApiKey(metadata: request.metadata)

        guard let sttService = sttService else {
            throw RPCError(code: .unavailable, message: "Speech-to-Text service not available")
        }

        return StreamingServerResponse { writer in
            var config = TranscriptionConfig.default

            // Process incoming stream
            for try await message in request.messages {
                switch message.request {
                case .config(let protoConfig):
                    // First message should be config
                    config = TranscriptionConfig(
                        languageCode: protoConfig.hasLanguageCode ? protoConfig.languageCode : nil,
                        enablePunctuation: protoConfig.hasEnablePunctuation ? protoConfig.enablePunctuation : true,
                        enableTimestamps: protoConfig.hasEnableTimestamps ? protoConfig.enableTimestamps : false
                    )

                    // Start streaming transcription
                    let stream = await sttService.streamTranscribe(config: config)
                    Task {
                        do {
                            for try await update in stream {
                                var response = Appleintelligence_StreamingTranscribeResponse()
                                response.partialText = update.partialText
                                response.isFinal = update.isFinal
                                if let finalText = update.finalText {
                                    response.finalText = finalText
                                }
                                response.segments = update.segments.map { segment in
                                    var protoSegment = Appleintelligence_TranscriptionSegment()
                                    protoSegment.text = segment.text
                                    protoSegment.startTime = segment.startTime
                                    protoSegment.endTime = segment.endTime
                                    protoSegment.confidence = segment.confidence
                                    return protoSegment
                                }
                                try await writer.write(response)
                            }
                        } catch {
                            // Stream ended or error occurred
                        }
                    }

                case .audioChunk(let chunk):
                    // Feed audio chunk to service
                    try await sttService.feedAudioChunk(chunk)

                case .none:
                    break
                }
            }

            // End streaming session
            await sttService.endStreamingSession()

            return [:]
        }
    }

    // MARK: - Private Helpers

    /// Validate API key if configured
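The handler above expects the first streamed message to carry the transcription config and every later message to carry an audio chunk. A hedged client-side sketch of that contract, using the generated `streamTranscribe(requestProducer:onResponse:)` helper; `speech` is the wrapped client from the earlier sketch, and `audioChunks` is an assumed `AsyncStream<Data>` of captured audio, neither of which is part of this commit.

// Hypothetical sketch of the config-first, audio-after contract StreamTranscribe expects.
let finalText = try await speech.streamTranscribe(
    requestProducer: { writer in
        var first = Appleintelligence_StreamingTranscribeRequest()
        var cfg = Appleintelligence_TranscriptionConfig()
        cfg.languageCode = "fr-CA"  // the EN-CA/FR-CA toggle maps to this field
        first.config = cfg
        try await writer.write(first)

        for await chunk in audioChunks {  // assumed AsyncStream<Data>
            var msg = Appleintelligence_StreamingTranscribeRequest()
            msg.audioChunk = chunk
            try await writer.write(msg)
        }
    },
    onResponse: { response in
        var lastFinal = ""
        for try await update in response.messages {
            if update.isFinal { lastFinal = update.finalText }
        }
        return lastFinal
    }
)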
Binary file not shown.

Sources/AppleIntelligenceCore/Services/SpeechToTextService.swift (new file, 337 lines)
@@ -0,0 +1,337 @@
import Foundation
import Speech
import AVFoundation

// MARK: - Result Types

/// Transcription result
public struct TranscriptionResult: Sendable {
    public let text: String
    public let segments: [TranscriptionSegmentResult]
    public let detectedLanguage: String
    public let confidence: Float
}

/// Individual transcription segment
public struct TranscriptionSegmentResult: Sendable {
    public let text: String
    public let startTime: Float
    public let endTime: Float
    public let confidence: Float
}

/// Streaming transcription update
public struct StreamingTranscriptionUpdate: Sendable {
    public let partialText: String
    public let isFinal: Bool
    public let finalText: String?
    public let segments: [TranscriptionSegmentResult]
}

/// Transcription configuration
public struct TranscriptionConfig: Sendable {
    public var languageCode: String?
    public var enablePunctuation: Bool
    public var enableTimestamps: Bool

    public static let `default` = TranscriptionConfig(
        languageCode: nil,
        enablePunctuation: true,
        enableTimestamps: false
    )

    public init(
        languageCode: String? = nil,
        enablePunctuation: Bool = true,
        enableTimestamps: Bool = false
    ) {
        self.languageCode = languageCode
        self.enablePunctuation = enablePunctuation
        self.enableTimestamps = enableTimestamps
    }
}

// MARK: - Errors

public enum SpeechToTextError: Error, CustomStringConvertible, Sendable {
    case notAvailable
    case authorizationDenied
    case modelNotReady(String)
    case transcriptionFailed(String)
    case invalidAudioFormat
    case audioProcessingFailed(String)
    case unsupportedMimeType(String)

    public var description: String {
        switch self {
        case .notAvailable: return "Speech recognition not available on this system"
        case .authorizationDenied: return "Speech recognition authorization denied"
        case .modelNotReady(let reason): return "Speech model not ready: \(reason)"
        case .transcriptionFailed(let reason): return "Transcription failed: \(reason)"
        case .invalidAudioFormat: return "Invalid audio format"
        case .audioProcessingFailed(let reason): return "Audio processing failed: \(reason)"
        case .unsupportedMimeType(let type): return "Unsupported audio MIME type: \(type)"
        }
    }
}

// MARK: - Service Actor

public actor SpeechToTextService {

    /// Service availability status
    public private(set) var isAvailable: Bool = false

    /// Streaming session state
    private var isStreamingActive: Bool = false

    public init() async {
        await checkAvailability()
    }

    // MARK: - Public API

    /// Transcribe audio data (file-based)
    public func transcribe(
        audioData: Data,
        mimeType: String,
        config: TranscriptionConfig = .default
    ) async throws -> TranscriptionResult {
        guard isAvailable else {
            throw SpeechToTextError.notAvailable
        }

        // Convert audio data to file URL for processing
        let tempURL = try createTempAudioFile(data: audioData, mimeType: mimeType)
        defer { try? FileManager.default.removeItem(at: tempURL) }

        return try await transcribeWithSFSpeechRecognizer(url: tempURL, config: config)
    }

    /// Stream transcription from audio chunks
    public func streamTranscribe(
        config: TranscriptionConfig = .default
    ) -> AsyncThrowingStream<StreamingTranscriptionUpdate, Error> {
        AsyncThrowingStream { continuation in
            Task {
                guard self.isAvailable else {
                    continuation.finish(throwing: SpeechToTextError.notAvailable)
                    return
                }

                do {
                    try await self.startStreamingWithSFSpeechRecognizer(config: config, continuation: continuation)
                } catch {
                    continuation.finish(throwing: error)
                }
            }
        }
    }

    /// Feed audio chunk for streaming transcription
    public func feedAudioChunk(_ chunk: Data) async throws {
        guard isStreamingActive else {
            throw SpeechToTextError.transcriptionFailed("No active streaming session")
        }
        // Audio chunk handling implemented in streaming methods
    }

    /// End streaming session
    public func endStreamingSession() async {
        isStreamingActive = false
    }

    /// Get status information
    public func getStatus() -> String {
        if isAvailable {
            return "SFSpeechRecognizer available"
        } else {
            return "Speech recognition not available"
        }
    }

    // MARK: - Private Implementation

    private func checkAvailability() async {
        // Check SFSpeechRecognizer availability
        let status = SFSpeechRecognizer.authorizationStatus()
        switch status {
        case .authorized:
            isAvailable = SFSpeechRecognizer.supportedLocales().count > 0
        case .notDetermined:
            // Request authorization
            isAvailable = await withCheckedContinuation { continuation in
                SFSpeechRecognizer.requestAuthorization { newStatus in
                    continuation.resume(returning: newStatus == .authorized)
                }
            }
        default:
            isAvailable = false
        }
    }

    /// Create temporary audio file from data
    private func createTempAudioFile(data: Data, mimeType: String) throws -> URL {
        let ext = extensionForMimeType(mimeType)
        let tempDir = FileManager.default.temporaryDirectory
        let fileName = UUID().uuidString + "." + ext
        let fileURL = tempDir.appendingPathComponent(fileName)

        try data.write(to: fileURL)
        return fileURL
    }

    /// Get file extension for MIME type
    private func extensionForMimeType(_ mimeType: String) -> String {
        switch mimeType.lowercased() {
        case "audio/wav", "audio/wave", "audio/x-wav":
            return "wav"
        case "audio/mp3", "audio/mpeg":
            return "mp3"
        case "audio/m4a", "audio/mp4", "audio/x-m4a":
            return "m4a"
        case "audio/aac":
            return "aac"
        case "audio/flac":
            return "flac"
        default:
            return "wav"
        }
    }

    /// Transcribe using SFSpeechRecognizer
    private func transcribeWithSFSpeechRecognizer(
        url: URL,
        config: TranscriptionConfig
    ) async throws -> TranscriptionResult {
        let locale = Locale(identifier: config.languageCode ?? "en-US")
        guard let recognizer = SFSpeechRecognizer(locale: locale) else {
            throw SpeechToTextError.notAvailable
        }

        guard recognizer.isAvailable else {
            throw SpeechToTextError.notAvailable
        }

        let request = SFSpeechURLRecognitionRequest(url: url)
        request.shouldReportPartialResults = false

        return try await withCheckedThrowingContinuation { continuation in
            var hasResumed = false

            recognizer.recognitionTask(with: request) { result, error in
                guard !hasResumed else { return }

                if let error = error {
                    hasResumed = true
                    continuation.resume(throwing: SpeechToTextError.transcriptionFailed(error.localizedDescription))
                    return
                }

                guard let result = result, result.isFinal else { return }

                hasResumed = true

                let transcription = result.bestTranscription
                var segments: [TranscriptionSegmentResult] = []

                if config.enableTimestamps {
                    for segment in transcription.segments {
                        segments.append(TranscriptionSegmentResult(
                            text: segment.substring,
                            startTime: Float(segment.timestamp),
                            endTime: Float(segment.timestamp + segment.duration),
                            confidence: segment.confidence
                        ))
                    }
                }

                let transcriptionResult = TranscriptionResult(
                    text: transcription.formattedString,
                    segments: segments,
                    detectedLanguage: config.languageCode ?? "en-US",
                    confidence: segments.isEmpty ? 1.0 : segments.reduce(0) { $0 + $1.confidence } / Float(segments.count)
                )

                continuation.resume(returning: transcriptionResult)
            }
        }
    }

    /// Start streaming with SFSpeechRecognizer
    private func startStreamingWithSFSpeechRecognizer(
        config: TranscriptionConfig,
        continuation: AsyncThrowingStream<StreamingTranscriptionUpdate, Error>.Continuation
    ) async throws {
        let locale = Locale(identifier: config.languageCode ?? "en-US")
        guard let recognizer = SFSpeechRecognizer(locale: locale) else {
            throw SpeechToTextError.notAvailable
        }

        guard recognizer.isAvailable else {
            throw SpeechToTextError.notAvailable
        }

        isStreamingActive = true

        let audioEngine = AVAudioEngine()
        let request = SFSpeechAudioBufferRecognitionRequest()
        request.shouldReportPartialResults = true

        let inputNode = audioEngine.inputNode
        let recordingFormat = inputNode.outputFormat(forBus: 0)

        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { buffer, _ in
            request.append(buffer)
        }

        audioEngine.prepare()
        try audioEngine.start()

        recognizer.recognitionTask(with: request) { result, error in
            if let error = error {
                continuation.finish(throwing: SpeechToTextError.transcriptionFailed(error.localizedDescription))
                return
            }

            guard let result = result else { return }

            let transcription = result.bestTranscription
            var segments: [TranscriptionSegmentResult] = []

            if config.enableTimestamps {
                for segment in transcription.segments {
                    segments.append(TranscriptionSegmentResult(
                        text: segment.substring,
                        startTime: Float(segment.timestamp),
                        endTime: Float(segment.timestamp + segment.duration),
                        confidence: segment.confidence
                    ))
                }
            }

            let update = StreamingTranscriptionUpdate(
                partialText: transcription.formattedString,
                isFinal: result.isFinal,
                finalText: result.isFinal ? transcription.formattedString : nil,
                segments: segments
            )
            continuation.yield(update)

            if result.isFinal {
                audioEngine.stop()
                inputNode.removeTap(onBus: 0)
                continuation.finish()
            }
        }

        // Wait for streaming to end
        while isStreamingActive {
            try await Task.sleep(for: .milliseconds(100))
        }

        audioEngine.stop()
        inputNode.removeTap(onBus: 0)
        request.endAudio()
    }
}
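A short local-usage sketch of the actor above, independent of the gRPC layer; the file path and language code are illustrative only and assume a WAV recording already exists on disk.

// Hypothetical local usage of SpeechToTextService (file path is illustrative).
let stt = await SpeechToTextService()
let audio = try Data(contentsOf: URL(fileURLWithPath: "sample.wav"))
let result = try await stt.transcribe(
    audioData: audio,
    mimeType: "audio/wav",
    config: TranscriptionConfig(languageCode: "en-CA", enableTimestamps: true)
)
print(result.text)
for segment in result.segments {
    print("\(segment.startTime)s-\(segment.endTime)s: \(segment.text)")
}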
Sources/AppleIntelligenceCore/Services/TextToSpeechService.swift (new file, 280 lines)
@@ -0,0 +1,280 @@
import Foundation
import AVFoundation

// MARK: - Result Types

/// Result of text-to-speech synthesis
public struct TextToSpeechResult: Sendable {
    public let audioData: Data
    public let format: AudioOutputFormat
    public let sampleRate: Int
    public let channels: Int
    public let durationSeconds: Float
}

/// Supported output formats
public enum AudioOutputFormat: Sendable {
    case wav
    case mp3
}

/// Voice information
public struct VoiceDescription: Sendable {
    public let identifier: String
    public let name: String
    public let language: String
    public let isPremium: Bool
    public let gender: String
}

/// Configuration for speech synthesis
public struct SpeechConfig: Sendable {
    public var voiceIdentifier: String?
    public var speakingRate: Float // 0.0 - 1.0
    public var pitchMultiplier: Float // 0.5 - 2.0
    public var volume: Float // 0.0 - 1.0

    public static let `default` = SpeechConfig(
        voiceIdentifier: nil,
        speakingRate: 0.5,
        pitchMultiplier: 1.0,
        volume: 1.0
    )

    public init(
        voiceIdentifier: String? = nil,
        speakingRate: Float = 0.5,
        pitchMultiplier: Float = 1.0,
        volume: Float = 1.0
    ) {
        self.voiceIdentifier = voiceIdentifier
        self.speakingRate = speakingRate
        self.pitchMultiplier = pitchMultiplier
        self.volume = volume
    }
}

// MARK: - Errors

public enum TextToSpeechError: Error, CustomStringConvertible, Sendable {
    case invalidVoice(String)
    case synthesisFailure(String)
    case encodingFailure(String)
    case noAudioGenerated
    case unsupportedFormat

    public var description: String {
        switch self {
        case .invalidVoice(let id): return "Invalid voice identifier: \(id)"
        case .synthesisFailure(let reason): return "Speech synthesis failed: \(reason)"
        case .encodingFailure(let reason): return "Audio encoding failed: \(reason)"
        case .noAudioGenerated: return "No audio was generated"
        case .unsupportedFormat: return "Unsupported audio format"
        }
    }
}

// MARK: - Service Actor

public actor TextToSpeechService {
    /// Keep strong reference to synthesizer during synthesis
    private var activeSynthesizer: AVSpeechSynthesizer?

    public init() {}

    // MARK: - Public API

    /// Synthesize text to speech
    public func synthesize(
        text: String,
        config: SpeechConfig = .default,
        outputFormat: AudioOutputFormat = .wav
    ) async throws -> TextToSpeechResult {
        // Create utterance
        let utterance = AVSpeechUtterance(string: text)

        // Configure voice
        if let voiceId = config.voiceIdentifier {
            if let voice = AVSpeechSynthesisVoice(identifier: voiceId) {
                utterance.voice = voice
            } else {
                throw TextToSpeechError.invalidVoice(voiceId)
            }
        } else {
            // Use default English voice
            utterance.voice = AVSpeechSynthesisVoice(language: "en-US")
        }

        // Configure speech parameters
        utterance.rate = config.speakingRate
        utterance.pitchMultiplier = config.pitchMultiplier
        utterance.volume = config.volume

        // Collect PCM data
        let pcmData = try await collectPCMData(utterance: utterance)

        // Convert to requested format
        let audioData: Data
        switch outputFormat {
        case .wav:
            audioData = createWAVData(from: pcmData)
        case .mp3:
            // Use WAV as fallback (MP3 encoding requires external library)
            audioData = createWAVData(from: pcmData)
        }

        // Calculate duration
        let bytesPerSample = 2 // Int16
        let totalSamples = pcmData.samples.count / bytesPerSample / pcmData.channelCount
        let duration = Float(totalSamples) / Float(pcmData.sampleRate)

        return TextToSpeechResult(
            audioData: audioData,
            format: outputFormat,
            sampleRate: Int(pcmData.sampleRate),
            channels: pcmData.channelCount,
            durationSeconds: duration
        )
    }

    /// List available voices
    public func listVoices(languageCode: String? = nil) -> [VoiceDescription] {
        let voices = AVSpeechSynthesisVoice.speechVoices()

        let filtered: [AVSpeechSynthesisVoice]
        if let lang = languageCode {
            filtered = voices.filter { $0.language.hasPrefix(lang) }
        } else {
            filtered = voices
        }

        return filtered.map { voice in
            VoiceDescription(
                identifier: voice.identifier,
                name: voice.name,
                language: voice.language,
                isPremium: voice.quality == .enhanced || voice.quality == .premium,
                gender: genderString(for: voice)
            )
        }
    }

    // MARK: - Private Implementation

    /// PCM buffer data for internal processing
    private struct PCMBufferData: Sendable {
        let samples: Data
        let sampleRate: Double
        let channelCount: Int
    }

    /// Collect PCM data from synthesizer using write callback
    private func collectPCMData(
        utterance: AVSpeechUtterance
    ) async throws -> PCMBufferData {
        // Create and store synthesizer to keep strong reference during synthesis
        let synthesizer = AVSpeechSynthesizer()
        self.activeSynthesizer = synthesizer

        defer { self.activeSynthesizer = nil }

        return try await withCheckedThrowingContinuation { continuation in
            var pcmData = Data()
            var sampleRate: Double = 0
            var channelCount: Int = 0
            var hasResumed = false

            synthesizer.write(utterance) { buffer in
                guard let pcmBuffer = buffer as? AVAudioPCMBuffer else {
                    // End of audio - empty buffer signals completion
                    if !hasResumed {
                        hasResumed = true
                        if pcmData.isEmpty {
                            continuation.resume(throwing: TextToSpeechError.noAudioGenerated)
                        } else {
                            continuation.resume(returning: PCMBufferData(
                                samples: pcmData,
                                sampleRate: sampleRate,
                                channelCount: channelCount
                            ))
                        }
                    }
                    return
                }

                if pcmBuffer.frameLength > 0 {
                    // Store format from first buffer
                    if sampleRate == 0 {
                        sampleRate = pcmBuffer.format.sampleRate
                        channelCount = Int(pcmBuffer.format.channelCount)
                    }

                    // Convert float samples to Int16 PCM
                    if let channelData = pcmBuffer.floatChannelData {
                        let frameCount = Int(pcmBuffer.frameLength)
                        for frame in 0..<frameCount {
                            for channel in 0..<channelCount {
                                let sample = channelData[channel][frame]
                                let clampedSample = max(-1.0, min(1.0, sample))
                                let int16Sample = Int16(clampedSample * Float(Int16.max))
                                withUnsafeBytes(of: int16Sample.littleEndian) { bytes in
                                    pcmData.append(contentsOf: bytes)
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    /// Create WAV data from PCM buffer data
    private func createWAVData(from pcmData: PCMBufferData) -> Data {
        let bitsPerSample = 16
        let sampleRate = Int(pcmData.sampleRate)
        let channels = pcmData.channelCount
        let dataSize = pcmData.samples.count

        var header = Data()

        // RIFF header
        header.append(contentsOf: "RIFF".utf8)
        let fileSize = UInt32(dataSize + 36)
        withUnsafeBytes(of: fileSize.littleEndian) { header.append(contentsOf: $0) }
        header.append(contentsOf: "WAVE".utf8)

        // fmt subchunk
        header.append(contentsOf: "fmt ".utf8)
        let subchunk1Size = UInt32(16)
        withUnsafeBytes(of: subchunk1Size.littleEndian) { header.append(contentsOf: $0) }
        let audioFormat = UInt16(1) // PCM
        withUnsafeBytes(of: audioFormat.littleEndian) { header.append(contentsOf: $0) }
        let numChannels = UInt16(channels)
        withUnsafeBytes(of: numChannels.littleEndian) { header.append(contentsOf: $0) }
        let sampleRateU32 = UInt32(sampleRate)
        withUnsafeBytes(of: sampleRateU32.littleEndian) { header.append(contentsOf: $0) }
        let byteRate = UInt32(sampleRate * channels * bitsPerSample / 8)
        withUnsafeBytes(of: byteRate.littleEndian) { header.append(contentsOf: $0) }
        let blockAlign = UInt16(channels * bitsPerSample / 8)
        withUnsafeBytes(of: blockAlign.littleEndian) { header.append(contentsOf: $0) }
        let bitsPerSampleU16 = UInt16(bitsPerSample)
        withUnsafeBytes(of: bitsPerSampleU16.littleEndian) { header.append(contentsOf: $0) }

        // data subchunk
        header.append(contentsOf: "data".utf8)
        let dataU32 = UInt32(dataSize)
        withUnsafeBytes(of: dataU32.littleEndian) { header.append(contentsOf: $0) }

        return header + pcmData.samples
    }

    /// Get gender string for voice
    private func genderString(for voice: AVSpeechSynthesisVoice) -> String {
        switch voice.gender {
        case .male: return "male"
        case .female: return "female"
        case .unspecified: return "unspecified"
        @unknown default: return "unknown"
        }
    }
}
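A matching local-usage sketch for the synthesizer actor; the output path and phrase are illustrative. The reported duration follows the same arithmetic used above: frames = bytes / (2 * channels) for 16-bit PCM, and duration = frames / sampleRate.

// Hypothetical local usage of TextToSpeechService (output path is illustrative).
let tts = TextToSpeechService()
let result = try await tts.synthesize(
    text: "Bonjour, hello!",
    config: SpeechConfig(speakingRate: 0.5, pitchMultiplier: 1.0, volume: 1.0),
    outputFormat: .wav
)
try result.audioData.write(to: URL(fileURLWithPath: "greeting.wav"))
print("Wrote \(result.durationSeconds)s of audio at \(result.sampleRate) Hz, \(result.channels) channel(s)")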
@@ -37,7 +37,21 @@ struct AppleIntelligenceServer: AsyncParsableCommand {
            throw ExitCode.failure
        }

        // Initialize speech services
        print("Initializing Text-to-Speech service...")
        let ttsService = TextToSpeechService()

        print("Initializing Speech-to-Text service...")
        let sttService = await SpeechToTextService()
        let sttStatus = await sttService.getStatus()
        print("Speech-to-Text status: \(sttStatus)")

        let provider = AppleIntelligenceProvider(
            service: service,
            ttsService: ttsService,
            sttService: sttService,
            apiKey: config.apiKey
        )

        let transport = HTTP2ServerTransport.Posix(
            address: .ipv4(host: bindHost, port: bindPort),
@@ -52,7 +66,15 @@ struct AppleIntelligenceServer: AsyncParsableCommand {
            print("API key authentication is enabled")
        }
        print("Server is ready to accept connections")
        print("")
        print("Available services:")
        print(" - Complete/StreamComplete: Text generation with Apple Intelligence")
        print(" - TextToSpeech: Convert text to spoken audio")
        print(" - ListVoices: List available TTS voices")
        print(" - Transcribe: Convert audio file to text")
        print(" - StreamTranscribe: Real-time speech-to-text")
        print("")
        print("Health check: grpcurl -plaintext \(bindHost):\(bindPort) appleintelligence.AppleIntelligenceService/Health")
        print("Press Ctrl+C to stop the server")

        try await server.serve()
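Once the server is running, the file-based Transcribe RPC can be exercised end to end. A hedged sketch using the sugared client helper; `speech` is the wrapped client from the earlier sketch, and the audio path, MIME type, and language code are placeholders rather than part of this change.

// Hypothetical end-to-end check of the Transcribe RPC against a running server.
var transcribeRequest = Appleintelligence_TranscribeRequest()
transcribeRequest.audio.data = try Data(contentsOf: URL(fileURLWithPath: "clip.m4a"))
transcribeRequest.audio.mimeType = "audio/m4a"
transcribeRequest.config.languageCode = "fr-CA"

let transcript = try await speech.transcribe(transcribeRequest)
print("\(transcript.detectedLanguage): \(transcript.text) (confidence \(transcript.confidence))")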