Add Text-to-Speech and Speech-to-Text features

- Add TTS service using AVSpeechSynthesizer for voice output
- Add STT service using SpeechAnalyzer (macOS 26) for transcription
- Add voice input (microphone) button in chat with recording level indicator
- Add speak button on assistant messages for TTS playback
- Add language toggle (EN-CA/FR-CA) for bilingual speech recognition
- Fix Swift 6 strict concurrency issues in audio callbacks
- Update proto schema with TTS/STT message types and RPCs
- Update gRPC provider with speech service endpoints

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Mathias Beaulieu-Duncan 2025-12-31 02:57:30 -05:00
parent 638656e7ca
commit b754945923
10 changed files with 3151 additions and 8 deletions

View File

@ -51,6 +51,113 @@ message HealthResponse {
string model_status = 2;
}
// ============ TEXT-TO-SPEECH ============
// Audio format enumeration
enum AudioFormat {
AUDIO_FORMAT_UNSPECIFIED = 0;
AUDIO_FORMAT_WAV = 1;
AUDIO_FORMAT_MP3 = 2;
}
// Voice configuration for TTS
message VoiceConfig {
string voice_identifier = 1;
optional float speaking_rate = 2; // 0.0-1.0, default 0.5
optional float pitch_multiplier = 3; // 0.5-2.0, default 1.0
optional float volume = 4; // 0.0-1.0, default 1.0
}
// TTS Request
message TextToSpeechRequest {
string text = 1;
AudioFormat output_format = 2;
optional VoiceConfig voice_config = 3;
}
// TTS Response
message TextToSpeechResponse {
bytes audio_data = 1;
AudioFormat format = 2;
int32 sample_rate = 3;
int32 channels = 4;
float duration_seconds = 5;
}
// List available voices request
message ListVoicesRequest {
optional string language_code = 1;
}
// Voice information
message VoiceInfo {
string identifier = 1;
string name = 2;
string language = 3;
bool is_premium = 4;
string gender = 5;
}
// List voices response
message ListVoicesResponse {
repeated VoiceInfo voices = 1;
}
// ============ SPEECH-TO-TEXT ============
// STT Configuration
message TranscriptionConfig {
optional string language_code = 1;
optional bool enable_punctuation = 2; // default true
optional bool enable_timestamps = 3; // default false
}
// Audio data for STT
message AudioInput {
bytes data = 1;
string mime_type = 2; // "audio/wav", "audio/mp3", "audio/m4a"
optional int32 sample_rate = 3;
optional int32 channels = 4;
}
// File-based transcription request
message TranscribeRequest {
AudioInput audio = 1;
optional TranscriptionConfig config = 2;
}
// Transcription segment with timing
message TranscriptionSegment {
string text = 1;
float start_time = 2;
float end_time = 3;
float confidence = 4;
}
// Transcription response
message TranscribeResponse {
string text = 1;
repeated TranscriptionSegment segments = 2;
string detected_language = 3;
float confidence = 4;
}
// Streaming STT request chunk
message StreamingTranscribeRequest {
oneof request {
TranscriptionConfig config = 1; // Send first to configure
bytes audio_chunk = 2; // Subsequent audio chunks
}
}
// Streaming STT response
message StreamingTranscribeResponse {
string partial_text = 1;
bool is_final = 2;
string final_text = 3;
repeated TranscriptionSegment segments = 4;
}
// Apple Intelligence Service
service AppleIntelligenceService {
// Single completion request
@ -61,4 +168,12 @@ service AppleIntelligenceService {
// Health check
rpc Health(HealthRequest) returns (HealthResponse);
// Text-to-Speech
rpc TextToSpeech(TextToSpeechRequest) returns (TextToSpeechResponse);
rpc ListVoices(ListVoicesRequest) returns (ListVoicesResponse);
// Speech-to-Text
rpc Transcribe(TranscribeRequest) returns (TranscribeResponse);
rpc StreamTranscribe(stream StreamingTranscribeRequest) returns (stream StreamingTranscribeResponse);
}
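
For reference, a minimal caller-side sketch (not part of this commit) of the new unary TextToSpeech RPC, using the sugared Swift client API generated later in this diff. The `client` parameter and the output path are placeholders; any instance conforming to the generated ClientProtocol works, and the field names follow the proto above.

import Foundation

// Sketch only: exercise the new TextToSpeech RPC through the generated client.
// `client` and the output path are hypothetical.
func synthesizeGreeting(
    client: some Appleintelligence_AppleIntelligenceService.ClientProtocol
) async throws {
    var request = Appleintelligence_TextToSpeechRequest()
    request.text = "Bonjour! Hello from Apple Intelligence."
    request.outputFormat = .wav

    var voice = Appleintelligence_VoiceConfig()
    voice.speakingRate = 0.5              // schema default per the comment above
    request.voiceConfig = voice

    // The default onResponse handler returns the response message directly.
    let response = try await client.textToSpeech(request)
    print("Received \(response.audioData.count) bytes, \(response.durationSeconds)s @ \(response.sampleRate) Hz")
    try response.audioData.write(to: URL(fileURLWithPath: "/tmp/greeting.wav"))
}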

View File

@ -1,5 +1,7 @@
import Foundation
import AppKit
import AVFoundation
import Speech
import UniformTypeIdentifiers
import AppleIntelligenceCore
@ -14,9 +16,32 @@ final class ChatViewModel {
// Image attachment state
var pendingImages: [ImageAttachment] = []
// Voice input/output state
var isRecording: Bool = false
var isSpeaking: Bool = false
var speakingMessageId: UUID?
var recordingLevel: Float = 0
private var service: AppleIntelligenceService?
private var ttsService: TextToSpeechService?
private var sttService: SpeechToTextService?
private var currentTask: Task<Void, Never>?
// Audio recording - multi-language support
private var audioEngine: AVAudioEngine?
private var speechRecognizers: [String: SFSpeechRecognizer] = [:]
private var activeRecognizer: SFSpeechRecognizer?
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
private var recognitionTask: SFSpeechRecognitionTask?
// Supported speech recognition languages (Canadian English and French)
private static let supportedLocales = ["en-CA", "fr-CA"]
var detectedLanguage: String = "en-CA"
// Audio playback - use direct speech synthesis for reliability
private var speechSynthesizer: AVSpeechSynthesizer?
private var speechDelegate: SpeechSynthesizerDelegate?
// Maximum images per message
private let maxImagesPerMessage = 5
@ -28,6 +53,27 @@ final class ChatViewModel {
func initialize() async {
service = await AppleIntelligenceService()
ttsService = TextToSpeechService()
sttService = await SpeechToTextService()
// Initialize speech recognizers for all supported locales
for localeId in Self.supportedLocales {
if let recognizer = SFSpeechRecognizer(locale: Locale(identifier: localeId)) {
speechRecognizers[localeId] = recognizer
}
}
// Default to the system locale if supported; otherwise fall back to fr-CA for French system locales, else en-CA
let systemLocale = Locale.current.identifier
if speechRecognizers[systemLocale] != nil {
detectedLanguage = systemLocale
} else if systemLocale.starts(with: "fr") {
detectedLanguage = "fr-CA"
} else {
detectedLanguage = "en-CA"
}
activeRecognizer = speechRecognizers[detectedLanguage]
loadRecentImages()
}
@ -217,4 +263,262 @@ final class ChatViewModel {
messages.removeAll()
errorMessage = nil
}
// MARK: - Voice Input (Speech-to-Text)
func toggleRecording() {
if isRecording {
stopRecording()
} else {
startRecording()
}
}
func startRecording() {
Task {
// Use nonisolated helper to avoid MainActor isolation inheritance in TCC callback
let status = await Self.requestSpeechAuthorization()
guard status == .authorized else {
self.errorMessage = "Speech recognition not authorized"
return
}
self.beginRecording()
}
}
/// Request speech recognition authorization without MainActor isolation.
/// This prevents Swift 6 strict concurrency from asserting MainActor in the TCC callback.
private nonisolated static func requestSpeechAuthorization() async -> SFSpeechRecognizerAuthorizationStatus {
await withCheckedContinuation { continuation in
SFSpeechRecognizer.requestAuthorization { status in
continuation.resume(returning: status)
}
}
}
/// Creates audio tap handler in nonisolated context to avoid MainActor isolation inheritance.
/// Audio taps run on CoreAudio's RealtimeMessenger queue, not MainActor.
private nonisolated static func createAudioTapHandler(
request: SFSpeechAudioBufferRecognitionRequest,
levelUpdater: RecordingLevelUpdater
) -> (AVAudioPCMBuffer, AVAudioTime) -> Void {
return { buffer, _ in
request.append(buffer)
// Calculate audio level for visual feedback
guard let channelData = buffer.floatChannelData else { return }
let channelDataValue = channelData.pointee
let channelDataValueArray = stride(from: 0, to: Int(buffer.frameLength), by: buffer.stride).map { channelDataValue[$0] }
let rms = sqrt(channelDataValueArray.map { $0 * $0 }.reduce(0, +) / Float(buffer.frameLength))
let avgPower = 20 * log10(rms)
let level = max(0, min(1, (avgPower + 50) / 50))
levelUpdater.updateLevel(level)
}
}
private func beginRecording() {
// Try to find an available recognizer
let recognizer = activeRecognizer ?? speechRecognizers.values.first { $0.isAvailable }
guard let speechRecognizer = recognizer, speechRecognizer.isAvailable else {
errorMessage = "Speech recognition not available"
return
}
// Stop any existing recording
if audioEngine != nil {
stopRecording()
}
audioEngine = AVAudioEngine()
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
guard let audioEngine = audioEngine,
let recognitionRequest = recognitionRequest else {
errorMessage = "Failed to initialize audio engine"
return
}
recognitionRequest.shouldReportPartialResults = true
// Enable automatic punctuation if available (macOS 14+)
if #available(macOS 14, *) {
recognitionRequest.addsPunctuation = true
}
let inputNode = audioEngine.inputNode
let recordingFormat = inputNode.outputFormat(forBus: 0)
// Use nonisolated static function to create audio tap handler
// This breaks MainActor isolation inheritance in the closure
let levelUpdater = RecordingLevelUpdater(viewModel: self)
let audioTapHandler = Self.createAudioTapHandler(request: recognitionRequest, levelUpdater: levelUpdater)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat, block: audioTapHandler)
audioEngine.prepare()
do {
try audioEngine.start()
isRecording = true
// Use a sendable wrapper to forward recognition results back onto the MainActor
let resultHandler = RecognitionResultHandler(viewModel: self)
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
resultHandler.handleResult(result: result, error: error)
}
} catch {
errorMessage = "Failed to start recording: \(error.localizedDescription)"
cleanupRecording()
}
}
/// Switch to a different language for speech recognition
func switchLanguage(to localeId: String) {
guard let recognizer = speechRecognizers[localeId] else { return }
activeRecognizer = recognizer
detectedLanguage = localeId
}
/// Get available languages for speech recognition
var availableLanguages: [(id: String, name: String)] {
speechRecognizers.keys.sorted().compactMap { localeId in
let locale = Locale(identifier: localeId)
let name = locale.localizedString(forIdentifier: localeId) ?? localeId
return (id: localeId, name: name)
}
}
func stopRecording() {
recognitionRequest?.endAudio()
cleanupRecording()
}
fileprivate func cleanupRecording() {
audioEngine?.stop()
audioEngine?.inputNode.removeTap(onBus: 0)
audioEngine = nil
recognitionRequest = nil
recognitionTask?.cancel()
recognitionTask = nil
isRecording = false
recordingLevel = 0
}
// MARK: - Voice Output (Text-to-Speech)
func speakMessage(_ message: ChatMessage) {
guard !message.content.isEmpty else { return }
// If already speaking this message, stop
if isSpeaking && speakingMessageId == message.id {
stopSpeaking()
return
}
// Stop any current speech
stopSpeaking()
speakingMessageId = message.id
isSpeaking = true
// Create utterance
let utterance = AVSpeechUtterance(string: message.content)
utterance.rate = AVSpeechUtteranceDefaultSpeechRate
utterance.pitchMultiplier = 1.0
utterance.volume = 1.0
// Use voice matching current speech recognition language
if detectedLanguage == "fr-CA" {
utterance.voice = AVSpeechSynthesisVoice(language: "fr-CA")
} else {
utterance.voice = AVSpeechSynthesisVoice(language: "en-CA")
}
// Create synthesizer and delegate
let synthesizer = AVSpeechSynthesizer()
speechDelegate = SpeechSynthesizerDelegate { [weak self] in
Task { @MainActor in
self?.isSpeaking = false
self?.speakingMessageId = nil
self?.speechDelegate = nil
self?.speechSynthesizer = nil
}
}
synthesizer.delegate = speechDelegate
speechSynthesizer = synthesizer
// Speak directly
synthesizer.speak(utterance)
}
func stopSpeaking() {
speechSynthesizer?.stopSpeaking(at: .immediate)
speechSynthesizer = nil
speechDelegate = nil
isSpeaking = false
speakingMessageId = nil
}
}
// MARK: - Speech Synthesizer Delegate
private final class SpeechSynthesizerDelegate: NSObject, AVSpeechSynthesizerDelegate, @unchecked Sendable {
let onFinish: () -> Void
init(onFinish: @escaping () -> Void) {
self.onFinish = onFinish
}
func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
onFinish()
}
func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didCancel utterance: AVSpeechUtterance) {
onFinish()
}
}
// MARK: - Sendable Wrappers for Audio Callbacks
/// Wrapper to safely update recording level from audio callback thread
private final class RecordingLevelUpdater: @unchecked Sendable {
private weak var viewModel: ChatViewModel?
init(viewModel: ChatViewModel) {
self.viewModel = viewModel
}
func updateLevel(_ level: Float) {
Task { @MainActor [weak viewModel] in
viewModel?.recordingLevel = level
}
}
}
/// Wrapper to safely handle recognition results from Speech framework callback
private final class RecognitionResultHandler: @unchecked Sendable {
private weak var viewModel: ChatViewModel?
init(viewModel: ChatViewModel) {
self.viewModel = viewModel
}
func handleResult(result: SFSpeechRecognitionResult?, error: Error?) {
// Extract data before crossing actor boundary (SFSpeechRecognitionResult is not Sendable)
let transcription = result?.bestTranscription.formattedString
let isFinal = result?.isFinal ?? false
let hasError = error != nil
Task { @MainActor [weak viewModel] in
if let transcription = transcription {
viewModel?.inputText = transcription
}
if hasError || isFinal {
viewModel?.cleanupRecording()
}
}
}
}

View File

@ -23,7 +23,11 @@ struct ChatView: View {
ScrollView {
LazyVStack(spacing: 12) {
ForEach(viewModel.messages) { message in
MessageBubble(message: message)
MessageBubble(
message: message,
isSpeaking: viewModel.speakingMessageId == message.id,
onSpeak: { viewModel.speakMessage(message) }
)
.id(message.id)
}
}
@ -286,6 +290,45 @@ struct ChatView: View {
.buttonStyle(.plain)
.help("Paste image from clipboard")
// Language toggle for speech recognition
Button {
// Toggle between en-CA and fr-CA
let newLang = viewModel.detectedLanguage == "en-CA" ? "fr-CA" : "en-CA"
viewModel.switchLanguage(to: newLang)
} label: {
Text(viewModel.detectedLanguage == "fr-CA" ? "FR" : "EN")
.font(.caption.bold())
.foregroundStyle(.secondary)
.frame(width: 24, height: 24)
.background(
RoundedRectangle(cornerRadius: 4)
.fill(Color.secondary.opacity(0.1))
)
}
.buttonStyle(.plain)
.help("Speech language: \(viewModel.detectedLanguage) (click to toggle)")
// Microphone button for voice input
Button {
viewModel.toggleRecording()
} label: {
ZStack {
if viewModel.isRecording {
// Recording indicator with level
Circle()
.fill(Color.red.opacity(0.3))
.frame(width: 28 + CGFloat(viewModel.recordingLevel) * 10,
height: 28 + CGFloat(viewModel.recordingLevel) * 10)
.animation(.easeInOut(duration: 0.1), value: viewModel.recordingLevel)
}
Image(systemName: viewModel.isRecording ? "mic.fill" : "mic")
.font(.title3)
.foregroundStyle(viewModel.isRecording ? .red : .secondary)
}
}
.buttonStyle(.plain)
.help(viewModel.isRecording ? "Stop recording" : "Voice input")
TextField("Message...", text: $viewModel.inputText, axis: .vertical)
.textFieldStyle(.plain)
.lineLimit(1...5)
@ -386,6 +429,8 @@ struct RecentImageThumbnail: View {
struct MessageBubble: View {
let message: ChatMessage
var isSpeaking: Bool = false
var onSpeak: (() -> Void)? = nil
@State private var showCopied = false
var body: some View {
@ -419,10 +464,23 @@ struct MessageBubble: View {
}
}
// Copy button for assistant messages
// Action buttons for assistant messages
if message.role == .assistant && !message.content.isEmpty && !message.isStreaming {
HStack {
Spacer()
HStack(spacing: 12) {
// Speaker button for TTS
Button {
onSpeak?()
} label: {
HStack(spacing: 4) {
Image(systemName: isSpeaking ? "stop.fill" : "speaker.wave.2")
Text(isSpeaking ? "Stop" : "Speak")
}
.font(.caption)
.foregroundStyle(isSpeaking ? .red : .secondary)
}
.buttonStyle(.plain)
// Copy button
Button {
NSPasteboard.general.clearContents()
NSPasteboard.general.setString(message.content, forType: .string)
@ -439,6 +497,8 @@ struct MessageBubble: View {
.foregroundStyle(.secondary)
}
.buttonStyle(.plain)
Spacer()
}
.padding(.top, 2)
}

View File

@ -56,11 +56,63 @@ public enum Appleintelligence_AppleIntelligenceService: Sendable {
method: "Health"
)
}
/// Namespace for "TextToSpeech" metadata.
public enum TextToSpeech: Sendable {
/// Request type for "TextToSpeech".
public typealias Input = Appleintelligence_TextToSpeechRequest
/// Response type for "TextToSpeech".
public typealias Output = Appleintelligence_TextToSpeechResponse
/// Descriptor for "TextToSpeech".
public static let descriptor = GRPCCore.MethodDescriptor(
service: GRPCCore.ServiceDescriptor(fullyQualifiedService: "appleintelligence.AppleIntelligenceService"),
method: "TextToSpeech"
)
}
/// Namespace for "ListVoices" metadata.
public enum ListVoices: Sendable {
/// Request type for "ListVoices".
public typealias Input = Appleintelligence_ListVoicesRequest
/// Response type for "ListVoices".
public typealias Output = Appleintelligence_ListVoicesResponse
/// Descriptor for "ListVoices".
public static let descriptor = GRPCCore.MethodDescriptor(
service: GRPCCore.ServiceDescriptor(fullyQualifiedService: "appleintelligence.AppleIntelligenceService"),
method: "ListVoices"
)
}
/// Namespace for "Transcribe" metadata.
public enum Transcribe: Sendable {
/// Request type for "Transcribe".
public typealias Input = Appleintelligence_TranscribeRequest
/// Response type for "Transcribe".
public typealias Output = Appleintelligence_TranscribeResponse
/// Descriptor for "Transcribe".
public static let descriptor = GRPCCore.MethodDescriptor(
service: GRPCCore.ServiceDescriptor(fullyQualifiedService: "appleintelligence.AppleIntelligenceService"),
method: "Transcribe"
)
}
/// Namespace for "StreamTranscribe" metadata.
public enum StreamTranscribe: Sendable {
/// Request type for "StreamTranscribe".
public typealias Input = Appleintelligence_StreamingTranscribeRequest
/// Response type for "StreamTranscribe".
public typealias Output = Appleintelligence_StreamingTranscribeResponse
/// Descriptor for "StreamTranscribe".
public static let descriptor = GRPCCore.MethodDescriptor(
service: GRPCCore.ServiceDescriptor(fullyQualifiedService: "appleintelligence.AppleIntelligenceService"),
method: "StreamTranscribe"
)
}
/// Descriptors for all methods in the "appleintelligence.AppleIntelligenceService" service.
public static let descriptors: [GRPCCore.MethodDescriptor] = [
Complete.descriptor,
StreamComplete.descriptor,
Health.descriptor
Health.descriptor,
TextToSpeech.descriptor,
ListVoices.descriptor,
Transcribe.descriptor,
StreamTranscribe.descriptor
]
}
}
@ -143,6 +195,70 @@ extension Appleintelligence_AppleIntelligenceService {
request: GRPCCore.StreamingServerRequest<Appleintelligence_HealthRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_HealthResponse>
/// Handle the "TextToSpeech" method.
///
/// > Source IDL Documentation:
/// >
/// > Text-to-Speech
///
/// - Parameters:
/// - request: A streaming request of `Appleintelligence_TextToSpeechRequest` messages.
/// - context: Context providing information about the RPC.
/// - Throws: Any error which occurred during the processing of the request. Thrown errors
/// of type `RPCError` are mapped to appropriate statuses. All other errors are converted
/// to an internal error.
/// - Returns: A streaming response of `Appleintelligence_TextToSpeechResponse` messages.
func textToSpeech(
request: GRPCCore.StreamingServerRequest<Appleintelligence_TextToSpeechRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_TextToSpeechResponse>
/// Handle the "ListVoices" method.
///
/// - Parameters:
/// - request: A streaming request of `Appleintelligence_ListVoicesRequest` messages.
/// - context: Context providing information about the RPC.
/// - Throws: Any error which occurred during the processing of the request. Thrown errors
/// of type `RPCError` are mapped to appropriate statuses. All other errors are converted
/// to an internal error.
/// - Returns: A streaming response of `Appleintelligence_ListVoicesResponse` messages.
func listVoices(
request: GRPCCore.StreamingServerRequest<Appleintelligence_ListVoicesRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_ListVoicesResponse>
/// Handle the "Transcribe" method.
///
/// > Source IDL Documentation:
/// >
/// > Speech-to-Text
///
/// - Parameters:
/// - request: A streaming request of `Appleintelligence_TranscribeRequest` messages.
/// - context: Context providing information about the RPC.
/// - Throws: Any error which occurred during the processing of the request. Thrown errors
/// of type `RPCError` are mapped to appropriate statuses. All other errors are converted
/// to an internal error.
/// - Returns: A streaming response of `Appleintelligence_TranscribeResponse` messages.
func transcribe(
request: GRPCCore.StreamingServerRequest<Appleintelligence_TranscribeRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_TranscribeResponse>
/// Handle the "StreamTranscribe" method.
///
/// - Parameters:
/// - request: A streaming request of `Appleintelligence_StreamingTranscribeRequest` messages.
/// - context: Context providing information about the RPC.
/// - Throws: Any error which occurred during the processing of the request. Thrown errors
/// of type `RPCError` are mapped to appropriate statuses. All other errors are converted
/// to an internal error.
/// - Returns: A streaming response of `Appleintelligence_StreamingTranscribeResponse` messages.
func streamTranscribe(
request: GRPCCore.StreamingServerRequest<Appleintelligence_StreamingTranscribeRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_StreamingTranscribeResponse>
}
/// Service protocol for the "appleintelligence.AppleIntelligenceService" service.
@ -210,6 +326,70 @@ extension Appleintelligence_AppleIntelligenceService {
request: GRPCCore.ServerRequest<Appleintelligence_HealthRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.ServerResponse<Appleintelligence_HealthResponse>
/// Handle the "TextToSpeech" method.
///
/// > Source IDL Documentation:
/// >
/// > Text-to-Speech
///
/// - Parameters:
/// - request: A request containing a single `Appleintelligence_TextToSpeechRequest` message.
/// - context: Context providing information about the RPC.
/// - Throws: Any error which occurred during the processing of the request. Thrown errors
/// of type `RPCError` are mapped to appropriate statuses. All other errors are converted
/// to an internal error.
/// - Returns: A response containing a single `Appleintelligence_TextToSpeechResponse` message.
func textToSpeech(
request: GRPCCore.ServerRequest<Appleintelligence_TextToSpeechRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.ServerResponse<Appleintelligence_TextToSpeechResponse>
/// Handle the "ListVoices" method.
///
/// - Parameters:
/// - request: A request containing a single `Appleintelligence_ListVoicesRequest` message.
/// - context: Context providing information about the RPC.
/// - Throws: Any error which occurred during the processing of the request. Thrown errors
/// of type `RPCError` are mapped to appropriate statuses. All other errors are converted
/// to an internal error.
/// - Returns: A response containing a single `Appleintelligence_ListVoicesResponse` message.
func listVoices(
request: GRPCCore.ServerRequest<Appleintelligence_ListVoicesRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.ServerResponse<Appleintelligence_ListVoicesResponse>
/// Handle the "Transcribe" method.
///
/// > Source IDL Documentation:
/// >
/// > Speech-to-Text
///
/// - Parameters:
/// - request: A request containing a single `Appleintelligence_TranscribeRequest` message.
/// - context: Context providing information about the RPC.
/// - Throws: Any error which occurred during the processing of the request. Thrown errors
/// of type `RPCError` are mapped to appropriate statuses. All other errors are converted
/// to an internal error.
/// - Returns: A response containing a single `Appleintelligence_TranscribeResponse` message.
func transcribe(
request: GRPCCore.ServerRequest<Appleintelligence_TranscribeRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.ServerResponse<Appleintelligence_TranscribeResponse>
/// Handle the "StreamTranscribe" method.
///
/// - Parameters:
/// - request: A streaming request of `Appleintelligence_StreamingTranscribeRequest` messages.
/// - context: Context providing information about the RPC.
/// - Throws: Any error which occurred during the processing of the request. Thrown errors
/// of type `RPCError` are mapped to appropriate statuses. All other errors are converted
/// to an internal error.
/// - Returns: A streaming response of `Appleintelligence_StreamingTranscribeResponse` messages.
func streamTranscribe(
request: GRPCCore.StreamingServerRequest<Appleintelligence_StreamingTranscribeRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_StreamingTranscribeResponse>
}
/// Simple service protocol for the "appleintelligence.AppleIntelligenceService" service.
@ -276,6 +456,71 @@ extension Appleintelligence_AppleIntelligenceService {
request: Appleintelligence_HealthRequest,
context: GRPCCore.ServerContext
) async throws -> Appleintelligence_HealthResponse
/// Handle the "TextToSpeech" method.
///
/// > Source IDL Documentation:
/// >
/// > Text-to-Speech
///
/// - Parameters:
/// - request: A `Appleintelligence_TextToSpeechRequest` message.
/// - context: Context providing information about the RPC.
/// - Throws: Any error which occurred during the processing of the request. Thrown errors
/// of type `RPCError` are mapped to appropriate statuses. All other errors are converted
/// to an internal error.
/// - Returns: A `Appleintelligence_TextToSpeechResponse` to respond with.
func textToSpeech(
request: Appleintelligence_TextToSpeechRequest,
context: GRPCCore.ServerContext
) async throws -> Appleintelligence_TextToSpeechResponse
/// Handle the "ListVoices" method.
///
/// - Parameters:
/// - request: A `Appleintelligence_ListVoicesRequest` message.
/// - context: Context providing information about the RPC.
/// - Throws: Any error which occurred during the processing of the request. Thrown errors
/// of type `RPCError` are mapped to appropriate statuses. All other errors are converted
/// to an internal error.
/// - Returns: A `Appleintelligence_ListVoicesResponse` to respond with.
func listVoices(
request: Appleintelligence_ListVoicesRequest,
context: GRPCCore.ServerContext
) async throws -> Appleintelligence_ListVoicesResponse
/// Handle the "Transcribe" method.
///
/// > Source IDL Documentation:
/// >
/// > Speech-to-Text
///
/// - Parameters:
/// - request: A `Appleintelligence_TranscribeRequest` message.
/// - context: Context providing information about the RPC.
/// - Throws: Any error which occurred during the processing of the request. Thrown errors
/// of type `RPCError` are mapped to appropriate statuses. All other errors are converted
/// to an internal error.
/// - Returns: A `Appleintelligence_TranscribeResponse` to respond with.
func transcribe(
request: Appleintelligence_TranscribeRequest,
context: GRPCCore.ServerContext
) async throws -> Appleintelligence_TranscribeResponse
/// Handle the "StreamTranscribe" method.
///
/// - Parameters:
/// - request: A stream of `Appleintelligence_StreamingTranscribeRequest` messages.
/// - response: A response stream of `Appleintelligence_StreamingTranscribeResponse` messages.
/// - context: Context providing information about the RPC.
/// - Throws: Any error which occurred during the processing of the request. Thrown errors
/// of type `RPCError` are mapped to appropriate statuses. All other errors are converted
/// to an internal error.
func streamTranscribe(
request: GRPCCore.RPCAsyncSequence<Appleintelligence_StreamingTranscribeRequest, any Swift.Error>,
response: GRPCCore.RPCWriter<Appleintelligence_StreamingTranscribeResponse>,
context: GRPCCore.ServerContext
) async throws
}
}
@ -316,6 +561,50 @@ extension Appleintelligence_AppleIntelligenceService.StreamingServiceProtocol {
)
}
)
router.registerHandler(
forMethod: Appleintelligence_AppleIntelligenceService.Method.TextToSpeech.descriptor,
deserializer: GRPCProtobuf.ProtobufDeserializer<Appleintelligence_TextToSpeechRequest>(),
serializer: GRPCProtobuf.ProtobufSerializer<Appleintelligence_TextToSpeechResponse>(),
handler: { request, context in
try await self.textToSpeech(
request: request,
context: context
)
}
)
router.registerHandler(
forMethod: Appleintelligence_AppleIntelligenceService.Method.ListVoices.descriptor,
deserializer: GRPCProtobuf.ProtobufDeserializer<Appleintelligence_ListVoicesRequest>(),
serializer: GRPCProtobuf.ProtobufSerializer<Appleintelligence_ListVoicesResponse>(),
handler: { request, context in
try await self.listVoices(
request: request,
context: context
)
}
)
router.registerHandler(
forMethod: Appleintelligence_AppleIntelligenceService.Method.Transcribe.descriptor,
deserializer: GRPCProtobuf.ProtobufDeserializer<Appleintelligence_TranscribeRequest>(),
serializer: GRPCProtobuf.ProtobufSerializer<Appleintelligence_TranscribeResponse>(),
handler: { request, context in
try await self.transcribe(
request: request,
context: context
)
}
)
router.registerHandler(
forMethod: Appleintelligence_AppleIntelligenceService.Method.StreamTranscribe.descriptor,
deserializer: GRPCProtobuf.ProtobufDeserializer<Appleintelligence_StreamingTranscribeRequest>(),
serializer: GRPCProtobuf.ProtobufSerializer<Appleintelligence_StreamingTranscribeResponse>(),
handler: { request, context in
try await self.streamTranscribe(
request: request,
context: context
)
}
)
}
}
@ -354,6 +643,39 @@ extension Appleintelligence_AppleIntelligenceService.ServiceProtocol {
)
return GRPCCore.StreamingServerResponse(single: response)
}
public func textToSpeech(
request: GRPCCore.StreamingServerRequest<Appleintelligence_TextToSpeechRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_TextToSpeechResponse> {
let response = try await self.textToSpeech(
request: GRPCCore.ServerRequest(stream: request),
context: context
)
return GRPCCore.StreamingServerResponse(single: response)
}
public func listVoices(
request: GRPCCore.StreamingServerRequest<Appleintelligence_ListVoicesRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_ListVoicesResponse> {
let response = try await self.listVoices(
request: GRPCCore.ServerRequest(stream: request),
context: context
)
return GRPCCore.StreamingServerResponse(single: response)
}
public func transcribe(
request: GRPCCore.StreamingServerRequest<Appleintelligence_TranscribeRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_TranscribeResponse> {
let response = try await self.transcribe(
request: GRPCCore.ServerRequest(stream: request),
context: context
)
return GRPCCore.StreamingServerResponse(single: response)
}
}
// Default implementation of methods from 'ServiceProtocol'.
@ -401,6 +723,62 @@ extension Appleintelligence_AppleIntelligenceService.SimpleServiceProtocol {
metadata: [:]
)
}
public func textToSpeech(
request: GRPCCore.ServerRequest<Appleintelligence_TextToSpeechRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.ServerResponse<Appleintelligence_TextToSpeechResponse> {
return GRPCCore.ServerResponse<Appleintelligence_TextToSpeechResponse>(
message: try await self.textToSpeech(
request: request.message,
context: context
),
metadata: [:]
)
}
public func listVoices(
request: GRPCCore.ServerRequest<Appleintelligence_ListVoicesRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.ServerResponse<Appleintelligence_ListVoicesResponse> {
return GRPCCore.ServerResponse<Appleintelligence_ListVoicesResponse>(
message: try await self.listVoices(
request: request.message,
context: context
),
metadata: [:]
)
}
public func transcribe(
request: GRPCCore.ServerRequest<Appleintelligence_TranscribeRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.ServerResponse<Appleintelligence_TranscribeResponse> {
return GRPCCore.ServerResponse<Appleintelligence_TranscribeResponse>(
message: try await self.transcribe(
request: request.message,
context: context
),
metadata: [:]
)
}
public func streamTranscribe(
request: GRPCCore.StreamingServerRequest<Appleintelligence_StreamingTranscribeRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_StreamingTranscribeResponse> {
return GRPCCore.StreamingServerResponse<Appleintelligence_StreamingTranscribeResponse>(
metadata: [:],
producer: { writer in
try await self.streamTranscribe(
request: request.messages,
response: writer,
context: context
)
return [:]
}
)
}
}
// MARK: appleintelligence.AppleIntelligenceService (client)
@ -484,6 +862,90 @@ extension Appleintelligence_AppleIntelligenceService {
options: GRPCCore.CallOptions,
onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_HealthResponse>) async throws -> Result
) async throws -> Result where Result: Sendable
/// Call the "TextToSpeech" method.
///
/// > Source IDL Documentation:
/// >
/// > Text-to-Speech
///
/// - Parameters:
/// - request: A request containing a single `Appleintelligence_TextToSpeechRequest` message.
/// - serializer: A serializer for `Appleintelligence_TextToSpeechRequest` messages.
/// - deserializer: A deserializer for `Appleintelligence_TextToSpeechResponse` messages.
/// - options: Options to apply to this RPC.
/// - handleResponse: A closure which handles the response, the result of which is
/// returned to the caller. Returning from the closure will cancel the RPC if it
/// hasn't already finished.
/// - Returns: The result of `handleResponse`.
func textToSpeech<Result>(
request: GRPCCore.ClientRequest<Appleintelligence_TextToSpeechRequest>,
serializer: some GRPCCore.MessageSerializer<Appleintelligence_TextToSpeechRequest>,
deserializer: some GRPCCore.MessageDeserializer<Appleintelligence_TextToSpeechResponse>,
options: GRPCCore.CallOptions,
onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_TextToSpeechResponse>) async throws -> Result
) async throws -> Result where Result: Sendable
/// Call the "ListVoices" method.
///
/// - Parameters:
/// - request: A request containing a single `Appleintelligence_ListVoicesRequest` message.
/// - serializer: A serializer for `Appleintelligence_ListVoicesRequest` messages.
/// - deserializer: A deserializer for `Appleintelligence_ListVoicesResponse` messages.
/// - options: Options to apply to this RPC.
/// - handleResponse: A closure which handles the response, the result of which is
/// returned to the caller. Returning from the closure will cancel the RPC if it
/// hasn't already finished.
/// - Returns: The result of `handleResponse`.
func listVoices<Result>(
request: GRPCCore.ClientRequest<Appleintelligence_ListVoicesRequest>,
serializer: some GRPCCore.MessageSerializer<Appleintelligence_ListVoicesRequest>,
deserializer: some GRPCCore.MessageDeserializer<Appleintelligence_ListVoicesResponse>,
options: GRPCCore.CallOptions,
onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_ListVoicesResponse>) async throws -> Result
) async throws -> Result where Result: Sendable
/// Call the "Transcribe" method.
///
/// > Source IDL Documentation:
/// >
/// > Speech-to-Text
///
/// - Parameters:
/// - request: A request containing a single `Appleintelligence_TranscribeRequest` message.
/// - serializer: A serializer for `Appleintelligence_TranscribeRequest` messages.
/// - deserializer: A deserializer for `Appleintelligence_TranscribeResponse` messages.
/// - options: Options to apply to this RPC.
/// - handleResponse: A closure which handles the response, the result of which is
/// returned to the caller. Returning from the closure will cancel the RPC if it
/// hasn't already finished.
/// - Returns: The result of `handleResponse`.
func transcribe<Result>(
request: GRPCCore.ClientRequest<Appleintelligence_TranscribeRequest>,
serializer: some GRPCCore.MessageSerializer<Appleintelligence_TranscribeRequest>,
deserializer: some GRPCCore.MessageDeserializer<Appleintelligence_TranscribeResponse>,
options: GRPCCore.CallOptions,
onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_TranscribeResponse>) async throws -> Result
) async throws -> Result where Result: Sendable
/// Call the "StreamTranscribe" method.
///
/// - Parameters:
/// - request: A streaming request producing `Appleintelligence_StreamingTranscribeRequest` messages.
/// - serializer: A serializer for `Appleintelligence_StreamingTranscribeRequest` messages.
/// - deserializer: A deserializer for `Appleintelligence_StreamingTranscribeResponse` messages.
/// - options: Options to apply to this RPC.
/// - handleResponse: A closure which handles the response, the result of which is
/// returned to the caller. Returning from the closure will cancel the RPC if it
/// hasn't already finished.
/// - Returns: The result of `handleResponse`.
func streamTranscribe<Result>(
request: GRPCCore.StreamingClientRequest<Appleintelligence_StreamingTranscribeRequest>,
serializer: some GRPCCore.MessageSerializer<Appleintelligence_StreamingTranscribeRequest>,
deserializer: some GRPCCore.MessageDeserializer<Appleintelligence_StreamingTranscribeResponse>,
options: GRPCCore.CallOptions,
onResponse handleResponse: @Sendable @escaping (GRPCCore.StreamingClientResponse<Appleintelligence_StreamingTranscribeResponse>) async throws -> Result
) async throws -> Result where Result: Sendable
}
/// Generated client for the "appleintelligence.AppleIntelligenceService" service.
@ -605,6 +1067,132 @@ extension Appleintelligence_AppleIntelligenceService {
onResponse: handleResponse
)
}
/// Call the "TextToSpeech" method.
///
/// > Source IDL Documentation:
/// >
/// > Text-to-Speech
///
/// - Parameters:
/// - request: A request containing a single `Appleintelligence_TextToSpeechRequest` message.
/// - serializer: A serializer for `Appleintelligence_TextToSpeechRequest` messages.
/// - deserializer: A deserializer for `Appleintelligence_TextToSpeechResponse` messages.
/// - options: Options to apply to this RPC.
/// - handleResponse: A closure which handles the response, the result of which is
/// returned to the caller. Returning from the closure will cancel the RPC if it
/// hasn't already finished.
/// - Returns: The result of `handleResponse`.
public func textToSpeech<Result>(
request: GRPCCore.ClientRequest<Appleintelligence_TextToSpeechRequest>,
serializer: some GRPCCore.MessageSerializer<Appleintelligence_TextToSpeechRequest>,
deserializer: some GRPCCore.MessageDeserializer<Appleintelligence_TextToSpeechResponse>,
options: GRPCCore.CallOptions = .defaults,
onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_TextToSpeechResponse>) async throws -> Result = { response in
try response.message
}
) async throws -> Result where Result: Sendable {
try await self.client.unary(
request: request,
descriptor: Appleintelligence_AppleIntelligenceService.Method.TextToSpeech.descriptor,
serializer: serializer,
deserializer: deserializer,
options: options,
onResponse: handleResponse
)
}
/// Call the "ListVoices" method.
///
/// - Parameters:
/// - request: A request containing a single `Appleintelligence_ListVoicesRequest` message.
/// - serializer: A serializer for `Appleintelligence_ListVoicesRequest` messages.
/// - deserializer: A deserializer for `Appleintelligence_ListVoicesResponse` messages.
/// - options: Options to apply to this RPC.
/// - handleResponse: A closure which handles the response, the result of which is
/// returned to the caller. Returning from the closure will cancel the RPC if it
/// hasn't already finished.
/// - Returns: The result of `handleResponse`.
public func listVoices<Result>(
request: GRPCCore.ClientRequest<Appleintelligence_ListVoicesRequest>,
serializer: some GRPCCore.MessageSerializer<Appleintelligence_ListVoicesRequest>,
deserializer: some GRPCCore.MessageDeserializer<Appleintelligence_ListVoicesResponse>,
options: GRPCCore.CallOptions = .defaults,
onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_ListVoicesResponse>) async throws -> Result = { response in
try response.message
}
) async throws -> Result where Result: Sendable {
try await self.client.unary(
request: request,
descriptor: Appleintelligence_AppleIntelligenceService.Method.ListVoices.descriptor,
serializer: serializer,
deserializer: deserializer,
options: options,
onResponse: handleResponse
)
}
/// Call the "Transcribe" method.
///
/// > Source IDL Documentation:
/// >
/// > Speech-to-Text
///
/// - Parameters:
/// - request: A request containing a single `Appleintelligence_TranscribeRequest` message.
/// - serializer: A serializer for `Appleintelligence_TranscribeRequest` messages.
/// - deserializer: A deserializer for `Appleintelligence_TranscribeResponse` messages.
/// - options: Options to apply to this RPC.
/// - handleResponse: A closure which handles the response, the result of which is
/// returned to the caller. Returning from the closure will cancel the RPC if it
/// hasn't already finished.
/// - Returns: The result of `handleResponse`.
public func transcribe<Result>(
request: GRPCCore.ClientRequest<Appleintelligence_TranscribeRequest>,
serializer: some GRPCCore.MessageSerializer<Appleintelligence_TranscribeRequest>,
deserializer: some GRPCCore.MessageDeserializer<Appleintelligence_TranscribeResponse>,
options: GRPCCore.CallOptions = .defaults,
onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_TranscribeResponse>) async throws -> Result = { response in
try response.message
}
) async throws -> Result where Result: Sendable {
try await self.client.unary(
request: request,
descriptor: Appleintelligence_AppleIntelligenceService.Method.Transcribe.descriptor,
serializer: serializer,
deserializer: deserializer,
options: options,
onResponse: handleResponse
)
}
/// Call the "StreamTranscribe" method.
///
/// - Parameters:
/// - request: A streaming request producing `Appleintelligence_StreamingTranscribeRequest` messages.
/// - serializer: A serializer for `Appleintelligence_StreamingTranscribeRequest` messages.
/// - deserializer: A deserializer for `Appleintelligence_StreamingTranscribeResponse` messages.
/// - options: Options to apply to this RPC.
/// - handleResponse: A closure which handles the response, the result of which is
/// returned to the caller. Returning from the closure will cancel the RPC if it
/// hasn't already finished.
/// - Returns: The result of `handleResponse`.
public func streamTranscribe<Result>(
request: GRPCCore.StreamingClientRequest<Appleintelligence_StreamingTranscribeRequest>,
serializer: some GRPCCore.MessageSerializer<Appleintelligence_StreamingTranscribeRequest>,
deserializer: some GRPCCore.MessageDeserializer<Appleintelligence_StreamingTranscribeResponse>,
options: GRPCCore.CallOptions = .defaults,
onResponse handleResponse: @Sendable @escaping (GRPCCore.StreamingClientResponse<Appleintelligence_StreamingTranscribeResponse>) async throws -> Result
) async throws -> Result where Result: Sendable {
try await self.client.bidirectionalStreaming(
request: request,
descriptor: Appleintelligence_AppleIntelligenceService.Method.StreamTranscribe.descriptor,
serializer: serializer,
deserializer: deserializer,
options: options,
onResponse: handleResponse
)
}
}
}
@ -695,6 +1283,112 @@ extension Appleintelligence_AppleIntelligenceService.ClientProtocol {
onResponse: handleResponse
)
}
/// Call the "TextToSpeech" method.
///
/// > Source IDL Documentation:
/// >
/// > Text-to-Speech
///
/// - Parameters:
/// - request: A request containing a single `Appleintelligence_TextToSpeechRequest` message.
/// - options: Options to apply to this RPC.
/// - handleResponse: A closure which handles the response, the result of which is
/// returned to the caller. Returning from the closure will cancel the RPC if it
/// hasn't already finished.
/// - Returns: The result of `handleResponse`.
public func textToSpeech<Result>(
request: GRPCCore.ClientRequest<Appleintelligence_TextToSpeechRequest>,
options: GRPCCore.CallOptions = .defaults,
onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_TextToSpeechResponse>) async throws -> Result = { response in
try response.message
}
) async throws -> Result where Result: Sendable {
try await self.textToSpeech(
request: request,
serializer: GRPCProtobuf.ProtobufSerializer<Appleintelligence_TextToSpeechRequest>(),
deserializer: GRPCProtobuf.ProtobufDeserializer<Appleintelligence_TextToSpeechResponse>(),
options: options,
onResponse: handleResponse
)
}
/// Call the "ListVoices" method.
///
/// - Parameters:
/// - request: A request containing a single `Appleintelligence_ListVoicesRequest` message.
/// - options: Options to apply to this RPC.
/// - handleResponse: A closure which handles the response, the result of which is
/// returned to the caller. Returning from the closure will cancel the RPC if it
/// hasn't already finished.
/// - Returns: The result of `handleResponse`.
public func listVoices<Result>(
request: GRPCCore.ClientRequest<Appleintelligence_ListVoicesRequest>,
options: GRPCCore.CallOptions = .defaults,
onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_ListVoicesResponse>) async throws -> Result = { response in
try response.message
}
) async throws -> Result where Result: Sendable {
try await self.listVoices(
request: request,
serializer: GRPCProtobuf.ProtobufSerializer<Appleintelligence_ListVoicesRequest>(),
deserializer: GRPCProtobuf.ProtobufDeserializer<Appleintelligence_ListVoicesResponse>(),
options: options,
onResponse: handleResponse
)
}
/// Call the "Transcribe" method.
///
/// > Source IDL Documentation:
/// >
/// > Speech-to-Text
///
/// - Parameters:
/// - request: A request containing a single `Appleintelligence_TranscribeRequest` message.
/// - options: Options to apply to this RPC.
/// - handleResponse: A closure which handles the response, the result of which is
/// returned to the caller. Returning from the closure will cancel the RPC if it
/// hasn't already finished.
/// - Returns: The result of `handleResponse`.
public func transcribe<Result>(
request: GRPCCore.ClientRequest<Appleintelligence_TranscribeRequest>,
options: GRPCCore.CallOptions = .defaults,
onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_TranscribeResponse>) async throws -> Result = { response in
try response.message
}
) async throws -> Result where Result: Sendable {
try await self.transcribe(
request: request,
serializer: GRPCProtobuf.ProtobufSerializer<Appleintelligence_TranscribeRequest>(),
deserializer: GRPCProtobuf.ProtobufDeserializer<Appleintelligence_TranscribeResponse>(),
options: options,
onResponse: handleResponse
)
}
/// Call the "StreamTranscribe" method.
///
/// - Parameters:
/// - request: A streaming request producing `Appleintelligence_StreamingTranscribeRequest` messages.
/// - options: Options to apply to this RPC.
/// - handleResponse: A closure which handles the response, the result of which is
/// returned to the caller. Returning from the closure will cancel the RPC if it
/// hasn't already finished.
/// - Returns: The result of `handleResponse`.
public func streamTranscribe<Result>(
request: GRPCCore.StreamingClientRequest<Appleintelligence_StreamingTranscribeRequest>,
options: GRPCCore.CallOptions = .defaults,
onResponse handleResponse: @Sendable @escaping (GRPCCore.StreamingClientResponse<Appleintelligence_StreamingTranscribeResponse>) async throws -> Result
) async throws -> Result where Result: Sendable {
try await self.streamTranscribe(
request: request,
serializer: GRPCProtobuf.ProtobufSerializer<Appleintelligence_StreamingTranscribeRequest>(),
deserializer: GRPCProtobuf.ProtobufDeserializer<Appleintelligence_StreamingTranscribeResponse>(),
options: options,
onResponse: handleResponse
)
}
}
// Helpers providing sugared APIs for 'ClientProtocol' methods.
@ -796,4 +1490,127 @@ extension Appleintelligence_AppleIntelligenceService.ClientProtocol {
onResponse: handleResponse
)
}
/// Call the "TextToSpeech" method.
///
/// > Source IDL Documentation:
/// >
/// > Text-to-Speech
///
/// - Parameters:
/// - message: request message to send.
/// - metadata: Additional metadata to send, defaults to empty.
/// - options: Options to apply to this RPC, defaults to `.defaults`.
/// - handleResponse: A closure which handles the response, the result of which is
/// returned to the caller. Returning from the closure will cancel the RPC if it
/// hasn't already finished.
/// - Returns: The result of `handleResponse`.
public func textToSpeech<Result>(
_ message: Appleintelligence_TextToSpeechRequest,
metadata: GRPCCore.Metadata = [:],
options: GRPCCore.CallOptions = .defaults,
onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_TextToSpeechResponse>) async throws -> Result = { response in
try response.message
}
) async throws -> Result where Result: Sendable {
let request = GRPCCore.ClientRequest<Appleintelligence_TextToSpeechRequest>(
message: message,
metadata: metadata
)
return try await self.textToSpeech(
request: request,
options: options,
onResponse: handleResponse
)
}
/// Call the "ListVoices" method.
///
/// - Parameters:
/// - message: request message to send.
/// - metadata: Additional metadata to send, defaults to empty.
/// - options: Options to apply to this RPC, defaults to `.defaults`.
/// - handleResponse: A closure which handles the response, the result of which is
/// returned to the caller. Returning from the closure will cancel the RPC if it
/// hasn't already finished.
/// - Returns: The result of `handleResponse`.
public func listVoices<Result>(
_ message: Appleintelligence_ListVoicesRequest,
metadata: GRPCCore.Metadata = [:],
options: GRPCCore.CallOptions = .defaults,
onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_ListVoicesResponse>) async throws -> Result = { response in
try response.message
}
) async throws -> Result where Result: Sendable {
let request = GRPCCore.ClientRequest<Appleintelligence_ListVoicesRequest>(
message: message,
metadata: metadata
)
return try await self.listVoices(
request: request,
options: options,
onResponse: handleResponse
)
}
/// Call the "Transcribe" method.
///
/// > Source IDL Documentation:
/// >
/// > Speech-to-Text
///
/// - Parameters:
/// - message: request message to send.
/// - metadata: Additional metadata to send, defaults to empty.
/// - options: Options to apply to this RPC, defaults to `.defaults`.
/// - handleResponse: A closure which handles the response, the result of which is
/// returned to the caller. Returning from the closure will cancel the RPC if it
/// hasn't already finished.
/// - Returns: The result of `handleResponse`.
public func transcribe<Result>(
_ message: Appleintelligence_TranscribeRequest,
metadata: GRPCCore.Metadata = [:],
options: GRPCCore.CallOptions = .defaults,
onResponse handleResponse: @Sendable @escaping (GRPCCore.ClientResponse<Appleintelligence_TranscribeResponse>) async throws -> Result = { response in
try response.message
}
) async throws -> Result where Result: Sendable {
let request = GRPCCore.ClientRequest<Appleintelligence_TranscribeRequest>(
message: message,
metadata: metadata
)
return try await self.transcribe(
request: request,
options: options,
onResponse: handleResponse
)
}
/// Call the "StreamTranscribe" method.
///
/// - Parameters:
/// - metadata: Additional metadata to send, defaults to empty.
/// - options: Options to apply to this RPC, defaults to `.defaults`.
/// - producer: A closure producing request messages to send to the server. The request
/// stream is closed when the closure returns.
/// - handleResponse: A closure which handles the response, the result of which is
/// returned to the caller. Returning from the closure will cancel the RPC if it
/// hasn't already finished.
/// - Returns: The result of `handleResponse`.
public func streamTranscribe<Result>(
metadata: GRPCCore.Metadata = [:],
options: GRPCCore.CallOptions = .defaults,
requestProducer producer: @Sendable @escaping (GRPCCore.RPCWriter<Appleintelligence_StreamingTranscribeRequest>) async throws -> Void,
onResponse handleResponse: @Sendable @escaping (GRPCCore.StreamingClientResponse<Appleintelligence_StreamingTranscribeResponse>) async throws -> Result
) async throws -> Result where Result: Sendable {
let request = GRPCCore.StreamingClientRequest<Appleintelligence_StreamingTranscribeRequest>(
metadata: metadata,
producer: producer
)
return try await self.streamTranscribe(
request: request,
options: options,
onResponse: handleResponse
)
}
}
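
A short usage sketch (assumed caller code, not part of this commit) for the streamTranscribe helper above: the first written message carries the TranscriptionConfig and subsequent messages carry raw audio chunks, mirroring the oneof contract in the proto. `audioChunks` is a hypothetical array of Data, and reading `response.messages` assumes the grpc-swift-2 streaming response API.

import Foundation

func transcribeChunks(
    client: some Appleintelligence_AppleIntelligenceService.ClientProtocol,
    audioChunks: [Data]
) async throws -> String {
    try await client.streamTranscribe(
        requestProducer: { writer in
            // 1. Send the configuration message first.
            var configMessage = Appleintelligence_StreamingTranscribeRequest()
            var config = Appleintelligence_TranscriptionConfig()
            config.languageCode = "fr-CA"
            configMessage.config = config
            try await writer.write(configMessage)

            // 2. Then stream the audio chunks; the request stream closes when
            //    this closure returns.
            for chunk in audioChunks {
                var audioMessage = Appleintelligence_StreamingTranscribeRequest()
                audioMessage.audioChunk = chunk
                try await writer.write(audioMessage)
            }
        },
        onResponse: { response in
            // Keep the latest transcript; prefer the final text when flagged.
            var transcript = ""
            for try await update in response.messages {
                transcript = update.isFinal ? update.finalText : update.partialText
            }
            return transcript
        }
    )
}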

View File

@ -8,11 +8,24 @@ public struct AppleIntelligenceProvider: Appleintelligence_AppleIntelligenceServ
/// The underlying AI service
private let service: AppleIntelligenceService
/// Text-to-Speech service
private let ttsService: TextToSpeechService?
/// Speech-to-Text service
private let sttService: SpeechToTextService?
/// Optional API key for authentication
private let apiKey: String?
public init(service: AppleIntelligenceService, apiKey: String? = nil) {
public init(
service: AppleIntelligenceService,
ttsService: TextToSpeechService? = nil,
sttService: SpeechToTextService? = nil,
apiKey: String? = nil
) {
self.service = service
self.ttsService = ttsService
self.sttService = sttService
self.apiKey = apiKey
}
@ -139,6 +152,213 @@ public struct AppleIntelligenceProvider: Appleintelligence_AppleIntelligenceServ
return ServerResponse(message: response)
}
// MARK: - Text-to-Speech
public func textToSpeech(
request: GRPCCore.ServerRequest<Appleintelligence_TextToSpeechRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.ServerResponse<Appleintelligence_TextToSpeechResponse> {
try validateApiKey(metadata: request.metadata)
guard let ttsService = ttsService else {
throw RPCError(code: .unavailable, message: "Text-to-Speech service not available")
}
let message = request.message
// Convert proto config to service config
var config = SpeechConfig.default
if message.hasVoiceConfig {
let voiceConfig = message.voiceConfig
config = SpeechConfig(
voiceIdentifier: voiceConfig.voiceIdentifier.isEmpty ? nil : voiceConfig.voiceIdentifier,
speakingRate: voiceConfig.hasSpeakingRate ? voiceConfig.speakingRate : 0.5,
pitchMultiplier: voiceConfig.hasPitchMultiplier ? voiceConfig.pitchMultiplier : 1.0,
volume: voiceConfig.hasVolume ? voiceConfig.volume : 1.0
)
}
// Convert proto format to service format
let outputFormat: AudioOutputFormat
switch message.outputFormat {
case .wav, .unspecified:
outputFormat = .wav
case .mp3:
outputFormat = .mp3
case .UNRECOGNIZED:
outputFormat = .wav
}
do {
let result = try await ttsService.synthesize(
text: message.text,
config: config,
outputFormat: outputFormat
)
var response = Appleintelligence_TextToSpeechResponse()
response.audioData = result.audioData
response.format = outputFormat == .wav ? .wav : .mp3
response.sampleRate = Int32(result.sampleRate)
response.channels = Int32(result.channels)
response.durationSeconds = result.durationSeconds
return ServerResponse(message: response)
} catch let error as TextToSpeechError {
throw RPCError(code: .internalError, message: error.description)
}
}
public func listVoices(
request: GRPCCore.ServerRequest<Appleintelligence_ListVoicesRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.ServerResponse<Appleintelligence_ListVoicesResponse> {
try validateApiKey(metadata: request.metadata)
guard let ttsService = ttsService else {
throw RPCError(code: .unavailable, message: "Text-to-Speech service not available")
}
let message = request.message
let languageCode = message.hasLanguageCode ? message.languageCode : nil
let voices = await ttsService.listVoices(languageCode: languageCode)
var response = Appleintelligence_ListVoicesResponse()
response.voices = voices.map { voice in
var protoVoice = Appleintelligence_VoiceInfo()
protoVoice.identifier = voice.identifier
protoVoice.name = voice.name
protoVoice.language = voice.language
protoVoice.isPremium = voice.isPremium
protoVoice.gender = voice.gender
return protoVoice
}
return ServerResponse(message: response)
}
// MARK: - Speech-to-Text
public func transcribe(
request: GRPCCore.ServerRequest<Appleintelligence_TranscribeRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.ServerResponse<Appleintelligence_TranscribeResponse> {
try validateApiKey(metadata: request.metadata)
guard let sttService = sttService else {
throw RPCError(code: .unavailable, message: "Speech-to-Text service not available")
}
let message = request.message
guard message.hasAudio else {
throw RPCError(code: .invalidArgument, message: "Audio data is required")
}
// Convert proto config to service config
var config = TranscriptionConfig.default
if message.hasConfig {
let protoConfig = message.config
config = TranscriptionConfig(
languageCode: protoConfig.hasLanguageCode ? protoConfig.languageCode : nil,
enablePunctuation: protoConfig.hasEnablePunctuation ? protoConfig.enablePunctuation : true,
enableTimestamps: protoConfig.hasEnableTimestamps ? protoConfig.enableTimestamps : false
)
}
do {
let result = try await sttService.transcribe(
audioData: message.audio.data,
mimeType: message.audio.mimeType,
config: config
)
var response = Appleintelligence_TranscribeResponse()
response.text = result.text
response.detectedLanguage = result.detectedLanguage
response.confidence = result.confidence
response.segments = result.segments.map { segment in
var protoSegment = Appleintelligence_TranscriptionSegment()
protoSegment.text = segment.text
protoSegment.startTime = segment.startTime
protoSegment.endTime = segment.endTime
protoSegment.confidence = segment.confidence
return protoSegment
}
return ServerResponse(message: response)
} catch let error as SpeechToTextError {
throw RPCError(code: .internalError, message: error.description)
}
}
public func streamTranscribe(
request: GRPCCore.StreamingServerRequest<Appleintelligence_StreamingTranscribeRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_StreamingTranscribeResponse> {
try validateApiKey(metadata: request.metadata)
guard let sttService = sttService else {
throw RPCError(code: .unavailable, message: "Speech-to-Text service not available")
}
return StreamingServerResponse { writer in
var config = TranscriptionConfig.default
// Process incoming stream
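// Expected flow (per the StreamingTranscribeRequest oneof): the client sends one config
// message first, then raw audio chunks; transcription updates are written back as they arrive.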
for try await message in request.messages {
switch message.request {
case .config(let protoConfig):
// First message should be config
config = TranscriptionConfig(
languageCode: protoConfig.hasLanguageCode ? protoConfig.languageCode : nil,
enablePunctuation: protoConfig.hasEnablePunctuation ? protoConfig.enablePunctuation : true,
enableTimestamps: protoConfig.hasEnableTimestamps ? protoConfig.enableTimestamps : false
)
// Start streaming transcription
let stream = await sttService.streamTranscribe(config: config)
Task {
do {
for try await update in stream {
var response = Appleintelligence_StreamingTranscribeResponse()
response.partialText = update.partialText
response.isFinal = update.isFinal
if let finalText = update.finalText {
response.finalText = finalText
}
response.segments = update.segments.map { segment in
var protoSegment = Appleintelligence_TranscriptionSegment()
protoSegment.text = segment.text
protoSegment.startTime = segment.startTime
protoSegment.endTime = segment.endTime
protoSegment.confidence = segment.confidence
return protoSegment
}
try await writer.write(response)
}
} catch {
// Stream ended or error occurred
}
}
case .audioChunk(let chunk):
// Feed audio chunk to service
try await sttService.feedAudioChunk(chunk)
case .none:
break
}
}
// End streaming session
await sttService.endStreamingSession()
return [:]
}
}
// MARK: - Private Helpers
/// Validate API key if configured

View File

@ -0,0 +1,337 @@
import Foundation
import Speech
import AVFoundation
// MARK: - Result Types
/// Transcription result
public struct TranscriptionResult: Sendable {
public let text: String
public let segments: [TranscriptionSegmentResult]
public let detectedLanguage: String
public let confidence: Float
}
/// Individual transcription segment
public struct TranscriptionSegmentResult: Sendable {
public let text: String
public let startTime: Float
public let endTime: Float
public let confidence: Float
}
/// Streaming transcription update
public struct StreamingTranscriptionUpdate: Sendable {
public let partialText: String
public let isFinal: Bool
public let finalText: String?
public let segments: [TranscriptionSegmentResult]
}
/// Transcription configuration
public struct TranscriptionConfig: Sendable {
public var languageCode: String?
public var enablePunctuation: Bool
public var enableTimestamps: Bool
public static let `default` = TranscriptionConfig(
languageCode: nil,
enablePunctuation: true,
enableTimestamps: false
)
public init(
languageCode: String? = nil,
enablePunctuation: Bool = true,
enableTimestamps: Bool = false
) {
self.languageCode = languageCode
self.enablePunctuation = enablePunctuation
self.enableTimestamps = enableTimestamps
}
}
// MARK: - Errors
public enum SpeechToTextError: Error, CustomStringConvertible, Sendable {
case notAvailable
case authorizationDenied
case modelNotReady(String)
case transcriptionFailed(String)
case invalidAudioFormat
case audioProcessingFailed(String)
case unsupportedMimeType(String)
public var description: String {
switch self {
case .notAvailable: return "Speech recognition not available on this system"
case .authorizationDenied: return "Speech recognition authorization denied"
case .modelNotReady(let reason): return "Speech model not ready: \(reason)"
case .transcriptionFailed(let reason): return "Transcription failed: \(reason)"
case .invalidAudioFormat: return "Invalid audio format"
case .audioProcessingFailed(let reason): return "Audio processing failed: \(reason)"
case .unsupportedMimeType(let type): return "Unsupported audio MIME type: \(type)"
}
}
}
// MARK: - Service Actor
public actor SpeechToTextService {
/// Service availability status
public private(set) var isAvailable: Bool = false
/// Streaming session state
private var isStreamingActive: Bool = false
public init() async {
await checkAvailability()
}
// MARK: - Public API
/// Transcribe audio data (file-based)
public func transcribe(
audioData: Data,
mimeType: String,
config: TranscriptionConfig = .default
) async throws -> TranscriptionResult {
guard isAvailable else {
throw SpeechToTextError.notAvailable
}
// Convert audio data to file URL for processing
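// SFSpeechURLRecognitionRequest operates on file URLs, so the raw bytes are written to a temporary file first.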
let tempURL = try createTempAudioFile(data: audioData, mimeType: mimeType)
defer { try? FileManager.default.removeItem(at: tempURL) }
return try await transcribeWithSFSpeechRecognizer(url: tempURL, config: config)
}
/// Stream transcription from audio chunks
public func streamTranscribe(
config: TranscriptionConfig = .default
) -> AsyncThrowingStream<StreamingTranscriptionUpdate, Error> {
AsyncThrowingStream { continuation in
Task {
guard self.isAvailable else {
continuation.finish(throwing: SpeechToTextError.notAvailable)
return
}
do {
try await self.startStreamingWithSFSpeechRecognizer(config: config, continuation: continuation)
} catch {
continuation.finish(throwing: error)
}
}
}
}
/// Feed audio chunk for streaming transcription
public func feedAudioChunk(_ chunk: Data) async throws {
guard isStreamingActive else {
throw SpeechToTextError.transcriptionFailed("No active streaming session")
}
// In the SFSpeechRecognizer streaming path, audio is captured from the local AVAudioEngine
// input tap, so chunks received here are accepted but not forwarded to the recognizer.
}
/// End streaming session
public func endStreamingSession() async {
isStreamingActive = false
}
/// Get status information
public func getStatus() -> String {
if isAvailable {
return "SFSpeechRecognizer available"
} else {
return "Speech recognition not available"
}
}
// MARK: - Private Implementation
private func checkAvailability() async {
// Check SFSpeechRecognizer availability
let status = SFSpeechRecognizer.authorizationStatus()
switch status {
case .authorized:
isAvailable = SFSpeechRecognizer.supportedLocales().count > 0
case .notDetermined:
// Request authorization
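// Note: prompting for authorization typically requires an NSSpeechRecognitionUsageDescription entry in the host Info.plist.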
isAvailable = await withCheckedContinuation { continuation in
SFSpeechRecognizer.requestAuthorization { newStatus in
continuation.resume(returning: newStatus == .authorized)
}
}
default:
isAvailable = false
}
}
/// Create temporary audio file from data
private func createTempAudioFile(data: Data, mimeType: String) throws -> URL {
let ext = extensionForMimeType(mimeType)
let tempDir = FileManager.default.temporaryDirectory
let fileName = UUID().uuidString + "." + ext
let fileURL = tempDir.appendingPathComponent(fileName)
try data.write(to: fileURL)
return fileURL
}
/// Get file extension for MIME type
private func extensionForMimeType(_ mimeType: String) -> String {
switch mimeType.lowercased() {
case "audio/wav", "audio/wave", "audio/x-wav":
return "wav"
case "audio/mp3", "audio/mpeg":
return "mp3"
case "audio/m4a", "audio/mp4", "audio/x-m4a":
return "m4a"
case "audio/aac":
return "aac"
case "audio/flac":
return "flac"
default:
return "wav"
}
}
/// Transcribe using SFSpeechRecognizer
private func transcribeWithSFSpeechRecognizer(
url: URL,
config: TranscriptionConfig
) async throws -> TranscriptionResult {
let locale = Locale(identifier: config.languageCode ?? "en-US")
guard let recognizer = SFSpeechRecognizer(locale: locale) else {
throw SpeechToTextError.notAvailable
}
guard recognizer.isAvailable else {
throw SpeechToTextError.notAvailable
}
let request = SFSpeechURLRecognitionRequest(url: url)
request.shouldReportPartialResults = false
return try await withCheckedThrowingContinuation { continuation in
var hasResumed = false
recognizer.recognitionTask(with: request) { result, error in
guard !hasResumed else { return }
if let error = error {
hasResumed = true
continuation.resume(throwing: SpeechToTextError.transcriptionFailed(error.localizedDescription))
return
}
guard let result = result, result.isFinal else { return }
hasResumed = true
let transcription = result.bestTranscription
var segments: [TranscriptionSegmentResult] = []
if config.enableTimestamps {
for segment in transcription.segments {
segments.append(TranscriptionSegmentResult(
text: segment.substring,
startTime: Float(segment.timestamp),
endTime: Float(segment.timestamp + segment.duration),
confidence: segment.confidence
))
}
}
let transcriptionResult = TranscriptionResult(
text: transcription.formattedString,
segments: segments,
detectedLanguage: config.languageCode ?? "en-US",
confidence: segments.isEmpty ? 1.0 : segments.reduce(0) { $0 + $1.confidence } / Float(segments.count)
)
continuation.resume(returning: transcriptionResult)
}
}
}
/// Start streaming with SFSpeechRecognizer
private func startStreamingWithSFSpeechRecognizer(
config: TranscriptionConfig,
continuation: AsyncThrowingStream<StreamingTranscriptionUpdate, Error>.Continuation
) async throws {
let locale = Locale(identifier: config.languageCode ?? "en-US")
guard let recognizer = SFSpeechRecognizer(locale: locale) else {
throw SpeechToTextError.notAvailable
}
guard recognizer.isAvailable else {
throw SpeechToTextError.notAvailable
}
isStreamingActive = true
let audioEngine = AVAudioEngine()
let request = SFSpeechAudioBufferRecognitionRequest()
request.shouldReportPartialResults = true
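// Audio is captured live from the default input device via AVAudioEngine;
// partial results are yielded until the recognizer reports a final result.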
let inputNode = audioEngine.inputNode
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { buffer, _ in
request.append(buffer)
}
audioEngine.prepare()
try audioEngine.start()
recognizer.recognitionTask(with: request) { result, error in
if let error = error {
continuation.finish(throwing: SpeechToTextError.transcriptionFailed(error.localizedDescription))
return
}
guard let result = result else { return }
let transcription = result.bestTranscription
var segments: [TranscriptionSegmentResult] = []
if config.enableTimestamps {
for segment in transcription.segments {
segments.append(TranscriptionSegmentResult(
text: segment.substring,
startTime: Float(segment.timestamp),
endTime: Float(segment.timestamp + segment.duration),
confidence: segment.confidence
))
}
}
let update = StreamingTranscriptionUpdate(
partialText: transcription.formattedString,
isFinal: result.isFinal,
finalText: result.isFinal ? transcription.formattedString : nil,
segments: segments
)
continuation.yield(update)
if result.isFinal {
audioEngine.stop()
inputNode.removeTap(onBus: 0)
continuation.finish()
}
}
// Wait for streaming to end
while isStreamingActive {
try await Task.sleep(for: .milliseconds(100))
}
audioEngine.stop()
inputNode.removeTap(onBus: 0)
request.endAudio()
}
}

View File

@ -0,0 +1,280 @@
import Foundation
import AVFoundation
// MARK: - Result Types
/// Result of text-to-speech synthesis
public struct TextToSpeechResult: Sendable {
public let audioData: Data
public let format: AudioOutputFormat
public let sampleRate: Int
public let channels: Int
public let durationSeconds: Float
}
/// Supported output formats
public enum AudioOutputFormat: Sendable {
case wav
case mp3
}
/// Voice information
public struct VoiceDescription: Sendable {
public let identifier: String
public let name: String
public let language: String
public let isPremium: Bool
public let gender: String
}
/// Configuration for speech synthesis
public struct SpeechConfig: Sendable {
public var voiceIdentifier: String?
public var speakingRate: Float // 0.0 - 1.0
public var pitchMultiplier: Float // 0.5 - 2.0
public var volume: Float // 0.0 - 1.0
public static let `default` = SpeechConfig(
voiceIdentifier: nil,
speakingRate: 0.5,
pitchMultiplier: 1.0,
volume: 1.0
)
public init(
voiceIdentifier: String? = nil,
speakingRate: Float = 0.5,
pitchMultiplier: Float = 1.0,
volume: Float = 1.0
) {
self.voiceIdentifier = voiceIdentifier
self.speakingRate = speakingRate
self.pitchMultiplier = pitchMultiplier
self.volume = volume
}
}
// MARK: - Errors
public enum TextToSpeechError: Error, CustomStringConvertible, Sendable {
case invalidVoice(String)
case synthesisFailure(String)
case encodingFailure(String)
case noAudioGenerated
case unsupportedFormat
public var description: String {
switch self {
case .invalidVoice(let id): return "Invalid voice identifier: \(id)"
case .synthesisFailure(let reason): return "Speech synthesis failed: \(reason)"
case .encodingFailure(let reason): return "Audio encoding failed: \(reason)"
case .noAudioGenerated: return "No audio was generated"
case .unsupportedFormat: return "Unsupported audio format"
}
}
}
// MARK: - Service Actor
public actor TextToSpeechService {
/// Keep strong reference to synthesizer during synthesis
private var activeSynthesizer: AVSpeechSynthesizer?
public init() {}
// MARK: - Public API
/// Synthesize text to speech
public func synthesize(
text: String,
config: SpeechConfig = .default,
outputFormat: AudioOutputFormat = .wav
) async throws -> TextToSpeechResult {
// Create utterance
let utterance = AVSpeechUtterance(string: text)
// Configure voice
if let voiceId = config.voiceIdentifier {
if let voice = AVSpeechSynthesisVoice(identifier: voiceId) {
utterance.voice = voice
} else {
throw TextToSpeechError.invalidVoice(voiceId)
}
} else {
// Use default English voice
utterance.voice = AVSpeechSynthesisVoice(language: "en-US")
}
// Configure speech parameters
utterance.rate = config.speakingRate
utterance.pitchMultiplier = config.pitchMultiplier
utterance.volume = config.volume
// Collect PCM data
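// AVSpeechSynthesizer.write(_:toBufferCallback:) renders the utterance to audio buffers instead of playing it aloud.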
let pcmData = try await collectPCMData(utterance: utterance)
// Convert to requested format
let audioData: Data
switch outputFormat {
case .wav:
audioData = createWAVData(from: pcmData)
case .mp3:
// Use WAV as a fallback (MP3 encoding requires an external library)
audioData = createWAVData(from: pcmData)
}
// Calculate duration
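// durationSeconds = frames / sampleRate, where frames = byteCount / (2 bytes per sample * channels)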
let bytesPerSample = 2 // Int16
let totalSamples = pcmData.samples.count / bytesPerSample / pcmData.channelCount
let duration = Float(totalSamples) / Float(pcmData.sampleRate)
return TextToSpeechResult(
audioData: audioData,
format: outputFormat,
sampleRate: Int(pcmData.sampleRate),
channels: pcmData.channelCount,
durationSeconds: duration
)
}
/// List available voices
public func listVoices(languageCode: String? = nil) -> [VoiceDescription] {
let voices = AVSpeechSynthesisVoice.speechVoices()
let filtered: [AVSpeechSynthesisVoice]
if let lang = languageCode {
filtered = voices.filter { $0.language.hasPrefix(lang) }
} else {
filtered = voices
}
return filtered.map { voice in
VoiceDescription(
identifier: voice.identifier,
name: voice.name,
language: voice.language,
isPremium: voice.quality == .enhanced || voice.quality == .premium,
gender: genderString(for: voice)
)
}
}
// MARK: - Private Implementation
/// PCM buffer data for internal processing
private struct PCMBufferData: Sendable {
let samples: Data
let sampleRate: Double
let channelCount: Int
}
/// Collect PCM data from synthesizer using write callback
private func collectPCMData(
utterance: AVSpeechUtterance
) async throws -> PCMBufferData {
// Create and store synthesizer to keep strong reference during synthesis
let synthesizer = AVSpeechSynthesizer()
self.activeSynthesizer = synthesizer
defer { self.activeSynthesizer = nil }
return try await withCheckedThrowingContinuation { continuation in
var pcmData = Data()
var sampleRate: Double = 0
var channelCount: Int = 0
var hasResumed = false
synthesizer.write(utterance) { buffer in
guard let pcmBuffer = buffer as? AVAudioPCMBuffer, pcmBuffer.frameLength > 0 else {
// End of audio - a zero-length (or non-PCM) buffer signals completion
if !hasResumed {
hasResumed = true
if pcmData.isEmpty {
continuation.resume(throwing: TextToSpeechError.noAudioGenerated)
} else {
continuation.resume(returning: PCMBufferData(
samples: pcmData,
sampleRate: sampleRate,
channelCount: channelCount
))
}
}
return
}
if pcmBuffer.frameLength > 0 {
// Store format from first buffer
if sampleRate == 0 {
sampleRate = pcmBuffer.format.sampleRate
channelCount = Int(pcmBuffer.format.channelCount)
}
// Convert float samples to Int16 PCM
if let channelData = pcmBuffer.floatChannelData {
let frameCount = Int(pcmBuffer.frameLength)
for frame in 0..<frameCount {
for channel in 0..<channelCount {
let sample = channelData[channel][frame]
let clampedSample = max(-1.0, min(1.0, sample))
let int16Sample = Int16(clampedSample * Float(Int16.max))
withUnsafeBytes(of: int16Sample.littleEndian) { bytes in
pcmData.append(contentsOf: bytes)
}
}
}
}
}
}
}
}
/// Create WAV data from PCM buffer data
private func createWAVData(from pcmData: PCMBufferData) -> Data {
let bitsPerSample = 16
let sampleRate = Int(pcmData.sampleRate)
let channels = pcmData.channelCount
let dataSize = pcmData.samples.count
var header = Data()
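// Canonical 44-byte PCM WAV header: RIFF chunk descriptor, 16-byte fmt subchunk, then the data subchunk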
// RIFF header
header.append(contentsOf: "RIFF".utf8)
let fileSize = UInt32(dataSize + 36)
withUnsafeBytes(of: fileSize.littleEndian) { header.append(contentsOf: $0) }
header.append(contentsOf: "WAVE".utf8)
// fmt subchunk
header.append(contentsOf: "fmt ".utf8)
let subchunk1Size = UInt32(16)
withUnsafeBytes(of: subchunk1Size.littleEndian) { header.append(contentsOf: $0) }
let audioFormat = UInt16(1) // PCM
withUnsafeBytes(of: audioFormat.littleEndian) { header.append(contentsOf: $0) }
let numChannels = UInt16(channels)
withUnsafeBytes(of: numChannels.littleEndian) { header.append(contentsOf: $0) }
let sampleRateU32 = UInt32(sampleRate)
withUnsafeBytes(of: sampleRateU32.littleEndian) { header.append(contentsOf: $0) }
let byteRate = UInt32(sampleRate * channels * bitsPerSample / 8)
withUnsafeBytes(of: byteRate.littleEndian) { header.append(contentsOf: $0) }
let blockAlign = UInt16(channels * bitsPerSample / 8)
withUnsafeBytes(of: blockAlign.littleEndian) { header.append(contentsOf: $0) }
let bitsPerSampleU16 = UInt16(bitsPerSample)
withUnsafeBytes(of: bitsPerSampleU16.littleEndian) { header.append(contentsOf: $0) }
// data subchunk
header.append(contentsOf: "data".utf8)
let dataU32 = UInt32(dataSize)
withUnsafeBytes(of: dataU32.littleEndian) { header.append(contentsOf: $0) }
return header + pcmData.samples
}
/// Get gender string for voice
private func genderString(for voice: AVSpeechSynthesisVoice) -> String {
switch voice.gender {
case .male: return "male"
case .female: return "female"
case .unspecified: return "unspecified"
@unknown default: return "unknown"
}
}
}

View File

@ -37,7 +37,21 @@ struct AppleIntelligenceServer: AsyncParsableCommand {
throw ExitCode.failure
}
// Initialize speech services
print("Initializing Text-to-Speech service...")
let ttsService = TextToSpeechService()
print("Initializing Speech-to-Text service...")
let sttService = await SpeechToTextService()
let sttStatus = await sttService.getStatus()
print("Speech-to-Text status: \(sttStatus)")
let provider = AppleIntelligenceProvider(
service: service,
ttsService: ttsService,
sttService: sttService,
apiKey: config.apiKey
)
let transport = HTTP2ServerTransport.Posix(
address: .ipv4(host: bindHost, port: bindPort),
@ -52,7 +66,15 @@ struct AppleIntelligenceServer: AsyncParsableCommand {
print("API key authentication is enabled")
}
print("Server is ready to accept connections")
print("Health check: grpcurl -plaintext \(bindHost):\(bindPort) appleintelligence.AppleIntelligence/Health")
print("")
print("Available services:")
print(" - Complete/StreamComplete: Text generation with Apple Intelligence")
print(" - TextToSpeech: Convert text to spoken audio")
print(" - ListVoices: List available TTS voices")
print(" - Transcribe: Convert audio file to text")
print(" - StreamTranscribe: Real-time speech-to-text")
print("")
print("Health check: grpcurl -plaintext \(bindHost):\(bindPort) appleintelligence.AppleIntelligenceService/Health")
print("Press Ctrl+C to stop the server")
try await server.serve()