Add Text-to-Speech and Speech-to-Text features

- Add TTS service using AVSpeechSynthesizer for voice output
- Add STT service using SpeechAnalyzer (macOS 26) for transcription
- Add voice input (microphone) button in chat with recording level indicator
- Add speak button on assistant messages for TTS playback
- Add language toggle (EN-CA/FR-CA) for bilingual speech recognition
- Fix Swift 6 strict concurrency issues in audio callbacks
- Update proto schema with TTS/STT message types and RPCs
- Update gRPC provider with speech service endpoints

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Mathias Beaulieu-Duncan
2025-12-31 02:57:30 -05:00
parent 638656e7ca
commit b754945923
10 changed files with 3151 additions and 8 deletions
+24 -2
View File
@@ -37,7 +37,21 @@ struct AppleIntelligenceServer: AsyncParsableCommand {
throw ExitCode.failure
}
let provider = AppleIntelligenceProvider(service: service, apiKey: config.apiKey)
// Initialize speech services
print("Initializing Text-to-Speech service...")
let ttsService = TextToSpeechService()
print("Initializing Speech-to-Text service...")
let sttService = await SpeechToTextService()
let sttStatus = await sttService.getStatus()
print("Speech-to-Text status: \(sttStatus)")
let provider = AppleIntelligenceProvider(
service: service,
ttsService: ttsService,
sttService: sttService,
apiKey: config.apiKey
)
let transport = HTTP2ServerTransport.Posix(
address: .ipv4(host: bindHost, port: bindPort),
@@ -52,7 +66,15 @@ struct AppleIntelligenceServer: AsyncParsableCommand {
print("API key authentication is enabled")
}
print("Server is ready to accept connections")
print("Health check: grpcurl -plaintext \(bindHost):\(bindPort) appleintelligence.AppleIntelligence/Health")
print("")
print("Available services:")
print(" - Complete/StreamComplete: Text generation with Apple Intelligence")
print(" - TextToSpeech: Convert text to spoken audio")
print(" - ListVoices: List available TTS voices")
print(" - Transcribe: Convert audio file to text")
print(" - StreamTranscribe: Real-time speech-to-text")
print("")
print("Health check: grpcurl -plaintext \(bindHost):\(bindPort) appleintelligence.AppleIntelligenceService/Health")
print("Press Ctrl+C to stop the server")
try await server.serve()