swift-apple-intelligence-grpc/Sources/AppleIntelligenceApp/Views/ChatView.swift
Mathias Beaulieu-Duncan b754945923 Add Text-to-Speech and Speech-to-Text features
- Add TTS service using AVSpeechSynthesizer for voice output
- Add STT service using SpeechAnalyzer (macOS 26) for transcription
- Add voice input (microphone) button in chat with recording level indicator
- Add speak button on assistant messages for TTS playback
- Add language toggle (EN-CA/FR-CA) for bilingual speech recognition
- Fix Swift 6 strict concurrency issues in audio callbacks
- Update proto schema with TTS/STT message types and RPCs
- Update gRPC provider with speech service endpoints

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-31 02:57:30 -05:00

610 lines
21 KiB
Swift

import SwiftUI
import UniformTypeIdentifiers
/// Root chat screen: an optional recent-images sidebar next to the scrolling
/// message list, a dismissible error banner, a pending-attachment strip, and
/// the input row (file picker, clipboard paste, speech-language toggle,
/// microphone, text field, send/stop). Also manages NSApp activation policy so
/// the app can move between regular and accessory (menu-bar style) modes.
struct ChatView: View {
    /// Observable chat state; `@Bindable` enables two-way bindings such as `$viewModel.inputText`.
    @Bindable var viewModel: ChatViewModel
    /// Keyboard focus for the message text field; set programmatically on appear.
    @FocusState private var isInputFocused: Bool
    /// Whether the image file importer is presented.
    @State private var isShowingFilePicker = false
    /// True while a drag hovers over the chat area (drives the drop-target overlay).
    @State private var isDragOver = false
    /// Non-nil presents the preview sheet for a recent image
    /// (relies on the `URL: Identifiable` extension at the bottom of this file).
    @State private var previewImageURL: URL?

    var body: some View {
        HStack(spacing: 0) {
            // Recent images sidebar (hidden entirely when there is nothing to show)
            if !viewModel.recentImages.isEmpty {
                recentImagesSidebar
                Divider()
            }
            // Main chat area
            VStack(spacing: 0) {
                // Messages list
                ScrollViewReader { proxy in
                    ScrollView {
                        LazyVStack(spacing: 12) {
                            ForEach(viewModel.messages) { message in
                                MessageBubble(
                                    message: message,
                                    isSpeaking: viewModel.speakingMessageId == message.id,
                                    onSpeak: { viewModel.speakMessage(message) }
                                )
                                .id(message.id)
                            }
                        }
                        .padding()
                    }
                    // Auto-scroll to the newest message when one is appended…
                    .onChange(of: viewModel.messages.count) { _, _ in
                        if let lastMessage = viewModel.messages.last {
                            withAnimation {
                                proxy.scrollTo(lastMessage.id, anchor: .bottom)
                            }
                        }
                    }
                    // …and keep following it as streamed content grows the last message.
                    .onChange(of: viewModel.messages.last?.content) { _, _ in
                        if let lastMessage = viewModel.messages.last {
                            withAnimation {
                                proxy.scrollTo(lastMessage.id, anchor: .bottom)
                            }
                        }
                    }
                }
                // Error message banner, dismissible; cleared by writing nil back.
                if let error = viewModel.errorMessage {
                    HStack {
                        Image(systemName: "exclamationmark.triangle.fill")
                            .foregroundStyle(.yellow)
                        Text(error)
                            .font(.caption)
                            .foregroundStyle(.secondary)
                        Spacer()
                        Button("Dismiss") {
                            viewModel.errorMessage = nil
                        }
                        .buttonStyle(.plain)
                        .font(.caption)
                    }
                    .padding(.horizontal)
                    .padding(.vertical, 8)
                    .background(.red.opacity(0.1))
                }
                Divider()
                // Pending images preview (attachments queued for the next message)
                if !viewModel.pendingImages.isEmpty {
                    pendingImagesView
                }
                // Input area
                inputArea
            }
            // Accept both file URLs and raw image data dropped onto the chat.
            // NOTE(review): this always reports the drop as accepted; it could
            // return false when no provider conforms to a supported type.
            .onDrop(of: [.fileURL, .image], isTargeted: $isDragOver) { providers in
                handleDrop(providers: providers)
                return true
            }
            // Visual highlight while a drag is hovering.
            .overlay {
                if isDragOver {
                    RoundedRectangle(cornerRadius: 8)
                        .stroke(Color.accentColor, lineWidth: 3)
                        .background(Color.accentColor.opacity(0.1))
                        .padding(4)
                }
            }
        }
        .frame(minWidth: 500, minHeight: 500)
        .toolbar {
            ToolbarItem(placement: .primaryAction) {
                Button {
                    viewModel.loadRecentImages()
                } label: {
                    Image(systemName: "arrow.clockwise")
                }
                .help("Refresh recent images")
            }
            ToolbarItem(placement: .primaryAction) {
                Button {
                    viewModel.clearChat()
                } label: {
                    Image(systemName: "trash")
                }
                .help("Clear chat")
                .disabled(viewModel.messages.isEmpty)
            }
        }
        .task {
            await viewModel.initialize()
        }
        .onAppear {
            // Promote the process to a regular app so the chat window can become key.
            NSApp.setActivationPolicy(.regular)
            NSApp.activate(ignoringOtherApps: true)
            // Timing-based: wait briefly for the window to exist before keying it.
            // NOTE(review): matching by title "Chat" and a fixed 0.2 s delay are
            // fragile — consider a deterministic window hook if this misfires.
            DispatchQueue.main.asyncAfter(deadline: .now() + 0.2) {
                if let window = NSApp.windows.first(where: { $0.title == "Chat" }) {
                    window.makeKeyAndOrderFront(nil)
                }
                isInputFocused = true
            }
        }
        .onDisappear {
            // Drop back to accessory mode when no titled window remains visible.
            if NSApp.windows.filter({ $0.isVisible && $0.title != "" }).isEmpty {
                NSApp.setActivationPolicy(.accessory)
            }
        }
        .fileImporter(
            isPresented: $isShowingFilePicker,
            allowedContentTypes: ChatViewModel.supportedImageTypes,
            allowsMultipleSelection: true
        ) { result in
            switch result {
            case .success(let urls):
                for url in urls {
                    // Sandbox: bracket the read in security-scoped access.
                    // NOTE(review): assumes addImage(from:) reads the file
                    // synchronously before the scope ends — confirm.
                    if url.startAccessingSecurityScopedResource() {
                        viewModel.addImage(from: url)
                        url.stopAccessingSecurityScopedResource()
                    }
                }
            case .failure(let error):
                viewModel.errorMessage = error.localizedDescription
            }
        }
        // Preview sheet for a recent image; confirm attaches it, either path closes.
        .sheet(item: $previewImageURL) { url in
            ImagePreviewSheet(url: url) {
                viewModel.addRecentImage(url)
                previewImageURL = nil
            } onCancel: {
                previewImageURL = nil
            }
        }
    }

    // MARK: - Drag & Drop Handler

    /// Routes each dropped provider to the view model: file URLs go through
    /// `addImage(from:)`; raw image data becomes a pending `ImageAttachment`.
    /// Provider callbacks arrive on arbitrary queues, so all model mutation is
    /// hopped back to the main queue.
    private func handleDrop(providers: [NSItemProvider]) {
        for provider in providers {
            // Try to load as file URL first
            if provider.hasItemConformingToTypeIdentifier(UTType.fileURL.identifier) {
                provider.loadItem(forTypeIdentifier: UTType.fileURL.identifier, options: nil) { item, error in
                    guard error == nil else { return }
                    // Providers may hand back either raw URL bytes or a URL object.
                    if let data = item as? Data,
                       let url = URL(dataRepresentation: data, relativeTo: nil) {
                        DispatchQueue.main.async {
                            viewModel.addImage(from: url)
                        }
                    } else if let url = item as? URL {
                        DispatchQueue.main.async {
                            viewModel.addImage(from: url)
                        }
                    }
                }
            }
            // Try to load as image data
            else if provider.hasItemConformingToTypeIdentifier(UTType.image.identifier) {
                provider.loadDataRepresentation(forTypeIdentifier: UTType.image.identifier) { data, error in
                    guard let data = data, error == nil else { return }
                    DispatchQueue.main.async {
                        let attachment = ImageAttachment(data: data, filename: "dropped_image.png")
                        // Cap pending attachments at 5 (silently drops extras).
                        if viewModel.pendingImages.count < 5 {
                            viewModel.pendingImages.append(attachment)
                        }
                    }
                }
            }
        }
    }

    // MARK: - Recent Images Sidebar

    /// Narrow vertical strip of recent-image thumbnails; tapping one opens the
    /// preview sheet via `previewImageURL`.
    private var recentImagesSidebar: some View {
        VStack(alignment: .leading, spacing: 8) {
            Text("Recent")
                .font(.headline)
                .foregroundStyle(.secondary)
                .padding(.horizontal, 8)
                .padding(.top, 8)
            ScrollView {
                LazyVStack(spacing: 8) {
                    ForEach(viewModel.recentImages, id: \.self) { url in
                        RecentImageThumbnail(url: url) {
                            previewImageURL = url
                        }
                    }
                }
                .padding(.horizontal, 8)
                .padding(.bottom, 8)
            }
        }
        .frame(width: 100)
        .background(Color(nsColor: .controlBackgroundColor).opacity(0.5))
    }

    // MARK: - Pending Images Preview

    /// Horizontal strip of attachments queued for the next message.
    private var pendingImagesView: some View {
        ScrollView(.horizontal, showsIndicators: false) {
            HStack(spacing: 8) {
                ForEach(viewModel.pendingImages) { attachment in
                    pendingImageThumbnail(attachment)
                }
            }
            .padding(.horizontal)
            .padding(.vertical, 8)
        }
        .background(Color(nsColor: .controlBackgroundColor))
    }

    /// 60×60 thumbnail for one pending attachment with a remove (×) button
    /// overlaid in the top-trailing corner.
    private func pendingImageThumbnail(_ attachment: ImageAttachment) -> some View {
        ZStack(alignment: .topTrailing) {
            if let thumbnail = attachment.thumbnail {
                Image(nsImage: thumbnail)
                    .resizable()
                    .aspectRatio(contentMode: .fill)
                    .frame(width: 60, height: 60)
                    .clipShape(RoundedRectangle(cornerRadius: 8))
            } else {
                // Fallback when the attachment has no thumbnail.
                RoundedRectangle(cornerRadius: 8)
                    .fill(Color.gray.opacity(0.3))
                    .frame(width: 60, height: 60)
                    .overlay {
                        Image(systemName: "photo")
                            .foregroundStyle(.secondary)
                    }
            }
            Button {
                viewModel.removePendingImage(attachment)
            } label: {
                Image(systemName: "xmark.circle.fill")
                    .font(.system(size: 16))
                    .foregroundStyle(.white)
                    .background(Circle().fill(.black.opacity(0.6)).frame(width: 18, height: 18))
            }
            .buttonStyle(.plain)
            .offset(x: 6, y: -6)
        }
    }

    // MARK: - Input Area

    /// Bottom input row: image picker, clipboard paste, speech-language toggle,
    /// microphone with a level-driven pulse, the message field, and a
    /// send-or-stop button depending on `viewModel.isLoading`.
    private var inputArea: some View {
        HStack(spacing: 8) {
            Button {
                isShowingFilePicker = true
            } label: {
                Image(systemName: "photo.badge.plus")
                    .font(.title3)
                    .foregroundStyle(.secondary)
            }
            .buttonStyle(.plain)
            .help("Add image")
            Button {
                viewModel.addImageFromPasteboard()
            } label: {
                Image(systemName: "doc.on.clipboard")
                    .font(.title3)
                    .foregroundStyle(.secondary)
            }
            .buttonStyle(.plain)
            .help("Paste image from clipboard")
            // Language toggle for speech recognition
            Button {
                // Toggle between en-CA and fr-CA
                let newLang = viewModel.detectedLanguage == "en-CA" ? "fr-CA" : "en-CA"
                viewModel.switchLanguage(to: newLang)
            } label: {
                Text(viewModel.detectedLanguage == "fr-CA" ? "FR" : "EN")
                    .font(.caption.bold())
                    .foregroundStyle(.secondary)
                    .frame(width: 24, height: 24)
                    .background(
                        RoundedRectangle(cornerRadius: 4)
                            .fill(Color.secondary.opacity(0.1))
                    )
            }
            .buttonStyle(.plain)
            .help("Speech language: \(viewModel.detectedLanguage) (click to toggle)")
            // Microphone button for voice input
            Button {
                viewModel.toggleRecording()
            } label: {
                ZStack {
                    if viewModel.isRecording {
                        // Recording indicator: circle grows with the input level
                        // (recordingLevel presumably 0…1 — TODO confirm range).
                        Circle()
                            .fill(Color.red.opacity(0.3))
                            .frame(width: 28 + CGFloat(viewModel.recordingLevel) * 10,
                                   height: 28 + CGFloat(viewModel.recordingLevel) * 10)
                            .animation(.easeInOut(duration: 0.1), value: viewModel.recordingLevel)
                    }
                    Image(systemName: viewModel.isRecording ? "mic.fill" : "mic")
                        .font(.title3)
                        .foregroundStyle(viewModel.isRecording ? .red : .secondary)
                }
            }
            .buttonStyle(.plain)
            .help(viewModel.isRecording ? "Stop recording" : "Voice input")
            TextField("Message...", text: $viewModel.inputText, axis: .vertical)
                .textFieldStyle(.plain)
                .lineLimit(1...5)
                .focused($isInputFocused)
                .onSubmit {
                    if viewModel.canSend {
                        viewModel.sendMessage()
                    }
                }
            // Stop button while generating; send button otherwise.
            if viewModel.isLoading {
                Button {
                    viewModel.stopGeneration()
                } label: {
                    Image(systemName: "stop.circle.fill")
                        .font(.title2)
                        .foregroundStyle(.red)
                }
                .buttonStyle(.plain)
            } else {
                Button {
                    viewModel.sendMessage()
                } label: {
                    Image(systemName: "arrow.up.circle.fill")
                        .font(.title2)
                        .foregroundStyle(viewModel.canSend ? Color.accentColor : Color.gray)
                }
                .buttonStyle(.plain)
                .disabled(!viewModel.canSend)
            }
        }
        .padding()
    }
}
// MARK: - Recent Image Thumbnail

/// 80×80 thumbnail button for a recently used image file; shows a spinner
/// placeholder until the file has been loaded and downscaled.
struct RecentImageThumbnail: View {
    /// Image file to thumbnail.
    let url: URL
    /// Invoked when the thumbnail is clicked.
    let onTap: () -> Void
    /// Downscaled image, set once `loadThumbnail()` completes.
    @State private var thumbnail: NSImage?

    var body: some View {
        Button(action: onTap) {
            ZStack {
                if let thumbnail = thumbnail {
                    Image(nsImage: thumbnail)
                        .resizable()
                        .aspectRatio(contentMode: .fill)
                        .frame(width: 80, height: 80)
                        .clipShape(RoundedRectangle(cornerRadius: 8))
                } else {
                    // Placeholder while the thumbnail is loading.
                    RoundedRectangle(cornerRadius: 8)
                        .fill(Color.gray.opacity(0.3))
                        .frame(width: 80, height: 80)
                        .overlay {
                            ProgressView()
                                .scaleEffect(0.6)
                        }
                }
            }
        }
        .buttonStyle(.plain)
        .help(url.lastPathComponent)
        .task {
            await loadThumbnail()
        }
    }

    /// Loads the image at `url` and scales it to fit within 80×80 points,
    /// never upscaling. No-op if the file can't be read as an image or
    /// reports a zero size.
    private func loadThumbnail() async {
        guard let image = NSImage(contentsOf: url) else { return }
        // Guard against zero-sized images: the ratio below would otherwise
        // divide by zero and produce an invalid (infinite/NaN) target size.
        guard image.size.width > 0, image.size.height > 0 else { return }
        let maxSize: CGFloat = 80
        // min(..., 1.0) ensures we only ever shrink, never enlarge.
        let ratio = min(maxSize / image.size.width, maxSize / image.size.height, 1.0)
        let newSize = NSSize(
            width: image.size.width * ratio,
            height: image.size.height * ratio
        )
        // NSImage(size:flipped:drawingHandler:) replaces the deprecated
        // lockFocus()/unlockFocus() offscreen-drawing pattern (deprecated in
        // macOS 14) and stays resolution-independent on Retina displays.
        let thumb = NSImage(size: newSize, flipped: false) { rect in
            image.draw(
                in: rect,
                from: NSRect(origin: .zero, size: image.size),
                operation: .copy,
                fraction: 1.0
            )
            return true
        }
        // Publish the @State update on the main actor.
        await MainActor.run {
            thumbnail = thumb
        }
    }
}
// MARK: - Message Bubble

/// One chat message: user messages align trailing in an accent-colored bubble;
/// assistant messages align leading in a neutral bubble with Speak/Copy
/// actions once streaming has finished. Attached images render in a grid
/// above the text.
struct MessageBubble: View {
    /// The message to render (role, text, images, streaming flag).
    let message: ChatMessage
    /// True while TTS is playing this message; flips the Speak button to Stop.
    var isSpeaking: Bool = false
    /// Invoked when the Speak/Stop button is pressed; nil hides no UI but makes
    /// the button a no-op.
    var onSpeak: (() -> Void)? = nil
    /// Transient "Copied" badge state after the Copy button is pressed.
    @State private var showCopied = false

    var body: some View {
        HStack {
            // Spacers push user bubbles right and assistant bubbles left.
            if message.role == .user {
                Spacer(minLength: 60)
            }
            VStack(alignment: message.role == .user ? .trailing : .leading, spacing: 4) {
                if !message.images.isEmpty {
                    imageGrid
                }
                if !message.content.isEmpty {
                    Text(message.content)
                        .textSelection(.enabled)
                        .padding(.horizontal, 12)
                        .padding(.vertical, 8)
                        .background(bubbleColor)
                        .foregroundStyle(message.role == .user ? .white : .primary)
                        .clipShape(RoundedRectangle(cornerRadius: 16))
                }
                // Spinner + label while the assistant response is streaming in.
                if message.isStreaming {
                    HStack(spacing: 4) {
                        ProgressView()
                            .scaleEffect(0.6)
                        Text("Generating...")
                            .font(.caption2)
                            .foregroundStyle(.secondary)
                    }
                }
                // Action buttons for assistant messages (only once complete)
                if message.role == .assistant && !message.content.isEmpty && !message.isStreaming {
                    HStack(spacing: 12) {
                        // Speaker button for TTS
                        Button {
                            onSpeak?()
                        } label: {
                            HStack(spacing: 4) {
                                Image(systemName: isSpeaking ? "stop.fill" : "speaker.wave.2")
                                Text(isSpeaking ? "Stop" : "Speak")
                            }
                            .font(.caption)
                            .foregroundStyle(isSpeaking ? .red : .secondary)
                        }
                        .buttonStyle(.plain)
                        // Copy button: puts the plain text on the general pasteboard
                        // and shows a "Copied" badge for 1.5 s.
                        // NOTE(review): rapid double-copy schedules two resets, so
                        // the badge can clear earlier than 1.5 s after the second click.
                        Button {
                            NSPasteboard.general.clearContents()
                            NSPasteboard.general.setString(message.content, forType: .string)
                            showCopied = true
                            DispatchQueue.main.asyncAfter(deadline: .now() + 1.5) {
                                showCopied = false
                            }
                        } label: {
                            HStack(spacing: 4) {
                                Image(systemName: showCopied ? "checkmark" : "doc.on.doc")
                                Text(showCopied ? "Copied" : "Copy")
                            }
                            .font(.caption)
                            .foregroundStyle(.secondary)
                        }
                        .buttonStyle(.plain)
                        Spacer()
                    }
                    .padding(.top, 2)
                }
            }
            if message.role == .assistant {
                Spacer(minLength: 60)
            }
        }
    }

    /// Grid of attached-image thumbnails, up to 3 columns, tinted to match the
    /// message role. Attachments without a thumbnail are skipped.
    @ViewBuilder
    private var imageGrid: some View {
        let columns = min(message.images.count, 3)
        LazyVGrid(
            columns: Array(repeating: GridItem(.flexible(), spacing: 4), count: columns),
            spacing: 4
        ) {
            ForEach(message.images) { attachment in
                if let thumbnail = attachment.thumbnail {
                    Image(nsImage: thumbnail)
                        .resizable()
                        .aspectRatio(contentMode: .fill)
                        .frame(width: 80, height: 80)
                        .clipShape(RoundedRectangle(cornerRadius: 8))
                }
            }
        }
        .padding(4)
        .background(
            message.role == .user
                ? Color.accentColor.opacity(0.8)
                : Color(nsColor: .controlBackgroundColor)
        )
        .clipShape(RoundedRectangle(cornerRadius: 12))
    }

    /// Bubble fill by role: accent for the user, system control background
    /// for the assistant.
    private var bubbleColor: Color {
        switch message.role {
        case .user:
            return .accentColor
        case .assistant:
            return Color(nsColor: .controlBackgroundColor)
        }
    }
}
// MARK: - Image Preview Sheet

/// Confirmation sheet shown before attaching a recent image to the message.
/// Displays the image (or a loading placeholder), its filename, and
/// Cancel / "Add to Message" buttons wired to the provided callbacks.
struct ImagePreviewSheet: View {
    /// File to preview.
    let url: URL
    /// Called when the user confirms adding the image.
    let onConfirm: () -> Void
    /// Called when the user cancels.
    let onCancel: () -> Void
    /// Loaded on appearance; nil shows the placeholder.
    @State private var image: NSImage?

    var body: some View {
        VStack(spacing: 16) {
            Text("Add Image")
                .font(.headline)
            preview
            Text(url.lastPathComponent)
                .font(.caption)
                .foregroundStyle(.secondary)
                .lineLimit(1)
            actions
        }
        .padding(24)
        .frame(minWidth: 400, minHeight: 300)
        .task {
            // Read the file once the sheet appears.
            image = NSImage(contentsOf: url)
        }
    }

    /// The loaded image scaled to fit, or a gray placeholder with a spinner.
    @ViewBuilder
    private var preview: some View {
        if let loaded = image {
            Image(nsImage: loaded)
                .resizable()
                .aspectRatio(contentMode: .fit)
                .frame(maxWidth: 500, maxHeight: 400)
                .clipShape(RoundedRectangle(cornerRadius: 8))
                .shadow(radius: 4)
        } else {
            RoundedRectangle(cornerRadius: 8)
                .fill(Color.gray.opacity(0.2))
                .frame(width: 300, height: 200)
                .overlay {
                    ProgressView()
                }
        }
    }

    /// Cancel / confirm buttons with the standard keyboard shortcuts
    /// (Escape cancels, Return confirms).
    private var actions: some View {
        HStack(spacing: 16) {
            Button("Cancel", action: onCancel)
                .keyboardShortcut(.cancelAction)
            Button("Add to Message", action: onConfirm)
                .keyboardShortcut(.defaultAction)
                .buttonStyle(.borderedProminent)
        }
    }
}
// MARK: - URL Identifiable Extension

/// Lets `URL` drive item-based presentation (`.sheet(item:)` in ChatView) by
/// using its absolute string as the identity.
/// NOTE(review): retroactively conforming a type we don't own is fragile — if
/// Foundation (or another module) ever adds its own `Identifiable` conformance
/// for `URL`, the duplicate conformance becomes a problem. Consider wrapping
/// the URL in a small Identifiable struct instead.
extension URL: @retroactive Identifiable {
    public var id: String { absoluteString }
}