Add vision support, gRPC reflection toggle, and chat improvements

- Add Vision framework integration for image analysis (OCR, classification)
- Add image attachment support in chat UI with drag & drop
- Add recent images sidebar from Downloads/Desktop
- Add copy to clipboard button for assistant responses
- Add gRPC reflection service with toggle in settings
- Create proper .proto file and generate Swift code
- Add server restart when toggling reflection setting
- Fix port number formatting in settings (remove comma grouping)
- Update gRPC dependencies to v2.x

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Mathias Beaulieu-Duncan
2025-12-30 16:18:06 -05:00
parent 62ab635aec
commit 638656e7ca
18 changed files with 2474 additions and 478 deletions
@@ -6,6 +6,7 @@ public enum AppleIntelligenceError: Error, CustomStringConvertible, Sendable {
case modelNotAvailable
case generationFailed(String)
case sessionCreationFailed
case imageAnalysisFailed(String)
public var description: String {
switch self {
@@ -15,6 +16,8 @@ public enum AppleIntelligenceError: Error, CustomStringConvertible, Sendable {
return "Generation failed: \(reason)"
case .sessionCreationFailed:
return "Failed to create language model session"
case .imageAnalysisFailed(let reason):
return "Image analysis failed: \(reason)"
}
}
}
@@ -24,6 +27,9 @@ public actor AppleIntelligenceService {
/// The language model session
private var session: LanguageModelSession?
/// Vision analysis service for image processing
private let visionService = VisionAnalysisService()
/// Whether the model is available
public private(set) var isAvailable: Bool = false
@@ -60,21 +66,42 @@ public actor AppleIntelligenceService {
}
/// Generate a completion for the given prompt (non-streaming)
public func complete(prompt: String, temperature: Float?, maxTokens: Int?) async throws -> String {
public func complete(
prompt: String,
temperature: Float?,
maxTokens: Int?,
images: [(data: Data, filename: String?)] = []
) async throws -> (text: String, analyses: [VisionAnalysisResult]) {
guard isAvailable, let session = session else {
throw AppleIntelligenceError.modelNotAvailable
}
let response = try await session.respond(to: prompt)
return response.content
// Analyze images if provided
var analyses: [VisionAnalysisResult] = []
var enhancedPrompt = prompt
if !images.isEmpty {
do {
analyses = try await visionService.analyzeMultiple(images: images)
let analysesWithFilenames = zip(analyses, images).map { (result: $0.0, filename: $0.1.filename) }
let context = await visionService.formatAnalysesAsPromptContext(analyses: analysesWithFilenames)
enhancedPrompt = context + "\n\n" + prompt
} catch {
throw AppleIntelligenceError.imageAnalysisFailed(error.localizedDescription)
}
}
let response = try await session.respond(to: enhancedPrompt)
return (text: response.content, analyses: analyses)
}
/// Generate a streaming completion for the given prompt
public func streamComplete(
prompt: String,
temperature: Float?,
maxTokens: Int?
) -> AsyncThrowingStream<String, Error> {
maxTokens: Int?,
images: [(data: Data, filename: String?)] = []
) -> AsyncThrowingStream<(text: String, analyses: [VisionAnalysisResult]?), Error> {
AsyncThrowingStream { continuation in
Task {
guard self.isAvailable, let session = self.session else {
@@ -82,10 +109,33 @@ public actor AppleIntelligenceService {
return
}
// Analyze images first if provided
var analyses: [VisionAnalysisResult] = []
var enhancedPrompt = prompt
if !images.isEmpty {
do {
analyses = try await self.visionService.analyzeMultiple(images: images)
let analysesWithFilenames = zip(analyses, images).map { (result: $0.0, filename: $0.1.filename) }
let context = await self.visionService.formatAnalysesAsPromptContext(analyses: analysesWithFilenames)
enhancedPrompt = context + "\n\n" + prompt
} catch {
continuation.finish(throwing: AppleIntelligenceError.imageAnalysisFailed(error.localizedDescription))
return
}
}
do {
let stream = session.streamResponse(to: prompt)
let stream = session.streamResponse(to: enhancedPrompt)
var isFirst = true
for try await partialResponse in stream {
continuation.yield(partialResponse.content)
// Include analyses only in first chunk
if isFirst {
continuation.yield((text: partialResponse.content, analyses: analyses))
isFirst = false
} else {
continuation.yield((text: partialResponse.content, analyses: nil))
}
}
continuation.finish()
} catch {
@@ -0,0 +1,243 @@
import Foundation
import Vision
import CoreImage
#if canImport(AppKit)
import AppKit
#endif
/// Outcome of running Vision analysis on a single image.
///
/// A plain, `Sendable` value type so results can cross actor and task boundaries.
public struct VisionAnalysisResult: Sendable {
    /// Text recognized in the image; empty when none was found or OCR was not run.
    public let textContent: String
    /// Classification labels for the image; empty when none were produced.
    public let labels: [String]
    /// Free-form summary supplied by whoever created the result.
    public let description: String

    /// Creates a result. Every field defaults to "nothing detected".
    /// - Parameters:
    ///   - textContent: Recognized text.
    ///   - labels: Classification labels.
    ///   - description: Summary text.
    public init(textContent: String = "", labels: [String] = [], description: String = "") {
        self.textContent = textContent
        self.labels = labels
        self.description = description
    }

    /// Renders this result as a single line suitable for inclusion in an LLM prompt.
    ///
    /// - Parameters:
    ///   - imageIndex: Zero-based position of the image; shown one-based when no filename is given.
    ///   - filename: Optional display name that takes precedence over the positional name.
    /// - Returns: `"<name>: Text: "…" | Objects: a, b"`, containing only the sections that
    ///   have content, or `"<name>: No content detected"` when both are empty.
    public func formatAsContext(imageIndex: Int, filename: String?) -> String {
        let title = filename ?? "Image \(imageIndex + 1)"

        // Each section is nil when it has nothing to contribute; order is text, then labels.
        let textSegment: String? = textContent.isEmpty ? nil : "Text: \"\(textContent)\""
        let labelSegment: String? = labels.isEmpty ? nil : "Objects: \(labels.joined(separator: ", "))"
        let segments = [textSegment, labelSegment].compactMap { $0 }

        guard !segments.isEmpty else {
            return "\(title): No content detected"
        }
        return "\(title): \(segments.joined(separator: " | "))"
    }
}
/// Errors from Vision analysis.
///
/// Conforms to `LocalizedError` so that `localizedDescription` (used when these errors are
/// reported through a generic `Error`) yields the same message as `description` instead of
/// Foundation's generic "operation couldn't be completed" text.
public enum VisionAnalysisError: Error, LocalizedError, CustomStringConvertible, Sendable {
    /// The supplied data could not be decoded into an image.
    case invalidImageData
    /// A Vision request failed; the associated value carries the underlying reason.
    case analysisFailure(String)
    /// The image format is not supported.
    case unsupportedFormat

    /// Human-readable message for this error.
    public var description: String {
        switch self {
        case .invalidImageData:
            return "Invalid or corrupted image data"
        case .analysisFailure(let reason):
            return "Vision analysis failed: \(reason)"
        case .unsupportedFormat:
            return "Unsupported image format"
        }
    }

    /// `LocalizedError` hook; forwards to `description` so both accessors stay in sync.
    public var errorDescription: String? {
        description
    }
}
/// Service for analyzing images using Apple's Vision framework.
///
/// Runs text recognition (OCR) and image classification on raw image data and packages
/// the findings as `VisionAnalysisResult` values, plus a helper that formats several
/// results as a prompt-context block.
public actor VisionAnalysisService {

    /// Configuration for which analyses to perform.
    public struct AnalysisOptions: Sendable {
        /// Whether to run text recognition.
        public var performOCR: Bool
        /// Whether to run image classification.
        public var performClassification: Bool

        /// Creates an option set; both analyses are enabled by default.
        public init(performOCR: Bool = true, performClassification: Bool = true) {
            self.performOCR = performOCR
            self.performClassification = performClassification
        }

        /// OCR and classification.
        public static let all = AnalysisOptions()
        /// OCR only.
        public static let textOnly = AnalysisOptions(performOCR: true, performClassification: false)
    }

    // MARK: - Tuning constants

    /// Classification observations must exceed this confidence to be kept as labels.
    private static let minimumLabelConfidence: Float = 0.3
    /// Maximum number of labels kept per image.
    private static let maximumLabelCount = 10
    /// Maximum number of recognized-text characters quoted in `description`.
    private static let descriptionTextLimit = 200
    /// Maximum number of labels listed in `description`.
    private static let descriptionLabelLimit = 5

    public init() {}

    // MARK: - Public API

    /// Analyze a single image.
    ///
    /// - Parameters:
    ///   - imageData: Encoded image bytes.
    ///   - options: Which analyses to run. Defaults to `.all`.
    /// - Returns: Recognized text, labels and a short summary of both.
    /// - Throws: `VisionAnalysisError.invalidImageData` when the data cannot be decoded,
    ///   or `VisionAnalysisError.analysisFailure` when a Vision request fails.
    public func analyze(
        imageData: Data,
        options: AnalysisOptions = .all
    ) async throws -> VisionAnalysisResult {
        guard let cgImage = createCGImage(from: imageData) else {
            throw VisionAnalysisError.invalidImageData
        }

        var textContent = ""
        if options.performOCR {
            textContent = try performTextRecognition(on: cgImage)
        }

        var labels: [String] = []
        if options.performClassification {
            labels = try performImageClassification(on: cgImage)
        }

        return VisionAnalysisResult(
            textContent: textContent,
            labels: labels,
            description: makeDescription(textContent: textContent, labels: labels)
        )
    }

    /// Analyze multiple images, one after another, preserving input order.
    ///
    /// - Parameters:
    ///   - images: Image data with optional filenames (filenames are not used here).
    ///   - options: Which analyses to run on every image. Defaults to `.all`.
    /// - Returns: One result per input image, in the same order.
    /// - Throws: The first error raised by `analyze`; remaining images are not processed.
    public func analyzeMultiple(
        images: [(data: Data, filename: String?)],
        options: AnalysisOptions = .all
    ) async throws -> [VisionAnalysisResult] {
        var results: [VisionAnalysisResult] = []
        results.reserveCapacity(images.count)
        for image in images {
            results.append(try await analyze(imageData: image.data, options: options))
        }
        return results
    }

    /// Format multiple analyses as a combined context string for an LLM.
    ///
    /// - Parameter analyses: Results paired with their optional filenames.
    /// - Returns: An empty string for empty input; otherwise one line per image wrapped
    ///   between `[Image Analysis]` and `[End Image Analysis]` marker lines.
    public func formatAnalysesAsPromptContext(
        analyses: [(result: VisionAnalysisResult, filename: String?)]
    ) -> String {
        guard !analyses.isEmpty else { return "" }

        let body = analyses.enumerated().map { index, analysis in
            analysis.result.formatAsContext(imageIndex: index, filename: analysis.filename)
        }
        return (["[Image Analysis]"] + body + ["[End Image Analysis]"]).joined(separator: "\n")
    }

    // MARK: - Private Methods

    /// Builds the short summary stored in `VisionAnalysisResult.description`.
    private func makeDescription(textContent: String, labels: [String]) -> String {
        var parts: [String] = []

        if !textContent.isEmpty {
            let truncatedText = textContent.count > Self.descriptionTextLimit
                ? String(textContent.prefix(Self.descriptionTextLimit)) + "..."
                : textContent
            parts.append("Contains text: \"\(truncatedText)\"")
        }

        if !labels.isEmpty {
            let shownLabels = labels.prefix(Self.descriptionLabelLimit).joined(separator: ", ")
            parts.append("Shows: \(shownLabels)")
        }

        return parts.isEmpty
            ? "Image with no recognizable content"
            : parts.joined(separator: ". ")
    }

    /// Decodes image data into a `CGImage`, trying AppKit first (when available) and
    /// falling back to Core Image.
    private func createCGImage(from data: Data) -> CGImage? {
        #if canImport(AppKit)
        if let cgImage = NSImage(data: data)?.cgImage(forProposedRect: nil, context: nil, hints: nil) {
            return cgImage
        }
        #endif
        return renderCGImageWithCoreImage(from: data)
    }

    /// Core Image decoding path shared by all platforms.
    private func renderCGImageWithCoreImage(from data: Data) -> CGImage? {
        guard let ciImage = CIImage(data: data) else { return nil }
        return CIContext().createCGImage(ciImage, from: ciImage.extent)
    }

    /// Runs a single Vision request against `image`, mapping any failure to
    /// `VisionAnalysisError.analysisFailure`.
    ///
    /// `VNImageRequestHandler.perform` is synchronous, so results are read from the
    /// request afterwards instead of bridging a completion handler through a checked
    /// continuation, which must be resumed exactly once on every path.
    private func perform(_ request: VNRequest, on image: CGImage) throws {
        let handler = VNImageRequestHandler(cgImage: image, options: [:])
        do {
            try handler.perform([request])
        } catch {
            throw VisionAnalysisError.analysisFailure(error.localizedDescription)
        }
    }

    /// Recognizes text in `image` using the accurate recognition level with language
    /// correction, returning the top candidate of each observation joined by newlines.
    private func performTextRecognition(on image: CGImage) throws -> String {
        let request = VNRecognizeTextRequest()
        request.recognitionLevel = .accurate
        request.usesLanguageCorrection = true

        try perform(request, on: image)

        let observations = request.results ?? []
        return observations
            .compactMap { $0.topCandidates(1).first?.string }
            .joined(separator: "\n")
    }

    /// Classifies `image`, keeping at most `maximumLabelCount` labels whose confidence
    /// exceeds `minimumLabelConfidence`; underscores in identifiers become spaces.
    private func performImageClassification(on image: CGImage) throws -> [String] {
        let request = VNClassifyImageRequest()

        try perform(request, on: image)

        let observations = request.results ?? []
        return observations
            .filter { $0.confidence > Self.minimumLabelConfidence }
            .prefix(Self.maximumLabelCount)
            .map { $0.identifier.replacingOccurrences(of: "_", with: " ") }
    }
}