Add vision support, gRPC reflection toggle, and chat improvements
- Add Vision framework integration for image analysis (OCR, classification)
- Add image attachment support in chat UI with drag & drop
- Add recent images sidebar from Downloads/Desktop
- Add copy to clipboard button for assistant responses
- Add gRPC reflection service with toggle in settings
- Create proper .proto file and generate Swift code
- Add server restart when toggling reflection setting
- Fix port number formatting in settings (remove comma grouping)
- Update gRPC dependencies to v2.x

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -6,6 +6,7 @@ public enum AppleIntelligenceError: Error, CustomStringConvertible, Sendable {
|
||||
case modelNotAvailable
|
||||
case generationFailed(String)
|
||||
case sessionCreationFailed
|
||||
case imageAnalysisFailed(String)
|
||||
|
||||
public var description: String {
|
||||
switch self {
|
||||
@@ -15,6 +16,8 @@ public enum AppleIntelligenceError: Error, CustomStringConvertible, Sendable {
|
||||
return "Generation failed: \(reason)"
|
||||
case .sessionCreationFailed:
|
||||
return "Failed to create language model session"
|
||||
case .imageAnalysisFailed(let reason):
|
||||
return "Image analysis failed: \(reason)"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -24,6 +27,9 @@ public actor AppleIntelligenceService {
|
||||
/// The language model session
|
||||
private var session: LanguageModelSession?
|
||||
|
||||
/// Vision analysis service for image processing
|
||||
private let visionService = VisionAnalysisService()
|
||||
|
||||
/// Whether the model is available
|
||||
public private(set) var isAvailable: Bool = false
|
||||
|
||||
@@ -60,21 +66,42 @@ public actor AppleIntelligenceService {
|
||||
}
|
||||
|
||||
/// Generate a completion for the given prompt (non-streaming)
|
||||
public func complete(prompt: String, temperature: Float?, maxTokens: Int?) async throws -> String {
|
||||
public func complete(
|
||||
prompt: String,
|
||||
temperature: Float?,
|
||||
maxTokens: Int?,
|
||||
images: [(data: Data, filename: String?)] = []
|
||||
) async throws -> (text: String, analyses: [VisionAnalysisResult]) {
|
||||
guard isAvailable, let session = session else {
|
||||
throw AppleIntelligenceError.modelNotAvailable
|
||||
}
|
||||
|
||||
let response = try await session.respond(to: prompt)
|
||||
return response.content
|
||||
// Analyze images if provided
|
||||
var analyses: [VisionAnalysisResult] = []
|
||||
var enhancedPrompt = prompt
|
||||
|
||||
if !images.isEmpty {
|
||||
do {
|
||||
analyses = try await visionService.analyzeMultiple(images: images)
|
||||
let analysesWithFilenames = zip(analyses, images).map { (result: $0.0, filename: $0.1.filename) }
|
||||
let context = await visionService.formatAnalysesAsPromptContext(analyses: analysesWithFilenames)
|
||||
enhancedPrompt = context + "\n\n" + prompt
|
||||
} catch {
|
||||
throw AppleIntelligenceError.imageAnalysisFailed(error.localizedDescription)
|
||||
}
|
||||
}
|
||||
|
||||
let response = try await session.respond(to: enhancedPrompt)
|
||||
return (text: response.content, analyses: analyses)
|
||||
}
|
||||
|
||||
/// Generate a streaming completion for the given prompt
|
||||
public func streamComplete(
|
||||
prompt: String,
|
||||
temperature: Float?,
|
||||
maxTokens: Int?
|
||||
) -> AsyncThrowingStream<String, Error> {
|
||||
maxTokens: Int?,
|
||||
images: [(data: Data, filename: String?)] = []
|
||||
) -> AsyncThrowingStream<(text: String, analyses: [VisionAnalysisResult]?), Error> {
|
||||
AsyncThrowingStream { continuation in
|
||||
Task {
|
||||
guard self.isAvailable, let session = self.session else {
|
||||
@@ -82,10 +109,33 @@ public actor AppleIntelligenceService {
|
||||
return
|
||||
}
|
||||
|
||||
// Analyze images first if provided
|
||||
var analyses: [VisionAnalysisResult] = []
|
||||
var enhancedPrompt = prompt
|
||||
|
||||
if !images.isEmpty {
|
||||
do {
|
||||
analyses = try await self.visionService.analyzeMultiple(images: images)
|
||||
let analysesWithFilenames = zip(analyses, images).map { (result: $0.0, filename: $0.1.filename) }
|
||||
let context = await self.visionService.formatAnalysesAsPromptContext(analyses: analysesWithFilenames)
|
||||
enhancedPrompt = context + "\n\n" + prompt
|
||||
} catch {
|
||||
continuation.finish(throwing: AppleIntelligenceError.imageAnalysisFailed(error.localizedDescription))
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
let stream = session.streamResponse(to: prompt)
|
||||
let stream = session.streamResponse(to: enhancedPrompt)
|
||||
var isFirst = true
|
||||
for try await partialResponse in stream {
|
||||
continuation.yield(partialResponse.content)
|
||||
// Include analyses only in first chunk
|
||||
if isFirst {
|
||||
continuation.yield((text: partialResponse.content, analyses: analyses))
|
||||
isFirst = false
|
||||
} else {
|
||||
continuation.yield((text: partialResponse.content, analyses: nil))
|
||||
}
|
||||
}
|
||||
continuation.finish()
|
||||
} catch {
|
||||
|
||||
@@ -0,0 +1,243 @@
|
||||
import Foundation
|
||||
import Vision
|
||||
import CoreImage
|
||||
|
||||
#if canImport(AppKit)
|
||||
import AppKit
|
||||
#endif
|
||||
|
||||
/// Outcome of analyzing a single image: recognized text, labels, and a summary.
public struct VisionAnalysisResult: Sendable {
    /// Recognized text; an empty string by default.
    public let textContent: String

    /// Labels attached to the image; empty by default.
    public let labels: [String]

    /// Free-form summary of the image; an empty string by default.
    public let description: String

    /// Creates a result. Every field defaults to "empty".
    public init(textContent: String = "", labels: [String] = [], description: String = "") {
        self.textContent = textContent
        self.labels = labels
        self.description = description
    }

    /// Renders this result as one line of context for a language-model prompt.
    ///
    /// - Parameters:
    ///   - imageIndex: Zero-based position of the image; shown one-based when
    ///     no filename is available.
    ///   - filename: Name used as the line's heading, if known.
    /// - Returns: `"<name>: Text: \"…\" | Objects: a, b"`, containing only the
    ///   non-empty sections, or `"<name>: No content detected"` when both the
    ///   text and the labels are empty.
    public func formatAsContext(imageIndex: Int, filename: String?) -> String {
        let heading = filename ?? "Image \(imageIndex + 1)"

        // Build each optional section, then drop the ones that are absent.
        let textSection: String? = textContent.isEmpty ? nil : "Text: \"\(textContent)\""
        let labelSection: String? = labels.isEmpty ? nil : "Objects: \(labels.joined(separator: ", "))"
        let sections = [textSection, labelSection].compactMap { $0 }

        guard !sections.isEmpty else {
            return "\(heading): No content detected"
        }
        return "\(heading): \(sections.joined(separator: " | "))"
    }
}
|
||||
|
||||
/// Errors from Vision analysis.
///
/// Conforms to `LocalizedError` so that `error.localizedDescription` carries
/// the same message as `description`. Without it, Foundation substitutes a
/// generic "operation couldn't be completed" string and the reason is lost
/// for any caller that reports `localizedDescription`.
public enum VisionAnalysisError: Error, CustomStringConvertible, LocalizedError, Sendable {
    /// The supplied bytes could not be decoded into an image.
    case invalidImageData
    /// A Vision request failed; the associated value is the reason.
    case analysisFailure(String)
    /// The image format is not supported.
    case unsupportedFormat

    /// Human-readable message for this error.
    public var description: String {
        switch self {
        case .invalidImageData:
            return "Invalid or corrupted image data"
        case .analysisFailure(let reason):
            return "Vision analysis failed: \(reason)"
        case .unsupportedFormat:
            return "Unsupported image format"
        }
    }

    /// `LocalizedError` hook; mirrors `description` so both spellings agree.
    public var errorDescription: String? {
        description
    }
}
|
||||
|
||||
/// Service for analyzing images using Apple's Vision framework.
///
/// Decodes raw image bytes, optionally runs text recognition and image
/// classification, and packages the findings as `VisionAnalysisResult`
/// values or as a text block suitable for prepending to an LLM prompt.
public actor VisionAnalysisService {

    /// Configuration for which analyses to perform.
    public struct AnalysisOptions: Sendable {
        /// Whether text recognition (OCR) is run.
        public var performOCR: Bool
        /// Whether image classification is run.
        public var performClassification: Bool

        /// Creates a set of options; both analyses are enabled by default.
        public init(performOCR: Bool = true, performClassification: Bool = true) {
            self.performOCR = performOCR
            self.performClassification = performClassification
        }

        /// OCR and classification.
        public static let all = AnalysisOptions()
        /// OCR only.
        public static let textOnly = AnalysisOptions(performOCR: true, performClassification: false)
    }

    // MARK: - Tuning constants

    /// Maximum number of recognized-text characters quoted in `description`.
    private static let descriptionTextLimit = 200
    /// Maximum number of labels listed in `description`.
    private static let descriptionLabelLimit = 5
    /// Classification observations at or below this confidence are dropped.
    private static let minimumLabelConfidence: Float = 0.3
    /// Maximum number of labels kept per image.
    private static let maximumLabelCount = 10

    public init() {}

    // MARK: - Public API

    /// Analyze a single image.
    ///
    /// - Parameters:
    ///   - imageData: Encoded image bytes.
    ///   - options: Which analyses to run; defaults to `.all`.
    /// - Returns: The recognized text, labels, and a short summary.
    /// - Throws: `VisionAnalysisError.invalidImageData` when the bytes cannot
    ///   be decoded, or `VisionAnalysisError.analysisFailure` when a Vision
    ///   request fails.
    public func analyze(
        imageData: Data,
        options: AnalysisOptions = .all
    ) async throws -> VisionAnalysisResult {
        guard let cgImage = createCGImage(from: imageData) else {
            throw VisionAnalysisError.invalidImageData
        }

        var textContent = ""
        if options.performOCR {
            textContent = try performTextRecognition(on: cgImage)
        }

        var labels: [String] = []
        if options.performClassification {
            labels = try performImageClassification(on: cgImage)
        }

        // Build a short summary: truncated text first, then the leading labels.
        var descriptionParts: [String] = []
        if !textContent.isEmpty {
            let truncatedText = textContent.count > Self.descriptionTextLimit
                ? String(textContent.prefix(Self.descriptionTextLimit)) + "..."
                : textContent
            descriptionParts.append("Contains text: \"\(truncatedText)\"")
        }
        if !labels.isEmpty {
            let leadingLabels = labels.prefix(Self.descriptionLabelLimit).joined(separator: ", ")
            descriptionParts.append("Shows: \(leadingLabels)")
        }

        let description = descriptionParts.isEmpty
            ? "Image with no recognizable content"
            : descriptionParts.joined(separator: ". ")

        return VisionAnalysisResult(
            textContent: textContent,
            labels: labels,
            description: description
        )
    }

    /// Analyze multiple images, one after another, in input order.
    ///
    /// - Parameters:
    ///   - images: Image bytes paired with an optional filename (the filename
    ///     is not used by the analysis itself).
    ///   - options: Which analyses to run; defaults to `.all`.
    /// - Returns: One result per input image, in the same order.
    /// - Throws: The first error thrown by `analyze`; no partial results are
    ///   returned in that case.
    public func analyzeMultiple(
        images: [(data: Data, filename: String?)],
        options: AnalysisOptions = .all
    ) async throws -> [VisionAnalysisResult] {
        var results: [VisionAnalysisResult] = []
        results.reserveCapacity(images.count)

        for image in images {
            let result = try await analyze(imageData: image.data, options: options)
            results.append(result)
        }

        return results
    }

    /// Format multiple analyses as a combined context string for an LLM.
    ///
    /// - Parameter analyses: Results paired with their optional filenames.
    /// - Returns: An empty string for empty input; otherwise one line per
    ///   image wrapped between `[Image Analysis]` and `[End Image Analysis]`.
    public func formatAnalysesAsPromptContext(
        analyses: [(result: VisionAnalysisResult, filename: String?)]
    ) -> String {
        guard !analyses.isEmpty else { return "" }

        var lines: [String] = ["[Image Analysis]"]

        for (index, analysis) in analyses.enumerated() {
            lines.append(analysis.result.formatAsContext(
                imageIndex: index,
                filename: analysis.filename
            ))
        }

        lines.append("[End Image Analysis]")

        return lines.joined(separator: "\n")
    }

    // MARK: - Private Methods

    /// Decodes image bytes into a `CGImage`.
    ///
    /// Tries `NSImage` first where AppKit is available and falls back to
    /// Core Image otherwise, or when `NSImage` decoding fails.
    private func createCGImage(from data: Data) -> CGImage? {
        #if canImport(AppKit)
        if let cgImage = NSImage(data: data)?
            .cgImage(forProposedRect: nil, context: nil, hints: nil) {
            return cgImage
        }
        #endif
        return renderCGImageWithCoreImage(from: data)
    }

    /// Core Image decoding path shared by every platform.
    private func renderCGImageWithCoreImage(from data: Data) -> CGImage? {
        guard let ciImage = CIImage(data: data) else { return nil }
        let context = CIContext()
        return context.createCGImage(ciImage, from: ciImage.extent)
    }

    /// Runs one Vision request against `image` and returns its observations.
    ///
    /// `VNImageRequestHandler.perform(_:)` is synchronous, so the results are
    /// read directly from the request once it returns. This deliberately
    /// avoids bridging the request's completion handler through a checked
    /// continuation: on failure the handler can be invoked with the error
    /// *and* `perform` can throw, which would resume the continuation twice.
    ///
    /// - Throws: `VisionAnalysisError.analysisFailure` wrapping the underlying
    ///   error's `localizedDescription`.
    private func perform(_ request: VNRequest, on image: CGImage) throws -> [VNObservation] {
        let handler = VNImageRequestHandler(cgImage: image, options: [:])
        do {
            try handler.perform([request])
        } catch {
            throw VisionAnalysisError.analysisFailure(error.localizedDescription)
        }
        return request.results ?? []
    }

    /// Recognizes text in `image` and joins the top candidate of every
    /// observation with newlines. Returns an empty string when nothing is found.
    private func performTextRecognition(on image: CGImage) throws -> String {
        let request = VNRecognizeTextRequest()
        request.recognitionLevel = .accurate
        request.usesLanguageCorrection = true

        let rawResults = try perform(request, on: image)
        guard let observations = rawResults as? [VNRecognizedTextObservation] else {
            return ""
        }

        return observations
            .compactMap { $0.topCandidates(1).first?.string }
            .joined(separator: "\n")
    }

    /// Classifies `image` and returns human-readable labels.
    ///
    /// Keeps observations above `minimumLabelConfidence`, takes the first
    /// `maximumLabelCount` of them in the order Vision reported them, and
    /// replaces underscores in identifiers with spaces.
    private func performImageClassification(on image: CGImage) throws -> [String] {
        let request = VNClassifyImageRequest()

        let rawResults = try perform(request, on: image)
        guard let observations = rawResults as? [VNClassificationObservation] else {
            return []
        }

        let labels = observations
            .filter { $0.confidence > Self.minimumLabelConfidence }
            .prefix(Self.maximumLabelCount)
            .map { $0.identifier.replacingOccurrences(of: "_", with: " ") }

        return Array(labels)
    }
}
|
||||
Reference in New Issue
Block a user