Compare commits
No commits in common. "main" and "0.1.0-rc1" have entirely different histories.
@ -1,22 +1,13 @@
|
||||
{
|
||||
"originHash" : "1d1344dab64c4f153b2a1af227098e02f62d2c1f627c95dcad4304f1c16a97a3",
|
||||
"originHash" : "73128af91f020c013de06bf6af5d06131ff05e38285118f5ff904ee06a3a6e24",
|
||||
"pins" : [
|
||||
{
|
||||
"identity" : "grpc-swift-2",
|
||||
"identity" : "grpc-swift",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/grpc/grpc-swift-2.git",
|
||||
"location" : "https://github.com/grpc/grpc-swift.git",
|
||||
"state" : {
|
||||
"revision" : "531924b28fde0cf7585123c781c6f55cc35ef7fc",
|
||||
"version" : "2.2.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "grpc-swift-extras",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/grpc/grpc-swift-extras.git",
|
||||
"state" : {
|
||||
"revision" : "7ab4a690ac09696689a9c4b99320af7ef809bb3d",
|
||||
"version" : "2.1.1"
|
||||
"revision" : "adc18c3e1c55027d0ce43893897ac448e3f27ebe",
|
||||
"version" : "2.2.3"
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -24,8 +15,8 @@
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/grpc/grpc-swift-nio-transport.git",
|
||||
"state" : {
|
||||
"revision" : "dcfa8dc858bba5ded7a3760cede8c5fc03558a42",
|
||||
"version" : "2.4.0"
|
||||
"revision" : "ca2303eb7f3df556beafbba33a143ffa30d5b786",
|
||||
"version" : "1.2.3"
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -33,8 +24,8 @@
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/grpc/grpc-swift-protobuf.git",
|
||||
"state" : {
|
||||
"revision" : "a1aa982cb2a276c72b478433eb75a4ec6508a277",
|
||||
"version" : "2.1.2"
|
||||
"revision" : "53e89e3a5d417307f70a721c7b83e564fefb1e1c",
|
||||
"version" : "1.3.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -109,15 +100,6 @@
|
||||
"version" : "4.2.0"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-distributed-tracing",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-distributed-tracing.git",
|
||||
"state" : {
|
||||
"revision" : "baa932c1336f7894145cbaafcd34ce2dd0b77c97",
|
||||
"version" : "1.3.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-http-structured-headers",
|
||||
"kind" : "remoteSourceControl",
|
||||
@ -208,15 +190,6 @@
|
||||
"version" : "1.33.3"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-service-context",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-service-context.git",
|
||||
"state" : {
|
||||
"revision" : "1983448fefc717a2bc2ebde5490fe99873c5b8a6",
|
||||
"version" : "1.2.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-service-lifecycle",
|
||||
"kind" : "remoteSourceControl",
|
||||
|
||||
@ -11,10 +11,9 @@ let package = Package(
|
||||
.executable(name: "AppleIntelligenceApp", targets: ["AppleIntelligenceApp"]),
|
||||
],
|
||||
dependencies: [
|
||||
.package(url: "https://github.com/grpc/grpc-swift-2.git", from: "2.0.0"),
|
||||
.package(url: "https://github.com/grpc/grpc-swift-nio-transport.git", from: "2.0.0"),
|
||||
.package(url: "https://github.com/grpc/grpc-swift-protobuf.git", from: "2.0.0"),
|
||||
.package(url: "https://github.com/grpc/grpc-swift-extras.git", from: "2.0.0"),
|
||||
.package(url: "https://github.com/grpc/grpc-swift.git", from: "2.0.0"),
|
||||
.package(url: "https://github.com/grpc/grpc-swift-nio-transport.git", from: "1.0.0"),
|
||||
.package(url: "https://github.com/grpc/grpc-swift-protobuf.git", from: "1.0.0"),
|
||||
.package(url: "https://github.com/apple/swift-protobuf.git", from: "1.28.0"),
|
||||
.package(url: "https://github.com/apple/swift-argument-parser.git", from: "1.3.0"),
|
||||
],
|
||||
@ -23,15 +22,11 @@ let package = Package(
|
||||
.target(
|
||||
name: "AppleIntelligenceCore",
|
||||
dependencies: [
|
||||
.product(name: "GRPCCore", package: "grpc-swift-2"),
|
||||
.product(name: "GRPCCore", package: "grpc-swift"),
|
||||
.product(name: "GRPCNIOTransportHTTP2", package: "grpc-swift-nio-transport"),
|
||||
.product(name: "GRPCProtobuf", package: "grpc-swift-protobuf"),
|
||||
.product(name: "GRPCReflectionService", package: "grpc-swift-extras"),
|
||||
.product(name: "SwiftProtobuf", package: "swift-protobuf"),
|
||||
],
|
||||
resources: [
|
||||
.copy("Resources/apple_intelligence.pb")
|
||||
],
|
||||
swiftSettings: [
|
||||
.unsafeFlags(["-Xfrontend", "-suppress-warnings"])
|
||||
]
|
||||
|
||||
@ -1,179 +0,0 @@
|
||||
syntax = "proto3";
|
||||
|
||||
package appleintelligence;
|
||||
|
||||
// Image data for vision requests
|
||||
message ImageData {
|
||||
bytes data = 1;
|
||||
string filename = 2;
|
||||
string mime_type = 3;
|
||||
}
|
||||
|
||||
// Vision analysis results
|
||||
message ImageAnalysis {
|
||||
string text_content = 1;
|
||||
repeated string labels = 2;
|
||||
string description = 3;
|
||||
}
|
||||
|
||||
// Completion request
|
||||
message CompletionRequest {
|
||||
string prompt = 1;
|
||||
optional float temperature = 2;
|
||||
optional int32 max_tokens = 3;
|
||||
repeated ImageData images = 4;
|
||||
bool include_analysis = 5;
|
||||
}
|
||||
|
||||
// Completion response (non-streaming)
|
||||
message CompletionResponse {
|
||||
string id = 1;
|
||||
string text = 2;
|
||||
string finish_reason = 3;
|
||||
repeated ImageAnalysis image_analyses = 4;
|
||||
}
|
||||
|
||||
// Streaming completion chunk
|
||||
message CompletionChunk {
|
||||
string id = 1;
|
||||
string delta = 2;
|
||||
bool is_final = 3;
|
||||
string finish_reason = 4;
|
||||
repeated ImageAnalysis image_analyses = 5;
|
||||
}
|
||||
|
||||
// Health check request
|
||||
message HealthRequest {}
|
||||
|
||||
// Health check response
|
||||
message HealthResponse {
|
||||
bool healthy = 1;
|
||||
string model_status = 2;
|
||||
}
|
||||
|
||||
// ============ TEXT-TO-SPEECH ============
|
||||
|
||||
// Audio format enumeration
|
||||
enum AudioFormat {
|
||||
AUDIO_FORMAT_UNSPECIFIED = 0;
|
||||
AUDIO_FORMAT_WAV = 1;
|
||||
AUDIO_FORMAT_MP3 = 2;
|
||||
}
|
||||
|
||||
// Voice configuration for TTS
|
||||
message VoiceConfig {
|
||||
string voice_identifier = 1;
|
||||
optional float speaking_rate = 2; // 0.0-1.0, default 0.5
|
||||
optional float pitch_multiplier = 3; // 0.5-2.0, default 1.0
|
||||
optional float volume = 4; // 0.0-1.0, default 1.0
|
||||
}
|
||||
|
||||
// TTS Request
|
||||
message TextToSpeechRequest {
|
||||
string text = 1;
|
||||
AudioFormat output_format = 2;
|
||||
optional VoiceConfig voice_config = 3;
|
||||
}
|
||||
|
||||
// TTS Response
|
||||
message TextToSpeechResponse {
|
||||
bytes audio_data = 1;
|
||||
AudioFormat format = 2;
|
||||
int32 sample_rate = 3;
|
||||
int32 channels = 4;
|
||||
float duration_seconds = 5;
|
||||
}
|
||||
|
||||
// List available voices request
|
||||
message ListVoicesRequest {
|
||||
optional string language_code = 1;
|
||||
}
|
||||
|
||||
// Voice information
|
||||
message VoiceInfo {
|
||||
string identifier = 1;
|
||||
string name = 2;
|
||||
string language = 3;
|
||||
bool is_premium = 4;
|
||||
string gender = 5;
|
||||
}
|
||||
|
||||
// List voices response
|
||||
message ListVoicesResponse {
|
||||
repeated VoiceInfo voices = 1;
|
||||
}
|
||||
|
||||
// ============ SPEECH-TO-TEXT ============
|
||||
|
||||
// STT Configuration
|
||||
message TranscriptionConfig {
|
||||
optional string language_code = 1;
|
||||
optional bool enable_punctuation = 2; // default true
|
||||
optional bool enable_timestamps = 3; // default false
|
||||
}
|
||||
|
||||
// Audio data for STT
|
||||
message AudioInput {
|
||||
bytes data = 1;
|
||||
string mime_type = 2; // "audio/wav", "audio/mp3", "audio/m4a"
|
||||
optional int32 sample_rate = 3;
|
||||
optional int32 channels = 4;
|
||||
}
|
||||
|
||||
// File-based transcription request
|
||||
message TranscribeRequest {
|
||||
AudioInput audio = 1;
|
||||
optional TranscriptionConfig config = 2;
|
||||
}
|
||||
|
||||
// Transcription segment with timing
|
||||
message TranscriptionSegment {
|
||||
string text = 1;
|
||||
float start_time = 2;
|
||||
float end_time = 3;
|
||||
float confidence = 4;
|
||||
}
|
||||
|
||||
// Transcription response
|
||||
message TranscribeResponse {
|
||||
string text = 1;
|
||||
repeated TranscriptionSegment segments = 2;
|
||||
string detected_language = 3;
|
||||
float confidence = 4;
|
||||
}
|
||||
|
||||
// Streaming STT request chunk
|
||||
message StreamingTranscribeRequest {
|
||||
oneof request {
|
||||
TranscriptionConfig config = 1; // Send first to configure
|
||||
bytes audio_chunk = 2; // Subsequent audio chunks
|
||||
}
|
||||
}
|
||||
|
||||
// Streaming STT response
|
||||
message StreamingTranscribeResponse {
|
||||
string partial_text = 1;
|
||||
bool is_final = 2;
|
||||
string final_text = 3;
|
||||
repeated TranscriptionSegment segments = 4;
|
||||
}
|
||||
|
||||
// Apple Intelligence Service
|
||||
service AppleIntelligenceService {
|
||||
// Single completion request
|
||||
rpc Complete(CompletionRequest) returns (CompletionResponse);
|
||||
|
||||
// Streaming completion request
|
||||
rpc StreamComplete(CompletionRequest) returns (stream CompletionChunk);
|
||||
|
||||
// Health check
|
||||
rpc Health(HealthRequest) returns (HealthResponse);
|
||||
|
||||
// Text-to-Speech
|
||||
rpc TextToSpeech(TextToSpeechRequest) returns (TextToSpeechResponse);
|
||||
rpc ListVoices(ListVoicesRequest) returns (ListVoicesResponse);
|
||||
|
||||
// Speech-to-Text
|
||||
rpc Transcribe(TranscribeRequest) returns (TranscribeResponse);
|
||||
rpc StreamTranscribe(stream StreamingTranscribeRequest) returns (stream StreamingTranscribeResponse);
|
||||
}
|
||||
174
README.md
174
README.md
@ -6,11 +6,8 @@ A Swift-based gRPC server that exposes Apple Intelligence (Foundation Models) ov
|
||||
|
||||
- **gRPC API** - Standard gRPC interface accessible from any language
|
||||
- **Streaming Support** - Real-time token streaming for responsive UX
|
||||
- **Vision Analysis** - Analyze images with text extraction, labeling, and descriptions
|
||||
- **Text-to-Speech** - Convert text to audio (WAV/MP3) with multiple voices
|
||||
- **Speech-to-Text** - Transcribe audio files or stream audio in real-time
|
||||
- **Menu Bar App** - Native macOS app with system tray integration
|
||||
- **Built-in Chat UI** - Test the AI directly from the app with voice input/output
|
||||
- **Built-in Chat UI** - Test the AI directly from the app
|
||||
- **API Key Auth** - Optional bearer token authentication
|
||||
- **Auto-Start** - Launch at login and auto-start server options
|
||||
|
||||
@ -48,7 +45,7 @@ swift build -c release --product AppleIntelligenceServer
|
||||
1. Launch **Apple Intelligence Server** from Applications
|
||||
2. Click the brain icon in the menu bar
|
||||
3. Toggle **Start Server** to begin accepting connections
|
||||
4. Use **Chat** to test the AI directly (supports voice input/output)
|
||||
4. Use **Chat** to test the AI directly
|
||||
5. Configure host, port, and API key in **Settings**
|
||||
|
||||
### CLI Server
|
||||
@ -66,19 +63,10 @@ GRPC_HOST=127.0.0.1 GRPC_PORT=8080 API_KEY=secret .build/release/AppleIntelligen
|
||||
### Service Definition
|
||||
|
||||
```protobuf
|
||||
service AppleIntelligenceService {
|
||||
// AI Completion
|
||||
service AppleIntelligence {
|
||||
rpc Health(HealthRequest) returns (HealthResponse);
|
||||
rpc Complete(CompletionRequest) returns (CompletionResponse);
|
||||
rpc StreamComplete(CompletionRequest) returns (stream CompletionChunk);
|
||||
|
||||
// Text-to-Speech
|
||||
rpc TextToSpeech(TextToSpeechRequest) returns (TextToSpeechResponse);
|
||||
rpc ListVoices(ListVoicesRequest) returns (ListVoicesResponse);
|
||||
|
||||
// Speech-to-Text
|
||||
rpc Transcribe(TranscribeRequest) returns (TranscribeResponse);
|
||||
rpc StreamTranscribe(stream StreamingTranscribeRequest) returns (stream StreamingTranscribeResponse);
|
||||
}
|
||||
```
|
||||
|
||||
@ -87,134 +75,24 @@ service AppleIntelligenceService {
|
||||
| Method | Type | Description |
|
||||
|--------|------|-------------|
|
||||
| `Health` | Unary | Check server and model availability |
|
||||
| `Complete` | Unary | Generate complete response (supports images) |
|
||||
| `Complete` | Unary | Generate complete response |
|
||||
| `StreamComplete` | Server Streaming | Stream tokens as they're generated |
|
||||
| `TextToSpeech` | Unary | Convert text to audio |
|
||||
| `ListVoices` | Unary | List available TTS voices |
|
||||
| `Transcribe` | Unary | Transcribe audio file to text |
|
||||
| `StreamTranscribe` | Bidirectional | Real-time audio transcription |
|
||||
|
||||
### Vision Support
|
||||
|
||||
The `Complete` and `StreamComplete` methods support image analysis:
|
||||
|
||||
```protobuf
|
||||
message CompletionRequest {
|
||||
string prompt = 1;
|
||||
optional float temperature = 2;
|
||||
optional int32 max_tokens = 3;
|
||||
repeated ImageData images = 4; // Attach images for analysis
|
||||
bool include_analysis = 5; // Return detailed analysis
|
||||
}
|
||||
|
||||
message ImageData {
|
||||
bytes data = 1;
|
||||
string filename = 2;
|
||||
string mime_type = 3; // image/png, image/jpeg, etc.
|
||||
}
|
||||
```
|
||||
|
||||
**Supported Image Formats:** PNG, JPEG, GIF, WebP, HEIC
|
||||
|
||||
### Text-to-Speech
|
||||
|
||||
```protobuf
|
||||
message TextToSpeechRequest {
|
||||
string text = 1;
|
||||
AudioFormat output_format = 2; // WAV or MP3
|
||||
optional VoiceConfig voice_config = 3;
|
||||
}
|
||||
|
||||
message VoiceConfig {
|
||||
string voice_identifier = 1; // Voice ID from ListVoices
|
||||
optional float speaking_rate = 2; // 0.0-1.0, default 0.5
|
||||
optional float pitch_multiplier = 3; // 0.5-2.0, default 1.0
|
||||
optional float volume = 4; // 0.0-1.0, default 1.0
|
||||
}
|
||||
```
|
||||
|
||||
**Output Formats:** WAV, MP3
|
||||
|
||||
### Speech-to-Text
|
||||
|
||||
#### File-based Transcription
|
||||
|
||||
```protobuf
|
||||
message TranscribeRequest {
|
||||
AudioInput audio = 1;
|
||||
optional TranscriptionConfig config = 2;
|
||||
}
|
||||
|
||||
message AudioInput {
|
||||
bytes data = 1;
|
||||
string mime_type = 2; // audio/wav, audio/mp3, etc.
|
||||
optional int32 sample_rate = 3;
|
||||
optional int32 channels = 4;
|
||||
}
|
||||
|
||||
message TranscriptionConfig {
|
||||
optional string language_code = 1; // e.g., "en-US", "fr-CA"
|
||||
optional bool enable_punctuation = 2;
|
||||
optional bool enable_timestamps = 3;
|
||||
}
|
||||
```
|
||||
|
||||
**Supported Audio Formats:** WAV, MP3, M4A, AAC, FLAC
|
||||
|
||||
#### Streaming Transcription
|
||||
|
||||
For real-time transcription, use bidirectional streaming:
|
||||
|
||||
1. Send `TranscriptionConfig` first to configure the session
|
||||
2. Send `audio_chunk` messages with PCM audio data (16-bit, 16kHz, mono)
|
||||
3. Receive `StreamingTranscribeResponse` with partial and final results
|
||||
|
||||
```protobuf
|
||||
message StreamingTranscribeRequest {
|
||||
oneof request {
|
||||
TranscriptionConfig config = 1; // Send first
|
||||
bytes audio_chunk = 2; // Then audio chunks
|
||||
}
|
||||
}
|
||||
|
||||
message StreamingTranscribeResponse {
|
||||
string partial_text = 1;
|
||||
bool is_final = 2;
|
||||
string final_text = 3;
|
||||
repeated TranscriptionSegment segments = 4;
|
||||
}
|
||||
```
|
||||
|
||||
### Quick Test with grpcurl
|
||||
|
||||
```bash
|
||||
# Health check
|
||||
grpcurl -plaintext localhost:50051 appleintelligence.AppleIntelligenceService/Health
|
||||
grpcurl -plaintext localhost:50051 appleintelligence.AppleIntelligence/Health
|
||||
|
||||
# Text completion
|
||||
# Non-streaming completion
|
||||
grpcurl -plaintext \
|
||||
-d '{"prompt": "What is 2 + 2?"}' \
|
||||
localhost:50051 appleintelligence.AppleIntelligenceService/Complete
|
||||
localhost:50051 appleintelligence.AppleIntelligence/Complete
|
||||
|
||||
# Streaming completion
|
||||
grpcurl -plaintext \
|
||||
-d '{"prompt": "Tell me a short story"}' \
|
||||
localhost:50051 appleintelligence.AppleIntelligenceService/StreamComplete
|
||||
|
||||
# List TTS voices
|
||||
grpcurl -plaintext \
|
||||
-d '{"language_code": "en-US"}' \
|
||||
localhost:50051 appleintelligence.AppleIntelligenceService/ListVoices
|
||||
|
||||
# Text-to-Speech (base64 encode the response audio_data)
|
||||
grpcurl -plaintext \
|
||||
-d '{"text": "Hello world", "output_format": 1}' \
|
||||
localhost:50051 appleintelligence.AppleIntelligenceService/TextToSpeech
|
||||
|
||||
# Transcribe audio file (base64 encode audio data)
|
||||
grpcurl -plaintext \
|
||||
-d '{"audio": {"data": "'$(base64 -i audio.wav)'", "mime_type": "audio/wav"}}' \
|
||||
localhost:50051 appleintelligence.AppleIntelligenceService/Transcribe
|
||||
localhost:50051 appleintelligence.AppleIntelligence/StreamComplete
|
||||
```
|
||||
|
||||
## Configuration
|
||||
@ -225,21 +103,6 @@ grpcurl -plaintext \
|
||||
| `GRPC_PORT` | `50051` | Port to listen on |
|
||||
| `API_KEY` | *none* | Optional API key for authentication |
|
||||
|
||||
## Supported Languages
|
||||
|
||||
### Speech Recognition (STT)
|
||||
- English (US, CA, GB, AU, IN, IE, ZA)
|
||||
- French (CA, FR)
|
||||
- Spanish (ES, MX)
|
||||
- German, Italian, Portuguese, Japanese, Korean, Chinese
|
||||
- And many more via macOS Speech framework
|
||||
|
||||
### Text-to-Speech (TTS)
|
||||
All voices available in macOS System Settings, including:
|
||||
- Premium voices (highest quality, requires download)
|
||||
- Enhanced voices (good quality)
|
||||
- Default/Compact voices (pre-installed)
|
||||
|
||||
## Client Libraries
|
||||
|
||||
Connect from any language with gRPC support:
|
||||
@ -257,21 +120,15 @@ See [docs/grpc-client-guide.md](docs/grpc-client-guide.md) for detailed examples
|
||||
```
|
||||
apple-intelligence-grpc/
|
||||
├── Package.swift
|
||||
├── Proto/
|
||||
│ └── apple_intelligence.proto # gRPC service definition
|
||||
├── Sources/
|
||||
│ ├── AppleIntelligenceCore/ # Shared gRPC service code
|
||||
│ │ ├── Config.swift
|
||||
│ │ ├── Services/
|
||||
│ │ │ ├── AppleIntelligenceService.swift
|
||||
│ │ │ ├── TextToSpeechService.swift
|
||||
│ │ │ ├── SpeechToTextService.swift
|
||||
│ │ │ └── VisionAnalysisService.swift
|
||||
│ │ │ └── AppleIntelligenceService.swift
|
||||
│ │ ├── Providers/
|
||||
│ │ │ └── AppleIntelligenceProvider.swift
|
||||
│ │ └── Generated/
|
||||
│ │ ├── apple_intelligence.pb.swift
|
||||
│ │ └── apple_intelligence.grpc.swift
|
||||
│ │ └── AppleIntelligence.pb.swift
|
||||
│ ├── AppleIntelligenceServer/ # CLI executable
|
||||
│ │ └── main.swift
|
||||
│ └── AppleIntelligenceApp/ # Menu bar app
|
||||
@ -325,17 +182,6 @@ See [docs/pipeline-configuration.md](docs/pipeline-configuration.md) for setup i
|
||||
- Include the API key in the Authorization header: `Bearer YOUR_API_KEY`
|
||||
- Verify the key matches what's configured in Settings
|
||||
|
||||
### Speech Recognition Not Working
|
||||
|
||||
- Grant microphone permission when prompted
|
||||
- Check System Settings → Privacy & Security → Speech Recognition
|
||||
- Ensure the language is supported
|
||||
|
||||
### TTS Voice Quality
|
||||
|
||||
- Download Premium/Enhanced voices from System Settings → Accessibility → Read & Speak
|
||||
- Premium voices are larger (~150-500MB) but sound more natural
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
||||
|
||||
@ -34,7 +34,7 @@ struct AppleIntelligenceApp: App {
|
||||
.defaultSize(width: 500, height: 600)
|
||||
|
||||
Window("Settings", id: "settings") {
|
||||
SettingsView(settings: settings, serverManager: serverManager)
|
||||
SettingsView(settings: settings)
|
||||
}
|
||||
.windowResizability(.contentSize)
|
||||
}
|
||||
|
||||
@ -19,10 +19,6 @@ final class AppSettings {
|
||||
didSet { UserDefaults.standard.set(autoStartServer, forKey: "auto_start_server") }
|
||||
}
|
||||
|
||||
var enableReflection: Bool {
|
||||
didSet { UserDefaults.standard.set(enableReflection, forKey: "enable_reflection") }
|
||||
}
|
||||
|
||||
var launchAtLogin: Bool {
|
||||
didSet {
|
||||
do {
|
||||
@ -43,12 +39,6 @@ final class AppSettings {
|
||||
self.port = savedPort == 0 ? 50051 : savedPort
|
||||
self.apiKey = UserDefaults.standard.string(forKey: "api_key") ?? ""
|
||||
self.autoStartServer = UserDefaults.standard.bool(forKey: "auto_start_server")
|
||||
// Default to true if not set
|
||||
if UserDefaults.standard.object(forKey: "enable_reflection") == nil {
|
||||
self.enableReflection = true
|
||||
} else {
|
||||
self.enableReflection = UserDefaults.standard.bool(forKey: "enable_reflection")
|
||||
}
|
||||
self.launchAtLogin = SMAppService.mainApp.status == .enabled
|
||||
}
|
||||
|
||||
@ -57,7 +47,6 @@ final class AppSettings {
|
||||
port = 50051
|
||||
apiKey = ""
|
||||
autoStartServer = false
|
||||
enableReflection = true
|
||||
launchAtLogin = false
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,62 +1,4 @@
|
||||
import Foundation
|
||||
import AppKit
|
||||
|
||||
/// Represents an attached image in a chat message
|
||||
struct ImageAttachment: Identifiable, Equatable {
|
||||
let id: UUID
|
||||
let data: Data
|
||||
let filename: String?
|
||||
let thumbnail: NSImage?
|
||||
let mimeType: String
|
||||
|
||||
init(data: Data, filename: String? = nil) {
|
||||
self.id = UUID()
|
||||
self.data = data
|
||||
self.filename = filename
|
||||
self.thumbnail = Self.generateThumbnail(from: data)
|
||||
self.mimeType = Self.detectMimeType(from: data)
|
||||
}
|
||||
|
||||
private static func generateThumbnail(from data: Data) -> NSImage? {
|
||||
guard let image = NSImage(data: data) else { return nil }
|
||||
|
||||
let maxSize: CGFloat = 100
|
||||
let ratio = min(maxSize / image.size.width, maxSize / image.size.height, 1.0)
|
||||
let newSize = NSSize(
|
||||
width: image.size.width * ratio,
|
||||
height: image.size.height * ratio
|
||||
)
|
||||
|
||||
let thumbnail = NSImage(size: newSize)
|
||||
thumbnail.lockFocus()
|
||||
image.draw(
|
||||
in: NSRect(origin: .zero, size: newSize),
|
||||
from: NSRect(origin: .zero, size: image.size),
|
||||
operation: .copy,
|
||||
fraction: 1.0
|
||||
)
|
||||
thumbnail.unlockFocus()
|
||||
return thumbnail
|
||||
}
|
||||
|
||||
private static func detectMimeType(from data: Data) -> String {
|
||||
guard data.count >= 4 else { return "application/octet-stream" }
|
||||
let bytes = [UInt8](data.prefix(4))
|
||||
|
||||
if bytes[0] == 0x89 && bytes[1] == 0x50 && bytes[2] == 0x4E && bytes[3] == 0x47 {
|
||||
return "image/png"
|
||||
} else if bytes[0] == 0xFF && bytes[1] == 0xD8 {
|
||||
return "image/jpeg"
|
||||
} else if bytes[0] == 0x47 && bytes[1] == 0x49 && bytes[2] == 0x46 {
|
||||
return "image/gif"
|
||||
}
|
||||
return "image/png" // Default to PNG
|
||||
}
|
||||
|
||||
static func == (lhs: ImageAttachment, rhs: ImageAttachment) -> Bool {
|
||||
lhs.id == rhs.id
|
||||
}
|
||||
}
|
||||
|
||||
struct ChatMessage: Identifiable, Equatable {
|
||||
let id: UUID
|
||||
@ -64,19 +6,17 @@ struct ChatMessage: Identifiable, Equatable {
|
||||
var content: String
|
||||
let timestamp: Date
|
||||
var isStreaming: Bool
|
||||
var images: [ImageAttachment]
|
||||
|
||||
enum Role: Equatable {
|
||||
case user
|
||||
case assistant
|
||||
}
|
||||
|
||||
init(role: Role, content: String, isStreaming: Bool = false, images: [ImageAttachment] = []) {
|
||||
init(role: Role, content: String, isStreaming: Bool = false) {
|
||||
self.id = UUID()
|
||||
self.role = role
|
||||
self.content = content
|
||||
self.timestamp = Date()
|
||||
self.isStreaming = isStreaming
|
||||
self.images = images
|
||||
}
|
||||
}
|
||||
|
||||
@ -2,7 +2,6 @@ import Foundation
|
||||
import AppleIntelligenceCore
|
||||
import GRPCCore
|
||||
import GRPCNIOTransportHTTP2
|
||||
import GRPCReflectionService
|
||||
|
||||
@MainActor
|
||||
@Observable
|
||||
@ -52,7 +51,6 @@ final class ServerManager {
|
||||
let host = settings.host
|
||||
let port = settings.port
|
||||
let apiKey = settings.apiKey.isEmpty ? nil : settings.apiKey
|
||||
let enableReflection = settings.enableReflection
|
||||
|
||||
serverTask = Task {
|
||||
do {
|
||||
@ -84,16 +82,7 @@ final class ServerManager {
|
||||
config: .defaults
|
||||
)
|
||||
|
||||
// Build services list with optional reflection
|
||||
var services: [any RegistrableRPCService] = [provider]
|
||||
if enableReflection {
|
||||
if let descriptorURL = AppleIntelligenceResources.descriptorSetURL {
|
||||
let reflectionService = try ReflectionService(descriptorSetFileURLs: [descriptorURL])
|
||||
services.append(reflectionService)
|
||||
}
|
||||
}
|
||||
|
||||
let server = GRPCServer(transport: transport, services: services)
|
||||
let server = GRPCServer(transport: transport, services: [provider])
|
||||
|
||||
await MainActor.run {
|
||||
self.state = .running(host: host, port: port)
|
||||
@ -124,19 +113,6 @@ final class ServerManager {
|
||||
state = .stopped
|
||||
}
|
||||
|
||||
func restart() {
|
||||
guard state.isRunning else { return }
|
||||
|
||||
// Stop the current server
|
||||
stop()
|
||||
state = .starting
|
||||
|
||||
// Start again after a short delay to allow port release
|
||||
DispatchQueue.main.asyncAfter(deadline: .now() + 0.5) { [weak self] in
|
||||
self?.start()
|
||||
}
|
||||
}
|
||||
|
||||
func toggle() {
|
||||
if state.isRunning {
|
||||
stop()
|
||||
|
||||
@ -1,8 +1,4 @@
|
||||
import Foundation
|
||||
import AppKit
|
||||
import AVFoundation
|
||||
import Speech
|
||||
import UniformTypeIdentifiers
|
||||
import AppleIntelligenceCore
|
||||
|
||||
@MainActor
|
||||
@ -13,113 +9,11 @@ final class ChatViewModel {
|
||||
var isLoading: Bool = false
|
||||
var errorMessage: String?
|
||||
|
||||
// Image attachment state
|
||||
var pendingImages: [ImageAttachment] = []
|
||||
|
||||
// Voice input/output state
|
||||
var isRecording: Bool = false
|
||||
var isSpeaking: Bool = false
|
||||
var speakingMessageId: UUID?
|
||||
var recordingLevel: Float = 0
|
||||
|
||||
private var service: AppleIntelligenceService?
|
||||
private var ttsService: TextToSpeechService?
|
||||
private var sttService: SpeechToTextService?
|
||||
private var currentTask: Task<Void, Never>?
|
||||
|
||||
// Audio recording - multi-language support
|
||||
private var audioEngine: AVAudioEngine?
|
||||
private var speechRecognizers: [String: SFSpeechRecognizer] = [:]
|
||||
private var activeRecognizer: SFSpeechRecognizer?
|
||||
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
|
||||
private var recognitionTask: SFSpeechRecognitionTask?
|
||||
|
||||
// Supported speech recognition languages (Canadian English and French)
|
||||
private static let supportedLocales = ["en-CA", "fr-CA"]
|
||||
var detectedLanguage: String = "en-CA"
|
||||
|
||||
// Audio playback - use direct speech synthesis for reliability
|
||||
private var speechSynthesizer: AVSpeechSynthesizer?
|
||||
private var speechDelegate: SpeechSynthesizerDelegate?
|
||||
|
||||
// Maximum images per message
|
||||
private let maxImagesPerMessage = 5
|
||||
|
||||
// Supported image types
|
||||
static let supportedImageTypes: [UTType] = [.png, .jpeg, .gif, .webP, .heic]
|
||||
|
||||
// Recent images from Downloads and Desktop
|
||||
var recentImages: [URL] = []
|
||||
|
||||
func initialize() async {
|
||||
service = await AppleIntelligenceService()
|
||||
ttsService = TextToSpeechService()
|
||||
sttService = await SpeechToTextService()
|
||||
|
||||
// Initialize speech recognizers for all supported locales
|
||||
for localeId in Self.supportedLocales {
|
||||
if let recognizer = SFSpeechRecognizer(locale: Locale(identifier: localeId)) {
|
||||
speechRecognizers[localeId] = recognizer
|
||||
}
|
||||
}
|
||||
|
||||
// Default to system locale if supported, otherwise en-CA
|
||||
let systemLocale = Locale.current.identifier
|
||||
if speechRecognizers[systemLocale] != nil {
|
||||
detectedLanguage = systemLocale
|
||||
} else if systemLocale.starts(with: "fr") {
|
||||
detectedLanguage = "fr-CA"
|
||||
} else {
|
||||
detectedLanguage = "en-CA"
|
||||
}
|
||||
activeRecognizer = speechRecognizers[detectedLanguage]
|
||||
|
||||
loadRecentImages()
|
||||
}
|
||||
|
||||
// MARK: - Recent Images
|
||||
|
||||
func loadRecentImages() {
|
||||
let fileManager = FileManager.default
|
||||
let homeDir = fileManager.homeDirectoryForCurrentUser
|
||||
|
||||
let folders = [
|
||||
homeDir.appendingPathComponent("Downloads"),
|
||||
homeDir.appendingPathComponent("Desktop")
|
||||
]
|
||||
|
||||
let imageExtensions = ["png", "jpg", "jpeg", "gif", "webp", "heic", "heif"]
|
||||
|
||||
var allImages: [(url: URL, date: Date)] = []
|
||||
|
||||
for folder in folders {
|
||||
guard let contents = try? fileManager.contentsOfDirectory(
|
||||
at: folder,
|
||||
includingPropertiesForKeys: [.contentModificationDateKey, .isRegularFileKey],
|
||||
options: [.skipsHiddenFiles]
|
||||
) else { continue }
|
||||
|
||||
for url in contents {
|
||||
let ext = url.pathExtension.lowercased()
|
||||
guard imageExtensions.contains(ext) else { continue }
|
||||
|
||||
if let attributes = try? url.resourceValues(forKeys: [.contentModificationDateKey, .isRegularFileKey]),
|
||||
attributes.isRegularFile == true,
|
||||
let modDate = attributes.contentModificationDate {
|
||||
allImages.append((url: url, date: modDate))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by date descending and take last 10
|
||||
recentImages = allImages
|
||||
.sorted { $0.date > $1.date }
|
||||
.prefix(10)
|
||||
.map { $0.url }
|
||||
}
|
||||
|
||||
func addRecentImage(_ url: URL) {
|
||||
addImage(from: url)
|
||||
}
|
||||
|
||||
var isServiceAvailable: Bool {
|
||||
@ -128,77 +22,19 @@ final class ChatViewModel {
|
||||
}
|
||||
}
|
||||
|
||||
var canSend: Bool {
|
||||
!inputText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty || !pendingImages.isEmpty
|
||||
}
|
||||
|
||||
// MARK: - Image Handling
|
||||
|
||||
func addImage(from url: URL) {
|
||||
guard pendingImages.count < maxImagesPerMessage else {
|
||||
errorMessage = "Maximum \(maxImagesPerMessage) images per message"
|
||||
return
|
||||
}
|
||||
|
||||
do {
|
||||
let data = try Data(contentsOf: url)
|
||||
let attachment = ImageAttachment(data: data, filename: url.lastPathComponent)
|
||||
pendingImages.append(attachment)
|
||||
errorMessage = nil
|
||||
} catch {
|
||||
errorMessage = "Failed to load image: \(error.localizedDescription)"
|
||||
}
|
||||
}
|
||||
|
||||
func addImageFromPasteboard() {
|
||||
guard let image = NSPasteboard.general.readObjects(
|
||||
forClasses: [NSImage.self],
|
||||
options: nil
|
||||
)?.first as? NSImage else {
|
||||
return
|
||||
}
|
||||
|
||||
guard pendingImages.count < maxImagesPerMessage else {
|
||||
errorMessage = "Maximum \(maxImagesPerMessage) images per message"
|
||||
return
|
||||
}
|
||||
|
||||
if let tiffData = image.tiffRepresentation,
|
||||
let bitmap = NSBitmapImageRep(data: tiffData),
|
||||
let pngData = bitmap.representation(using: .png, properties: [:]) {
|
||||
let attachment = ImageAttachment(data: pngData, filename: "pasted_image.png")
|
||||
pendingImages.append(attachment)
|
||||
errorMessage = nil
|
||||
}
|
||||
}
|
||||
|
||||
func removePendingImage(_ attachment: ImageAttachment) {
|
||||
pendingImages.removeAll { $0.id == attachment.id }
|
||||
}
|
||||
|
||||
func clearPendingImages() {
|
||||
pendingImages.removeAll()
|
||||
}
|
||||
|
||||
// MARK: - Messaging
|
||||
|
||||
func sendMessage() {
|
||||
let text = inputText.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
guard !text.isEmpty || !pendingImages.isEmpty else { return }
|
||||
guard !text.isEmpty else { return }
|
||||
guard !isLoading else { return }
|
||||
|
||||
// Capture images before clearing
|
||||
let imagesToSend = pendingImages
|
||||
|
||||
// Add user message with images
|
||||
let userMessage = ChatMessage(role: .user, content: text, images: imagesToSend)
|
||||
// Add user message
|
||||
let userMessage = ChatMessage(role: .user, content: text)
|
||||
messages.append(userMessage)
|
||||
inputText = ""
|
||||
pendingImages = []
|
||||
errorMessage = nil
|
||||
|
||||
// Add placeholder for assistant response
|
||||
let assistantMessage = ChatMessage(role: .assistant, content: "", isStreaming: true)
|
||||
var assistantMessage = ChatMessage(role: .assistant, content: "", isStreaming: true)
|
||||
messages.append(assistantMessage)
|
||||
|
||||
isLoading = true
|
||||
@ -209,20 +45,14 @@ final class ChatViewModel {
|
||||
throw AppleIntelligenceError.modelNotAvailable
|
||||
}
|
||||
|
||||
// Convert attachments to service format
|
||||
let images = imagesToSend.map { attachment in
|
||||
(data: attachment.data, filename: attachment.filename)
|
||||
}
|
||||
|
||||
let stream = await service.streamComplete(
|
||||
prompt: text,
|
||||
temperature: nil,
|
||||
maxTokens: nil,
|
||||
images: images
|
||||
maxTokens: nil
|
||||
)
|
||||
|
||||
var fullResponse = ""
|
||||
for try await (partialResponse, _) in stream {
|
||||
for try await partialResponse in stream {
|
||||
fullResponse = partialResponse
|
||||
// Update the last message (assistant's response)
|
||||
if let index = messages.lastIndex(where: { $0.role == .assistant }) {
|
||||
@ -263,279 +93,4 @@ final class ChatViewModel {
|
||||
messages.removeAll()
|
||||
errorMessage = nil
|
||||
}
|
||||
|
||||
// MARK: - Voice Input (Speech-to-Text)
|
||||
|
||||
func toggleRecording() {
|
||||
if isRecording {
|
||||
stopRecording()
|
||||
} else {
|
||||
startRecording()
|
||||
}
|
||||
}
|
||||
|
||||
func startRecording() {
|
||||
Task {
|
||||
// Use nonisolated helper to avoid MainActor isolation inheritance in TCC callback
|
||||
let status = await Self.requestSpeechAuthorization()
|
||||
|
||||
guard status == .authorized else {
|
||||
self.errorMessage = "Speech recognition not authorized"
|
||||
return
|
||||
}
|
||||
self.beginRecording()
|
||||
}
|
||||
}
|
||||
|
||||
/// Request speech recognition authorization without MainActor isolation.
|
||||
/// This prevents Swift 6 strict concurrency from asserting MainActor in the TCC callback.
|
||||
private nonisolated static func requestSpeechAuthorization() async -> SFSpeechRecognizerAuthorizationStatus {
|
||||
await withCheckedContinuation { continuation in
|
||||
SFSpeechRecognizer.requestAuthorization { status in
|
||||
continuation.resume(returning: status)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates audio tap handler in nonisolated context to avoid MainActor isolation inheritance.
|
||||
/// Audio taps run on CoreAudio's RealtimeMessenger queue, not MainActor.
|
||||
private nonisolated static func createAudioTapHandler(
|
||||
request: SFSpeechAudioBufferRecognitionRequest,
|
||||
levelUpdater: RecordingLevelUpdater
|
||||
) -> (AVAudioPCMBuffer, AVAudioTime) -> Void {
|
||||
return { buffer, _ in
|
||||
request.append(buffer)
|
||||
|
||||
// Calculate audio level for visual feedback
|
||||
guard let channelData = buffer.floatChannelData else { return }
|
||||
let channelDataValue = channelData.pointee
|
||||
let channelDataValueArray = stride(from: 0, to: Int(buffer.frameLength), by: buffer.stride).map { channelDataValue[$0] }
|
||||
let rms = sqrt(channelDataValueArray.map { $0 * $0 }.reduce(0, +) / Float(buffer.frameLength))
|
||||
let avgPower = 20 * log10(rms)
|
||||
let level = max(0, min(1, (avgPower + 50) / 50))
|
||||
|
||||
levelUpdater.updateLevel(level)
|
||||
}
|
||||
}
|
||||
|
||||
private func beginRecording() {
|
||||
// Try to find an available recognizer
|
||||
let recognizer = activeRecognizer ?? speechRecognizers.values.first { $0.isAvailable }
|
||||
guard let speechRecognizer = recognizer, speechRecognizer.isAvailable else {
|
||||
errorMessage = "Speech recognition not available"
|
||||
return
|
||||
}
|
||||
|
||||
// Stop any existing recording
|
||||
if audioEngine != nil {
|
||||
stopRecording()
|
||||
}
|
||||
|
||||
audioEngine = AVAudioEngine()
|
||||
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
|
||||
|
||||
guard let audioEngine = audioEngine,
|
||||
let recognitionRequest = recognitionRequest else {
|
||||
errorMessage = "Failed to initialize audio engine"
|
||||
return
|
||||
}
|
||||
|
||||
recognitionRequest.shouldReportPartialResults = true
|
||||
|
||||
// Enable automatic language detection if available (macOS 14+)
|
||||
if #available(macOS 14, *) {
|
||||
recognitionRequest.addsPunctuation = true
|
||||
}
|
||||
|
||||
let inputNode = audioEngine.inputNode
|
||||
let recordingFormat = inputNode.outputFormat(forBus: 0)
|
||||
|
||||
// Use nonisolated static function to create audio tap handler
|
||||
// This breaks MainActor isolation inheritance in the closure
|
||||
let levelUpdater = RecordingLevelUpdater(viewModel: self)
|
||||
let audioTapHandler = Self.createAudioTapHandler(request: recognitionRequest, levelUpdater: levelUpdater)
|
||||
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat, block: audioTapHandler)
|
||||
|
||||
audioEngine.prepare()
|
||||
|
||||
do {
|
||||
try audioEngine.start()
|
||||
isRecording = true
|
||||
|
||||
// Use a sendable wrapper for recognition results with language detection
|
||||
let resultHandler = RecognitionResultHandler(viewModel: self)
|
||||
|
||||
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
|
||||
resultHandler.handleResult(result: result, error: error)
|
||||
}
|
||||
} catch {
|
||||
errorMessage = "Failed to start recording: \(error.localizedDescription)"
|
||||
cleanupRecording()
|
||||
}
|
||||
}
|
||||
|
||||
/// Switch to a different language for speech recognition
|
||||
func switchLanguage(to localeId: String) {
|
||||
guard let recognizer = speechRecognizers[localeId] else { return }
|
||||
activeRecognizer = recognizer
|
||||
detectedLanguage = localeId
|
||||
}
|
||||
|
||||
/// Get available languages for speech recognition
|
||||
var availableLanguages: [(id: String, name: String)] {
|
||||
speechRecognizers.keys.sorted().compactMap { localeId in
|
||||
let locale = Locale(identifier: localeId)
|
||||
let name = locale.localizedString(forIdentifier: localeId) ?? localeId
|
||||
return (id: localeId, name: name)
|
||||
}
|
||||
}
|
||||
|
||||
func stopRecording() {
|
||||
recognitionRequest?.endAudio()
|
||||
cleanupRecording()
|
||||
}
|
||||
|
||||
fileprivate func cleanupRecording() {
|
||||
audioEngine?.stop()
|
||||
audioEngine?.inputNode.removeTap(onBus: 0)
|
||||
audioEngine = nil
|
||||
recognitionRequest = nil
|
||||
recognitionTask?.cancel()
|
||||
recognitionTask = nil
|
||||
isRecording = false
|
||||
recordingLevel = 0
|
||||
}
|
||||
|
||||
// MARK: - Voice Output (Text-to-Speech)
|
||||
|
||||
func speakMessage(_ message: ChatMessage) {
|
||||
guard !message.content.isEmpty else { return }
|
||||
|
||||
// If already speaking this message, stop
|
||||
if isSpeaking && speakingMessageId == message.id {
|
||||
stopSpeaking()
|
||||
return
|
||||
}
|
||||
|
||||
// Stop any current speech
|
||||
stopSpeaking()
|
||||
|
||||
speakingMessageId = message.id
|
||||
isSpeaking = true
|
||||
|
||||
// Create utterance
|
||||
let utterance = AVSpeechUtterance(string: message.content)
|
||||
utterance.rate = AVSpeechUtteranceDefaultSpeechRate
|
||||
utterance.pitchMultiplier = 1.0
|
||||
utterance.volume = 1.0
|
||||
|
||||
// Detect message language and use appropriate voice
|
||||
let isFrench = Self.detectFrench(message.content)
|
||||
let language = isFrench ? "fr-CA" : "en-US"
|
||||
utterance.voice = AVSpeechSynthesisVoice(language: language)
|
||||
|
||||
// Create synthesizer and delegate
|
||||
let synthesizer = AVSpeechSynthesizer()
|
||||
speechDelegate = SpeechSynthesizerDelegate { [weak self] in
|
||||
Task { @MainActor in
|
||||
self?.isSpeaking = false
|
||||
self?.speakingMessageId = nil
|
||||
self?.speechDelegate = nil
|
||||
self?.speechSynthesizer = nil
|
||||
}
|
||||
}
|
||||
synthesizer.delegate = speechDelegate
|
||||
speechSynthesizer = synthesizer
|
||||
|
||||
// Speak directly
|
||||
synthesizer.speak(utterance)
|
||||
}
|
||||
|
||||
func stopSpeaking() {
|
||||
speechSynthesizer?.stopSpeaking(at: .immediate)
|
||||
speechSynthesizer = nil
|
||||
speechDelegate = nil
|
||||
isSpeaking = false
|
||||
speakingMessageId = nil
|
||||
}
|
||||
|
||||
/// Detect if text is likely French based on common words
|
||||
private static func detectFrench(_ text: String) -> Bool {
|
||||
let lowercased = text.lowercased()
|
||||
let frenchIndicators = [
|
||||
" le ", " la ", " les ", " un ", " une ", " des ",
|
||||
" je ", " tu ", " il ", " elle ", " nous ", " vous ", " ils ", " elles ",
|
||||
" est ", " sont ", " avoir ", " être ", " fait ", " faire ",
|
||||
" que ", " qui ", " quoi ", " dans ", " pour ", " avec ", " sur ",
|
||||
" ce ", " cette ", " ces ", " mon ", " ma ", " mes ",
|
||||
" pas ", " plus ", " très ", " bien ", " aussi ",
|
||||
"bonjour", "merci", "salut", "oui", "non", "peut",
|
||||
" et ", " ou ", " mais ", " donc ", " car ",
|
||||
"c'est", "j'ai", "qu'est", "n'est", "d'un", "l'on"
|
||||
]
|
||||
|
||||
let frenchCount = frenchIndicators.filter { lowercased.contains($0) }.count
|
||||
return frenchCount >= 2
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Speech Synthesizer Delegate
|
||||
|
||||
private final class SpeechSynthesizerDelegate: NSObject, AVSpeechSynthesizerDelegate, @unchecked Sendable {
|
||||
let onFinish: () -> Void
|
||||
|
||||
init(onFinish: @escaping () -> Void) {
|
||||
self.onFinish = onFinish
|
||||
}
|
||||
|
||||
func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
|
||||
onFinish()
|
||||
}
|
||||
|
||||
func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didCancel utterance: AVSpeechUtterance) {
|
||||
onFinish()
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Sendable Wrappers for Audio Callbacks
|
||||
|
||||
/// Wrapper to safely update recording level from audio callback thread
|
||||
private final class RecordingLevelUpdater: @unchecked Sendable {
|
||||
private weak var viewModel: ChatViewModel?
|
||||
|
||||
init(viewModel: ChatViewModel) {
|
||||
self.viewModel = viewModel
|
||||
}
|
||||
|
||||
func updateLevel(_ level: Float) {
|
||||
Task { @MainActor [weak viewModel] in
|
||||
viewModel?.recordingLevel = level
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper to safely handle recognition results from Speech framework callback
|
||||
private final class RecognitionResultHandler: @unchecked Sendable {
|
||||
private weak var viewModel: ChatViewModel?
|
||||
|
||||
init(viewModel: ChatViewModel) {
|
||||
self.viewModel = viewModel
|
||||
}
|
||||
|
||||
func handleResult(result: SFSpeechRecognitionResult?, error: Error?) {
|
||||
// Extract data before crossing actor boundary (SFSpeechRecognitionResult is not Sendable)
|
||||
let transcription = result?.bestTranscription.formattedString
|
||||
let isFinal = result?.isFinal ?? false
|
||||
let hasError = error != nil
|
||||
|
||||
Task { @MainActor [weak viewModel] in
|
||||
if let transcription = transcription {
|
||||
viewModel?.inputText = transcription
|
||||
}
|
||||
|
||||
if hasError || isFinal {
|
||||
viewModel?.cleanupRecording()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,33 +1,17 @@
|
||||
import SwiftUI
|
||||
import UniformTypeIdentifiers
|
||||
|
||||
struct ChatView: View {
|
||||
@Bindable var viewModel: ChatViewModel
|
||||
@FocusState private var isInputFocused: Bool
|
||||
@State private var isShowingFilePicker = false
|
||||
@State private var isDragOver = false
|
||||
@State private var previewImageURL: URL?
|
||||
|
||||
var body: some View {
|
||||
HStack(spacing: 0) {
|
||||
// Recent images sidebar
|
||||
if !viewModel.recentImages.isEmpty {
|
||||
recentImagesSidebar
|
||||
Divider()
|
||||
}
|
||||
|
||||
// Main chat area
|
||||
VStack(spacing: 0) {
|
||||
// Messages list
|
||||
ScrollViewReader { proxy in
|
||||
ScrollView {
|
||||
LazyVStack(spacing: 12) {
|
||||
ForEach(viewModel.messages) { message in
|
||||
MessageBubble(
|
||||
message: message,
|
||||
isSpeaking: viewModel.speakingMessageId == message.id,
|
||||
onSpeak: { viewModel.speakMessage(message) }
|
||||
)
|
||||
MessageBubble(message: message)
|
||||
.id(message.id)
|
||||
}
|
||||
}
|
||||
@ -71,260 +55,14 @@ struct ChatView: View {
|
||||
|
||||
Divider()
|
||||
|
||||
// Pending images preview
|
||||
if !viewModel.pendingImages.isEmpty {
|
||||
pendingImagesView
|
||||
}
|
||||
|
||||
// Input area
|
||||
inputArea
|
||||
}
|
||||
.onDrop(of: [.fileURL, .image], isTargeted: $isDragOver) { providers in
|
||||
handleDrop(providers: providers)
|
||||
return true
|
||||
}
|
||||
.overlay {
|
||||
if isDragOver {
|
||||
RoundedRectangle(cornerRadius: 8)
|
||||
.stroke(Color.accentColor, lineWidth: 3)
|
||||
.background(Color.accentColor.opacity(0.1))
|
||||
.padding(4)
|
||||
}
|
||||
}
|
||||
}
|
||||
.frame(minWidth: 500, minHeight: 500)
|
||||
.toolbar {
|
||||
ToolbarItem(placement: .primaryAction) {
|
||||
Button {
|
||||
viewModel.loadRecentImages()
|
||||
} label: {
|
||||
Image(systemName: "arrow.clockwise")
|
||||
}
|
||||
.help("Refresh recent images")
|
||||
}
|
||||
ToolbarItem(placement: .primaryAction) {
|
||||
Button {
|
||||
viewModel.clearChat()
|
||||
} label: {
|
||||
Image(systemName: "trash")
|
||||
}
|
||||
.help("Clear chat")
|
||||
.disabled(viewModel.messages.isEmpty)
|
||||
}
|
||||
}
|
||||
.task {
|
||||
await viewModel.initialize()
|
||||
}
|
||||
.onAppear {
|
||||
NSApp.setActivationPolicy(.regular)
|
||||
NSApp.activate(ignoringOtherApps: true)
|
||||
|
||||
DispatchQueue.main.asyncAfter(deadline: .now() + 0.2) {
|
||||
if let window = NSApp.windows.first(where: { $0.title == "Chat" }) {
|
||||
window.makeKeyAndOrderFront(nil)
|
||||
}
|
||||
isInputFocused = true
|
||||
}
|
||||
}
|
||||
.onDisappear {
|
||||
if NSApp.windows.filter({ $0.isVisible && $0.title != "" }).isEmpty {
|
||||
NSApp.setActivationPolicy(.accessory)
|
||||
}
|
||||
}
|
||||
.fileImporter(
|
||||
isPresented: $isShowingFilePicker,
|
||||
allowedContentTypes: ChatViewModel.supportedImageTypes,
|
||||
allowsMultipleSelection: true
|
||||
) { result in
|
||||
switch result {
|
||||
case .success(let urls):
|
||||
for url in urls {
|
||||
if url.startAccessingSecurityScopedResource() {
|
||||
viewModel.addImage(from: url)
|
||||
url.stopAccessingSecurityScopedResource()
|
||||
}
|
||||
}
|
||||
case .failure(let error):
|
||||
viewModel.errorMessage = error.localizedDescription
|
||||
}
|
||||
}
|
||||
.sheet(item: $previewImageURL) { url in
|
||||
ImagePreviewSheet(url: url) {
|
||||
viewModel.addRecentImage(url)
|
||||
previewImageURL = nil
|
||||
} onCancel: {
|
||||
previewImageURL = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Drag & Drop Handler
|
||||
|
||||
private func handleDrop(providers: [NSItemProvider]) {
|
||||
for provider in providers {
|
||||
// Try to load as file URL first
|
||||
if provider.hasItemConformingToTypeIdentifier(UTType.fileURL.identifier) {
|
||||
provider.loadItem(forTypeIdentifier: UTType.fileURL.identifier, options: nil) { item, error in
|
||||
guard error == nil else { return }
|
||||
|
||||
if let data = item as? Data,
|
||||
let url = URL(dataRepresentation: data, relativeTo: nil) {
|
||||
DispatchQueue.main.async {
|
||||
viewModel.addImage(from: url)
|
||||
}
|
||||
} else if let url = item as? URL {
|
||||
DispatchQueue.main.async {
|
||||
viewModel.addImage(from: url)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Try to load as image data
|
||||
else if provider.hasItemConformingToTypeIdentifier(UTType.image.identifier) {
|
||||
provider.loadDataRepresentation(forTypeIdentifier: UTType.image.identifier) { data, error in
|
||||
guard let data = data, error == nil else { return }
|
||||
DispatchQueue.main.async {
|
||||
let attachment = ImageAttachment(data: data, filename: "dropped_image.png")
|
||||
if viewModel.pendingImages.count < 5 {
|
||||
viewModel.pendingImages.append(attachment)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Recent Images Sidebar
|
||||
|
||||
private var recentImagesSidebar: some View {
|
||||
VStack(alignment: .leading, spacing: 8) {
|
||||
Text("Recent")
|
||||
.font(.headline)
|
||||
.foregroundStyle(.secondary)
|
||||
.padding(.horizontal, 8)
|
||||
.padding(.top, 8)
|
||||
|
||||
ScrollView {
|
||||
LazyVStack(spacing: 8) {
|
||||
ForEach(viewModel.recentImages, id: \.self) { url in
|
||||
RecentImageThumbnail(url: url) {
|
||||
previewImageURL = url
|
||||
}
|
||||
}
|
||||
}
|
||||
.padding(.horizontal, 8)
|
||||
.padding(.bottom, 8)
|
||||
}
|
||||
}
|
||||
.frame(width: 100)
|
||||
.background(Color(nsColor: .controlBackgroundColor).opacity(0.5))
|
||||
}
|
||||
|
||||
// MARK: - Pending Images Preview
|
||||
|
||||
private var pendingImagesView: some View {
|
||||
ScrollView(.horizontal, showsIndicators: false) {
|
||||
HStack(spacing: 8) {
|
||||
ForEach(viewModel.pendingImages) { attachment in
|
||||
pendingImageThumbnail(attachment)
|
||||
}
|
||||
}
|
||||
.padding(.horizontal)
|
||||
.padding(.vertical, 8)
|
||||
}
|
||||
.background(Color(nsColor: .controlBackgroundColor))
|
||||
}
|
||||
|
||||
private func pendingImageThumbnail(_ attachment: ImageAttachment) -> some View {
|
||||
ZStack(alignment: .topTrailing) {
|
||||
if let thumbnail = attachment.thumbnail {
|
||||
Image(nsImage: thumbnail)
|
||||
.resizable()
|
||||
.aspectRatio(contentMode: .fill)
|
||||
.frame(width: 60, height: 60)
|
||||
.clipShape(RoundedRectangle(cornerRadius: 8))
|
||||
} else {
|
||||
RoundedRectangle(cornerRadius: 8)
|
||||
.fill(Color.gray.opacity(0.3))
|
||||
.frame(width: 60, height: 60)
|
||||
.overlay {
|
||||
Image(systemName: "photo")
|
||||
.foregroundStyle(.secondary)
|
||||
}
|
||||
}
|
||||
|
||||
Button {
|
||||
viewModel.removePendingImage(attachment)
|
||||
} label: {
|
||||
Image(systemName: "xmark.circle.fill")
|
||||
.font(.system(size: 16))
|
||||
.foregroundStyle(.white)
|
||||
.background(Circle().fill(.black.opacity(0.6)).frame(width: 18, height: 18))
|
||||
}
|
||||
.buttonStyle(.plain)
|
||||
.offset(x: 6, y: -6)
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Input Area
|
||||
|
||||
private var inputArea: some View {
|
||||
HStack(spacing: 8) {
|
||||
Button {
|
||||
isShowingFilePicker = true
|
||||
} label: {
|
||||
Image(systemName: "photo.badge.plus")
|
||||
.font(.title3)
|
||||
.foregroundStyle(.secondary)
|
||||
}
|
||||
.buttonStyle(.plain)
|
||||
.help("Add image (or paste with ⌘V)")
|
||||
|
||||
// Language toggle for speech recognition
|
||||
Button {
|
||||
// Toggle between en-CA and fr-CA
|
||||
let newLang = viewModel.detectedLanguage == "en-CA" ? "fr-CA" : "en-CA"
|
||||
viewModel.switchLanguage(to: newLang)
|
||||
} label: {
|
||||
Text(viewModel.detectedLanguage == "fr-CA" ? "FR" : "EN")
|
||||
.font(.caption.bold())
|
||||
.foregroundStyle(.secondary)
|
||||
.frame(width: 24, height: 24)
|
||||
.background(
|
||||
RoundedRectangle(cornerRadius: 4)
|
||||
.fill(Color.secondary.opacity(0.1))
|
||||
)
|
||||
}
|
||||
.buttonStyle(.plain)
|
||||
.help("Speech language: \(viewModel.detectedLanguage) (click to toggle)")
|
||||
|
||||
// Microphone button for voice input
|
||||
Button {
|
||||
viewModel.toggleRecording()
|
||||
} label: {
|
||||
ZStack {
|
||||
if viewModel.isRecording {
|
||||
// Recording indicator with level
|
||||
Circle()
|
||||
.fill(Color.red.opacity(0.3))
|
||||
.frame(width: 28 + CGFloat(viewModel.recordingLevel) * 10,
|
||||
height: 28 + CGFloat(viewModel.recordingLevel) * 10)
|
||||
.animation(.easeInOut(duration: 0.1), value: viewModel.recordingLevel)
|
||||
}
|
||||
Image(systemName: viewModel.isRecording ? "mic.fill" : "mic")
|
||||
.font(.title3)
|
||||
.foregroundStyle(viewModel.isRecording ? .red : .secondary)
|
||||
}
|
||||
}
|
||||
.buttonStyle(.plain)
|
||||
.help(viewModel.isRecording ? "Stop recording" : "Voice input")
|
||||
|
||||
HStack(spacing: 12) {
|
||||
TextField("Message...", text: $viewModel.inputText, axis: .vertical)
|
||||
.textFieldStyle(.plain)
|
||||
.lineLimit(1...5)
|
||||
.focused($isInputFocused)
|
||||
.onSubmit {
|
||||
if viewModel.canSend {
|
||||
if !viewModel.inputText.isEmpty {
|
||||
viewModel.sendMessage()
|
||||
}
|
||||
}
|
||||
@ -344,104 +82,53 @@ struct ChatView: View {
|
||||
} label: {
|
||||
Image(systemName: "arrow.up.circle.fill")
|
||||
.font(.title2)
|
||||
.foregroundStyle(viewModel.canSend ? Color.accentColor : Color.gray)
|
||||
.foregroundStyle(viewModel.inputText.isEmpty ? .gray : .accentColor)
|
||||
}
|
||||
.buttonStyle(.plain)
|
||||
.disabled(!viewModel.canSend)
|
||||
.disabled(viewModel.inputText.isEmpty)
|
||||
}
|
||||
}
|
||||
.padding()
|
||||
.onPasteCommand(of: [.image, .png, .jpeg, .tiff]) { providers in
|
||||
for provider in providers {
|
||||
// Try to load as image
|
||||
if provider.hasItemConformingToTypeIdentifier(UTType.image.identifier) {
|
||||
provider.loadDataRepresentation(forTypeIdentifier: UTType.image.identifier) { data, _ in
|
||||
if let data = data {
|
||||
DispatchQueue.main.async {
|
||||
let attachment = ImageAttachment(data: data, filename: "pasted_image.png")
|
||||
if viewModel.pendingImages.count < 5 {
|
||||
viewModel.pendingImages.append(attachment)
|
||||
}
|
||||
.frame(minWidth: 400, minHeight: 500)
|
||||
.toolbar {
|
||||
ToolbarItem(placement: .primaryAction) {
|
||||
Button {
|
||||
viewModel.clearChat()
|
||||
} label: {
|
||||
Image(systemName: "trash")
|
||||
}
|
||||
.help("Clear chat")
|
||||
.disabled(viewModel.messages.isEmpty)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
// Fallback to pasteboard check
|
||||
viewModel.addImageFromPasteboard()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Recent Image Thumbnail
|
||||
|
||||
struct RecentImageThumbnail: View {
|
||||
let url: URL
|
||||
let onTap: () -> Void
|
||||
|
||||
@State private var thumbnail: NSImage?
|
||||
|
||||
var body: some View {
|
||||
Button(action: onTap) {
|
||||
ZStack {
|
||||
if let thumbnail = thumbnail {
|
||||
Image(nsImage: thumbnail)
|
||||
.resizable()
|
||||
.aspectRatio(contentMode: .fill)
|
||||
.frame(width: 80, height: 80)
|
||||
.clipShape(RoundedRectangle(cornerRadius: 8))
|
||||
} else {
|
||||
RoundedRectangle(cornerRadius: 8)
|
||||
.fill(Color.gray.opacity(0.3))
|
||||
.frame(width: 80, height: 80)
|
||||
.overlay {
|
||||
ProgressView()
|
||||
.scaleEffect(0.6)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
.buttonStyle(.plain)
|
||||
.help(url.lastPathComponent)
|
||||
.task {
|
||||
await loadThumbnail()
|
||||
await viewModel.initialize()
|
||||
}
|
||||
.onAppear {
|
||||
// Force the app to become active and accept keyboard input
|
||||
NSApp.setActivationPolicy(.regular)
|
||||
NSApp.activate(ignoringOtherApps: true)
|
||||
|
||||
DispatchQueue.main.asyncAfter(deadline: .now() + 0.2) {
|
||||
// Make sure the window is key
|
||||
if let window = NSApp.windows.first(where: { $0.title == "Chat" }) {
|
||||
window.makeKeyAndOrderFront(nil)
|
||||
}
|
||||
isInputFocused = true
|
||||
}
|
||||
}
|
||||
|
||||
private func loadThumbnail() async {
|
||||
guard let image = NSImage(contentsOf: url) else { return }
|
||||
|
||||
let maxSize: CGFloat = 80
|
||||
let ratio = min(maxSize / image.size.width, maxSize / image.size.height, 1.0)
|
||||
let newSize = NSSize(
|
||||
width: image.size.width * ratio,
|
||||
height: image.size.height * ratio
|
||||
)
|
||||
|
||||
let thumb = NSImage(size: newSize)
|
||||
thumb.lockFocus()
|
||||
image.draw(
|
||||
in: NSRect(origin: .zero, size: newSize),
|
||||
from: NSRect(origin: .zero, size: image.size),
|
||||
operation: .copy,
|
||||
fraction: 1.0
|
||||
)
|
||||
thumb.unlockFocus()
|
||||
|
||||
await MainActor.run {
|
||||
thumbnail = thumb
|
||||
.onDisappear {
|
||||
// Return to accessory mode when chat is closed
|
||||
if NSApp.windows.filter({ $0.isVisible && $0.title != "" }).isEmpty {
|
||||
NSApp.setActivationPolicy(.accessory)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Message Bubble

struct MessageBubble: View {
let message: ChatMessage
var isSpeaking: Bool = false
var onSpeak: (() -> Void)? = nil
@State private var showCopied = false

var body: some View {
HStack {
@@ -450,11 +137,6 @@ struct MessageBubble: View {
}

VStack(alignment: message.role == .user ? .trailing : .leading, spacing: 4) {
if !message.images.isEmpty {
imageGrid
}

if !message.content.isEmpty {
Text(message.content)
.textSelection(.enabled)
.padding(.horizontal, 12)
@@ -462,7 +144,6 @@ struct MessageBubble: View {
.background(bubbleColor)
.foregroundStyle(message.role == .user ? .white : .primary)
.clipShape(RoundedRectangle(cornerRadius: 16))
}

if message.isStreaming {
HStack(spacing: 4) {
@@ -473,45 +154,6 @@ struct MessageBubble: View {
.foregroundStyle(.secondary)
}
}

// Action buttons for assistant messages
if message.role == .assistant && !message.content.isEmpty && !message.isStreaming {
HStack(spacing: 12) {
// Speaker button for TTS
Button {
onSpeak?()
} label: {
HStack(spacing: 4) {
Image(systemName: isSpeaking ? "stop.fill" : "speaker.wave.2")
Text(isSpeaking ? "Stop" : "Speak")
}
.font(.caption)
.foregroundStyle(isSpeaking ? .red : .secondary)
}
.buttonStyle(.plain)

// Copy button
Button {
NSPasteboard.general.clearContents()
NSPasteboard.general.setString(message.content, forType: .string)
showCopied = true
DispatchQueue.main.asyncAfter(deadline: .now() + 1.5) {
showCopied = false
}
} label: {
HStack(spacing: 4) {
Image(systemName: showCopied ? "checkmark" : "doc.on.doc")
Text(showCopied ? "Copied" : "Copy")
}
.font(.caption)
.foregroundStyle(.secondary)
}
.buttonStyle(.plain)

Spacer()
}
.padding(.top, 2)
}
}

if message.role == .assistant {
@@ -520,32 +162,6 @@ struct MessageBubble: View {
}
}

@ViewBuilder
private var imageGrid: some View {
let columns = min(message.images.count, 3)
LazyVGrid(
columns: Array(repeating: GridItem(.flexible(), spacing: 4), count: columns),
spacing: 4
) {
ForEach(message.images) { attachment in
if let thumbnail = attachment.thumbnail {
Image(nsImage: thumbnail)
.resizable()
.aspectRatio(contentMode: .fill)
.frame(width: 80, height: 80)
.clipShape(RoundedRectangle(cornerRadius: 8))
}
}
}
.padding(4)
.background(
message.role == .user
? Color.accentColor.opacity(0.8)
: Color(nsColor: .controlBackgroundColor)
)
.clipShape(RoundedRectangle(cornerRadius: 12))
}

private var bubbleColor: Color {
switch message.role {
case .user:
@@ -555,65 +171,3 @@ struct MessageBubble: View {
}
}
}

// MARK: - Image Preview Sheet

struct ImagePreviewSheet: View {
let url: URL
let onConfirm: () -> Void
let onCancel: () -> Void

@State private var image: NSImage?

var body: some View {
VStack(spacing: 16) {
Text("Add Image")
.font(.headline)

if let image = image {
Image(nsImage: image)
.resizable()
.aspectRatio(contentMode: .fit)
.frame(maxWidth: 500, maxHeight: 400)
.clipShape(RoundedRectangle(cornerRadius: 8))
.shadow(radius: 4)
} else {
RoundedRectangle(cornerRadius: 8)
.fill(Color.gray.opacity(0.2))
.frame(width: 300, height: 200)
.overlay {
ProgressView()
}
}

Text(url.lastPathComponent)
.font(.caption)
.foregroundStyle(.secondary)
.lineLimit(1)

HStack(spacing: 16) {
Button("Cancel") {
onCancel()
}
.keyboardShortcut(.cancelAction)

Button("Add to Message") {
onConfirm()
}
.keyboardShortcut(.defaultAction)
.buttonStyle(.borderedProminent)
}
}
.padding(24)
.frame(minWidth: 400, minHeight: 300)
.task {
image = NSImage(contentsOf: url)
}
}
}

// MARK: - URL Identifiable Extension

extension URL: @retroactive Identifiable {
public var id: String { absoluteString }
}

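With `URL` conforming to `Identifiable`, a URL value can drive item-based presentation directly. A minimal sketch, assuming an optional `previewURL` state property (an assumption, not shown in this diff):

// Sketch only: `previewURL` stands in for an assumed `@State private var previewURL: URL?`.
.sheet(item: $previewURL) { url in
    ImagePreviewSheet(
        url: url,
        onConfirm: { previewURL = nil },  // assumed handling; the real handler presumably attaches the image
        onCancel: { previewURL = nil }
    )
}
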
@@ -2,7 +2,6 @@ import SwiftUI

struct SettingsView: View {
@Bindable var settings: AppSettings
var serverManager: ServerManager?
@Environment(\.dismiss) private var dismiss

var body: some View {
@@ -11,7 +10,7 @@ struct SettingsView: View {
TextField("Host", text: $settings.host)
.textFieldStyle(.roundedBorder)

TextField("Port", value: $settings.port, format: .number.grouping(.never))
TextField("Port", value: $settings.port, format: .number)
.textFieldStyle(.roundedBorder)

SecureField("API Key (optional)", text: $settings.apiKey)
@@ -23,13 +22,6 @@ struct SettingsView: View {
Toggle("Auto-start server on launch", isOn: $settings.autoStartServer)
}

Section("API") {
Toggle("Enable gRPC reflection", isOn: $settings.enableReflection)
.onChange(of: settings.enableReflection) { _, _ in
serverManager?.restart()
}
}

Section {
HStack {
Button("Reset to Defaults") {
@@ -46,7 +38,7 @@ struct SettingsView: View {
}
}
.formStyle(.grouped)
.frame(width: 400, height: 380)
.frame(width: 400, height: 310)
.fixedSize()
.onAppear {
NSApp.setActivationPolicy(.regular)

@@ -0,0 +1,238 @@
// DO NOT EDIT.
// swift-format-ignore-file
// swiftlint:disable all
//
// Generated protocol buffer code for apple_intelligence.proto

import Foundation
import SwiftProtobuf

// MARK: - Messages

struct Appleintelligence_CompletionRequest: Sendable, SwiftProtobuf.Message {
static let protoMessageName: String = "appleintelligence.CompletionRequest"

var prompt: String = ""
var temperature: Float = 0
var maxTokens: Int32 = 0

var hasTemperature: Bool = false
var hasMaxTokens: Bool = false

var unknownFields = SwiftProtobuf.UnknownStorage()

init() {}

init(prompt: String, temperature: Float? = nil, maxTokens: Int32? = nil) {
self.prompt = prompt
if let temp = temperature {
self.temperature = temp
self.hasTemperature = true
}
if let tokens = maxTokens {
self.maxTokens = tokens
self.hasMaxTokens = true
}
}

mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
while let fieldNumber = try decoder.nextFieldNumber() {
switch fieldNumber {
case 1: try decoder.decodeSingularStringField(value: &prompt)
case 2:
try decoder.decodeSingularFloatField(value: &temperature)
hasTemperature = true
case 3:
try decoder.decodeSingularInt32Field(value: &maxTokens)
hasMaxTokens = true
default: break
}
}
}

func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
if !prompt.isEmpty {
try visitor.visitSingularStringField(value: prompt, fieldNumber: 1)
}
if hasTemperature {
try visitor.visitSingularFloatField(value: temperature, fieldNumber: 2)
}
if hasMaxTokens {
try visitor.visitSingularInt32Field(value: maxTokens, fieldNumber: 3)
}
try unknownFields.traverse(visitor: &visitor)
}

static func ==(lhs: Self, rhs: Self) -> Bool {
lhs.prompt == rhs.prompt && lhs.temperature == rhs.temperature && lhs.maxTokens == rhs.maxTokens && lhs.unknownFields == rhs.unknownFields
}

func isEqualTo(message: any SwiftProtobuf.Message) -> Bool {
guard let other = message as? Self else { return false }
return self == other
}
}

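A minimal round-trip sketch for the hand-rolled message above, using SwiftProtobuf's standard binary coding helpers (field values are illustrative):

// Sketch only: illustrative values.
let request = Appleintelligence_CompletionRequest(prompt: "Summarize this file", temperature: 0.7, maxTokens: 256)
let wireBytes = try request.serializedData()                                     // binary protobuf encoding
let decoded = try Appleintelligence_CompletionRequest(serializedData: wireBytes)
assert(decoded.prompt == request.prompt && decoded.hasTemperature)
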
struct Appleintelligence_CompletionResponse: Sendable, SwiftProtobuf.Message {
|
||||
static let protoMessageName: String = "appleintelligence.CompletionResponse"
|
||||
|
||||
var id: String = ""
|
||||
var text: String = ""
|
||||
var finishReason: String = ""
|
||||
|
||||
var unknownFields = SwiftProtobuf.UnknownStorage()
|
||||
|
||||
init() {}
|
||||
|
||||
mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
|
||||
while let fieldNumber = try decoder.nextFieldNumber() {
|
||||
switch fieldNumber {
|
||||
case 1: try decoder.decodeSingularStringField(value: &id)
|
||||
case 2: try decoder.decodeSingularStringField(value: &text)
|
||||
case 3: try decoder.decodeSingularStringField(value: &finishReason)
|
||||
default: break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
|
||||
if !id.isEmpty {
|
||||
try visitor.visitSingularStringField(value: id, fieldNumber: 1)
|
||||
}
|
||||
if !text.isEmpty {
|
||||
try visitor.visitSingularStringField(value: text, fieldNumber: 2)
|
||||
}
|
||||
if !finishReason.isEmpty {
|
||||
try visitor.visitSingularStringField(value: finishReason, fieldNumber: 3)
|
||||
}
|
||||
try unknownFields.traverse(visitor: &visitor)
|
||||
}
|
||||
|
||||
static func ==(lhs: Self, rhs: Self) -> Bool {
|
||||
lhs.id == rhs.id && lhs.text == rhs.text && lhs.finishReason == rhs.finishReason && lhs.unknownFields == rhs.unknownFields
|
||||
}
|
||||
|
||||
func isEqualTo(message: any SwiftProtobuf.Message) -> Bool {
|
||||
guard let other = message as? Self else { return false }
|
||||
return self == other
|
||||
}
|
||||
}
|
||||
|
||||
struct Appleintelligence_CompletionChunk: Sendable, SwiftProtobuf.Message {
|
||||
static let protoMessageName: String = "appleintelligence.CompletionChunk"
|
||||
|
||||
var id: String = ""
|
||||
var delta: String = ""
|
||||
var isFinal: Bool = false
|
||||
var finishReason: String = ""
|
||||
|
||||
var hasFinishReason: Bool {
|
||||
!finishReason.isEmpty
|
||||
}
|
||||
|
||||
var unknownFields = SwiftProtobuf.UnknownStorage()
|
||||
|
||||
init() {}
|
||||
|
||||
mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
|
||||
while let fieldNumber = try decoder.nextFieldNumber() {
|
||||
switch fieldNumber {
|
||||
case 1: try decoder.decodeSingularStringField(value: &id)
|
||||
case 2: try decoder.decodeSingularStringField(value: &delta)
|
||||
case 3: try decoder.decodeSingularBoolField(value: &isFinal)
|
||||
case 4: try decoder.decodeSingularStringField(value: &finishReason)
|
||||
default: break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
|
||||
if !id.isEmpty {
|
||||
try visitor.visitSingularStringField(value: id, fieldNumber: 1)
|
||||
}
|
||||
if !delta.isEmpty {
|
||||
try visitor.visitSingularStringField(value: delta, fieldNumber: 2)
|
||||
}
|
||||
if isFinal {
|
||||
try visitor.visitSingularBoolField(value: isFinal, fieldNumber: 3)
|
||||
}
|
||||
if !finishReason.isEmpty {
|
||||
try visitor.visitSingularStringField(value: finishReason, fieldNumber: 4)
|
||||
}
|
||||
try unknownFields.traverse(visitor: &visitor)
|
||||
}
|
||||
|
||||
static func ==(lhs: Self, rhs: Self) -> Bool {
|
||||
lhs.id == rhs.id && lhs.delta == rhs.delta && lhs.isFinal == rhs.isFinal && lhs.finishReason == rhs.finishReason && lhs.unknownFields == rhs.unknownFields
|
||||
}
|
||||
|
||||
func isEqualTo(message: any SwiftProtobuf.Message) -> Bool {
|
||||
guard let other = message as? Self else { return false }
|
||||
return self == other
|
||||
}
|
||||
}
|
||||
|
||||
struct Appleintelligence_HealthRequest: Sendable, SwiftProtobuf.Message {
|
||||
static let protoMessageName: String = "appleintelligence.HealthRequest"
|
||||
|
||||
var unknownFields = SwiftProtobuf.UnknownStorage()
|
||||
|
||||
init() {}
|
||||
|
||||
mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
|
||||
while let _ = try decoder.nextFieldNumber() {}
|
||||
}
|
||||
|
||||
func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
|
||||
try unknownFields.traverse(visitor: &visitor)
|
||||
}
|
||||
|
||||
static func ==(lhs: Self, rhs: Self) -> Bool {
|
||||
lhs.unknownFields == rhs.unknownFields
|
||||
}
|
||||
|
||||
func isEqualTo(message: any SwiftProtobuf.Message) -> Bool {
|
||||
guard let other = message as? Self else { return false }
|
||||
return self == other
|
||||
}
|
||||
}
|
||||
|
||||
struct Appleintelligence_HealthResponse: Sendable, SwiftProtobuf.Message {
|
||||
static let protoMessageName: String = "appleintelligence.HealthResponse"
|
||||
|
||||
var healthy: Bool = false
|
||||
var modelStatus: String = ""
|
||||
|
||||
var unknownFields = SwiftProtobuf.UnknownStorage()
|
||||
|
||||
init() {}
|
||||
|
||||
mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
|
||||
while let fieldNumber = try decoder.nextFieldNumber() {
|
||||
switch fieldNumber {
|
||||
case 1: try decoder.decodeSingularBoolField(value: &healthy)
|
||||
case 2: try decoder.decodeSingularStringField(value: &modelStatus)
|
||||
default: break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
|
||||
if healthy {
|
||||
try visitor.visitSingularBoolField(value: healthy, fieldNumber: 1)
|
||||
}
|
||||
if !modelStatus.isEmpty {
|
||||
try visitor.visitSingularStringField(value: modelStatus, fieldNumber: 2)
|
||||
}
|
||||
try unknownFields.traverse(visitor: &visitor)
|
||||
}
|
||||
|
||||
static func ==(lhs: Self, rhs: Self) -> Bool {
|
||||
lhs.healthy == rhs.healthy && lhs.modelStatus == rhs.modelStatus && lhs.unknownFields == rhs.unknownFields
|
||||
}
|
||||
|
||||
func isEqualTo(message: any SwiftProtobuf.Message) -> Bool {
|
||||
guard let other = message as? Self else { return false }
|
||||
return self == other
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -4,51 +4,63 @@ import GRPCProtobuf
|
||||
import GRPCNIOTransportHTTP2
|
||||
|
||||
/// gRPC service provider for Apple Intelligence
|
||||
public struct AppleIntelligenceProvider: Appleintelligence_AppleIntelligenceService.ServiceProtocol {
|
||||
public struct AppleIntelligenceProvider: RegistrableRPCService {
|
||||
/// Service descriptor
|
||||
public static let serviceDescriptor = ServiceDescriptor(
|
||||
fullyQualifiedService: "appleintelligence.AppleIntelligence"
|
||||
)
|
||||
|
||||
/// Method descriptors
|
||||
enum Methods {
|
||||
static let complete = MethodDescriptor(
|
||||
service: AppleIntelligenceProvider.serviceDescriptor,
|
||||
method: "Complete"
|
||||
)
|
||||
static let streamComplete = MethodDescriptor(
|
||||
service: AppleIntelligenceProvider.serviceDescriptor,
|
||||
method: "StreamComplete"
|
||||
)
|
||||
static let health = MethodDescriptor(
|
||||
service: AppleIntelligenceProvider.serviceDescriptor,
|
||||
method: "Health"
|
||||
)
|
||||
}
|
||||
|
||||
/// The underlying AI service
|
||||
private let service: AppleIntelligenceService
|
||||
|
||||
/// Text-to-Speech service
|
||||
private let ttsService: TextToSpeechService?
|
||||
|
||||
/// Speech-to-Text service
|
||||
private let sttService: SpeechToTextService?
|
||||
|
||||
/// Optional API key for authentication
|
||||
private let apiKey: String?
|
||||
|
||||
public init(
|
||||
service: AppleIntelligenceService,
|
||||
ttsService: TextToSpeechService? = nil,
|
||||
sttService: SpeechToTextService? = nil,
|
||||
apiKey: String? = nil
|
||||
) {
|
||||
public init(service: AppleIntelligenceService, apiKey: String? = nil) {
|
||||
self.service = service
|
||||
self.ttsService = ttsService
|
||||
self.sttService = sttService
|
||||
self.apiKey = apiKey
|
||||
}
|
||||
|
||||
// MARK: - ServiceProtocol Implementation
|
||||
public func registerMethods<Transport: ServerTransport>(with router: inout RPCRouter<Transport>) {
|
||||
// Register Complete method (unary)
|
||||
router.registerHandler(
|
||||
forMethod: Methods.complete,
|
||||
deserializer: ProtobufDeserializer<Appleintelligence_CompletionRequest>(),
|
||||
serializer: ProtobufSerializer<Appleintelligence_CompletionResponse>()
|
||||
) { request, context in
|
||||
try self.validateApiKey(metadata: request.metadata)
|
||||
|
||||
public func complete(
|
||||
request: GRPCCore.ServerRequest<Appleintelligence_CompletionRequest>,
|
||||
context: GRPCCore.ServerContext
|
||||
) async throws -> GRPCCore.ServerResponse<Appleintelligence_CompletionResponse> {
|
||||
try validateApiKey(metadata: request.metadata)
|
||||
|
||||
let message = request.message
|
||||
|
||||
// Convert protobuf images to service format
|
||||
let images = message.images.map { img in
|
||||
(data: img.data, filename: img.filename.isEmpty ? nil : img.filename)
|
||||
// Collect the single message from the request stream
|
||||
var requestMessage: Appleintelligence_CompletionRequest?
|
||||
for try await message in request.messages {
|
||||
requestMessage = message
|
||||
break
|
||||
}
|
||||
|
||||
let (text, analyses) = try await service.complete(
|
||||
guard let message = requestMessage else {
|
||||
throw RPCError(code: .invalidArgument, message: "No request message received")
|
||||
}
|
||||
|
||||
let text = try await self.service.complete(
|
||||
prompt: message.prompt,
|
||||
temperature: message.hasTemperature ? message.temperature : nil,
|
||||
maxTokens: message.hasMaxTokens ? Int(message.maxTokens) : nil,
|
||||
images: images
|
||||
maxTokens: message.hasMaxTokens ? Int(message.maxTokens) : nil
|
||||
)
|
||||
|
||||
var response = Appleintelligence_CompletionResponse()
|
||||
@ -56,45 +68,42 @@ public struct AppleIntelligenceProvider: Appleintelligence_AppleIntelligenceServ
|
||||
response.text = text
|
||||
response.finishReason = "stop"
|
||||
|
||||
// Include analysis results if requested
|
||||
if message.includeAnalysis {
|
||||
response.imageAnalyses = analyses.map { analysis in
|
||||
var protoAnalysis = Appleintelligence_ImageAnalysis()
|
||||
protoAnalysis.textContent = analysis.textContent
|
||||
protoAnalysis.labels = analysis.labels
|
||||
protoAnalysis.description_p = analysis.description
|
||||
return protoAnalysis
|
||||
}
|
||||
return StreamingServerResponse(single: ServerResponse(message: response))
|
||||
}
|
||||
|
||||
return ServerResponse(message: response)
|
||||
// Register StreamComplete method (server streaming)
|
||||
router.registerHandler(
|
||||
forMethod: Methods.streamComplete,
|
||||
deserializer: ProtobufDeserializer<Appleintelligence_CompletionRequest>(),
|
||||
serializer: ProtobufSerializer<Appleintelligence_CompletionChunk>()
|
||||
) { request, context in
|
||||
try self.validateApiKey(metadata: request.metadata)
|
||||
|
||||
// Collect the single message from the request stream
|
||||
var requestMessage: Appleintelligence_CompletionRequest?
|
||||
for try await message in request.messages {
|
||||
requestMessage = message
|
||||
break
|
||||
}
|
||||
|
||||
public func streamComplete(
|
||||
request: GRPCCore.ServerRequest<Appleintelligence_CompletionRequest>,
|
||||
context: GRPCCore.ServerContext
|
||||
) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_CompletionChunk> {
|
||||
try validateApiKey(metadata: request.metadata)
|
||||
guard let message = requestMessage else {
|
||||
throw RPCError(code: .invalidArgument, message: "No request message received")
|
||||
}
|
||||
|
||||
let message = request.message
|
||||
let completionId = UUID().uuidString
|
||||
|
||||
// Convert protobuf images to service format
|
||||
let images = message.images.map { img in
|
||||
(data: img.data, filename: img.filename.isEmpty ? nil : img.filename)
|
||||
}
|
||||
let prompt = message.prompt
|
||||
let temperature = message.hasTemperature ? message.temperature : nil
|
||||
let maxTokens = message.hasMaxTokens ? Int(message.maxTokens) : nil
|
||||
|
||||
return StreamingServerResponse { writer in
|
||||
let stream = await self.service.streamComplete(
|
||||
prompt: message.prompt,
|
||||
temperature: message.hasTemperature ? message.temperature : nil,
|
||||
maxTokens: message.hasMaxTokens ? Int(message.maxTokens) : nil,
|
||||
images: images
|
||||
prompt: prompt,
|
||||
temperature: temperature,
|
||||
maxTokens: maxTokens
|
||||
)
|
||||
|
||||
var lastContent = ""
|
||||
var isFirstChunk = true
|
||||
for try await (partialResponse, analyses) in stream {
|
||||
for try await partialResponse in stream {
|
||||
// Calculate the delta (new text since last response)
|
||||
let delta: String
|
||||
if partialResponse.hasPrefix(lastContent) {
|
||||
@ -104,25 +113,12 @@ public struct AppleIntelligenceProvider: Appleintelligence_AppleIntelligenceServ
|
||||
}
|
||||
lastContent = partialResponse
|
||||
|
||||
if !delta.isEmpty || isFirstChunk {
|
||||
if !delta.isEmpty {
|
||||
var chunk = Appleintelligence_CompletionChunk()
|
||||
chunk.id = completionId
|
||||
chunk.delta = delta
|
||||
chunk.isFinal = false
|
||||
|
||||
// Include analyses in first chunk if requested
|
||||
if isFirstChunk && message.includeAnalysis, let analyses = analyses {
|
||||
chunk.imageAnalyses = analyses.map { analysis in
|
||||
var protoAnalysis = Appleintelligence_ImageAnalysis()
|
||||
protoAnalysis.textContent = analysis.textContent
|
||||
protoAnalysis.labels = analysis.labels
|
||||
protoAnalysis.description_p = analysis.description
|
||||
return protoAnalysis
|
||||
}
|
||||
}
|
||||
|
||||
try await writer.write(chunk)
|
||||
isFirstChunk = false
|
||||
}
|
||||
}
|
||||
|
||||
@ -138,229 +134,26 @@ public struct AppleIntelligenceProvider: Appleintelligence_AppleIntelligenceServ
|
||||
}
|
||||
}
|
||||
|
||||
public func health(
|
||||
request: GRPCCore.ServerRequest<Appleintelligence_HealthRequest>,
|
||||
context: GRPCCore.ServerContext
|
||||
) async throws -> GRPCCore.ServerResponse<Appleintelligence_HealthResponse> {
|
||||
let isHealthy = await service.isAvailable
|
||||
let modelStatus = await service.getModelStatus()
|
||||
// Register Health method (unary)
|
||||
router.registerHandler(
|
||||
forMethod: Methods.health,
|
||||
deserializer: ProtobufDeserializer<Appleintelligence_HealthRequest>(),
|
||||
serializer: ProtobufSerializer<Appleintelligence_HealthResponse>()
|
||||
) { request, context in
|
||||
// Consume request messages (empty for health check)
|
||||
for try await _ in request.messages {}
|
||||
|
||||
let isHealthy = await self.service.isAvailable
|
||||
let modelStatus = await self.service.getModelStatus()
|
||||
|
||||
var response = Appleintelligence_HealthResponse()
|
||||
response.healthy = isHealthy
|
||||
response.modelStatus = modelStatus
|
||||
|
||||
return ServerResponse(message: response)
|
||||
}
|
||||
|
||||
// MARK: - Text-to-Speech
|
||||
|
||||
public func textToSpeech(
|
||||
request: GRPCCore.ServerRequest<Appleintelligence_TextToSpeechRequest>,
|
||||
context: GRPCCore.ServerContext
|
||||
) async throws -> GRPCCore.ServerResponse<Appleintelligence_TextToSpeechResponse> {
|
||||
try validateApiKey(metadata: request.metadata)
|
||||
|
||||
guard let ttsService = ttsService else {
|
||||
throw RPCError(code: .unavailable, message: "Text-to-Speech service not available")
|
||||
}
|
||||
|
||||
let message = request.message
|
||||
|
||||
// Convert proto config to service config
|
||||
var config = SpeechConfig.default
|
||||
if message.hasVoiceConfig {
|
||||
let voiceConfig = message.voiceConfig
|
||||
config = SpeechConfig(
|
||||
voiceIdentifier: voiceConfig.voiceIdentifier.isEmpty ? nil : voiceConfig.voiceIdentifier,
|
||||
speakingRate: voiceConfig.hasSpeakingRate ? voiceConfig.speakingRate : 0.5,
|
||||
pitchMultiplier: voiceConfig.hasPitchMultiplier ? voiceConfig.pitchMultiplier : 1.0,
|
||||
volume: voiceConfig.hasVolume ? voiceConfig.volume : 1.0
|
||||
)
|
||||
}
|
||||
|
||||
// Convert proto format to service format
|
||||
let outputFormat: AudioOutputFormat
|
||||
switch message.outputFormat {
|
||||
case .wav, .unspecified:
|
||||
outputFormat = .wav
|
||||
case .mp3:
|
||||
outputFormat = .mp3
|
||||
case .UNRECOGNIZED:
|
||||
outputFormat = .wav
|
||||
}
|
||||
|
||||
do {
|
||||
let result = try await ttsService.synthesize(
|
||||
text: message.text,
|
||||
config: config,
|
||||
outputFormat: outputFormat
|
||||
)
|
||||
|
||||
var response = Appleintelligence_TextToSpeechResponse()
|
||||
response.audioData = result.audioData
|
||||
response.format = outputFormat == .wav ? .wav : .mp3
|
||||
response.sampleRate = Int32(result.sampleRate)
|
||||
response.channels = Int32(result.channels)
|
||||
response.durationSeconds = result.durationSeconds
|
||||
|
||||
return ServerResponse(message: response)
|
||||
} catch let error as TextToSpeechError {
|
||||
throw RPCError(code: .internalError, message: error.description)
|
||||
return StreamingServerResponse(single: ServerResponse(message: response))
|
||||
}
|
||||
}
|
||||
|
||||
public func listVoices(
|
||||
request: GRPCCore.ServerRequest<Appleintelligence_ListVoicesRequest>,
|
||||
context: GRPCCore.ServerContext
|
||||
) async throws -> GRPCCore.ServerResponse<Appleintelligence_ListVoicesResponse> {
|
||||
try validateApiKey(metadata: request.metadata)
|
||||
|
||||
guard let ttsService = ttsService else {
|
||||
throw RPCError(code: .unavailable, message: "Text-to-Speech service not available")
|
||||
}
|
||||
|
||||
let message = request.message
|
||||
let languageCode = message.hasLanguageCode ? message.languageCode : nil
|
||||
|
||||
let voices = await ttsService.listVoices(languageCode: languageCode)
|
||||
|
||||
var response = Appleintelligence_ListVoicesResponse()
|
||||
response.voices = voices.map { voice in
|
||||
var protoVoice = Appleintelligence_VoiceInfo()
|
||||
protoVoice.identifier = voice.identifier
|
||||
protoVoice.name = voice.name
|
||||
protoVoice.language = voice.language
|
||||
protoVoice.isPremium = voice.isPremium
|
||||
protoVoice.gender = voice.gender
|
||||
return protoVoice
|
||||
}
|
||||
|
||||
return ServerResponse(message: response)
|
||||
}
|
||||
|
||||
// MARK: - Speech-to-Text
|
||||
|
||||
public func transcribe(
|
||||
request: GRPCCore.ServerRequest<Appleintelligence_TranscribeRequest>,
|
||||
context: GRPCCore.ServerContext
|
||||
) async throws -> GRPCCore.ServerResponse<Appleintelligence_TranscribeResponse> {
|
||||
try validateApiKey(metadata: request.metadata)
|
||||
|
||||
guard let sttService = sttService else {
|
||||
throw RPCError(code: .unavailable, message: "Speech-to-Text service not available")
|
||||
}
|
||||
|
||||
let message = request.message
|
||||
|
||||
guard message.hasAudio else {
|
||||
throw RPCError(code: .invalidArgument, message: "Audio data is required")
|
||||
}
|
||||
|
||||
// Convert proto config to service config
|
||||
var config = TranscriptionConfig.default
|
||||
if message.hasConfig {
|
||||
let protoConfig = message.config
|
||||
config = TranscriptionConfig(
|
||||
languageCode: protoConfig.hasLanguageCode ? protoConfig.languageCode : nil,
|
||||
enablePunctuation: protoConfig.hasEnablePunctuation ? protoConfig.enablePunctuation : true,
|
||||
enableTimestamps: protoConfig.hasEnableTimestamps ? protoConfig.enableTimestamps : false
|
||||
)
|
||||
}
|
||||
|
||||
do {
|
||||
let result = try await sttService.transcribe(
|
||||
audioData: message.audio.data,
|
||||
mimeType: message.audio.mimeType,
|
||||
config: config
|
||||
)
|
||||
|
||||
var response = Appleintelligence_TranscribeResponse()
|
||||
response.text = result.text
|
||||
response.detectedLanguage = result.detectedLanguage
|
||||
response.confidence = result.confidence
|
||||
response.segments = result.segments.map { segment in
|
||||
var protoSegment = Appleintelligence_TranscriptionSegment()
|
||||
protoSegment.text = segment.text
|
||||
protoSegment.startTime = segment.startTime
|
||||
protoSegment.endTime = segment.endTime
|
||||
protoSegment.confidence = segment.confidence
|
||||
return protoSegment
|
||||
}
|
||||
|
||||
return ServerResponse(message: response)
|
||||
} catch let error as SpeechToTextError {
|
||||
throw RPCError(code: .internalError, message: error.description)
|
||||
}
|
||||
}
|
||||
|
||||
public func streamTranscribe(
|
||||
request: GRPCCore.StreamingServerRequest<Appleintelligence_StreamingTranscribeRequest>,
|
||||
context: GRPCCore.ServerContext
|
||||
) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_StreamingTranscribeResponse> {
|
||||
try validateApiKey(metadata: request.metadata)
|
||||
|
||||
guard let sttService = sttService else {
|
||||
throw RPCError(code: .unavailable, message: "Speech-to-Text service not available")
|
||||
}
|
||||
|
||||
return StreamingServerResponse { writer in
|
||||
var config = TranscriptionConfig.default
|
||||
|
||||
// Process incoming stream
|
||||
for try await message in request.messages {
|
||||
switch message.request {
|
||||
case .config(let protoConfig):
|
||||
// First message should be config
|
||||
config = TranscriptionConfig(
|
||||
languageCode: protoConfig.hasLanguageCode ? protoConfig.languageCode : nil,
|
||||
enablePunctuation: protoConfig.hasEnablePunctuation ? protoConfig.enablePunctuation : true,
|
||||
enableTimestamps: protoConfig.hasEnableTimestamps ? protoConfig.enableTimestamps : false
|
||||
)
|
||||
|
||||
// Start streaming transcription
|
||||
let stream = await sttService.streamTranscribe(config: config)
|
||||
Task {
|
||||
do {
|
||||
for try await update in stream {
|
||||
var response = Appleintelligence_StreamingTranscribeResponse()
|
||||
response.partialText = update.partialText
|
||||
response.isFinal = update.isFinal
|
||||
if let finalText = update.finalText {
|
||||
response.finalText = finalText
|
||||
}
|
||||
response.segments = update.segments.map { segment in
|
||||
var protoSegment = Appleintelligence_TranscriptionSegment()
|
||||
protoSegment.text = segment.text
|
||||
protoSegment.startTime = segment.startTime
|
||||
protoSegment.endTime = segment.endTime
|
||||
protoSegment.confidence = segment.confidence
|
||||
return protoSegment
|
||||
}
|
||||
try await writer.write(response)
|
||||
}
|
||||
} catch {
|
||||
// Stream ended or error occurred
|
||||
}
|
||||
}
|
||||
|
||||
case .audioChunk(let chunk):
|
||||
// Feed audio chunk to service
|
||||
try await sttService.feedAudioChunk(chunk)
|
||||
|
||||
case .none:
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// End streaming session
|
||||
await sttService.endStreamingSession()
|
||||
|
||||
return [:]
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Private Helpers
|
||||
|
||||
/// Validate API key if configured
|
||||
private func validateApiKey(metadata: Metadata) throws {
|
||||
guard let expectedKey = apiKey else {
|
||||
|
||||
@ -1,9 +0,0 @@
|
||||
import Foundation
|
||||
|
||||
/// Helper for accessing bundled resources
|
||||
public enum AppleIntelligenceResources {
|
||||
/// URL to the protobuf descriptor set file for reflection
|
||||
public static var descriptorSetURL: URL? {
|
||||
Bundle.module.url(forResource: "apple_intelligence", withExtension: "pb")
|
||||
}
|
||||
}
|
||||
Binary file not shown.
@ -6,7 +6,6 @@ public enum AppleIntelligenceError: Error, CustomStringConvertible, Sendable {
|
||||
case modelNotAvailable
|
||||
case generationFailed(String)
|
||||
case sessionCreationFailed
|
||||
case imageAnalysisFailed(String)
|
||||
|
||||
public var description: String {
|
||||
switch self {
|
||||
@ -16,8 +15,6 @@ public enum AppleIntelligenceError: Error, CustomStringConvertible, Sendable {
|
||||
return "Generation failed: \(reason)"
|
||||
case .sessionCreationFailed:
|
||||
return "Failed to create language model session"
|
||||
case .imageAnalysisFailed(let reason):
|
||||
return "Image analysis failed: \(reason)"
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -27,9 +24,6 @@ public actor AppleIntelligenceService {
|
||||
/// The language model session
|
||||
private var session: LanguageModelSession?
|
||||
|
||||
/// Vision analysis service for image processing
|
||||
private let visionService = VisionAnalysisService()
|
||||
|
||||
/// Whether the model is available
|
||||
public private(set) var isAvailable: Bool = false
|
||||
|
||||
@ -66,42 +60,21 @@ public actor AppleIntelligenceService {
|
||||
}
|
||||
|
||||
/// Generate a completion for the given prompt (non-streaming)
|
||||
public func complete(
|
||||
prompt: String,
|
||||
temperature: Float?,
|
||||
maxTokens: Int?,
|
||||
images: [(data: Data, filename: String?)] = []
|
||||
) async throws -> (text: String, analyses: [VisionAnalysisResult]) {
|
||||
public func complete(prompt: String, temperature: Float?, maxTokens: Int?) async throws -> String {
|
||||
guard isAvailable, let session = session else {
|
||||
throw AppleIntelligenceError.modelNotAvailable
|
||||
}
|
||||
|
||||
// Analyze images if provided
|
||||
var analyses: [VisionAnalysisResult] = []
|
||||
var enhancedPrompt = prompt
|
||||
|
||||
if !images.isEmpty {
|
||||
do {
|
||||
analyses = try await visionService.analyzeMultiple(images: images)
|
||||
let analysesWithFilenames = zip(analyses, images).map { (result: $0.0, filename: $0.1.filename) }
|
||||
let context = await visionService.formatAnalysesAsPromptContext(analyses: analysesWithFilenames)
|
||||
enhancedPrompt = context + "\n\n" + prompt
|
||||
} catch {
|
||||
throw AppleIntelligenceError.imageAnalysisFailed(error.localizedDescription)
|
||||
}
|
||||
}
|
||||
|
||||
let response = try await session.respond(to: enhancedPrompt)
|
||||
return (text: response.content, analyses: analyses)
|
||||
let response = try await session.respond(to: prompt)
|
||||
return response.content
|
||||
}
|
||||
|
||||
/// Generate a streaming completion for the given prompt
|
||||
public func streamComplete(
|
||||
prompt: String,
|
||||
temperature: Float?,
|
||||
maxTokens: Int?,
|
||||
images: [(data: Data, filename: String?)] = []
|
||||
) -> AsyncThrowingStream<(text: String, analyses: [VisionAnalysisResult]?), Error> {
|
||||
maxTokens: Int?
|
||||
) -> AsyncThrowingStream<String, Error> {
|
||||
AsyncThrowingStream { continuation in
|
||||
Task {
|
||||
guard self.isAvailable, let session = self.session else {
|
||||
@ -109,33 +82,10 @@ public actor AppleIntelligenceService {
|
||||
return
|
||||
}
|
||||
|
||||
// Analyze images first if provided
|
||||
var analyses: [VisionAnalysisResult] = []
|
||||
var enhancedPrompt = prompt
|
||||
|
||||
if !images.isEmpty {
|
||||
do {
|
||||
analyses = try await self.visionService.analyzeMultiple(images: images)
|
||||
let analysesWithFilenames = zip(analyses, images).map { (result: $0.0, filename: $0.1.filename) }
|
||||
let context = await self.visionService.formatAnalysesAsPromptContext(analyses: analysesWithFilenames)
|
||||
enhancedPrompt = context + "\n\n" + prompt
|
||||
} catch {
|
||||
continuation.finish(throwing: AppleIntelligenceError.imageAnalysisFailed(error.localizedDescription))
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
let stream = session.streamResponse(to: enhancedPrompt)
|
||||
var isFirst = true
|
||||
let stream = session.streamResponse(to: prompt)
|
||||
for try await partialResponse in stream {
|
||||
// Include analyses only in first chunk
|
||||
if isFirst {
|
||||
continuation.yield((text: partialResponse.content, analyses: analyses))
|
||||
isFirst = false
|
||||
} else {
|
||||
continuation.yield((text: partialResponse.content, analyses: nil))
|
||||
}
|
||||
continuation.yield(partialResponse.content)
|
||||
}
|
||||
continuation.finish()
|
||||
} catch {
|
||||
|
||||
@ -1,387 +0,0 @@
|
||||
import Foundation
|
||||
import Speech
|
||||
import AVFoundation
|
||||
|
||||
// MARK: - Result Types
|
||||
|
||||
/// Transcription result
|
||||
public struct TranscriptionResult: Sendable {
|
||||
public let text: String
|
||||
public let segments: [TranscriptionSegmentResult]
|
||||
public let detectedLanguage: String
|
||||
public let confidence: Float
|
||||
}
|
||||
|
||||
/// Individual transcription segment
|
||||
public struct TranscriptionSegmentResult: Sendable {
|
||||
public let text: String
|
||||
public let startTime: Float
|
||||
public let endTime: Float
|
||||
public let confidence: Float
|
||||
}
|
||||
|
||||
/// Streaming transcription update
|
||||
public struct StreamingTranscriptionUpdate: Sendable {
|
||||
public let partialText: String
|
||||
public let isFinal: Bool
|
||||
public let finalText: String?
|
||||
public let segments: [TranscriptionSegmentResult]
|
||||
}
|
||||
|
||||
/// Transcription configuration
|
||||
public struct TranscriptionConfig: Sendable {
|
||||
public var languageCode: String?
|
||||
public var enablePunctuation: Bool
|
||||
public var enableTimestamps: Bool
|
||||
|
||||
public static let `default` = TranscriptionConfig(
|
||||
languageCode: nil,
|
||||
enablePunctuation: true,
|
||||
enableTimestamps: false
|
||||
)
|
||||
|
||||
public init(
|
||||
languageCode: String? = nil,
|
||||
enablePunctuation: Bool = true,
|
||||
enableTimestamps: Bool = false
|
||||
) {
|
||||
self.languageCode = languageCode
|
||||
self.enablePunctuation = enablePunctuation
|
||||
self.enableTimestamps = enableTimestamps
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Errors
|
||||
|
||||
public enum SpeechToTextError: Error, CustomStringConvertible, Sendable {
|
||||
case notAvailable
|
||||
case authorizationDenied
|
||||
case modelNotReady(String)
|
||||
case transcriptionFailed(String)
|
||||
case invalidAudioFormat
|
||||
case audioProcessingFailed(String)
|
||||
case unsupportedMimeType(String)
|
||||
|
||||
public var description: String {
|
||||
switch self {
|
||||
case .notAvailable: return "Speech recognition not available on this system"
|
||||
case .authorizationDenied: return "Speech recognition authorization denied"
|
||||
case .modelNotReady(let reason): return "Speech model not ready: \(reason)"
|
||||
case .transcriptionFailed(let reason): return "Transcription failed: \(reason)"
|
||||
case .invalidAudioFormat: return "Invalid audio format"
|
||||
case .audioProcessingFailed(let reason): return "Audio processing failed: \(reason)"
|
||||
case .unsupportedMimeType(let type): return "Unsupported audio MIME type: \(type)"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Service Actor
|
||||
|
||||
public actor SpeechToTextService {
|
||||
|
||||
/// Service availability status
|
||||
public private(set) var isAvailable: Bool = false
|
||||
|
||||
/// Streaming session state
|
||||
private var isStreamingActive: Bool = false
|
||||
private var streamingRequest: SFSpeechAudioBufferRecognitionRequest?
|
||||
private var streamingRecognizer: SFSpeechRecognizer?
|
||||
private var streamingTask: SFSpeechRecognitionTask?
|
||||
private var streamingContinuation: AsyncThrowingStream<StreamingTranscriptionUpdate, Error>.Continuation?
|
||||
|
||||
public init() async {
|
||||
await checkAvailability()
|
||||
}
|
||||
|
||||
// MARK: - Public API
|
||||
|
||||
/// Transcribe audio data (file-based)
|
||||
public func transcribe(
|
||||
audioData: Data,
|
||||
mimeType: String,
|
||||
config: TranscriptionConfig = .default
|
||||
) async throws -> TranscriptionResult {
|
||||
guard isAvailable else {
|
||||
throw SpeechToTextError.notAvailable
|
||||
}
|
||||
|
||||
// Convert audio data to file URL for processing
|
||||
let tempURL = try createTempAudioFile(data: audioData, mimeType: mimeType)
|
||||
defer { try? FileManager.default.removeItem(at: tempURL) }
|
||||
|
||||
return try await transcribeWithSFSpeechRecognizer(url: tempURL, config: config)
|
||||
}
|
||||
|
||||
/// Stream transcription from audio chunks sent via gRPC
|
||||
public func streamTranscribe(
|
||||
config: TranscriptionConfig = .default
|
||||
) -> AsyncThrowingStream<StreamingTranscriptionUpdate, Error> {
|
||||
AsyncThrowingStream { continuation in
|
||||
Task {
|
||||
guard await self.isAvailable else {
|
||||
continuation.finish(throwing: SpeechToTextError.notAvailable)
|
||||
return
|
||||
}
|
||||
|
||||
do {
|
||||
try await self.startStreamingSession(config: config, continuation: continuation)
|
||||
} catch {
|
||||
continuation.finish(throwing: error)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Feed audio chunk for streaming transcription (PCM audio data)
public func feedAudioChunk(_ chunk: Data) async throws {
guard isStreamingActive, let request = streamingRequest else {
throw SpeechToTextError.transcriptionFailed("No active streaming session")
}

// Convert raw PCM data to audio buffer
// Assuming 16-bit PCM, mono, 16kHz (common format for speech)
let audioFormat = AVAudioFormat(
commonFormat: .pcmFormatInt16,
sampleRate: 16000,
channels: 1,
interleaved: true
)!

let frameCount = UInt32(chunk.count / 2) // 2 bytes per Int16 sample
guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: frameCount) else {
throw SpeechToTextError.audioProcessingFailed("Failed to create audio buffer")
}

buffer.frameLength = frameCount

// Copy data into buffer
chunk.withUnsafeBytes { rawPtr in
if let int16Ptr = rawPtr.baseAddress?.assumingMemoryBound(to: Int16.self) {
buffer.int16ChannelData?[0].update(from: int16Ptr, count: Int(frameCount))
}
}

request.append(buffer)
}

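The chunk format this method assumes is 16-bit little-endian PCM, mono, 16 kHz. A minimal client-side sketch for producing such a chunk from float samples (names are illustrative, not part of the service):

// Sketch only: converts Float samples in -1.0...1.0 (captured at 16 kHz, mono)
// into the 16-bit little-endian PCM bytes expected by feedAudioChunk(_:).
func makePCMChunk(from floatSamples: [Float]) -> Data {
    var data = Data(capacity: floatSamples.count * 2)
    for sample in floatSamples {
        let clamped = max(-1.0, min(1.0, sample))
        let int16Sample = Int16(clamped * Float(Int16.max))
        withUnsafeBytes(of: int16Sample.littleEndian) { data.append(contentsOf: $0) }
    }
    return data
}
// Usage (assumed service instance): try await sttService.feedAudioChunk(makePCMChunk(from: samples))
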
/// End streaming session
|
||||
public func endStreamingSession() async {
|
||||
streamingRequest?.endAudio()
|
||||
isStreamingActive = false
|
||||
streamingRequest = nil
|
||||
streamingTask = nil
|
||||
streamingRecognizer = nil
|
||||
streamingContinuation = nil
|
||||
}
|
||||
|
||||
/// Get status information
|
||||
public func getStatus() -> String {
|
||||
if isAvailable {
|
||||
return "SFSpeechRecognizer available"
|
||||
} else {
|
||||
return "Speech recognition not available"
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Private Implementation
|
||||
|
||||
private func checkAvailability() async {
|
||||
// Check SFSpeechRecognizer availability
|
||||
let status = SFSpeechRecognizer.authorizationStatus()
|
||||
switch status {
|
||||
case .authorized:
|
||||
isAvailable = SFSpeechRecognizer.supportedLocales().count > 0
|
||||
case .notDetermined:
|
||||
// Request authorization
|
||||
isAvailable = await withCheckedContinuation { continuation in
|
||||
SFSpeechRecognizer.requestAuthorization { newStatus in
|
||||
continuation.resume(returning: newStatus == .authorized)
|
||||
}
|
||||
}
|
||||
default:
|
||||
isAvailable = false
|
||||
}
|
||||
}
|
||||
|
||||
/// Create temporary audio file from data
|
||||
private func createTempAudioFile(data: Data, mimeType: String) throws -> URL {
|
||||
let ext = extensionForMimeType(mimeType)
|
||||
let tempDir = FileManager.default.temporaryDirectory
|
||||
let fileName = UUID().uuidString + "." + ext
|
||||
let fileURL = tempDir.appendingPathComponent(fileName)
|
||||
|
||||
try data.write(to: fileURL)
|
||||
return fileURL
|
||||
}
|
||||
|
||||
/// Get file extension for MIME type
|
||||
private func extensionForMimeType(_ mimeType: String) -> String {
|
||||
switch mimeType.lowercased() {
|
||||
case "audio/wav", "audio/wave", "audio/x-wav":
|
||||
return "wav"
|
||||
case "audio/mp3", "audio/mpeg":
|
||||
return "mp3"
|
||||
case "audio/m4a", "audio/mp4", "audio/x-m4a":
|
||||
return "m4a"
|
||||
case "audio/aac":
|
||||
return "aac"
|
||||
case "audio/flac":
|
||||
return "flac"
|
||||
default:
|
||||
return "wav"
|
||||
}
|
||||
}
|
||||
|
||||
/// Transcribe using SFSpeechRecognizer
|
||||
private func transcribeWithSFSpeechRecognizer(
|
||||
url: URL,
|
||||
config: TranscriptionConfig
|
||||
) async throws -> TranscriptionResult {
|
||||
let locale = Locale(identifier: config.languageCode ?? "en-US")
|
||||
guard let recognizer = SFSpeechRecognizer(locale: locale) else {
|
||||
throw SpeechToTextError.notAvailable
|
||||
}
|
||||
|
||||
guard recognizer.isAvailable else {
|
||||
throw SpeechToTextError.notAvailable
|
||||
}
|
||||
|
||||
let request = SFSpeechURLRecognitionRequest(url: url)
|
||||
request.shouldReportPartialResults = false
|
||||
|
||||
return try await withCheckedThrowingContinuation { continuation in
|
||||
var hasResumed = false
|
||||
|
||||
recognizer.recognitionTask(with: request) { result, error in
|
||||
guard !hasResumed else { return }
|
||||
|
||||
if let error = error {
|
||||
hasResumed = true
|
||||
continuation.resume(throwing: SpeechToTextError.transcriptionFailed(error.localizedDescription))
|
||||
return
|
||||
}
|
||||
|
||||
guard let result = result, result.isFinal else { return }
|
||||
|
||||
hasResumed = true
|
||||
|
||||
let transcription = result.bestTranscription
|
||||
var segments: [TranscriptionSegmentResult] = []
|
||||
|
||||
if config.enableTimestamps {
|
||||
for segment in transcription.segments {
|
||||
segments.append(TranscriptionSegmentResult(
|
||||
text: segment.substring,
|
||||
startTime: Float(segment.timestamp),
|
||||
endTime: Float(segment.timestamp + segment.duration),
|
||||
confidence: segment.confidence
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
let transcriptionResult = TranscriptionResult(
|
||||
text: transcription.formattedString,
|
||||
segments: segments,
|
||||
detectedLanguage: config.languageCode ?? "en-US",
|
||||
confidence: segments.isEmpty ? 1.0 : segments.reduce(0) { $0 + $1.confidence } / Float(segments.count)
|
||||
)
|
||||
|
||||
continuation.resume(returning: transcriptionResult)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Start streaming session for gRPC audio chunks
|
||||
private func startStreamingSession(
|
||||
config: TranscriptionConfig,
|
||||
continuation: AsyncThrowingStream<StreamingTranscriptionUpdate, Error>.Continuation
|
||||
) async throws {
|
||||
let locale = Locale(identifier: config.languageCode ?? "en-US")
|
||||
guard let recognizer = SFSpeechRecognizer(locale: locale) else {
|
||||
throw SpeechToTextError.notAvailable
|
||||
}
|
||||
|
||||
guard recognizer.isAvailable else {
|
||||
throw SpeechToTextError.notAvailable
|
||||
}
|
||||
|
||||
// Set up streaming state
|
||||
isStreamingActive = true
|
||||
streamingRecognizer = recognizer
|
||||
streamingContinuation = continuation
|
||||
|
||||
let request = SFSpeechAudioBufferRecognitionRequest()
|
||||
request.shouldReportPartialResults = true
|
||||
streamingRequest = request
|
||||
|
||||
// Create wrapper to handle results safely
|
||||
let service = self
|
||||
let resultHandler = StreamingResultHandler(
|
||||
config: config,
|
||||
continuation: continuation,
|
||||
onFinish: {
|
||||
Task { await service.endStreamingSession() }
|
||||
}
|
||||
)
|
||||
|
||||
streamingTask = recognizer.recognitionTask(with: request) { result, error in
|
||||
resultHandler.handleResult(result: result, error: error)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Streaming Result Handler
|
||||
|
||||
/// Wrapper to safely handle streaming recognition results
|
||||
private final class StreamingResultHandler: @unchecked Sendable {
|
||||
private let config: TranscriptionConfig
|
||||
private let continuation: AsyncThrowingStream<StreamingTranscriptionUpdate, Error>.Continuation
|
||||
private let onFinish: () -> Void
|
||||
|
||||
init(
|
||||
config: TranscriptionConfig,
|
||||
continuation: AsyncThrowingStream<StreamingTranscriptionUpdate, Error>.Continuation,
|
||||
onFinish: @escaping () -> Void
|
||||
) {
|
||||
self.config = config
|
||||
self.continuation = continuation
|
||||
self.onFinish = onFinish
|
||||
}
|
||||
|
||||
func handleResult(result: SFSpeechRecognitionResult?, error: Error?) {
|
||||
if let error = error {
|
||||
continuation.finish(throwing: SpeechToTextError.transcriptionFailed(error.localizedDescription))
|
||||
onFinish()
|
||||
return
|
||||
}
|
||||
|
||||
guard let result = result else { return }
|
||||
|
||||
let transcription = result.bestTranscription
|
||||
var segments: [TranscriptionSegmentResult] = []
|
||||
|
||||
if config.enableTimestamps {
|
||||
for segment in transcription.segments {
|
||||
segments.append(TranscriptionSegmentResult(
|
||||
text: segment.substring,
|
||||
startTime: Float(segment.timestamp),
|
||||
endTime: Float(segment.timestamp + segment.duration),
|
||||
confidence: segment.confidence
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
let update = StreamingTranscriptionUpdate(
|
||||
partialText: transcription.formattedString,
|
||||
isFinal: result.isFinal,
|
||||
finalText: result.isFinal ? transcription.formattedString : nil,
|
||||
segments: segments
|
||||
)
|
||||
continuation.yield(update)
|
||||
|
||||
if result.isFinal {
|
||||
continuation.finish()
|
||||
onFinish()
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1,280 +0,0 @@
|
||||
import Foundation
|
||||
import AVFoundation
|
||||
|
||||
// MARK: - Result Types
|
||||
|
||||
/// Result of text-to-speech synthesis
|
||||
public struct TextToSpeechResult: Sendable {
|
||||
public let audioData: Data
|
||||
public let format: AudioOutputFormat
|
||||
public let sampleRate: Int
|
||||
public let channels: Int
|
||||
public let durationSeconds: Float
|
||||
}
|
||||
|
||||
/// Supported output formats
|
||||
public enum AudioOutputFormat: Sendable {
|
||||
case wav
|
||||
case mp3
|
||||
}
|
||||
|
||||
/// Voice information
|
||||
public struct VoiceDescription: Sendable {
|
||||
public let identifier: String
|
||||
public let name: String
|
||||
public let language: String
|
||||
public let isPremium: Bool
|
||||
public let gender: String
|
||||
}
|
||||
|
||||
/// Configuration for speech synthesis
|
||||
public struct SpeechConfig: Sendable {
|
||||
public var voiceIdentifier: String?
|
||||
public var speakingRate: Float // 0.0 - 1.0
|
||||
public var pitchMultiplier: Float // 0.5 - 2.0
|
||||
public var volume: Float // 0.0 - 1.0
|
||||
|
||||
public static let `default` = SpeechConfig(
|
||||
voiceIdentifier: nil,
|
||||
speakingRate: 0.5,
|
||||
pitchMultiplier: 1.0,
|
||||
volume: 1.0
|
||||
)
|
||||
|
||||
public init(
|
||||
voiceIdentifier: String? = nil,
|
||||
speakingRate: Float = 0.5,
|
||||
pitchMultiplier: Float = 1.0,
|
||||
volume: Float = 1.0
|
||||
) {
|
||||
self.voiceIdentifier = voiceIdentifier
|
||||
self.speakingRate = speakingRate
|
||||
self.pitchMultiplier = pitchMultiplier
|
||||
self.volume = volume
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Errors
|
||||
|
||||
public enum TextToSpeechError: Error, CustomStringConvertible, Sendable {
|
||||
case invalidVoice(String)
|
||||
case synthesisFailure(String)
|
||||
case encodingFailure(String)
|
||||
case noAudioGenerated
|
||||
case unsupportedFormat
|
||||
|
||||
public var description: String {
|
||||
switch self {
|
||||
case .invalidVoice(let id): return "Invalid voice identifier: \(id)"
|
||||
case .synthesisFailure(let reason): return "Speech synthesis failed: \(reason)"
|
||||
case .encodingFailure(let reason): return "Audio encoding failed: \(reason)"
|
||||
case .noAudioGenerated: return "No audio was generated"
|
||||
case .unsupportedFormat: return "Unsupported audio format"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Service Actor
|
||||
|
||||
public actor TextToSpeechService {
|
||||
/// Keep strong reference to synthesizer during synthesis
|
||||
private var activeSynthesizer: AVSpeechSynthesizer?
|
||||
|
||||
public init() {}
|
||||
|
||||
// MARK: - Public API
|
||||
|
||||
/// Synthesize text to speech
|
||||
public func synthesize(
|
||||
text: String,
|
||||
config: SpeechConfig = .default,
|
||||
outputFormat: AudioOutputFormat = .wav
|
||||
) async throws -> TextToSpeechResult {
|
||||
// Create utterance
|
||||
let utterance = AVSpeechUtterance(string: text)
|
||||
|
||||
// Configure voice
|
||||
if let voiceId = config.voiceIdentifier {
|
||||
if let voice = AVSpeechSynthesisVoice(identifier: voiceId) {
|
||||
utterance.voice = voice
|
||||
} else {
|
||||
throw TextToSpeechError.invalidVoice(voiceId)
|
||||
}
|
||||
} else {
|
||||
// Use default English voice
|
||||
utterance.voice = AVSpeechSynthesisVoice(language: "en-US")
|
||||
}
|
||||
|
||||
// Configure speech parameters
|
||||
utterance.rate = config.speakingRate
|
||||
utterance.pitchMultiplier = config.pitchMultiplier
|
||||
utterance.volume = config.volume
|
||||
|
||||
// Collect PCM data
|
||||
let pcmData = try await collectPCMData(utterance: utterance)
|
||||
|
||||
// Convert to requested format
|
||||
let audioData: Data
|
||||
switch outputFormat {
|
||||
case .wav:
|
||||
audioData = createWAVData(from: pcmData)
|
||||
case .mp3:
|
||||
// Use WAV as fallback (MP3 encoding requires external library)
|
||||
audioData = createWAVData(from: pcmData)
|
||||
}
|
||||
|
||||
// Calculate duration
|
||||
let bytesPerSample = 2 // Int16
|
||||
let totalSamples = pcmData.samples.count / bytesPerSample / pcmData.channelCount
|
||||
let duration = Float(totalSamples) / Float(pcmData.sampleRate)
|
||||
|
||||
return TextToSpeechResult(
|
||||
audioData: audioData,
|
||||
format: outputFormat,
|
||||
sampleRate: Int(pcmData.sampleRate),
|
||||
channels: pcmData.channelCount,
|
||||
durationSeconds: duration
|
||||
)
|
||||
}
|
||||
|
||||
/// List available voices
|
||||
public func listVoices(languageCode: String? = nil) -> [VoiceDescription] {
|
||||
let voices = AVSpeechSynthesisVoice.speechVoices()
|
||||
|
||||
let filtered: [AVSpeechSynthesisVoice]
|
||||
if let lang = languageCode {
|
||||
filtered = voices.filter { $0.language.hasPrefix(lang) }
|
||||
} else {
|
||||
filtered = voices
|
||||
}
|
||||
|
||||
return filtered.map { voice in
|
||||
VoiceDescription(
|
||||
identifier: voice.identifier,
|
||||
name: voice.name,
|
||||
language: voice.language,
|
||||
isPremium: voice.quality == .enhanced || voice.quality == .premium,
|
||||
gender: genderString(for: voice)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Private Implementation
|
||||
|
||||
/// PCM buffer data for internal processing
|
||||
private struct PCMBufferData: Sendable {
|
||||
let samples: Data
|
||||
let sampleRate: Double
|
||||
let channelCount: Int
|
||||
}
|
||||
|
||||
/// Collect PCM data from synthesizer using write callback
|
||||
private func collectPCMData(
|
||||
utterance: AVSpeechUtterance
|
||||
) async throws -> PCMBufferData {
|
||||
// Create and store synthesizer to keep strong reference during synthesis
|
||||
let synthesizer = AVSpeechSynthesizer()
|
||||
self.activeSynthesizer = synthesizer
|
||||
|
||||
defer { self.activeSynthesizer = nil }
|
||||
|
||||
return try await withCheckedThrowingContinuation { continuation in
|
||||
var pcmData = Data()
|
||||
var sampleRate: Double = 0
|
||||
var channelCount: Int = 0
|
||||
var hasResumed = false
|
||||
|
||||
synthesizer.write(utterance) { buffer in
|
||||
guard let pcmBuffer = buffer as? AVAudioPCMBuffer else {
|
||||
// End of audio - empty buffer signals completion
|
||||
if !hasResumed {
|
||||
hasResumed = true
|
||||
if pcmData.isEmpty {
|
||||
continuation.resume(throwing: TextToSpeechError.noAudioGenerated)
|
||||
} else {
|
||||
continuation.resume(returning: PCMBufferData(
|
||||
samples: pcmData,
|
||||
sampleRate: sampleRate,
|
||||
channelCount: channelCount
|
||||
))
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if pcmBuffer.frameLength > 0 {
|
||||
// Store format from first buffer
|
||||
if sampleRate == 0 {
|
||||
sampleRate = pcmBuffer.format.sampleRate
|
||||
channelCount = Int(pcmBuffer.format.channelCount)
|
||||
}
|
||||
|
||||
// Convert float samples to Int16 PCM
|
||||
if let channelData = pcmBuffer.floatChannelData {
|
||||
let frameCount = Int(pcmBuffer.frameLength)
|
||||
for frame in 0..<frameCount {
|
||||
for channel in 0..<channelCount {
|
||||
let sample = channelData[channel][frame]
|
||||
let clampedSample = max(-1.0, min(1.0, sample))
|
||||
let int16Sample = Int16(clampedSample * Float(Int16.max))
|
||||
withUnsafeBytes(of: int16Sample.littleEndian) { bytes in
|
||||
pcmData.append(contentsOf: bytes)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}

    /// Create WAV data from PCM buffer data
    private func createWAVData(from pcmData: PCMBufferData) -> Data {
        let bitsPerSample = 16
        let sampleRate = Int(pcmData.sampleRate)
        let channels = pcmData.channelCount
        let dataSize = pcmData.samples.count

        var header = Data()

        // RIFF header
        header.append(contentsOf: "RIFF".utf8)
        let fileSize = UInt32(dataSize + 36)
        withUnsafeBytes(of: fileSize.littleEndian) { header.append(contentsOf: $0) }
        header.append(contentsOf: "WAVE".utf8)

        // fmt subchunk
        header.append(contentsOf: "fmt ".utf8)
        let subchunk1Size = UInt32(16)
        withUnsafeBytes(of: subchunk1Size.littleEndian) { header.append(contentsOf: $0) }
        let audioFormat = UInt16(1) // PCM
        withUnsafeBytes(of: audioFormat.littleEndian) { header.append(contentsOf: $0) }
        let numChannels = UInt16(channels)
        withUnsafeBytes(of: numChannels.littleEndian) { header.append(contentsOf: $0) }
        let sampleRateU32 = UInt32(sampleRate)
        withUnsafeBytes(of: sampleRateU32.littleEndian) { header.append(contentsOf: $0) }
        let byteRate = UInt32(sampleRate * channels * bitsPerSample / 8)
        withUnsafeBytes(of: byteRate.littleEndian) { header.append(contentsOf: $0) }
        let blockAlign = UInt16(channels * bitsPerSample / 8)
        withUnsafeBytes(of: blockAlign.littleEndian) { header.append(contentsOf: $0) }
        let bitsPerSampleU16 = UInt16(bitsPerSample)
        withUnsafeBytes(of: bitsPerSampleU16.littleEndian) { header.append(contentsOf: $0) }

        // data subchunk
        header.append(contentsOf: "data".utf8)
        let dataU32 = UInt32(dataSize)
        withUnsafeBytes(of: dataU32.littleEndian) { header.append(contentsOf: $0) }

        return header + pcmData.samples
    }
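
    // Worked example of the header arithmetic above (illustrative numbers, not
    // taken from this change): for 22_050 Hz mono 16-bit PCM,
    //   byteRate   = 22_050 * 1 * 16 / 8 = 44_100 bytes per second
    //   blockAlign = 1 * 16 / 8          = 2 bytes per frame
    //   fileSize   = dataSize + 36, because the canonical 44-byte WAV header
    //                contributes 36 bytes after the initial 8-byte "RIFF"/size field.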

    /// Get gender string for voice
    private func genderString(for voice: AVSpeechSynthesisVoice) -> String {
        switch voice.gender {
        case .male: return "male"
        case .female: return "female"
        case .unspecified: return "unspecified"
        @unknown default: return "unknown"
        }
    }
}
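
A rough sketch of how the private helpers above fit together: the wrapper below synthesizes a string straight to WAV bytes. The name synthesizeToWAV and its parameters are assumptions for illustration, not part of this change, and the method would need to live inside the same type as collectPCMData(utterance:) and createWAVData(from:).

    // Hypothetical convenience wrapper (illustrative only).
    func synthesizeToWAV(text: String, voiceIdentifier: String? = nil) async throws -> Data {
        let utterance = AVSpeechUtterance(string: text)
        if let voiceIdentifier, let voice = AVSpeechSynthesisVoice(identifier: voiceIdentifier) {
            utterance.voice = voice
        }
        let pcm = try await collectPCMData(utterance: utterance) // interleaved Int16 samples
        return createWAVData(from: pcm)                          // 44-byte RIFF header + samples
    }
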
@ -1,243 +0,0 @@
import Foundation
import Vision
import CoreImage

#if canImport(AppKit)
import AppKit
#endif

/// Result of Vision framework analysis on an image
public struct VisionAnalysisResult: Sendable {
    public let textContent: String
    public let labels: [String]
    public let description: String

    public init(textContent: String = "", labels: [String] = [], description: String = "") {
        self.textContent = textContent
        self.labels = labels
        self.description = description
    }

    /// Format analysis for LLM context
    public func formatAsContext(imageIndex: Int, filename: String?) -> String {
        var parts: [String] = []

        let imageName = filename ?? "Image \(imageIndex + 1)"

        if !textContent.isEmpty {
            parts.append("Text: \"\(textContent)\"")
        }

        if !labels.isEmpty {
            parts.append("Objects: \(labels.joined(separator: ", "))")
        }

        if parts.isEmpty {
            return "\(imageName): No content detected"
        }

        return "\(imageName): \(parts.joined(separator: " | "))"
    }
}

/// Errors from Vision analysis
public enum VisionAnalysisError: Error, CustomStringConvertible, Sendable {
    case invalidImageData
    case analysisFailure(String)
    case unsupportedFormat

    public var description: String {
        switch self {
        case .invalidImageData:
            return "Invalid or corrupted image data"
        case .analysisFailure(let reason):
            return "Vision analysis failed: \(reason)"
        case .unsupportedFormat:
            return "Unsupported image format"
        }
    }
}

/// Service for analyzing images using Apple's Vision framework
public actor VisionAnalysisService {

    /// Configuration for which analyses to perform
    public struct AnalysisOptions: Sendable {
        public var performOCR: Bool
        public var performClassification: Bool

        public init(performOCR: Bool = true, performClassification: Bool = true) {
            self.performOCR = performOCR
            self.performClassification = performClassification
        }

        public static let all = AnalysisOptions()
        public static let textOnly = AnalysisOptions(performOCR: true, performClassification: false)
    }

    public init() {}

    /// Analyze a single image
    public func analyze(
        imageData: Data,
        options: AnalysisOptions = .all
    ) async throws -> VisionAnalysisResult {
        guard let cgImage = createCGImage(from: imageData) else {
            throw VisionAnalysisError.invalidImageData
        }

        var textContent = ""
        var labels: [String] = []

        // Perform OCR
        if options.performOCR {
            textContent = try await performTextRecognition(on: cgImage)
        }

        // Perform image classification
        if options.performClassification {
            labels = try await performImageClassification(on: cgImage)
        }

        // Build description
        var descriptionParts: [String] = []
        if !textContent.isEmpty {
            let truncatedText = textContent.count > 200
                ? String(textContent.prefix(200)) + "..."
                : textContent
            descriptionParts.append("Contains text: \"\(truncatedText)\"")
        }
        if !labels.isEmpty {
            descriptionParts.append("Shows: \(labels.prefix(5).joined(separator: ", "))")
        }

        let description = descriptionParts.isEmpty
            ? "Image with no recognizable content"
            : descriptionParts.joined(separator: ". ")

        return VisionAnalysisResult(
            textContent: textContent,
            labels: labels,
            description: description
        )
    }

    /// Analyze multiple images
    public func analyzeMultiple(
        images: [(data: Data, filename: String?)],
        options: AnalysisOptions = .all
    ) async throws -> [VisionAnalysisResult] {
        var results: [VisionAnalysisResult] = []

        for image in images {
            let result = try await analyze(imageData: image.data, options: options)
            results.append(result)
        }

        return results
    }

    /// Format multiple analyses as a combined context string for LLM
    public func formatAnalysesAsPromptContext(
        analyses: [(result: VisionAnalysisResult, filename: String?)]
    ) -> String {
        guard !analyses.isEmpty else { return "" }

        var lines: [String] = ["[Image Analysis]"]

        for (index, analysis) in analyses.enumerated() {
            lines.append(analysis.result.formatAsContext(
                imageIndex: index,
                filename: analysis.filename
            ))
        }

        lines.append("[End Image Analysis]")

        return lines.joined(separator: "\n")
    }
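
    // For illustration only (the values are made up), the block produced above
    // looks like:
    //
    //   [Image Analysis]
    //   receipt.png: Text: "Total $12.40" | Objects: paper, document
    //   Image 2: Objects: dog, grass
    //   [End Image Analysis]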

    // MARK: - Private Methods

    private func createCGImage(from data: Data) -> CGImage? {
        #if canImport(AppKit)
        guard let nsImage = NSImage(data: data),
              let cgImage = nsImage.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
            // Try CIImage as fallback
            guard let ciImage = CIImage(data: data) else { return nil }
            let context = CIContext()
            return context.createCGImage(ciImage, from: ciImage.extent)
        }
        return cgImage
        #else
        guard let ciImage = CIImage(data: data) else { return nil }
        let context = CIContext()
        return context.createCGImage(ciImage, from: ciImage.extent)
        #endif
    }

    private func performTextRecognition(on image: CGImage) async throws -> String {
        try await withCheckedThrowingContinuation { continuation in
            let request = VNRecognizeTextRequest { request, error in
                if let error = error {
                    continuation.resume(throwing: VisionAnalysisError.analysisFailure(error.localizedDescription))
                    return
                }

                guard let observations = request.results as? [VNRecognizedTextObservation] else {
                    continuation.resume(returning: "")
                    return
                }

                let recognizedText = observations.compactMap { observation in
                    observation.topCandidates(1).first?.string
                }.joined(separator: "\n")

                continuation.resume(returning: recognizedText)
            }

            request.recognitionLevel = .accurate
            request.usesLanguageCorrection = true

            let handler = VNImageRequestHandler(cgImage: image, options: [:])

            do {
                try handler.perform([request])
            } catch {
                continuation.resume(throwing: VisionAnalysisError.analysisFailure(error.localizedDescription))
            }
        }
    }

    private func performImageClassification(on image: CGImage) async throws -> [String] {
        try await withCheckedThrowingContinuation { continuation in
            let request = VNClassifyImageRequest { request, error in
                if let error = error {
                    continuation.resume(throwing: VisionAnalysisError.analysisFailure(error.localizedDescription))
                    return
                }

                guard let observations = request.results as? [VNClassificationObservation] else {
                    continuation.resume(returning: [])
                    return
                }

                // Filter to high-confidence labels and take top 10
                let labels = observations
                    .filter { $0.confidence > 0.3 }
                    .prefix(10)
                    .map { $0.identifier.replacingOccurrences(of: "_", with: " ") }

                continuation.resume(returning: Array(labels))
            }

            let handler = VNImageRequestHandler(cgImage: image, options: [:])

            do {
                try handler.perform([request])
            } catch {
                continuation.resume(throwing: VisionAnalysisError.analysisFailure(error.localizedDescription))
            }
        }
    }
}
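
A minimal usage sketch for the actor above, assuming an image file on disk (the path and filename below are placeholders, not taken from this change):

    let vision = VisionAnalysisService()
    let imageData = try Data(contentsOf: URL(fileURLWithPath: "screenshot.png"))
    let result = try await vision.analyze(imageData: imageData, options: .all)
    let context = await vision.formatAnalysesAsPromptContext(
        analyses: [(result: result, filename: "screenshot.png")]
    )
    print(context) // "[Image Analysis]" block ready to prepend to an LLM prompt
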
@ -37,21 +37,7 @@ struct AppleIntelligenceServer: AsyncParsableCommand {
            throw ExitCode.failure
        }

        // Initialize speech services
        print("Initializing Text-to-Speech service...")
        let ttsService = TextToSpeechService()

        print("Initializing Speech-to-Text service...")
        let sttService = await SpeechToTextService()
        let sttStatus = await sttService.getStatus()
        print("Speech-to-Text status: \(sttStatus)")

        let provider = AppleIntelligenceProvider(
            service: service,
            ttsService: ttsService,
            sttService: sttService,
            apiKey: config.apiKey
        )
        let provider = AppleIntelligenceProvider(service: service, apiKey: config.apiKey)

        let transport = HTTP2ServerTransport.Posix(
            address: .ipv4(host: bindHost, port: bindPort),
@ -66,15 +52,7 @@ struct AppleIntelligenceServer: AsyncParsableCommand {
            print("API key authentication is enabled")
        }
        print("Server is ready to accept connections")
        print("")
        print("Available services:")
        print(" - Complete/StreamComplete: Text generation with Apple Intelligence")
        print(" - TextToSpeech: Convert text to spoken audio")
        print(" - ListVoices: List available TTS voices")
        print(" - Transcribe: Convert audio file to text")
        print(" - StreamTranscribe: Real-time speech-to-text")
        print("")
        print("Health check: grpcurl -plaintext \(bindHost):\(bindPort) appleintelligence.AppleIntelligenceService/Health")
        print("Health check: grpcurl -plaintext \(bindHost):\(bindPort) appleintelligence.AppleIntelligence/Health")
        print("Press Ctrl+C to stop the server")

        try await server.serve()