diff --git a/README.md b/README.md
index 8a385f3..c5570c7 100644
--- a/README.md
+++ b/README.md
@@ -6,8 +6,11 @@ A Swift-based gRPC server that exposes Apple Intelligence (Foundation Models) ov
 
 - **gRPC API** - Standard gRPC interface accessible from any language
 - **Streaming Support** - Real-time token streaming for responsive UX
+- **Vision Analysis** - Analyze images with text extraction, labeling, and descriptions
+- **Text-to-Speech** - Convert text to audio (WAV/MP3) with multiple voices
+- **Speech-to-Text** - Transcribe audio files or stream audio in real time
 - **Menu Bar App** - Native macOS app with system tray integration
-- **Built-in Chat UI** - Test the AI directly from the app
+- **Built-in Chat UI** - Test the AI directly from the app with voice input/output
 - **API Key Auth** - Optional bearer token authentication
 - **Auto-Start** - Launch at login and auto-start server options
 
@@ -45,7 +48,7 @@ swift build -c release --product AppleIntelligenceServer
 1. Launch **Apple Intelligence Server** from Applications
 2. Click the brain icon in the menu bar
 3. Toggle **Start Server** to begin accepting connections
-4. Use **Chat** to test the AI directly
+4. Use **Chat** to test the AI directly (supports voice input/output)
 5. Configure host, port, and API key in **Settings**
 
 ### CLI Server
@@ -63,10 +66,19 @@ GRPC_HOST=127.0.0.1 GRPC_PORT=8080 API_KEY=secret .build/release/AppleIntelligen
 ### Service Definition
 
 ```protobuf
-service AppleIntelligence {
+service AppleIntelligenceService {
+  // AI Completion
   rpc Health(HealthRequest) returns (HealthResponse);
   rpc Complete(CompletionRequest) returns (CompletionResponse);
   rpc StreamComplete(CompletionRequest) returns (stream CompletionChunk);
+
+  // Text-to-Speech
+  rpc TextToSpeech(TextToSpeechRequest) returns (TextToSpeechResponse);
+  rpc ListVoices(ListVoicesRequest) returns (ListVoicesResponse);
+
+  // Speech-to-Text
+  rpc Transcribe(TranscribeRequest) returns (TranscribeResponse);
+  rpc StreamTranscribe(stream StreamingTranscribeRequest) returns (stream StreamingTranscribeResponse);
 }
 ```
 
@@ -75,24 +87,134 @@ service AppleIntelligence {
 | Method | Type | Description |
 |--------|------|-------------|
 | `Health` | Unary | Check server and model availability |
-| `Complete` | Unary | Generate complete response |
+| `Complete` | Unary | Generate complete response (supports images) |
 | `StreamComplete` | Server Streaming | Stream tokens as they're generated |
+| `TextToSpeech` | Unary | Convert text to audio |
+| `ListVoices` | Unary | List available TTS voices |
+| `Transcribe` | Unary | Transcribe audio file to text |
+| `StreamTranscribe` | Bidirectional Streaming | Real-time audio transcription |
+
+### Vision Support
+
+The `Complete` and `StreamComplete` methods support image analysis:
+
+```protobuf
+message CompletionRequest {
+  string prompt = 1;
+  optional float temperature = 2;
+  optional int32 max_tokens = 3;
+  repeated ImageData images = 4;      // Attach images for analysis
+  bool include_analysis = 5;          // Return detailed analysis
+}
+
+message ImageData {
+  bytes data = 1;
+  string filename = 2;
+  string mime_type = 3;               // image/png, image/jpeg, etc.
+}
+```
+
+**Supported Image Formats:** PNG, JPEG, GIF, WebP, HEIC
+
+### Text-to-Speech
+
+```protobuf
+message TextToSpeechRequest {
+  string text = 1;
+  AudioFormat output_format = 2;      // WAV or MP3
+  optional VoiceConfig voice_config = 3;
+}
+
+message VoiceConfig {
+  string voice_identifier = 1;        // Voice ID from ListVoices
+  optional float speaking_rate = 2;   // 0.0-1.0, default 0.5
+  optional float pitch_multiplier = 3; // 0.5-2.0, default 1.0
+  optional float volume = 4;          // 0.0-1.0, default 1.0
+}
+```
+
+**Output Formats:** WAV, MP3
+
+### Speech-to-Text
+
+#### File-based Transcription
+
+```protobuf
+message TranscribeRequest {
+  AudioInput audio = 1;
+  optional TranscriptionConfig config = 2;
+}
+
+message AudioInput {
+  bytes data = 1;
+  string mime_type = 2;               // audio/wav, audio/mp3, etc.
+  optional int32 sample_rate = 3;
+  optional int32 channels = 4;
+}
+
+message TranscriptionConfig {
+  optional string language_code = 1;  // e.g., "en-US", "fr-CA"
+  optional bool enable_punctuation = 2;
+  optional bool enable_timestamps = 3;
+}
+```
+
+**Supported Audio Formats:** WAV, MP3, M4A, AAC, FLAC
+
+#### Streaming Transcription
+
+For real-time transcription, use bidirectional streaming:
+
+1. Send a `StreamingTranscribeRequest` with `config` (a `TranscriptionConfig`) set first to configure the session
+2. Send `audio_chunk` messages with PCM audio data (16-bit, 16kHz, mono)
+3. Receive `StreamingTranscribeResponse` with partial and final results
+
+```protobuf
+message StreamingTranscribeRequest {
+  oneof request {
+    TranscriptionConfig config = 1;   // Send first
+    bytes audio_chunk = 2;            // Then audio chunks
+  }
+}
+
+message StreamingTranscribeResponse {
+  string partial_text = 1;
+  bool is_final = 2;
+  string final_text = 3;
+  repeated TranscriptionSegment segments = 4;
+}
+```
 
 ### Quick Test with grpcurl
 
 ```bash
 # Health check
-grpcurl -plaintext localhost:50051 appleintelligence.AppleIntelligence/Health
+grpcurl -plaintext localhost:50051 appleintelligence.AppleIntelligenceService/Health
 
-# Non-streaming completion
+# Text completion
 grpcurl -plaintext \
   -d '{"prompt": "What is 2 + 2?"}' \
-  localhost:50051 appleintelligence.AppleIntelligence/Complete
+  localhost:50051 appleintelligence.AppleIntelligenceService/Complete
 
 # Streaming completion
 grpcurl -plaintext \
   -d '{"prompt": "Tell me a short story"}' \
-  localhost:50051 appleintelligence.AppleIntelligence/StreamComplete
+  localhost:50051 appleintelligence.AppleIntelligenceService/StreamComplete
+
+# List TTS voices
+grpcurl -plaintext \
+  -d '{"language_code": "en-US"}' \
+  localhost:50051 appleintelligence.AppleIntelligenceService/ListVoices
+
+# Text-to-Speech (audio_data in the response is base64-encoded; decode it to get the audio)
+grpcurl -plaintext \
+  -d '{"text": "Hello world", "output_format": 1}' \
+  localhost:50051 appleintelligence.AppleIntelligenceService/TextToSpeech
+
+# Transcribe an audio file (the audio bytes are sent base64-encoded in the JSON request)
+grpcurl -plaintext \
+  -d '{"audio": {"data": "'$(base64 -i audio.wav)'", "mime_type": "audio/wav"}}' \
+  localhost:50051 appleintelligence.AppleIntelligenceService/Transcribe
 ```
 
 ## Configuration
@@ -103,6 +225,21 @@ grpcurl -plaintext \
 | `GRPC_PORT` | `50051` | Port to listen on |
 | `API_KEY` | *none* | Optional API key for authentication |
 
+## Supported Languages
+
+### Speech Recognition (STT)
+- English (US, CA, GB, AU, IN, IE, ZA)
+- French (CA, FR)
+- Spanish (ES, MX)
+- German, Italian, Portuguese, Japanese, Korean, Chinese
+- And many more via the macOS Speech framework
+
+### Text-to-Speech (TTS)
+All voices available in macOS System Settings, including:
+- Premium voices (highest quality, requires download)
+- Enhanced voices (good quality)
+- Default/Compact voices (pre-installed)
+
 ## Client Libraries
 
 Connect from any language with gRPC support:
@@ -120,30 +257,36 @@ See [docs/grpc-client-guide.md](docs/grpc-client-guide.md) for detailed examples
 ```
 apple-intelligence-grpc/
 ├── Package.swift
+├── Proto/
+│   └── apple_intelligence.proto      # gRPC service definition
 ├── Sources/
-│   ├── AppleIntelligenceCore/       # Shared gRPC service code
+│   ├── AppleIntelligenceCore/        # Shared gRPC service code
 │   │   ├── Config.swift
 │   │   ├── Services/
-│   │   │   └── AppleIntelligenceService.swift
+│   │   │   ├── AppleIntelligenceService.swift
+│   │   │   ├── TextToSpeechService.swift
+│   │   │   ├── SpeechToTextService.swift
+│   │   │   └── VisionAnalysisService.swift
 │   │   ├── Providers/
 │   │   │   └── AppleIntelligenceProvider.swift
 │   │   └── Generated/
-│   │       └── AppleIntelligence.pb.swift
-│   ├── AppleIntelligenceServer/     # CLI executable
+│   │       ├── apple_intelligence.pb.swift
+│   │       └── apple_intelligence.grpc.swift
+│   ├── AppleIntelligenceServer/      # CLI executable
 │   │   └── main.swift
-│   └── AppleIntelligenceApp/        # Menu bar app
+│   └── AppleIntelligenceApp/         # Menu bar app
 │       ├── App.swift
 │       ├── ServerManager.swift
 │       ├── Models/
 │       ├── Views/
 │       └── ViewModels/
 ├── scripts/
-│   ├── build-app.sh                 # Build .app bundle
-│   └── create-dmg.sh                # Create DMG installer
+│   ├── build-app.sh                  # Build .app bundle
+│   └── create-dmg.sh                 # Create DMG installer
 └── docs/
-    ├── grpc-client-guide.md         # Client connection examples
-    ├── macos-runner-setup.md        # CI runner setup
-    └── pipeline-configuration.md    # CI/CD configuration
+    ├── grpc-client-guide.md          # Client connection examples
+    ├── macos-runner-setup.md         # CI runner setup
+    └── pipeline-configuration.md     # CI/CD configuration
 ```
 
 ## CI/CD
@@ -182,6 +325,17 @@ See [docs/pipeline-configuration.md](docs/pipeline-configuration.md) for setup i
 - Include the API key in the Authorization header: `Bearer YOUR_API_KEY`
 - Verify the key matches what's configured in Settings
 
+### Speech Recognition Not Working
+
+- Grant microphone permission when prompted
+- Check System Settings → Privacy & Security → Speech Recognition
+- Ensure the language is supported
+
+### TTS Voice Quality
+
+- Download Premium/Enhanced voices from System Settings → Accessibility → Read & Speak
+- Premium voices are larger (~150-500MB) but sound more natural
+
 ## License
 
 MIT