Vision-based auto-approval system for Claude Code CLI using MiniCPM-V vision model. Features: - Automatic detection and response to approval prompts - Screenshot capture and vision analysis via Ollama - Support for multiple screenshot tools (scrot, gnome-screenshot, etc.) - Configurable timing and behavior - Debug mode for troubleshooting - Comprehensive documentation Generated with Claude Code Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: Jean-Philippe Brule <jp@svrnty.io>
134 lines
3.9 KiB
Python
134 lines
3.9 KiB
Python
"""
Vision analysis using MiniCPM-V via Ollama.
"""
|
|
|
|
import base64
from pathlib import Path
from typing import Optional

import requests

from . import config
|
VISION_PROMPT = """You are analyzing a terminal screenshot showing a Claude Code approval prompt.
|
|
|
|
Look for:
|
|
- Menu options like "1. Yes", "2. Yes, allow all", "3. No"
|
|
- Questions asking for approval (create/edit/delete files)
|
|
- Yes/No questions
|
|
|
|
If you see an approval prompt with numbered options:
|
|
- Respond ONLY with the number to select "Yes" (usually "1")
|
|
- Output format: Just the number, nothing else
|
|
|
|
If you see a yes/no question:
|
|
- Respond with: y
|
|
|
|
If you don't see any prompt requiring input:
|
|
- Respond with: WAIT
|
|
|
|
Your response (one word only):"""
|
|
|
|
|
|
class VisionAnalyzer:
    """Analyzes screenshots using a vision model served by Ollama.

    Each screenshot is base64-encoded and posted to the Ollama
    ``/api/generate`` endpoint together with ``VISION_PROMPT``; the model's
    one-word reply ("1", "y", "WAIT", ...) is returned to the caller.
    """

    def __init__(self, ollama_url: Optional[str] = None, model: Optional[str] = None):
        """
        Initialize vision analyzer.

        Args:
            ollama_url: Ollama API URL (default from config)
            model: Vision model name (default from config)
        """
        # Explicit arguments win; fall back to config values otherwise.
        # PEP 484: parameters defaulting to None are annotated Optional[str].
        self.ollama_url = ollama_url or config.OLLAMA_URL
        self.model = model or config.VISION_MODEL

    def analyze_screenshot(self, image_path: str) -> Optional[str]:
        """
        Analyze screenshot and determine what response to give.

        Args:
            image_path: Path to screenshot image

        Returns:
            Response to send ("1", "y", "WAIT", etc.) or None on error
        """
        try:
            # Read and encode the image; Ollama expects base64 strings in
            # the "images" field.
            with open(image_path, 'rb') as f:
                image_data = base64.b64encode(f.read()).decode('utf-8')

            payload = {
                "model": self.model,
                "prompt": VISION_PROMPT,
                "images": [image_data],
                # stream=False yields a single JSON object instead of chunks.
                "stream": False,
            }

            if config.DEBUG:
                print(f"[DEBUG] Sending to Ollama: {self.ollama_url}")
                print(f"[DEBUG] Model: {self.model}")

            response = requests.post(
                self.ollama_url,
                json=payload,
                timeout=config.VISION_TIMEOUT,
            )
            response.raise_for_status()

            result = response.json()
            answer = result.get('response', '').strip()

            if config.DEBUG:
                print(f"[DEBUG] Vision model response: {answer}")

            return answer

        except requests.Timeout:
            # Timeout is handled before RequestException (its superclass)
            # so it gets its own debug message.
            if config.DEBUG:
                print("[DEBUG] Vision analysis timeout")
            return None
        except requests.RequestException as e:
            if config.DEBUG:
                print(f"[DEBUG] Vision API error: {e}")
            return None
        except Exception as e:
            # Best-effort by design: any other failure (missing file,
            # malformed JSON, ...) means "no answer", never a crash.
            if config.DEBUG:
                print(f"[DEBUG] Unexpected error in vision analysis: {e}")
            return None

    def test_connection(self) -> bool:
        """
        Test if Ollama is accessible and model is available.

        Returns:
            True if connection successful, False otherwise
        """
        try:
            # The generate URL and the tags URL share a base; derive one
            # from the other rather than adding a second config value.
            tags_url = self.ollama_url.replace('/api/generate', '/api/tags')
            response = requests.get(tags_url, timeout=5)
            response.raise_for_status()

            data = response.json()
            # .get() guards against malformed tag entries missing "name".
            models = [m.get('name', '') for m in data.get('models', [])]

            if config.DEBUG:
                print(f"[DEBUG] Available models: {models}")

            # Substring match so "minicpm-v" matches tags like "minicpm-v:latest".
            model_available = any(self.model in m for m in models)

            if not model_available:
                print(f"Warning: Model '{self.model}' not found in Ollama")
                print(f"Available models: {', '.join(models)}")

            return model_available

        except Exception as e:
            # Connection probing is best-effort; report failure, don't raise.
            if config.DEBUG:
                print(f"[DEBUG] Connection test failed: {e}")
            return False
|