Vision-based auto-approval system for the Claude Code CLI, using the MiniCPM-V vision model.

Features:
- Automatic detection of and response to approval prompts
- Screenshot capture and vision analysis via Ollama
- Support for multiple screenshot tools (scrot, gnome-screenshot, etc.)
- Configurable timing and behavior
- Debug mode for troubleshooting
- Comprehensive documentation

Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Jean-Philippe Brule <jp@svrnty.io>
165 lines
5.1 KiB
Python
165 lines
5.1 KiB
Python
"""
|
|
Main entry point for Claude Vision Auto
|
|
"""
|
|
|
|
import sys
|
|
import time
|
|
import select
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
from . import config
|
|
from .screenshot import take_screenshot, cleanup_old_screenshots
|
|
from .vision_analyzer import VisionAnalyzer
|
|
|
|
|
|
# Upper bound on bytes relayed per read. The child is started with bufsize=0,
# so its stdout is an unbuffered raw stream and read(n) returns whatever is
# currently available (at most n bytes) instead of blocking until n arrive.
_READ_CHUNK_SIZE = 4096


def _stop_process(process, timeout: float = 5.0) -> None:
    """Terminate *process* if it is still running, then reap it.

    ``terminate()`` is tried first; if the child has not exited after
    *timeout* seconds it is killed so this call cannot block forever.
    Does nothing when the process has already exited.
    """
    if process.poll() is not None:
        return
    process.terminate()
    try:
        process.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()


def _buffer_has_approval_keyword(output_buffer: bytearray) -> bool:
    """Return True if the buffered output contains any ``config.APPROVAL_KEYWORDS`` entry."""
    # errors='ignore' because trimming the buffer can cut a multi-byte
    # UTF-8 sequence in half.
    text = output_buffer.decode('utf-8', errors='ignore')
    return any(keyword in text for keyword in config.APPROVAL_KEYWORDS)


def _answer_prompt(process, analyzer) -> bool:
    """Take a screenshot, ask the vision model about it, and forward its answer.

    Args:
        process: The running child ``Popen`` whose stdin receives the answer.
        analyzer: Object whose ``analyze_screenshot(path)`` result is sent.

    Returns:
        True if a response was written to the child's stdin, False when the
        screenshot or analysis failed, the model answered WAIT, or the
        child's stdin was already closed.
    """
    print("\n[Vision] Analyzing prompt...", file=sys.stderr)

    screenshot_path = take_screenshot()
    if not screenshot_path:
        print("[Vision] Screenshot failed, waiting for manual input", file=sys.stderr)
        return False

    try:
        response = analyzer.analyze_screenshot(screenshot_path)
    finally:
        # Best-effort cleanup: the screenshot is only needed for the analysis,
        # and failing to delete it must not abort the session.
        try:
            Path(screenshot_path).unlink()
        except OSError:
            pass

    if not response:
        print("[Vision] Analysis failed, waiting for manual input", file=sys.stderr)
        return False

    print(f"[Vision] Response: {response}", file=sys.stderr)

    # Compare ignoring surrounding whitespace so that e.g. "WAIT\n" is not
    # typed into the child as if it were an answer.
    if response.strip().upper() == "WAIT":
        print("[Vision] No action needed (WAIT)", file=sys.stderr)
        return False

    time.sleep(config.RESPONSE_DELAY)
    try:
        process.stdin.write(f"{response}\n".encode('utf-8'))
        process.stdin.flush()
    except BrokenPipeError:
        # The child closed its stdin (most likely it exited); the main loop
        # notices the exit on its next pass.
        print("[Vision] Could not send response (input pipe closed)", file=sys.stderr)
        return False

    print("[Vision] Response sent", file=sys.stderr)
    return True


def run_claude_with_vision(args: list = None):
    """
    Run Claude Code with vision-based auto-approval

    Starts ``claude`` as a child process, echoes its combined stdout/stderr
    to this process's stdout, and whenever the output has been idle for
    ``config.IDLE_THRESHOLD`` seconds and the recent output contains an
    approval keyword, screenshots the screen, asks the vision model what to
    answer, and writes that answer to the child's stdin.

    This function never returns normally; it always ends via ``sys.exit``.

    Args:
        args: Command line arguments to pass to claude

    Raises:
        SystemExit: 1 if Ollama/the model is unreachable or ``claude`` is not
            installed, 130 if interrupted with Ctrl-C, otherwise the child's
            exit code.
    """
    args = args or []

    # Initialize vision analyzer and verify the backend before starting Claude
    analyzer = VisionAnalyzer()

    print("[Claude Vision Auto] Testing Ollama connection...")
    if not analyzer.test_connection():
        print("[ERROR] Cannot connect to Ollama or model not available")
        print(f"Make sure Ollama is running and '{config.VISION_MODEL}' is installed")
        sys.exit(1)

    print("[Claude Vision Auto] Connected to Ollama")
    print(f"[Claude Vision Auto] Using model: {config.VISION_MODEL}")
    print(f"[Claude Vision Auto] Idle threshold: {config.IDLE_THRESHOLD}s")
    print()

    cmd = ['claude', *args]

    # NOTE(review): the child's stdin is a pipe that only this function writes
    # to; nothing here forwards the user's keyboard input, so the "waiting for
    # manual input" messages may not be actionable -- confirm intended design.
    try:
        process = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            bufsize=0,
        )
    except FileNotFoundError:
        print("[ERROR] 'claude' command not found")
        print("Make sure Claude Code CLI is installed")
        sys.exit(1)

    # Monotonic clock: idle detection must not be fooled by wall-clock jumps.
    last_output_time = time.monotonic()
    output_buffer = bytearray()

    cleanup_old_screenshots()

    exit_code = 1
    try:
        while True:
            # Wait briefly for output so the idle check below runs regularly
            readable, _, _ = select.select([process.stdout], [], [], 0.1)

            if readable:
                chunk = process.stdout.read(_READ_CHUNK_SIZE)
                if not chunk:
                    # EOF: the child closed its output
                    break

                # Echo to the terminal
                sys.stdout.buffer.write(chunk)
                sys.stdout.buffer.flush()

                output_buffer.extend(chunk)
                last_output_time = time.monotonic()

                # Keep only the most recent output for keyword matching
                if len(output_buffer) > config.OUTPUT_BUFFER_SIZE:
                    output_buffer = output_buffer[-config.OUTPUT_BUFFER_SIZE:]

            # Idle (no output for threshold seconds): maybe a prompt is waiting
            if time.monotonic() - last_output_time >= config.IDLE_THRESHOLD:
                if _buffer_has_approval_keyword(output_buffer):
                    if config.DEBUG:
                        print("\n[DEBUG] Approval keywords detected in buffer")
                    if _answer_prompt(process, analyzer):
                        output_buffer.clear()

                # Restart the idle timer either way so the same unchanged
                # buffer is not re-scanned on every 0.1s pass.
                last_output_time = time.monotonic()

            # Only stop on process exit once the pipe has nothing left to
            # read; otherwise the tail of the child's output would be lost.
            if not readable and process.poll() is not None:
                if not select.select([process.stdout], [], [], 0)[0]:
                    break

    except KeyboardInterrupt:
        print("\n[Claude Vision Auto] Interrupted by user")
        _stop_process(process)
        exit_code = 130  # conventional status for termination by Ctrl-C
    else:
        exit_code = process.wait()
    finally:
        # No-op on the normal and Ctrl-C paths; on an unexpected error this
        # keeps the child from being orphaned while the exception propagates.
        _stop_process(process)

    # Exit outside the try/finally so the chosen status (including 130) is
    # the one actually reported and real errors are not masked.
    sys.exit(exit_code)
|
|
|
|
|
|
def main():
    """CLI entry point: pass this script's command-line arguments on to Claude Code."""
    run_claude_with_vision(sys.argv[1:])
|
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|