Vision-module-auto/claude_vision_auto/main.py
Svrnty 41cecca0e2 Initial release of Claude Vision Auto v1.0.0
Vision-based auto-approval system for Claude Code CLI using MiniCPM-V vision model.

Features:
- Automatic detection and response to approval prompts
- Screenshot capture and vision analysis via Ollama
- Support for multiple screenshot tools (scrot, gnome-screenshot, etc.)
- Configurable timing and behavior
- Debug mode for troubleshooting
- Comprehensive documentation

Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Jean-Philippe Brule <jp@svrnty.io>
2025-10-29 10:09:01 -04:00

165 lines
5.1 KiB
Python

"""
Main entry point for Claude Vision Auto
"""
import sys
import time
import select
import subprocess
from pathlib import Path
from . import config
from .screenshot import take_screenshot, cleanup_old_screenshots
from .vision_analyzer import VisionAnalyzer
# Upper bound on the number of bytes pulled from the child's stdout per read.
_READ_CHUNK_SIZE = 4096


def _answer_prompt(analyzer, process):
    """Screenshot the screen, ask the vision model, and send its answer.

    Args:
        analyzer: Object providing ``analyze_screenshot(path)``.
        process: The running child process whose stdin receives the answer.

    Returns:
        True if a response line was written to the child's stdin; False if
        the screenshot failed, the analysis returned nothing, the model
        answered "WAIT", or the child's stdin pipe was already closed.
    """
    print("\n[Vision] Analyzing prompt...", file=sys.stderr)

    screenshot_path = take_screenshot()
    if not screenshot_path:
        print("[Vision] Screenshot failed, waiting for manual input", file=sys.stderr)
        return False

    try:
        response = analyzer.analyze_screenshot(screenshot_path)
        if not response:
            print("[Vision] Analysis failed, waiting for manual input", file=sys.stderr)
            return False

        print(f"[Vision] Response: {response}", file=sys.stderr)
        if response.upper() == "WAIT":
            print("[Vision] No action needed (WAIT)", file=sys.stderr)
            return False

        # Give the prompt a moment before typing the answer.
        time.sleep(config.RESPONSE_DELAY)
        try:
            process.stdin.write(f"{response}\n".encode('utf-8'))
            process.stdin.flush()
        except BrokenPipeError:
            # The child closed its stdin (most likely it exited); the main
            # loop notices the exit on its own, so just report and move on.
            print("[Vision] Claude exited before the response could be sent", file=sys.stderr)
            return False

        print("[Vision] Response sent", file=sys.stderr)
        return True
    finally:
        # Best-effort removal of the temporary screenshot, also when the
        # analysis raised. A file that is already gone is not an error.
        try:
            Path(screenshot_path).unlink()
        except OSError:
            pass


def run_claude_with_vision(args: list = None):
    """
    Run Claude Code with vision-based auto-approval.

    Starts ``claude`` as a child process, mirrors its combined stdout/stderr
    to this process's stdout, and, whenever the output has been idle for
    ``config.IDLE_THRESHOLD`` seconds and the recent output contains one of
    ``config.APPROVAL_KEYWORDS``, takes a screenshot, asks the vision model
    what to answer, and writes that answer to the child's stdin.

    This function does not return; it always ends via ``sys.exit``:
    1 if Ollama/the model is unreachable or ``claude`` is not found,
    130 if interrupted with Ctrl-C, otherwise the child's exit code.

    NOTE(review): the child's stdin is a pipe owned by this wrapper and
    nothing here forwards this process's own stdin to it, so the
    "waiting for manual input" messages cannot currently be acted on
    through this wrapper.

    Args:
        args: Command line arguments to pass to claude
    """
    args = args or []

    # Initialize vision analyzer
    analyzer = VisionAnalyzer()

    # Test connection
    print("[Claude Vision Auto] Testing Ollama connection...")
    if not analyzer.test_connection():
        print("[ERROR] Cannot connect to Ollama or model not available")
        print(f"Make sure Ollama is running and '{config.VISION_MODEL}' is installed")
        sys.exit(1)

    print("[Claude Vision Auto] Connected to Ollama")
    print(f"[Claude Vision Auto] Using model: {config.VISION_MODEL}")
    print(f"[Claude Vision Auto] Idle threshold: {config.IDLE_THRESHOLD}s")
    print()

    # Build command
    cmd = ['claude'] + args

    # Start Claude Code process. bufsize=0 keeps the pipes unbuffered so
    # select() reflects exactly what is available to read.
    try:
        process = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            bufsize=0
        )
    except FileNotFoundError:
        print("[ERROR] 'claude' command not found")
        print("Make sure Claude Code CLI is installed")
        sys.exit(1)

    last_output_time = time.time()
    output_buffer = bytearray()

    # Cleanup old screenshots
    cleanup_old_screenshots()

    try:
        while True:
            # Sample the exit status BEFORE polling the pipe: if the child
            # had already exited and the pipe is then empty, everything it
            # wrote has been consumed and it is safe to stop. Checking after
            # a read would drop output still queued in the pipe.
            child_exited = process.poll() is not None

            readable, _, _ = select.select([process.stdout], [], [], 0.1)
            if readable:
                # On the unbuffered pipe a read returns whatever is available
                # (up to the limit) without waiting for the full chunk.
                chunk = process.stdout.read(_READ_CHUNK_SIZE)
                if not chunk:
                    # EOF: the child closed its output
                    break

                # Print to terminal
                sys.stdout.buffer.write(chunk)
                sys.stdout.buffer.flush()

                output_buffer.extend(chunk)
                last_output_time = time.time()

                # Keep buffer reasonable size
                if len(output_buffer) > config.OUTPUT_BUFFER_SIZE:
                    output_buffer = output_buffer[-config.OUTPUT_BUFFER_SIZE:]
            elif child_exited:
                break

            # Only look for a prompt once output has been idle long enough
            idle_time = time.time() - last_output_time
            if idle_time < config.IDLE_THRESHOLD:
                continue

            # Look for approval keywords in the recent output
            buffer_str = output_buffer.decode('utf-8', errors='ignore')
            has_keywords = any(
                keyword in buffer_str
                for keyword in config.APPROVAL_KEYWORDS
            )
            if has_keywords:
                if config.DEBUG:
                    print("\n[DEBUG] Approval keywords detected in buffer")
                if _answer_prompt(analyzer, process):
                    # The prompt was answered; forget it so it is not
                    # answered a second time.
                    output_buffer.clear()

            # Reset idle detection. The buffer only changes when new output
            # arrives (which resets the timer anyway), so re-checking before
            # another full idle period would just repeat the same work.
            last_output_time = time.time()

        # Wait for process to finish
        exit_code = process.wait()
    except KeyboardInterrupt:
        print("\n[Claude Vision Auto] Interrupted by user")
        process.terminate()
        process.wait()
        exit_code = 130
    finally:
        # Never leave the child running if we are leaving because of an
        # unexpected error; the error itself still propagates.
        if process.poll() is None:
            process.terminate()
            process.wait()

    # Exit outside of `finally` so this does not replace an in-flight
    # exception or the 130 status set by the Ctrl-C handler.
    sys.exit(exit_code)
def main():
    """Console entry point: hand the command-line arguments to the wrapper."""
    # Everything after the program name (argv[0]) is passed through untouched.
    run_claude_with_vision(sys.argv[1:])
# Start the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()