"""
Main entry point for Claude Vision Auto
"""

import select
import subprocess
import sys
import time
from pathlib import Path
from typing import List, Optional

from . import config
from .screenshot import take_screenshot, cleanup_old_screenshots
from .vision_analyzer import VisionAnalyzer

# Upper bound on the number of bytes pulled from the child's stdout per read
_READ_CHUNK_SIZE = 4096

# Seconds select() waits for child output before the idle check runs
_POLL_INTERVAL = 0.1


def _exit_status(returncode: int) -> int:
    """
    Convert a Popen return code into a value suitable for sys.exit

    Popen reports a child killed by signal N as -N. That is mapped to
    128 + N, the same convention this module uses for Ctrl-C (130).

    Args:
        returncode: Value of Popen.returncode after the child has exited

    Returns:
        Non-negative exit status
    """
    if returncode < 0:
        return 128 - returncode
    return returncode


def _answer_prompt(process: subprocess.Popen, analyzer: VisionAnalyzer) -> bool:
    """
    Take a screenshot, ask the vision model what to answer, and send it

    Args:
        process: Running child whose stdin receives the answer
        analyzer: Vision analyzer used to interpret the screenshot

    Returns:
        True if a response was written to the child's stdin, False otherwise
    """
    # NOTE(review): the child's stdin is a pipe that only this function writes
    # to, and nothing here forwards the user's keyboard input - so the
    # "waiting for manual input" messages may be misleading. Confirm intent.
    print("\n[Vision] Analyzing prompt...", file=sys.stderr)

    screenshot_path = take_screenshot()
    if not screenshot_path:
        print("[Vision] Screenshot failed, waiting for manual input", file=sys.stderr)
        return False

    try:
        response = analyzer.analyze_screenshot(screenshot_path)
    finally:
        # Best-effort cleanup; also runs when the analysis itself raises
        try:
            Path(screenshot_path).unlink()
        except OSError:
            pass

    if not response:
        print("[Vision] Analysis failed, waiting for manual input", file=sys.stderr)
        return False

    print(f"[Vision] Response: {response}", file=sys.stderr)

    if response.upper() == "WAIT":
        print("[Vision] No action needed (WAIT)", file=sys.stderr)
        return False

    time.sleep(config.RESPONSE_DELAY)
    try:
        process.stdin.write(f"{response}\n".encode('utf-8'))
        process.stdin.flush()
    except BrokenPipeError:
        # The child closed its stdin (typically because it exited); the
        # caller's loop notices the exit on its next iteration.
        print("[Vision] Could not send response (input pipe closed)", file=sys.stderr)
        return False

    print("[Vision] Response sent", file=sys.stderr)
    return True


def _relay_output(process: subprocess.Popen, analyzer: VisionAnalyzer) -> None:
    """
    Copy the child's output to our stdout and auto-answer idle prompts

    Returns once the child's stdout reaches EOF, or once the child has
    exited and no more output is pending.

    Args:
        process: Running child started with piped stdin/stdout
        analyzer: Vision analyzer used when an approval prompt is suspected
    """
    last_output_time = time.time()
    output_buffer = bytearray()

    while True:
        # Sample the exit state BEFORE select(): everything the child wrote
        # before exiting is then already in the pipe, so breaking only when
        # select() reports nothing readable cannot drop trailing output.
        exited = process.poll() is not None

        readable, _, _ = select.select([process.stdout], [], [], _POLL_INTERVAL)

        if readable:
            # The pipe is unbuffered (bufsize=0), so this returns whatever is
            # currently available (up to the chunk size) without blocking.
            chunk = process.stdout.read(_READ_CHUNK_SIZE)
            if not chunk:
                # EOF: the child closed its stdout
                break

            # Print to terminal
            sys.stdout.buffer.write(chunk)
            sys.stdout.buffer.flush()

            output_buffer.extend(chunk)
            last_output_time = time.time()

            # Keep only the most recent OUTPUT_BUFFER_SIZE bytes
            excess = len(output_buffer) - config.OUTPUT_BUFFER_SIZE
            if excess > 0:
                del output_buffer[:excess]
        elif exited:
            break

        # Check if idle (no output for threshold seconds)
        if time.time() - last_output_time >= config.IDLE_THRESHOLD:
            buffer_str = output_buffer.decode('utf-8', errors='ignore')

            # Only involve the vision model when the recent output contains
            # one of the configured approval keywords
            if any(keyword in buffer_str for keyword in config.APPROVAL_KEYWORDS):
                if config.DEBUG:
                    print("\n[DEBUG] Approval keywords detected in buffer")

                if _answer_prompt(process, analyzer):
                    # The prompt was answered; forget it so it is not
                    # matched again on the next idle period
                    output_buffer.clear()

            # Reset idle detection
            last_output_time = time.time()


def run_claude_with_vision(args: Optional[List[str]] = None):
    """
    Run Claude Code with vision-based auto-approval

    Starts the `claude` CLI as a child process, mirrors its output to the
    terminal, and - when output goes idle and looks like an approval prompt -
    sends the vision model's answer to the child's stdin. This function does
    not return: it always ends by calling sys.exit.

    Args:
        args: Command line arguments to pass to claude

    Raises:
        SystemExit: 1 if Ollama or the `claude` command is unavailable,
            130 on Ctrl-C, otherwise the child's exit status
    """
    args = args or []

    # Initialize vision analyzer
    analyzer = VisionAnalyzer()

    # Test connection
    print("[Claude Vision Auto] Testing Ollama connection...")
    if not analyzer.test_connection():
        print("[ERROR] Cannot connect to Ollama or model not available")
        print(f"Make sure Ollama is running and '{config.VISION_MODEL}' is installed")
        sys.exit(1)

    print("[Claude Vision Auto] Connected to Ollama")
    print(f"[Claude Vision Auto] Using model: {config.VISION_MODEL}")
    print(f"[Claude Vision Auto] Idle threshold: {config.IDLE_THRESHOLD}s")
    print()

    # Build command
    cmd = ['claude', *args]

    # Start Claude Code process
    try:
        process = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            bufsize=0,
        )
    except FileNotFoundError:
        print("[ERROR] 'claude' command not found")
        print("Make sure Claude Code CLI is installed")
        sys.exit(1)

    # Leaving the with-block closes the pipes and waits for the child
    with process:
        try:
            # Cleanup old screenshots
            cleanup_old_screenshots()
            _relay_output(process, analyzer)
        except KeyboardInterrupt:
            print("\n[Claude Vision Auto] Interrupted by user")
            process.terminate()
            process.wait()
            sys.exit(130)
        except Exception:
            # Do not leave the child running behind an unexpected failure
            process.terminate()
            raise

    # Exit outside of any finally clause so it cannot replace an exception
    # (or the 130 above) that is already propagating
    sys.exit(_exit_status(process.returncode))


def main():
    """CLI entry point"""
    run_claude_with_vision(sys.argv[1:])


if __name__ == '__main__':
    main()