# Vision-module-auto/claude_vision_auto/main.py

"""
Main entry point for Claude Vision Auto
"""

import sys
import time
import select
import subprocess
from pathlib import Path

from . import config
from .screenshot import take_screenshot, cleanup_old_screenshots
from .vision_analyzer import VisionAnalyzer


def _send_vision_response(process, analyzer):
    """
    Screenshot the screen, ask the vision model for an answer, and send it.

    Args:
        process: The running ``subprocess.Popen`` whose stdin receives the answer
        analyzer: Object providing ``analyze_screenshot(path)``

    Returns:
        True if a response was written to the process's stdin, False if the
        screenshot or analysis failed, the model answered "WAIT", or the
        process no longer accepts input.
    """
    print("\n[Vision] Analyzing prompt...", file=sys.stderr)

    screenshot_path = take_screenshot()
    if not screenshot_path:
        print("[Vision] Screenshot failed, waiting for manual input", file=sys.stderr)
        return False

    try:
        response = analyzer.analyze_screenshot(screenshot_path)
    finally:
        # Best-effort removal, also when the analysis itself raises, so
        # screenshots do not pile up on disk.
        try:
            Path(screenshot_path).unlink()
        except OSError:
            pass

    # Stray whitespace would defeat the "WAIT" comparison and a trailing
    # newline would submit an extra empty line to the process.
    response = response.strip() if response else ""
    if not response:
        print("[Vision] Analysis failed, waiting for manual input", file=sys.stderr)
        return False

    print(f"[Vision] Response: {response}", file=sys.stderr)

    if response.upper() == "WAIT":
        print("[Vision] No action needed (WAIT)", file=sys.stderr)
        return False

    time.sleep(config.RESPONSE_DELAY)
    try:
        process.stdin.write(f"{response}\n".encode('utf-8'))
        process.stdin.flush()
    except OSError:
        # Includes BrokenPipeError: the process closed its input or exited
        # while the screenshot was being analyzed.
        print("[Vision] Could not send response (process input closed)", file=sys.stderr)
        return False

    print("[Vision] Response sent", file=sys.stderr)
    return True


def run_claude_with_vision(args: list = None):
    """
    Run Claude Code with vision-based auto-approval

    Starts ``claude`` as a child process, echoes its output to this
    terminal, and whenever the output has been idle for
    ``config.IDLE_THRESHOLD`` seconds and the recent output contains one of
    ``config.APPROVAL_KEYWORDS``, takes a screenshot, asks the vision model
    what to answer, and writes that answer to the child's stdin.

    This function does not return; it always ends via ``sys.exit``:
    1 if Ollama is unreachable or ``claude`` is not installed, 130 if
    interrupted with Ctrl-C, otherwise the child's exit code.

    Args:
        args: Command line arguments to pass to claude
    """
    args = args or []

    # Initialize vision analyzer
    analyzer = VisionAnalyzer()

    # Test connection
    print("[Claude Vision Auto] Testing Ollama connection...")
    if not analyzer.test_connection():
        print("[ERROR] Cannot connect to Ollama or model not available")
        print(f"Make sure Ollama is running and '{config.VISION_MODEL}' is installed")
        sys.exit(1)

    print("[Claude Vision Auto] Connected to Ollama")
    print(f"[Claude Vision Auto] Using model: {config.VISION_MODEL}")
    print(f"[Claude Vision Auto] Idle threshold: {config.IDLE_THRESHOLD}s")
    print()

    # Build command
    cmd = ['claude'] + args

    # Start Claude Code process.
    # NOTE(review): the child's stdin is a pipe and nothing in this function
    # forwards this terminal's keyboard input to it, so the "waiting for
    # manual input" paths cannot actually receive input - confirm intent.
    try:
        process = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            bufsize=0
        )
    except FileNotFoundError:
        print("[ERROR] 'claude' command not found")
        print("Make sure Claude Code CLI is installed")
        sys.exit(1)

    # Monotonic clock: idle detection must not be affected by wall-clock jumps.
    last_output_time = time.monotonic()
    output_buffer = bytearray()

    # With bufsize=0 the pipe is unbuffered, so read(n) returns whatever is
    # currently available (up to n bytes) instead of waiting for n bytes.
    read_size = 4096

    # Cleanup old screenshots
    cleanup_old_screenshots()

    try:
        while True:
            # Check if there's data to read
            readable, _, _ = select.select([process.stdout], [], [], 0.1)

            if readable:
                chunk = process.stdout.read(read_size)
                if not chunk:
                    # EOF: the process closed its output
                    break

                # Print to terminal
                sys.stdout.buffer.write(chunk)
                sys.stdout.buffer.flush()

                output_buffer.extend(chunk)
                last_output_time = time.monotonic()

                # Keep only the most recent output for keyword matching
                if len(output_buffer) > config.OUTPUT_BUFFER_SIZE:
                    del output_buffer[:-config.OUTPUT_BUFFER_SIZE]
            elif process.poll() is not None:
                # Only stop once the process has exited AND nothing is left
                # to read; breaking while data is still pending would drop
                # the tail of the output.
                break

            # Check if idle (no output for threshold seconds)
            if time.monotonic() - last_output_time >= config.IDLE_THRESHOLD:
                # Check if buffer suggests we're waiting for input
                buffer_str = output_buffer.decode('utf-8', errors='ignore')

                if any(keyword in buffer_str for keyword in config.APPROVAL_KEYWORDS):
                    if config.DEBUG:
                        print("\n[DEBUG] Approval keywords detected in buffer")

                    if _send_vision_response(process, analyzer):
                        # The prompt was answered; forget it so the same
                        # text does not trigger another answer.
                        output_buffer.clear()

                # Restart the idle timer so an unanswered prompt is
                # re-examined at most once per idle threshold.
                last_output_time = time.monotonic()

        # Wait for process to finish
        exit_code = process.wait()

    except KeyboardInterrupt:
        print("\n[Claude Vision Auto] Interrupted by user")
        process.terminate()
        process.wait()
        exit_code = 130

    finally:
        # Reached with the child still alive only when an unexpected error
        # is propagating; do not leave it running in the background.
        if process.poll() is None:
            process.kill()
            process.wait()

    # Exit outside the try/finally so the chosen code (e.g. 130) is not
    # overridden and unexpected exceptions are not masked by SystemExit.
    sys.exit(exit_code)


def main():
    """Console entry point: forward the command-line arguments to the wrapper."""
    # Everything after the program name is passed through unchanged.
    run_claude_with_vision(sys.argv[1:])


# Run the CLI when this module is executed as the main program.
# NOTE(review): the relative imports at the top of this file need package
# context, so direct execution presumably has to go through `python -m` - confirm.
if __name__ == '__main__':
    main()