diff --git a/claude_vision_auto/main.py b/claude_vision_auto/main.py index f3fd957..70ea542 100644 --- a/claude_vision_auto/main.py +++ b/claude_vision_auto/main.py @@ -3,8 +3,13 @@ Main entry point for Claude Vision Auto """ import sys +import os import time import select +import pty +import tty +import termios +import signal import subprocess from pathlib import Path @@ -40,118 +45,168 @@ def run_claude_with_vision(args: list = None): # Build command cmd = ['claude'] + args - # Start Claude Code process - try: - process = subprocess.Popen( - cmd, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - bufsize=0 - ) - except FileNotFoundError: + # Check if claude exists + if not subprocess.run(['which', 'claude'], capture_output=True).returncode == 0: print("[ERROR] 'claude' command not found") print("Make sure Claude Code CLI is installed") sys.exit(1) - last_output_time = time.time() - output_buffer = bytearray() - # Cleanup old screenshots cleanup_old_screenshots() + # Save original terminal settings + old_tty = termios.tcgetattr(sys.stdin) + try: - while True: - # Check if there's data to read - readable, _, _ = select.select([process.stdout], [], [], 0.1) + # Create pseudo-terminal + master_fd, slave_fd = pty.openpty() - if readable: - char = process.stdout.read(1) - if not char: - # Process ended - break + # Fork process + pid = os.fork() - # Print to terminal - sys.stdout.buffer.write(char) - sys.stdout.buffer.flush() + if pid == 0: + # Child process - run claude + os.close(master_fd) - output_buffer.extend(char) - last_output_time = time.time() + # Set up slave as stdin/stdout/stderr + os.dup2(slave_fd, 0) + os.dup2(slave_fd, 1) + os.dup2(slave_fd, 2) - # Keep buffer reasonable size - if len(output_buffer) > config.OUTPUT_BUFFER_SIZE: - output_buffer = output_buffer[-config.OUTPUT_BUFFER_SIZE:] + if slave_fd > 2: + os.close(slave_fd) - # Check if idle (no output for threshold seconds) - idle_time = time.time() - last_output_time + # Execute claude + os.execvp('claude', cmd) + else: + # Parent process - handle I/O and vision analysis + os.close(slave_fd) - if idle_time >= config.IDLE_THRESHOLD: - # Check if buffer suggests we're waiting for input - buffer_str = output_buffer.decode('utf-8', errors='ignore') + # Set terminal to raw mode + tty.setraw(sys.stdin.fileno()) - # Look for approval keywords - has_keywords = any( - keyword in buffer_str - for keyword in config.APPROVAL_KEYWORDS - ) + last_output_time = time.time() + output_buffer = bytearray() - if has_keywords: - if config.DEBUG: - print("\n[DEBUG] Approval keywords detected in buffer") + try: + while True: + # Check for data from claude or user + readable, _, _ = select.select( + [master_fd, sys.stdin.fileno()], + [], + [], + 0.1 + ) - print("\n[Vision] Analyzing prompt...", file=sys.stderr) + for fd in readable: + if fd == master_fd: + # Read from claude process + try: + data = os.read(master_fd, 1024) + if not data: + # Process ended + os.waitpid(pid, 0) + return - # Take screenshot - screenshot_path = take_screenshot() + # Write to stdout + os.write(sys.stdout.fileno(), data) - if screenshot_path: - # Analyze with vision - response = analyzer.analyze_screenshot(screenshot_path) - - if response: - print(f"[Vision] Response: {response}", file=sys.stderr) - - if response and response.upper() != "WAIT": - # Send response - time.sleep(config.RESPONSE_DELAY) - process.stdin.write(f"{response}\n".encode('utf-8')) - process.stdin.flush() - - # Clear buffer - output_buffer.clear() + # Add to buffer for pattern matching + output_buffer.extend(data) last_output_time = time.time() - print("[Vision] Response sent", file=sys.stderr) + # Keep buffer reasonable size + if len(output_buffer) > config.OUTPUT_BUFFER_SIZE: + output_buffer = output_buffer[-config.OUTPUT_BUFFER_SIZE:] + + except OSError: + # Process ended + os.waitpid(pid, 0) + return + + elif fd == sys.stdin.fileno(): + # Read from user input + data = os.read(sys.stdin.fileno(), 1024) + if data: + # Forward to claude + os.write(master_fd, data) + + # Check if idle (no output for threshold seconds) + idle_time = time.time() - last_output_time + + if idle_time >= config.IDLE_THRESHOLD: + # Check if buffer suggests we're waiting for input + buffer_str = output_buffer.decode('utf-8', errors='ignore') + + # Look for approval keywords + has_keywords = any( + keyword in buffer_str + for keyword in config.APPROVAL_KEYWORDS + ) + + if has_keywords: + if config.DEBUG: + sys.stderr.write("\n[DEBUG] Approval keywords detected in buffer\n") + sys.stderr.flush() + + sys.stderr.write("\n[Vision] Analyzing prompt...\n") + sys.stderr.flush() + + # Take screenshot + screenshot_path = take_screenshot() + + if screenshot_path: + # Analyze with vision + response = analyzer.analyze_screenshot(screenshot_path) + + if response: + sys.stderr.write(f"[Vision] Response: {response}\n") + sys.stderr.flush() + + if response and response.upper() != "WAIT": + # Send response + time.sleep(config.RESPONSE_DELAY) + os.write(master_fd, f"{response}\n".encode('utf-8')) + + # Clear buffer + output_buffer.clear() + last_output_time = time.time() + + sys.stderr.write("[Vision] Response sent\n") + sys.stderr.flush() + else: + sys.stderr.write("[Vision] No action needed (WAIT)\n") + sys.stderr.flush() + else: + sys.stderr.write("[Vision] Analysis failed, waiting for manual input\n") + sys.stderr.flush() + + # Clean up screenshot + try: + Path(screenshot_path).unlink() + except Exception: + pass else: - print("[Vision] No action needed (WAIT)", file=sys.stderr) - else: - print("[Vision] Analysis failed, waiting for manual input", file=sys.stderr) + sys.stderr.write("[Vision] Screenshot failed, waiting for manual input\n") + sys.stderr.flush() - # Clean up screenshot - try: - Path(screenshot_path).unlink() - except Exception: - pass - else: - print("[Vision] Screenshot failed, waiting for manual input", file=sys.stderr) + # Reset idle detection + last_output_time = time.time() - # Reset idle detection - last_output_time = time.time() - - # Check if process is still running - if process.poll() is not None: - break - - except KeyboardInterrupt: - print("\n[Claude Vision Auto] Interrupted by user") - process.terminate() - process.wait() - sys.exit(130) + except KeyboardInterrupt: + # Kill child process + os.kill(pid, signal.SIGTERM) + os.waitpid(pid, 0) finally: - # Wait for process to finish - exit_code = process.wait() - sys.exit(exit_code) + # Restore terminal settings + termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_tty) + + # Close master fd if still open + try: + os.close(master_fd) + except: + pass def main():