diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..5b683fa --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,9 @@ +include README.md +include LICENSE +include CHANGELOG.md +include QUICKSTART.md +include requirements.txt +include requirements-dev.txt +recursive-include claude_vision_auto *.yaml +recursive-include docs *.md +recursive-include examples *.sh diff --git a/QUICKSTART.md b/QUICKSTART.md new file mode 100644 index 0000000..e21e891 --- /dev/null +++ b/QUICKSTART.md @@ -0,0 +1,129 @@ +# Quick Start Guide + +Get Claude Vision Auto running in 5 minutes. + +## Prerequisites Check + +```bash +# Check Claude Code +claude --version + +# Check Docker +docker ps + +# Check Python +python3 --version +``` + +## Installation + +```bash +cd /home/svrnty/claude-vision-auto + +# Install system dependencies +sudo apt-get update && sudo apt-get install -y scrot + +# Install Python package +make install +``` + +## Start Ollama (if not running) + +```bash +# Check if running +docker ps | grep ollama + +# If not running, start it +docker run -d \ + -p 11434:11434 \ + --name ollama \ + --restart unless-stopped \ + ollama/ollama:latest + +# Pull vision model +docker exec ollama ollama pull minicpm-v:latest +``` + +## Test Installation + +```bash +# Verify command +which claude-vision + +# Test connection +claude-vision --help 2>&1 | head -5 +``` + +## First Run + +```bash +# Start interactive session +claude-vision + +# You should see: +# [Claude Vision Auto] Testing Ollama connection... +# [Claude Vision Auto] Connected to Ollama +# [Claude Vision Auto] Using model: minicpm-v:latest +``` + +## Test Auto-Approval + +```bash +# Try a simple command +claude-vision "create a test.md file in /tmp" + +# Watch for auto-approval when prompted: +# [Vision] Analyzing prompt... 
def main():
    """Create the user configuration file and report the result.

    Delegates to ``claude_vision_auto.config.create_user_config_cli``, the
    function the ``claude-vision-config`` console script is registered
    against in setup.py, so this launcher and the installed entry point
    share one implementation instead of two copies of the same messages.

    That function prints whether the file was created or already existed
    and then ends the process with ``sys.exit(0)``, so this call does not
    return normally.
    """
    # Function-scope import: the script's top-level import only brings in
    # create_user_config and get_config_dir.
    from claude_vision_auto.config import create_user_config_cli

    create_user_config_cli()


if __name__ == '__main__':
    main()
def deep_merge(base: Dict, override: Dict) -> Dict:
    """Return a new dict with ``override`` recursively merged onto ``base``.

    Rules, applied per key:

    * key only in ``base``: the base value is kept.
    * both values are dicts: they are merged recursively.
    * base value is a dict and the override is ``None``: the base section
      is kept. YAML parses a section header whose children are all
      commented out (``timing:``) as ``None``; letting that replace the
      whole section would break later ``config['timing'][...]`` lookups.
    * anything else: the override value wins.

    Neither input is modified, and the result shares no mutable objects
    with either input (values are deep-copied), so mutating the merged
    config can never leak back into the defaults.

    Key order matches a plain ``dict.update``: base keys first, in base
    order, followed by keys that appear only in ``override``.

    Args:
        base: Mapping providing the default values.
        override: Mapping whose values take precedence.

    Returns:
        A new, independent merged dictionary.
    """
    import copy

    merged: Dict = {}

    for key, base_value in base.items():
        if key not in override:
            merged[key] = copy.deepcopy(base_value)
            continue

        new_value = override[key]
        if isinstance(base_value, dict):
            if isinstance(new_value, dict):
                merged[key] = deep_merge(base_value, new_value)
                continue
            if new_value is None:
                # Empty section in the override: keep the defaults.
                merged[key] = copy.deepcopy(base_value)
                continue

        merged[key] = copy.deepcopy(new_value)

    # Keys introduced only by the override are appended after the base keys.
    for key, new_value in override.items():
        if key not in base:
            merged[key] = copy.deepcopy(new_value)

    return merged
def create_user_config_cli():
    """Console entry point: create the user config file and report on it.

    Calls ``create_user_config()``. When that returns a path, the file was
    just created and a short customization guide is printed. When it
    returns a falsy value, the existing file's location is printed
    instead. Either way an "Edit with" hint follows and the process exits
    with status 0 via ``sys.exit``.
    """
    import sys

    created_path = create_user_config()

    if created_path:
        config_path = created_path
        print("✅ Created user configuration file:")
        print(f" {config_path}")
        print()
        print("Edit this file to customize:")
        print(" - Vision model (minicpm-v, llama3.2-vision, llava)")
        print(" - Vision prompt for better responses")
        print(" - Timing settings (idle threshold, response delay)")
        print(" - Approval keywords")
    else:
        config_path = get_config_dir() / "config.yaml"
        print("ℹ️ Configuration file already exists:")
        print(f" {config_path}")

    # Both outcomes end with the same blank line and edit hint.
    print()
    print(f"Edit with: nano {config_path}")

    sys.exit(0)


# Load configuration once at import time; the constants below are a
# snapshot of it.
_config = load_config()

# Export commonly used values
OLLAMA_URL = _config['ollama']['url']
VISION_MODEL = _config['ollama']['model']
VISION_TIMEOUT = _config['ollama']['timeout']

IDLE_THRESHOLD = _config['timing']['idle_threshold']
RESPONSE_DELAY = _config['timing']['response_delay']
SCREENSHOT_TIMEOUT = _config['timing']['screenshot_timeout']

VISION_PROMPT = _config['vision_prompt']
RESPONSE_MAPPING = _config['response_mapping']
APPROVAL_KEYWORDS = _config['approval_keywords']

OUTPUT_BUFFER_SIZE = _config['buffer']['size']
SCREENSHOT_TOOLS = _config['screenshot']['tools']
SCREENSHOT_CACHE_CLEANUP = _config['screenshot']['cache_cleanup_seconds']

DEBUG = _config['debug']


def get_config() -> Dict[str, Any]:
    """Return an independent copy of the full configuration dict.

    A deep copy is returned so that callers editing nested sections (for
    example ``get_config()['ollama']['model'] = ...``) cannot silently
    change the module's own configuration.
    """
    import copy

    return copy.deepcopy(_config)


def reload_config():
    """Re-run ``load_config()`` and refresh every exported constant.

    Rebinds ``_config`` and all module-level constants derived from it.
    Code that reads them as attributes (``config.DEBUG``) sees the new
    values. Names copied earlier with ``from ... import NAME`` keep the
    value they had at import time, because that is how Python name
    binding works.
    """
    global _config, OLLAMA_URL, VISION_MODEL, VISION_TIMEOUT
    global IDLE_THRESHOLD, RESPONSE_DELAY, SCREENSHOT_TIMEOUT
    global VISION_PROMPT, RESPONSE_MAPPING, APPROVAL_KEYWORDS
    global OUTPUT_BUFFER_SIZE, SCREENSHOT_TOOLS, SCREENSHOT_CACHE_CLEANUP, DEBUG

    _config = load_config()

    # Keep this list in step with the module-level exports above.
    OLLAMA_URL = _config['ollama']['url']
    VISION_MODEL = _config['ollama']['model']
    VISION_TIMEOUT = _config['ollama']['timeout']

    IDLE_THRESHOLD = _config['timing']['idle_threshold']
    RESPONSE_DELAY = _config['timing']['response_delay']
    SCREENSHOT_TIMEOUT = _config['timing']['screenshot_timeout']

    VISION_PROMPT = _config['vision_prompt']
    RESPONSE_MAPPING = _config['response_mapping']
    APPROVAL_KEYWORDS = _config['approval_keywords']

    OUTPUT_BUFFER_SIZE = _config['buffer']['size']
    SCREENSHOT_TOOLS = _config['screenshot']['tools']
    SCREENSHOT_CACHE_CLEANUP = _config['screenshot']['cache_cleanup_seconds']

    DEBUG = _config['debug']
No" + - Questions asking for approval (create/edit/delete files) + - Yes/No questions with (y/n) or [y/n] format + + RESPONSE RULES: + - If you see numbered options with "Yes" as option 1: respond with ONLY "1" + - If you see a yes/no question with (y/n) or [y/n]: respond with ONLY "y" + - If you don't see any prompt requiring input: respond with ONLY "WAIT" + - NEVER provide explanations, ONLY the single response character/number + + Your response (one character/number only): + +# Response Mapping +# What the vision model should output for different scenarios +response_mapping: + approval_prompt: "1" # Response for numbered "Yes" option + yes_no_question: "y" # Response for y/n questions + no_action: "WAIT" # When no input is needed + +# Approval Detection Keywords +# Keywords that indicate we might be waiting for approval +approval_keywords: + - "Yes" + - "No" + - "(y/n)" + - "[y/n]" + - "Approve" + - "Do you want to" + - "create" + - "edit" + - "delete" + - "Allow" + +# Buffer Settings +buffer: + size: 4096 # Output buffer size in bytes + +# Debug Mode +debug: false # Set to true for verbose logging + +# Screenshot Settings +screenshot: + tools: + - "scrot" + - "gnome-screenshot" + - "import" + - "maim" + cache_cleanup_seconds: 3600 # Clean up screenshots older than 1 hour diff --git a/claude_vision_auto/screenshot.py b/claude_vision_auto/screenshot.py index fba0e4e..0d61f87 100644 --- a/claude_vision_auto/screenshot.py +++ b/claude_vision_auto/screenshot.py @@ -100,15 +100,18 @@ def take_screenshot() -> Optional[str]: return None -def cleanup_old_screenshots(max_age_seconds: int = 3600): +def cleanup_old_screenshots(max_age_seconds: int = None): """ Clean up old screenshots from cache directory Args: - max_age_seconds: Maximum age of screenshots to keep (default 1 hour) + max_age_seconds: Maximum age of screenshots to keep (default from config) """ import time + if max_age_seconds is None: + max_age_seconds = config.SCREENSHOT_CACHE_CLEANUP + cache_dir = 
config.get_cache_dir() current_time = time.time() diff --git a/claude_vision_auto/vision_analyzer.py b/claude_vision_auto/vision_analyzer.py index 17fce2a..b8c34fd 100644 --- a/claude_vision_auto/vision_analyzer.py +++ b/claude_vision_auto/vision_analyzer.py @@ -10,26 +10,6 @@ from typing import Optional from . import config -VISION_PROMPT = """You are analyzing a terminal screenshot showing a Claude Code approval prompt. - -Look for: -- Menu options like "1. Yes", "2. Yes, allow all", "3. No" -- Questions asking for approval (create/edit/delete files) -- Yes/No questions - -If you see an approval prompt with numbered options: -- Respond ONLY with the number to select "Yes" (usually "1") -- Output format: Just the number, nothing else - -If you see a yes/no question: -- Respond with: y - -If you don't see any prompt requiring input: -- Respond with: WAIT - -Your response (one word only):""" - - class VisionAnalyzer: """Analyzes screenshots using vision model""" @@ -62,7 +42,7 @@ class VisionAnalyzer: # Send to Ollama payload = { "model": self.model, - "prompt": VISION_PROMPT, + "prompt": config.VISION_PROMPT, "images": [image_data], "stream": False } diff --git a/requirements.txt b/requirements.txt index 0eb8cae..c7ff134 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ requests>=2.31.0 +pyyaml>=6.0 diff --git a/setup.py b/setup.py index 59322dc..6759aff 100644 --- a/setup.py +++ b/setup.py @@ -38,10 +38,15 @@ setup( python_requires=">=3.8", install_requires=[ "requests>=2.31.0", + "pyyaml>=6.0", ], + package_data={ + "claude_vision_auto": ["default_config.yaml"], + }, entry_points={ "console_scripts": [ "claude-vision=claude_vision_auto.main:main", + "claude-vision-config=claude_vision_auto.config:create_user_config_cli", ], }, include_package_data=True,