feat: Add YAML-based configuration system

- Add default_config.yaml with customizable settings - Model selection (minicpm-v, llama3.2-vision, llava) - Customizable vision prompt for better responses - Timing parameters (idle threshold, response delay) - Approval keywords configuration - User config at ~/.config/claude-vision-auto/config.yaml - New command: claude-vision-config to generate user config - Environment variables still override config files - Added PyYAML dependency Configuration priority: 1. Environment variables (highest) 2. User config (~/.config/claude-vision-auto/config.yaml) 3. Default config (package default) Generated with Claude Code Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: Jean-Philippe Brule <jp@svrnty.io>
2025-10-29 10:19:35 -04:00 · 2025-10-29 10:19:35 -04:00 · 52b0813b64
commit 52b0813b64
parent 41cecca0e2
9 changed files with 411 additions and 54 deletions
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -0,0 +1,9 @@
 include README.md
 include LICENSE
 include CHANGELOG.md
 include QUICKSTART.md
 include requirements.txt
 include requirements-dev.txt
 recursive-include claude_vision_auto *.yaml
 recursive-include docs *.md
 recursive-include examples *.sh
--- a/QUICKSTART.md
+++ b/QUICKSTART.md
@ -0,0 +1,129 @@
 # Quick Start Guide
 Get Claude Vision Auto running in 5 minutes.
 ## Prerequisites Check
 ```bash
 # Check Claude Code
 claude --version
 # Check Docker
 docker ps
 # Check Python
 python3 --version
 ```
 ## Installation
 ```bash
 cd /path/to/claude-vision-auto  # the directory where you cloned the repository
 # Install system dependencies
 sudo apt-get update && sudo apt-get install -y scrot
 # Install Python package
 make install
 ```
 ## Start Ollama (if not running)
 ```bash
 # Check if running
 docker ps | grep ollama
 # If not running, start it
 docker run -d \
    -p 11434:11434 \
    --name ollama \
    --restart unless-stopped \
    ollama/ollama:latest
 # Pull vision model
 docker exec ollama ollama pull minicpm-v:latest
 ```
 ## Test Installation
 ```bash
 # Verify command
 which claude-vision
 # Test connection
 claude-vision --help 2>&1 | head -5
 ```
 ## First Run
 ```bash
 # Start interactive session
 claude-vision
 # You should see:
 # [Claude Vision Auto] Testing Ollama connection...
 # [Claude Vision Auto] Connected to Ollama
 # [Claude Vision Auto] Using model: minicpm-v:latest
 ```
 ## Test Auto-Approval
 ```bash
 # Try a simple command
 claude-vision "create a test.md file in /tmp"
 # Watch for auto-approval when prompted:
 # [Vision] Analyzing prompt...
 # [Vision] Response: 1
 # [Vision] Response sent
 ```
 ## Troubleshooting
 ### Ollama Not Connected
 ```bash
 docker start ollama
 docker exec ollama ollama pull minicpm-v:latest
 ```
 ### Screenshot Fails
 ```bash
 sudo apt-get install scrot
 scrot /tmp/test.png  # Test it works
 ```
 ### Command Not Found
 ```bash
 export PATH="$HOME/.local/bin:$PATH"
 source ~/.bashrc
 ```
 ## Next Steps
 - Read [README.md](README.md) for full documentation
 - See [docs/USAGE.md](docs/USAGE.md) for usage examples
 - Check [docs/INSTALLATION.md](docs/INSTALLATION.md) for detailed setup
 ## Quick Configuration
 Add to `~/.bashrc`:
 ```bash
 # Claude Vision Auto
 export PATH="$HOME/.local/bin:$PATH"
 alias cv="claude-vision"
 alias cvd="DEBUG=true claude-vision"
 ```
 Reload:
 ```bash
 source ~/.bashrc
 cv  # Now you can use 'cv' instead of 'claude-vision'
 ```
 ## Support
 - Issues: https://git.openharbor.io/svrnty/claude-vision-auto/issues
 - Documentation: See README.md and docs/
 - Debug mode: `DEBUG=true claude-vision`
--- a/bin/claude-vision-config
+++ b/bin/claude-vision-config
@ -0,0 +1,38 @@
 #!/usr/bin/env python3
 """
 Generate user configuration file for Claude Vision Auto
 """
 import sys
 from pathlib import Path
 from claude_vision_auto.config import create_user_config, get_config_dir
 def main():
    """Create the user configuration file and tell the user where it is.
    Prints a "created" message with customization hints when a new file
    was written, or an "already exists" notice otherwise, followed in both
    cases by an edit hint. Always exits the process with status 0.
    """
    config_path = create_user_config()
    if config_path:
        print("✅ Created user configuration file:")
        print(f"   {config_path}")
        print()
        print("Edit this file to customize:")
        print("   - Vision model (minicpm-v, llama3.2-vision, llava)")
        print("   - Vision prompt for better responses")
        print("   - Timing settings (idle threshold, response delay)")
        print("   - Approval keywords")
    else:
        # create_user_config() returned a falsy value, which this script
        # reports as "already exists"; rebuild the path for the message.
        config_path = get_config_dir() / "config.yaml"
        print("ℹ️  Configuration file already exists:")
        print(f"   {config_path}")
    # Both outcomes end with the same blank line and edit hint
    print()
    print(f"Edit with: nano {config_path}")
    sys.exit(0)
 if __name__ == '__main__':
    main()
--- a/claude_vision_auto/config.py
+++ b/claude_vision_auto/config.py
@ -3,44 +3,169 @@ Configuration for Claude Vision Auto
 """
 import os
 import yaml
 from pathlib import Path
 from typing import Dict, Any
 # Ollama configuration
 # NOTE(review): OLLAMA_URL and VISION_MODEL are assigned again from the loaded
 # config under "Export commonly used values" — confirm these env-only
 # defaults are still needed in the final file.
 OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434/api/generate")
 VISION_MODEL = os.getenv("VISION_MODEL", "minicpm-v:latest")
-# Timing configuration
+def get_config_dir() -> Path:
-IDLE_THRESHOLD = float(os.getenv("IDLE_THRESHOLD", "3.0"))  # seconds of no output before screenshot
+    """Get the user configuration directory, creating it if it does not exist"""
-RESPONSE_DELAY = float(os.getenv("RESPONSE_DELAY", "1.0"))  # seconds to wait before sending response
+    config_dir = Path.home() / ".config" / "claude-vision-auto"
    config_dir.mkdir(parents=True, exist_ok=True)
    return config_dir
 # NOTE(review): every name assigned in this section (OUTPUT_BUFFER_SIZE,
 # APPROVAL_KEYWORDS, SCREENSHOT_TIMEOUT, SCREENSHOT_TOOLS, VISION_TIMEOUT,
 # DEBUG) is assigned again from the loaded config under "Export commonly
 # used values" — confirm which definition is meant to survive.
 # Buffer configuration
 OUTPUT_BUFFER_SIZE = int(os.getenv("OUTPUT_BUFFER_SIZE", "4096"))  # bytes
 # Keywords that suggest we're waiting for approval
 APPROVAL_KEYWORDS = [
    "Yes",
    "No",
    "(y/n)",
    "[y/n]",
    "Approve",
    "Do you want to",
    "create",
    "edit",
    "delete"
 ]
 # Screenshot configuration
 SCREENSHOT_TIMEOUT = int(os.getenv("SCREENSHOT_TIMEOUT", "5"))  # seconds
 SCREENSHOT_TOOLS = ["scrot", "gnome-screenshot", "import", "maim"]
 # Vision analysis timeout
 VISION_TIMEOUT = int(os.getenv("VISION_TIMEOUT", "30"))  # seconds
 # Debug mode
 DEBUG = os.getenv("DEBUG", "false").lower() in ("true", "1", "yes")
 def get_cache_dir() -> Path:
    """Return the screenshot cache directory, creating it when absent"""
    screenshots_dir = Path.home().joinpath(".cache", "claude-vision-auto")
    # parents=True builds missing parents; exist_ok=True makes repeat calls safe
    screenshots_dir.mkdir(parents=True, exist_ok=True)
    return screenshots_dir
 def load_config() -> Dict[str, Any]:
    """
    Load configuration, layering sources from lowest to highest priority:
    1. Default config (package default_config.yaml)
    2. User config (~/.config/claude-vision-auto/config.yaml), deep-merged
       over the defaults
    3. Environment variables (highest priority): OLLAMA_URL, VISION_MODEL,
       VISION_TIMEOUT, IDLE_THRESHOLD, RESPONSE_DELAY, SCREENSHOT_TIMEOUT,
       OUTPUT_BUFFER_SIZE and DEBUG
    Returns:
        The merged configuration dictionary.
    Raises:
        ValueError: If the user config's top level is not a mapping, or a
            numeric environment override cannot be converted.
    """
    # Read as UTF-8 explicitly instead of relying on the locale's encoding
    default_config_path = Path(__file__).parent / "default_config.yaml"
    with open(default_config_path, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)
    # Load user config if exists
    user_config_path = get_config_dir() / "config.yaml"
    if user_config_path.exists():
        with open(user_config_path, 'r', encoding='utf-8') as f:
            user_config = yaml.safe_load(f)
        # An empty file parses to None: nothing to merge in that case
        if user_config:
            if not isinstance(user_config, dict):
                raise ValueError(
                    f"User config {user_config_path} must contain a YAML "
                    f"mapping at the top level, got {type(user_config).__name__}"
                )
            config = deep_merge(config, user_config)
    # Override with environment variables: (variable, section, key, converter)
    env_overrides = (
        ("OLLAMA_URL", "ollama", "url", str),
        ("VISION_MODEL", "ollama", "model", str),
        ("VISION_TIMEOUT", "ollama", "timeout", int),
        ("IDLE_THRESHOLD", "timing", "idle_threshold", float),
        ("RESPONSE_DELAY", "timing", "response_delay", float),
        ("SCREENSHOT_TIMEOUT", "timing", "screenshot_timeout", int),
        ("OUTPUT_BUFFER_SIZE", "buffer", "size", int),
    )
    for env_name, section, key, convert in env_overrides:
        raw_value = os.getenv(env_name)
        # Unset and empty variables both leave the file-based value in place
        if raw_value:
            config[section][key] = convert(raw_value)
    debug_value = os.getenv("DEBUG")
    if debug_value:
        config['debug'] = debug_value.lower() in ("true", "1", "yes")
    return config
 def deep_merge(base: Dict, override: Dict) -> Dict:
    """Recursively merge ``override`` into ``base`` and return a new dict.
    When a key holds a dict on both sides the two dicts are merged
    recursively; otherwise the value from ``override`` wins. Keys keep the
    order of ``base``, followed by keys that only exist in ``override``.
    Neither input is modified, and the result shares no mutable values
    with them, so changing the merged config cannot alter the inputs.
    """
    import copy  # local import keeps this block self-contained
    merged = {}
    for key, base_value in base.items():
        if key not in override:
            merged[key] = copy.deepcopy(base_value)
            continue
        override_value = override[key]
        if isinstance(base_value, dict) and isinstance(override_value, dict):
            merged[key] = deep_merge(base_value, override_value)
        else:
            merged[key] = copy.deepcopy(override_value)
    # Keys that only the override defines are appended in override order
    for key, override_value in override.items():
        if key not in base:
            merged[key] = copy.deepcopy(override_value)
    return merged
 def create_user_config():
    """Create the user config file as a copy of the packaged defaults.
    Returns:
        Path of the newly created config file, or None if a config file
        already exists (the existing file is left untouched).
    """
    user_config_path = get_config_dir() / "config.yaml"
    if user_config_path.exists():
        return None
    # Copy default config to user config
    default_config_path = Path(__file__).parent / "default_config.yaml"
    with open(default_config_path, 'r', encoding='utf-8') as f:
        default_content = f.read()
    try:
        # Mode 'x' fails if the file already exists, so a config created
        # after the exists() check above is never overwritten
        with open(user_config_path, 'x', encoding='utf-8') as f:
            f.write(default_content)
    except FileExistsError:
        return None
    return user_config_path
 def create_user_config_cli():
    """CLI command to create user configuration file.
    Prints a "created" message with customization hints when
    create_user_config() wrote a new file, or an "already exists" notice
    when it returned None, followed in both cases by an edit hint.
    Always exits the process with status 0.
    """
    import sys
    config_path = create_user_config()
    if config_path:
        print("✅ Created user configuration file:")
        print(f"   {config_path}")
        print()
        print("Edit this file to customize:")
        print("   - Vision model (minicpm-v, llama3.2-vision, llava)")
        print("   - Vision prompt for better responses")
        print("   - Timing settings (idle threshold, response delay)")
        print("   - Approval keywords")
    else:
        # create_user_config() returns None when the file already exists,
        # so the path has to be rebuilt for the message
        config_path = get_config_dir() / "config.yaml"
        print("ℹ️  Configuration file already exists:")
        print(f"   {config_path}")
    # Both outcomes end with the same blank line and edit hint
    print()
    print(f"Edit with: nano {config_path}")
    sys.exit(0)
 # Load configuration
 # NOTE: this call runs at module import time, so importing this module reads
 # the YAML config file(s) and the environment immediately.
 _config = load_config()
 # Export commonly used values
 # Module-level snapshots of the loaded config; reload_config() reassigns them.
 OLLAMA_URL = _config['ollama']['url']
 VISION_MODEL = _config['ollama']['model']
 VISION_TIMEOUT = _config['ollama']['timeout']
 IDLE_THRESHOLD = _config['timing']['idle_threshold']
 RESPONSE_DELAY = _config['timing']['response_delay']
 SCREENSHOT_TIMEOUT = _config['timing']['screenshot_timeout']
 VISION_PROMPT = _config['vision_prompt']
 RESPONSE_MAPPING = _config['response_mapping']
 APPROVAL_KEYWORDS = _config['approval_keywords']
 OUTPUT_BUFFER_SIZE = _config['buffer']['size']
 SCREENSHOT_TOOLS = _config['screenshot']['tools']
 SCREENSHOT_CACHE_CLEANUP = _config['screenshot']['cache_cleanup_seconds']
 DEBUG = _config['debug']
 def get_config() -> Dict[str, Any]:
    """Get a copy of the full configuration dict.
    A deep copy is returned so that callers can modify nested sections
    (for example ``config['ollama']``) without altering the module-level
    configuration; a shallow copy would still share those nested dicts.
    """
    import copy  # local import keeps this block self-contained
    return copy.deepcopy(_config)
 def reload_config():
    """Re-run load_config() and refresh every exported module-level value.
    The module-level ``_config`` dict and each exported constant are
    rebound to the freshly loaded values.
    """
    global _config
    global OLLAMA_URL, VISION_MODEL, VISION_TIMEOUT
    global IDLE_THRESHOLD, RESPONSE_DELAY, SCREENSHOT_TIMEOUT
    global VISION_PROMPT, RESPONSE_MAPPING, APPROVAL_KEYWORDS
    global OUTPUT_BUFFER_SIZE, SCREENSHOT_TOOLS, SCREENSHOT_CACHE_CLEANUP
    global DEBUG
    fresh = load_config()
    _config = fresh
    # Ollama section
    ollama_section = fresh['ollama']
    OLLAMA_URL = ollama_section['url']
    VISION_MODEL = ollama_section['model']
    VISION_TIMEOUT = ollama_section['timeout']
    # Timing section
    timing_section = fresh['timing']
    IDLE_THRESHOLD = timing_section['idle_threshold']
    RESPONSE_DELAY = timing_section['response_delay']
    SCREENSHOT_TIMEOUT = timing_section['screenshot_timeout']
    # Top-level prompt, mapping and keyword settings
    VISION_PROMPT = fresh['vision_prompt']
    RESPONSE_MAPPING = fresh['response_mapping']
    APPROVAL_KEYWORDS = fresh['approval_keywords']
    OUTPUT_BUFFER_SIZE = fresh['buffer']['size']
    # Screenshot section
    screenshot_section = fresh['screenshot']
    SCREENSHOT_TOOLS = screenshot_section['tools']
    SCREENSHOT_CACHE_CLEANUP = screenshot_section['cache_cleanup_seconds']
    DEBUG = fresh['debug']
--- a/claude_vision_auto/default_config.yaml
+++ b/claude_vision_auto/default_config.yaml
@ -0,0 +1,67 @@
 # Claude Vision Auto Configuration
 # Ollama Settings
 ollama:
  url: "http://localhost:11434/api/generate"
  model: "minicpm-v:latest"  # Options: minicpm-v:latest, llama3.2-vision:latest, llava:latest
  timeout: 30
 # Timing Settings
 timing:
  idle_threshold: 3.0      # Seconds of no output before taking screenshot
  response_delay: 1.0      # Seconds to wait before sending response
  screenshot_timeout: 5    # Screenshot capture timeout
 # Vision Analysis Prompt
 vision_prompt: |
  You are analyzing a terminal screenshot showing a Claude Code approval prompt.
  Look for:
  - Numbered menu options like "1. Yes", "2. Yes, allow all", "3. No"
  - Questions asking for approval (create/edit/delete files)
  - Yes/No questions with (y/n) or [y/n] format
  RESPONSE RULES:
  - If you see numbered options with "Yes" as option 1: respond with ONLY "1"
  - If you see a yes/no question with (y/n) or [y/n]: respond with ONLY "y"
  - If you don't see any prompt requiring input: respond with ONLY "WAIT"
  - NEVER provide explanations, ONLY the single response character/number
  Your response (one character/number only):
 # Response Mapping
 # What the vision model should output for different scenarios
 response_mapping:
  approval_prompt: "1"      # Response for numbered "Yes" option
  yes_no_question: "y"      # Response for y/n questions
  no_action: "WAIT"         # When no input is needed
 # Approval Detection Keywords
 # Keywords that indicate we might be waiting for approval
 approval_keywords:
  - "Yes"
  - "No"
  - "(y/n)"
  - "[y/n]"
  - "Approve"
  - "Do you want to"
  - "create"
  - "edit"
  - "delete"
  - "Allow"
 # Buffer Settings
 buffer:
  size: 4096  # Output buffer size in bytes
 # Debug Mode
 debug: false  # Set to true for verbose logging
 # Screenshot Settings
 screenshot:
  tools:
    - "scrot"
    - "gnome-screenshot"
    - "import"
    - "maim"
  cache_cleanup_seconds: 3600  # Clean up screenshots older than 1 hour
--- a/claude_vision_auto/screenshot.py
+++ b/claude_vision_auto/screenshot.py
@ -100,15 +100,18 @@ def take_screenshot() -> Optional[str]:
        return None
-def cleanup_old_screenshots(max_age_seconds: int = 3600):
+def cleanup_old_screenshots(max_age_seconds: int = None):
    """
    Clean up old screenshots from cache directory
    Args:
-        max_age_seconds: Maximum age of screenshots to keep (default 1 hour)
+        max_age_seconds: Maximum age of screenshots to keep (default from config)
    """
    import time
    if max_age_seconds is None:
        max_age_seconds = config.SCREENSHOT_CACHE_CLEANUP
    cache_dir = config.get_cache_dir()
    current_time = time.time()
--- a/claude_vision_auto/vision_analyzer.py
+++ b/claude_vision_auto/vision_analyzer.py
@ -10,26 +10,6 @@ from typing import Optional
 from . import config
 VISION_PROMPT = """You are analyzing a terminal screenshot showing a Claude Code approval prompt.
 Look for:
 - Menu options like "1. Yes", "2. Yes, allow all", "3. No"
 - Questions asking for approval (create/edit/delete files)
 - Yes/No questions
 If you see an approval prompt with numbered options:
 - Respond ONLY with the number to select "Yes" (usually "1")
 - Output format: Just the number, nothing else
 If you see a yes/no question:
 - Respond with: y
 If you don't see any prompt requiring input:
 - Respond with: WAIT
 Your response (one word only):"""
 class VisionAnalyzer:
    """Analyzes screenshots using vision model"""
@ -62,7 +42,7 @@ class VisionAnalyzer:
            # Send to Ollama
            payload = {
                "model": self.model,
-                "prompt": VISION_PROMPT,
+                "prompt": config.VISION_PROMPT,
                "images": [image_data],
                "stream": False
            }
--- a/requirements.txt
+++ b/requirements.txt
@ -1 +1,2 @@
 requests>=2.31.0
 pyyaml>=6.0
--- a/setup.py
+++ b/setup.py
@ -38,10 +38,15 @@ setup(
    python_requires=">=3.8",
    install_requires=[
        "requests>=2.31.0",
        "pyyaml>=6.0",
    ],
    package_data={
        "claude_vision_auto": ["default_config.yaml"],
    },
    entry_points={
        "console_scripts": [
            "claude-vision=claude_vision_auto.main:main",
            "claude-vision-config=claude_vision_auto.config:create_user_config_cli",
        ],
    },
    include_package_data=True,
`@ -1 +1,2 @@`
	`requests>=2.31.0`	`requests>=2.31.0`
		`pyyaml>=6.0`