feat: Add YAML-based configuration system

- Add default_config.yaml with customizable settings
- Model selection (minicpm-v, llama3.2-vision, llava)
- Customizable vision prompt for better responses
- Timing parameters (idle threshold, response delay)
- Approval keywords configuration
- User config at ~/.config/claude-vision-auto/config.yaml
- New command: claude-vision-config to generate user config
- Environment variables still override config files
- Added PyYAML dependency

Configuration priority:
1. Environment variables (highest)
2. User config (~/.config/claude-vision-auto/config.yaml)
3. Default config (package default)

Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Jean-Philippe Brule <jp@svrnty.io>
2025-10-29 10:19:35 -04:00
parent 41cecca0e2
commit 52b0813b64
9 changed files with 411 additions and 54 deletions
@@ -3,44 +3,169 @@ Configuration for Claude Vision Auto
 """

 import os
+import yaml
 from pathlib import Path
+from typing import Dict, Any

-# Ollama configuration
-OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434/api/generate")
-VISION_MODEL = os.getenv("VISION_MODEL", "minicpm-v:latest")

-# Timing configuration
-IDLE_THRESHOLD = float(os.getenv("IDLE_THRESHOLD", "3.0"))  # seconds of no output before screenshot
-RESPONSE_DELAY = float(os.getenv("RESPONSE_DELAY", "1.0"))  # seconds to wait before sending response
+def get_config_dir() -> Path:
+    """Get configuration directory"""
+    config_dir = Path.home() / ".config" / "claude-vision-auto"
+    config_dir.mkdir(parents=True, exist_ok=True)
+    return config_dir

-# Buffer configuration
-OUTPUT_BUFFER_SIZE = int(os.getenv("OUTPUT_BUFFER_SIZE", "4096"))  # bytes
-
-# Keywords that suggest we're waiting for approval
-APPROVAL_KEYWORDS = [
-    "Yes",
-    "No",
-    "(y/n)",
-    "[y/n]",
-    "Approve",
-    "Do you want to",
-    "create",
-    "edit",
-    "delete"
-]
-
-# Screenshot configuration
-SCREENSHOT_TIMEOUT = int(os.getenv("SCREENSHOT_TIMEOUT", "5"))  # seconds
-SCREENSHOT_TOOLS = ["scrot", "gnome-screenshot", "import", "maim"]
-
-# Vision analysis timeout
-VISION_TIMEOUT = int(os.getenv("VISION_TIMEOUT", "30"))  # seconds
-
-# Debug mode
-DEBUG = os.getenv("DEBUG", "false").lower() in ("true", "1", "yes")

 def get_cache_dir() -> Path:
    """
    Get cache directory for screenshots

    The directory is ``~/.cache/claude-vision-auto``. It is created on
    demand (including missing parents) and returned as a ``Path``.
    """
    cache_dir = Path.home() / ".cache" / "claude-vision-auto"
    cache_dir.mkdir(parents=True, exist_ok=True)
    return cache_dir
+
+
+def load_config() -> Dict[str, Any]:
+    """
+    Load configuration from YAML files with priority:
+    1. User config (~/.config/claude-vision-auto/config.yaml)
+    2. Default config (package default_config.yaml)
+    3. Environment variables (highest priority)
+    """
+    # Load default config
+    default_config_path = Path(__file__).parent / "default_config.yaml"
+    with open(default_config_path, 'r') as f:
+        config = yaml.safe_load(f)
+
+    # Load user config if exists
+    user_config_path = get_config_dir() / "config.yaml"
+    if user_config_path.exists():
+        with open(user_config_path, 'r') as f:
+            user_config = yaml.safe_load(f)
+            # Deep merge user config
+            if user_config:
+                config = deep_merge(config, user_config)
+
+    # Override with environment variables
+    if os.getenv("OLLAMA_URL"):
+        config['ollama']['url'] = os.getenv("OLLAMA_URL")
+    if os.getenv("VISION_MODEL"):
+        config['ollama']['model'] = os.getenv("VISION_MODEL")
+    if os.getenv("IDLE_THRESHOLD"):
+        config['timing']['idle_threshold'] = float(os.getenv("IDLE_THRESHOLD"))
+    if os.getenv("RESPONSE_DELAY"):
+        config['timing']['response_delay'] = float(os.getenv("RESPONSE_DELAY"))
+    if os.getenv("DEBUG"):
+        config['debug'] = os.getenv("DEBUG", "false").lower() in ("true", "1", "yes")
+
+    return config
+
+
+def deep_merge(base: Dict, override: Dict) -> Dict:
+    """
+    Deep merge two dictionaries
+
+    Keys present in both inputs whose values are both dicts are merged
+    recursively; for every other key the value from ``override`` replaces
+    (or adds to) the value from ``base``. Neither input dict is modified.
+
+    Args:
+        base: Dictionary providing the starting values.
+        override: Dictionary whose values take precedence.
+
+    Returns:
+        A new dictionary holding the merged result.
+    """
+    # Shallow copy: nested values that are not overridden remain the same
+    # objects as in ``base``, and overriding values are inserted by reference.
+    result = base.copy()
+    for key, value in override.items():
+        if key in result and isinstance(result[key], dict) and isinstance(value, dict):
+            # Both sides are mappings: merge them instead of replacing
+            result[key] = deep_merge(result[key], value)
+        else:
+            # Scalars, lists and type mismatches: the override wins outright
+            result[key] = value
+    return result
+
+
+def create_user_config():
+    """Create user config file with defaults"""
+    config_dir = get_config_dir()
+    user_config_path = config_dir / "config.yaml"
+
+    if not user_config_path.exists():
+        # Copy default config to user config
+        default_config_path = Path(__file__).parent / "default_config.yaml"
+        with open(default_config_path, 'r') as f:
+            default_content = f.read()
+
+        with open(user_config_path, 'w') as f:
+            f.write(default_content)
+
+        return user_config_path
+    return None
+
+
+def create_user_config_cli():
+    """CLI command to create user configuration file"""
+    import sys
+
+    config_path = create_user_config()
+
+    if config_path:
+        print(f"✅ Created user configuration file:")
+        print(f"   {config_path}")
+        print()
+        print("Edit this file to customize:")
+        print(f"   - Vision model (minicpm-v, llama3.2-vision, llava)")
+        print(f"   - Vision prompt for better responses")
+        print(f"   - Timing settings (idle threshold, response delay)")
+        print(f"   - Approval keywords")
+        print()
+        print(f"Edit with: nano {config_path}")
+    else:
+        config_dir = get_config_dir()
+        config_path = config_dir / "config.yaml"
+        print(f"ℹ️  Configuration file already exists:")
+        print(f"   {config_path}")
+        print()
+        print(f"Edit with: nano {config_path}")
+
+    sys.exit(0)
+
+
+# Load configuration
+# Runs once at import time: reads the YAML files and environment overrides.
+_config = load_config()
+
+# Export commonly used values
+# These names are bound to the values loaded above; reload_config() rebinds
+# them on this module after re-reading the configuration.
+OLLAMA_URL = _config['ollama']['url']
+VISION_MODEL = _config['ollama']['model']
+VISION_TIMEOUT = _config['ollama']['timeout']

+IDLE_THRESHOLD = _config['timing']['idle_threshold']
+RESPONSE_DELAY = _config['timing']['response_delay']
+SCREENSHOT_TIMEOUT = _config['timing']['screenshot_timeout']

+VISION_PROMPT = _config['vision_prompt']
+RESPONSE_MAPPING = _config['response_mapping']
+APPROVAL_KEYWORDS = _config['approval_keywords']

+OUTPUT_BUFFER_SIZE = _config['buffer']['size']
+SCREENSHOT_TOOLS = _config['screenshot']['tools']
+SCREENSHOT_CACHE_CLEANUP = _config['screenshot']['cache_cleanup_seconds']

+DEBUG = _config['debug']
+
+
+def get_config() -> Dict[str, Any]:
+    """Get full configuration dict"""
+    return _config.copy()
+
+
+def reload_config():
+    """Reload configuration from files"""
+    global _config, OLLAMA_URL, VISION_MODEL, VISION_TIMEOUT
+    global IDLE_THRESHOLD, RESPONSE_DELAY, SCREENSHOT_TIMEOUT
+    global VISION_PROMPT, RESPONSE_MAPPING, APPROVAL_KEYWORDS
+    global OUTPUT_BUFFER_SIZE, SCREENSHOT_TOOLS, SCREENSHOT_CACHE_CLEANUP, DEBUG
+
+    _config = load_config()
+
+    OLLAMA_URL = _config['ollama']['url']
+    VISION_MODEL = _config['ollama']['model']
+    VISION_TIMEOUT = _config['ollama']['timeout']
+
+    IDLE_THRESHOLD = _config['timing']['idle_threshold']
+    RESPONSE_DELAY = _config['timing']['response_delay']
+    SCREENSHOT_TIMEOUT = _config['timing']['screenshot_timeout']
+
+    VISION_PROMPT = _config['vision_prompt']
+    RESPONSE_MAPPING = _config['response_mapping']
+    APPROVAL_KEYWORDS = _config['approval_keywords']
+
+    OUTPUT_BUFFER_SIZE = _config['buffer']['size']
+    SCREENSHOT_TOOLS = _config['screenshot']['tools']
+    SCREENSHOT_CACHE_CLEANUP = _config['screenshot']['cache_cleanup_seconds']
+
+    DEBUG = _config['debug']
@@ -0,0 +1,67 @@
+# Claude Vision Auto Configuration
+
+# Ollama Settings
+ollama:
+  url: "http://localhost:11434/api/generate"
+  model: "minicpm-v:latest"  # Options: minicpm-v:latest, llama3.2-vision:latest, llava:latest
+  timeout: 30  # Vision analysis timeout in seconds
+
+# Timing Settings
+timing:
+  idle_threshold: 3.0      # Seconds of no output before taking screenshot
+  response_delay: 1.0      # Seconds to wait before sending response
+  screenshot_timeout: 5    # Screenshot capture timeout in seconds
+
+# Vision Analysis Prompt
+vision_prompt: |
+  You are analyzing a terminal screenshot showing a Claude Code approval prompt.
+
+  Look for:
+  - Numbered menu options like "1. Yes", "2. Yes, allow all", "3. No"
+  - Questions asking for approval (create/edit/delete files)
+  - Yes/No questions with (y/n) or [y/n] format
+
+  RESPONSE RULES:
+  - If you see numbered options with "Yes" as option 1: respond with ONLY "1"
+  - If you see a yes/no question with (y/n) or [y/n]: respond with ONLY "y"
+  - If you don't see any prompt requiring input: respond with ONLY "WAIT"
+  - NEVER provide explanations, ONLY the single response character/number
+
+  Your response (one character/number only):
+
+# Response Mapping
+# What the vision model should output for different scenarios
+response_mapping:
+  approval_prompt: "1"      # Response for numbered "Yes" option
+  yes_no_question: "y"      # Response for y/n questions
+  no_action: "WAIT"         # When no input is needed
+
+# Approval Detection Keywords
+# Keywords that indicate we might be waiting for approval
+approval_keywords:
+  - "Yes"
+  - "No"
+  - "(y/n)"
+  - "[y/n]"
+  - "Approve"
+  - "Do you want to"
+  - "create"
+  - "edit"
+  - "delete"
+  - "Allow"
+
+# Buffer Settings
+buffer:
+  size: 4096  # Output buffer size in bytes
+
+# Debug Mode
+debug: false  # Set to true for verbose logging
+
+# Screenshot Settings
+screenshot:
+  tools:
+    - "scrot"
+    - "gnome-screenshot"
+    - "import"
+    - "maim"
+  cache_cleanup_seconds: 3600  # Clean up screenshots older than 1 hour
@@ -100,15 +100,18 @@ def take_screenshot() -> Optional[str]:
        return None


-def cleanup_old_screenshots(max_age_seconds: int = 3600):
+def cleanup_old_screenshots(max_age_seconds: int = None):
    """
    Clean up old screenshots from cache directory

    Args:
-        max_age_seconds: Maximum age of screenshots to keep (default 1 hour)
+        max_age_seconds: Maximum age of screenshots to keep (default from config)
    """
    import time

+    if max_age_seconds is None:
+        max_age_seconds = config.SCREENSHOT_CACHE_CLEANUP
+
    cache_dir = config.get_cache_dir()
    current_time = time.time()

@@ -10,26 +10,6 @@ from typing import Optional
 from . import config


-VISION_PROMPT = """You are analyzing a terminal screenshot showing a Claude Code approval prompt.
-
-Look for:
- Menu options like "1. Yes", "2. Yes, allow all", "3. No"
- Questions asking for approval (create/edit/delete files)
- Yes/No questions
-
-If you see an approval prompt with numbered options:
- Respond ONLY with the number to select "Yes" (usually "1")
- Output format: Just the number, nothing else
-
-If you see a yes/no question:
- Respond with: y
-
-If you don't see any prompt requiring input:
- Respond with: WAIT
-
-Your response (one word only):"""
-
-
 class VisionAnalyzer:
    """Analyzes screenshots using vision model"""

@@ -62,7 +42,7 @@ class VisionAnalyzer:
            # Send to Ollama
            payload = {
                "model": self.model,
-                "prompt": VISION_PROMPT,
+                "prompt": config.VISION_PROMPT,
                "images": [image_data],
                "stream": False
            }