feat: Add YAML-based configuration system

- Add default_config.yaml with customizable settings
- Model selection (minicpm-v, llama3.2-vision, llava)
- Customizable vision prompt for better responses
- Timing parameters (idle threshold, response delay)
- Approval keywords configuration
- User config at ~/.config/claude-vision-auto/config.yaml
- New command: claude-vision-config to generate user config
- Environment variables still override config files
- Added PyYAML dependency

Configuration priority:
1. Environment variables (highest)
2. User config (~/.config/claude-vision-auto/config.yaml)
3. Default config (package default)

Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Jean-Philippe Brule <jp@svrnty.io>
This commit is contained in:
Svrnty 2025-10-29 10:19:35 -04:00
parent 41cecca0e2
commit 52b0813b64
9 changed files with 411 additions and 54 deletions

9
MANIFEST.in Normal file
View File

@ -0,0 +1,9 @@
include README.md
include LICENSE
include CHANGELOG.md
include QUICKSTART.md
include requirements.txt
include requirements-dev.txt
recursive-include claude_vision_auto *.yaml
recursive-include docs *.md
recursive-include examples *.sh

129
QUICKSTART.md Normal file
View File

@ -0,0 +1,129 @@
# Quick Start Guide
Get Claude Vision Auto running in 5 minutes.
## Prerequisites Check
```bash
# Check Claude Code
claude --version
# Check Docker
docker ps
# Check Python
python3 --version
```
## Installation
```bash
# Change into your clone of the repository (adjust the path to your system)
cd /home/svrnty/claude-vision-auto
# Install system dependencies
sudo apt-get update && sudo apt-get install -y scrot
# Install Python package
make install
```
## Start Ollama (if not running)
```bash
# Check if running
docker ps | grep ollama
# If not running, start it
docker run -d \
-p 11434:11434 \
--name ollama \
--restart unless-stopped \
ollama/ollama:latest
# Pull vision model
docker exec ollama ollama pull minicpm-v:latest
```
## Test Installation
```bash
# Verify command
which claude-vision
# Test connection
claude-vision --help 2>&1 | head -5
```
## First Run
```bash
# Start interactive session
claude-vision
# You should see:
# [Claude Vision Auto] Testing Ollama connection...
# [Claude Vision Auto] Connected to Ollama
# [Claude Vision Auto] Using model: minicpm-v:latest
```
## Test Auto-Approval
```bash
# Try a simple command
claude-vision "create a test.md file in /tmp"
# Watch for auto-approval when prompted:
# [Vision] Analyzing prompt...
# [Vision] Response: 1
# [Vision] Response sent
```
## Troubleshooting
### Ollama Not Connected
```bash
docker start ollama
docker exec ollama ollama pull minicpm-v:latest
```
### Screenshot Fails
```bash
sudo apt-get install scrot
scrot /tmp/test.png # Test it works
```
### Command Not Found
```bash
export PATH="$HOME/.local/bin:$PATH"
source ~/.bashrc
```
## Next Steps
- Read [README.md](README.md) for full documentation
- See [docs/USAGE.md](docs/USAGE.md) for usage examples
- Check [docs/INSTALLATION.md](docs/INSTALLATION.md) for detailed setup
## Quick Configuration
Add to `~/.bashrc`:
```bash
# Claude Vision Auto
export PATH="$HOME/.local/bin:$PATH"
alias cv="claude-vision"
alias cvd="DEBUG=true claude-vision"
```
Reload:
```bash
source ~/.bashrc
cv # Now you can use 'cv' instead of 'claude-vision'
```
## Support
- Issues: https://git.openharbor.io/svrnty/claude-vision-auto/issues
- Documentation: See README.md and docs/
- Debug mode: `DEBUG=true claude-vision`

38
bin/claude-vision-config Normal file
View File

@ -0,0 +1,38 @@
#!/usr/bin/env python3
"""
Generate user configuration file for Claude Vision Auto
"""
import sys
from pathlib import Path
from claude_vision_auto.config import create_user_config, get_config_dir
def main():
    """Create the user configuration file and report where it lives.

    Calls ``create_user_config``. If it returns a path, a short guide to the
    customizable settings is printed. If it returns a falsy value, the
    "already exists" notice is printed using the path built from
    ``get_config_dir`` and the process exits with status 0.
    """
    created_path = create_user_config()

    if not created_path:
        # Nothing was created: point the user at the existing file instead
        existing_path = get_config_dir() / "config.yaml"
        notice = (
            " Configuration file already exists:",
            f" {existing_path}",
            "",
            f"Edit with: nano {existing_path}",
        )
        for line in notice:
            print(line)
        sys.exit(0)

    guide = (
        "✅ Created user configuration file:",
        f" {created_path}",
        "",
        "Edit this file to customize:",
        " - Vision model (minicpm-v, llama3.2-vision, llava)",
        " - Vision prompt for better responses",
        " - Timing settings (idle threshold, response delay)",
        " - Approval keywords",
        "",
        f"Edit with: nano {created_path}",
    )
    for line in guide:
        print(line)


if __name__ == '__main__':
    main()

View File

@ -3,44 +3,169 @@ Configuration for Claude Vision Auto
""" """
import os import os
import yaml
from pathlib import Path from pathlib import Path
from typing import Dict, Any
# Ollama configuration
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434/api/generate")
VISION_MODEL = os.getenv("VISION_MODEL", "minicpm-v:latest")
# Timing configuration def get_config_dir() -> Path:
IDLE_THRESHOLD = float(os.getenv("IDLE_THRESHOLD", "3.0")) # seconds of no output before screenshot """Get configuration directory"""
RESPONSE_DELAY = float(os.getenv("RESPONSE_DELAY", "1.0")) # seconds to wait before sending response config_dir = Path.home() / ".config" / "claude-vision-auto"
config_dir.mkdir(parents=True, exist_ok=True)
return config_dir
# Buffer configuration
OUTPUT_BUFFER_SIZE = int(os.getenv("OUTPUT_BUFFER_SIZE", "4096")) # bytes
# Keywords that suggest we're waiting for approval
APPROVAL_KEYWORDS = [
"Yes",
"No",
"(y/n)",
"[y/n]",
"Approve",
"Do you want to",
"create",
"edit",
"delete"
]
# Screenshot configuration
SCREENSHOT_TIMEOUT = int(os.getenv("SCREENSHOT_TIMEOUT", "5")) # seconds
SCREENSHOT_TOOLS = ["scrot", "gnome-screenshot", "import", "maim"]
# Vision analysis timeout
VISION_TIMEOUT = int(os.getenv("VISION_TIMEOUT", "30")) # seconds
# Debug mode
DEBUG = os.getenv("DEBUG", "false").lower() in ("true", "1", "yes")
def get_cache_dir() -> Path: def get_cache_dir() -> Path:
"""Get cache directory for screenshots""" """Get cache directory for screenshots"""
cache_dir = Path.home() / ".cache" / "claude-vision-auto" cache_dir = Path.home() / ".cache" / "claude-vision-auto"
cache_dir.mkdir(parents=True, exist_ok=True) cache_dir.mkdir(parents=True, exist_ok=True)
return cache_dir return cache_dir
def load_config() -> Dict[str, Any]:
    """
    Build the effective configuration by layering three sources.

    Sources, from lowest to highest priority:
    1. Default config (package default_config.yaml)
    2. User config (~/.config/claude-vision-auto/config.yaml), deep-merged
       over the defaults when the file exists and is non-empty
    3. Environment variables (highest priority)

    Recognized environment variables: OLLAMA_URL, VISION_MODEL,
    VISION_TIMEOUT, IDLE_THRESHOLD, RESPONSE_DELAY, SCREENSHOT_TIMEOUT,
    OUTPUT_BUFFER_SIZE and DEBUG. Unset or empty variables are ignored.

    Returns:
        The merged configuration dictionary.

    Raises:
        ValueError: If a numeric environment variable cannot be parsed.
    """
    # Load default config
    default_config_path = Path(__file__).parent / "default_config.yaml"
    with open(default_config_path, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)

    # Load user config if exists
    user_config_path = get_config_dir() / "config.yaml"
    if user_config_path.exists():
        with open(user_config_path, 'r', encoding='utf-8') as f:
            user_config = yaml.safe_load(f)

        # An empty file yields a falsy result; there is nothing to merge then
        if user_config:
            config = deep_merge(config, user_config)

    # Override with environment variables: (variable, section, key, converter).
    # VISION_TIMEOUT, SCREENSHOT_TIMEOUT and OUTPUT_BUFFER_SIZE were honoured
    # before the YAML config existed, so they stay overridable here.
    env_overrides = (
        ("OLLAMA_URL", "ollama", "url", str),
        ("VISION_MODEL", "ollama", "model", str),
        ("VISION_TIMEOUT", "ollama", "timeout", int),
        ("IDLE_THRESHOLD", "timing", "idle_threshold", float),
        ("RESPONSE_DELAY", "timing", "response_delay", float),
        ("SCREENSHOT_TIMEOUT", "timing", "screenshot_timeout", int),
        ("OUTPUT_BUFFER_SIZE", "buffer", "size", int),
    )
    for env_name, section, key, convert in env_overrides:
        raw_value = os.getenv(env_name)
        if raw_value:
            config[section][key] = convert(raw_value)

    # DEBUG is a top-level boolean flag rather than a section entry
    debug_value = os.getenv("DEBUG")
    if debug_value:
        config['debug'] = debug_value.lower() in ("true", "1", "yes")

    return config
def deep_merge(base: Dict, override: Dict) -> Dict:
    """Return a new dict holding ``base`` with ``override`` layered on top.

    When a key maps to a dict on both sides, the two dicts are merged
    recursively. In every other case the value from ``override`` wins.
    ``base`` itself is not modified.
    """
    merged = base.copy()
    for name, incoming in override.items():
        existing = merged.get(name)
        if isinstance(existing, dict) and isinstance(incoming, dict):
            incoming = deep_merge(existing, incoming)
        merged[name] = incoming
    return merged
def create_user_config():
    """Write the packaged default config to the user's config path.

    Returns the path of the newly written ``config.yaml``, or ``None`` when
    a file already exists there (the existing file is left untouched).
    """
    target_path = get_config_dir() / "config.yaml"
    if target_path.exists():
        return None

    # Seed the user file with the contents of the package's default config
    template_path = Path(__file__).parent / "default_config.yaml"
    target_path.write_text(template_path.read_text())
    return target_path
def create_user_config_cli():
    """CLI command that creates the user configuration file.

    Prints a customization guide when ``create_user_config`` returns a path.
    Otherwise prints the "already exists" notice for the expected file
    location and exits with status 0.
    """
    import sys

    new_path = create_user_config()

    if not new_path:
        # Nothing was created: point the user at the existing file instead
        current_path = get_config_dir() / "config.yaml"
        for line in (
            " Configuration file already exists:",
            f" {current_path}",
            "",
            f"Edit with: nano {current_path}",
        ):
            print(line)
        sys.exit(0)

    for line in (
        "✅ Created user configuration file:",
        f" {new_path}",
        "",
        "Edit this file to customize:",
        " - Vision model (minicpm-v, llama3.2-vision, llava)",
        " - Vision prompt for better responses",
        " - Timing settings (idle threshold, response delay)",
        " - Approval keywords",
        "",
        f"Edit with: nano {new_path}",
    ):
        print(line)
# Load configuration once, when this module is first imported
_config = load_config()

# Export commonly used values as module-level names so callers can read
# them as attributes (e.g. config.OLLAMA_URL) instead of indexing the dict.
# Ollama section
OLLAMA_URL = _config['ollama']['url']
VISION_MODEL = _config['ollama']['model']
VISION_TIMEOUT = _config['ollama']['timeout']
# Timing section
IDLE_THRESHOLD = _config['timing']['idle_threshold']
RESPONSE_DELAY = _config['timing']['response_delay']
SCREENSHOT_TIMEOUT = _config['timing']['screenshot_timeout']
# Top-level prompt, response mapping and approval keywords
VISION_PROMPT = _config['vision_prompt']
RESPONSE_MAPPING = _config['response_mapping']
APPROVAL_KEYWORDS = _config['approval_keywords']
# Buffer and screenshot sections
OUTPUT_BUFFER_SIZE = _config['buffer']['size']
SCREENSHOT_TOOLS = _config['screenshot']['tools']
SCREENSHOT_CACHE_CLEANUP = _config['screenshot']['cache_cleanup_seconds']
# Top-level debug flag
DEBUG = _config['debug']
def get_config() -> Dict[str, Any]:
    """Get full configuration dict

    Returns:
        An independent deep copy of the loaded configuration. Nested
        sections are copied too, so changes a caller makes to the result
        never reach the module's live configuration.
    """
    # Local import keeps this block self-contained; a shallow .copy() would
    # still share the nested section dicts and lists with _config.
    import copy

    return copy.deepcopy(_config)
def reload_config():
    """Re-run ``load_config`` and refresh every exported module-level value.

    Rebinds ``_config`` and then each exported constant from the freshly
    loaded dict, in the same order as the initial module-level export.
    """
    global _config
    global OLLAMA_URL, VISION_MODEL, VISION_TIMEOUT
    global IDLE_THRESHOLD, RESPONSE_DELAY, SCREENSHOT_TIMEOUT
    global VISION_PROMPT, RESPONSE_MAPPING, APPROVAL_KEYWORDS
    global OUTPUT_BUFFER_SIZE, SCREENSHOT_TOOLS, SCREENSHOT_CACHE_CLEANUP
    global DEBUG

    _config = load_config()

    # Each section is looked up immediately before its values are exported
    ollama_section = _config['ollama']
    OLLAMA_URL = ollama_section['url']
    VISION_MODEL = ollama_section['model']
    VISION_TIMEOUT = ollama_section['timeout']

    timing_section = _config['timing']
    IDLE_THRESHOLD = timing_section['idle_threshold']
    RESPONSE_DELAY = timing_section['response_delay']
    SCREENSHOT_TIMEOUT = timing_section['screenshot_timeout']

    VISION_PROMPT = _config['vision_prompt']
    RESPONSE_MAPPING = _config['response_mapping']
    APPROVAL_KEYWORDS = _config['approval_keywords']

    OUTPUT_BUFFER_SIZE = _config['buffer']['size']

    screenshot_section = _config['screenshot']
    SCREENSHOT_TOOLS = screenshot_section['tools']
    SCREENSHOT_CACHE_CLEANUP = screenshot_section['cache_cleanup_seconds']

    DEBUG = _config['debug']

View File

@ -0,0 +1,67 @@
# Claude Vision Auto Configuration
# Ollama Settings
ollama:
url: "http://localhost:11434/api/generate"
model: "minicpm-v:latest" # Options: minicpm-v:latest, llama3.2-vision:latest, llava:latest
timeout: 30
# Timing Settings
timing:
idle_threshold: 3.0 # Seconds of no output before taking screenshot
response_delay: 1.0 # Seconds to wait before sending response
screenshot_timeout: 5 # Screenshot capture timeout
# Vision Analysis Prompt
vision_prompt: |
You are analyzing a terminal screenshot showing a Claude Code approval prompt.
Look for:
- Numbered menu options like "1. Yes", "2. Yes, allow all", "3. No"
- Questions asking for approval (create/edit/delete files)
- Yes/No questions with (y/n) or [y/n] format
RESPONSE RULES:
- If you see numbered options with "Yes" as option 1: respond with ONLY "1"
- If you see a yes/no question with (y/n) or [y/n]: respond with ONLY "y"
- If you don't see any prompt requiring input: respond with ONLY "WAIT"
- NEVER provide explanations, ONLY the single response character/number
Your response (one character/number only):
# Response Mapping
# What the vision model should output for different scenarios
response_mapping:
approval_prompt: "1" # Response for numbered "Yes" option
yes_no_question: "y" # Response for y/n questions
no_action: "WAIT" # When no input is needed
# Approval Detection Keywords
# Keywords that indicate we might be waiting for approval
approval_keywords:
- "Yes"
- "No"
- "(y/n)"
- "[y/n]"
- "Approve"
- "Do you want to"
- "create"
- "edit"
- "delete"
- "Allow"
# Buffer Settings
buffer:
size: 4096 # Output buffer size in bytes
# Debug Mode
debug: false # Set to true for verbose logging
# Screenshot Settings
screenshot:
tools:
- "scrot"
- "gnome-screenshot"
- "import"
- "maim"
cache_cleanup_seconds: 3600 # Clean up screenshots older than 1 hour

View File

@ -100,15 +100,18 @@ def take_screenshot() -> Optional[str]:
return None return None
def cleanup_old_screenshots(max_age_seconds: int = 3600): def cleanup_old_screenshots(max_age_seconds: int = None):
""" """
Clean up old screenshots from cache directory Clean up old screenshots from cache directory
Args: Args:
max_age_seconds: Maximum age of screenshots to keep (default 1 hour) max_age_seconds: Maximum age of screenshots to keep (default from config)
""" """
import time import time
if max_age_seconds is None:
max_age_seconds = config.SCREENSHOT_CACHE_CLEANUP
cache_dir = config.get_cache_dir() cache_dir = config.get_cache_dir()
current_time = time.time() current_time = time.time()

View File

@ -10,26 +10,6 @@ from typing import Optional
from . import config from . import config
VISION_PROMPT = """You are analyzing a terminal screenshot showing a Claude Code approval prompt.
Look for:
- Menu options like "1. Yes", "2. Yes, allow all", "3. No"
- Questions asking for approval (create/edit/delete files)
- Yes/No questions
If you see an approval prompt with numbered options:
- Respond ONLY with the number to select "Yes" (usually "1")
- Output format: Just the number, nothing else
If you see a yes/no question:
- Respond with: y
If you don't see any prompt requiring input:
- Respond with: WAIT
Your response (one word only):"""
class VisionAnalyzer: class VisionAnalyzer:
"""Analyzes screenshots using vision model""" """Analyzes screenshots using vision model"""
@ -62,7 +42,7 @@ class VisionAnalyzer:
# Send to Ollama # Send to Ollama
payload = { payload = {
"model": self.model, "model": self.model,
"prompt": VISION_PROMPT, "prompt": config.VISION_PROMPT,
"images": [image_data], "images": [image_data],
"stream": False "stream": False
} }

View File

@ -1 +1,2 @@
requests>=2.31.0 requests>=2.31.0
pyyaml>=6.0

View File

@ -38,10 +38,15 @@ setup(
python_requires=">=3.8", python_requires=">=3.8",
install_requires=[ install_requires=[
"requests>=2.31.0", "requests>=2.31.0",
"pyyaml>=6.0",
], ],
package_data={
"claude_vision_auto": ["default_config.yaml"],
},
entry_points={ entry_points={
"console_scripts": [ "console_scripts": [
"claude-vision=claude_vision_auto.main:main", "claude-vision=claude_vision_auto.main:main",
"claude-vision-config=claude_vision_auto.config:create_user_config_cli",
], ],
}, },
include_package_data=True, include_package_data=True,