""" Vision analysis using MiniCPM-V via Ollama """ import base64 import requests from pathlib import Path from typing import Optional from . import config class VisionAnalyzer: """Analyzes screenshots using vision model""" def __init__(self, ollama_url: str = None, model: str = None): """ Initialize vision analyzer Args: ollama_url: Ollama API URL (default from config) model: Vision model name (default from config) """ self.ollama_url = ollama_url or config.OLLAMA_URL self.model = model or config.VISION_MODEL def analyze_screenshot(self, image_path: str) -> Optional[str]: """ Analyze screenshot and determine what response to give Args: image_path: Path to screenshot image Returns: Response to send ("1", "y", "WAIT", etc.) or None on error """ try: # Read and encode image with open(image_path, 'rb') as f: image_data = base64.b64encode(f.read()).decode('utf-8') # Send to Ollama payload = { "model": self.model, "prompt": config.VISION_PROMPT, "images": [image_data], "stream": False } if config.DEBUG: print(f"[DEBUG] Sending to Ollama: {self.ollama_url}") print(f"[DEBUG] Model: {self.model}") response = requests.post( self.ollama_url, json=payload, timeout=config.VISION_TIMEOUT ) response.raise_for_status() result = response.json() answer = result.get('response', '').strip() if config.DEBUG: print(f"[DEBUG] Vision model response: {answer}") return answer except requests.Timeout: if config.DEBUG: print("[DEBUG] Vision analysis timeout") return None except requests.RequestException as e: if config.DEBUG: print(f"[DEBUG] Vision API error: {e}") return None except Exception as e: if config.DEBUG: print(f"[DEBUG] Unexpected error in vision analysis: {e}") return None def test_connection(self) -> bool: """ Test if Ollama is accessible and model is available Returns: True if connection successful, False otherwise """ try: # Try to list tags tags_url = self.ollama_url.replace('/api/generate', '/api/tags') response = requests.get(tags_url, timeout=5) response.raise_for_status() data = response.json() models = [m['name'] for m in data.get('models', [])] if config.DEBUG: print(f"[DEBUG] Available models: {models}") # Check if our model is available model_available = any(self.model in m for m in models) if not model_available: print(f"Warning: Model '{self.model}' not found in Ollama") print(f"Available models: {', '.join(models)}") return model_available except Exception as e: if config.DEBUG: print(f"[DEBUG] Connection test failed: {e}") return False