using Codex.Dal.Entities;
using Codex.Dal.Enums;
using System.Net.Http.Json;
using System.Text;
using System.Text.Json.Serialization;

namespace Codex.Dal.Services;

/// <summary>
/// Implementation of the Ollama service for LLM interactions
/// </summary>
public class OllamaService : IOllamaService
{
    private readonly IHttpClientFactory _httpClientFactory;

    public OllamaService(IHttpClientFactory httpClientFactory)
    {
        _httpClientFactory = httpClientFactory;
    }

    public async Task<OllamaResponse> GenerateAsync(
        string endpoint,
        string model,
        string systemPrompt,
        List<Message> contextMessages,
        string userMessage,
        double temperature,
        int maxTokens,
        CancellationToken cancellationToken = default)
    {
        var httpClient = _httpClientFactory.CreateClient();
        httpClient.Timeout = TimeSpan.FromMinutes(5); // Allow for longer generation times

        // Build prompt with system instruction and conversation context
        var promptBuilder = new StringBuilder();

        // Add system prompt
        if (!string.IsNullOrWhiteSpace(systemPrompt))
        {
            promptBuilder.AppendLine($"System: {systemPrompt}");
            promptBuilder.AppendLine();
        }

        // Add conversation context
        foreach (var msg in contextMessages)
        {
            var role = msg.Role switch
            {
                MessageRole.User => "User",
                MessageRole.Assistant => "Assistant",
                MessageRole.System => "System",
                MessageRole.Tool => "Tool",
                _ => "Unknown"
            };
            promptBuilder.AppendLine($"{role}: {msg.Content}");
        }

        // Add current user message
        promptBuilder.AppendLine($"User: {userMessage}");
        promptBuilder.Append("Assistant:");

        // Build request payload
        var payload = new OllamaGenerateRequest
        {
            Model = model,
            Prompt = promptBuilder.ToString(),
            Temperature = temperature,
            Options = new OllamaOptions
            {
                NumPredict = maxTokens,
                Temperature = temperature
            },
            Stream = false
        };

        try
        {
            var response = await httpClient.PostAsJsonAsync(
                $"{endpoint.TrimEnd('/')}/api/generate",
                payload,
                cancellationToken
            );

            response.EnsureSuccessStatusCode();

            var result = await response.Content.ReadFromJsonAsync<OllamaGenerateResponse>(cancellationToken);

            if (result == null)
            {
                throw new InvalidOperationException("Received null response from Ollama API");
            }

            return new OllamaResponse
            {
                Content = result.Response?.Trim() ?? string.Empty,
                InputTokens = result.PromptEvalCount,
                OutputTokens = result.EvalCount
            };
        }
        catch (HttpRequestException ex)
        {
            throw new InvalidOperationException(
                $"Failed to connect to Ollama at {endpoint}. Ensure Ollama is running and the endpoint is correct.",
                ex
            );
        }
        catch (TaskCanceledException ex)
        {
            throw new InvalidOperationException(
                "Request to Ollama timed out. The model may be taking too long to respond.",
                ex
            );
        }
    }

    /// <summary>
    /// Request payload for the Ollama /api/generate endpoint
    /// </summary>
    private record OllamaGenerateRequest
    {
        [JsonPropertyName("model")]
        public string Model { get; init; } = string.Empty;

        [JsonPropertyName("prompt")]
        public string Prompt { get; init; } = string.Empty;

        [JsonPropertyName("temperature")]
        public double Temperature { get; init; }

        [JsonPropertyName("options")]
        public OllamaOptions? Options { get; init; }

        [JsonPropertyName("stream")]
        public bool Stream { get; init; }
    }

    /// <summary>
    /// Options for Ollama generation
    /// </summary>
    private record OllamaOptions
    {
        [JsonPropertyName("num_predict")]
        public int NumPredict { get; init; }

        [JsonPropertyName("temperature")]
        public double Temperature { get; init; }
    }

    /// <summary>
    /// Response from the Ollama /api/generate endpoint
    /// </summary>
    private record OllamaGenerateResponse
    {
        [JsonPropertyName("response")]
        public string? Response { get; init; }

        [JsonPropertyName("model")]
        public string? Model { get; init; }

        [JsonPropertyName("created_at")]
        public string? CreatedAt { get; init; }

        [JsonPropertyName("done")]
        public bool Done { get; init; }

        [JsonPropertyName("total_duration")]
        public long? TotalDuration { get; init; }

        [JsonPropertyName("load_duration")]
        public long? LoadDuration { get; init; }

        [JsonPropertyName("prompt_eval_count")]
        public int? PromptEvalCount { get; init; }

        [JsonPropertyName("prompt_eval_duration")]
        public long? PromptEvalDuration { get; init; }

        [JsonPropertyName("eval_count")]
        public int? EvalCount { get; init; }

        [JsonPropertyName("eval_duration")]
        public long? EvalDuration { get; init; }
    }
}
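
// Usage sketch (illustrative only, not part of the service). The service resolves its
// HttpClient from IHttpClientFactory, so both the factory and the service must be
// registered with the DI container. The registration below assumes a standard
// ASP.NET Core host; the endpoint, model name, and message values are placeholders
// (11434 is Ollama's default port), and `ollamaService` is assumed to arrive via
// constructor injection as IOllamaService.
//
//   // In Program.cs:
//   builder.Services.AddHttpClient();
//   builder.Services.AddScoped<IOllamaService, OllamaService>();
//
//   // At a call site:
//   var reply = await ollamaService.GenerateAsync(
//       endpoint: "http://localhost:11434",
//       model: "llama3",
//       systemPrompt: "You are a helpful assistant.",
//       contextMessages: new List<Message>(),   // prior turns, if any
//       userMessage: "Hello!",
//       temperature: 0.7,
//       maxTokens: 512,
//       cancellationToken: CancellationToken.None);
//   Console.WriteLine(reply.Content);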