using System.Collections.Concurrent;
using OpenHarbor.MCP.Gateway.Core.Models;

namespace OpenHarbor.MCP.Gateway.Infrastructure.Health;

/// <summary>
/// Passive health tracker that monitors server health based on actual request patterns.
/// Tracks consecutive success/failure counts and recent response times without active probing.
/// </summary>
/// <remarks>
/// Per-server state is kept in a <see cref="ConcurrentDictionary{TKey, TValue}"/>, and every
/// read or write of a single server's state is performed while holding a lock on that
/// server's entry.
/// </remarks>
public class PassiveHealthTracker
{
    /// <summary>
    /// Maximum number of response-time samples retained per server for the rolling average.
    /// </summary>
    private const int MaxResponseTimeSamples = 10;

    private readonly ConcurrentDictionary<string, ServerHealthData> _healthData = new();

    /// <summary>
    /// Number of consecutive failures before marking server as unhealthy. Default is 5.
    /// </summary>
    public int UnhealthyThreshold { get; set; } = 5;

    /// <summary>
    /// Number of consecutive successes before marking server as healthy again. Default is 3.
    /// </summary>
    public int HealthyThreshold { get; set; } = 3;

    /// <summary>
    /// Response time threshold for marking requests as slow. Default is 5 seconds.
    /// </summary>
    /// <remarks>
    /// Nothing in this class reads this value; it is only exposed as a setting.
    /// </remarks>
    public TimeSpan SlowResponseThreshold { get; set; } = TimeSpan.FromSeconds(5);

    /// <summary>
    /// Records a successful request to a server.
    /// </summary>
    /// <param name="serverId">Identifier of the server that handled the request.</param>
    /// <param name="responseTime">How long the request took.</param>
    /// <exception cref="ArgumentNullException"><paramref name="serverId"/> is <c>null</c>.</exception>
    public void RecordSuccess(string serverId, TimeSpan responseTime)
    {
        var data = GetOrCreateEntry(serverId);

        lock (data)
        {
            data.ConsecutiveSuccesses++;
            data.ConsecutiveFailures = 0;
            data.LastResponseTime = responseTime;
            data.LastCheck = DateTime.UtcNow;
            data.LastErrorMessage = null;

            // Maintain the rolling window used for the averaged response time.
            data.ResponseTimes.Enqueue(responseTime);
            if (data.ResponseTimes.Count > MaxResponseTimeSamples)
            {
                data.ResponseTimes.Dequeue();
            }

            // Only flip back to healthy after enough successes in a row.
            if (data.ConsecutiveSuccesses >= HealthyThreshold)
            {
                data.IsHealthy = true;
            }
        }
    }

    /// <summary>
    /// Records a failed request to a server.
    /// </summary>
    /// <param name="serverId">Identifier of the server that failed the request.</param>
    /// <param name="errorMessage">Description of the failure; reported as the server's last error.</param>
    /// <exception cref="ArgumentNullException"><paramref name="serverId"/> is <c>null</c>.</exception>
    public void RecordFailure(string serverId, string errorMessage)
    {
        var data = GetOrCreateEntry(serverId);

        lock (data)
        {
            data.ConsecutiveFailures++;
            data.ConsecutiveSuccesses = 0;
            data.LastCheck = DateTime.UtcNow;
            data.LastErrorMessage = errorMessage;

            // Only flip to unhealthy after enough failures in a row.
            if (data.ConsecutiveFailures >= UnhealthyThreshold)
            {
                data.IsHealthy = false;
            }
        }
    }

    /// <summary>
    /// Gets the current health status for a specific server.
    /// </summary>
    /// <param name="serverId">Identifier of the server to look up.</param>
    /// <returns>
    /// The server's status, or <c>null</c> when no request has been recorded for
    /// <paramref name="serverId"/> since the tracker was created or last reset.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="serverId"/> is <c>null</c>.</exception>
    public ServerHealthStatus? GetServerHealth(string serverId)
    {
        return _healthData.TryGetValue(serverId, out var data)
            ? CreateStatus(serverId, data)
            : null;
    }

    /// <summary>
    /// Gets health status for all tracked servers.
    /// </summary>
    /// <returns>One status per tracked server, in no particular order.</returns>
    /// <remarks>
    /// The result is built eagerly, so enumerating it more than once does not re-read
    /// the tracker's state.
    /// </remarks>
    public IEnumerable<ServerHealthStatus> GetAllServerHealth()
    {
        // Enumerate the entries directly instead of looking each key up again.
        return _healthData
            .Select(entry => CreateStatus(entry.Key, entry.Value))
            .ToList();
    }

    /// <summary>
    /// Resets all tracked health data.
    /// </summary>
    public void Reset()
    {
        _healthData.Clear();
    }

    /// <summary>
    /// Returns the tracking entry for <paramref name="serverId"/>, creating it on first use.
    /// </summary>
    private ServerHealthData GetOrCreateEntry(string serverId)
    {
        // Static lambda: uses the key argument so no closure is allocated per call.
        return _healthData.GetOrAdd(serverId, static id => new ServerHealthData { ServerId = id });
    }

    /// <summary>
    /// Builds a status snapshot from a tracking entry while holding the entry's lock.
    /// </summary>
    private static ServerHealthStatus CreateStatus(string serverId, ServerHealthData data)
    {
        lock (data)
        {
            return new ServerHealthStatus
            {
                ServerId = data.ServerId,
                ServerName = serverId, // Default to ID if name not set
                IsHealthy = data.IsHealthy,
                LastCheck = data.LastCheck,
                // Report the rolling average when samples exist, otherwise the last recorded time.
                ResponseTime = data.ResponseTimes.Count > 0
                    ? TimeSpan.FromMilliseconds(data.ResponseTimes.Average(t => t.TotalMilliseconds))
                    : data.LastResponseTime,
                ErrorMessage = data.LastErrorMessage
            };
        }
    }

    /// <summary>
    /// Mutable per-server tracking state. Instances never leave this class and also serve
    /// as the lock object guarding their own fields.
    /// </summary>
    private sealed class ServerHealthData
    {
        public string ServerId { get; set; } = string.Empty;

        public bool IsHealthy { get; set; } = true; // Start as healthy

        public int ConsecutiveSuccesses { get; set; }

        public int ConsecutiveFailures { get; set; }

        public DateTime LastCheck { get; set; }

        public TimeSpan? LastResponseTime { get; set; }

        public string? LastErrorMessage { get; set; }

        public Queue<TimeSpan> ResponseTimes { get; } = new();
    }
}