using System.Collections.Concurrent;
using OpenHarbor.MCP.Gateway.Core.Models;
namespace OpenHarbor.MCP.Gateway.Infrastructure.Health;
/// <summary>
/// Passive health tracker that monitors server health based on actual request patterns.
/// Tracks consecutive successes/failures and recent response times without active probing.
/// </summary>
public class PassiveHealthTracker
{
    /// <summary>
    /// Maximum number of recent response times kept per server for the rolling average.
    /// </summary>
    private const int MaxResponseTimeSamples = 10;

    // Per-server state keyed by server ID. Every read or write of an entry's fields happens
    // while holding a lock on that entry; the dictionary itself only has to make
    // lookup/insert/clear safe across threads.
    private readonly ConcurrentDictionary<string, ServerHealthData> _healthData = new();

    /// <summary>
    /// Number of consecutive failures before marking server as unhealthy. Default is 5.
    /// </summary>
    public int UnhealthyThreshold { get; set; } = 5;

    /// <summary>
    /// Number of consecutive successes before marking server as healthy again. Default is 3.
    /// </summary>
    /// <remarks>
    /// A server that has never been marked unhealthy is already reported as healthy,
    /// because newly tracked servers start in the healthy state.
    /// </remarks>
    public int HealthyThreshold { get; set; } = 3;

    /// <summary>
    /// Response time threshold for marking requests as slow. Default is 5 seconds.
    /// </summary>
    /// <remarks>
    /// No code in this class reads this value; it does not currently influence
    /// the health status computed here.
    /// </remarks>
    public TimeSpan SlowResponseThreshold { get; set; } = TimeSpan.FromSeconds(5);

    /// <summary>
    /// Records a successful request to a server.
    /// </summary>
    /// <param name="serverId">Identifier of the server that handled the request.</param>
    /// <param name="responseTime">How long the request took.</param>
    public void RecordSuccess(string serverId, TimeSpan responseTime)
    {
        var data = GetOrCreate(serverId);

        // Locking on the entry is safe here: ServerHealthData is private to this class and
        // instances are never handed out, so no outside code can take the same lock.
        lock (data)
        {
            data.ConsecutiveSuccesses++;
            data.ConsecutiveFailures = 0;
            data.LastResponseTime = responseTime;
            data.LastCheck = DateTime.UtcNow;
            data.LastErrorMessage = null;

            // Maintain a bounded window of the most recent response times.
            data.ResponseTimes.Enqueue(responseTime);
            if (data.ResponseTimes.Count > MaxResponseTimeSamples)
            {
                data.ResponseTimes.Dequeue();
            }

            // Only flip back to healthy once enough successes have occurred in a row.
            if (data.ConsecutiveSuccesses >= HealthyThreshold)
            {
                data.IsHealthy = true;
            }
        }
    }

    /// <summary>
    /// Records a failed request to a server.
    /// </summary>
    /// <param name="serverId">Identifier of the server the request was sent to.</param>
    /// <param name="errorMessage">Description of the failure; reported as the server's last error.</param>
    public void RecordFailure(string serverId, string errorMessage)
    {
        var data = GetOrCreate(serverId);

        lock (data)
        {
            data.ConsecutiveFailures++;
            data.ConsecutiveSuccesses = 0;
            data.LastCheck = DateTime.UtcNow;
            data.LastErrorMessage = errorMessage;

            // Only flip to unhealthy once enough failures have occurred in a row.
            if (data.ConsecutiveFailures >= UnhealthyThreshold)
            {
                data.IsHealthy = false;
            }
        }
    }

    /// <summary>
    /// Gets the current health status for a specific server.
    /// </summary>
    /// <param name="serverId">Identifier of the server to look up.</param>
    /// <returns>
    /// A snapshot of the server's health, or <c>null</c> if no request has been recorded
    /// for <paramref name="serverId"/>.
    /// </returns>
    public ServerHealthStatus? GetServerHealth(string serverId)
    {
        if (!_healthData.TryGetValue(serverId, out var data))
        {
            return null;
        }

        return CreateStatus(data);
    }

    /// <summary>
    /// Gets health status for all tracked servers.
    /// </summary>
    /// <returns>
    /// A materialized snapshot with one entry per tracked server. Enumerating the result
    /// again does not re-read the tracker.
    /// </returns>
    public IEnumerable<ServerHealthStatus> GetAllServerHealth()
    {
        var snapshot = new List<ServerHealthStatus>();

        // Walk the entries directly so each server is read exactly once, with no second
        // lookup by key that could miss after a concurrent Reset().
        foreach (var entry in _healthData)
        {
            snapshot.Add(CreateStatus(entry.Value));
        }

        return snapshot;
    }

    /// <summary>
    /// Resets all tracked health data.
    /// </summary>
    /// <remarks>
    /// Servers recorded after a reset are tracked from scratch and start out healthy.
    /// </remarks>
    public void Reset()
    {
        _healthData.Clear();
    }

    /// <summary>
    /// Returns the tracking entry for <paramref name="serverId"/>, creating it on first use.
    /// </summary>
    private ServerHealthData GetOrCreate(string serverId)
    {
        // The factory uses the key argument instead of capturing serverId, so no closure
        // is allocated per call.
        return _healthData.GetOrAdd(serverId, static id => new ServerHealthData { ServerId = id });
    }

    /// <summary>
    /// Builds an externally visible status object from an entry while holding that entry's lock.
    /// </summary>
    private static ServerHealthStatus CreateStatus(ServerHealthData data)
    {
        lock (data)
        {
            return new ServerHealthStatus
            {
                ServerId = data.ServerId,
                ServerName = data.ServerId, // Default to ID if name not set
                IsHealthy = data.IsHealthy,
                LastCheck = data.LastCheck,
                // Report the rolling average when samples exist; otherwise fall back to the
                // last recorded response time (null if no success was ever recorded).
                ResponseTime = data.ResponseTimes.Count > 0
                    ? TimeSpan.FromMilliseconds(data.ResponseTimes.Average(t => t.TotalMilliseconds))
                    : data.LastResponseTime,
                ErrorMessage = data.LastErrorMessage
            };
        }
    }

    /// <summary>
    /// Mutable per-server tracking state. Instances double as their own lock object.
    /// </summary>
    private sealed class ServerHealthData
    {
        public string ServerId { get; init; } = string.Empty;

        public bool IsHealthy { get; set; } = true; // Start as healthy

        public int ConsecutiveSuccesses { get; set; }

        public int ConsecutiveFailures { get; set; }

        public DateTime LastCheck { get; set; }

        public TimeSpan? LastResponseTime { get; set; }

        public string? LastErrorMessage { get; set; }

        // Most recent successful response times, oldest first.
        public Queue<TimeSpan> ResponseTimes { get; } = new();
    }
}