Research conducted on modern AI coding assistants (Cursor, GitHub Copilot, Cline,
Aider, Windsurf, Replit Agent) to understand architecture patterns, context management,
code editing workflows, and tool use protocols.
Key Decision: Pivoted from building a full CLI (40-50h) to a validation-driven, MCP-first
approach (10-15h). Build 5 core CODEX MCP tools that work with ANY coding assistant,
validate adoption over 2-4 weeks, then decide on a full CLI if demand is proven.
Files:
- research/ai-systems/modern-coding-assistants-architecture.md (comprehensive research)
- research/ai-systems/codex-coding-assistant-implementation-plan.md (original CLI plan, preserved)
- research/ai-systems/codex-mcp-tools-implementation-plan.md (approved MCP-first plan)
- ideas/registry.json (updated with approved MCP tools proposal)
Architech Validation: APPROVED with pivot to MCP-first approach
Human Decision: Approved (pragmatic validation-driven development)
Next: Begin Phase 1 implementation (10-15 hours, 5 core MCP tools)
🤖 Generated with CODEX Research System
Co-Authored-By: The Archivist <archivist@codex.svrnty.io>
Co-Authored-By: The Architech <architech@codex.svrnty.io>
Co-Authored-By: Mathias Beaulieu-Duncan <mat@svrnty.io>
using System.Collections.Concurrent;
using OpenHarbor.MCP.Gateway.Core.Models;

namespace OpenHarbor.MCP.Gateway.Infrastructure.Health;

/// <summary>
/// Passive health tracker that monitors server health based on actual request patterns.
/// Tracks success/failure rates and response times without active probing.
/// </summary>
public class PassiveHealthTracker
{
    private readonly ConcurrentDictionary<string, ServerHealthData> _healthData = new();

    /// <summary>
    /// Number of consecutive failures before marking server as unhealthy. Default is 5.
    /// </summary>
    public int UnhealthyThreshold { get; set; } = 5;

    /// <summary>
    /// Number of consecutive successes before marking server as healthy again. Default is 3.
    /// </summary>
    public int HealthyThreshold { get; set; } = 3;

    /// <summary>
    /// Response time threshold for marking requests as slow. Default is 5 seconds.
    /// </summary>
    public TimeSpan SlowResponseThreshold { get; set; } = TimeSpan.FromSeconds(5);

    /// <summary>
    /// Records a successful request to a server.
    /// </summary>
    public void RecordSuccess(string serverId, TimeSpan responseTime)
    {
        var data = _healthData.GetOrAdd(serverId, _ => new ServerHealthData { ServerId = serverId });

        lock (data)
        {
            data.ConsecutiveSuccesses++;
            data.ConsecutiveFailures = 0;
            data.LastResponseTime = responseTime;
            data.LastCheck = DateTime.UtcNow;
            data.LastErrorMessage = null;

            // Add response time to rolling average
            data.ResponseTimes.Enqueue(responseTime);
            if (data.ResponseTimes.Count > 10) // Keep last 10 response times
            {
                data.ResponseTimes.Dequeue();
            }

            // Update health status based on thresholds
            if (data.ConsecutiveSuccesses >= HealthyThreshold)
            {
                data.IsHealthy = true;
            }
        }
    }

    /// <summary>
    /// Records a failed request to a server.
    /// </summary>
    public void RecordFailure(string serverId, string errorMessage)
    {
        var data = _healthData.GetOrAdd(serverId, _ => new ServerHealthData { ServerId = serverId });

        lock (data)
        {
            data.ConsecutiveFailures++;
            data.ConsecutiveSuccesses = 0;
            data.LastCheck = DateTime.UtcNow;
            data.LastErrorMessage = errorMessage;

            // Update health status based on thresholds
            if (data.ConsecutiveFailures >= UnhealthyThreshold)
            {
                data.IsHealthy = false;
            }
        }
    }

    /// <summary>
    /// Gets the current health status for a specific server.
    /// </summary>
    public ServerHealthStatus? GetServerHealth(string serverId)
    {
        if (!_healthData.TryGetValue(serverId, out var data))
        {
            return null;
        }

        lock (data)
        {
            return new ServerHealthStatus
            {
                ServerId = data.ServerId,
                ServerName = serverId, // Default to ID if name not set
                IsHealthy = data.IsHealthy,
                LastCheck = data.LastCheck,
                ResponseTime = data.ResponseTimes.Any()
                    ? TimeSpan.FromMilliseconds(data.ResponseTimes.Average(t => t.TotalMilliseconds))
                    : data.LastResponseTime,
                ErrorMessage = data.LastErrorMessage
            };
        }
    }

    /// <summary>
    /// Gets health status for all tracked servers.
    /// </summary>
    public IEnumerable<ServerHealthStatus> GetAllServerHealth()
    {
        return _healthData.Keys
            .Select(serverId => GetServerHealth(serverId))
            .Where(h => h != null)
            .Cast<ServerHealthStatus>();
    }

    /// <summary>
    /// Resets all tracked health data.
    /// </summary>
    public void Reset()
    {
        _healthData.Clear();
    }

    private class ServerHealthData
    {
        public string ServerId { get; set; } = string.Empty;
        public bool IsHealthy { get; set; } = true; // Start as healthy
        public int ConsecutiveSuccesses { get; set; }
        public int ConsecutiveFailures { get; set; }
        public DateTime LastCheck { get; set; }
        public TimeSpan? LastResponseTime { get; set; }
        public string? LastErrorMessage { get; set; }
        public Queue<TimeSpan> ResponseTimes { get; set; } = new Queue<TimeSpan>();
    }
}
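
For illustration, the hedged sketch below exercises the tracker's threshold logic: a server flips to unhealthy after UnhealthyThreshold consecutive failures and recovers after HealthyThreshold consecutive successes. The server id "github-mcp", the error string, and the response times are hypothetical; only the PassiveHealthTracker members shown above are assumed.

// Minimal usage sketch, assuming the PassiveHealthTracker class above.
// "github-mcp" is a hypothetical server id used only for illustration.
var tracker = new PassiveHealthTracker { UnhealthyThreshold = 5, HealthyThreshold = 3 };

// Five consecutive failures reach UnhealthyThreshold and mark the server unhealthy.
for (var i = 0; i < 5; i++)
{
    tracker.RecordFailure("github-mcp", "HTTP 503 from upstream");
}
Console.WriteLine(tracker.GetServerHealth("github-mcp")?.IsHealthy); // False

// Three consecutive successes reach HealthyThreshold and restore the server.
for (var i = 0; i < 3; i++)
{
    tracker.RecordSuccess("github-mcp", TimeSpan.FromMilliseconds(120));
}

var status = tracker.GetServerHealth("github-mcp");
Console.WriteLine($"{status?.ServerId}: healthy={status?.IsHealthy}, avgResponse={status?.ResponseTime}");

Because tracking is passive, these transitions occur only as real gateway traffic is recorded; no background probe loop is involved.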