dotnet-cqrs/docs/observability/metrics/README.md

4.6 KiB

Metrics

OpenTelemetry-compatible metrics for monitoring and alerting.

Overview

Svrnty.CQRS provides comprehensive metrics using System.Diagnostics.Metrics:

  • Event Counters - Published, consumed, errors, retries
  • Processing Metrics - Latency, throughput
  • Consumer Metrics - Lag, active consumers, stream length
  • Projection Metrics - Progress, errors

Quick Start

using Svrnty.CQRS.Events;
using OpenTelemetry.Metrics;

var builder = WebApplication.CreateBuilder(args);
var services = builder.Services;

// Register the event stream metrics services
services.AddEventStreamMetrics();

// Wire up OpenTelemetry: listen to the Svrnty.CQRS.Events meter and
// expose what it records through the Prometheus exporter
services
    .AddOpenTelemetry()
    .WithMetrics(metrics =>
    {
        metrics.AddMeter("Svrnty.CQRS.Events");
        metrics.AddPrometheusExporter();
    });

var app = builder.Build();

// Export metrics at /metrics (the scraping endpoint's default path)
app.MapPrometheusScrapingEndpoint();

app.Run();

Available Metrics

Event Metrics

// Counter: Total events published
svrnty_cqrs_events_published_total{stream="orders", event_type="OrderPlaced"}

// Counter: Total events consumed
svrnty_cqrs_events_consumed_total{stream="orders", subscription="email-processor"}

// Counter: Total errors
svrnty_cqrs_events_errors_total{stream="orders", error_type="ValidationError"}

// Counter: Total retries
svrnty_cqrs_events_retries_total{stream="orders"}

Performance Metrics

// Histogram: Processing latency in milliseconds
svrnty_cqrs_events_processing_latency{stream="orders", subscription="email-processor"}

// Histogram: Publish latency
svrnty_cqrs_events_publish_latency{stream="orders"}

// Gauge: Events per second
svrnty_cqrs_events_per_second{stream="orders"}

Consumer Metrics

// Gauge: Consumer lag (events behind)
svrnty_cqrs_consumer_lag{stream="orders", consumer="worker-1"}

// Gauge: Active consumers
svrnty_cqrs_active_consumers{stream="orders", group="order-processors"}

// Gauge: Stream length
svrnty_cqrs_stream_length{stream="orders"}

Recording Metrics

/// <summary>
/// Records event stream metrics on the <c>Svrnty.CQRS.Events</c> meter using
/// <c>System.Diagnostics.Metrics</c> instruments.
/// </summary>
/// <remarks>
/// The instance owns its <see cref="Meter"/>; dispose the instance (or register it
/// with a container that disposes it) to release the meter and its instruments.
/// </remarks>
public class EventStreamMetrics : IDisposable
{
    private readonly Meter _meter;
    private readonly Counter<long> _eventsPublished;
    private readonly Counter<long> _eventsConsumed;
    private readonly Histogram<double> _processingLatency;
    private readonly ObservableGauge<long> _consumerLag;

    // Latest lag reported per (stream, consumer). The observable gauge callback reads
    // this while other threads may be writing, hence the concurrent dictionary.
    private readonly System.Collections.Concurrent.ConcurrentDictionary<(string Stream, string Consumer), long> _lagByConsumer =
        new System.Collections.Concurrent.ConcurrentDictionary<(string Stream, string Consumer), long>();

    /// <summary>
    /// Creates the meter and all of its instruments.
    /// </summary>
    public EventStreamMetrics()
    {
        _meter = new Meter("Svrnty.CQRS.Events", "1.0.0");

        _eventsPublished = _meter.CreateCounter<long>(
            "svrnty.cqrs.events.published",
            description: "Total events published");

        _eventsConsumed = _meter.CreateCounter<long>(
            "svrnty.cqrs.events.consumed",
            description: "Total events consumed");

        _processingLatency = _meter.CreateHistogram<double>(
            "svrnty.cqrs.events.processing_latency",
            unit: "ms",
            description: "Event processing latency");

        // Observable instruments are pulled: the callback is invoked when a listener
        // collects, and reports whatever was last stored via RecordConsumerLag.
        _consumerLag = _meter.CreateObservableGauge<long>(
            "svrnty.cqrs.consumer.lag",
            () => GetConsumerLagMeasurements(),
            description: "Consumer lag in events");
    }

    /// <summary>Counts one published event.</summary>
    /// <param name="streamName">Value of the <c>stream</c> tag.</param>
    /// <param name="eventType">Value of the <c>event_type</c> tag.</param>
    public void RecordEventPublished(string streamName, string eventType)
    {
        _eventsPublished.Add(1,
            new KeyValuePair<string, object?>("stream", streamName),
            new KeyValuePair<string, object?>("event_type", eventType));
    }

    /// <summary>Counts one consumed event.</summary>
    /// <param name="streamName">Value of the <c>stream</c> tag.</param>
    /// <param name="subscriptionId">Value of the <c>subscription</c> tag.</param>
    public void RecordEventConsumed(string streamName, string subscriptionId)
    {
        _eventsConsumed.Add(1,
            new KeyValuePair<string, object?>("stream", streamName),
            new KeyValuePair<string, object?>("subscription", subscriptionId));
    }

    /// <summary>Records a processing latency sample tagged with the stream only.</summary>
    /// <param name="streamName">Value of the <c>stream</c> tag.</param>
    /// <param name="duration">Elapsed processing time; recorded in milliseconds.</param>
    public void RecordProcessingLatency(string streamName, TimeSpan duration)
    {
        _processingLatency.Record(duration.TotalMilliseconds,
            new KeyValuePair<string, object?>("stream", streamName));
    }

    /// <summary>
    /// Records a processing latency sample tagged with both the stream and the subscription.
    /// </summary>
    /// <param name="streamName">Value of the <c>stream</c> tag.</param>
    /// <param name="subscriptionId">Value of the <c>subscription</c> tag.</param>
    /// <param name="duration">Elapsed processing time; recorded in milliseconds.</param>
    public void RecordProcessingLatency(string streamName, string subscriptionId, TimeSpan duration)
    {
        _processingLatency.Record(duration.TotalMilliseconds,
            new KeyValuePair<string, object?>("stream", streamName),
            new KeyValuePair<string, object?>("subscription", subscriptionId));
    }

    /// <summary>
    /// Stores the current lag for a consumer; the value is reported by the
    /// <c>svrnty.cqrs.consumer.lag</c> gauge on the next collection.
    /// </summary>
    /// <param name="streamName">Value of the <c>stream</c> tag.</param>
    /// <param name="consumerId">Value of the <c>consumer</c> tag.</param>
    /// <param name="lag">Number of events the consumer is behind.</param>
    public void RecordConsumerLag(string streamName, string consumerId, long lag)
    {
        _lagByConsumer[(streamName, consumerId)] = lag;
    }

    /// <summary>Disposes the underlying meter.</summary>
    public void Dispose()
    {
        _meter.Dispose();
        GC.SuppressFinalize(this);
    }

    // Gauge callback: emits one measurement per (stream, consumer) pair recorded so far.
    private IEnumerable<Measurement<long>> GetConsumerLagMeasurements()
    {
        foreach (var entry in _lagByConsumer)
        {
            yield return new Measurement<long>(
                entry.Value,
                new KeyValuePair<string, object?>("stream", entry.Key.Stream),
                new KeyValuePair<string, object?>("consumer", entry.Key.Consumer));
        }
    }
}

Prometheus Integration

// Add Prometheus exporter for the Svrnty.CQRS.Events meter
builder.Services
    .AddOpenTelemetry()
    .WithMetrics(metrics =>
    {
        metrics.AddMeter("Svrnty.CQRS.Events");
        metrics.AddPrometheusExporter();
    });

// Serve the scrape endpoint at /metrics
app.MapPrometheusScrapingEndpoint("/metrics");

// Query metrics:
// curl http://localhost:5000/metrics

Grafana Dashboards

# Events per second by stream
# (sum collapses the event_type label so there is one series per stream)
sum by (stream) (rate(svrnty_cqrs_events_published_total[1m]))

# Consumer lag
svrnty_cqrs_consumer_lag

# P95 processing latency per stream
# Buckets are summed by `le` first so the quantile is computed over all series of a stream.
# NOTE: depending on the OpenTelemetry Prometheus exporter version, the instrument unit may be
# appended to the name (e.g. ..._processing_latency_milliseconds_bucket) - confirm against /metrics.
histogram_quantile(0.95, sum by (le, stream) (rate(svrnty_cqrs_events_processing_latency_bucket[5m])))

# Error rate
rate(svrnty_cqrs_events_errors_total[5m])

See Also