dotnet-cqrs/docs/event-streaming/sagas/compensation.md

13 KiB

Compensation

Roll back completed saga steps when failures occur.

Overview

Compensation undoes completed saga steps to maintain consistency when later steps fail:

  • Backward Recovery - Undo completed operations
  • Idempotency - Safe to retry compensations
  • Ordering - Compensate in reverse order
  • Partial Compensation - Only undo completed steps

Quick Start

/// <summary>
/// Undoes the completed steps of an order-fulfillment saga, most recent step first.
/// </summary>
/// <remarks>
/// The saga is persisted as <c>Compensating</c> before any undo work begins and as
/// <c>Failed</c> (with a completion timestamp) once every applicable undo has run.
/// </remarks>
/// <param name="state">Saga state whose step flags decide which undo actions run.</param>
/// <param name="ct">Token forwarded to the state store and the undo helpers.</param>
private async Task CompensateAsync(OrderFulfillmentState state, CancellationToken ct)
{
    _logger.LogWarning("Starting compensation for saga {SagaId}", state.SagaId);

    // Persist the transition first so the saga is visibly "compensating" while undo work runs.
    state.CurrentStep = SagaStep.Compensating;
    await _stateStore.SaveStateAsync(state.SagaId, state, ct);

    // Undo order is shipment, then payment, then inventory. Each check combines
    // "step completed" with "not yet undone", evaluated right before it is used.
    var shipmentNeedsUndo = state.Shipped && !state.ShipmentCancelled;
    if (shipmentNeedsUndo)
    {
        await CompensateShipmentAsync(state, ct);
    }

    var paymentNeedsUndo = state.PaymentProcessed && !state.PaymentRefunded;
    if (paymentNeedsUndo)
    {
        await CompensatePaymentAsync(state, ct);
    }

    var inventoryNeedsUndo = state.InventoryReserved && !state.InventoryReleased;
    if (inventoryNeedsUndo)
    {
        await CompensateInventoryAsync(state, ct);
    }

    // Record the terminal outcome of the saga.
    state.CurrentStep = SagaStep.Failed;
    state.CompletedAt = DateTimeOffset.UtcNow;
    await _stateStore.SaveStateAsync(state.SagaId, state, ct);

    _logger.LogInformation("Compensation complete for saga {SagaId}", state.SagaId);
}

Compensation Principles

Reverse Order

Compensate steps in reverse order of execution:

// Execution order:
// 1. Reserve Inventory
// 2. Process Payment
// 3. Ship Order → FAILS

// Compensation order:
// 1. (Ship order didn't complete, skip)
// 2. Refund Payment
// 3. Release Inventory

Idempotency

Make compensations idempotent (safe to retry):

/// <summary>
/// Refunds the payment for the saga's order unless the state already records a refund.
/// </summary>
/// <param name="state">Saga state; <c>PaymentRefunded</c> is set and saved on success.</param>
/// <param name="ct">Token forwarded to the payment service and the state store.</param>
private async Task CompensatePaymentAsync(OrderFulfillmentState state, CancellationToken ct)
{
    // Idempotency guard: a repeated call after a recorded refund is a no-op.
    var alreadyRefunded = state.PaymentRefunded;
    if (alreadyRefunded)
    {
        _logger.LogInformation("Payment already refunded for order {OrderId}", state.OrderId);
        return;
    }

    try
    {
        await _paymentService.RefundAsync(state.OrderId, ct);

        // The flag is persisted only after the refund call returned without throwing.
        state.PaymentRefunded = true;
        await _stateStore.SaveStateAsync(state.SagaId, state, ct);

        _logger.LogInformation("Payment refunded for order {OrderId}", state.OrderId);
    }
    catch (Exception refundFailure)
    {
        // Log for diagnostics, then let the caller decide how to react.
        _logger.LogError(refundFailure, "Failed to refund payment for order {OrderId}", state.OrderId);
        throw;
    }
}

Partial Compensation

Only compensate completed steps:

/// <summary>
/// Logs which saga steps completed and then undoes only those steps.
/// </summary>
/// <param name="state">Saga state whose step flags decide which undo actions run.</param>
/// <param name="ct">Token forwarded to the undo helpers.</param>
private async Task CompensateAsync(OrderFulfillmentState state, CancellationToken ct)
{
    // Names of the steps that finished; used only for the log entry below.
    var finishedSteps = new List<string>();

    if (state.Shipped)
    {
        finishedSteps.Add("Shipment");
    }

    if (state.PaymentProcessed)
    {
        finishedSteps.Add("Payment");
    }

    if (state.InventoryReserved)
    {
        finishedSteps.Add("Inventory");
    }

    var stepSummary = string.Join(", ", finishedSteps);

    _logger.LogWarning(
        "Compensating {Count} completed steps for {SagaId}: {Steps}",
        finishedSteps.Count,
        state.SagaId,
        stepSummary);

    // A step is undone only if it completed and its undo has not been recorded yet.
    if (state.Shipped)
    {
        if (!state.ShipmentCancelled)
        {
            await CompensateShipmentAsync(state, ct);
        }
    }

    if (state.PaymentProcessed)
    {
        if (!state.PaymentRefunded)
        {
            await CompensatePaymentAsync(state, ct);
        }
    }

    if (state.InventoryReserved)
    {
        if (!state.InventoryReleased)
        {
            await CompensateInventoryAsync(state, ct);
        }
    }
}

Compensation Strategies

Immediate Compensation

Undo immediately when a step fails:

/// <summary>
/// Runs the fulfillment steps for a placed order and compensates right away if any step throws.
/// </summary>
/// <param name="event">The placed order; supplies the order id, items and total amount.</param>
/// <param name="ct">Token forwarded to every service call and to the compensation.</param>
public async Task HandleAsync(OrderPlacedEvent @event, CancellationToken ct)
{
    // NOTE(review): SagaId is not assigned here - presumably OrderFulfillmentState
    // initializes it itself; confirm, since compensation logs and saves by SagaId.
    var state = new OrderFulfillmentState { OrderId = @event.OrderId };
    var orderId = @event.OrderId;

    try
    {
        // Step 1: reserve stock. The flag is set only after the call returns.
        await _inventoryService.ReserveAsync(orderId, @event.Items, ct);
        state.InventoryReserved = true;

        // Step 2: charge the customer.
        await _paymentService.ChargeAsync(orderId, @event.TotalAmount, ct);
        state.PaymentProcessed = true;

        // Step 3: ship - the step that fails in this walkthrough.
        await _shippingService.ShipAsync(orderId, ct);
    }
    catch (Exception)
    {
        // Undo whatever the flags say completed, then surface the original failure.
        await CompensateAsync(state, ct);
        throw;
    }
}

Delayed Compensation

Queue compensation for later execution:

/// <summary>
/// Runs the fulfillment steps for a placed order and, on failure, queues a
/// compensation task for background processing instead of compensating inline.
/// </summary>
/// <param name="event">The placed order that starts the saga.</param>
/// <param name="ct">Token forwarded to the step execution.</param>
public async Task HandleAsync(OrderPlacedEvent @event, CancellationToken ct)
{
    // Declared outside the try block so the catch handler can hand the progress
    // made so far to the compensation queue.
    var state = new OrderFulfillmentState { OrderId = @event.OrderId };

    try
    {
        // The step executor receives the state so it can record which steps completed.
        await ExecuteStepsAsync(@event, state, ct);
    }
    catch (Exception ex)
    {
        // Queue compensation for background processing
        await _compensationQueue.EnqueueAsync(new CompensationTask
        {
            SagaId = state.SagaId,
            State = state,
            Reason = ex.Message,
            ScheduledFor = DateTimeOffset.UtcNow.AddSeconds(30)
        });

        // Still surface the original failure to the caller.
        throw;
    }
}

/// <summary>
/// Background service that drains the compensation queue and runs the
/// compensation for each dequeued task.
/// </summary>
public class CompensationProcessor : BackgroundService
{
    /// <summary>
    /// Processes queued compensation tasks one at a time as the queue yields them.
    /// </summary>
    /// <param name="stoppingToken">Token passed to both the dequeue stream and each compensation.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        var pendingTasks = _compensationQueue.DequeueAsync(stoppingToken);

        await foreach (var pending in pendingTasks)
        {
            await CompensateAsync(pending.State, stoppingToken);
        }
    }
}

Retry with Backoff

Retry failed compensations with exponential backoff:

/// <summary>
/// Runs the saga compensation, retrying failed attempts with exponential backoff.
/// </summary>
/// <remarks>
/// Waits 2^attempt seconds between attempts. When the final attempt fails, the
/// operations team is notified and the exception is rethrown, so callers still
/// observe the failure.
/// </remarks>
/// <param name="state">Saga state passed to each compensation attempt.</param>
/// <param name="ct">Token forwarded to the compensation and to the backoff delay.</param>
private async Task CompensateWithRetryAsync(
    OrderFulfillmentState state,
    CancellationToken ct)
{
    const int maxAttempts = 5;

    for (int attempt = 1; attempt <= maxAttempts; attempt++)
    {
        try
        {
            await CompensateAsync(state, ct);
            return;  // Success
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Cancellation was requested by the caller: neither retry nor page ops.
            throw;
        }
        catch (Exception ex) when (attempt < maxAttempts)
        {
            var delay = TimeSpan.FromSeconds(Math.Pow(2, attempt));

            _logger.LogWarning(ex,
                "Compensation attempt {Attempt}/{Max} failed for {SagaId}, retrying in {Delay}",
                attempt,
                maxAttempts,
                state.SagaId,
                delay);

            await Task.Delay(delay, ct);
        }
        catch (Exception ex)
        {
            // Final attempt failed. This handler is required: without it the exception
            // would leave the method before the notification could be sent.
            _logger.LogError(ex,
                "Compensation attempt {Attempt}/{Max} failed for {SagaId}, giving up",
                attempt,
                maxAttempts,
                state.SagaId);

            // All retries failed - manual intervention required
            await NotifyOpsTeamAsync(state, "Compensation failed after all retries");
            throw;
        }
    }
}

Compensation Examples

Inventory Compensation

/// <summary>
/// Releases the inventory reservation for the saga's order unless the state
/// already records the release.
/// </summary>
/// <param name="state">Saga state; <c>InventoryReleased</c> is set and saved on success.</param>
/// <param name="ct">Token forwarded to the inventory service and the state store.</param>
private async Task CompensateInventoryAsync(OrderFulfillmentState state, CancellationToken ct)
{
    if (state.InventoryReleased)
    {
        _logger.LogInformation("Inventory already released for order {OrderId}", state.OrderId);
        return;
    }

    _logger.LogInformation("Releasing inventory for order {OrderId}", state.OrderId);

    // Shared by the success path and the "reservation not found" path.
    async Task MarkReleasedAsync()
    {
        state.InventoryReleased = true;
        await _stateStore.SaveStateAsync(state.SagaId, state, ct);
    }

    try
    {
        await _inventoryService.ReleaseReservationAsync(state.OrderId, ct);
        await MarkReleasedAsync();
    }
    catch (ReservationNotFoundException)
    {
        // Nothing left to release (already released or never existed): count it as done.
        await MarkReleasedAsync();
    }
    catch (Exception releaseFailure)
    {
        _logger.LogError(releaseFailure, "Failed to release inventory for order {OrderId}", state.OrderId);
        throw;
    }
}

Payment Compensation

/// <summary>
/// Refunds the payment for the saga's order, records the refund transaction on the
/// state, and publishes a <c>PaymentRefundedEvent</c>.
/// </summary>
/// <param name="state">Saga state; refund flag and refund transaction id are set and saved on success.</param>
/// <param name="ct">Token forwarded to the payment service and the state store.</param>
private async Task CompensatePaymentAsync(OrderFulfillmentState state, CancellationToken ct)
{
    if (state.PaymentRefunded)
    {
        _logger.LogInformation("Payment already refunded for order {OrderId}", state.OrderId);
        return;
    }

    _logger.LogInformation("Refunding payment for order {OrderId}", state.OrderId);

    // Same reason text is sent to the payment service and carried on the event.
    const string refundReason = "Order fulfillment failed";

    try
    {
        var refundId = await _paymentService.RefundAsync(
            state.OrderId,
            state.PaymentTransactionId,
            reason: refundReason,
            ct);

        state.RefundTransactionId = refundId;
        state.PaymentRefunded = true;
        await _stateStore.SaveStateAsync(state.SagaId, state, ct);

        // NOTE(review): publishing happens after the refund is saved and inside the
        // same try block, so a publish failure is logged as a refund failure and a
        // later retry returns early at the guard above without publishing.
        var refundedEvent = new PaymentRefundedEvent
        {
            OrderId = state.OrderId,
            RefundId = refundId,
            Reason = refundReason
        };
        await _eventPublisher.PublishAsync(refundedEvent);
    }
    catch (Exception refundFailure)
    {
        _logger.LogError(refundFailure, "Failed to refund payment for order {OrderId}", state.OrderId);
        throw;
    }
}

Shipment Compensation

/// <summary>
/// Cancels the shipment for the saga's order and notifies the customer; if the
/// shipment was already delivered, creates a return label instead.
/// </summary>
/// <param name="state">Saga state; <c>ShipmentCancelled</c> is set and saved when cancellation succeeds.</param>
/// <param name="ct">Token forwarded to the shipping service, state store and return-label creation.</param>
private async Task CompensateShipmentAsync(OrderFulfillmentState state, CancellationToken ct)
{
    if (state.ShipmentCancelled)
    {
        _logger.LogInformation("Shipment already cancelled for order {OrderId}", state.OrderId);
        return;
    }

    _logger.LogWarning("Cancelling shipment for order {OrderId}", state.OrderId);

    try
    {
        await _shippingService.CancelShipmentAsync(
            state.OrderId,
            state.ShipmentTrackingNumber,
            ct);

        state.ShipmentCancelled = true;
        await _stateStore.SaveStateAsync(state.SagaId, state, ct);

        // Tell the customer the shipment was cancelled.
        var customerNotice = new ShipmentCancelledNotification
        {
            OrderId = state.OrderId,
            Reason = "Order fulfillment failed"
        };
        await _notificationService.SendAsync(customerNotice);
    }
    catch (ShipmentAlreadyDeliveredException)
    {
        _logger.LogError("Cannot cancel shipment for order {OrderId} - already delivered", state.OrderId);

        // Delivery cannot be undone, so fall back to a return label.
        // NOTE(review): no state flag is set or saved on this path, so a repeated
        // call will attempt cancellation and create a return label again.
        await CreateReturnLabelAsync(state.OrderId, ct);
    }
    catch (Exception cancelFailure)
    {
        _logger.LogError(cancelFailure, "Failed to cancel shipment for order {OrderId}", state.OrderId);
        throw;
    }
}

Compensation Limitations

Non-Compensatable Actions

Some actions cannot be undone:

// ✅ Compensatable - each action has a matching undo operation
await _inventoryService.ReserveAsync(orderId, items);  // Undo: release the reservation
await _paymentService.ChargeAsync(orderId, amount);    // Undo: refund the charge

// ❌ Non-Compensatable - no true undo exists; handle with a follow-up action instead
await _emailService.SendOrderConfirmationAsync(orderId);  // A sent email cannot be recalled
await _externalApiService.NotifyPartnerAsync(orderId);    // The partner API may not support undo

Handling Non-Compensatable Actions

/// <summary>
/// Compensates a saga that includes actions which cannot be undone: reversible
/// steps are rolled back, irreversible ones get a follow-up action.
/// </summary>
/// <remarks>
/// The refund and release are guarded by their "already undone" flags, and the
/// flags are set after each call so a repeated invocation with the same state
/// does not refund or release twice. The flags are updated in memory only; this
/// snippet does not persist the state.
/// </remarks>
/// <param name="state">Saga state whose flags decide which actions run.</param>
/// <param name="ct">Token forwarded to every service call.</param>
private async Task CompensateAsync(OrderFulfillmentState state, CancellationToken ct)
{
    // Compensate what we can
    if (state.PaymentProcessed && !state.PaymentRefunded)
    {
        await _paymentService.RefundAsync(state.OrderId, ct);
        state.PaymentRefunded = true;
    }

    if (state.InventoryReserved && !state.InventoryReleased)
    {
        // Same release call the dedicated inventory compensation uses.
        await _inventoryService.ReleaseReservationAsync(state.OrderId, ct);
        state.InventoryReleased = true;
    }

    // Handle non-compensatable actions
    if (state.ConfirmationEmailSent)
    {
        // Send cancellation email instead
        await _emailService.SendOrderCancellationAsync(state.OrderId, ct);
    }

    if (state.PartnerNotified)
    {
        // Notify partner of cancellation
        await _externalApiService.NotifyCancellationAsync(state.OrderId, ct);
    }
}

Monitoring Compensation

Compensation Metrics

/// <summary>
/// Aggregate counters describing the outcomes of saga compensations.
/// </summary>
public class CompensationMetrics
{
    /// <summary>Number of compensations counted in total.</summary>
    public int TotalCompensations { get; set; }

    /// <summary>Number of compensations counted as successful.</summary>
    public int SuccessfulCompensations { get; set; }

    /// <summary>Number of compensations counted as failed.</summary>
    public int FailedCompensations { get; set; }

    /// <summary>
    /// Compensation counts keyed by step name. Starts as an empty dictionary so a
    /// fresh instance can be used without a null check.
    /// </summary>
    public Dictionary<string, int> CompensationsByStep { get; set; } = new Dictionary<string, int>();

    /// <summary>
    /// Percentage (0-100) of compensations that succeeded; 100 when none were recorded.
    /// </summary>
    public double SuccessRate =>
        TotalCompensations > 0
            ? (double)SuccessfulCompensations / TotalCompensations * 100
            : 100;
}

// Record the outcome of this saga's compensation.
_metrics.RecordCompensation(state.SagaId, success: true);

// NOTE(review): `metrics` (no underscore) is not declared in this snippet -
// presumably a CompensationMetrics snapshot distinct from the `_metrics` recorder; confirm.
// The threshold is on the failure count, while the message reports the failure percentage.
var tooManyFailures = metrics.FailedCompensations > 10;
if (tooManyFailures)
{
    var failureRate = 100 - metrics.SuccessRate;
    _logger.LogWarning("High compensation failure rate: {Rate:F1}%",
        failureRate);
}

Alerting

/// <summary>
/// Runs the compensation steps and raises a critical alert to the operations
/// team if they fail, before rethrowing the failure.
/// </summary>
/// <param name="state">Saga state; its saga and order ids are included in the alert.</param>
/// <param name="ct">Token forwarded to the compensation steps.</param>
private async Task CompensateAsync(OrderFulfillmentState state, CancellationToken ct)
{
    try
    {
        await ExecuteCompensationStepsAsync(state, ct);
    }
    catch (Exception compensationFailure)
    {
        _logger.LogCritical(compensationFailure, "Compensation failed for saga {SagaId}", state.SagaId);

        // Build the alert for the operations team, then send it.
        var opsAlert = new Alert
        {
            Severity = AlertSeverity.Critical,
            Title = $"Saga compensation failed: {state.SagaId}",
            Description = $"Order {state.OrderId} requires manual intervention",
            SagaId = state.SagaId,
            OrderId = state.OrderId,
            FailureReason = compensationFailure.Message
        };
        await _alertService.SendAsync(opsAlert);

        // The caller still sees the original failure.
        throw;
    }
}

Best Practices

DO

  • Compensate in reverse order
  • Make compensations idempotent
  • Log all compensation attempts
  • Track compensation state
  • Retry with backoff
  • Handle non-compensatable actions
  • Alert on compensation failures
  • Test compensation thoroughly
  • Document compensation logic

DON'T

  • Don't assume compensation always succeeds
  • Don't forget partial compensation
  • Don't skip logging
  • Don't ignore non-compensatable actions
  • Don't retry indefinitely
  • Don't leave system in inconsistent state
  • Don't forget to update state after compensation
  • Don't compensate non-completed steps

See Also