From 27afb97090a5dc8b84060978bbd341bc64147d76 Mon Sep 17 00:00:00 2001
From: Malte Viering <malte.viering@sap.com>
Date: Fri, 27 Mar 2026 16:50:53 +0100
Subject: [PATCH 1/3] failover set offset via rnd and not via reconcile count

---
 cmd/main.go                                   |   3 +-
 .../reservations/failover/controller.go       |   7 +-
 .../reservations/failover/controller_test.go  | 300 +++++++++---------
 3 files changed, 160 insertions(+), 150 deletions(-)

diff --git a/cmd/main.go b/cmd/main.go
index fc3519fe7..f0923ab44 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -622,7 +622,8 @@ func main() {
 			"reconcileInterval", failoverConfig.ReconcileInterval,
 			"revalidationInterval", failoverConfig.RevalidationInterval,
 			"trustHypervisorLocation", failoverConfig.TrustHypervisorLocation,
-			"maxVMsToProcess", failoverConfig.MaxVMsToProcess)
+			"maxVMsToProcess", failoverConfig.MaxVMsToProcess,
+			"vmSelectionRotationInterval", failoverConfig.VMSelectionRotationInterval)
 	}
 
 	// +kubebuilder:scaffold:builder
diff --git a/internal/scheduling/reservations/failover/controller.go b/internal/scheduling/reservations/failover/controller.go
index 4c99f4b2a..74b7d9b7a 100644
--- a/internal/scheduling/reservations/failover/controller.go
+++ b/internal/scheduling/reservations/failover/controller.go
@@ -6,6 +6,7 @@ package failover
 import (
 	"context"
 	"fmt"
+	"math/rand/v2"
 	"path/filepath"
 	"slices"
 	"sort"
@@ -518,7 +519,11 @@ func (c *FailoverReservationController) selectVMsToProcess(
 	offset := 0
 	rotationInterval := *c.Config.VMSelectionRotationInterval
 	if rotationInterval > 0 && c.reconcileCount%int64(rotationInterval) == 0 {
-		offset = int(c.reconcileCount) % len(vmsMissingFailover)
+		offset = rand.IntN(len(vmsMissingFailover)) //nolint:gosec // non-cryptographic randomness is fine for VM selection rotation
+		logger.Info("applying random rotation offset for VM selection",
+			"offset", offset,
+			"totalVMs", len(vmsMissingFailover),
+			"rotationInterval", rotationInterval)
 	}
 
 	selected = make([]vmFailoverNeed, 0, maxToProcess)
diff --git a/internal/scheduling/reservations/failover/controller_test.go b/internal/scheduling/reservations/failover/controller_test.go
index 424898b03..78d3168c7 100644
--- a/internal/scheduling/reservations/failover/controller_test.go
+++ b/internal/scheduling/reservations/failover/controller_test.go
@@ -855,7 +855,7 @@ func getAllocations(res *v1alpha1.Reservation) map[string]string {
 // ============================================================================
 
 func TestSelectVMsToProcess(t *testing.T) {
-	// Create 10 VMs with different memory sizes (sorted by memory descending)
+	// Create VMs with different memory sizes (sorted by memory descending)
 	createVMs := func(count int) []vmFailoverNeed {
 		vms := make([]vmFailoverNeed, count)
 		for i := range count {
@@ -873,166 +873,170 @@ func TestSelectVMsToProcess(t *testing.T) {
 		return vms
 	}
 
-	tests := []struct {
-		name           string
-		reconcileCount int64
-		vmCount        int
-		maxToProcess   int
-		expectedOffset int // Expected starting offset in the VM list
-		expectedHit    bool
-	}{
-		// 3 out of 4 runs should start at offset 0
-		{
-			name:           "reconcile 1 - offset 0",
-			reconcileCount: 1,
-			vmCount:        10,
-			maxToProcess:   3,
-			expectedOffset: 0,
-			expectedHit:    true,
-		},
-		{
-			name:           "reconcile 2 - offset 0",
-			reconcileCount: 2,
-			vmCount:        10,
-			maxToProcess:   3,
-			expectedOffset: 0,
-			expectedHit:    true,
-		},
-		{
-			name:           "reconcile 3 - offset 0",
-			reconcileCount: 3,
-			vmCount:        10,
-			maxToProcess:   3,
-			expectedOffset: 0,
-			expectedHit:    true,
-		},
-		// Every 4th reconcile uses reconcileCount as offset (mod vmCount)
-		{
-			name:           "reconcile 4 - offset 4",
+	t.Run("no rotation - offset 0", func(t *testing.T) {
+		ctx := context.Background()
+		controller := &FailoverReservationController{
+			reconcileCount: 1, // Not divisible by 4, so no rotation
+			Config: FailoverConfig{
+				VMSelectionRotationInterval: intPtr(4),
+			},
+		}
+
+		vms := createVMs(10)
+		selected, hitLimit := controller.selectVMsToProcess(ctx, vms, 3)
+
+		if !hitLimit {
+			t.Error("expected hitLimit=true")
+		}
+		if len(selected) != 3 {
+			t.Errorf("expected 3 VMs selected, got %d", len(selected))
+		}
+		// Without rotation, should start at offset 0 (vm-a has most memory)
+		if selected[0].VM.UUID != "vm-a" {
+			t.Errorf("expected first VM to be vm-a, got %s", selected[0].VM.UUID)
+		}
+	})
+
+	t.Run("rotation triggered - random offset", func(t *testing.T) {
+		ctx := context.Background()
+		controller := &FailoverReservationController{
+			reconcileCount: 4, // Divisible by 4, triggers rotation
+			Config: FailoverConfig{
+				VMSelectionRotationInterval: intPtr(4),
+			},
+		}
+
+		vms := createVMs(10)
+		selected, hitLimit := controller.selectVMsToProcess(ctx, vms, 3)
+
+		if !hitLimit {
+			t.Error("expected hitLimit=true")
+		}
+		if len(selected) != 3 {
+			t.Errorf("expected 3 VMs selected, got %d", len(selected))
+		}
+		// With rotation, offset is random - just verify we got valid VMs
+		// and the selection wraps correctly
+		vmSet := make(map[string]bool)
+		for _, vm := range vms {
+			vmSet[vm.VM.UUID] = true
+		}
+		for _, s := range selected {
+			if !vmSet[s.VM.UUID] {
+				t.Errorf("selected VM %s not in original list", s.VM.UUID)
+			}
+		}
+	})
+
+	t.Run("rotation disabled - always offset 0", func(t *testing.T) {
+		ctx := context.Background()
+		controller := &FailoverReservationController{
 			reconcileCount: 4,
-			vmCount:        10,
-			maxToProcess:   3,
-			expectedOffset: 4,
-			expectedHit:    true,
-		},
-		{
-			name:           "reconcile 5 - offset 0",
-			reconcileCount: 5,
-			vmCount:        10,
-			maxToProcess:   3,
-			expectedOffset: 0,
-			expectedHit:    true,
-		},
-		{
-			name:           "reconcile 6 - offset 0",
-			reconcileCount: 6,
-			vmCount:        10,
-			maxToProcess:   3,
-			expectedOffset: 0,
-			expectedHit:    true,
-		},
-		{
-			name:           "reconcile 7 - offset 0",
-			reconcileCount: 7,
-			vmCount:        10,
-			maxToProcess:   3,
-			expectedOffset: 0,
-			expectedHit:    true,
-		},
-		{
-			name:           "reconcile 8 - offset 8",
-			reconcileCount: 8,
-			vmCount:        10,
-			maxToProcess:   3,
-			expectedOffset: 8,
-			expectedHit:    true,
-		},
-		// Test wrap-around when reconcileCount > vmCount
-		{
-			name:           "reconcile 12 - offset 2 (12 mod 10)",
-			reconcileCount: 12,
-			vmCount:        10,
-			maxToProcess:   3,
-			expectedOffset: 2, // 12 % 10 = 2
-			expectedHit:    true,
-		},
-		{
-			name:           "reconcile 20 - offset 0 (20 mod 10)",
-			reconcileCount: 20,
-			vmCount:        10,
-			maxToProcess:   3,
-			expectedOffset: 0, // 20 % 10 = 0
-			expectedHit:    true,
-		},
-		// Edge cases
-		{
-			name:           "maxToProcess 0 - no limit, returns all",
+			Config: FailoverConfig{
+				VMSelectionRotationInterval: intPtr(0), // Disabled
+			},
+		}
+
+		vms := createVMs(10)
+		selected, hitLimit := controller.selectVMsToProcess(ctx, vms, 3)
+
+		if !hitLimit {
+			t.Error("expected hitLimit=true")
+		}
+		// With rotation disabled, should always start at offset 0
+		if selected[0].VM.UUID != "vm-a" {
+			t.Errorf("expected first VM to be vm-a, got %s", selected[0].VM.UUID)
+		}
+	})
+
+	t.Run("maxToProcess 0 - no limit, returns all", func(t *testing.T) {
+		ctx := context.Background()
+		controller := &FailoverReservationController{
 			reconcileCount: 4,
-			vmCount:        10,
-			maxToProcess:   0,
-			expectedOffset: 0, // No limit means all VMs returned starting from 0
-			expectedHit:    false,
-		},
-		{
-			name:           "maxToProcess >= vmCount - no limit hit",
+			Config: FailoverConfig{
+				VMSelectionRotationInterval: intPtr(4),
+			},
+		}
+
+		vms := createVMs(10)
+		selected, hitLimit := controller.selectVMsToProcess(ctx, vms, 0)
+
+		if hitLimit {
+			t.Error("expected hitLimit=false when maxToProcess=0")
+		}
+		if len(selected) != 10 {
+			t.Errorf("expected all 10 VMs when no limit, got %d", len(selected))
+		}
+	})
+
+	t.Run("maxToProcess >= vmCount - no limit hit", func(t *testing.T) {
+		ctx := context.Background()
+		controller := &FailoverReservationController{
 			reconcileCount: 4,
-			vmCount:        5,
-			maxToProcess:   10,
-			expectedOffset: 0, // All VMs fit, no rotation needed
-			expectedHit:    false,
-		},
-	}
+			Config: FailoverConfig{
+				VMSelectionRotationInterval: intPtr(4),
+			},
+		}
 
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			ctx := context.Background()
-			controller := &FailoverReservationController{
-				reconcileCount: tt.reconcileCount,
-				Config: FailoverConfig{
-					VMSelectionRotationInterval: intPtr(4), // Default rotation interval
-				},
-			}
+		vms := createVMs(5)
+		selected, hitLimit := controller.selectVMsToProcess(ctx, vms, 10)
 
-			vms := createVMs(tt.vmCount)
-			selected, hitLimit := controller.selectVMsToProcess(ctx, vms, tt.maxToProcess)
+		if hitLimit {
+			t.Error("expected hitLimit=false when maxToProcess >= vmCount")
+		}
+		if len(selected) != 5 {
+			t.Errorf("expected all 5 VMs, got %d", len(selected))
+		}
+	})
 
-			if hitLimit != tt.expectedHit {
-				t.Errorf("expected hitLimit=%v, got %v", tt.expectedHit, hitLimit)
-			}
+	t.Run("empty VMs list", func(t *testing.T) {
+		ctx := context.Background()
+		controller := &FailoverReservationController{
+			reconcileCount: 4,
+			Config: FailoverConfig{
+				VMSelectionRotationInterval: intPtr(4),
+			},
+		}
 
-			if !tt.expectedHit {
-				// When no limit is hit, all VMs should be returned
-				if len(selected) != tt.vmCount {
-					t.Errorf("expected all %d VMs when no limit hit, got %d", tt.vmCount, len(selected))
-				}
-				return
-			}
+		vms := []vmFailoverNeed{}
+		selected, hitLimit := controller.selectVMsToProcess(ctx, vms, 3)
 
-			// Verify the first selected VM is at the expected offset
-			if len(selected) == 0 {
-				t.Error("expected at least one VM selected")
-				return
+		if hitLimit {
+			t.Error("expected hitLimit=false for empty list")
+		}
+		if len(selected) != 0 {
+			t.Errorf("expected 0 VMs, got %d", len(selected))
+		}
+	})
+
+	t.Run("random offset is within bounds", func(t *testing.T) {
+		ctx := context.Background()
+		// Run multiple times to verify random offset is always valid
+		for i := range 20 {
+			controller := &FailoverReservationController{
+				reconcileCount: int64((i + 1) * 4), // Always triggers rotation
+				Config: FailoverConfig{
+					VMSelectionRotationInterval: intPtr(4),
+				},
 			}
 
-			// The VMs are sorted by memory descending, so vm-a has most memory, vm-j has least
-			// After sorting, the order is: vm-a, vm-b, vm-c, ..., vm-j
-			// With offset, we should start at vms[offset]
-			expectedFirstVM := vms[tt.expectedOffset].VM.UUID
-			actualFirstVM := selected[0].VM.UUID
+			vms := createVMs(10)
+			selected, _ := controller.selectVMsToProcess(ctx, vms, 3)
 
-			if actualFirstVM != expectedFirstVM {
-				t.Errorf("expected first VM to be %s (offset %d), got %s",
-					expectedFirstVM, tt.expectedOffset, actualFirstVM)
+			if len(selected) != 3 {
+				t.Errorf("iteration %d: expected 3 VMs, got %d", i, len(selected))
 			}
 
-			// Verify we got the expected number of VMs
-			expectedCount := tt.maxToProcess
-			if expectedCount > tt.vmCount {
-				expectedCount = tt.vmCount
+			// Verify all selected VMs are from the original list
+			vmSet := make(map[string]bool)
+			for _, vm := range vms {
+				vmSet[vm.VM.UUID] = true
 			}
-			if len(selected) != expectedCount {
-				t.Errorf("expected %d VMs selected, got %d", expectedCount, len(selected))
+			for _, s := range selected {
+				if !vmSet[s.VM.UUID] {
+					t.Errorf("iteration %d: selected VM %s not in original list", i, s.VM.UUID)
+				}
 			}
-		})
-	}
+		}
+	})
 }

From bbc6165454d03d426d7aa68cf48d92bcf7fb8491 Mon Sep 17 00:00:00 2001
From: Malte Viering <malte.viering@sap.com>
Date: Mon, 30 Mar 2026 11:00:14 +0200
Subject: [PATCH 2/3] logging

---
 .../reservations/scheduler_client.go           | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/internal/scheduling/reservations/scheduler_client.go b/internal/scheduling/reservations/scheduler_client.go
index db7829e23..8e025060c 100644
--- a/internal/scheduling/reservations/scheduler_client.go
+++ b/internal/scheduling/reservations/scheduler_client.go
@@ -143,15 +143,7 @@ func (c *SchedulerClient) ScheduleReservation(ctx context.Context, req ScheduleR
 
 	logger.V(1).Info("sending external scheduler request",
 		"url", c.URL,
-		"instanceUUID", req.InstanceUUID,
-		"projectID", req.ProjectID,
-		"flavorName", req.FlavorName,
-		"flavorExtraSpecs", req.FlavorExtraSpecs,
-		"memoryMB", req.MemoryMB,
-		"vcpus", req.VCPUs,
-		"eligibleHostsCount", len(req.EligibleHosts),
-		"ignoreHosts", req.IgnoreHosts,
-		"isEvacuation", req.isEvacuation())
+		"request", externalSchedulerRequest)
 
 	// Marshal the request
 	reqBody, err := json.Marshal(externalSchedulerRequest)
@@ -207,11 +199,3 @@ func (req ScheduleReservationRequest) getSchedulerHints() map[string]any {
 	}
 	return req.SchedulerHints
 }
-
-// isEvacuation returns true if the request has the evacuation intent hint set.
-func (req ScheduleReservationRequest) isEvacuation() bool {
-	if req.SchedulerHints == nil {
-		return false
-	}
-	return req.SchedulerHints["_nova_check_type"] == "evacuate"
-}

From 722e0cdc1fda442b4ee20beb67f0efee603c532a Mon Sep 17 00:00:00 2001
From: Malte Viering <malte.viering@sap.com>
Date: Mon, 30 Mar 2026 14:03:11 +0200
Subject: [PATCH 3/3] Add prom metrics for failover reservations

---
 cmd/main.go                                   |   7 +
 .../reservations/failover/controller.go       |  23 +-
 .../reservations/failover/integration_test.go |   1 +
 .../reservations/failover/monitor.go          | 150 +++++++++
 .../reservations/failover/monitor_test.go     | 290 ++++++++++++++++++
 5 files changed, 465 insertions(+), 6 deletions(-)
 create mode 100644 internal/scheduling/reservations/failover/monitor.go
 create mode 100644 internal/scheduling/reservations/failover/monitor_test.go

diff --git a/cmd/main.go b/cmd/main.go
index f0923ab44..be9154825 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -571,6 +571,12 @@ func main() {
 		// The scheduler client calls the nova external scheduler API to get placement decisions
 		schedulerClient := reservations.NewSchedulerClient(failoverConfig.SchedulerURL)
 
+		failoverMonitor := failover.NewFailoverMonitor()
+		if err := metrics.Registry.Register(failoverMonitor); err != nil {
+			setupLog.Error(err, "failed to register failover monitor metrics, continuing without metrics")
+			failoverMonitor = nil
+		}
+
 		// Defer the initialization of PostgresReader until the manager starts
 		// because the cache is not ready during setup
 		if err := mgr.Add(manager.RunnableFunc(func(ctx context.Context) error {
@@ -596,6 +602,7 @@ func main() {
 				vmSource,
 				failoverConfig,
 				schedulerClient,
+				failoverMonitor,
 			)
 
 			// Set up the watch-based reconciler for per-reservation reconciliation
diff --git a/internal/scheduling/reservations/failover/controller.go b/internal/scheduling/reservations/failover/controller.go
index 74b7d9b7a..35a63cd8a 100644
--- a/internal/scheduling/reservations/failover/controller.go
+++ b/internal/scheduling/reservations/failover/controller.go
@@ -42,15 +42,17 @@ type FailoverReservationController struct {
 	Config          FailoverConfig
 	SchedulerClient *reservations.SchedulerClient
 	Recorder        events.EventRecorder // Event recorder for emitting Kubernetes events
-	reconcileCount  int64                // Track reconciliation count for rotating VM selection
+	Monitor         *FailoverMonitor
+	reconcileCount  int64 // Track reconciliation count for rotating VM selection
 }
 
-func NewFailoverReservationController(c client.Client, vmSource VMSource, config FailoverConfig, schedulerClient *reservations.SchedulerClient) *FailoverReservationController {
+func NewFailoverReservationController(c client.Client, vmSource VMSource, config FailoverConfig, schedulerClient *reservations.SchedulerClient, monitor *FailoverMonitor) *FailoverReservationController {
 	return &FailoverReservationController{
 		Client:          c,
 		VMSource:        vmSource,
 		Config:          config,
 		SchedulerClient: schedulerClient,
+		Monitor:         monitor,
 	}
 }
 
@@ -229,6 +231,9 @@ func (c *FailoverReservationController) validateReservation(ctx context.Context,
 
 // reconcileSummary holds statistics from the reconciliation cycle.
 type reconcileSummary struct {
+	duration            time.Duration
+	totalVMs            int
+	totalReservations   int
 	vmsMissingFailover  int
 	vmsProcessed        int
 	reservationsNeeded  int
@@ -324,7 +329,9 @@ func (c *FailoverReservationController) ReconcilePeriodic(ctx context.Context) (
 	summary.totalFailed = assignSummary.totalFailed
 
 	// Log summary
-	duration := time.Since(startTime)
+	summary.duration = time.Since(startTime)
+	summary.totalVMs = len(vms)
+	summary.totalReservations = len(failoverReservations)
 	requeueAfter := c.Config.ReconcileInterval.Duration
 	successCount := summary.totalCreated + summary.totalReused
 	madeProgress := successCount >= *c.Config.MinSuccessForShortInterval
@@ -334,10 +341,10 @@ func (c *FailoverReservationController) ReconcilePeriodic(ctx context.Context) (
 	}
 	logger.Info("periodic reconciliation completed",
 		"reconcileCount", c.reconcileCount,
-		"duration", duration.Round(time.Millisecond),
+		"duration", summary.duration.Round(time.Millisecond),
 		"requeueAfter", requeueAfter,
-		"totalVMs", len(vms),
-		"totalReservations", len(failoverReservations),
+		"totalVMs", summary.totalVMs,
+		"totalReservations", summary.totalReservations,
 		"vmsMissingFailover", summary.vmsMissingFailover,
 		"vmsProcessed", summary.vmsProcessed,
 		"reservationsNeeded", summary.reservationsNeeded,
@@ -347,6 +354,10 @@ func (c *FailoverReservationController) ReconcilePeriodic(ctx context.Context) (
 		"updated", summary.reservationsUpdated,
 		"deleted", summary.reservationsDeleted)
 
+	if c.Monitor != nil {
+		c.Monitor.RecordReconciliation(summary, "")
+	}
+
 	return ctrl.Result{RequeueAfter: requeueAfter}, nil
 }
 
diff --git a/internal/scheduling/reservations/failover/integration_test.go b/internal/scheduling/reservations/failover/integration_test.go
index 7fe992df5..66d5733bb 100644
--- a/internal/scheduling/reservations/failover/integration_test.go
+++ b/internal/scheduling/reservations/failover/integration_test.go
@@ -693,6 +693,7 @@ func (env *IntegrationTestEnv) TriggerFailoverReconcile(flavorRequirements map[s
 		env.VMSource,
 		config,
 		schedulerClient,
+		nil,
 	)
 
 	_, err := controller.ReconcilePeriodic(context.Background())
diff --git a/internal/scheduling/reservations/failover/monitor.go b/internal/scheduling/reservations/failover/monitor.go
new file mode 100644
index 000000000..c1089ea53
--- /dev/null
+++ b/internal/scheduling/reservations/failover/monitor.go
@@ -0,0 +1,150 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package failover
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+var azLabel = []string{"availability_zone"}
+
+// FailoverMonitor provides Prometheus metrics for the failover reconciliation controller.
+type FailoverMonitor struct {
+	reconciliationRuns     *prometheus.CounterVec
+	reconciliationDuration *prometheus.HistogramVec
+	totalVMs               *prometheus.GaugeVec
+	totalReservations      *prometheus.GaugeVec
+	vmsMissingFailover     *prometheus.GaugeVec
+	vmsProcessed           *prometheus.CounterVec
+	reservationsNeeded     *prometheus.CounterVec
+	reservationsReused     *prometheus.CounterVec
+	reservationsCreated    *prometheus.CounterVec
+	reservationsFailed     *prometheus.CounterVec
+	reservationsUpdated    *prometheus.CounterVec
+	reservationsDeleted    *prometheus.CounterVec
+}
+
+// NewFailoverMonitor creates a new monitor with Prometheus metrics.
+func NewFailoverMonitor() *FailoverMonitor {
+	m := &FailoverMonitor{
+		reconciliationRuns: prometheus.NewCounterVec(prometheus.CounterOpts{
+			Name: "cortex_failover_reconciliation_runs_total",
+			Help: "Total number of failover periodic reconciliation runs since pod restart",
+		}, azLabel),
+		reconciliationDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
+			Name:    "cortex_failover_reconciliation_duration_seconds",
+			Help:    "Duration of failover periodic reconciliation cycles",
+			Buckets: []float64{0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60},
+		}, azLabel),
+		totalVMs: prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "cortex_failover_reconciliation_total_vms",
+			Help: "Total number of VMs seen during the last reconciliation",
+		}, azLabel),
+		totalReservations: prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "cortex_failover_reconciliation_total_reservations",
+			Help: "Total number of failover reservations during the last reconciliation",
+		}, azLabel),
+		vmsMissingFailover: prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "cortex_failover_reconciliation_vms_missing_failover",
+			Help: "Number of VMs missing required failover reservations during the last reconciliation",
+		}, azLabel),
+		vmsProcessed: prometheus.NewCounterVec(prometheus.CounterOpts{
+			Name: "cortex_failover_reconciliation_vms_processed_total",
+			Help: "Total number of VMs processed across all reconciliation cycles since pod restart",
+		}, azLabel),
+		reservationsNeeded: prometheus.NewCounterVec(prometheus.CounterOpts{
+			Name: "cortex_failover_reconciliation_reservations_needed_total",
+			Help: "Total number of reservations needed across all reconciliation cycles since pod restart",
+		}, azLabel),
+		reservationsReused: prometheus.NewCounterVec(prometheus.CounterOpts{
+			Name: "cortex_failover_reconciliation_reservations_reused_total",
+			Help: "Total number of reservations reused across all reconciliation cycles since pod restart",
+		}, azLabel),
+		reservationsCreated: prometheus.NewCounterVec(prometheus.CounterOpts{
+			Name: "cortex_failover_reconciliation_reservations_created_total",
+			Help: "Total number of reservations created across all reconciliation cycles since pod restart",
+		}, azLabel),
+		reservationsFailed: prometheus.NewCounterVec(prometheus.CounterOpts{
+			Name: "cortex_failover_reconciliation_reservations_failed_total",
+			Help: "Total number of failed reservation attempts across all reconciliation cycles since pod restart",
+		}, azLabel),
+		reservationsUpdated: prometheus.NewCounterVec(prometheus.CounterOpts{
+			Name: "cortex_failover_reconciliation_reservations_updated_total",
+			Help: "Total number of reservation allocation updates across all reconciliation cycles since pod restart",
+		}, azLabel),
+		reservationsDeleted: prometheus.NewCounterVec(prometheus.CounterOpts{
+			Name: "cortex_failover_reconciliation_reservations_deleted_total",
+			Help: "Total number of empty reservations deleted across all reconciliation cycles since pod restart",
+		}, azLabel),
+	}
+
+	// Pre-initialize the aggregate label so metrics appear even before the first reconciliation.
+	m.preInitialize("")
+
+	return m
+}
+
+func (m *FailoverMonitor) preInitialize(az string) {
+	m.reconciliationRuns.WithLabelValues(az)
+	m.reconciliationDuration.WithLabelValues(az)
+	m.totalVMs.WithLabelValues(az)
+	m.totalReservations.WithLabelValues(az)
+	m.vmsMissingFailover.WithLabelValues(az)
+	m.vmsProcessed.WithLabelValues(az)
+	m.reservationsNeeded.WithLabelValues(az)
+	m.reservationsReused.WithLabelValues(az)
+	m.reservationsCreated.WithLabelValues(az)
+	m.reservationsFailed.WithLabelValues(az)
+	m.reservationsUpdated.WithLabelValues(az)
+	m.reservationsDeleted.WithLabelValues(az)
+}
+
+// RecordReconciliation records all metrics from a single reconciliation cycle.
+// The availabilityZone parameter allows future per-AZ reporting; pass "" for aggregate.
+func (m *FailoverMonitor) RecordReconciliation(summary reconcileSummary, availabilityZone string) {
+	m.reconciliationRuns.WithLabelValues(availabilityZone).Inc()
+	m.reconciliationDuration.WithLabelValues(availabilityZone).Observe(summary.duration.Seconds())
+	m.totalVMs.WithLabelValues(availabilityZone).Set(float64(summary.totalVMs))
+	m.totalReservations.WithLabelValues(availabilityZone).Set(float64(summary.totalReservations))
+	m.vmsMissingFailover.WithLabelValues(availabilityZone).Set(float64(summary.vmsMissingFailover))
+	m.vmsProcessed.WithLabelValues(availabilityZone).Add(float64(summary.vmsProcessed))
+	m.reservationsNeeded.WithLabelValues(availabilityZone).Add(float64(summary.reservationsNeeded))
+	m.reservationsReused.WithLabelValues(availabilityZone).Add(float64(summary.totalReused))
+	m.reservationsCreated.WithLabelValues(availabilityZone).Add(float64(summary.totalCreated))
+	m.reservationsFailed.WithLabelValues(availabilityZone).Add(float64(summary.totalFailed))
+	m.reservationsUpdated.WithLabelValues(availabilityZone).Add(float64(summary.reservationsUpdated))
+	m.reservationsDeleted.WithLabelValues(availabilityZone).Add(float64(summary.reservationsDeleted))
+}
+
+// Describe implements prometheus.Collector.
+func (m *FailoverMonitor) Describe(ch chan<- *prometheus.Desc) {
+	m.reconciliationRuns.Describe(ch)
+	m.reconciliationDuration.Describe(ch)
+	m.totalVMs.Describe(ch)
+	m.totalReservations.Describe(ch)
+	m.vmsMissingFailover.Describe(ch)
+	m.vmsProcessed.Describe(ch)
+	m.reservationsNeeded.Describe(ch)
+	m.reservationsReused.Describe(ch)
+	m.reservationsCreated.Describe(ch)
+	m.reservationsFailed.Describe(ch)
+	m.reservationsUpdated.Describe(ch)
+	m.reservationsDeleted.Describe(ch)
+}
+
+// Collect implements prometheus.Collector.
+func (m *FailoverMonitor) Collect(ch chan<- prometheus.Metric) {
+	m.reconciliationRuns.Collect(ch)
+	m.reconciliationDuration.Collect(ch)
+	m.totalVMs.Collect(ch)
+	m.totalReservations.Collect(ch)
+	m.vmsMissingFailover.Collect(ch)
+	m.vmsProcessed.Collect(ch)
+	m.reservationsNeeded.Collect(ch)
+	m.reservationsReused.Collect(ch)
+	m.reservationsCreated.Collect(ch)
+	m.reservationsFailed.Collect(ch)
+	m.reservationsUpdated.Collect(ch)
+	m.reservationsDeleted.Collect(ch)
+}
diff --git a/internal/scheduling/reservations/failover/monitor_test.go b/internal/scheduling/reservations/failover/monitor_test.go
new file mode 100644
index 000000000..382cfcaf8
--- /dev/null
+++ b/internal/scheduling/reservations/failover/monitor_test.go
@@ -0,0 +1,290 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package failover
+
+import (
+	"testing"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+	dto "github.com/prometheus/client_model/go"
+)
+
+func gatherMetricFamilies(t *testing.T, monitor *FailoverMonitor) map[string]*dto.MetricFamily {
+	t.Helper()
+	registry := prometheus.NewRegistry()
+	if err := registry.Register(monitor); err != nil {
+		t.Fatalf("failed to register monitor: %v", err)
+	}
+	families, err := registry.Gather()
+	if err != nil {
+		t.Fatalf("failed to gather metrics: %v", err)
+	}
+	result := make(map[string]*dto.MetricFamily, len(families))
+	for _, f := range families {
+		result[*f.Name] = f
+	}
+	return result
+}
+
+func findMetricWithAZ(family *dto.MetricFamily, az string) *dto.Metric {
+	for _, m := range family.Metric {
+		for _, l := range m.Label {
+			if *l.Name == "availability_zone" && *l.Value == az {
+				return m
+			}
+		}
+	}
+	return nil
+}
+
+func TestFailoverMonitor_MetricsRegistration(t *testing.T) {
+	monitor := NewFailoverMonitor()
+
+	// Record a reconciliation so all metrics have values
+	monitor.RecordReconciliation(reconcileSummary{
+		duration:            500 * time.Millisecond,
+		totalVMs:            100,
+		totalReservations:   50,
+		vmsMissingFailover:  5,
+		vmsProcessed:        10,
+		reservationsNeeded:  8,
+		totalReused:         3,
+		totalCreated:        4,
+		totalFailed:         1,
+		reservationsUpdated: 2,
+		reservationsDeleted: 1,
+	}, "")
+
+	families := gatherMetricFamilies(t, monitor)
+
+	expectedMetrics := map[string]dto.MetricType{
+		"cortex_failover_reconciliation_runs_total":                 dto.MetricType_COUNTER,
+		"cortex_failover_reconciliation_duration_seconds":           dto.MetricType_HISTOGRAM,
+		"cortex_failover_reconciliation_total_vms":                  dto.MetricType_GAUGE,
+		"cortex_failover_reconciliation_total_reservations":         dto.MetricType_GAUGE,
+		"cortex_failover_reconciliation_vms_missing_failover":       dto.MetricType_GAUGE,
+		"cortex_failover_reconciliation_vms_processed_total":        dto.MetricType_COUNTER,
+		"cortex_failover_reconciliation_reservations_needed_total":  dto.MetricType_COUNTER,
+		"cortex_failover_reconciliation_reservations_reused_total":  dto.MetricType_COUNTER,
+		"cortex_failover_reconciliation_reservations_created_total": dto.MetricType_COUNTER,
+		"cortex_failover_reconciliation_reservations_failed_total":  dto.MetricType_COUNTER,
+		"cortex_failover_reconciliation_reservations_updated_total": dto.MetricType_COUNTER,
+		"cortex_failover_reconciliation_reservations_deleted_total": dto.MetricType_COUNTER,
+	}
+
+	for name, expectedType := range expectedMetrics {
+		family, ok := families[name]
+		if !ok {
+			t.Errorf("metric %q not found in registry", name)
+			continue
+		}
+		if *family.Type != expectedType {
+			t.Errorf("metric %q: expected type %v, got %v", name, expectedType, *family.Type)
+		}
+	}
+}
+
+func TestFailoverMonitor_RecordReconciliation(t *testing.T) {
+	tests := []struct {
+		name             string
+		summaries        []reconcileSummary
+		azs              []string
+		wantRuns         float64
+		wantGaugeVMs     float64
+		wantGaugeRes     float64
+		wantGaugeMissing float64
+		wantProcessed    float64
+		wantCreated      float64
+		wantFailed       float64
+		wantReused       float64
+		wantNeeded       float64
+		wantUpdated      float64
+		wantDeleted      float64
+		checkAZ          string
+	}{
+		{
+			name: "single reconciliation",
+			summaries: []reconcileSummary{{
+				duration:            500 * time.Millisecond,
+				totalVMs:            100,
+				totalReservations:   50,
+				vmsMissingFailover:  5,
+				vmsProcessed:        10,
+				reservationsNeeded:  8,
+				totalReused:         3,
+				totalCreated:        4,
+				totalFailed:         1,
+				reservationsUpdated: 2,
+				reservationsDeleted: 1,
+			}},
+			azs:              []string{""},
+			wantRuns:         1,
+			wantGaugeVMs:     100,
+			wantGaugeRes:     50,
+			wantGaugeMissing: 5,
+			wantProcessed:    10,
+			wantCreated:      4,
+			wantFailed:       1,
+			wantReused:       3,
+			wantNeeded:       8,
+			wantUpdated:      2,
+			wantDeleted:      1,
+			checkAZ:          "",
+		},
+		{
+			name: "counters accumulate across runs",
+			summaries: []reconcileSummary{
+				{duration: 100 * time.Millisecond, totalVMs: 100, totalReservations: 50, vmsProcessed: 10, totalCreated: 4, totalFailed: 1, totalReused: 3, reservationsNeeded: 8, reservationsUpdated: 2, reservationsDeleted: 1, vmsMissingFailover: 5},
+				{duration: 200 * time.Millisecond, totalVMs: 95, totalReservations: 52, vmsProcessed: 5, totalCreated: 2, totalFailed: 0, totalReused: 1, reservationsNeeded: 3, reservationsUpdated: 1, reservationsDeleted: 0, vmsMissingFailover: 3},
+			},
+			azs:              []string{"", ""},
+			wantRuns:         2,
+			wantGaugeVMs:     95,
+			wantGaugeRes:     52,
+			wantGaugeMissing: 3,
+			wantProcessed:    15,
+			wantCreated:      6,
+			wantFailed:       1,
+			wantReused:       4,
+			wantNeeded:       11,
+			wantUpdated:      3,
+			wantDeleted:      1,
+			checkAZ:          "",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			monitor := NewFailoverMonitor()
+
+			for i, s := range tt.summaries {
+				monitor.RecordReconciliation(s, tt.azs[i])
+			}
+
+			families := gatherMetricFamilies(t, monitor)
+
+			assertCounter(t, families, "cortex_failover_reconciliation_runs_total", tt.checkAZ, tt.wantRuns)
+			assertGauge(t, families, "cortex_failover_reconciliation_total_vms", tt.checkAZ, tt.wantGaugeVMs)
+			assertGauge(t, families, "cortex_failover_reconciliation_total_reservations", tt.checkAZ, tt.wantGaugeRes)
+			assertGauge(t, families, "cortex_failover_reconciliation_vms_missing_failover", tt.checkAZ, tt.wantGaugeMissing)
+			assertCounter(t, families, "cortex_failover_reconciliation_vms_processed_total", tt.checkAZ, tt.wantProcessed)
+			assertCounter(t, families, "cortex_failover_reconciliation_reservations_needed_total", tt.checkAZ, tt.wantNeeded)
+			assertCounter(t, families, "cortex_failover_reconciliation_reservations_reused_total", tt.checkAZ, tt.wantReused)
+			assertCounter(t, families, "cortex_failover_reconciliation_reservations_created_total", tt.checkAZ, tt.wantCreated)
+			assertCounter(t, families, "cortex_failover_reconciliation_reservations_failed_total", tt.checkAZ, tt.wantFailed)
+			assertCounter(t, families, "cortex_failover_reconciliation_reservations_updated_total", tt.checkAZ, tt.wantUpdated)
+			assertCounter(t, families, "cortex_failover_reconciliation_reservations_deleted_total", tt.checkAZ, tt.wantDeleted)
+
+			// Verify histogram recorded observations
+			histFamily := families["cortex_failover_reconciliation_duration_seconds"]
+			if histFamily == nil {
+				t.Fatal("duration histogram not found")
+			}
+			m := findMetricWithAZ(histFamily, tt.checkAZ)
+			if m == nil || m.Histogram == nil {
+				t.Fatal("duration histogram metric not found for AZ")
+			}
+			if got := m.Histogram.GetSampleCount(); got != uint64(len(tt.summaries)) {
+				t.Errorf("duration histogram sample count: got %d, want %d", got, len(tt.summaries))
+			}
+		})
+	}
+}
+
+func TestFailoverMonitor_AvailabilityZoneLabel(t *testing.T) {
+	monitor := NewFailoverMonitor()
+
+	monitor.RecordReconciliation(reconcileSummary{
+		duration:          100 * time.Millisecond,
+		totalVMs:          50,
+		totalReservations: 20,
+		vmsProcessed:      10,
+		totalCreated:      5,
+	}, "eu-de-1a")
+
+	monitor.RecordReconciliation(reconcileSummary{
+		duration:          200 * time.Millisecond,
+		totalVMs:          40,
+		totalReservations: 15,
+		vmsProcessed:      8,
+		totalCreated:      3,
+	}, "eu-de-1b")
+
+	families := gatherMetricFamilies(t, monitor)
+
+	assertCounter(t, families, "cortex_failover_reconciliation_runs_total", "eu-de-1a", 1)
+	assertCounter(t, families, "cortex_failover_reconciliation_runs_total", "eu-de-1b", 1)
+	assertGauge(t, families, "cortex_failover_reconciliation_total_vms", "eu-de-1a", 50)
+	assertGauge(t, families, "cortex_failover_reconciliation_total_vms", "eu-de-1b", 40)
+	assertCounter(t, families, "cortex_failover_reconciliation_reservations_created_total", "eu-de-1a", 5)
+	assertCounter(t, families, "cortex_failover_reconciliation_reservations_created_total", "eu-de-1b", 3)
+}
+
+func TestFailoverMonitor_PreInitialization(t *testing.T) {
+	monitor := NewFailoverMonitor()
+
+	// Without recording anything, all metrics should still be present with the aggregate label
+	families := gatherMetricFamilies(t, monitor)
+
+	expectedMetrics := []string{
+		"cortex_failover_reconciliation_runs_total",
+		"cortex_failover_reconciliation_duration_seconds",
+		"cortex_failover_reconciliation_total_vms",
+		"cortex_failover_reconciliation_total_reservations",
+		"cortex_failover_reconciliation_vms_missing_failover",
+		"cortex_failover_reconciliation_vms_processed_total",
+		"cortex_failover_reconciliation_reservations_needed_total",
+		"cortex_failover_reconciliation_reservations_reused_total",
+		"cortex_failover_reconciliation_reservations_created_total",
+		"cortex_failover_reconciliation_reservations_failed_total",
+		"cortex_failover_reconciliation_reservations_updated_total",
+		"cortex_failover_reconciliation_reservations_deleted_total",
+	}
+
+	for _, name := range expectedMetrics {
+		family, ok := families[name]
+		if !ok {
+			t.Errorf("metric %q not found after pre-initialization (no reconciliation recorded)", name)
+			continue
+		}
+		if m := findMetricWithAZ(family, ""); m == nil {
+			t.Errorf("metric %q missing aggregate label (availability_zone=\"\")", name)
+		}
+	}
+}
+
+func assertCounter(t *testing.T, families map[string]*dto.MetricFamily, name, az string, expected float64) {
+	t.Helper()
+	family, ok := families[name]
+	if !ok {
+		t.Errorf("counter %q not found", name)
+		return
+	}
+	m := findMetricWithAZ(family, az)
+	if m == nil || m.Counter == nil {
+		t.Errorf("counter %q with az=%q not found", name, az)
+		return
+	}
+	if got := m.Counter.GetValue(); got != expected {
+		t.Errorf("counter %q: got %v, want %v", name, got, expected)
+	}
+}
+
+func assertGauge(t *testing.T, families map[string]*dto.MetricFamily, name, az string, expected float64) {
+	t.Helper()
+	family, ok := families[name]
+	if !ok {
+		t.Errorf("gauge %q not found", name)
+		return
+	}
+	m := findMetricWithAZ(family, az)
+	if m == nil || m.Gauge == nil {
+		t.Errorf("gauge %q with az=%q not found", name, az)
+		return
+	}
+	if got := m.Gauge.GetValue(); got != expected {
+		t.Errorf("gauge %q: got %v, want %v", name, got, expected)
+	}
+}