From 56df52d4316899f29c69b46e92f8f09034144b43 Mon Sep 17 00:00:00 2001 From: Alice Frosi Date: Thu, 11 Jun 2026 13:26:56 +0000 Subject: [PATCH 1/7] daemon: add Switch and Reboot methods to bootc executor Assisted-by: Claude Opus 4.6 (1M context) Signed-off-by: Alice Frosi --- internal/bootc/executor.go | 51 ++++++++++++++++++++++++++++-------- internal/daemon/fake_test.go | 8 ++++++ 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/internal/bootc/executor.go b/internal/bootc/executor.go index 013d0ce..2a0763f 100644 --- a/internal/bootc/executor.go +++ b/internal/bootc/executor.go @@ -6,6 +6,9 @@ import ( "context" "fmt" "os/exec" + "strings" + + logf "sigs.k8s.io/controller-runtime/pkg/log" ) // Executor abstracts the execution of bootc commands on the host. @@ -13,6 +16,8 @@ import ( // PID namespaces. Tests can provide a fake implementation. type Executor interface { Status(ctx context.Context) ([]byte, error) + Switch(ctx context.Context, image string) error + Reboot(ctx context.Context) error } // HostExecutor runs bootc commands on the host via nsenter. @@ -23,21 +28,45 @@ func NewHostExecutor() *HostExecutor { return &HostExecutor{} } -func (e *HostExecutor) Status(ctx context.Context) ([]byte, error) { - cmd := exec.CommandContext(ctx, - "nsenter", +func (e *HostExecutor) nsenterCmd(ctx context.Context, args ...string) *exec.Cmd { + base := []string{ "--target", "1", - "--mount", - "--pid", - "--setuid", "0", - "--setgid", "0", - "--env", - "--", - "bootc", "status", "--json", "--format-version", "1", - ) + "--mount", "--pid", + "--setuid", "0", "--setgid", "0", + "--env", "--", + } + return exec.CommandContext(ctx, "nsenter", append(base, args...)...) 
+} + +func (e *HostExecutor) Status(ctx context.Context) ([]byte, error) { + cmd := e.nsenterCmd(ctx, "bootc", "status", "--json", "--format-version", "1") out, err := cmd.Output() if err != nil { return nil, fmt.Errorf("running bootc status: %w", err) } return out, nil } + +func (e *HostExecutor) Switch(ctx context.Context, image string) error { + log := logf.FromContext(ctx) + + cmd := e.nsenterCmd(ctx, "bootc", "switch", image) + log.Info("Executing", "cmd", strings.Join(cmd.Args, " ")) + out, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("running bootc switch: %s: %w", out, err) + } + return nil +} + +func (e *HostExecutor) Reboot(ctx context.Context) error { + log := logf.FromContext(ctx) + + cmd := e.nsenterCmd(ctx, "systemctl", "reboot") + log.Info("Executing", "cmd", strings.Join(cmd.Args, " ")) + out, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("running systemctl reboot: %s: %w", out, err) + } + return nil +} diff --git a/internal/daemon/fake_test.go b/internal/daemon/fake_test.go index b6ad398..3d1339b 100644 --- a/internal/daemon/fake_test.go +++ b/internal/daemon/fake_test.go @@ -19,6 +19,14 @@ func (f *fakeExecutor) Status(_ context.Context) ([]byte, error) { return f.data, f.err } +func (f *fakeExecutor) Switch(_ context.Context, _ string) error { + return nil +} + +func (f *fakeExecutor) Upgrade(_ context.Context) error { + return nil +} + func (f *fakeExecutor) set(data []byte, err error) { f.mu.Lock() defer f.mu.Unlock() From 2d53b0e3c2a074685c55887723abb0f1b8a7be6d Mon Sep 17 00:00:00 2001 From: Alice Frosi Date: Thu, 11 Jun 2026 14:13:20 +0000 Subject: [PATCH 2/7] daemon: implement state machine in BootcNode reconciler Rewrite the reconciler to detect image mismatches between spec.desiredImage and the booted image, stage via bootc switch in a background goroutine. 
Once staging has finished, the termination of the goroutine triggers the reconciliation loop once more, which will detect that the system requires a reboot. The reconciliation function ensures that the bootc node transitions from Staging to Staged, and then to Rebooting. Assisted-by: Claude Opus 4.6 (1M context) Signed-off-by: Alice Frosi --- internal/daemon/reconciler.go | 234 ++++++++++++++++++++++++++++++---- 1 file changed, 210 insertions(+), 24 deletions(-) diff --git a/internal/daemon/reconciler.go b/internal/daemon/reconciler.go index 084a329..32f4306 100644 --- a/internal/daemon/reconciler.go +++ b/internal/daemon/reconciler.go @@ -5,32 +5,54 @@ package daemon import ( "context" "fmt" + "reflect" + "strings" + "sync" + "time" + "github.com/go-logr/logr" apierrors "k8s.io/apimachinery/pkg/api/errors" apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/source" bootcv1alpha1 "github.com/jlebon/bootc-operator/api/v1alpha1" "github.com/jlebon/bootc-operator/internal/bootc" ) +// switchOp tracks the state of an in-flight bootc switch operation. +type switchOp struct { + mu sync.Mutex + image string + cancel context.CancelFunc + err error +} + // BootcNodeReconciler reconciles the BootcNode for the node this daemon -// runs on. It reads bootc status from the host and writes it into the -// BootcNode's status subresource. +// runs on. It reads bootc status from the host, detects image mismatches, +// and drives updates via bootc switch. 
type BootcNodeReconciler struct { client.Client Scheme *runtime.Scheme NodeName string Executor bootc.Executor + + inflight switchOp + switchDone chan event.GenericEvent } func (r *BootcNodeReconciler) SetupWithManager(mgr ctrl.Manager) error { + r.switchDone = make(chan event.GenericEvent, 1) + return ctrl.NewControllerManagedBy(mgr). For(&bootcv1alpha1.BootcNode{}). + WatchesRawSource(source.Channel(r.switchDone, &handler.EnqueueRequestForObject{})). Named("bootcnode"). Complete(r) } @@ -51,23 +73,27 @@ func (r *BootcNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( return ctrl.Result{}, fmt.Errorf("fetching BootcNode: %w", err) } - patch := client.MergeFrom(bn.DeepCopy()) + specChanged := bn.Generation > bn.Status.ObservedGeneration + orig := bn.DeepCopy() + patch := client.MergeFrom(orig) + bn.Status.ObservedGeneration = bn.Generation - if err := r.populateStatus(ctx, &bn); err != nil { - log.Error(err, "Failed to populate bootc status") - } + result, reconcileErr := r.reconcileBootcNode(ctx, &bn, specChanged) - if err := r.Status().Patch(ctx, &bn, patch); err != nil { - return ctrl.Result{}, fmt.Errorf("patching BootcNode status: %w", err) + if !reflect.DeepEqual(bn.Status, orig.Status) { + if patchErr := r.Status().Patch(ctx, &bn, patch); patchErr != nil { + return ctrl.Result{}, fmt.Errorf("patching BootcNode status: %w", patchErr) + } } - log.Info("Patched BootcNode status from bootc") - return ctrl.Result{}, nil + return result, reconcileErr } -func (r *BootcNodeReconciler) populateStatus(ctx context.Context, bn *bootcv1alpha1.BootcNode) error { - data, err := r.Executor.Status(ctx) - if err != nil { +func (r *BootcNodeReconciler) reconcileBootcNode(ctx context.Context, bn *bootcv1alpha1.BootcNode, specChanged bool) (ctrl.Result, error) { + log := logf.FromContext(ctx).WithValues("node", r.NodeName) + + if err := r.populateBootcFields(ctx, bn); err != nil { + log.Error(err, "Failed to populate bootc status") 
apimeta.SetStatusCondition(&bn.Status.Conditions, metav1.Condition{ Type: bootcv1alpha1.NodeDegraded, Status: metav1.ConditionTrue, @@ -75,30 +101,100 @@ func (r *BootcNodeReconciler) populateStatus(ctx context.Context, bn *bootcv1alp Message: fmt.Sprintf("failed to get bootc status: %v", err), ObservedGeneration: bn.Generation, }) - return fmt.Errorf("getting bootc status: %w", err) + return ctrl.Result{}, fmt.Errorf("populating bootc fields: %w", err) } - status, err := bootc.ParseStatus(data) - if err != nil { + if bn.Status.Booted == nil { apimeta.SetStatusCondition(&bn.Status.Conditions, metav1.Condition{ Type: bootcv1alpha1.NodeDegraded, Status: metav1.ConditionTrue, Reason: bootcv1alpha1.NodeReasonError, - Message: fmt.Sprintf("failed to parse bootc status: %v", err), + Message: "bootc status has no booted entry", ObservedGeneration: bn.Generation, }) - return fmt.Errorf("parsing bootc status: %w", err) + return ctrl.Result{}, fmt.Errorf("bootc status has no booted entry") } - bn.Status.ObservedGeneration = bn.Generation - bn.Status.Booted = convertBootEntry(status.Status.Booted) - bn.Status.Staged = convertBootEntry(status.Status.Staged) - bn.Status.Rollback = convertBootEntry(status.Status.Rollback) + // Node is idle + if !imageNeedsUpdate(bn.Spec.DesiredImage, bn.Status.Booted.ImageDigest) { + apimeta.SetStatusCondition(&bn.Status.Conditions, metav1.Condition{ + Type: bootcv1alpha1.NodeIdle, + Status: metav1.ConditionTrue, + Reason: bootcv1alpha1.NodeReasonIdle, + ObservedGeneration: bn.Generation, + }) + apimeta.SetStatusCondition(&bn.Status.Conditions, metav1.Condition{ + Type: bootcv1alpha1.NodeDegraded, + Status: metav1.ConditionFalse, + Reason: bootcv1alpha1.NodeReasonHealthy, + ObservedGeneration: bn.Generation, + }) + return ctrl.Result{}, nil + } + + switchErr := r.inflight.takeErr() + + if switchErr != nil { + apimeta.SetStatusCondition(&bn.Status.Conditions, metav1.Condition{ + Type: bootcv1alpha1.NodeIdle, + Status: metav1.ConditionTrue, + 
Reason: bootcv1alpha1.NodeReasonIdle, + ObservedGeneration: bn.Generation, + }) + apimeta.SetStatusCondition(&bn.Status.Conditions, metav1.Condition{ + Type: bootcv1alpha1.NodeDegraded, + Status: metav1.ConditionTrue, + Reason: bootcv1alpha1.NodeReasonError, + Message: fmt.Sprintf("bootc switch failed: %v", switchErr), + ObservedGeneration: bn.Generation, + }) + // Requeue with a delay to retry transient failures (e.g. network + // blips, registry timeouts) without hammering the registry. + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + } + + reboot := bn.Spec.DesiredImageState == bootcv1alpha1.DesiredImageStateBooted + desiredImage := bn.Spec.DesiredImage + + if skip := r.inflight.acquire(log, desiredImage); skip { + return ctrl.Result{}, nil + } + + _, desiredDigest, _ := strings.Cut(desiredImage, "@") + alreadyStaged := bn.Status.Staged != nil && bn.Status.Staged.ImageDigest == desiredDigest + + idleCond := apimeta.FindStatusCondition(bn.Status.Conditions, bootcv1alpha1.NodeIdle) + + // We always transition through Staged before Rebooting. Without this, + // a fast controller can drain and set DesiredImageState=Booted before + // we reconcile after staging completes, causing us to skip Staged. 
+ var reason string + switch { + case !alreadyStaged: + reason = bootcv1alpha1.NodeReasonStaging + switchCtx, cancel := context.WithCancel(context.Background()) + r.inflight.start(desiredImage, cancel) + log.Info("Starting staging", "image", desiredImage) + go r.inflight.run(switchCtx, r.NodeName, desiredImage, r.Executor, r.switchDone) + + case reboot && idleCond != nil && (idleCond.Reason == bootcv1alpha1.NodeReasonStaged || idleCond.Reason == bootcv1alpha1.NodeReasonRebooting): + reason = bootcv1alpha1.NodeReasonRebooting + if idleCond.Reason == bootcv1alpha1.NodeReasonStaged { + log.Info("Starting reboot", "image", desiredImage) + if err := r.Executor.Reboot(ctx); err != nil { + return ctrl.Result{}, fmt.Errorf("reboot: %w", err) + } + } + + default: + reason = bootcv1alpha1.NodeReasonStaged + log.Info("Image staged", "image", desiredImage) + } apimeta.SetStatusCondition(&bn.Status.Conditions, metav1.Condition{ Type: bootcv1alpha1.NodeIdle, - Status: metav1.ConditionTrue, - Reason: bootcv1alpha1.NodeReasonIdle, + Status: metav1.ConditionFalse, + Reason: reason, ObservedGeneration: bn.Generation, }) apimeta.SetStatusCondition(&bn.Status.Conditions, metav1.Condition{ @@ -108,9 +204,99 @@ func (r *BootcNodeReconciler) populateStatus(ctx context.Context, bn *bootcv1alp ObservedGeneration: bn.Generation, }) + return ctrl.Result{}, nil +} + +func (s *switchOp) takeErr() error { + s.mu.Lock() + defer s.mu.Unlock() + err := s.err + s.err = nil + return err +} + +// acquire checks whether a switch operation is already in flight. +// It returns true if the reconciler should skip (operation in progress), +// or false after cancelling any stale in-flight switch. 
+func (s *switchOp) acquire(log logr.Logger, image string) bool { + s.mu.Lock() + defer s.mu.Unlock() + + if s.image == image { + log.Info("Switch already in progress for this image", "image", image) + return true + } + if s.cancel != nil { + log.Info("Cancelling in-flight switch", "old", s.image, "new", image) + s.cancel() + s.image = "" + s.cancel = nil + } + return false +} + +func (s *switchOp) start(image string, cancel context.CancelFunc) { + s.mu.Lock() + defer s.mu.Unlock() + s.image = image + s.cancel = cancel + s.err = nil +} + +func (s *switchOp) run(ctx context.Context, nodeName, image string, executor bootc.Executor, done chan<- event.GenericEvent) { + log := logf.FromContext(context.Background()).WithValues("node", nodeName, "image", image) + + err := executor.Switch(ctx, image) + + s.mu.Lock() + if ctx.Err() != nil { + log.Info("Switch cancelled") + } else if err != nil { + log.Error(err, "Switch failed") + s.err = err + } + s.image = "" + s.cancel = nil + s.mu.Unlock() + + if ctx.Err() == nil { + done <- event.GenericEvent{ + Object: &bootcv1alpha1.BootcNode{ + ObjectMeta: metav1.ObjectMeta{Name: nodeName}, + }, + } + } +} + +func (r *BootcNodeReconciler) populateBootcFields(ctx context.Context, bn *bootcv1alpha1.BootcNode) error { + data, err := r.Executor.Status(ctx) + if err != nil { + return fmt.Errorf("getting bootc status: %w", err) + } + + status, err := bootc.ParseStatus(data) + if err != nil { + return fmt.Errorf("failed to parse bootc status: %w", err) + } + + bn.Status.Booted = convertBootEntry(status.Status.Booted) + bn.Status.Staged = convertBootEntry(status.Status.Staged) + bn.Status.Rollback = convertBootEntry(status.Status.Rollback) + return nil } +// imageNeedsUpdate compares only the digest portion of desiredImage against +// bootedDigest. It assumes upgrades always come from the same image repository. +// TODO: also compare the image repository to detect cross-image switches. 
+func imageNeedsUpdate(desiredImage, bootedDigest string) bool { + _, digest, ok := strings.Cut(desiredImage, "@") + if !ok { + return true + } + return digest != bootedDigest +} + func convertBootEntry(entry *bootc.BootEntry) *bootcv1alpha1.ImageInfo { if entry == nil || entry.Image == nil { return nil From a8cb12cc91d4d71f96c18a4c50dacf127f4bcded Mon Sep 17 00:00:00 2001 From: Alice Frosi Date: Thu, 11 Jun 2026 14:13:28 +0000 Subject: [PATCH 3/7] daemon: refactor fakeExecutor to model bootc state Replace raw JSON bytes with a bootc.Status struct in the test fake. Status() serializes the struct via json.Marshal, and Switch() auto-mutates the status (staging sets Staged). Upgrade() records the call for test assertions. Add newBootcStatus() and newBootEntry() helpers to build test state without verbose JSON constants. Assisted-by: Claude Opus 4.6 (1M context) Signed-off-by: Alice Frosi --- internal/daemon/fake_test.go | 110 ++++++++++++++++++++++++++--- internal/daemon/reconciler_test.go | 106 ++++++++------------------- 2 files changed, 129 insertions(+), 87 deletions(-) diff --git a/internal/daemon/fake_test.go b/internal/daemon/fake_test.go index 3d1339b..d9ea2da 100644 --- a/internal/daemon/fake_test.go +++ b/internal/daemon/fake_test.go @@ -4,32 +4,122 @@ package daemon import ( "context" + "encoding/json" + "strings" "sync" + + testutil "github.com/jlebon/bootc-operator/test/util" + + "github.com/jlebon/bootc-operator/internal/bootc" ) type fakeExecutor struct { - mu sync.Mutex - data []byte - err error + mu sync.Mutex + status bootc.Status + statusErr error + + switchErr error + switchImg string + switchApply bool + switchHook func() } func (f *fakeExecutor) Status(_ context.Context) ([]byte, error) { f.mu.Lock() defer f.mu.Unlock() - return f.data, f.err + if f.statusErr != nil { + return nil, f.statusErr + } + data, err := json.Marshal(f.status) + if err != nil { + return nil, err + } + return data, nil } -func (f *fakeExecutor) Switch(_ context.Context, _ 
string) error { +func (f *fakeExecutor) Switch(_ context.Context, image string, apply bool) error { + f.mu.Lock() + f.switchImg = image + f.switchApply = apply + hook := f.switchHook + err := f.switchErr + f.mu.Unlock() + + if hook != nil { + hook() + } + if err != nil { + return err + } + + f.mu.Lock() + defer f.mu.Unlock() + _, digest, _ := strings.Cut(image, "@") + f.status.Status.Staged = newBootEntry(image, digest) return nil } -func (f *fakeExecutor) Upgrade(_ context.Context) error { - return nil +func (f *fakeExecutor) setStatusErr(err error) { + f.mu.Lock() + defer f.mu.Unlock() + f.statusErr = err +} + +func (f *fakeExecutor) setSwitchErr(err error) { + f.mu.Lock() + defer f.mu.Unlock() + f.switchErr = err } -func (f *fakeExecutor) set(data []byte, err error) { +func (f *fakeExecutor) setSwitchHook(hook func()) { f.mu.Lock() defer f.mu.Unlock() - f.data = data - f.err = err + f.switchHook = hook +} + +func (f *fakeExecutor) getSwitchImg() string { + f.mu.Lock() + defer f.mu.Unlock() + return f.switchImg +} + +func (f *fakeExecutor) getSwitchApply() bool { + f.mu.Lock() + defer f.mu.Unlock() + return f.switchApply +} + +func (f *fakeExecutor) reset() { + f.mu.Lock() + defer f.mu.Unlock() + f.status = bootc.Status{} + f.statusErr = nil + f.switchErr = nil + f.switchImg = "" + f.switchHook = nil + f.switchApply = false +} + +func newBootEntry(image, digest string) *bootc.BootEntry { + return &bootc.BootEntry{ + Image: &bootc.ImageStatus{ + Image: bootc.ImageReference{Image: image, Transport: "registry"}, + ImageDigest: digest, + Architecture: "amd64", + }, + } +} + +func newBootcStatus(bootedDigest string) bootc.Status { + return bootc.Status{ + APIVersion: "org.containers.bootc/v1alpha1", + Kind: "BootcHost", + Spec: bootc.StatusSpec{ + Image: &bootc.ImageReference{Image: testutil.ImageTaggedRef, Transport: "registry"}, + BootOrder: "default", + }, + Status: bootc.StatusBody{ + Booted: newBootEntry(testutil.ImageTaggedRef, bootedDigest), + }, + } } diff 
--git a/internal/daemon/reconciler_test.go b/internal/daemon/reconciler_test.go index e1fdc82..41984e5 100644 --- a/internal/daemon/reconciler_test.go +++ b/internal/daemon/reconciler_test.go @@ -4,6 +4,7 @@ package daemon import ( "context" + "errors" "fmt" "testing" "time" @@ -13,6 +14,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" bootcv1alpha1 "github.com/jlebon/bootc-operator/api/v1alpha1" + "github.com/jlebon/bootc-operator/internal/bootc" testutil "github.com/jlebon/bootc-operator/test/util" ) @@ -20,55 +22,9 @@ const ( pollInterval = 200 * time.Millisecond pollTimeout = 10 * time.Second - testImageRef = testutil.ImageDigestRefA + bootcStatusErrMsg = "bootc status failed" - bootcStatusFull = `{ - "apiVersion": "org.containers.bootc/v1alpha1", - "kind": "BootcHost", - "spec": { - "image": {"image": "quay.io/example/myos:latest", "transport": "registry"}, - "bootOrder": "default" - }, - "status": { - "booted": { - "image": { - "image": {"image": "quay.io/example/myos:latest", "transport": "registry"}, - "imageDigest": "` + testutil.DigestA + `", - "version": "1.0", - "architecture": "amd64" - }, - "incompatible": false, - "pinned": false, - "softRebootCapable": false, - "downloadOnly": false - }, - "staged": { - "image": { - "image": {"image": "quay.io/example/myos:latest", "transport": "registry"}, - "imageDigest": "` + testutil.DigestB + `", - "version": "2.0", - "architecture": "amd64" - }, - "incompatible": false, - "pinned": false, - "softRebootCapable": true, - "downloadOnly": false - }, - "rollback": { - "image": { - "image": {"image": "quay.io/example/myos:latest", "transport": "registry"}, - "imageDigest": "` + testutil.DigestC + `", - "version": "0.9", - "architecture": "amd64" - }, - "incompatible": false, - "pinned": false, - "softRebootCapable": false, - "downloadOnly": false - }, - "rollbackQueued": false - } -}` + testImageRef = testutil.ImageDigestRefA ) func TestReconcilePopulatesStatus(t *testing.T) { @@ -77,7 +33,28 @@ func 
TestReconcilePopulatesStatus(t *testing.T) { g.SetDefaultEventuallyPollingInterval(pollInterval) ctx := context.Background() - fake.set([]byte(bootcStatusFull), nil) + v1 := "1.0" + v2 := "2.0" + v3 := "0.9" + fake.status = newBootcStatus(testutil.DigestA) + fake.status.Status.Booted.Image.Version = &v1 + fake.status.Status.Staged = &bootc.BootEntry{ + Image: &bootc.ImageStatus{ + Image: bootc.ImageReference{Image: testutil.ImageTaggedRef, Transport: "registry"}, + ImageDigest: testutil.DigestB, + Version: &v2, + Architecture: "amd64", + }, + SoftRebootCapable: true, + } + fake.status.Status.Rollback = &bootc.BootEntry{ + Image: &bootc.ImageStatus{ + Image: bootc.ImageReference{Image: testutil.ImageTaggedRef, Transport: "registry"}, + ImageDigest: testutil.DigestC, + Version: &v3, + Architecture: "amd64", + }, + } bn := testutil.NewNode(testNodeName, testImageRef) g.Expect(k8sClient.Create(ctx, bn)).To(Succeed()) @@ -123,33 +100,8 @@ func TestReconcileBootcStatusError(t *testing.T) { g.SetDefaultEventuallyPollingInterval(pollInterval) ctx := context.Background() - fake.set(nil, fmt.Errorf("bootc status failed")) - - bn := testutil.NewNode(testNodeName, testImageRef) - g.Expect(k8sClient.Create(ctx, bn)).To(Succeed()) - t.Cleanup(func() { - _ = k8sClient.Delete(ctx, bn) - }) - - g.Eventually(func(g Gomega) { - var got bootcv1alpha1.BootcNode - g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(bn), &got)).To(Succeed()) - g.Expect(got.Status.Conditions).To(ContainElement(And( - HaveField("Type", bootcv1alpha1.NodeDegraded), - HaveField("Status", metav1.ConditionTrue), - HaveField("Reason", bootcv1alpha1.NodeReasonError), - HaveField("Message", ContainSubstring("bootc status")), - ))) - }).Should(Succeed()) -} - -func TestReconcileInvalidJSON(t *testing.T) { - g := NewWithT(t) - g.SetDefaultEventuallyTimeout(pollTimeout) - g.SetDefaultEventuallyPollingInterval(pollInterval) - ctx := context.Background() - - fake.set([]byte(`{invalid json`), nil) + fake.reset() + 
fake.setStatusErr(errors.New(bootcStatusErrMsg)) bn := testutil.NewNode(testNodeName, testImageRef) g.Expect(k8sClient.Create(ctx, bn)).To(Succeed()) @@ -164,7 +116,7 @@ func TestReconcileInvalidJSON(t *testing.T) { HaveField("Type", bootcv1alpha1.NodeDegraded), HaveField("Status", metav1.ConditionTrue), HaveField("Reason", bootcv1alpha1.NodeReasonError), - HaveField("Message", ContainSubstring("parse")), + HaveField("Message", Equal(fmt.Sprintf("failed to get bootc status: getting bootc status: %s", bootcStatusErrMsg))), ))) }).Should(Succeed()) } From f5b5a6740de6a95ce9dfc299b039ebb48171d7b6 Mon Sep 17 00:00:00 2001 From: Alice Frosi Date: Thu, 11 Jun 2026 14:13:37 +0000 Subject: [PATCH 4/7] daemon: add state machine unit tests Add envtest cases for the daemon reconciler state machine: - TestStagingTriggered: image mismatch triggers bootc switch - TestStagingError: switch failure sets Degraded condition - TestAlreadyStaged: skip switch when image already staged - TestRebootingSet: reboot triggered when desiredImageState is Booted - TestRollback: restage when desired image changes - TestCancelInflightSwitch: spec change cancels in-flight switch Assisted-by: Claude Opus 4.6 (1M context) Signed-off-by: Alice Frosi --- internal/daemon/fake_test.go | 24 ++-- internal/daemon/reconciler_test.go | 203 ++++++++++++++++++++++++++++- test/util/builders.go | 7 + 3 files changed, 224 insertions(+), 10 deletions(-) diff --git a/internal/daemon/fake_test.go b/internal/daemon/fake_test.go index d9ea2da..7bfc293 100644 --- a/internal/daemon/fake_test.go +++ b/internal/daemon/fake_test.go @@ -18,10 +18,10 @@ type fakeExecutor struct { status bootc.Status statusErr error - switchErr error - switchImg string - switchApply bool - switchHook func() + switchErr error + switchImg string + switchHook func() + rebooted bool } func (f *fakeExecutor) Status(_ context.Context) ([]byte, error) { @@ -37,10 +37,9 @@ func (f *fakeExecutor) Status(_ context.Context) ([]byte, error) { return data, 
nil } -func (f *fakeExecutor) Switch(_ context.Context, image string, apply bool) error { +func (f *fakeExecutor) Switch(_ context.Context, image string) error { f.mu.Lock() f.switchImg = image - f.switchApply = apply hook := f.switchHook err := f.switchErr f.mu.Unlock() @@ -59,6 +58,13 @@ func (f *fakeExecutor) Switch(_ context.Context, image string, apply bool) error return nil } +func (f *fakeExecutor) Reboot(_ context.Context) error { + f.mu.Lock() + defer f.mu.Unlock() + f.rebooted = true + return nil +} + func (f *fakeExecutor) setStatusErr(err error) { f.mu.Lock() defer f.mu.Unlock() @@ -83,10 +89,10 @@ func (f *fakeExecutor) getSwitchImg() string { return f.switchImg } -func (f *fakeExecutor) getSwitchApply() bool { +func (f *fakeExecutor) getRebooted() bool { f.mu.Lock() defer f.mu.Unlock() - return f.switchApply + return f.rebooted } func (f *fakeExecutor) reset() { @@ -97,7 +103,7 @@ func (f *fakeExecutor) reset() { f.switchErr = nil f.switchImg = "" f.switchHook = nil - f.switchApply = false + f.rebooted = false } func newBootEntry(image, digest string) *bootc.BootEntry { diff --git a/internal/daemon/reconciler_test.go b/internal/daemon/reconciler_test.go index 41984e5..8d11482 100644 --- a/internal/daemon/reconciler_test.go +++ b/internal/daemon/reconciler_test.go @@ -22,9 +22,12 @@ const ( pollInterval = 200 * time.Millisecond pollTimeout = 10 * time.Second + switchErrMsg = "switch failed: pull error" bootcStatusErrMsg = "bootc status failed" - testImageRef = testutil.ImageDigestRefA + testImageRef = testutil.ImageDigestRefA + testOtherImageRef = testutil.ImageDigestRefB + testThirdImageRef = testutil.ImageDigestRefC ) func TestReconcilePopulatesStatus(t *testing.T) { @@ -120,3 +123,201 @@ func TestReconcileBootcStatusError(t *testing.T) { ))) }).Should(Succeed()) } + +func TestStagingTriggered(t *testing.T) { + g := NewWithT(t) + g.SetDefaultEventuallyTimeout(pollTimeout) + g.SetDefaultEventuallyPollingInterval(pollInterval) + ctx := 
context.Background() + + fake.reset() + fake.status = newBootcStatus(testutil.DigestA) + + bn := testutil.NewNode(testNodeName, testOtherImageRef) + g.Expect(k8sClient.Create(ctx, bn)).To(Succeed()) + t.Cleanup(func() { + _ = k8sClient.Delete(ctx, bn) + }) + + g.Eventually(func(g Gomega) { + var got bootcv1alpha1.BootcNode + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(bn), &got)).To(Succeed()) + + g.Expect(got.Status.Staged).NotTo(BeNil()) + g.Expect(got.Status.Staged.ImageDigest).To(Equal(testutil.DigestB)) + + g.Expect(got.Status.Conditions).To(ContainElement(And( + HaveField("Type", bootcv1alpha1.NodeIdle), + HaveField("Status", metav1.ConditionFalse), + HaveField("Reason", bootcv1alpha1.NodeReasonStaged), + ))) + g.Expect(got.Status.Conditions).To(ContainElement(And( + HaveField("Type", bootcv1alpha1.NodeDegraded), + HaveField("Status", metav1.ConditionFalse), + ))) + }).Should(Succeed()) + + g.Expect(fake.getSwitchImg()).To(Equal(testOtherImageRef)) + g.Expect(fake.getRebooted()).To(BeFalse()) +} + +func TestStagingError(t *testing.T) { + g := NewWithT(t) + g.SetDefaultEventuallyTimeout(pollTimeout) + g.SetDefaultEventuallyPollingInterval(pollInterval) + ctx := context.Background() + + fake.reset() + fake.status = newBootcStatus(testutil.DigestA) + fake.setSwitchErr(errors.New(switchErrMsg)) + + bn := testutil.NewNode(testNodeName, testOtherImageRef) + g.Expect(k8sClient.Create(ctx, bn)).To(Succeed()) + t.Cleanup(func() { + _ = k8sClient.Delete(ctx, bn) + }) + + g.Eventually(func(g Gomega) { + var got bootcv1alpha1.BootcNode + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(bn), &got)).To(Succeed()) + g.Expect(got.Status.Conditions).To(ContainElement(And( + HaveField("Type", bootcv1alpha1.NodeIdle), + HaveField("Status", metav1.ConditionTrue), + HaveField("Reason", bootcv1alpha1.NodeReasonIdle), + ))) + g.Expect(got.Status.Conditions).To(ContainElement(And( + HaveField("Type", bootcv1alpha1.NodeDegraded), + HaveField("Status", 
metav1.ConditionTrue), + HaveField("Reason", bootcv1alpha1.NodeReasonError), + HaveField("Message", Equal(fmt.Sprintf("bootc switch failed: %s", switchErrMsg))), + ))) + }).Should(Succeed()) +} + +func TestAlreadyStaged(t *testing.T) { + g := NewWithT(t) + g.SetDefaultEventuallyTimeout(pollTimeout) + g.SetDefaultEventuallyPollingInterval(pollInterval) + ctx := context.Background() + + fake.reset() + fake.status = newBootcStatus(testutil.DigestA) + fake.status.Status.Staged = newBootEntry(testutil.ImageDigestRefB, testutil.DigestB) + + bn := testutil.NewNode(testNodeName, testOtherImageRef) + g.Expect(k8sClient.Create(ctx, bn)).To(Succeed()) + t.Cleanup(func() { + _ = k8sClient.Delete(ctx, bn) + }) + + g.Eventually(func(g Gomega) { + var got bootcv1alpha1.BootcNode + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(bn), &got)).To(Succeed()) + g.Expect(got.Status.Conditions).To(ContainElement(And( + HaveField("Type", bootcv1alpha1.NodeIdle), + HaveField("Status", metav1.ConditionFalse), + HaveField("Reason", bootcv1alpha1.NodeReasonStaged), + ))) + }).Should(Succeed()) + + g.Expect(fake.getSwitchImg()).To(BeEmpty()) +} + +func TestRebootingSet(t *testing.T) { + g := NewWithT(t) + g.SetDefaultEventuallyTimeout(pollTimeout) + g.SetDefaultEventuallyPollingInterval(pollInterval) + ctx := context.Background() + + fake.reset() + fake.status = newBootcStatus(testutil.DigestA) + fake.status.Status.Staged = newBootEntry(testutil.ImageDigestRefB, testutil.DigestB) + + bn := testutil.NewNode(testNodeName, testOtherImageRef, testutil.WithDesiredImageState(bootcv1alpha1.DesiredImageStateBooted)) + g.Expect(k8sClient.Create(ctx, bn)).To(Succeed()) + t.Cleanup(func() { + _ = k8sClient.Delete(ctx, bn) + }) + + g.Eventually(func(g Gomega) { + var got bootcv1alpha1.BootcNode + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(bn), &got)).To(Succeed()) + g.Expect(got.Status.Conditions).To(ContainElement(And( + HaveField("Type", bootcv1alpha1.NodeIdle), + HaveField("Status", 
metav1.ConditionFalse), + HaveField("Reason", bootcv1alpha1.NodeReasonRebooting), + ))) + }).Should(Succeed()) + + g.Expect(fake.getRebooted()).To(BeTrue()) +} + +func TestRollback(t *testing.T) { + g := NewWithT(t) + g.SetDefaultEventuallyTimeout(pollTimeout) + g.SetDefaultEventuallyPollingInterval(pollInterval) + ctx := context.Background() + + fake.reset() + fake.status = newBootcStatus(testutil.DigestA) + fake.status.Status.Staged = newBootEntry(testutil.ImageDigestRefB, testutil.DigestB) + + bn := testutil.NewNode(testNodeName, testThirdImageRef) + g.Expect(k8sClient.Create(ctx, bn)).To(Succeed()) + t.Cleanup(func() { + _ = k8sClient.Delete(ctx, bn) + }) + + g.Eventually(func(g Gomega) { + var got bootcv1alpha1.BootcNode + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(bn), &got)).To(Succeed()) + g.Expect(got.Status.Conditions).To(ContainElement(And( + HaveField("Type", bootcv1alpha1.NodeIdle), + HaveField("Status", metav1.ConditionFalse), + HaveField("Reason", bootcv1alpha1.NodeReasonStaged), + ))) + g.Expect(got.Status.Staged).NotTo(BeNil()) + g.Expect(got.Status.Staged.ImageDigest).To(Equal(testutil.DigestC)) + }).Should(Succeed()) + + g.Expect(fake.getRebooted()).To(BeFalse()) +} + +func TestCancelInflightSwitch(t *testing.T) { + g := NewWithT(t) + g.SetDefaultEventuallyTimeout(pollTimeout) + g.SetDefaultEventuallyPollingInterval(pollInterval) + ctx := context.Background() + + fake.reset() + fake.status = newBootcStatus(testutil.DigestA) + + firstBlock := make(chan struct{}) + fake.setSwitchHook(func() { + <-firstBlock + }) + + bn := testutil.NewNode(testNodeName, testOtherImageRef) + g.Expect(k8sClient.Create(ctx, bn)).To(Succeed()) + t.Cleanup(func() { + _ = k8sClient.Delete(ctx, bn) + }) + + g.Eventually(func() string { + return fake.getSwitchImg() + }).Should(Equal(testOtherImageRef)) + + fake.setSwitchHook(nil) + close(firstBlock) + + g.Eventually(func(g Gomega) { + var latest bootcv1alpha1.BootcNode + g.Expect(k8sClient.Get(ctx, 
client.ObjectKeyFromObject(bn), &latest)).To(Succeed()) + latest.Spec.DesiredImage = testThirdImageRef + g.Expect(k8sClient.Update(ctx, &latest)).To(Succeed()) + }).Should(Succeed()) + + g.Eventually(func() string { + return fake.getSwitchImg() + }).Should(Equal(testThirdImageRef)) +} diff --git a/test/util/builders.go b/test/util/builders.go index 2fbce6e..0787351 100644 --- a/test/util/builders.go +++ b/test/util/builders.go @@ -184,6 +184,13 @@ func WithNodeAnnotation(key, value string) NodeOption { } } +// WithDesiredImageState overrides the default DesiredImageState on a node. +func WithDesiredImageState(state bootcv1alpha1.DesiredImageState) NodeOption { + return func(node *bootcv1alpha1.BootcNode) { + node.Spec.DesiredImageState = state + } +} + // WithNodePullSecret sets the pull secret reference and hash on a node. func WithNodePullSecret(name, namespace, hash string) NodeOption { return func(node *bootcv1alpha1.BootcNode) { From 89c5e1fa2f30f733add20ee8da5d2ef67b4636cc Mon Sep 17 00:00:00 2001 From: Alice Frosi Date: Fri, 12 Jun 2026 11:46:44 +0000 Subject: [PATCH 5/7] e2e: add UpdateImageDigestedPullSpec to e2e env Assisted-by: Claude Opus 4.6 (1M context) --- Makefile | 1 + test/e2e/e2eutil/env.go | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/Makefile b/Makefile index 4245b3c..8b5e444 100644 --- a/Makefile +++ b/Makefile @@ -68,6 +68,7 @@ e2e: ## Run e2e tests (requires: make deploy-bink). V=1 for verbose. RUN= BINK_LOCAL_REGISTRY_NODE_IMAGE=$(BINK_LOCAL_REGISTRY_NODE_IMAGE) \ ARTIFACTS=$(ARTIFACTS) \ BINK_NODE_IMAGE_DIGEST=$$(skopeo inspect --tls-verify=false --format '{{.Digest}}' docker://localhost:5000/node:latest) \ + UPDATE_IMAGE_DIGEST=$$(skopeo inspect --tls-verify=false docker://localhost:5000/node:update | jq -r '.Digest') \ go test -timeout 10m -count=1 $(if $(V),-v) $(if $(RUN),-run $(RUN)) . 
##@ Build diff --git a/test/e2e/e2eutil/env.go b/test/e2e/e2eutil/env.go index 423ca19..7053579 100644 --- a/test/e2e/e2eutil/env.go +++ b/test/e2e/e2eutil/env.go @@ -60,6 +60,10 @@ type Env struct { // nodeImageRegistry is the in-cluster registry path for the seeded node image // (e.g. "registry.cluster.local:5000/node"). Empty when not seeded. nodeImageRegistry string + + // updateImageDigest is the manifest digest of the update image + // (e.g. "sha256:def456..."). Empty when not built. + updateImageDigest string } // New connects to an existing bink cluster and returns an Env ready @@ -80,6 +84,7 @@ func New(t *testing.T) *Env { nodeImageDigest := os.Getenv("BINK_NODE_IMAGE_DIGEST") nodeImageRegistry := os.Getenv("BINK_LOCAL_REGISTRY_NODE_IMAGE") + updateImageDigest := os.Getenv("UPDATE_IMAGE_DIGEST") k8sClient := buildClient(t, kubeconfigPath) @@ -89,6 +94,7 @@ func New(t *testing.T) *Env { testID: sanitizeTestName(t.Name()), nodeImageDigest: nodeImageDigest, nodeImageRegistry: nodeImageRegistry, + updateImageDigest: updateImageDigest, } t.Cleanup(func() { @@ -214,6 +220,20 @@ func (e *Env) NodeImageDigest() string { return e.nodeImageDigest } +// UpdateImageDigestedPullSpec returns the digest-qualified reference for the +// update image (e.g. "registry.cluster.local:5000/node@sha256:def456"). +func (e *Env) UpdateImageDigestedPullSpec() string { + if e.nodeImageRegistry == "" || e.updateImageDigest == "" { + return "" + } + return e.nodeImageRegistry + "@" + e.updateImageDigest +} + +// UpdateImageDigest returns the manifest digest of the update image. +func (e *Env) UpdateImageDigest() string { + return e.updateImageDigest +} + // cleanup gathers diagnostic logs, then deletes test-scoped resources // and bink nodes. 
func (e *Env) cleanup(t *testing.T) { From 7fa5af62d4e1ded2c6ea0ecdb93a5b2072c90761 Mon Sep 17 00:00:00 2001 From: Alice Frosi Date: Fri, 12 Jun 2026 11:46:47 +0000 Subject: [PATCH 6/7] e2e: add TestUpdateReboot for full update lifecycle Assisted-by: Claude Opus 4.6 (1M context) --- test/e2e/bootcnode_test.go | 112 +++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/test/e2e/bootcnode_test.go b/test/e2e/bootcnode_test.go index 221f113..4cc9506 100644 --- a/test/e2e/bootcnode_test.go +++ b/test/e2e/bootcnode_test.go @@ -4,6 +4,9 @@ package e2e import ( "context" + "fmt" + "os" + "os/exec" "testing" "time" @@ -87,3 +90,112 @@ func TestControllerMembership(t *testing.T) { ))) }).WithTimeout(3 * time.Minute).Should(Succeed()) } + +// TestUpdateReboot provisions a worker node, creates a pool with the +// original image, then updates the pool to a new image and verifies the +// full update lifecycle: staging, reboot, and idle with the new image. +func TestUpdateReboot(t *testing.T) { + g := NewWithT(t) + g.SetDefaultEventuallyTimeout(pollTimeout) + g.SetDefaultEventuallyPollingInterval(pollInterval) + + env := e2eutil.New(t) + // Resolve the update image before provisioning a node so a misconfigured run fails fast. + updateRef := env.UpdateImageDigestedPullSpec() + if updateRef == "" { + t.Fatal("UPDATE_IMAGE_DIGEST and BINK_LOCAL_REGISTRY_NODE_IMAGE must be set") + } + nodeName := env.AddNode(t) + + ctx := context.Background() + + // Phase 1: Create pool with original image and wait for Idle. + pool := env.NewPool("workers", env.NodeImageDigestedPullSpec()) + g.Expect(env.Client.Create(ctx, pool)).To(Succeed()) + + var bn bootcv1alpha1.BootcNode + g.Eventually(func(g Gomega) { + g.Expect(env.Client.Get(ctx, client.ObjectKey{Name: nodeName}, &bn)).To(Succeed()) + g.Expect(bn.Status.Booted).NotTo(BeNil()) + g.Expect(bn.Status.Conditions).To(ContainElement(And( + HaveField("Type", bootcv1alpha1.NodeIdle), + HaveField("Status", metav1.ConditionTrue), + HaveField("Reason", bootcv1alpha1.NodeReasonIdle), + ))) + }).WithTimeout(3 * time.Minute).Should(Succeed()) + + t.Logf("Node %q is Idle with original image", nodeName) + + // Phase 2: Patch pool to update image.
+ modified := pool.DeepCopy() + modified.Spec.Image.Ref = updateRef + g.Expect(env.Client.Patch(ctx, modified, client.MergeFrom(pool))).To(Succeed()) + *pool = *modified + + t.Logf("Patched pool to update image %s", updateRef) + + // Phase 3: Wait for Rebooting — proves image was staged and reboot started. + // The Staged phase is tested separately with rollout paused. + g.Eventually(func(g Gomega) { + g.Expect(env.Client.Get(ctx, client.ObjectKey{Name: nodeName}, &bn)).To(Succeed()) + g.Expect(bn.Status.Conditions).To(ContainElement(And( + HaveField("Type", bootcv1alpha1.NodeIdle), + HaveField("Status", metav1.ConditionFalse), + HaveField("Reason", bootcv1alpha1.NodeReasonRebooting), + ))) + }).WithTimeout(5*time.Minute).Should(Succeed(), "expected node to reach Rebooting state") + + t.Logf("Node %q reached Rebooting state", nodeName) + + // Phase 4: Wait for Idle with the update digest — proves reboot completed. + g.Eventually(func(g Gomega) { + g.Expect(env.Client.Get(ctx, client.ObjectKey{Name: nodeName}, &bn)).To(Succeed()) + g.Expect(bn.Status.Booted).NotTo(BeNil()) + g.Expect(bn.Status.Booted.ImageDigest).To(Equal(env.UpdateImageDigest()), + "expected booted digest to match update image") + g.Expect(bn.Status.Conditions).To(ContainElement(And( + HaveField("Type", bootcv1alpha1.NodeIdle), + HaveField("Status", metav1.ConditionTrue), + HaveField("Reason", bootcv1alpha1.NodeReasonIdle), + ))) + }).WithTimeout(5*time.Minute).Should(Succeed(), "expected node to reach Idle with update image after reboot") + + t.Logf("Node %q is Idle with update image", nodeName) + + // Phase 5: Verify node is schedulable (uncordoned after reboot).
+ var node corev1.Node + g.Expect(env.Client.Get(ctx, client.ObjectKey{Name: nodeName}, &node)).To(Succeed()) + g.Expect(node.Spec.Unschedulable).To(BeFalse(), "expected node to be schedulable after update") + + // Phase 6: Verify update marker exists on the host via daemon pod exec. + var daemonPod corev1.Pod + g.Eventually(func(g Gomega) { + var pods corev1.PodList + g.Expect(env.Client.List(ctx, &pods, + client.InNamespace("bootc-operator"), + client.MatchingLabels{ + "app.kubernetes.io/name": "bootc-operator", + "app.kubernetes.io/component": "daemon", + }, + )).To(Succeed()) + var matched []corev1.Pod + for _, p := range pods.Items { + if p.Spec.NodeName == nodeName { + matched = append(matched, p) + } + } + g.Expect(matched).To(HaveLen(1), "expected exactly one daemon pod on %s", nodeName) + g.Expect(matched[0].Status.Phase).To(Equal(corev1.PodRunning)) + daemonPod = matched[0] + }).WithTimeout(1*time.Minute).Should(Succeed(), "expected running daemon pod on %s", nodeName) + + kubeconfigPath := os.Getenv("KUBECONFIG") + cmd := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfigPath, + "-n", "bootc-operator", "exec", daemonPod.Name, "--", + "stat", "/proc/1/root/usr/share/update-marker") + out, err := cmd.CombinedOutput() + g.Expect(err).NotTo(HaveOccurred(), + fmt.Sprintf("expected update-marker to exist on host, kubectl exec output: %s", string(out))) + + t.Logf("Verified update-marker exists on host via daemon pod") +} From abc1f6f064c857a5eccaccf8403d3d84cd665a31 Mon Sep 17 00:00:00 2001 From: Alice Frosi Date: Fri, 12 Jun 2026 11:46:50 +0000 Subject: [PATCH 7/7] daemon: increase memory resource Assisted-by: Claude Opus 4.6 (1M context) --- config/daemon/daemon.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/daemon/daemon.yaml b/config/daemon/daemon.yaml index fefaa14..af5d7c8 100644 --- a/config/daemon/daemon.yaml +++ b/config/daemon/daemon.yaml @@ -38,7 +38,7 @@ spec: resources: limits: cpu: 500m - memory: 128Mi +
memory: 512Mi requests: cpu: 10m memory: 64Mi