From 641d58eb4f6190e3970a1e4e8a0f31779d1cae8c Mon Sep 17 00:00:00 2001 From: "Per G. da Silva" Date: Thu, 18 Jun 2026 11:04:37 +0200 Subject: [PATCH] NO-ISSUE: Add opt-in kube-rbac-proxy sidecar for pprof endpoint (AWS) Add an --enable-pprof flag to machine-api-operator that, when set, configures the AWS machine controller with a pprof profiling endpoint and a kube-rbac-proxy sidecar to securely expose it. When --enable-pprof is passed to MAO and the platform is AWS: - The machine controller receives --enable-pprof and --pprof-bind-address=127.0.0.1:6061 to serve plaintext pprof on loopback - A kube-rbac-proxy-pprof sidecar is added, listening on port 6060 with centralized TLS, proxying to the upstream pprof server The flag is off by default and only meaningful on AWS (the only platform whose machine controller supports pprof). On all other platforms, the flag is accepted but has no effect. Signed-off-by: Per G. da Silva Co-Authored-By: Claude Opus 4.6 (1M context) --- cmd/machine-api-operator/start.go | 3 +++ pkg/operator/config.go | 3 +++ pkg/operator/operator.go | 8 +++++-- pkg/operator/sync.go | 18 ++++++++++++++-- pkg/operator/sync_test.go | 36 ++++++++++++++++++++++++++++++- 5 files changed, 63 insertions(+), 5 deletions(-) diff --git a/cmd/machine-api-operator/start.go b/cmd/machine-api-operator/start.go index 0398fe133..675d0b070 100644 --- a/cmd/machine-api-operator/start.go +++ b/cmd/machine-api-operator/start.go @@ -66,6 +66,7 @@ var ( imagesFile string tlsMinVersion string tlsCipherSuites []string + enablePprof bool } ) @@ -73,6 +74,7 @@ func init() { rootCmd.AddCommand(startCmd) startCmd.PersistentFlags().StringVar(&startOpts.kubeconfig, "kubeconfig", "", "Kubeconfig file to access a remote cluster (testing only)") startCmd.PersistentFlags().StringVar(&startOpts.imagesFile, "images-json", "", "images.json file for MAO.") + startCmd.PersistentFlags().BoolVar(&startOpts.enablePprof, "enable-pprof", false, "Enable the pprof profiling endpoint on the machine controller (AWS only).") startCmd.PersistentFlags().StringVar(&startOpts.tlsMinVersion, "tls-min-version", "", "Minimum TLS version supported. When set with --tls-cipher-suites, overrides the cluster-wide TLS profile. Possible values: "+strings.Join(cliflag.TLSPossibleVersions(), ", ")) startCmd.PersistentFlags().StringSliceVar(&startOpts.tlsCipherSuites, "tls-cipher-suites", nil, "Comma-separated list of cipher suites for the server. When set with --tls-min-version, overrides the cluster-wide TLS profile. Possible values: "+strings.Join(cliflag.TLSCipherPossibleValues(), ", ")) @@ -239,6 +241,7 @@ func startControllers(ctx *ControllerContext) error { componentNamespace, componentName, startOpts.imagesFile, config, + startOpts.enablePprof, ctx.KubeNamespacedInformerFactory.Apps().V1().Deployments(), ctx.KubeNamespacedInformerFactory.Apps().V1().DaemonSets(), ctx.ConfigInformerFactory.Config().V1().ClusterOperators(), diff --git a/pkg/operator/config.go b/pkg/operator/config.go index cf36ac395..ae59d85b3 100644 --- a/pkg/operator/config.go +++ b/pkg/operator/config.go @@ -27,6 +27,9 @@ type OperatorConfig struct { Features map[string]bool TLSProfile configv1.TLSProfileSpec TLSAdherencePolicy configv1.TLSAdherencePolicy + // EnablePprof enables the pprof profiling endpoint on the machine controller. + // Currently only supported on AWS. + EnablePprof bool } type Controllers struct { diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index f5886a08d..4aeed288a 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -52,8 +52,9 @@ const ( type Operator struct { namespace, name string - imagesFile string - config string + imagesFile string + config string + enablePprof bool kubeClient kubernetes.Interface osClient osclientset.Interface @@ -94,6 +95,7 @@ func New( imagesFile string, config string, + enablePprof bool, deployInformer appsinformersv1.DeploymentInformer, daemonsetInformer appsinformersv1.DaemonSetInformer, @@ -126,6 +128,7 @@ func New( namespace: namespace, name: name, imagesFile: imagesFile, + enablePprof: enablePprof, kubeClient: kubeClient, osClient: osClient, machineClient: machineClient, @@ -508,5 +511,6 @@ func (optr *Operator) maoConfigFromInfrastructure() (*OperatorConfig, error) { Features: features, TLSProfile: tlsProfile, TLSAdherencePolicy: apiServer.Spec.TLSAdherence, + EnablePprof: optr.enablePprof, }, nil } diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go index 986f86d91..8ad2f30f6 100644 --- a/pkg/operator/sync.go +++ b/pkg/operator/sync.go @@ -45,6 +45,8 @@ const ( machineExposeMetricsPort = 8441 machineSetExposeMetricsPort = 8442 machineHealthCheckExposeMetricsPort = 8444 + pprofExposePort = 6060 + pprofUpstreamPort = 6061 defaultMachineHealthPort = 9440 defaultMachineSetHealthPort = 9441 defaultMachineHealthCheckHealthPort = 9442 @@ -535,7 +537,8 @@ func newPodTemplateSpec(config *OperatorConfig, features map[string]bool) *corev containers := newContainers(config, features, tlsArgs) withMHCProxy := config.Controllers.MachineHealthCheck != "" - proxyContainers := newKubeProxyContainers(config.Controllers.KubeRBACProxy, withMHCProxy, tlsArgs) + withPprofProxy := config.EnablePprof && config.PlatformType == configv1.AWSPlatformType + proxyContainers := newKubeProxyContainers(config.Controllers.KubeRBACProxy, withMHCProxy, withPprofProxy, tlsArgs) tolerations := []corev1.Toleration{ { Key: "node-role.kubernetes.io/master", @@ -697,6 +700,12 @@ func newContainers(config *OperatorConfig, features map[string]bool, tlsArgs []s featureGateArgs := append(args, buildFeatureGatesString(features)) machineControllerArgs := append([]string{}, featureGateArgs...) + if config.EnablePprof && config.PlatformType == configv1.AWSPlatformType { + machineControllerArgs = append(machineControllerArgs, + "--enable-pprof", + fmt.Sprintf("--pprof-bind-address=127.0.0.1:%d", pprofUpstreamPort), + ) + } switch config.PlatformType { case configv1.AzurePlatformType, configv1.GCPPlatformType: machineControllerArgs = append(machineControllerArgs, "--max-concurrent-reconciles=10") @@ -908,7 +917,7 @@ func resolveTLSProfile(tlsProfile configv1.TLSProfileSpec, tlsAdherencePolicy co return *configv1.TLSProfiles[libgocrypto.DefaultTLSProfileType] } -func newKubeProxyContainers(image string, withMHCProxy bool, tlsArgs []string) []corev1.Container { +func newKubeProxyContainers(image string, withMHCProxy, withPprofProxy bool, tlsArgs []string) []corev1.Container { proxyContainers := []corev1.Container{ newKubeProxyContainer(image, "machineset-mtrc", metrics.DefaultMachineSetMetricsAddress, machineSetExposeMetricsPort, tlsArgs), newKubeProxyContainer(image, "machine-mtrc", metrics.DefaultMachineMetricsAddress, machineExposeMetricsPort, tlsArgs), @@ -918,6 +927,11 @@ func newKubeProxyContainers(image string, withMHCProxy bool, tlsArgs []string) [ newKubeProxyContainer(image, "mhc-mtrc", metrics.DefaultHealthCheckMetricsAddress, machineHealthCheckExposeMetricsPort, tlsArgs), ) } + if withPprofProxy { + proxyContainers = append(proxyContainers, + newKubeProxyContainer(image, "pprof", fmt.Sprintf(":%d", pprofUpstreamPort), pprofExposePort, tlsArgs), + ) + } return proxyContainers } diff --git a/pkg/operator/sync_test.go b/pkg/operator/sync_test.go index cabc8b424..c4ea1b82e 100644 --- a/pkg/operator/sync_test.go +++ b/pkg/operator/sync_test.go @@ -663,7 +663,7 @@ func TestNewKubeProxyContainers(t *testing.T) { t.Run(tc.name, func(t *testing.T) { g := NewWithT(t) - containers := newKubeProxyContainers(tc.image, tc.withMHCProxy, getTLSArgs(tc.tlsProfile)) + containers := newKubeProxyContainers(tc.image, tc.withMHCProxy, false, getTLSArgs(tc.tlsProfile)) // Verify we get the expected number of containers g.Expect(containers).To(HaveLen(len(tc.expectedPorts))) @@ -789,6 +789,32 @@ func TestNewPodTemplateSpecTLSArgs(t *testing.T) { tlsAdherencePolicy: configv1.TLSAdherencePolicyStrictAllComponents, expectTLSArgsOnProfileConsumers: true, }, + { + name: "AWS: pprof enabled adds proxy sidecar and machine-controller args", + config: &OperatorConfig{ + TargetNamespace: targetNamespace, + PlatformType: configv1.AWSPlatformType, + EnablePprof: true, + Controllers: Controllers{ + Provider: "provider-image:latest", + MachineSet: "machineset-image:latest", + NodeLink: "nodelink-image:latest", + MachineHealthCheck: "mhc-image:latest", + KubeRBACProxy: "kube-rbac-proxy-image:latest", + }, + }, + tlsProfile: configv1.TLSProfileSpec{ + Ciphers: []string{ + "ECDHE-ECDSA-AES128-GCM-SHA256", + "ECDHE-RSA-AES128-GCM-SHA256", + }, + MinTLSVersion: configv1.VersionTLS12, + }, + expectedTLSProfile: configv1.TLSProfileSpec{Ciphers: []string{"ECDHE-ECDSA-AES128-GCM-SHA256", "ECDHE-RSA-AES128-GCM-SHA256"}, MinTLSVersion: configv1.VersionTLS12}, + expectMachineControllerTLSOnBareMetal: false, + tlsAdherencePolicy: configv1.TLSAdherencePolicyStrictAllComponents, + expectTLSArgsOnProfileConsumers: true, + }, { name: "AWS: no opinion applies default profile TLS args through pod template", config: &OperatorConfig{ @@ -841,6 +867,14 @@ func TestNewPodTemplateSpecTLSArgs(t *testing.T) { g.Expect(containerArgs).To(HaveKey("kube-rbac-proxy-mhc-mtrc")) } + if tc.config.EnablePprof && tc.config.PlatformType == configv1.AWSPlatformType { + g.Expect(containerArgs).To(HaveKey("kube-rbac-proxy-pprof")) + g.Expect(strings.Join(containerArgs["machine-controller"], " ")).To(ContainSubstring("--enable-pprof")) + } else { + g.Expect(containerArgs).ToNot(HaveKey("kube-rbac-proxy-pprof")) + g.Expect(strings.Join(containerArgs["machine-controller"], " ")).ToNot(ContainSubstring("--enable-pprof")) + } + expectedTLSArgs := getTLSArgs(tc.expectedTLSProfile) assertTLSArgs := func(args []string, shouldContain bool) { joined := strings.Join(args, " ")