diff --git a/cmd/machine-api-operator/start.go b/cmd/machine-api-operator/start.go index 0398fe133..675d0b070 100644 --- a/cmd/machine-api-operator/start.go +++ b/cmd/machine-api-operator/start.go @@ -66,6 +66,7 @@ var ( imagesFile string tlsMinVersion string tlsCipherSuites []string + enablePprof bool } ) @@ -73,6 +74,7 @@ func init() { rootCmd.AddCommand(startCmd) startCmd.PersistentFlags().StringVar(&startOpts.kubeconfig, "kubeconfig", "", "Kubeconfig file to access a remote cluster (testing only)") startCmd.PersistentFlags().StringVar(&startOpts.imagesFile, "images-json", "", "images.json file for MAO.") + startCmd.PersistentFlags().BoolVar(&startOpts.enablePprof, "enable-pprof", false, "Enable the pprof profiling endpoint on the machine controller (AWS only).") startCmd.PersistentFlags().StringVar(&startOpts.tlsMinVersion, "tls-min-version", "", "Minimum TLS version supported. When set with --tls-cipher-suites, overrides the cluster-wide TLS profile. Possible values: "+strings.Join(cliflag.TLSPossibleVersions(), ", ")) startCmd.PersistentFlags().StringSliceVar(&startOpts.tlsCipherSuites, "tls-cipher-suites", nil, "Comma-separated list of cipher suites for the server. When set with --tls-min-version, overrides the cluster-wide TLS profile. Possible values: "+strings.Join(cliflag.TLSCipherPossibleValues(), ", ")) @@ -239,6 +241,7 @@ func startControllers(ctx *ControllerContext) error { componentNamespace, componentName, startOpts.imagesFile, config, + startOpts.enablePprof, ctx.KubeNamespacedInformerFactory.Apps().V1().Deployments(), ctx.KubeNamespacedInformerFactory.Apps().V1().DaemonSets(), ctx.ConfigInformerFactory.Config().V1().ClusterOperators(), diff --git a/pkg/operator/config.go b/pkg/operator/config.go index cf36ac395..ae59d85b3 100644 --- a/pkg/operator/config.go +++ b/pkg/operator/config.go @@ -27,6 +27,9 @@ type OperatorConfig struct { Features map[string]bool TLSProfile configv1.TLSProfileSpec TLSAdherencePolicy configv1.TLSAdherencePolicy + // EnablePprof enables the pprof profiling endpoint on the machine controller. + // Currently only supported on AWS. + EnablePprof bool } type Controllers struct { diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index f5886a08d..4aeed288a 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -52,8 +52,9 @@ const ( type Operator struct { namespace, name string - imagesFile string - config string + imagesFile string + config string + enablePprof bool kubeClient kubernetes.Interface osClient osclientset.Interface @@ -94,6 +95,7 @@ func New( imagesFile string, config string, + enablePprof bool, deployInformer appsinformersv1.DeploymentInformer, daemonsetInformer appsinformersv1.DaemonSetInformer, @@ -126,6 +128,7 @@ func New( namespace: namespace, name: name, imagesFile: imagesFile, + enablePprof: enablePprof, kubeClient: kubeClient, osClient: osClient, machineClient: machineClient, @@ -508,5 +511,6 @@ func (optr *Operator) maoConfigFromInfrastructure() (*OperatorConfig, error) { Features: features, TLSProfile: tlsProfile, TLSAdherencePolicy: apiServer.Spec.TLSAdherence, + EnablePprof: optr.enablePprof, }, nil } diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go index 986f86d91..8ad2f30f6 100644 --- a/pkg/operator/sync.go +++ b/pkg/operator/sync.go @@ -45,6 +45,8 @@ const ( machineExposeMetricsPort = 8441 machineSetExposeMetricsPort = 8442 machineHealthCheckExposeMetricsPort = 8444 + pprofExposePort = 6060 + pprofUpstreamPort = 6061 defaultMachineHealthPort = 9440 defaultMachineSetHealthPort = 9441 defaultMachineHealthCheckHealthPort = 9442 @@ -535,7 +537,8 @@ func newPodTemplateSpec(config *OperatorConfig, features map[string]bool) *corev containers := newContainers(config, features, tlsArgs) withMHCProxy := config.Controllers.MachineHealthCheck != "" - proxyContainers := newKubeProxyContainers(config.Controllers.KubeRBACProxy, withMHCProxy, tlsArgs) + withPprofProxy := config.EnablePprof && config.PlatformType == configv1.AWSPlatformType + proxyContainers := newKubeProxyContainers(config.Controllers.KubeRBACProxy, withMHCProxy, withPprofProxy, tlsArgs) tolerations := []corev1.Toleration{ { Key: "node-role.kubernetes.io/master", @@ -697,6 +700,12 @@ func newContainers(config *OperatorConfig, features map[string]bool, tlsArgs []s featureGateArgs := append(args, buildFeatureGatesString(features)) machineControllerArgs := append([]string{}, featureGateArgs...) + if config.EnablePprof && config.PlatformType == configv1.AWSPlatformType { + machineControllerArgs = append(machineControllerArgs, + "--enable-pprof", + fmt.Sprintf("--pprof-bind-address=127.0.0.1:%d", pprofUpstreamPort), + ) + } switch config.PlatformType { case configv1.AzurePlatformType, configv1.GCPPlatformType: machineControllerArgs = append(machineControllerArgs, "--max-concurrent-reconciles=10") @@ -908,7 +917,7 @@ func resolveTLSProfile(tlsProfile configv1.TLSProfileSpec, tlsAdherencePolicy co return *configv1.TLSProfiles[libgocrypto.DefaultTLSProfileType] } -func newKubeProxyContainers(image string, withMHCProxy bool, tlsArgs []string) []corev1.Container { +func newKubeProxyContainers(image string, withMHCProxy, withPprofProxy bool, tlsArgs []string) []corev1.Container { proxyContainers := []corev1.Container{ newKubeProxyContainer(image, "machineset-mtrc", metrics.DefaultMachineSetMetricsAddress, machineSetExposeMetricsPort, tlsArgs), newKubeProxyContainer(image, "machine-mtrc", metrics.DefaultMachineMetricsAddress, machineExposeMetricsPort, tlsArgs), @@ -918,6 +927,11 @@ func newKubeProxyContainers(image string, withMHCProxy bool, tlsArgs []string) [ newKubeProxyContainer(image, "mhc-mtrc", metrics.DefaultHealthCheckMetricsAddress, machineHealthCheckExposeMetricsPort, tlsArgs), ) } + if withPprofProxy { + proxyContainers = append(proxyContainers, + newKubeProxyContainer(image, "pprof", fmt.Sprintf(":%d", pprofUpstreamPort), pprofExposePort, tlsArgs), + ) + } return proxyContainers } diff --git a/pkg/operator/sync_test.go b/pkg/operator/sync_test.go index cabc8b424..c4ea1b82e 100644 --- a/pkg/operator/sync_test.go +++ b/pkg/operator/sync_test.go @@ -663,7 +663,7 @@ func TestNewKubeProxyContainers(t *testing.T) { t.Run(tc.name, func(t *testing.T) { g := NewWithT(t) - containers := newKubeProxyContainers(tc.image, tc.withMHCProxy, getTLSArgs(tc.tlsProfile)) + containers := newKubeProxyContainers(tc.image, tc.withMHCProxy, false, getTLSArgs(tc.tlsProfile)) // Verify we get the expected number of containers g.Expect(containers).To(HaveLen(len(tc.expectedPorts))) @@ -789,6 +789,32 @@ func TestNewPodTemplateSpecTLSArgs(t *testing.T) { tlsAdherencePolicy: configv1.TLSAdherencePolicyStrictAllComponents, expectTLSArgsOnProfileConsumers: true, }, + { + name: "AWS: pprof enabled adds proxy sidecar and machine-controller args", + config: &OperatorConfig{ + TargetNamespace: targetNamespace, + PlatformType: configv1.AWSPlatformType, + EnablePprof: true, + Controllers: Controllers{ + Provider: "provider-image:latest", + MachineSet: "machineset-image:latest", + NodeLink: "nodelink-image:latest", + MachineHealthCheck: "mhc-image:latest", + KubeRBACProxy: "kube-rbac-proxy-image:latest", + }, + }, + tlsProfile: configv1.TLSProfileSpec{ + Ciphers: []string{ + "ECDHE-ECDSA-AES128-GCM-SHA256", + "ECDHE-RSA-AES128-GCM-SHA256", + }, + MinTLSVersion: configv1.VersionTLS12, + }, + expectedTLSProfile: configv1.TLSProfileSpec{Ciphers: []string{"ECDHE-ECDSA-AES128-GCM-SHA256", "ECDHE-RSA-AES128-GCM-SHA256"}, MinTLSVersion: configv1.VersionTLS12}, + expectMachineControllerTLSOnBareMetal: false, + tlsAdherencePolicy: configv1.TLSAdherencePolicyStrictAllComponents, + expectTLSArgsOnProfileConsumers: true, + }, { name: "AWS: no opinion applies default profile TLS args through pod template", config: &OperatorConfig{ @@ -841,6 +867,14 @@ func TestNewPodTemplateSpecTLSArgs(t *testing.T) { g.Expect(containerArgs).To(HaveKey("kube-rbac-proxy-mhc-mtrc")) } + if tc.config.EnablePprof && tc.config.PlatformType == configv1.AWSPlatformType { + g.Expect(containerArgs).To(HaveKey("kube-rbac-proxy-pprof")) + g.Expect(strings.Join(containerArgs["machine-controller"], " ")).To(ContainSubstring("--enable-pprof")) + } else { + g.Expect(containerArgs).ToNot(HaveKey("kube-rbac-proxy-pprof")) + g.Expect(strings.Join(containerArgs["machine-controller"], " ")).ToNot(ContainSubstring("--enable-pprof")) + } + expectedTLSArgs := getTLSArgs(tc.expectedTLSProfile) assertTLSArgs := func(args []string, shouldContain bool) { joined := strings.Join(args, " ")