From 45beca56fd3207001472dfa8a2f64425c04335fa Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Wed, 17 Apr 2024 17:27:24 +0100 Subject: [PATCH 1/2] Added Mtls patch (cherry picked from commit de2de96fc88022df783b637ccb145d1d73ba66ff) --- config/rbac/role.yaml | 6 + main.go | 25 +++ pkg/config/config.go | 2 + pkg/controllers/raycluster_webhook.go | 217 +++++++++++++++++++++++++- pkg/controllers/support.go | 11 ++ 5 files changed, 254 insertions(+), 7 deletions(-) diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 24ac70871..e42aacff0 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -44,6 +44,12 @@ rules: - subjectaccessreviews verbs: - create +- apiGroups: + - config.openshift.io + resources: + - ingresses + verbs: + - get - apiGroups: - "" resources: diff --git a/main.go b/main.go index 700f12e8f..24e7fade3 100644 --- a/main.go +++ b/main.go @@ -52,6 +52,7 @@ import ( "sigs.k8s.io/yaml" routev1 "github.com/openshift/api/route/v1" + clientset "github.com/openshift/client-go/config/clientset/versioned" "github.com/project-codeflare/codeflare-operator/pkg/config" "github.com/project-codeflare/codeflare-operator/pkg/controllers" @@ -75,6 +76,8 @@ func init() { utilruntime.Must(routev1.Install(scheme)) } +// +kubebuilder:rbac:groups=config.openshift.io,resources=ingresses,verbs=get; + func main() { var configMapName string flag.StringVar(&configMapName, "config", "codeflare-operator-config", @@ -117,6 +120,7 @@ func main() { KubeRay: &config.KubeRayConfiguration{ RayDashboardOAuthEnabled: ptr.To(true), IngressDomain: "", + MTLSEnabled: ptr.To(true), }, } @@ -155,6 +159,13 @@ func main() { certsReady := make(chan struct{}) exitOnError(setupCertManagement(mgr, namespace, certsReady), "unable to setup cert-controller") + if cfg.KubeRay.IngressDomain == "" { + configClient, err := clientset.NewForConfig(kubeConfig) + exitOnError(err, "unable to create Route Client Set") + cfg.KubeRay.IngressDomain, err = getClusterDomain(ctx, configClient) + exitOnError(err, cfg.KubeRay.IngressDomain) + } + go setupControllers(mgr, kubeClient, cfg, isOpenShift(ctx, kubeClient.DiscoveryClient), certsReady) setupLog.Info("setting up health endpoints") @@ -332,3 +343,17 @@ func isOpenShift(ctx context.Context, dc discovery.DiscoveryInterface) bool { logger.Info("We detected being on Vanilla Kubernetes!") return false } + +func getClusterDomain(ctx context.Context, configClient *clientset.Clientset) (string, error) { + ingress, err := configClient.ConfigV1().Ingresses().Get(ctx, "cluster", metav1.GetOptions{}) + if err != nil { + return "", fmt.Errorf("failed to get Ingress object: %v", err) + } + + domain := ingress.Spec.Domain + if domain == "" { + return "", fmt.Errorf("domain is not set in the Ingress object") + } + + return domain, nil +} diff --git a/pkg/config/config.go b/pkg/config/config.go index 08e2579b9..af3bc3491 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -35,6 +35,8 @@ type KubeRayConfiguration struct { RayDashboardOAuthEnabled *bool `json:"rayDashboardOAuthEnabled,omitempty"` IngressDomain string `json:"ingressDomain"` + + MTLSEnabled *bool `json:"mTLSEnabled,omitempty"` } type ControllerManager struct { diff --git a/pkg/controllers/raycluster_webhook.go b/pkg/controllers/raycluster_webhook.go index b29794ee9..1e5b0b870 100644 --- a/pkg/controllers/raycluster_webhook.go +++ b/pkg/controllers/raycluster_webhook.go @@ -18,6 +18,7 @@ package controllers import ( "context" + "strconv" rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" @@ -36,6 +37,7 @@ import ( const ( oauthProxyContainerName = "oauth-proxy" oauthProxyVolumeName = "proxy-tls-secret" + initContainerName = "create-cert" ) // log is for logging in this package. @@ -66,17 +68,44 @@ var _ webhook.CustomValidator = &rayClusterWebhook{} func (w *rayClusterWebhook) Default(ctx context.Context, obj runtime.Object) error { rayCluster := obj.(*rayv1.RayCluster) - if !ptr.Deref(w.Config.RayDashboardOAuthEnabled, true) { - return nil - } + if ptr.Deref(w.Config.RayDashboardOAuthEnabled, true) { + rayclusterlog.V(2).Info("Adding OAuth sidecar container") + rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers = upsert(rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers, oauthProxyContainer(rayCluster), withContainerName(oauthProxyContainerName)) - rayclusterlog.V(2).Info("Adding OAuth sidecar container") + rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes = upsert(rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes, oauthProxyTLSSecretVolume(rayCluster), withVolumeName(oauthProxyVolumeName)) - rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers = upsert(rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers, oauthProxyContainer(rayCluster), withContainerName(oauthProxyContainerName)) + rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = rayCluster.Name + "-oauth-proxy" + } - rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes = upsert(rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes, oauthProxyTLSSecretVolume(rayCluster), withVolumeName(oauthProxyVolumeName)) + if ptr.Deref(w.Config.MTLSEnabled, true) { + rayclusterlog.V(2).Info("Adding create-cert Init Containers") + // HeadGroupSpec // + // Append the list of environment variables for the ray-head container + for _, envVar := range envVarList() { + rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Env = upsert(rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Env, envVar, withEnvVarName(envVar.Name)) + } + + // Append the create-cert Init Container + rayCluster.Spec.HeadGroupSpec.Template.Spec.InitContainers = upsert(rayCluster.Spec.HeadGroupSpec.Template.Spec.InitContainers, rayHeadInitContainer(rayCluster, w.Config.IngressDomain), withContainerName(initContainerName)) + + // Append the CA volumes + for _, caVol := range caVolumes(rayCluster) { + rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes = upsert(rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes, caVol, withVolumeName(caVol.Name)) + } + // WorkerGroupSpec // + // Append the list of environment variables for the worker container + for _, envVar := range envVarList() { + rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[0].Env = upsert(rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[0].Env, envVar, withEnvVarName(envVar.Name)) + } + + // Append the CA volumes + for _, caVol := range caVolumes(rayCluster) { + rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.Volumes = upsert(rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.Volumes, caVol, withVolumeName(caVol.Name)) + } + // Append the create-cert Init Container + rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.InitContainers = upsert(rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.InitContainers, rayWorkerInitContainer(), withContainerName(initContainerName)) - rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = rayCluster.Name + "-oauth-proxy" + } return nil } @@ -117,6 +146,14 @@ func (w *rayClusterWebhook) ValidateUpdate(ctx context.Context, oldObj, newObj r allErrors = append(allErrors, validateHeadGroupServiceAccountName(rayCluster)...) } + // Init Container related errors + if ptr.Deref(w.Config.MTLSEnabled, true) { + allErrors = append(allErrors, validateHeadInitContainer(rayCluster, w)...) + allErrors = append(allErrors, validateWorkerInitContainer(rayCluster)...) + allErrors = append(allErrors, validateHeadEnvVars(rayCluster)...) + allErrors = append(allErrors, validateWorkerEnvVars(rayCluster)...) + allErrors = append(allErrors, validateCaVolumes(rayCluster)...) + } return warnings, allErrors.ToAggregate() } @@ -225,3 +262,169 @@ func oauthProxyTLSSecretVolume(rayCluster *rayv1.RayCluster) corev1.Volume { }, } } + +func initCaVolumeMounts() []corev1.VolumeMount { + return []corev1.VolumeMount{ + { + Name: "ca-vol", + MountPath: "/home/ray/workspace/ca", + ReadOnly: true, + }, + { + Name: "server-cert", + MountPath: "/home/ray/workspace/tls", + ReadOnly: false, + }, + } +} + +func envVarList() []corev1.EnvVar { + return []corev1.EnvVar{ + { + Name: "MY_POD_IP", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "status.podIP", + }, + }, + }, + { + Name: "RAY_USE_TLS", + Value: "1", + }, + { + Name: "RAY_TLS_SERVER_CERT", + Value: "/home/ray/workspace/tls/server.crt", + }, + { + Name: "RAY_TLS_SERVER_KEY", + Value: "/home/ray/workspace/tls/server.key", + }, + { + Name: "RAY_TLS_CA_CERT", + Value: "/home/ray/workspace/tls/ca.crt", + }, + } +} + +func caVolumes(rayCluster *rayv1.RayCluster) []corev1.Volume { + return []corev1.Volume{ + { + Name: "ca-vol", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: `ca-secret-` + rayCluster.Name, + }, + }, + }, + { + Name: "server-cert", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }, + } +} + +func rayHeadInitContainer(rayCluster *rayv1.RayCluster, domain string) corev1.Container { + rayClientRoute := "rayclient-" + rayCluster.Name + "-" + rayCluster.Namespace + "." + domain + // Service name for basic interactive + svcDomain := rayCluster.Name + "-head-svc." + rayCluster.Namespace + ".svc" + + initContainerHead := corev1.Container{ + Name: "create-cert", + Image: "quay.io/project-codeflare/ray:latest-py39-cu118", + Command: []string{ + "sh", + "-c", + `cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)\nDNS.5 = ` + rayClientRoute + `\nDNS.6 = ` + svcDomain + `">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext`, + }, + VolumeMounts: initCaVolumeMounts(), + } + return initContainerHead +} + +func rayWorkerInitContainer() corev1.Container { + initContainerWorker := corev1.Container{ + Name: "create-cert", + Image: "quay.io/project-codeflare/ray:latest-py39-cu118", + Command: []string{ + "sh", + "-c", + `cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext`, + }, + VolumeMounts: initCaVolumeMounts(), + } + return initContainerWorker +} + +func validateHeadInitContainer(rayCluster *rayv1.RayCluster, w *rayClusterWebhook) field.ErrorList { + var allErrors field.ErrorList + + if err := contains(rayCluster.Spec.HeadGroupSpec.Template.Spec.InitContainers, rayHeadInitContainer(rayCluster, w.Config.IngressDomain), byContainerName, + field.NewPath("spec", "headGroupSpec", "template", "spec", "initContainers"), + "create-cert Init Container is immutable"); err != nil { + allErrors = append(allErrors, err) + } + + return allErrors +} + +func validateWorkerInitContainer(rayCluster *rayv1.RayCluster) field.ErrorList { + var allErrors field.ErrorList + + if err := contains(rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.InitContainers, rayWorkerInitContainer(), byContainerName, + field.NewPath("spec", "workerGroupSpecs", "0", "template", "spec", "initContainers"), + "create-cert Init Container is immutable"); err != nil { + allErrors = append(allErrors, err) + } + + return allErrors +} + +func validateCaVolumes(rayCluster *rayv1.RayCluster) field.ErrorList { + var allErrors field.ErrorList + + for _, caVol := range caVolumes(rayCluster) { + if err := contains(rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes, caVol, byVolumeName, + field.NewPath("spec", "headGroupSpec", "template", "spec", "volumes"), + "ca-vol and server-cert Secret volumes are immutable"); err != nil { + allErrors = append(allErrors, err) + } + if err := contains(rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.Volumes, caVol, byVolumeName, + field.NewPath("spec", "workerGroupSpecs", "0", "template", "spec", "volumes"), + "ca-vol and server-cert Secret volumes are immutable"); err != nil { + allErrors = append(allErrors, err) + } + } + + return allErrors +} + +func validateHeadEnvVars(rayCluster *rayv1.RayCluster) field.ErrorList { + var allErrors field.ErrorList + + for _, envVar := range envVarList() { + if err := contains(rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Env, envVar, byEnvVarName, + field.NewPath("spec", "headGroupSpec", "template", "spec", "containers", strconv.Itoa(0), "env"), + "RAY_TLS related environment variables are immutable"); err != nil { + allErrors = append(allErrors, err) + } + } + + return allErrors +} + +func validateWorkerEnvVars(rayCluster *rayv1.RayCluster) field.ErrorList { + var allErrors field.ErrorList + + for _, envVar := range envVarList() { + if err := contains(rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[0].Env, envVar, byEnvVarName, + field.NewPath("spec", "workerGroupSpecs", "0", "template", "spec", "containers", strconv.Itoa(0), "env"), + "RAY_TLS related environment variables are immutable"); err != nil { + allErrors = append(allErrors, err) + } + } + + return allErrors +} diff --git a/pkg/controllers/support.go b/pkg/controllers/support.go index 0efbc7475..0ea8f424d 100644 --- a/pkg/controllers/support.go +++ b/pkg/controllers/support.go @@ -140,3 +140,14 @@ func withVolumeName(name string) compare[corev1.Volume] { return v1.Name == name } } + +var byEnvVarName = compare[corev1.EnvVar]( + func(e1, e2 corev1.EnvVar) bool { + return e1.Name == e2.Name + }) + +func withEnvVarName(name string) compare[corev1.EnvVar] { + return func(e1, e2 corev1.EnvVar) bool { + return e1.Name == name + } +} From 7e88de6ccbf73098c61db07d034c8c577621c1fd Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Fri, 19 Apr 2024 11:24:02 +0100 Subject: [PATCH 2/2] Added Ingress domain to validateHeadInitContainer --- pkg/controllers/raycluster_webhook.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/controllers/raycluster_webhook.go b/pkg/controllers/raycluster_webhook.go index 1e5b0b870..9362afe56 100644 --- a/pkg/controllers/raycluster_webhook.go +++ b/pkg/controllers/raycluster_webhook.go @@ -148,7 +148,7 @@ func (w *rayClusterWebhook) ValidateUpdate(ctx context.Context, oldObj, newObj r // Init Container related errors if ptr.Deref(w.Config.MTLSEnabled, true) { - allErrors = append(allErrors, validateHeadInitContainer(rayCluster, w)...) + allErrors = append(allErrors, validateHeadInitContainer(rayCluster, w.Config.IngressDomain)...) allErrors = append(allErrors, validateWorkerInitContainer(rayCluster)...) allErrors = append(allErrors, validateHeadEnvVars(rayCluster)...) allErrors = append(allErrors, validateWorkerEnvVars(rayCluster)...) @@ -358,10 +358,10 @@ func rayWorkerInitContainer() corev1.Container { return initContainerWorker } -func validateHeadInitContainer(rayCluster *rayv1.RayCluster, w *rayClusterWebhook) field.ErrorList { +func validateHeadInitContainer(rayCluster *rayv1.RayCluster, domain string) field.ErrorList { var allErrors field.ErrorList - if err := contains(rayCluster.Spec.HeadGroupSpec.Template.Spec.InitContainers, rayHeadInitContainer(rayCluster, w.Config.IngressDomain), byContainerName, + if err := contains(rayCluster.Spec.HeadGroupSpec.Template.Spec.InitContainers, rayHeadInitContainer(rayCluster, domain), byContainerName, field.NewPath("spec", "headGroupSpec", "template", "spec", "initContainers"), "create-cert Init Container is immutable"); err != nil { allErrors = append(allErrors, err)