Skip to content

Commit 0fe66eb

Browse files
committed
Added Mtls patch
(cherry picked from commit de2de96fc88022df783b637ccb145d1d73ba66ff) Review changes
1 parent a51f336 commit 0fe66eb

File tree

4 files changed

+240
-61
lines changed

4 files changed

+240
-61
lines changed

config/rbac/role.yaml

+6
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@ rules:
1717
- subjectaccessreviews
1818
verbs:
1919
- create
20+
- apiGroups:
21+
- config.openshift.io
22+
resources:
23+
- ingresses
24+
verbs:
25+
- get
2026
- apiGroups:
2127
- ""
2228
resources:

main.go

+24
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ import (
4747
"sigs.k8s.io/yaml"
4848

4949
routev1 "github.com/openshift/api/route/v1"
50+
clientset "github.com/openshift/client-go/config/clientset/versioned"
5051

5152
"github.com/project-codeflare/codeflare-operator/pkg/config"
5253
"github.com/project-codeflare/codeflare-operator/pkg/controllers"
@@ -72,6 +73,8 @@ func init() {
7273
utilruntime.Must(routev1.Install(scheme))
7374
}
7475

76+
// +kubebuilder:rbac:groups=config.openshift.io,resources=ingresses,verbs=get;
77+
7578
func main() {
7679
var configMapName string
7780
flag.StringVar(&configMapName, "config", "codeflare-operator-config",
@@ -116,6 +119,7 @@ func main() {
116119
KubeRay: &config.KubeRayConfiguration{
117120
RayDashboardOAuthEnabled: pointer.Bool(true),
118121
IngressDomain: "",
122+
MTLSEnabled: pointer.Bool(true),
119123
},
120124
}
121125

@@ -150,6 +154,12 @@ func main() {
150154
OpenShift := isOpenShift(ctx, kubeClient.DiscoveryClient)
151155

152156
if OpenShift {
157+
if cfg.KubeRay.IngressDomain == "" {
158+
configClient, err := clientset.NewForConfig(kubeConfig)
159+
exitOnError(err, "unable to create Route Client Set")
160+
cfg.KubeRay.IngressDomain, err = getClusterDomain(ctx, configClient)
161+
exitOnError(err, cfg.KubeRay.IngressDomain)
162+
}
153163
// TODO: setup the RayCluster webhook on vanilla Kubernetes
154164
exitOnError(controllers.SetupRayClusterWebhookWithManager(mgr, cfg.KubeRay), "error setting up RayCluster webhook")
155165
}
@@ -274,3 +284,17 @@ func isOpenShift(ctx context.Context, dc discovery.DiscoveryInterface) bool {
274284
logger.Info("We detected being on Vanilla Kubernetes!")
275285
return false
276286
}
287+
288+
func getClusterDomain(ctx context.Context, configClient *clientset.Clientset) (string, error) {
289+
ingress, err := configClient.ConfigV1().Ingresses().Get(ctx, "cluster", metav1.GetOptions{})
290+
if err != nil {
291+
return "", fmt.Errorf("failed to get Ingress object: %v", err)
292+
}
293+
294+
domain := ingress.Spec.Domain
295+
if domain == "" {
296+
return "", fmt.Errorf("domain is not set in the Ingress object")
297+
}
298+
299+
return domain, nil
300+
}

pkg/config/config.go

+2
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ type KubeRayConfiguration struct {
3535
RayDashboardOAuthEnabled *bool `json:"rayDashboardOAuthEnabled,omitempty"`
3636

3737
IngressDomain string `json:"ingressDomain"`
38+
39+
MTLSEnabled *bool `json:"mTLSEnabled,omitempty"`
3840
}
3941

4042
type ControllerManager struct {

pkg/controllers/raycluster_webhook.go

+208-61
Original file line numberDiff line numberDiff line change
@@ -61,88 +61,54 @@ var _ webhook.CustomValidator = &rayClusterWebhook{}
6161
func (w *rayClusterWebhook) Default(ctx context.Context, obj runtime.Object) error {
6262
rayCluster := obj.(*rayv1.RayCluster)
6363

64-
if !pointer.BoolDeref(w.Config.RayDashboardOAuthEnabled, true) {
65-
return nil
64+
oauthExists := false
65+
initHeadExists := false
66+
initWorkerExists := false
67+
68+
// Check for the create-cert Init Containers
69+
for _, container := range rayCluster.Spec.HeadGroupSpec.Template.Spec.InitContainers {
70+
if container.Name == "create-cert" {
71+
rayclusterlog.V(2).Info("Head Init Containers already exist, no patch needed")
72+
initHeadExists = true
73+
break // exits the for loop
74+
}
75+
}
76+
// Check for the create-cert Init Container in the first WorkerGroupSpec
77+
for _, container := range rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.InitContainers {
78+
if container.Name == "create-cert" {
79+
rayclusterlog.V(2).Info("Worker Init Containers already exist, no patch needed")
80+
initWorkerExists = true
81+
break // exits the for loop
82+
}
6683
}
6784

6885
// Check and add OAuth proxy if it does not exist
6986
for _, container := range rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers {
7087
if container.Name == "oauth-proxy" {
7188
rayclusterlog.V(2).Info("OAuth sidecar already exists, no patch needed")
72-
return nil
89+
oauthExists = true
7390
}
7491
}
7592

76-
rayclusterlog.V(2).Info("Adding OAuth sidecar container")
77-
78-
newOAuthSidecar := corev1.Container{
79-
Name: "oauth-proxy",
80-
Image: "registry.redhat.io/openshift4/ose-oauth-proxy@sha256:1ea6a01bf3e63cdcf125c6064cbd4a4a270deaf0f157b3eabb78f60556840366",
81-
Ports: []corev1.ContainerPort{
82-
{ContainerPort: 8443, Name: "oauth-proxy"},
83-
},
84-
Env: []corev1.EnvVar{
85-
{
86-
Name: "COOKIE_SECRET",
87-
ValueFrom: &corev1.EnvVarSource{
88-
SecretKeyRef: &corev1.SecretKeySelector{
89-
LocalObjectReference: corev1.LocalObjectReference{
90-
Name: rayCluster.Name + "-oauth-config",
91-
},
92-
Key: "cookie_secret",
93-
},
94-
},
95-
},
96-
},
97-
Args: []string{
98-
"--https-address=:8443",
99-
"--provider=openshift",
100-
"--openshift-service-account=" + rayCluster.Name + "-oauth-proxy",
101-
"--upstream=http://localhost:8265",
102-
"--tls-cert=/etc/tls/private/tls.crt",
103-
"--tls-key=/etc/tls/private/tls.key",
104-
"--cookie-secret=$(COOKIE_SECRET)",
105-
"--openshift-delegate-urls={\"/\":{\"resource\":\"pods\",\"namespace\":\"default\",\"verb\":\"get\"}}",
106-
},
107-
VolumeMounts: []corev1.VolumeMount{
108-
{
109-
Name: "proxy-tls-secret",
110-
MountPath: "/etc/tls/private",
111-
ReadOnly: true,
112-
},
113-
},
114-
}
115-
116-
rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers = append(rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers, newOAuthSidecar)
117-
118-
tlsSecretVolume := corev1.Volume{
119-
Name: "proxy-tls-secret",
120-
VolumeSource: corev1.VolumeSource{
121-
Secret: &corev1.SecretVolumeSource{
122-
SecretName: rayCluster.Name + "-proxy-tls-secret",
123-
},
124-
},
93+
if pointer.BoolDeref(w.Config.RayDashboardOAuthEnabled, true) {
94+
oauthPatch(rayCluster, oauthExists)
12595
}
126-
127-
rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes = append(rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes, tlsSecretVolume)
128-
129-
// Ensure the service account is set
130-
if rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName == "" {
131-
rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = rayCluster.Name + "-oauth-proxy"
96+
if pointer.BoolDeref(w.Config.MTLSEnabled, true) {
97+
w.mTLSPatch(rayCluster, initHeadExists, initWorkerExists)
13298
}
13399

134100
return nil
135101
}
136102

137103
func (w *rayClusterWebhook) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) {
138-
raycluster := obj.(*rayv1.RayCluster)
104+
rayCluster := obj.(*rayv1.RayCluster)
139105
var warnings admission.Warnings
140106
var allErrors field.ErrorList
141107
specPath := field.NewPath("spec")
142108

143-
if pointer.BoolDeref(raycluster.Spec.HeadGroupSpec.EnableIngress, false) {
109+
if pointer.BoolDeref(rayCluster.Spec.HeadGroupSpec.EnableIngress, false) {
144110
rayclusterlog.Info("Creating RayCluster resources with EnableIngress set to true or unspecified is not allowed")
145-
allErrors = append(allErrors, field.Invalid(specPath.Child("headGroupSpec").Child("enableIngress"), raycluster.Spec.HeadGroupSpec.EnableIngress, "creating RayCluster resources with EnableIngress set to true or unspecified is not allowed"))
111+
allErrors = append(allErrors, field.Invalid(specPath.Child("headGroupSpec").Child("enableIngress"), rayCluster.Spec.HeadGroupSpec.EnableIngress, "creating RayCluster resources with EnableIngress set to true or unspecified is not allowed"))
146112
}
147113

148114
return warnings, allErrors.ToAggregate()
@@ -162,3 +128,184 @@ func (w *rayClusterWebhook) ValidateDelete(ctx context.Context, obj runtime.Obje
162128
// Optional: Add delete validation logic here
163129
return nil, nil
164130
}
131+
func oauthPatch(rayCluster *rayv1.RayCluster, oauthExists bool) {
132+
if !oauthExists {
133+
rayclusterlog.V(2).Info("Adding OAuth sidecar container")
134+
135+
newOAuthSidecar := corev1.Container{
136+
Name: "oauth-proxy",
137+
Image: "registry.redhat.io/openshift4/ose-oauth-proxy@sha256:1ea6a01bf3e63cdcf125c6064cbd4a4a270deaf0f157b3eabb78f60556840366",
138+
Ports: []corev1.ContainerPort{
139+
{ContainerPort: 8443, Name: "oauth-proxy"},
140+
},
141+
Env: []corev1.EnvVar{
142+
{
143+
Name: "COOKIE_SECRET",
144+
ValueFrom: &corev1.EnvVarSource{
145+
SecretKeyRef: &corev1.SecretKeySelector{
146+
LocalObjectReference: corev1.LocalObjectReference{
147+
Name: rayCluster.Name + "-oauth-config",
148+
},
149+
Key: "cookie_secret",
150+
},
151+
},
152+
},
153+
},
154+
Args: []string{
155+
"--https-address=:8443",
156+
"--provider=openshift",
157+
"--openshift-service-account=" + rayCluster.Name + "-oauth-proxy",
158+
"--upstream=http://localhost:8265",
159+
"--tls-cert=/etc/tls/private/tls.crt",
160+
"--tls-key=/etc/tls/private/tls.key",
161+
"--cookie-secret=$(COOKIE_SECRET)",
162+
"--openshift-delegate-urls={\"/\":{\"resource\":\"pods\",\"namespace\":\"default\",\"verb\":\"get\"}}",
163+
},
164+
VolumeMounts: []corev1.VolumeMount{
165+
{
166+
Name: "proxy-tls-secret",
167+
MountPath: "/etc/tls/private",
168+
ReadOnly: true,
169+
},
170+
},
171+
}
172+
173+
rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers = append(rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers, newOAuthSidecar)
174+
175+
tlsSecretVolume := corev1.Volume{
176+
Name: "proxy-tls-secret",
177+
VolumeSource: corev1.VolumeSource{
178+
Secret: &corev1.SecretVolumeSource{
179+
SecretName: rayCluster.Name + "-proxy-tls-secret",
180+
},
181+
},
182+
}
183+
184+
rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes = append(rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes, tlsSecretVolume)
185+
186+
// Ensure the service account is set
187+
if rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName == "" {
188+
rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = rayCluster.Name + "-oauth-proxy"
189+
}
190+
}
191+
}
192+
193+
func (w *rayClusterWebhook) mTLSPatch(rayCluster *rayv1.RayCluster, initHeadExists bool, initWorkerExists bool) {
194+
rayclusterlog.V(2).Info("creating json patch for RayCluster initContainers")
195+
196+
// Volume Mounts for the Init Containers
197+
key_volumes := []corev1.VolumeMount{
198+
{
199+
Name: "ca-vol",
200+
MountPath: "/home/ray/workspace/ca",
201+
ReadOnly: true,
202+
},
203+
{
204+
Name: "server-cert",
205+
MountPath: "/home/ray/workspace/tls",
206+
ReadOnly: false,
207+
},
208+
}
209+
210+
// Service name for basic interactive
211+
svcDomain := rayCluster.Name + "-head-svc." + rayCluster.Namespace + ".svc"
212+
// Ca Secret generated by the SDK
213+
secretName := `ca-secret-` + rayCluster.Name
214+
215+
// Env variables for Worker & Head Containers
216+
envList := []corev1.EnvVar{
217+
{
218+
Name: "MY_POD_IP",
219+
ValueFrom: &corev1.EnvVarSource{
220+
FieldRef: &corev1.ObjectFieldSelector{
221+
FieldPath: "status.podIP",
222+
},
223+
},
224+
},
225+
{
226+
Name: "RAY_USE_TLS",
227+
Value: "1",
228+
},
229+
{
230+
Name: "RAY_TLS_SERVER_CERT",
231+
Value: "/home/ray/workspace/tls/server.crt",
232+
},
233+
{
234+
Name: "RAY_TLS_SERVER_KEY",
235+
Value: "/home/ray/workspace/tls/server.key",
236+
},
237+
{
238+
Name: "RAY_TLS_CA_CERT",
239+
Value: "/home/ray/workspace/tls/ca.crt",
240+
},
241+
}
242+
243+
// Volumes for the main container of Head and worker
244+
caVolumes := []corev1.Volume{
245+
{
246+
Name: "ca-vol",
247+
VolumeSource: corev1.VolumeSource{
248+
Secret: &corev1.SecretVolumeSource{
249+
SecretName: secretName,
250+
},
251+
},
252+
},
253+
{
254+
Name: "server-cert",
255+
VolumeSource: corev1.VolumeSource{
256+
EmptyDir: &corev1.EmptyDirVolumeSource{},
257+
},
258+
},
259+
}
260+
261+
if !initHeadExists {
262+
rayClientRoute := "rayclient-" + rayCluster.Name + "-" + rayCluster.Namespace + "." + w.Config.IngressDomain
263+
initContainerHead := corev1.Container{
264+
Name: "create-cert",
265+
Image: "quay.io/project-codeflare/ray:latest-py39-cu118",
266+
Command: []string{
267+
"sh",
268+
"-c",
269+
`cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)\nDNS.5 = ` + rayClientRoute + `\nDNS.6 = ` + svcDomain + `">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext`,
270+
},
271+
VolumeMounts: key_volumes,
272+
}
273+
274+
// Append the list of environment variables for the ray-head container
275+
for index, container := range rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers {
276+
if container.Name == "ray-head" {
277+
rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers[index].Env = append(rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers[index].Env, envList...)
278+
}
279+
}
280+
281+
// Append the create-cert Init Container
282+
rayCluster.Spec.HeadGroupSpec.Template.Spec.InitContainers = append(rayCluster.Spec.HeadGroupSpec.Template.Spec.InitContainers, initContainerHead)
283+
284+
// Append the CA volumes
285+
rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes = append(rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes, caVolumes...)
286+
}
287+
288+
if !initWorkerExists {
289+
initContainerWorker := corev1.Container{
290+
Name: "create-cert",
291+
Image: "quay.io/project-codeflare/ray:latest-py39-cu118",
292+
Command: []string{
293+
"sh",
294+
"-c",
295+
`cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext`,
296+
},
297+
VolumeMounts: key_volumes,
298+
}
299+
// Append the CA volumes
300+
rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.Volumes = append(rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.Volumes, caVolumes...)
301+
// Append the create-cert Init Container
302+
rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.InitContainers = append(rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.InitContainers, initContainerWorker)
303+
304+
// Append the list of environment variables for the machine-learning container
305+
for index, container := range rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers {
306+
if container.Name == "machine-learning" {
307+
rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[index].Env = append(rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[index].Env, envList...)
308+
}
309+
}
310+
}
311+
}

0 commit comments

Comments
 (0)