Skip to content

Commit 88c964d

Browse files
committed
Added Mtls patch
(cherry picked from commit de2de96fc88022df783b637ccb145d1d73ba66ff)
1 parent 4a7cb60 commit 88c964d

File tree

2 files changed

+230
-57
lines changed

2 files changed

+230
-57
lines changed

main.go

+30-7
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"strings"
2525
"time"
2626

27+
routev1ClientSet "github.com/openshift/client-go/route/clientset/versioned"
2728
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
2829
"go.uber.org/zap/zapcore"
2930

@@ -56,12 +57,15 @@ import (
5657
)
5758

5859
var (
59-
scheme = runtime.NewScheme()
60-
setupLog = ctrl.Log.WithName("setup")
61-
OperatorVersion = "UNKNOWN"
62-
McadVersion = "UNKNOWN"
63-
InstaScaleVersion = "UNKNOWN"
64-
BuildDate = "UNKNOWN"
60+
scheme = runtime.NewScheme()
61+
setupLog = ctrl.Log.WithName("setup")
62+
OperatorVersion = "UNKNOWN"
63+
McadVersion = "UNKNOWN"
64+
InstaScaleVersion = "UNKNOWN"
65+
BuildDate = "UNKNOWN"
66+
NameConsoleLink string = "console"
67+
NamespaceConsoleLink string = "openshift-console"
68+
domain string = ""
6569
)
6670

6771
func init() {
@@ -150,8 +154,17 @@ func main() {
150154
OpenShift := isOpenShift(ctx, kubeClient.DiscoveryClient)
151155

152156
if OpenShift {
157+
if cfg.KubeRay.IngressDomain == "" {
158+
routeClient, err := routev1ClientSet.NewForConfig(kubeConfig)
159+
exitOnError(err, "unable to create Route Client Set")
160+
domain, err = getOpenShiftDomainName(ctx, routeClient)
161+
exitOnError(err, domain)
162+
} else {
163+
domain = cfg.KubeRay.IngressDomain
164+
}
165+
153166
// TODO: setup the RayCluster webhook on vanilla Kubernetes
154-
exitOnError(controllers.SetupRayClusterWebhookWithManager(mgr, cfg.KubeRay), "error setting up RayCluster webhook")
167+
exitOnError(controllers.SetupRayClusterWebhookWithManager(mgr, cfg.KubeRay, domain), "error setting up RayCluster webhook")
155168
}
156169

157170
ok, err := hasAPIResourceForGVK(kubeClient.DiscoveryClient, rayv1.GroupVersion.WithKind("RayCluster"))
@@ -274,3 +287,13 @@ func isOpenShift(ctx context.Context, dc discovery.DiscoveryInterface) bool {
274287
logger.Info("We detected being on Vanilla Kubernetes!")
275288
return false
276289
}
290+
291+
func getOpenShiftDomainName(ctx context.Context, routeClient routev1ClientSet.Interface) (string, error) {
292+
route, err := routeClient.RouteV1().Routes(NamespaceConsoleLink).Get(ctx, NameConsoleLink, metav1.GetOptions{})
293+
if err != nil {
294+
return "error getting console route URL", err
295+
}
296+
domainIndex := strings.Index(route.Spec.Host, ".")
297+
consoleLinkDomain := route.Spec.Host[domainIndex+1:]
298+
return consoleLinkDomain, nil
299+
}

pkg/controllers/raycluster_webhook.go

+200-50
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,13 @@ import (
3232
)
3333

3434
// log is for logging in this package.
35-
var rayclusterlog = logf.Log.WithName("raycluster-resource")
35+
var (
36+
rayclusterlog = logf.Log.WithName("raycluster-resource")
37+
baseDomain string = ""
38+
)
3639

37-
func SetupRayClusterWebhookWithManager(mgr ctrl.Manager, cfg *config.KubeRayConfiguration) error {
40+
func SetupRayClusterWebhookWithManager(mgr ctrl.Manager, cfg *config.KubeRayConfiguration, domain string) error {
41+
baseDomain = domain
3842
return ctrl.NewWebhookManagedBy(mgr).
3943
For(&rayv1.RayCluster{}).
4044
WithDefaulter(&rayClusterDefaulter{
@@ -55,80 +59,226 @@ var _ webhook.CustomDefaulter = &rayClusterDefaulter{}
5559
func (r *rayClusterDefaulter) Default(ctx context.Context, obj runtime.Object) error {
5660
raycluster := obj.(*rayv1.RayCluster)
5761

62+
oauthExists := false
63+
initHeadExists := false
64+
initWorkerExists := false
65+
66+
// Check for the create-cert Init Containers
67+
for _, container := range raycluster.Spec.HeadGroupSpec.Template.Spec.InitContainers {
68+
if container.Name == "create-cert" {
69+
rayclusterlog.V(2).Info("Head Init Containers already exist, no patch needed")
70+
initHeadExists = true
71+
break // exits the for loop
72+
}
73+
}
74+
// Check fot the create-cert Init Container WorkerGroupSpec
75+
for _, container := range raycluster.Spec.WorkerGroupSpecs[0].Template.Spec.InitContainers {
76+
if container.Name == "create-cert" {
77+
rayclusterlog.V(2).Info("Worker Init Containers already exist, no patch needed")
78+
initWorkerExists = true
79+
break // exits the for loop
80+
}
81+
}
82+
5883
if !pointer.BoolDeref(r.Config.RayDashboardOAuthEnabled, true) {
84+
// Still need to call init container patch even if oauth is disabled
85+
mtlsPatch(raycluster, initHeadExists, initWorkerExists)
5986
return nil
6087
}
6188

6289
// Check and add OAuth proxy if it does not exist
6390
for _, container := range raycluster.Spec.HeadGroupSpec.Template.Spec.Containers {
6491
if container.Name == "oauth-proxy" {
6592
rayclusterlog.V(2).Info("OAuth sidecar already exists, no patch needed")
66-
return nil
93+
oauthExists = true
6794
}
6895
}
96+
if !oauthExists {
97+
rayclusterlog.V(2).Info("Adding OAuth sidecar container")
98+
// definition of the new container
99+
newOAuthSidecar := corev1.Container{
100+
Name: "oauth-proxy",
101+
Image: "registry.redhat.io/openshift4/ose-oauth-proxy@sha256:1ea6a01bf3e63cdcf125c6064cbd4a4a270deaf0f157b3eabb78f60556840366",
102+
Ports: []corev1.ContainerPort{
103+
{ContainerPort: 8443, Name: "oauth-proxy"},
104+
},
105+
Args: []string{
106+
"--https-address=:8443",
107+
"--provider=openshift",
108+
"--openshift-service-account=" + raycluster.Name + "-oauth-proxy",
109+
"--upstream=http://localhost:8265",
110+
"--tls-cert=/etc/tls/private/tls.crt",
111+
"--tls-key=/etc/tls/private/tls.key",
112+
"--cookie-secret=$(COOKIE_SECRET)",
113+
"--openshift-delegate-urls={\"/\":{\"resource\":\"pods\",\"namespace\":\"default\",\"verb\":\"get\"}}",
114+
},
115+
VolumeMounts: []corev1.VolumeMount{
116+
{
117+
Name: "proxy-tls-secret",
118+
MountPath: "/etc/tls/private",
119+
ReadOnly: true,
120+
},
121+
},
122+
}
69123

70-
rayclusterlog.V(2).Info("Adding OAuth sidecar container")
71-
// definition of the new container
72-
newOAuthSidecar := corev1.Container{
73-
Name: "oauth-proxy",
74-
Image: "registry.redhat.io/openshift4/ose-oauth-proxy@sha256:1ea6a01bf3e63cdcf125c6064cbd4a4a270deaf0f157b3eabb78f60556840366",
75-
Ports: []corev1.ContainerPort{
76-
{ContainerPort: 8443, Name: "oauth-proxy"},
77-
},
78-
Args: []string{
79-
"--https-address=:8443",
80-
"--provider=openshift",
81-
"--openshift-service-account=" + raycluster.Name + "-oauth-proxy",
82-
"--upstream=http://localhost:8265",
83-
"--tls-cert=/etc/tls/private/tls.crt",
84-
"--tls-key=/etc/tls/private/tls.key",
85-
"--cookie-secret=$(COOKIE_SECRET)",
86-
"--openshift-delegate-urls={\"/\":{\"resource\":\"pods\",\"namespace\":\"default\",\"verb\":\"get\"}}",
87-
},
88-
VolumeMounts: []corev1.VolumeMount{
89-
{
90-
Name: "proxy-tls-secret",
91-
MountPath: "/etc/tls/private",
92-
ReadOnly: true,
124+
// Adding the new OAuth sidecar container
125+
raycluster.Spec.HeadGroupSpec.Template.Spec.Containers = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Containers, newOAuthSidecar)
126+
127+
cookieSecret := corev1.EnvVar{
128+
Name: "COOKIE_SECRET",
129+
ValueFrom: &corev1.EnvVarSource{
130+
SecretKeyRef: &corev1.SecretKeySelector{
131+
LocalObjectReference: corev1.LocalObjectReference{
132+
Name: raycluster.Name + "-oauth-config",
133+
},
134+
Key: "cookie_secret",
135+
},
93136
},
137+
}
138+
139+
raycluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Env = append(
140+
raycluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Env,
141+
cookieSecret,
142+
)
143+
144+
tlsSecretVolume := corev1.Volume{
145+
Name: "proxy-tls-secret",
146+
VolumeSource: corev1.VolumeSource{
147+
Secret: &corev1.SecretVolumeSource{
148+
SecretName: raycluster.Name + "-proxy-tls-secret",
149+
},
150+
},
151+
}
152+
153+
raycluster.Spec.HeadGroupSpec.Template.Spec.Volumes = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Volumes, tlsSecretVolume)
154+
155+
// Ensure the service account is set
156+
if raycluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName == "" {
157+
raycluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = raycluster.Name + "-oauth-proxy"
158+
}
159+
}
160+
161+
mtlsPatch(raycluster, initHeadExists, initWorkerExists)
162+
return nil
163+
}
164+
165+
func mtlsPatch(raycluster *rayv1.RayCluster, initHeadExists bool, initWorkerExists bool) {
166+
167+
rayclusterlog.V(2).Info("creating json patch for RayCluster initContainers")
168+
169+
// Volume Mounts for the Init Containers
170+
key_volumes := []corev1.VolumeMount{
171+
{
172+
Name: "ca-vol",
173+
MountPath: "/home/ray/workspace/ca",
174+
ReadOnly: true,
175+
},
176+
{
177+
Name: "server-cert",
178+
MountPath: "/home/ray/workspace/tls",
179+
ReadOnly: false,
94180
},
95181
}
96182

97-
// Adding the new OAuth sidecar container
98-
raycluster.Spec.HeadGroupSpec.Template.Spec.Containers = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Containers, newOAuthSidecar)
183+
// Service name for basic interactive
184+
svcDomain := raycluster.Name + "-head-svc." + raycluster.Namespace + ".svc"
185+
// Ca Secret generated by the SDK
186+
secretName := `ca-secret-` + raycluster.Name
99187

100-
cookieSecret := corev1.EnvVar{
101-
Name: "COOKIE_SECRET",
102-
ValueFrom: &corev1.EnvVarSource{
103-
SecretKeyRef: &corev1.SecretKeySelector{
104-
LocalObjectReference: corev1.LocalObjectReference{
105-
Name: raycluster.Name + "-oauth-config",
188+
// Env variables for Worker & Head Containers
189+
envList := []corev1.EnvVar{
190+
{
191+
Name: "MY_POD_IP",
192+
ValueFrom: &corev1.EnvVarSource{
193+
FieldRef: &corev1.ObjectFieldSelector{
194+
FieldPath: "status.podIP",
106195
},
107-
Key: "cookie_secret",
108196
},
109197
},
198+
{
199+
Name: "RAY_USE_TLS",
200+
Value: "1",
201+
},
202+
{
203+
Name: "RAY_TLS_SERVER_CERT",
204+
Value: "/home/ray/workspace/tls/server.crt",
205+
},
206+
{
207+
Name: "RAY_TLS_SERVER_KEY",
208+
Value: "/home/ray/workspace/tls/server.key",
209+
},
210+
{
211+
Name: "RAY_TLS_CA_CERT",
212+
Value: "/home/ray/workspace/tls/ca.crt",
213+
},
110214
}
111215

112-
raycluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Env = append(
113-
raycluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Env,
114-
cookieSecret,
115-
)
116-
117-
tlsSecretVolume := corev1.Volume{
118-
Name: "proxy-tls-secret",
119-
VolumeSource: corev1.VolumeSource{
120-
Secret: &corev1.SecretVolumeSource{
121-
SecretName: raycluster.Name + "-proxy-tls-secret",
216+
// Volumes for the main container of Head and worker
217+
caVolumes := []corev1.Volume{
218+
{
219+
Name: "ca-vol",
220+
VolumeSource: corev1.VolumeSource{
221+
Secret: &corev1.SecretVolumeSource{
222+
SecretName: secretName,
223+
},
224+
},
225+
},
226+
{
227+
Name: "server-cert",
228+
VolumeSource: corev1.VolumeSource{
229+
EmptyDir: &corev1.EmptyDirVolumeSource{},
122230
},
123231
},
124232
}
125233

126-
raycluster.Spec.HeadGroupSpec.Template.Spec.Volumes = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Volumes, tlsSecretVolume)
234+
if !initHeadExists {
235+
rayClientRoute := "rayclient-" + raycluster.Name + "-" + raycluster.Namespace + "." + baseDomain
236+
initContainerHead := corev1.Container{
237+
Name: "create-cert",
238+
Image: "quay.io/project-codeflare/ray:latest-py39-cu118",
239+
Command: []string{
240+
"sh",
241+
"-c",
242+
`cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)\nDNS.5 = ` + rayClientRoute + `\nDNS.6 = ` + svcDomain + `">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext`,
243+
},
244+
VolumeMounts: key_volumes,
245+
}
246+
247+
// Append the list of environment variables for the ray-head container
248+
for index, container := range raycluster.Spec.HeadGroupSpec.Template.Spec.Containers {
249+
if container.Name == "ray-head" {
250+
raycluster.Spec.HeadGroupSpec.Template.Spec.Containers[index].Env = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Containers[index].Env, envList...)
251+
}
252+
}
253+
254+
// Append the create-cert Init Container
255+
raycluster.Spec.HeadGroupSpec.Template.Spec.InitContainers = append(raycluster.Spec.HeadGroupSpec.Template.Spec.InitContainers, initContainerHead)
127256

128-
// Ensure the service account is set
129-
if raycluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName == "" {
130-
raycluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = raycluster.Name + "-oauth-proxy"
257+
// Append the CA volumes
258+
raycluster.Spec.HeadGroupSpec.Template.Spec.Volumes = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Volumes, caVolumes...)
131259
}
132260

133-
return nil
261+
if !initWorkerExists {
262+
initContainerWorker := corev1.Container{
263+
Name: "create-cert",
264+
Image: "quay.io/project-codeflare/ray:latest-py39-cu118",
265+
Command: []string{
266+
"sh",
267+
"-c",
268+
`cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext`,
269+
},
270+
VolumeMounts: key_volumes,
271+
}
272+
// Append the CA volumes
273+
raycluster.Spec.WorkerGroupSpecs[0].Template.Spec.Volumes = append(raycluster.Spec.WorkerGroupSpecs[0].Template.Spec.Volumes, caVolumes...)
274+
// Append the create-cert Init Container
275+
raycluster.Spec.WorkerGroupSpecs[0].Template.Spec.InitContainers = append(raycluster.Spec.WorkerGroupSpecs[0].Template.Spec.InitContainers, initContainerWorker)
276+
277+
// Append the list of environment variables for the machine-learning container
278+
for index, container := range raycluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers {
279+
if container.Name == "machine-learning" {
280+
raycluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[index].Env = append(raycluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[index].Env, envList...)
281+
}
282+
}
283+
}
134284
}

0 commit comments

Comments
 (0)