Skip to content

Commit 05519da

Browse files
committed
Created InitContainer & resources for mtls patch
1 parent 1e3bedc commit 05519da

File tree

2 files changed

+181
-13
lines changed

2 files changed

+181
-13
lines changed

main.go

+30-7
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ import (
4747
"sigs.k8s.io/yaml"
4848

4949
routev1 "github.com/openshift/api/route/v1"
50+
routev1ClientSet "github.com/openshift/client-go/route/clientset/versioned"
5051

5152
"github.com/project-codeflare/codeflare-operator/pkg/config"
5253
"github.com/project-codeflare/codeflare-operator/pkg/controllers"
@@ -56,12 +57,15 @@ import (
5657
)
5758

5859
var (
59-
scheme = runtime.NewScheme()
60-
setupLog = ctrl.Log.WithName("setup")
61-
OperatorVersion = "UNKNOWN"
62-
McadVersion = "UNKNOWN"
63-
InstaScaleVersion = "UNKNOWN"
64-
BuildDate = "UNKNOWN"
60+
scheme = runtime.NewScheme()
61+
setupLog = ctrl.Log.WithName("setup")
62+
OperatorVersion = "UNKNOWN"
63+
McadVersion = "UNKNOWN"
64+
InstaScaleVersion = "UNKNOWN"
65+
BuildDate = "UNKNOWN"
66+
NameConsoleLink string = "console"
67+
NamespaceConsoleLink string = "openshift-console"
68+
domain = ""
6569
)
6670

6771
func init() {
@@ -147,8 +151,17 @@ func main() {
147151
})
148152
exitOnError(err, "unable to start manager")
149153

154+
if cfg.KubeRay.IngressDomain == "" {
155+
routeClient, err := routev1ClientSet.NewForConfig(kubeConfig)
156+
exitOnError(err, "unable to create Route Client Set")
157+
domain, err = getOpenShiftDomainName(ctx, routeClient)
158+
exitOnError(err, domain)
159+
} else {
160+
domain = cfg.KubeRay.IngressDomain
161+
}
162+
150163
rayClusterDefaulter := &controllers.RayClusterDefaulter{}
151-
exitOnError(rayClusterDefaulter.SetupWebhookWithManager(mgr), "error setting up webhook")
164+
exitOnError(rayClusterDefaulter.SetupWebhookWithManager(mgr, domain), "error setting up webhook")
152165

153166
ok, err := HasAPIResourceForGVK(kubeClient.DiscoveryClient, rayv1.GroupVersion.WithKind("RayCluster"))
154167
if ok {
@@ -248,3 +261,13 @@ func exitOnError(err error, msg string) {
248261
os.Exit(1)
249262
}
250263
}
264+
265+
func getOpenShiftDomainName(ctx context.Context, routeClient routev1ClientSet.Interface) (string, error) {
266+
route, err := routeClient.RouteV1().Routes(NamespaceConsoleLink).Get(ctx, NameConsoleLink, metav1.GetOptions{})
267+
if err != nil {
268+
return "error getting console route URL", err
269+
}
270+
domainIndex := strings.Index(route.Spec.Host, ".")
271+
consoleLinkDomain := route.Spec.Host[domainIndex+1:]
272+
return consoleLinkDomain, nil
273+
}

pkg/controllers/raycluster_webhook.go

+151-6
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,14 @@ import (
2828
"sigs.k8s.io/controller-runtime/pkg/webhook"
2929
)
3030

31-
// log is for logging in this package.
32-
var rayclusterlog = logf.Log.WithName("raycluster-resource")
31+
var (
32+
// log is for logging in this package.
33+
rayclusterlog = logf.Log.WithName("raycluster-resource")
34+
baseDomain string = ""
35+
)
3336

34-
func (r *RayClusterDefaulter) SetupWebhookWithManager(mgr ctrl.Manager) error {
37+
func (r *RayClusterDefaulter) SetupWebhookWithManager(mgr ctrl.Manager, domain string) error {
38+
baseDomain = domain
3539
return ctrl.NewWebhookManagedBy(mgr).
3640
For(&rayv1.RayCluster{}).
3741
WithDefaulter(&RayClusterDefaulter{}).
@@ -49,17 +53,36 @@ func (r *RayClusterDefaulter) Default(ctx context.Context, obj runtime.Object) e
4953
raycluster := obj.(*rayv1.RayCluster)
5054

5155
rayclusterlog.Info("default", "name", raycluster.Name)
56+
oauthExists := false
57+
initHeadExists := false
58+
workerHeadExists := false
5259
// Check and add OAuth proxy if it does not exist.
53-
alreadyExists := false
5460
for _, container := range raycluster.Spec.HeadGroupSpec.Template.Spec.Containers {
5561
if container.Name == "oauth-proxy" {
5662
rayclusterlog.Info("OAuth sidecar already exists, no patch needed")
57-
alreadyExists = true
63+
oauthExists = true
64+
break // exits the for loop
65+
}
66+
}
67+
68+
// Check for the create-cert Init Containers
69+
for _, container := range raycluster.Spec.HeadGroupSpec.Template.Spec.InitContainers {
70+
if container.Name == "create-cert" {
71+
rayclusterlog.Info("Head Init Containers already exist, no patch needed")
72+
initHeadExists = true
73+
break // exits the for loop
74+
}
75+
}
76+
// Check for the create-cert Init Container in the first WorkerGroupSpec
77+
for _, container := range raycluster.Spec.WorkerGroupSpecs[0].Template.Spec.InitContainers {
78+
if container.Name == "create-cert" {
79+
rayclusterlog.Info("Worker Init Containers already exist, no patch needed")
80+
workerHeadExists = true
5881
break // exits the for loop
5982
}
6083
}
6184

62-
if !alreadyExists {
85+
if !oauthExists {
6386
rayclusterlog.Info("Adding OAuth sidecar container")
6487
// definition of the new container
6588
newOAuthSidecar := corev1.Container{
@@ -119,5 +142,127 @@ func (r *RayClusterDefaulter) Default(ctx context.Context, obj runtime.Object) e
119142
raycluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = raycluster.Name + "-oauth-proxy"
120143
}
121144
}
145+
mtlsPatch(raycluster, initHeadExists, workerHeadExists)
122146
return nil
123147
}
148+
149+
func mtlsPatch(raycluster *rayv1.RayCluster, initHeadExists bool, workerHeadExists bool) {
150+
151+
rayclusterlog.Info("creating json patch for RayCluster initContainers")
152+
153+
// Volume Mounts for the Init Containers
154+
key_volumes := []corev1.VolumeMount{
155+
{
156+
Name: "ca-vol",
157+
MountPath: "/home/ray/workspace/ca",
158+
ReadOnly: true,
159+
},
160+
{
161+
Name: "server-cert",
162+
MountPath: "/home/ray/workspace/tls",
163+
ReadOnly: false,
164+
},
165+
}
166+
167+
// Service name for basic interactive
168+
svcDomain := raycluster.Name + "-head-svc." + raycluster.Namespace + ".svc"
169+
// Ca Secret generated by the SDK
170+
secretName := `ca-secret-` + raycluster.Name
171+
172+
// Env variables for Worker & Head Containers
173+
envList := []corev1.EnvVar{
174+
{
175+
Name: "MY_POD_IP",
176+
ValueFrom: &corev1.EnvVarSource{
177+
FieldRef: &corev1.ObjectFieldSelector{
178+
FieldPath: "status.podIP",
179+
},
180+
},
181+
},
182+
{
183+
Name: "RAY_USE_TLS",
184+
Value: "1",
185+
},
186+
{
187+
Name: "RAY_TLS_SERVER_CERT",
188+
Value: "/home/ray/workspace/tls/server.crt",
189+
},
190+
{
191+
Name: "RAY_TLS_SERVER_KEY",
192+
Value: "/home/ray/workspace/tls/server.key",
193+
},
194+
{
195+
Name: "RAY_TLS_CA_CERT",
196+
Value: "/home/ray/workspace/tls/ca.crt",
197+
},
198+
}
199+
200+
// Volumes for the main container of Head and worker
201+
caVolumes := []corev1.Volume{
202+
{
203+
Name: "ca-vol",
204+
VolumeSource: corev1.VolumeSource{
205+
Secret: &corev1.SecretVolumeSource{
206+
SecretName: secretName,
207+
},
208+
},
209+
},
210+
{
211+
Name: "server-cert",
212+
VolumeSource: corev1.VolumeSource{
213+
EmptyDir: &corev1.EmptyDirVolumeSource{},
214+
},
215+
},
216+
}
217+
218+
if !initHeadExists {
219+
rayClientRoute := "rayclient-" + raycluster.Name + "-" + raycluster.Namespace + "." + baseDomain
220+
initContainerHead := corev1.Container{
221+
Name: "create-cert",
222+
Image: "quay.io/project-codeflare/ray:latest-py39-cu118",
223+
Command: []string{
224+
"sh",
225+
"-c",
226+
`cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)\nDNS.5 = ` + rayClientRoute + `\nDNS.6 = ` + svcDomain + `">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext`,
227+
},
228+
VolumeMounts: key_volumes,
229+
}
230+
231+
// Append the list of environment variables for the ray-head container
232+
for index, container := range raycluster.Spec.HeadGroupSpec.Template.Spec.Containers {
233+
if container.Name == "ray-head" {
234+
raycluster.Spec.HeadGroupSpec.Template.Spec.Containers[index].Env = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Containers[index].Env, envList...)
235+
}
236+
}
237+
238+
// Append the create-cert Init Container
239+
raycluster.Spec.HeadGroupSpec.Template.Spec.InitContainers = append(raycluster.Spec.HeadGroupSpec.Template.Spec.InitContainers, initContainerHead)
240+
241+
// Append the CA volumes
242+
raycluster.Spec.HeadGroupSpec.Template.Spec.Volumes = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Volumes, caVolumes...)
243+
}
244+
245+
if !workerHeadExists {
246+
initContainerWorker := corev1.Container{
247+
Name: "create-cert",
248+
Image: "quay.io/project-codeflare/ray:latest-py39-cu118",
249+
Command: []string{
250+
"sh",
251+
"-c",
252+
`cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext`,
253+
},
254+
VolumeMounts: key_volumes,
255+
}
256+
// Append the CA volumes
257+
raycluster.Spec.WorkerGroupSpecs[0].Template.Spec.Volumes = append(raycluster.Spec.WorkerGroupSpecs[0].Template.Spec.Volumes, caVolumes...)
258+
// Append the create-cert Init Container
259+
raycluster.Spec.WorkerGroupSpecs[0].Template.Spec.InitContainers = append(raycluster.Spec.WorkerGroupSpecs[0].Template.Spec.InitContainers, initContainerWorker)
260+
261+
// Append the list of environment variables for the machine-learning container
262+
for index, container := range raycluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers {
263+
if container.Name == "machine-learning" {
264+
raycluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[index].Env = append(raycluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[index].Env, envList...)
265+
}
266+
}
267+
}
268+
}

0 commit comments

Comments
 (0)