Skip to content

Commit ed8a71f

Browse files
add e2e tests for AppWrappers with Jobs, Deployments, and Services
1 parent 8ad4785 commit ed8a71f

5 files changed

+328
-6
lines changed
+165
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
/*
2+
Copyright 2024.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package e2e
18+
19+
import (
20+
"testing"
21+
22+
. "github.com/onsi/gomega"
23+
mcadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2"
24+
. "github.com/project-codeflare/codeflare-common/support"
25+
26+
appsv1 "k8s.io/api/apps/v1"
27+
corev1 "k8s.io/api/core/v1"
28+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
29+
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
30+
"k8s.io/apimachinery/pkg/runtime"
31+
"k8s.io/apimachinery/pkg/util/intstr"
32+
"k8s.io/utils/ptr"
33+
"sigs.k8s.io/kueue/apis/kueue/v1beta1"
34+
)
35+
36+
// verify that an AppWrapper containing a Deployment and Service can execute successfully
37+
func TestDeploymentAppWrapper(t *testing.T) {
38+
test := With(t)
39+
40+
// Create a namespace
41+
namespace := test.NewTestNamespace()
42+
43+
// Create Kueue resources
44+
resourceFlavor := CreateKueueResourceFlavor(test, v1beta1.ResourceFlavorSpec{})
45+
defer func() {
46+
_ = test.Client().Kueue().KueueV1beta1().ResourceFlavors().Delete(test.Ctx(), resourceFlavor.Name, metav1.DeleteOptions{})
47+
}()
48+
clusterQueue := createClusterQueue(test, resourceFlavor, 0)
49+
defer func() {
50+
_ = test.Client().Kueue().KueueV1beta1().ClusterQueues().Delete(test.Ctx(), clusterQueue.Name, metav1.DeleteOptions{})
51+
}()
52+
localQueue := CreateKueueLocalQueue(test, namespace.Name, clusterQueue.Name, AsDefaultQueue)
53+
54+
// Deployment + Service (ie, a typical inference setup)
55+
test.T().Logf("AppWrapper containing Deployment and Service")
56+
job := &appsv1.Deployment{
57+
TypeMeta: metav1.TypeMeta{
58+
APIVersion: appsv1.SchemeGroupVersion.String(),
59+
Kind: "Deployment",
60+
},
61+
ObjectMeta: metav1.ObjectMeta{
62+
GenerateName: "deployment",
63+
Namespace: namespace.Name,
64+
Labels: map[string]string{"app": "inference"},
65+
},
66+
Spec: appsv1.DeploymentSpec{
67+
Replicas: ptr.To(int32(1)),
68+
Selector: &metav1.LabelSelector{
69+
MatchLabels: map[string]string{"app": "inference"},
70+
},
71+
Template: corev1.PodTemplateSpec{
72+
ObjectMeta: metav1.ObjectMeta{
73+
Labels: map[string]string{"app": "inference"},
74+
},
75+
Spec: corev1.PodSpec{
76+
Tolerations: []corev1.Toleration{
77+
{
78+
Key: "nvidia.com/gpu",
79+
Operator: corev1.TolerationOpExists,
80+
},
81+
},
82+
Containers: []corev1.Container{
83+
{
84+
Name: "job",
85+
Image: "quay.io/project-codeflare/busybox:1.36",
86+
Command: []string{"/bin/sh", "-c", "sleep 600; exit 0"},
87+
},
88+
},
89+
RestartPolicy: corev1.RestartPolicyAlways,
90+
},
91+
},
92+
},
93+
}
94+
95+
service := &corev1.Service{
96+
TypeMeta: metav1.TypeMeta{
97+
APIVersion: corev1.SchemeGroupVersion.String(),
98+
Kind: "Service",
99+
},
100+
ObjectMeta: metav1.ObjectMeta{
101+
GenerateName: "service",
102+
Namespace: namespace.Name,
103+
Labels: map[string]string{"app": "inference"},
104+
},
105+
Spec: corev1.ServiceSpec{
106+
Type: corev1.ServiceTypeClusterIP,
107+
Selector: map[string]string{"app": "inference"},
108+
Ports: []corev1.ServicePort{{Port: 8080, Protocol: corev1.ProtocolTCP, TargetPort: intstr.FromInt(8080)}},
109+
},
110+
}
111+
112+
raw1 := Raw(test, job)
113+
raw1 = RemoveCreationTimestamp(test, raw1)
114+
raw2 := Raw(test, service)
115+
raw2 = RemoveCreationTimestamp(test, raw2)
116+
117+
// Create an AppWrapper resource
118+
aw := &mcadv1beta2.AppWrapper{
119+
TypeMeta: metav1.TypeMeta{
120+
APIVersion: mcadv1beta2.GroupVersion.String(),
121+
Kind: "AppWrapper",
122+
},
123+
ObjectMeta: metav1.ObjectMeta{
124+
GenerateName: "infserver-",
125+
Namespace: namespace.Name,
126+
Labels: map[string]string{"kueue.x-k8s.io/queue-name": localQueue.Name},
127+
},
128+
Spec: mcadv1beta2.AppWrapperSpec{
129+
Components: []mcadv1beta2.AppWrapperComponent{
130+
{
131+
Template: raw1,
132+
},
133+
{
134+
Template: raw2,
135+
},
136+
},
137+
},
138+
}
139+
140+
appWrapperResource := mcadv1beta2.GroupVersion.WithResource("appwrappers")
141+
awMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(aw)
142+
test.Expect(err).NotTo(HaveOccurred())
143+
unstruct := unstructured.Unstructured{Object: awMap}
144+
unstructp, err := test.Client().Dynamic().Resource(appWrapperResource).Namespace(namespace.Name).Create(test.Ctx(), &unstruct, metav1.CreateOptions{})
145+
test.Expect(err).NotTo(HaveOccurred())
146+
err = runtime.DefaultUnstructuredConverter.FromUnstructured(unstructp.Object, aw)
147+
test.Expect(err).NotTo(HaveOccurred())
148+
test.T().Logf("Created AppWrapper %s/%s successfully", aw.Namespace, aw.Name)
149+
150+
test.T().Logf("Waiting for AppWrapper %s/%s to be running", aw.Namespace, aw.Name)
151+
test.Eventually(AppWrappers(test, namespace), TestTimeoutMedium).
152+
Should(ContainElement(WithTransform(AppWrapperPhase, Equal(mcadv1beta2.AppWrapperRunning))))
153+
154+
// A deployment will not complete; so simply make sure it keeps running for reasonable interval
155+
test.T().Logf("Ensuring the AppWrapper %s/%s continues to run", aw.Namespace, aw.Name)
156+
test.Consistently(AppWrappers(test, namespace), TestTimeoutMedium).Should(
157+
ContainElement(WithTransform(AppWrapperPhase, Equal(mcadv1beta2.AppWrapperRunning))))
158+
159+
test.T().Logf("Deleting AppWrapper %s/%s", aw.Namespace, aw.Name)
160+
err = test.Client().Dynamic().Resource(appWrapperResource).Namespace(namespace.Name).Delete(test.Ctx(), aw.Name, metav1.DeleteOptions{})
161+
test.Expect(err).NotTo(HaveOccurred())
162+
163+
test.T().Logf("Waiting for AppWrapper %s/%s to be deleted", aw.Namespace, aw.Name)
164+
test.Eventually(AppWrappers(test, namespace), TestTimeoutShort).Should(BeEmpty())
165+
}

test/e2e/job_appwrapper_test.go

+143
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
/*
2+
Copyright 2024.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package e2e
18+
19+
import (
20+
"testing"
21+
22+
. "github.com/onsi/gomega"
23+
mcadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2"
24+
. "github.com/project-codeflare/codeflare-common/support"
25+
26+
batchv1 "k8s.io/api/batch/v1"
27+
corev1 "k8s.io/api/core/v1"
28+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
29+
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
30+
"k8s.io/apimachinery/pkg/runtime"
31+
"sigs.k8s.io/kueue/apis/kueue/v1beta1"
32+
)
33+
34+
// verify that an AppWrapper containing a batchv1/Job can execute successfully
35+
func TestBatchJobAppWrapper(t *testing.T) {
36+
test := With(t)
37+
38+
// Create a namespace
39+
namespace := test.NewTestNamespace()
40+
41+
// Create Kueue resources
42+
resourceFlavor := CreateKueueResourceFlavor(test, v1beta1.ResourceFlavorSpec{})
43+
defer func() {
44+
_ = test.Client().Kueue().KueueV1beta1().ResourceFlavors().Delete(test.Ctx(), resourceFlavor.Name, metav1.DeleteOptions{})
45+
}()
46+
clusterQueue := createClusterQueue(test, resourceFlavor, 0)
47+
defer func() {
48+
_ = test.Client().Kueue().KueueV1beta1().ClusterQueues().Delete(test.Ctx(), clusterQueue.Name, metav1.DeleteOptions{})
49+
}()
50+
localQueue := CreateKueueLocalQueue(test, namespace.Name, clusterQueue.Name, AsDefaultQueue)
51+
52+
// Batch Job
53+
test.T().Logf("AppWrapper containing batchv1/Job")
54+
job := &batchv1.Job{
55+
TypeMeta: metav1.TypeMeta{
56+
APIVersion: batchv1.SchemeGroupVersion.String(),
57+
Kind: "Job",
58+
},
59+
ObjectMeta: metav1.ObjectMeta{
60+
GenerateName: "batchjob",
61+
Namespace: namespace.Name,
62+
},
63+
Spec: batchv1.JobSpec{
64+
Completions: Ptr(int32(1)),
65+
Parallelism: Ptr(int32(1)),
66+
Template: corev1.PodTemplateSpec{
67+
Spec: corev1.PodSpec{
68+
Tolerations: []corev1.Toleration{
69+
{
70+
Key: "nvidia.com/gpu",
71+
Operator: corev1.TolerationOpExists,
72+
},
73+
},
74+
Containers: []corev1.Container{
75+
{
76+
Name: "job",
77+
Image: "quay.io/project-codeflare/busybox:1.36",
78+
Command: []string{"/bin/sh", "-c", "sleep 20; exit 0"},
79+
},
80+
},
81+
RestartPolicy: corev1.RestartPolicyNever,
82+
},
83+
},
84+
},
85+
}
86+
87+
raw := Raw(test, job)
88+
raw = RemoveCreationTimestamp(test, raw)
89+
90+
// Create an AppWrapper resource
91+
aw := &mcadv1beta2.AppWrapper{
92+
TypeMeta: metav1.TypeMeta{
93+
APIVersion: mcadv1beta2.GroupVersion.String(),
94+
Kind: "AppWrapper",
95+
},
96+
ObjectMeta: metav1.ObjectMeta{
97+
GenerateName: "batchjob-",
98+
Namespace: namespace.Name,
99+
Labels: map[string]string{"kueue.x-k8s.io/queue-name": localQueue.Name},
100+
},
101+
Spec: mcadv1beta2.AppWrapperSpec{
102+
Components: []mcadv1beta2.AppWrapperComponent{
103+
{
104+
Template: raw,
105+
},
106+
},
107+
},
108+
}
109+
110+
appWrapperResource := mcadv1beta2.GroupVersion.WithResource("appwrappers")
111+
awMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(aw)
112+
test.Expect(err).NotTo(HaveOccurred())
113+
unstruct := unstructured.Unstructured{Object: awMap}
114+
unstructp, err := test.Client().Dynamic().Resource(appWrapperResource).Namespace(namespace.Name).Create(test.Ctx(), &unstruct, metav1.CreateOptions{})
115+
test.Expect(err).NotTo(HaveOccurred())
116+
err = runtime.DefaultUnstructuredConverter.FromUnstructured(unstructp.Object, aw)
117+
test.Expect(err).NotTo(HaveOccurred())
118+
test.T().Logf("Created AppWrapper %s/%s successfully", aw.Namespace, aw.Name)
119+
120+
test.T().Logf("Waiting for AppWrapper %s/%s to be running", aw.Namespace, aw.Name)
121+
test.Eventually(AppWrappers(test, namespace), TestTimeoutMedium).
122+
Should(ContainElement(WithTransform(AppWrapperPhase, Equal(mcadv1beta2.AppWrapperRunning))))
123+
124+
test.T().Logf("Waiting for AppWrapper %s/%s to complete", aw.Namespace, aw.Name)
125+
test.Eventually(AppWrappers(test, namespace), TestTimeoutLong).Should(
126+
ContainElement(
127+
Or(
128+
WithTransform(AppWrapperPhase, Equal(mcadv1beta2.AppWrapperSucceeded)),
129+
WithTransform(AppWrapperPhase, Equal(mcadv1beta2.AppWrapperFailed)),
130+
),
131+
))
132+
133+
// Assert the AppWrapper has completed successfully
134+
test.Expect(AppWrappers(test, namespace)(test)).
135+
To(ContainElement(WithTransform(AppWrapperPhase, Equal(mcadv1beta2.AppWrapperSucceeded))))
136+
137+
test.T().Logf("Deleting AppWrapper %s/%s", aw.Namespace, aw.Name)
138+
err = test.Client().Dynamic().Resource(appWrapperResource).Namespace(namespace.Name).Delete(test.Ctx(), aw.Name, metav1.DeleteOptions{})
139+
test.Expect(err).NotTo(HaveOccurred())
140+
141+
test.T().Logf("Waiting for AppWrapper %s/%s to be deleted", aw.Namespace, aw.Name)
142+
test.Eventually(AppWrappers(test, namespace), TestTimeoutShort).Should(BeEmpty())
143+
}

test/e2e/mnist_pytorch_appwrapper_test.go

+6-2
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,13 @@ func runMnistPyTorchAppWrapper(t *testing.T, accelerator string, numberOfGpus in
4848

4949
// Create Kueue resources
5050
resourceFlavor := CreateKueueResourceFlavor(test, v1beta1.ResourceFlavorSpec{})
51-
defer test.Client().Kueue().KueueV1beta1().ResourceFlavors().Delete(test.Ctx(), resourceFlavor.Name, metav1.DeleteOptions{})
51+
defer func() {
52+
_ = test.Client().Kueue().KueueV1beta1().ResourceFlavors().Delete(test.Ctx(), resourceFlavor.Name, metav1.DeleteOptions{})
53+
}()
5254
clusterQueue := createClusterQueue(test, resourceFlavor, numberOfGpus)
53-
defer test.Client().Kueue().KueueV1beta1().ClusterQueues().Delete(test.Ctx(), clusterQueue.Name, metav1.DeleteOptions{})
55+
defer func() {
56+
_ = test.Client().Kueue().KueueV1beta1().ClusterQueues().Delete(test.Ctx(), clusterQueue.Name, metav1.DeleteOptions{})
57+
}()
5458
localQueue := CreateKueueLocalQueue(test, namespace.Name, clusterQueue.Name, AsDefaultQueue)
5559

5660
// Test configuration

test/e2e/mnist_rayjob_raycluster_test.go

+12-4
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,13 @@ func runMnistRayJobRayCluster(t *testing.T, accelerator string, numberOfGpus int
5555

5656
// Create Kueue resources
5757
resourceFlavor := CreateKueueResourceFlavor(test, v1beta1.ResourceFlavorSpec{})
58-
defer test.Client().Kueue().KueueV1beta1().ResourceFlavors().Delete(test.Ctx(), resourceFlavor.Name, metav1.DeleteOptions{})
58+
defer func() {
59+
_ = test.Client().Kueue().KueueV1beta1().ResourceFlavors().Delete(test.Ctx(), resourceFlavor.Name, metav1.DeleteOptions{})
60+
}()
5961
clusterQueue := createClusterQueue(test, resourceFlavor, numberOfGpus)
60-
defer test.Client().Kueue().KueueV1beta1().ClusterQueues().Delete(test.Ctx(), clusterQueue.Name, metav1.DeleteOptions{})
62+
defer func() {
63+
_ = test.Client().Kueue().KueueV1beta1().ClusterQueues().Delete(test.Ctx(), clusterQueue.Name, metav1.DeleteOptions{})
64+
}()
6165
CreateKueueLocalQueue(test, namespace.Name, clusterQueue.Name, AsDefaultQueue)
6266

6367
// Create MNIST training script
@@ -122,9 +126,13 @@ func runMnistRayJobRayClusterAppWrapper(t *testing.T, accelerator string, number
122126

123127
// Create Kueue resources
124128
resourceFlavor := CreateKueueResourceFlavor(test, v1beta1.ResourceFlavorSpec{})
125-
defer test.Client().Kueue().KueueV1beta1().ResourceFlavors().Delete(test.Ctx(), resourceFlavor.Name, metav1.DeleteOptions{})
129+
defer func() {
130+
_ = test.Client().Kueue().KueueV1beta1().ResourceFlavors().Delete(test.Ctx(), resourceFlavor.Name, metav1.DeleteOptions{})
131+
}()
126132
clusterQueue := createClusterQueue(test, resourceFlavor, numberOfGpus)
127-
defer test.Client().Kueue().KueueV1beta1().ClusterQueues().Delete(test.Ctx(), clusterQueue.Name, metav1.DeleteOptions{})
133+
defer func() {
134+
_ = test.Client().Kueue().KueueV1beta1().ClusterQueues().Delete(test.Ctx(), clusterQueue.Name, metav1.DeleteOptions{})
135+
}()
128136
localQueue := CreateKueueLocalQueue(test, namespace.Name, clusterQueue.Name, AsDefaultQueue)
129137

130138
// Create MNIST training script

test/e2e/support.go

+2
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ func ReadFile(t support.Test, fileName string) []byte {
3939
func RemoveCreationTimestamp(t support.Test, rawExtension runtime.RawExtension) runtime.RawExtension {
4040
t.T().Helper()
4141
patchedRaw := strings.ReplaceAll(string(rawExtension.Raw), `"metadata":{"creationTimestamp":null},`, "")
42+
patchedRaw = strings.ReplaceAll(patchedRaw, `"metadata":{"creationTimestamp":null,`, `"metadata":{`)
43+
patchedRaw = strings.ReplaceAll(patchedRaw, `"creationTimestamp":null,`, "")
4244
return runtime.RawExtension{
4345
Raw: []byte(patchedRaw),
4446
}

0 commit comments

Comments
 (0)