1
- name : e2e
1
+ name : Guided notebooks tests
2
2
3
3
on :
4
4
pull_request :
@@ -27,6 +27,10 @@ concurrency:
27
27
env :
28
28
CODEFLARE_OPERATOR_IMG : " quay.io/project-codeflare/codeflare-operator:dev"
29
29
30
+ permissions :
31
+ id-token : write # This is required for requesting the JWT
32
+ contents : read
33
+
30
34
jobs :
31
35
kubernetes :
32
36
76
80
77
81
- name : Install NVidia GPU operator for KinD
78
82
uses : ./common/github-actions/nvidia-gpu-operator
83
+ with :
84
+ enable-time-slicing : 'true'
79
85
80
86
- name : Deploy CodeFlare stack
81
87
id : deploy
88
94
kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager
89
95
cd ..
90
96
97
+ - name : Install MINIO
98
+ run : |
99
+ kubectl apply -f ./tests/e2e/minio_deployment.yaml
100
+ kubectl wait --timeout=120s --for=condition=Available=true deployment -n default minio
101
+
91
102
- name : Add user to KinD
92
103
uses : ./common/github-actions/kind-add-user
93
104
with :
@@ -113,46 +124,99 @@ jobs:
113
124
kubectl create clusterrolebinding sdk-user-list-secrets --clusterrole=list-secrets --user=sdk-user
114
125
kubectl config use-context sdk-user
115
126
116
- - name : Run e2e tests
127
+ - name : Setup Guided notebooks execution
117
128
run : |
118
- export CODEFLARE_TEST_OUTPUT_DIR=${{ env.TEMP_DIR }}
119
- echo "CODEFLARE_TEST_OUTPUT_DIR=${CODEFLARE_TEST_OUTPUT_DIR}" >> $GITHUB_ENV
129
+ echo "Installing papermill and dependencies..."
130
+ pip install poetry papermill ipython ipykernel
131
+ # Disable virtualenv creation due to problems using packages installed in a virtualenv from papermill
132
+ poetry config virtualenvs.create false
120
133
121
- set -euo pipefail
122
- pip install poetry
134
+ echo "Installing SDK..."
123
135
poetry install --with test,docs
124
- echo "Running e2e tests..."
125
- poetry run pytest -v -s ./tests/e2e -m 'kind and nvidia_gpu' > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output.log 2>&1
136
+
137
+ - name : Run 0_basic_ray.ipynb
138
+ run : |
139
+ set -euo pipefail
140
+
141
+ # Remove login/logout cells, as KinD doesn't support authentication using token
142
+ jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' 0_basic_ray.ipynb > 0_basic_ray.ipynb.tmp && mv 0_basic_ray.ipynb.tmp 0_basic_ray.ipynb
143
+ jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' 0_basic_ray.ipynb > 0_basic_ray.ipynb.tmp && mv 0_basic_ray.ipynb.tmp 0_basic_ray.ipynb
144
+ # Run notebook
145
+ # poetry run papermill 0_basic_ray.ipynb 0_basic_ray_out.ipynb --log-output --execution-timeout 600
146
+ working-directory : demo-notebooks/guided-demos
147
+
148
+ - name : Run 1_cluster_job_client.ipynb
149
+ run : |
150
+ set -euo pipefail
151
+
152
+ # Remove login/logout cells, as KinD doesn't support authentication using token
153
+ jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' 1_cluster_job_client.ipynb > 1_cluster_job_client.ipynb.tmp && mv 1_cluster_job_client.ipynb.tmp 1_cluster_job_client.ipynb
154
+ jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' 1_cluster_job_client.ipynb > 1_cluster_job_client.ipynb.tmp && mv 1_cluster_job_client.ipynb.tmp 1_cluster_job_client.ipynb
155
+ # Replace async log tailing with waiting for the job to finish, as async logs don't work properly in papermill
156
+ JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
157
+ jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' 1_cluster_job_client.ipynb > 1_cluster_job_client.ipynb.tmp && mv 1_cluster_job_client.ipynb.tmp 1_cluster_job_client.ipynb
158
+ # Run notebook
159
+ # poetry run papermill 1_cluster_job_client.ipynb 1_cluster_job_client_out.ipynb --log-output --execution-timeout 1200
160
+ working-directory : demo-notebooks/guided-demos
161
+
162
+ - name : Run 2_basic_interactive.ipynb
163
+ run : |
164
+ set -euo pipefail
165
+
166
+ # Remove login/logout cells, as KinD doesn't support authentication using token
167
+ jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' 2_basic_interactive.ipynb > 2_basic_interactive.ipynb.tmp && mv 2_basic_interactive.ipynb.tmp 2_basic_interactive.ipynb
168
+ jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' 2_basic_interactive.ipynb > 2_basic_interactive.ipynb.tmp && mv 2_basic_interactive.ipynb.tmp 2_basic_interactive.ipynb
169
+ # Rewrite cluster_uri() to local_client_url() to retrieve a client URL reachable from outside the cluster, as the test is executed outside of the cluster
170
+ sed -i "s/cluster_uri()/local_client_url()/" 2_basic_interactive.ipynb
171
+ # Set an explicit namespace, as the SDK (currently) needs it to resolve local queues
172
+ sed -i "s/head_cpus=1,/head_cpus=1, namespace='default',/" 2_basic_interactive.ipynb
173
+ # Add MINIO related modules to runtime environment
174
+ sed -i "s/transformers/s3fs\\\\\", \\\\\"pyarrow\\\\\", \\\\\"transformers/" 2_basic_interactive.ipynb
175
+ # Replace markdown cell with remote configuration for MINIO
176
+ MINIO_CONFIG=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/minio_remote_config_cell.json)
177
+ jq --argjson minio_config "$MINIO_CONFIG" -r '(.cells[] | select(.source[] | contains("Now that we are connected"))) |= $minio_config' 2_basic_interactive.ipynb > 2_basic_interactive.ipynb.tmp && mv 2_basic_interactive.ipynb.tmp 2_basic_interactive.ipynb
178
+ # Configure persistent storage for Ray trainer
179
+ sed -i -E "s/# run_config.*\)/, run_config=ray.get(get_minio_run_config.remote())/" 2_basic_interactive.ipynb
180
+ # Run notebook
181
+ poetry run papermill 2_basic_interactive.ipynb 2_basic_interactive_out.ipynb --log-output --execution-timeout 1200
126
182
env :
127
183
GRPC_DNS_RESOLVER : "native"
184
+ working-directory : demo-notebooks/guided-demos
128
185
129
186
- name : Switch to kind-cluster context to print logs
130
187
if : always() && steps.deploy.outcome == 'success'
131
188
run : kubectl config use-context kind-cluster
132
189
133
- - name : Print Pytest output log
190
+ - name : Print debug info
134
191
if : always() && steps.deploy.outcome == 'success'
135
192
run : |
136
- echo "Printing Pytest output logs "
137
- cat ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output.log
193
+ echo "Printing debug info "
194
+ kubectl describe pods -n default
138
195
139
196
- name : Print CodeFlare operator logs
140
197
if : always() && steps.deploy.outcome == 'success'
141
198
run : |
142
199
echo "Printing CodeFlare operator logs"
143
- kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${CODEFLARE_TEST_OUTPUT_DIR}/codeflare-operator.log
200
+ kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${TEMP_DIR}/codeflare-operator.log
201
+
202
+ - name : Print Kueue operator logs
203
+ if : always() && steps.deploy.outcome == 'success'
204
+ run : |
205
+ echo "Printing Kueue operator logs"
206
+ KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
207
+ kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee ${TEMP_DIR}/kueue.log
144
208
145
209
- name : Print KubeRay operator logs
146
210
if : always() && steps.deploy.outcome == 'success'
147
211
run : |
148
212
echo "Printing KubeRay operator logs"
149
- kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${CODEFLARE_TEST_OUTPUT_DIR }/kuberay.log
213
+ kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${TEMP_DIR}/kuberay.log
150
214
151
215
- name : Export all KinD pod logs
152
216
uses : ./common/github-actions/kind-export-logs
153
217
if : always() && steps.deploy.outcome == 'success'
154
218
with :
155
- output-directory : ${CODEFLARE_TEST_OUTPUT_DIR }
219
+ output-directory : ${TEMP_DIR}
156
220
157
221
- name : Upload logs
158
222
uses : actions/upload-artifact@v4
@@ -161,4 +225,4 @@ jobs:
161
225
name : logs
162
226
retention-days : 10
163
227
path : |
164
- ${{ env.CODEFLARE_TEST_OUTPUT_DIR }}/**/*.log
228
+ ${{ env.TEMP_DIR }}/**/*.log
0 commit comments