1
- name : e2e
1
+ name : Guided notebooks tests
2
2
3
3
on :
4
4
pull_request :
76
76
77
77
- name : Install NVidia GPU operator for KinD
78
78
uses : ./common/github-actions/nvidia-gpu-operator
79
+ with :
80
+ enable-time-slicing : 'true'
79
81
80
82
- name : Deploy CodeFlare stack
81
83
id : deploy
@@ -113,46 +115,77 @@ jobs:
113
115
kubectl create clusterrolebinding sdk-user-list-secrets --clusterrole=list-secrets --user=sdk-user
114
116
kubectl config use-context sdk-user
115
117
116
- - name : Run e2e tests
118
+ - name : Setup Guided notebooks execution
117
119
run : |
118
- export CODEFLARE_TEST_OUTPUT_DIR=${{ env.TEMP_DIR }}
119
- echo "CODEFLARE_TEST_OUTPUT_DIR=${CODEFLARE_TEST_OUTPUT_DIR}" >> $GITHUB_ENV
120
+ echo "Installing papermill and dependencies..."
121
+ pip install poetry papermill ipython ipykernel
122
+ # Disable virtualenv creation, because papermill has problems using packages installed in a virtualenv
123
+ poetry config virtualenvs.create false
120
124
121
- set -euo pipefail
122
- pip install poetry
125
+ echo "Installing SDK..."
123
126
poetry install --with test,docs
124
- echo "Running e2e tests..."
125
- poetry run pytest -v -s ./tests/e2e -m 'kind and nvidia_gpu' > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output.log 2>&1
127
+
128
+ - name : Run 0_basic_ray.ipynb
129
+ run : |
130
+ set -euo pipefail
131
+
132
+ # Remove login/logout cells, as KinD doesn't support authentication using token
133
+ jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' 0_basic_ray.ipynb > 0_basic_ray.ipynb.tmp && mv 0_basic_ray.ipynb.tmp 0_basic_ray.ipynb
134
+ jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' 0_basic_ray.ipynb > 0_basic_ray.ipynb.tmp && mv 0_basic_ray.ipynb.tmp 0_basic_ray.ipynb
135
+ # Run notebook
136
+ poetry run papermill 0_basic_ray.ipynb 0_basic_ray_out.ipynb --log-output --execution-timeout 600
137
+ working-directory : demo-notebooks/guided-demos
138
+
139
+ - name : Run 1_cluster_job_client.ipynb
140
+ run : |
141
+ set -euo pipefail
142
+
143
+ # Remove login/logout cells, as KinD doesn't support authentication using token
144
+ jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' 1_cluster_job_client.ipynb > 1_cluster_job_client.ipynb.tmp && mv 1_cluster_job_client.ipynb.tmp 1_cluster_job_client.ipynb
145
+ jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' 1_cluster_job_client.ipynb > 1_cluster_job_client.ipynb.tmp && mv 1_cluster_job_client.ipynb.tmp 1_cluster_job_client.ipynb
146
+ # Replace job termination with waiting for job to finish
147
+ JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
148
+ jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("Delete a job"))) |= $job_wait' 1_cluster_job_client.ipynb > 1_cluster_job_client.ipynb.tmp && mv 1_cluster_job_client.ipynb.tmp 1_cluster_job_client.ipynb
149
+ # Run notebook
150
+ poetry run papermill 1_cluster_job_client.ipynb 1_cluster_job_client_out.ipynb --log-output --execution-timeout 900
126
151
env :
127
152
GRPC_DNS_RESOLVER : "native"
153
+ working-directory : demo-notebooks/guided-demos
128
154
129
155
- name : Switch to kind-cluster context to print logs
130
156
if : always() && steps.deploy.outcome == 'success'
131
157
run : kubectl config use-context kind-cluster
132
158
133
- - name : Print Pytest output log
159
+ - name : Print debug info
134
160
if : always() && steps.deploy.outcome == 'success'
135
161
run : |
136
- echo "Printing Pytest output logs "
137
- cat ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output.log
162
+ echo "Printing debug info "
163
+ kubectl describe pods -n default
138
164
139
165
- name : Print CodeFlare operator logs
140
166
if : always() && steps.deploy.outcome == 'success'
141
167
run : |
142
168
echo "Printing CodeFlare operator logs"
143
- kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${CODEFLARE_TEST_OUTPUT_DIR}/codeflare-operator.log
169
+ kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${TEMP_DIR}/codeflare-operator.log
170
+
171
+ - name : Print Kueue operator logs
172
+ if : always() && steps.deploy.outcome == 'success'
173
+ run : |
174
+ echo "Printing Kueue operator logs"
175
+ KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
176
+ kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee ${TEMP_DIR}/kueue.log
144
177
145
178
- name : Print KubeRay operator logs
146
179
if : always() && steps.deploy.outcome == 'success'
147
180
run : |
148
181
echo "Printing KubeRay operator logs"
149
- kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${CODEFLARE_TEST_OUTPUT_DIR}/kuberay.log
182
+ kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${TEMP_DIR}/kuberay.log
150
183
151
184
- name : Export all KinD pod logs
152
185
uses : ./common/github-actions/kind-export-logs
153
186
if : always() && steps.deploy.outcome == 'success'
154
187
with :
155
- output-directory : ${CODEFLARE_TEST_OUTPUT_DIR}
188
+ output-directory : ${TEMP_DIR}
156
189
157
190
- name : Upload logs
158
191
uses : actions/upload-artifact@v4
@@ -161,4 +194,4 @@ jobs:
161
194
name : logs
162
195
retention-days : 10
163
196
path : |
164
- ${{ env.CODEFLARE_TEST_OUTPUT_DIR }}/**/*.log
197
+ ${{ env.TEMP_DIR }}/**/*.log
0 commit comments