14
14
15
15
jobs :
16
16
verify-hf_interactive :
17
- # if: ${{ github.event.label.name == 'test-additional-notebooks ' }}
17
+ if : ${{ github.event.label.name == 'test' }}
18
18
runs-on : ubuntu-20.04-4core
19
19
20
20
steps :
85
85
# Remove login/logout cells, as KinD doesn't support authentication using token
86
86
jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' hf_interactive.ipynb > hf_interactive.ipynb.tmp && mv hf_interactive.ipynb.tmp hf_interactive.ipynb
87
87
jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' hf_interactive.ipynb > hf_interactive.ipynb.tmp && mv hf_interactive.ipynb.tmp hf_interactive.ipynb
88
+ # Rewrite cluster_uri() to local_client_url() to retrieve a client URL that is available from outside the cluster, as the test is executed outside of the cluster
89
+ sed -i "s/cluster_uri()/local_client_url()/g" hf_interactive.ipynb
88
90
# Replace async logs with waiting for job to finish, async logs don't work properly in papermill
89
91
JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
90
92
jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' hf_interactive.ipynb > hf_interactive.ipynb.tmp && mv hf_interactive.ipynb.tmp hf_interactive.ipynb
@@ -136,7 +138,7 @@ jobs:
136
138
verify-local_interactive :
137
139
# if: ${{ github.event.label.name == 'test-additional-notebooks' }}
138
140
# runs-on: ubuntu-20.04-4core
139
- runs-on : ubuntu-20.04-4core
141
+ runs-on : ubuntu-20.04-4core-gpu
140
142
141
143
steps :
142
144
- name : Checkout code
@@ -174,9 +176,17 @@ jobs:
174
176
python-version : ' 3.9'
175
177
cache : ' pip' # caching pip dependencies
176
178
179
+ - name : Setup NVidia GPU environment for KinD
180
+ uses : ./common/github-actions/nvidia-gpu-setup
181
+
177
182
- name : Setup and start KinD cluster
178
183
uses : ./common/github-actions/kind
179
184
185
+ - name : Install NVidia GPU operator for KinD
186
+ uses : ./common/github-actions/nvidia-gpu-operator
187
+ with :
188
+ enable-time-slicing : ' true'
189
+
180
190
- name : Deploy CodeFlare stack
181
191
id : deploy
182
192
run : |
@@ -188,6 +198,11 @@ jobs:
188
198
kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager
189
199
cd ..
190
200
201
+ - name : Install MINIO
202
+ run : |
203
+ kubectl apply -f ./tests/e2e/minio_deployment.yaml
204
+ kubectl wait --timeout=120s --for=condition=Available=true deployment -n default minio
205
+
191
206
- name : Setup Additional demo notebooks execution
192
207
run : |
193
208
echo "Installing papermill and dependencies..."
@@ -205,14 +220,41 @@ jobs:
205
220
# Remove login/logout cells, as KinD doesn't support authentication using token
206
221
jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object and log in to desired user account")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
207
222
jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
223
+ # Rewrite every cluster_uri() call to local_client_url() to retrieve a client URL that is available from outside the cluster, as the test is executed outside of the cluster ("g" replaces all occurrences on a line, matching the other notebook jobs)
224
+ sed -i "s/cluster_uri()/local_client_url()/g" local_interactive.ipynb
208
225
# Replace async logs with waiting for job to finish, async logs don't work properly in papermill
209
226
JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
210
227
jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
211
228
# Set an explicit namespace, as the SDK (currently) needs it to resolve local queues
212
- sed -i "s/worker_cpu_requests=1,/worker_cpu_requests=1 , namespace='default',/" local_interactive.ipynb
229
+ sed -i "s/worker_cpu_requests=1,/worker_cpu_requests='250m' , namespace='default',/" local_interactive.ipynb
213
230
# Run notebook
214
- poetry run papermill local_interactive.ipynb hf_interactive_out.ipynb --log-output --execution-timeout 1200
231
+ poetry run papermill local_interactive.ipynb local_interactive_out.ipynb --log-output --execution-timeout 1200
232
+ env :
233
+ GRPC_DNS_RESOLVER : " native"
215
234
working-directory : demo-notebooks/additional-demos
235
+ # - name: Run 2_basic_interactive.ipynb
236
+ # run: |
237
+ # set -euo pipefail
238
+
239
+ # # Remove login/logout cells, as KinD doesn't support authentication using token
240
+ # jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' 2_basic_interactive.ipynb > 2_basic_interactive.ipynb.tmp && mv 2_basic_interactive.ipynb.tmp 2_basic_interactive.ipynb
241
+ # jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' 2_basic_interactive.ipynb > 2_basic_interactive.ipynb.tmp && mv 2_basic_interactive.ipynb.tmp 2_basic_interactive.ipynb
242
+ # # Rewrite cluster_uri() to local_client_url() to retrieve a client URL that is available from outside the cluster, as the test is executed outside of the cluster
243
+ # sed -i "s/cluster_uri()/local_client_url()/" 2_basic_interactive.ipynb
244
+ # # Set an explicit namespace, as the SDK (currently) needs it to resolve local queues
245
+ # sed -i "s/head_cpu_limits=1,/head_cpu_limits=1, namespace='default',/" 2_basic_interactive.ipynb
246
+ # # Add MINIO related modules to runtime environment
247
+ # sed -i "s/\\\\\"transformers/\\\\\"s3fs\\\\\", \\\\\"pyarrow\\\\\", \\\\\"transformers/" 2_basic_interactive.ipynb
248
+ # # Replace markdown cell with remote configuration for MINIO
249
+ # MINIO_CONFIG=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/minio_remote_config_cell.json)
250
+ # jq --argjson minio_config "$MINIO_CONFIG" -r '(.cells[] | select(.source[] | contains("Now that we are connected"))) |= $minio_config' 2_basic_interactive.ipynb > 2_basic_interactive.ipynb.tmp && mv 2_basic_interactive.ipynb.tmp 2_basic_interactive.ipynb
251
+ # # Configure persistent storage for Ray trainer
252
+ # sed -i -E "s/# run_config.*\)/, run_config=ray.get(get_minio_run_config.remote())/" 2_basic_interactive.ipynb
253
+ # # Run notebook
254
+ # poetry run papermill 2_basic_interactive.ipynb 2_basic_interactive_out.ipynb --log-output --execution-timeout 1200
255
+ # env:
256
+ # GRPC_DNS_RESOLVER: "native"
257
+ # working-directory: demo-notebooks/guided-demos
216
258
217
259
- name : Print CodeFlare operator logs
218
260
if : always() && steps.deploy.outcome == 'success'
@@ -249,7 +291,7 @@ jobs:
249
291
${{ env.TEMP_DIR }}/**/*.log
250
292
251
293
verify-ray_job_client :
252
- # if: ${{ github.event.label.name == 'test-additional-notebooks ' }}
294
+ if : ${{ github.event.label.name == 'test' }}
253
295
runs-on : ubuntu-20.04-4core
254
296
255
297
steps :
@@ -319,6 +361,8 @@ jobs:
319
361
# Remove login/logout cells, as KinD doesn't support authentication using token
320
362
jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
321
363
jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
364
+ # Rewrite cluster_uri() to local_client_url() to retrieve a client URL that is available from outside the cluster, as the test is executed outside of the cluster
365
+ sed -i "s/cluster_uri()/local_client_url()/g" ray_job_client.ipynb
322
366
# Replace async logs with waiting for job to finish, async logs don't work properly in papermill
323
367
JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
324
368
jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
0 commit comments