14
14
15
15
jobs :
16
16
verify-hf_interactive :
17
- # if: ${{ github.event.label.name == 'test-additional-notebooks ' }}
17
+ if : ${{ github.event.label.name == 'test' }}
18
18
runs-on : ubuntu-20.04-4core
19
19
20
20
steps :
85
85
# Remove login/logout cells, as KinD doesn't support authentication using token
86
86
jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' hf_interactive.ipynb > hf_interactive.ipynb.tmp && mv hf_interactive.ipynb.tmp hf_interactive.ipynb
87
87
jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' hf_interactive.ipynb > hf_interactive.ipynb.tmp && mv hf_interactive.ipynb.tmp hf_interactive.ipynb
88
+ # Rewrite cluster_uri() to local_client_url() to retrieve a client URL reachable from outside the cluster, as the test is executed outside of the cluster
89
+ sed -i "s/cluster_uri()/local_client_url()/g" hf_interactive.ipynb
88
90
# Replace async logs with waiting for the job to finish, as async logs don't work properly in papermill
89
91
JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
90
92
jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' hf_interactive.ipynb > hf_interactive.ipynb.tmp && mv hf_interactive.ipynb.tmp hf_interactive.ipynb
@@ -136,7 +138,7 @@ jobs:
136
138
verify-local_interactive :
137
139
# if: ${{ github.event.label.name == 'test-additional-notebooks' }}
138
140
# runs-on: ubuntu-20.04-4core
139
- runs-on : ubuntu-20.04-4core
141
+ runs-on : ubuntu-20.04-4core-gpu
140
142
141
143
steps :
142
144
- name : Checkout code
@@ -174,9 +176,17 @@ jobs:
174
176
python-version : ' 3.9'
175
177
cache : ' pip' # caching pip dependencies
176
178
179
+ - name : Setup NVidia GPU environment for KinD
180
+ uses : ./common/github-actions/nvidia-gpu-setup
181
+
177
182
- name : Setup and start KinD cluster
178
183
uses : ./common/github-actions/kind
179
184
185
+ - name : Install NVidia GPU operator for KinD
186
+ uses : ./common/github-actions/nvidia-gpu-operator
187
+ with :
188
+ enable-time-slicing : ' true'
189
+
180
190
- name : Deploy CodeFlare stack
181
191
id : deploy
182
192
run : |
@@ -188,6 +198,11 @@ jobs:
188
198
kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager
189
199
cd ..
190
200
201
+ - name : Install MINIO
202
+ run : |
203
+ kubectl apply -f ./tests/e2e/minio_deployment.yaml
204
+ kubectl wait --timeout=120s --for=condition=Available=true deployment -n default minio
205
+
191
206
- name : Setup Additional demo notebooks execution
192
207
run : |
193
208
echo "Installing papermill and dependencies..."
@@ -198,21 +213,46 @@ jobs:
198
213
echo "Installing SDK..."
199
214
poetry install --with test,docs
200
215
201
- - name : Run local_interactive.ipynb
216
+ # - name: Run local_interactive.ipynb
217
+ # run: |
218
+ # set -euo pipefail
219
+
220
+ # # Remove login/logout cells, as KinD doesn't support authentication using token
221
+ # jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object and log in to desired user account")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
222
+ # jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
223
+ # # Rewrite cluster_uri() to local_client_url() to retrieve client URL available out of cluster, as the test is executed outside of cluster
224
+ # sed -i "s/cluster_uri()/local_client_url()/g" local_interactive.ipynb
225
+ # # Replace async logs with waiting for job to finish, async logs don't work properly in papermill
226
+ # JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
227
+ # jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
228
+ # # Set explicit namespace as SDK need it (currently) to resolve local queues
229
+ # sed -i "s/worker_cpu_requests=1,/worker_cpu_requests='250m', namespace='default',/" local_interactive.ipynb
230
+ # # Run notebook
231
+ # poetry run papermill local_interactive.ipynb local_interactive_out.ipynb --log-output --execution-timeout 1200
232
+ # working-directory: demo-notebooks/additional-demos
233
+ - name : Run 2_basic_interactive.ipynb
202
234
run : |
203
235
set -euo pipefail
204
236
205
237
# Remove login/logout cells, as KinD doesn't support authentication using token
206
- jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object and log in to desired user account")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
207
- jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
208
- # Replace async logs with waiting for job to finish, async logs don't work properly in papermill
209
- JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
210
- jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
238
+ jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' 2_basic_interactive.ipynb > 2_basic_interactive.ipynb.tmp && mv 2_basic_interactive.ipynb.tmp 2_basic_interactive.ipynb
239
+ jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' 2_basic_interactive.ipynb > 2_basic_interactive.ipynb.tmp && mv 2_basic_interactive.ipynb.tmp 2_basic_interactive.ipynb
240
+ # Rewrite cluster_uri() to local_client_url() to retrieve a client URL reachable from outside the cluster, as the test is executed outside of the cluster
241
+ sed -i "s/cluster_uri()/local_client_url()/" 2_basic_interactive.ipynb
211
242
# Set explicit namespace, as the SDK (currently) needs it to resolve local queues
212
- sed -i "s/worker_cpu_requests=1,/worker_cpu_requests=1, namespace='default',/" local_interactive.ipynb
243
+ sed -i "s/head_cpu_limits=1,/head_cpu_limits=1, namespace='default',/" 2_basic_interactive.ipynb
244
+ # Add MINIO related modules to runtime environment
245
+ sed -i "s/\\\\\"transformers/\\\\\"s3fs\\\\\", \\\\\"pyarrow\\\\\", \\\\\"transformers/" 2_basic_interactive.ipynb
246
+ # Replace markdown cell with remote configuration for MINIO
247
+ MINIO_CONFIG=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/minio_remote_config_cell.json)
248
+ jq --argjson minio_config "$MINIO_CONFIG" -r '(.cells[] | select(.source[] | contains("Now that we are connected"))) |= $minio_config' 2_basic_interactive.ipynb > 2_basic_interactive.ipynb.tmp && mv 2_basic_interactive.ipynb.tmp 2_basic_interactive.ipynb
249
+ # Configure persistent storage for Ray trainer
250
+ sed -i -E "s/# run_config.*\)/, run_config=ray.get(get_minio_run_config.remote())/" 2_basic_interactive.ipynb
213
251
# Run notebook
214
- poetry run papermill local_interactive.ipynb hf_interactive_out.ipynb --log-output --execution-timeout 1200
215
- working-directory : demo-notebooks/additional-demos
252
+ poetry run papermill 2_basic_interactive.ipynb 2_basic_interactive_out.ipynb --log-output --execution-timeout 1200
253
+ env :
254
+ GRPC_DNS_RESOLVER : " native"
255
+ working-directory : demo-notebooks/guided-demos
216
256
217
257
- name : Print CodeFlare operator logs
218
258
if : always() && steps.deploy.outcome == 'success'
@@ -249,7 +289,7 @@ jobs:
249
289
${{ env.TEMP_DIR }}/**/*.log
250
290
251
291
verify-ray_job_client :
252
- # if: ${{ github.event.label.name == 'test-additional-notebooks ' }}
292
+ if : ${{ github.event.label.name == 'test' }}
253
293
runs-on : ubuntu-20.04-4core
254
294
255
295
steps :
@@ -319,6 +359,8 @@ jobs:
319
359
# Remove login/logout cells, as KinD doesn't support authentication using token
320
360
jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
321
361
jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
362
+ # Rewrite cluster_uri() to local_client_url() to retrieve a client URL reachable from outside the cluster, as the test is executed outside of the cluster
363
+ sed -i "s/cluster_uri()/local_client_url()/g" ray_job_client.ipynb
322
364
# Replace async logs with waiting for the job to finish, as async logs don't work properly in papermill
323
365
JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
324
366
jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
0 commit comments