Skip to content

Commit b8db46f

Browse files
committed
Run PR check for guided notebooks
1 parent e7a45ba commit b8db46f

File tree

3 files changed

+47
-17
lines changed

3 files changed

+47
-17
lines changed

.github/workflows/e2e_tests.yaml renamed to .github/workflows/guided_notebook_tests.yaml

+41-15
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: e2e
1+
name: Guided notebooks tests
22

33
on:
44
pull_request:
@@ -76,6 +76,8 @@ jobs:
7676

7777
- name: Install NVidia GPU operator for KinD
7878
uses: ./common/github-actions/nvidia-gpu-operator
79+
with:
80+
enable-time-slicing: 'true'
7981

8082
- name: Deploy CodeFlare stack
8183
id: deploy
@@ -113,46 +115,70 @@ jobs:
113115
kubectl create clusterrolebinding sdk-user-list-secrets --clusterrole=list-secrets --user=sdk-user
114116
kubectl config use-context sdk-user
115117
116-
- name: Run e2e tests
118+
- name: Setup Guided notebooks execution
117119
run: |
118-
export CODEFLARE_TEST_OUTPUT_DIR=${{ env.TEMP_DIR }}
119-
echo "CODEFLARE_TEST_OUTPUT_DIR=${CODEFLARE_TEST_OUTPUT_DIR}" >> $GITHUB_ENV
120+
echo "Installing papermill and dependencies..."
121+
pip install poetry papermill ipython ipykernel
122+
# Disable virtualenv creation because papermill has problems using packages installed in a virtualenv
123+
poetry config virtualenvs.create false
120124
121-
set -euo pipefail
122-
pip install poetry
125+
echo "Installing SDK..."
123126
poetry install --with test,docs
124-
echo "Running e2e tests..."
125-
poetry run pytest -v -s ./tests/e2e -m 'kind and nvidia_gpu' > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output.log 2>&1
127+
128+
- name: Run 0_basic_ray.ipynb
129+
run: |
130+
set -euo pipefail
131+
132+
jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' 0_basic_ray.ipynb > 0_basic_ray.ipynb.tmp && mv 0_basic_ray.ipynb.tmp 0_basic_ray.ipynb
133+
jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' 0_basic_ray.ipynb > 0_basic_ray.ipynb.tmp && mv 0_basic_ray.ipynb.tmp 0_basic_ray.ipynb
134+
poetry run papermill 0_basic_ray.ipynb 0_basic_ray_out.ipynb --log-output --execution-timeout 600
135+
working-directory: demo-notebooks/guided-demos
136+
137+
- name: Run 1_cluster_job_client.ipynb
138+
run: |
139+
set -euo pipefail
140+
141+
jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' 1_cluster_job_client.ipynb > 1_cluster_job_client.ipynb.tmp && mv 1_cluster_job_client.ipynb.tmp 1_cluster_job_client.ipynb
142+
jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' 1_cluster_job_client.ipynb > 1_cluster_job_client.ipynb.tmp && mv 1_cluster_job_client.ipynb.tmp 1_cluster_job_client.ipynb
143+
poetry run papermill 1_cluster_job_client.ipynb 1_cluster_job_client_out.ipynb --log-output --execution-timeout 600
126144
env:
127145
GRPC_DNS_RESOLVER: "native"
146+
working-directory: demo-notebooks/guided-demos
128147

129148
- name: Switch to kind-cluster context to print logs
130149
if: always() && steps.deploy.outcome == 'success'
131150
run: kubectl config use-context kind-cluster
132151

133-
- name: Print Pytest output log
152+
- name: Print debug info
134153
if: always() && steps.deploy.outcome == 'success'
135154
run: |
136-
echo "Printing Pytest output logs"
137-
cat ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output.log
155+
echo "Printing debug info"
156+
kubectl describe pods -n default
138157
139158
- name: Print CodeFlare operator logs
140159
if: always() && steps.deploy.outcome == 'success'
141160
run: |
142161
echo "Printing CodeFlare operator logs"
143-
kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${CODEFLARE_TEST_OUTPUT_DIR}/codeflare-operator.log
162+
kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${TEMP_DIR}/codeflare-operator.log
163+
164+
- name: Print Kueue operator logs
165+
if: always() && steps.deploy.outcome == 'success'
166+
run: |
167+
echo "Printing Kueue operator logs"
168+
KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
169+
kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee ${TEMP_DIR}/kueue.log
144170
145171
- name: Print KubeRay operator logs
146172
if: always() && steps.deploy.outcome == 'success'
147173
run: |
148174
echo "Printing KubeRay operator logs"
149-
kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${CODEFLARE_TEST_OUTPUT_DIR}/kuberay.log
175+
kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${TEMP_DIR}/kuberay.log
150176
151177
- name: Export all KinD pod logs
152178
uses: ./common/github-actions/kind-export-logs
153179
if: always() && steps.deploy.outcome == 'success'
154180
with:
155-
output-directory: ${CODEFLARE_TEST_OUTPUT_DIR}
181+
output-directory: ${TEMP_DIR}
156182

157183
- name: Upload logs
158184
uses: actions/upload-artifact@v4
@@ -161,4 +187,4 @@ jobs:
161187
name: logs
162188
retention-days: 10
163189
path: |
164-
${{ env.CODEFLARE_TEST_OUTPUT_DIR }}/**/*.log
190+
${{ env.TEMP_DIR }}/**/*.log

demo-notebooks/guided-demos/0_basic_ray.ipynb

+3-1
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,12 @@
6363
"cluster = Cluster(ClusterConfiguration(\n",
6464
" name='raytest', \n",
6565
" namespace='default', # Update to your namespace\n",
66+
" head_cpus='500m',\n",
67+
" head_memory=2,\n",
6668
" head_gpus=0, # For GPU enabled workloads set the head_gpus and num_gpus\n",
6769
" num_gpus=0,\n",
6870
" num_workers=2,\n",
69-
" min_cpus=1,\n",
71+
" min_cpus='250m',\n",
7072
" max_cpus=1,\n",
7173
" min_memory=4,\n",
7274
" max_memory=4,\n",

demo-notebooks/guided-demos/1_cluster_job_client.ipynb

+3-1
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,12 @@
4545
"cluster = Cluster(ClusterConfiguration(\n",
4646
" name='jobtest',\n",
4747
" namespace='default', # Update to your namespace\n",
48+
" head_cpus='500m',\n",
49+
" head_memory=2,\n",
4850
" head_gpus=1, # For GPU enabled workloads set the head_gpus and num_gpus\n",
4951
" num_gpus=1,\n",
5052
" num_workers=2,\n",
51-
" min_cpus=1,\n",
53+
" min_cpus='250m',\n",
5254
" max_cpus=1,\n",
5355
" min_memory=4,\n",
5456
" max_memory=4,\n",

0 commit comments

Comments
 (0)