# Runs a DVC experiment on a cloud runner provisioned with CML.
#
# Job flow:
#   1. deploy-runner: launches a single self-hosted runner labelled `cml`
#      on the selected cloud via `cml runner launch`.
#   2. runner-job: runs on that runner, executes `dvc exp run`, then shares
#      the result (experiment push for manual runs, PR + report for
#      commits authored by the Studio bot).
name: DVC Studio Experiment

on:

  push:
    # '**' matches every tag name, so tag pushes never start this workflow.
    tags-ignore:
      - '**'

  workflow_dispatch:
    inputs:
      exp-run-args:
        description: 'Args to be passed to dvc exp run call'
        required: false
        type: string
        default: ''
      parent-sha:
        description: 'SHA of the commit to start the experiment from'
        required: false
        type: string
        default: ''
      cloud:
        description: 'Cloud compute provider to host the runner'
        required: false
        default: 'aws'
        type: choice
        options:
          - aws
          - azure
          - gcp
      type:
        description: 'https://registry.terraform.io/providers/iterative/iterative/latest/docs/resources/task#machine-type'
        required: false
        default: 'g5.2xlarge'
      region:
        description: 'https://registry.terraform.io/providers/iterative/iterative/latest/docs/resources/task#cloud-region'
        required: false
        default: 'us-east'
      spot:
        description: 'Request a spot instance'
        required: false
        default: false
        type: boolean
      storage:
        description: 'Disk size in GB'
        required: false
        default: 40
        type: number
      timeout:
        description: 'Timeout in seconds'
        required: false
        default: 3600
        type: number

permissions:
  contents: write
  id-token: write
  pull-requests: write

jobs:

  # Provisions the cloud runner. Skipped for ordinary pushes: it only runs
  # for commits made by the Studio bot or for manual dispatches.
  deploy-runner:
    if: ${{ (github.actor == 'iterative-studio[bot]') || (github.event_name == 'workflow_dispatch') }}
    environment: cloud
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
        with:
          # Empty on push events, which makes checkout use its default ref.
          ref: ${{ inputs.parent-sha || '' }}
      - uses: iterative/setup-cml@v2
      - uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-region: us-east-2
          role-to-assume: ${{ vars.AWS_SANDBOX_ROLE }}
          role-duration-seconds: 43200
      - name: Create Runner
        env:
          REPO_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
        # The `|| '<default>'` fallbacks supply values on push events, where
        # the `inputs` context is empty.
        # `inputs.spot` is a real boolean in the `inputs` context, so it is
        # compared against `true`; comparing it to the string 'true' is
        # always false and would silently drop --cloud-spot.
        run: |
          cml runner launch --single \
            --labels=cml \
            --cloud=${{ inputs.cloud || 'aws' }} \
            --cloud-region=${{ inputs.region || 'us-east' }} \
            --cloud-hdd-size=${{ inputs.storage || '40' }} \
            --cloud-type=${{ inputs.type || 'g5.2xlarge' }} \
            --idle-timeout=${{ inputs.timeout || '3600' }} \
            ${{ (inputs.spot == true && '--cloud-spot') || '' }}

  # Runs the experiment on the runner launched by deploy-runner (matched by
  # the `cml` label), inside the CML GPU container image.
  runner-job:
    needs: deploy-runner
    runs-on: [self-hosted, cml]
    environment: cloud
    container:
      image: iterativeai/cml:latest-gpu
      options: --gpus all --ipc host

    steps:
      - uses: actions/checkout@v3
        with:
          ref: ${{ inputs.parent-sha || '' }}
      - uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-region: us-east-2
          role-to-assume: ${{ vars.AWS_SANDBOX_ROLE }}
          role-duration-seconds: 43200

      - run: pip install -r requirements.txt

      - name: Train
        env:
          REPO_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
          DVC_STUDIO_TOKEN: ${{ secrets.DVC_STUDIO_TOKEN }}
          DVCLIVE_LOGLEVEL: DEBUG
        # NOTE(review): exp-run-args is expanded into the script text before
        # the shell runs, so its contents are parsed as shell syntax. Keep
        # dispatch rights limited to trusted users.
        # The `push_remote` remote defined here is used by both sharing
        # steps below.
        run: |
          cml ci --fetch-depth 0
          dvc exp run --pull --allow-missing ${{ github.event.inputs.exp-run-args }}
          dvc remote add --local push_remote s3://dvc-public/remote/get-started-pools

      # Manual runs: push the experiment to the git remote and the DVC remote.
      - name: Workflow Dispatch Sharing
        if: github.event_name == 'workflow_dispatch'
        env:
          DVC_STUDIO_TOKEN: ${{ secrets.DVC_STUDIO_TOKEN }}
        run: |
          dvc exp push origin -r push_remote

      # Studio bot commits: push data, open a PR, and comment a metrics and
      # params diff against `main` on it.
      - name: Commit-based Sharing
        if: github.actor == 'iterative-studio[bot]'
        env:
          REPO_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
        run: |
          dvc push -r push_remote
          cml pr --squash --skip-ci .
          echo "## Metrics" > report.md
          dvc metrics diff main --md >> report.md
          echo "## Params" >> report.md
          dvc params diff main --md >> report.md
          cml comment create --pr report.md