@@ -5,11 +5,11 @@
 # docs/source/contributing/dockerfile/dockerfile.md and
 # docs/source/assets/contributing/dockerfile-stages-dependency.png
 
-ARG CUDA_VERSION=12.4.1
+ARG CUDA_VERSION=12.8.1
 #################### BASE BUILD IMAGE ####################
 # prepare basic build environment
 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 AS base
-ARG CUDA_VERSION=12.4.1
+ARG CUDA_VERSION=12.8.1
 ARG PYTHON_VERSION=3.12
 ARG TARGETPLATFORM
 ENV DEBIAN_FRONTEND=noninteractive
@@ -37,6 +37,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
 ENV UV_HTTP_TIMEOUT=500
+ENV UV_INDEX_STRATEGY="unsafe-best-match"
 
 # Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519
 # as it was causing spam when compiling the CUTLASS kernels
@@ -69,7 +70,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 COPY requirements/common.txt requirements/common.txt
 COPY requirements/cuda.txt requirements/cuda.txt
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system -r requirements/cuda.txt
+    uv pip install --system -r requirements/cuda.txt \
+        --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
 
 # cuda arch list used by torch
 # can be useful for both `dev` and `test`
@@ -92,9 +94,11 @@ COPY requirements/build.txt requirements/build.txt
 # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
 ENV UV_HTTP_TIMEOUT=500
+ENV UV_INDEX_STRATEGY="unsafe-best-match"
 
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system -r requirements/build.txt
+    uv pip install --system -r requirements/build.txt \
+        --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
 
 COPY . .
 ARG GIT_REPO_CHECK=0
@@ -161,22 +165,25 @@ FROM base as dev
 # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
 ENV UV_HTTP_TIMEOUT=500
+ENV UV_INDEX_STRATEGY="unsafe-best-match"
+
+# Workaround for #17068
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system --no-build-isolation "git+https://github.com/state-spaces/mamba@v2.2.4"
 
 COPY requirements/lint.txt requirements/lint.txt
 COPY requirements/test.txt requirements/test.txt
 COPY requirements/dev.txt requirements/dev.txt
-# Workaround for #17068
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system mamba-ssm==2.2.4 --no-build-isolation
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system -r requirements/dev.txt
+    uv pip install --system -r requirements/dev.txt \
+        --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
 #################### DEV IMAGE ####################
 
 #################### vLLM installation IMAGE ####################
 # image with vLLM installed
 # TODO: Restore to base image after FlashInfer AOT wheel fixed
 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS vllm-base
-ARG CUDA_VERSION=12.4.1
+ARG CUDA_VERSION=12.8.1
 ARG PYTHON_VERSION=3.12
 WORKDIR /vllm-workspace
 ENV DEBIAN_FRONTEND=noninteractive
@@ -209,6 +216,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
 ENV UV_HTTP_TIMEOUT=500
+ENV UV_INDEX_STRATEGY="unsafe-best-match"
 
 # Workaround for https://github.com/openai/triton/issues/2507 and
 # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
@@ -229,7 +237,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Install vllm wheel first, so that torch etc will be installed.
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
     --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system dist/*.whl --verbose
+    uv pip install --system dist/*.whl --verbose \
+        --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
 
 # If we need to build FlashInfer wheel before its release:
 # $ export FLASHINFER_ENABLE_AOT=1
@@ -246,19 +255,26 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
 RUN --mount=type=cache,target=/root/.cache/uv \
     . /etc/environment && \
     if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
-        uv pip install --system https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post2/flashinfer_python-0.2.1.post2+cu124torch2.6-cp38-abi3-linux_x86_64.whl ; \
+        # TESTING: install FlashInfer from source to test 2.7.0 final RC
+        FLASHINFER_ENABLE_AOT=1 TORCH_CUDA_ARCH_LIST='7.5 8.0 8.6 8.9 9.0+PTX' \
+            uv pip install --system --no-build-isolation "git+https://github.com/flashinfer-ai/flashinfer@v0.2.2.post1" ; \
     fi
 COPY examples examples
 COPY benchmarks benchmarks
 COPY ./vllm/collect_env.py .
 
+RUN --mount=type=cache,target=/root/.cache/uv \
+    . /etc/environment && \
+    uv pip list
+
 # Although we build Flashinfer with AOT mode, there's still
 # some issues w.r.t. JIT compilation. Therefore we need to
 # install build dependencies for JIT compilation.
 # TODO: Remove this once FlashInfer AOT wheel is fixed
 COPY requirements/build.txt requirements/build.txt
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system -r requirements/build.txt
+    uv pip install --system -r requirements/build.txt \
+        --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
 
 #################### vLLM installation IMAGE ####################
 
@@ -272,11 +288,13 @@ ADD . /vllm-workspace/
 # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
 ENV UV_HTTP_TIMEOUT=500
+ENV UV_INDEX_STRATEGY="unsafe-best-match"
 
-# install development dependencies (for testing)
 # Workaround for #17068
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system mamba-ssm==2.2.4 --no-build-isolation
+    uv pip install --system --no-build-isolation "git+https://github.com/state-spaces/mamba@v2.2.4"
+
+# install development dependencies (for testing)
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system -r requirements/dev.txt
 
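For readers checking the repeated `--extra-index-url` additions: the URL suffix is derived from `CUDA_VERSION` by keeping the major.minor components and dropping the dot. A minimal shell sketch that just expands the Dockerfile's own pipeline by hand:

```sh
# Same substitution as in the Dockerfile, step by step.
CUDA_VERSION=12.8.1
echo "$CUDA_VERSION" | cut -d. -f1,2              # -> 12.8 (keep major.minor)
echo "$CUDA_VERSION" | cut -d. -f1,2 | tr -d '.'  # -> 128  (drop the dot)
# Resulting index: https://download.pytorch.org/whl/cu128
```

So a single ARG bump (12.4.1 to 12.8.1) retargets every PyTorch wheel install to the matching cuXYZ index.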
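The `UV_INDEX_STRATEGY="unsafe-best-match"` lines pair with that extra index: by default uv resolves each package from the first index that provides it, while `unsafe-best-match` compares candidates across all configured indexes, which mixed PyPI/PyTorch-index installs like these rely on. A stand-alone sketch, with `torch` as an illustrative package choice (not taken from the diff):

```sh
# Illustrative only: resolve torch against both PyPI and the cu128 index,
# letting uv pick the best matching candidate across the two.
UV_INDEX_STRATEGY="unsafe-best-match" \
    uv pip install --system torch \
        --extra-index-url https://download.pytorch.org/whl/cu128
```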