Rename the docker-publish `wheel` input to `whl` and validate the wheel version.

Summary of this patch:
  * nm-get-docker-tags: the `wheel` input becomes `whl`; the version parsed
    from the wheel name is validated against the RELEASE (X.Y.Z) or NIGHTLY
    (X.Y.Z.YYYYMMDD) format and the step fails with an error otherwise.
    The dots in both regexes are escaped (`\.`): an unescaped `.` matches
    any character, which would let a malformed version such as `1x2y3`
    pass the RELEASE check.
  * nm-build-test: the DOCKER job needs only BUILD and runs whenever
    wf_category is not 'REMOTE'; it forwards the wheel name as `whl`.
  * nm-build: the reusable workflow exposes the `whl` output of its BUILD job.
  * publish-docker: `whl` is a required input (the 'latest' default is gone).
  * Dockerfile: the flash-attn builder stage and its install step are removed.

NOTE(review): this patch was recovered from a whitespace-collapsed copy, so
the indentation of context lines is reconstructed -- confirm it against the
target files before running `git apply`. The `index` blob id of action.yml
predates the regex escaping and no longer describes the post-image exactly.

diff --git a/.github/actions/nm-get-docker-tags/action.yml b/.github/actions/nm-get-docker-tags/action.yml
index af4130459286b..c7b9cf85e69e7 100644
--- a/.github/actions/nm-get-docker-tags/action.yml
+++ b/.github/actions/nm-get-docker-tags/action.yml
@@ -4,8 +4,8 @@ inputs:
   wf_category:
     description: "type of nm-vllm to install for the docker image: NIGHTLY or RELEASE"
     required: true
-  wheel:
-    description: "wheel name, if latest use the latest from nm pypi"
+  whl:
+    description: "name of nm-vllm wheel to install for the docker image"
     required: true
 outputs:
   tag:
@@ -22,16 +22,23 @@ runs:
   steps:
     - id: tags
       run: |
-        BUILD_VERSION=`echo "${{ inputs.wheel }}" | cut -d'-' -f2`
+        BUILD_VERSION=`echo "${{ inputs.whl }}" | cut -d'-' -f2`
         if [[ "${{ inputs.wf_category }}" == "RELEASE" ]]; then
-            TAG="v${build_version}"
-            EXTRA_TAG=latest
+            if [[ "${BUILD_VERSION}" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
+                TAG="v${BUILD_VERSION}"
+                EXTRA_TAG=latest
+            else
+                echo "ERROR: wheel version ${BUILD_VERSION} doesn't match RELEASE format. Check input."
+                exit 1
+            fi
         else
-            TAG=`echo "${build_version}" | cut -d'.' -f4`
-            EXTRA_TAG=nightly
-        fi
-        if [[ "${{ inputs.wheel }}" == "latest" ]]; then
-            BUILD_VERSION="latest"
+            if [[ "${BUILD_VERSION}" =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]{8}$ ]]; then
+                TAG=`echo "${BUILD_VERSION}" | cut -d'.' -f4`
+                EXTRA_TAG=nightly
+            else
+                echo "ERROR: wheel version ${BUILD_VERSION} doesn't match NIGHTLY format. Check input."
+                exit 1
+            fi
         fi
         echo "tag=${TAG}" >> $GITHUB_OUTPUT
         echo "extra_tag=${EXTRA_TAG}" >> $GITHUB_OUTPUT
diff --git a/.github/workflows/nm-build-test.yml b/.github/workflows/nm-build-test.yml
index 9a5043308a067..0516679b5cf7e 100644
--- a/.github/workflows/nm-build-test.yml
+++ b/.github/workflows/nm-build-test.yml
@@ -173,12 +173,12 @@ jobs:
 
     # update docker
     DOCKER:
-        needs: [BUILD, UPLOAD]
-        if: ${{ inputs.push_to_pypi }}
+        needs: [BUILD]
+        if: ${{ inputs.wf_category != 'REMOTE' }}
         uses: ./.github/workflows/publish-docker.yml
         with:
             push_to_repository: ${{ inputs.push_to_pypi }}
             gitref: ${{ inputs.gitref }}
             wf_category: ${{ inputs.wf_category }}
-            wheel: ${{ needs.BUILD.outputs.whl }}
+            whl: ${{ needs.BUILD.outputs.whl }}
         secrets: inherit
diff --git a/.github/workflows/nm-build.yml b/.github/workflows/nm-build.yml
index d8672a5118971..10173813ac4b4 100644
--- a/.github/workflows/nm-build.yml
+++ b/.github/workflows/nm-build.yml
@@ -31,6 +31,10 @@ on:
         description: "python version, e.g. 3.10.12"
         type: string
         required: true
+    outputs:
+      whl:
+        description: 'basename for generated whl'
+        value: ${{ jobs.BUILD.outputs.whl }}
 
   # makes workflow manually callable
   workflow_dispatch:
diff --git a/.github/workflows/publish-docker.yml b/.github/workflows/publish-docker.yml
index 984f8a2463d16..4492994d79197 100644
--- a/.github/workflows/publish-docker.yml
+++ b/.github/workflows/publish-docker.yml
@@ -15,10 +15,10 @@ on:
         description: "type of nm-vllm to install for the docker image: NIGHTLY (default) or RELEASE"
         type: string
         default: 'NIGHTLY'
-      wheel:
-        description: "nm-vllm wheel to install for the docker image: latest (default) or specific wheel name"
+      whl:
+        description: "nm-vllm wheel to install for the docker image"
         type: string
-        default: 'latest'
+        required: true
 
   workflow_dispatch:
     inputs:
@@ -34,10 +34,10 @@ on:
         description: "type of nm-vllm to install for the docker image: NIGHTLY (default) or RELEASE"
         type: string
         default: 'NIGHTLY'
-      wheel:
-        description: "nm-vllm wheel to install for the docker image: latest (default) or specific wheel name"
+      whl:
+        description: "nm-vllm wheel to install for the docker image"
         type: string
-        default: 'latest'
+        required: true
 
 jobs:
   build-docker-image:
@@ -70,7 +70,7 @@ jobs:
         uses: ./.github/actions/nm-get-docker-tags/
         with:
           wf_category: ${{ inputs.wf_category }}
-          wheel: ${{ inputs.wheel }}
+          whl: ${{ inputs.whl }}
 
       - name: Build image
         id: build
diff --git a/Dockerfile b/Dockerfile
index 60fe17c4f08d9..cc4976db8fc13 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -62,20 +62,6 @@ RUN apt-get update -y && apt-get install -y ccache
 
 #################### EXTENSION Build IMAGE ####################
 
-#################### FLASH_ATTENTION Build IMAGE ####################
-FROM dev as flash-attn-builder
-# flash attention version
-ARG flash_attn_version=v2.5.8
-ENV FLASH_ATTN_VERSION=${flash_attn_version}
-
-WORKDIR /usr/src/flash-attention-v2
-
-# Download the wheel or build it if a pre-compiled release doesn't exist
-RUN pip --verbose wheel flash-attn==${FLASH_ATTN_VERSION} \
-    --no-build-isolation --no-deps --no-cache-dir
-
-#################### FLASH_ATTENTION Build IMAGE ####################
-
 #################### vLLM installation IMAGE ####################
 # image with vLLM installed
 FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04 AS vllm-base
@@ -114,9 +100,6 @@ RUN --mount=type=bind,from=build \
         fi; \
     fi
 
-RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,target=/usr/src/flash-attention-v2 \
-    --mount=type=cache,target=/root/.cache/pip \
-    pip install /usr/src/flash-attention-v2/*.whl --no-cache-dir
 #################### vLLM installation IMAGE ####################
 
 #################### TEST IMAGE ####################