From e3b0c71b46c6111a03e2a407ee9ea0c601fef170 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Fri, 15 May 2026 03:45:15 +0000 Subject: [PATCH] =?UTF-8?q?Add=202-GPU=20runners:=20arm64=20l4=C3=972=20ni?= =?UTF-8?q?ghtly=20+=20Windows=20amd64=20special=20runners?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arm64 l4×2 runners are restricted to nightly-only use per the runner team (ARM64 L4 capacity concerns). Add them as nightly-standard entries in ci-nightly.yml so they run the standard test suite against wheels from the latest successful main CI run. Windows amd64 2-GPU runners (t4×2 TCC, h100×2 MCDM) are added as special runners in the regular PR CI matrix, mirroring the existing Linux amd64 2-GPU special runners. Also update the Windows test job name to show GPU count (x2) for multi-GPU entries, matching the Linux job name format. Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/ci-nightly.yml | 30 +++++++++++++++++++++--- .github/workflows/test-wheel-windows.yml | 2 +- ci/test-matrix.yml | 6 +++++ 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci-nightly.yml b/.github/workflows/ci-nightly.yml index 180351d45ee..0aa4bfc3d48 100644 --- a/.github/workflows/ci-nightly.yml +++ b/.github/workflows/ci-nightly.yml @@ -3,10 +3,11 @@ # SPDX-License-Identifier: Apache-2.0 # Nightly CI pipeline that tests optional dependencies (PyTorch, numba-cuda) -# against the latest cuda-python wheels built on main. +# against the latest cuda-python wheels built on main, and runs the standard +# test suite on runners reserved for nightly-only use (e.g. arm64 l4×2). # # This workflow does NOT build wheels — it downloads them from the latest -# successful CI run on main and runs integration tests with optional deps. +# successful CI run on main and runs integration/standard tests. name: "CI: Nightly optional-deps" @@ -191,6 +192,26 @@ jobs: test-mode: nightly-numba-cuda matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))' + # ── Standard tests on nightly-only runners ── + + test-standard-linux-aarch64: + name: "Nightly standard (linux-aarch64)" + if: ${{ github.repository_owner == 'nvidia' }} + needs: find-wheels + permissions: + contents: read + actions: read + secrets: inherit + uses: ./.github/workflows/test-wheel-linux.yml + with: + build-type: nightly + host-platform: linux-aarch64 + build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }} + run-id: ${{ needs.find-wheels.outputs.RUN_ID }} + sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} + test-mode: standard + matrix_filter: 'map(select(.MODE == "nightly-standard"))' + # ── Status check ── checks: @@ -205,6 +226,7 @@ jobs: - test-numba-cuda-linux-64 - test-numba-cuda-linux-aarch64 - test-numba-cuda-windows + - test-standard-linux-aarch64 steps: - name: Exit run: | @@ -227,7 +249,9 @@ jobs: needs.test-numba-cuda-linux-aarch64.result == 'cancelled' || needs.test-numba-cuda-linux-aarch64.result == 'failure' || needs.test-numba-cuda-windows.result == 'cancelled' || - needs.test-numba-cuda-windows.result == 'failure' }}; then + needs.test-numba-cuda-windows.result == 'failure' || + needs.test-standard-linux-aarch64.result == 'cancelled' || + needs.test-standard-linux-aarch64.result == 'failure' }}; then exit 1 fi exit 0 diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 328c0910677..04b77b27b2c 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -87,7 +87,7 @@ jobs: echo "MATRIX=${MATRIX}" | tee --append "${GITHUB_OUTPUT}" test: - name: Python ${{ matrix.PY_VER }}, CUDA ${{ matrix.CUDA_VER }} (${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}), GPU ${{ matrix.GPU }} (${{ matrix.DRIVER_MODE }})${{ matrix.TORCH_VER && format(', {0}', matrix.TORCH_VER) || '' }}${{ matrix.MODE == 'nightly-numba-cuda' && ', latest' || '' }} + name: Python ${{ matrix.PY_VER }}, CUDA ${{ matrix.CUDA_VER }} (${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}), GPU ${{ matrix.GPU }}${{ matrix.GPU_COUNT != '1' && format(' (x{0})', matrix.GPU_COUNT) || '' }} (${{ matrix.DRIVER_MODE }})${{ matrix.TORCH_VER && format(', {0}', matrix.TORCH_VER) || '' }}${{ matrix.MODE == 'nightly-numba-cuda' && ', latest' || '' }} # The build stage could fail but we want the CI to keep moving. needs: compute-matrix strategy: diff --git a/ci/test-matrix.yml b/ci/test-matrix.yml index 884a4865523..35f02847ed7 100644 --- a/ci/test-matrix.yml +++ b/ci/test-matrix.yml @@ -77,6 +77,9 @@ linux: - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - { MODE: 'nightly-numba-cuda', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - { MODE: 'nightly-numba-cuda', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } + # nightly-standard (arm64 l4×2 — nightly-only per runner team request) + - { MODE: 'nightly-standard', ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.2.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest' } + - { MODE: 'nightly-standard', ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.2.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest' } windows: pull-request: @@ -99,6 +102,9 @@ windows: - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } + # special runners + - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.2.1', LOCAL_CTK: '1', GPU: 't4', GPU_COUNT: '2', DRIVER: 'latest', DRIVER_MODE: 'TCC' } + - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'h100', GPU_COUNT: '2', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } nightly: # nightly-pytorch - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.11.0', TORCH_CUDA: 'cu126' }