diff --git a/.github/workflows/ci-nightly.yml b/.github/workflows/ci-nightly.yml index 180351d45ee..0aa4bfc3d48 100644 --- a/.github/workflows/ci-nightly.yml +++ b/.github/workflows/ci-nightly.yml @@ -3,10 +3,11 @@ # SPDX-License-Identifier: Apache-2.0 # Nightly CI pipeline that tests optional dependencies (PyTorch, numba-cuda) -# against the latest cuda-python wheels built on main. +# against the latest cuda-python wheels built on main, and runs the standard +# test suite on runners reserved for nightly-only use (e.g. arm64 l4×2). # # This workflow does NOT build wheels — it downloads them from the latest -# successful CI run on main and runs integration tests with optional deps. +# successful CI run on main and runs integration/standard tests. name: "CI: Nightly optional-deps" @@ -191,6 +192,26 @@ jobs: test-mode: nightly-numba-cuda matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))' + # ── Standard tests on nightly-only runners ── + + test-standard-linux-aarch64: + name: "Nightly standard (linux-aarch64)" + if: ${{ github.repository_owner == 'nvidia' }} + needs: find-wheels + permissions: + contents: read + actions: read + secrets: inherit + uses: ./.github/workflows/test-wheel-linux.yml + with: + build-type: nightly + host-platform: linux-aarch64 + build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }} + run-id: ${{ needs.find-wheels.outputs.RUN_ID }} + sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} + test-mode: standard + matrix_filter: 'map(select(.MODE == "nightly-standard"))' + # ── Status check ── checks: @@ -205,6 +226,7 @@ jobs: - test-numba-cuda-linux-64 - test-numba-cuda-linux-aarch64 - test-numba-cuda-windows + - test-standard-linux-aarch64 steps: - name: Exit run: | @@ -227,7 +249,9 @@ jobs: needs.test-numba-cuda-linux-aarch64.result == 'cancelled' || needs.test-numba-cuda-linux-aarch64.result == 'failure' || needs.test-numba-cuda-windows.result == 'cancelled' || - needs.test-numba-cuda-windows.result == 'failure' }}; then + needs.test-numba-cuda-windows.result == 'failure' || + needs.test-standard-linux-aarch64.result == 'cancelled' || + needs.test-standard-linux-aarch64.result == 'failure' }}; then exit 1 fi exit 0 diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 328c0910677..04b77b27b2c 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -87,7 +87,7 @@ jobs: echo "MATRIX=${MATRIX}" | tee --append "${GITHUB_OUTPUT}" test: - name: Python ${{ matrix.PY_VER }}, CUDA ${{ matrix.CUDA_VER }} (${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}), GPU ${{ matrix.GPU }} (${{ matrix.DRIVER_MODE }})${{ matrix.TORCH_VER && format(', {0}', matrix.TORCH_VER) || '' }}${{ matrix.MODE == 'nightly-numba-cuda' && ', latest' || '' }} + name: Python ${{ matrix.PY_VER }}, CUDA ${{ matrix.CUDA_VER }} (${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}), GPU ${{ matrix.GPU }}${{ matrix.GPU_COUNT != '1' && format(' (x{0})', matrix.GPU_COUNT) || '' }} (${{ matrix.DRIVER_MODE }})${{ matrix.TORCH_VER && format(', {0}', matrix.TORCH_VER) || '' }}${{ matrix.MODE == 'nightly-numba-cuda' && ', latest' || '' }} # The build stage could fail but we want the CI to keep moving. needs: compute-matrix strategy: diff --git a/ci/test-matrix.yml b/ci/test-matrix.yml index 884a4865523..35f02847ed7 100644 --- a/ci/test-matrix.yml +++ b/ci/test-matrix.yml @@ -77,6 +77,9 @@ linux: - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - { MODE: 'nightly-numba-cuda', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - { MODE: 'nightly-numba-cuda', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } + # nightly-standard (arm64 l4×2 — nightly-only per runner team request) + - { MODE: 'nightly-standard', ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.2.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest' } + - { MODE: 'nightly-standard', ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.2.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest' } windows: pull-request: @@ -99,6 +102,9 @@ windows: - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } + # special runners + - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.2.1', LOCAL_CTK: '1', GPU: 't4', GPU_COUNT: '2', DRIVER: 'latest', DRIVER_MODE: 'TCC' } + - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'h100', GPU_COUNT: '2', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } nightly: # nightly-pytorch - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.11.0', TORCH_CUDA: 'cu126' }