From 21ce4bc2de4eca149c017eb56d8574382053c9ff Mon Sep 17 00:00:00 2001
From: Wonhyuk Yang
Date: Thu, 9 Apr 2026 21:30:27 +0900
Subject: [PATCH] [CI] Add thirdparty release manifest; pin base image tag and build on demand in docker-image workflow

Replace the per-version base/test image workflows with a single
docker-image workflow. The base image tag is derived from a hash of
thirdparty/github-releases.json and Dockerfile.base, and the base image
is built only when that tag is missing from GHCR. Release asset IDs are
resolved from the manifest by scripts/ci/thirdparty_github_asset_env.sh.
The tag_release workflow gains the same ensure-base job and passes the
pinned base image to the release build.

---
 .github/workflows/docker-base-image-2-8.yml |  71 ----------
 .github/workflows/docker-image-2-8.yml      |  69 ---------
 .github/workflows/docker-image.yml          | 149 ++++++++++++++++++++
 .github/workflows/tag_release.yml           |  76 +++++++++-
 scripts/ci/thirdparty_base_pin.sh           |   6 +
 scripts/ci/thirdparty_github_asset_env.sh   |  60 ++++++++
 thirdparty/github-releases.json             |  19 +++
 7 files changed, 309 insertions(+), 141 deletions(-)
 delete mode 100644 .github/workflows/docker-base-image-2-8.yml
 delete mode 100644 .github/workflows/docker-image-2-8.yml
 create mode 100644 .github/workflows/docker-image.yml
 create mode 100755 scripts/ci/thirdparty_base_pin.sh
 create mode 100755 scripts/ci/thirdparty_github_asset_env.sh
 create mode 100644 thirdparty/github-releases.json

diff --git a/.github/workflows/docker-base-image-2-8.yml b/.github/workflows/docker-base-image-2-8.yml
deleted file mode 100644
index 74e81e07..00000000
--- a/.github/workflows/docker-base-image-2-8.yml
+++ /dev/null
@@ -1,71 +0,0 @@
-name: Docker Base Image CI (PyTorch 2.8)
-
-on:
-  push:
-    branches: [ "base_v2.8" ]
-  workflow_dispatch:
-  repository_dispatch:
-    types: [ build_base ]
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-
-    permissions:
-      contents: read
-      packages: write
-
-    steps:
-      - name: Checkout Code
-        uses: actions/checkout@v4
-
-      - name: Log in to GitHub Container Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Set environment
-        env:
-          GIT_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          if [ -n "${{ github.event.pull_request.head.sha }}" ]; then
-            echo "GITHUB_SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
-            echo "GITHUB_SHA=${{ github.event.pull_request.head.sha }}"
-          else
-            echo "GITHUB_SHA=${{ github.sha }}" >> $GITHUB_ENV
-            echo "GITHUB_SHA=${{ github.sha }}"
-          fi
-
-          gem5_response_file=/tmp/releases-gem5-latest.json
-          curl -s https://api.github.com/repos/PSAL-POSTECH/GEM5/releases/latest > ${gem5_response_file}
-          GEM5_ASSET_ID=$(jq ".assets[0].id" ${gem5_response_file})
-          echo "GEM5_ASSET_ID=$GEM5_ASSET_ID"
-          echo "GEM5_ASSET_ID=$GEM5_ASSET_ID" >> $GITHUB_ENV
-
-          llvm_response_file=/tmp/releases-gem5-latest.json
-          curl -s https://api.github.com/repos/PSAL-POSTECH/llvm-project/releases/latest > ${llvm_response_file}
-          LLVM_ASSET_ID=$(jq ".assets[0].id" ${llvm_response_file})
-          echo "LLVM_ASSET_ID=$LLVM_ASSET_ID"
-          echo "LLVM_ASSET_ID=$LLVM_ASSET_ID" >> $GITHUB_ENV
-
-          spike_response_file=/tmp/releases-spike-latest.json
-          curl -s https://api.github.com/repos/PSAL-POSTECH/riscv-isa-sim/releases/latest > ${spike_response_file}
-          SPIKE_ASSET_ID=$(jq ".assets[0].id" ${spike_response_file})
-          echo "SPIKE_ASSET_ID=$SPIKE_ASSET_ID"
-          echo "SPIKE_ASSET_ID=$SPIKE_ASSET_ID" >> $GITHUB_ENV
-
-      - name: Build and Push Docker Image (PyTorch 2.8)
-        uses: docker/build-push-action@v4
-        with:
-          context: .
-          file: ./Dockerfile.base
-          push: true
-          build-args: |
-            PYTORCH_IMAGE=pytorch/pytorch:2.8.0-cuda12.6-cudnn9-devel
-            GEM5_ASSET_ID=${{ env.GEM5_ASSET_ID }}
-            LLVM_ASSET_ID=${{ env.LLVM_ASSET_ID }}
-            SPIKE_ASSET_ID=${{ env.SPIKE_ASSET_ID }}
-          tags: |
-            ghcr.io/psal-postech/torchsim_base_2_8:latest
diff --git a/.github/workflows/docker-image-2-8.yml b/.github/workflows/docker-image-2-8.yml
deleted file mode 100644
index 52464dff..00000000
--- a/.github/workflows/docker-image-2-8.yml
+++ /dev/null
@@ -1,69 +0,0 @@
-name: Docker image CI (PyTorch 2.8)
-
-on:
-  pull_request:
-    branches: [ "master", "develop" ]
-  workflow_dispatch:
-
-jobs:
-  build-and-test:
-    runs-on: self-hosted
-
-    permissions:
-      contents: read
-      packages: write
-
-    steps:
-      - name: Checkout Code
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event.pull_request.head.sha }}
-          submodules: recursive
-
-      - name: Login to GHCR
-        uses: docker/login-action@v3
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Build and Push Docker Image (PyTorch 2.8)
-        uses: docker/build-push-action@v6
-        with:
-          context: .
-          file: ./Dockerfile
-          push: true
-          no-cache: true
-          build-args: |
-            BASE_IMAGE=ghcr.io/psal-postech/torchsim_base_2_8:latest
-          tags: ghcr.io/psal-postech/torchsim-test-2-8:${{ github.sha }}
-
-      - name: Wait for GHCR propagation
-        run: |
-          for i in {1..30}; do
-            echo "Checking if image exists in GHCR (attempt $i)..."
-            if docker manifest inspect ghcr.io/psal-postech/torchsim-test-2-8:${GITHUB_SHA} > /dev/null 2>&1; then
-              echo "Image is now available in GHCR."
-              exit 0
-            fi
-            echo "Image not yet available, retrying in 30 seconds..."
-            sleep 20
-          done
-          echo "Image did not become available in GHCR within expected time."
-          exit 1
-
-  test-pytorchsim-wrapper1:
-    needs: build-and-test
-    uses: ./.github/workflows/pytorchsim_test.yml
-    with:
-      image_name: ghcr.io/psal-postech/torchsim-test-2-8:${{ github.sha }}
-      vector_lane: 128
-      spad_size: 128
-
-  test-pytorchsim-wrapper2:
-    needs: build-and-test
-    uses: ./.github/workflows/pytorchsim_test.yml
-    with:
-      image_name: ghcr.io/psal-postech/torchsim-test-2-8:${{ github.sha }}
-      vector_lane: 32
-      spad_size: 32
diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
new file mode 100644
index 00000000..67140c89
--- /dev/null
+++ b/.github/workflows/docker-image.yml
@@ -0,0 +1,149 @@
+name: Docker image CI
+
+on:
+  pull_request:
+    branches: [ "master", "develop" ]
+  workflow_dispatch:
+
+env:
+  BASE_IMAGE_REPO: ghcr.io/psal-postech/torchsim_base
+  # PR: head commit; otherwise workflow_dispatch uses the branch SHA
+  SOURCE_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+
+jobs:
+  ensure-base:
+    runs-on: ubuntu-latest
+    outputs:
+      base_image: ${{ steps.pin.outputs.base_image }}
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ env.SOURCE_SHA }}
+          submodules: recursive
+
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: PyTorch base image from manifest
+        run: |
+          PYTORCH_IMAGE=$(python3 -c "import json; from pathlib import Path; v=json.loads(Path('thirdparty/github-releases.json').read_text()).get('pytorch_image'); print(v or '')")
+          if [ -z "$PYTORCH_IMAGE" ]; then echo "thirdparty/github-releases.json: pytorch_image is required" >&2; exit 1; fi
+          echo "PYTORCH_IMAGE=$PYTORCH_IMAGE" >> "$GITHUB_ENV"
+
+      - name: Thirdparty pin
+        id: pin
+        run: |
+          PIN="$(bash scripts/ci/thirdparty_base_pin.sh)"
+          echo "pin=${PIN}" >> "$GITHUB_OUTPUT"
+          echo "base_image=${BASE_IMAGE_REPO}:thirdparty-${PIN}" >> "$GITHUB_OUTPUT"
+          echo "BASE_IMAGE=${BASE_IMAGE_REPO}:thirdparty-${PIN}" >> "$GITHUB_ENV"
+
+      - name: Check base image exists
+        id: exists
+        run: |
+          if docker manifest inspect "${BASE_IMAGE}" > /dev/null 2>&1; then
+            echo "ok=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "ok=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Resolve GitHub release asset IDs
+        if: steps.exists.outputs.ok != 'true'
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: bash scripts/ci/thirdparty_github_asset_env.sh >> "$GITHUB_ENV"
+
+      - name: Build and push base image (missing pin)
+        if: steps.exists.outputs.ok != 'true'
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ./Dockerfile.base
+          push: true
+          build-args: |
+            PYTORCH_IMAGE=${{ env.PYTORCH_IMAGE }}
+            GEM5_ASSET_ID=${{ env.GEM5_ASSET_ID }}
+            LLVM_ASSET_ID=${{ env.LLVM_ASSET_ID }}
+            SPIKE_ASSET_ID=${{ env.SPIKE_ASSET_ID }}
+          tags: ${{ env.BASE_IMAGE }}
+
+  build-and-test:
+    needs: ensure-base
+    runs-on: self-hosted
+
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ env.SOURCE_SHA }}
+          submodules: recursive
+
+      - name: Login to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and Push Docker Image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ./Dockerfile
+          push: true
+          no-cache: true
+          build-args: |
+            BASE_IMAGE=${{ needs.ensure-base.outputs.base_image }}
+          tags: ghcr.io/psal-postech/torchsim-test:${{ env.SOURCE_SHA }}
+
+      # Do not use GITHUB_SHA here: on pull_request it is the merge commit, while the image tag uses SOURCE_SHA (PR head).
+      - name: Wait for GHCR propagation
+        env:
+          IMAGE_SHA: ${{ env.SOURCE_SHA }}
+        run: |
+          IMG="ghcr.io/psal-postech/torchsim-test:${IMAGE_SHA}"
+          echo "Verifying tag matches push: ${IMAGE_SHA}"
+          for i in $(seq 1 30); do
+            echo "Checking if image exists in GHCR (attempt $i)..."
+            if docker buildx imagetools inspect "$IMG" > /dev/null 2>&1; then
+              echo "Image is now available in GHCR."
+              exit 0
+            fi
+            if [ "$i" -eq 1 ]; then
+              echo "buildx imagetools inspect failed; stderr (first attempt):"
+              docker buildx imagetools inspect "$IMG" 2>&1 || true
+            fi
+            echo "Image not yet available, retrying in 20 seconds..."
+            sleep 20
+          done
+          echo "Image did not become available in GHCR within expected time."
+          exit 1
+
+  test-pytorchsim-wrapper1:
+    needs: build-and-test
+    uses: ./.github/workflows/pytorchsim_test.yml
+    with:
+      image_name: ghcr.io/psal-postech/torchsim-test:${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      vector_lane: 128
+      spad_size: 128
+
+  test-pytorchsim-wrapper2:
+    needs: build-and-test
+    uses: ./.github/workflows/pytorchsim_test.yml
+    with:
+      image_name: ghcr.io/psal-postech/torchsim-test:${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      vector_lane: 32
+      spad_size: 32
diff --git a/.github/workflows/tag_release.yml b/.github/workflows/tag_release.yml
index 0728a583..f92fc060 100644
--- a/.github/workflows/tag_release.yml
+++ b/.github/workflows/tag_release.yml
@@ -5,8 +5,80 @@ on:
     tags:
       - 'v*'
 
+env:
+  BASE_IMAGE_REPO: ghcr.io/psal-postech/torchsim_base
+
 jobs:
+  ensure-base:
+    runs-on: ubuntu-latest
+    outputs:
+      base_image: ${{ steps.pin.outputs.base_image }}
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          repository: PSAL-POSTECH/PyTorchSim
+          ref: ${{ github.sha }}
+          submodules: recursive
+
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: PyTorch base image from manifest
+        run: |
+          PYTORCH_IMAGE=$(python3 -c "import json; from pathlib import Path; v=json.loads(Path('thirdparty/github-releases.json').read_text()).get('pytorch_image'); print(v or '')")
+          if [ -z "$PYTORCH_IMAGE" ]; then echo "thirdparty/github-releases.json: pytorch_image is required" >&2; exit 1; fi
+          echo "PYTORCH_IMAGE=$PYTORCH_IMAGE" >> "$GITHUB_ENV"
+
+      - name: Thirdparty pin
+        id: pin
+        run: |
+          PIN="$(bash scripts/ci/thirdparty_base_pin.sh)"
+          echo "pin=${PIN}" >> "$GITHUB_OUTPUT"
+          echo "base_image=${BASE_IMAGE_REPO}:thirdparty-${PIN}" >> "$GITHUB_OUTPUT"
+          echo "BASE_IMAGE=${BASE_IMAGE_REPO}:thirdparty-${PIN}" >> "$GITHUB_ENV"
+
+      - name: Check base image exists
+        id: exists
+        run: |
+          if docker manifest inspect "${BASE_IMAGE}" > /dev/null 2>&1; then
+            echo "ok=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "ok=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Resolve GitHub release asset IDs
+        if: steps.exists.outputs.ok != 'true'
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: bash scripts/ci/thirdparty_github_asset_env.sh >> "$GITHUB_ENV"
+
+      - name: Build and push base image (missing pin)
+        if: steps.exists.outputs.ok != 'true'
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ./Dockerfile.base
+          push: true
+          build-args: |
+            PYTORCH_IMAGE=${{ env.PYTORCH_IMAGE }}
+            GEM5_ASSET_ID=${{ env.GEM5_ASSET_ID }}
+            LLVM_ASSET_ID=${{ env.LLVM_ASSET_ID }}
+            SPIKE_ASSET_ID=${{ env.SPIKE_ASSET_ID }}
+          tags: |
+            ${{ env.BASE_IMAGE }}
+            ${{ env.BASE_IMAGE_REPO }}:latest
+
   build:
+    needs: ensure-base
     runs-on: self-hosted
 
     permissions:
@@ -42,4 +114,6 @@ jobs:
           push: true
           secrets: |
             GIT_ACCESS_TOKEN=${{ secrets.GIT_ACCESS_TOKEN }}
-          tags: ghcr.io/psal-postech/${{ env.IMAGE_TAG}}
\ No newline at end of file
+          build-args: |
+            BASE_IMAGE=${{ needs.ensure-base.outputs.base_image }}
+          tags: ghcr.io/psal-postech/${{ env.IMAGE_TAG }}
diff --git a/scripts/ci/thirdparty_base_pin.sh b/scripts/ci/thirdparty_base_pin.sh
new file mode 100755
index 00000000..6cfc7d9a
--- /dev/null
+++ b/scripts/ci/thirdparty_base_pin.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+# Deterministic short pin for tagging torchsim_base images (thirdparty + base Dockerfile).
+set -euo pipefail
+ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
+cd "$ROOT"
+{ cat thirdparty/github-releases.json; cat Dockerfile.base; } | sha256sum | awk '{print substr($1,1,12)}'
diff --git a/scripts/ci/thirdparty_github_asset_env.sh b/scripts/ci/thirdparty_github_asset_env.sh
new file mode 100755
index 00000000..8cbe9e12
--- /dev/null
+++ b/scripts/ci/thirdparty_github_asset_env.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+# Emit GEM5_ASSET_ID, LLVM_ASSET_ID, SPIKE_ASSET_ID lines for appending to GITHUB_ENV.
+# Requires: jq, curl, GITHUB_TOKEN. The manifest is located via GITHUB_WORKSPACE, else relative to this script.
+set -euo pipefail
+ROOT="${GITHUB_WORKSPACE:-$(cd "$(dirname "$0")/../.." && pwd)}"
+MANIFEST="${ROOT}/thirdparty/github-releases.json"
+if [ ! -f "$MANIFEST" ]; then
+  echo "Missing thirdparty manifest: $MANIFEST" >&2
+  exit 1
+fi
+if [ -z "${GITHUB_TOKEN:-}" ]; then
+  echo "GITHUB_TOKEN is not set" >&2
+  exit 1
+fi
+
+# thirdparty_asset_id KEY OUT_VAR
+# Look up KEY in the manifest, query its GitHub release, and print "OUT_VAR=<asset id>" on stdout.
+thirdparty_asset_id() {
+  local key="$1"
+  local out_var="$2"
+  local repo release_tag asset_name owner name api_url tmp id
+  repo=$(jq -r --arg k "$key" '.[$k].repository // ""' "$MANIFEST")
+  release_tag=$(jq -r --arg k "$key" '.[$k].release_tag // ""' "$MANIFEST")
+  asset_name=$(jq -r --arg k "$key" '.[$k].asset_name // ""' "$MANIFEST")
+  if [ -z "$repo" ] || [ -z "$release_tag" ]; then
+    echo "thirdparty manifest: '${key}' must define repository and release_tag" >&2
+    exit 1
+  fi
+  owner="${repo%%/*}"
+  name="${repo##*/}"
+  if [ "$release_tag" = "latest" ]; then
+    api_url="https://api.github.com/repos/${owner}/${name}/releases/latest"
+  else
+    api_url="https://api.github.com/repos/${owner}/${name}/releases/tags/${release_tag}"
+  fi
+  tmp=$(mktemp)
+  if ! curl -fsS -H "Authorization: Bearer ${GITHUB_TOKEN}" \
+    -H "Accept: application/vnd.github+json" \
+    -H "X-GitHub-Api-Version: 2022-11-28" \
+    "$api_url" -o "$tmp"; then
+    echo "Failed to fetch release metadata for ${key} (${owner}/${name}, ${release_tag})" >&2
+    rm -f "$tmp"
+    exit 1
+  fi
+  if [ -n "$asset_name" ]; then
+    id=$(jq -r --arg n "$asset_name" '.assets[] | select(.name == $n) | .id' "$tmp" | head -n1)
+  else
+    id=$(jq -r '.assets[0].id' "$tmp")
+  fi
+  rm -f "$tmp"
+  if [ -z "$id" ] || [ "$id" = "null" ]; then
+    echo "Could not resolve asset id for ${key} (${owner}/${name}, tag=${release_tag}, asset_name=${asset_name:-})" >&2
+    exit 1
+  fi
+  echo "${out_var}=${id}"
+}
+
+thirdparty_asset_id gem5 GEM5_ASSET_ID
+thirdparty_asset_id llvm_project LLVM_ASSET_ID
+thirdparty_asset_id spike SPIKE_ASSET_ID
diff --git a/thirdparty/github-releases.json b/thirdparty/github-releases.json
new file mode 100644
index 00000000..25c220c9
--- /dev/null
+++ b/thirdparty/github-releases.json
@@ -0,0 +1,19 @@
+{
+  "description": "GitHub release pins for CI (docker base image). pytorch_image is the ARG PYTORCH_IMAGE for Dockerfile.base. Use release_tag \"latest\" or an exact release tag for GitHub deps; prefer an exact tag, because the pin does not change when upstream publishes a new \"latest\" release and an existing base image is then reused. asset_name must match the release attachment filename. CI builds ghcr.io/.../torchsim_base:thirdparty-<12 hex> when missing (pin = sha256 of this file plus Dockerfile.base); the tag_release workflow additionally updates :latest when it builds.",
+  "pytorch_image": "pytorch/pytorch:2.8.0-cuda12.6-cudnn9-devel",
+  "gem5": {
+    "repository": "PSAL-POSTECH/gem5",
+    "release_tag": "v1.0.1",
+    "asset_name": "gem5-release.tar.gz"
+  },
+  "llvm_project": {
+    "repository": "PSAL-POSTECH/llvm-project",
+    "release_tag": "v1.0.6",
+    "asset_name": "riscv-llvm-release.tar.gz"
+  },
+  "spike": {
+    "repository": "PSAL-POSTECH/riscv-isa-sim",
+    "release_tag": "v1.0.1",
+    "asset_name": "spike-release.tar.gz"
+  }
+}