diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 47c03be6..531db723 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,51 +16,45 @@ concurrency: cancel-in-progress: true jobs: - lint: - name: Lint (${{ matrix.shard }}) + # Partition the workspace packages into balanced shards. Alphabetical + # sharding put ~43% of packages in the [s-z] shard (the `uc-*` packages all + # start with `u`), making it the CI long pole. This computes shards balanced + # by source weight so every static-check leg finishes at roughly the same + # time. It only reads package.json files, so it needs no dependency install. + setup: + name: Setup shards runs-on: group: databricks-protected-runner-group labels: linux-ubuntu-latest - strategy: - fail-fast: false - matrix: - # Shards partition packages by the first letter after `@databricks/sdk-`. - # Together the four ranges cover a-z. - shard: ['[a-c]*', '[d-l]*', '[m-r]*', '[s-z]*'] + outputs: + shards: ${{ steps.shards.outputs.shards }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - name: Setup JFrog CLI - uses: jfrog/setup-jfrog-cli@279b1f629f43dd5bc658d8361ac4802a7ef8d2d5 # v4.9.1 - env: - JF_URL: https://databricks.jfrog.io - with: - oidc-provider-name: github-actions - - name: Setup Node.js uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 with: node-version: '22' - cache: 'npm' - - - name: Configure npm for JFrog - run: jf npmc --repo-resolve=db-npm - - - name: Install dependencies - run: jf npm ci - - name: Run lint and format checks - run: npm run checks -- --filter='@databricks/sdk-${{ matrix.shard }}' - - typecheck: - name: Type Check (${{ matrix.shard }}) + - name: Compute balanced shards + id: shards + run: echo "shards=$(node scripts/ci-shards.mjs 8)" >> "$GITHUB_OUTPUT" + + # Build, lint, and format-check each shard in one job. 
Lint and build both + # depend on `^build`, so running them together builds each shard's dependency + # closure once instead of repeating it across separate jobs. The build task is + # `tsc -b`, which type-checks `src` while emitting, so a standalone `tsc + # --noEmit` typecheck over the same files would be redundant and is omitted. + static: + name: Static (shard ${{ matrix.shard.id }}) + needs: setup runs-on: group: databricks-protected-runner-group labels: linux-ubuntu-latest strategy: fail-fast: false matrix: - shard: ['[a-c]*', '[d-l]*', '[m-r]*', '[s-z]*'] + shard: ${{ fromJSON(needs.setup.outputs.shards) }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 @@ -83,8 +77,16 @@ jobs: - name: Install dependencies run: jf npm ci - - name: Run type check - run: npm run typecheck -- --filter='@databricks/sdk-${{ matrix.shard }}' + - name: Build, lint, and format-check shard + shell: bash + env: + SHARD_PKGS: ${{ matrix.shard.pkgs }} + run: | + filters="" + for pkg in $SHARD_PKGS; do + filters="$filters --filter=$pkg" + done + npm run ci:verify -- $filters test: name: Test (Node.js ${{ matrix.node-version }}) @@ -157,41 +159,6 @@ jobs: - name: Run browser tests run: npm run test:browser - build: - name: Build (Node.js ${{ matrix.node-version }}, ${{ matrix.shard }}) - runs-on: - group: databricks-protected-runner-group - labels: linux-ubuntu-latest - strategy: - fail-fast: false - matrix: - node-version: ['22', '24'] - shard: ['[a-c]*', '[d-l]*', '[m-r]*', '[s-z]*'] - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - - name: Setup JFrog CLI - uses: jfrog/setup-jfrog-cli@279b1f629f43dd5bc658d8361ac4802a7ef8d2d5 # v4.9.1 - env: - JF_URL: https://databricks.jfrog.io - with: - oidc-provider-name: github-actions - - - name: Setup Node.js ${{ matrix.node-version }} - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 - with: - node-version: ${{ matrix.node-version }} - cache: 'npm' - - - 
name: Configure npm for JFrog - run: jf npmc --repo-resolve=db-npm - - - name: Install dependencies - run: jf npm ci - - - name: Build - run: npm run build -- --filter='@databricks/sdk-${{ matrix.shard }}' - check-licenses: name: Check Licenses runs-on: @@ -214,7 +181,7 @@ jobs: ci-gate: name: CI Gate if: always() - needs: [lint, typecheck, test, test-browser, build, check-licenses] + needs: [setup, static, test, test-browser, check-licenses] runs-on: group: databricks-protected-runner-group labels: linux-ubuntu-latest diff --git a/package.json b/package.json index f0386a29..018dc48a 100644 --- a/package.json +++ b/package.json @@ -16,6 +16,7 @@ "format": "turbo run format", "format:check": "turbo run format:check", "checks": "turbo run lint format:check", + "ci:verify": "turbo run build lint format:check", "typecheck": "turbo run typecheck", "clean": "turbo run clean", "check:licenses": "node scripts/check-licenses.mjs", diff --git a/scripts/ci-shards.mjs b/scripts/ci-shards.mjs new file mode 100644 index 00000000..5fe6ad05 --- /dev/null +++ b/scripts/ci-shards.mjs @@ -0,0 +1,142 @@ +#!/usr/bin/env node +// Partition the workspace packages into balanced CI shards. +// +// CI runs the static checks (build + lint + format) as a matrix where each +// shard handles a subset of packages. Alphabetical sharding is badly +// unbalanced because ~25% of packages share the `uc-` prefix, so this script +// balances shards by source weight instead. +// +// Usage: +// node scripts/ci-shards.mjs [shardCount] +// Prints a single-line JSON array of shard objects to stdout, suitable for +// a GitHub Actions matrix: [{ "id": "1", "pkgs": "" }]. +// +// node scripts/ci-shards.mjs [shardCount] --pretty +// Prints a human-readable balance report to stderr (for local inspection). 
//
// Weighting: each package's weight is the total byte size of its TypeScript
// source files under `src/`, plus a fixed base to account for the per-package
// fixed cost (turbo task startup, dependency build). Shards are filled with a
// largest-processing-time-first greedy algorithm, which keeps the heaviest
// shard close to the theoretical optimum.
//
// Shards that receive no package (fewer packages than requested shards) are
// left out of the output. The CI job turns each entry's `pkgs` into `--filter`
// arguments, so an entry with an empty `pkgs` would run `ci:verify` with no
// filter at all.

import {readdirSync, readFileSync, statSync} from 'node:fs';
import {join, dirname} from 'node:path';
import {fileURLToPath} from 'node:url';

const REPO_ROOT = join(dirname(fileURLToPath(import.meta.url)), '..');
const PACKAGES_DIR = join(REPO_ROOT, 'packages');
const PACKAGE_PREFIX = '@databricks/sdk-';

// A fixed weight added to every package, in source-byte-equivalents. Type-aware
// lint and the tsc build have a large fixed cost per package (loading the TS
// program and resolving the dependency type graph), so a shard's runtime is
// driven more by how many packages it holds than by their total source size.
// Setting the base near the median source size keeps shards balanced by package
// count while still giving heavy packages (e.g. `jobs`) extra weight.
const BASE_WEIGHT = 60_000;

// Shard count used when no positional argument is given on the command line.
const DEFAULT_SHARD_COUNT = 8;

const USAGE = 'Usage: node scripts/ci-shards.mjs [shardCount] [--pretty]';

/**
 * Print an error to stderr and terminate with a non-zero exit code.
 *
 * @param {string} message Human-readable description of the problem.
 * @returns {never}
 */
function fail(message) {
  process.stderr.write(`ci-shards: ${message}\n${USAGE}\n`);
  process.exit(1);
}

/**
 * Sum the byte sizes of all `.ts` / `.tsx` files under `<packageDir>/src`.
 *
 * Directories that cannot be listed (including a missing `src/`) contribute
 * nothing, so a package without sources yields 0.
 *
 * @param {string} packageDir Directory that contains the package.json.
 * @returns {number} Total size in bytes.
 */
function sourceWeight(packageDir) {
  const srcDir = join(packageDir, 'src');
  let total = 0;
  const walk = dir => {
    let entries;
    try {
      entries = readdirSync(dir, {withFileTypes: true});
    } catch {
      // Unreadable or missing directory: treat as empty.
      return;
    }
    for (const entry of entries) {
      const full = join(dir, entry.name);
      if (entry.isDirectory()) {
        walk(full);
      } else if (entry.isFile() && /\.tsx?$/.test(entry.name)) {
        total += statSync(full).size;
      }
    }
  };
  walk(srcDir);
  return total;
}

/**
 * Discover every workspace package whose name starts with PACKAGE_PREFIX.
 *
 * @returns {{name: string, weight: number}[]} One entry per package, with
 *   weight = BASE_WEIGHT + sourceWeight(package directory).
 */
function collectPackages() {
  // The root package.json declares `workspaces: ["packages/**"]`, so packages
  // are nested at arbitrary depth (e.g. packages/uc/catalogs). Walk the tree
  // and collect every package.json, skipping node_modules and build output.
  const packages = [];
  const walk = dir => {
    for (const entry of readdirSync(dir, {withFileTypes: true})) {
      if (entry.name === 'node_modules' || entry.name === 'dist') continue;
      const full = join(dir, entry.name);
      if (entry.isDirectory()) {
        walk(full);
      } else if (entry.isFile() && entry.name === 'package.json') {
        let pkg;
        try {
          pkg = JSON.parse(readFileSync(full, 'utf8'));
        } catch {
          // Unreadable or malformed package.json: not a package we can shard.
          continue;
        }
        if (
          typeof pkg.name === 'string' &&
          pkg.name.startsWith(PACKAGE_PREFIX)
        ) {
          packages.push({
            name: pkg.name,
            weight: BASE_WEIGHT + sourceWeight(dir),
          });
        }
      }
    }
  };
  walk(PACKAGES_DIR);
  return packages;
}

/**
 * Distribute packages over `shardCount` shards, heaviest package first.
 *
 * @param {{name: string, weight: number}[]} packages Packages to distribute.
 * @param {number} shardCount Number of shards to create; must be an integer >= 1.
 * @returns {{pkgs: string[], weight: number}[]} Exactly `shardCount` shards.
 *   Shards stay empty when there are fewer packages than shards.
 * @throws {RangeError} If `shardCount` is not a positive integer.
 */
function partition(packages, shardCount) {
  // Without this guard a NaN/zero count produces an empty shard list and the
  // `reduce` below throws an opaque TypeError on the first package.
  if (!Number.isInteger(shardCount) || shardCount < 1) {
    throw new RangeError(
      `shardCount must be a positive integer, got ${shardCount}`
    );
  }
  // Largest-processing-time-first: assign the heaviest package to the
  // currently lightest shard.
  const shards = Array.from({length: shardCount}, () => ({
    pkgs: [],
    weight: 0,
  }));
  const sorted = [...packages].sort(
    (a, b) => b.weight - a.weight || a.name.localeCompare(b.name)
  );
  for (const pkg of sorted) {
    // Strict `<` keeps the earliest shard on ties, so shards fill in order.
    const lightest = shards.reduce((min, s) =>
      s.weight < min.weight ? s : min
    );
    lightest.pkgs.push(pkg.name);
    lightest.weight += pkg.weight;
  }
  return shards;
}

/**
 * Read the shard count from the command-line arguments.
 *
 * The count is the first argument that is not a `--flag`, so both
 * `ci-shards.mjs 8 --pretty` and `ci-shards.mjs --pretty` work; the latter
 * uses DEFAULT_SHARD_COUNT. Exits with an error for anything that does not
 * parse to an integer >= 1.
 *
 * @param {string[]} args Arguments after the script path.
 * @returns {number} A positive integer shard count.
 */
function parseShardCount(args) {
  const positional = args.find(arg => !arg.startsWith('--'));
  if (positional === undefined) {
    return DEFAULT_SHARD_COUNT;
  }
  const count = Number.parseInt(positional, 10);
  if (!Number.isInteger(count) || count < 1) {
    fail(`invalid shard count "${positional}" (expected an integer >= 1)`);
  }
  return count;
}

const args = process.argv.slice(2);
const pretty = args.includes('--pretty');
const shardCount = parseShardCount(args);

const packages = collectPackages();
if (packages.length === 0) {
  // An all-empty matrix would make every CI shard run unfiltered; fail loudly.
  fail(`no ${PACKAGE_PREFIX}* packages found under ${PACKAGES_DIR}`);
}

// Drop shards that received no package so the matrix never contains an entry
// with an empty `pkgs` string.
const shards = partition(packages, shardCount).filter(
  s => s.pkgs.length > 0
);

if (pretty) {
  const lines = shards.map((s, i) => {
    const kb = Math.round(s.weight / 1024);
    return `shard ${i + 1}: ${String(s.pkgs.length).padStart(2)} pkgs, ${String(kb).padStart(5)} KB`;
  });
  // Every remaining shard holds at least one package, so each weight is at
  // least BASE_WEIGHT and the ratio below cannot divide by zero.
  const weights = shards.map(s => s.weight);
  const imbalance = (
    (Math.max(...weights) / Math.min(...weights) - 1) *
    100
  ).toFixed(1);
  process.stderr.write(
    `${packages.length} packages -> ${shards.length} shards\n${lines.join('\n')}\n` +
      `max/min weight imbalance: ${imbalance}%\n`
  );
}

// Single-line JSON on stdout; package names within a shard are sorted and
// space-separated.
const matrix = shards.map((s, i) => ({
  id: String(i + 1),
  pkgs: [...s.pkgs].sort().join(' '),
}));
process.stdout.write(JSON.stringify(matrix));