Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 33 additions & 66 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,51 +16,45 @@ concurrency:
cancel-in-progress: true

jobs:
lint:
name: Lint (${{ matrix.shard }})
# Partition the workspace packages into balanced shards. Alphabetical
# sharding put ~43% of packages in the [s-z] shard (the `uc-*` packages all
# start with `u`), making it the CI long pole. This computes shards balanced
# by source weight so every static-check leg finishes at roughly the same
# time. It only reads package.json files, so it needs no dependency install.
setup:
name: Setup shards
runs-on:
group: databricks-protected-runner-group
labels: linux-ubuntu-latest
strategy:
fail-fast: false
matrix:
# Shards partition packages by the first letter after `@databricks/sdk-`.
# Together the four ranges cover a-z.
shard: ['[a-c]*', '[d-l]*', '[m-r]*', '[s-z]*']
outputs:
shards: ${{ steps.shards.outputs.shards }}
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

- name: Setup JFrog CLI
uses: jfrog/setup-jfrog-cli@279b1f629f43dd5bc658d8361ac4802a7ef8d2d5 # v4.9.1
env:
JF_URL: https://databricks.jfrog.io
with:
oidc-provider-name: github-actions

- name: Setup Node.js
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
with:
node-version: '22'
cache: 'npm'

- name: Configure npm for JFrog
run: jf npmc --repo-resolve=db-npm

- name: Install dependencies
run: jf npm ci

- name: Run lint and format checks
run: npm run checks -- --filter='@databricks/sdk-${{ matrix.shard }}'

typecheck:
name: Type Check (${{ matrix.shard }})
- name: Compute balanced shards
id: shards
run: echo "shards=$(node scripts/ci-shards.mjs 8)" >> "$GITHUB_OUTPUT"

# Build, lint, and format-check each shard in one job. Lint and build both
# depend on `^build`, so running them together builds each shard's dependency
# closure once instead of repeating it across separate jobs. The build task is
# `tsc -b`, which type-checks `src` while emitting, so a standalone `tsc
# --noEmit` typecheck over the same files would be redundant and is omitted.
static:
name: Static (shard ${{ matrix.shard.id }})
needs: setup
runs-on:
group: databricks-protected-runner-group
labels: linux-ubuntu-latest
strategy:
fail-fast: false
matrix:
shard: ['[a-c]*', '[d-l]*', '[m-r]*', '[s-z]*']
shard: ${{ fromJSON(needs.setup.outputs.shards) }}
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

Expand All @@ -83,8 +77,16 @@ jobs:
- name: Install dependencies
run: jf npm ci

- name: Run type check
run: npm run typecheck -- --filter='@databricks/sdk-${{ matrix.shard }}'
- name: Build, lint, and format-check shard
shell: bash
env:
SHARD_PKGS: ${{ matrix.shard.pkgs }}
run: |
filters=""
for pkg in $SHARD_PKGS; do
filters="$filters --filter=$pkg"
done
npm run ci:verify -- $filters

test:
name: Test (Node.js ${{ matrix.node-version }})
Expand Down Expand Up @@ -157,41 +159,6 @@ jobs:
- name: Run browser tests
run: npm run test:browser

build:
name: Build (Node.js ${{ matrix.node-version }}, ${{ matrix.shard }})
runs-on:
group: databricks-protected-runner-group
labels: linux-ubuntu-latest
strategy:
fail-fast: false
matrix:
node-version: ['22', '24']
shard: ['[a-c]*', '[d-l]*', '[m-r]*', '[s-z]*']
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

- name: Setup JFrog CLI
uses: jfrog/setup-jfrog-cli@279b1f629f43dd5bc658d8361ac4802a7ef8d2d5 # v4.9.1
env:
JF_URL: https://databricks.jfrog.io
with:
oidc-provider-name: github-actions

- name: Setup Node.js ${{ matrix.node-version }}
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
with:
node-version: ${{ matrix.node-version }}
cache: 'npm'

- name: Configure npm for JFrog
run: jf npmc --repo-resolve=db-npm

- name: Install dependencies
run: jf npm ci

- name: Build
run: npm run build -- --filter='@databricks/sdk-${{ matrix.shard }}'

check-licenses:
name: Check Licenses
runs-on:
Expand All @@ -214,7 +181,7 @@ jobs:
ci-gate:
name: CI Gate
if: always()
needs: [lint, typecheck, test, test-browser, build, check-licenses]
needs: [setup, static, test, test-browser, check-licenses]
runs-on:
group: databricks-protected-runner-group
labels: linux-ubuntu-latest
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"format": "turbo run format",
"format:check": "turbo run format:check",
"checks": "turbo run lint format:check",
"ci:verify": "turbo run build lint format:check",
"typecheck": "turbo run typecheck",
"clean": "turbo run clean",
"check:licenses": "node scripts/check-licenses.mjs",
Expand Down
142 changes: 142 additions & 0 deletions scripts/ci-shards.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
#!/usr/bin/env node
// Partition the workspace packages into balanced CI shards.
//
// CI runs the static checks (build + lint + format) as a matrix where each
// shard handles a subset of packages. Alphabetical sharding is badly
// unbalanced because ~25% of packages share the `uc-` prefix, so this script
// balances shards by source weight instead.
//
// Usage:
// node scripts/ci-shards.mjs [shardCount]
// Prints a single-line JSON array of shard objects to stdout, suitable for
// a GitHub Actions matrix: [{ "id": "1", "pkgs": "<space-separated names>" }].
//
// node scripts/ci-shards.mjs [shardCount] --pretty
// Additionally prints a human-readable balance report to stderr (for local
// inspection). The JSON matrix is still written to stdout.
//
// Weighting: each package's weight is the total byte size of its TypeScript
// source files under `src/`, plus a fixed base to account for the per-package
// fixed cost (turbo task startup, dependency build). Shards are filled with a
// largest-processing-time-first greedy algorithm, which keeps the heaviest
// shard close to the theoretical optimum.

import {readdirSync, readFileSync, statSync} from 'node:fs';
import {join, dirname} from 'node:path';
import {fileURLToPath} from 'node:url';

// Filesystem anchors: this script sits one directory below the repository
// root, and the workspace packages are collected from `<root>/packages`.
const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url));
const REPO_ROOT = join(SCRIPT_DIR, '..');
const PACKAGES_DIR = join(REPO_ROOT, 'packages');

// Only packages whose name starts with this prefix are sharded.
const PACKAGE_PREFIX = '@databricks/sdk-';

// Per-package base weight, expressed in source-byte-equivalents and added to
// every package's measured source size. Type-aware lint and the tsc build pay
// a large fixed cost per package (loading the TS program, resolving the
// dependency type graph), so the number of packages in a shard drives its
// runtime more than the total source size does. Keeping the base near the
// median source size balances shards by package count while still letting
// heavy packages (e.g. `jobs`) count for more.
const BASE_WEIGHT = 60000;

/**
 * Sum the byte sizes of every `.ts` / `.tsx` file under `<packageDir>/src`,
 * descending into subdirectories.
 *
 * @param {string} packageDir - Directory that contains the package's `src/`.
 * @returns {number} Total size in bytes; 0 when `src/` is missing or empty.
 */
function sourceWeight(packageDir) {
  const tsSource = /\.tsx?$/;

  const bytesUnder = dir => {
    let children;
    try {
      children = readdirSync(dir, {withFileTypes: true});
    } catch {
      // Best effort: a directory that cannot be listed (typically a package
      // without `src/`) contributes nothing to the weight.
      return 0;
    }
    return children.reduce((bytes, child) => {
      const childPath = join(dir, child.name);
      if (child.isDirectory()) {
        return bytes + bytesUnder(childPath);
      }
      if (child.isFile() && tsSource.test(child.name)) {
        return bytes + statSync(childPath).size;
      }
      return bytes;
    }, 0);
  };

  return bytesUnder(join(packageDir, 'src'));
}

/**
 * Find every workspace package under PACKAGES_DIR and compute its weight.
 *
 * The root package.json declares `workspaces: ["packages/**"]`, so packages
 * can sit at any depth (e.g. packages/uc/catalogs). The whole tree is walked,
 * skipping entries named `node_modules` or `dist`, and every `package.json`
 * whose `name` starts with PACKAGE_PREFIX is recorded.
 *
 * @returns {{name: string, weight: number}[]} One record per package, where
 *   weight is BASE_WEIGHT plus the package's TypeScript source size.
 */
function collectPackages() {
  const skippedNames = new Set(['node_modules', 'dist']);
  const found = [];

  const visit = dir => {
    readdirSync(dir, {withFileTypes: true}).forEach(child => {
      if (skippedNames.has(child.name)) return;

      const childPath = join(dir, child.name);
      if (child.isDirectory()) {
        visit(childPath);
        return;
      }
      if (!child.isFile() || child.name !== 'package.json') return;

      let manifest;
      try {
        manifest = JSON.parse(readFileSync(childPath, 'utf8'));
      } catch {
        // Unreadable or malformed manifest: not a package we can shard.
        return;
      }

      const {name} = manifest;
      if (typeof name !== 'string' || !name.startsWith(PACKAGE_PREFIX)) return;

      // `dir` is the package root, i.e. the directory holding package.json.
      found.push({name, weight: BASE_WEIGHT + sourceWeight(dir)});
    });
  };

  visit(PACKAGES_DIR);
  return found;
}

/**
 * Distribute packages over `shardCount` shards with the
 * largest-processing-time-first greedy rule: take packages from heaviest to
 * lightest and always add the next one to the shard that is currently
 * lightest (the earliest such shard on ties).
 *
 * @param {{name: string, weight: number}[]} packages - Packages to place;
 *   the array itself is not modified.
 * @param {number} shardCount - Number of shards to create.
 * @returns {{pkgs: string[], weight: number}[]} Shards in creation order,
 *   each with its package names (in assignment order) and summed weight.
 */
function partition(packages, shardCount) {
  const buckets = Array.from({length: shardCount}).map(() => {
    return {pkgs: [], weight: 0};
  });

  // Heaviest first; equal weights fall back to name order so the result is
  // deterministic.
  const heaviestFirst = (left, right) => {
    const byWeight = right.weight - left.weight;
    return byWeight || left.name.localeCompare(right.name);
  };

  const pickLightest = () =>
    buckets.reduce((best, candidate) =>
      candidate.weight < best.weight ? candidate : best
    );

  packages
    .slice()
    .sort(heaviestFirst)
    .forEach(({name, weight}) => {
      const target = pickLightest();
      target.pkgs.push(name);
      target.weight += weight;
    });

  return buckets;
}

// Entry point: parse the CLI arguments, compute the shards, and print the
// matrix as single-line JSON on stdout (plus an optional report on stderr).
//
// Arguments are order-independent: the first argument that is not a `--flag`
// is the shard count (default 8), and `--pretty` may appear anywhere. This
// makes `node scripts/ci-shards.mjs --pretty` work without an explicit count
// instead of parsing `--pretty` as the count.
const cliArgs = process.argv.slice(2);
const pretty = cliArgs.includes('--pretty');
const shardCountArg = cliArgs.find(arg => !arg.startsWith('--')) ?? '8';
const shardCount = Number.parseInt(shardCountArg, 10);

// Reject a missing/invalid count up front; otherwise partition() would be
// asked for zero shards and fail with an unhelpful "reduce of empty array".
if (!Number.isInteger(shardCount) || shardCount < 1) {
  process.stderr.write(
    `ci-shards: shard count must be a positive integer, got "${shardCountArg}"\n`
  );
  process.exit(1);
}

const packages = collectPackages();
if (packages.length === 0) {
  // An empty matrix is never what CI wants; fail loudly instead of emitting it.
  process.stderr.write(
    `ci-shards: no ${PACKAGE_PREFIX}* packages found under ${PACKAGES_DIR}\n`
  );
  process.exit(1);
}

// Drop shards that received no package (possible when there are fewer
// packages than requested shards). An empty `pkgs` string would make the CI
// step build no `--filter` flags at all, i.e. an unfiltered run, and a
// zero-weight shard would turn the imbalance figure below into Infinity.
// Greedy filling leaves only trailing shards empty, so shard ids stay 1..n.
const shards = partition(packages, shardCount).filter(
  s => s.pkgs.length > 0
);

if (pretty) {
  const lines = shards.map((s, i) => {
    const kb = Math.round(s.weight / 1024);
    return `shard ${i + 1}: ${String(s.pkgs.length).padStart(2)} pkgs, ${String(kb).padStart(5)} KB`;
  });
  const weights = shards.map(s => s.weight);
  // How much heavier the heaviest shard is than the lightest, in percent.
  const imbalance = (
    (Math.max(...weights) / Math.min(...weights) - 1) *
    100
  ).toFixed(1);
  process.stderr.write(
    `${packages.length} packages -> ${shards.length} shards\n${lines.join('\n')}\n` +
      `max/min weight imbalance: ${imbalance}%\n`
  );
}

// GitHub Actions matrix entries: a string id and a space-separated,
// alphabetically sorted package list that the workflow turns into
// `--filter=<pkg>` flags.
const matrix = shards.map((s, i) => ({
  id: String(i + 1),
  pkgs: [...s.pkgs].sort().join(' '),
}));
process.stdout.write(JSON.stringify(matrix));
Loading