diff --git a/.github/actions/setup-build-environment/action.yml b/.github/actions/setup-build-environment/action.yml
index 3e7bde9704e..d7dae7b63cf 100644
--- a/.github/actions/setup-build-environment/action.yml
+++ b/.github/actions/setup-build-environment/action.yml
@@ -5,6 +5,10 @@ inputs:
   cache-key:
     description: 'Cache key identifier for Go cache'
     required: true
+  save-cache:
+    description: 'Whether this job may save the Go cache (only effective on main). Set to false on jobs that share a cache-key across many matrix instances so only one designated job writes the key.'
+    required: false
+    default: 'true'
 
 runs:
   using: 'composite'
@@ -38,9 +42,12 @@ runs:
     # On runs against main (push + the scheduled wipe-and-repopulate
     # cron added in #2092): restore now, save at job end via the
     # unified action's post-step (which fires at the calling job's
-    # end, even when invoked from a composite).
+    # end, even when invoked from a composite). Gated on save-cache so
+    # that when many matrix instances share one cache-key, only the
+    # designated job writes it (of concurrent saves to the same key, all
+    # but the first fail, so the extra writers would just waste time).
     - name: Restore and save Go cache (main)
-      if: github.ref == 'refs/heads/main'
+      if: github.ref == 'refs/heads/main' && inputs.save-cache == 'true'
       uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
       with:
         path: |
@@ -50,12 +57,12 @@ runs:
         restore-keys: |
           setup-go-${{ inputs.cache-key }}-${{ runner.os }}-go${{ steps.setup-go.outputs.go-version }}-
 
-    # On every other ref (PR / merge_group): restore only. Prefix
-    # fallback via restore-keys means runs whose go.sum differs from
-    # main still restore main's most recent cache and rebuild only
-    # the delta.
+    # On every other ref (PR / merge_group) or when this job is not the
+    # designated cache writer: restore only. Prefix fallback via
+    # restore-keys means runs whose go.sum differs from main still
+    # restore main's most recent cache and rebuild only the delta.
     - name: Restore Go cache (non-main)
-      if: github.ref != 'refs/heads/main'
+      if: github.ref != 'refs/heads/main' || inputs.save-cache != 'true'
       uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
       with:
         path: |
diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml
index 0bab4ba3d76..8aaa12926a7 100644
--- a/.github/workflows/push.yml
+++ b/.github/workflows/push.yml
@@ -35,6 +35,7 @@ jobs:
     runs-on: ubuntu-latest
     outputs:
       targets: ${{ steps.mask1.outputs.targets || steps.mask2.outputs.targets || steps.mask3.outputs.targets }}
+      acc_matrix: ${{ steps.accmatrix.outputs.matrix }}
     steps:
       - name: Checkout repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -70,14 +71,60 @@ jobs:
           # Always run all tests
           echo "targets=[\"test\"]" >> $GITHUB_OUTPUT
 
-  test:
+      # Emit the acceptance-test shard matrix as a job output. Shard counts
+      # differ per (os, engine), which a static cross-product matrix cannot
+      # express, so an explicit include-list is generated for fromJSON to read.
+      - name: Build acceptance test shard matrix
+        id: accmatrix
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+        run: |
+          python3 - <<'PY' >> "$GITHUB_OUTPUT"
+          import json, os
+
+          RUNNERS = {
+              "linux": {"group": "databricks-protected-runner-group-large", "labels": "linux-ubuntu-latest-large"},
+              "windows": {"group": "databricks-protected-runner-group-large", "labels": "windows-server-latest-large"},
+              "macos": {"labels": "macos-latest"},
+          }
+          # (os, engine, shard count), listed in emission order. Windows runs
+          # with TASK_CONCURRENCY=1, which serializes tests inside each job, so
+          # more parallel jobs are the only way to cut its wall time. direct is
+          # faster than terraform and is split into fewer shards.
+          SHARDS = [
+              ("linux", "terraform", 4),
+              ("linux", "direct", 2),
+              ("macos", "terraform", 4),
+              ("macos", "direct", 2),
+              ("windows", "terraform", 8),
+              ("windows", "direct", 4),
+          ]
+
+          # In the merge queue only Linux runs, to reduce time to merge.
+          linux_only = os.environ["EVENT_NAME"] == "merge_group"
+          entries = [
+              {
+                  "os": {"name": name, "runner": RUNNERS[name]},
+                  "deployment": engine,
+                  "shard_index": shard,
+                  "shard_total": count,
+              }
+              for name, engine, count in SHARDS
+              if name == "linux" or not linux_only
+              for shard in range(count)
+          ]
+
+          print("matrix=" + json.dumps({"include": entries}))
+          PY
+
+  test-unit:
     needs:
       - cleanups
       - testmask
 
     # Only run if the target is in the list of targets from testmask
     if: ${{ contains(fromJSON(needs.testmask.outputs.targets), 'test') }}
-    name: "task test (${{matrix.os.name}}, ${{matrix.deployment}})"
+    name: "task test-unit (${{matrix.os.name}})"
     runs-on: ${{ matrix.os.runner }}
 
     defaults:
@@ -94,8 +141,6 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        # Use separate fields for the OS name and runner configuration.
-        # When combined in a single object, "runs-on" errors with "Unexpected value 'name'".
         os:
           - name: linux
             runner:
@@ -111,10 +156,6 @@ jobs:
             runner:
               labels: macos-latest
 
-        deployment:
-          - "terraform"
-          - "direct"
-
         # Include "event_name" in the matrix so we can include/exclude based on it.
         event:
           - ${{ github.event_name }}
@@ -135,20 +176,83 @@ jobs:
       - name: Setup build environment
         uses: ./.github/actions/setup-build-environment
         with:
-          cache-key: test-${{ matrix.deployment }}
+          # Sole writer of the shared "test" cache (test-acc shards restore it).
+          cache-key: test
+
+      - name: Run tests
+        run: go tool -modfile=tools/task/go.mod task test-unit
+
+      - name: Upload gotestsum JSON output
+        # Always upload so we can inspect timing even if tests fail.
+        # This is debug-only telemetry; a flaky artifact upload must not fail
+        # an otherwise-passing job.
+        if: ${{ always() }}
+        continue-on-error: true
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: test-output-unit-${{ matrix.os.name }}
+          path: test-output-unit.json
+          if-no-files-found: warn
+          retention-days: 7
+
+  test:
+    needs:
+      - cleanups
+      - testmask
+
+    # Only run if the target is in the list of targets from testmask
+    if: ${{ contains(fromJSON(needs.testmask.outputs.targets), 'test') }}
+    name: "task test-acc (${{matrix.os.name}}, ${{matrix.deployment}}, shard ${{matrix.shard_index}}/${{matrix.shard_total}})"
+    runs-on: ${{ matrix.os.runner }}
+
+    defaults:
+      run:
+        shell: bash
+
+    permissions:
+      id-token: write
+      contents: read
+
+    env:
+      TASK_CONCURRENCY: ${{ matrix.os.name == 'windows' && '1' || '' }}
+
+    strategy:
+      fail-fast: false
+      # Generated by testmask: an include-list with per-(os, engine) shard
+      # counts. Each entry carries os{name,runner}, deployment, shard_index,
+      # and shard_total.
+      matrix: ${{ fromJSON(needs.testmask.outputs.acc_matrix) }}
+
+    steps:
+      - name: Checkout repository and submodules
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Setup build environment
+        uses: ./.github/actions/setup-build-environment
+        with:
+          # Restore-only: reuse test-unit's cache-key so every shard picks up
+          # the cache that job saves. Many shard/deployment instances share
+          # this key, so saving is turned off here and left to test-unit.
+          cache-key: test
+          save-cache: 'false'
 
       - name: Run tests
         env:
           ENVFILTER: DATABRICKS_BUNDLE_ENGINE=${{ matrix.deployment }}
-        run: go tool -modfile=tools/task/go.mod task test
+          SHARD_INDEX: ${{ matrix.shard_index }}
+          SHARD_TOTAL: ${{ matrix.shard_total }}
+        run: go tool -modfile=tools/task/go.mod task test-acc
 
       - name: Upload gotestsum JSON output
         # Always upload so we can inspect timing even if tests fail.
+        # This is debug-only telemetry; a flaky artifact upload must not fail
+        # an otherwise-passing job.
         if: ${{ always() }}
+        continue-on-error: true
         uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
-          name: test-output-${{ matrix.os.name }}-${{ matrix.deployment }}
-          path: test-output.json
+          name: test-output-${{ matrix.os.name }}-${{ matrix.deployment }}-shard${{ matrix.shard_index }}
+          path: test-output-acc.json
           if-no-files-found: warn
           retention-days: 7
 
@@ -329,6 +433,7 @@ jobs:
   # Reference: https://github.com/orgs/community/discussions/25970
   test-result:
     needs:
+      - test-unit
       - test
       - test-exp-aitools
       - test-exp-ssh
diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go
index f6ec0805fb2..fe8877c7489 100644
--- a/acceptance/acceptance_test.go
+++ b/acceptance/acceptance_test.go
@@ -350,6 +350,10 @@ func testAccept(t *testing.T, inprocessMode bool, singleTest string) int {
 			return n != singleTest
 		})
 		require.NotEmpty(t, testDirs, "singleTest=%#v did not match any tests\n%#v", singleTest, testDirs)
+	} else {
+		// Sharding applies only to the full run. A specific singleTest (e.g.
+		// TestInprocessMode) must never be filtered out by the shard split.
+		testDirs = shardTests(testDirs)
 	}
 
 	skippedDirs := 0
@@ -513,6 +517,24 @@ func getTests(t *testing.T) []string {
 	return testDirs
 }
 
+// shardTests returns the part of testDirs owned by this CI shard: the entries
+// at positions SHARD_INDEX, SHARD_INDEX+SHARD_TOTAL, ... testDirs must be
+// sorted so the split is stable. It returns testDirs unchanged if SHARD_TOTAL
+// is unset, invalid or <= 1, or if SHARD_INDEX is not in [0, SHARD_TOTAL).
+func shardTests(testDirs []string) []string {
+	total, _ := strconv.Atoi(os.Getenv("SHARD_TOTAL"))
+	index, err := strconv.Atoi(os.Getenv("SHARD_INDEX"))
+	if total <= 1 || err != nil || index < 0 || index >= total {
+		return testDirs
+	}
+	// Build a fresh slice so the caller's backing array is not overwritten.
+	sharded := make([]string, 0, len(testDirs)/total+1)
+	for i := index; i < len(testDirs); i += total {
+		sharded = append(sharded, testDirs[i])
+	}
+	return sharded
+}
+
 func validateTestPhase(phase int) error {
 	if phase == 0 || phase == 1 {
 		return nil