diff --git a/acceptance/bundle/validate/reserved_deployment_fields/databricks.yml b/acceptance/bundle/validate/reserved_deployment_fields/databricks.yml new file mode 100644 index 00000000000..7215146813d --- /dev/null +++ b/acceptance/bundle/validate/reserved_deployment_fields/databricks.yml @@ -0,0 +1,18 @@ +bundle: + name: test-bundle + +resources: + jobs: + my_job: + name: my_job + deployment: + kind: BUNDLE + deployment_id: "dep-123" + version_id: "ver-456" + pipelines: + my_pipeline: + name: my_pipeline + deployment: + kind: BUNDLE + deployment_id: "dep-789" + version_id: "ver-012" diff --git a/acceptance/bundle/validate/reserved_deployment_fields/out.test.toml b/acceptance/bundle/validate/reserved_deployment_fields/out.test.toml new file mode 100644 index 00000000000..f784a183258 --- /dev/null +++ b/acceptance/bundle/validate/reserved_deployment_fields/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/validate/reserved_deployment_fields/output.txt b/acceptance/bundle/validate/reserved_deployment_fields/output.txt new file mode 100644 index 00000000000..87a0f1b2d49 --- /dev/null +++ b/acceptance/bundle/validate/reserved_deployment_fields/output.txt @@ -0,0 +1,27 @@ + +>>> [CLI] bundle validate +Error: deployment_id must not be set in bundle configuration; it is managed by Databricks Asset Bundles + at resources.jobs.my_job.deployment.deployment_id + in databricks.yml:10:24 + +Error: version_id must not be set in bundle configuration; it is managed by Databricks Asset Bundles + at resources.jobs.my_job.deployment.version_id + in databricks.yml:11:21 + +Error: deployment_id must not be set in bundle configuration; it is managed by Databricks Asset Bundles + at resources.pipelines.my_pipeline.deployment.deployment_id + in databricks.yml:17:24 + +Error: version_id must not be set in bundle configuration; it is managed by Databricks Asset Bundles + at resources.pipelines.my_pipeline.deployment.version_id + in databricks.yml:18:21 + +Name: test-bundle +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle/default + +Found 4 errors + +Exit code: 1 diff --git a/acceptance/bundle/validate/reserved_deployment_fields/script b/acceptance/bundle/validate/reserved_deployment_fields/script new file mode 100644 index 00000000000..5350876150f --- /dev/null +++ b/acceptance/bundle/validate/reserved_deployment_fields/script @@ -0,0 +1 @@ +trace $CLI bundle validate diff --git a/bundle/config/validate/validate_deployment_fields.go b/bundle/config/validate/validate_deployment_fields.go new file mode 100644 index 00000000000..6a0bf7fedb1 --- /dev/null +++ b/bundle/config/validate/validate_deployment_fields.go @@ -0,0 +1,61 @@ +package validate + +import ( + "cmp" + "context" + "slices" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +func ValidateDeploymentFields() bundle.ReadOnlyMutator { + return &validateDeploymentFields{} +} + +type validateDeploymentFields struct{ bundle.RO } + +func (v *validateDeploymentFields) Name() string { + return "validate:validate_deployment_fields" +} + +func (v *validateDeploymentFields) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics + + // deployment_id and version_id identify the bundle deployment and its version + // in the Deployment Metadata Service. The CLI sets them on every deploy, so a + // value provided by hand would be overwritten; reject it up front. + reject := func(resourcePath, field, value string) { + if value == "" { + return + } + path := resourcePath + ".deployment." + field + diags = append(diags, diag.Diagnostic{ + Severity: diag.Error, + Summary: field + " must not be set in bundle configuration; it is managed by Databricks Asset Bundles", + Paths: []dyn.Path{dyn.MustPathFromString(path)}, + Locations: b.Config.GetLocations(path), + }) + } + + for name, job := range b.Config.Resources.Jobs { + if d := job.Deployment; d != nil { + reject("resources.jobs."+name, "deployment_id", d.DeploymentId) + reject("resources.jobs."+name, "version_id", d.VersionId) + } + } + for name, pipeline := range b.Config.Resources.Pipelines { + if d := pipeline.Deployment; d != nil { + reject("resources.pipelines."+name, "deployment_id", d.DeploymentId) + reject("resources.pipelines."+name, "version_id", d.VersionId) + } + } + + // Map iteration order is randomized; sort by path for stable output. + slices.SortFunc(diags, func(x, y diag.Diagnostic) int { + return cmp.Compare(x.Paths[0].String(), y.Paths[0].String()) + }) + + return diags +} diff --git a/bundle/config/validate/validate_deployment_fields_test.go b/bundle/config/validate/validate_deployment_fields_test.go new file mode 100644 index 00000000000..bdd5fb52c5b --- /dev/null +++ b/bundle/config/validate/validate_deployment_fields_test.go @@ -0,0 +1,80 @@ +package validate + +import ( + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/databricks/databricks-sdk-go/service/pipelines" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const reservedFieldMsg = " must not be set in bundle configuration; it is managed by Databricks Asset Bundles" + +func jobBundle(d *jobs.JobDeployment) *bundle.Bundle { + return &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "my_job": {JobSettings: jobs.JobSettings{Name: "my_job", Deployment: d}}, + }, + }, + }, + } +} + +func pipelineBundle(d *pipelines.PipelineDeployment) *bundle.Bundle { + return &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Pipelines: map[string]*resources.Pipeline{ + "my_pipeline": {CreatePipeline: pipelines.CreatePipeline{Name: "my_pipeline", Deployment: d}}, + }, + }, + }, + } +} + +func TestValidateDeploymentFieldsRejectsReservedFields(t *testing.T) { + tests := []struct { + name string + b *bundle.Bundle + want string + }{ + {"job deployment_id", jobBundle(&jobs.JobDeployment{DeploymentId: "x"}), "deployment_id" + reservedFieldMsg}, + {"job version_id", jobBundle(&jobs.JobDeployment{VersionId: "x"}), "version_id" + reservedFieldMsg}, + {"pipeline deployment_id", pipelineBundle(&pipelines.PipelineDeployment{DeploymentId: "x"}), "deployment_id" + reservedFieldMsg}, + {"pipeline version_id", pipelineBundle(&pipelines.PipelineDeployment{VersionId: "x"}), "version_id" + reservedFieldMsg}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + diags := ValidateDeploymentFields().Apply(t.Context(), tt.b) + require.Len(t, diags, 1) + assert.Equal(t, diag.Error, diags[0].Severity) + assert.Equal(t, tt.want, diags[0].Summary) + }) + } +} + +func TestValidateDeploymentFieldsReportsAllOffenders(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "my_job": {JobSettings: jobs.JobSettings{Deployment: &jobs.JobDeployment{DeploymentId: "a"}}}, + }, + Pipelines: map[string]*resources.Pipeline{ + "my_pipeline": {CreatePipeline: pipelines.CreatePipeline{Deployment: &pipelines.PipelineDeployment{VersionId: "b"}}}, + }, + }, + }, + } + + diags := ValidateDeploymentFields().Apply(t.Context(), b) + require.Len(t, diags, 2) +} diff --git a/bundle/direct/bundle_plan.go b/bundle/direct/bundle_plan.go index 5591626cd75..e2be68f80fb 100644 --- a/bundle/direct/bundle_plan.go +++ b/bundle/direct/bundle_plan.go @@ -357,7 +357,12 @@ func addPerFieldActions(ctx context.Context, adapter *dresources.Adapter, change return err } - if structdiff.IsEqual(ch.Remote, ch.New) { + if shouldDropFromPlan(cfg, path) || shouldDropFromPlan(generatedCfg, path) { + // Drop the field from the plan entirely. ReasonDrop removes the entry + // below so it never surfaces as a change regardless of its diff state. + ch.Action = deployplan.Skip + ch.Reason = deployplan.ReasonDrop + } else if structdiff.IsEqual(ch.Remote, ch.New) { ch.Action = deployplan.Skip ch.Reason = deployplan.ReasonRemoteAlreadySet } else if allEmpty(ch.Old, ch.New, ch.Remote) { @@ -434,6 +439,16 @@ func shouldSkip(cfg *dresources.ResourceLifecycleConfig, path *structpath.PathNo return "", false } +// shouldDropFromPlan reports whether the field matches a HideFromPlan rule, meaning +// it should be removed from the plan entirely rather than shown as a change. +func shouldDropFromPlan(cfg *dresources.ResourceLifecycleConfig, path *structpath.PathNode) bool { + if cfg == nil { + return false + } + _, ok := findMatchingRule(path, cfg.HideFromPlan) + return ok +} + func shouldUpdateOrRecreate(cfg *dresources.ResourceLifecycleConfig, path *structpath.PathNode) (deployplan.ActionType, string) { if cfg == nil { return deployplan.Undefined, "" diff --git a/bundle/direct/bundle_plan_test.go b/bundle/direct/bundle_plan_test.go index ccfb7cb517f..5d01f5de607 100644 --- a/bundle/direct/bundle_plan_test.go +++ b/bundle/direct/bundle_plan_test.go @@ -3,8 +3,11 @@ package direct import ( "testing" + "github.com/databricks/cli/bundle/direct/dresources" "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/structs/structpath" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestDynPathToStructPath(t *testing.T) { @@ -35,3 +38,19 @@ func TestDynPathToStructPath(t *testing.T) { assert.Equal(t, tc.expected, node.String()) } } + +// TestShouldDropFromPlanDeploymentVersion verifies that the DMS deployment +// version_id is dropped from the plan for jobs and pipelines, while +// deployment_id is left in (it is stable and fine to show). +func TestShouldDropFromPlanDeploymentVersion(t *testing.T) { + versionID, err := structpath.ParsePath("deployment.version_id") + require.NoError(t, err) + deploymentID, err := structpath.ParsePath("deployment.deployment_id") + require.NoError(t, err) + + for _, resourceType := range []string{"jobs", "pipelines"} { + cfg := dresources.GetResourceConfig(resourceType) + assert.True(t, shouldDropFromPlan(cfg, versionID), "%s: deployment.version_id should be dropped from plan", resourceType) + assert.False(t, shouldDropFromPlan(cfg, deploymentID), "%s: deployment.deployment_id should not be dropped from plan", resourceType) + } +} diff --git a/bundle/direct/dresources/config.go b/bundle/direct/dresources/config.go index 70f6dbb1d8d..eb25368325d 100644 --- a/bundle/direct/dresources/config.go +++ b/bundle/direct/dresources/config.go @@ -62,6 +62,13 @@ type ResourceLifecycleConfig struct { // BackendDefaults: fields where the backend may set defaults. // When old and new are nil but remote is set, and the remote value matches allowed values (if specified), the change is skipped. BackendDefaults []BackendDefaultRule `yaml:"backend_defaults,omitempty"` + + // HideFromPlan: field patterns removed from the plan diff entirely (never shown as a change). + // This only controls display: it does not by itself decide whether a change is applied. + // Use for fields the CLI sets on every deploy where the churn is pure noise (e.g. + // deployment.version_id). Pair it with ignore_local_changes / ignore_remote_changes, which + // are what actually keep the field from driving an update. + HideFromPlan []FieldRule `yaml:"hide_from_plan,omitempty"` } // Config is the root configuration structure for resource lifecycle behavior. @@ -81,6 +88,7 @@ var empty = ResourceLifecycleConfig{ RecreateOnChanges: nil, UpdateIDOnChanges: nil, BackendDefaults: nil, + HideFromPlan: nil, } func mustParseConfig(data []byte) func() *Config { diff --git a/bundle/direct/dresources/config_test.go b/bundle/direct/dresources/config_test.go index a9a347ec747..c376e60c98b 100644 --- a/bundle/direct/dresources/config_test.go +++ b/bundle/direct/dresources/config_test.go @@ -35,6 +35,7 @@ func categoryRules(c ResourceLifecycleConfig) []struct { {"recreate_on_changes", c.RecreateOnChanges}, {"update_id_on_changes", c.UpdateIDOnChanges}, {"backend_defaults", backendAsFieldRules}, + {"hide_from_plan", c.HideFromPlan}, } } diff --git a/bundle/direct/dresources/resources.yml b/bundle/direct/dresources/resources.yml index 8da2d5fee50..58fb8f07cbc 100644 --- a/bundle/direct/dresources/resources.yml +++ b/bundle/direct/dresources/resources.yml @@ -8,6 +8,7 @@ # ignore_local_changes: fields where local changes are ignored (can't be updated via API) # backend_defaults: fields where the backend may set defaults (skipped when old/new are nil but remote is set) # Optional "values" list constrains which remote values are allowed (as JSON-compatible literals). +# hide_from_plan: fields removed from the plan diff entirely (never shown as a change; display only) # # Each field entry has: # field: the field path @@ -16,7 +17,23 @@ resources: jobs: + # version_id is set to the current DMS deployment version on every + # deploy, so it changes constantly. hide_from_plan keeps that churn out of + # the plan output, while the ignore_*_changes rules are what keep it from + # driving an update or showing as drift. deployment_id is intentionally left + # out: it is stable across versions, so showing it in the plan is fine. + hide_from_plan: + - field: deployment.version_id + reason: managed by the deployment metadata service + + ignore_local_changes: + - field: deployment.version_id + reason: managed by the deployment metadata service + ignore_remote_changes: + - field: deployment.version_id + reason: managed by the deployment metadata service + # Same as clusters.{aws,azure,gcp}_attributes — see clusters/resource_cluster.go#L361-L363 # s.SchemaPath("aws_attributes").SetSuppressDiff() # s.SchemaPath("azure_attributes").SetSuppressDiff() @@ -118,7 +135,16 @@ resources: - field: ingestion_definition.ingest_from_uc_foreign_catalog reason: immutable + # See jobs.hide_from_plan above: version_id is set on every deploy, so it is + # hidden from the plan and ignored as a local/remote change. deployment_id + # is left out so it still shows in the plan. + hide_from_plan: + - field: deployment.version_id + reason: managed by the deployment metadata service + ignore_remote_changes: + - field: deployment.version_id + reason: managed by the deployment metadata service # "id" is handled in a special way before any fields changed # However, it is also part of RemotePipeline via CreatePipeline. # Thus it shows up as a remote change since we don't set on the object. @@ -128,6 +154,8 @@ resources: reason: input_only ignore_local_changes: + - field: deployment.version_id + reason: managed by the deployment metadata service # "id" is output-only, providing it in config would be a mistake - field: id reason: "!drop" diff --git a/bundle/internal/schema/main.go b/bundle/internal/schema/main.go index f8bc399134e..86d45758834 100644 --- a/bundle/internal/schema/main.go +++ b/bundle/internal/schema/main.go @@ -14,6 +14,7 @@ import ( "github.com/databricks/cli/bundle/config/variable" "github.com/databricks/cli/libs/jsonschema" "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/databricks/databricks-sdk-go/service/pipelines" ) func interpolationPattern(s string) string { @@ -128,6 +129,25 @@ func removePipelineFields(typ reflect.Type, s jsonschema.Schema) jsonschema.Sche return s } +// removeDeploymentFields strips deployment_id and version_id from the job and +// pipeline deployment blocks. The CLI sets these to track the bundle +// deployment in the Deployment Metadata Service; they are not user-configurable, +// so they must not appear in the JSON schema or the generated annotation files. +// The parent "deployment" block is already removed from the Job and Pipeline +// schemas (see removeJobsFields / removePipelineFields); this removes the fields +// from the JobDeployment / PipelineDeployment type definitions themselves. +func removeDeploymentFields(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { + switch typ { + case reflect.TypeFor[jobs.JobDeployment](), reflect.TypeFor[pipelines.PipelineDeployment](): + delete(s.Properties, "deployment_id") + delete(s.Properties, "version_id") + default: + // Do nothing + } + + return s +} + // While volume_type is required in the volume create API, DABs automatically sets // it's value to "MANAGED" if it's not provided. Thus, we make it optional // in the bundle schema. @@ -225,6 +245,7 @@ func generateSchema(workdir, outputFile string, docsMode bool) { transforms := []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{ removeJobsFields, removePipelineFields, + removeDeploymentFields, makeVolumeTypeOptional, a.addAnnotations, removeOutputOnlyFields, diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index a40506ebb18..4409658b7f3 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -158,6 +158,10 @@ func Initialize(ctx context.Context, b *bundle.Bundle) { // Validate that no dashboard etags are set. They are purely internal state and should not be set by the user. validate.ValidateDashboardEtags(), + // Validate that deployment_id / version_id are not set on jobs or pipelines. + // They are set by the CLI to track the bundle deployment and must not be set by the user. + validate.ValidateDeploymentFields(), + // Reads (dynamic): * (strings) (searches for ${resources.*} references) // Warns (TF engine) or errors (direct engine) when a cross-resource reference // points to a Terraform-only field with no DABs equivalent. diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 476eaaf3637..42fe5c3d781 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -6419,9 +6419,6 @@ { "type": "object", "properties": { - "deployment_id": { - "$ref": "#/$defs/string" - }, "kind": { "description": "The kind of deployment that manages the job.\n\n* `BUNDLE`: The job is managed by Databricks Asset Bundle.\n* `SYSTEM_MANAGED`: The job is managed by Databricks and is read-only.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobDeploymentKind" @@ -6429,9 +6426,6 @@ "metadata_file_path": { "description": "Path of the file that contains deployment metadata.", "$ref": "#/$defs/string" - }, - "version_id": { - "$ref": "#/$defs/string" } }, "additionalProperties": false, @@ -9463,9 +9457,6 @@ { "type": "object", "properties": { - "deployment_id": { - "$ref": "#/$defs/string" - }, "kind": { "description": "The deployment method that manages the pipeline.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.DeploymentKind" @@ -9473,9 +9464,6 @@ "metadata_file_path": { "description": "The path to the file containing metadata about the deployment.", "$ref": "#/$defs/string" - }, - "version_id": { - "$ref": "#/$defs/string" } }, "additionalProperties": false,