From a15edf8611f3ddb450f0cf85f030f3e3fb1269f5 Mon Sep 17 00:00:00 2001 From: "michael.richey" Date: Mon, 27 Apr 2026 12:58:36 -0400 Subject: [PATCH 1/5] feat(sync): add --force-missing-dependencies support to import command Allows import to resolve and fetch all transitive dependencies, producing a self-contained local state so sync never calls the source API for missing deps in split import/sync workflows. Co-Authored-By: Claude Sonnet 4.6 --- datadog_sync/utils/resources_handler.py | 112 ++++ tests/integration/helpers.py | 3 + tests/unit/test_import_force_missing_deps.py | 606 +++++++++++++++++++ 3 files changed, 721 insertions(+) create mode 100644 tests/unit/test_import_force_missing_deps.py diff --git a/datadog_sync/utils/resources_handler.py b/datadog_sync/utils/resources_handler.py index 36cf1afe..90449f6e 100644 --- a/datadog_sync/utils/resources_handler.py +++ b/datadog_sync/utils/resources_handler.py @@ -374,6 +374,19 @@ async def _diffs_worker_cb(self, q_item: List) -> None: async def import_resources(self) -> None: await self.import_resources_without_saving() + + if self.config.force_missing_dependencies: + missing = self._discover_missing_dependencies() + if missing: + self.config.logger.info(f"importing {len(missing)} missing dependencies...") + await self.worker.init_workers(self._import_missing_dep_cb, None, len(missing)) + for item in missing: + self.worker.work_queue.put_nowait(item) + await self.worker.schedule_workers() + self.config.logger.info("finished importing missing dependencies") + else: + self.config.logger.info("no missing dependencies found") + self.config.state.dump_state(Origin.SOURCE) async def import_resources_without_saving(self) -> None: @@ -449,6 +462,105 @@ async def _import_resource(self, q_item: List) -> None: self.config.logger.error(f"error while importing resource: resource_type:{resource_type} id:{_id}") self.config.logger.debug(f"error detail: {str(e)}", resource_type=resource_type) + def _discover_missing_dependencies(self) -> Set[Tuple[str, str]]: + """Scan imported resources for dependency IDs not yet in source state. + + Reuses _resource_connections() which calls find_attr() -> connect_id() + on each resource class — the same code path as the sync-time dependency + check. With empty destination state, all deps appear as "failed + connections"; we discard those and only use the missing_resources subset + (deps not in source state). + + Note: config.resources contains ALL resource types (not just resources_arg), + so dependency types outside the user's --resources list are handled correctly. + """ + missing: Set[Tuple[str, str]] = set() + for resource_type in self.config.resources_arg: + r_class = self.config.resources[resource_type] + if not r_class.resource_config.resource_connections: + continue + for _id in list(self.config.state.source[resource_type].keys()): + # failed_connections is discarded: during import, destination state + # is empty so all deps appear as "failed connections". Only + # missing_resources (deps not in source state) matters here. + _, missing_for_resource = self._resource_connections(resource_type, _id) + missing.update(missing_for_resource) + return missing + + async def _import_missing_dep_cb(self, q_item: Tuple[str, str]) -> None: + """Import a single missing dependency by ID, then recursively discover + further missing deps and enqueue them. + + Import-time equivalent of _force_missing_dep_import_cb (sync-time). + See that method for the sync-time version. Key differences: + - Does NOT populate _dependency_graph (import has no sync graph) + - Only ensures resources land in source state + + Metrics/counters are intentionally omitted, matching the sync-time + callback convention. + + Circular dependency safety: _import_resource stores the resource in + config.state.source[resource_type][_id] (base_resource.py) BEFORE + this method returns. When _resource_connections discovers transitive deps, + the source-state check prevents re-enqueuing already-imported resources. + + Race condition note: The dedup guard below is not atomic with the + subsequent _import_resource call. Two workers can pass the guard + simultaneously for the same dep. This is benign — _import_resource + is idempotent (fetches the same data and overwrites the same key). + The check is a best-effort optimization, not a correctness guarantee. + Same accepted risk exists in _force_missing_dep_import_cb. + + Cancel callback note: Workers use cancel_cb=None (default: queue.empty). + There is a narrow window where the queue empties before a callback + enqueues transitive deps, causing premature shutdown. This risk is + slightly higher than in _force_missing_dep_import_cb (sync-time) + because during import, transitive deps from newly-imported resources + are MORE likely (sync-time already did a full scan via + get_dependency_graph). In practice this is acceptable because imports + are idempotent — a second run resolves any missed deps. + """ + resource_type, _id = q_item + + # Guard: unknown resource type + if resource_type not in self.config.resources: + self.config.logger.warning(f"skipping unknown dependency type: {resource_type}", _id=_id) + return + + # Skip if already imported (best-effort dedup + circular dep safety) + if _id in self.config.state.source[resource_type]: + return + + try: + _id = await self.config.resources[resource_type]._import_resource(_id=_id) + self._emit(resource_type, _id, "import", "success") + except SkipResource as e: + self._emit(resource_type, _id, "import", "skipped", reason=self._sanitize_reason(e)) + self.config.logger.info(f"skipping dependency: {str(e)}", resource_type=resource_type, _id=_id) + return + except CustomClientHTTPError as e: + self._emit(resource_type, _id, "import", "failure", reason=self._sanitize_reason(e)) + self.config.logger.error(f"error importing dependency: {str(e)}", resource_type=resource_type, _id=_id) + return + except Exception as e: + self._emit(resource_type, _id, "import", "failure", reason=self._sanitize_reason(e)) + self.config.logger.error(f"error importing dependency: {str(e)}", resource_type=resource_type, _id=_id) + return + + # Recursively discover transitive deps from the newly imported resource. + try: + r_class = self.config.resources[resource_type] + if r_class.resource_config.resource_connections: + _, transitive_missing = self._resource_connections(resource_type, _id) + for dep in transitive_missing: + if dep[1] not in self.config.state.source[dep[0]]: + self.worker.work_queue.put_nowait(dep) + except Exception as e: + self.config.logger.error( + f"error discovering transitive deps: {str(e)}", resource_type=resource_type, _id=_id + ) + + # See also: _import_missing_dep_cb (import-time equivalent) async def _force_missing_dep_import_cb(self, q_item: List): resource_type, _id = q_item try: diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index adc06935..2129f96f 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -90,6 +90,9 @@ def import_resources(self, runner, caplog): if self.resource_per_file: cmd.append("--resource-per-file") + if self.force_missing_deps: + cmd.append("--force-missing-dependencies") + ret = runner.invoke(cli, cmd) assert 0 == ret.exit_code return ret diff --git a/tests/unit/test_import_force_missing_deps.py b/tests/unit/test_import_force_missing_deps.py new file mode 100644 index 00000000..8958c70a --- /dev/null +++ b/tests/unit/test_import_force_missing_deps.py @@ -0,0 +1,606 @@ +# Unless explicitly stated otherwise all files in this repository are licensed +# under the 3-clause BSD style license (see LICENSE). +# This product includes software developed at Datadog (https://www.datadoghq.com/). +# Copyright 2019 Datadog, Inc. + +import asyncio +import pytest +from unittest.mock import AsyncMock, MagicMock, patch + +from datadog_sync.constants import Origin +from datadog_sync.utils.resources_handler import ResourcesHandler +from datadog_sync.utils.resource_utils import CustomClientHTTPError, SkipResource +from datadog_sync.utils.log import Log + + +@pytest.fixture +def import_test(config): + """Provides (handler, config) with full state isolation. + + Saves config.filters, config.filter_operator, config.resources_arg, + config.logger, and all source state before each test; restores after. + + Replaces config.logger with a Log instance so that logger calls with + custom kwargs (resource_type=, _id=) don't raise TypeError — the + session-level conftest fixture uses a bare logging.Logger which rejects + these keyword args (they're custom to the Log class). + """ + saved_filters = config.filters + saved_operator = config.filter_operator + saved_resources_arg = config.resources_arg[:] + saved_logger = config.logger + saved_source = {} + for rt in config.resources: + saved_source[rt] = dict(config.state.source[rt]) + + config.logger = Log(verbose=False) + handler = ResourcesHandler(config) + + yield handler, config + + config.filters = saved_filters + config.filter_operator = saved_operator + config.resources_arg = saved_resources_arg + config.logger = saved_logger + for rt in config.resources: + config.state.source[rt].clear() + config.state.source[rt].update(saved_source.get(rt, {})) + + +def setup_state(config, source_resources, resources_arg=None): + """Populate state.source and optionally set resources_arg. + + Clears ALL resource types in source state first. + """ + for rt in config.resources: + config.state.source[rt].clear() + for rt, resources in source_resources.items(): + for _id, resource in resources.items(): + config.state.source[rt][_id] = resource + if resources_arg is not None: + config.resources_arg = resources_arg + else: + config.resources_arg = list(source_resources.keys()) + + +# --------------------------------------------------------------------------- +# Cycle A — RED: _discover_missing_dependencies() (method not yet implemented) +# --------------------------------------------------------------------------- + + +def test_discover_finds_missing_deps(import_test): + """dashboard_list referencing a dashboard not in source state → detected as missing.""" + handler, config = import_test + setup_state( + config, + { + "dashboard_lists": { + "dl-1": {"id": "dl-1", "dashboards": [{"id": "dash-1"}]}, + }, + }, + resources_arg=["dashboard_lists"], + ) + + result = handler._discover_missing_dependencies() + + assert ("dashboards", "dash-1") in result + + +def test_discover_ignores_present_deps(import_test): + """dashboard_list referencing a dashboard already in source state → not reported as missing.""" + handler, config = import_test + setup_state( + config, + { + "dashboard_lists": { + "dl-1": {"id": "dl-1", "dashboards": [{"id": "dash-1"}]}, + }, + "dashboards": { + "dash-1": {"id": "dash-1", "title": "Already imported"}, + }, + }, + resources_arg=["dashboard_lists"], + ) + + result = handler._discover_missing_dependencies() + + assert ("dashboards", "dash-1") not in result + assert result == set() + + +def test_discover_empty_attr_paths_skipped(import_test): + """resource_connections with an empty attr-path list produces no missing deps.""" + handler, config = import_test + setup_state( + config, + { + "dashboard_lists": { + "dl-1": {"id": "dl-1", "dashboards": [{"id": "dash-1"}]}, + }, + }, + resources_arg=["dashboard_lists"], + ) + + r_class = config.resources["dashboard_lists"] + original_connections = r_class.resource_config.resource_connections + r_class.resource_config.resource_connections = {"dashboards": []} + try: + result = handler._discover_missing_dependencies() + finally: + r_class.resource_config.resource_connections = original_connections + + assert result == set() + + +def test_discover_no_connections(import_test): + """Resource type with no resource_connections (monitors) → empty missing set.""" + handler, config = import_test + setup_state( + config, + { + "monitors": { + "mon-1": {"id": "mon-1", "name": "My Monitor"}, + "mon-2": {"id": "mon-2", "name": "Another Monitor"}, + }, + }, + resources_arg=["monitors"], + ) + + result = handler._discover_missing_dependencies() + + assert result == set() + + +def test_discover_multiple_resource_types(import_test): + """Both dashboard_lists→dashboards and dashboards→monitors missing deps are discovered.""" + handler, config = import_test + setup_state( + config, + { + "dashboard_lists": { + "dl-1": {"id": "dl-1", "dashboards": [{"id": "dash-1"}]}, + }, + "dashboards": { + "dash-2": { + "id": "dash-2", + "widgets": [{"definition": {"alert_id": "mon-99"}}], + }, + }, + }, + resources_arg=["dashboard_lists", "dashboards"], + ) + + result = handler._discover_missing_dependencies() + + assert ("dashboards", "dash-1") in result + assert ("monitors", "mon-99") in result + + +def test_discover_finds_deps_outside_resources_arg(import_test): + """Deps on types NOT in resources_arg (but in config.resources) still appear in missing set.""" + handler, config = import_test + # dashboards is NOT in resources_arg, but dashboard_lists references it + setup_state( + config, + { + "dashboard_lists": { + "dl-1": {"id": "dl-1", "dashboards": [{"id": "dash-1"}]}, + }, + }, + resources_arg=["dashboard_lists"], + ) + # dash-1 is absent from source state; dashboards type not in resources_arg + + result = handler._discover_missing_dependencies() + + assert ("dashboards", "dash-1") in result + + +def test_discover_unknown_dep_type_not_in_resources(import_test): + """Unknown dep types returned by _resource_connections appear in the result set. + + The guard for unknown types lives in _import_missing_dep_cb (import-time callback), + not in _discover_missing_dependencies itself. This test verifies that discovery + passes through whatever _resource_connections returns without filtering. + """ + handler, config = import_test + setup_state( + config, + { + "dashboard_lists": { + "dl-1": {"id": "dl-1", "dashboards": [{"id": "dash-1"}]}, + }, + }, + resources_arg=["dashboard_lists"], + ) + unknown_missing = {("completely_unknown_type", "some-id-abc")} + + with patch.object(handler, "_resource_connections", return_value=(set(), unknown_missing)): + result = handler._discover_missing_dependencies() + + assert ("completely_unknown_type", "some-id-abc") in result + + +# --------------------------------------------------------------------------- +# Cycle B — RED: _import_missing_dep_cb() (method not yet implemented) +# --------------------------------------------------------------------------- + + +def _make_http_error(status=404, message="Not Found"): + mock_response = MagicMock() + mock_response.status = status + mock_response.message = message + return CustomClientHTTPError(mock_response) + + +def test_import_missing_dep_cb_skips_already_imported(import_test): + """Callback is a no-op when the dep is already in source state.""" + handler, config = import_test + setup_state( + config, + { + "dashboards": { + "dash-1": {"id": "dash-1", "title": "Already here"}, + }, + }, + resources_arg=["dashboards"], + ) + + with patch.object(config.resources["dashboards"], "_import_resource", new=AsyncMock()) as mock_import: + asyncio.run(handler._import_missing_dep_cb(("dashboards", "dash-1"))) + + mock_import.assert_not_called() + + +def test_import_missing_dep_cb_happy_path(import_test): + """Successful import emits success and enqueues transitive missing deps.""" + handler, config = import_test + setup_state(config, {}, resources_arg=["dashboards"]) + + mock_worker = MagicMock() + handler.worker = mock_worker + + async def fake_import_resource(_id): + config.state.source["dashboards"][_id] = {"id": _id} + return _id + + transitive_missing = {("monitors", "mon-99")} + + with patch.object(config.resources["dashboards"], "_import_resource", side_effect=fake_import_resource): + with patch.object(handler, "_resource_connections", return_value=(set(), transitive_missing)): + with patch.object(handler, "_emit") as mock_emit: + asyncio.run(handler._import_missing_dep_cb(("dashboards", "dash-1"))) + + mock_emit.assert_called_once_with("dashboards", "dash-1", "import", "success") + mock_worker.work_queue.put_nowait.assert_called_once_with(("monitors", "mon-99")) + + +def test_import_missing_dep_cb_skip_resource(import_test): + """SkipResource → emits skipped and does not proceed to transitive discovery.""" + handler, config = import_test + setup_state(config, {}, resources_arg=["dashboards"]) + + skip_err = SkipResource("dash-1", "dashboards", "not applicable") + + with patch.object( + config.resources["dashboards"], + "_import_resource", + new=AsyncMock(side_effect=skip_err), + ): + with patch.object(handler, "_resource_connections") as mock_rc: + with patch.object(handler, "_emit") as mock_emit: + asyncio.run(handler._import_missing_dep_cb(("dashboards", "dash-1"))) + + mock_emit.assert_called_once_with("dashboards", "dash-1", "import", "skipped", reason="SkipResource") + mock_rc.assert_not_called() + + +def test_import_missing_dep_cb_http_error(import_test): + """CustomClientHTTPError → emits failure with HTTP status reason.""" + handler, config = import_test + setup_state(config, {}, resources_arg=["dashboards"]) + + http_err = _make_http_error(status=404) + + with patch.object( + config.resources["dashboards"], + "_import_resource", + new=AsyncMock(side_effect=http_err), + ): + with patch.object(handler, "_emit") as mock_emit: + asyncio.run(handler._import_missing_dep_cb(("dashboards", "dash-1"))) + + mock_emit.assert_called_once_with("dashboards", "dash-1", "import", "failure", reason="HTTP 404") + + +def test_import_missing_dep_cb_generic_error(import_test): + """Unexpected exception → emits failure with exception class name as reason.""" + handler, config = import_test + setup_state(config, {}, resources_arg=["dashboards"]) + + with patch.object( + config.resources["dashboards"], + "_import_resource", + new=AsyncMock(side_effect=RuntimeError("boom")), + ): + with patch.object(handler, "_emit") as mock_emit: + asyncio.run(handler._import_missing_dep_cb(("dashboards", "dash-1"))) + + mock_emit.assert_called_once_with("dashboards", "dash-1", "import", "failure", reason="RuntimeError") + + +def test_import_missing_dep_cb_unknown_resource_type(import_test): + """Unknown resource type → logs warning and returns without error.""" + handler, config = import_test + + # "completely_unknown_type" is not registered in config.resources + assert "completely_unknown_type" not in config.resources + + with patch.object(handler, "_emit") as mock_emit: + asyncio.run(handler._import_missing_dep_cb(("completely_unknown_type", "some-id"))) + + mock_emit.assert_not_called() + + +def test_import_missing_dep_cb_does_not_apply_filters(import_test): + """_import_missing_dep_cb bypasses user filters — filter() is never called.""" + handler, config = import_test + setup_state(config, {}, resources_arg=["dashboards"]) + + mock_worker = MagicMock() + handler.worker = mock_worker + + async def fake_import(_id): + config.state.source["dashboards"][_id] = {"id": _id} + return _id + + with patch.object(config.resources["dashboards"], "_import_resource", side_effect=fake_import): + with patch.object(handler, "_resource_connections", return_value=(set(), set())): + with patch.object(config.resources["dashboards"], "filter") as mock_filter: + asyncio.run(handler._import_missing_dep_cb(("dashboards", "dash-1"))) + + mock_filter.assert_not_called() + + +def test_circular_dep_does_not_infinite_loop(import_test): + """Already-imported dep is not re-enqueued, preventing circular import loops. + + Scenario: dash-1 is in source state. When importing dash-2, _resource_connections + discovers dash-1 as a transitive dep. Because dash-1 is already present, it must + NOT be enqueued again. + """ + handler, config = import_test + # dash-1 already imported + setup_state( + config, + { + "dashboards": { + "dash-1": {"id": "dash-1", "title": "Already imported"}, + }, + }, + resources_arg=["dashboards"], + ) + + mock_worker = MagicMock() + handler.worker = mock_worker + + async def fake_import(_id): + config.state.source["dashboards"][_id] = {"id": _id} + return _id + + # Transitive discovery of dash-2 finds dash-1 as a dep — already present + transitive_missing = {("dashboards", "dash-1")} + + with patch.object(config.resources["dashboards"], "_import_resource", side_effect=fake_import): + with patch.object(handler, "_resource_connections", return_value=(set(), transitive_missing)): + asyncio.run(handler._import_missing_dep_cb(("dashboards", "dash-2"))) + + # dash-1 must NOT be re-enqueued + for call in mock_worker.work_queue.put_nowait.call_args_list: + assert call.args[0] != ("dashboards", "dash-1"), "circular dep was re-enqueued" + + +# --------------------------------------------------------------------------- +# Cycle C — RED: import_resources() wiring (not yet modified) +# --------------------------------------------------------------------------- + + +def _make_mock_worker(): + """Return an AsyncMock worker suitable for import_resources flow tests.""" + mock_worker = AsyncMock() + mock_worker.work_queue = MagicMock() + return mock_worker + + +def test_import_resources_calls_discover_when_enabled(import_test): + """force_missing_dependencies=True → discovery runs and workers are scheduled.""" + handler, config = import_test + config.force_missing_dependencies = True + handler.worker = _make_mock_worker() + + missing = {("dashboards", "dash-1")} + + with patch.object(handler, "import_resources_without_saving", new=AsyncMock()): + with patch.object(handler, "_discover_missing_dependencies", return_value=missing) as mock_discover: + with patch.object(config.state, "dump_state"): + asyncio.run(handler.import_resources()) + + mock_discover.assert_called_once() + handler.worker.init_workers.assert_called_once() + handler.worker.work_queue.put_nowait.assert_called_once_with(("dashboards", "dash-1")) + handler.worker.schedule_workers.assert_called_once() + + +def test_import_resources_skips_discover_when_disabled(import_test): + """force_missing_dependencies=False → discovery is never called.""" + handler, config = import_test + config.force_missing_dependencies = False + handler.worker = _make_mock_worker() + + with patch.object(handler, "import_resources_without_saving", new=AsyncMock()): + with patch.object(handler, "_discover_missing_dependencies") as mock_discover: + with patch.object(config.state, "dump_state"): + asyncio.run(handler.import_resources()) + + mock_discover.assert_not_called() + + +def test_import_resources_empty_missing_set_skips_workers(import_test): + """When discovery returns empty set, no workers are initialised.""" + handler, config = import_test + config.force_missing_dependencies = True + handler.worker = _make_mock_worker() + + with patch.object(handler, "import_resources_without_saving", new=AsyncMock()): + with patch.object(handler, "_discover_missing_dependencies", return_value=set()): + with patch.object(config.state, "dump_state"): + asyncio.run(handler.import_resources()) + + handler.worker.init_workers.assert_not_called() + handler.worker.work_queue.put_nowait.assert_not_called() + handler.worker.schedule_workers.assert_not_called() + + +def test_import_flag_is_noop_before_wiring(import_test): + """Baseline: force_missing_dependencies=False does not change import_resources flow. + + Verifies that dump_state is still called exactly once and + import_resources_without_saving runs as normal. + """ + handler, config = import_test + config.force_missing_dependencies = False + handler.worker = _make_mock_worker() + + with patch.object(handler, "import_resources_without_saving", new=AsyncMock()) as mock_iwos: + with patch.object(config.state, "dump_state") as mock_dump: + asyncio.run(handler.import_resources()) + + mock_iwos.assert_called_once() + mock_dump.assert_called_once_with(Origin.SOURCE) + + +def test_import_resources_empty_resources_arg(import_test): + """force_missing_dependencies=True with empty resources_arg → no crash, discovery still called.""" + handler, config = import_test + config.force_missing_dependencies = True + config.resources_arg = [] + handler.worker = _make_mock_worker() + + with patch.object(handler, "import_resources_without_saving", new=AsyncMock()): + with patch.object(handler, "_discover_missing_dependencies", return_value=set()) as mock_discover: + with patch.object(config.state, "dump_state"): + asyncio.run(handler.import_resources()) + + mock_discover.assert_called_once() + handler.worker.init_workers.assert_not_called() + + +def test_import_resources_filter_plus_force(import_test): + """filter + force_missing_dependencies: discovery uses whatever source state was populated. + + import_resources_without_saving is responsible for applying filters before + writing to source state. _discover_missing_dependencies then operates only + on whatever is present in source state — so filtered-out resources are + naturally excluded from dependency discovery. + """ + handler, config = import_test + config.force_missing_dependencies = True + handler.worker = _make_mock_worker() + + # Simulate import_resources_without_saving populating only the filtered-in resource + def fake_iwos(): + config.state.source["dashboard_lists"]["dl-1"] = { + "id": "dl-1", + "dashboards": [{"id": "dash-1"}], + } + + missing_from_filtered = {("dashboards", "dash-1")} + + with patch.object(handler, "import_resources_without_saving", new=AsyncMock(side_effect=fake_iwos)): + with patch.object( + handler, "_discover_missing_dependencies", return_value=missing_from_filtered + ) as mock_discover: + with patch.object(config.state, "dump_state"): + asyncio.run(handler.import_resources()) + + mock_discover.assert_called_once() + handler.worker.work_queue.put_nowait.assert_called_once_with(("dashboards", "dash-1")) + + +def test_import_resources_persists_transitive_dep_types(import_test): + """dump_state is called exactly once, after all discovery (including transitive dep types). + + Even when _import_missing_dep_cb imports dep types outside resources_arg, + dump_state captures everything in a single atomic call. + """ + handler, config = import_test + config.force_missing_dependencies = True + handler.worker = _make_mock_worker() + + # Simulate _import_missing_dep_cb writing a dep type outside resources_arg + async def fake_schedule_workers(): + config.state.source["dashboards"]["dash-99"] = {"id": "dash-99"} + + handler.worker.schedule_workers = fake_schedule_workers + missing = {("dashboards", "dash-99")} + + with patch.object(handler, "import_resources_without_saving", new=AsyncMock()): + with patch.object(handler, "_discover_missing_dependencies", return_value=missing): + with patch.object(config.state, "dump_state") as mock_dump: + asyncio.run(handler.import_resources()) + + # dump_state called exactly once, after workers finish + mock_dump.assert_called_once_with(Origin.SOURCE) + # dep type outside resources_arg is in source state when dump runs + assert "dash-99" in config.state.source["dashboards"] + + +# --------------------------------------------------------------------------- +# Cycle D — RED: integration helper import_resources() wiring +# --------------------------------------------------------------------------- + + +def test_integration_import_passes_force_missing_deps_flag(): + """When force_missing_deps=True, import_resources() appends --force-missing-dependencies.""" + from tests.integration.helpers import BaseResourcesTestClass + + class TestHelper(BaseResourcesTestClass): + resource_type = "dashboard_lists" + force_missing_deps = True + + helper = TestHelper() + mock_runner = MagicMock() + mock_ret = MagicMock() + mock_ret.exit_code = 0 + mock_runner.invoke.return_value = mock_ret + mock_caplog = MagicMock() + mock_caplog.set_level = MagicMock() + + helper.import_resources(mock_runner, mock_caplog) + + call_args = mock_runner.invoke.call_args + cmd = call_args[0][1] # second positional arg is the cmd list + assert "--force-missing-dependencies" in cmd + + +def test_integration_import_omits_flag_when_disabled(): + """When force_missing_deps=False, import_resources() does NOT append --force-missing-dependencies.""" + from tests.integration.helpers import BaseResourcesTestClass + + class TestHelper(BaseResourcesTestClass): + resource_type = "dashboard_lists" + force_missing_deps = False + + helper = TestHelper() + mock_runner = MagicMock() + mock_ret = MagicMock() + mock_ret.exit_code = 0 + mock_runner.invoke.return_value = mock_ret + mock_caplog = MagicMock() + mock_caplog.set_level = MagicMock() + + helper.import_resources(mock_runner, mock_caplog) + + call_args = mock_runner.invoke.call_args + cmd = call_args[0][1] + assert "--force-missing-dependencies" not in cmd From 41a2cc2fa64fc584b16608816ce4a16f1746fee9 Mon Sep 17 00:00:00 2001 From: "michael.richey" Date: Mon, 27 Apr 2026 14:41:22 -0400 Subject: [PATCH 2/5] fix(import): use source-only dep discovery with BFS closure walk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes two P2 bugs in --force-missing-dependencies import discovery: Bug #1 (destination state leak): _discover_missing_dependencies() was reusing _resource_connections() → connect_id(), which checks config.state.destination first. If a dep existed in stale destination state, it appeared "resolved" and was never checked against source, causing it to be silently skipped even when not imported. Bug #2 (no closure through present deps): Discovery only scanned resources_arg types. Deps already in source state from a prior partial import had their own transitive deps ignored. Example: dashboard_list → dashboard (present) → monitor (missing) — monitor was never found. Fix: Introduce a parallel import-time path: - BaseResource.extract_source_ids(): source-only mirror of connect_id, no destination state check, no mutation. Overridden in Monitors, RestrictionPolicies, TeamMemberships, SyntheticsTests for types that need regex/prefix/type-dispatch logic. - ResourcesHandler._dep_in_source_state(): exact + prefix match for composite synthetics_tests keys ('{public_id}#{monitor_id}'). - ResourcesHandler._source_dependencies_for_resource(): find_attr() traversal using extract_source_ids instead of connect_id. - Rewrite _discover_missing_dependencies() as a BFS closure walk that seeds from resources_arg nodes, follows edges through already-present source resources, and collects only nodes not yet in source state. - Update _import_missing_dep_cb() transitive discovery to use the same source-only path. The sync-time path (_resource_connections, connect_id, get_dependency_graph) is entirely untouched. Co-Authored-By: Claude Sonnet 4.6 --- datadog_sync/model/monitors.py | 20 ++ datadog_sync/model/restriction_policies.py | 16 + datadog_sync/model/synthetics_tests.py | 17 + datadog_sync/model/team_memberships.py | 8 + datadog_sync/utils/base_resource.py | 14 + datadog_sync/utils/resources_handler.py | 95 ++++-- tests/unit/test_import_force_missing_deps.py | 336 ++++++++++++++++++- 7 files changed, 474 insertions(+), 32 deletions(-) diff --git a/datadog_sync/model/monitors.py b/datadog_sync/model/monitors.py index bb37ba9c..cc29906a 100644 --- a/datadog_sync/model/monitors.py +++ b/datadog_sync/model/monitors.py @@ -195,3 +195,23 @@ def connect_id(self, key: str, r_obj: Dict, resource_to_connect: str) -> Optiona else: # Use default connect_id method in base class when not handling special case for `query` return super(Monitors, self).connect_id(key, r_obj, resource_to_connect) + + def extract_source_ids(self, key: str, r_obj: Dict, resource_to_connect: str) -> Optional[List[str]]: + # Mirror of connect_id -- keep in sync when connect_id changes. + if key == "query" and r_obj.get("type") == "composite" and resource_to_connect != "service_level_objectives": + return re.findall("[0-9]+", r_obj[key]) + elif key == "query" and resource_to_connect == "service_level_objectives" and r_obj.get("type") == "slo alert": + if res := re.search(r'(?:error_budget|burn_rate)\("(.*?)"\)\.', r_obj[key]): + return [res.group(1)] + return [] + elif key == "query": + return [] + elif key == "principals": + type_map = {"user": "users", "role": "roles", "team": "teams"} + return [ + _id + for p in r_obj[key] + for _type, _id in [p.split(":", 1)] + if type_map.get(_type) == resource_to_connect + ] + return super(Monitors, self).extract_source_ids(key, r_obj, resource_to_connect) diff --git a/datadog_sync/model/restriction_policies.py b/datadog_sync/model/restriction_policies.py index f3a7f4c5..27d65af9 100644 --- a/datadog_sync/model/restriction_policies.py +++ b/datadog_sync/model/restriction_policies.py @@ -196,3 +196,19 @@ def connect_id(self, key: str, r_obj: Dict, resource_to_connect: str) -> Optiona failed_connections.append(_id) return failed_connections + + def extract_source_ids(self, key: str, r_obj: Dict, resource_to_connect: str) -> Optional[List[str]]: + # Mirror of connect_id -- keep in sync when connect_id changes. + if key == "id": + _type, _id = r_obj[key].split(":", 1) + type_map = {"dashboard": "dashboards", "slo": "service_level_objectives", "notebook": "notebooks"} + return [_id] if type_map.get(_type) == resource_to_connect else [] + elif key == "principals": + type_map = {"user": "users", "role": "roles", "team": "teams"} + return [ + _id + for p in r_obj[key] + for _type, _id in [p.split(":", 1)] + if type_map.get(_type) == resource_to_connect + ] + return super().extract_source_ids(key, r_obj, resource_to_connect) diff --git a/datadog_sync/model/synthetics_tests.py b/datadog_sync/model/synthetics_tests.py index ada0f216..3a3e9422 100644 --- a/datadog_sync/model/synthetics_tests.py +++ b/datadog_sync/model/synthetics_tests.py @@ -414,3 +414,20 @@ def connect_id(self, key: str, r_obj: Dict, resource_to_connect: str) -> Optiona return super(SyntheticsTests, self).connect_id(key, r_obj, resource_to_connect) else: return super(SyntheticsTests, self).connect_id(key, r_obj, resource_to_connect) + + def extract_source_ids(self, key: str, r_obj: Dict, resource_to_connect: str) -> Optional[List[str]]: + # Mirror of connect_id -- keep in sync when connect_id changes. + # Only synthetics_private_locations and mobile application versions need special handling. + # rum_applications, synthetics_tests (subtests), synthetics_global_variables, roles, and + # synthetics_mobile_applications all use plain IDs at the leaf — base extract_source_ids + # handles them. For synthetics_tests subtests, _dep_in_source_state handles composite key + # prefix matching ('{public_id}#{monitor_id}' keys). + if resource_to_connect == "synthetics_private_locations": + pl = self.config.resources["synthetics_private_locations"] + return [str(_id) for _id in r_obj[key] if pl.pl_id_regex.match(str(_id))] + elif resource_to_connect == "synthetics_mobile_applications_versions" and key == "referenceId": + # When referenceType is "latest", referenceId contains the application ID, not a version ID. + if r_obj.get("referenceType") == "latest": + return super(SyntheticsTests, self).extract_source_ids(key, r_obj, "synthetics_mobile_applications") + return super(SyntheticsTests, self).extract_source_ids(key, r_obj, resource_to_connect) + return super(SyntheticsTests, self).extract_source_ids(key, r_obj, resource_to_connect) diff --git a/datadog_sync/model/team_memberships.py b/datadog_sync/model/team_memberships.py index c48c7072..c121c34a 100644 --- a/datadog_sync/model/team_memberships.py +++ b/datadog_sync/model/team_memberships.py @@ -198,3 +198,11 @@ def connect_id(self, key: str, r_obj: Dict, resource_to_connect: str) -> Optiona else: failed_connections.append(_id) return failed_connections + + def extract_source_ids(self, key: str, r_obj: Dict, resource_to_connect: str) -> Optional[List[str]]: + # Mirror of connect_id -- keep in sync when connect_id changes. + _type = r_obj["type"] + type_map = {"users": "users", "team": "teams"} + if type_map.get(_type) == resource_to_connect: + return [r_obj["id"]] + return [] diff --git a/datadog_sync/utils/base_resource.py b/datadog_sync/utils/base_resource.py index 797b0562..dab40194 100644 --- a/datadog_sync/utils/base_resource.py +++ b/datadog_sync/utils/base_resource.py @@ -233,6 +233,20 @@ def connect_id(self, key: str, r_obj: Dict, resource_to_connect: str) -> Optiona return failed_connections + def extract_source_ids(self, key: str, r_obj: Dict, resource_to_connect: str) -> Optional[List[str]]: + """Extract dependency IDs referenced at r_obj[key] for resource_to_connect. + + Source-only: does NOT check destination state, does NOT mutate r_obj. + Override in subclasses with custom connect_id logic (regex, prefix + parsing, type dispatch, etc.). + Mirror of connect_id -- keep in sync when connect_id changes. + """ + if not r_obj.get(key): + return None + if isinstance(r_obj[key], list): + return [str(v) for v in r_obj[key]] + return [str(r_obj[key])] + def connect_resources(self, _id: str, resource: Dict) -> None: if not self.resource_config.resource_connections: return diff --git a/datadog_sync/utils/resources_handler.py b/datadog_sync/utils/resources_handler.py index 90449f6e..1db77b4b 100644 --- a/datadog_sync/utils/resources_handler.py +++ b/datadog_sync/utils/resources_handler.py @@ -462,29 +462,80 @@ async def _import_resource(self, q_item: List) -> None: self.config.logger.error(f"error while importing resource: resource_type:{resource_type} id:{_id}") self.config.logger.debug(f"error detail: {str(e)}", resource_type=resource_type) + def _dep_in_source_state(self, dep_type: str, dep_id: str) -> bool: + """Check if a dependency exists in source state. + + Handles composite keys: synthetics_tests uses '{public_id}#{monitor_id}' + keys in source state, so an exact match on a bare public_id fails. + Prefix matching resolves this. + + Note: ServiceLevelObjectives.connect_id uses k.endswith(_id) for its + synthetics lookup, but SLO's resource_connections has + "synthetics_tests": [] (empty attr list), so find_attr is never called + for that path and this method is never invoked for it. Prefix-only + matching is therefore safe. + """ + source = self.config.state.source.get(dep_type, {}) + if dep_id in source: + return True + if dep_type == "synthetics_tests": + return any(k.startswith(dep_id + "#") for k in source) + return False + + def _source_dependencies_for_resource(self, resource_type: str, _id: str) -> Set[Tuple[str, str]]: + """Return all (dep_type, dep_id) referenced by a source resource. + + Uses extract_source_ids — source-only, does NOT check destination state, + does NOT mutate the resource. + + Does NOT call ensure_resource_loaded(). During import with + --force-missing-dependencies, resources are fetched from the API and + stored directly in source state. ensure_resource_loaded() remains in + _resource_connections() for the sync-time path only. + """ + deps: Set[Tuple[str, str]] = set() + r_class = self.config.resources[resource_type] + if not r_class.resource_config.resource_connections: + return deps + resource = deepcopy(self.config.state.source[resource_type][_id]) + for dep_type, paths in r_class.resource_config.resource_connections.items(): + for path in paths: + if not path: + continue # empty attr path (e.g., SLO's "synthetics_tests": []) + ids = find_attr(path, dep_type, resource, r_class.extract_source_ids) + for dep_id in ids or []: + deps.add((dep_type, dep_id)) + return deps + def _discover_missing_dependencies(self) -> Set[Tuple[str, str]]: - """Scan imported resources for dependency IDs not yet in source state. + """Scan imported resources and all reachable transitive deps for + dependency IDs not yet in source state. - Reuses _resource_connections() which calls find_attr() -> connect_id() - on each resource class — the same code path as the sync-time dependency - check. With empty destination state, all deps appear as "failed - connections"; we discard those and only use the missing_resources subset - (deps not in source state). + BFS closure walk: seeds from resources_arg nodes, follows edges through + already-present source resources so that transitive deps of present + resources are also discovered. Uses _dep_in_source_state() for composite + key handling (e.g., synthetics_tests '{public_id}#{monitor_id}' keys). - Note: config.resources contains ALL resource types (not just resources_arg), - so dependency types outside the user's --resources list are handled correctly. + Uses extract_source_ids (source-only) rather than connect_id (which + checks destination state) to avoid stale destination state causing deps + to appear resolved when they haven't been fetched into source state. """ missing: Set[Tuple[str, str]] = set() - for resource_type in self.config.resources_arg: - r_class = self.config.resources[resource_type] - if not r_class.resource_config.resource_connections: + seen: Set[Tuple[str, str]] = set() + queue = [(rt, _id) for rt in self.config.resources_arg for _id in list(self.config.state.source[rt].keys())] + while queue: + rt, _id = queue.pop() + if (rt, _id) in seen: + continue + seen.add((rt, _id)) + r_class = self.config.resources.get(rt) + if not r_class or not r_class.resource_config.resource_connections: continue - for _id in list(self.config.state.source[resource_type].keys()): - # failed_connections is discarded: during import, destination state - # is empty so all deps appear as "failed connections". Only - # missing_resources (deps not in source state) matters here. - _, missing_for_resource = self._resource_connections(resource_type, _id) - missing.update(missing_for_resource) + for dep_type, dep_id in self._source_dependencies_for_resource(rt, _id): + if self._dep_in_source_state(dep_type, dep_id): + queue.append((dep_type, dep_id)) # present — scan for ITS deps + else: + missing.add((dep_type, dep_id)) return missing async def _import_missing_dep_cb(self, q_item: Tuple[str, str]) -> None: @@ -548,13 +599,15 @@ async def _import_missing_dep_cb(self, q_item: Tuple[str, str]) -> None: return # Recursively discover transitive deps from the newly imported resource. + # Uses _source_dependencies_for_resource (source-only) rather than + # _resource_connections (destination-state check) to avoid stale + # destination state masking missing deps. try: r_class = self.config.resources[resource_type] if r_class.resource_config.resource_connections: - _, transitive_missing = self._resource_connections(resource_type, _id) - for dep in transitive_missing: - if dep[1] not in self.config.state.source[dep[0]]: - self.worker.work_queue.put_nowait(dep) + for dep_type, dep_id in self._source_dependencies_for_resource(resource_type, _id): + if not self._dep_in_source_state(dep_type, dep_id): + self.worker.work_queue.put_nowait((dep_type, dep_id)) except Exception as e: self.config.logger.error( f"error discovering transitive deps: {str(e)}", resource_type=resource_type, _id=_id diff --git a/tests/unit/test_import_force_missing_deps.py b/tests/unit/test_import_force_missing_deps.py index 8958c70a..c1126436 100644 --- a/tests/unit/test_import_force_missing_deps.py +++ b/tests/unit/test_import_force_missing_deps.py @@ -197,11 +197,11 @@ def test_discover_finds_deps_outside_resources_arg(import_test): def test_discover_unknown_dep_type_not_in_resources(import_test): - """Unknown dep types returned by _resource_connections appear in the result set. + """Unknown dep types surfaced by _source_dependencies_for_resource appear in missing set. The guard for unknown types lives in _import_missing_dep_cb (import-time callback), not in _discover_missing_dependencies itself. This test verifies that discovery - passes through whatever _resource_connections returns without filtering. + passes through whatever _source_dependencies_for_resource returns without filtering. """ handler, config = import_test setup_state( @@ -213,9 +213,9 @@ def test_discover_unknown_dep_type_not_in_resources(import_test): }, resources_arg=["dashboard_lists"], ) - unknown_missing = {("completely_unknown_type", "some-id-abc")} + unknown_deps = {("completely_unknown_type", "some-id-abc")} - with patch.object(handler, "_resource_connections", return_value=(set(), unknown_missing)): + with patch.object(handler, "_source_dependencies_for_resource", return_value=unknown_deps): result = handler._discover_missing_dependencies() assert ("completely_unknown_type", "some-id-abc") in result @@ -264,10 +264,10 @@ async def fake_import_resource(_id): config.state.source["dashboards"][_id] = {"id": _id} return _id - transitive_missing = {("monitors", "mon-99")} + transitive_deps = {("monitors", "mon-99")} with patch.object(config.resources["dashboards"], "_import_resource", side_effect=fake_import_resource): - with patch.object(handler, "_resource_connections", return_value=(set(), transitive_missing)): + with patch.object(handler, "_source_dependencies_for_resource", return_value=transitive_deps): with patch.object(handler, "_emit") as mock_emit: asyncio.run(handler._import_missing_dep_cb(("dashboards", "dash-1"))) @@ -287,12 +287,12 @@ def test_import_missing_dep_cb_skip_resource(import_test): "_import_resource", new=AsyncMock(side_effect=skip_err), ): - with patch.object(handler, "_resource_connections") as mock_rc: + with patch.object(handler, "_source_dependencies_for_resource") as mock_sdpr: with patch.object(handler, "_emit") as mock_emit: asyncio.run(handler._import_missing_dep_cb(("dashboards", "dash-1"))) mock_emit.assert_called_once_with("dashboards", "dash-1", "import", "skipped", reason="SkipResource") - mock_rc.assert_not_called() + mock_sdpr.assert_not_called() def test_import_missing_dep_cb_http_error(import_test): @@ -355,7 +355,7 @@ async def fake_import(_id): return _id with patch.object(config.resources["dashboards"], "_import_resource", side_effect=fake_import): - with patch.object(handler, "_resource_connections", return_value=(set(), set())): + with patch.object(handler, "_source_dependencies_for_resource", return_value=set()): with patch.object(config.resources["dashboards"], "filter") as mock_filter: asyncio.run(handler._import_missing_dep_cb(("dashboards", "dash-1"))) @@ -389,10 +389,10 @@ async def fake_import(_id): return _id # Transitive discovery of dash-2 finds dash-1 as a dep — already present - transitive_missing = {("dashboards", "dash-1")} + transitive_deps = {("dashboards", "dash-1")} with patch.object(config.resources["dashboards"], "_import_resource", side_effect=fake_import): - with patch.object(handler, "_resource_connections", return_value=(set(), transitive_missing)): + with patch.object(handler, "_source_dependencies_for_resource", return_value=transitive_deps): asyncio.run(handler._import_missing_dep_cb(("dashboards", "dash-2"))) # dash-1 must NOT be re-enqueued @@ -604,3 +604,317 @@ class TestHelper(BaseResourcesTestClass): call_args = mock_runner.invoke.call_args cmd = call_args[0][1] assert "--force-missing-dependencies" not in cmd + + +# --------------------------------------------------------------------------- +# Cycle E — RED: Bug #1 — destination state must not leak into discovery +# --------------------------------------------------------------------------- + + +def test_discover_not_affected_by_destination_state(import_test): + """Bug #1: dep in destination but NOT source must still be reported missing. + + Root cause: connect_id checks destination first. If dep found there, + it's not returned as 'failed', so _resource_connections never checks source. + The new _discover_missing_dependencies must use source-only logic. + """ + handler, config = import_test + setup_state( + config, + { + "dashboard_lists": { + "dl-1": {"id": "dl-1", "dashboards": [{"id": "dash-1"}]}, + }, + }, + resources_arg=["dashboard_lists"], + ) + # Put dash-1 in destination (stale) but NOT in source + config.state.destination["dashboards"]["dash-1"] = {"id": "dest-dash-1"} + + result = handler._discover_missing_dependencies() + + assert ("dashboards", "dash-1") in result + + +# --------------------------------------------------------------------------- +# Cycle F — RED: Bug #2 — BFS closure walk through already-present source deps +# --------------------------------------------------------------------------- + + +def test_discover_closure_through_present_deps(import_test): + """Bug #2: dep already in source state must have ITS own deps discovered. + + dashboard_lists → dashboards (dash-1 present) → monitors (mon-1 missing). + resources_arg only contains dashboard_lists, but mon-1 must appear in result + because the BFS walk follows edges through already-present source resources. + """ + handler, config = import_test + setup_state( + config, + { + "dashboard_lists": { + "dl-1": {"id": "dl-1", "dashboards": [{"id": "dash-1"}]}, + }, + "dashboards": { + "dash-1": { + "id": "dash-1", + "widgets": [{"definition": {"alert_id": "mon-1"}}], + }, + }, + }, + resources_arg=["dashboard_lists"], + ) + # mon-1 is NOT in source state + + result = handler._discover_missing_dependencies() + + assert ("monitors", "mon-1") in result + + +# --------------------------------------------------------------------------- +# Cycle G — RED: extract_source_ids overrides +# --------------------------------------------------------------------------- + + +def test_base_extract_source_ids_none_value(import_test): + """Base: key with None/falsy value → returns None (no deps).""" + _, config = import_test + r_class = config.resources["dashboard_lists"] + result = r_class.extract_source_ids("dashboards", {"dashboards": None}, "dashboards") + assert result is None + + +def test_base_extract_source_ids_list(import_test): + """Base: list value → each element stringified.""" + _, config = import_test + r_class = config.resources["dashboard_lists"] + result = r_class.extract_source_ids("dashboards", {"dashboards": [1, 2, 3]}, "dashboards") + assert result == ["1", "2", "3"] + + +def test_base_extract_source_ids_scalar(import_test): + """Base: scalar value → single-element list.""" + _, config = import_test + r_class = config.resources["dashboard_lists"] + result = r_class.extract_source_ids("dashboards", {"dashboards": "abc"}, "dashboards") + assert result == ["abc"] + + +def test_monitors_extract_source_ids_composite_query(import_test): + """Monitors: composite monitor query → all referenced monitor IDs extracted.""" + _, config = import_test + r_class = config.resources["monitors"] + r_obj = {"type": "composite", "query": "123 && 456"} + result = r_class.extract_source_ids("query", r_obj, "monitors") + assert sorted(result) == ["123", "456"] + + +def test_monitors_extract_source_ids_slo_alert(import_test): + """Monitors: slo alert query → SLO ID extracted.""" + _, config = import_test + r_class = config.resources["monitors"] + r_obj = {"type": "slo alert", "query": 'error_budget("slo-abc-123").over("7d") > 10'} + result = r_class.extract_source_ids("query", r_obj, "service_level_objectives") + assert result == ["slo-abc-123"] + + +def test_monitors_extract_source_ids_regular_query_returns_empty(import_test): + """Monitors: non-composite query → empty list (not None).""" + _, config = import_test + r_class = config.resources["monitors"] + r_obj = {"type": "metric alert", "query": "avg:system.cpu.user{*} > 80"} + result = r_class.extract_source_ids("query", r_obj, "monitors") + assert result == [] + + +def test_monitors_extract_source_ids_principals(import_test): + """Monitors: principals list → IDs filtered by resource_to_connect type.""" + _, config = import_test + r_class = config.resources["monitors"] + r_obj = {"principals": ["user:u-111", "role:r-222", "team:t-333"]} + + assert r_class.extract_source_ids("principals", r_obj, "users") == ["u-111"] + assert r_class.extract_source_ids("principals", r_obj, "roles") == ["r-222"] + assert r_class.extract_source_ids("principals", r_obj, "teams") == ["t-333"] + + +def test_restriction_policies_extract_source_ids_prefixed_id(import_test): + """RestrictionPolicies: prefixed ID → correct dep ID for matching type, [] for others.""" + _, config = import_test + r_class = config.resources["restriction_policies"] + r_obj = {"id": "dashboard:abc-123"} + + assert r_class.extract_source_ids("id", r_obj, "dashboards") == ["abc-123"] + assert r_class.extract_source_ids("id", r_obj, "service_level_objectives") == [] + assert r_class.extract_source_ids("id", r_obj, "notebooks") == [] + + +def test_restriction_policies_extract_source_ids_principals(import_test): + """RestrictionPolicies: principals → IDs filtered by resource_to_connect type.""" + _, config = import_test + r_class = config.resources["restriction_policies"] + r_obj = {"principals": ["user:u-1", "role:r-2", "team:t-3"]} + + assert r_class.extract_source_ids("principals", r_obj, "users") == ["u-1"] + assert r_class.extract_source_ids("principals", r_obj, "roles") == ["r-2"] + assert r_class.extract_source_ids("principals", r_obj, "teams") == ["t-3"] + + +def test_synthetics_tests_extract_source_ids_pl_filter(import_test): + """SyntheticsTests: locations list → only private location IDs (pl:xxx) returned.""" + _, config = import_test + r_class = config.resources["synthetics_tests"] + # Mix of region strings and private location IDs + r_obj = {"locations": ["aws:us-east-1", "pl:my-loc-abc123", "azure:eastus", "pl:another-loc-xyz"]} + result = r_class.extract_source_ids("locations", r_obj, "synthetics_private_locations") + assert sorted(result) == sorted(["pl:my-loc-abc123", "pl:another-loc-xyz"]) + + +def test_synthetics_tests_extract_source_ids_mobile_latest(import_test): + """SyntheticsTests: referenceType=latest → extract against synthetics_mobile_applications.""" + _, config = import_test + r_class = config.resources["synthetics_tests"] + r_obj = {"referenceId": "app-uuid-xyz", "referenceType": "latest"} + result = r_class.extract_source_ids("referenceId", r_obj, "synthetics_mobile_applications_versions") + # For referenceType=latest, should return the ID (delegates to mobile_applications path) + assert result == ["app-uuid-xyz"] + + +# --------------------------------------------------------------------------- +# Cycle H — GREEN/GREEN: regression tests for unchanged sync-time path +# --------------------------------------------------------------------------- + + +def test_dep_in_source_state_exact_match(import_test): + """_dep_in_source_state: exact key match returns True.""" + handler, config = import_test + config.state.source["dashboards"]["dash-1"] = {"id": "dash-1"} + assert handler._dep_in_source_state("dashboards", "dash-1") is True + + +def test_dep_in_source_state_miss(import_test): + """_dep_in_source_state: key absent from source → False.""" + handler, config = import_test + assert handler._dep_in_source_state("dashboards", "nonexistent") is False + + +def test_dep_in_source_state_synthetics_prefix_match(import_test): + """_dep_in_source_state: synthetics_tests composite key matched by prefix.""" + handler, config = import_test + config.state.source["synthetics_tests"]["abc-public#999"] = {"public_id": "abc-public", "monitor_id": 999} + assert handler._dep_in_source_state("synthetics_tests", "abc-public") is True + + +def test_dep_in_source_state_synthetics_no_false_positive(import_test): + """_dep_in_source_state: 'abc' must NOT match 'abcdef#123' — prefix only.""" + handler, config = import_test + config.state.source["synthetics_tests"]["abcdef#123"] = {"public_id": "abcdef", "monitor_id": 123} + assert handler._dep_in_source_state("synthetics_tests", "abc") is False + + +def test_discover_no_connections_empty_result(import_test): + """Monitors has no resource_connections → discover returns empty set.""" + handler, config = import_test + setup_state( + config, + { + "monitors": { + "mon-1": {"id": "mon-1", "name": "My Monitor"}, + }, + }, + resources_arg=["monitors"], + ) + result = handler._discover_missing_dependencies() + assert result == set() + + +def test_discover_present_dep_excluded(import_test): + """Dep already in source state is excluded from missing set.""" + handler, config = import_test + setup_state( + config, + { + "dashboard_lists": { + "dl-1": {"id": "dl-1", "dashboards": [{"id": "dash-1"}]}, + }, + "dashboards": { + "dash-1": {"id": "dash-1", "title": "Present"}, + }, + }, + resources_arg=["dashboard_lists"], + ) + result = handler._discover_missing_dependencies() + assert ("dashboards", "dash-1") not in result + + +def test_empty_attr_path_produces_no_deps(import_test): + """resource_connections with empty attr-path list produces no spurious deps.""" + handler, config = import_test + setup_state( + config, + { + "dashboard_lists": { + "dl-1": {"id": "dl-1", "dashboards": [{"id": "dash-1"}]}, + }, + }, + resources_arg=["dashboard_lists"], + ) + r_class = config.resources["dashboard_lists"] + original_connections = r_class.resource_config.resource_connections + r_class.resource_config.resource_connections = {"dashboards": []} + try: + result = handler._discover_missing_dependencies() + finally: + r_class.resource_config.resource_connections = original_connections + assert result == set() + + +# --------------------------------------------------------------------------- +# Cycle I — Enforcement: every custom connect_id must have extract_source_ids +# --------------------------------------------------------------------------- + + +def test_extract_source_ids_overrides_complete(config): + """Every resource with non-trivial connect_id also has extract_source_ids override. + + Classes where base extract_source_ids is verified sufficient are allowlisted. + These include: + - Classes whose connect_id just calls super() (delegate-to-super) + - Classes with custom connect_id logic where base extraction still works + """ + from datadog_sync.utils.base_resource import BaseResource + + # Classes where base extract_source_ids is verified sufficient. + # Delegate-to-super: their connect_id just calls super().connect_id(). + # Custom-but-safe: custom connect_id logic, but base extract_source_ids works. + safe_without_override = { + # Delegate-to-super (connect_id just calls super().connect_id) + "dashboards", + "dashboard_lists", + "powerpacks", + "downtimes", + "downtime_schedules", + "users", + "authn_mappings", + "slo_corrections", + "logs_restriction_queries", + "logs_archives_order", + "sensitive_data_scanner_groups_order", + "sensitive_data_scanner_rules", + "synthetics_private_locations", + "teams", + # Custom connect_id but base extract is sufficient + "logs_indexes_order", # Custom remapping; base returns correct raw names + "synthetics_global_variables", # Composite key handled by _dep_in_source_state + "synthetics_test_suites", # Same as synthetics_global_variables + "service_level_objectives", # Empty synthetics_tests paths; base works for monitors + "logs_pipelines_order", # Invalid-entry filtering is a sync concern only + } + for rt_name, r_class in config.resources.items(): + has_custom_connect = type(r_class).connect_id is not BaseResource.connect_id + has_custom_extract = type(r_class).extract_source_ids is not BaseResource.extract_source_ids + if has_custom_connect and not has_custom_extract: + assert rt_name in safe_without_override, ( + f"{rt_name} has custom connect_id but no extract_source_ids override " + f"and is not in the verified-safe allowlist" + ) From bb911187cc7a402bbed23b771da2bae5547c5406 Mon Sep 17 00:00:00 2001 From: "michael.richey" Date: Mon, 27 Apr 2026 15:33:48 -0400 Subject: [PATCH 3/5] fix(import): correct mobile app dep type + harden test fixture isolation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P2: synthetics_tests referenceId was emitted under the wrong dep type. extract_source_ids (and connect_id) internally called super() with "synthetics_mobile_applications" but _source_dependencies_for_resource always tagged the result with the outer resource_connections dict key ("synthetics_mobile_applications_versions"). Import would then try to fetch the application ID from the versions endpoint. Fix: add "options.mobileApplication.referenceId" as a path under synthetics_mobile_applications in resource_connections, and split both connect_id and extract_source_ids into two explicit referenceId branches: - synthetics_mobile_applications + referenceType=latest → handle - synthetics_mobile_applications_versions + referenceType=latest → [] (and vice versa for pinned references). Each branch now produces the correct dep type without needing cross-type redirection. P3: import_test fixture (module-scoped config) did not save/restore destination state or force_missing_dependencies. Tests mutating either leaked state into later tests. Fix uses deepcopy to snapshot both source and destination state before each test and fully restores all mutated fields in the teardown. Co-Authored-By: Claude Sonnet 4.6 --- datadog_sync/model/synthetics_tests.py | 26 +++++-- tests/unit/test_import_force_missing_deps.py | 77 ++++++++++++++++++-- 2 files changed, 89 insertions(+), 14 deletions(-) diff --git a/datadog_sync/model/synthetics_tests.py b/datadog_sync/model/synthetics_tests.py index 3a3e9422..5bfb2d7d 100644 --- a/datadog_sync/model/synthetics_tests.py +++ b/datadog_sync/model/synthetics_tests.py @@ -36,6 +36,7 @@ class SyntheticsTests(BaseResource): "rum_applications": ["options.rumSettings.applicationId"], "synthetics_mobile_applications": [ "options.mobileApplication.applicationId", + "options.mobileApplication.referenceId", ], "synthetics_mobile_applications_versions": [ "mobileApplicationsVersions", @@ -406,11 +407,15 @@ def connect_id(self, key: str, r_obj: Dict, resource_to_connect: str) -> Optiona else: failed_connections.append(_id) return failed_connections + elif resource_to_connect == "synthetics_mobile_applications" and key == "referenceId": + # referenceId is an application ID only when referenceType is "latest". + if r_obj.get("referenceType") == "latest": + return super(SyntheticsTests, self).connect_id(key, r_obj, resource_to_connect) + return [] elif resource_to_connect == "synthetics_mobile_applications_versions" and key == "referenceId": - # When referenceType is "latest", referenceId contains the application ID, not a version ID. - # Connect it against synthetics_mobile_applications instead. + # referenceId is a version ID only when referenceType is not "latest". if r_obj.get("referenceType") == "latest": - return super(SyntheticsTests, self).connect_id(key, r_obj, "synthetics_mobile_applications") + return [] return super(SyntheticsTests, self).connect_id(key, r_obj, resource_to_connect) else: return super(SyntheticsTests, self).connect_id(key, r_obj, resource_to_connect) @@ -419,15 +424,20 @@ def extract_source_ids(self, key: str, r_obj: Dict, resource_to_connect: str) -> # Mirror of connect_id -- keep in sync when connect_id changes. # Only synthetics_private_locations and mobile application versions need special handling. # rum_applications, synthetics_tests (subtests), synthetics_global_variables, roles, and - # synthetics_mobile_applications all use plain IDs at the leaf — base extract_source_ids - # handles them. For synthetics_tests subtests, _dep_in_source_state handles composite key - # prefix matching ('{public_id}#{monitor_id}' keys). + # synthetics_mobile_applications (applicationId key) all use plain IDs at the leaf — + # base extract_source_ids handles them. For synthetics_tests subtests, _dep_in_source_state + # handles composite key prefix matching ('{public_id}#{monitor_id}' keys). if resource_to_connect == "synthetics_private_locations": pl = self.config.resources["synthetics_private_locations"] return [str(_id) for _id in r_obj[key] if pl.pl_id_regex.match(str(_id))] + elif resource_to_connect == "synthetics_mobile_applications" and key == "referenceId": + # referenceId is an application ID only when referenceType is "latest". + if r_obj.get("referenceType") == "latest": + return super(SyntheticsTests, self).extract_source_ids(key, r_obj, resource_to_connect) + return [] elif resource_to_connect == "synthetics_mobile_applications_versions" and key == "referenceId": - # When referenceType is "latest", referenceId contains the application ID, not a version ID. + # referenceId is a version ID only when referenceType is not "latest". if r_obj.get("referenceType") == "latest": - return super(SyntheticsTests, self).extract_source_ids(key, r_obj, "synthetics_mobile_applications") + return [] return super(SyntheticsTests, self).extract_source_ids(key, r_obj, resource_to_connect) return super(SyntheticsTests, self).extract_source_ids(key, r_obj, resource_to_connect) diff --git a/tests/unit/test_import_force_missing_deps.py b/tests/unit/test_import_force_missing_deps.py index c1126436..4d2548d5 100644 --- a/tests/unit/test_import_force_missing_deps.py +++ b/tests/unit/test_import_force_missing_deps.py @@ -4,6 +4,7 @@ # Copyright 2019 Datadog, Inc. import asyncio +from copy import deepcopy import pytest from unittest.mock import AsyncMock, MagicMock, patch @@ -18,7 +19,13 @@ def import_test(config): """Provides (handler, config) with full state isolation. Saves config.filters, config.filter_operator, config.resources_arg, - config.logger, and all source state before each test; restores after. + config.logger, config.force_missing_dependencies, and all source AND + destination state before each test; restores after. + + Uses deepcopy to avoid nested-dict aliasing so that tests mutating + destination state (e.g., Bug #1 test) don't leak into later tests. + config is module-scoped, so this fixture must fully restore any field + that tests may mutate. Replaces config.logger with a Log instance so that logger calls with custom kwargs (resource_type=, _id=) don't raise TypeError — the @@ -29,9 +36,12 @@ def import_test(config): saved_operator = config.filter_operator saved_resources_arg = config.resources_arg[:] saved_logger = config.logger + saved_force = config.force_missing_dependencies saved_source = {} + saved_destination = {} for rt in config.resources: - saved_source[rt] = dict(config.state.source[rt]) + saved_source[rt] = deepcopy(dict(config.state.source[rt])) + saved_destination[rt] = deepcopy(dict(config.state.destination[rt])) config.logger = Log(verbose=False) handler = ResourcesHandler(config) @@ -42,9 +52,12 @@ def import_test(config): config.filter_operator = saved_operator config.resources_arg = saved_resources_arg config.logger = saved_logger + config.force_missing_dependencies = saved_force for rt in config.resources: config.state.source[rt].clear() config.state.source[rt].update(saved_source.get(rt, {})) + config.state.destination[rt].clear() + config.state.destination[rt].update(saved_destination.get(rt, {})) def setup_state(config, source_resources, resources_arg=None): @@ -771,13 +784,65 @@ def test_synthetics_tests_extract_source_ids_pl_filter(import_test): def test_synthetics_tests_extract_source_ids_mobile_latest(import_test): - """SyntheticsTests: referenceType=latest → extract against synthetics_mobile_applications.""" + """SyntheticsTests: referenceType=latest → dep emitted under synthetics_mobile_applications, not versions.""" _, config = import_test r_class = config.resources["synthetics_tests"] r_obj = {"referenceId": "app-uuid-xyz", "referenceType": "latest"} - result = r_class.extract_source_ids("referenceId", r_obj, "synthetics_mobile_applications_versions") - # For referenceType=latest, should return the ID (delegates to mobile_applications path) - assert result == ["app-uuid-xyz"] + # mobile_applications path returns the ID + assert r_class.extract_source_ids("referenceId", r_obj, "synthetics_mobile_applications") == ["app-uuid-xyz"] + # versions path returns empty — prevents wrong dep type + assert r_class.extract_source_ids("referenceId", r_obj, "synthetics_mobile_applications_versions") == [] + + +def test_synthetics_tests_extract_source_ids_mobile_pinned(import_test): + """SyntheticsTests: referenceType=version → dep emitted under synthetics_mobile_applications_versions, not app.""" + _, config = import_test + r_class = config.resources["synthetics_tests"] + r_obj = {"referenceId": "ver-uuid-abc", "referenceType": "version"} + # versions path returns the ID + assert r_class.extract_source_ids("referenceId", r_obj, "synthetics_mobile_applications_versions") == [ + "ver-uuid-abc" + ] + # mobile_applications path returns empty — prevents wrong dep type + assert r_class.extract_source_ids("referenceId", r_obj, "synthetics_mobile_applications") == [] + + +def test_synthetics_tests_connect_id_mobile_latest(import_test): + """SyntheticsTests connect_id: referenceType=latest remaps against mobile_applications, skips versions.""" + _, config = import_test + r_class = config.resources["synthetics_tests"] + config.state.destination["synthetics_mobile_applications"]["app-src-id"] = {"id": "app-dest-id"} + + r_obj = {"referenceId": "app-src-id", "referenceType": "latest"} + # mobile_applications path remaps successfully + failed = r_class.connect_id("referenceId", r_obj, "synthetics_mobile_applications") + assert failed == [] + assert r_obj["referenceId"] == "app-dest-id" + + r_obj2 = {"referenceId": "app-src-id", "referenceType": "latest"} + # versions path is a no-op + failed2 = r_class.connect_id("referenceId", r_obj2, "synthetics_mobile_applications_versions") + assert failed2 == [] + assert r_obj2["referenceId"] == "app-src-id" # unchanged + + +def test_synthetics_tests_connect_id_mobile_pinned(import_test): + """SyntheticsTests connect_id: referenceType=version remaps against versions, skips mobile_applications.""" + _, config = import_test + r_class = config.resources["synthetics_tests"] + config.state.destination["synthetics_mobile_applications_versions"]["ver-src-id"] = {"id": "ver-dest-id"} + + r_obj = {"referenceId": "ver-src-id", "referenceType": "version"} + # versions path remaps successfully + failed = r_class.connect_id("referenceId", r_obj, "synthetics_mobile_applications_versions") + assert failed == [] + assert r_obj["referenceId"] == "ver-dest-id" + + r_obj2 = {"referenceId": "ver-src-id", "referenceType": "version"} + # mobile_applications path is a no-op + failed2 = r_class.connect_id("referenceId", r_obj2, "synthetics_mobile_applications") + assert failed2 == [] + assert r_obj2["referenceId"] == "ver-src-id" # unchanged # --------------------------------------------------------------------------- From c224d83206596bfa6c5c66b3628b5c594737d17f Mon Sep 17 00:00:00 2001 From: "michael.richey" Date: Mon, 27 Apr 2026 15:59:16 -0400 Subject: [PATCH 4/5] fix(sync): use canonical source key in BFS and fix SLO synthetics monitor ID discovery - Replace _dep_in_source_state (bool) with _source_state_key (Optional[str]) so BFS queues the actual dict key (e.g. 'pub#999') rather than the bare public_id, preventing KeyError in _source_dependencies_for_resource - Add SLO extract_source_ids override that mirrors connect_id's suffix-match logic, filtering out monitor_ids that are backed by synthetics_tests to prevent false ("monitors", id) missing-dep entries - Update enforcement test allowlist: remove service_level_objectives now that it has a custom extract_source_ids override - Rename 4 _dep_in_source_state tests to _source_state_key; add 2 regression tests covering the KeyError and SLO synthetics false-miss bugs Co-Authored-By: Claude Sonnet 4.6 --- .../model/service_level_objectives.py | 14 ++++ datadog_sync/utils/resources_handler.py | 35 ++++---- tests/unit/test_import_force_missing_deps.py | 80 ++++++++++++++++--- 3 files changed, 99 insertions(+), 30 deletions(-) diff --git a/datadog_sync/model/service_level_objectives.py b/datadog_sync/model/service_level_objectives.py index 3d0b227b..6a3683b6 100644 --- a/datadog_sync/model/service_level_objectives.py +++ b/datadog_sync/model/service_level_objectives.py @@ -97,3 +97,17 @@ def connect_id(self, key: str, r_obj: Dict, resource_to_connect: str) -> Optiona if not found: failed_connections.append(_id) return failed_connections + + def extract_source_ids(self, key: str, r_obj: Dict, resource_to_connect: str) -> Optional[List[str]]: + # Mirror of connect_id -- keep in sync when connect_id changes. + # connect_id checks each monitor_id against monitors destination state first, + # then falls back to synthetics_tests using suffix match on composite keys + # ('{public_id}#{monitor_id}'). For source discovery, exclude IDs that are + # synthetics monitor IDs to prevent false ("monitors", id) misses. + if key != "monitor_ids": + return super().extract_source_ids(key, r_obj, resource_to_connect) + ids = [str(obj) for obj in r_obj[key]] + if resource_to_connect == "monitors": + synthetics = self.config.state.source["synthetics_tests"] + return [_id for _id in ids if not any(k.endswith("#" + _id) for k in synthetics)] + return super().extract_source_ids(key, r_obj, resource_to_connect) diff --git a/datadog_sync/utils/resources_handler.py b/datadog_sync/utils/resources_handler.py index 1db77b4b..68741edb 100644 --- a/datadog_sync/utils/resources_handler.py +++ b/datadog_sync/utils/resources_handler.py @@ -462,25 +462,23 @@ async def _import_resource(self, q_item: List) -> None: self.config.logger.error(f"error while importing resource: resource_type:{resource_type} id:{_id}") self.config.logger.debug(f"error detail: {str(e)}", resource_type=resource_type) - def _dep_in_source_state(self, dep_type: str, dep_id: str) -> bool: - """Check if a dependency exists in source state. + def _source_state_key(self, dep_type: str, dep_id: str) -> Optional[str]: + """Resolve a dependency ID to its canonical source-state key. + + Returns the actual key in config.state.source[dep_type], or None if + the dependency is not present in source state. Handles composite keys: synthetics_tests uses '{public_id}#{monitor_id}' keys in source state, so an exact match on a bare public_id fails. - Prefix matching resolves this. - - Note: ServiceLevelObjectives.connect_id uses k.endswith(_id) for its - synthetics lookup, but SLO's resource_connections has - "synthetics_tests": [] (empty attr list), so find_attr is never called - for that path and this method is never invoked for it. Prefix-only - matching is therefore safe. + Prefix matching resolves the canonical key and ensures the BFS queues + the correct key for subsequent _source_dependencies_for_resource lookups. """ source = self.config.state.source.get(dep_type, {}) if dep_id in source: - return True + return dep_id if dep_type == "synthetics_tests": - return any(k.startswith(dep_id + "#") for k in source) - return False + return next((k for k in source if k.startswith(dep_id + "#")), None) + return None def _source_dependencies_for_resource(self, resource_type: str, _id: str) -> Set[Tuple[str, str]]: """Return all (dep_type, dep_id) referenced by a source resource. @@ -532,8 +530,9 @@ def _discover_missing_dependencies(self) -> Set[Tuple[str, str]]: if not r_class or not r_class.resource_config.resource_connections: continue for dep_type, dep_id in self._source_dependencies_for_resource(rt, _id): - if self._dep_in_source_state(dep_type, dep_id): - queue.append((dep_type, dep_id)) # present — scan for ITS deps + source_key = self._source_state_key(dep_type, dep_id) + if source_key is not None: + queue.append((dep_type, source_key)) # canonical key — scan for ITS deps else: missing.add((dep_type, dep_id)) return missing @@ -578,8 +577,10 @@ async def _import_missing_dep_cb(self, q_item: Tuple[str, str]) -> None: self.config.logger.warning(f"skipping unknown dependency type: {resource_type}", _id=_id) return - # Skip if already imported (best-effort dedup + circular dep safety) - if _id in self.config.state.source[resource_type]: + # Skip if already imported (best-effort dedup + circular dep safety). + # Uses _source_state_key to handle composite keys (e.g., synthetics_tests + # '{public_id}#{monitor_id}') so a bare public_id deduplicates correctly. + if self._source_state_key(resource_type, _id) is not None: return try: @@ -606,7 +607,7 @@ async def _import_missing_dep_cb(self, q_item: Tuple[str, str]) -> None: r_class = self.config.resources[resource_type] if r_class.resource_config.resource_connections: for dep_type, dep_id in self._source_dependencies_for_resource(resource_type, _id): - if not self._dep_in_source_state(dep_type, dep_id): + if self._source_state_key(dep_type, dep_id) is None: self.worker.work_queue.put_nowait((dep_type, dep_id)) except Exception as e: self.config.logger.error( diff --git a/tests/unit/test_import_force_missing_deps.py b/tests/unit/test_import_force_missing_deps.py index 4d2548d5..74e5c3aa 100644 --- a/tests/unit/test_import_force_missing_deps.py +++ b/tests/unit/test_import_force_missing_deps.py @@ -850,31 +850,86 @@ def test_synthetics_tests_connect_id_mobile_pinned(import_test): # --------------------------------------------------------------------------- -def test_dep_in_source_state_exact_match(import_test): - """_dep_in_source_state: exact key match returns True.""" +def test_source_state_key_exact_match(import_test): + """_source_state_key: exact key present → returns that key.""" handler, config = import_test config.state.source["dashboards"]["dash-1"] = {"id": "dash-1"} - assert handler._dep_in_source_state("dashboards", "dash-1") is True + assert handler._source_state_key("dashboards", "dash-1") == "dash-1" -def test_dep_in_source_state_miss(import_test): - """_dep_in_source_state: key absent from source → False.""" +def test_source_state_key_miss(import_test): + """_source_state_key: key absent from source → None.""" handler, config = import_test - assert handler._dep_in_source_state("dashboards", "nonexistent") is False + assert handler._source_state_key("dashboards", "nonexistent") is None -def test_dep_in_source_state_synthetics_prefix_match(import_test): - """_dep_in_source_state: synthetics_tests composite key matched by prefix.""" +def test_source_state_key_synthetics_prefix_match(import_test): + """_source_state_key: bare public_id resolves to composite key.""" handler, config = import_test config.state.source["synthetics_tests"]["abc-public#999"] = {"public_id": "abc-public", "monitor_id": 999} - assert handler._dep_in_source_state("synthetics_tests", "abc-public") is True + assert handler._source_state_key("synthetics_tests", "abc-public") == "abc-public#999" -def test_dep_in_source_state_synthetics_no_false_positive(import_test): - """_dep_in_source_state: 'abc' must NOT match 'abcdef#123' — prefix only.""" +def test_source_state_key_synthetics_no_false_positive(import_test): + """_source_state_key: 'abc' must NOT match 'abcdef#123' — prefix only.""" handler, config = import_test config.state.source["synthetics_tests"]["abcdef#123"] = {"public_id": "abcdef", "monitor_id": 123} - assert handler._dep_in_source_state("synthetics_tests", "abc") is False + assert handler._source_state_key("synthetics_tests", "abc") is None + + +def test_discover_synthetics_present_dep_no_keyerror(import_test): + """BFS walks through synthetics_tests composite key without KeyError. + + Bug #1 regression: _source_state_key now returns the canonical key + ('abc-public#999'), so _source_dependencies_for_resource receives the + correct key and the exact dict lookup doesn't raise KeyError. + """ + handler, config = import_test + setup_state( + config, + { + "synthetics_global_variables": { + "sgv-1": {"id": "sgv-1", "parse_test_public_id": "abc-public"}, + }, + "synthetics_tests": { + "abc-public#999": {"public_id": "abc-public", "monitor_id": 999, "locations": []}, + }, + }, + resources_arg=["synthetics_global_variables"], + ) + + # Must not raise KeyError when BFS walks into abc-public#999 + result = handler._discover_missing_dependencies() + + # The synthetics_test is present — it should NOT be in missing set + assert ("synthetics_tests", "abc-public") not in result + assert ("synthetics_tests", "abc-public#999") not in result + + +def test_slo_monitor_ids_backed_by_synthetics(import_test): + """SLO with monitor_ids backed by a synthetics test must not appear as missing monitor. + + Bug #2 regression: extract_source_ids on SLO now filters out monitor IDs + that correspond to existing synthetics_tests composite keys. + """ + handler, config = import_test + setup_state( + config, + { + "service_level_objectives": { + "slo-1": {"id": "slo-1", "monitor_ids": [999]}, + }, + "synthetics_tests": { + "abc-public#999": {"public_id": "abc-public", "monitor_id": 999, "locations": []}, + }, + }, + resources_arg=["service_level_objectives"], + ) + + result = handler._discover_missing_dependencies() + + # Monitor 999 is backed by a synthetics test — must NOT appear as missing monitor + assert ("monitors", "999") not in result def test_discover_no_connections_empty_result(import_test): @@ -972,7 +1027,6 @@ def test_extract_source_ids_overrides_complete(config): "logs_indexes_order", # Custom remapping; base returns correct raw names "synthetics_global_variables", # Composite key handled by _dep_in_source_state "synthetics_test_suites", # Same as synthetics_global_variables - "service_level_objectives", # Empty synthetics_tests paths; base works for monitors "logs_pipelines_order", # Invalid-entry filtering is a sync concern only } for rt_name, r_class in config.resources.items(): From 6f41167cfd876c10566ec5c4586133161052aa92 Mon Sep 17 00:00:00 2001 From: "michael.richey" Date: Mon, 27 Apr 2026 16:24:31 -0400 Subject: [PATCH 5/5] fix(sync): lazy-load mobile app versions in SyntheticsTests.import_resource - Replace class-level mutable `versions: List = []` with instance-level `self.versions: Optional[List[Dict]] = None` in __init__ to prevent state leaking across instances - Add _ensure_mobile_versions_loaded(client) lazy loader that fetches SyntheticsMobileApplicationsVersions.get_resources() on first call only; uses None sentinel so orgs with zero versions don't refetch every import - Update get_resources() and import_resource() mobile branch to use the lazy loader, fixing the bug where force-missing-dep imports of mobile tests set mobileApplicationsVersions=[] because get_resources() was never called - Add 2 regression tests: lazy load on direct import, and None-vs-[] sentinel (no double fetch for empty version list) Co-Authored-By: Claude Sonnet 4.6 --- datadog_sync/model/synthetics_tests.py | 17 ++++-- tests/unit/test_import_force_missing_deps.py | 60 ++++++++++++++++++++ 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/datadog_sync/model/synthetics_tests.py b/datadog_sync/model/synthetics_tests.py index 5bfb2d7d..33968aad 100644 --- a/datadog_sync/model/synthetics_tests.py +++ b/datadog_sync/model/synthetics_tests.py @@ -90,7 +90,10 @@ class SyntheticsTests(BaseResource): network_base_path: str = "/api/v2/synthetics/tests/network" network_delete_path: str = "/api/v2/synthetics/tests/bulk-delete" get_params = {"include_metadata": "true"} - versions: List = [] + + def __init__(self, config): + super().__init__(config) + self.versions: Optional[List[Dict]] = None @staticmethod def _unwrap_network_response(resp: Dict) -> Dict: @@ -144,13 +147,18 @@ async def _delete_test(self, client: CustomClient, test_type: str, public_id: st body = {"public_ids": [public_id]} await client.post(self.resource_config.base_path + "/delete", body) + async def _ensure_mobile_versions_loaded(self, client: CustomClient) -> List[Dict]: + if self.versions is None: + versions_resource = SyntheticsMobileApplicationsVersions(self.config) + self.versions = await versions_resource.get_resources(client) + return self.versions + async def get_resources(self, client: CustomClient) -> List[Dict]: resp = await client.get( self.resource_config.base_path, params=self.get_params, ) - versions = SyntheticsMobileApplicationsVersions(self.config) - self.versions = await versions.get_resources(client) + await self._ensure_mobile_versions_loaded(client) return resp["tests"] async def import_resource(self, _id: Optional[str] = None, resource: Optional[Dict] = None) -> Tuple[str, Dict]: @@ -197,9 +205,10 @@ async def import_resource(self, _id: Optional[str] = None, resource: Optional[Di self.mobile_test_path.format(_id), params=self.get_params, ) + mobile_versions = await self._ensure_mobile_versions_loaded(source_client) versions = [ i["id"] - for i in self.versions + for i in mobile_versions if i["application_id"] == resource["options"]["mobileApplication"]["applicationId"] ] resource["mobileApplicationsVersions"] = list(set(versions)) diff --git a/tests/unit/test_import_force_missing_deps.py b/tests/unit/test_import_force_missing_deps.py index 74e5c3aa..b1b996bc 100644 --- a/tests/unit/test_import_force_missing_deps.py +++ b/tests/unit/test_import_force_missing_deps.py @@ -1037,3 +1037,63 @@ def test_extract_source_ids_overrides_complete(config): f"{rt_name} has custom connect_id but no extract_source_ids override " f"and is not in the verified-safe allowlist" ) + + +# --------------------------------------------------------------------------- +# Cycle J — GREEN/GREEN: mobile versions lazy loading regression tests +# --------------------------------------------------------------------------- + + +def test_mobile_synthetics_import_loads_versions_lazily(import_test): + """Mobile test imported without get_resources() still populates mobileApplicationsVersions. + + Regression: when synthetics_tests is imported as a missing dep via _import_missing_dep_cb, + get_resources() is never called, so self.versions was [] and mobileApplicationsVersions + was silently empty. _ensure_mobile_versions_loaded fixes this. + """ + from datadog_sync.model.synthetics_mobile_applications_versions import SyntheticsMobileApplicationsVersions + + _, config = import_test + r_class = config.resources["synthetics_tests"] + r_class.versions = None # ensure clean state + + mobile_resource = { + "public_id": "test-pub-1", + "monitor_id": 42, + "type": "mobile", + "options": {"mobileApplication": {"applicationId": "app-1"}}, + } + config.source_client.get = AsyncMock(return_value=mobile_resource) + + mock_versions = [{"id": "ver-1", "application_id": "app-1"}, {"id": "ver-2", "application_id": "other-app"}] + with patch.object(SyntheticsMobileApplicationsVersions, "get_resources", new=AsyncMock(return_value=mock_versions)): + _key, result = asyncio.run(r_class.import_resource(resource=mobile_resource)) + + assert result["mobileApplicationsVersions"] == ["ver-1"] + assert r_class.versions == mock_versions # lazily populated + + +def test_mobile_versions_not_refetched_when_empty(import_test): + """_ensure_mobile_versions_loaded: org with zero versions sets versions=[] and does not refetch. + + Regression for the None-vs-[] sentinel: if we used `not self.versions` instead of + `self.versions is None`, an org with zero versions would refetch on every import. + """ + from datadog_sync.model.synthetics_mobile_applications_versions import SyntheticsMobileApplicationsVersions + + _, config = import_test + r_class = config.resources["synthetics_tests"] + r_class.versions = None # ensure clean state + + async def run_twice(): + r1 = await r_class._ensure_mobile_versions_loaded(config.source_client) + r2 = await r_class._ensure_mobile_versions_loaded(config.source_client) + return r1, r2 + + mock_get = AsyncMock(return_value=[]) + with patch.object(SyntheticsMobileApplicationsVersions, "get_resources", mock_get): + result1, result2 = asyncio.run(run_twice()) + + assert result1 == [] + assert result2 == [] + mock_get.assert_called_once() # must NOT refetch