From 4bdbc1ec288dbb58fb7fac73a6c7af1364a3c3c5 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Thu, 14 May 2026 16:25:51 -0700 Subject: [PATCH 1/5] out tests - add out stage tests. squashed commits to rebase onto main Signed-off-by: Alina (Xi) Li --- .../core/operator/stages/out/__init__.py | 0 .../stages/out/test_out_acceptance.py | 542 +++++++++++++ .../out/test_out_document_field_errors.py | 269 +++++++ .../stages/out/test_out_namespace_errors.py | 345 ++++++++ .../out/test_out_stage_argument_errors.py | 380 +++++++++ .../out/test_out_target_restriction_errors.py | 492 ++++++++++++ .../stages/out/test_out_timeseries.py | 601 ++++++++++++++ .../out/test_out_timeseries_bucket_errors.py | 735 ++++++++++++++++++ .../test_out_timeseries_field_type_errors.py | 574 ++++++++++++++ .../out/test_out_timeseries_value_errors.py | 665 ++++++++++++++++ .../stages/out/test_out_write_behavior.py | 437 +++++++++++ .../stages/out/test_out_write_properties.py | 539 +++++++++++++ .../operator/stages/out/utils/__init__.py | 0 .../stages/out/utils/out_test_helpers.py | 42 + .../stages/test_stages_combination_out.py | 374 +++++++++ documentdb_tests/framework/error_codes.py | 9 + 16 files changed, 6004 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/out/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_acceptance.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_document_field_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_namespace_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_stage_argument_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_target_restriction_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries.py create mode 100644 
documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries_bucket_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries_field_type_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries_value_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_write_behavior.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_write_properties.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/out/utils/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/out/utils/out_test_helpers.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/test_stages_combination_out.py diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/__init__.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_acceptance.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_acceptance.py new file mode 100644 index 00000000..bac34acb --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_acceptance.py @@ -0,0 +1,542 @@ +"""Tests for $out stage - syntax, name acceptance, and options.""" + +from __future__ import annotations + +from datetime import datetime +from typing import Any, cast + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.out.utils.out_test_helpers import ( + OutTestCase, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + populate_collection, +) +from documentdb_tests.framework.assertions import ( + assertResult, + assertSuccess, +) +from documentdb_tests.framework.executor import 
execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Syntax Forms]: $out accepts a string (same-database output), a +# document with db/coll (cross-database output), or a document with db/coll +# and timeseries (time series collection output), and each form writes the +# pipeline results to the specified target. +OUT_SYNTAX_FORMS_TESTS: list[OutTestCase] = [ + OutTestCase( + "string_form_same_database", + docs=[{"_id": 1, "value": 10}, {"_id": 2, "value": 20}], + target_coll="syntax_string_target", + out_spec=None, + expected_type="collection", + expected_options={}, + msg="$out string form should write results to a collection in the same database", + ), + OutTestCase( + "document_form_db_and_coll", + docs=[{"_id": 1, "value": 10}, {"_id": 2, "value": 20}], + target_coll="syntax_doc_target", + out_spec={}, + expected_type="collection", + expected_options={}, + msg="$out document form with db and coll should write results to the specified collection", + ), + OutTestCase( + "document_form_with_timeseries", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "value": 10}], + target_coll="syntax_ts_target", + out_spec={"timeseries": {"timeField": "ts"}}, + expected_type="timeseries", + expected_options={ + "timeseries": { + "timeField": "ts", + "granularity": "seconds", + "bucketMaxSpanSeconds": 3_600, + } + }, + msg="$out document form with timeseries should create a time series collection", + ), +] + +# Property [Null as Absent]: null values for timeseries and its sub-fields +# (metaField, granularity, bucketMaxSpanSeconds, bucketRoundingSeconds) are +# treated as absent, producing the same collection as if the field were omitted. 
+OUT_NULL_SUCCESS_TESTS: list[OutTestCase] = [ + OutTestCase( + "null_timeseries_regular_collection", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "value": 10}], + target_coll="target_ts_null", + out_spec={"timeseries": None}, + expected_type="collection", + expected_options={}, + msg="$out should treat timeseries null as absent and create a regular collection", + ), + OutTestCase( + "null_meta_field_omitted", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "value": 10}], + target_coll="target_meta_null", + out_spec={"timeseries": {"timeField": "ts", "metaField": None}}, + expected_type="timeseries", + expected_options={ + "timeseries": { + "timeField": "ts", + "granularity": "seconds", + "bucketMaxSpanSeconds": 3_600, + } + }, + msg="$out should treat metaField null as absent and omit it from timeseries options", + ), + OutTestCase( + "null_granularity_defaults_to_seconds", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "value": 10}], + target_coll="target_gran_null", + out_spec={"timeseries": {"timeField": "ts", "granularity": None}}, + expected_type="timeseries", + expected_options={ + "timeseries": { + "timeField": "ts", + "granularity": "seconds", + "bucketMaxSpanSeconds": 3_600, + } + }, + msg="$out should treat granularity null as absent and default to 'seconds'", + ), + OutTestCase( + "null_bucket_params_defaults_to_granularity", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "value": 10}], + target_coll="target_bucket_null", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": None, + "bucketRoundingSeconds": None, + } + }, + expected_type="timeseries", + expected_options={ + "timeseries": { + "timeField": "ts", + "granularity": "seconds", + "bucketMaxSpanSeconds": 3_600, + } + }, + msg=( + "$out should treat null bucketMaxSpanSeconds and bucketRoundingSeconds" + " as absent and default to granularity-based bucketing" + ), + ), +] + +# Property [Collection Name Acceptance]: any non-empty string of non-null +# bytes that does not 
match a rejection rule is accepted as a collection name. +OUT_COLLECTION_NAME_ACCEPTANCE_TESTS: list[OutTestCase] = [ + OutTestCase( + "control_character", + docs=[{"_id": 1}], + target_coll="\x01", + expected_type="collection", + expected_options={}, + msg="$out should accept a control character as a collection name", + ), + OutTestCase( + "embedded_control_character", + docs=[{"_id": 1}], + target_coll="test\x1fcoll", + expected_type="collection", + expected_options={}, + msg="$out should accept embedded control characters in a collection name", + ), + OutTestCase( + "unicode_no_break_space", + docs=[{"_id": 1}], + target_coll="\u00a0", + expected_type="collection", + expected_options={}, + msg="$out should accept Unicode no-break space as a collection name", + ), + OutTestCase( + "zero_width_space", + docs=[{"_id": 1}], + target_coll="\u200b", + expected_type="collection", + expected_options={}, + msg="$out should accept zero-width space as a collection name", + ), + OutTestCase( + "bom_character", + docs=[{"_id": 1}], + target_coll="\ufeff", + expected_type="collection", + expected_options={}, + msg="$out should accept BOM character as a collection name", + ), + OutTestCase( + "emoji", + docs=[{"_id": 1}], + target_coll="\U0001f389", + expected_type="collection", + expected_options={}, + msg="$out should accept emoji as a collection name", + ), + OutTestCase( + "cjk_characters", + docs=[{"_id": 1}], + target_coll="\u4e2d\u6587", + expected_type="collection", + expected_options={}, + msg="$out should accept CJK characters as a collection name", + ), + OutTestCase( + "punctuation", + docs=[{"_id": 1}], + target_coll="a!@#b", + expected_type="collection", + expected_options={}, + msg="$out should accept punctuation in a collection name", + ), + OutTestCase( + "single_character", + docs=[{"_id": 1}], + target_coll="a", + expected_type="collection", + expected_options={}, + msg="$out should accept a single-character collection name", + ), + OutTestCase( + 
"single_digit", + docs=[{"_id": 1}], + target_coll="1", + expected_type="collection", + expected_options={}, + msg="$out should accept a single-digit collection name", + ), + OutTestCase( + "digits_only", + docs=[{"_id": 1}], + target_coll="123", + expected_type="collection", + expected_options={}, + msg="$out should accept a digits-only collection name", + ), + OutTestCase( + "temp_prefix", + docs=[{"_id": 1}], + target_coll="tmp.agg_out.", + expected_type="collection", + expected_options={}, + msg="$out should accept the tmp.agg_out. prefix as a regular collection name", + ), +] + +OUT_ACCEPTANCE_TESTS = ( + OUT_SYNTAX_FORMS_TESTS + OUT_NULL_SUCCESS_TESTS + OUT_COLLECTION_NAME_ACCEPTANCE_TESTS +) + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_ACCEPTANCE_TESTS)) +def test_out_acceptance(collection, test_case: OutTestCase): + """Test $out writes results and creates the correct collection type.""" + populate_collection(collection, test_case) + out_stage = test_case.build_out_stage(collection) + execute_command( + collection, + {"aggregate": collection.name, "pipeline": [out_stage], "cursor": {}}, + ) + result = execute_command( + collection, + {"listCollections": 1, "filter": {"name": test_case.target_coll}}, + ) + raw_doc = cast(dict, result)["cursor"]["firstBatch"][0] + expected_info: dict[str, Any] = { + "name": test_case.target_coll, + "type": test_case.expected_type, + "options": test_case.expected_options, + "info": raw_doc["info"], + } + if "idIndex" in raw_doc: + expected_info["idIndex"] = raw_doc["idIndex"] + assertSuccess(result, [expected_info], msg=test_case.msg) + + +# Property [Nested Pipeline Restriction - View Source]: $out from a view +# source (not in the view definition) succeeds. 
+OUT_VIEW_SOURCE_SUCCESS_TESTS: list[OutTestCase] = [ + OutTestCase( + "view_source_out", + docs=[{"_id": 1, "value": 10}], + target_coll="view_source_out_target", + pipeline=[{"$out": "view_source_out_target"}], + setup=lambda c: ( + c.database.drop_collection("good_view_for_out"), + c.database.command( + { + "create": "good_view_for_out", + "viewOn": c.name, + "pipeline": [{"$match": {"_id": 1}}], + } + ), + ), + expected=[{"_id": 1, "value": 10}], + msg="$out from a view source should write the view's results to the target collection", + ), +] + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_VIEW_SOURCE_SUCCESS_TESTS)) +def test_out_from_view_source_succeeds(collection, test_case: OutTestCase): + """Test $out from a view source succeeds.""" + populate_collection(collection, test_case) + if test_case.setup: + test_case.setup(collection) + db = collection.database + execute_command( + db["good_view_for_out"], + { + "aggregate": "good_view_for_out", + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + result = execute_command( + collection, + {"find": test_case.target_coll, "filter": {}}, + ) + assertResult(result, expected=test_case.expected, msg=test_case.msg) + + +# Property [Aggregation Options]: standard aggregation options (collation, +# hint, maxTimeMS, allowDiskUse, bypassDocumentValidation) are accepted +# with $out pipelines. 
+OUT_AGGREGATION_OPTION_SUCCESS_TESTS: list[OutTestCase] = [ + OutTestCase( + "agg_opts_collation", + docs=[{"_id": 1, "value": 10}], + target_coll="agg_opts_target", + pipeline=[{"$out": "agg_opts_target"}], + out_spec={"collation": {"locale": "en", "strength": 2}}, + msg="$out should succeed with aggregation option collation", + ), + OutTestCase( + "agg_opts_hint", + docs=[{"_id": 1, "value": 10}], + target_coll="agg_opts_target", + pipeline=[{"$out": "agg_opts_target"}], + out_spec={"hint": "_id_"}, + msg="$out should succeed with aggregation option hint", + ), + OutTestCase( + "agg_opts_max_time_ms", + docs=[{"_id": 1, "value": 10}], + target_coll="agg_opts_target", + pipeline=[{"$out": "agg_opts_target"}], + out_spec={"maxTimeMS": 60_000}, + msg="$out should succeed with aggregation option maxTimeMS", + ), + OutTestCase( + "agg_opts_allow_disk_use", + docs=[{"_id": 1, "value": 10}], + target_coll="agg_opts_target", + pipeline=[{"$out": "agg_opts_target"}], + out_spec={"allowDiskUse": True}, + msg="$out should succeed with aggregation option allowDiskUse", + ), + OutTestCase( + "agg_opts_bypass_doc_validation", + docs=[{"_id": 1, "value": 10}], + target_coll="agg_opts_target", + pipeline=[{"$out": "agg_opts_target"}], + out_spec={"bypassDocumentValidation": True}, + msg="$out should succeed with aggregation option bypassDocumentValidation", + ), +] + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_AGGREGATION_OPTION_SUCCESS_TESTS)) +def test_out_aggregation_options(collection, test_case: OutTestCase): + """Test $out succeeds with standard aggregation options.""" + populate_collection(collection, test_case) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + **test_case.out_spec, + }, + ) + assertSuccess( + result, + [], + msg=test_case.msg, + ) + + +# Property [Read Concern Acceptance]: non-linearizable read concerns +# (majority, local, available) 
are accepted with $out pipelines. +OUT_READ_CONCERN_ACCEPTANCE_TESTS: list[OutTestCase] = [ + OutTestCase( + "rc_majority", + docs=[{"_id": 1, "value": 10}], + target_coll="rc_majority_target", + pipeline=[{"$out": "rc_majority_target"}], + out_spec={"readConcern": "majority"}, + msg="$out should succeed with readConcern level 'majority'", + ), + OutTestCase( + "rc_local", + docs=[{"_id": 1, "value": 10}], + target_coll="rc_local_target", + pipeline=[{"$out": "rc_local_target"}], + out_spec={"readConcern": "local"}, + msg="$out should succeed with readConcern level 'local'", + ), + OutTestCase( + "rc_available", + docs=[{"_id": 1, "value": 10}], + target_coll="rc_available_target", + pipeline=[{"$out": "rc_available_target"}], + out_spec={"readConcern": "available"}, + msg="$out should succeed with readConcern level 'available'", + ), +] + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_READ_CONCERN_ACCEPTANCE_TESTS)) +def test_out_read_concern_acceptance(collection, test_case: OutTestCase): + """Test $out succeeds with non-linearizable read concern levels.""" + populate_collection(collection, test_case) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + "readConcern": {"level": test_case.out_spec["readConcern"]}, + }, + ) + assertSuccess( + result, + [], + msg=test_case.msg, + ) + + +# Property [Schema Validation Success]: when the target collection has +# validationAction set to warn the write succeeds, and +# bypassDocumentValidation bypasses schema validation errors. 
+OUT_SCHEMA_VALIDATION_SUCCESS_TESTS: list[OutTestCase] = [ + OutTestCase( + "schema_val_warn", + docs=[{"_id": 1, "value": "not_a_number"}], + target_coll="schema_val_warn_target", + pipeline=[{"$out": "schema_val_warn_target"}], + out_spec={"bypassDocumentValidation": False}, + setup=lambda c: ( + c.database.drop_collection("schema_val_warn_target"), + c.database.command( + { + "create": "schema_val_warn_target", + "validator": { + "$jsonSchema": { + "bsonType": "object", + "required": ["value"], + "properties": {"value": {"bsonType": "int"}}, + } + }, + "validationAction": "warn", + } + ), + ), + expected=[{"_id": 1, "value": "not_a_number"}], + msg="$out should succeed with validationAction='warn'", + ), + OutTestCase( + "schema_val_bypass", + docs=[{"_id": 1, "value": "not_a_number"}], + target_coll="schema_val_bypass_target", + pipeline=[{"$out": "schema_val_bypass_target"}], + out_spec={"bypassDocumentValidation": True}, + setup=lambda c: ( + c.database.drop_collection("schema_val_bypass_target"), + c.database.command( + { + "create": "schema_val_bypass_target", + "validator": { + "$jsonSchema": { + "bsonType": "object", + "required": ["value"], + "properties": {"value": {"bsonType": "int"}}, + } + }, + "validationAction": "error", + } + ), + ), + expected=[{"_id": 1, "value": "not_a_number"}], + msg="$out should succeed with bypassDocumentValidation=True", + ), +] + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_SCHEMA_VALIDATION_SUCCESS_TESTS)) +def test_out_schema_validation_success(collection, test_case: OutTestCase): + """Test $out succeeds when schema validation is warn or bypassed.""" + populate_collection(collection, test_case) + if test_case.setup: + test_case.setup(collection) + cmd: dict[str, Any] = { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + } + if test_case.out_spec["bypassDocumentValidation"]: + cmd["bypassDocumentValidation"] = True + execute_command(collection, cmd) 
+ result = execute_command( + collection, + {"find": test_case.target_coll, "filter": {}, "projection": {"_id": 1, "value": 1}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg) + + +# Property [Index Constraint Errors - Nonexistent Target]: when a unique +# index violation occurs writing to a nonexistent target, the target +# collection is not created. +OUT_INDEX_NONEXISTENT_TARGET_NOT_CREATED_TESTS: list[OutTestCase] = [ + OutTestCase( + "idx_nonexist_not_created", + docs=[{"_id": 1, "x": 1}, {"_id": 2, "x": 2}], + target_coll="idx_nonexist_target", + pipeline=[ + {"$unset": "_id"}, + {"$addFields": {"_id": "same"}}, + {"$out": "idx_nonexist_target"}, + ], + expected=[], + msg="$out should not create the target collection when a unique index violation occurs", + ), +] + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_INDEX_NONEXISTENT_TARGET_NOT_CREATED_TESTS)) +def test_out_unique_violation_nonexistent_target_not_created(collection, test_case: OutTestCase): + """Test $out does not create the target when a unique index violation occurs.""" + populate_collection(collection, test_case) + collection.database.drop_collection(test_case.target_coll) + execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + result = execute_command( + collection, + {"listCollections": 1, "filter": {"name": test_case.target_coll}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_document_field_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_document_field_errors.py new file mode 100644 index 00000000..d4e63f4a --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_document_field_errors.py @@ -0,0 +1,269 @@ +"""Tests for $out stage - document field type errors.""" + +from __future__ import annotations + 
+from datetime import datetime + +import pytest +from bson import ( + Binary, + Code, + Decimal128, + Int64, + MaxKey, + MinKey, + ObjectId, + Regex, + Timestamp, +) + +from documentdb_tests.compatibility.tests.core.operator.stages.out.utils.out_test_helpers import ( + OutTestCase, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + populate_collection, +) +from documentdb_tests.framework.assertions import ( + assertResult, +) +from documentdb_tests.framework.error_codes import ( + TYPE_MISMATCH_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Document Form Field Type Errors]: non-string types for db or +# coll in document form produce a type mismatch error, with db checked +# before coll when both have type errors. +OUT_DOCUMENT_FIELD_TYPE_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "db_type_int32", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": 42, "coll": "target"}}], + msg="$out should reject int32 db as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "db_type_bool", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": True, "coll": "target"}}], + msg="$out should reject bool db as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "db_type_array", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": ["test"], "coll": "target"}}], + msg="$out should reject array db as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "db_type_object", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": {"x": 1}, "coll": "target"}}], + msg="$out should reject object db as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "db_type_int64", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": Int64(1), "coll": "target"}}], + msg="$out should reject Int64 db as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + 
"db_type_double", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": 1.0, "coll": "target"}}], + msg="$out should reject double db as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "db_type_decimal128", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": Decimal128("1"), "coll": "target"}}], + msg="$out should reject Decimal128 db as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "db_type_objectid", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": ObjectId(), "coll": "target"}}], + msg="$out should reject ObjectId db as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "db_type_datetime", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": datetime(2024, 1, 1), "coll": "target"}}], + msg="$out should reject datetime db as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "db_type_binary", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": Binary(b"\x01"), "coll": "target"}}], + msg="$out should reject Binary db as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "db_type_regex", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": Regex("abc"), "coll": "target"}}], + msg="$out should reject Regex db as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "db_type_timestamp", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": Timestamp(1, 1), "coll": "target"}}], + msg="$out should reject Timestamp db as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "db_type_minkey", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": MinKey(), "coll": "target"}}], + msg="$out should reject MinKey db as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "db_type_maxkey", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": MaxKey(), "coll": "target"}}], + msg="$out should reject MaxKey db as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "db_type_code", + docs=[{"_id": 1}], + 
pipeline=[{"$out": {"db": Code("function() {}"), "coll": "target"}}], + msg="$out should reject Code db as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "coll_type_int32", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": 42}}], + msg="$out should reject int32 coll as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "coll_type_bool", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": True}}], + msg="$out should reject bool coll as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "coll_type_array", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": ["target"]}}], + msg="$out should reject array coll as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "coll_type_object", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": {"x": 1}}}], + msg="$out should reject object coll as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "coll_type_int64", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": Int64(1)}}], + msg="$out should reject Int64 coll as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "coll_type_double", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": 1.0}}], + msg="$out should reject double coll as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "coll_type_decimal128", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": Decimal128("1")}}], + msg="$out should reject Decimal128 coll as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "coll_type_objectid", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": ObjectId()}}], + msg="$out should reject ObjectId coll as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "coll_type_datetime", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": datetime(2024, 1, 1)}}], + 
msg="$out should reject datetime coll as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "coll_type_binary", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": Binary(b"\x01")}}], + msg="$out should reject Binary coll as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "coll_type_regex", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": Regex("abc")}}], + msg="$out should reject Regex coll as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "coll_type_timestamp", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": Timestamp(1, 1)}}], + msg="$out should reject Timestamp coll as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "coll_type_minkey", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": MinKey()}}], + msg="$out should reject MinKey coll as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "coll_type_maxkey", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": MaxKey()}}], + msg="$out should reject MaxKey coll as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "coll_type_code", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": Code("function() {}")}}], + msg="$out should reject Code coll as a type mismatch", + error_code=TYPE_MISMATCH_ERROR, + ), +] + +# Property [Document Form Unknown Fields]: any field other than db, coll, +# and timeseries in the document form is rejected as an unknown field, and +# field name matching is case-sensitive and whitespace-sensitive. 
+ + +OUT_DOCUMENT_FIELD_ERROR_TESTS = OUT_DOCUMENT_FIELD_TYPE_ERROR_TESTS + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_DOCUMENT_FIELD_ERROR_TESTS)) +def test_out_error(collection, test_case: OutTestCase): + """Test $out rejects invalid configurations with the expected error code.""" + populate_collection(collection, test_case) + pipeline = test_case.pipeline + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": pipeline, "cursor": {}}, + ) + assertResult(result, error_code=test_case.error_code, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_namespace_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_namespace_errors.py new file mode 100644 index 00000000..dffaad88 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_namespace_errors.py @@ -0,0 +1,345 @@ +"""Tests for $out stage - namespace and pipeline position errors.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.out.utils.out_test_helpers import ( + OutTestCase, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + populate_collection, +) +from documentdb_tests.framework.assertions import ( + assertResult, +) +from documentdb_tests.framework.error_codes import ( + FACET_PIPELINE_INVALID_STAGE_ERROR, + ILLEGAL_OPERATION_ERROR, + INVALID_NAMESPACE_ERROR, + LOOKUP_SUB_PIPELINE_NOT_ALLOWED_ERROR, + OUT_NOT_LAST_STAGE_ERROR, + OUT_RESTRICTED_DATABASE_ERROR, + OUT_SPECIAL_COLLECTION_ERROR, + UNION_WITH_SUB_PIPELINE_NOT_ALLOWED_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Collection Name Validation Errors]: invalid collection names +# are rejected with the appropriate error code based on the 
violation type, +# with namespace errors taking precedence over illegal operation errors. +OUT_COLLECTION_NAME_VALIDATION_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "coll_empty_string", + docs=[{"_id": 1}], + pipeline=[{"$out": ""}], + msg="$out should reject empty string collection name as invalid namespace", + error_code=INVALID_NAMESPACE_ERROR, + ), + OutTestCase( + "coll_null_byte", + docs=[{"_id": 1}], + pipeline=[{"$out": "test\x00coll"}], + msg="$out should reject collection name containing null byte as invalid namespace", + error_code=INVALID_NAMESPACE_ERROR, + ), + OutTestCase( + "coll_leading_dot", + docs=[{"_id": 1}], + pipeline=[{"$out": ".leading"}], + msg="$out should reject collection name with leading dot as invalid namespace", + error_code=INVALID_NAMESPACE_ERROR, + ), + OutTestCase( + "coll_system_prefix", + docs=[{"_id": 1}], + pipeline=[{"$out": "system.test"}], + msg="$out should reject system. prefix collection name as a special collection", + error_code=OUT_SPECIAL_COLLECTION_ERROR, + ), + OutTestCase( + "coll_system_buckets_prefix", + docs=[{"_id": 1}], + pipeline=[{"$out": "system.buckets.test"}], + msg="$out should reject system.buckets. 
prefix as a special collection", + error_code=OUT_SPECIAL_COLLECTION_ERROR, + ), + OutTestCase( + "coll_dollar_prefix", + docs=[{"_id": 1}], + pipeline=[{"$out": "$name"}], + msg="$out should reject dollar-prefixed collection name as illegal operation", + error_code=ILLEGAL_OPERATION_ERROR, + ), + OutTestCase( + "coll_double_dollar_prefix", + docs=[{"_id": 1}], + pipeline=[{"$out": "$$name"}], + msg="$out should reject double-dollar-prefixed collection name as illegal operation", + error_code=ILLEGAL_OPERATION_ERROR, + ), + OutTestCase( + "coll_bare_dollar", + docs=[{"_id": 1}], + pipeline=[{"$out": "$"}], + msg="$out should reject bare dollar collection name as illegal operation", + error_code=ILLEGAL_OPERATION_ERROR, + ), + OutTestCase( + "coll_bare_double_dollar", + docs=[{"_id": 1}], + pipeline=[{"$out": "$$"}], + msg="$out should reject bare double-dollar collection name as illegal operation", + error_code=ILLEGAL_OPERATION_ERROR, + ), + OutTestCase( + "coll_namespace_exceeds_255_bytes", + docs=[{"_id": 1}], + pipeline=[{"$out": "a" * 255}], + msg="$out should reject namespace exceeding 255 bytes as illegal operation", + error_code=ILLEGAL_OPERATION_ERROR, + ), +] + +# Property [Database Name Validation Errors]: invalid database names +# containing dots, slashes, backslashes, ASCII spaces, null bytes, dollar +# prefixes, or empty strings are rejected as invalid namespaces. 
+OUT_DATABASE_NAME_VALIDATION_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "db_empty_string", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "", "coll": "target"}}], + msg="$out should reject empty string database name", + error_code=INVALID_NAMESPACE_ERROR, + ), + OutTestCase( + "db_leading_dot", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": ".leading", "coll": "target"}}], + msg="$out should reject database name with leading dot", + error_code=INVALID_NAMESPACE_ERROR, + ), + OutTestCase( + "db_middle_dot", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "a.b", "coll": "target"}}], + msg="$out should reject database name with middle dot", + error_code=INVALID_NAMESPACE_ERROR, + ), + OutTestCase( + "db_trailing_dot", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "trailing.", "coll": "target"}}], + msg="$out should reject database name with trailing dot", + error_code=INVALID_NAMESPACE_ERROR, + ), + OutTestCase( + "db_dollar_prefix", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "$name", "coll": "target"}}], + msg="$out should reject dollar-prefixed database name", + error_code=INVALID_NAMESPACE_ERROR, + ), + OutTestCase( + "db_bare_dollar", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "$", "coll": "target"}}], + msg="$out should reject bare dollar database name", + error_code=INVALID_NAMESPACE_ERROR, + ), + OutTestCase( + "db_bare_double_dollar", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "$$", "coll": "target"}}], + msg="$out should reject bare double-dollar database name", + error_code=INVALID_NAMESPACE_ERROR, + ), + OutTestCase( + "db_null_byte", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test\x00db", "coll": "target"}}], + msg="$out should reject database name containing null byte", + error_code=INVALID_NAMESPACE_ERROR, + ), + OutTestCase( + "db_slash", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "a/b", "coll": "target"}}], + msg="$out should reject database name containing slash", + error_code=INVALID_NAMESPACE_ERROR, + 
), + OutTestCase( + "db_backslash", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "a\\b", "coll": "target"}}], + msg="$out should reject database name containing backslash", + error_code=INVALID_NAMESPACE_ERROR, + ), + OutTestCase( + "db_ascii_space", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": " ", "coll": "target"}}], + msg="$out should reject database name that is a single ASCII space", + error_code=INVALID_NAMESPACE_ERROR, + ), + OutTestCase( + "db_ascii_space_mixed", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "a b", "coll": "target"}}], + msg="$out should reject database name containing an ASCII space", + error_code=INVALID_NAMESPACE_ERROR, + ), +] + +# Property [Restricted Database Errors]: writing to the reserved system +# databases (admin, config, local) produces a restricted database error. +OUT_RESTRICTED_DATABASE_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "restricted_db_admin", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "admin", "coll": "target"}}], + msg="$out should reject writing to the admin database", + error_code=OUT_RESTRICTED_DATABASE_ERROR, + ), + OutTestCase( + "restricted_db_config", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "config", "coll": "target"}}], + msg="$out should reject writing to the config database", + error_code=OUT_RESTRICTED_DATABASE_ERROR, + ), + OutTestCase( + "restricted_db_local", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "local", "coll": "target"}}], + msg="$out should reject writing to the local database", + error_code=OUT_RESTRICTED_DATABASE_ERROR, + ), +] + +# Property [Pipeline Position Errors]: $out must be the last stage in a +# pipeline - placing it before another stage, duplicating it, or combining +# it with $merge in either order produces a pipeline position error. 
+OUT_PIPELINE_POSITION_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "out_not_last_stage", + docs=[{"_id": 1}], + pipeline=[{"$out": "target"}, {"$match": {"_id": 1}}], + msg="$out not as the last stage should produce a pipeline position error", + error_code=OUT_NOT_LAST_STAGE_ERROR, + ), + OutTestCase( + "two_out_stages", + docs=[{"_id": 1}], + pipeline=[{"$out": "target1"}, {"$out": "target2"}], + msg="Two $out stages in the same pipeline should produce a pipeline position error", + error_code=OUT_NOT_LAST_STAGE_ERROR, + ), + OutTestCase( + "out_then_merge", + docs=[{"_id": 1}], + pipeline=[{"$out": "target"}, {"$merge": {"into": "target2"}}], + msg="$out followed by $merge should produce a pipeline position error", + error_code=OUT_NOT_LAST_STAGE_ERROR, + ), + OutTestCase( + "merge_then_out", + docs=[{"_id": 1}], + pipeline=[{"$merge": {"into": "target"}}, {"$out": "target2"}], + msg="$merge followed by $out should produce a pipeline position error", + error_code=OUT_NOT_LAST_STAGE_ERROR, + ), +] + +# Property [Nested Pipeline Restriction Errors]: $out is not allowed inside +# nested pipelines ($lookup, $facet, $unionWith) or in view definitions, and +# the innermost nesting restriction applies when stages are nested. 
+OUT_NESTED_PIPELINE_RESTRICTION_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "out_inside_lookup", + docs=[{"_id": 1}], + pipeline=[ + { + "$lookup": { + "from": "other", + "pipeline": [{"$out": "target"}], + "as": "result", + } + } + ], + msg="$out inside a $lookup nested pipeline should be rejected", + error_code=LOOKUP_SUB_PIPELINE_NOT_ALLOWED_ERROR, + ), + OutTestCase( + "out_inside_facet", + docs=[{"_id": 1}], + pipeline=[{"$facet": {"branch": [{"$out": "target"}]}}], + msg="$out inside a $facet nested pipeline should be rejected", + error_code=FACET_PIPELINE_INVALID_STAGE_ERROR, + ), + OutTestCase( + "out_inside_union_with", + docs=[{"_id": 1}], + pipeline=[ + { + "$unionWith": { + "coll": "other", + "pipeline": [{"$out": "target"}], + } + } + ], + msg="$out inside a $unionWith nested pipeline should be rejected", + error_code=UNION_WITH_SUB_PIPELINE_NOT_ALLOWED_ERROR, + ), + OutTestCase( + "out_inside_lookup_inside_facet", + docs=[{"_id": 1}], + pipeline=[ + { + "$facet": { + "branch": [ + { + "$lookup": { + "from": "other", + "pipeline": [{"$out": "target"}], + "as": "r", + } + } + ] + } + } + ], + msg=( + "$out nested inside $lookup inside $facet should produce the" + " innermost nesting error ($lookup restriction)" + ), + error_code=LOOKUP_SUB_PIPELINE_NOT_ALLOWED_ERROR, + ), +] + +# Property [Timeseries Field Type Errors]: all timeseries sub-fields reject +# non-accepted types with a type mismatch error - timeseries accepts only +# object, timeField/metaField/granularity accept only string, and + + +OUT_NAMESPACE_ERROR_TESTS = ( + OUT_COLLECTION_NAME_VALIDATION_ERROR_TESTS + + OUT_DATABASE_NAME_VALIDATION_ERROR_TESTS + + OUT_RESTRICTED_DATABASE_ERROR_TESTS + + OUT_PIPELINE_POSITION_ERROR_TESTS + + OUT_NESTED_PIPELINE_RESTRICTION_ERROR_TESTS +) + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_NAMESPACE_ERROR_TESTS)) +def test_out_error(collection, test_case: OutTestCase): + """Test $out rejects invalid 
configurations with the expected error code.""" + populate_collection(collection, test_case) + pipeline = test_case.pipeline + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": pipeline, "cursor": {}}, + ) + assertResult(result, error_code=test_case.error_code, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_stage_argument_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_stage_argument_errors.py new file mode 100644 index 00000000..81d1db7a --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_stage_argument_errors.py @@ -0,0 +1,380 @@ +"""Tests for $out stage - stage argument type errors, unknown field errors, null missing errors.""" + +from __future__ import annotations + +from datetime import datetime + +import pytest +from bson import ( + Binary, + Code, + Decimal128, + Int64, + MaxKey, + MinKey, + ObjectId, + Regex, + Timestamp, +) + +from documentdb_tests.compatibility.tests.core.operator.stages.out.utils.out_test_helpers import ( + OutTestCase, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + populate_collection, +) +from documentdb_tests.framework.assertions import ( + assertResult, +) +from documentdb_tests.framework.bson_helpers import build_raw_bson_doc +from documentdb_tests.framework.error_codes import ( + DUPLICATE_FIELD_ERROR, + INVALID_OPTIONS_ERROR, + MISSING_FIELD_ERROR, + OUT_ARGUMENT_TYPE_ERROR, + UNRECOGNIZED_COMMAND_FIELD_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Null as Missing (Errors)]: null values for db, coll, and +# timeField are treated as missing rather than as type errors, and a null +# bucket parameter paired with a valid one produces an incomplete-pair +# error. 
+OUT_NULL_MISSING_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "null_db_missing", + docs=[{"_id": 1}], + target_coll="target", + pipeline=[{"$out": {"db": None, "coll": "target"}}], + msg="$out should treat null db as missing, not as a type error", + error_code=MISSING_FIELD_ERROR, + ), + OutTestCase( + "null_coll_missing", + docs=[{"_id": 1}], + target_coll="target", + pipeline=[{"$out": {"db": "test", "coll": None}}], + msg="$out should treat null coll as missing, not as a type error", + error_code=MISSING_FIELD_ERROR, + ), + OutTestCase( + "null_time_field_missing", + docs=[{"_id": 1}], + target_coll="target", + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": {"timeField": None}}}], + msg="$out should treat null timeField as missing, not as a type error", + error_code=MISSING_FIELD_ERROR, + ), + OutTestCase( + "null_bucket_max_with_valid_rounding", + docs=[{"_id": 1}], + target_coll="target", + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": None, + "bucketRoundingSeconds": 100, + }, + } + } + ], + msg=( + "$out should reject null bucketMaxSpanSeconds paired with valid" + " bucketRoundingSeconds as an incomplete pair" + ), + error_code=INVALID_OPTIONS_ERROR, + ), + OutTestCase( + "null_bucket_rounding_with_valid_max", + docs=[{"_id": 1}], + target_coll="target", + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": 100, + "bucketRoundingSeconds": None, + }, + } + } + ], + msg=( + "$out should reject null bucketRoundingSeconds paired with valid" + " bucketMaxSpanSeconds as an incomplete pair" + ), + error_code=INVALID_OPTIONS_ERROR, + ), +] + +# Property [Stage Argument Type Errors]: any type other than string or +# document produces a stage argument type error, including arrays regardless +# of contents, size, nesting, or element types. 
+OUT_STAGE_ARGUMENT_TYPE_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "arg_type_int32", + docs=[{"_id": 1}], + pipeline=[{"$out": 42}], + msg="$out should reject int32 argument", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), + OutTestCase( + "arg_type_int64", + docs=[{"_id": 1}], + pipeline=[{"$out": Int64(42)}], + msg="$out should reject Int64 argument", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), + OutTestCase( + "arg_type_float", + docs=[{"_id": 1}], + pipeline=[{"$out": 3.14}], + msg="$out should reject float argument", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), + OutTestCase( + "arg_type_decimal128", + docs=[{"_id": 1}], + pipeline=[{"$out": Decimal128("99.9")}], + msg="$out should reject Decimal128 argument", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), + OutTestCase( + "arg_type_bool", + docs=[{"_id": 1}], + pipeline=[{"$out": True}], + msg="$out should reject boolean argument", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), + OutTestCase( + "arg_type_null", + docs=[{"_id": 1}], + pipeline=[{"$out": None}], + msg="$out should reject null argument", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), + OutTestCase( + "arg_type_binary", + docs=[{"_id": 1}], + pipeline=[{"$out": Binary(b"\x01")}], + msg="$out should reject Binary argument", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), + OutTestCase( + "arg_type_objectid", + docs=[{"_id": 1}], + pipeline=[{"$out": ObjectId("507f1f77bcf86cd799439011")}], + msg="$out should reject ObjectId argument", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), + OutTestCase( + "arg_type_datetime", + docs=[{"_id": 1}], + pipeline=[{"$out": datetime(2024, 1, 1)}], + msg="$out should reject datetime argument", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), + OutTestCase( + "arg_type_regex", + docs=[{"_id": 1}], + pipeline=[{"$out": Regex("abc")}], + msg="$out should reject Regex argument", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), + OutTestCase( + "arg_type_timestamp", + docs=[{"_id": 1}], + pipeline=[{"$out": Timestamp(1, 1)}], + msg="$out should 
reject Timestamp argument", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), + OutTestCase( + "arg_type_minkey", + docs=[{"_id": 1}], + pipeline=[{"$out": MinKey()}], + msg="$out should reject MinKey argument", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), + OutTestCase( + "arg_type_maxkey", + docs=[{"_id": 1}], + pipeline=[{"$out": MaxKey()}], + msg="$out should reject MaxKey argument", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), + OutTestCase( + "arg_type_code", + docs=[{"_id": 1}], + pipeline=[{"$out": Code("function() {}")}], + msg="$out should reject Code argument", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), + OutTestCase( + "arg_type_array_empty", + docs=[{"_id": 1}], + pipeline=[{"$out": []}], + msg="$out should reject empty array argument", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), + OutTestCase( + "arg_type_array_of_string", + docs=[{"_id": 1}], + pipeline=[{"$out": ["target"]}], + msg="$out should reject array containing a string", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), + OutTestCase( + "arg_type_array_of_document", + docs=[{"_id": 1}], + pipeline=[{"$out": [{"db": "test", "coll": "target"}]}], + msg="$out should reject array containing a document", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), + OutTestCase( + "arg_type_array_nested", + docs=[{"_id": 1}], + pipeline=[{"$out": [[1, 2]]}], + msg="$out should reject nested array argument", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), + OutTestCase( + "arg_type_array_mixed_types", + docs=[{"_id": 1}], + pipeline=[{"$out": [1, "a", None]}], + msg="$out should reject array with mixed element types", + error_code=OUT_ARGUMENT_TYPE_ERROR, + ), +] + +# Property [Document Form Unknown Fields]: any field other than db, coll, +# and timeseries in the document form is rejected as an unknown field, and +# field name matching is case-sensitive and whitespace-sensitive. 
+OUT_UNKNOWN_FIELD_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "unknown_field", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "extra": "x"}}], + msg="$out should reject unknown field 'extra' in document form", + error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR, + ), + OutTestCase( + "unknown_field_case_sensitive_db", + docs=[{"_id": 1}], + pipeline=[{"$out": {"Db": "test", "coll": "target"}}], + msg="$out should reject 'Db' as unknown (case-sensitive)", + error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR, + ), + OutTestCase( + "unknown_field_case_sensitive_coll", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "Coll": "target"}}], + msg="$out should reject 'Coll' as unknown (case-sensitive)", + error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR, + ), + OutTestCase( + "unknown_field_case_sensitive_timeseries", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "Timeseries": {"timeField": "ts"}}}], + msg="$out should reject 'Timeseries' as unknown (case-sensitive)", + error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR, + ), + OutTestCase( + "unknown_field_whitespace_sensitive_db", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db ": "test", "coll": "target"}}], + msg="$out should reject 'db ' as unknown (whitespace-sensitive)", + error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR, + ), + OutTestCase( + "unknown_field_whitespace_sensitive_coll", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", " coll": "target"}}], + msg="$out should reject ' coll' as unknown (whitespace-sensitive)", + error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR, + ), + OutTestCase( + "expression_like_object", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "$expr": {"$literal": 1}}}], + msg="$out should treat expression-like objects as unknown fields", + error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR, + ), + OutTestCase( + "expression_like_dollar_prefix", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", 
"$merge": "x"}}], + msg="$out should treat $-prefixed fields as unknown fields", + error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR, + ), +] + +# Property [Collection Name Validation Errors]: invalid collection names +# are rejected with the appropriate error code based on the violation type, +# with namespace errors taking precedence over illegal operation errors. + +# Property [Document Form Duplicate Fields]: duplicate fields (db or coll +# appearing twice) in the document form produce a duplicate field error. +OUT_DUPLICATE_FIELD_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "duplicate_db_field", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": build_raw_bson_doc( + [ + ("db", "test"), + ("coll", "target"), + ("db", "test"), + ] + ) + } + ], + msg="$out should reject duplicate 'db' field in document form", + error_code=DUPLICATE_FIELD_ERROR, + ), + OutTestCase( + "duplicate_coll_field", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": build_raw_bson_doc( + [ + ("db", "test"), + ("coll", "target"), + ("coll", "target"), + ] + ) + } + ], + msg="$out should reject duplicate 'coll' field in document form", + error_code=DUPLICATE_FIELD_ERROR, + ), +] + + +OUT_STAGE_ARGUMENT_ERROR_TESTS = ( + OUT_NULL_MISSING_ERROR_TESTS + + OUT_STAGE_ARGUMENT_TYPE_ERROR_TESTS + + OUT_UNKNOWN_FIELD_ERROR_TESTS + + OUT_DUPLICATE_FIELD_ERROR_TESTS +) + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_STAGE_ARGUMENT_ERROR_TESTS)) +def test_out_error(collection, test_case: OutTestCase): + """Test $out rejects invalid configurations with the expected error code.""" + populate_collection(collection, test_case) + pipeline = test_case.pipeline + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": pipeline, "cursor": {}}, + ) + assertResult(result, error_code=test_case.error_code, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_target_restriction_errors.py 
b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_target_restriction_errors.py new file mode 100644 index 00000000..033cf242 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_target_restriction_errors.py @@ -0,0 +1,492 @@ +"""Tests for $out stage - target collection restriction errors.""" + +from __future__ import annotations + +from datetime import datetime +from typing import cast + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.out.utils.out_test_helpers import ( + OutTestCase, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + populate_collection, +) +from documentdb_tests.framework.assertions import ( + assertFailureCode, + assertResult, + assertSuccess, +) +from documentdb_tests.framework.error_codes import ( + COMMAND_NOT_SUPPORTED_ON_VIEW_ERROR, + DOCUMENT_VALIDATION_FAILURE_ERROR, + DUPLICATE_KEY_ERROR, + ILLEGAL_OPERATION_ERROR, + INVALID_OPTIONS_ERROR, + OPTION_NOT_SUPPORTED_ON_VIEW_ERROR, + OUT_CAPPED_COLLECTION_ERROR, + OUT_TIMESERIES_COLLECTION_TYPE_ERROR, + OUT_TIMESERIES_OPTIONS_MISMATCH_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Target Collection Restriction Errors]: $out rejects writing to +# capped collections and views, and writing to a view with timeseries options +# produces a timeseries collection type error instead of the view-specific +# error. 
+OUT_TARGET_RESTRICTION_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "capped_target", + docs=[{"_id": 1, "value": 10}], + target_coll="capped_out_target", + setup=lambda c: ( + c.database.drop_collection("capped_out_target"), + c.database.create_collection("capped_out_target", capped=True, size=1_048_576), + ), + msg="$out should reject writing to a capped collection", + error_code=OUT_CAPPED_COLLECTION_ERROR, + ), + OutTestCase( + "view_target", + docs=[{"_id": 1, "value": 10}], + target_coll="view_out_target", + setup=lambda c: ( + c.database.drop_collection("view_out_target"), + c.database.command({"create": "view_out_target", "viewOn": c.name, "pipeline": []}), + ), + msg="$out should reject writing to a view", + error_code=COMMAND_NOT_SUPPORTED_ON_VIEW_ERROR, + ), + OutTestCase( + "view_ts_target", + docs=[{"_id": 1, "value": 10}], + target_coll="view_ts_out_target", + out_spec={"timeseries": {"timeField": "ts"}}, + setup=lambda c: ( + c.database.drop_collection("view_ts_out_target"), + c.database.command({"create": "view_ts_out_target", "viewOn": c.name, "pipeline": []}), + ), + msg=( + "$out to a view with timeseries options should produce a timeseries" + " collection type error, not the view-specific error" + ), + error_code=OUT_TIMESERIES_COLLECTION_TYPE_ERROR, + ), +] + +# Property [Timeseries Existing Collection Errors]: writing with timeseries +# options to an existing regular collection produces a timeseries collection +# type error, and writing with mismatched timeseries options to an existing +# time series collection produces a timeseries options mismatch error +# regardless of which option differs. 
+OUT_TIMESERIES_EXISTING_COLLECTION_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "ts_to_regular", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="ts_to_regular_target", + out_spec={"timeseries": {"timeField": "ts"}}, + setup=lambda c: ( + c.database.drop_collection("ts_to_regular_target"), + c.database.create_collection("ts_to_regular_target"), + ), + msg=( + "$out with timeseries options to an existing regular collection" + " should produce a timeseries collection type error" + ), + error_code=OUT_TIMESERIES_COLLECTION_TYPE_ERROR, + ), + OutTestCase( + "ts_mismatch_different_time_field", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="ts_mismatch_target", + out_spec={"timeseries": {"timeField": "other"}}, + setup=lambda c: ( + c.database.drop_collection("ts_mismatch_target"), + c.database.command({"create": "ts_mismatch_target", "timeseries": {"timeField": "ts"}}), + ), + msg=( + "$out with mismatched timeseries options to an existing time series" + " collection should produce a timeseries options mismatch error" + ), + error_code=OUT_TIMESERIES_OPTIONS_MISMATCH_ERROR, + ), + OutTestCase( + "ts_mismatch_meta_field_present_vs_absent", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="ts_mismatch_target", + out_spec={"timeseries": {"timeField": "ts", "metaField": "m"}}, + setup=lambda c: ( + c.database.drop_collection("ts_mismatch_target"), + c.database.command({"create": "ts_mismatch_target", "timeseries": {"timeField": "ts"}}), + ), + msg=( + "$out with mismatched timeseries options to an existing time series" + " collection should produce a timeseries options mismatch error" + ), + error_code=OUT_TIMESERIES_OPTIONS_MISMATCH_ERROR, + ), + OutTestCase( + "ts_mismatch_different_meta_field", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="ts_mismatch_target", + out_spec={"timeseries": {"timeField": "ts", "metaField": "other"}}, + setup=lambda c: ( + 
c.database.drop_collection("ts_mismatch_target"), + c.database.command( + { + "create": "ts_mismatch_target", + "timeseries": {"timeField": "ts", "metaField": "m"}, + } + ), + ), + msg=( + "$out with mismatched timeseries options to an existing time series" + " collection should produce a timeseries options mismatch error" + ), + error_code=OUT_TIMESERIES_OPTIONS_MISMATCH_ERROR, + ), + OutTestCase( + "ts_mismatch_different_granularity", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="ts_mismatch_target", + out_spec={"timeseries": {"timeField": "ts", "granularity": "hours"}}, + setup=lambda c: ( + c.database.drop_collection("ts_mismatch_target"), + c.database.command( + { + "create": "ts_mismatch_target", + "timeseries": {"timeField": "ts", "granularity": "seconds"}, + } + ), + ), + msg=( + "$out with mismatched timeseries options to an existing time series" + " collection should produce a timeseries options mismatch error" + ), + error_code=OUT_TIMESERIES_OPTIONS_MISMATCH_ERROR, + ), + OutTestCase( + "ts_mismatch_granularity_vs_bucket_options", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="ts_mismatch_target", + out_spec={"timeseries": {"timeField": "ts", "granularity": "hours"}}, + setup=lambda c: ( + c.database.drop_collection("ts_mismatch_target"), + c.database.command( + { + "create": "ts_mismatch_target", + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": 100, + "bucketRoundingSeconds": 100, + }, + } + ), + ), + msg=( + "$out with mismatched timeseries options to an existing time series" + " collection should produce a timeseries options mismatch error" + ), + error_code=OUT_TIMESERIES_OPTIONS_MISMATCH_ERROR, + ), + OutTestCase( + "ts_mismatch_different_bucket_values", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="ts_mismatch_target", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": 200, + "bucketRoundingSeconds": 200, + } + }, + setup=lambda 
c: ( + c.database.drop_collection("ts_mismatch_target"), + c.database.command( + { + "create": "ts_mismatch_target", + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": 100, + "bucketRoundingSeconds": 100, + }, + } + ), + ), + msg=( + "$out with mismatched timeseries options to an existing time series" + " collection should produce a timeseries options mismatch error" + ), + error_code=OUT_TIMESERIES_OPTIONS_MISMATCH_ERROR, + ), +] + +# Property [Index Constraint Errors]: unique index violations (including +# compound unique indexes) and duplicate _id values in the output produce a +# duplicate key error, and when a unique index violation occurs writing to a +# nonexistent target, the target collection is not created. +OUT_INDEX_CONSTRAINT_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "idx_unique", + docs=[{"_id": 1, "x": 1}, {"_id": 2, "x": 1}], + target_coll="idx_unique_target", + setup=lambda c: ( + c.database["idx_unique_target"].insert_many( + [{"_id": 90, "x": 90}, {"_id": 91, "x": 91}] + ), + c.database["idx_unique_target"].create_index("x", unique=True), + ), + msg="$out should produce a duplicate key error on unique index violation", + error_code=DUPLICATE_KEY_ERROR, + ), + OutTestCase( + "idx_compound", + docs=[{"_id": 1, "a": 1, "b": 2}, {"_id": 2, "a": 1, "b": 2}], + target_coll="idx_compound_target", + setup=lambda c: ( + c.database["idx_compound_target"].insert_one({"_id": 99, "a": 99, "b": 99}), + c.database["idx_compound_target"].create_index([("a", 1), ("b", 1)], unique=True), + ), + msg="$out should produce a duplicate key error on compound unique index violation", + error_code=DUPLICATE_KEY_ERROR, + ), + OutTestCase( + "idx_dup_id", + docs=[{"_id": 1, "x": 1}, {"_id": 2, "x": 2}], + target_coll="idx_dup_id_target", + pipeline=[ + {"$unset": "_id"}, + {"$addFields": {"_id": "same"}}, + {"$out": "idx_dup_id_target"}, + ], + msg="$out should produce a duplicate key error when output contains duplicate _id values", + 
error_code=DUPLICATE_KEY_ERROR, + ), +] + +# Property [Read Concern Errors]: linearizable read concern with $out +# produces an invalid options error. +OUT_READ_CONCERN_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "rc_linearizable", + docs=[{"_id": 1, "value": 10}], + target_coll="rc_linearizable_target", + pipeline=[ + {"$out": "rc_linearizable_target"}, + ], + msg="$out should reject linearizable read concern", + error_code=INVALID_OPTIONS_ERROR, + ), +] + + +OUT_TARGET_RESTRICTION_TESTS = ( + OUT_TARGET_RESTRICTION_ERROR_TESTS + + OUT_TIMESERIES_EXISTING_COLLECTION_ERROR_TESTS + + OUT_INDEX_CONSTRAINT_ERROR_TESTS +) + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_TARGET_RESTRICTION_TESTS)) +def test_out_target_restriction_error(collection, test_case: OutTestCase): + """Test $out rejects invalid target configurations with the expected error code.""" + populate_collection(collection, test_case) + if test_case.setup: + test_case.setup(collection) + if test_case.pipeline: + pipeline = test_case.pipeline + else: + pipeline = [test_case.build_out_stage(collection)] + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": pipeline, "cursor": {}}, + ) + assertResult(result, error_code=test_case.error_code, msg=test_case.msg) + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_READ_CONCERN_ERROR_TESTS)) +def test_out_read_concern_error(collection, test_case: OutTestCase): + """Test $out rejects invalid read concern levels.""" + populate_collection(collection, test_case) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + "readConcern": {"level": "linearizable"}, + }, + ) + assertResult(result, error_code=test_case.error_code, msg=test_case.msg) + + +# Property [Nested Pipeline Restriction - View Definition]: $out in a view +# definition is rejected. 
+OUT_VIEW_DEFINITION_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "view_def_out", + docs=[{"_id": 1, "value": 10}], + pipeline=[{"$out": "target"}], + error_code=OPTION_NOT_SUPPORTED_ON_VIEW_ERROR, + msg="$out in a view definition should produce an invalid view pipeline error", + ), +] + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_VIEW_DEFINITION_ERROR_TESTS)) +def test_out_in_view_definition_error(collection, test_case: OutTestCase): + """Test $out in a view definition is rejected.""" + populate_collection(collection, test_case) + result = execute_command( + collection, + { + "create": "bad_view", + "viewOn": collection.name, + "pipeline": test_case.pipeline, + }, + ) + assertResult(result, error_code=test_case.error_code, msg=test_case.msg) + + +# Property [Schema Validation Errors]: when the target collection has +# validationAction set to error and an invalid document is produced, the +# write fails with a document validation failure error and the pre-existing +# collection is unchanged. 
OUT_SCHEMA_VALIDATION_ERROR_TESTS: list[OutTestCase] = [
    OutTestCase(
        "schema_val_err",
        docs=[{"_id": 1, "value": "not_a_number"}],
        target_coll="schema_val_error_target",
        pipeline=[{"$out": "schema_val_error_target"}],
        # Recreate the target with a $jsonSchema validator requiring an int
        # "value", then seed one valid document so the "unchanged" test below
        # has pre-existing state to verify.
        setup=lambda c: (
            c.database.drop_collection("schema_val_error_target"),
            c.database.command(
                {
                    "create": "schema_val_error_target",
                    "validator": {
                        "$jsonSchema": {
                            "bsonType": "object",
                            "required": ["value"],
                            "properties": {"value": {"bsonType": "int"}},
                        }
                    },
                    "validationAction": "error",
                }
            ),
            c.database["schema_val_error_target"].insert_one({"_id": 99, "value": 42}),
        ),
        error_code=DOCUMENT_VALIDATION_FAILURE_ERROR,
        expected=[{"_id": 99, "value": 42}],
        msg="$out should fail with document validation failure when validationAction is error",
    ),
]


@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(OUT_SCHEMA_VALIDATION_ERROR_TESTS))
def test_out_schema_validation_error(collection, test_case: OutTestCase):
    """Test $out fails with document validation failure when validationAction is error."""
    populate_collection(collection, test_case)
    if test_case.setup:
        test_case.setup(collection)
    result = execute_command(
        collection,
        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
    )
    # Fixed: assert via assertResult with error_code like every other error
    # test in this module; the previously-used assertFailureCode helper is not
    # among this file's imports and would raise NameError at test time.
    assertResult(result, error_code=test_case.error_code, msg=test_case.msg)


@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(OUT_SCHEMA_VALIDATION_ERROR_TESTS))
def test_out_schema_validation_error_unchanged(collection, test_case: OutTestCase):
    """Test $out schema validation failure leaves the pre-existing collection unchanged."""
    populate_collection(collection, test_case)
    if test_case.setup:
        test_case.setup(collection)
    # The aggregate is expected to fail; the interesting assertion is that the
    # seeded document in the target collection survives untouched.
    execute_command(
        collection,
        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
    )
    result = execute_command(
        collection,
        {"find": test_case.target_coll, "filter": {}, "projection": {"_id": 1, "value": 1}},
    )
    assertSuccess(result, test_case.expected, msg=test_case.msg)


# Property [Transaction Errors]: using $out inside a transaction produces
# an error.
OUT_TRANSACTION_ERROR_TESTS: list[OutTestCase] = [
    OutTestCase(
        "transaction_out",
        docs=[{"_id": 1, "value": 10}],
        pipeline=[{"$out": "txn_target"}],
        error_code=ILLEGAL_OPERATION_ERROR,
        msg="$out inside a transaction should produce an error",
    ),
]


@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(OUT_TRANSACTION_ERROR_TESTS))
def test_out_transaction_error(collection, test_case: OutTestCase):
    """Test $out inside a transaction produces an error."""
    populate_collection(collection, test_case)
    # Verify the pipeline works outside a transaction first.
    execute_command(
        collection,
        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
    )
    command = {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}
    client = collection.database.client
    with client.start_session() as session:
        session.start_transaction()
        try:
            # database.command is used directly (not execute_command) because
            # the session must be threaded through to join the transaction.
            result = collection.database.command(command, session=session)
        except Exception as e:
            # The driver surfaces the server error as an exception; keep it so
            # assertResult can inspect the error code.
            result = e
        finally:
            session.abort_transaction()
    assertResult(result, error_code=test_case.error_code, msg=test_case.msg)


# Property [Byte-Based Namespace Limit]: the namespace length limit (255
# bytes) is byte-based, not character-based - multi-byte characters consume
# more of the limit per character than single-byte characters.
OUT_BYTE_NAMESPACE_LIMIT_TESTS: list[OutTestCase] = [
    OutTestCase(
        "byte_limit",
        docs=[{"_id": 1}],
        error_code=ILLEGAL_OPERATION_ERROR,
        msg=(
            "$out should reject a collection name that exceeds 255 namespace bytes"
            " even though the character count is within the single-byte limit"
        ),
    ),
]


@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(OUT_BYTE_NAMESPACE_LIMIT_TESTS))
def test_out_byte_based_namespace_limit(collection, test_case: OutTestCase):
    """Test $out namespace limit is byte-based, not character-based."""
    populate_collection(collection, test_case)
    db_name = collection.database.name
    # Namespace = db_name + "." + coll_name; limit is 255 bytes.
    prefix_bytes = len(db_name.encode("utf-8")) + 1
    max_coll_bytes = 255 - prefix_bytes
    # CJK character U+4E2D is 3 bytes in UTF-8. Use enough CJK characters
    # to exceed the byte limit while staying under the character count that
    # would fit with single-byte characters.
    cjk_char_count = (max_coll_bytes // 3) + 1
    cjk_name = "\u4e2d" * cjk_char_count
    result = execute_command(
        collection,
        {"aggregate": collection.name, "pipeline": [{"$out": cjk_name}], "cursor": {}},
    )
    assertResult(result, error_code=test_case.error_code, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries.py
new file mode 100644
index 00000000..1728c39a
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries.py
@@ -0,0 +1,601 @@
"""Tests for $out stage - timeseries collection creation and options."""

from __future__ import annotations

from datetime import datetime, timezone
from typing import Any, cast

import pytest
from bson import Decimal128, Int64

from documentdb_tests.compatibility.tests.core.operator.stages.out.utils.out_test_helpers import (
    OutTestCase,
)
from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import (
    populate_collection,
)
from documentdb_tests.framework.assertions import (
    assertResult,
    assertSuccess,
)
from documentdb_tests.framework.executor import execute_command
from documentdb_tests.framework.parametrize import pytest_params
from documentdb_tests.framework.test_constants import (
    DECIMAL128_ONE_AND_HALF,
    DECIMAL128_TWO_AND_HALF,
)

# Property [Timeseries Collection Creation]: $out creates a new time
# series collection when valid timeseries options are provided and the
# target does not exist, including edge cases where metaField is "_id" or
# matches timeField.
OUT_TIMESERIES_CREATION_TESTS: list[OutTestCase] = [
    OutTestCase(
        "ts_meta_field_is_id",
        docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "value": 10}],
        target_coll="ts_creation_meta_id",
        out_spec={"timeseries": {"timeField": "ts", "metaField": "_id"}},
        expected_type="timeseries",
        # Granularity/bucketMaxSpanSeconds are the server defaults expected in
        # listCollections output when only timeField/metaField are supplied.
        expected_options={
            "timeseries": {
                "timeField": "ts",
                "metaField": "_id",
                "granularity": "seconds",
                "bucketMaxSpanSeconds": 3_600,
            }
        },
        msg='$out should accept metaField set to "_id" without error',
    ),
    OutTestCase(
        "ts_meta_field_same_as_time_field",
        docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "value": 10}],
        target_coll="ts_creation_meta_same",
        out_spec={"timeseries": {"timeField": "ts", "metaField": "ts"}},
        expected_type="timeseries",
        expected_options={
            "timeseries": {
                "timeField": "ts",
                "metaField": "ts",
                "granularity": "seconds",
                "bucketMaxSpanSeconds": 3_600,
            }
        },
        msg="$out should accept metaField set to the same value as timeField without error",
    ),
]

# Property [Bucket Param Type Acceptance]: bucketMaxSpanSeconds and
# bucketRoundingSeconds accept int32, Int64, float, and Decimal128, and the
# equality check between them is type-insensitive.
def _bucket_param_case(case_id, target_coll, max_span, rounding, expected_seconds, msg):
    """Build one bucket-parameter acceptance case.

    Every case in this family shares the same single input document and
    differs only in the (possibly differently-typed) bucketMaxSpanSeconds /
    bucketRoundingSeconds inputs and the single int value both are expected
    to be coerced to on the created collection.
    """
    return OutTestCase(
        case_id,
        docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}],
        target_coll=target_coll,
        out_spec={
            "timeseries": {
                "timeField": "ts",
                "bucketMaxSpanSeconds": max_span,
                "bucketRoundingSeconds": rounding,
            }
        },
        expected_type="timeseries",
        expected_options={
            "timeseries": {
                "timeField": "ts",
                "bucketRoundingSeconds": expected_seconds,
                "bucketMaxSpanSeconds": expected_seconds,
            }
        },
        msg=msg,
    )


# Table rows: (case id, target collection, bucketMaxSpanSeconds input,
# bucketRoundingSeconds input, expected coerced value, assertion message).
OUT_BUCKET_PARAM_TYPE_ACCEPTANCE_TESTS: list[OutTestCase] = [
    _bucket_param_case(*spec)
    for spec in [
        (
            "bucket_int32",
            "bucket_int32",
            100,
            100,
            100,
            "$out should accept int32 for bucket parameters",
        ),
        (
            "bucket_int64",
            "bucket_int64",
            Int64(100),
            Int64(100),
            100,
            "$out should accept Int64 for bucket parameters",
        ),
        (
            "bucket_float",
            "bucket_float",
            100.0,
            100.0,
            100,
            "$out should accept float for bucket parameters",
        ),
        (
            "bucket_decimal128",
            "bucket_decimal128",
            Decimal128("100"),
            Decimal128("100"),
            100,
            "$out should accept Decimal128 for bucket parameters",
        ),
        (
            "bucket_cross_int32_int64",
            "bucket_cross_i32_i64",
            100,
            Int64(100),
            100,
            "$out should accept cross-type int32/Int64 bucket parameters",
        ),
        (
            "bucket_cross_float_decimal128",
            "bucket_cross_f_d128",
            100.0,
            Decimal128("100"),
            100,
            "$out should accept cross-type float/Decimal128 bucket parameters",
        ),
        (
            "bucket_float_truncation_success",
            "bucket_float_trunc",
            1.5,
            1.5,
            1,
            "$out should truncate float 1.5 to int32 1 for bucket parameters",
        ),
        (
            "bucket_decimal128_bankers_rounding",
            "bucket_dec_bankers",
            DECIMAL128_ONE_AND_HALF,
            DECIMAL128_ONE_AND_HALF,
            2,
            "$out should round Decimal128 1.5 to 2 (banker's rounding) for bucket parameters",
        ),
        (
            "bucket_decimal128_bankers_round_down",
            "bucket_dec_bank_dn",
            DECIMAL128_TWO_AND_HALF,
            DECIMAL128_TWO_AND_HALF,
            2,
            "$out should round Decimal128 2.5 to 2 (banker's rounding) for bucket parameters",
        ),
        (
            "bucket_cross_coerced_equality",
            "bucket_cross_coerce",
            2,
            DECIMAL128_ONE_AND_HALF,
            2,
            "$out should accept cross-type bucket params when coerced values are"
            " equal (int32 2 and Decimal128 1.5 -> 2)",
        ),
        (
            "bucket_range_min",
            "bucket_range_min",
            1,
            1,
            1,
            "$out should accept bucket parameters at the minimum valid value (1)",
        ),
        (
            "bucket_range_max",
            "bucket_range_max",
            31_536_000,
            31_536_000,
            31_536_000,
            "$out should accept bucket parameters at the maximum valid value (31536000)",
        ),
    ]
]

# Property [Timeseries Granularity]: valid granularity values ("seconds",
# "minutes", "hours") are accepted and each produces the corresponding
# bucketMaxSpanSeconds default.
OUT_TIMESERIES_GRANULARITY_TESTS: list[OutTestCase] = [
    OutTestCase(
        f"granularity_{granularity}",
        docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}],
        target_coll=f"ts_gran_{granularity}",
        out_spec={"timeseries": {"timeField": "ts", "granularity": granularity}},
        expected_type="timeseries",
        expected_options={
            "timeseries": {
                "timeField": "ts",
                "granularity": granularity,
                "bucketMaxSpanSeconds": default_span_seconds,
            }
        },
        msg=f"$out should accept granularity '{granularity}'",
    )
    for granularity, default_span_seconds in [
        ("seconds", 3_600),
        ("minutes", 86_400),
        ("hours", 2_592_000),
    ]
]

OUT_TIMESERIES_TESTS = (
    OUT_TIMESERIES_CREATION_TESTS
    + OUT_BUCKET_PARAM_TYPE_ACCEPTANCE_TESTS
    + OUT_TIMESERIES_GRANULARITY_TESTS
)


@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(OUT_TIMESERIES_TESTS))
def test_out_timeseries(collection, test_case: OutTestCase):
    """Test $out writes results and creates the correct collection type."""
    populate_collection(collection, test_case)
    out_stage = test_case.build_out_stage(collection)
    execute_command(
        collection,
        {"aggregate": collection.name, "pipeline": [out_stage], "cursor": {}},
    )
    result = execute_command(
        collection,
        {"listCollections": 1, "filter": {"name": test_case.target_coll}},
    )
    # Server-generated fields ("info", and "idIndex" when present) are copied
    # from the actual listCollections output so the comparison pins only the
    # fields this test controls.
    raw_doc = cast(dict, result)["cursor"]["firstBatch"][0]
    expected_info: dict[str, Any] = {
        "name": test_case.target_coll,
        "type": test_case.expected_type,
        "options": test_case.expected_options,
        "info": raw_doc["info"],
    }
    if "idIndex" in raw_doc:
        expected_info["idIndex"] = raw_doc["idIndex"]
    assertSuccess(result, [expected_info], msg=test_case.msg)


# Property [Timeseries Cross-Database]: $out creates a time series collection
# in a different database when timeseries options are specified with a
# cross-database target.
OUT_TIMESERIES_CROSS_DB_TESTS: list[OutTestCase] = [
    OutTestCase(
        "ts_cross_db",
        docs=[{"_id": 1, "ts": datetime(2024, 7, 1), "value": 70}],
        target_coll="ts_cross_target",
        target_db="out_ts_cross_db_target",
        pipeline=[
            {
                "$out": {
                    "db": "out_ts_cross_db_target",
                    "coll": "ts_cross_target",
                    "timeseries": {"timeField": "ts"},
                }
            }
        ],
        expected=[{"ts": datetime(2024, 7, 1, tzinfo=timezone.utc), "value": 70}],
        msg="$out should create a time series collection in a different database",
    ),
]


@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(OUT_TIMESERIES_CROSS_DB_TESTS))
def test_out_timeseries_cross_db(collection, test_case: OutTestCase):
    """Test $out creates a time series collection in a different database."""
    populate_collection(collection, test_case)
    mongo_client = collection.database.client
    # Start from a clean slate so collection creation is actually exercised.
    mongo_client.drop_database(test_case.target_db)
    aggregate_cmd = {
        "aggregate": collection.name,
        "pipeline": test_case.pipeline,
        "cursor": {},
    }
    find_cmd = {
        "find": test_case.target_coll,
        "filter": {},
        "projection": {"_id": 0},
    }
    try:
        execute_command(collection, aggregate_cmd)
        target = mongo_client[test_case.target_db][test_case.target_coll]
        outcome = execute_command(target, find_cmd)
        assertResult(outcome, expected=test_case.expected, msg=test_case.msg)
    finally:
        # Always remove the cross-database target, pass or fail.
        mongo_client.drop_database(test_case.target_db)


# Property [Timeseries DateTime Acceptance]: all datetime boundary values
# are accepted as timeField values when writing to a timeseries collection
# via $out, including Unix epoch, pre-epoch, far future, minimum datetime,
# and millisecond precision.
OUT_TIMESERIES_DATETIME_ACCEPTANCE_TESTS: list[OutTestCase] = [
    OutTestCase(
        "ts_datetime_epoch",
        docs=[{"_id": 1, "ts": datetime(1970, 1, 1), "v": 1}],
        target_coll="ts_dt_epoch",
        out_spec={"timeseries": {"timeField": "ts"}},
        expected=[{"ts": datetime(1970, 1, 1, tzinfo=timezone.utc), "v": 1}],
        msg="$out timeseries should accept Unix epoch as timeField value",
    ),
    OutTestCase(
        "ts_datetime_pre_epoch",
        docs=[{"_id": 1, "ts": datetime(1960, 6, 15), "v": 2}],
        target_coll="ts_dt_pre_epoch",
        out_spec={"timeseries": {"timeField": "ts"}},
        expected=[{"ts": datetime(1960, 6, 15, tzinfo=timezone.utc), "v": 2}],
        msg="$out timeseries should accept pre-epoch dates as timeField value",
    ),
    OutTestCase(
        "ts_datetime_far_future",
        docs=[{"_id": 1, "ts": datetime(9999, 12, 31, 23, 59, 59), "v": 3}],
        target_coll="ts_dt_far_future",
        out_spec={"timeseries": {"timeField": "ts"}},
        expected=[{"ts": datetime(9999, 12, 31, 23, 59, 59, tzinfo=timezone.utc), "v": 3}],
        msg="$out timeseries should accept far future dates as timeField value",
    ),
    OutTestCase(
        "ts_datetime_minimum",
        docs=[{"_id": 1, "ts": datetime(1, 1, 1), "v": 4}],
        target_coll="ts_dt_minimum",
        out_spec={"timeseries": {"timeField": "ts"}},
        expected=[{"ts": datetime(1, 1, 1, tzinfo=timezone.utc), "v": 4}],
        msg="$out timeseries should accept minimum datetime (0001-01-01) as timeField value",
    ),
    OutTestCase(
        "ts_datetime_millisecond_precision",
        docs=[{"_id": 1, "ts": datetime(2024, 6, 15, 12, 30, 45, 123_000), "v": 5}],
        target_coll="ts_dt_millis",
        out_spec={"timeseries": {"timeField": "ts"}},
        expected=[{"ts": datetime(2024, 6, 15, 12, 30, 45, 123_000, tzinfo=timezone.utc), "v": 5}],
        msg="$out timeseries should accept datetimes with millisecond precision as timeField value",
    ),
]


@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(OUT_TIMESERIES_DATETIME_ACCEPTANCE_TESTS))
def test_out_timeseries_datetime_acceptance(collection, test_case: OutTestCase):
    """Test $out timeseries accepts datetime boundary values as timeField."""
    populate_collection(collection, test_case)
    out_stage = test_case.build_out_stage(collection)
    execute_command(
        collection,
        {"aggregate": collection.name, "pipeline": [out_stage], "cursor": {}},
    )
    # Read the written documents back (dropping _id) and compare against the
    # expected timezone-aware datetimes.
    result = execute_command(
        collection,
        {
            "find": test_case.target_coll,
            "filter": {},
            "projection": {"_id": 0, "ts": 1, "v": 1},
        },
    )
    assertSuccess(result, test_case.expected, msg=test_case.msg)


# Property [Timeseries Existing Collection]: writing to an existing time
# series collection succeeds both with matching timeseries options and
# without specifying timeseries options (string and document form).
OUT_TIMESERIES_EXISTING_TESTS: list[OutTestCase] = [
    OutTestCase(
        "ts_existing_matching_options",
        docs=[{"_id": 1, "ts": datetime(2024, 6, 1), "value": 60}],
        target_coll="ts_existing_target",
        out_spec={"timeseries": {"timeField": "ts"}},
        # Pre-create the target as a time series collection so $out exercises
        # the "existing collection" path rather than creation.
        setup=lambda c: (
            c.database.drop_collection("ts_existing_target"),
            c.database.command({"create": "ts_existing_target", "timeseries": {"timeField": "ts"}}),
        ),
        expected=[{"ts": datetime(2024, 6, 1, tzinfo=timezone.utc), "value": 60}],
        msg=(
            "$out should write to an existing time series collection with"
            " matching timeseries options"
        ),
    ),
    OutTestCase(
        "ts_existing_string_form",
        docs=[{"_id": 1, "ts": datetime(2024, 6, 1), "value": 60}],
        target_coll="ts_existing_target",
        setup=lambda c: (
            c.database.drop_collection("ts_existing_target"),
            c.database.command({"create": "ts_existing_target", "timeseries": {"timeField": "ts"}}),
        ),
        expected=[{"ts": datetime(2024, 6, 1, tzinfo=timezone.utc), "value": 60}],
        msg=(
            "$out should write to an existing time series collection using"
            " string form without timeseries options"
        ),
    ),
    OutTestCase(
        "ts_existing_document_form",
        docs=[{"_id": 1, "ts": datetime(2024, 6, 1), "value": 60}],
        target_coll="ts_existing_target",
        # Empty out_spec selects document form without timeseries options.
        out_spec={},
        setup=lambda c: (
            c.database.drop_collection("ts_existing_target"),
            c.database.command({"create": "ts_existing_target", "timeseries": {"timeField": "ts"}}),
        ),
        expected=[{"ts": datetime(2024, 6, 1, tzinfo=timezone.utc), "value": 60}],
        msg=(
            "$out should write to an existing time series collection using"
            " document form without timeseries options"
        ),
    ),
]


@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(OUT_TIMESERIES_EXISTING_TESTS))
def test_out_timeseries_existing(collection, test_case: OutTestCase):
    """Test $out writes to an existing time series collection."""
    populate_collection(collection, test_case)
    if test_case.setup:
        test_case.setup(collection)
    out_stage = test_case.build_out_stage(collection)
    execute_command(
        collection,
        {"aggregate": collection.name, "pipeline": [out_stage], "cursor": {}},
    )
    result = execute_command(
        collection,
        {
            "find": test_case.target_coll,
            "filter": {},
            "projection": {"_id": 0, "ts": 1, "value": 1},
        },
    )
    assertSuccess(result, test_case.expected, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries_bucket_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries_bucket_errors.py
new file mode 100644
index 00000000..23ae91c5
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries_bucket_errors.py
@@ -0,0 +1,735 @@
"""Tests for $out stage - timeseries granularity/bucket type errors."""

from __future__ import annotations

from datetime import datetime

import pytest
from bson import (
    Binary,
    Code,
    Decimal128,
    Int64,
    MaxKey,
    MinKey,
    ObjectId,
    Regex,
    Timestamp,
)

from documentdb_tests.compatibility.tests.core.operator.stages.out.utils.out_test_helpers import (
    OutTestCase,
)
from
documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + populate_collection, +) +from documentdb_tests.framework.assertions import ( + assertResult, +) +from documentdb_tests.framework.error_codes import ( + TYPE_MISMATCH_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Continuation of timeseries field type errors: granularity, bucketMaxSpanSeconds, +# bucketRoundingSeconds type errors. +OUT_TIMESERIES_BUCKET_TYPE_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "ts_granularity_type_int32", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": 42}, + } + } + ], + msg="$out should reject int32 as granularity type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_granularity_type_int64", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": Int64(42)}, + } + } + ], + msg="$out should reject int64 as granularity type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_granularity_type_float", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": 3.14}, + } + } + ], + msg="$out should reject float as granularity type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_granularity_type_decimal128", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": Decimal128("99.9")}, + } + } + ], + msg="$out should reject decimal128 as granularity type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_granularity_type_bool", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": True}, + } + } + ], + 
msg="$out should reject bool as granularity type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_granularity_type_array_with_object", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": [{"timeField": "ts"}]}, + } + } + ], + msg="$out should reject array_with_object as granularity type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_granularity_type_binary", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": Binary(b"\x01")}, + } + } + ], + msg="$out should reject binary as granularity type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_granularity_type_objectid", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "granularity": ObjectId("507f1f77bcf86cd799439011"), + }, + } + } + ], + msg="$out should reject objectid as granularity type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_granularity_type_datetime", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": datetime(2024, 1, 1)}, + } + } + ], + msg="$out should reject datetime as granularity type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_granularity_type_regex", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": Regex("abc")}, + } + } + ], + msg="$out should reject regex as granularity type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_granularity_type_timestamp", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": Timestamp(1, 1)}, + } + } + ], + msg="$out should reject timestamp as granularity type", + 
error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_granularity_type_minkey", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": MinKey()}, + } + } + ], + msg="$out should reject minkey as granularity type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_granularity_type_maxkey", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": MaxKey()}, + } + } + ], + msg="$out should reject maxkey as granularity type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_granularity_type_code", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": Code("function() {}")}, + } + } + ], + msg="$out should reject code as granularity type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_granularity_type_object", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": {"x": 1}}, + } + } + ], + msg="$out should reject object as granularity type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_max_type_bool", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": True, + "bucketRoundingSeconds": 100, + }, + } + } + ], + msg="$out should reject bool as bucketMaxSpanSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_max_type_string", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": "invalid", + "bucketRoundingSeconds": 100, + }, + } + } + ], + msg="$out should reject string as bucketMaxSpanSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + 
"ts_bucket_max_type_array_with_object", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": [{"timeField": "ts"}], + "bucketRoundingSeconds": 100, + }, + } + } + ], + msg="$out should reject array_with_object as bucketMaxSpanSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_max_type_binary", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": Binary(b"\x01"), + "bucketRoundingSeconds": 100, + }, + } + } + ], + msg="$out should reject binary as bucketMaxSpanSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_max_type_objectid", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": ObjectId("507f1f77bcf86cd799439011"), + "bucketRoundingSeconds": 100, + }, + } + } + ], + msg="$out should reject objectid as bucketMaxSpanSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_max_type_datetime", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": datetime(2024, 1, 1), + "bucketRoundingSeconds": 100, + }, + } + } + ], + msg="$out should reject datetime as bucketMaxSpanSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_max_type_regex", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": Regex("abc"), + "bucketRoundingSeconds": 100, + }, + } + } + ], + msg="$out should reject regex as bucketMaxSpanSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_max_type_timestamp", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": 
"target", + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": Timestamp(1, 1), + "bucketRoundingSeconds": 100, + }, + } + } + ], + msg="$out should reject timestamp as bucketMaxSpanSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_max_type_minkey", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": MinKey(), + "bucketRoundingSeconds": 100, + }, + } + } + ], + msg="$out should reject minkey as bucketMaxSpanSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_max_type_maxkey", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": MaxKey(), + "bucketRoundingSeconds": 100, + }, + } + } + ], + msg="$out should reject maxkey as bucketMaxSpanSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_max_type_code", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": Code("function() {}"), + "bucketRoundingSeconds": 100, + }, + } + } + ], + msg="$out should reject code as bucketMaxSpanSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_max_type_object", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": {"x": 1}, + "bucketRoundingSeconds": 100, + }, + } + } + ], + msg="$out should reject object as bucketMaxSpanSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_round_type_bool", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketRoundingSeconds": True, + "bucketMaxSpanSeconds": 100, + }, + } + } + ], + msg="$out should reject bool as 
bucketRoundingSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_round_type_string", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketRoundingSeconds": "invalid", + "bucketMaxSpanSeconds": 100, + }, + } + } + ], + msg="$out should reject string as bucketRoundingSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_round_type_array_with_object", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketRoundingSeconds": [{"timeField": "ts"}], + "bucketMaxSpanSeconds": 100, + }, + } + } + ], + msg="$out should reject array_with_object as bucketRoundingSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_round_type_binary", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketRoundingSeconds": Binary(b"\x01"), + "bucketMaxSpanSeconds": 100, + }, + } + } + ], + msg="$out should reject binary as bucketRoundingSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_round_type_objectid", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketRoundingSeconds": ObjectId("507f1f77bcf86cd799439011"), + "bucketMaxSpanSeconds": 100, + }, + } + } + ], + msg="$out should reject objectid as bucketRoundingSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_round_type_datetime", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketRoundingSeconds": datetime(2024, 1, 1), + "bucketMaxSpanSeconds": 100, + }, + } + } + ], + msg="$out should reject datetime as bucketRoundingSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + 
"ts_bucket_round_type_regex", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketRoundingSeconds": Regex("abc"), + "bucketMaxSpanSeconds": 100, + }, + } + } + ], + msg="$out should reject regex as bucketRoundingSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_round_type_timestamp", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketRoundingSeconds": Timestamp(1, 1), + "bucketMaxSpanSeconds": 100, + }, + } + } + ], + msg="$out should reject timestamp as bucketRoundingSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_round_type_minkey", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketRoundingSeconds": MinKey(), + "bucketMaxSpanSeconds": 100, + }, + } + } + ], + msg="$out should reject minkey as bucketRoundingSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_round_type_maxkey", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketRoundingSeconds": MaxKey(), + "bucketMaxSpanSeconds": 100, + }, + } + } + ], + msg="$out should reject maxkey as bucketRoundingSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_round_type_code", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "bucketRoundingSeconds": Code("function() {}"), + "bucketMaxSpanSeconds": 100, + }, + } + } + ], + msg="$out should reject code as bucketRoundingSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_bucket_round_type_object", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + 
"bucketRoundingSeconds": {"x": 1}, + "bucketMaxSpanSeconds": 100, + }, + } + } + ], + msg="$out should reject object as bucketRoundingSeconds type", + error_code=TYPE_MISMATCH_ERROR, + ), +] + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_TIMESERIES_BUCKET_TYPE_ERROR_TESTS)) +def test_out_error(collection, test_case: OutTestCase): + """Test $out rejects invalid configurations with the expected error code.""" + populate_collection(collection, test_case) + pipeline = test_case.pipeline + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": pipeline, "cursor": {}}, + ) + assertResult(result, error_code=test_case.error_code, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries_field_type_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries_field_type_errors.py new file mode 100644 index 00000000..0818823b --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries_field_type_errors.py @@ -0,0 +1,574 @@ +"""Tests for $out stage - timeseries field/timeField/metaField type errors.""" + +from __future__ import annotations + +from datetime import datetime + +import pytest +from bson import ( + Binary, + Code, + Decimal128, + Int64, + MaxKey, + MinKey, + ObjectId, + Regex, + Timestamp, +) + +from documentdb_tests.compatibility.tests.core.operator.stages.out.utils.out_test_helpers import ( + OutTestCase, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + populate_collection, +) +from documentdb_tests.framework.assertions import ( + assertResult, +) +from documentdb_tests.framework.error_codes import ( + TYPE_MISMATCH_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Timeseries Field Type Errors]: all timeseries 
sub-fields reject +# non-accepted types with a type mismatch error - timeseries accepts only +# object, timeField/metaField/granularity accept only string, and +# bucketMaxSpanSeconds/bucketRoundingSeconds accept only numeric types +# (int32, Int64, float, Decimal128). + +OUT_TIMESERIES_FIELD_TYPE_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "ts_field_type_int32", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": 42}}], + msg="$out should reject int32 as timeseries field type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_field_type_int64", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": Int64(42)}}], + msg="$out should reject int64 as timeseries field type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_field_type_float", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": 3.14}}], + msg="$out should reject float as timeseries field type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_field_type_decimal128", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": Decimal128("99.9")}}], + msg="$out should reject decimal128 as timeseries field type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_field_type_bool", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": True}}], + msg="$out should reject bool as timeseries field type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_field_type_string", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": "invalid"}}], + msg="$out should reject string as timeseries field type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_field_type_array_empty", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": []}}], + msg="$out should reject array_empty as timeseries field type", + 
error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_field_type_array_with_object", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": [{"timeField": "ts"}]}}], + msg="$out should reject array_with_object as timeseries field type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_field_type_binary", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": Binary(b"\x01")}}], + msg="$out should reject binary as timeseries field type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_field_type_objectid", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": ObjectId("507f1f77bcf86cd799439011"), + } + } + ], + msg="$out should reject objectid as timeseries field type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_field_type_datetime", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": datetime(2024, 1, 1)}}], + msg="$out should reject datetime as timeseries field type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_field_type_regex", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": Regex("abc")}}], + msg="$out should reject regex as timeseries field type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_field_type_timestamp", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": Timestamp(1, 1)}}], + msg="$out should reject timestamp as timeseries field type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_field_type_minkey", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": MinKey()}}], + msg="$out should reject minkey as timeseries field type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_field_type_maxkey", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": 
MaxKey()}}], + msg="$out should reject maxkey as timeseries field type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_field_type_code", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": Code("function() {}")}}], + msg="$out should reject code as timeseries field type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_time_field_type_int32", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": {"timeField": 42}}}], + msg="$out should reject int32 as timeField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_time_field_type_int64", + docs=[{"_id": 1}], + pipeline=[ + {"$out": {"db": "test", "coll": "target", "timeseries": {"timeField": Int64(42)}}} + ], + msg="$out should reject int64 as timeField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_time_field_type_float", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": {"timeField": 3.14}}}], + msg="$out should reject float as timeField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_time_field_type_decimal128", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": Decimal128("99.9")}, + } + } + ], + msg="$out should reject decimal128 as timeField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_time_field_type_bool", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": {"timeField": True}}}], + msg="$out should reject bool as timeField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_time_field_type_array_with_object", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": [{"timeField": "ts"}]}, + } + } + ], + msg="$out should reject array_with_object as timeField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + 
"ts_time_field_type_binary", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": Binary(b"\x01")}, + } + } + ], + msg="$out should reject binary as timeField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_time_field_type_objectid", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": ObjectId("507f1f77bcf86cd799439011")}, + } + } + ], + msg="$out should reject objectid as timeField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_time_field_type_datetime", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": datetime(2024, 1, 1)}, + } + } + ], + msg="$out should reject datetime as timeField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_time_field_type_regex", + docs=[{"_id": 1}], + pipeline=[ + {"$out": {"db": "test", "coll": "target", "timeseries": {"timeField": Regex("abc")}}} + ], + msg="$out should reject regex as timeField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_time_field_type_timestamp", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": Timestamp(1, 1)}, + } + } + ], + msg="$out should reject timestamp as timeField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_time_field_type_minkey", + docs=[{"_id": 1}], + pipeline=[ + {"$out": {"db": "test", "coll": "target", "timeseries": {"timeField": MinKey()}}} + ], + msg="$out should reject minkey as timeField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_time_field_type_maxkey", + docs=[{"_id": 1}], + pipeline=[ + {"$out": {"db": "test", "coll": "target", "timeseries": {"timeField": MaxKey()}}} + ], + msg="$out should reject maxkey as timeField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_time_field_type_code", + 
docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": Code("function() {}")}, + } + } + ], + msg="$out should reject code as timeField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_time_field_type_object", + docs=[{"_id": 1}], + pipeline=[ + {"$out": {"db": "test", "coll": "target", "timeseries": {"timeField": {"x": 1}}}} + ], + msg="$out should reject object as timeField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_meta_field_type_int32", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "metaField": 42}, + } + } + ], + msg="$out should reject int32 as metaField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_meta_field_type_int64", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "metaField": Int64(42)}, + } + } + ], + msg="$out should reject int64 as metaField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_meta_field_type_float", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "metaField": 3.14}, + } + } + ], + msg="$out should reject float as metaField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_meta_field_type_decimal128", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "metaField": Decimal128("99.9")}, + } + } + ], + msg="$out should reject decimal128 as metaField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_meta_field_type_bool", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "metaField": True}, + } + } + ], + msg="$out should reject bool as metaField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + 
"ts_meta_field_type_array_with_object", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "metaField": [{"timeField": "ts"}]}, + } + } + ], + msg="$out should reject array_with_object as metaField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_meta_field_type_binary", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "metaField": Binary(b"\x01")}, + } + } + ], + msg="$out should reject binary as metaField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_meta_field_type_objectid", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": { + "timeField": "ts", + "metaField": ObjectId("507f1f77bcf86cd799439011"), + }, + } + } + ], + msg="$out should reject objectid as metaField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_meta_field_type_datetime", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "metaField": datetime(2024, 1, 1)}, + } + } + ], + msg="$out should reject datetime as metaField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_meta_field_type_regex", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "metaField": Regex("abc")}, + } + } + ], + msg="$out should reject regex as metaField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_meta_field_type_timestamp", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "metaField": Timestamp(1, 1)}, + } + } + ], + msg="$out should reject timestamp as metaField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_meta_field_type_minkey", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + 
"coll": "target", + "timeseries": {"timeField": "ts", "metaField": MinKey()}, + } + } + ], + msg="$out should reject minkey as metaField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_meta_field_type_maxkey", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "metaField": MaxKey()}, + } + } + ], + msg="$out should reject maxkey as metaField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_meta_field_type_code", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "metaField": Code("function() {}")}, + } + } + ], + msg="$out should reject code as metaField type", + error_code=TYPE_MISMATCH_ERROR, + ), + OutTestCase( + "ts_meta_field_type_object", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "metaField": {"x": 1}}, + } + } + ], + msg="$out should reject object as metaField type", + error_code=TYPE_MISMATCH_ERROR, + ), +] + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_TIMESERIES_FIELD_TYPE_ERROR_TESTS)) +def test_out_error(collection, test_case: OutTestCase): + """Test $out rejects invalid configurations with the expected error code.""" + populate_collection(collection, test_case) + pipeline = test_case.pipeline + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": pipeline, "cursor": {}}, + ) + assertResult(result, error_code=test_case.error_code, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries_value_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries_value_errors.py new file mode 100644 index 00000000..1ebe783c --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries_value_errors.py @@ -0,0 
+1,665 @@ +"""Tests for $out stage - timeseries value/range errors.""" + +from __future__ import annotations + +from datetime import datetime + +import pytest +from bson import Decimal128, Int64 + +from documentdb_tests.compatibility.tests.core.operator.stages.out.utils.out_test_helpers import ( + OutTestCase, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + populate_collection, +) +from documentdb_tests.framework.assertions import ( + assertResult, +) +from documentdb_tests.framework.error_codes import ( + BAD_VALUE_ERROR, + INVALID_OPTIONS_ERROR, + MISSING_FIELD_ERROR, + UNRECOGNIZED_COMMAND_FIELD_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import ( + DECIMAL128_HALF, + DECIMAL128_INFINITY, + DECIMAL128_NAN, + DECIMAL128_NEGATIVE_INFINITY, + DECIMAL128_NEGATIVE_ONE_AND_HALF, + DECIMAL128_NEGATIVE_ZERO, + DOUBLE_MAX, + DOUBLE_MAX_SAFE_INTEGER, + DOUBLE_MIN_SUBNORMAL, + DOUBLE_NEGATIVE_ZERO, + FLOAT_INFINITY, + FLOAT_NAN, + FLOAT_NEGATIVE_INFINITY, + INT32_MAX, + INT32_MIN, + INT32_OVERFLOW, + INT32_UNDERFLOW, +) + +# Property [Bucket Param Range Validation]: bucket parameter values outside +# the valid range (1 to 31536000) after numeric coercion to int32 produce +# error code 2. 
+OUT_BUCKET_PARAM_RANGE_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "bucket_zero", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_zero", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": 0, + "bucketRoundingSeconds": 0, + } + }, + msg="$out should reject bucket parameter value 0 (below minimum)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_negative", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_neg", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": -1, + "bucketRoundingSeconds": -1, + } + }, + msg="$out should reject negative bucket parameter values", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_above_max", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_above", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": 31_536_001, + "bucketRoundingSeconds": 31_536_001, + } + }, + msg="$out should reject bucket parameter values above 31536000", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_float_truncates_to_zero", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_f_zero", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": 0.5, + "bucketRoundingSeconds": 0.5, + } + }, + msg="$out should reject float 0.5 (truncates to 0, below minimum)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_float_negative_truncation", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_f_neg", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": -1.5, + "bucketRoundingSeconds": -1.5, + } + }, + msg="$out should reject float -1.5 (truncates to -1, below minimum)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_float_nan", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_f_nan", + 
out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": FLOAT_NAN, + "bucketRoundingSeconds": FLOAT_NAN, + } + }, + msg="$out should reject float NaN (converts to 0, below minimum)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_float_neg_zero", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_f_nz", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": DOUBLE_NEGATIVE_ZERO, + "bucketRoundingSeconds": DOUBLE_NEGATIVE_ZERO, + } + }, + msg="$out should reject float negative zero (converts to 0, below minimum)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_float_inf", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_f_inf", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": FLOAT_INFINITY, + "bucketRoundingSeconds": FLOAT_INFINITY, + } + }, + msg="$out should reject float +Infinity (clamps to int32 max, above max range)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_float_neg_inf", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_f_ninf", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": FLOAT_NEGATIVE_INFINITY, + "bucketRoundingSeconds": FLOAT_NEGATIVE_INFINITY, + } + }, + msg="$out should reject float -Infinity (clamps to int32 min, below minimum)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_float_subnormal", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_f_sub", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": DOUBLE_MIN_SUBNORMAL, + "bucketRoundingSeconds": DOUBLE_MIN_SUBNORMAL, + } + }, + msg="$out should reject float subnormal (truncates to 0, below minimum)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_decimal128_neg_rounds", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_d_neg", + 
out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": DECIMAL128_NEGATIVE_ONE_AND_HALF, + "bucketRoundingSeconds": DECIMAL128_NEGATIVE_ONE_AND_HALF, + } + }, + msg="$out should reject Decimal128 -1.5 (rounds to -2, below minimum)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_decimal128_half_to_zero", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_d_half", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": DECIMAL128_HALF, + "bucketRoundingSeconds": DECIMAL128_HALF, + } + }, + msg="$out should reject Decimal128 0.5 (banker's rounds to 0, below minimum)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_decimal128_nan", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_d_nan", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": DECIMAL128_NAN, + "bucketRoundingSeconds": DECIMAL128_NAN, + } + }, + msg="$out should reject Decimal128 NaN (converts to 0, below minimum)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_decimal128_inf", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_d_inf", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": DECIMAL128_INFINITY, + "bucketRoundingSeconds": DECIMAL128_INFINITY, + } + }, + msg="$out should reject Decimal128 +Infinity (clamps to int32 max, above max range)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_decimal128_neg_inf", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_d_ninf", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": DECIMAL128_NEGATIVE_INFINITY, + "bucketRoundingSeconds": DECIMAL128_NEGATIVE_INFINITY, + } + }, + msg="$out should reject Decimal128 -Infinity (clamps to int32 min, below minimum)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_decimal128_neg_zero", + docs=[{"_id": 1, 
"ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_d_nz", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": DECIMAL128_NEGATIVE_ZERO, + "bucketRoundingSeconds": DECIMAL128_NEGATIVE_ZERO, + } + }, + msg="$out should reject Decimal128 -0 (converts to 0, below minimum)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_int64_above_int32_max", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_i64_hi", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": Int64(INT32_OVERFLOW), + "bucketRoundingSeconds": Int64(INT32_OVERFLOW), + } + }, + msg="$out should reject Int64 above int32 max (clamps to int32 max, above max range)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_int64_int32_min", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_i64_lo", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": Int64(INT32_MIN), + "bucketRoundingSeconds": Int64(INT32_MIN), + } + }, + msg="$out should reject Int64 at int32 min (below minimum)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_int64_below_int32_min", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_i64_uf", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": Int64(INT32_UNDERFLOW), + "bucketRoundingSeconds": Int64(INT32_UNDERFLOW), + } + }, + msg="$out should reject Int64 below int32 min (clamps to int32 min, below minimum)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_float_max_safe_int", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_f_msi", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": DOUBLE_MAX_SAFE_INTEGER, + "bucketRoundingSeconds": DOUBLE_MAX_SAFE_INTEGER, + } + }, + msg="$out should reject float max safe integer (clamps to int32 max, above max range)", + error_code=BAD_VALUE_ERROR, 
+ ), + OutTestCase( + "bucket_float_dbl_max", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_dblmax", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": DOUBLE_MAX, + "bucketRoundingSeconds": DOUBLE_MAX, + } + }, + msg="$out should reject float DBL_MAX (clamps to int32 max, above max range)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_decimal128_large", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_d_lg", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": Decimal128("1E+100"), + "bucketRoundingSeconds": Decimal128("1E+100"), + } + }, + msg="$out should reject Decimal128 1E+100 (clamps to int32 max, above max range)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_int32_max", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_i32max", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": INT32_MAX, + "bucketRoundingSeconds": INT32_MAX, + } + }, + msg="$out should reject int32 max (above max range 31536000)", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "bucket_int32_min", + docs=[{"_id": 1, "ts": datetime(2024, 1, 1), "v": 1}], + target_coll="bucket_err_i32min", + out_spec={ + "timeseries": { + "timeField": "ts", + "bucketMaxSpanSeconds": INT32_MIN, + "bucketRoundingSeconds": INT32_MIN, + } + }, + msg="$out should reject int32 min (below minimum)", + error_code=BAD_VALUE_ERROR, + ), +] + +# Property [Null as Missing (Errors)]: null values for db, coll, and +timeField are treated as missing rather than as type errors, and a null +bucket parameter paired with a valid one produces an incomplete-pair error. + +# Property [Timeseries Missing and Unknown Field Errors]: missing timeField +inside the timeseries document produces a missing key error, and unknown +fields inside the timeseries document produce an unrecognized field error.
+OUT_TIMESERIES_MISSING_UNKNOWN_FIELD_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "ts_missing_time_field_empty_ts", + docs=[{"_id": 1}], + pipeline=[{"$out": {"db": "test", "coll": "target", "timeseries": {}}}], + msg="$out should reject an empty timeseries document (missing timeField)", + error_code=MISSING_FIELD_ERROR, + ), + OutTestCase( + "ts_missing_time_field_with_meta", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"metaField": "m"}, + } + } + ], + msg="$out should reject timeseries with metaField but missing timeField", + error_code=MISSING_FIELD_ERROR, + ), + OutTestCase( + "ts_unknown_field", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "extra": "x"}, + } + } + ], + msg="$out should reject unknown field inside timeseries document", + error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR, + ), + OutTestCase( + "ts_unknown_field_case_sensitive", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "TimeField": "ts2"}, + } + } + ], + msg="$out should reject case-variant field name inside timeseries as unknown", + error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR, + ), +] + +# Property [Timeseries Granularity Errors]: invalid granularity strings +# produce error code 2 because validation is case-sensitive and only +# "seconds", "minutes", and "hours" are accepted. 
+OUT_TIMESERIES_GRANULARITY_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "granularity_capitalized", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": "Seconds"}, + } + } + ], + msg="$out should reject capitalized 'Seconds' as an invalid granularity", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "granularity_all_caps", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": "HOURS"}, + } + } + ], + msg="$out should reject all-caps 'HOURS' as an invalid granularity", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "granularity_empty_string", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": ""}, + } + } + ], + msg="$out should reject empty string as an invalid granularity", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "granularity_arbitrary_string", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": "invalid"}, + } + } + ], + msg="$out should reject an arbitrary string as an invalid granularity", + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "granularity_singular_form", + docs=[{"_id": 1}], + pipeline=[ + { + "$out": { + "db": "test", + "coll": "target", + "timeseries": {"timeField": "ts", "granularity": "second"}, + } + } + ], + msg="$out should reject singular form 'second' as an invalid granularity", + error_code=BAD_VALUE_ERROR, + ), +] + +# Property [Bucket Param Pairing Errors]: bucketMaxSpanSeconds and +# bucketRoundingSeconds must be specified together, must be equal, and +# cannot be combined with granularity. 
# Each case supplies a bucket-parameter combination that violates the pairing
# rules; the shared timeField is merged with the per-case options so the dict
# key order (timeField first) matches a hand-written literal.
OUT_BUCKET_PARAM_PAIRING_ERROR_TESTS: list[OutTestCase] = [
    OutTestCase(
        case_name,
        docs=[{"_id": 1}],
        pipeline=[
            {
                "$out": {
                    "db": "test",
                    "coll": "target",
                    "timeseries": {"timeField": "ts", **bucket_options},
                }
            }
        ],
        msg=case_msg,
        error_code=INVALID_OPTIONS_ERROR,
    )
    for case_name, bucket_options, case_msg in [
        (
            "bucket_max_without_rounding",
            {"bucketMaxSpanSeconds": 100},
            "$out should reject bucketMaxSpanSeconds without bucketRoundingSeconds",
        ),
        (
            "bucket_rounding_without_max",
            {"bucketRoundingSeconds": 100},
            "$out should reject bucketRoundingSeconds without bucketMaxSpanSeconds",
        ),
        (
            "bucket_params_not_equal",
            {"bucketMaxSpanSeconds": 100, "bucketRoundingSeconds": 200},
            "$out should reject unequal bucketMaxSpanSeconds and bucketRoundingSeconds",
        ),
        (
            "granularity_with_bucket_params",
            {
                "granularity": "seconds",
                "bucketMaxSpanSeconds": 100,
                "bucketRoundingSeconds": 100,
            },
            "$out should reject granularity combined with bucket parameters",
        ),
    ]
]

# Property [Timeseries Document Errors]: $out fails with error code 2
# when writing a document whose timeField value is not a valid datetime or
# when the timeField is missing entirely.
+OUT_TIMESERIES_DOCUMENT_ERROR_TESTS: list[OutTestCase] = [ + OutTestCase( + "ts_doc_non_date_time_field", + docs=[{"_id": 1, "ts": "not_a_date", "v": 1}], + target_coll="ts_doc_err_nondate", + out_spec={"timeseries": {"timeField": "ts"}}, + msg=( + "$out should fail when writing a document with a non-date" + " value in the timeField to a timeseries collection" + ), + error_code=BAD_VALUE_ERROR, + ), + OutTestCase( + "ts_doc_missing_time_field", + docs=[{"_id": 1, "v": 1}], + target_coll="ts_doc_err_missing", + out_spec={"timeseries": {"timeField": "ts"}}, + msg=( + "$out should fail when writing a document missing the" + " timeField entirely to a timeseries collection" + ), + error_code=BAD_VALUE_ERROR, + ), +] + + +OUT_TIMESERIES_VALUE_ERROR_TESTS = ( + OUT_BUCKET_PARAM_RANGE_ERROR_TESTS + + OUT_TIMESERIES_MISSING_UNKNOWN_FIELD_ERROR_TESTS + + OUT_TIMESERIES_GRANULARITY_ERROR_TESTS + + OUT_BUCKET_PARAM_PAIRING_ERROR_TESTS + + OUT_TIMESERIES_DOCUMENT_ERROR_TESTS +) + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_TIMESERIES_VALUE_ERROR_TESTS)) +def test_out_error(collection, test_case: OutTestCase): + """Test $out rejects invalid configurations with the expected error code.""" + populate_collection(collection, test_case) + if test_case.pipeline: + pipeline = test_case.pipeline + else: + pipeline = [test_case.build_out_stage(collection)] + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": pipeline, "cursor": {}}, + ) + + assertResult(result, error_code=test_case.error_code, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_write_behavior.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_write_behavior.py new file mode 100644 index 00000000..c22e981e --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_write_behavior.py @@ -0,0 +1,437 @@ +"""Tests for $out stage - write 
behavior.""" + +from __future__ import annotations + +import threading +from typing import cast + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.out.utils.out_test_helpers import ( + OutTestCase, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + populate_collection, +) +from documentdb_tests.framework.assertions import ( + assertSuccess, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Database Name Acceptance]: any non-empty string of non-null +# bytes that does not contain a slash, backslash, dot, ASCII space, or dollar +# prefix is accepted as a database name. +OUT_DATABASE_NAME_ACCEPTANCE_TESTS: list[OutTestCase] = [ + OutTestCase( + "db_control_character", + docs=[{"_id": 1}], + target_coll="target", + target_db="\x01", + msg="$out should accept a control character as a database name", + ), + OutTestCase( + "db_unicode_no_break_space", + docs=[{"_id": 1}], + target_coll="target", + target_db="\u00a0", + msg="$out should accept Unicode no-break space as a database name", + ), + OutTestCase( + "db_zero_width_space", + docs=[{"_id": 1}], + target_coll="target", + target_db="\u200b", + msg="$out should accept zero-width space as a database name", + ), + OutTestCase( + "db_emoji", + docs=[{"_id": 1}], + target_coll="target", + target_db="\U0001f389", + msg="$out should accept emoji as a database name", + ), + OutTestCase( + "db_cjk_characters", + docs=[{"_id": 1}], + target_coll="target", + target_db="\u4e2d\u6587", + msg="$out should accept CJK characters as a database name", + ), + OutTestCase( + "db_punctuation", + docs=[{"_id": 1}], + target_coll="target", + target_db="a!@#b", + msg="$out should accept punctuation in a database name", + ), + OutTestCase( + "db_single_character", + docs=[{"_id": 1}], + target_coll="target", + target_db="a", + msg="$out should accept a 
single-character database name", + ), + OutTestCase( + "db_digits_only", + docs=[{"_id": 1}], + target_coll="target", + target_db="123", + msg="$out should accept a digits-only database name", + ), +] + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_DATABASE_NAME_ACCEPTANCE_TESTS)) +def test_out_database_name_acceptance(collection, test_case: OutTestCase): + """Test $out accepts various character classes as database names.""" + populate_collection(collection, test_case) + db_name = test_case.target_db # type: ignore[arg-type] + client = collection.database.client + client.drop_database(db_name) + try: + out_stage = test_case.build_out_stage(collection) + execute_command( + collection, + {"aggregate": collection.name, "pipeline": [out_stage], "cursor": {}}, + ) + target_db = client[db_name] + result = execute_command( + target_db[test_case.target_coll], + {"listCollections": 1, "filter": {"name": test_case.target_coll}}, + ) + raw_doc = cast(dict, result)["cursor"]["firstBatch"][0] + assertSuccess( + result, + [ + { + "name": test_case.target_coll, + "type": "collection", + "options": {}, + "info": raw_doc["info"], + "idIndex": raw_doc["idIndex"], + } + ], + msg=test_case.msg, + ) + finally: + client.drop_database(db_name) + + +# Property [Collection Creation]: $out creates a new collection when the +# target does not exist, and an empty pipeline result creates an empty +# collection or empties an existing one. +# +# Property [Collection Replacement - Atomic Replace]: an existing collection +# is atomically replaced with the new pipeline results upon $out completion. +# +# Property [Collection Replacement - Failure Rollback]: if the aggregation +# fails during $out, the pre-existing collection and its documents are +# unchanged. 
+OUT_FIND_AFTER_OUT_TESTS: list[OutTestCase] = [ + OutTestCase( + "new_collection_created", + docs=[{"_id": 1, "value": 10}, {"_id": 2, "value": 20}], + target_coll="creation_new_target", + pipeline=[{"$out": "creation_new_target"}], + expected=[{"_id": 1, "value": 10}, {"_id": 2, "value": 20}], + msg="$out should create a new collection when the target does not exist", + ), + OutTestCase( + "empty_pipeline_empties_existing_collection", + docs=[], + target_coll="creation_emptied_target", + pipeline=[{"$out": "creation_emptied_target"}], + setup=lambda c: c.database["creation_emptied_target"].insert_one({"_id": 99, "old": True}), + expected=[], + msg="$out with no documents should empty an existing collection", + ), + OutTestCase( + "replacement_atomic", + docs=[{"_id": 10, "new": True}, {"_id": 20, "new": True}], + target_coll="replacement_atomic_target", + pipeline=[{"$out": "replacement_atomic_target"}], + setup=lambda c: c.database["replacement_atomic_target"].insert_many( + [{"_id": 1, "old": True}, {"_id": 2, "old": True}] + ), + expected=[{"_id": 10, "new": True}, {"_id": 20, "new": True}], + msg="$out should replace existing documents with new pipeline results", + ), + OutTestCase( + "failure_rollback_docs", + docs=[{"_id": 10, "x": 1}, {"_id": 20, "x": 1}], + target_coll="replacement_fail_target", + pipeline=[{"$out": "replacement_fail_target"}], + setup=lambda c: ( + c.database["replacement_fail_target"].insert_many( + [{"_id": 1, "x": 1}, {"_id": 2, "x": 2}] + ), + c.database["replacement_fail_target"].create_index("x", unique=True), + ), + expected=[{"_id": 1, "x": 1}, {"_id": 2, "x": 2}], + msg="$out failure should leave pre-existing documents unchanged", + ), +] + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_FIND_AFTER_OUT_TESTS)) +def test_out_find_after_out(collection, test_case: OutTestCase): + """Test $out write behavior verified via find on the target collection.""" + populate_collection(collection, test_case) 
+ if test_case.setup: + test_case.setup(collection) + execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + result = execute_command( + collection, + {"find": test_case.target_coll, "filter": {}, "sort": {"_id": 1}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg) + + +@pytest.mark.aggregate +def test_out_empty_pipeline_creates_collection(collection): + """Test $out with no documents creates an empty collection.""" + target_coll = "creation_empty_target" + execute_command( + collection, + {"aggregate": collection.name, "pipeline": [{"$out": target_coll}], "cursor": {}}, + ) + # Use listCollections because find on a non-existent collection also + # returns empty, which would make this test pass even without creation. + result = execute_command( + collection, + {"listCollections": 1, "filter": {"name": target_coll}, "nameOnly": True}, + ) + assertSuccess( + result, + [{"name": target_coll, "type": "collection"}], + msg="$out with no documents should create an empty collection", + ) + + +# Property [Database Creation]: $out creates a new database when the output +# database does not exist. 


@pytest.mark.aggregate
def test_out_database_creation(collection):
    """Test $out creates a new database when the output database does not exist.

    The destination database is dropped up front so the aggregation is forced
    to create it, and dropped again in ``finally`` so the test leaves no
    residue behind.
    """
    collection.insert_many([{"_id": 1, "value": 10}])
    client = collection.database.client
    dest_db_name = f"{collection.database.name}_cross"
    dest_coll_name = "creation_cross_db_target"
    client.drop_database(dest_db_name)
    try:
        out_stage = {"$out": {"db": dest_db_name, "coll": dest_coll_name}}
        execute_command(
            collection,
            {"aggregate": collection.name, "pipeline": [out_stage], "cursor": {}},
        )
        # Read back through the destination database handle to prove both the
        # database and the collection now exist with the pipeline's output.
        find_result = execute_command(
            client[dest_db_name][dest_coll_name],
            {"find": dest_coll_name, "filter": {}},
        )
        assertSuccess(
            find_result,
            [{"_id": 1, "value": 10}],
            msg="$out should create a new database when the output database does not exist",
        )
    finally:
        client.drop_database(dest_db_name)


# Property [Collection Replacement - Self-Replacement]: writing to the same
# collection as the input succeeds and the collection contains the transformed
# results.
OUT_REPLACEMENT_SELF_TESTS: list[OutTestCase] = [
    OutTestCase(
        "replacement_self",
        docs=[{"_id": 1, "value": 10}, {"_id": 2, "value": 20}],
        expected=[
            {"_id": 1, "value": 10, "doubled": 20},
            {"_id": 2, "value": 20, "doubled": 40},
        ],
        msg="$out self-replacement should contain transformed results",
    ),
]


@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(OUT_REPLACEMENT_SELF_TESTS))
def test_out_replacement_self(collection, test_case: OutTestCase):
    """Test $out self-replacement writes transformed results back to the source."""
    populate_collection(collection, test_case)
    # Transform each document, then write the results back over the very
    # collection the pipeline is reading from.
    transform_pipeline = [
        {"$addFields": {"doubled": {"$multiply": ["$value", 2]}}},
        {"$out": collection.name},
    ]
    execute_command(
        collection,
        {"aggregate": collection.name, "pipeline": transform_pipeline, "cursor": {}},
    )
    find_result = execute_command(
        collection,
        {"find": collection.name, "filter": {}, "sort": {"_id": 1}},
    )
    assertSuccess(find_result, test_case.expected, msg=test_case.msg)


# Property [Collection Replacement - Index Preservation]: indexes from the
# previous collection are preserved after $out replaces its contents.
#
# Property [Collection Replacement - Failure Rollback]: if the aggregation
# fails during $out, the pre-existing collection and its indexes are unchanged.
OUT_INDEX_AFTER_OUT_TESTS: list[OutTestCase] = [
    OutTestCase(
        "replacement_preserves_indexes",
        docs=[{"_id": 10, "x": 100}, {"_id": 20, "x": 200}],
        target_coll="replacement_idx_target",
        pipeline=[{"$out": "replacement_idx_target"}],
        # Pre-create the target with a named unique index so preservation
        # across the $out replacement can be observed.
        setup=lambda c: (
            c.database["replacement_idx_target"].insert_one({"_id": 1, "x": 1}),
            c.database["replacement_idx_target"].create_index("x", name="x_idx", unique=True),
        ),
        expected=[
            {"v": 2, "key": {"_id": 1}, "name": "_id_"},
            {"v": 2, "key": {"x": 1}, "name": "x_idx", "unique": True},
        ],
        msg="$out should preserve indexes from the previous collection",
    ),
    OutTestCase(
        "failure_rollback_indexes",
        # Both source docs share x=1, so the unique index on the target makes
        # the $out write fail; the pre-existing indexes must survive intact.
        docs=[{"_id": 10, "x": 1}, {"_id": 20, "x": 1}],
        target_coll="replacement_fail_target",
        pipeline=[{"$out": "replacement_fail_target"}],
        setup=lambda c: (
            c.database["replacement_fail_target"].insert_many(
                [{"_id": 1, "x": 1}, {"_id": 2, "x": 2}]
            ),
            c.database["replacement_fail_target"].create_index("x", unique=True),
        ),
        expected=[
            {"v": 2, "key": {"_id": 1}, "name": "_id_"},
            {"v": 2, "key": {"x": 1}, "name": "x_1", "unique": True},
        ],
        msg="$out failure should leave pre-existing indexes unchanged",
    ),
]


@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(OUT_INDEX_AFTER_OUT_TESTS))
def test_out_index_after_out(collection, test_case: OutTestCase):
    """Test $out index behavior verified via listIndexes on the target collection."""
    populate_collection(collection, test_case)
    if test_case.setup:
        test_case.setup(collection)
    aggregate_cmd = {
        "aggregate": collection.name,
        "pipeline": test_case.pipeline,
        "cursor": {},
    }
    execute_command(collection, aggregate_cmd)
    index_result = execute_command(collection, {"listIndexes": test_case.target_coll})
    assertSuccess(index_result, test_case.expected, msg=test_case.msg, ignore_doc_order=True)


# Property [Temporary Collection]: $out uses a temporary collection during
# execution and cleans it up after completion.
+ + +@pytest.mark.aggregate +def test_out_temp_collection_observed(collection): + """Test $out uses a temporary collection during execution.""" + collection.insert_many([{"_id": i, "value": i} for i in range(10_000)]) + db = collection.database + + found_tmp: list[str] = [] + stop = threading.Event() + + def poll_collections() -> None: + while not stop.is_set(): + try: + names = db.list_collection_names() + for name in names: + if name.startswith("tmp.agg_out."): + found_tmp.append(name) + return + except Exception: + pass + + t = threading.Thread(target=poll_collections, daemon=True) + t.start() + + execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [{"$out": "creation_temp_target"}], + "cursor": {}, + }, + ) + + stop.set() + t.join(timeout=5) + + assertSuccess( + len(found_tmp) > 0, + True, + raw_res=True, + msg="$out should use a temp collection during execution", + ) + + +@pytest.mark.aggregate +def test_out_temp_collection_cleaned_up(collection): + """Test $out cleans up the temporary collection after completion.""" + collection.insert_many([{"_id": i, "value": i} for i in range(10_000)]) + db = collection.database + + found_tmp: list[str] = [] + stop = threading.Event() + + def poll_collections() -> None: + while not stop.is_set(): + try: + names = db.list_collection_names() + for name in names: + if name.startswith("tmp.agg_out."): + found_tmp.append(name) + return + except Exception: + pass + + t = threading.Thread(target=poll_collections, daemon=True) + t.start() + + execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [{"$out": "creation_temp_target"}], + "cursor": {}, + }, + ) + + stop.set() + t.join(timeout=5) + + result = execute_command( + collection, + {"listCollections": 1, "filter": {"name": {"$regex": "^tmp\\.agg_out\\."}}}, + ) + assertSuccess( + result, + [], + msg="$out should clean up temp collection after completion", + ) diff --git 
a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_write_properties.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_write_properties.py new file mode 100644 index 00000000..4ce39382 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_write_properties.py @@ -0,0 +1,539 @@ +"""Tests for $out stage - individual write properties.""" + +from __future__ import annotations + +from datetime import datetime +from typing import cast + +import pytest +from bson import ( + Binary, + Code, + Decimal128, + Int64, + MaxKey, + MinKey, + ObjectId, + Regex, + Timestamp, +) + +from documentdb_tests.compatibility.tests.core.operator.stages.out.utils.out_test_helpers import ( + OutTestCase, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + populate_collection, +) +from documentdb_tests.framework.assertions import ( + assertSuccess, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Write Behavior - Auto-Generated _id]: documents with _id removed +# via a pipeline stage receive auto-generated ObjectId _id values in the +# output collection. 
OUT_AUTO_GENERATED_ID_TESTS: list[OutTestCase] = [
    OutTestCase(
        "auto_id",
        docs=[{"_id": 1, "value": 10}, {"_id": 2, "value": 20}],
        target_coll="write_auto_id_target",
        # $unset strips _id before $out, so the server must mint new ids.
        pipeline=[{"$unset": "_id"}, {"$out": "write_auto_id_target"}],
        expected=2,
        msg="$out should auto-generate ObjectId _id when _id is removed",
    ),
]


@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(OUT_AUTO_GENERATED_ID_TESTS))
def test_out_auto_generated_id(collection, test_case: OutTestCase):
    """Test $out auto-generates ObjectId _id when _id is removed."""
    populate_collection(collection, test_case)
    execute_command(
        collection,
        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
    )
    # Count only documents whose _id is an ObjectId: every written document
    # must have received an auto-generated id.
    count_pipeline = [
        {"$match": {"_id": {"$type": "objectId"}}},
        {"$count": "n"},
    ]
    count_result = execute_command(
        collection,
        {
            "aggregate": test_case.target_coll,
            "pipeline": count_pipeline,
            "cursor": {},
        },
    )
    assertSuccess(count_result, [{"n": test_case.expected}], msg=test_case.msg)


# Property [Write Behavior - Empty Cursor]: the aggregation cursor returned
# by a pipeline ending with $out contains an empty result list.
+OUT_EMPTY_CURSOR_TESTS: list[OutTestCase] = [ + OutTestCase( + "empty_cursor", + docs=[{"_id": 1, "value": 10}], + target_coll="write_cursor_target", + pipeline=[{"$out": "write_cursor_target"}], + expected=[], + msg="$out aggregation cursor should return an empty result list", + ), +] + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_EMPTY_CURSOR_TESTS)) +def test_out_empty_cursor(collection, test_case: OutTestCase): + """Test $out returns an empty cursor result.""" + populate_collection(collection, test_case) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg) + + +# Property [Write Behavior - Explain No Write]: explain does not perform the +# write - the target collection is not created or modified. +OUT_EXPLAIN_NO_WRITE_TESTS: list[OutTestCase] = [ + OutTestCase( + "explain_no_write", + docs=[{"_id": 1, "value": 10}], + target_coll="write_explain_target", + pipeline=[{"$out": "write_explain_target"}], + expected=[], + msg="explain with $out should not create the target collection", + ), +] + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_EXPLAIN_NO_WRITE_TESTS)) +def test_out_explain_no_write(collection, test_case: OutTestCase): + """Test explain with $out does not create or modify the target collection.""" + populate_collection(collection, test_case) + execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + "explain": True, + }, + ) + result = execute_command( + collection, + {"listCollections": 1, "filter": {"name": test_case.target_coll}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg) + + +OUT_EXPLAIN_NO_MODIFY_TESTS: list[OutTestCase] = [ + OutTestCase( + "explain_no_modify", + docs=[{"_id": 10, "new": True}], + target_coll="write_explain_existing_target", + 
pipeline=[{"$out": "write_explain_existing_target"}], + setup=lambda c: c.database["write_explain_existing_target"].insert_many( + [{"_id": 1, "old": True}, {"_id": 2, "old": True}] + ), + expected=[{"_id": 1, "old": True}, {"_id": 2, "old": True}], + msg="explain with $out should not modify existing target collection", + ), +] + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_EXPLAIN_NO_MODIFY_TESTS)) +def test_out_explain_no_modify(collection, test_case: OutTestCase): + """Test explain with $out does not modify an existing target collection.""" + populate_collection(collection, test_case) + if test_case.setup: + test_case.setup(collection) + execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + "explain": True, + }, + ) + result = execute_command( + collection, + {"find": test_case.target_coll, "filter": {}, "sort": {"_id": 1}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg) + + +# Property [Write Behavior - Idempotent]: running the same $out pipeline to +# the same target twice produces the same result in the target collection. 
OUT_IDEMPOTENT_TESTS: list[OutTestCase] = [
    OutTestCase(
        "idempotent",
        docs=[{"_id": 1, "value": 10}, {"_id": 2, "value": 20}],
        target_coll="write_idempotent_target",
        pipeline=[{"$out": "write_idempotent_target"}],
        expected=[{"_id": 1, "value": 10}, {"_id": 2, "value": 20}],
        msg="$out should produce the same result when run twice to the same target",
    ),
]


@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(OUT_IDEMPOTENT_TESTS))
def test_out_idempotent(collection, test_case: OutTestCase):
    """Test $out is idempotent when run twice to the same target."""
    populate_collection(collection, test_case)
    aggregate_cmd = {
        "aggregate": collection.name,
        "pipeline": test_case.pipeline,
        "cursor": {},
    }
    # Execute the identical pipeline twice; the second run replaces the first
    # run's output with the same documents.
    for _ in range(2):
        execute_command(collection, aggregate_cmd)
    find_result = execute_command(
        collection,
        {"find": test_case.target_coll, "filter": {}, "sort": {"_id": 1}},
    )
    assertSuccess(find_result, test_case.expected, msg=test_case.msg)


# Property [Write Behavior - BSON Round-Trip]: all BSON types representable
# by pymongo round-trip through $out without modification.
# A single document carrying every BSON type pymongo can represent.
_BSON_SAMPLE_DOC = {
    "_id": 1,
    "double_val": 3.14,
    "string_val": "hello",
    "object_val": {"nested": True},
    "array_val": [1, 2, 3],
    "binary_val": Binary(b"\x01\x02\x03"),
    "objectid_val": ObjectId("507f1f77bcf86cd799439011"),
    "bool_val": True,
    "date_val": datetime(2024, 1, 1),
    "null_val": None,
    "regex_val": Regex("abc", "i"),
    "int32_val": 42,
    "timestamp_val": Timestamp(1_234_567_890, 1),
    "int64_val": Int64(9_876_543_210),
    "decimal128_val": Decimal128("123.456"),
    "minkey_val": MinKey(),
    "maxkey_val": MaxKey(),
    "code_val": Code("function() {}"),
}

OUT_BSON_ROUND_TRIP_TESTS: list[OutTestCase] = [
    OutTestCase(
        "bson_round_trip",
        docs=[_BSON_SAMPLE_DOC],
        target_coll="write_bson_target",
        pipeline=[{"$out": "write_bson_target"}],
        msg="all BSON types should round-trip through $out without modification",
    ),
]


@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(OUT_BSON_ROUND_TRIP_TESTS))
def test_out_bson_round_trip(collection, test_case: OutTestCase):
    """Test all BSON types round-trip through $out without modification."""
    populate_collection(collection, test_case)
    execute_command(
        collection,
        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
    )
    # Compare the target collection against what the source actually stored,
    # so any driver-side coercion on insert cannot mask a $out mutation.
    source_docs = cast(
        dict, execute_command(collection, {"find": collection.name, "filter": {}})
    )["cursor"]["firstBatch"]
    target_result = execute_command(
        collection,
        {"find": test_case.target_coll, "filter": {}},
    )
    assertSuccess(target_result, source_docs, msg=test_case.msg)


# Property [Write Behavior - Large Documents]: documents up to 15 MB are
# written successfully through $out.
# 15 MB payload — large, while staying under the server's 16 MB document
# limit (NOTE(review): limit assumed from MongoDB defaults; confirm for the
# target server).
_LARGE_DOC_BYTES = 15 * 1_024 * 1_024

OUT_LARGE_DOCUMENT_TESTS: list[OutTestCase] = [
    OutTestCase(
        "large_doc",
        docs=[{"_id": 1, "data": "x" * _LARGE_DOC_BYTES}],
        target_coll="write_large_target",
        pipeline=[{"$out": "write_large_target"}],
        expected=[{"_id": 1}],
        msg="$out should successfully write a 15 MB document",
    ),
]


@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(OUT_LARGE_DOCUMENT_TESTS))
def test_out_large_document(collection, test_case: OutTestCase):
    """Test $out writes documents up to 15 MB successfully."""
    populate_collection(collection, test_case)
    execute_command(
        collection,
        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
    )
    # Project down to _id only so the 15 MB payload is not shipped back.
    find_result = execute_command(
        collection,
        {"find": test_case.target_coll, "filter": {}, "projection": {"_id": 1}},
    )
    assertSuccess(find_result, test_case.expected, msg=test_case.msg)


# Property [No Unicode Normalization - Collections]: precomposed and combining
# forms of the same character create separate, distinct collections - no
# Unicode normalization is applied to collection names.
+ + +@pytest.mark.aggregate +def test_out_no_unicode_normalization_precomposed(collection): + """Test $out writes to precomposed Unicode collection name correctly.""" + collection.insert_many([{"_id": 1, "form": "precomposed"}, {"_id": 2, "form": "combining"}]) + execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [{"$match": {"_id": 1}}, {"$out": "\u00e9"}], # precomposed e-acute + "cursor": {}, + }, + ) + execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [{"$match": {"_id": 2}}, {"$out": "\u0065\u0301"}], # combining e-acute + "cursor": {}, + }, + ) + result = execute_command( + collection, + {"find": "\u00e9", "filter": {}}, + ) + assertSuccess( + result, + [{"_id": 1, "form": "precomposed"}], + msg="$out should write to precomposed Unicode collection name", + ) + + +@pytest.mark.aggregate +def test_out_no_unicode_normalization_combining(collection): + """Test $out writes to combining Unicode collection name correctly.""" + collection.insert_many([{"_id": 1, "form": "precomposed"}, {"_id": 2, "form": "combining"}]) + execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [{"$match": {"_id": 1}}, {"$out": "\u00e9"}], # precomposed e-acute + "cursor": {}, + }, + ) + execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [{"$match": {"_id": 2}}, {"$out": "\u0065\u0301"}], # combining e-acute + "cursor": {}, + }, + ) + result = execute_command( + collection, + {"find": "\u0065\u0301", "filter": {}}, + ) + assertSuccess( + result, + [{"_id": 2, "form": "combining"}], + msg="$out should write to combining Unicode collection name", + ) + + +@pytest.mark.aggregate +def test_out_no_unicode_normalization_distinct_colls(collection): + """Test $out creates separate collections for precomposed and combining Unicode forms.""" + collection.insert_many([{"_id": 1, "form": "precomposed"}, {"_id": 2, "form": "combining"}]) + execute_command( + collection, + { + 
"aggregate": collection.name, + "pipeline": [{"$match": {"_id": 1}}, {"$out": "\u00e9"}], # precomposed e-acute + "cursor": {}, + }, + ) + execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [{"$match": {"_id": 2}}, {"$out": "\u0065\u0301"}], # combining e-acute + "cursor": {}, + }, + ) + result = execute_command( + collection, + { + "listCollections": 1, + "filter": {"name": {"$in": ["\u00e9", "\u0065\u0301"]}}, + "nameOnly": True, + }, + ) + assertSuccess( + result, + [ + {"name": "\u00e9", "type": "collection"}, + {"name": "\u0065\u0301", "type": "collection"}, + ], + msg="$out should create separate collections for precomposed and combining forms", + ignore_doc_order=True, + ) + + +# Property [No Unicode Normalization - Databases]: precomposed and combining +# forms of the same character create separate, distinct databases - no Unicode +# normalization is applied to database names. + + +@pytest.mark.aggregate +def test_out_no_unicode_normalization_db_precomposed(collection): + """Test $out writes to precomposed Unicode database name correctly.""" + collection.insert_many([{"_id": 1, "form": "precomposed"}, {"_id": 2, "form": "combining"}]) + client = collection.database.client + client.drop_database("\u00e9") + client.drop_database("\u0065\u0301") + try: + execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$match": {"_id": 1}}, + {"$out": {"db": "\u00e9", "coll": "target"}}, # precomposed e-acute + ], + "cursor": {}, + }, + ) + execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$match": {"_id": 2}}, + {"$out": {"db": "\u0065\u0301", "coll": "target"}}, # combining e-acute + ], + "cursor": {}, + }, + ) + result = execute_command( + client["\u00e9"]["target"], + {"find": "target", "filter": {}}, + ) + assertSuccess( + result, + [{"_id": 1, "form": "precomposed"}], + msg="$out should write to precomposed Unicode database name", + ) + finally: + 
client.drop_database("\u00e9") + client.drop_database("\u0065\u0301") + + +@pytest.mark.aggregate +def test_out_no_unicode_normalization_db_combining(collection): + """Test $out writes to combining Unicode database name correctly.""" + collection.insert_many([{"_id": 1, "form": "precomposed"}, {"_id": 2, "form": "combining"}]) + client = collection.database.client + client.drop_database("\u00e9") + client.drop_database("\u0065\u0301") + try: + execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$match": {"_id": 1}}, + {"$out": {"db": "\u00e9", "coll": "target"}}, # precomposed e-acute + ], + "cursor": {}, + }, + ) + execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$match": {"_id": 2}}, + {"$out": {"db": "\u0065\u0301", "coll": "target"}}, # combining e-acute + ], + "cursor": {}, + }, + ) + result = execute_command( + client["\u0065\u0301"]["target"], + {"find": "target", "filter": {}}, + ) + assertSuccess( + result, + [{"_id": 2, "form": "combining"}], + msg="$out should write to combining Unicode database name", + ) + finally: + client.drop_database("\u00e9") + client.drop_database("\u0065\u0301") + + +@pytest.mark.aggregate +def test_out_no_unicode_normalization_db_distinct(collection): + """Test $out creates separate databases for precomposed and combining Unicode forms.""" + collection.insert_many([{"_id": 1, "form": "precomposed"}, {"_id": 2, "form": "combining"}]) + client = collection.database.client + client.drop_database("\u00e9") + client.drop_database("\u0065\u0301") + try: + execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$match": {"_id": 1}}, + {"$out": {"db": "\u00e9", "coll": "target"}}, # precomposed e-acute + ], + "cursor": {}, + }, + ) + execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$match": {"_id": 2}}, + {"$out": {"db": "\u0065\u0301", "coll": "target"}}, # combining e-acute + ], + "cursor": {}, 
+ }, + ) + # Verify precomposed database has exactly 1 document (not 2, which + # would mean both forms mapped to the same database). + result = execute_command( + client["\u00e9"]["target"], + { + "aggregate": "target", + "pipeline": [{"$count": "n"}], + "cursor": {}, + }, + ) + assertSuccess( + result, + [{"n": 1}], + msg="$out should create separate databases for precomposed and combining forms", + ) + finally: + client.drop_database("\u00e9") + client.drop_database("\u0065\u0301") diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/utils/__init__.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/utils/out_test_helpers.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/utils/out_test_helpers.py new file mode 100644 index 00000000..c75db4f1 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/utils/out_test_helpers.py @@ -0,0 +1,42 @@ +"""Shared helpers for $out stage tests.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from pymongo.collection import Collection + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) + + +@dataclass(frozen=True) +class OutTestCase(StageTestCase): + """Data-driven test case for ``$out`` stage tests. + + Attributes: + target_coll: Name of the output collection. + target_db: Target database name. ``None`` means use the current database. + out_spec: Extra fields to merge into the ``$out`` document form. + expected_type: Expected collection type after ``$out`` runs. + expected_options: Expected collection options after ``$out`` runs. 
+ """ + + target_coll: str = "target" + target_db: str | None = None + out_spec: Any = None + expected_type: str = "collection" + expected_options: dict[str, Any] | None = None + + def build_out_stage(self, collection: Collection) -> dict[str, Any]: + """Build the ``$out`` stage spec from this test case.""" + db_name = self.target_db or collection.database.name + target = self.target_coll + if self.out_spec is not None or self.target_db is not None: + spec: dict[str, Any] = {"db": db_name, "coll": target} + if self.out_spec: + spec.update(self.out_spec) + return {"$out": spec} + return {"$out": target} diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_combination_out.py b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_combination_out.py new file mode 100644 index 00000000..53d74e66 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_combination_out.py @@ -0,0 +1,374 @@ +"""Tests for $out stage — pipeline integration with other stages.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, + populate_collection, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Pipeline Integration]: $out composes correctly with other +# aggregation stages — $match filters before writing, $project reshapes +# output, $group aggregates, $sort/$limit/$skip paginate, $unwind expands +# arrays, $addFields enriches, $replaceRoot restructures, $redact prunes, +# $lookup joins, and $unionWith merges collections. 
+OUT_PIPELINE_INTEGRATION_TESTS: list[StageTestCase] = [ + StageTestCase( + "match_equality", + docs=[ + {"_id": 1, "status": "active", "val": 10}, + {"_id": 2, "status": "inactive", "val": 20}, + {"_id": 3, "status": "active", "val": 30}, + ], + pipeline=[ + {"$match": {"status": "active"}}, + {"$out": "integration_out"}, + ], + expected=[ + {"_id": 1, "status": "active", "val": 10}, + {"_id": 3, "status": "active", "val": 30}, + ], + msg="$out should write only the documents that pass the $match filter", + ), + StageTestCase( + "match_comparison", + docs=[ + {"_id": 1, "val": 5}, + {"_id": 2, "val": 15}, + {"_id": 3, "val": 25}, + ], + pipeline=[ + {"$match": {"val": {"$gte": 15}}}, + {"$out": "integration_out"}, + ], + expected=[ + {"_id": 2, "val": 15}, + {"_id": 3, "val": 25}, + ], + msg="$out should write documents matching a comparison $match filter", + ), + StageTestCase( + "match_no_results", + docs=[ + {"_id": 1, "val": 10}, + {"_id": 2, "val": 20}, + ], + pipeline=[ + {"$match": {"val": {"$gt": 100}}}, + {"$out": "integration_out"}, + ], + expected=[], + msg="$out should create an empty collection when $match filters all documents", + ), + StageTestCase( + "project_inclusion", + docs=[ + {"_id": 1, "a": 1, "b": 2, "c": 3}, + {"_id": 2, "a": 4, "b": 5, "c": 6}, + ], + pipeline=[ + {"$project": {"a": 1, "b": 1}}, + {"$out": "integration_out"}, + ], + expected=[ + {"_id": 1, "a": 1, "b": 2}, + {"_id": 2, "a": 4, "b": 5}, + ], + msg="$out should write only the fields kept by an inclusion $project", + ), + StageTestCase( + "project_computed", + docs=[ + {"_id": 1, "x": 10}, + {"_id": 2, "x": 20}, + ], + pipeline=[ + {"$project": {"doubled": {"$multiply": ["$x", 2]}}}, + {"$out": "integration_out"}, + ], + expected=[ + {"_id": 1, "doubled": 20}, + {"_id": 2, "doubled": 40}, + ], + msg="$out should write computed fields from a $project stage", + ), + StageTestCase( + "group_sum", + docs=[ + {"_id": 1, "cat": "a", "val": 10}, + {"_id": 2, "cat": "a", "val": 20}, 
+ {"_id": 3, "cat": "b", "val": 30}, + ], + pipeline=[ + {"$group": {"_id": "$cat", "total": {"$sum": "$val"}}}, + {"$out": "integration_out"}, + ], + expected=[ + {"_id": "a", "total": 30}, + {"_id": "b", "total": 30}, + ], + msg="$out should write $group $sum results to the target collection", + ), + StageTestCase( + "group_count", + docs=[ + {"_id": 1, "cat": "x"}, + {"_id": 2, "cat": "x"}, + {"_id": 3, "cat": "y"}, + ], + pipeline=[ + {"$group": {"_id": "$cat", "n": {"$sum": 1}}}, + {"$out": "integration_out"}, + ], + expected=[ + {"_id": "x", "n": 2}, + {"_id": "y", "n": 1}, + ], + msg="$out should write $group count results to the target collection", + ), + StageTestCase( + "sort_limit_top_n", + docs=[ + {"_id": 1, "val": 50}, + {"_id": 2, "val": 10}, + {"_id": 3, "val": 40}, + {"_id": 4, "val": 30}, + {"_id": 5, "val": 20}, + ], + pipeline=[ + {"$sort": {"val": -1}}, + {"$limit": 3}, + {"$out": "integration_out"}, + ], + expected=[ + {"_id": 1, "val": 50}, + {"_id": 3, "val": 40}, + {"_id": 4, "val": 30}, + ], + msg="$out should write the top-N sorted documents after $sort and $limit", + ), + StageTestCase( + "skip_limit_page", + docs=[ + {"_id": 1, "val": 10}, + {"_id": 2, "val": 20}, + {"_id": 3, "val": 30}, + {"_id": 4, "val": 40}, + {"_id": 5, "val": 50}, + ], + pipeline=[ + {"$sort": {"_id": 1}}, + {"$skip": 1}, + {"$limit": 2}, + {"$out": "integration_out"}, + ], + expected=[ + {"_id": 2, "val": 20}, + {"_id": 3, "val": 30}, + ], + msg="$out should write the paginated window from $skip and $limit", + ), + StageTestCase( + "unwind_group_tag_count", + docs=[ + {"_id": 1, "tags": ["a", "b"]}, + {"_id": 2, "tags": ["b", "c"]}, + {"_id": 3, "tags": ["a"]}, + ], + pipeline=[ + {"$unwind": "$tags"}, + {"$group": {"_id": "$tags", "count": {"$sum": 1}}}, + {"$out": "integration_out"}, + ], + expected=[ + {"_id": "a", "count": 2}, + {"_id": "b", "count": 2}, + {"_id": "c", "count": 1}, + ], + msg="$out should write unwound-then-grouped tag counts to the target 
collection", + ), + StageTestCase( + "addfields_computed", + docs=[ + {"_id": 1, "price": 100, "qty": 3}, + {"_id": 2, "price": 200, "qty": 1}, + ], + pipeline=[ + {"$addFields": {"total": {"$multiply": ["$price", "$qty"]}}}, + {"$out": "integration_out"}, + ], + expected=[ + {"_id": 1, "price": 100, "qty": 3, "total": 300}, + {"_id": 2, "price": 200, "qty": 1, "total": 200}, + ], + msg="$out should write documents enriched by $addFields to the target collection", + ), + StageTestCase( + "replaceroot_nested", + docs=[ + {"_id": 1, "inner": {"a": 10, "b": 20}}, + {"_id": 2, "inner": {"a": 30, "b": 40}}, + ], + pipeline=[ + {"$replaceRoot": {"newRoot": "$inner"}}, + {"$addFields": {"_id": "$a"}}, + {"$out": "integration_out"}, + ], + expected=[ + {"_id": 10, "a": 10, "b": 20}, + {"_id": 30, "a": 30, "b": 40}, + ], + msg="$out should write the new root structure after $replaceRoot", + ), + StageTestCase( + "redact_keep_prune", + docs=[ + {"_id": 1, "level": 1, "data": "public"}, + {"_id": 2, "level": 5, "data": "secret"}, + {"_id": 3, "level": 2, "data": "internal"}, + ], + pipeline=[ + { + "$redact": { + "$cond": { + "if": {"$lte": ["$level", 2]}, + "then": "$$KEEP", + "else": "$$PRUNE", + } + } + }, + {"$out": "integration_out"}, + ], + expected=[ + {"_id": 1, "level": 1, "data": "public"}, + {"_id": 3, "level": 2, "data": "internal"}, + ], + msg="$out should write only documents kept by $redact", + ), + StageTestCase( + "lookup_equality", + docs=[ + {"_id": 1, "ref": 1}, + {"_id": 2, "ref": 2}, + ], + setup=lambda c: c.database["integration_foreign"].insert_many( + [ + {"_id": 1, "label": "first"}, + {"_id": 2, "label": "second"}, + ] + ), + pipeline=[ + { + "$lookup": { + "from": "integration_foreign", + "localField": "ref", + "foreignField": "_id", + "as": "joined", + } + }, + {"$project": {"ref": 1, "label": {"$arrayElemAt": ["$joined.label", 0]}}}, + {"$out": "integration_out"}, + ], + expected=[ + {"_id": 1, "ref": 1, "label": "first"}, + {"_id": 2, "ref": 2, 
"label": "second"}, + ], + msg="$out should write $lookup-joined documents to the target collection", + ), + StageTestCase( + "unionwith_merge", + docs=[ + {"_id": 1, "source": "main"}, + {"_id": 2, "source": "main"}, + ], + setup=lambda c: c.database["integration_foreign"].insert_many( + [ + {"_id": 3, "source": "other"}, + {"_id": 4, "source": "other"}, + ] + ), + pipeline=[ + {"$unionWith": {"coll": "integration_foreign"}}, + {"$out": "integration_out"}, + ], + expected=[ + {"_id": 1, "source": "main"}, + {"_id": 2, "source": "main"}, + {"_id": 3, "source": "other"}, + {"_id": 4, "source": "other"}, + ], + msg="$out should write $unionWith-merged documents to the target collection", + ), + StageTestCase( + "match_group_sort_out", + docs=[ + {"_id": 1, "dept": "eng", "salary": 100}, + {"_id": 2, "dept": "eng", "salary": 150}, + {"_id": 3, "dept": "sales", "salary": 80}, + {"_id": 4, "dept": "sales", "salary": 120}, + {"_id": 5, "dept": "hr", "salary": 90}, + ], + pipeline=[ + {"$match": {"salary": {"$gte": 90}}}, + {"$group": {"_id": "$dept", "avg_salary": {"$avg": "$salary"}}}, + {"$sort": {"avg_salary": -1}}, + {"$out": "integration_out"}, + ], + expected=[ + {"_id": "eng", "avg_salary": 125.0}, + {"_id": "hr", "avg_salary": 90.0}, + {"_id": "sales", "avg_salary": 120.0}, + ], + msg="$out should write correctly after $match, $group, and $sort combined", + ), + StageTestCase( + "project_addfields_match_out", + docs=[ + {"_id": 1, "price": 50, "qty": 4}, + {"_id": 2, "price": 30, "qty": 10}, + {"_id": 3, "price": 20, "qty": 2}, + ], + pipeline=[ + {"$project": {"price": 1, "qty": 1}}, + {"$addFields": {"revenue": {"$multiply": ["$price", "$qty"]}}}, + {"$match": {"revenue": {"$gte": 200}}}, + {"$out": "integration_out"}, + ], + expected=[ + {"_id": 1, "price": 50, "qty": 4, "revenue": 200}, + {"_id": 2, "price": 30, "qty": 10, "revenue": 300}, + ], + msg="$out should write correctly after $project, $addFields, and $match combined", + ), +] + + 
+@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(OUT_PIPELINE_INTEGRATION_TESTS)) +def test_out_pipeline_integration(collection, test_case: StageTestCase): + """Test $out pipeline integration with other stages.""" + populate_collection(collection, test_case) + if test_case.setup: + test_case.setup(collection) + db = collection.database + execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + result = execute_command( + collection, + {"find": "integration_out", "filter": {}, "sort": {"_id": 1}}, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) + db.drop_collection("integration_out") + db.drop_collection("integration_foreign") diff --git a/documentdb_tests/framework/error_codes.py b/documentdb_tests/framework/error_codes.py index 97932e87..a62d2a94 100644 --- a/documentdb_tests/framework/error_codes.py +++ b/documentdb_tests/framework/error_codes.py @@ -6,6 +6,7 @@ BAD_VALUE_ERROR = 2 GRAPH_CONTAINS_CYCLE_ERROR = 5 FAILED_TO_PARSE_ERROR = 9 +UNAUTHORIZED_ERROR = 13 TYPE_MISMATCH_ERROR = 14 OVERFLOW_ERROR = 15 ILLEGAL_OPERATION_ERROR = 20 @@ -20,6 +21,7 @@ UNKNOWN_REPL_WRITE_CONCERN_ERROR = 79 INDEX_OPTIONS_CONFLICT_ERROR = 85 INDEX_KEY_SPECS_CONFLICT_ERROR = 86 +DOCUMENT_VALIDATION_FAILURE_ERROR = 121 INCOMPATIBLE_COLLATION_VERSION_ERROR = 161 VIEW_DEPTH_LIMIT_ERROR = 165 COMMAND_NOT_SUPPORTED_ON_VIEW_ERROR = 166 @@ -90,6 +92,7 @@ MAP_MISSING_INPUT_ERROR = 16880 MAP_MISSING_IN_ERROR = 16882 MAP_INPUT_NOT_ARRAY_ERROR = 16883 +OUT_ARGUMENT_TYPE_ERROR = 16990 ALL_ELEMENTS_TRUE_NON_ARRAY_ERROR = 17040 ANY_ELEMENTS_TRUE_NON_ARRAY_ERROR = 17041 SET_IS_SUBSET_SECOND_NOT_ARRAY_ERROR = 17042 @@ -105,11 +108,13 @@ COND_MISSING_ELSE_ERROR = 17082 COND_EXTRA_FIELD_ERROR = 17083 SIZE_NOT_ARRAY_ERROR = 17124 +OUT_CAPPED_COLLECTION_ERROR = 17152 LET_UNDEFINED_VARIABLE_ERROR = 17276 META_NON_STRING_ERROR = 17307 
UNSUPPORTED_META_FIELD_ERROR = 17308 SORT_NON_META_OBJECT_ERROR = 17312 MATCH_TEXT_NOT_FIRST_STAGE_ERROR = 17313 +OUT_SPECIAL_COLLECTION_ERROR = 17385 GROUP_INCLUSION_STYLE_ERROR = 17390 DATETOSTRING_INVALID_FORMAT_TYPE_ERROR = 18533 DATETOSTRING_UNKNOWN_FIELD_ERROR = 18534 @@ -175,6 +180,7 @@ PROJECT_EXCLUSION_IN_INCLUSION_ERROR = 31254 HASHED_COMPOUND_MULTIPLE_ERROR = 31303 PROJECT_VALUE_IN_EXCLUSION_ERROR = 31310 +OUT_RESTRICTED_DATABASE_ERROR = 31321 PROJECT_UNKNOWN_EXPRESSION_ERROR = 31325 UNION_WITH_SUB_PIPELINE_NOT_ALLOWED_ERROR = 31441 REVERSE_ARRAY_NOT_ARRAY_ERROR = 34435 @@ -290,6 +296,7 @@ DATEFROMSTRING_MISSING_DATESTRING_ERROR = 40542 CHANGE_STREAM_NOT_ALLOWED_ERROR = 40573 FACET_PIPELINE_INVALID_STAGE_ERROR = 40600 +OUT_NOT_LAST_STAGE_ERROR = 40601 NOT_FIRST_STAGE_ERROR = 40602 GEO_NEAR_NOT_FIRST_STAGE_ERROR = 40603 DATEFROMSTRING_INVALID_FORMAT_TYPE_ERROR = 40684 @@ -419,6 +426,8 @@ ENCRYPTED_FIELD_RANGE_TYPE_ERROR = 6775201 ENCRYPTED_FIELD_RANGE_MIN_MAX_TYPE_ERROR = 7018200 WILDCARD_STRING_TYPE_ERROR = 7246202 +OUT_TIMESERIES_COLLECTION_TYPE_ERROR = 7268700 +OUT_TIMESERIES_OPTIONS_MISMATCH_ERROR = 7406103 SORT_DUPLICATE_KEY_ERROR = 7472500 N_ACCUMULATOR_INVALID_N_ERROR = 7548606 GEO_NEAR_MIN_DISTANCE_NOT_CONSTANT_ERROR = 7555701 From c9ebb0c170c01d7ff3eec6de74e63aa1d1ae7592 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Fri, 15 May 2026 10:31:29 -0700 Subject: [PATCH 2/5] Rename test file Signed-off-by: Alina (Xi) Li --- ..._out_namespace_errors.py => test_out_pipeline_errors.py} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename documentdb_tests/compatibility/tests/core/operator/stages/out/{test_out_namespace_errors.py => test_out_pipeline_errors.py} (98%) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_namespace_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_pipeline_errors.py similarity index 98% rename from 
documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_namespace_errors.py rename to documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_pipeline_errors.py index dffaad88..b0909ad2 100644 --- a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_namespace_errors.py +++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_pipeline_errors.py @@ -1,4 +1,4 @@ -"""Tests for $out stage - namespace and pipeline position errors.""" +"""Tests for $out stage - namespace, pipeline position, and nesting errors.""" from __future__ import annotations @@ -323,7 +323,7 @@ # object, timeField/metaField/granularity accept only string, and -OUT_NAMESPACE_ERROR_TESTS = ( +OUT_PIPELINE_ERROR_TESTS = ( OUT_COLLECTION_NAME_VALIDATION_ERROR_TESTS + OUT_DATABASE_NAME_VALIDATION_ERROR_TESTS + OUT_RESTRICTED_DATABASE_ERROR_TESTS @@ -333,7 +333,7 @@ @pytest.mark.aggregate -@pytest.mark.parametrize("test_case", pytest_params(OUT_NAMESPACE_ERROR_TESTS)) +@pytest.mark.parametrize("test_case", pytest_params(OUT_PIPELINE_ERROR_TESTS)) def test_out_error(collection, test_case: OutTestCase): """Test $out rejects invalid configurations with the expected error code.""" populate_collection(collection, test_case) From 35e7033e30b515f705611951d5cab652078ed897 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Fri, 15 May 2026 11:16:33 -0700 Subject: [PATCH 3/5] chore: trigger CI rerun Signed-off-by: Alina (Xi) Li From 62090cca830af78521a0aa62017c9542ef85ba10 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Fri, 15 May 2026 15:28:40 -0700 Subject: [PATCH 4/5] style fix for assertFailureCode Signed-off-by: Alina (Xi) Li --- .../stages/out/test_out_target_restriction_errors.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_target_restriction_errors.py 
b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_target_restriction_errors.py index 033cf242..d3e4b6e9 100644 --- a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_target_restriction_errors.py +++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_target_restriction_errors.py @@ -3,7 +3,6 @@ from __future__ import annotations from datetime import datetime -from typing import cast import pytest @@ -389,7 +388,7 @@ def test_out_in_view_definition_error(collection, test_case: OutTestCase): @pytest.mark.aggregate @pytest.mark.parametrize("test_case", pytest_params(OUT_SCHEMA_VALIDATION_ERROR_TESTS)) -def test_out_schema_validation_error(collection, test_case: OutTestCase): +def test_out_schema_validation_error(collection, test_case): """Test $out fails with document validation failure when validationAction is error.""" populate_collection(collection, test_case) if test_case.setup: @@ -398,7 +397,7 @@ def test_out_schema_validation_error(collection, test_case: OutTestCase): collection, {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, ) - assertFailureCode(result, cast(int, test_case.error_code), msg=test_case.msg) + assertFailureCode(result, test_case.error_code, msg=test_case.msg) @pytest.mark.aggregate From 4ad8affe813b0d71cf76a182e46333adfad0ce73 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Fri, 15 May 2026 16:39:47 -0700 Subject: [PATCH 5/5] use collectionname_suffix to make parallel runs safe Signed-off-by: Alina (Xi) Li separate outlier cases Signed-off-by: Alina (Xi) Li separate outlier cases Signed-off-by: Alina (Xi) Li --- .../stages/out/test_out_acceptance.py | 66 +++--- .../out/test_out_target_restriction_errors.py | 102 +++++---- .../stages/out/test_out_timeseries.py | 36 +-- .../stages/out/test_out_write_behavior.py | 55 ++--- .../stages/out/test_out_write_properties.py | 51 +++-- .../stages/out/utils/out_test_helpers.py | 10 +- 
.../stages/test_stages_combination_out.py | 205 ++++++++++-------- 7 files changed, 291 insertions(+), 234 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_acceptance.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_acceptance.py index bac34acb..67e92bf7 100644 --- a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_acceptance.py +++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_acceptance.py @@ -240,6 +240,7 @@ def test_out_acceptance(collection, test_case: OutTestCase): """Test $out writes results and creates the correct collection type.""" populate_collection(collection, test_case) + target = test_case.resolve_target_coll(collection) out_stage = test_case.build_out_stage(collection) execute_command( collection, @@ -247,11 +248,11 @@ def test_out_acceptance(collection, test_case: OutTestCase): ) result = execute_command( collection, - {"listCollections": 1, "filter": {"name": test_case.target_coll}}, + {"listCollections": 1, "filter": {"name": target}}, ) raw_doc = cast(dict, result)["cursor"]["firstBatch"][0] expected_info: dict[str, Any] = { - "name": test_case.target_coll, + "name": target, "type": test_case.expected_type, "options": test_case.expected_options, "info": raw_doc["info"], @@ -268,12 +269,11 @@ def test_out_acceptance(collection, test_case: OutTestCase): "view_source_out", docs=[{"_id": 1, "value": 10}], target_coll="view_source_out_target", - pipeline=[{"$out": "view_source_out_target"}], setup=lambda c: ( - c.database.drop_collection("good_view_for_out"), + c.database.drop_collection(f"{c.name}_good_view_for_out"), c.database.command( { - "create": "good_view_for_out", + "create": f"{c.name}_good_view_for_out", "viewOn": c.name, "pipeline": [{"$match": {"_id": 1}}], } @@ -293,17 +293,19 @@ def test_out_from_view_source_succeeds(collection, test_case: OutTestCase): if test_case.setup: test_case.setup(collection) db = 
collection.database + view_name = f"{collection.name}_good_view_for_out" + target = test_case.resolve_target_coll(collection) execute_command( - db["good_view_for_out"], + db[view_name], { - "aggregate": "good_view_for_out", - "pipeline": test_case.pipeline, + "aggregate": view_name, + "pipeline": [{"$out": target}], "cursor": {}, }, ) result = execute_command( collection, - {"find": test_case.target_coll, "filter": {}}, + {"find": target, "filter": {}}, ) assertResult(result, expected=test_case.expected, msg=test_case.msg) @@ -316,7 +318,6 @@ def test_out_from_view_source_succeeds(collection, test_case: OutTestCase): "agg_opts_collation", docs=[{"_id": 1, "value": 10}], target_coll="agg_opts_target", - pipeline=[{"$out": "agg_opts_target"}], out_spec={"collation": {"locale": "en", "strength": 2}}, msg="$out should succeed with aggregation option collation", ), @@ -324,7 +325,6 @@ def test_out_from_view_source_succeeds(collection, test_case: OutTestCase): "agg_opts_hint", docs=[{"_id": 1, "value": 10}], target_coll="agg_opts_target", - pipeline=[{"$out": "agg_opts_target"}], out_spec={"hint": "_id_"}, msg="$out should succeed with aggregation option hint", ), @@ -332,7 +332,6 @@ def test_out_from_view_source_succeeds(collection, test_case: OutTestCase): "agg_opts_max_time_ms", docs=[{"_id": 1, "value": 10}], target_coll="agg_opts_target", - pipeline=[{"$out": "agg_opts_target"}], out_spec={"maxTimeMS": 60_000}, msg="$out should succeed with aggregation option maxTimeMS", ), @@ -340,7 +339,6 @@ def test_out_from_view_source_succeeds(collection, test_case: OutTestCase): "agg_opts_allow_disk_use", docs=[{"_id": 1, "value": 10}], target_coll="agg_opts_target", - pipeline=[{"$out": "agg_opts_target"}], out_spec={"allowDiskUse": True}, msg="$out should succeed with aggregation option allowDiskUse", ), @@ -348,7 +346,6 @@ def test_out_from_view_source_succeeds(collection, test_case: OutTestCase): "agg_opts_bypass_doc_validation", docs=[{"_id": 1, "value": 10}], 
target_coll="agg_opts_target", - pipeline=[{"$out": "agg_opts_target"}], out_spec={"bypassDocumentValidation": True}, msg="$out should succeed with aggregation option bypassDocumentValidation", ), @@ -360,11 +357,12 @@ def test_out_from_view_source_succeeds(collection, test_case: OutTestCase): def test_out_aggregation_options(collection, test_case: OutTestCase): """Test $out succeeds with standard aggregation options.""" populate_collection(collection, test_case) + target = test_case.resolve_target_coll(collection) result = execute_command( collection, { "aggregate": collection.name, - "pipeline": test_case.pipeline, + "pipeline": [{"$out": target}], "cursor": {}, **test_case.out_spec, }, @@ -383,7 +381,6 @@ def test_out_aggregation_options(collection, test_case: OutTestCase): "rc_majority", docs=[{"_id": 1, "value": 10}], target_coll="rc_majority_target", - pipeline=[{"$out": "rc_majority_target"}], out_spec={"readConcern": "majority"}, msg="$out should succeed with readConcern level 'majority'", ), @@ -391,7 +388,6 @@ def test_out_aggregation_options(collection, test_case: OutTestCase): "rc_local", docs=[{"_id": 1, "value": 10}], target_coll="rc_local_target", - pipeline=[{"$out": "rc_local_target"}], out_spec={"readConcern": "local"}, msg="$out should succeed with readConcern level 'local'", ), @@ -399,7 +395,6 @@ def test_out_aggregation_options(collection, test_case: OutTestCase): "rc_available", docs=[{"_id": 1, "value": 10}], target_coll="rc_available_target", - pipeline=[{"$out": "rc_available_target"}], out_spec={"readConcern": "available"}, msg="$out should succeed with readConcern level 'available'", ), @@ -411,11 +406,12 @@ def test_out_aggregation_options(collection, test_case: OutTestCase): def test_out_read_concern_acceptance(collection, test_case: OutTestCase): """Test $out succeeds with non-linearizable read concern levels.""" populate_collection(collection, test_case) + target = test_case.resolve_target_coll(collection) result = execute_command( 
collection, { "aggregate": collection.name, - "pipeline": test_case.pipeline, + "pipeline": [{"$out": target}], "cursor": {}, "readConcern": {"level": test_case.out_spec["readConcern"]}, }, @@ -435,13 +431,12 @@ def test_out_read_concern_acceptance(collection, test_case: OutTestCase): "schema_val_warn", docs=[{"_id": 1, "value": "not_a_number"}], target_coll="schema_val_warn_target", - pipeline=[{"$out": "schema_val_warn_target"}], out_spec={"bypassDocumentValidation": False}, setup=lambda c: ( - c.database.drop_collection("schema_val_warn_target"), + c.database.drop_collection(f"{c.name}_schema_val_warn_target"), c.database.command( { - "create": "schema_val_warn_target", + "create": f"{c.name}_schema_val_warn_target", "validator": { "$jsonSchema": { "bsonType": "object", @@ -460,13 +455,12 @@ def test_out_read_concern_acceptance(collection, test_case: OutTestCase): "schema_val_bypass", docs=[{"_id": 1, "value": "not_a_number"}], target_coll="schema_val_bypass_target", - pipeline=[{"$out": "schema_val_bypass_target"}], out_spec={"bypassDocumentValidation": True}, setup=lambda c: ( - c.database.drop_collection("schema_val_bypass_target"), + c.database.drop_collection(f"{c.name}_schema_val_bypass_target"), c.database.command( { - "create": "schema_val_bypass_target", + "create": f"{c.name}_schema_val_bypass_target", "validator": { "$jsonSchema": { "bsonType": "object", @@ -491,9 +485,10 @@ def test_out_schema_validation_success(collection, test_case: OutTestCase): populate_collection(collection, test_case) if test_case.setup: test_case.setup(collection) + target = test_case.resolve_target_coll(collection) cmd: dict[str, Any] = { "aggregate": collection.name, - "pipeline": test_case.pipeline, + "pipeline": [{"$out": target}], "cursor": {}, } if test_case.out_spec["bypassDocumentValidation"]: @@ -501,7 +496,7 @@ def test_out_schema_validation_success(collection, test_case: OutTestCase): execute_command(collection, cmd) result = execute_command( collection, - {"find": 
test_case.target_coll, "filter": {}, "projection": {"_id": 1, "value": 1}}, + {"find": target, "filter": {}, "projection": {"_id": 1, "value": 1}}, ) assertSuccess(result, test_case.expected, msg=test_case.msg) @@ -514,11 +509,6 @@ def test_out_schema_validation_success(collection, test_case: OutTestCase): "idx_nonexist_not_created", docs=[{"_id": 1, "x": 1}, {"_id": 2, "x": 2}], target_coll="idx_nonexist_target", - pipeline=[ - {"$unset": "_id"}, - {"$addFields": {"_id": "same"}}, - {"$out": "idx_nonexist_target"}, - ], expected=[], msg="$out should not create the target collection when a unique index violation occurs", ), @@ -530,13 +520,19 @@ def test_out_schema_validation_success(collection, test_case: OutTestCase): def test_out_unique_violation_nonexistent_target_not_created(collection, test_case: OutTestCase): """Test $out does not create the target when a unique index violation occurs.""" populate_collection(collection, test_case) - collection.database.drop_collection(test_case.target_coll) + target = test_case.resolve_target_coll(collection) + collection.database.drop_collection(target) + pipeline = [ + {"$unset": "_id"}, + {"$addFields": {"_id": "same"}}, + {"$out": target}, + ] execute_command( collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + {"aggregate": collection.name, "pipeline": pipeline, "cursor": {}}, ) result = execute_command( collection, - {"listCollections": 1, "filter": {"name": test_case.target_coll}}, + {"listCollections": 1, "filter": {"name": target}}, ) assertSuccess(result, test_case.expected, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_target_restriction_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_target_restriction_errors.py index d3e4b6e9..6648d4eb 100644 --- a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_target_restriction_errors.py +++ 
b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_target_restriction_errors.py @@ -41,8 +41,10 @@ docs=[{"_id": 1, "value": 10}], target_coll="capped_out_target", setup=lambda c: ( - c.database.drop_collection("capped_out_target"), - c.database.create_collection("capped_out_target", capped=True, size=1_048_576), + c.database.drop_collection(f"{c.name}_capped_out_target"), + c.database.create_collection( + f"{c.name}_capped_out_target", capped=True, size=1_048_576 + ), ), msg="$out should reject writing to a capped collection", error_code=OUT_CAPPED_COLLECTION_ERROR, @@ -52,8 +54,10 @@ docs=[{"_id": 1, "value": 10}], target_coll="view_out_target", setup=lambda c: ( - c.database.drop_collection("view_out_target"), - c.database.command({"create": "view_out_target", "viewOn": c.name, "pipeline": []}), + c.database.drop_collection(f"{c.name}_view_out_target"), + c.database.command( + {"create": f"{c.name}_view_out_target", "viewOn": c.name, "pipeline": []} + ), ), msg="$out should reject writing to a view", error_code=COMMAND_NOT_SUPPORTED_ON_VIEW_ERROR, @@ -64,8 +68,10 @@ target_coll="view_ts_out_target", out_spec={"timeseries": {"timeField": "ts"}}, setup=lambda c: ( - c.database.drop_collection("view_ts_out_target"), - c.database.command({"create": "view_ts_out_target", "viewOn": c.name, "pipeline": []}), + c.database.drop_collection(f"{c.name}_view_ts_out_target"), + c.database.command( + {"create": f"{c.name}_view_ts_out_target", "viewOn": c.name, "pipeline": []} + ), ), msg=( "$out to a view with timeseries options should produce a timeseries" @@ -87,8 +93,8 @@ target_coll="ts_to_regular_target", out_spec={"timeseries": {"timeField": "ts"}}, setup=lambda c: ( - c.database.drop_collection("ts_to_regular_target"), - c.database.create_collection("ts_to_regular_target"), + c.database.drop_collection(f"{c.name}_ts_to_regular_target"), + c.database.create_collection(f"{c.name}_ts_to_regular_target"), ), msg=( "$out with timeseries options to an 
existing regular collection" @@ -102,8 +108,10 @@ target_coll="ts_mismatch_target", out_spec={"timeseries": {"timeField": "other"}}, setup=lambda c: ( - c.database.drop_collection("ts_mismatch_target"), - c.database.command({"create": "ts_mismatch_target", "timeseries": {"timeField": "ts"}}), + c.database.drop_collection(f"{c.name}_ts_mismatch_target"), + c.database.command( + {"create": f"{c.name}_ts_mismatch_target", "timeseries": {"timeField": "ts"}} + ), ), msg=( "$out with mismatched timeseries options to an existing time series" @@ -117,8 +125,10 @@ target_coll="ts_mismatch_target", out_spec={"timeseries": {"timeField": "ts", "metaField": "m"}}, setup=lambda c: ( - c.database.drop_collection("ts_mismatch_target"), - c.database.command({"create": "ts_mismatch_target", "timeseries": {"timeField": "ts"}}), + c.database.drop_collection(f"{c.name}_ts_mismatch_target"), + c.database.command( + {"create": f"{c.name}_ts_mismatch_target", "timeseries": {"timeField": "ts"}} + ), ), msg=( "$out with mismatched timeseries options to an existing time series" @@ -132,10 +142,10 @@ target_coll="ts_mismatch_target", out_spec={"timeseries": {"timeField": "ts", "metaField": "other"}}, setup=lambda c: ( - c.database.drop_collection("ts_mismatch_target"), + c.database.drop_collection(f"{c.name}_ts_mismatch_target"), c.database.command( { - "create": "ts_mismatch_target", + "create": f"{c.name}_ts_mismatch_target", "timeseries": {"timeField": "ts", "metaField": "m"}, } ), @@ -152,10 +162,10 @@ target_coll="ts_mismatch_target", out_spec={"timeseries": {"timeField": "ts", "granularity": "hours"}}, setup=lambda c: ( - c.database.drop_collection("ts_mismatch_target"), + c.database.drop_collection(f"{c.name}_ts_mismatch_target"), c.database.command( { - "create": "ts_mismatch_target", + "create": f"{c.name}_ts_mismatch_target", "timeseries": {"timeField": "ts", "granularity": "seconds"}, } ), @@ -172,10 +182,10 @@ target_coll="ts_mismatch_target", out_spec={"timeseries": {"timeField": 
"ts", "granularity": "hours"}}, setup=lambda c: ( - c.database.drop_collection("ts_mismatch_target"), + c.database.drop_collection(f"{c.name}_ts_mismatch_target"), c.database.command( { - "create": "ts_mismatch_target", + "create": f"{c.name}_ts_mismatch_target", "timeseries": { "timeField": "ts", "bucketMaxSpanSeconds": 100, @@ -202,10 +212,10 @@ } }, setup=lambda c: ( - c.database.drop_collection("ts_mismatch_target"), + c.database.drop_collection(f"{c.name}_ts_mismatch_target"), c.database.command( { - "create": "ts_mismatch_target", + "create": f"{c.name}_ts_mismatch_target", "timeseries": { "timeField": "ts", "bucketMaxSpanSeconds": 100, @@ -232,10 +242,10 @@ docs=[{"_id": 1, "x": 1}, {"_id": 2, "x": 1}], target_coll="idx_unique_target", setup=lambda c: ( - c.database["idx_unique_target"].insert_many( + c.database[f"{c.name}_idx_unique_target"].insert_many( [{"_id": 90, "x": 90}, {"_id": 91, "x": 91}] ), - c.database["idx_unique_target"].create_index("x", unique=True), + c.database[f"{c.name}_idx_unique_target"].create_index("x", unique=True), ), msg="$out should produce a duplicate key error on unique index violation", error_code=DUPLICATE_KEY_ERROR, @@ -245,8 +255,10 @@ docs=[{"_id": 1, "a": 1, "b": 2}, {"_id": 2, "a": 1, "b": 2}], target_coll="idx_compound_target", setup=lambda c: ( - c.database["idx_compound_target"].insert_one({"_id": 99, "a": 99, "b": 99}), - c.database["idx_compound_target"].create_index([("a", 1), ("b", 1)], unique=True), + c.database[f"{c.name}_idx_compound_target"].insert_one({"_id": 99, "a": 99, "b": 99}), + c.database[f"{c.name}_idx_compound_target"].create_index( + [("a", 1), ("b", 1)], unique=True + ), ), msg="$out should produce a duplicate key error on compound unique index violation", error_code=DUPLICATE_KEY_ERROR, @@ -258,7 +270,6 @@ pipeline=[ {"$unset": "_id"}, {"$addFields": {"_id": "same"}}, - {"$out": "idx_dup_id_target"}, ], msg="$out should produce a duplicate key error when output contains duplicate _id values", 
error_code=DUPLICATE_KEY_ERROR, @@ -272,9 +283,6 @@ "rc_linearizable", docs=[{"_id": 1, "value": 10}], target_coll="rc_linearizable_target", - pipeline=[ - {"$out": "rc_linearizable_target"}, - ], msg="$out should reject linearizable read concern", error_code=INVALID_OPTIONS_ERROR, ), @@ -295,10 +303,7 @@ def test_out_target_restriction_error(collection, test_case: OutTestCase): populate_collection(collection, test_case) if test_case.setup: test_case.setup(collection) - if test_case.pipeline: - pipeline = test_case.pipeline - else: - pipeline = [test_case.build_out_stage(collection)] + pipeline = list(test_case.pipeline or []) + [test_case.build_out_stage(collection)] result = execute_command( collection, {"aggregate": collection.name, "pipeline": pipeline, "cursor": {}}, @@ -311,11 +316,12 @@ def test_out_target_restriction_error(collection, test_case: OutTestCase): def test_out_read_concern_error(collection, test_case: OutTestCase): """Test $out rejects invalid read concern levels.""" populate_collection(collection, test_case) + pipeline = [test_case.build_out_stage(collection)] result = execute_command( collection, { "aggregate": collection.name, - "pipeline": test_case.pipeline, + "pipeline": pipeline, "cursor": {}, "readConcern": {"level": "linearizable"}, }, @@ -329,7 +335,7 @@ def test_out_read_concern_error(collection, test_case: OutTestCase): OutTestCase( "view_def_out", docs=[{"_id": 1, "value": 10}], - pipeline=[{"$out": "target"}], + target_coll="view_def_target", error_code=OPTION_NOT_SUPPORTED_ON_VIEW_ERROR, msg="$out in a view definition should produce an invalid view pipeline error", ), @@ -341,12 +347,13 @@ def test_out_read_concern_error(collection, test_case: OutTestCase): def test_out_in_view_definition_error(collection, test_case: OutTestCase): """Test $out in a view definition is rejected.""" populate_collection(collection, test_case) + pipeline = [test_case.build_out_stage(collection)] result = execute_command( collection, { - "create": 
"bad_view", + "create": f"{collection.name}_bad_view", "viewOn": collection.name, - "pipeline": test_case.pipeline, + "pipeline": pipeline, }, ) assertResult(result, error_code=test_case.error_code, msg=test_case.msg) @@ -361,12 +368,11 @@ def test_out_in_view_definition_error(collection, test_case: OutTestCase): "schema_val_err", docs=[{"_id": 1, "value": "not_a_number"}], target_coll="schema_val_error_target", - pipeline=[{"$out": "schema_val_error_target"}], setup=lambda c: ( - c.database.drop_collection("schema_val_error_target"), + c.database.drop_collection(f"{c.name}_schema_val_error_target"), c.database.command( { - "create": "schema_val_error_target", + "create": f"{c.name}_schema_val_error_target", "validator": { "$jsonSchema": { "bsonType": "object", @@ -377,7 +383,7 @@ def test_out_in_view_definition_error(collection, test_case: OutTestCase): "validationAction": "error", } ), - c.database["schema_val_error_target"].insert_one({"_id": 99, "value": 42}), + c.database[f"{c.name}_schema_val_error_target"].insert_one({"_id": 99, "value": 42}), ), error_code=DOCUMENT_VALIDATION_FAILURE_ERROR, expected=[{"_id": 99, "value": 42}], @@ -393,9 +399,10 @@ def test_out_schema_validation_error(collection, test_case): populate_collection(collection, test_case) if test_case.setup: test_case.setup(collection) + pipeline = [test_case.build_out_stage(collection)] result = execute_command( collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + {"aggregate": collection.name, "pipeline": pipeline, "cursor": {}}, ) assertFailureCode(result, test_case.error_code, msg=test_case.msg) @@ -407,13 +414,15 @@ def test_out_schema_validation_error_unchanged(collection, test_case: OutTestCas populate_collection(collection, test_case) if test_case.setup: test_case.setup(collection) + pipeline = [test_case.build_out_stage(collection)] execute_command( collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + 
{"aggregate": collection.name, "pipeline": pipeline, "cursor": {}}, ) + target = test_case.resolve_target_coll(collection) result = execute_command( collection, - {"find": test_case.target_coll, "filter": {}, "projection": {"_id": 1, "value": 1}}, + {"find": target, "filter": {}, "projection": {"_id": 1, "value": 1}}, ) assertSuccess(result, test_case.expected, msg=test_case.msg) @@ -424,7 +433,7 @@ def test_out_schema_validation_error_unchanged(collection, test_case: OutTestCas OutTestCase( "transaction_out", docs=[{"_id": 1, "value": 10}], - pipeline=[{"$out": "txn_target"}], + target_coll="txn_target", error_code=ILLEGAL_OPERATION_ERROR, msg="$out inside a transaction should produce an error", ), @@ -436,12 +445,13 @@ def test_out_schema_validation_error_unchanged(collection, test_case: OutTestCas def test_out_transaction_error(collection, test_case: OutTestCase): """Test $out inside a transaction produces an error.""" populate_collection(collection, test_case) + pipeline = [test_case.build_out_stage(collection)] # Verify the pipeline works outside a transaction first. 
execute_command( collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + {"aggregate": collection.name, "pipeline": pipeline, "cursor": {}}, ) - command = {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}} + command = {"aggregate": collection.name, "pipeline": pipeline, "cursor": {}} client = collection.database.client with client.start_session() as session: session.start_transaction() diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries.py index 1728c39a..3ce70136 100644 --- a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries.py +++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_timeseries.py @@ -395,11 +395,11 @@ def test_out_timeseries(collection, test_case: OutTestCase): ) result = execute_command( collection, - {"listCollections": 1, "filter": {"name": test_case.target_coll}}, + {"listCollections": 1, "filter": {"name": test_case.resolve_target_coll(collection)}}, ) raw_doc = cast(dict, result)["cursor"]["firstBatch"][0] expected_info: dict[str, Any] = { - "name": test_case.target_coll, + "name": test_case.resolve_target_coll(collection), "type": test_case.expected_type, "options": test_case.expected_options, "info": raw_doc["info"], @@ -443,12 +443,16 @@ def test_out_timeseries_cross_db(collection, test_case: OutTestCase): try: execute_command( collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + { + "aggregate": collection.name, + "pipeline": [test_case.build_out_stage(collection)], + "cursor": {}, + }, ) result = execute_command( - client[test_case.target_db][test_case.target_coll], + client[test_case.target_db][test_case.resolve_target_coll(collection)], { - "find": test_case.target_coll, + "find": test_case.resolve_target_coll(collection), "filter": {}, "projection": 
{"_id": 0}, }, @@ -519,7 +523,7 @@ def test_out_timeseries_datetime_acceptance(collection, test_case: OutTestCase): result = execute_command( collection, { - "find": test_case.target_coll, + "find": test_case.resolve_target_coll(collection), "filter": {}, "projection": {"_id": 0, "ts": 1, "v": 1}, }, @@ -537,8 +541,10 @@ def test_out_timeseries_datetime_acceptance(collection, test_case: OutTestCase): target_coll="ts_existing_target", out_spec={"timeseries": {"timeField": "ts"}}, setup=lambda c: ( - c.database.drop_collection("ts_existing_target"), - c.database.command({"create": "ts_existing_target", "timeseries": {"timeField": "ts"}}), + c.database.drop_collection(f"{c.name}_ts_existing_target"), + c.database.command( + {"create": f"{c.name}_ts_existing_target", "timeseries": {"timeField": "ts"}} + ), ), expected=[{"ts": datetime(2024, 6, 1, tzinfo=timezone.utc), "value": 60}], msg=( @@ -551,8 +557,10 @@ def test_out_timeseries_datetime_acceptance(collection, test_case: OutTestCase): docs=[{"_id": 1, "ts": datetime(2024, 6, 1), "value": 60}], target_coll="ts_existing_target", setup=lambda c: ( - c.database.drop_collection("ts_existing_target"), - c.database.command({"create": "ts_existing_target", "timeseries": {"timeField": "ts"}}), + c.database.drop_collection(f"{c.name}_ts_existing_target"), + c.database.command( + {"create": f"{c.name}_ts_existing_target", "timeseries": {"timeField": "ts"}} + ), ), expected=[{"ts": datetime(2024, 6, 1, tzinfo=timezone.utc), "value": 60}], msg=( @@ -566,8 +574,10 @@ def test_out_timeseries_datetime_acceptance(collection, test_case: OutTestCase): target_coll="ts_existing_target", out_spec={}, setup=lambda c: ( - c.database.drop_collection("ts_existing_target"), - c.database.command({"create": "ts_existing_target", "timeseries": {"timeField": "ts"}}), + c.database.drop_collection(f"{c.name}_ts_existing_target"), + c.database.command( + {"create": f"{c.name}_ts_existing_target", "timeseries": {"timeField": "ts"}} + ), ), 
expected=[{"ts": datetime(2024, 6, 1, tzinfo=timezone.utc), "value": 60}], msg=( @@ -593,7 +603,7 @@ def test_out_timeseries_existing(collection, test_case: OutTestCase): result = execute_command( collection, { - "find": test_case.target_coll, + "find": test_case.resolve_target_coll(collection), "filter": {}, "projection": {"_id": 0, "ts": 1, "value": 1}, }, diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_write_behavior.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_write_behavior.py index c22e981e..aa3888ac 100644 --- a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_write_behavior.py +++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_write_behavior.py @@ -88,6 +88,7 @@ def test_out_database_name_acceptance(collection, test_case: OutTestCase): """Test $out accepts various character classes as database names.""" populate_collection(collection, test_case) db_name = test_case.target_db # type: ignore[arg-type] + target_coll_name = test_case.resolve_target_coll(collection) client = collection.database.client client.drop_database(db_name) try: @@ -98,15 +99,15 @@ def test_out_database_name_acceptance(collection, test_case: OutTestCase): ) target_db = client[db_name] result = execute_command( - target_db[test_case.target_coll], - {"listCollections": 1, "filter": {"name": test_case.target_coll}}, + target_db[target_coll_name], + {"listCollections": 1, "filter": {"name": target_coll_name}}, ) raw_doc = cast(dict, result)["cursor"]["firstBatch"][0] assertSuccess( result, [ { - "name": test_case.target_coll, + "name": target_coll_name, "type": "collection", "options": {}, "info": raw_doc["info"], @@ -134,7 +135,6 @@ def test_out_database_name_acceptance(collection, test_case: OutTestCase): "new_collection_created", docs=[{"_id": 1, "value": 10}, {"_id": 2, "value": 20}], target_coll="creation_new_target", - pipeline=[{"$out": "creation_new_target"}], 
expected=[{"_id": 1, "value": 10}, {"_id": 2, "value": 20}], msg="$out should create a new collection when the target does not exist", ), @@ -142,8 +142,9 @@ def test_out_database_name_acceptance(collection, test_case: OutTestCase): "empty_pipeline_empties_existing_collection", docs=[], target_coll="creation_emptied_target", - pipeline=[{"$out": "creation_emptied_target"}], - setup=lambda c: c.database["creation_emptied_target"].insert_one({"_id": 99, "old": True}), + setup=lambda c: c.database[f"{c.name}_creation_emptied_target"].insert_one( + {"_id": 99, "old": True} + ), expected=[], msg="$out with no documents should empty an existing collection", ), @@ -151,8 +152,7 @@ def test_out_database_name_acceptance(collection, test_case: OutTestCase): "replacement_atomic", docs=[{"_id": 10, "new": True}, {"_id": 20, "new": True}], target_coll="replacement_atomic_target", - pipeline=[{"$out": "replacement_atomic_target"}], - setup=lambda c: c.database["replacement_atomic_target"].insert_many( + setup=lambda c: c.database[f"{c.name}_replacement_atomic_target"].insert_many( [{"_id": 1, "old": True}, {"_id": 2, "old": True}] ), expected=[{"_id": 10, "new": True}, {"_id": 20, "new": True}], @@ -162,12 +162,11 @@ def test_out_database_name_acceptance(collection, test_case: OutTestCase): "failure_rollback_docs", docs=[{"_id": 10, "x": 1}, {"_id": 20, "x": 1}], target_coll="replacement_fail_target", - pipeline=[{"$out": "replacement_fail_target"}], setup=lambda c: ( - c.database["replacement_fail_target"].insert_many( + c.database[f"{c.name}_replacement_fail_target"].insert_many( [{"_id": 1, "x": 1}, {"_id": 2, "x": 2}] ), - c.database["replacement_fail_target"].create_index("x", unique=True), + c.database[f"{c.name}_replacement_fail_target"].create_index("x", unique=True), ), expected=[{"_id": 1, "x": 1}, {"_id": 2, "x": 2}], msg="$out failure should leave pre-existing documents unchanged", @@ -182,13 +181,15 @@ def test_out_find_after_out(collection, test_case: OutTestCase): 
populate_collection(collection, test_case) if test_case.setup: test_case.setup(collection) + out_stage = test_case.build_out_stage(collection) execute_command( collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + {"aggregate": collection.name, "pipeline": [out_stage], "cursor": {}}, ) + target_coll_name = test_case.resolve_target_coll(collection) result = execute_command( collection, - {"find": test_case.target_coll, "filter": {}, "sort": {"_id": 1}}, + {"find": target_coll_name, "filter": {}, "sort": {"_id": 1}}, ) assertSuccess(result, test_case.expected, msg=test_case.msg) @@ -196,7 +197,7 @@ def test_out_find_after_out(collection, test_case: OutTestCase): @pytest.mark.aggregate def test_out_empty_pipeline_creates_collection(collection): """Test $out with no documents creates an empty collection.""" - target_coll = "creation_empty_target" + target_coll = f"{collection.name}_creation_empty_target" execute_command( collection, {"aggregate": collection.name, "pipeline": [{"$out": target_coll}], "cursor": {}}, @@ -225,7 +226,7 @@ def test_out_database_creation(collection): db = collection.database client = db.client cross_db_name = db.name + "_cross" - target_coll_name = "creation_cross_db_target" + target_coll_name = f"{collection.name}_creation_cross_db_target" client.drop_database(cross_db_name) try: execute_command( @@ -295,10 +296,11 @@ def test_out_replacement_self(collection, test_case: OutTestCase): "replacement_preserves_indexes", docs=[{"_id": 10, "x": 100}, {"_id": 20, "x": 200}], target_coll="replacement_idx_target", - pipeline=[{"$out": "replacement_idx_target"}], setup=lambda c: ( - c.database["replacement_idx_target"].insert_one({"_id": 1, "x": 1}), - c.database["replacement_idx_target"].create_index("x", name="x_idx", unique=True), + c.database[f"{c.name}_replacement_idx_target"].insert_one({"_id": 1, "x": 1}), + c.database[f"{c.name}_replacement_idx_target"].create_index( + "x", name="x_idx", unique=True + ), 
), expected=[ {"v": 2, "key": {"_id": 1}, "name": "_id_"}, @@ -310,12 +312,11 @@ def test_out_replacement_self(collection, test_case: OutTestCase): "failure_rollback_indexes", docs=[{"_id": 10, "x": 1}, {"_id": 20, "x": 1}], target_coll="replacement_fail_target", - pipeline=[{"$out": "replacement_fail_target"}], setup=lambda c: ( - c.database["replacement_fail_target"].insert_many( + c.database[f"{c.name}_replacement_fail_target"].insert_many( [{"_id": 1, "x": 1}, {"_id": 2, "x": 2}] ), - c.database["replacement_fail_target"].create_index("x", unique=True), + c.database[f"{c.name}_replacement_fail_target"].create_index("x", unique=True), ), expected=[ {"v": 2, "key": {"_id": 1}, "name": "_id_"}, @@ -333,13 +334,15 @@ def test_out_index_after_out(collection, test_case: OutTestCase): populate_collection(collection, test_case) if test_case.setup: test_case.setup(collection) + out_stage = test_case.build_out_stage(collection) execute_command( collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + {"aggregate": collection.name, "pipeline": [out_stage], "cursor": {}}, ) + target_coll_name = test_case.resolve_target_coll(collection) result = execute_command( collection, - {"listIndexes": test_case.target_coll}, + {"listIndexes": target_coll_name}, ) assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_doc_order=True) @@ -353,6 +356,7 @@ def test_out_temp_collection_observed(collection): """Test $out uses a temporary collection during execution.""" collection.insert_many([{"_id": i, "value": i} for i in range(10_000)]) db = collection.database + target_coll = f"{collection.name}_creation_temp_target" found_tmp: list[str] = [] stop = threading.Event() @@ -375,7 +379,7 @@ def poll_collections() -> None: collection, { "aggregate": collection.name, - "pipeline": [{"$out": "creation_temp_target"}], + "pipeline": [{"$out": target_coll}], "cursor": {}, }, ) @@ -396,6 +400,7 @@ def 
test_out_temp_collection_cleaned_up(collection): """Test $out cleans up the temporary collection after completion.""" collection.insert_many([{"_id": i, "value": i} for i in range(10_000)]) db = collection.database + target_coll = f"{collection.name}_creation_temp_target" found_tmp: list[str] = [] stop = threading.Event() @@ -418,7 +423,7 @@ def poll_collections() -> None: collection, { "aggregate": collection.name, - "pipeline": [{"$out": "creation_temp_target"}], + "pipeline": [{"$out": target_coll}], "cursor": {}, }, ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_write_properties.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_write_properties.py index 4ce39382..ac953669 100644 --- a/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_write_properties.py +++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/test_out_write_properties.py @@ -38,7 +38,7 @@ "auto_id", docs=[{"_id": 1, "value": 10}, {"_id": 2, "value": 20}], target_coll="write_auto_id_target", - pipeline=[{"$unset": "_id"}, {"$out": "write_auto_id_target"}], + pipeline=[{"$unset": "_id"}], expected=2, msg="$out should auto-generate ObjectId _id when _id is removed", ), @@ -50,15 +50,17 @@ def test_out_auto_generated_id(collection, test_case: OutTestCase): """Test $out auto-generates ObjectId _id when _id is removed.""" populate_collection(collection, test_case) + target = test_case.resolve_target_coll(collection) + pipeline = test_case.pipeline + [{"$out": target}] execute_command( collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + {"aggregate": collection.name, "pipeline": pipeline, "cursor": {}}, ) # Filter by _id type to confirm auto-generated ObjectIds. 
result = execute_command( collection, { - "aggregate": test_case.target_coll, + "aggregate": target, "pipeline": [ {"$match": {"_id": {"$type": "objectId"}}}, {"$count": "n"}, @@ -76,7 +78,6 @@ def test_out_auto_generated_id(collection, test_case: OutTestCase): "empty_cursor", docs=[{"_id": 1, "value": 10}], target_coll="write_cursor_target", - pipeline=[{"$out": "write_cursor_target"}], expected=[], msg="$out aggregation cursor should return an empty result list", ), @@ -88,9 +89,10 @@ def test_out_auto_generated_id(collection, test_case: OutTestCase): def test_out_empty_cursor(collection, test_case: OutTestCase): """Test $out returns an empty cursor result.""" populate_collection(collection, test_case) + out_stage = test_case.build_out_stage(collection) result = execute_command( collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + {"aggregate": collection.name, "pipeline": [out_stage], "cursor": {}}, ) assertSuccess(result, test_case.expected, msg=test_case.msg) @@ -102,7 +104,6 @@ def test_out_empty_cursor(collection, test_case: OutTestCase): "explain_no_write", docs=[{"_id": 1, "value": 10}], target_coll="write_explain_target", - pipeline=[{"$out": "write_explain_target"}], expected=[], msg="explain with $out should not create the target collection", ), @@ -114,18 +115,20 @@ def test_out_empty_cursor(collection, test_case: OutTestCase): def test_out_explain_no_write(collection, test_case: OutTestCase): """Test explain with $out does not create or modify the target collection.""" populate_collection(collection, test_case) + target = test_case.resolve_target_coll(collection) + out_stage = test_case.build_out_stage(collection) execute_command( collection, { "aggregate": collection.name, - "pipeline": test_case.pipeline, + "pipeline": [out_stage], "cursor": {}, "explain": True, }, ) result = execute_command( collection, - {"listCollections": 1, "filter": {"name": test_case.target_coll}}, + {"listCollections": 1, "filter": 
{"name": target}}, ) assertSuccess(result, test_case.expected, msg=test_case.msg) @@ -135,8 +138,7 @@ def test_out_explain_no_write(collection, test_case: OutTestCase): "explain_no_modify", docs=[{"_id": 10, "new": True}], target_coll="write_explain_existing_target", - pipeline=[{"$out": "write_explain_existing_target"}], - setup=lambda c: c.database["write_explain_existing_target"].insert_many( + setup=lambda c: c.database[f"{c.name}_write_explain_existing_target"].insert_many( [{"_id": 1, "old": True}, {"_id": 2, "old": True}] ), expected=[{"_id": 1, "old": True}, {"_id": 2, "old": True}], @@ -152,18 +154,20 @@ def test_out_explain_no_modify(collection, test_case: OutTestCase): populate_collection(collection, test_case) if test_case.setup: test_case.setup(collection) + target = test_case.resolve_target_coll(collection) + out_stage = test_case.build_out_stage(collection) execute_command( collection, { "aggregate": collection.name, - "pipeline": test_case.pipeline, + "pipeline": [out_stage], "cursor": {}, "explain": True, }, ) result = execute_command( collection, - {"find": test_case.target_coll, "filter": {}, "sort": {"_id": 1}}, + {"find": target, "filter": {}, "sort": {"_id": 1}}, ) assertSuccess(result, test_case.expected, msg=test_case.msg) @@ -175,7 +179,6 @@ def test_out_explain_no_modify(collection, test_case: OutTestCase): "idempotent", docs=[{"_id": 1, "value": 10}, {"_id": 2, "value": 20}], target_coll="write_idempotent_target", - pipeline=[{"$out": "write_idempotent_target"}], expected=[{"_id": 1, "value": 10}, {"_id": 2, "value": 20}], msg="$out should produce the same result when run twice to the same target", ), @@ -187,17 +190,19 @@ def test_out_explain_no_modify(collection, test_case: OutTestCase): def test_out_idempotent(collection, test_case: OutTestCase): """Test $out is idempotent when run twice to the same target.""" populate_collection(collection, test_case) + target = test_case.resolve_target_coll(collection) + out_stage = 
test_case.build_out_stage(collection) execute_command( collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + {"aggregate": collection.name, "pipeline": [out_stage], "cursor": {}}, ) execute_command( collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + {"aggregate": collection.name, "pipeline": [out_stage], "cursor": {}}, ) result = execute_command( collection, - {"find": test_case.target_coll, "filter": {}, "sort": {"_id": 1}}, + {"find": target, "filter": {}, "sort": {"_id": 1}}, ) assertSuccess(result, test_case.expected, msg=test_case.msg) @@ -230,7 +235,6 @@ def test_out_idempotent(collection, test_case: OutTestCase): } ], target_coll="write_bson_target", - pipeline=[{"$out": "write_bson_target"}], msg="all BSON types should round-trip through $out without modification", ), ] @@ -241,9 +245,11 @@ def test_out_idempotent(collection, test_case: OutTestCase): def test_out_bson_round_trip(collection, test_case: OutTestCase): """Test all BSON types round-trip through $out without modification.""" populate_collection(collection, test_case) + target = test_case.resolve_target_coll(collection) + out_stage = test_case.build_out_stage(collection) execute_command( collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + {"aggregate": collection.name, "pipeline": [out_stage], "cursor": {}}, ) source_result = execute_command( collection, @@ -251,7 +257,7 @@ def test_out_bson_round_trip(collection, test_case: OutTestCase): ) target_result = execute_command( collection, - {"find": test_case.target_coll, "filter": {}}, + {"find": target, "filter": {}}, ) assertSuccess( target_result, @@ -267,7 +273,6 @@ def test_out_bson_round_trip(collection, test_case: OutTestCase): "large_doc", docs=[{"_id": 1, "data": "x" * (15 * 1_024 * 1_024)}], target_coll="write_large_target", - pipeline=[{"$out": "write_large_target"}], expected=[{"_id": 1}], msg="$out should 
successfully write a 15 MB document", ), @@ -279,13 +284,15 @@ def test_out_bson_round_trip(collection, test_case: OutTestCase): def test_out_large_document(collection, test_case: OutTestCase): """Test $out writes documents up to 15 MB successfully.""" populate_collection(collection, test_case) + target = test_case.resolve_target_coll(collection) + out_stage = test_case.build_out_stage(collection) execute_command( collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + {"aggregate": collection.name, "pipeline": [out_stage], "cursor": {}}, ) result = execute_command( collection, - {"find": test_case.target_coll, "filter": {}, "projection": {"_id": 1}}, + {"find": target, "filter": {}, "projection": {"_id": 1}}, ) assertSuccess(result, test_case.expected, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/out/utils/out_test_helpers.py b/documentdb_tests/compatibility/tests/core/operator/stages/out/utils/out_test_helpers.py index c75db4f1..e269e87b 100644 --- a/documentdb_tests/compatibility/tests/core/operator/stages/out/utils/out_test_helpers.py +++ b/documentdb_tests/compatibility/tests/core/operator/stages/out/utils/out_test_helpers.py @@ -17,7 +17,9 @@ class OutTestCase(StageTestCase): """Data-driven test case for ``$out`` stage tests. Attributes: - target_coll: Name of the output collection. + target_coll: Suffix for the output collection name. At runtime the + full name is ``f"{collection.name}_{target_coll}"`` — call + :meth:`resolve_target_coll` to obtain it. target_db: Target database name. ``None`` means use the current database. out_spec: Extra fields to merge into the ``$out`` document form. expected_type: Expected collection type after ``$out`` runs. 
@@ -30,10 +32,14 @@ class OutTestCase(StageTestCase): expected_type: str = "collection" expected_options: dict[str, Any] | None = None + def resolve_target_coll(self, collection: Collection) -> str: + """Return the full target collection name, unique per test worker.""" + return f"{collection.name}_{self.target_coll}" + def build_out_stage(self, collection: Collection) -> dict[str, Any]: """Build the ``$out`` stage spec from this test case.""" db_name = self.target_db or collection.database.name - target = self.target_coll + target = self.resolve_target_coll(collection) if self.out_spec is not None or self.target_db is not None: spec: dict[str, Any] = {"db": db_name, "coll": target} if self.out_spec: diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_combination_out.py b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_combination_out.py index 53d74e66..bb37e21c 100644 --- a/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_combination_out.py +++ b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_combination_out.py @@ -4,8 +4,10 @@ import pytest +from documentdb_tests.compatibility.tests.core.operator.stages.out.utils.out_test_helpers import ( + OutTestCase, +) from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( - StageTestCase, populate_collection, ) from documentdb_tests.framework.assertions import assertResult @@ -17,8 +19,8 @@ # output, $group aggregates, $sort/$limit/$skip paginate, $unwind expands # arrays, $addFields enriches, $replaceRoot restructures, $redact prunes, # $lookup joins, and $unionWith merges collections. 
-OUT_PIPELINE_INTEGRATION_TESTS: list[StageTestCase] = [ - StageTestCase( +OUT_PIPELINE_INTEGRATION_TESTS: list[OutTestCase] = [ + OutTestCase( "match_equality", docs=[ {"_id": 1, "status": "active", "val": 10}, @@ -27,15 +29,15 @@ ], pipeline=[ {"$match": {"status": "active"}}, - {"$out": "integration_out"}, ], + target_coll="integration_out", expected=[ {"_id": 1, "status": "active", "val": 10}, {"_id": 3, "status": "active", "val": 30}, ], msg="$out should write only the documents that pass the $match filter", ), - StageTestCase( + OutTestCase( "match_comparison", docs=[ {"_id": 1, "val": 5}, @@ -44,15 +46,15 @@ ], pipeline=[ {"$match": {"val": {"$gte": 15}}}, - {"$out": "integration_out"}, ], + target_coll="integration_out", expected=[ {"_id": 2, "val": 15}, {"_id": 3, "val": 25}, ], msg="$out should write documents matching a comparison $match filter", ), - StageTestCase( + OutTestCase( "match_no_results", docs=[ {"_id": 1, "val": 10}, @@ -60,12 +62,12 @@ ], pipeline=[ {"$match": {"val": {"$gt": 100}}}, - {"$out": "integration_out"}, ], + target_coll="integration_out", expected=[], msg="$out should create an empty collection when $match filters all documents", ), - StageTestCase( + OutTestCase( "project_inclusion", docs=[ {"_id": 1, "a": 1, "b": 2, "c": 3}, @@ -73,15 +75,15 @@ ], pipeline=[ {"$project": {"a": 1, "b": 1}}, - {"$out": "integration_out"}, ], + target_coll="integration_out", expected=[ {"_id": 1, "a": 1, "b": 2}, {"_id": 2, "a": 4, "b": 5}, ], msg="$out should write only the fields kept by an inclusion $project", ), - StageTestCase( + OutTestCase( "project_computed", docs=[ {"_id": 1, "x": 10}, @@ -89,15 +91,15 @@ ], pipeline=[ {"$project": {"doubled": {"$multiply": ["$x", 2]}}}, - {"$out": "integration_out"}, ], + target_coll="integration_out", expected=[ {"_id": 1, "doubled": 20}, {"_id": 2, "doubled": 40}, ], msg="$out should write computed fields from a $project stage", ), - StageTestCase( + OutTestCase( "group_sum", docs=[ {"_id": 1, "cat": 
"a", "val": 10}, @@ -106,15 +108,15 @@ ], pipeline=[ {"$group": {"_id": "$cat", "total": {"$sum": "$val"}}}, - {"$out": "integration_out"}, ], + target_coll="integration_out", expected=[ {"_id": "a", "total": 30}, {"_id": "b", "total": 30}, ], msg="$out should write $group $sum results to the target collection", ), - StageTestCase( + OutTestCase( "group_count", docs=[ {"_id": 1, "cat": "x"}, @@ -123,15 +125,15 @@ ], pipeline=[ {"$group": {"_id": "$cat", "n": {"$sum": 1}}}, - {"$out": "integration_out"}, ], + target_coll="integration_out", expected=[ {"_id": "x", "n": 2}, {"_id": "y", "n": 1}, ], msg="$out should write $group count results to the target collection", ), - StageTestCase( + OutTestCase( "sort_limit_top_n", docs=[ {"_id": 1, "val": 50}, @@ -143,8 +145,8 @@ pipeline=[ {"$sort": {"val": -1}}, {"$limit": 3}, - {"$out": "integration_out"}, ], + target_coll="integration_out", expected=[ {"_id": 1, "val": 50}, {"_id": 3, "val": 40}, @@ -152,7 +154,7 @@ ], msg="$out should write the top-N sorted documents after $sort and $limit", ), - StageTestCase( + OutTestCase( "skip_limit_page", docs=[ {"_id": 1, "val": 10}, @@ -165,15 +167,15 @@ {"$sort": {"_id": 1}}, {"$skip": 1}, {"$limit": 2}, - {"$out": "integration_out"}, ], + target_coll="integration_out", expected=[ {"_id": 2, "val": 20}, {"_id": 3, "val": 30}, ], msg="$out should write the paginated window from $skip and $limit", ), - StageTestCase( + OutTestCase( "unwind_group_tag_count", docs=[ {"_id": 1, "tags": ["a", "b"]}, @@ -183,8 +185,8 @@ pipeline=[ {"$unwind": "$tags"}, {"$group": {"_id": "$tags", "count": {"$sum": 1}}}, - {"$out": "integration_out"}, ], + target_coll="integration_out", expected=[ {"_id": "a", "count": 2}, {"_id": "b", "count": 2}, @@ -192,7 +194,7 @@ ], msg="$out should write unwound-then-grouped tag counts to the target collection", ), - StageTestCase( + OutTestCase( "addfields_computed", docs=[ {"_id": 1, "price": 100, "qty": 3}, @@ -200,15 +202,15 @@ ], pipeline=[ {"$addFields": 
{"total": {"$multiply": ["$price", "$qty"]}}}, - {"$out": "integration_out"}, ], + target_coll="integration_out", expected=[ {"_id": 1, "price": 100, "qty": 3, "total": 300}, {"_id": 2, "price": 200, "qty": 1, "total": 200}, ], msg="$out should write documents enriched by $addFields to the target collection", ), - StageTestCase( + OutTestCase( "replaceroot_nested", docs=[ {"_id": 1, "inner": {"a": 10, "b": 20}}, @@ -217,15 +219,15 @@ pipeline=[ {"$replaceRoot": {"newRoot": "$inner"}}, {"$addFields": {"_id": "$a"}}, - {"$out": "integration_out"}, ], + target_coll="integration_out", expected=[ {"_id": 10, "a": 10, "b": 20}, {"_id": 30, "a": 30, "b": 40}, ], msg="$out should write the new root structure after $replaceRoot", ), - StageTestCase( + OutTestCase( "redact_keep_prune", docs=[ {"_id": 1, "level": 1, "data": "public"}, @@ -242,69 +244,15 @@ } } }, - {"$out": "integration_out"}, ], + target_coll="integration_out", expected=[ {"_id": 1, "level": 1, "data": "public"}, {"_id": 3, "level": 2, "data": "internal"}, ], msg="$out should write only documents kept by $redact", ), - StageTestCase( - "lookup_equality", - docs=[ - {"_id": 1, "ref": 1}, - {"_id": 2, "ref": 2}, - ], - setup=lambda c: c.database["integration_foreign"].insert_many( - [ - {"_id": 1, "label": "first"}, - {"_id": 2, "label": "second"}, - ] - ), - pipeline=[ - { - "$lookup": { - "from": "integration_foreign", - "localField": "ref", - "foreignField": "_id", - "as": "joined", - } - }, - {"$project": {"ref": 1, "label": {"$arrayElemAt": ["$joined.label", 0]}}}, - {"$out": "integration_out"}, - ], - expected=[ - {"_id": 1, "ref": 1, "label": "first"}, - {"_id": 2, "ref": 2, "label": "second"}, - ], - msg="$out should write $lookup-joined documents to the target collection", - ), - StageTestCase( - "unionwith_merge", - docs=[ - {"_id": 1, "source": "main"}, - {"_id": 2, "source": "main"}, - ], - setup=lambda c: c.database["integration_foreign"].insert_many( - [ - {"_id": 3, "source": "other"}, - {"_id": 
4, "source": "other"}, - ] - ), - pipeline=[ - {"$unionWith": {"coll": "integration_foreign"}}, - {"$out": "integration_out"}, - ], - expected=[ - {"_id": 1, "source": "main"}, - {"_id": 2, "source": "main"}, - {"_id": 3, "source": "other"}, - {"_id": 4, "source": "other"}, - ], - msg="$out should write $unionWith-merged documents to the target collection", - ), - StageTestCase( + OutTestCase( "match_group_sort_out", docs=[ {"_id": 1, "dept": "eng", "salary": 100}, @@ -317,8 +265,8 @@ {"$match": {"salary": {"$gte": 90}}}, {"$group": {"_id": "$dept", "avg_salary": {"$avg": "$salary"}}}, {"$sort": {"avg_salary": -1}}, - {"$out": "integration_out"}, ], + target_coll="integration_out", expected=[ {"_id": "eng", "avg_salary": 125.0}, {"_id": "hr", "avg_salary": 90.0}, @@ -326,7 +274,7 @@ ], msg="$out should write correctly after $match, $group, and $sort combined", ), - StageTestCase( + OutTestCase( "project_addfields_match_out", docs=[ {"_id": 1, "price": 50, "qty": 4}, @@ -337,8 +285,8 @@ {"$project": {"price": 1, "qty": 1}}, {"$addFields": {"revenue": {"$multiply": ["$price", "$qty"]}}}, {"$match": {"revenue": {"$gte": 200}}}, - {"$out": "integration_out"}, ], + target_coll="integration_out", expected=[ {"_id": 1, "price": 50, "qty": 4, "revenue": 200}, {"_id": 2, "price": 30, "qty": 10, "revenue": 300}, @@ -350,19 +298,21 @@ @pytest.mark.aggregate @pytest.mark.parametrize("test_case", pytest_params(OUT_PIPELINE_INTEGRATION_TESTS)) -def test_out_pipeline_integration(collection, test_case: StageTestCase): +def test_out_pipeline_integration(collection, test_case: OutTestCase): """Test $out pipeline integration with other stages.""" populate_collection(collection, test_case) if test_case.setup: test_case.setup(collection) db = collection.database + target = test_case.resolve_target_coll(collection) + pipeline = list(test_case.pipeline) + [test_case.build_out_stage(collection)] execute_command( collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, 
"cursor": {}}, + {"aggregate": collection.name, "pipeline": pipeline, "cursor": {}}, ) result = execute_command( collection, - {"find": "integration_out", "filter": {}, "sort": {"_id": 1}}, + {"find": target, "filter": {}, "sort": {"_id": 1}}, ) assertResult( result, @@ -370,5 +320,78 @@ def test_out_pipeline_integration(collection, test_case: StageTestCase): error_code=test_case.error_code, msg=test_case.msg, ) - db.drop_collection("integration_out") - db.drop_collection("integration_foreign") + db.drop_collection(target) + + +@pytest.mark.aggregate +def test_out_lookup_equality(collection): + """Test $out after $lookup with equality join.""" + db = collection.database + foreign_name = f"{collection.name}_integration_foreign" + out_name = f"{collection.name}_integration_out" + collection.insert_many([{"_id": 1, "ref": 1}, {"_id": 2, "ref": 2}]) + db[foreign_name].insert_many([{"_id": 1, "label": "first"}, {"_id": 2, "label": "second"}]) + pipeline = [ + { + "$lookup": { + "from": foreign_name, + "localField": "ref", + "foreignField": "_id", + "as": "joined", + } + }, + {"$project": {"ref": 1, "label": {"$arrayElemAt": ["$joined.label", 0]}}}, + {"$out": out_name}, + ] + execute_command( + collection, + {"aggregate": collection.name, "pipeline": pipeline, "cursor": {}}, + ) + result = execute_command( + collection, + {"find": out_name, "filter": {}, "sort": {"_id": 1}}, + ) + assertResult( + result, + expected=[ + {"_id": 1, "ref": 1, "label": "first"}, + {"_id": 2, "ref": 2, "label": "second"}, + ], + msg="$out should write $lookup-joined documents to the target collection", + ) + db.drop_collection(out_name) + db.drop_collection(foreign_name) + + +@pytest.mark.aggregate +def test_out_unionwith_merge(collection): + """Test $out after $unionWith merging two collections.""" + db = collection.database + foreign_name = f"{collection.name}_integration_foreign" + out_name = f"{collection.name}_integration_out" + collection.insert_many([{"_id": 1, "source": "main"}, 
{"_id": 2, "source": "main"}]) + db[foreign_name].insert_many([{"_id": 3, "source": "other"}, {"_id": 4, "source": "other"}]) + pipeline = [ + {"$unionWith": {"coll": foreign_name}}, + {"$out": out_name}, + ] + execute_command( + collection, + {"aggregate": collection.name, "pipeline": pipeline, "cursor": {}}, + ) + result = execute_command( + collection, + {"find": out_name, "filter": {}, "sort": {"_id": 1}}, + ) + assertResult( + result, + expected=[ + {"_id": 1, "source": "main"}, + {"_id": 2, "source": "main"}, + {"_id": 3, "source": "other"}, + {"_id": 4, "source": "other"}, + ], + msg="$out should write $unionWith-merged documents to the target collection", + ) + db.drop_collection(out_name) + db.drop_collection(foreign_name)