diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/__init__.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_errors.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_errors.py
new file mode 100644
index 00000000..d163850f
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_errors.py
@@ -0,0 +1,189 @@
+"""
+Tests for $avg accumulator error handling.
+
+Covers arity validation (rejects array syntax in $group, $bucket, $bucketAuto)
+and expression error propagation ($toInt, $divide, $mod).
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.error_codes import (
+    CONVERSION_FAILURE_ERROR,
+    DIVIDE_BY_ZERO_V2_ERROR,
+    GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+    MODULO_BY_ZERO_V2_ERROR,
+)
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Arity]: $avg in accumulator context is a unary operator and
+# rejects array syntax in $group, $bucket, and $bucketAuto.
+AVG_ARITY_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "arity_multi_element_group",
+        pipeline=[{"$group": {"_id": None, "result": {"$avg": ["$v", "$v"]}}}],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$avg should reject multi-element array syntax in $group",
+    ),
+    AccumulatorTestCase(
+        "arity_empty_array_group",
+        pipeline=[{"$group": {"_id": None, "result": {"$avg": []}}}],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$avg should reject empty array syntax in $group",
+    ),
+    AccumulatorTestCase(
+        "arity_single_element_group",
+        pipeline=[{"$group": {"_id": None, "result": {"$avg": ["$v"]}}}],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$avg should reject single-element array syntax in $group",
+    ),
+    AccumulatorTestCase(
+        "arity_multi_element_bucket",
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": "$v",
+                    "boundaries": [0, 10],
+                    "output": {"result": {"$avg": ["$v", "$v"]}},
+                }
+            }
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$avg should reject multi-element array syntax in $bucket",
+    ),
+    AccumulatorTestCase(
+        "arity_empty_array_bucket",
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": "$v",
+                    "boundaries": [0, 10],
+                    "output": {"result": {"$avg": []}},
+                }
+            }
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$avg should reject empty array syntax in $bucket",
+    ),
+    AccumulatorTestCase(
+        "arity_single_element_bucket",
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": "$v",
+                    "boundaries": [0, 10],
+                    "output": {"result": {"$avg": ["$v"]}},
+                }
+            }
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$avg should reject single-element array syntax in $bucket",
+    ),
+    AccumulatorTestCase(
+        "arity_multi_element_bucket_auto",
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": "$v",
+                    "buckets": 1,
+                    "output": {"result": {"$avg": ["$v", "$v"]}},
+                }
+            }
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$avg should reject multi-element array syntax in $bucketAuto",
+    ),
+    AccumulatorTestCase(
+        "arity_empty_array_bucket_auto",
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": "$v",
+                    "buckets": 1,
+                    "output": {"result": {"$avg": []}},
+                }
+            }
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$avg should reject empty array syntax in $bucketAuto",
+    ),
+    AccumulatorTestCase(
+        "arity_single_element_bucket_auto",
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": "$v",
+                    "buckets": 1,
+                    "output": {"result": {"$avg": ["$v"]}},
+                }
+            }
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$avg should reject single-element array syntax in $bucketAuto",
+    ),
+]
+
+# Property [Expression Error Propagation]: errors from sub-expressions
+# propagate through $avg without being caught or suppressed.
+AVG_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "error_prop_toint_non_convertible",
+        docs=[{"v": "hello"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": {"$toInt": "$v"}}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=CONVERSION_FAILURE_ERROR,
+        msg="$avg should propagate $toInt conversion error for non-convertible value",
+    ),
+    AccumulatorTestCase(
+        "error_prop_divide_by_zero",
+        docs=[{"v": 10}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": {"$divide": ["$v", 0]}}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=DIVIDE_BY_ZERO_V2_ERROR,
+        msg="$avg should propagate $divide by zero error",
+    ),
+    AccumulatorTestCase(
+        "error_prop_mod_by_zero",
+        docs=[{"v": 10}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": {"$mod": ["$v", 0]}}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=MODULO_BY_ZERO_V2_ERROR,
+        msg="$avg should propagate $mod by zero error",
+    ),
+]
+
+AVG_ERROR_TESTS: list[AccumulatorTestCase] = AVG_ARITY_TESTS + AVG_EXPRESSION_ERROR_TESTS
+
+
+@pytest.mark.parametrize("test_case", pytest_params(AVG_ERROR_TESTS))
+def test_avg_errors(collection, test_case: AccumulatorTestCase):
+    """Run one $avg error case and check the result against its expected error code."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    else:  # case supplies no docs (the arity cases): seed a single {"v": 1} document
+        collection.insert_one({"v": 1})
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(
+        result,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py
new file mode 100644
index 00000000..45173106
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py
@@ -0,0 +1,217 @@
+"""
+Tests for $avg accumulator expression types and field lookup in $group context.
+
+Covers expression types (literal, field path, computed expressions, conditional)
+and field path resolution (simple, nested, missing, null intermediate, array-valued fields).
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Expression Type]: $avg accepts field paths, computed expressions,
+# literals, and conditional expressions in $group context.
+
+AVG_EXPRESSION_TYPE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "field_path",
+        docs=[
+            {"_id": 1, "value": 10},
+            {"_id": 2, "value": 20},
+            {"_id": 3, "value": 30},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$value"}}}],
+        expected=[{"_id": None, "avg": 20.0}],
+        msg="$avg with field path should average field values",
+    ),
+    AccumulatorTestCase(
+        "computed_expression",
+        docs=[
+            {"_id": 1, "a": 2, "b": 3},
+            {"_id": 2, "a": 4, "b": 6},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": {"$multiply": ["$a", "$b"]}}}}],
+        # (2*3 + 4*6) / 2 = (6 + 24) / 2 = 15
+        expected=[{"_id": None, "avg": 15.0}],
+        msg="$avg with computed expression should average computed values",
+    ),
+    AccumulatorTestCase(
+        "literal_numeric",
+        docs=[
+            {"_id": 1},
+            {"_id": 2},
+            {"_id": 3},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": 5}}}],
+        expected=[{"_id": None, "avg": 5.0}],
+        msg="$avg with literal numeric should return that constant",
+    ),
+    AccumulatorTestCase(
+        "literal_null",
+        docs=[{"_id": 1}, {"_id": 2}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": None}}}],
+        expected=[{"_id": None, "avg": None}],
+        msg="$avg with null literal should return null",
+    ),
+    AccumulatorTestCase(
+        "cond_expression",
+        docs=[
+            {"_id": 1, "value": 10, "include": True},
+            {"_id": 2, "value": 20, "include": False},
+            {"_id": 3, "value": 30, "include": True},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "avg": {
+                        "$avg": {
+                            "$cond": [
+                                "$include",
+                                "$value",
+                                None,
+                            ]
+                        }
+                    },
+                }
+            },
+        ],
+        # Only values 10 and 30 contribute (null is ignored), avg = 20
+        expected=[{"_id": None, "avg": 20.0}],
+        msg="$avg with $cond should average only non-null conditional results",
+    ),
+    AccumulatorTestCase(
+        "ifnull_expression",
+        docs=[
+            {"_id": 1, "value": 10},
+            {"_id": 2},
+            {"_id": 3, "value": 30},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "avg": {"$avg": {"$ifNull": ["$value", 0]}},
+                }
+            },
+        ],
+        # (10 + 0 + 30) / 3 = 13.333...
+        expected=[{"_id": None, "avg": 13.333333333333334}],
+        msg="$avg with $ifNull should replace missing with 0",
+    ),
+]
+
+# Property [Field Resolution]: field path resolution behaviors with $avg in $group context.
+
+AVG_FIELD_RESOLUTION_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "nested_field_path",
+        docs=[
+            {"_id": 1, "nested": {"value": 10}},
+            {"_id": 2, "nested": {"value": 20}},
+            {"_id": 3, "nested": {"value": 30}},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$nested.value"}}}],
+        expected=[{"_id": None, "avg": 20.0}],
+        msg="$avg with nested field path should resolve and average",
+    ),
+    AccumulatorTestCase(
+        "missing_field",
+        docs=[
+            {"_id": 1, "value": 10},
+            {"_id": 2, "value": 20},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$nonexistent"}}}],
+        expected=[{"_id": None, "avg": None}],
+        msg="$avg with non-existent field should return null",
+    ),
+    AccumulatorTestCase(
+        "field_resolves_to_array",
+        docs=[
+            {"_id": 1, "value": [1, 2, 3]},
+            {"_id": 2, "value": [4, 5, 6]},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$value"}}}],
+        expected=[{"_id": None, "avg": None}],
+        msg="$avg in $group should treat array values as non-numeric",
+    ),
+    AccumulatorTestCase(
+        "mixed_array_and_numeric",
+        docs=[
+            {"_id": 1, "value": [1, 2, 3]},
+            {"_id": 2, "value": 10},
+            {"_id": 3, "value": 20},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$value"}}}],
+        # Array is ignored: (10 + 20) / 2 = 15
+        expected=[{"_id": None, "avg": 15.0}],
+        msg="$avg in $group should ignore array values and average numerics",
+    ),
+    AccumulatorTestCase(
+        "deeply_nested_path",
+        docs=[
+            {"_id": 1, "a": {"b": {"c": {"d": 10}}}},
+            {"_id": 2, "a": {"b": {"c": {"d": 20}}}},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$a.b.c.d"}}}],
+        expected=[{"_id": None, "avg": 15.0}],
+        msg="$avg with deeply nested path should resolve correctly",
+    ),
+    AccumulatorTestCase(
+        "intermediate_null",
+        docs=[
+            {"_id": 1, "a": {"b": 10}},
+            {"_id": 2, "a": None},
+            {"_id": 3, "a": {"b": 30}},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$a.b"}}}],
+        # Doc 2 has null intermediate, treated as missing: (10 + 30) / 2 = 20
+        expected=[{"_id": None, "avg": 20.0}],
+        msg="$avg should treat null intermediate as missing",
+    ),
+    AccumulatorTestCase(
+        "multiple_accumulators",  # two independent $avg outputs in a single $group stage
+        docs=[
+            {"_id": 1, "a": 10, "b": 100},
+            {"_id": 2, "a": 20, "b": 200},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "avg_a": {"$avg": "$a"},
+                    "avg_b": {"$avg": "$b"},
+                }
+            },
+        ],
+        expected=[{"_id": None, "avg_a": 15.0, "avg_b": 150.0}],  # (10 + 20) / 2, (100 + 200) / 2
+        msg="Multiple $avg accumulators should work independently",
+    ),
+]
+
+AVG_FIELD_LOOKUP_TESTS: list[AccumulatorTestCase] = (
+    AVG_EXPRESSION_TYPE_TESTS + AVG_FIELD_RESOLUTION_TESTS
+)
+
+
+@pytest.mark.parametrize("test_case", pytest_params(AVG_FIELD_LOOKUP_TESTS))
+def test_avg_field_lookup(collection, test_case: AccumulatorTestCase):
+    """Run one $avg expression/field-lookup case and compare the aggregate output to expected."""
+    if test_case.docs:  # skip the insert when a case supplies no documents
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(result, expected=test_case.expected, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py
new file mode 100644
index 00000000..a1ed3c86
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py
@@ -0,0 +1,519 @@
+"""
+Tests for $avg accumulator boundary values and overflow in $group context.
+
+Covers int32/int64 boundary values, double boundary values (subnormal, normal,
+near-max), Decimal128 precision and boundary values, and sum overflow behavior.
+"""
+
+from __future__ import annotations
+
+import pytest
+from bson import Decimal128, Int64
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+from documentdb_tests.framework.test_constants import (
+    DECIMAL128_INFINITY,
+    DECIMAL128_INT64_OVERFLOW,
+    DECIMAL128_LARGE_EXPONENT,
+    DECIMAL128_MAX,
+    DECIMAL128_MIN,
+    DECIMAL128_MIN_POSITIVE,
+    DECIMAL128_SMALL_EXPONENT,
+    DECIMAL128_TRAILING_ZERO,
+    DOUBLE_FROM_INT64_MAX,
+    DOUBLE_MAX,
+    DOUBLE_MAX_SAFE_INTEGER,
+    DOUBLE_MIN_NEGATIVE_SUBNORMAL,
+    DOUBLE_MIN_NORMAL,
+    DOUBLE_MIN_SUBNORMAL,
+    DOUBLE_NEAR_MAX,
+    DOUBLE_NEAR_MIN,
+    DOUBLE_ZERO,
+    FLOAT_INFINITY,
+    INT32_MAX,
+    INT32_MAX_MINUS_1,
+    INT32_MIN,
+    INT64_MAX,
+    INT64_MAX_MINUS_1,
+    INT64_MIN,
+    INT64_MIN_PLUS_1,
+    INT64_ZERO,
+)
+
+# Property [Integer Boundaries]: $avg handles int32 and int64 boundary values
+# including MAX, MIN, adjacent values, and overflow combinations.
+AVG_INT_BOUNDARY_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        id="int32_zeros",
+        docs=[{"v": 0}, {"v": 0}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_ZERO}],
+        msg="$avg should return 0.0 for two int32 zeros",
+    ),
+    AccumulatorTestCase(
+        id="int32_one_neg_one",
+        docs=[{"v": 1}, {"v": -1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_ZERO}],
+        msg="$avg should return 0.0 for int32 1 and -1",
+    ),
+    AccumulatorTestCase(
+        id="int32_max_pair",
+        docs=[{"_id": 0, "v": INT32_MAX}, {"_id": 1, "v": INT32_MAX}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": float(INT32_MAX)}],
+        msg="avg of two INT32_MAX should return INT32_MAX as double",
+    ),
+    AccumulatorTestCase(
+        id="int32_min_pair",
+        docs=[{"_id": 0, "v": INT32_MIN}, {"_id": 1, "v": INT32_MIN}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": float(INT32_MIN)}],
+        msg="avg of two INT32_MIN should return INT32_MIN as double",
+    ),
+    AccumulatorTestCase(
+        id="int32_max_and_min",
+        docs=[{"_id": 0, "v": INT32_MAX}, {"_id": 1, "v": INT32_MIN}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        # (2147483647 + -2147483648) / 2 = -0.5
+        expected=[{"_id": None, "avg": -0.5}],
+        msg="avg of INT32_MAX and INT32_MIN should be -0.5",
+    ),
+    AccumulatorTestCase(
+        id="int32_adjacent_max",
+        docs=[{"v": INT32_MAX_MINUS_1}, {"v": INT32_MAX}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": 2_147_483_646.5}],
+        msg="$avg of adjacent int32 MAX values should produce exact double",
+    ),
+    AccumulatorTestCase(
+        id="int32_adjacent_min",
+        docs=[{"v": INT32_MIN}, {"v": INT32_MIN + 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": -2_147_483_647.5}],
+        msg="$avg of adjacent int32 MIN values should produce exact double",
+    ),
+    AccumulatorTestCase(
+        id="int64_max_pair",
+        docs=[{"_id": 0, "v": INT64_MAX}, {"_id": 1, "v": INT64_MAX}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": 9.223372036854776e18}],  # == float(2**63)
+        msg="avg of two INT64_MAX should handle overflow",
+    ),
+    AccumulatorTestCase(
+        id="int64_min_pair",
+        docs=[{"_id": 0, "v": INT64_MIN}, {"_id": 1, "v": INT64_MIN}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": -9.223372036854776e18}],  # == float(-(2**63))
+        msg="avg of two INT64_MIN should handle overflow",
+    ),
+    AccumulatorTestCase(
+        id="int64_max_and_zero",
+        docs=[{"v": INT64_MAX}, {"v": INT64_ZERO}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_FROM_INT64_MAX / 2}],
+        msg="$avg should handle int64 MAX with precision loss in double",
+    ),
+    AccumulatorTestCase(
+        id="int64_max_and_min",
+        docs=[{"v": INT64_MAX}, {"v": INT64_MIN}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": -0.5}],  # expects the exact sum -1 halved, i.e. no precision loss
+        msg="$avg should handle int64 MAX and MIN together",
+    ),
+    AccumulatorTestCase(
+        id="int64_max_and_one",
+        docs=[{"_id": 0, "v": INT64_MAX}, {"_id": 1, "v": Int64(1)}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": 4.611686018427388e18}],  # == float(2**62)
+        msg="avg of INT64_MAX and 1",
+    ),
+    AccumulatorTestCase(
+        id="int64_adjacent_max",
+        docs=[{"v": INT64_MAX_MINUS_1}, {"v": INT64_MAX}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_FROM_INT64_MAX}],
+        msg="$avg of adjacent int64 MAX values should produce double with precision loss",
+    ),
+    AccumulatorTestCase(
+        id="int64_adjacent_min",
+        docs=[{"v": INT64_MIN_PLUS_1}, {"v": INT64_MIN}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": -DOUBLE_FROM_INT64_MAX}],
+        msg="$avg of adjacent int64 MIN values should produce double with precision loss",
+    ),
+]
+
+# Property [Double Boundaries]: $avg handles double boundary values
+# including subnormal, minimum normal, near-max, and max safe integer.
+AVG_DOUBLE_BOUNDARY_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        id="double_whole_number",
+        docs=[{"v": 3.0}, {"v": 5.0}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": 4.0}],
+        msg="$avg should produce correct average for whole-number floats",
+    ),
+    AccumulatorTestCase(
+        id="double_subnormal_positive",
+        docs=[{"v": DOUBLE_MIN_SUBNORMAL}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_MIN_SUBNORMAL}],
+        msg="$avg should handle positive subnormal value correctly",
+    ),
+    AccumulatorTestCase(
+        id="double_subnormal_negative",
+        docs=[{"v": DOUBLE_MIN_NEGATIVE_SUBNORMAL}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_MIN_NEGATIVE_SUBNORMAL}],
+        msg="$avg should handle negative subnormal value correctly",
+    ),
+    AccumulatorTestCase(
+        id="double_subnormal_pair",
+        docs=[
+            {"_id": 0, "v": DOUBLE_MIN_SUBNORMAL},
+            {"_id": 1, "v": DOUBLE_MIN_SUBNORMAL},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": DOUBLE_MIN_SUBNORMAL}],
+        msg="avg of two subnormal doubles should return subnormal",
+    ),
+    AccumulatorTestCase(
+        id="double_min_normal",
+        docs=[{"v": DOUBLE_MIN_NORMAL}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_MIN_NORMAL}],
+        msg="$avg should handle smallest positive normal double correctly",
+    ),
+    AccumulatorTestCase(
+        id="double_max_single",
+        docs=[{"v": DOUBLE_MAX}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_MAX}],
+        msg="$avg should handle DBL_MAX as a single value correctly",
+    ),
+    AccumulatorTestCase(
+        id="double_max_safe_integer",
+        docs=[{"v": float(DOUBLE_MAX_SAFE_INTEGER)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": float(DOUBLE_MAX_SAFE_INTEGER)}],
+        msg="$avg should handle max safe integer value correctly",
+    ),
+    AccumulatorTestCase(
+        id="double_max_safe_integer_pair",
+        docs=[
+            {"v": float(DOUBLE_MAX_SAFE_INTEGER)},
+            {"v": float(DOUBLE_MAX_SAFE_INTEGER)},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": float(DOUBLE_MAX_SAFE_INTEGER)}],
+        msg="$avg of two max safe integer values should return that value",
+    ),
+    AccumulatorTestCase(
+        id="double_near_min_pair",
+        docs=[{"v": DOUBLE_NEAR_MIN}, {"v": DOUBLE_NEAR_MIN}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_NEAR_MIN}],
+        msg="$avg should handle values near minimum normal correctly",
+    ),
+    AccumulatorTestCase(
+        id="double_near_max_single",
+        docs=[{"v": DOUBLE_NEAR_MAX}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_NEAR_MAX}],
+        msg="$avg should handle values near maximum finite correctly",
+    ),
+]
+
+# Property [Decimal128 Precision]: $avg preserves Decimal128 precision
+# across extreme exponent differences, trailing zeros, and boundary values.
+AVG_DECIMAL128_BOUNDARY_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        id="decimal128_full_precision",
+        docs=[
+            {"v": Decimal128("1.000000000000000000000000000000001")},
+            {"v": Decimal128("1.000000000000000000000000000000003")},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": Decimal128("1.000000000000000000000000000000002")}],
+        msg="$avg should preserve full 34-digit Decimal128 precision",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_high_precision",
+        docs=[
+            {
+                "_id": 0,
+                "v": Decimal128("1.000000000000000000000000000000001"),
+            },
+            {
+                "_id": 1,
+                "v": Decimal128("2.999999999999999999999999999999999"),
+            },
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": Decimal128("2.000000000000000000000000000000000")}],
+        msg="decimal128 avg should preserve high precision",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_34_digit_integer",
+        docs=[
+            {"v": Decimal128("1234567890123456789012345678901234")},
+            {"v": Decimal128("1234567890123456789012345678901234")},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": Decimal128("1234567890123456789012345678901234")}],
+        msg="$avg should preserve 34-digit integer Decimal128 values",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_trailing_zeros",
+        docs=[{"v": Decimal128("2.00")}, {"v": Decimal128("4.00")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": Decimal128("3.00")}],
+        msg="$avg should preserve trailing zeros in Decimal128 results",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_trailing_zeros_single_digit",
+        docs=[{"v": DECIMAL128_TRAILING_ZERO}, {"v": Decimal128("3.0")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": Decimal128("2.0")}],
+        msg="$avg should preserve single trailing zero in Decimal128 results",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_subnormal_pair",
+        docs=[{"v": DECIMAL128_MIN_POSITIVE}, {"v": DECIMAL128_MIN_POSITIVE}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DECIMAL128_MIN_POSITIVE}],
+        msg="$avg should handle Decimal128 subnormal values correctly",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_subnormal_single",
+        docs=[{"v": DECIMAL128_MIN_POSITIVE}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DECIMAL128_MIN_POSITIVE}],
+        msg="$avg should handle a single Decimal128 subnormal value",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_near_max_single",
+        docs=[{"v": DECIMAL128_MAX}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DECIMAL128_MAX}],
+        msg="$avg should handle a single near-maximum Decimal128 value",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_near_max_with_small",
+        docs=[{"v": DECIMAL128_MAX}, {"v": Decimal128("1")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": Decimal128("5.000000000000000000000000000000000E+6144")}],
+        msg="$avg should handle near-maximum Decimal128 averaged with a small value",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_max_and_min",
+        docs=[{"_id": 0, "v": DECIMAL128_MAX}, {"_id": 1, "v": DECIMAL128_MIN}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": Decimal128("0")}],  # assumes MIN == -MAX; TODO confirm
+        msg="avg of DECIMAL128_MAX and DECIMAL128_MIN",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_large_exponent",
+        docs=[
+            {"_id": 0, "v": DECIMAL128_LARGE_EXPONENT},
+            {"_id": 1, "v": DECIMAL128_LARGE_EXPONENT},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": DECIMAL128_LARGE_EXPONENT}],
+        msg="avg of two identical large exponent values should return same value",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_small_exponent",
+        docs=[
+            {"_id": 0, "v": DECIMAL128_SMALL_EXPONENT},
+            {"_id": 1, "v": DECIMAL128_SMALL_EXPONENT},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": DECIMAL128_SMALL_EXPONENT}],
+        msg="avg of two identical small exponent values should return same value",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_extreme_exponent_diff",
+        docs=[
+            {"_id": 0, "v": Decimal128("1E+6144")},
+            {"_id": 1, "v": Decimal128("1")},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[
+            {
+                "_id": None,
+                "avg": Decimal128("5.00000000000000000000000000000000E+6143"),
+            }
+        ],
+        msg="avg with extreme exponent difference",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_exceeds_int64",
+        docs=[
+            {"v": DECIMAL128_INT64_OVERFLOW},
+            {"v": DECIMAL128_INT64_OVERFLOW},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DECIMAL128_INT64_OVERFLOW}],
+        msg="$avg should produce Decimal128 for values exceeding int64 range",
+    ),
+]
+
+# Property [Overflow]: sum overflow during accumulation produces Infinity for
+# doubles and Decimal128, and int32/int64 overflow is handled via type
+# promotion without error.
+AVG_OVERFLOW_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        id="overflow_double_near_max_pair",
+        docs=[{"_id": 0, "v": DOUBLE_NEAR_MAX}, {"_id": 1, "v": DOUBLE_NEAR_MAX}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": FLOAT_INFINITY}],  # shared constant, as in overflow_double_max
+        msg="avg of two DOUBLE_NEAR_MAX overflows sum to inf",
+    ),
+    AccumulatorTestCase(
+        id="overflow_double_max",
+        docs=[{"v": DOUBLE_MAX}, {"v": DOUBLE_MAX}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": FLOAT_INFINITY}],
+        msg="$avg should return Infinity when two DBL_MAX values overflow the sum",
+    ),
+    AccumulatorTestCase(
+        id="overflow_decimal128_max",
+        docs=[{"v": DECIMAL128_MAX}, {"v": DECIMAL128_MAX}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DECIMAL128_INFINITY}],
+        msg="$avg should return Decimal128 Infinity when two Decimal128 max values overflow",
+    ),
+    AccumulatorTestCase(
+        id="overflow_int32_sum",
+        docs=[{"v": INT32_MAX}, {"v": INT32_MAX}, {"v": INT32_MAX}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": float(INT32_MAX)}],
+        msg="$avg should handle int32 sum overflow via type promotion without error",
+    ),
+    AccumulatorTestCase(
+        id="overflow_int64_sum",
+        docs=[{"v": INT64_MAX}, {"v": INT64_MAX}, {"v": INT64_MAX}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_FROM_INT64_MAX}],
+        msg="$avg should handle int64 sum overflow by converting to double",
+    ),
+]
+
+AVG_GROUP_BOUNDARY_TESTS: list[AccumulatorTestCase] = (
+    AVG_INT_BOUNDARY_TESTS
+    + AVG_DOUBLE_BOUNDARY_TESTS
+    + AVG_DECIMAL128_BOUNDARY_TESTS
+    + AVG_OVERFLOW_TESTS
+)
+
+
+@pytest.mark.parametrize("test_case", pytest_params(AVG_GROUP_BOUNDARY_TESTS))
+def test_avg_group_boundaries(collection, test_case: AccumulatorTestCase):
+    """Run one $avg boundary/overflow case in $group and compare the output to expected."""
+    collection.insert_many(test_case.docs)  # unguarded: every boundary case must define docs
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(result, expected=test_case.expected, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py
new file mode 100644
index 00000000..b2a2ca4d
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py
@@ -0,0 +1,327 @@
+"""
+Tests for $avg accumulator in $group context.
+
+Covers numeric equivalence in grouping, single/empty groups,
+precision edge cases, multiple groups, and comparison with $sum.
+"""
+
+from __future__ import annotations
+
+import pytest
+from bson import Decimal128, Int64
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Numeric Equivalence]: numerically equivalent group keys
+# (int32, int64, double, Decimal128) produce a single group.
+
+NUMERIC_EQUIVALENCE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        id="numeric_equivalence_grouping",
+        docs=[
+            {"_id": 1, "key": 1, "value": 10},
+            {"_id": 2, "key": Int64(1), "value": 20},
+            {"_id": 3, "key": 1.0, "value": 30},
+            {"_id": 4, "key": Decimal128("1"), "value": 40},
+        ],
+        pipeline=[
+            {"$group": {"_id": "$key", "avg": {"$avg": "$value"}}},
+        ],
+        expected=[{"_id": 1, "avg": 25.0}],  # one group: (10 + 20 + 30 + 40) / 4
+        msg="Numerically equivalent group keys should produce a single group",
+    ),
+    AccumulatorTestCase(
+        id="zero_equivalence",
+        docs=[
+            {"_id": 1, "key": 0, "value": 10},
+            {"_id": 2, "key": Int64(0), "value": 20},
+            {"_id": 3, "key": 0.0, "value": 30},
+            {"_id": 4, "key": Decimal128("0"), "value": 40},
+        ],
+        pipeline=[
+            {"$group": {"_id": "$key", "avg": {"$avg": "$value"}}},
+        ],
+        expected=[{"_id": 0, "avg": 25.0}],  # one group: (10 + 20 + 30 + 40) / 4
+        msg="All zero representations should group together",
+    ),
+]
+
+# Property [Single and Empty Groups]: $avg returns correct results for
+# single-document groups, empty collections, and null group IDs.
SINGLE_EMPTY_GROUP_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        id="single_document",
        docs=[{"_id": 1, "category": "A", "value": 42}],
        pipeline=[
            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
            {"$sort": {"_id": 1}},
        ],
        expected=[{"_id": "A", "avg": 42.0}],
        msg="$avg of single document should return that value as double",
    ),
    AccumulatorTestCase(
        id="single_document_non_numeric",
        docs=[{"_id": 1, "category": "A", "value": "hello"}],
        pipeline=[
            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
            {"$sort": {"_id": 1}},
        ],
        expected=[{"_id": "A", "avg": None}],
        msg="$avg of single non-numeric document should return null",
    ),
    AccumulatorTestCase(
        id="single_document_null",
        docs=[{"_id": 1, "category": "A", "value": None}],
        pipeline=[
            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
            {"$sort": {"_id": 1}},
        ],
        expected=[{"_id": "A", "avg": None}],
        msg="$avg of single null document should return null",
    ),
    AccumulatorTestCase(
        id="single_document_missing_field",
        docs=[{"_id": 1, "category": "A"}],
        pipeline=[
            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
            {"$sort": {"_id": 1}},
        ],
        expected=[{"_id": "A", "avg": None}],
        msg="$avg of single document with missing field should return null",
    ),
    # docs=None: the runner below inserts nothing, so the pipeline runs
    # against a collection that received no documents.
    AccumulatorTestCase(
        id="empty_collection",
        docs=None,
        pipeline=[
            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
        ],
        expected=[],
        msg="$avg on empty collection should produce no output",
    ),
    # The $match on category "Z" matches neither inserted document, so $group
    # receives no input.
    AccumulatorTestCase(
        id="all_filtered_out",
        docs=[
            {"_id": 1, "category": "A", "value": 10},
            {"_id": 2, "category": "A", "value": 20},
        ],
        pipeline=[
            {"$match": {"category": "Z"}},
            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
        ],
        expected=[],
        msg="$avg after filtering all documents should produce no output",
    ),
    # (10 + 20 + 30) / 3 = 20.0 over the whole collection.
    AccumulatorTestCase(
        id="null_id",
        docs=[
            {"_id": 1, "value": 10},
            {"_id": 2, "value": 20},
            {"_id": 3, "value": 30},
        ],
        pipeline=[
            {"$group": {"_id": None, "avg": {"$avg": "$value"}}},
        ],
        expected=[{"_id": None, "avg": 20.0}],
        msg="$avg with _id: null should average entire collection",
    ),
    AccumulatorTestCase(
        id="single_document_int64",
        docs=[{"v": Int64(42)}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": 42.0}],
        msg="$avg should return the value as double for a single int64 document",
    ),
]

# Property [Precision]: $avg produces correct fractional and repeating
# decimal results and handles large document counts.

PRECISION_EDGE_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        id="odd_sum_two_int32",
        docs=[
            {"_id": 1, "category": "A", "value": 1},
            {"_id": 2, "category": "A", "value": 2},
        ],
        pipeline=[
            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
            {"$sort": {"_id": 1}},
        ],
        expected=[{"_id": "A", "avg": 1.5}],
        msg="$avg of 1 and 2 should return 1.5",
    ),
    # (1 + 1 + 2) / 3 = 4/3, written out as the double literal below.
    AccumulatorTestCase(
        id="repeating_decimal",
        docs=[
            {"_id": 1, "category": "A", "value": 1},
            {"_id": 2, "category": "A", "value": 1},
            {"_id": 3, "category": "A", "value": 2},
        ],
        pipeline=[
            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
        ],
        expected=[{"_id": "A", "avg": 1.3333333333333333}],
        msg="$avg of 1,1,2 should return 4/3",
    ),
    # Sum of 1..100 is 5050; 5050 / 100 = 50.5.
    AccumulatorTestCase(
        id="sequence_1_to_100",
        docs=[{"_id": i, "category": "A", "value": i} for i in range(1, 101)],
        pipeline=[
            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
            {"$sort": {"_id": 1}},
        ],
        expected=[{"_id": "A", "avg": 50.5}],
        msg="$avg of 1..100 should return 50.5",
    ),
    AccumulatorTestCase(
        id="large_count_identical",
        docs=[{"_id": i, "category": "A", "value": 7} for i in range(1000)],
        pipeline=[
            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
            {"$sort": {"_id": 1}},
        ],
        expected=[{"_id": "A", "avg": 7.0}],
        msg="$avg of 1000 identical values should return that value",
    ),
]

# Property [Multiple Groups]: $avg computes independent averages per group
# with different counts, null groups, and mixed types.

MULTIPLE_GROUPS_TESTS: list[AccumulatorTestCase] = [
    # A: 10 / 1 = 10; B: (20 + 40) / 2 = 30; C: (5 + 10 + 15) / 3 = 10.
    AccumulatorTestCase(
        id="different_counts",
        docs=[
            {"_id": 1, "category": "A", "value": 10},
            {"_id": 2, "category": "B", "value": 20},
            {"_id": 3, "category": "B", "value": 40},
            {"_id": 4, "category": "C", "value": 5},
            {"_id": 5, "category": "C", "value": 10},
            {"_id": 6, "category": "C", "value": 15},
        ],
        pipeline=[
            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
            {"$sort": {"_id": 1}},
        ],
        expected=[
            {"_id": "A", "avg": 10.0},
            {"_id": "B", "avg": 30.0},
            {"_id": "C", "avg": 10.0},
        ],
        msg="$avg should compute correct average per group with different counts",
    ),
    AccumulatorTestCase(
        id="one_all_nulls_one_all_numeric",
        docs=[
            {"_id": 1, "category": "A", "value": None},
            {"_id": 2, "category": "A", "value": None},
            {"_id": 3, "category": "B", "value": 10},
            {"_id": 4, "category": "B", "value": 20},
        ],
        pipeline=[
            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
            {"$sort": {"_id": 1}},
        ],
        expected=[
            {"_id": "A", "avg": None},
            {"_id": "B", "avg": 15.0},
        ],
        msg="Group with all nulls returns null, group with numerics returns average",
    ),
    # The $sort on _id puts "dec" before "int" in the expected output.
    AccumulatorTestCase(
        id="mixed_types_per_group",
        docs=[
            {"_id": 1, "category": "int", "value": 10},
            {"_id": 2, "category": "int", "value": 20},
            {"_id": 3, "category": "dec", "value": Decimal128("10")},
            {"_id": 4, "category": "dec", "value": Decimal128("20")},
        ],
        pipeline=[
            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
            {"$sort": {"_id": 1}},
        ],
        expected=[
            {"_id": "dec", "avg": Decimal128("15")},
            {"_id": "int", "avg": 15.0},
        ],
        msg="Int group returns double, Decimal128 group returns Decimal128",
    ),
]

# Property [Comparison with Related Operators]: $avg results are consistent
# with $sum/$count, and non-numeric handling differs from $sum.

COMPARISON_WITH_RELATED_TESTS: list[AccumulatorTestCase] = [
    # sum = 100 and count = 4 are returned next to avg = 25.0 so the
    # relationship avg == sum / count is visible in one result document.
    AccumulatorTestCase(
        id="equals_sum_divided_by_count",
        docs=[
            {"_id": 1, "category": "A", "value": 10},
            {"_id": 2, "category": "A", "value": 20},
            {"_id": 3, "category": "A", "value": 30},
            {"_id": 4, "category": "A", "value": 40},
        ],
        pipeline=[
            {
                "$group": {
                    "_id": "$category",
                    "avg": {"$avg": "$value"},
                    "sum": {"$sum": "$value"},
                    "count": {"$sum": 1},
                }
            },
        ],
        expected=[{"_id": "A", "avg": 25.0, "sum": 100, "count": 4}],
        msg="$avg should equal $sum / count",
    ),
    AccumulatorTestCase(
        id="vs_sum_non_numeric_handling",
        docs=[
            {"_id": 1, "category": "A", "value": "hello"},
            {"_id": 2, "category": "A", "value": "world"},
        ],
        pipeline=[
            {
                "$group": {
                    "_id": "$category",
                    "avg": {"$avg": "$value"},
                    "sum": {"$sum": "$value"},
                }
            },
        ],
        expected=[{"_id": "A", "avg": None, "sum": 0}],
        msg="$avg returns null for non-numeric but $sum returns 0",
    ),
]

# Every table above, concatenated in declaration order, feeds the single
# parametrized test below.
AVG_GROUP_CONTEXT_TESTS: list[AccumulatorTestCase] = (
    NUMERIC_EQUIVALENCE_TESTS
    + SINGLE_EMPTY_GROUP_TESTS
    + PRECISION_EDGE_TESTS
    + MULTIPLE_GROUPS_TESTS
    + COMPARISON_WITH_RELATED_TESTS
)


@pytest.mark.parametrize("test_case", pytest_params(AVG_GROUP_CONTEXT_TESTS))
def test_avg_group_context(collection, test_case: AccumulatorTestCase) -> None:
    """Test $avg in $group context with grouping behavior.

    Inserts the case's documents (skipped when ``docs`` is empty or None),
    runs the case's pipeline through the ``aggregate`` command, and compares
    the outcome with ``test_case.expected``.
    """
    # Cases such as "empty_collection" carry docs=None, so the insert is
    # skipped for them.
    if test_case.docs:
        collection.insert_many(test_case.docs)
    result = execute_command(
        collection,
        {
            "aggregate": collection.name,
            "pipeline": test_case.pipeline,
            "cursor": {},
        },
    )
    assertResult(result, expected=test_case.expected, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py
b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py new file mode 100644 index 00000000..5397303e --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py @@ -0,0 +1,258 @@ +""" +Tests for $avg accumulator type promotion and return type in $group context. + +Covers type promotion rules (int32, int64, double, Decimal128), return type +verification via $type, and negative zero normalization. +""" + +from __future__ import annotations + +import pytest +from bson import Decimal128, Int64 + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import ( + DECIMAL128_NEGATIVE_ZERO, + DECIMAL128_ZERO, + DOUBLE_NEGATIVE_ZERO, + DOUBLE_ZERO, +) + +# Property [Type Promotion]: $avg returns double for integer and double inputs, +# and Decimal128 when any input is Decimal128. 
AVG_TYPE_PROMOTION_TESTS: list[AccumulatorTestCase] = [
    # Homogeneous inputs: (10 + 20 + 30) / 3 = 20 for each numeric type.
    AccumulatorTestCase(
        "all_int32",
        docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": 20}, {"_id": 2, "v": 30}],
        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
        expected=[{"_id": None, "avg": 20.0}],
        msg="int32 avg should return double",
    ),
    AccumulatorTestCase(
        "all_int64",
        docs=[
            {"_id": 0, "v": Int64(10)},
            {"_id": 1, "v": Int64(20)},
            {"_id": 2, "v": Int64(30)},
        ],
        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
        expected=[{"_id": None, "avg": 20.0}],
        msg="int64 avg should return double",
    ),
    AccumulatorTestCase(
        "all_double",
        docs=[{"_id": 0, "v": 10.0}, {"_id": 1, "v": 20.0}, {"_id": 2, "v": 30.0}],
        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
        expected=[{"_id": None, "avg": 20.0}],
        msg="double avg should return double",
    ),
    AccumulatorTestCase(
        "all_decimal128",
        docs=[
            {"_id": 0, "v": Decimal128("10")},
            {"_id": 1, "v": Decimal128("20")},
            {"_id": 2, "v": Decimal128("30")},
        ],
        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
        expected=[{"_id": None, "avg": Decimal128("20")}],
        msg="decimal128 avg should return decimal128",
    ),
    # Pairwise mixes: (10 + 20) / 2 = 15. The expected value is a Decimal128
    # whenever one of the two inputs is a Decimal128.
    AccumulatorTestCase(
        "int32_and_int64",
        docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": Int64(20)}],
        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
        expected=[{"_id": None, "avg": 15.0}],
        msg="int32+int64 avg should return double",
    ),
    AccumulatorTestCase(
        "int32_and_double",
        docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": 20.0}],
        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
        expected=[{"_id": None, "avg": 15.0}],
        msg="int32+double avg should return double",
    ),
    AccumulatorTestCase(
        "int32_and_decimal128",
        docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": Decimal128("20")}],
        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
        expected=[{"_id": None, "avg": Decimal128("15")}],
        msg="int32+decimal128 avg should return decimal128",
    ),
    AccumulatorTestCase(
        "int64_and_decimal128",
        docs=[{"_id": 0, "v": Int64(10)}, {"_id": 1, "v": Decimal128("20")}],
        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
        expected=[{"_id": None, "avg": Decimal128("15")}],
        msg="int64+decimal128 avg should return decimal128",
    ),
    AccumulatorTestCase(
        "double_and_decimal128",
        docs=[{"_id": 0, "v": 10.0}, {"_id": 1, "v": Decimal128("20")}],
        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
        expected=[{"_id": None, "avg": Decimal128("15")}],
        msg="double+decimal128 avg should return decimal128",
    ),
    # One value of each numeric type: (10 + 20 + 30 + 40) / 4 = 25.
    AccumulatorTestCase(
        "all_four_types",
        docs=[
            {"_id": 0, "v": 10},
            {"_id": 1, "v": Int64(20)},
            {"_id": 2, "v": 30.0},
            {"_id": 3, "v": Decimal128("40")},
        ],
        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
        expected=[{"_id": None, "avg": Decimal128("25")}],
        msg="all four numeric types avg should return decimal128",
    ),
    # (1 + 2) / 2 = 1.5: integer inputs whose average is not an integer.
    AccumulatorTestCase(
        "fractional_result_from_int32",
        docs=[{"_id": 0, "v": 1}, {"_id": 1, "v": 2}],
        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
        expected=[{"_id": None, "avg": 1.5}],
        msg="int32 avg producing fraction should return double",
    ),
]

# Property [Negative Zero]: $avg normalizes negative zero to positive zero
# for both double and Decimal128.
+AVG_NEGATIVE_ZERO_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "negative_zero_double", + docs=[ + {"_id": 0, "v": DOUBLE_NEGATIVE_ZERO}, + {"_id": 1, "v": DOUBLE_NEGATIVE_ZERO}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": DOUBLE_ZERO}], + msg="Double -0.0 avg should normalize to 0.0", + ), + AccumulatorTestCase( + "negative_zero_decimal128", + docs=[ + {"_id": 0, "v": DECIMAL128_NEGATIVE_ZERO}, + {"_id": 1, "v": DECIMAL128_NEGATIVE_ZERO}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": DECIMAL128_ZERO}], + msg="Decimal128 -0 avg should normalize to 0", + ), +] + +# Property [Return Type]: the result is double by default, but Decimal128 if +# any input value is Decimal128. +AVG_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "type_int32_only", + docs=[{"v": 2}, {"v": 4}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "double"}], + msg="$avg should return double when all inputs are int32", + ), + AccumulatorTestCase( + "type_int64_only", + docs=[{"v": Int64(2)}, {"v": Int64(4)}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "double"}], + msg="$avg should return double when all inputs are int64", + ), + AccumulatorTestCase( + "type_int32_int64", + docs=[{"v": 2}, {"v": Int64(4)}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "double"}], + msg="$avg should return double for int32 and int64 mix", + ), + AccumulatorTestCase( + "type_int32_double", + docs=[{"v": 2}, {"v": 4.0}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], 
+ expected=[{"type": "double"}], + msg="$avg should return double for int32 and double mix", + ), + AccumulatorTestCase( + "type_int64_double", + docs=[{"v": Int64(2)}, {"v": 4.0}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "double"}], + msg="$avg should return double for int64 and double mix", + ), + AccumulatorTestCase( + "type_int32_decimal128", + docs=[{"v": 2}, {"v": Decimal128("4")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "decimal"}], + msg="$avg should return Decimal128 when any input is Decimal128", + ), + AccumulatorTestCase( + "type_int64_decimal128", + docs=[{"v": Int64(2)}, {"v": Decimal128("4")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "decimal"}], + msg="$avg should return Decimal128 for int64 and Decimal128 mix", + ), + AccumulatorTestCase( + "type_double_decimal128", + docs=[{"v": 2.0}, {"v": Decimal128("4")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "decimal"}], + msg="$avg should return Decimal128 for double and Decimal128 mix", + ), + AccumulatorTestCase( + "type_decimal128_before_int32", + docs=[{"v": Decimal128("4")}, {"v": 2}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "decimal"}], + msg="$avg should return Decimal128 regardless of document order", + ), +] + +AVG_GROUP_TYPE_TESTS: list[AccumulatorTestCase] = ( + AVG_TYPE_PROMOTION_TESTS + AVG_NEGATIVE_ZERO_TESTS + AVG_RETURN_TYPE_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(AVG_GROUP_TYPE_TESTS)) +def 
test_avg_group_types(collection, test_case: AccumulatorTestCase): + """Test $avg type promotion and return type in $group context.""" + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult(result, expected=test_case.expected, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_non_numeric.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_non_numeric.py new file mode 100644 index 00000000..831216cb --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_non_numeric.py @@ -0,0 +1,255 @@ +""" +Tests for $avg accumulator non-numeric type handling in $group context. + +Covers all non-numeric BSON types (string, boolean, object, ObjectId, datetime, +Timestamp, Binary, Regex, Code, MinKey, MaxKey, arrays) and verifies they are +silently ignored and excluded from both sum and count. +""" + +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import Binary, Code, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Non-Numeric Types Ignored]: all non-numeric BSON types are +# silently ignored and excluded from both sum and count, producing null +# when no numeric values remain. 
AVG_NON_NUMERIC_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "string",
        docs=[{"v": "hello"}, {"v": "world"}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should ignore string values and return null",
    ),
    AccumulatorTestCase(
        "boolean_true",
        docs=[{"v": True}, {"v": True}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should ignore boolean true without coercing to numeric",
    ),
    AccumulatorTestCase(
        "boolean_false",
        docs=[{"v": False}, {"v": False}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should ignore boolean false without coercing to numeric",
    ),
    # If booleans were coerced to 0/1 the average here would be 0.5, not null.
    AccumulatorTestCase(
        "boolean_not_numeric",
        docs=[{"_id": 0, "v": False}, {"_id": 1, "v": True}],
        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
        expected=[{"_id": None, "avg": None}],
        msg="Booleans should not be treated as 0/1 in avg",
    ),
    AccumulatorTestCase(
        "object",
        docs=[{"v": {"x": 1}}, {"v": {"y": 2}}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should ignore plain objects",
    ),
    AccumulatorTestCase(
        "empty_object",
        docs=[{"v": {}}, {"v": {}}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should ignore empty objects",
    ),
    AccumulatorTestCase(
        "objectid",
        docs=[{"v": ObjectId()}, {"v": ObjectId()}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should ignore ObjectId values",
    ),
    AccumulatorTestCase(
        "datetime",
        docs=[
            {"v": datetime(2023, 1, 1, tzinfo=timezone.utc)},
            {"v": datetime(2024, 1, 1, tzinfo=timezone.utc)},
        ],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should ignore datetime values",
    ),
    AccumulatorTestCase(
        "timestamp",
        docs=[{"v": Timestamp(1, 1)}, {"v": Timestamp(2, 1)}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should ignore Timestamp values",
    ),
    AccumulatorTestCase(
        "binary",
        docs=[{"v": Binary(b"\x01")}, {"v": Binary(b"\x02")}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should ignore Binary values",
    ),
    AccumulatorTestCase(
        "regex",
        docs=[{"v": Regex("abc")}, {"v": Regex("def")}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should ignore Regex values",
    ),
    AccumulatorTestCase(
        "code",
        docs=[{"v": Code("x")}, {"v": Code("y")}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should ignore Code values",
    ),
    AccumulatorTestCase(
        "minkey",
        docs=[{"v": MinKey()}, {"v": MinKey()}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should ignore MinKey values",
    ),
    AccumulatorTestCase(
        "maxkey",
        docs=[{"v": MaxKey()}, {"v": MaxKey()}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should ignore MaxKey values",
    ),
    # Array-valued fields: the expectation is null even when every element
    # of the array is numeric.
    AccumulatorTestCase(
        "array",
        docs=[{"v": [1, 2, 3]}, {"v": [4, 5]}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should ignore arrays without unwrapping",
    ),
    AccumulatorTestCase(
        "single_element_array",
        docs=[{"v": [42]}, {"v": [7]}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should not unwrap single-element numeric arrays",
    ),
    AccumulatorTestCase(
        "empty_array",
        docs=[{"v": []}, {"v": []}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should ignore empty arrays",
    ),
    AccumulatorTestCase(
        "nested_array",
        docs=[{"v": [[1, 2]]}, {"v": [[3]]}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should ignore nested arrays",
    ),
    # The array comes from a $literal expression instead of a stored field;
    # the single inserted document only serves to give $group an input row.
    AccumulatorTestCase(
        "array_from_expression",
        docs=[{"v": 1}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": {"$literal": [1, 2, 3]}}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should treat array expressions as non-numeric",
    ),
    # Only 10 and 20 are numeric: (10 + 20) / 2 = 15.0. The string, boolean
    # and array values must not count toward the divisor.
    AccumulatorTestCase(
        "mixed_with_numerics",
        docs=[{"v": "hello"}, {"v": 10}, {"v": True}, {"v": 20}, {"v": [5]}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": 15.0}],
        msg="$avg should compute average only over numeric values, ignoring non-numerics",
    ),
    AccumulatorTestCase(
        "all_non_numeric",
        docs=[
            {"_id": 0, "v": "a"},
            {"_id": 1, "v": True},
            {"_id": 2, "v": [1]},
            {"_id": 3, "v": {"x": 1}},
        ],
        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
        expected=[{"_id": None, "avg": None}],
        msg="All non-numeric values should return null",
    ),
]


@pytest.mark.parametrize("test_case", pytest_params(AVG_NON_NUMERIC_TESTS))
def test_avg_non_numeric(collection, test_case: AccumulatorTestCase) -> None:
    """Test $avg non-numeric type handling in $group context.

    Inserts the case's documents (skipped when ``docs`` is empty or None),
    runs the case's pipeline through the ``aggregate`` command, and compares
    the outcome with ``test_case.expected``.
    """
    if test_case.docs:
        collection.insert_many(test_case.docs)
    result = execute_command(
        collection,
        {
            "aggregate": collection.name,
            "pipeline": test_case.pipeline,
            "cursor": {},
        },
    )
    assertResult(result, expected=test_case.expected, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_null_missing.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_null_missing.py
new file mode 100644
index 00000000..e1a222aa
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_null_missing.py
@@ -0,0 +1,118 @@
"""
Tests for $avg accumulator null and missing value handling in $group context.

Covers null values, missing fields, $$REMOVE, and combinations with numeric values.
"""

from __future__ import annotations

import pytest

from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
    AccumulatorTestCase,
)
from documentdb_tests.framework.assertions import assertResult
from documentdb_tests.framework.executor import execute_command
from documentdb_tests.framework.parametrize import pytest_params

# Property [Null and Missing Ignored]: null values, missing fields, and
# $$REMOVE are treated as non-numeric and excluded from both the sum and
# count, producing null when no numeric values remain.
AVG_NULL_MISSING_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "all_null",
        docs=[{"_id": 0, "v": None}, {"_id": 1, "v": None}, {"_id": 2, "v": None}],
        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
        expected=[{"_id": None, "avg": None}],
        msg="$avg should return null when all values in the group are null",
    ),
    AccumulatorTestCase(
        "single_null",
        docs=[{"v": None}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should return null when the only value is null",
    ),
    # (10 + 30) / 2 = 20.0: the null document must not count toward the
    # divisor (counting it would give 40 / 3).
    AccumulatorTestCase(
        "some_null",
        docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": None}, {"_id": 2, "v": 30}],
        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
        expected=[{"_id": None, "avg": 20.0}],
        msg="$avg should exclude null from both sum and count",
    ),
    # The documents only carry "other"; the averaged field "v" is absent.
    AccumulatorTestCase(
        "all_missing",
        docs=[{"_id": 0, "other": 0}, {"_id": 1, "other": 1}, {"_id": 2, "other": 2}],
        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
        expected=[{"_id": None, "avg": None}],
        msg="$avg should return null when all values reference missing fields",
    ),
    AccumulatorTestCase(
        "single_missing",
        docs=[{"x": 1}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should return null when the only value is a missing field",
    ),
    AccumulatorTestCase(
        "some_missing",
        docs=[{"_id": 0, "v": 10}, {"_id": 1}, {"_id": 2, "v": 30}],
        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
        expected=[{"_id": None, "avg": 20.0}],
        msg="$avg should exclude missing fields from both sum and count",
    ),
    AccumulatorTestCase(
        "mixed_null_and_missing_no_numerics",
        docs=[{"v": None}, {"x": 1}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should return null when values are a mix of null and missing",
    ),
    # Null and missing together with numerics: (10 + 30) / 2 = 20.0.
    AccumulatorTestCase(
        "mix_null_missing_numeric",
        docs=[
            {"_id": 0, "v": 10},
            {"_id": 1, "v": None},
            {"_id": 2},
            {"_id": 3, "v": 30},
        ],
        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
        expected=[{"_id": None, "avg": 20.0}],
        msg="Only numeric values should contribute to average",
    ),
    # The $cond condition is the constant False, so the expression always
    # takes the "$$REMOVE" branch regardless of the inserted document.
    AccumulatorTestCase(
        "remove_only",
        docs=[{"v": 5}],
        pipeline=[
            {"$group": {"_id": None, "result": {"$avg": {"$cond": [False, 1, "$$REMOVE"]}}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": None}],
        msg="$avg should treat $$REMOVE as missing and return null",
    ),
]


@pytest.mark.parametrize("test_case", pytest_params(AVG_NULL_MISSING_TESTS))
def test_avg_null_missing(collection, test_case: AccumulatorTestCase) -> None:
    """Test $avg null and missing value handling in $group context.

    Inserts the case's documents (skipped when ``docs`` is empty or None),
    runs the case's pipeline through the ``aggregate`` command, and compares
    the outcome with ``test_case.expected``.
    """
    if test_case.docs:
        collection.insert_many(test_case.docs)
    result = execute_command(
        collection,
        {
            "aggregate": collection.name,
            "pipeline": test_case.pipeline,
            "cursor": {},
        },
    )
    assertResult(result, expected=test_case.expected, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py
new file mode 100644
index 00000000..3aa3025b
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py
@@ -0,0 +1,486 @@
"""
Tests for $avg in various pipeline contexts.

Covers $group, $bucket, $setWindowFields, $project/$addFields,
$match+$expr, and pipeline interaction patterns.
"""

from __future__ import annotations

import pytest

from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
    AccumulatorTestCase,
)
from documentdb_tests.framework.assertions import assertResult, assertSuccess
from documentdb_tests.framework.executor import execute_command
from documentdb_tests.framework.parametrize import pytest_params

# --- $group with computed _id ---

# Property [Group Computed ID]: $avg with computed _id expression in $group.
AVG_GROUP_COMPUTED_ID_TESTS: list[AccumulatorTestCase] = [
    # The group key is the boolean result of `score > 85`, so the documents
    # split into a False group and a True group; the $sort on _id lists
    # False first in the expected output.
    AccumulatorTestCase(
        "group_computed_id",
        docs=[
            {"_id": 1, "value": 10, "score": 80},
            {"_id": 2, "value": 20, "score": 90},
            {"_id": 3, "value": 30, "score": 85},
            {"_id": 4, "value": 40, "score": 95},
        ],
        pipeline=[
            {
                "$group": {
                    "_id": {"$gt": ["$score", 85]},
                    "avg": {"$avg": "$value"},
                }
            },
            {"$sort": {"_id": 1}},
        ],
        # score <= 85: docs 1,3 -> avg(10,30) = 20
        # score > 85: docs 2,4 -> avg(20,40) = 30
        expected=[
            {"_id": False, "avg": 20.0},
            {"_id": True, "avg": 30.0},
        ],
        msg="$avg with computed _id should group and average correctly",
    ),
]

# --- $bucket / $bucketAuto ---

# Property [Bucket]: $avg in $bucket and $bucketAuto output specifications.
AVG_BUCKET_TESTS: list[AccumulatorTestCase] = [
    # Boundaries [0, 20, 40, 60]: score 15 falls in bucket 0 (avg 10),
    # scores 25 and 35 in bucket 20 (avg (20 + 30) / 2 = 25), and score 45
    # in bucket 40 (avg 40).
    AccumulatorTestCase(
        "bucket",
        docs=[
            {"_id": 1, "score": 15, "value": 10},
            {"_id": 2, "score": 25, "value": 20},
            {"_id": 3, "score": 35, "value": 30},
            {"_id": 4, "score": 45, "value": 40},
        ],
        pipeline=[
            {
                "$bucket": {
                    "groupBy": "$score",
                    "boundaries": [0, 20, 40, 60],
                    "output": {"avg_value": {"$avg": "$value"}},
                }
            },
        ],
        expected=[
            {"_id": 0, "avg_value": 10.0},
            {"_id": 20, "avg_value": 25.0},
            {"_id": 40, "avg_value": 40.0},
        ],
        msg="$avg in $bucket should compute average per bucket",
    ),
    # Two auto-buckets over scores 10..40: the expected split puts scores
    # 10 and 20 in the first bucket (avg (100 + 200) / 2 = 150) and scores
    # 30 and 40 in the second (avg (300 + 400) / 2 = 350).
    AccumulatorTestCase(
        "bucketauto",
        docs=[
            {"_id": 1, "score": 10, "value": 100},
            {"_id": 2, "score": 20, "value": 200},
            {"_id": 3, "score": 30, "value": 300},
            {"_id": 4, "score": 40, "value": 400},
        ],
        pipeline=[
            {
                "$bucketAuto": {
                    "groupBy": "$score",
                    "buckets": 2,
                    "output": {"avg_value": {"$avg": "$value"}},
                }
            },
        ],
        expected=[
            {"_id": {"min": 10, "max": 30}, "avg_value": 150.0},
            {"_id": {"min": 30, "max": 40}, "avg_value": 350.0},
        ],
        msg="$avg in $bucketAuto should compute average per auto-bucket",
    ),
]

# --- $setWindowFields ---

# Property [Window]: $avg in $setWindowFields with various window types.
+AVG_WINDOW_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "window_unbounded",
+        docs=[
+            {"_id": 1, "value": 10},
+            {"_id": 2, "value": 20},
+            {"_id": 3, "value": 30},
+        ],
+        pipeline=[
+            {"$sort": {"_id": 1}},
+            {
+                "$setWindowFields": {
+                    "sortBy": {"_id": 1},
+                    "output": {
+                        "avg": {
+                            "$avg": "$value",
+                            "window": {"documents": ["unbounded", "unbounded"]},
+                        }
+                    },
+                }
+            },
+            {"$project": {"_id": 1, "value": 1, "avg": 1}},
+        ],
+        expected=[  # every row: avg(10, 20, 30) = 20
+            {"_id": 1, "value": 10, "avg": 20.0},
+            {"_id": 2, "value": 20, "avg": 20.0},
+            {"_id": 3, "value": 30, "avg": 20.0},
+        ],
+        msg="$avg with unbounded window should return full partition average",
+    ),
+    AccumulatorTestCase(
+        "window_cumulative",
+        docs=[
+            {"_id": 1, "value": 10},
+            {"_id": 2, "value": 20},
+            {"_id": 3, "value": 30},
+        ],
+        pipeline=[
+            {"$sort": {"_id": 1}},
+            {
+                "$setWindowFields": {
+                    "sortBy": {"_id": 1},
+                    "output": {
+                        "avg": {
+                            "$avg": "$value",
+                            "window": {"documents": ["unbounded", "current"]},
+                        }
+                    },
+                }
+            },
+            {"$project": {"_id": 1, "value": 1, "avg": 1}},
+        ],
+        expected=[
+            {"_id": 1, "value": 10, "avg": 10.0},  # avg(10)
+            {"_id": 2, "value": 20, "avg": 15.0},  # avg(10, 20)
+            {"_id": 3, "value": 30, "avg": 20.0},  # avg(10, 20, 30)
+        ],
+        msg="$avg with cumulative window should compute running average",
+    ),
+    AccumulatorTestCase(
+        "window_sliding",
+        docs=[
+            {"_id": 1, "value": 10},
+            {"_id": 2, "value": 20},
+            {"_id": 3, "value": 30},
+            {"_id": 4, "value": 40},
+        ],
+        pipeline=[
+            {"$sort": {"_id": 1}},
+            {
+                "$setWindowFields": {
+                    "sortBy": {"_id": 1},
+                    "output": {
+                        "avg": {
+                            "$avg": "$value",
+                            "window": {"documents": [-1, 1]},
+                        }
+                    },
+                }
+            },
+            {"$project": {"_id": 1, "value": 1, "avg": 1}},
+        ],
+        # avg(10,20), avg(10,20,30), avg(20,30,40), avg(30,40)
+        expected=[
+            {"_id": 1, "value": 10, "avg": 15.0},
+            {"_id": 2, "value": 20, "avg": 20.0},
+            {"_id": 3, "value": 30, "avg": 30.0},
+            {"_id": 4, "value": 40, "avg": 35.0},
+        ],
+        msg="$avg with sliding window should compute local average",
+    ),
+    AccumulatorTestCase(
+        "window_current_only",
+        docs=[
+            {"_id": 1, "value": 10},
+            {"_id": 2, "value": 20},
+            {"_id": 3, "value": 30},
+        ],
+        pipeline=[
+            {"$sort": {"_id": 1}},
+            {
+                "$setWindowFields": {
+                    "sortBy": {"_id": 1},
+                    "output": {
+                        "avg": {
+                            "$avg": "$value",
+                            "window": {"documents": [0, 0]},
+                        }
+                    },
+                }
+            },
+            {"$project": {"_id": 1, "value": 1, "avg": 1}},
+        ],
+        expected=[  # each row's avg equals its own value, as a double
+            {"_id": 1, "value": 10, "avg": 10.0},
+            {"_id": 2, "value": 20, "avg": 20.0},
+            {"_id": 3, "value": 30, "avg": 30.0},
+        ],
+        msg="$avg with [0,0] window should return current document value",
+    ),
+    AccumulatorTestCase(
+        "window_with_nulls",
+        docs=[
+            {"_id": 1, "value": 10},
+            {"_id": 2, "value": None},
+            {"_id": 3, "value": 30},
+        ],
+        pipeline=[
+            {"$sort": {"_id": 1}},
+            {
+                "$setWindowFields": {
+                    "sortBy": {"_id": 1},
+                    "output": {
+                        "avg": {
+                            "$avg": "$value",
+                            "window": {"documents": ["unbounded", "unbounded"]},
+                        }
+                    },
+                }
+            },
+            {"$project": {"_id": 1, "value": 1, "avg": 1}},
+        ],
+        expected=[  # null row is not counted: avg(10, 30) = 20
+            {"_id": 1, "value": 10, "avg": 20.0},
+            {"_id": 2, "value": None, "avg": 20.0},
+            {"_id": 3, "value": 30, "avg": 20.0},
+        ],
+        msg="$avg in window should ignore null values",
+    ),
+    AccumulatorTestCase(
+        "window_range_based",
+        docs=[
+            {"_id": 1, "pos": 0, "value": 10},
+            {"_id": 2, "pos": 5, "value": 20},
+            {"_id": 3, "pos": 10, "value": 30},
+            {"_id": 4, "pos": 15, "value": 40},
+        ],
+        pipeline=[
+            {"$sort": {"pos": 1}},
+            {
+                "$setWindowFields": {
+                    "sortBy": {"pos": 1},
+                    "output": {
+                        "avg": {
+                            "$avg": "$value",
+                            "window": {"range": [-5, 5]},
+                        }
+                    },
+                }
+            },
+            {"$project": {"_id": 1, "pos": 1, "value": 1, "avg": 1}},
+        ],
+        # pos=0: range [-5,5] includes pos 0,5 -> avg(10,20)=15
+        # pos=5: range [0,10] includes pos 0,5,10 -> avg(10,20,30)=20
+        # pos=10: range [5,15] includes pos 5,10,15 -> avg(20,30,40)=30
+        # pos=15: range [10,20] includes pos 10,15 -> avg(30,40)=35
+        expected=[
+            {"_id": 1, "pos": 0, "value": 10, "avg": 15.0},
+            {"_id": 2, "pos": 5, "value": 20, "avg": 20.0},
+            {"_id": 3, "pos": 10, "value": 30, "avg": 30.0},
+            {"_id": 4, "pos": 15, "value": 40, "avg": 35.0},
+        ],
+        msg="$avg with range-based window should compute average within range",
+    ),
+    AccumulatorTestCase(
+        "window_multiple_partitions",
+        docs=[
+            {"_id": 1, "group": "A", "value": 10},
+            {"_id": 2, "group": "A", "value": 20},
+            {"_id": 3, "group": "A", "value": 30},
+            {"_id": 4, "group": "B", "value": 100},
+            {"_id": 5, "group": "B", "value": 200},
+        ],
+        pipeline=[
+            {"$sort": {"_id": 1}},
+            {
+                "$setWindowFields": {
+                    "partitionBy": "$group",
+                    "sortBy": {"_id": 1},
+                    "output": {
+                        "avg": {
+                            "$avg": "$value",
+                            "window": {"documents": ["unbounded", "unbounded"]},
+                        }
+                    },
+                }
+            },
+            {"$project": {"_id": 1, "group": 1, "avg": 1}},
+        ],
+        expected=[
+            {"_id": 1, "group": "A", "avg": 20.0},  # A: avg(10, 20, 30)
+            {"_id": 2, "group": "A", "avg": 20.0},
+            {"_id": 3, "group": "A", "avg": 20.0},
+            {"_id": 4, "group": "B", "avg": 150.0},  # B: avg(100, 200)
+            {"_id": 5, "group": "B", "avg": 150.0},
+        ],
+        msg="$avg should compute independent averages per partition",
+    ),
+]
+
+# --- Expression contexts ($project, $addFields, $match+$expr) ---
+
+# Property [Expression Context]: $avg used in expression contexts.
+AVG_EXPRESSION_CONTEXT_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "in_addfields",
+        docs=[
+            {"_id": 1, "scores": [80, 90, 100]},
+        ],
+        pipeline=[
+            {"$addFields": {"avg_score": {"$avg": "$scores"}}},
+            {"$project": {"_id": 0, "avg_score": 1}},
+        ],
+        # avg([80, 90, 100]) = 90
+        expected=[{"avg_score": 90.0}],
+        msg="$avg in $addFields should traverse array field and average",
+    ),
+    AccumulatorTestCase(
+        "in_match_expr",
+        docs=[
+            {"_id": 1, "scores": [80, 90, 100]},
+            {"_id": 2, "scores": [40, 50, 60]},
+            {"_id": 3, "scores": [70, 80, 90]},
+        ],
+        pipeline=[
+            {"$match": {"$expr": {"$gt": [{"$avg": "$scores"}, 75]}}},
+            {"$project": {"_id": 1}},
+            {"$sort": {"_id": 1}},
+        ],
+        # avg([80,90,100])=90 > 75, avg([40,50,60])=50 < 75, avg([70,80,90])=80 > 75
+        expected=[{"_id": 1}, {"_id": 3}],
+        msg="$avg in $match $expr should filter based on computed average",
+    ),
+]
+
+# --- Pipeline interaction patterns ---
+
+# Property [Pipeline Interaction]: $avg combined with other pipeline stages.
+AVG_PIPELINE_INTERACTION_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "group_after_unwind",
+        docs=[
+            {"_id": 1, "category": "A", "values": [10, 20]},
+            {"_id": 2, "category": "A", "values": [30]},
+        ],
+        pipeline=[
+            {"$unwind": "$values"},
+            {"$group": {"_id": "$category", "avg": {"$avg": "$values"}}},
+        ],
+        # Unwound: 10, 20, 30 -> avg = 20
+        expected=[{"_id": "A", "avg": 20.0}],
+        msg="$avg after $unwind should average all unwound values",
+    ),
+    AccumulatorTestCase(
+        "group_after_match",
+        docs=[
+            {"_id": 1, "category": "A", "value": 10, "active": True},
+            {"_id": 2, "category": "A", "value": 20, "active": False},
+            {"_id": 3, "category": "A", "value": 30, "active": True},
+        ],
+        pipeline=[
+            {"$match": {"active": True}},
+            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
+        ],
+        # Only active docs: avg(10, 30) = 20
+        expected=[{"_id": "A", "avg": 20.0}],
+        msg="$avg after $match should only average filtered documents",
+    ),
+    AccumulatorTestCase(
+        "project_after_group",
+        docs=[
+            {"_id": 1, "category": "A", "value": 10},
+            {"_id": 2, "category": "A", "value": 20},
+            {"_id": 3, "category": "B", "value": 30},
+            {"_id": 4, "category": "B", "value": 40},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$category",
+                    "avg": {"$avg": "$value"},
+                    "sum": {"$sum": "$value"},
+                    "count": {"$sum": 1},
+                }
+            },
+            {"$sort": {"_id": 1}},
+            {
+                "$project": {
+                    "_id": 1,
+                    "avg": 1,
+                    "manual_avg": {"$divide": ["$sum", "$count"]},
+                }
+            },
+        ],
+        # The case exercises $avg itself: its output must survive the
+        # $project and agree with the manually derived sum / count.
+        # A: 30 / 2 = 15, B: 70 / 2 = 35
+        expected=[
+            {"_id": "A", "avg": 15.0, "manual_avg": 15.0},
+            {"_id": "B", "avg": 35.0, "manual_avg": 35.0},
+        ],
+        msg="$avg should match the manual $sum / count average after $group",
+    ),
+    AccumulatorTestCase(
+        "group_after_project_rename",
+        docs=[
+            {"_id": 1, "cat": "A", "val": 10},
+            {"_id": 2, "cat": "A", "val": 20},
+        ],
+        pipeline=[
+            {"$project": {"category": "$cat", "value": "$val"}},
+            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
+        ],
+        # avg(10, 20) = 15 over the renamed field
+        expected=[{"_id": "A", "avg": 15.0}],
+        msg="$avg should work on renamed fields from $project",
+    ),
+]
+
+# --- Combined list ---
+
+AVG_PIPELINE_CONTEXT_TESTS: list[AccumulatorTestCase] = (
+    AVG_GROUP_COMPUTED_ID_TESTS
+    + AVG_BUCKET_TESTS
+    + AVG_WINDOW_TESTS
+    + AVG_EXPRESSION_CONTEXT_TESTS
+    + AVG_PIPELINE_INTERACTION_TESTS
+)
+
+
+@pytest.mark.parametrize("test_case", pytest_params(AVG_PIPELINE_CONTEXT_TESTS))
+def test_avg_pipeline_contexts(collection, test_case: AccumulatorTestCase):
+    """Test $avg in various pipeline contexts.
+
+    Inserts the case's documents (when it has any), runs the case's
+    pipeline through the ``aggregate`` command on the test collection, and
+    compares the outcome with the case's expected documents.
+    """
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(result, expected=test_case.expected, msg=test_case.msg)
+
+
+def test_avg_in_project_array_literal(collection):
+    """Test $avg in $project with array of literal values.
+
+    This test uses ``aggregate: 1`` with ``$documents`` instead of a
+    collection, so it is kept as a standalone test.
+    """
+    result = execute_command(
+        collection,
+        {
+            "aggregate": 1,
+            "pipeline": [
+                {"$documents": [{}]},
+                {"$project": {"_id": 0, "avg": {"$avg": [10, 20, 30]}}},
+            ],
+            "cursor": {},
+        },
+    )
+    # avg(10, 20, 30) = 20
+    assertSuccess(
+        result,
+        [{"avg": 20.0}],
+        msg="$avg in $project with literal array should average values",
+    )
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_special_numeric.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_special_numeric.py
new file mode 100644
index 00000000..05325e38
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_special_numeric.py
@@ -0,0 +1,152 @@
+"""
+Tests for $avg accumulator special numeric value handling in $group context.
+
+Covers NaN behavior, Infinity behavior, and cross-type interactions
+for both double and Decimal128 types.
+"""
+
+from __future__ import annotations
+
+import math
+
+import pytest
+from bson import Decimal128
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+from documentdb_tests.framework.test_constants import (
+    DECIMAL128_INFINITY,
+    DECIMAL128_NAN,
+    DECIMAL128_NEGATIVE_INFINITY,
+    FLOAT_INFINITY,
+    FLOAT_NEGATIVE_INFINITY,
+)
+
+# Property [NaN]: NaN is numeric and produces NaN in the result;
+# NaN with Infinity produces NaN; cross-type NaN promotes to Decimal128.
+AVG_NAN_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "nan_with_finite",
+        docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": float("nan")}, {"_id": 2, "v": 30}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}],  # NaN-safe compare
+        msg="NaN among finite values should produce NaN result",
+    ),
+    AccumulatorTestCase(
+        "all_nan",
+        docs=[{"_id": 0, "v": float("nan")}, {"_id": 1, "v": float("nan")}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}],
+        msg="All NaN values should return NaN",
+    ),
+    AccumulatorTestCase(
+        "nan_with_infinity",
+        docs=[{"_id": 0, "v": float("nan")}, {"_id": 1, "v": FLOAT_INFINITY}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}],
+        msg="NaN with Infinity should produce NaN",
+    ),
+    AccumulatorTestCase(
+        "decimal128_nan_with_finite",
+        docs=[
+            {"_id": 0, "v": Decimal128("10")},
+            {"_id": 1, "v": DECIMAL128_NAN},
+            {"_id": 2, "v": Decimal128("30")},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": DECIMAL128_NAN}],
+        msg="Decimal128 NaN among finite values should produce Decimal128 NaN",
+    ),
+    AccumulatorTestCase(
+        "decimal128_nan_with_infinity",
+        docs=[{"v": DECIMAL128_NAN}, {"v": DECIMAL128_INFINITY}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DECIMAL128_NAN}],  # shared constant, as in the sibling cases
+        msg="Decimal128 NaN with Decimal128 Infinity should produce Decimal128 NaN",
+    ),
+    AccumulatorTestCase(
+        "cross_type_nan",
+        docs=[{"_id": 0, "v": float("nan")}, {"_id": 1, "v": Decimal128("5")}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": DECIMAL128_NAN}],  # double NaN + Decimal128 input
+        msg="double NaN with Decimal128 should return Decimal128 NaN",
+    ),
+]
+
+# Property [Infinity]: Infinity with finite values produces Infinity;
+# Infinity with -Infinity produces NaN.
+AVG_INFINITY_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "infinity_with_finite",
+        docs=[{"_id": 0, "v": FLOAT_INFINITY}, {"_id": 1, "v": 10}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": FLOAT_INFINITY}],
+        msg="Infinity with finite value should produce Infinity",
+    ),
+    AccumulatorTestCase(
+        "negative_infinity_with_finite",
+        docs=[{"_id": 0, "v": FLOAT_NEGATIVE_INFINITY}, {"_id": 1, "v": 10}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": FLOAT_NEGATIVE_INFINITY}],
+        msg="-Infinity with finite value should produce -Infinity",
+    ),
+    AccumulatorTestCase(
+        "inf_and_neg_inf",
+        docs=[{"_id": 0, "v": FLOAT_INFINITY}, {"_id": 1, "v": FLOAT_NEGATIVE_INFINITY}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}],
+        msg="Infinity with -Infinity should produce NaN",
+    ),
+    AccumulatorTestCase(
+        "decimal128_infinity_with_finite",
+        docs=[{"_id": 0, "v": DECIMAL128_INFINITY}, {"_id": 1, "v": Decimal128("10")}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": DECIMAL128_INFINITY}],
+        msg="Decimal128 Infinity with finite value should produce Decimal128 Infinity",
+    ),
+    AccumulatorTestCase(
+        "decimal128_neg_infinity_with_finite",
+        docs=[{"v": DECIMAL128_NEGATIVE_INFINITY}, {"v": Decimal128("5")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DECIMAL128_NEGATIVE_INFINITY}],
+        msg="Decimal128 -Infinity with finite value should produce Decimal128 -Infinity",
+    ),
+    AccumulatorTestCase(
+        "decimal128_inf_and_neg_inf",
+        docs=[
+            {"_id": 0, "v": DECIMAL128_INFINITY},
+            {"_id": 1, "v": DECIMAL128_NEGATIVE_INFINITY},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": DECIMAL128_NAN}],
+        msg="Decimal128 Infinity with -Infinity should produce Decimal128 NaN",
+    ),
+]
+
+AVG_SPECIAL_NUMERIC_TESTS: list[AccumulatorTestCase] = AVG_NAN_TESTS + AVG_INFINITY_TESTS
+
+
+@pytest.mark.parametrize("test_case", pytest_params(AVG_SPECIAL_NUMERIC_TESTS))
+def test_avg_special_numeric(collection, test_case: AccumulatorTestCase):
+    """Test $avg special numeric value handling in $group context."""
+    if test_case.docs:  # seed the collection only when the case carries documents
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(result, expected=test_case.expected, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py
new file mode 100644
index 00000000..5b4b9666
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py
@@ -0,0 +1,372 @@
+"""Tests for $avg accumulator composed with sibling accumulators in the same $group."""
+
+from __future__ import annotations
+
+import pytest
+from bson import Decimal128, Int64
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils.accumulator_test_case import (  # noqa: E501
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Avg with Sum]: $avg and $sum coexist in the same $group and
+# independently compute the mean and the total. $avg always returns double
+# for integer inputs; $sum returns int32 when all inputs are int32.
+AVG_WITH_SUM_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "avg_sum_single_group",
+        docs=[
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+            {"cat": "a", "v": 30},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},
+                    "total": {"$sum": "$v"},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "mean": 20.0, "total": 60}],  # 60 / 3 = 20
+        msg="$avg and $sum should independently produce mean and total",
+    ),
+    AccumulatorTestCase(
+        "avg_sum_multiple_groups",
+        docs=[
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+            {"cat": "b", "v": 5},
+            {"cat": "b", "v": 15},
+            {"cat": "b", "v": 25},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},
+                    "total": {"$sum": "$v"},
+                }
+            }
+        ],
+        expected=[
+            {"_id": "a", "mean": 15.0, "total": 30},  # 30 / 2
+            {"_id": "b", "mean": 15.0, "total": 45},  # 45 / 3
+        ],
+        msg="$avg and $sum should produce correct results across multiple groups",
+    ),
+    AccumulatorTestCase(
+        # Both accumulators skip the null here, so the id says "ignored by
+        # both"; only the all-null case below actually diverges.
+        "avg_sum_null_ignored_by_both",
+        docs=[
+            {"cat": "a", "v": None},
+            {"cat": "a", "v": 10},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},
+                    "total": {"$sum": "$v"},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "mean": 10.0, "total": 10}],
+        msg="$avg and $sum should both ignore null (avg=10.0 from one value, sum=10)",
+    ),
+    AccumulatorTestCase(
+        "avg_sum_all_null_diverges",
+        docs=[
+            {"cat": "a", "v": None},
+            {"cat": "a", "v": None},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},
+                    "total": {"$sum": "$v"},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "mean": None, "total": 0}],  # avg -> null, sum -> 0
+        msg="$avg returns null but $sum returns 0 when all values are null",
+    ),
+]
+
+# Property [Avg with Count]: $avg of a field and $sum with constant 1 (count
+# pattern) coexist, independently computing a mean and a document count.
+AVG_WITH_COUNT_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "avg_count_basic",
+        docs=[
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+            {"cat": "b", "v": 5},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},
+                    "count": {"$sum": 1},
+                }
+            }
+        ],
+        expected=[
+            {"_id": "a", "mean": 15.0, "count": 2},  # avg(10, 20)
+            {"_id": "b", "mean": 5.0, "count": 1},  # avg(5)
+        ],
+        msg="$avg of field and $sum(1) should independently compute mean and count",
+    ),
+    AccumulatorTestCase(
+        "avg_count_non_numeric_ignored_but_counted",
+        docs=[
+            {"cat": "a", "v": "hello"},
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": True},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},
+                    "count": {"$sum": 1},
+                }
+            }
+        ],
+        # Only the 10 contributes to the mean; all three documents are counted.
+        expected=[{"_id": "a", "mean": 10.0, "count": 3}],
+        msg="$avg ignores non-numeric values but $sum(1) counts all documents",
+    ),
+]
+
+# Property [Avg with Min/Max]: $avg, $min, and $max coexist in the same
+# $group, each independently computing the mean, minimum, and maximum.
+AVG_WITH_MIN_MAX_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "avg_min_max_basic",
+        docs=[
+            {"cat": "a", "v": 30},
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},
+                    "lo": {"$min": "$v"},
+                    "hi": {"$max": "$v"},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "mean": 20.0, "lo": 10, "hi": 30}],  # avg(30, 10, 20)
+        msg="$avg, $min, and $max should independently compute mean, min, and max",
+    ),
+    AccumulatorTestCase(
+        "avg_min_max_mixed_types",
+        docs=[
+            {"cat": "a", "v": 5},
+            {"cat": "a", "v": Int64(100)},
+            {"cat": "a", "v": 2.5},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},  # (5 + 100 + 2.5) / 3
+                    "lo": {"$min": "$v"},
+                    "hi": {"$max": "$v"},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "mean": 35.833333333333336, "lo": 2.5, "hi": Int64(100)}],
+        msg="$avg should return double while $min/$max preserve original types",
+    ),
+]
+
+# Property [Avg with First/Last]: $avg computes the mean while $first/$last
+# pick positional values from the group. A preceding $sort establishes order
+# for $first and $last; $avg is order-independent.
+AVG_WITH_FIRST_LAST_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "avg_first_last_with_sort",
+        docs=[
+            {"cat": "a", "v": 30},
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},  # ascending: $first sees 10, $last sees 30
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},
+                    "first_v": {"$first": "$v"},
+                    "last_v": {"$last": "$v"},
+                }
+            },
+        ],
+        expected=[{"_id": "a", "mean": 20.0, "first_v": 10, "last_v": 30}],
+        msg="$avg should compute mean while $first/$last pick sorted extremes",
+    ),
+]
+
+# Property [Avg with Push/AddToSet]: $avg computes the mean while $push
+# collects all values and $addToSet collects unique values.
+AVG_WITH_PUSH_ADDTOSET_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "avg_push_addtoset",
+        docs=[
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+            {"cat": "a", "v": 10},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},  # ascending, so $push yields [10, 10, 20]
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},
+                    "all_vals": {"$push": "$v"},
+                    "unique_vals": {"$addToSet": "$v"},
+                }
+            },
+        ],
+        expected=[
+            {
+                "_id": "a",
+                "mean": 13.333333333333334,  # (10 + 20 + 10) / 3
+                "all_vals": [10, 10, 20],
+                "unique_vals": [10, 20],
+            },
+        ],
+        msg="$avg computes mean while $push keeps all values and $addToSet keeps unique values",
+    ),
+]
+
+# Property [Avg with MergeObjects]: $avg computes the mean while
+# $mergeObjects combines per-document metadata into a single object.
+AVG_WITH_MERGE_OBJECTS_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "avg_merge_objects",
+        docs=[
+            {"cat": "a", "v": 10, "meta": {"src": "x"}},
+            {"cat": "a", "v": 20, "meta": {"quality": "high"}},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},  # avg(10, 20)
+                    "merged": {"$mergeObjects": "$meta"},
+                }
+            }
+        ],
+        expected=[
+            {"_id": "a", "mean": 15.0, "merged": {"src": "x", "quality": "high"}},
+        ],
+        msg="$avg computes mean while $mergeObjects combines metadata objects",
+    ),
+]
+
+# Property [Multiple Avg Expressions]: multiple $avg accumulators in the same
+# $group independently average different fields or expressions.
+MULTIPLE_AVG_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "multiple_avg_different_fields",
+        docs=[
+            {"cat": "a", "price": 100, "qty": 2},
+            {"cat": "a", "price": 200, "qty": 3},
+            {"cat": "b", "price": 50, "qty": 10},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "avg_price": {"$avg": "$price"},
+                    "avg_qty": {"$avg": "$qty"},
+                }
+            }
+        ],
+        expected=[
+            {"_id": "a", "avg_price": 150.0, "avg_qty": 2.5},  # avg(100, 200), avg(2, 3)
+            {"_id": "b", "avg_price": 50.0, "avg_qty": 10.0},
+        ],
+        msg="Multiple $avg accumulators should independently average different fields",
+    ),
+    AccumulatorTestCase(
+        "multiple_avg_different_expressions",
+        docs=[
+            {"cat": "a", "price": 100, "qty": 2},  # price * qty = 200
+            {"cat": "a", "price": 200, "qty": 3},  # price * qty = 600
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "avg_price": {"$avg": "$price"},
+                    "avg_revenue": {"$avg": {"$multiply": ["$price", "$qty"]}},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "avg_price": 150.0, "avg_revenue": 400.0}],  # avg(200, 600)
+        msg="Multiple $avg accumulators should average a field and a computed expression",
+    ),
+]
+
+# Property [Avg Type Promotion with Sibling]: $avg promoting to Decimal128
+# does not interfere with sibling accumulators that return simpler types.
+AVG_TYPE_PROMOTION_WITH_SIBLING_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "avg_decimal128_with_int_count",
+        docs=[
+            {"cat": "a", "v": Decimal128("1.5")},
+            {"cat": "a", "v": Decimal128("2.5")},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},  # (1.5 + 2.5) / 2
+                    "count": {"$sum": 1},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "mean": Decimal128("2.0"), "count": 2}],
+        msg="$avg promoting to Decimal128 should not affect sibling $sum(1) returning int32",
+    ),
+]
+
+AVG_INTEGRATION_TESTS = (  # every case list of this module, run by the test below
+    AVG_WITH_SUM_TESTS
+    + AVG_WITH_COUNT_TESTS
+    + AVG_WITH_MIN_MAX_TESTS
+    + AVG_WITH_FIRST_LAST_TESTS
+    + AVG_WITH_PUSH_ADDTOSET_TESTS
+    + AVG_WITH_MERGE_OBJECTS_TESTS
+    + MULTIPLE_AVG_TESTS
+    + AVG_TYPE_PROMOTION_WITH_SIBLING_TESTS
+)
+
+
+@pytest.mark.parametrize("test_case", pytest_params(AVG_INTEGRATION_TESTS))
+def test_accumulators_avg_integration(collection, test_case: AccumulatorTestCase):
+    """Test $avg accumulator composed with sibling accumulators in the same $group."""
+    if test_case.docs:  # seed the collection only when the case carries documents
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline or [], "cursor": {}},
+    )
+    assertResult(
+        result,
+        expected=test_case.expected,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+        ignore_doc_order=True,  # the multi-group cases do not end with a $sort
+        ignore_order_in=["unique_vals"],  # the field built with $addToSet
+    )
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/utils/__init__.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/utils/__init__.py
new file mode 100644
index 00000000..544a6ec5
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/utils/__init__.py
@@ -0,0 +1,3 @@
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils.accumulator_test_case import (  # noqa: E501, F401
+    AccumulatorTestCase,
+)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/utils/accumulator_test_case.py
b/documentdb_tests/compatibility/tests/core/operator/accumulators/utils/accumulator_test_case.py new file mode 100644 index 00000000..f21a9f62 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/utils/accumulator_test_case.py @@ -0,0 +1,16 @@ +"""Shared test case for accumulator tests.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from documentdb_tests.framework.test_case import BaseTestCase + + +@dataclass(frozen=True) +class AccumulatorTestCase(BaseTestCase): + """Test case for accumulator tests.""" + + docs: list[dict] | None = None + pipeline: list[dict[str, Any]] | None = None diff --git a/documentdb_tests/framework/error_codes.py b/documentdb_tests/framework/error_codes.py index 53315bbb..d16c9634 100644 --- a/documentdb_tests/framework/error_codes.py +++ b/documentdb_tests/framework/error_codes.py @@ -354,6 +354,8 @@ PLAN_CACHE_STATS_ALL_HOSTS_NOT_SHARDED_ERROR = 4503200 ACCUMULATOR_NULL_FUNCTION_ERROR = 4544702 ACCUMULATOR_MISSING_ACCUMULATE_ARGS_ERROR = 4544710 +DIVIDE_BY_ZERO_V2_ERROR = 4848401 +MODULO_BY_ZERO_V2_ERROR = 4848403 ARRAY_TO_OBJECT_NULL_BYTE_PAIR_KEY_ERROR = 4940400 ARRAY_TO_OBJECT_NULL_BYTE_KV_KEY_ERROR = 4940401 SKIP_INVALID_ARGUMENT_ERROR = 5107200