From a5ab5ea800ad970518b95a73d9b8304f86075b84 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Thu, 14 May 2026 16:34:34 -0700 Subject: [PATCH 1/9] avg tests used tests from local generate and Daniel F Signed-off-by: Alina (Xi) Li --- .../accumulators/avg/test_accumulator_avg.py | 988 ++++++++++++++++++ .../accumulators/avg/test_avg_field_lookup.py | 349 +++++++ .../avg/test_avg_group_boundaries.py | 190 ++++ .../avg/test_avg_group_context.py | 449 ++++++++ .../accumulators/avg/test_avg_group_types.py | 410 ++++++++ .../avg/test_avg_pipeline_contexts.py | 643 ++++++++++++ documentdb_tests/framework/error_codes.py | 2 + 7 files changed, 3031 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg.py new file mode 100644 index 00000000..53d6d2b4 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg.py @@ -0,0 +1,988 @@ +"""Tests for $avg accumulator in $group context.""" + +from __future__ import annotations + +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Any + +import pytest +from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, 
ObjectId, Regex, Timestamp
+
+from documentdb_tests.framework.assertions import (
+    assertFailureCode,
+    assertResult,
+    assertSuccess,
+)
+from documentdb_tests.framework.error_codes import (
+    CONVERSION_FAILURE_ERROR,
+    DIVIDE_BY_ZERO_V2_ERROR,
+    GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+    MODULO_BY_ZERO_V2_ERROR,
+)
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+from documentdb_tests.framework.test_case import BaseTestCase
+from documentdb_tests.framework.test_constants import (
+    DECIMAL128_INFINITY,
+    DECIMAL128_INT64_OVERFLOW,
+    DECIMAL128_MAX,
+    DECIMAL128_MIN_POSITIVE,
+    DECIMAL128_NAN,
+    DECIMAL128_NEGATIVE_INFINITY,
+    DECIMAL128_NEGATIVE_ZERO,
+    DECIMAL128_TRAILING_ZERO,
+    DECIMAL128_ZERO,
+    DOUBLE_FROM_INT64_MAX,
+    DOUBLE_MAX,
+    DOUBLE_MAX_SAFE_INTEGER,
+    DOUBLE_MIN_NEGATIVE_SUBNORMAL,
+    DOUBLE_MIN_NORMAL,
+    DOUBLE_MIN_SUBNORMAL,
+    DOUBLE_NEAR_MAX,
+    DOUBLE_NEAR_MIN,
+    DOUBLE_NEGATIVE_ZERO,
+    DOUBLE_ZERO,
+    FLOAT_INFINITY,
+    FLOAT_NAN,
+    FLOAT_NEGATIVE_INFINITY,
+    INT32_MAX,
+    INT32_MAX_MINUS_1,
+    INT32_MIN,
+    INT64_MAX,
+    INT64_MAX_MINUS_1,
+    INT64_MIN,
+    INT64_MIN_PLUS_1,
+    INT64_ZERO,
+)
+
+
+@dataclass(frozen=True)
+class AvgAccumulatorTest(BaseTestCase):
+    """One $avg accumulator case: documents to insert plus the $avg operand."""
+
+    docs: list[dict] | None = None  # documents inserted before aggregating
+    expression: Any = None  # operand placed inside {"$avg": ...}
+
+
+# Property [Null and Missing Ignored]: null values, missing fields, and
+# $$REMOVE are treated as non-numeric and excluded from both the sum and
+# count, producing null when no numeric values remain.
+AVG_NULL_MISSING_TESTS: list[AvgAccumulatorTest] = [
+    AvgAccumulatorTest(
+        "null_all_null",
+        docs=[{"v": None}, {"v": None}],
+        expression="$v",
+        expected=None,
+        msg="$avg should return null when all values in the group are null",
+    ),
+    AvgAccumulatorTest(
+        "null_all_missing",
+        docs=[{"x": 1}, {"x": 2}],  # no document has a "v" field
+        expression="$v",
+        expected=None,
+        msg="$avg should return null when all values reference missing fields",
+    ),
+    AvgAccumulatorTest(
+        "null_single_null",
+        docs=[{"v": None}],
+        expression="$v",
+        expected=None,
+        msg="$avg should return null when the only value is null",
+    ),
+    AvgAccumulatorTest(
+        "null_single_missing",
+        docs=[{"x": 1}],
+        expression="$v",
+        expected=None,
+        msg="$avg should return null when the only value is a missing field",
+    ),
+    AvgAccumulatorTest(
+        "null_mixed_null_and_missing",
+        docs=[{"v": None}, {"x": 1}],
+        expression="$v",
+        expected=None,
+        msg="$avg should return null when values are a mix of null and missing",
+    ),
+    AvgAccumulatorTest(
+        "null_with_numerics",
+        docs=[{"v": None}, {"v": 10}, {"v": 20}],
+        expression="$v",
+        expected=15.0,  # (10 + 20) / 2
+        msg="$avg should exclude null from both sum and count",
+    ),
+    AvgAccumulatorTest(
+        "null_missing_with_numerics",
+        docs=[{"x": 1}, {"v": 10}, {"v": 20}],
+        expression="$v",
+        expected=15.0,  # (10 + 20) / 2
+        msg="$avg should exclude missing fields from both sum and count",
+    ),
+    AvgAccumulatorTest(
+        "null_mixed_null_missing_with_numerics",
+        docs=[{"v": None}, {"x": 1}, {"v": 30}],
+        expression="$v",
+        expected=30.0,  # 30 / 1
+        msg="$avg should exclude both null and missing, averaging only numerics",
+    ),
+    AvgAccumulatorTest(
+        "null_remove_only",
+        docs=[{"v": 5}],
+        expression={"$cond": [False, 1, "$$REMOVE"]},  # False -> else branch, $$REMOVE
+        expected=None,
+        msg="$avg should treat $$REMOVE as missing and return null",
+    ),
+]
+
+# Property [Non-Numeric Types Ignored]: all non-numeric BSON types are
+# silently ignored and excluded from both sum and count, producing null
+# when no numeric values remain.
+AVG_NON_NUMERIC_TESTS: list[AvgAccumulatorTest] = [
+    AvgAccumulatorTest(
+        "non_numeric_string",
+        docs=[{"v": "hello"}, {"v": "world"}],
+        expression="$v",
+        expected=None,
+        msg="$avg should ignore string values and return null",
+    ),
+    AvgAccumulatorTest(
+        "non_numeric_boolean_true",
+        docs=[{"v": True}, {"v": True}],
+        expression="$v",
+        expected=None,
+        msg="$avg should ignore boolean true without coercing to numeric",
+    ),
+    AvgAccumulatorTest(
+        "non_numeric_boolean_false",
+        docs=[{"v": False}, {"v": False}],
+        expression="$v",
+        expected=None,
+        msg="$avg should ignore boolean false without coercing to numeric",
+    ),
+    AvgAccumulatorTest(
+        "non_numeric_object",
+        docs=[{"v": {"x": 1}}, {"v": {"y": 2}}],
+        expression="$v",
+        expected=None,
+        msg="$avg should ignore plain objects",
+    ),
+    AvgAccumulatorTest(
+        "non_numeric_empty_object",
+        docs=[{"v": {}}, {"v": {}}],
+        expression="$v",
+        expected=None,
+        msg="$avg should ignore empty objects",
+    ),
+    AvgAccumulatorTest(
+        "non_numeric_objectid",
+        docs=[{"v": ObjectId()}, {"v": ObjectId()}],  # fresh ids created at import time
+        expression="$v",
+        expected=None,
+        msg="$avg should ignore ObjectId values",
+    ),
+    AvgAccumulatorTest(
+        "non_numeric_datetime",
+        docs=[
+            {"v": datetime(2023, 1, 1, tzinfo=timezone.utc)},
+            {"v": datetime(2024, 1, 1, tzinfo=timezone.utc)},
+        ],
+        expression="$v",
+        expected=None,
+        msg="$avg should ignore datetime values",
+    ),
+    AvgAccumulatorTest(
+        "non_numeric_timestamp",
+        docs=[{"v": Timestamp(1, 1)}, {"v": Timestamp(2, 1)}],
+        expression="$v",
+        expected=None,
+        msg="$avg should ignore Timestamp values",
+    ),
+    AvgAccumulatorTest(
+        "non_numeric_binary",
+        docs=[{"v": Binary(b"\x01")}, {"v": Binary(b"\x02")}],
+        expression="$v",
+        expected=None,
+        msg="$avg should ignore Binary values",
+    ),
+    AvgAccumulatorTest(
+        "non_numeric_regex",
+        docs=[{"v": Regex("abc")}, {"v": Regex("def")}],
+        expression="$v",
+        expected=None,
+        msg="$avg should ignore Regex values",
+    ),
+    AvgAccumulatorTest(
+        "non_numeric_code",
+        docs=[{"v": Code("x")}, {"v": Code("y")}],
+        expression="$v",
+        expected=None,
+        msg="$avg should ignore Code values",
+    ),
+    AvgAccumulatorTest(
+        "non_numeric_minkey",
+        docs=[{"v": MinKey()}, {"v": MinKey()}],
+        expression="$v",
+        expected=None,
+        msg="$avg should ignore MinKey values",
+    ),
+    AvgAccumulatorTest(
+        "non_numeric_maxkey",
+        docs=[{"v": MaxKey()}, {"v": MaxKey()}],
+        expression="$v",
+        expected=None,
+        msg="$avg should ignore MaxKey values",
+    ),
+    AvgAccumulatorTest(
+        "non_numeric_array",
+        docs=[{"v": [1, 2, 3]}, {"v": [4, 5]}],
+        expression="$v",
+        expected=None,
+        msg="$avg should ignore arrays without unwrapping",
+    ),
+    AvgAccumulatorTest(
+        "non_numeric_single_element_array",
+        docs=[{"v": [42]}, {"v": [7]}],
+        expression="$v",
+        expected=None,
+        msg="$avg should not unwrap single-element numeric arrays",
+    ),
+    AvgAccumulatorTest(
+        "non_numeric_empty_array",
+        docs=[{"v": []}, {"v": []}],
+        expression="$v",
+        expected=None,
+        msg="$avg should ignore empty arrays",
+    ),
+    AvgAccumulatorTest(
+        "non_numeric_nested_array",
+        docs=[{"v": [[1, 2]]}, {"v": [[3]]}],
+        expression="$v",
+        expected=None,
+        msg="$avg should ignore nested arrays",
+    ),
+    AvgAccumulatorTest(
+        "non_numeric_mixed_with_numerics",
+        docs=[{"v": "hello"}, {"v": 10}, {"v": True}, {"v": 20}, {"v": [5]}],
+        expression="$v",
+        expected=15.0,  # (10 + 20) / 2; the string, bool and array are skipped
+        msg="$avg should compute average only over numeric values, ignoring non-numerics",
+    ),
+    AvgAccumulatorTest(
+        "non_numeric_array_from_expression",
+        docs=[{"v": 1}],
+        expression={"$literal": [1, 2, 3]},  # operand is an array literal, not a field
+        expected=None,
+        msg="$avg should treat array expressions as non-numeric",
+    ),
+]
+
+# Property [Special Numeric Values]: NaN is numeric and propagates to the
+# result, Infinity dominates finite values, Infinity + -Infinity produces
+# NaN, and negative zero is not preserved.
+AVG_SPECIAL_NUMERIC_TESTS: list[AvgAccumulatorTest] = [
+    AvgAccumulatorTest(
+        "special_nan_propagates",
+        docs=[{"v": FLOAT_NAN}, {"v": 5.0}],
+        expression="$v",
+        expected=pytest.approx(FLOAT_NAN, nan_ok=True),  # nan_ok: NaN matches NaN
+        msg="$avg should return NaN when any value is NaN",
+    ),
+    AvgAccumulatorTest(
+        "special_nan_over_infinity",
+        docs=[{"v": FLOAT_NAN}, {"v": FLOAT_INFINITY}],
+        expression="$v",
+        expected=pytest.approx(FLOAT_NAN, nan_ok=True),
+        msg="$avg should return NaN when group contains both NaN and Infinity",
+    ),
+    AvgAccumulatorTest(
+        "special_infinity_dominates",
+        docs=[{"v": FLOAT_INFINITY}, {"v": 5.0}],
+        expression="$v",
+        expected=FLOAT_INFINITY,
+        msg="$avg should return Infinity when Infinity dominates finite values",
+    ),
+    AvgAccumulatorTest(
+        "special_neg_infinity_dominates",
+        docs=[{"v": FLOAT_NEGATIVE_INFINITY}, {"v": 5.0}],
+        expression="$v",
+        expected=FLOAT_NEGATIVE_INFINITY,
+        msg="$avg should return -Infinity when -Infinity dominates finite values",
+    ),
+    AvgAccumulatorTest(
+        "special_inf_plus_neg_inf",
+        docs=[{"v": FLOAT_INFINITY}, {"v": FLOAT_NEGATIVE_INFINITY}],
+        expression="$v",
+        expected=pytest.approx(FLOAT_NAN, nan_ok=True),
+        msg="$avg should return NaN when group contains Infinity and -Infinity",
+    ),
+    AvgAccumulatorTest(
+        "special_neg_zero_not_preserved",
+        docs=[{"v": DOUBLE_NEGATIVE_ZERO}],
+        expression="$v",
+        expected=DOUBLE_ZERO,
+        msg="$avg should not preserve negative zero",
+    ),
+    AvgAccumulatorTest(
+        "special_decimal_neg_zero_not_preserved",
+        docs=[{"v": DECIMAL128_NEGATIVE_ZERO}],
+        expression="$v",
+        expected=DECIMAL128_ZERO,
+        msg="$avg should not preserve Decimal128 negative zero",
+    ),
+    AvgAccumulatorTest(
+        "special_decimal_nan_propagates",
+        docs=[{"v": DECIMAL128_NAN}, {"v": Decimal128("5")}],
+        expression="$v",
+        expected=Decimal128("NaN"),
+        msg="$avg should return Decimal128 NaN when any Decimal128 value is NaN",
+    ),
+    AvgAccumulatorTest(
+        "special_decimal_nan_over_infinity",
+        docs=[{"v": DECIMAL128_NAN}, {"v": DECIMAL128_INFINITY}],
+        expression="$v",
+        expected=Decimal128("NaN"),
+        msg="$avg should return Decimal128 NaN when group contains Decimal128 NaN and Infinity",
+    ),
+    AvgAccumulatorTest(
+        "special_decimal_infinity",
+        docs=[{"v": DECIMAL128_INFINITY}, {"v": Decimal128("5")}],
+        expression="$v",
+        expected=DECIMAL128_INFINITY,
+        msg="$avg should return Decimal128 Infinity when Decimal128 Infinity is present",
+    ),
+    AvgAccumulatorTest(
+        "special_decimal_neg_infinity_dominates",
+        docs=[{"v": DECIMAL128_NEGATIVE_INFINITY}, {"v": Decimal128("5")}],
+        expression="$v",
+        expected=DECIMAL128_NEGATIVE_INFINITY,
+        msg="$avg should return Decimal128 -Infinity when Decimal128 -Infinity dominates",
+    ),
+    AvgAccumulatorTest(
+        "special_decimal_inf_plus_neg_inf",
+        docs=[{"v": DECIMAL128_INFINITY}, {"v": DECIMAL128_NEGATIVE_INFINITY}],
+        expression="$v",
+        expected=Decimal128("NaN"),
+        msg="$avg should return Decimal128 NaN for Decimal128 Infinity + -Infinity",
+    ),
+]
+
+# Property [Integer Boundaries]: int32 boundary values produce exact double
+# results, and int64 boundary values produce double results with potential
+# precision loss.
+AVG_INTEGER_BOUNDARY_TESTS: list[AvgAccumulatorTest] = [
+    AvgAccumulatorTest(
+        "int_boundary_int32_zeros",
+        docs=[{"v": 0}, {"v": 0}],
+        expression="$v",
+        expected=DOUBLE_ZERO,
+        msg="$avg should return 0.0 for two int32 zeros",
+    ),
+    AvgAccumulatorTest(
+        "int_boundary_int32_one_neg_one",
+        docs=[{"v": 1}, {"v": -1}],
+        expression="$v",
+        expected=DOUBLE_ZERO,
+        msg="$avg should return 0.0 for int32 1 and -1",
+    ),
+    AvgAccumulatorTest(
+        "int_boundary_int32_max",
+        docs=[{"v": INT32_MAX}, {"v": 0}],
+        expression="$v",
+        expected=1_073_741_823.5,  # (INT32_MAX + 0) / 2
+        msg="$avg should handle int32 MAX correctly",
+    ),
+    AvgAccumulatorTest(
+        "int_boundary_int32_min",
+        docs=[{"v": INT32_MIN}, {"v": INT32_MIN}],
+        expression="$v",
+        expected=float(INT32_MIN),
+        msg="$avg should handle int32 MIN correctly",
+    ),
+    AvgAccumulatorTest(
+        "int_boundary_int32_max_and_min",
+        docs=[{"v": INT32_MAX}, {"v": INT32_MIN}],
+        expression="$v",
+        expected=-0.5,  # (INT32_MAX + INT32_MIN) / 2
+        msg="$avg should handle int32 MAX and MIN together",
+    ),
+    AvgAccumulatorTest(
+        "int_boundary_int32_adjacent_max",
+        docs=[{"v": INT32_MAX_MINUS_1}, {"v": INT32_MAX}],
+        expression="$v",
+        expected=2_147_483_646.5,  # midpoint of the two adjacent values
+        msg="$avg of adjacent int32 MAX values should produce exact double",
+    ),
+    AvgAccumulatorTest(
+        "int_boundary_int32_adjacent_min",
+        docs=[{"v": INT32_MIN}, {"v": INT32_MIN + 1}],
+        expression="$v",
+        expected=-2_147_483_647.5,  # midpoint of the two adjacent values
+        msg="$avg of adjacent int32 MIN values should produce exact double",
+    ),
+    AvgAccumulatorTest(
+        "int_boundary_int64_max",
+        docs=[{"v": INT64_MAX}, {"v": INT64_ZERO}],
+        expression="$v",
+        expected=DOUBLE_FROM_INT64_MAX / 2,
+        msg="$avg should handle int64 MAX with precision loss in double",
+    ),
+    AvgAccumulatorTest(
+        "int_boundary_int64_min",
+        docs=[{"v": INT64_MIN}, {"v": INT64_MIN}],
+        expression="$v",
+        expected=-DOUBLE_FROM_INT64_MAX,
+        msg="$avg should handle int64 MIN with precision loss in double",
+    ),
+    AvgAccumulatorTest(
+        "int_boundary_int64_max_and_min",
+        docs=[{"v": INT64_MAX}, {"v": INT64_MIN}],
+        expression="$v",
+        expected=-0.5,  # (INT64_MAX + INT64_MIN) / 2
+        msg="$avg should handle int64 MAX and MIN together",
+    ),
+    AvgAccumulatorTest(
+        "int_boundary_int64_adjacent_max",
+        docs=[{"v": INT64_MAX_MINUS_1}, {"v": INT64_MAX}],
+        expression="$v",
+        expected=DOUBLE_FROM_INT64_MAX,
+        msg="$avg of adjacent int64 MAX values should produce double with precision loss",
+    ),
+    AvgAccumulatorTest(
+        "int_boundary_int64_adjacent_min",
+        docs=[{"v": INT64_MIN_PLUS_1}, {"v": INT64_MIN}],
+        expression="$v",
+        expected=-DOUBLE_FROM_INT64_MAX,
+        msg="$avg of adjacent int64 MIN values should produce double with precision loss",
+    ),
+]
+
+# Property [Float Boundaries]: subnormal, minimum normal, maximum finite,
+# near-precision-limit, and whole-number double values are averaged correctly.
+AVG_FLOAT_BOUNDARY_TESTS: list[AvgAccumulatorTest] = [
+    AvgAccumulatorTest(
+        "float_boundary_whole_number",
+        docs=[{"v": 3.0}, {"v": 5.0}],
+        expression="$v",
+        expected=4.0,  # (3.0 + 5.0) / 2
+        msg="$avg should produce correct average for whole-number floats",
+    ),
+    AvgAccumulatorTest(
+        "float_boundary_subnormal_positive",
+        docs=[{"v": DOUBLE_MIN_SUBNORMAL}],
+        expression="$v",
+        expected=DOUBLE_MIN_SUBNORMAL,
+        msg="$avg should handle positive subnormal value correctly",
+    ),
+    AvgAccumulatorTest(
+        "float_boundary_subnormal_negative",
+        docs=[{"v": DOUBLE_MIN_NEGATIVE_SUBNORMAL}],
+        expression="$v",
+        expected=DOUBLE_MIN_NEGATIVE_SUBNORMAL,
+        msg="$avg should handle negative subnormal value correctly",
+    ),
+    AvgAccumulatorTest(
+        "float_boundary_subnormal_avg",
+        docs=[{"v": DOUBLE_MIN_SUBNORMAL}, {"v": DOUBLE_MIN_SUBNORMAL}],
+        expression="$v",
+        expected=DOUBLE_MIN_SUBNORMAL,
+        msg="$avg of two identical subnormal values should return that value",
+    ),
+    AvgAccumulatorTest(
+        "float_boundary_min_normal",
+        docs=[{"v": DOUBLE_MIN_NORMAL}],
+        expression="$v",
+        expected=DOUBLE_MIN_NORMAL,
+        msg="$avg should handle smallest positive normal double correctly",
+    ),
+    AvgAccumulatorTest(
+        "float_boundary_max_single",
+        docs=[{"v": DOUBLE_MAX}],
+        expression="$v",
+        expected=DOUBLE_MAX,
+        msg="$avg should handle DBL_MAX as a single value correctly",
+    ),
+    AvgAccumulatorTest(
+        "float_boundary_max_safe_integer",
+        docs=[{"v": float(DOUBLE_MAX_SAFE_INTEGER)}],
+        expression="$v",
+        expected=float(DOUBLE_MAX_SAFE_INTEGER),
+        msg="$avg should handle max safe integer value correctly",
+    ),
+    AvgAccumulatorTest(
+        "float_boundary_max_safe_integer_avg",
+        docs=[
+            {"v": float(DOUBLE_MAX_SAFE_INTEGER)},
+            {"v": float(DOUBLE_MAX_SAFE_INTEGER)},
+        ],
+        expression="$v",
+        expected=float(DOUBLE_MAX_SAFE_INTEGER),
+        msg="$avg of two max safe integer values should return that value",
+    ),
+    AvgAccumulatorTest(
+        "float_boundary_near_min",
+        docs=[{"v": DOUBLE_NEAR_MIN}, {"v": DOUBLE_NEAR_MIN}],
+        expression="$v",
+        expected=DOUBLE_NEAR_MIN,
+        msg="$avg should handle values near minimum normal correctly",
+    ),
+    AvgAccumulatorTest(
+        "float_boundary_near_max_single",
+        docs=[{"v": DOUBLE_NEAR_MAX}],
+        expression="$v",
+        expected=DOUBLE_NEAR_MAX,
+        msg="$avg should handle values near maximum finite correctly",
+    ),
+]
+
+# Property [Decimal128 Behavior]: full 34-digit precision and trailing zeros
+# are preserved, subnormal and near-maximum values are handled correctly, and
+# values exceeding int64 range produce Decimal128 results.
+AVG_DECIMAL128_TESTS: list[AvgAccumulatorTest] = [
+    AvgAccumulatorTest(
+        "decimal128_full_precision",
+        docs=[
+            {"v": Decimal128("1.000000000000000000000000000000001")},
+            {"v": Decimal128("1.000000000000000000000000000000003")},
+        ],
+        expression="$v",
+        expected=Decimal128("1.000000000000000000000000000000002"),
+        msg="$avg should preserve full 34-digit Decimal128 precision",
+    ),
+    AvgAccumulatorTest(
+        "decimal128_34_digit_integer",
+        docs=[
+            {"v": Decimal128("1234567890123456789012345678901234")},
+            {"v": Decimal128("1234567890123456789012345678901234")},
+        ],
+        expression="$v",
+        expected=Decimal128("1234567890123456789012345678901234"),
+        msg="$avg should preserve 34-digit integer Decimal128 values",
+    ),
+    AvgAccumulatorTest(
+        "decimal128_trailing_zeros",
+        docs=[{"v": Decimal128("2.00")}, {"v": Decimal128("4.00")}],
+        expression="$v",
+        expected=Decimal128("3.00"),  # (2.00 + 4.00) / 2, scale kept
+        msg="$avg should preserve trailing zeros in Decimal128 results",
+    ),
+    AvgAccumulatorTest(
+        "decimal128_trailing_zeros_single_digit",
+        docs=[{"v": DECIMAL128_TRAILING_ZERO}, {"v": Decimal128("3.0")}],
+        expression="$v",
+        expected=Decimal128("2.0"),
+        msg="$avg should preserve single trailing zero in Decimal128 results",
+    ),
+    AvgAccumulatorTest(
+        "decimal128_subnormal",
+        docs=[{"v": DECIMAL128_MIN_POSITIVE}, {"v": DECIMAL128_MIN_POSITIVE}],
+        expression="$v",
+        expected=DECIMAL128_MIN_POSITIVE,
+        msg="$avg should handle Decimal128 subnormal values correctly",
+    ),
+    AvgAccumulatorTest(
+        "decimal128_subnormal_single",
+        docs=[{"v": DECIMAL128_MIN_POSITIVE}],
+        expression="$v",
+        expected=DECIMAL128_MIN_POSITIVE,
+        msg="$avg should handle a single Decimal128 subnormal value",
+    ),
+    AvgAccumulatorTest(
+        "decimal128_near_max_single",
+        docs=[{"v": DECIMAL128_MAX}],
+        expression="$v",
+        expected=DECIMAL128_MAX,
+        msg="$avg should handle a single near-maximum Decimal128 value",
+    ),
+    AvgAccumulatorTest(
+        "decimal128_near_max_with_small",
+        docs=[{"v": DECIMAL128_MAX}, {"v": Decimal128("1")}],
+        expression="$v",
+        expected=Decimal128("5.000000000000000000000000000000000E+6144"),
+        msg="$avg should handle near-maximum Decimal128 averaged with a small value",
+    ),
+    AvgAccumulatorTest(
+        "decimal128_exceeds_int64",
+        docs=[
+            {"v": DECIMAL128_INT64_OVERFLOW},
+            {"v": DECIMAL128_INT64_OVERFLOW},
+        ],
+        expression="$v",
+        expected=DECIMAL128_INT64_OVERFLOW,
+        msg="$avg should produce Decimal128 for values exceeding int64 range",
+    ),
+]
+
+# Property [Overflow]: sum overflow during accumulation produces Infinity for
+# doubles and Decimal128, and int32/int64 overflow is handled via type
+# promotion without error.
+AVG_OVERFLOW_TESTS: list[AvgAccumulatorTest] = [
+    AvgAccumulatorTest(
+        "overflow_double_max",
+        docs=[{"v": DOUBLE_MAX}, {"v": DOUBLE_MAX}],
+        expression="$v",
+        expected=FLOAT_INFINITY,
+        msg="$avg should return Infinity when two DBL_MAX values overflow the sum",
+    ),
+    AvgAccumulatorTest(
+        "overflow_decimal128_max",
+        docs=[{"v": DECIMAL128_MAX}, {"v": DECIMAL128_MAX}],
+        expression="$v",
+        expected=DECIMAL128_INFINITY,
+        msg="$avg should return Decimal128 Infinity when two Decimal128 max values overflow",
+    ),
+    AvgAccumulatorTest(
+        "overflow_int32_sum",
+        docs=[{"v": INT32_MAX}, {"v": INT32_MAX}, {"v": INT32_MAX}],
+        expression="$v",
+        expected=float(INT32_MAX),  # three equal inputs average to that input
+        msg="$avg should handle int32 sum overflow via type promotion without error",
+    ),
+    AvgAccumulatorTest(
+        "overflow_int64_sum",
+        docs=[{"v": INT64_MAX}, {"v": INT64_MAX}, {"v": INT64_MAX}],
+        expression="$v",
+        expected=DOUBLE_FROM_INT64_MAX,
+        msg="$avg should handle int64 sum overflow by converting to double",
+    ),
+]
+
+# Property [Expression Arguments]: $avg accepts any expression as its operand,
+# evaluating it per-document before accumulation.
+AVG_EXPRESSION_ARGS_TESTS: list[AvgAccumulatorTest] = [
+    AvgAccumulatorTest(
+        "expr_constant_literal",
+        docs=[{"x": 1}, {"x": 2}, {"x": 3}],
+        expression=5,  # same literal is fed in for each of the three documents
+        expected=5.0,
+        msg="$avg should return the constant value when expression is a numeric literal",
+    ),
+    AvgAccumulatorTest(
+        "expr_nested_add",
+        docs=[{"a": 2, "b": 3}, {"a": 4, "b": 6}],
+        expression={"$add": ["$a", "$b"]},
+        expected=7.5,  # ((2 + 3) + (4 + 6)) / 2
+        msg="$avg should evaluate nested $add expression per-document before averaging",
+    ),
+]
+
+# Property [Edge Cases]: a single-document group returns the value itself
+# (as double or Decimal128), a single non-numeric document returns null, and
+# an empty collection produces no group output.
+AVG_EDGE_CASE_TESTS: list[AvgAccumulatorTest] = [
+    AvgAccumulatorTest(
+        "edge_single_int32",
+        docs=[{"v": 7}],
+        expression="$v",
+        expected=7.0,
+        msg="$avg should return the value as double for a single int32 document",
+    ),
+    AvgAccumulatorTest(
+        "edge_single_int64",
+        docs=[{"v": Int64(42)}],
+        expression="$v",
+        expected=42.0,
+        msg="$avg should return the value as double for a single int64 document",
+    ),
+    AvgAccumulatorTest(
+        "edge_single_non_numeric",
+        docs=[{"v": "hello"}],
+        expression="$v",
+        expected=None,
+        msg="$avg should return null for a single non-numeric document",
+    ),
+]
+
+AVG_SUCCESS_TESTS = (
+    AVG_NULL_MISSING_TESTS
+    + AVG_NON_NUMERIC_TESTS
+    + AVG_SPECIAL_NUMERIC_TESTS
+    + AVG_INTEGER_BOUNDARY_TESTS
+    + AVG_FLOAT_BOUNDARY_TESTS
+    + AVG_DECIMAL128_TESTS
+    + AVG_OVERFLOW_TESTS
+    + AVG_EXPRESSION_ARGS_TESTS
+    + AVG_EDGE_CASE_TESTS
+)
+
+# Property [Expression Error Propagation]: errors from sub-expressions
+# propagate through $avg without being caught or suppressed.
+AVG_EXPRESSION_ERROR_TESTS: list[AvgAccumulatorTest] = [
+    AvgAccumulatorTest(
+        "error_prop_toint_non_convertible",
+        docs=[{"v": "hello"}],
+        expression={"$toInt": "$v"},
+        error_code=CONVERSION_FAILURE_ERROR,
+        msg="$avg should propagate $toInt conversion error for non-convertible value",
+    ),
+    AvgAccumulatorTest(
+        "error_prop_divide_by_zero",
+        docs=[{"v": 10}],
+        expression={"$divide": ["$v", 0]},
+        error_code=DIVIDE_BY_ZERO_V2_ERROR,
+        msg="$avg should propagate $divide by zero error",
+    ),
+    AvgAccumulatorTest(
+        "error_prop_mod_by_zero",
+        docs=[{"v": 10}],
+        expression={"$mod": ["$v", 0]},
+        error_code=MODULO_BY_ZERO_V2_ERROR,
+        msg="$avg should propagate $mod by zero error",
+    ),
+]
+
+AVG_TESTS = AVG_SUCCESS_TESTS + AVG_EXPRESSION_ERROR_TESTS
+
+
+@pytest.mark.parametrize("test_case", pytest_params(AVG_TESTS))
+def test_accumulator_avg(collection, test_case: AvgAccumulatorTest):
+    """Run one $avg case through $group and check its value or error code."""
+    collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": [
+                {"$group": {"_id": None, "result": {"$avg": test_case.expression}}},
+                {"$project": {"_id": 0, "result": 1}},  # keep only the average
+            ],
+            "cursor": {},
+        },
+    )
+    assertResult(
+        result,
+        expected=[{"result": test_case.expected}] if test_case.error_code is None else None,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+    )
+
+
+def test_accumulator_avg_empty_collection(collection):
+    """Test $avg returns no documents for an empty collection."""
+    result = execute_command(  # nothing is inserted before this command runs
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": [
+                {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+                {"$project": {"_id": 0, "result": 1}},
+            ],
+            "cursor": {},
+        },
+    )
+    assertSuccess(
+        result,
+        [],
+        msg="$avg should produce no group output for an empty collection",
+    )
+
+
+# Property [Return Type]: the result is double by default, but Decimal128 if
+# any input value is Decimal128.
+AVG_RETURN_TYPE_TESTS: list[AvgAccumulatorTest] = [
+    AvgAccumulatorTest(
+        "type_int32_only",
+        docs=[{"v": 2}, {"v": 4}],
+        expression="$v",
+        expected="double",  # compared against the $type name of the result
+        msg="$avg should return double when all inputs are int32",
+    ),
+    AvgAccumulatorTest(
+        "type_int64_only",
+        docs=[{"v": Int64(2)}, {"v": Int64(4)}],
+        expression="$v",
+        expected="double",
+        msg="$avg should return double when all inputs are int64",
+    ),
+    AvgAccumulatorTest(
+        "type_int32_int64",
+        docs=[{"v": 2}, {"v": Int64(4)}],
+        expression="$v",
+        expected="double",
+        msg="$avg should return double for int32 and int64 mix",
+    ),
+    AvgAccumulatorTest(
+        "type_int32_double",
+        docs=[{"v": 2}, {"v": 4.0}],
+        expression="$v",
+        expected="double",
+        msg="$avg should return double for int32 and double mix",
+    ),
+    AvgAccumulatorTest(
+        "type_int64_double",
+        docs=[{"v": Int64(2)}, {"v": 4.0}],
+        expression="$v",
+        expected="double",
+        msg="$avg should return double for int64 and double mix",
+    ),
+    AvgAccumulatorTest(
+        "type_int32_decimal128",
+        docs=[{"v": 2}, {"v": Decimal128("4")}],
+        expression="$v",
+        expected="decimal",
+        msg="$avg should return Decimal128 when any input is Decimal128",
+    ),
+    AvgAccumulatorTest(
+        "type_int64_decimal128",
+        docs=[{"v": Int64(2)}, {"v": Decimal128("4")}],
+        expression="$v",
+        expected="decimal",
+        msg="$avg should return Decimal128 for int64 and Decimal128 mix",
+    ),
+    AvgAccumulatorTest(
+        "type_double_decimal128",
+        docs=[{"v": 2.0}, {"v": Decimal128("4")}],
+        expression="$v",
+        expected="decimal",
+        msg="$avg should return Decimal128 for double and Decimal128 mix",
+    ),
+    AvgAccumulatorTest(
+        "type_decimal128_before_int32",
+        docs=[{"v": Decimal128("4")}, {"v": 2}],
+        expression="$v",
+        expected="decimal",
+        msg="$avg should return Decimal128 regardless of document order",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(AVG_RETURN_TYPE_TESTS))
+def test_accumulator_avg_return_type(collection, test_case: AvgAccumulatorTest):
+    """Check the type name that $type reports for each $avg result."""
+    collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": [
+                {"$group": {"_id": None, "result": {"$avg": test_case.expression}}},
+                {"$project": {"_id": 0, "type": {"$type": "$result"}}},
+            ],
+            "cursor": {},
+        },
+    )
+    assertSuccess(result, [{"type": test_case.expected}], msg=test_case.msg)
+
+
+@dataclass(frozen=True)
+class AvgArityTest(BaseTestCase):
+    """Test case for $avg arity rejection: a full pipeline expected to fail."""
+
+    pipeline: list[dict] = None  # type: ignore[assignment]
+
+
+# Property [Arity]: $avg in accumulator context is a unary operator and
+# rejects array syntax in $group, $bucket, and $bucketAuto.
+AVG_ARITY_TESTS: list[AvgArityTest] = [
+    AvgArityTest(
+        "arity_multi_element_group",
+        pipeline=[{"$group": {"_id": None, "result": {"$avg": ["$v", "$v"]}}}],
+        msg="$avg should reject multi-element array syntax in $group",
+    ),
+    AvgArityTest(
+        "arity_empty_array_group",
+        pipeline=[{"$group": {"_id": None, "result": {"$avg": []}}}],
+        msg="$avg should reject empty array syntax in $group",
+    ),
+    AvgArityTest(
+        "arity_single_element_group",
+        pipeline=[{"$group": {"_id": None, "result": {"$avg": ["$v"]}}}],
+        msg="$avg should reject single-element array syntax in $group",
+    ),
+    AvgArityTest(
+        "arity_multi_element_bucket",
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": "$v",
+                    "boundaries": [0, 10],
+                    "output": {"result": {"$avg": ["$v", "$v"]}},
+                }
+            }
+        ],
+        msg="$avg should reject multi-element array syntax in $bucket",
+    ),
+    AvgArityTest(
+        "arity_empty_array_bucket",
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": "$v",
+                    "boundaries": [0, 10],
+                    "output": {"result": {"$avg": []}},
+                }
+            }
+        ],
+        msg="$avg should reject empty array syntax in $bucket",
+    ),
+    AvgArityTest(
+        "arity_single_element_bucket",
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": "$v",
+                    "boundaries": [0, 10],
+                    "output": {"result": {"$avg": ["$v"]}},
+                }
+            }
+        ],
+        msg="$avg should reject single-element array syntax in $bucket",
+    ),
+    AvgArityTest(
+        "arity_multi_element_bucket_auto",
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": "$v",
+                    "buckets": 1,
+                    "output": {"result": {"$avg": ["$v", "$v"]}},
+                }
+            }
+        ],
+        msg="$avg should reject multi-element array syntax in $bucketAuto",
+    ),
+    AvgArityTest(
+        "arity_empty_array_bucket_auto",
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": "$v",
+                    "buckets": 1,
+                    "output": {"result": {"$avg": []}},
+                }
+            }
+        ],
+        msg="$avg should reject empty array syntax in $bucketAuto",
+    ),
+    AvgArityTest(
+        "arity_single_element_bucket_auto",
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": "$v",
+                    "buckets": 1,
+                    "output": {"result": {"$avg": ["$v"]}},
+                }
+            }
+        ],
+        msg="$avg should reject single-element array syntax in $bucketAuto",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(AVG_ARITY_TESTS))
+def test_accumulator_avg_arity(collection, test_case: AvgArityTest):
+    """Test $avg rejects array syntax in accumulator context."""
+    collection.insert_one({"v": 1})
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertFailureCode(  # every arity case expects the same error code
+        result,
+        GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg=test_case.msg,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py
new file mode 100644
index 00000000..cfaefca8
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py
@@ -0,0 +1,349 @@
+"""
+Tests for $avg accumulator expression types and field lookup in $group context.
+
+Covers expression types (literal, field path, computed expressions, system variables)
+and field path resolution (simple, nested, missing, array traversal).
+"""
+
+from __future__ import annotations
+
+from documentdb_tests.framework.assertions import assertSuccess
+from documentdb_tests.framework.executor import execute_command
+
+# --- Helpers ---
+
+
+def _group_avg(collection, docs, avg_expr="$value"):
+    """Insert docs, run $group with $avg on avg_expr, and return the command result."""
+    collection.insert_many(docs)
+    return execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": [
+                {"$group": {"_id": None, "avg": {"$avg": avg_expr}}},
+            ],
+            "cursor": {},
+        },
+    )
+
+
+# --- 11. Expression Types (per-operator) ---
+
+
+def test_avg_group_field_path(collection):
+    """Test $avg with simple field path expression in $group."""
+    result = _group_avg(
+        collection,
+        [
+            {"_id": 1, "value": 10},
+            {"_id": 2, "value": 20},
+            {"_id": 3, "value": 30},
+        ],
+    )
+    assertSuccess(
+        result,
+        [{"_id": None, "avg": 20.0}],  # (10 + 20 + 30) / 3
+        msg="$avg with field path should average field values",
+    )
+
+
+def test_avg_group_computed_expression(collection):
+    """Test $avg with computed expression in $group."""
+    collection.insert_many(
+        [
+            {"_id": 1, "a": 2, "b": 3},
+            {"_id": 2, "a": 4, "b": 6},
+        ]
+    )
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": [
+                {"$group": {"_id": None, "avg": {"$avg": {"$multiply": ["$a", "$b"]}}}},
+            ],
+            "cursor": {},
+        },
+    )
+    # (2*3 + 4*6) / 2 = (6 + 24) / 2 = 15
+    assertSuccess(
+        result,
+        [{"_id": None, "avg": 15.0}],
+        msg="$avg with computed expression should average computed values",
+    )
+
+
+def test_avg_group_literal_numeric(collection):
+    """Test $avg with literal numeric value in $group returns that constant."""
+    collection.insert_many(
+        [
+            {"_id": 1},
+            {"_id": 2},
+            {"_id": 3},
+        ]
+    )
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": [
+                {"$group": {"_id": None, "avg": {"$avg": 5}}},
+            ],
+            "cursor": {},
+        },
+    )
+    assertSuccess(
+        result,
+        [{"_id": None, "avg": 5.0}],
+        msg="$avg with literal numeric should return that constant",
+    )
+
+
+def test_avg_group_literal_null(collection):
+    """Test $avg with null literal in $group returns null."""
+    collection.insert_many([{"_id": 1}, {"_id": 2}])
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": [
+                {"$group": {"_id": None, "avg": {"$avg": None}}},
+            ],
+            "cursor": {},
+        },
+    )
+    assertSuccess(
+        result,
+        [{"_id": None, "avg": None}],
+        msg="$avg with null literal should return null",
+    )
+
+
+def test_avg_group_cond_expression(collection):
+    """Test $avg with $cond expression in $group."""
+    collection.insert_many(
+        [
+            {"_id": 1, "value": 10, "include": True},
+            {"_id": 2, "value": 20, "include": False},
+            {"_id": 3, "value": 30, "include": True},
+        ]
+    )
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": [
+                {
+                    "$group": {
+                        "_id": None,
+                        "avg": {
+                            "$avg": {
+                                "$cond": [
+                                    "$include",
+                                    "$value",
+                                    None,
+                                ]
+                            }
+                        },
+                    }
+                },
+            ],
+            "cursor": {},
+        },
+    )
+    # Only values 10 and 30 contribute (null is ignored), avg = 20
+    assertSuccess(
+        result,
+        [{"_id": None, "avg": 20.0}],
+        msg="$avg with $cond should average only non-null conditional results",
+    )
+
+
+def test_avg_group_ifnull_expression(collection):
+    """Test $avg with $ifNull expression replacing missing values."""
+    collection.insert_many(
+        [
+            {"_id": 1, "value": 10},
+            {"_id": 2},
+            {"_id": 3, "value": 30},
+        ]
+    )
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": [
+                {
+                    "$group": {
+                        "_id": None,
+                        "avg": {"$avg": {"$ifNull": ["$value", 0]}},
+                    }
+                },
+            ],
+            "cursor": {},
+        },
+    )
+    # (10 + 0 + 30) / 3 = 13.333...
+    assertSuccess(
+        result,
+        [{"_id": None, "avg": 13.333333333333334}],
+        msg="$avg with $ifNull should replace missing with 0",
+    )
+
+
+# --- 12. Field Lookup ---
+
+
+def test_avg_group_nested_field_path(collection):
+    """Test $avg with nested field path in $group."""
+    result = _group_avg(
+        collection,
+        [
+            {"_id": 1, "nested": {"value": 10}},
+            {"_id": 2, "nested": {"value": 20}},
+            {"_id": 3, "nested": {"value": 30}},
+        ],
+        avg_expr="$nested.value",
+    )
+    assertSuccess(
+        result,
+        [{"_id": None, "avg": 20.0}],  # (10 + 20 + 30) / 3
+        msg="$avg with nested field path should resolve and average",
+    )
+
+
+def test_avg_group_missing_field(collection):
+    """Test $avg with non-existent field path returns null."""
+    result = _group_avg(
+        collection,
+        [
+            {"_id": 1, "value": 10},
+            {"_id": 2, "value": 20},
+        ],
+        avg_expr="$nonexistent",
+    )
+    assertSuccess(
+        result,
+        [{"_id": None, "avg": None}],
+        msg="$avg with non-existent field should return null",
+    )
+
+
+def test_avg_group_some_missing_field(collection):
+    """Test $avg where some documents have the field and others don't."""
+    result = _group_avg(
+        collection,
+        [
+            {"_id": 1, "value": 10},
+            {"_id": 2},
+            {"_id": 3, "value": 30},
+        ],
+    )
+    # Missing values are ignored: (10 + 30) / 2 = 20
+    assertSuccess(
+        result,
+        [{"_id": None, "avg": 20.0}],
+        msg="$avg should ignore documents with missing field",
+    )
+
+
+def test_avg_group_field_resolves_to_array(collection):
+    """Test $avg where field resolves to an array in $group — treated as non-numeric."""
+    result = _group_avg(
+        collection,
+        [
+            {"_id": 1, "value": [1, 2, 3]},
+            {"_id": 2, "value": [4, 5, 6]},
+        ],
+    )
+    assertSuccess(
+        result,
+        [{"_id": None, "avg": None}],
+        msg="$avg in $group should treat array values as non-numeric",
+    )
+
+
+def test_avg_group_mixed_array_and_numeric(collection):
+    """Test $avg where some docs have arrays and others have numerics."""
+    result = _group_avg(
+        collection,
+        [
+            {"_id": 1, "value": [1, 2, 3]},
+            {"_id": 2, "value": 10},
+            {"_id": 3, "value": 20},
+        ],
+    )
+    # Array is ignored: (10 + 20) / 2 = 15
+    assertSuccess(
+        result,
+        [{"_id": None, "avg": 15.0}],
msg="$avg in $group should ignore array values and average numerics", + ) + + +def test_avg_group_deeply_nested_path(collection): + """Test $avg with deeply nested field path.""" + result = _group_avg( + collection, + [ + {"_id": 1, "a": {"b": {"c": {"d": 10}}}}, + {"_id": 2, "a": {"b": {"c": {"d": 20}}}}, + ], + avg_expr="$a.b.c.d", + ) + assertSuccess( + result, + [{"_id": None, "avg": 15.0}], + msg="$avg with deeply nested path should resolve correctly", + ) + + +def test_avg_group_intermediate_null(collection): + """Test $avg where intermediate field in path is null.""" + result = _group_avg( + collection, + [ + {"_id": 1, "a": {"b": 10}}, + {"_id": 2, "a": None}, + {"_id": 3, "a": {"b": 30}}, + ], + avg_expr="$a.b", + ) + # Doc 2 has null intermediate, treated as missing: (10 + 30) / 2 = 20 + assertSuccess( + result, + [{"_id": None, "avg": 20.0}], + msg="$avg should treat null intermediate as missing", + ) + + +def test_avg_group_multiple_accumulators(collection): + """Test multiple $avg accumulators in same $group stage.""" + collection.insert_many( + [ + {"_id": 1, "a": 10, "b": 100}, + {"_id": 2, "a": 20, "b": 200}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + { + "$group": { + "_id": None, + "avg_a": {"$avg": "$a"}, + "avg_b": {"$avg": "$b"}, + } + }, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [{"_id": None, "avg_a": 15.0, "avg_b": 150.0}], + msg="Multiple $avg accumulators should work independently", + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py new file mode 100644 index 00000000..70dea493 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py @@ -0,0 +1,190 @@ +""" +Tests for $avg accumulator overflow, boundary values, and decimal128 precision +in $group 
context. + +These test the accumulator's running sum behavior across documents, +which differs from expression-context evaluation on a single array. +""" + +from __future__ import annotations + +from bson import Decimal128, Int64 + +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.test_constants import ( + DECIMAL128_LARGE_EXPONENT, + DECIMAL128_MAX, + DECIMAL128_MIN, + DECIMAL128_SMALL_EXPONENT, + DOUBLE_MIN_SUBNORMAL, + DOUBLE_NEAR_MAX, + INT32_MAX, + INT32_MIN, + INT64_MAX, + INT64_MIN, +) + +# --- Helpers --- + + +def _group_avg_values(collection, values): + """Insert documents with given values and return $avg across all.""" + docs = [{"_id": i, "v": v} for i, v in enumerate(values)] + collection.insert_many(docs) + return execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$group": {"_id": None, "avg": {"$avg": "$v"}}}, + ], + "cursor": {}, + }, + ) + + +# --- Integer Boundary Values --- + + +def test_avg_group_int32_max_pair(collection): + """Test $avg of two INT32_MAX values.""" + result = _group_avg_values(collection, [INT32_MAX, INT32_MAX]) + assertSuccess( + result, + [{"_id": None, "avg": float(INT32_MAX)}], + msg="avg of two INT32_MAX should return INT32_MAX as double", + ) + + +def test_avg_group_int32_min_pair(collection): + """Test $avg of two INT32_MIN values.""" + result = _group_avg_values(collection, [INT32_MIN, INT32_MIN]) + assertSuccess( + result, + [{"_id": None, "avg": float(INT32_MIN)}], + msg="avg of two INT32_MIN should return INT32_MIN as double", + ) + + +def test_avg_group_int32_max_and_min(collection): + """Test $avg of INT32_MAX and INT32_MIN.""" + result = _group_avg_values(collection, [INT32_MAX, INT32_MIN]) + # (2147483647 + -2147483648) / 2 = -0.5 + assertSuccess( + result, + [{"_id": None, "avg": -0.5}], + msg="avg of INT32_MAX and INT32_MIN should be -0.5", + ) + + +def 
test_avg_group_int64_max_pair(collection): + """Test $avg of two INT64_MAX values — potential precision loss in double.""" + result = _group_avg_values(collection, [INT64_MAX, INT64_MAX]) + assertSuccess( + result, + [{"_id": None, "avg": 9.223372036854776e18}], + msg="avg of two INT64_MAX should handle overflow", + ) + + +def test_avg_group_int64_min_pair(collection): + """Test $avg of two INT64_MIN values.""" + result = _group_avg_values(collection, [INT64_MIN, INT64_MIN]) + assertSuccess( + result, + [{"_id": None, "avg": -9.223372036854776e18}], + msg="avg of two INT64_MIN should handle overflow", + ) + + +def test_avg_group_int64_max_and_one(collection): + """Test $avg of INT64_MAX and 1.""" + result = _group_avg_values(collection, [INT64_MAX, Int64(1)]) + assertSuccess( + result, + [{"_id": None, "avg": 4.611686018427388e18}], + msg="avg of INT64_MAX and 1", + ) + + +# --- Double Boundary Values --- + + +def test_avg_group_double_near_max_pair(collection): + """Test $avg of two DOUBLE_NEAR_MAX values — sum overflows to inf.""" + result = _group_avg_values(collection, [DOUBLE_NEAR_MAX, DOUBLE_NEAR_MAX]) + assertSuccess( + result, + [{"_id": None, "avg": float("inf")}], + msg="avg of two DOUBLE_NEAR_MAX overflows sum to inf", + ) + + +def test_avg_group_double_subnormal(collection): + """Test $avg of subnormal double values.""" + result = _group_avg_values(collection, [DOUBLE_MIN_SUBNORMAL, DOUBLE_MIN_SUBNORMAL]) + assertSuccess( + result, + [{"_id": None, "avg": DOUBLE_MIN_SUBNORMAL}], + msg="avg of two subnormal doubles should return subnormal", + ) + + +# --- Decimal128 Precision --- + + +def test_avg_group_decimal128_high_precision(collection): + """Test $avg of decimal128 values requiring high precision.""" + result = _group_avg_values( + collection, + [ + Decimal128("1.000000000000000000000000000000001"), + Decimal128("2.999999999999999999999999999999999"), + ], + ) + assertSuccess( + result, + [{"_id": None, "avg": 
Decimal128("2.000000000000000000000000000000000")}], + msg="decimal128 avg should preserve high precision", + ) + + +def test_avg_group_decimal128_large_exponent(collection): + """Test $avg with decimal128 large exponent values.""" + result = _group_avg_values(collection, [DECIMAL128_LARGE_EXPONENT, DECIMAL128_LARGE_EXPONENT]) + assertSuccess( + result, + [{"_id": None, "avg": DECIMAL128_LARGE_EXPONENT}], + msg="avg of two identical large exponent values should return same value", + ) + + +def test_avg_group_decimal128_small_exponent(collection): + """Test $avg with decimal128 small exponent values.""" + result = _group_avg_values(collection, [DECIMAL128_SMALL_EXPONENT, DECIMAL128_SMALL_EXPONENT]) + assertSuccess( + result, + [{"_id": None, "avg": DECIMAL128_SMALL_EXPONENT}], + msg="avg of two identical small exponent values should return same value", + ) + + +def test_avg_group_decimal128_max_and_min(collection): + """Test $avg of DECIMAL128_MAX and DECIMAL128_MIN.""" + result = _group_avg_values(collection, [DECIMAL128_MAX, DECIMAL128_MIN]) + assertSuccess( + result, + [{"_id": None, "avg": Decimal128("0")}], + msg="avg of DECIMAL128_MAX and DECIMAL128_MIN", + ) + + +def test_avg_group_decimal128_extreme_exponent_diff(collection): + """Test $avg of values with extreme exponent difference.""" + result = _group_avg_values(collection, [Decimal128("1E+6144"), Decimal128("1")]) + assertSuccess( + result, + [{"_id": None, "avg": Decimal128("5.00000000000000000000000000000000E+6143")}], + msg="avg with extreme exponent difference", + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py new file mode 100644 index 00000000..93eaaeb0 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py @@ -0,0 +1,449 @@ +""" +Tests for $avg accumulator in $group context. 
+ +Covers numeric equivalence in grouping, single/empty groups, +precision edge cases, multiple groups, and comparison with $sum. +""" + +from __future__ import annotations + +from bson import Decimal128, Int64 + +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command + +# --- Helpers --- + + +def _group_avg(collection, docs, group_id="$category", field="$value"): + """Insert docs and run $group with $avg.""" + collection.insert_many(docs) + return execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$group": {"_id": group_id, "avg": {"$avg": field}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + }, + ) + + +# --- 13. Numeric Equivalence in Grouping --- + + +def test_avg_group_numeric_equivalence_grouping(collection): + """Test $avg groups numerically equivalent values of different types into same group.""" + collection.insert_many( + [ + {"_id": 1, "key": 1, "value": 10}, + {"_id": 2, "key": Int64(1), "value": 20}, + {"_id": 3, "key": 1.0, "value": 30}, + {"_id": 4, "key": Decimal128("1"), "value": 40}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$group": {"_id": "$key", "avg": {"$avg": "$value"}}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [{"_id": 1, "avg": 25.0}], + msg="Numerically equivalent group keys should produce a single group", + ) + + +def test_avg_group_zero_equivalence(collection): + """Test $avg groups all zero representations into same group.""" + collection.insert_many( + [ + {"_id": 1, "key": 0, "value": 10}, + {"_id": 2, "key": Int64(0), "value": 20}, + {"_id": 3, "key": 0.0, "value": 30}, + {"_id": 4, "key": Decimal128("0"), "value": 40}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$group": {"_id": "$key", "avg": {"$avg": "$value"}}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + 
[{"_id": 0, "avg": 25.0}], + msg="All zero representations should group together", + ) + + +# --- 16. Single Document Group / Empty Group --- + + +def test_avg_group_single_document(collection): + """Test $avg with single document in group returns that value.""" + result = _group_avg( + collection, + [{"_id": 1, "category": "A", "value": 42}], + ) + assertSuccess( + result, + [{"_id": "A", "avg": 42.0}], + msg="$avg of single document should return that value as double", + ) + + +def test_avg_group_single_document_non_numeric(collection): + """Test $avg with single non-numeric document returns null.""" + result = _group_avg( + collection, + [{"_id": 1, "category": "A", "value": "hello"}], + ) + assertSuccess( + result, + [{"_id": "A", "avg": None}], + msg="$avg of single non-numeric document should return null", + ) + + +def test_avg_group_single_document_null(collection): + """Test $avg with single null document returns null.""" + result = _group_avg( + collection, + [{"_id": 1, "category": "A", "value": None}], + ) + assertSuccess( + result, + [{"_id": "A", "avg": None}], + msg="$avg of single null document should return null", + ) + + +def test_avg_group_single_document_missing_field(collection): + """Test $avg with single document missing the field returns null.""" + result = _group_avg( + collection, + [{"_id": 1, "category": "A"}], + ) + assertSuccess( + result, + [{"_id": "A", "avg": None}], + msg="$avg of single document with missing field should return null", + ) + + +def test_avg_group_empty_collection(collection): + """Test $avg on empty collection produces no output documents.""" + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [], + msg="$avg on empty collection should produce no output", + ) + + +def test_avg_group_all_filtered_out(collection): + """Test $avg where $match filters all 
documents produces no output.""" + collection.insert_many( + [ + {"_id": 1, "category": "A", "value": 10}, + {"_id": 2, "category": "A", "value": 20}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$match": {"category": "Z"}}, + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [], + msg="$avg after filtering all documents should produce no output", + ) + + +def test_avg_group_null_id(collection): + """Test $avg with _id: null groups entire collection.""" + collection.insert_many( + [ + {"_id": 1, "value": 10}, + {"_id": 2, "value": 20}, + {"_id": 3, "value": 30}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$group": {"_id": None, "avg": {"$avg": "$value"}}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [{"_id": None, "avg": 20.0}], + msg="$avg with _id: null should average entire collection", + ) + + +# --- 18. 
Precision Edge Cases --- + + +def test_avg_group_odd_sum_two_int32(collection): + """Test $avg of two int32 values whose sum is odd produces fractional result.""" + result = _group_avg( + collection, + [ + {"_id": 1, "category": "A", "value": 1}, + {"_id": 2, "category": "A", "value": 2}, + ], + ) + assertSuccess( + result, + [{"_id": "A", "avg": 1.5}], + msg="$avg of 1 and 2 should return 1.5", + ) + + +def test_avg_group_repeating_decimal(collection): + """Test $avg producing repeating decimal (1+1+2)/3.""" + collection.insert_many( + [ + {"_id": 1, "category": "A", "value": 1}, + {"_id": 2, "category": "A", "value": 1}, + {"_id": 3, "category": "A", "value": 2}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [{"_id": "A", "avg": 1.3333333333333333}], + msg="$avg of 1,1,2 should return 4/3", + ) + + +def test_avg_group_sequence_1_to_100(collection): + """Test $avg of sequence 1..100 returns 50.5.""" + docs = [{"_id": i, "category": "A", "value": i} for i in range(1, 101)] + result = _group_avg(collection, docs) + assertSuccess( + result, + [{"_id": "A", "avg": 50.5}], + msg="$avg of 1..100 should return 50.5", + ) + + +def test_avg_group_large_count_identical(collection): + """Test $avg of 1000 identical values returns that value.""" + docs = [{"_id": i, "category": "A", "value": 7} for i in range(1000)] + result = _group_avg(collection, docs) + assertSuccess( + result, + [{"_id": "A", "avg": 7.0}], + msg="$avg of 1000 identical values should return that value", + ) + + +# --- 20. 
Multiple Groups with Different Characteristics --- + + +def test_avg_group_different_counts(collection): + """Test $avg where groups have different document counts.""" + collection.insert_many( + [ + {"_id": 1, "category": "A", "value": 10}, + {"_id": 2, "category": "B", "value": 20}, + {"_id": 3, "category": "B", "value": 40}, + {"_id": 4, "category": "C", "value": 5}, + {"_id": 5, "category": "C", "value": 10}, + {"_id": 6, "category": "C", "value": 15}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [ + {"_id": "A", "avg": 10.0}, + {"_id": "B", "avg": 30.0}, + {"_id": "C", "avg": 10.0}, + ], + msg="$avg should compute correct average per group with different counts", + ) + + +def test_avg_group_one_all_nulls_one_all_numeric(collection): + """Test $avg where one group has all nulls and another has numerics.""" + collection.insert_many( + [ + {"_id": 1, "category": "A", "value": None}, + {"_id": 2, "category": "A", "value": None}, + {"_id": 3, "category": "B", "value": 10}, + {"_id": 4, "category": "B", "value": 20}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [ + {"_id": "A", "avg": None}, + {"_id": "B", "avg": 15.0}, + ], + msg="Group with all nulls returns null, group with numerics returns average", + ) + + +def test_avg_group_mixed_types_per_group(collection): + """Test $avg where groups have different numeric type distributions.""" + collection.insert_many( + [ + {"_id": 1, "category": "int", "value": 10}, + {"_id": 2, "category": "int", "value": 20}, + {"_id": 3, "category": "dec", "value": Decimal128("10")}, + {"_id": 4, "category": "dec", "value": 
Decimal128("20")}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [ + {"_id": "dec", "avg": Decimal128("15")}, + {"_id": "int", "avg": 15.0}, + ], + msg="Int group returns double, Decimal128 group returns Decimal128", + ) + + +# --- 21. Comparison with Related Operators --- + + +def test_avg_equals_sum_divided_by_count(collection): + """Test $avg equals $sum / count for int32 values.""" + collection.insert_many( + [ + {"_id": 1, "category": "A", "value": 10}, + {"_id": 2, "category": "A", "value": 20}, + {"_id": 3, "category": "A", "value": 30}, + {"_id": 4, "category": "A", "value": 40}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + { + "$group": { + "_id": "$category", + "avg": {"$avg": "$value"}, + "sum": {"$sum": "$value"}, + "count": {"$sum": 1}, + } + }, + ], + "cursor": {}, + }, + ) + # avg should be 25.0, sum should be 100, count should be 4 + assertSuccess( + result, + [{"_id": "A", "avg": 25.0, "sum": 100, "count": 4}], + msg="$avg should equal $sum / count", + ) + + +def test_avg_vs_sum_non_numeric_handling(collection): + """Test $avg returns null but $sum returns 0 when all values are non-numeric.""" + collection.insert_many( + [ + {"_id": 1, "category": "A", "value": "hello"}, + {"_id": 2, "category": "A", "value": "world"}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + { + "$group": { + "_id": "$category", + "avg": {"$avg": "$value"}, + "sum": {"$sum": "$value"}, + } + }, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [{"_id": "A", "avg": None, "sum": 0}], + msg="$avg returns null for non-numeric but $sum returns 0", + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py 
b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py new file mode 100644 index 00000000..17dc26f6 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py @@ -0,0 +1,410 @@ +""" +Tests for $avg accumulator data type handling in $group context. + +Covers type promotion rules, NaN/Infinity propagation, null/missing handling, +and non-numeric type ignoring when accumulating across documents. +""" + +from __future__ import annotations + +from bson import Decimal128, Int64 + +from documentdb_tests.framework.assertions import assertSuccess, assertSuccessNaN +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.test_constants import ( + DECIMAL128_INFINITY, + DECIMAL128_NAN, + DECIMAL128_NEGATIVE_INFINITY, + DECIMAL128_NEGATIVE_ZERO, + DECIMAL128_ZERO, + DOUBLE_NEGATIVE_ZERO, + DOUBLE_ZERO, + FLOAT_INFINITY, + FLOAT_NEGATIVE_INFINITY, +) + +# --- Helpers --- + + +def _group_avg_values(collection, values): + """Insert documents with given values and return $avg across all.""" + docs = [{"_id": i, "v": v} for i, v in enumerate(values)] + collection.insert_many(docs) + return execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$group": {"_id": None, "avg": {"$avg": "$v"}}}, + ], + "cursor": {}, + }, + ) + + +# --- Type Promotion in $group --- + + +def test_avg_group_all_int32(collection): + """Test $avg over int32 documents returns double.""" + result = _group_avg_values(collection, [10, 20, 30]) + assertSuccess(result, [{"_id": None, "avg": 20.0}], msg="int32 avg should return double") + + +def test_avg_group_all_int64(collection): + """Test $avg over int64 documents returns double.""" + result = _group_avg_values(collection, [Int64(10), Int64(20), Int64(30)]) + assertSuccess(result, [{"_id": None, "avg": 20.0}], msg="int64 avg should return double") + + +def test_avg_group_all_double(collection): + 
"""Test $avg over double documents returns double.""" + result = _group_avg_values(collection, [10.0, 20.0, 30.0]) + assertSuccess(result, [{"_id": None, "avg": 20.0}], msg="double avg should return double") + + +def test_avg_group_all_decimal128(collection): + """Test $avg over decimal128 documents returns decimal128.""" + result = _group_avg_values(collection, [Decimal128("10"), Decimal128("20"), Decimal128("30")]) + assertSuccess( + result, + [{"_id": None, "avg": Decimal128("20")}], + msg="decimal128 avg should return decimal128", + ) + + +def test_avg_group_int32_and_int64(collection): + """Test $avg over mixed int32/int64 returns double.""" + result = _group_avg_values(collection, [10, Int64(20)]) + assertSuccess(result, [{"_id": None, "avg": 15.0}], msg="int32+int64 avg should return double") + + +def test_avg_group_int32_and_double(collection): + """Test $avg over mixed int32/double returns double.""" + result = _group_avg_values(collection, [10, 20.0]) + assertSuccess(result, [{"_id": None, "avg": 15.0}], msg="int32+double avg should return double") + + +def test_avg_group_int32_and_decimal128(collection): + """Test $avg over mixed int32/decimal128 returns decimal128.""" + result = _group_avg_values(collection, [10, Decimal128("20")]) + assertSuccess( + result, + [{"_id": None, "avg": Decimal128("15")}], + msg="int32+decimal128 avg should return decimal128", + ) + + +def test_avg_group_int64_and_decimal128(collection): + """Test $avg over mixed int64/decimal128 returns decimal128.""" + result = _group_avg_values(collection, [Int64(10), Decimal128("20")]) + assertSuccess( + result, + [{"_id": None, "avg": Decimal128("15")}], + msg="int64+decimal128 avg should return decimal128", + ) + + +def test_avg_group_double_and_decimal128(collection): + """Test $avg over mixed double/decimal128 returns decimal128.""" + result = _group_avg_values(collection, [10.0, Decimal128("20")]) + assertSuccess( + result, + [{"_id": None, "avg": Decimal128("15")}], + 
msg="double+decimal128 avg should return decimal128", + ) + + +def test_avg_group_all_four_types(collection): + """Test $avg over all numeric types returns decimal128.""" + result = _group_avg_values(collection, [10, Int64(20), 30.0, Decimal128("40")]) + assertSuccess( + result, + [{"_id": None, "avg": Decimal128("25")}], + msg="all four numeric types avg should return decimal128", + ) + + +def test_avg_group_fractional_result_from_int32(collection): + """Test $avg of int32 values producing fractional result returns double.""" + result = _group_avg_values(collection, [1, 2]) + assertSuccess( + result, + [{"_id": None, "avg": 1.5}], + msg="int32 avg producing fraction should return double", + ) + + +# --- NaN Propagation in $group --- + + +def test_avg_group_nan_propagates(collection): + """Test $avg where one document has NaN propagates NaN.""" + result = _group_avg_values(collection, [10, float("nan"), 30]) + assertSuccessNaN( + result, + [{"_id": None, "avg": float("nan")}], + msg="NaN in group should propagate to result", + ) + + +def test_avg_group_all_nan(collection): + """Test $avg where all documents have NaN returns NaN.""" + result = _group_avg_values(collection, [float("nan"), float("nan")]) + assertSuccessNaN( + result, + [{"_id": None, "avg": float("nan")}], + msg="All NaN in group should return NaN", + ) + + +def test_avg_group_decimal128_nan_propagates(collection): + """Test $avg where one document has Decimal128 NaN propagates.""" + result = _group_avg_values(collection, [Decimal128("10"), DECIMAL128_NAN, Decimal128("30")]) + assertSuccessNaN( + result, + [{"_id": None, "avg": DECIMAL128_NAN}], + msg="Decimal128 NaN in group should propagate", + ) + + +def test_avg_group_nan_dominates_infinity(collection): + """Test $avg with NaN and Infinity returns NaN.""" + result = _group_avg_values(collection, [float("nan"), FLOAT_INFINITY]) + assertSuccessNaN( + result, + [{"_id": None, "avg": float("nan")}], + msg="NaN should dominate Infinity in group", + ) + 
+ +def test_avg_group_cross_type_nan_decimal(collection): + """Test $avg with double NaN and Decimal128 value returns Decimal128 NaN.""" + result = _group_avg_values(collection, [float("nan"), Decimal128("5")]) + assertSuccessNaN( + result, + [{"_id": None, "avg": DECIMAL128_NAN}], + msg="double NaN + Decimal128 should return Decimal128 NaN", + ) + + +# --- Infinity in $group --- + + +def test_avg_group_infinity(collection): + """Test $avg where documents include Infinity returns Infinity.""" + result = _group_avg_values(collection, [FLOAT_INFINITY, 10]) + assertSuccess( + result, + [{"_id": None, "avg": FLOAT_INFINITY}], + msg="Infinity in group should propagate", + ) + + +def test_avg_group_negative_infinity(collection): + """Test $avg where documents include -Infinity returns -Infinity.""" + result = _group_avg_values(collection, [FLOAT_NEGATIVE_INFINITY, 10]) + assertSuccess( + result, + [{"_id": None, "avg": FLOAT_NEGATIVE_INFINITY}], + msg="-Infinity in group should propagate", + ) + + +def test_avg_group_inf_neg_inf_cancel(collection): + """Test $avg with Infinity and -Infinity documents returns NaN.""" + result = _group_avg_values(collection, [FLOAT_INFINITY, FLOAT_NEGATIVE_INFINITY]) + assertSuccessNaN( + result, + [{"_id": None, "avg": float("nan")}], + msg="Infinity + -Infinity in group should return NaN", + ) + + +def test_avg_group_decimal128_infinity(collection): + """Test $avg with Decimal128 Infinity documents.""" + result = _group_avg_values(collection, [DECIMAL128_INFINITY, Decimal128("10")]) + assertSuccess( + result, + [{"_id": None, "avg": DECIMAL128_INFINITY}], + msg="Decimal128 Infinity in group should propagate", + ) + + +def test_avg_group_decimal128_inf_neg_inf_cancel(collection): + """Test $avg with Decimal128 Infinity and -Infinity returns Decimal128 NaN.""" + result = _group_avg_values(collection, [DECIMAL128_INFINITY, DECIMAL128_NEGATIVE_INFINITY]) + assertSuccessNaN( + result, + [{"_id": None, "avg": DECIMAL128_NAN}], + 
msg="Decimal128 Inf + -Inf in group should return Decimal128 NaN", + ) + + +# --- Null / Missing in $group --- + + +def test_avg_group_all_null(collection): + """Test $avg where all documents have null returns null.""" + result = _group_avg_values(collection, [None, None, None]) + assertSuccess(result, [{"_id": None, "avg": None}], msg="All null in group should return null") + + +def test_avg_group_some_null(collection): + """Test $avg ignores null documents and averages the rest.""" + result = _group_avg_values(collection, [10, None, 30]) + assertSuccess( + result, + [{"_id": None, "avg": 20.0}], + msg="Null docs should be ignored, avg of 10 and 30 is 20", + ) + + +def test_avg_group_all_missing(collection): + """Test $avg where all documents are missing the field returns null.""" + docs = [{"_id": i, "other": i} for i in range(3)] + collection.insert_many(docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + "cursor": {}, + }, + ) + assertSuccess(result, [{"_id": None, "avg": None}], msg="All missing fields should return null") + + +def test_avg_group_some_missing(collection): + """Test $avg ignores documents with missing field.""" + collection.insert_many( + [ + {"_id": 0, "v": 10}, + {"_id": 1}, + {"_id": 2, "v": 30}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + "cursor": {}, + }, + ) + assertSuccess( + result, + [{"_id": None, "avg": 20.0}], + msg="Missing field docs should be ignored", + ) + + +def test_avg_group_mix_null_missing_numeric(collection): + """Test $avg with mix of null, missing, and numeric values.""" + collection.insert_many( + [ + {"_id": 0, "v": 10}, + {"_id": 1, "v": None}, + {"_id": 2}, + {"_id": 3, "v": 30}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [{"$group": {"_id": None, 
"avg": {"$avg": "$v"}}}], + "cursor": {}, + }, + ) + assertSuccess( + result, + [{"_id": None, "avg": 20.0}], + msg="Only numeric values should contribute to average", + ) + + +# --- Non-numeric types ignored in $group --- + + +def test_avg_group_ignores_strings(collection): + """Test $avg ignores string values in group.""" + result = _group_avg_values(collection, [10, "hello", 30]) + assertSuccess( + result, + [{"_id": None, "avg": 20.0}], + msg="String values should be ignored in group avg", + ) + + +def test_avg_group_ignores_booleans(collection): + """Test $avg ignores boolean values in group.""" + result = _group_avg_values(collection, [10, True, False, 30]) + assertSuccess( + result, + [{"_id": None, "avg": 20.0}], + msg="Boolean values should be ignored in group avg", + ) + + +def test_avg_group_ignores_arrays(collection): + """Test $avg ignores array values in group.""" + result = _group_avg_values(collection, [10, [1, 2, 3], 30]) + assertSuccess( + result, + [{"_id": None, "avg": 20.0}], + msg="Array values should be ignored in group avg", + ) + + +def test_avg_group_ignores_objects(collection): + """Test $avg ignores embedded document values in group.""" + result = _group_avg_values(collection, [10, {"nested": 99}, 30]) + assertSuccess( + result, + [{"_id": None, "avg": 20.0}], + msg="Object values should be ignored in group avg", + ) + + +def test_avg_group_all_non_numeric(collection): + """Test $avg returns null when all values are non-numeric.""" + result = _group_avg_values(collection, ["a", True, [1], {"x": 1}]) + assertSuccess( + result, + [{"_id": None, "avg": None}], + msg="All non-numeric values should return null", + ) + + +def test_avg_group_boolean_not_numeric(collection): + """Test $avg treats boolean as non-numeric (false != 0, true != 1).""" + result = _group_avg_values(collection, [False, True]) + assertSuccess( + result, + [{"_id": None, "avg": None}], + msg="Booleans should not be treated as 0/1 in avg", + ) + + +# --- Negative Zero in 
$group --- + + +def test_avg_group_negative_zero_double(collection): + """Test $avg normalizes double negative zero to positive zero.""" + result = _group_avg_values(collection, [DOUBLE_NEGATIVE_ZERO, DOUBLE_NEGATIVE_ZERO]) + assertSuccess( + result, + [{"_id": None, "avg": DOUBLE_ZERO}], + msg="Double -0.0 avg should normalize to 0.0", + ) + + +def test_avg_group_negative_zero_decimal128(collection): + """Test $avg normalizes Decimal128 negative zero to positive zero.""" + result = _group_avg_values(collection, [DECIMAL128_NEGATIVE_ZERO, DECIMAL128_NEGATIVE_ZERO]) + assertSuccess( + result, + [{"_id": None, "avg": DECIMAL128_ZERO}], + msg="Decimal128 -0 avg should normalize to 0", + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py new file mode 100644 index 00000000..c5601f48 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py @@ -0,0 +1,643 @@ +""" +Tests for $avg in various pipeline contexts. + +Covers $group, $bucket, $setWindowFields, $project/$addFields, +$match+$expr, and pipeline interaction patterns. +""" + +from __future__ import annotations + +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command + +# --- 14. 
Pipeline Contexts --- + +# -- $group with computed _id -- + + +def test_avg_group_computed_id(collection): + """Test $avg with computed _id expression in $group.""" + collection.insert_many( + [ + {"_id": 1, "value": 10, "score": 80}, + {"_id": 2, "value": 20, "score": 90}, + {"_id": 3, "value": 30, "score": 85}, + {"_id": 4, "value": 40, "score": 95}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + { + "$group": { + "_id": {"$gt": ["$score", 85]}, + "avg": {"$avg": "$value"}, + } + }, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + }, + ) + # score <= 85: docs 1,3 → avg(10,30) = 20 + # score > 85: docs 2,4 → avg(20,40) = 30 + assertSuccess( + result, + [ + {"_id": False, "avg": 20.0}, + {"_id": True, "avg": 30.0}, + ], + msg="$avg with computed _id should group and average correctly", + ) + + +# -- $bucket -- + + +def test_avg_bucket(collection): + """Test $avg in $bucket output specification.""" + collection.insert_many( + [ + {"_id": 1, "score": 15, "value": 10}, + {"_id": 2, "score": 25, "value": 20}, + {"_id": 3, "score": 35, "value": 30}, + {"_id": 4, "score": 45, "value": 40}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + { + "$bucket": { + "groupBy": "$score", + "boundaries": [0, 20, 40, 60], + "output": {"avg_value": {"$avg": "$value"}}, + } + }, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [ + {"_id": 0, "avg_value": 10.0}, + {"_id": 20, "avg_value": 25.0}, + {"_id": 40, "avg_value": 40.0}, + ], + msg="$avg in $bucket should compute average per bucket", + ) + + +# -- $setWindowFields -- + + +def test_avg_window_unbounded(collection): + """Test $avg with unbounded window returns partition average.""" + collection.insert_many( + [ + {"_id": 1, "value": 10}, + {"_id": 2, "value": 20}, + {"_id": 3, "value": 30}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$sort": {"_id": 
1}}, + { + "$setWindowFields": { + "sortBy": {"_id": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"documents": ["unbounded", "unbounded"]}, + } + }, + } + }, + {"$project": {"_id": 1, "value": 1, "avg": 1}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [ + {"_id": 1, "value": 10, "avg": 20.0}, + {"_id": 2, "value": 20, "avg": 20.0}, + {"_id": 3, "value": 30, "avg": 20.0}, + ], + msg="$avg with unbounded window should return full partition average", + ) + + +def test_avg_window_cumulative(collection): + """Test $avg with cumulative window [unbounded, current].""" + collection.insert_many( + [ + {"_id": 1, "value": 10}, + {"_id": 2, "value": 20}, + {"_id": 3, "value": 30}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$sort": {"_id": 1}}, + { + "$setWindowFields": { + "sortBy": {"_id": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"documents": ["unbounded", "current"]}, + } + }, + } + }, + {"$project": {"_id": 1, "value": 1, "avg": 1}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [ + {"_id": 1, "value": 10, "avg": 10.0}, + {"_id": 2, "value": 20, "avg": 15.0}, + {"_id": 3, "value": 30, "avg": 20.0}, + ], + msg="$avg with cumulative window should compute running average", + ) + + +def test_avg_window_sliding(collection): + """Test $avg with sliding window [-1, 1].""" + collection.insert_many( + [ + {"_id": 1, "value": 10}, + {"_id": 2, "value": 20}, + {"_id": 3, "value": 30}, + {"_id": 4, "value": 40}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$sort": {"_id": 1}}, + { + "$setWindowFields": { + "sortBy": {"_id": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"documents": [-1, 1]}, + } + }, + } + }, + {"$project": {"_id": 1, "value": 1, "avg": 1}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [ + {"_id": 1, "value": 10, "avg": 15.0}, # avg(10,20) + {"_id": 2, 
"value": 20, "avg": 20.0}, # avg(10,20,30) + {"_id": 3, "value": 30, "avg": 30.0}, # avg(20,30,40) + {"_id": 4, "value": 40, "avg": 35.0}, # avg(30,40) + ], + msg="$avg with sliding window should compute local average", + ) + + +def test_avg_window_current_only(collection): + """Test $avg with window [0, 0] returns current document value.""" + collection.insert_many( + [ + {"_id": 1, "value": 10}, + {"_id": 2, "value": 20}, + {"_id": 3, "value": 30}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$sort": {"_id": 1}}, + { + "$setWindowFields": { + "sortBy": {"_id": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"documents": [0, 0]}, + } + }, + } + }, + {"$project": {"_id": 1, "value": 1, "avg": 1}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [ + {"_id": 1, "value": 10, "avg": 10.0}, + {"_id": 2, "value": 20, "avg": 20.0}, + {"_id": 3, "value": 30, "avg": 30.0}, + ], + msg="$avg with [0,0] window should return current document value", + ) + + +def test_avg_window_with_nulls(collection): + """Test $avg in $setWindowFields ignores null values in window.""" + collection.insert_many( + [ + {"_id": 1, "value": 10}, + {"_id": 2, "value": None}, + {"_id": 3, "value": 30}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$sort": {"_id": 1}}, + { + "$setWindowFields": { + "sortBy": {"_id": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"documents": ["unbounded", "unbounded"]}, + } + }, + } + }, + {"$project": {"_id": 1, "value": 1, "avg": 1}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [ + {"_id": 1, "value": 10, "avg": 20.0}, + {"_id": 2, "value": None, "avg": 20.0}, + {"_id": 3, "value": 30, "avg": 20.0}, + ], + msg="$avg in window should ignore null values", + ) + + +# -- $project / $addFields context -- + + +def test_avg_in_project_array_literal(collection): + """Test $avg in $project with array 
of literal values.""" + result = execute_command( + collection, + { + "aggregate": 1, + "pipeline": [ + {"$documents": [{}]}, + {"$project": {"_id": 0, "avg": {"$avg": [10, 20, 30]}}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [{"avg": 20.0}], + msg="$avg in $project with literal array should average values", + ) + + +def test_avg_in_addfields(collection): + """Test $avg in $addFields context.""" + collection.insert_many( + [ + {"_id": 1, "scores": [80, 90, 100]}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$addFields": {"avg_score": {"$avg": "$scores"}}}, + {"$project": {"_id": 0, "avg_score": 1}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [{"avg_score": 90.0}], + msg="$avg in $addFields should traverse array field and average", + ) + + +def test_avg_in_match_expr(collection): + """Test $avg used inside $expr in $match stage.""" + collection.insert_many( + [ + {"_id": 1, "scores": [80, 90, 100]}, + {"_id": 2, "scores": [40, 50, 60]}, + {"_id": 3, "scores": [70, 80, 90]}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$match": {"$expr": {"$gt": [{"$avg": "$scores"}, 75]}}}, + {"$project": {"_id": 1}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + }, + ) + # avg([80,90,100])=90 > 75 ✓, avg([40,50,60])=50 < 75 ✗, avg([70,80,90])=80 > 75 ✓ + assertSuccess( + result, + [{"_id": 1}, {"_id": 3}], + msg="$avg in $match $expr should filter based on computed average", + ) + + +# --- 19. 
Pipeline Interaction --- + + +def test_avg_bucketauto(collection): + """Test $avg in $bucketAuto output specification.""" + collection.insert_many( + [ + {"_id": 1, "score": 10, "value": 100}, + {"_id": 2, "score": 20, "value": 200}, + {"_id": 3, "score": 30, "value": 300}, + {"_id": 4, "score": 40, "value": 400}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + { + "$bucketAuto": { + "groupBy": "$score", + "buckets": 2, + "output": {"avg_value": {"$avg": "$value"}}, + } + }, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [ + {"_id": {"min": 10, "max": 30}, "avg_value": 150.0}, + {"_id": {"min": 30, "max": 40}, "avg_value": 350.0}, + ], + msg="$avg in $bucketAuto should compute average per auto-bucket", + ) + + +def test_avg_window_range_based(collection): + """Test $avg with range-based window on numeric sort key.""" + collection.insert_many( + [ + {"_id": 1, "pos": 0, "value": 10}, + {"_id": 2, "pos": 5, "value": 20}, + {"_id": 3, "pos": 10, "value": 30}, + {"_id": 4, "pos": 15, "value": 40}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$sort": {"pos": 1}}, + { + "$setWindowFields": { + "sortBy": {"pos": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"range": [-5, 5]}, + } + }, + } + }, + {"$project": {"_id": 1, "pos": 1, "value": 1, "avg": 1}}, + ], + "cursor": {}, + }, + ) + # pos=0: range [-5,5] includes pos 0,5 → avg(10,20)=15 + # pos=5: range [0,10] includes pos 0,5,10 → avg(10,20,30)=20 + # pos=10: range [5,15] includes pos 5,10,15 → avg(20,30,40)=30 + # pos=15: range [10,20] includes pos 10,15 → avg(30,40)=35 + assertSuccess( + result, + [ + {"_id": 1, "pos": 0, "value": 10, "avg": 15.0}, + {"_id": 2, "pos": 5, "value": 20, "avg": 20.0}, + {"_id": 3, "pos": 10, "value": 30, "avg": 30.0}, + {"_id": 4, "pos": 15, "value": 40, "avg": 35.0}, + ], + msg="$avg with range-based window should compute average within range", 
+ ) + + +def test_avg_window_multiple_partitions(collection): + """Test $avg in $setWindowFields with multiple partitions of different sizes.""" + collection.insert_many( + [ + {"_id": 1, "group": "A", "value": 10}, + {"_id": 2, "group": "A", "value": 20}, + {"_id": 3, "group": "A", "value": 30}, + {"_id": 4, "group": "B", "value": 100}, + {"_id": 5, "group": "B", "value": 200}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$sort": {"_id": 1}}, + { + "$setWindowFields": { + "partitionBy": "$group", + "sortBy": {"_id": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"documents": ["unbounded", "unbounded"]}, + } + }, + } + }, + {"$project": {"_id": 1, "group": 1, "avg": 1}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [ + {"_id": 1, "group": "A", "avg": 20.0}, + {"_id": 2, "group": "A", "avg": 20.0}, + {"_id": 3, "group": "A", "avg": 20.0}, + {"_id": 4, "group": "B", "avg": 150.0}, + {"_id": 5, "group": "B", "avg": 150.0}, + ], + msg="$avg should compute independent averages per partition", + ) + + +def test_avg_group_after_unwind(collection): + """Test $avg in $group after $unwind averages unwound values.""" + collection.insert_many( + [ + {"_id": 1, "category": "A", "values": [10, 20]}, + {"_id": 2, "category": "A", "values": [30]}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$unwind": "$values"}, + {"$group": {"_id": "$category", "avg": {"$avg": "$values"}}}, + ], + "cursor": {}, + }, + ) + # Unwound: 10, 20, 30 → avg = 20 + assertSuccess( + result, + [{"_id": "A", "avg": 20.0}], + msg="$avg after $unwind should average all unwound values", + ) + + +def test_avg_group_after_match(collection): + """Test $avg in $group after $match filters documents.""" + collection.insert_many( + [ + {"_id": 1, "category": "A", "value": 10, "active": True}, + {"_id": 2, "category": "A", "value": 20, "active": False}, + {"_id": 3, 
"category": "A", "value": 30, "active": True}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$match": {"active": True}}, + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + ], + "cursor": {}, + }, + ) + # Only active docs: avg(10, 30) = 20 + assertSuccess( + result, + [{"_id": "A", "avg": 20.0}], + msg="$avg after $match should only average filtered documents", + ) + + +def test_avg_in_project_after_group(collection): + """Test $avg in $project after $group uses grouped results.""" + collection.insert_many( + [ + {"_id": 1, "category": "A", "value": 10}, + {"_id": 2, "category": "A", "value": 20}, + {"_id": 3, "category": "B", "value": 30}, + {"_id": 4, "category": "B", "value": 40}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + { + "$group": { + "_id": "$category", + "sum": {"$sum": "$value"}, + "count": {"$sum": 1}, + } + }, + {"$sort": {"_id": 1}}, + { + "$project": { + "_id": 1, + "manual_avg": {"$divide": ["$sum", "$count"]}, + } + }, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [ + {"_id": "A", "manual_avg": 15.0}, + {"_id": "B", "manual_avg": 35.0}, + ], + msg="Manual average via $divide after $group should work", + ) + + +def test_avg_group_after_project_rename(collection): + """Test $avg in $group after $project that renames fields.""" + collection.insert_many( + [ + {"_id": 1, "cat": "A", "val": 10}, + {"_id": 2, "cat": "A", "val": 20}, + ] + ) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": [ + {"$project": {"category": "$cat", "value": "$val"}}, + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [{"_id": "A", "avg": 15.0}], + msg="$avg should work on renamed fields from $project", + ) diff --git a/documentdb_tests/framework/error_codes.py b/documentdb_tests/framework/error_codes.py index 
53315bbb..d16c9634 100644 --- a/documentdb_tests/framework/error_codes.py +++ b/documentdb_tests/framework/error_codes.py @@ -354,6 +354,8 @@ PLAN_CACHE_STATS_ALL_HOSTS_NOT_SHARDED_ERROR = 4503200 ACCUMULATOR_NULL_FUNCTION_ERROR = 4544702 ACCUMULATOR_MISSING_ACCUMULATE_ARGS_ERROR = 4544710 +DIVIDE_BY_ZERO_V2_ERROR = 4848401 +MODULO_BY_ZERO_V2_ERROR = 4848403 ARRAY_TO_OBJECT_NULL_BYTE_PAIR_KEY_ERROR = 4940400 ARRAY_TO_OBJECT_NULL_BYTE_KV_KEY_ERROR = 4940401 SKIP_INVALID_ARGUMENT_ERROR = 5107200 From 0fa625a39e82cbbbb1a512738bd805dbd36efb5f Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Thu, 14 May 2026 16:51:44 -0700 Subject: [PATCH 2/9] copied AccumulatorTestCase from sum branch Signed-off-by: Alina (Xi) Li --- .../core/operator/accumulators/utils/__init__.py | 3 +++ .../accumulators/utils/accumulator_test_case.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/utils/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/utils/accumulator_test_case.py diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/utils/__init__.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/utils/__init__.py new file mode 100644 index 00000000..544a6ec5 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/utils/__init__.py @@ -0,0 +1,3 @@ +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils.accumulator_test_case import ( # noqa: E501, F401 + AccumulatorTestCase, +) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/utils/accumulator_test_case.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/utils/accumulator_test_case.py new file mode 100644 index 00000000..f21a9f62 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/utils/accumulator_test_case.py @@ -0,0 +1,16 @@ +"""Shared test 
case for accumulator tests.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from documentdb_tests.framework.test_case import BaseTestCase + + +@dataclass(frozen=True) +class AccumulatorTestCase(BaseTestCase): + """Test case for accumulator tests.""" + + docs: list[dict] | None = None + pipeline: list[dict[str, Any]] | None = None From 3cf606b2278845c0ae805eb1e5345f7dc2143c48 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Thu, 14 May 2026 17:17:32 -0700 Subject: [PATCH 3/9] convert tests to use Signed-off-by: Alina (Xi) Li --- .../accumulators/avg/test_accumulator_avg.py | 924 +++++++++++------- .../accumulators/avg/test_avg_field_lookup.py | 428 +++----- .../avg/test_avg_group_boundaries.py | 290 +++--- .../avg/test_avg_group_context.py | 580 +++++------ .../accumulators/avg/test_avg_group_types.py | 661 ++++++------- .../avg/test_avg_pipeline_contexts.py | 877 +++++++---------- 6 files changed, 1781 insertions(+), 1979 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg.py index 53d6d2b4..857013c2 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg.py @@ -2,18 +2,16 @@ from __future__ import annotations -from dataclasses import dataclass +import math from datetime import datetime, timezone -from typing import Any import pytest from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp -from documentdb_tests.framework.assertions import ( - assertFailureCode, - assertResult, - assertSuccess, +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, ) +from documentdb_tests.framework.assertions import assertResult, 
assertSuccess from documentdb_tests.framework.error_codes import ( CONVERSION_FAILURE_ERROR, DIVIDE_BY_ZERO_V2_ERROR, @@ -22,7 +20,6 @@ ) from documentdb_tests.framework.executor import execute_command from documentdb_tests.framework.parametrize import pytest_params -from documentdb_tests.framework.test_case import BaseTestCase from documentdb_tests.framework.test_constants import ( DECIMAL128_INFINITY, DECIMAL128_INT64_OVERFLOW, @@ -56,80 +53,98 @@ INT64_ZERO, ) - -@dataclass(frozen=True) -class AvgAccumulatorTest(BaseTestCase): - """Test case for $avg accumulator.""" - - docs: list[dict] | None = None - expression: Any = None - - # Property [Null and Missing Ignored]: null values, missing fields, and # $$REMOVE are treated as non-numeric and excluded from both the sum and # count, producing null when no numeric values remain. -AVG_NULL_MISSING_TESTS: list[AvgAccumulatorTest] = [ - AvgAccumulatorTest( +AVG_NULL_MISSING_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( "null_all_null", docs=[{"v": None}, {"v": None}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should return null when all values in the group are null", ), - AvgAccumulatorTest( + AccumulatorTestCase( "null_all_missing", docs=[{"x": 1}, {"x": 2}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should return null when all values reference missing fields", ), - AvgAccumulatorTest( + AccumulatorTestCase( "null_single_null", docs=[{"v": None}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should return null when the only value is null", ), - AvgAccumulatorTest( + 
AccumulatorTestCase( "null_single_missing", docs=[{"x": 1}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should return null when the only value is a missing field", ), - AvgAccumulatorTest( + AccumulatorTestCase( "null_mixed_null_and_missing", docs=[{"v": None}, {"x": 1}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should return null when values are a mix of null and missing", ), - AvgAccumulatorTest( + AccumulatorTestCase( "null_with_numerics", docs=[{"v": None}, {"v": 10}, {"v": 20}], - expression="$v", - expected=15.0, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 15.0}], msg="$avg should exclude null from both sum and count", ), - AvgAccumulatorTest( + AccumulatorTestCase( "null_missing_with_numerics", docs=[{"x": 1}, {"v": 10}, {"v": 20}], - expression="$v", - expected=15.0, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 15.0}], msg="$avg should exclude missing fields from both sum and count", ), - AvgAccumulatorTest( + AccumulatorTestCase( "null_mixed_null_missing_with_numerics", docs=[{"v": None}, {"x": 1}, {"v": 30}], - expression="$v", - expected=30.0, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 30.0}], msg="$avg should exclude both null and missing, averaging only numerics", ), - AvgAccumulatorTest( + AccumulatorTestCase( "null_remove_only", docs=[{"v": 5}], - expression={"$cond": [False, 1, "$$REMOVE"]}, - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": {"$cond": [False, 
1, "$$REMOVE"]}}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should treat $$REMOVE as missing and return null", ), ] @@ -137,141 +152,198 @@ class AvgAccumulatorTest(BaseTestCase): # Property [Non-Numeric Types Ignored]: all non-numeric BSON types are # silently ignored and excluded from both sum and count, producing null # when no numeric values remain. -AVG_NON_NUMERIC_TESTS: list[AvgAccumulatorTest] = [ - AvgAccumulatorTest( +AVG_NON_NUMERIC_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( "non_numeric_string", docs=[{"v": "hello"}, {"v": "world"}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should ignore string values and return null", ), - AvgAccumulatorTest( + AccumulatorTestCase( "non_numeric_boolean_true", docs=[{"v": True}, {"v": True}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should ignore boolean true without coercing to numeric", ), - AvgAccumulatorTest( + AccumulatorTestCase( "non_numeric_boolean_false", docs=[{"v": False}, {"v": False}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should ignore boolean false without coercing to numeric", ), - AvgAccumulatorTest( + AccumulatorTestCase( "non_numeric_object", docs=[{"v": {"x": 1}}, {"v": {"y": 2}}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should ignore plain objects", ), - AvgAccumulatorTest( + AccumulatorTestCase( "non_numeric_empty_object", docs=[{"v": {}}, 
{"v": {}}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should ignore empty objects", ), - AvgAccumulatorTest( + AccumulatorTestCase( "non_numeric_objectid", docs=[{"v": ObjectId()}, {"v": ObjectId()}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should ignore ObjectId values", ), - AvgAccumulatorTest( + AccumulatorTestCase( "non_numeric_datetime", docs=[ {"v": datetime(2023, 1, 1, tzinfo=timezone.utc)}, {"v": datetime(2024, 1, 1, tzinfo=timezone.utc)}, ], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should ignore datetime values", ), - AvgAccumulatorTest( + AccumulatorTestCase( "non_numeric_timestamp", docs=[{"v": Timestamp(1, 1)}, {"v": Timestamp(2, 1)}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should ignore Timestamp values", ), - AvgAccumulatorTest( + AccumulatorTestCase( "non_numeric_binary", docs=[{"v": Binary(b"\x01")}, {"v": Binary(b"\x02")}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should ignore Binary values", ), - AvgAccumulatorTest( + AccumulatorTestCase( "non_numeric_regex", docs=[{"v": Regex("abc")}, {"v": Regex("def")}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should ignore 
Regex values", ), - AvgAccumulatorTest( + AccumulatorTestCase( "non_numeric_code", docs=[{"v": Code("x")}, {"v": Code("y")}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should ignore Code values", ), - AvgAccumulatorTest( + AccumulatorTestCase( "non_numeric_minkey", docs=[{"v": MinKey()}, {"v": MinKey()}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should ignore MinKey values", ), - AvgAccumulatorTest( + AccumulatorTestCase( "non_numeric_maxkey", docs=[{"v": MaxKey()}, {"v": MaxKey()}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should ignore MaxKey values", ), - AvgAccumulatorTest( + AccumulatorTestCase( "non_numeric_array", docs=[{"v": [1, 2, 3]}, {"v": [4, 5]}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should ignore arrays without unwrapping", ), - AvgAccumulatorTest( + AccumulatorTestCase( "non_numeric_single_element_array", docs=[{"v": [42]}, {"v": [7]}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should not unwrap single-element numeric arrays", ), - AvgAccumulatorTest( + AccumulatorTestCase( "non_numeric_empty_array", docs=[{"v": []}, {"v": []}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], 
msg="$avg should ignore empty arrays", ), - AvgAccumulatorTest( + AccumulatorTestCase( "non_numeric_nested_array", docs=[{"v": [[1, 2]]}, {"v": [[3]]}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should ignore nested arrays", ), - AvgAccumulatorTest( + AccumulatorTestCase( "non_numeric_mixed_with_numerics", docs=[{"v": "hello"}, {"v": 10}, {"v": True}, {"v": 20}, {"v": [5]}], - expression="$v", - expected=15.0, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 15.0}], msg="$avg should compute average only over numeric values, ignoring non-numerics", ), - AvgAccumulatorTest( + AccumulatorTestCase( "non_numeric_array_from_expression", docs=[{"v": 1}], - expression={"$literal": [1, 2, 3]}, - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": {"$literal": [1, 2, 3]}}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should treat array expressions as non-numeric", ), ] @@ -279,89 +351,125 @@ class AvgAccumulatorTest(BaseTestCase): # Property [Special Numeric Values]: NaN is numeric and propagates to the # result, Infinity dominates finite values, Infinity + -Infinity produces # NaN, and negative zero is not preserved. 
-AVG_SPECIAL_NUMERIC_TESTS: list[AvgAccumulatorTest] = [ - AvgAccumulatorTest( +AVG_SPECIAL_NUMERIC_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( "special_nan_propagates", docs=[{"v": FLOAT_NAN}, {"v": 5.0}], - expression="$v", - expected=pytest.approx(FLOAT_NAN, nan_ok=True), + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": pytest.approx(math.nan, nan_ok=True)}], msg="$avg should return NaN when any value is NaN", ), - AvgAccumulatorTest( + AccumulatorTestCase( "special_nan_over_infinity", docs=[{"v": FLOAT_NAN}, {"v": FLOAT_INFINITY}], - expression="$v", - expected=pytest.approx(FLOAT_NAN, nan_ok=True), + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": pytest.approx(math.nan, nan_ok=True)}], msg="$avg should return NaN when group contains both NaN and Infinity", ), - AvgAccumulatorTest( + AccumulatorTestCase( "special_infinity_dominates", docs=[{"v": FLOAT_INFINITY}, {"v": 5.0}], - expression="$v", - expected=FLOAT_INFINITY, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": FLOAT_INFINITY}], msg="$avg should return Infinity when Infinity dominates finite values", ), - AvgAccumulatorTest( + AccumulatorTestCase( "special_neg_infinity_dominates", docs=[{"v": FLOAT_NEGATIVE_INFINITY}, {"v": 5.0}], - expression="$v", - expected=FLOAT_NEGATIVE_INFINITY, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": FLOAT_NEGATIVE_INFINITY}], msg="$avg should return -Infinity when -Infinity dominates finite values", ), - AvgAccumulatorTest( + AccumulatorTestCase( "special_inf_plus_neg_inf", docs=[{"v": FLOAT_INFINITY}, {"v": FLOAT_NEGATIVE_INFINITY}], - expression="$v", - expected=pytest.approx(FLOAT_NAN, nan_ok=True), + 
pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": pytest.approx(math.nan, nan_ok=True)}], msg="$avg should return NaN when group contains Infinity and -Infinity", ), - AvgAccumulatorTest( + AccumulatorTestCase( "special_neg_zero_not_preserved", docs=[{"v": DOUBLE_NEGATIVE_ZERO}], - expression="$v", - expected=DOUBLE_ZERO, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_ZERO}], msg="$avg should not preserve negative zero", ), - AvgAccumulatorTest( + AccumulatorTestCase( "special_decimal_neg_zero_not_preserved", docs=[{"v": DECIMAL128_NEGATIVE_ZERO}], - expression="$v", - expected=DECIMAL128_ZERO, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DECIMAL128_ZERO}], msg="$avg should not preserve Decimal128 negative zero", ), - AvgAccumulatorTest( + AccumulatorTestCase( "special_decimal_nan_propagates", docs=[{"v": DECIMAL128_NAN}, {"v": Decimal128("5")}], - expression="$v", - expected=Decimal128("NaN"), + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": Decimal128("NaN")}], msg="$avg should return Decimal128 NaN when any Decimal128 value is NaN", ), - AvgAccumulatorTest( + AccumulatorTestCase( "special_decimal_nan_over_infinity", docs=[{"v": DECIMAL128_NAN}, {"v": DECIMAL128_INFINITY}], - expression="$v", - expected=Decimal128("NaN"), + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": Decimal128("NaN")}], msg="$avg should return Decimal128 NaN when group contains Decimal128 NaN and Infinity", ), - AvgAccumulatorTest( + AccumulatorTestCase( "special_decimal_infinity", docs=[{"v": DECIMAL128_INFINITY}, {"v": Decimal128("5")}], - 
expression="$v", - expected=DECIMAL128_INFINITY, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DECIMAL128_INFINITY}], msg="$avg should return Decimal128 Infinity when Decimal128 Infinity is present", ), - AvgAccumulatorTest( + AccumulatorTestCase( "special_decimal_neg_infinity_dominates", docs=[{"v": DECIMAL128_NEGATIVE_INFINITY}, {"v": Decimal128("5")}], - expression="$v", - expected=DECIMAL128_NEGATIVE_INFINITY, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DECIMAL128_NEGATIVE_INFINITY}], msg="$avg should return Decimal128 -Infinity when Decimal128 -Infinity dominates", ), - AvgAccumulatorTest( + AccumulatorTestCase( "special_decimal_inf_plus_neg_inf", docs=[{"v": DECIMAL128_INFINITY}, {"v": DECIMAL128_NEGATIVE_INFINITY}], - expression="$v", - expected=Decimal128("NaN"), + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": Decimal128("NaN")}], msg="$avg should return Decimal128 NaN for Decimal128 Infinity + -Infinity", ), ] @@ -369,167 +477,233 @@ class AvgAccumulatorTest(BaseTestCase): # Property [Integer Boundaries]: int32 boundary values produce exact double # results, and int64 boundary values produce double results with potential # precision loss. 
-AVG_INTEGER_BOUNDARY_TESTS: list[AvgAccumulatorTest] = [ - AvgAccumulatorTest( +AVG_INTEGER_BOUNDARY_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( "int_boundary_int32_zeros", docs=[{"v": 0}, {"v": 0}], - expression="$v", - expected=DOUBLE_ZERO, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_ZERO}], msg="$avg should return 0.0 for two int32 zeros", ), - AvgAccumulatorTest( + AccumulatorTestCase( "int_boundary_int32_one_neg_one", docs=[{"v": 1}, {"v": -1}], - expression="$v", - expected=DOUBLE_ZERO, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_ZERO}], msg="$avg should return 0.0 for int32 1 and -1", ), - AvgAccumulatorTest( + AccumulatorTestCase( "int_boundary_int32_max", docs=[{"v": INT32_MAX}, {"v": 0}], - expression="$v", - expected=1_073_741_823.5, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 1_073_741_823.5}], msg="$avg should handle int32 MAX correctly", ), - AvgAccumulatorTest( + AccumulatorTestCase( "int_boundary_int32_min", docs=[{"v": INT32_MIN}, {"v": INT32_MIN}], - expression="$v", - expected=float(INT32_MIN), + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": float(INT32_MIN)}], msg="$avg should handle int32 MIN correctly", ), - AvgAccumulatorTest( + AccumulatorTestCase( "int_boundary_int32_max_and_min", docs=[{"v": INT32_MAX}, {"v": INT32_MIN}], - expression="$v", - expected=-0.5, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": -0.5}], msg="$avg should handle int32 MAX and MIN together", ), - AvgAccumulatorTest( + AccumulatorTestCase( "int_boundary_int32_adjacent_max", docs=[{"v": 
INT32_MAX_MINUS_1}, {"v": INT32_MAX}], - expression="$v", - expected=2_147_483_646.5, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 2_147_483_646.5}], msg="$avg of adjacent int32 MAX values should produce exact double", ), - AvgAccumulatorTest( + AccumulatorTestCase( "int_boundary_int32_adjacent_min", docs=[{"v": INT32_MIN}, {"v": INT32_MIN + 1}], - expression="$v", - expected=-2_147_483_647.5, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": -2_147_483_647.5}], msg="$avg of adjacent int32 MIN values should produce exact double", ), - AvgAccumulatorTest( + AccumulatorTestCase( "int_boundary_int64_max", docs=[{"v": INT64_MAX}, {"v": INT64_ZERO}], - expression="$v", - expected=DOUBLE_FROM_INT64_MAX / 2, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_FROM_INT64_MAX / 2}], msg="$avg should handle int64 MAX with precision loss in double", ), - AvgAccumulatorTest( + AccumulatorTestCase( "int_boundary_int64_min", docs=[{"v": INT64_MIN}, {"v": INT64_MIN}], - expression="$v", - expected=-DOUBLE_FROM_INT64_MAX, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": -DOUBLE_FROM_INT64_MAX}], msg="$avg should handle int64 MIN with precision loss in double", ), - AvgAccumulatorTest( + AccumulatorTestCase( "int_boundary_int64_max_and_min", docs=[{"v": INT64_MAX}, {"v": INT64_MIN}], - expression="$v", - expected=-0.5, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": -0.5}], msg="$avg should handle int64 MAX and MIN together", ), - AvgAccumulatorTest( + AccumulatorTestCase( "int_boundary_int64_adjacent_max", docs=[{"v": INT64_MAX_MINUS_1}, 
{"v": INT64_MAX}], - expression="$v", - expected=DOUBLE_FROM_INT64_MAX, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_FROM_INT64_MAX}], msg="$avg of adjacent int64 MAX values should produce double with precision loss", ), - AvgAccumulatorTest( + AccumulatorTestCase( "int_boundary_int64_adjacent_min", docs=[{"v": INT64_MIN_PLUS_1}, {"v": INT64_MIN}], - expression="$v", - expected=-DOUBLE_FROM_INT64_MAX, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": -DOUBLE_FROM_INT64_MAX}], msg="$avg of adjacent int64 MIN values should produce double with precision loss", ), ] # Property [Float Boundaries]: subnormal, minimum normal, maximum finite, # near-precision-limit, and whole-number double values are averaged correctly. -AVG_FLOAT_BOUNDARY_TESTS: list[AvgAccumulatorTest] = [ - AvgAccumulatorTest( +AVG_FLOAT_BOUNDARY_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( "float_boundary_whole_number", docs=[{"v": 3.0}, {"v": 5.0}], - expression="$v", - expected=4.0, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 4.0}], msg="$avg should produce correct average for whole-number floats", ), - AvgAccumulatorTest( + AccumulatorTestCase( "float_boundary_subnormal_positive", docs=[{"v": DOUBLE_MIN_SUBNORMAL}], - expression="$v", - expected=DOUBLE_MIN_SUBNORMAL, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_MIN_SUBNORMAL}], msg="$avg should handle positive subnormal value correctly", ), - AvgAccumulatorTest( + AccumulatorTestCase( "float_boundary_subnormal_negative", docs=[{"v": DOUBLE_MIN_NEGATIVE_SUBNORMAL}], - expression="$v", - expected=DOUBLE_MIN_NEGATIVE_SUBNORMAL, + pipeline=[ + {"$group": {"_id": None, 
"result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_MIN_NEGATIVE_SUBNORMAL}], msg="$avg should handle negative subnormal value correctly", ), - AvgAccumulatorTest( + AccumulatorTestCase( "float_boundary_subnormal_avg", docs=[{"v": DOUBLE_MIN_SUBNORMAL}, {"v": DOUBLE_MIN_SUBNORMAL}], - expression="$v", - expected=DOUBLE_MIN_SUBNORMAL, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_MIN_SUBNORMAL}], msg="$avg of two identical subnormal values should return that value", ), - AvgAccumulatorTest( + AccumulatorTestCase( "float_boundary_min_normal", docs=[{"v": DOUBLE_MIN_NORMAL}], - expression="$v", - expected=DOUBLE_MIN_NORMAL, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_MIN_NORMAL}], msg="$avg should handle smallest positive normal double correctly", ), - AvgAccumulatorTest( + AccumulatorTestCase( "float_boundary_max_single", docs=[{"v": DOUBLE_MAX}], - expression="$v", - expected=DOUBLE_MAX, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_MAX}], msg="$avg should handle DBL_MAX as a single value correctly", ), - AvgAccumulatorTest( + AccumulatorTestCase( "float_boundary_max_safe_integer", docs=[{"v": float(DOUBLE_MAX_SAFE_INTEGER)}], - expression="$v", - expected=float(DOUBLE_MAX_SAFE_INTEGER), + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": float(DOUBLE_MAX_SAFE_INTEGER)}], msg="$avg should handle max safe integer value correctly", ), - AvgAccumulatorTest( + AccumulatorTestCase( "float_boundary_max_safe_integer_avg", docs=[ {"v": float(DOUBLE_MAX_SAFE_INTEGER)}, {"v": float(DOUBLE_MAX_SAFE_INTEGER)}, ], - expression="$v", - 
expected=float(DOUBLE_MAX_SAFE_INTEGER), + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": float(DOUBLE_MAX_SAFE_INTEGER)}], msg="$avg of two max safe integer values should return that value", ), - AvgAccumulatorTest( + AccumulatorTestCase( "float_boundary_near_min", docs=[{"v": DOUBLE_NEAR_MIN}, {"v": DOUBLE_NEAR_MIN}], - expression="$v", - expected=DOUBLE_NEAR_MIN, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_NEAR_MIN}], msg="$avg should handle values near minimum normal correctly", ), - AvgAccumulatorTest( + AccumulatorTestCase( "float_boundary_near_max_single", docs=[{"v": DOUBLE_NEAR_MAX}], - expression="$v", - expected=DOUBLE_NEAR_MAX, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_NEAR_MAX}], msg="$avg should handle values near maximum finite correctly", ), ] @@ -537,77 +711,104 @@ class AvgAccumulatorTest(BaseTestCase): # Property [Decimal128 Behavior]: full 34-digit precision and trailing zeros # are preserved, subnormal and near-maximum values are handled correctly, and # values exceeding int64 range produce Decimal128 results. 
-AVG_DECIMAL128_TESTS: list[AvgAccumulatorTest] = [ - AvgAccumulatorTest( +AVG_DECIMAL128_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( "decimal128_full_precision", docs=[ {"v": Decimal128("1.000000000000000000000000000000001")}, {"v": Decimal128("1.000000000000000000000000000000003")}, ], - expression="$v", - expected=Decimal128("1.000000000000000000000000000000002"), + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": Decimal128("1.000000000000000000000000000000002")}], msg="$avg should preserve full 34-digit Decimal128 precision", ), - AvgAccumulatorTest( + AccumulatorTestCase( "decimal128_34_digit_integer", docs=[ {"v": Decimal128("1234567890123456789012345678901234")}, {"v": Decimal128("1234567890123456789012345678901234")}, ], - expression="$v", - expected=Decimal128("1234567890123456789012345678901234"), + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": Decimal128("1234567890123456789012345678901234")}], msg="$avg should preserve 34-digit integer Decimal128 values", ), - AvgAccumulatorTest( + AccumulatorTestCase( "decimal128_trailing_zeros", docs=[{"v": Decimal128("2.00")}, {"v": Decimal128("4.00")}], - expression="$v", - expected=Decimal128("3.00"), + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": Decimal128("3.00")}], msg="$avg should preserve trailing zeros in Decimal128 results", ), - AvgAccumulatorTest( + AccumulatorTestCase( "decimal128_trailing_zeros_single_digit", docs=[{"v": DECIMAL128_TRAILING_ZERO}, {"v": Decimal128("3.0")}], - expression="$v", - expected=Decimal128("2.0"), + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": Decimal128("2.0")}], msg="$avg should preserve single trailing zero in 
Decimal128 results", ), - AvgAccumulatorTest( + AccumulatorTestCase( "decimal128_subnormal", docs=[{"v": DECIMAL128_MIN_POSITIVE}, {"v": DECIMAL128_MIN_POSITIVE}], - expression="$v", - expected=DECIMAL128_MIN_POSITIVE, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DECIMAL128_MIN_POSITIVE}], msg="$avg should handle Decimal128 subnormal values correctly", ), - AvgAccumulatorTest( + AccumulatorTestCase( "decimal128_subnormal_single", docs=[{"v": DECIMAL128_MIN_POSITIVE}], - expression="$v", - expected=DECIMAL128_MIN_POSITIVE, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DECIMAL128_MIN_POSITIVE}], msg="$avg should handle a single Decimal128 subnormal value", ), - AvgAccumulatorTest( + AccumulatorTestCase( "decimal128_near_max_single", docs=[{"v": DECIMAL128_MAX}], - expression="$v", - expected=DECIMAL128_MAX, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DECIMAL128_MAX}], msg="$avg should handle a single near-maximum Decimal128 value", ), - AvgAccumulatorTest( + AccumulatorTestCase( "decimal128_near_max_with_small", docs=[{"v": DECIMAL128_MAX}, {"v": Decimal128("1")}], - expression="$v", - expected=Decimal128("5.000000000000000000000000000000000E+6144"), + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": Decimal128("5.000000000000000000000000000000000E+6144")}], msg="$avg should handle near-maximum Decimal128 averaged with a small value", ), - AvgAccumulatorTest( + AccumulatorTestCase( "decimal128_exceeds_int64", docs=[ {"v": DECIMAL128_INT64_OVERFLOW}, {"v": DECIMAL128_INT64_OVERFLOW}, ], - expression="$v", - expected=DECIMAL128_INT64_OVERFLOW, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + 
{"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DECIMAL128_INT64_OVERFLOW}], msg="$avg should produce Decimal128 for values exceeding int64 range", ), ] @@ -615,52 +816,70 @@ class AvgAccumulatorTest(BaseTestCase): # Property [Overflow]: sum overflow during accumulation produces Infinity for # doubles and Decimal128, and int32/int64 overflow is handled via type # promotion without error. -AVG_OVERFLOW_TESTS: list[AvgAccumulatorTest] = [ - AvgAccumulatorTest( +AVG_OVERFLOW_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( "overflow_double_max", docs=[{"v": DOUBLE_MAX}, {"v": DOUBLE_MAX}], - expression="$v", - expected=FLOAT_INFINITY, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": FLOAT_INFINITY}], msg="$avg should return Infinity when two DBL_MAX values overflow the sum", ), - AvgAccumulatorTest( + AccumulatorTestCase( "overflow_decimal128_max", docs=[{"v": DECIMAL128_MAX}, {"v": DECIMAL128_MAX}], - expression="$v", - expected=DECIMAL128_INFINITY, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DECIMAL128_INFINITY}], msg="$avg should return Decimal128 Infinity when two Decimal128 max values overflow", ), - AvgAccumulatorTest( + AccumulatorTestCase( "overflow_int32_sum", docs=[{"v": INT32_MAX}, {"v": INT32_MAX}, {"v": INT32_MAX}], - expression="$v", - expected=float(INT32_MAX), + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": float(INT32_MAX)}], msg="$avg should handle int32 sum overflow via type promotion without error", ), - AvgAccumulatorTest( + AccumulatorTestCase( "overflow_int64_sum", docs=[{"v": INT64_MAX}, {"v": INT64_MAX}, {"v": INT64_MAX}], - expression="$v", - expected=DOUBLE_FROM_INT64_MAX, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + 
{"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_FROM_INT64_MAX}], msg="$avg should handle int64 sum overflow by converting to double", ), ] # Property [Expression Arguments]: $avg accepts any expression as its operand, # evaluating it per-document before accumulation. -AVG_EXPRESSION_ARGS_TESTS: list[AvgAccumulatorTest] = [ - AvgAccumulatorTest( +AVG_EXPRESSION_ARGS_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( "expr_constant_literal", docs=[{"x": 1}, {"x": 2}, {"x": 3}], - expression=5, - expected=5.0, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": 5}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 5.0}], msg="$avg should return the constant value when expression is a numeric literal", ), - AvgAccumulatorTest( + AccumulatorTestCase( "expr_nested_add", docs=[{"a": 2, "b": 3}, {"a": 4, "b": 6}], - expression={"$add": ["$a", "$b"]}, - expected=7.5, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": {"$add": ["$a", "$b"]}}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 7.5}], msg="$avg should evaluate nested $add expression per-document before averaging", ), ] @@ -668,26 +887,35 @@ class AvgAccumulatorTest(BaseTestCase): # Property [Edge Cases]: a single-document group returns the value itself # (as double or Decimal128), a single non-numeric document returns null, and # an empty collection produces no group output. 
-AVG_EDGE_CASE_TESTS: list[AvgAccumulatorTest] = [ - AvgAccumulatorTest( +AVG_EDGE_CASE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( "edge_single_int32", docs=[{"v": 7}], - expression="$v", - expected=7.0, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 7.0}], msg="$avg should return the value as double for a single int32 document", ), - AvgAccumulatorTest( + AccumulatorTestCase( "edge_single_int64", docs=[{"v": Int64(42)}], - expression="$v", - expected=42.0, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 42.0}], msg="$avg should return the value as double for a single int64 document", ), - AvgAccumulatorTest( + AccumulatorTestCase( "edge_single_non_numeric", docs=[{"v": "hello"}], - expression="$v", - expected=None, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], msg="$avg should return null for a single non-numeric document", ), ] @@ -706,25 +934,34 @@ class AvgAccumulatorTest(BaseTestCase): # Property [Expression Error Propagation]: errors from sub-expressions # propagate through $avg without being caught or suppressed. 
-AVG_EXPRESSION_ERROR_TESTS: list[AvgAccumulatorTest] = [ - AvgAccumulatorTest( +AVG_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( "error_prop_toint_non_convertible", docs=[{"v": "hello"}], - expression={"$toInt": "$v"}, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": {"$toInt": "$v"}}}}, + {"$project": {"_id": 0, "result": 1}}, + ], error_code=CONVERSION_FAILURE_ERROR, msg="$avg should propagate $toInt conversion error for non-convertible value", ), - AvgAccumulatorTest( + AccumulatorTestCase( "error_prop_divide_by_zero", docs=[{"v": 10}], - expression={"$divide": ["$v", 0]}, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": {"$divide": ["$v", 0]}}}}, + {"$project": {"_id": 0, "result": 1}}, + ], error_code=DIVIDE_BY_ZERO_V2_ERROR, msg="$avg should propagate $divide by zero error", ), - AvgAccumulatorTest( + AccumulatorTestCase( "error_prop_mod_by_zero", docs=[{"v": 10}], - expression={"$mod": ["$v", 0]}, + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": {"$mod": ["$v", 0]}}}}, + {"$project": {"_id": 0, "result": 1}}, + ], error_code=MODULO_BY_ZERO_V2_ERROR, msg="$avg should propagate $mod by zero error", ), @@ -734,23 +971,20 @@ class AvgAccumulatorTest(BaseTestCase): @pytest.mark.parametrize("test_case", pytest_params(AVG_TESTS)) -def test_accumulator_avg(collection, test_case: AvgAccumulatorTest): +def test_accumulator_avg(collection, test_case: AccumulatorTestCase): """Test $avg accumulator behavior.""" collection.insert_many(test_case.docs) result = execute_command( collection, { "aggregate": collection.name, - "pipeline": [ - {"$group": {"_id": None, "result": {"$avg": test_case.expression}}}, - {"$project": {"_id": 0, "result": 1}}, - ], + "pipeline": test_case.pipeline, "cursor": {}, }, ) assertResult( result, - expected=[{"result": test_case.expected}] if test_case.error_code is None else None, + expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg, ) @@ -778,117 
+1012,137 @@ def test_accumulator_avg_empty_collection(collection): # Property [Return Type]: the result is double by default, but Decimal128 if # any input value is Decimal128. -AVG_RETURN_TYPE_TESTS: list[AvgAccumulatorTest] = [ - AvgAccumulatorTest( +AVG_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( "type_int32_only", docs=[{"v": 2}, {"v": 4}], - expression="$v", - expected="double", + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "double"}], msg="$avg should return double when all inputs are int32", ), - AvgAccumulatorTest( + AccumulatorTestCase( "type_int64_only", docs=[{"v": Int64(2)}, {"v": Int64(4)}], - expression="$v", - expected="double", + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "double"}], msg="$avg should return double when all inputs are int64", ), - AvgAccumulatorTest( + AccumulatorTestCase( "type_int32_int64", docs=[{"v": 2}, {"v": Int64(4)}], - expression="$v", - expected="double", + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "double"}], msg="$avg should return double for int32 and int64 mix", ), - AvgAccumulatorTest( + AccumulatorTestCase( "type_int32_double", docs=[{"v": 2}, {"v": 4.0}], - expression="$v", - expected="double", + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "double"}], msg="$avg should return double for int32 and double mix", ), - AvgAccumulatorTest( + AccumulatorTestCase( "type_int64_double", docs=[{"v": Int64(2)}, {"v": 4.0}], - expression="$v", - expected="double", + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": 
"$result"}}}, + ], + expected=[{"type": "double"}], msg="$avg should return double for int64 and double mix", ), - AvgAccumulatorTest( + AccumulatorTestCase( "type_int32_decimal128", docs=[{"v": 2}, {"v": Decimal128("4")}], - expression="$v", - expected="decimal", + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "decimal"}], msg="$avg should return Decimal128 when any input is Decimal128", ), - AvgAccumulatorTest( + AccumulatorTestCase( "type_int64_decimal128", docs=[{"v": Int64(2)}, {"v": Decimal128("4")}], - expression="$v", - expected="decimal", + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "decimal"}], msg="$avg should return Decimal128 for int64 and Decimal128 mix", ), - AvgAccumulatorTest( + AccumulatorTestCase( "type_double_decimal128", docs=[{"v": 2.0}, {"v": Decimal128("4")}], - expression="$v", - expected="decimal", + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "decimal"}], msg="$avg should return Decimal128 for double and Decimal128 mix", ), - AvgAccumulatorTest( + AccumulatorTestCase( "type_decimal128_before_int32", docs=[{"v": Decimal128("4")}, {"v": 2}], - expression="$v", - expected="decimal", + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "decimal"}], msg="$avg should return Decimal128 regardless of document order", ), ] @pytest.mark.parametrize("test_case", pytest_params(AVG_RETURN_TYPE_TESTS)) -def test_accumulator_avg_return_type(collection, test_case: AvgAccumulatorTest): +def test_accumulator_avg_return_type(collection, test_case: AccumulatorTestCase): """Test $avg accumulator return type.""" collection.insert_many(test_case.docs) 
result = execute_command( collection, { "aggregate": collection.name, - "pipeline": [ - {"$group": {"_id": None, "result": {"$avg": test_case.expression}}}, - {"$project": {"_id": 0, "type": {"$type": "$result"}}}, - ], + "pipeline": test_case.pipeline, "cursor": {}, }, ) - assertSuccess(result, [{"type": test_case.expected}], msg=test_case.msg) - - -@dataclass(frozen=True) -class AvgArityTest(BaseTestCase): - """Test case for $avg arity rejection.""" - - pipeline: list[dict] = None # type: ignore[assignment] + assertSuccess(result, test_case.expected, msg=test_case.msg) # Property [Arity]: $avg in accumulator context is a unary operator and # rejects array syntax in $group, $bucket, and $bucketAuto. -AVG_ARITY_TESTS: list[AvgArityTest] = [ - AvgArityTest( +AVG_ARITY_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( "arity_multi_element_group", pipeline=[{"$group": {"_id": None, "result": {"$avg": ["$v", "$v"]}}}], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, msg="$avg should reject multi-element array syntax in $group", ), - AvgArityTest( + AccumulatorTestCase( "arity_empty_array_group", pipeline=[{"$group": {"_id": None, "result": {"$avg": []}}}], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, msg="$avg should reject empty array syntax in $group", ), - AvgArityTest( + AccumulatorTestCase( "arity_single_element_group", pipeline=[{"$group": {"_id": None, "result": {"$avg": ["$v"]}}}], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, msg="$avg should reject single-element array syntax in $group", ), - AvgArityTest( + AccumulatorTestCase( "arity_multi_element_bucket", pipeline=[ { @@ -899,9 +1153,10 @@ class AvgArityTest(BaseTestCase): } } ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, msg="$avg should reject multi-element array syntax in $bucket", ), - AvgArityTest( + AccumulatorTestCase( "arity_empty_array_bucket", pipeline=[ { @@ -912,9 +1167,10 @@ class AvgArityTest(BaseTestCase): } } ], + 
error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, msg="$avg should reject empty array syntax in $bucket", ), - AvgArityTest( + AccumulatorTestCase( "arity_single_element_bucket", pipeline=[ { @@ -925,9 +1181,10 @@ class AvgArityTest(BaseTestCase): } } ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, msg="$avg should reject single-element array syntax in $bucket", ), - AvgArityTest( + AccumulatorTestCase( "arity_multi_element_bucket_auto", pipeline=[ { @@ -938,9 +1195,10 @@ class AvgArityTest(BaseTestCase): } } ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, msg="$avg should reject multi-element array syntax in $bucketAuto", ), - AvgArityTest( + AccumulatorTestCase( "arity_empty_array_bucket_auto", pipeline=[ { @@ -951,9 +1209,10 @@ class AvgArityTest(BaseTestCase): } } ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, msg="$avg should reject empty array syntax in $bucketAuto", ), - AvgArityTest( + AccumulatorTestCase( "arity_single_element_bucket_auto", pipeline=[ { @@ -964,13 +1223,14 @@ class AvgArityTest(BaseTestCase): } } ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, msg="$avg should reject single-element array syntax in $bucketAuto", ), ] @pytest.mark.parametrize("test_case", pytest_params(AVG_ARITY_TESTS)) -def test_accumulator_avg_arity(collection, test_case: AvgArityTest): +def test_accumulator_avg_arity(collection, test_case: AccumulatorTestCase): """Test $avg rejects array syntax in accumulator context.""" collection.insert_one({"v": 1}) result = execute_command( @@ -981,8 +1241,8 @@ def test_accumulator_avg_arity(collection, test_case: AvgArityTest): "cursor": {}, }, ) - assertFailureCode( + assertResult( result, - GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + error_code=test_case.error_code, msg=test_case.msg, ) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py index 
cfaefca8..ece6da7f 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py @@ -1,349 +1,229 @@ """ Tests for $avg accumulator expression types and field lookup in $group context. -Covers expression types (literal, field path, computed expressions, system variables) +Covers expression types (literal, field path, computed expressions, conditional) and field path resolution (simple, nested, missing, array traversal). """ from __future__ import annotations -from documentdb_tests.framework.assertions import assertSuccess -from documentdb_tests.framework.executor import execute_command - -# --- Helpers --- - - -def _group_avg(collection, docs, avg_expr="$value"): - """Insert docs and run $group with $avg on given expression.""" - collection.insert_many(docs) - return execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$group": {"_id": None, "avg": {"$avg": avg_expr}}}, - ], - "cursor": {}, - }, - ) +import pytest +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params -# --- 11. Expression Types (per-operator) --- - +# Property [Expression Type]: $avg accepts field paths, computed expressions, +# literals, and conditional expressions in $group context. 
-def test_avg_group_field_path(collection): - """Test $avg with simple field path expression in $group.""" - result = _group_avg( - collection, - [ +AVG_EXPRESSION_TYPE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "field_path", + docs=[ {"_id": 1, "value": 10}, {"_id": 2, "value": 20}, {"_id": 3, "value": 30}, ], - ) - assertSuccess( - result, - [{"_id": None, "avg": 20.0}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$value"}}}], + expected=[{"_id": None, "avg": 20.0}], msg="$avg with field path should average field values", - ) - - -def test_avg_group_computed_expression(collection): - """Test $avg with computed expression in $group.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + "computed_expression", + docs=[ {"_id": 1, "a": 2, "b": 3}, {"_id": 2, "a": 4, "b": 6}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$group": {"_id": None, "avg": {"$avg": {"$multiply": ["$a", "$b"]}}}}, - ], - "cursor": {}, - }, - ) - # (2*3 + 4*6) / 2 = (6 + 24) / 2 = 15 - assertSuccess( - result, - [{"_id": None, "avg": 15.0}], + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": {"$multiply": ["$a", "$b"]}}}}], + # (2*3 + 4*6) / 2 = (6 + 24) / 2 = 15 + expected=[{"_id": None, "avg": 15.0}], msg="$avg with computed expression should average computed values", - ) - - -def test_avg_group_literal_numeric(collection): - """Test $avg with literal numeric value in $group returns that constant.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + "literal_numeric", + docs=[ {"_id": 1}, {"_id": 2}, {"_id": 3}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$group": {"_id": None, "avg": {"$avg": 5}}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [{"_id": None, "avg": 5.0}], + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": 5}}}], + expected=[{"_id": None, "avg": 5.0}], msg="$avg with literal 
numeric should return that constant", - ) - - -def test_avg_group_literal_null(collection): - """Test $avg with null literal in $group returns null.""" - collection.insert_many([{"_id": 1}, {"_id": 2}]) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$group": {"_id": None, "avg": {"$avg": None}}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [{"_id": None, "avg": None}], + ), + AccumulatorTestCase( + "literal_null", + docs=[{"_id": 1}, {"_id": 2}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": None}}}], + expected=[{"_id": None, "avg": None}], msg="$avg with null literal should return null", - ) - - -def test_avg_group_cond_expression(collection): - """Test $avg with $cond expression in $group.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + "cond_expression", + docs=[ {"_id": 1, "value": 10, "include": True}, {"_id": 2, "value": 20, "include": False}, {"_id": 3, "value": 30, "include": True}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - { - "$group": { - "_id": None, - "avg": { - "$avg": { - "$cond": [ - "$include", - "$value", - None, - ] - } - }, - } - }, - ], - "cursor": {}, - }, - ) - # Only values 10 and 30 contribute (null is ignored), avg = 20 - assertSuccess( - result, - [{"_id": None, "avg": 20.0}], + ], + pipeline=[ + { + "$group": { + "_id": None, + "avg": { + "$avg": { + "$cond": [ + "$include", + "$value", + None, + ] + } + }, + } + }, + ], + # Only values 10 and 30 contribute (null is ignored), avg = 20 + expected=[{"_id": None, "avg": 20.0}], msg="$avg with $cond should average only non-null conditional results", - ) - - -def test_avg_group_ifnull_expression(collection): - """Test $avg with $ifNull expression replacing missing values.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + "ifnull_expression", + docs=[ {"_id": 1, "value": 10}, {"_id": 2}, {"_id": 3, "value": 30}, - ] - ) - result = 
execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - { - "$group": { - "_id": None, - "avg": {"$avg": {"$ifNull": ["$value", 0]}}, - } - }, - ], - "cursor": {}, - }, - ) - # (10 + 0 + 30) / 3 = 13.333... - assertSuccess( - result, - [{"_id": None, "avg": 13.333333333333334}], + ], + pipeline=[ + { + "$group": { + "_id": None, + "avg": {"$avg": {"$ifNull": ["$value", 0]}}, + } + }, + ], + # (10 + 0 + 30) / 3 = 13.333... + expected=[{"_id": None, "avg": 13.333333333333334}], msg="$avg with $ifNull should replace missing with 0", - ) - + ), +] -# --- 12. Field Lookup --- +# Property [Field Resolution]: field path resolution behaviors with $avg in $group context. - -def test_avg_group_nested_field_path(collection): - """Test $avg with nested field path in $group.""" - result = _group_avg( - collection, - [ +AVG_FIELD_RESOLUTION_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "nested_field_path", + docs=[ {"_id": 1, "nested": {"value": 10}}, {"_id": 2, "nested": {"value": 20}}, {"_id": 3, "nested": {"value": 30}}, ], - avg_expr="$nested.value", - ) - assertSuccess( - result, - [{"_id": None, "avg": 20.0}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$nested.value"}}}], + expected=[{"_id": None, "avg": 20.0}], msg="$avg with nested field path should resolve and average", - ) - - -def test_avg_group_missing_field(collection): - """Test $avg with non-existent field path returns null.""" - result = _group_avg( - collection, - [ + ), + AccumulatorTestCase( + "missing_field", + docs=[ {"_id": 1, "value": 10}, {"_id": 2, "value": 20}, ], - avg_expr="$nonexistent", - ) - assertSuccess( - result, - [{"_id": None, "avg": None}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$nonexistent"}}}], + expected=[{"_id": None, "avg": None}], msg="$avg with non-existent field should return null", - ) - - -def test_avg_group_some_missing_field(collection): - """Test $avg where some documents have the field and others don't.""" 
- result = _group_avg( - collection, - [ + ), + AccumulatorTestCase( + "some_missing_field", + docs=[ {"_id": 1, "value": 10}, {"_id": 2}, {"_id": 3, "value": 30}, ], - ) - # Missing values are ignored: (10 + 30) / 2 = 20 - assertSuccess( - result, - [{"_id": None, "avg": 20.0}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$value"}}}], + # Missing values are ignored: (10 + 30) / 2 = 20 + expected=[{"_id": None, "avg": 20.0}], msg="$avg should ignore documents with missing field", - ) - - -def test_avg_group_field_resolves_to_array(collection): - """Test $avg where field resolves to an array in $group — treated as non-numeric.""" - result = _group_avg( - collection, - [ + ), + AccumulatorTestCase( + "field_resolves_to_array", + docs=[ {"_id": 1, "value": [1, 2, 3]}, {"_id": 2, "value": [4, 5, 6]}, ], - ) - assertSuccess( - result, - [{"_id": None, "avg": None}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$value"}}}], + expected=[{"_id": None, "avg": None}], msg="$avg in $group should treat array values as non-numeric", - ) - - -def test_avg_group_mixed_array_and_numeric(collection): - """Test $avg where some docs have arrays and others have numerics.""" - result = _group_avg( - collection, - [ + ), + AccumulatorTestCase( + "mixed_array_and_numeric", + docs=[ {"_id": 1, "value": [1, 2, 3]}, {"_id": 2, "value": 10}, {"_id": 3, "value": 20}, ], - ) - # Array is ignored: (10 + 20) / 2 = 15 - assertSuccess( - result, - [{"_id": None, "avg": 15.0}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$value"}}}], + # Array is ignored: (10 + 20) / 2 = 15 + expected=[{"_id": None, "avg": 15.0}], msg="$avg in $group should ignore array values and average numerics", - ) - - -def test_avg_group_deeply_nested_path(collection): - """Test $avg with deeply nested field path.""" - result = _group_avg( - collection, - [ + ), + AccumulatorTestCase( + "deeply_nested_path", + docs=[ {"_id": 1, "a": {"b": {"c": {"d": 10}}}}, {"_id": 2, "a": {"b": {"c": {"d": 
20}}}}, ], - avg_expr="$a.b.c.d", - ) - assertSuccess( - result, - [{"_id": None, "avg": 15.0}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$a.b.c.d"}}}], + expected=[{"_id": None, "avg": 15.0}], msg="$avg with deeply nested path should resolve correctly", - ) - - -def test_avg_group_intermediate_null(collection): - """Test $avg where intermediate field in path is null.""" - result = _group_avg( - collection, - [ + ), + AccumulatorTestCase( + "intermediate_null", + docs=[ {"_id": 1, "a": {"b": 10}}, {"_id": 2, "a": None}, {"_id": 3, "a": {"b": 30}}, ], - avg_expr="$a.b", - ) - # Doc 2 has null intermediate, treated as missing: (10 + 30) / 2 = 20 - assertSuccess( - result, - [{"_id": None, "avg": 20.0}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$a.b"}}}], + # Doc 2 has null intermediate, treated as missing: (10 + 30) / 2 = 20 + expected=[{"_id": None, "avg": 20.0}], msg="$avg should treat null intermediate as missing", - ) - - -def test_avg_group_multiple_accumulators(collection): - """Test multiple $avg accumulators in same $group stage.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + "multiple_accumulators", + docs=[ {"_id": 1, "a": 10, "b": 100}, {"_id": 2, "a": 20, "b": 200}, - ] - ) + ], + pipeline=[ + { + "$group": { + "_id": None, + "avg_a": {"$avg": "$a"}, + "avg_b": {"$avg": "$b"}, + } + }, + ], + expected=[{"_id": None, "avg_a": 15.0, "avg_b": 150.0}], + msg="Multiple $avg accumulators should work independently", + ), +] + +AVG_FIELD_LOOKUP_TESTS: list[AccumulatorTestCase] = ( + AVG_EXPRESSION_TYPE_TESTS + AVG_FIELD_RESOLUTION_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(AVG_FIELD_LOOKUP_TESTS)) +def test_avg_field_lookup(collection, test_case: AccumulatorTestCase): + """Test $avg field lookup and expression types in $group context.""" + if test_case.docs: + collection.insert_many(test_case.docs) result = execute_command( collection, { "aggregate": collection.name, - "pipeline": [ - { - "$group": 
{ - "_id": None, - "avg_a": {"$avg": "$a"}, - "avg_b": {"$avg": "$b"}, - } - }, - ], + "pipeline": test_case.pipeline, "cursor": {}, }, ) - assertSuccess( - result, - [{"_id": None, "avg_a": 15.0, "avg_b": 150.0}], - msg="Multiple $avg accumulators should work independently", - ) + assertResult(result, expected=test_case.expected, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py index 70dea493..d50329b0 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py @@ -8,10 +8,15 @@ from __future__ import annotations +import pytest from bson import Decimal128, Int64 -from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertResult from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import ( DECIMAL128_LARGE_EXPONENT, DECIMAL128_MAX, @@ -25,166 +30,157 @@ INT64_MIN, ) -# --- Helpers --- - - -def _group_avg_values(collection, values): - """Insert documents with given values and return $avg across all.""" - docs = [{"_id": i, "v": v} for i, v in enumerate(values)] - collection.insert_many(docs) - return execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$group": {"_id": None, "avg": {"$avg": "$v"}}}, - ], - "cursor": {}, - }, - ) +# Property [Integer Boundaries]: $avg handles int32 and int64 boundary values +# including MAX, MIN, and overflow combinations. 
- -# --- Integer Boundary Values --- - - -def test_avg_group_int32_max_pair(collection): - """Test $avg of two INT32_MAX values.""" - result = _group_avg_values(collection, [INT32_MAX, INT32_MAX]) - assertSuccess( - result, - [{"_id": None, "avg": float(INT32_MAX)}], +AVG_INT_BOUNDARY_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + id="int32_max_pair", + docs=[{"_id": 0, "v": INT32_MAX}, {"_id": 1, "v": INT32_MAX}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": float(INT32_MAX)}], msg="avg of two INT32_MAX should return INT32_MAX as double", - ) - - -def test_avg_group_int32_min_pair(collection): - """Test $avg of two INT32_MIN values.""" - result = _group_avg_values(collection, [INT32_MIN, INT32_MIN]) - assertSuccess( - result, - [{"_id": None, "avg": float(INT32_MIN)}], + ), + AccumulatorTestCase( + id="int32_min_pair", + docs=[{"_id": 0, "v": INT32_MIN}, {"_id": 1, "v": INT32_MIN}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": float(INT32_MIN)}], msg="avg of two INT32_MIN should return INT32_MIN as double", - ) - - -def test_avg_group_int32_max_and_min(collection): - """Test $avg of INT32_MAX and INT32_MIN.""" - result = _group_avg_values(collection, [INT32_MAX, INT32_MIN]) - # (2147483647 + -2147483648) / 2 = -0.5 - assertSuccess( - result, - [{"_id": None, "avg": -0.5}], + ), + AccumulatorTestCase( + id="int32_max_and_min", + docs=[{"_id": 0, "v": INT32_MAX}, {"_id": 1, "v": INT32_MIN}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + # (2147483647 + -2147483648) / 2 = -0.5 + expected=[{"_id": None, "avg": -0.5}], msg="avg of INT32_MAX and INT32_MIN should be -0.5", - ) - - -def test_avg_group_int64_max_pair(collection): - """Test $avg of two INT64_MAX values — potential precision loss in double.""" - result = _group_avg_values(collection, [INT64_MAX, INT64_MAX]) - assertSuccess( - result, - [{"_id": None, "avg": 
9.223372036854776e18}], + ), + AccumulatorTestCase( + id="int64_max_pair", + docs=[{"_id": 0, "v": INT64_MAX}, {"_id": 1, "v": INT64_MAX}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 9.223372036854776e18}], msg="avg of two INT64_MAX should handle overflow", - ) - - -def test_avg_group_int64_min_pair(collection): - """Test $avg of two INT64_MIN values.""" - result = _group_avg_values(collection, [INT64_MIN, INT64_MIN]) - assertSuccess( - result, - [{"_id": None, "avg": -9.223372036854776e18}], + ), + AccumulatorTestCase( + id="int64_min_pair", + docs=[{"_id": 0, "v": INT64_MIN}, {"_id": 1, "v": INT64_MIN}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": -9.223372036854776e18}], msg="avg of two INT64_MIN should handle overflow", - ) - - -def test_avg_group_int64_max_and_one(collection): - """Test $avg of INT64_MAX and 1.""" - result = _group_avg_values(collection, [INT64_MAX, Int64(1)]) - assertSuccess( - result, - [{"_id": None, "avg": 4.611686018427388e18}], + ), + AccumulatorTestCase( + id="int64_max_and_one", + docs=[{"_id": 0, "v": INT64_MAX}, {"_id": 1, "v": Int64(1)}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 4.611686018427388e18}], msg="avg of INT64_MAX and 1", - ) - - -# --- Double Boundary Values --- - - -def test_avg_group_double_near_max_pair(collection): - """Test $avg of two DOUBLE_NEAR_MAX values — sum overflows to inf.""" - result = _group_avg_values(collection, [DOUBLE_NEAR_MAX, DOUBLE_NEAR_MAX]) - assertSuccess( - result, - [{"_id": None, "avg": float("inf")}], + ), +] + +# Property [Double Boundaries]: $avg handles double boundary values +# including near-max overflow and subnormal values. 
+ +AVG_DOUBLE_BOUNDARY_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + id="double_near_max_pair", + docs=[{"_id": 0, "v": DOUBLE_NEAR_MAX}, {"_id": 1, "v": DOUBLE_NEAR_MAX}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": float("inf")}], msg="avg of two DOUBLE_NEAR_MAX overflows sum to inf", - ) - - -def test_avg_group_double_subnormal(collection): - """Test $avg of subnormal double values.""" - result = _group_avg_values(collection, [DOUBLE_MIN_SUBNORMAL, DOUBLE_MIN_SUBNORMAL]) - assertSuccess( - result, - [{"_id": None, "avg": DOUBLE_MIN_SUBNORMAL}], + ), + AccumulatorTestCase( + id="double_subnormal", + docs=[ + {"_id": 0, "v": DOUBLE_MIN_SUBNORMAL}, + {"_id": 1, "v": DOUBLE_MIN_SUBNORMAL}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": DOUBLE_MIN_SUBNORMAL}], msg="avg of two subnormal doubles should return subnormal", - ) - - -# --- Decimal128 Precision --- - - -def test_avg_group_decimal128_high_precision(collection): - """Test $avg of decimal128 values requiring high precision.""" - result = _group_avg_values( - collection, - [ - Decimal128("1.000000000000000000000000000000001"), - Decimal128("2.999999999999999999999999999999999"), + ), +] + +# Property [Decimal128 Precision]: $avg preserves Decimal128 precision +# across extreme exponent differences and boundary values. 
+ +AVG_DECIMAL128_PRECISION_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + id="decimal128_high_precision", + docs=[ + { + "_id": 0, + "v": Decimal128("1.000000000000000000000000000000001"), + }, + { + "_id": 1, + "v": Decimal128("2.999999999999999999999999999999999"), + }, ], - ) - assertSuccess( - result, - [{"_id": None, "avg": Decimal128("2.000000000000000000000000000000000")}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": Decimal128("2.000000000000000000000000000000000")}], msg="decimal128 avg should preserve high precision", - ) - - -def test_avg_group_decimal128_large_exponent(collection): - """Test $avg with decimal128 large exponent values.""" - result = _group_avg_values(collection, [DECIMAL128_LARGE_EXPONENT, DECIMAL128_LARGE_EXPONENT]) - assertSuccess( - result, - [{"_id": None, "avg": DECIMAL128_LARGE_EXPONENT}], + ), + AccumulatorTestCase( + id="decimal128_large_exponent", + docs=[ + {"_id": 0, "v": DECIMAL128_LARGE_EXPONENT}, + {"_id": 1, "v": DECIMAL128_LARGE_EXPONENT}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": DECIMAL128_LARGE_EXPONENT}], msg="avg of two identical large exponent values should return same value", - ) - - -def test_avg_group_decimal128_small_exponent(collection): - """Test $avg with decimal128 small exponent values.""" - result = _group_avg_values(collection, [DECIMAL128_SMALL_EXPONENT, DECIMAL128_SMALL_EXPONENT]) - assertSuccess( - result, - [{"_id": None, "avg": DECIMAL128_SMALL_EXPONENT}], + ), + AccumulatorTestCase( + id="decimal128_small_exponent", + docs=[ + {"_id": 0, "v": DECIMAL128_SMALL_EXPONENT}, + {"_id": 1, "v": DECIMAL128_SMALL_EXPONENT}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": DECIMAL128_SMALL_EXPONENT}], msg="avg of two identical small exponent values should return same value", - ) - - -def 
test_avg_group_decimal128_max_and_min(collection): - """Test $avg of DECIMAL128_MAX and DECIMAL128_MIN.""" - result = _group_avg_values(collection, [DECIMAL128_MAX, DECIMAL128_MIN]) - assertSuccess( - result, - [{"_id": None, "avg": Decimal128("0")}], + ), + AccumulatorTestCase( + id="decimal128_max_and_min", + docs=[{"_id": 0, "v": DECIMAL128_MAX}, {"_id": 1, "v": DECIMAL128_MIN}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": Decimal128("0")}], msg="avg of DECIMAL128_MAX and DECIMAL128_MIN", - ) + ), + AccumulatorTestCase( + id="decimal128_extreme_exponent_diff", + docs=[ + {"_id": 0, "v": Decimal128("1E+6144")}, + {"_id": 1, "v": Decimal128("1")}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[ + { + "_id": None, + "avg": Decimal128("5.00000000000000000000000000000000E+6143"), + } + ], + msg="avg with extreme exponent difference", + ), +] +AVG_GROUP_BOUNDARY_TESTS: list[AccumulatorTestCase] = ( + AVG_INT_BOUNDARY_TESTS + AVG_DOUBLE_BOUNDARY_TESTS + AVG_DECIMAL128_PRECISION_TESTS +) -def test_avg_group_decimal128_extreme_exponent_diff(collection): - """Test $avg of values with extreme exponent difference.""" - result = _group_avg_values(collection, [Decimal128("1E+6144"), Decimal128("1")]) - assertSuccess( - result, - [{"_id": None, "avg": Decimal128("5.00000000000000000000000000000000E+6143")}], - msg="avg with extreme exponent difference", + +@pytest.mark.parametrize("test_case", pytest_params(AVG_GROUP_BOUNDARY_TESTS)) +def test_avg_group_boundaries(collection, test_case: AccumulatorTestCase): + """Test $avg accumulator boundary values in $group context.""" + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, ) + assertResult(result, expected=test_case.expected, msg=test_case.msg) diff --git 
a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py index 93eaaeb0..e9189f0b 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py @@ -7,443 +7,311 @@ from __future__ import annotations +import pytest from bson import Decimal128, Int64 -from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertResult from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params -# --- Helpers --- +# Property [Numeric Equivalence]: numerically equivalent group keys +# (int32, int64, double, Decimal128) produce a single group. - -def _group_avg(collection, docs, group_id="$category", field="$value"): - """Insert docs and run $group with $avg.""" - collection.insert_many(docs) - return execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$group": {"_id": group_id, "avg": {"$avg": field}}}, - {"$sort": {"_id": 1}}, - ], - "cursor": {}, - }, - ) - - -# --- 13. 
Numeric Equivalence in Grouping --- - - -def test_avg_group_numeric_equivalence_grouping(collection): - """Test $avg groups numerically equivalent values of different types into same group.""" - collection.insert_many( - [ +NUMERIC_EQUIVALENCE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + id="numeric_equivalence_grouping", + docs=[ {"_id": 1, "key": 1, "value": 10}, {"_id": 2, "key": Int64(1), "value": 20}, {"_id": 3, "key": 1.0, "value": 30}, {"_id": 4, "key": Decimal128("1"), "value": 40}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$group": {"_id": "$key", "avg": {"$avg": "$value"}}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [{"_id": 1, "avg": 25.0}], + ], + pipeline=[ + {"$group": {"_id": "$key", "avg": {"$avg": "$value"}}}, + ], + expected=[{"_id": 1, "avg": 25.0}], msg="Numerically equivalent group keys should produce a single group", - ) - - -def test_avg_group_zero_equivalence(collection): - """Test $avg groups all zero representations into same group.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + id="zero_equivalence", + docs=[ {"_id": 1, "key": 0, "value": 10}, {"_id": 2, "key": Int64(0), "value": 20}, {"_id": 3, "key": 0.0, "value": 30}, {"_id": 4, "key": Decimal128("0"), "value": 40}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$group": {"_id": "$key", "avg": {"$avg": "$value"}}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [{"_id": 0, "avg": 25.0}], + ], + pipeline=[ + {"$group": {"_id": "$key", "avg": {"$avg": "$value"}}}, + ], + expected=[{"_id": 0, "avg": 25.0}], msg="All zero representations should group together", - ) - - -# --- 16. 
Single Document Group / Empty Group --- - - -def test_avg_group_single_document(collection): - """Test $avg with single document in group returns that value.""" - result = _group_avg( - collection, - [{"_id": 1, "category": "A", "value": 42}], - ) - assertSuccess( - result, - [{"_id": "A", "avg": 42.0}], + ), +] + +# Property [Single and Empty Groups]: $avg returns correct results for +# single-document groups, empty collections, and null group IDs. + +SINGLE_EMPTY_GROUP_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + id="single_document", + docs=[{"_id": 1, "category": "A", "value": 42}], + pipeline=[ + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + {"$sort": {"_id": 1}}, + ], + expected=[{"_id": "A", "avg": 42.0}], msg="$avg of single document should return that value as double", - ) - - -def test_avg_group_single_document_non_numeric(collection): - """Test $avg with single non-numeric document returns null.""" - result = _group_avg( - collection, - [{"_id": 1, "category": "A", "value": "hello"}], - ) - assertSuccess( - result, - [{"_id": "A", "avg": None}], + ), + AccumulatorTestCase( + id="single_document_non_numeric", + docs=[{"_id": 1, "category": "A", "value": "hello"}], + pipeline=[ + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + {"$sort": {"_id": 1}}, + ], + expected=[{"_id": "A", "avg": None}], msg="$avg of single non-numeric document should return null", - ) - - -def test_avg_group_single_document_null(collection): - """Test $avg with single null document returns null.""" - result = _group_avg( - collection, - [{"_id": 1, "category": "A", "value": None}], - ) - assertSuccess( - result, - [{"_id": "A", "avg": None}], + ), + AccumulatorTestCase( + id="single_document_null", + docs=[{"_id": 1, "category": "A", "value": None}], + pipeline=[ + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + {"$sort": {"_id": 1}}, + ], + expected=[{"_id": "A", "avg": None}], msg="$avg of single null document should 
return null", - ) - - -def test_avg_group_single_document_missing_field(collection): - """Test $avg with single document missing the field returns null.""" - result = _group_avg( - collection, - [{"_id": 1, "category": "A"}], - ) - assertSuccess( - result, - [{"_id": "A", "avg": None}], + ), + AccumulatorTestCase( + id="single_document_missing_field", + docs=[{"_id": 1, "category": "A"}], + pipeline=[ + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + {"$sort": {"_id": 1}}, + ], + expected=[{"_id": "A", "avg": None}], msg="$avg of single document with missing field should return null", - ) - - -def test_avg_group_empty_collection(collection): - """Test $avg on empty collection produces no output documents.""" - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [], + ), + AccumulatorTestCase( + id="empty_collection", + docs=None, + pipeline=[ + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + ], + expected=[], msg="$avg on empty collection should produce no output", - ) - - -def test_avg_group_all_filtered_out(collection): - """Test $avg where $match filters all documents produces no output.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + id="all_filtered_out", + docs=[ {"_id": 1, "category": "A", "value": 10}, {"_id": 2, "category": "A", "value": 20}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$match": {"category": "Z"}}, - {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [], + ], + pipeline=[ + {"$match": {"category": "Z"}}, + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + ], + expected=[], msg="$avg after filtering all documents should produce no output", - ) - - -def test_avg_group_null_id(collection): - """Test 
$avg with _id: null groups entire collection.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + id="null_id", + docs=[ {"_id": 1, "value": 10}, {"_id": 2, "value": 20}, {"_id": 3, "value": 30}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$group": {"_id": None, "avg": {"$avg": "$value"}}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [{"_id": None, "avg": 20.0}], + ], + pipeline=[ + {"$group": {"_id": None, "avg": {"$avg": "$value"}}}, + ], + expected=[{"_id": None, "avg": 20.0}], msg="$avg with _id: null should average entire collection", - ) - + ), +] -# --- 18. Precision Edge Cases --- +# Property [Precision]: $avg produces correct fractional and repeating +# decimal results and handles large document counts. - -def test_avg_group_odd_sum_two_int32(collection): - """Test $avg of two int32 values whose sum is odd produces fractional result.""" - result = _group_avg( - collection, - [ +PRECISION_EDGE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + id="odd_sum_two_int32", + docs=[ {"_id": 1, "category": "A", "value": 1}, {"_id": 2, "category": "A", "value": 2}, ], - ) - assertSuccess( - result, - [{"_id": "A", "avg": 1.5}], + pipeline=[ + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + {"$sort": {"_id": 1}}, + ], + expected=[{"_id": "A", "avg": 1.5}], msg="$avg of 1 and 2 should return 1.5", - ) - - -def test_avg_group_repeating_decimal(collection): - """Test $avg producing repeating decimal (1+1+2)/3.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + id="repeating_decimal", + docs=[ {"_id": 1, "category": "A", "value": 1}, {"_id": 2, "category": "A", "value": 1}, {"_id": 3, "category": "A", "value": 2}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [{"_id": "A", 
"avg": 1.3333333333333333}], + ], + pipeline=[ + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + ], + expected=[{"_id": "A", "avg": 1.3333333333333333}], msg="$avg of 1,1,2 should return 4/3", - ) - - -def test_avg_group_sequence_1_to_100(collection): - """Test $avg of sequence 1..100 returns 50.5.""" - docs = [{"_id": i, "category": "A", "value": i} for i in range(1, 101)] - result = _group_avg(collection, docs) - assertSuccess( - result, - [{"_id": "A", "avg": 50.5}], + ), + AccumulatorTestCase( + id="sequence_1_to_100", + docs=[{"_id": i, "category": "A", "value": i} for i in range(1, 101)], + pipeline=[ + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + {"$sort": {"_id": 1}}, + ], + expected=[{"_id": "A", "avg": 50.5}], msg="$avg of 1..100 should return 50.5", - ) - - -def test_avg_group_large_count_identical(collection): - """Test $avg of 1000 identical values returns that value.""" - docs = [{"_id": i, "category": "A", "value": 7} for i in range(1000)] - result = _group_avg(collection, docs) - assertSuccess( - result, - [{"_id": "A", "avg": 7.0}], + ), + AccumulatorTestCase( + id="large_count_identical", + docs=[{"_id": i, "category": "A", "value": 7} for i in range(1000)], + pipeline=[ + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + {"$sort": {"_id": 1}}, + ], + expected=[{"_id": "A", "avg": 7.0}], msg="$avg of 1000 identical values should return that value", - ) + ), +] +# Property [Multiple Groups]: $avg computes independent averages per group +# with different counts, null groups, and mixed types. -# --- 20. 
Multiple Groups with Different Characteristics --- - - -def test_avg_group_different_counts(collection): - """Test $avg where groups have different document counts.""" - collection.insert_many( - [ +MULTIPLE_GROUPS_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + id="different_counts", + docs=[ {"_id": 1, "category": "A", "value": 10}, {"_id": 2, "category": "B", "value": 20}, {"_id": 3, "category": "B", "value": 40}, {"_id": 4, "category": "C", "value": 5}, {"_id": 5, "category": "C", "value": 10}, {"_id": 6, "category": "C", "value": 15}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, - {"$sort": {"_id": 1}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [ + ], + pipeline=[ + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + {"$sort": {"_id": 1}}, + ], + expected=[ {"_id": "A", "avg": 10.0}, {"_id": "B", "avg": 30.0}, {"_id": "C", "avg": 10.0}, ], msg="$avg should compute correct average per group with different counts", - ) - - -def test_avg_group_one_all_nulls_one_all_numeric(collection): - """Test $avg where one group has all nulls and another has numerics.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + id="one_all_nulls_one_all_numeric", + docs=[ {"_id": 1, "category": "A", "value": None}, {"_id": 2, "category": "A", "value": None}, {"_id": 3, "category": "B", "value": 10}, {"_id": 4, "category": "B", "value": 20}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, - {"$sort": {"_id": 1}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [ + ], + pipeline=[ + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + {"$sort": {"_id": 1}}, + ], + expected=[ {"_id": "A", "avg": None}, {"_id": "B", "avg": 15.0}, ], msg="Group with all nulls returns null, group with 
numerics returns average", - ) - - -def test_avg_group_mixed_types_per_group(collection): - """Test $avg where groups have different numeric type distributions.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + id="mixed_types_per_group", + docs=[ {"_id": 1, "category": "int", "value": 10}, {"_id": 2, "category": "int", "value": 20}, {"_id": 3, "category": "dec", "value": Decimal128("10")}, {"_id": 4, "category": "dec", "value": Decimal128("20")}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, - {"$sort": {"_id": 1}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [ + ], + pipeline=[ + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + {"$sort": {"_id": 1}}, + ], + expected=[ {"_id": "dec", "avg": Decimal128("15")}, {"_id": "int", "avg": 15.0}, ], msg="Int group returns double, Decimal128 group returns Decimal128", - ) - + ), +] -# --- 21. Comparison with Related Operators --- +# Property [Comparison with Related Operators]: $avg results are consistent +# with $sum/$count, and non-numeric handling differs from $sum. 
- -def test_avg_equals_sum_divided_by_count(collection): - """Test $avg equals $sum / count for int32 values.""" - collection.insert_many( - [ +COMPARISON_WITH_RELATED_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + id="equals_sum_divided_by_count", + docs=[ {"_id": 1, "category": "A", "value": 10}, {"_id": 2, "category": "A", "value": 20}, {"_id": 3, "category": "A", "value": 30}, {"_id": 4, "category": "A", "value": 40}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - { - "$group": { - "_id": "$category", - "avg": {"$avg": "$value"}, - "sum": {"$sum": "$value"}, - "count": {"$sum": 1}, - } - }, - ], - "cursor": {}, - }, - ) - # avg should be 25.0, sum should be 100, count should be 4 - assertSuccess( - result, - [{"_id": "A", "avg": 25.0, "sum": 100, "count": 4}], + ], + pipeline=[ + { + "$group": { + "_id": "$category", + "avg": {"$avg": "$value"}, + "sum": {"$sum": "$value"}, + "count": {"$sum": 1}, + } + }, + ], + expected=[{"_id": "A", "avg": 25.0, "sum": 100, "count": 4}], msg="$avg should equal $sum / count", - ) - - -def test_avg_vs_sum_non_numeric_handling(collection): - """Test $avg returns null but $sum returns 0 when all values are non-numeric.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + id="vs_sum_non_numeric_handling", + docs=[ {"_id": 1, "category": "A", "value": "hello"}, {"_id": 2, "category": "A", "value": "world"}, - ] - ) + ], + pipeline=[ + { + "$group": { + "_id": "$category", + "avg": {"$avg": "$value"}, + "sum": {"$sum": "$value"}, + } + }, + ], + expected=[{"_id": "A", "avg": None, "sum": 0}], + msg="$avg returns null for non-numeric but $sum returns 0", + ), +] + +AVG_GROUP_CONTEXT_TESTS: list[AccumulatorTestCase] = ( + NUMERIC_EQUIVALENCE_TESTS + + SINGLE_EMPTY_GROUP_TESTS + + PRECISION_EDGE_TESTS + + MULTIPLE_GROUPS_TESTS + + COMPARISON_WITH_RELATED_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(AVG_GROUP_CONTEXT_TESTS)) +def 
test_avg_group_context(collection, test_case: AccumulatorTestCase): + """Test $avg in $group context with grouping behavior.""" + if test_case.docs: + collection.insert_many(test_case.docs) result = execute_command( collection, { "aggregate": collection.name, - "pipeline": [ - { - "$group": { - "_id": "$category", - "avg": {"$avg": "$value"}, - "sum": {"$sum": "$value"}, - } - }, - ], + "pipeline": test_case.pipeline, "cursor": {}, }, ) - assertSuccess( - result, - [{"_id": "A", "avg": None, "sum": 0}], - msg="$avg returns null for non-numeric but $sum returns 0", - ) + assertResult(result, expected=test_case.expected, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py index 17dc26f6..c9e81c84 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py @@ -7,10 +7,17 @@ from __future__ import annotations +import math + +import pytest from bson import Decimal128, Int64 -from documentdb_tests.framework.assertions import assertSuccess, assertSuccessNaN +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertResult from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import ( DECIMAL128_INFINITY, DECIMAL128_NAN, @@ -23,388 +30,336 @@ FLOAT_NEGATIVE_INFINITY, ) -# --- Helpers --- - - -def _group_avg_values(collection, values): - """Insert documents with given values and return $avg across all.""" - docs = [{"_id": i, "v": v} for i, v in enumerate(values)] - collection.insert_many(docs) - return execute_command( - collection, - { - 
"aggregate": collection.name, - "pipeline": [ - {"$group": {"_id": None, "avg": {"$avg": "$v"}}}, - ], - "cursor": {}, - }, - ) - - -# --- Type Promotion in $group --- - - -def test_avg_group_all_int32(collection): - """Test $avg over int32 documents returns double.""" - result = _group_avg_values(collection, [10, 20, 30]) - assertSuccess(result, [{"_id": None, "avg": 20.0}], msg="int32 avg should return double") - - -def test_avg_group_all_int64(collection): - """Test $avg over int64 documents returns double.""" - result = _group_avg_values(collection, [Int64(10), Int64(20), Int64(30)]) - assertSuccess(result, [{"_id": None, "avg": 20.0}], msg="int64 avg should return double") - - -def test_avg_group_all_double(collection): - """Test $avg over double documents returns double.""" - result = _group_avg_values(collection, [10.0, 20.0, 30.0]) - assertSuccess(result, [{"_id": None, "avg": 20.0}], msg="double avg should return double") - - -def test_avg_group_all_decimal128(collection): - """Test $avg over decimal128 documents returns decimal128.""" - result = _group_avg_values(collection, [Decimal128("10"), Decimal128("20"), Decimal128("30")]) - assertSuccess( - result, - [{"_id": None, "avg": Decimal128("20")}], +# Property [Type Promotion]: $avg returns double for integer and double inputs, +# and Decimal128 when any input is Decimal128. 
+AVG_TYPE_PROMOTION_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "all_int32", + docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": 20}, {"_id": 2, "v": 30}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 20.0}], + msg="int32 avg should return double", + ), + AccumulatorTestCase( + "all_int64", + docs=[ + {"_id": 0, "v": Int64(10)}, + {"_id": 1, "v": Int64(20)}, + {"_id": 2, "v": Int64(30)}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 20.0}], + msg="int64 avg should return double", + ), + AccumulatorTestCase( + "all_double", + docs=[{"_id": 0, "v": 10.0}, {"_id": 1, "v": 20.0}, {"_id": 2, "v": 30.0}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 20.0}], + msg="double avg should return double", + ), + AccumulatorTestCase( + "all_decimal128", + docs=[ + {"_id": 0, "v": Decimal128("10")}, + {"_id": 1, "v": Decimal128("20")}, + {"_id": 2, "v": Decimal128("30")}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": Decimal128("20")}], msg="decimal128 avg should return decimal128", - ) - - -def test_avg_group_int32_and_int64(collection): - """Test $avg over mixed int32/int64 returns double.""" - result = _group_avg_values(collection, [10, Int64(20)]) - assertSuccess(result, [{"_id": None, "avg": 15.0}], msg="int32+int64 avg should return double") - - -def test_avg_group_int32_and_double(collection): - """Test $avg over mixed int32/double returns double.""" - result = _group_avg_values(collection, [10, 20.0]) - assertSuccess(result, [{"_id": None, "avg": 15.0}], msg="int32+double avg should return double") - - -def test_avg_group_int32_and_decimal128(collection): - """Test $avg over mixed int32/decimal128 returns decimal128.""" - result = _group_avg_values(collection, [10, Decimal128("20")]) - assertSuccess( - result, - [{"_id": None, "avg": Decimal128("15")}], 
+ ), + AccumulatorTestCase( + "int32_and_int64", + docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": Int64(20)}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 15.0}], + msg="int32+int64 avg should return double", + ), + AccumulatorTestCase( + "int32_and_double", + docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": 20.0}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 15.0}], + msg="int32+double avg should return double", + ), + AccumulatorTestCase( + "int32_and_decimal128", + docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": Decimal128("20")}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": Decimal128("15")}], msg="int32+decimal128 avg should return decimal128", - ) - - -def test_avg_group_int64_and_decimal128(collection): - """Test $avg over mixed int64/decimal128 returns decimal128.""" - result = _group_avg_values(collection, [Int64(10), Decimal128("20")]) - assertSuccess( - result, - [{"_id": None, "avg": Decimal128("15")}], + ), + AccumulatorTestCase( + "int64_and_decimal128", + docs=[{"_id": 0, "v": Int64(10)}, {"_id": 1, "v": Decimal128("20")}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": Decimal128("15")}], msg="int64+decimal128 avg should return decimal128", - ) - - -def test_avg_group_double_and_decimal128(collection): - """Test $avg over mixed double/decimal128 returns decimal128.""" - result = _group_avg_values(collection, [10.0, Decimal128("20")]) - assertSuccess( - result, - [{"_id": None, "avg": Decimal128("15")}], + ), + AccumulatorTestCase( + "double_and_decimal128", + docs=[{"_id": 0, "v": 10.0}, {"_id": 1, "v": Decimal128("20")}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": Decimal128("15")}], msg="double+decimal128 avg should return decimal128", - ) - - -def test_avg_group_all_four_types(collection): - """Test $avg 
over all numeric types returns decimal128.""" - result = _group_avg_values(collection, [10, Int64(20), 30.0, Decimal128("40")]) - assertSuccess( - result, - [{"_id": None, "avg": Decimal128("25")}], + ), + AccumulatorTestCase( + "all_four_types", + docs=[ + {"_id": 0, "v": 10}, + {"_id": 1, "v": Int64(20)}, + {"_id": 2, "v": 30.0}, + {"_id": 3, "v": Decimal128("40")}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": Decimal128("25")}], msg="all four numeric types avg should return decimal128", - ) - - -def test_avg_group_fractional_result_from_int32(collection): - """Test $avg of int32 values producing fractional result returns double.""" - result = _group_avg_values(collection, [1, 2]) - assertSuccess( - result, - [{"_id": None, "avg": 1.5}], + ), + AccumulatorTestCase( + "fractional_result_from_int32", + docs=[{"_id": 0, "v": 1}, {"_id": 1, "v": 2}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 1.5}], msg="int32 avg producing fraction should return double", - ) - - -# --- NaN Propagation in $group --- - - -def test_avg_group_nan_propagates(collection): - """Test $avg where one document has NaN propagates NaN.""" - result = _group_avg_values(collection, [10, float("nan"), 30]) - assertSuccessNaN( - result, - [{"_id": None, "avg": float("nan")}], + ), +] + +# Property [NaN Propagation]: NaN is numeric and propagates to the result; +# NaN dominates Infinity and cross-type NaN promotes to Decimal128. 
+AVG_NAN_PROPAGATION_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "nan_propagates", + docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": float("nan")}, {"_id": 2, "v": 30}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], msg="NaN in group should propagate to result", - ) - - -def test_avg_group_all_nan(collection): - """Test $avg where all documents have NaN returns NaN.""" - result = _group_avg_values(collection, [float("nan"), float("nan")]) - assertSuccessNaN( - result, - [{"_id": None, "avg": float("nan")}], + ), + AccumulatorTestCase( + "all_nan", + docs=[{"_id": 0, "v": float("nan")}, {"_id": 1, "v": float("nan")}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], msg="All NaN in group should return NaN", - ) - - -def test_avg_group_decimal128_nan_propagates(collection): - """Test $avg where one document has Decimal128 NaN propagates.""" - result = _group_avg_values(collection, [Decimal128("10"), DECIMAL128_NAN, Decimal128("30")]) - assertSuccessNaN( - result, - [{"_id": None, "avg": DECIMAL128_NAN}], + ), + AccumulatorTestCase( + "decimal128_nan", + docs=[ + {"_id": 0, "v": Decimal128("10")}, + {"_id": 1, "v": DECIMAL128_NAN}, + {"_id": 2, "v": Decimal128("30")}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": DECIMAL128_NAN}], msg="Decimal128 NaN in group should propagate", - ) - - -def test_avg_group_nan_dominates_infinity(collection): - """Test $avg with NaN and Infinity returns NaN.""" - result = _group_avg_values(collection, [float("nan"), FLOAT_INFINITY]) - assertSuccessNaN( - result, - [{"_id": None, "avg": float("nan")}], + ), + AccumulatorTestCase( + "nan_dominates_infinity", + docs=[{"_id": 0, "v": float("nan")}, {"_id": 1, "v": FLOAT_INFINITY}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + 
expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], msg="NaN should dominate Infinity in group", - ) - - -def test_avg_group_cross_type_nan_decimal(collection): - """Test $avg with double NaN and Decimal128 value returns Decimal128 NaN.""" - result = _group_avg_values(collection, [float("nan"), Decimal128("5")]) - assertSuccessNaN( - result, - [{"_id": None, "avg": DECIMAL128_NAN}], + ), + AccumulatorTestCase( + "cross_type_nan", + docs=[{"_id": 0, "v": float("nan")}, {"_id": 1, "v": Decimal128("5")}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": DECIMAL128_NAN}], msg="double NaN + Decimal128 should return Decimal128 NaN", - ) - - -# --- Infinity in $group --- - - -def test_avg_group_infinity(collection): - """Test $avg where documents include Infinity returns Infinity.""" - result = _group_avg_values(collection, [FLOAT_INFINITY, 10]) - assertSuccess( - result, - [{"_id": None, "avg": FLOAT_INFINITY}], + ), +] + +# Property [Infinity]: Infinity dominates finite values, and +# Infinity + -Infinity cancels to NaN. 
+AVG_INFINITY_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "infinity", + docs=[{"_id": 0, "v": FLOAT_INFINITY}, {"_id": 1, "v": 10}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": FLOAT_INFINITY}], msg="Infinity in group should propagate", - ) - - -def test_avg_group_negative_infinity(collection): - """Test $avg where documents include -Infinity returns -Infinity.""" - result = _group_avg_values(collection, [FLOAT_NEGATIVE_INFINITY, 10]) - assertSuccess( - result, - [{"_id": None, "avg": FLOAT_NEGATIVE_INFINITY}], + ), + AccumulatorTestCase( + "negative_infinity", + docs=[{"_id": 0, "v": FLOAT_NEGATIVE_INFINITY}, {"_id": 1, "v": 10}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": FLOAT_NEGATIVE_INFINITY}], msg="-Infinity in group should propagate", - ) - - -def test_avg_group_inf_neg_inf_cancel(collection): - """Test $avg with Infinity and -Infinity documents returns NaN.""" - result = _group_avg_values(collection, [FLOAT_INFINITY, FLOAT_NEGATIVE_INFINITY]) - assertSuccessNaN( - result, - [{"_id": None, "avg": float("nan")}], + ), + AccumulatorTestCase( + "inf_neg_inf_cancel", + docs=[{"_id": 0, "v": FLOAT_INFINITY}, {"_id": 1, "v": FLOAT_NEGATIVE_INFINITY}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], msg="Infinity + -Infinity in group should return NaN", - ) - - -def test_avg_group_decimal128_infinity(collection): - """Test $avg with Decimal128 Infinity documents.""" - result = _group_avg_values(collection, [DECIMAL128_INFINITY, Decimal128("10")]) - assertSuccess( - result, - [{"_id": None, "avg": DECIMAL128_INFINITY}], + ), + AccumulatorTestCase( + "decimal128_infinity", + docs=[{"_id": 0, "v": DECIMAL128_INFINITY}, {"_id": 1, "v": Decimal128("10")}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 
DECIMAL128_INFINITY}], msg="Decimal128 Infinity in group should propagate", - ) - - -def test_avg_group_decimal128_inf_neg_inf_cancel(collection): - """Test $avg with Decimal128 Infinity and -Infinity returns Decimal128 NaN.""" - result = _group_avg_values(collection, [DECIMAL128_INFINITY, DECIMAL128_NEGATIVE_INFINITY]) - assertSuccessNaN( - result, - [{"_id": None, "avg": DECIMAL128_NAN}], + ), + AccumulatorTestCase( + "decimal128_inf_neg_inf_cancel", + docs=[ + {"_id": 0, "v": DECIMAL128_INFINITY}, + {"_id": 1, "v": DECIMAL128_NEGATIVE_INFINITY}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": DECIMAL128_NAN}], msg="Decimal128 Inf + -Inf in group should return Decimal128 NaN", - ) - - -# --- Null / Missing in $group --- - - -def test_avg_group_all_null(collection): - """Test $avg where all documents have null returns null.""" - result = _group_avg_values(collection, [None, None, None]) - assertSuccess(result, [{"_id": None, "avg": None}], msg="All null in group should return null") - - -def test_avg_group_some_null(collection): - """Test $avg ignores null documents and averages the rest.""" - result = _group_avg_values(collection, [10, None, 30]) - assertSuccess( - result, - [{"_id": None, "avg": 20.0}], + ), +] + +# Property [Null and Missing]: null values and missing fields are excluded +# from both the sum and count, producing null when no numeric values remain. 
+AVG_NULL_MISSING_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "all_null", + docs=[{"_id": 0, "v": None}, {"_id": 1, "v": None}, {"_id": 2, "v": None}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": None}], + msg="All null in group should return null", + ), + AccumulatorTestCase( + "some_null", + docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": None}, {"_id": 2, "v": 30}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 20.0}], msg="Null docs should be ignored, avg of 10 and 30 is 20", - ) - - -def test_avg_group_all_missing(collection): - """Test $avg where all documents are missing the field returns null.""" - docs = [{"_id": i, "other": i} for i in range(3)] - collection.insert_many(docs) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - "cursor": {}, - }, - ) - assertSuccess(result, [{"_id": None, "avg": None}], msg="All missing fields should return null") - - -def test_avg_group_some_missing(collection): - """Test $avg ignores documents with missing field.""" - collection.insert_many( - [ - {"_id": 0, "v": 10}, - {"_id": 1}, - {"_id": 2, "v": 30}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - "cursor": {}, - }, - ) - assertSuccess( - result, - [{"_id": None, "avg": 20.0}], + ), + AccumulatorTestCase( + "all_missing", + docs=[{"_id": 0, "other": 0}, {"_id": 1, "other": 1}, {"_id": 2, "other": 2}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": None}], + msg="All missing fields should return null", + ), + AccumulatorTestCase( + "some_missing", + docs=[{"_id": 0, "v": 10}, {"_id": 1}, {"_id": 2, "v": 30}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 20.0}], 
msg="Missing field docs should be ignored", - ) - - -def test_avg_group_mix_null_missing_numeric(collection): - """Test $avg with mix of null, missing, and numeric values.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + "mix_null_missing_numeric", + docs=[ {"_id": 0, "v": 10}, {"_id": 1, "v": None}, {"_id": 2}, {"_id": 3, "v": 30}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - "cursor": {}, - }, - ) - assertSuccess( - result, - [{"_id": None, "avg": 20.0}], + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 20.0}], msg="Only numeric values should contribute to average", - ) - - -# --- Non-numeric types ignored in $group --- - - -def test_avg_group_ignores_strings(collection): - """Test $avg ignores string values in group.""" - result = _group_avg_values(collection, [10, "hello", 30]) - assertSuccess( - result, - [{"_id": None, "avg": 20.0}], + ), +] + +# Property [Non-Numeric Types Ignored]: non-numeric BSON types are silently +# ignored and excluded from both sum and count. 
+AVG_NON_NUMERIC_IGNORED_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "ignores_strings", + docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": "hello"}, {"_id": 2, "v": 30}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 20.0}], msg="String values should be ignored in group avg", - ) - - -def test_avg_group_ignores_booleans(collection): - """Test $avg ignores boolean values in group.""" - result = _group_avg_values(collection, [10, True, False, 30]) - assertSuccess( - result, - [{"_id": None, "avg": 20.0}], + ), + AccumulatorTestCase( + "ignores_booleans", + docs=[ + {"_id": 0, "v": 10}, + {"_id": 1, "v": True}, + {"_id": 2, "v": False}, + {"_id": 3, "v": 30}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 20.0}], msg="Boolean values should be ignored in group avg", - ) - - -def test_avg_group_ignores_arrays(collection): - """Test $avg ignores array values in group.""" - result = _group_avg_values(collection, [10, [1, 2, 3], 30]) - assertSuccess( - result, - [{"_id": None, "avg": 20.0}], + ), + AccumulatorTestCase( + "ignores_arrays", + docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": [1, 2, 3]}, {"_id": 2, "v": 30}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 20.0}], msg="Array values should be ignored in group avg", - ) - - -def test_avg_group_ignores_objects(collection): - """Test $avg ignores embedded document values in group.""" - result = _group_avg_values(collection, [10, {"nested": 99}, 30]) - assertSuccess( - result, - [{"_id": None, "avg": 20.0}], + ), + AccumulatorTestCase( + "ignores_objects", + docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": {"nested": 99}}, {"_id": 2, "v": 30}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 20.0}], msg="Object values should be ignored in group avg", - ) - - -def test_avg_group_all_non_numeric(collection): - """Test 
$avg returns null when all values are non-numeric.""" - result = _group_avg_values(collection, ["a", True, [1], {"x": 1}]) - assertSuccess( - result, - [{"_id": None, "avg": None}], + ), + AccumulatorTestCase( + "all_non_numeric", + docs=[ + {"_id": 0, "v": "a"}, + {"_id": 1, "v": True}, + {"_id": 2, "v": [1]}, + {"_id": 3, "v": {"x": 1}}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": None}], msg="All non-numeric values should return null", - ) - - -def test_avg_group_boolean_not_numeric(collection): - """Test $avg treats boolean as non-numeric (false != 0, true != 1).""" - result = _group_avg_values(collection, [False, True]) - assertSuccess( - result, - [{"_id": None, "avg": None}], + ), + AccumulatorTestCase( + "boolean_not_numeric", + docs=[{"_id": 0, "v": False}, {"_id": 1, "v": True}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": None}], msg="Booleans should not be treated as 0/1 in avg", - ) - - -# --- Negative Zero in $group --- - - -def test_avg_group_negative_zero_double(collection): - """Test $avg normalizes double negative zero to positive zero.""" - result = _group_avg_values(collection, [DOUBLE_NEGATIVE_ZERO, DOUBLE_NEGATIVE_ZERO]) - assertSuccess( - result, - [{"_id": None, "avg": DOUBLE_ZERO}], + ), +] + +# Property [Negative Zero]: $avg normalizes negative zero to positive zero +# for both double and Decimal128. 
+AVG_NEGATIVE_ZERO_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "negative_zero_double", + docs=[ + {"_id": 0, "v": DOUBLE_NEGATIVE_ZERO}, + {"_id": 1, "v": DOUBLE_NEGATIVE_ZERO}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": DOUBLE_ZERO}], msg="Double -0.0 avg should normalize to 0.0", - ) + ), + AccumulatorTestCase( + "negative_zero_decimal128", + docs=[ + {"_id": 0, "v": DECIMAL128_NEGATIVE_ZERO}, + {"_id": 1, "v": DECIMAL128_NEGATIVE_ZERO}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": DECIMAL128_ZERO}], + msg="Decimal128 -0 avg should normalize to 0", + ), +] + +AVG_GROUP_TYPE_TESTS: list[AccumulatorTestCase] = ( + AVG_TYPE_PROMOTION_TESTS + + AVG_NAN_PROPAGATION_TESTS + + AVG_INFINITY_TESTS + + AVG_NULL_MISSING_TESTS + + AVG_NON_NUMERIC_IGNORED_TESTS + + AVG_NEGATIVE_ZERO_TESTS +) -def test_avg_group_negative_zero_decimal128(collection): - """Test $avg normalizes Decimal128 negative zero to positive zero.""" - result = _group_avg_values(collection, [DECIMAL128_NEGATIVE_ZERO, DECIMAL128_NEGATIVE_ZERO]) - assertSuccess( - result, - [{"_id": None, "avg": DECIMAL128_ZERO}], - msg="Decimal128 -0 avg should normalize to 0", +@pytest.mark.parametrize("test_case", pytest_params(AVG_GROUP_TYPE_TESTS)) +def test_avg_group_types(collection, test_case: AccumulatorTestCase): + """Test $avg data type handling in $group context.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, ) + assertResult(result, expected=test_case.expected, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py index c5601f48..3aa3025b 100644 --- 
a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py @@ -7,508 +7,312 @@ from __future__ import annotations -from documentdb_tests.framework.assertions import assertSuccess -from documentdb_tests.framework.executor import execute_command - -# --- 14. Pipeline Contexts --- +import pytest -# -- $group with computed _id -- +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertResult, assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +# --- $group with computed _id --- -def test_avg_group_computed_id(collection): - """Test $avg with computed _id expression in $group.""" - collection.insert_many( - [ +# Property [Group Computed ID]: $avg with computed _id expression in $group. 
+AVG_GROUP_COMPUTED_ID_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "group_computed_id", + docs=[ {"_id": 1, "value": 10, "score": 80}, {"_id": 2, "value": 20, "score": 90}, {"_id": 3, "value": 30, "score": 85}, {"_id": 4, "value": 40, "score": 95}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - { - "$group": { - "_id": {"$gt": ["$score", 85]}, - "avg": {"$avg": "$value"}, - } - }, - {"$sort": {"_id": 1}}, - ], - "cursor": {}, - }, - ) - # score <= 85: docs 1,3 → avg(10,30) = 20 - # score > 85: docs 2,4 → avg(20,40) = 30 - assertSuccess( - result, - [ + ], + pipeline=[ + { + "$group": { + "_id": {"$gt": ["$score", 85]}, + "avg": {"$avg": "$value"}, + } + }, + {"$sort": {"_id": 1}}, + ], + # score <= 85: docs 1,3 -> avg(10,30) = 20 + # score > 85: docs 2,4 -> avg(20,40) = 30 + expected=[ {"_id": False, "avg": 20.0}, {"_id": True, "avg": 30.0}, ], msg="$avg with computed _id should group and average correctly", - ) - + ), +] -# -- $bucket -- +# --- $bucket / $bucketAuto --- - -def test_avg_bucket(collection): - """Test $avg in $bucket output specification.""" - collection.insert_many( - [ +# Property [Bucket]: $avg in $bucket and $bucketAuto output specifications. 
+AVG_BUCKET_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "bucket", + docs=[ {"_id": 1, "score": 15, "value": 10}, {"_id": 2, "score": 25, "value": 20}, {"_id": 3, "score": 35, "value": 30}, {"_id": 4, "score": 45, "value": 40}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - { - "$bucket": { - "groupBy": "$score", - "boundaries": [0, 20, 40, 60], - "output": {"avg_value": {"$avg": "$value"}}, - } - }, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [ + ], + pipeline=[ + { + "$bucket": { + "groupBy": "$score", + "boundaries": [0, 20, 40, 60], + "output": {"avg_value": {"$avg": "$value"}}, + } + }, + ], + expected=[ {"_id": 0, "avg_value": 10.0}, {"_id": 20, "avg_value": 25.0}, {"_id": 40, "avg_value": 40.0}, ], msg="$avg in $bucket should compute average per bucket", - ) - - -# -- $setWindowFields -- + ), + AccumulatorTestCase( + "bucketauto", + docs=[ + {"_id": 1, "score": 10, "value": 100}, + {"_id": 2, "score": 20, "value": 200}, + {"_id": 3, "score": 30, "value": 300}, + {"_id": 4, "score": 40, "value": 400}, + ], + pipeline=[ + { + "$bucketAuto": { + "groupBy": "$score", + "buckets": 2, + "output": {"avg_value": {"$avg": "$value"}}, + } + }, + ], + expected=[ + {"_id": {"min": 10, "max": 30}, "avg_value": 150.0}, + {"_id": {"min": 30, "max": 40}, "avg_value": 350.0}, + ], + msg="$avg in $bucketAuto should compute average per auto-bucket", + ), +] +# --- $setWindowFields --- -def test_avg_window_unbounded(collection): - """Test $avg with unbounded window returns partition average.""" - collection.insert_many( - [ +# Property [Window]: $avg in $setWindowFields with various window types. 
+AVG_WINDOW_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "window_unbounded", + docs=[ {"_id": 1, "value": 10}, {"_id": 2, "value": 20}, {"_id": 3, "value": 30}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$sort": {"_id": 1}}, - { - "$setWindowFields": { - "sortBy": {"_id": 1}, - "output": { - "avg": { - "$avg": "$value", - "window": {"documents": ["unbounded", "unbounded"]}, - } - }, - } - }, - {"$project": {"_id": 1, "value": 1, "avg": 1}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [ + ], + pipeline=[ + {"$sort": {"_id": 1}}, + { + "$setWindowFields": { + "sortBy": {"_id": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"documents": ["unbounded", "unbounded"]}, + } + }, + } + }, + {"$project": {"_id": 1, "value": 1, "avg": 1}}, + ], + expected=[ {"_id": 1, "value": 10, "avg": 20.0}, {"_id": 2, "value": 20, "avg": 20.0}, {"_id": 3, "value": 30, "avg": 20.0}, ], msg="$avg with unbounded window should return full partition average", - ) - - -def test_avg_window_cumulative(collection): - """Test $avg with cumulative window [unbounded, current].""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + "window_cumulative", + docs=[ {"_id": 1, "value": 10}, {"_id": 2, "value": 20}, {"_id": 3, "value": 30}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$sort": {"_id": 1}}, - { - "$setWindowFields": { - "sortBy": {"_id": 1}, - "output": { - "avg": { - "$avg": "$value", - "window": {"documents": ["unbounded", "current"]}, - } - }, - } - }, - {"$project": {"_id": 1, "value": 1, "avg": 1}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [ + ], + pipeline=[ + {"$sort": {"_id": 1}}, + { + "$setWindowFields": { + "sortBy": {"_id": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"documents": ["unbounded", "current"]}, + } + }, + } + }, + {"$project": {"_id": 1, "value": 1, "avg": 
1}}, + ], + expected=[ {"_id": 1, "value": 10, "avg": 10.0}, {"_id": 2, "value": 20, "avg": 15.0}, {"_id": 3, "value": 30, "avg": 20.0}, ], msg="$avg with cumulative window should compute running average", - ) - - -def test_avg_window_sliding(collection): - """Test $avg with sliding window [-1, 1].""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + "window_sliding", + docs=[ {"_id": 1, "value": 10}, {"_id": 2, "value": 20}, {"_id": 3, "value": 30}, {"_id": 4, "value": 40}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$sort": {"_id": 1}}, - { - "$setWindowFields": { - "sortBy": {"_id": 1}, - "output": { - "avg": { - "$avg": "$value", - "window": {"documents": [-1, 1]}, - } - }, - } - }, - {"$project": {"_id": 1, "value": 1, "avg": 1}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [ - {"_id": 1, "value": 10, "avg": 15.0}, # avg(10,20) - {"_id": 2, "value": 20, "avg": 20.0}, # avg(10,20,30) - {"_id": 3, "value": 30, "avg": 30.0}, # avg(20,30,40) - {"_id": 4, "value": 40, "avg": 35.0}, # avg(30,40) + ], + pipeline=[ + {"$sort": {"_id": 1}}, + { + "$setWindowFields": { + "sortBy": {"_id": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"documents": [-1, 1]}, + } + }, + } + }, + {"$project": {"_id": 1, "value": 1, "avg": 1}}, + ], + # avg(10,20), avg(10,20,30), avg(20,30,40), avg(30,40) + expected=[ + {"_id": 1, "value": 10, "avg": 15.0}, + {"_id": 2, "value": 20, "avg": 20.0}, + {"_id": 3, "value": 30, "avg": 30.0}, + {"_id": 4, "value": 40, "avg": 35.0}, ], msg="$avg with sliding window should compute local average", - ) - - -def test_avg_window_current_only(collection): - """Test $avg with window [0, 0] returns current document value.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + "window_current_only", + docs=[ {"_id": 1, "value": 10}, {"_id": 2, "value": 20}, {"_id": 3, "value": 30}, - ] - ) - result = execute_command( - collection, - { - "aggregate": 
collection.name, - "pipeline": [ - {"$sort": {"_id": 1}}, - { - "$setWindowFields": { - "sortBy": {"_id": 1}, - "output": { - "avg": { - "$avg": "$value", - "window": {"documents": [0, 0]}, - } - }, - } - }, - {"$project": {"_id": 1, "value": 1, "avg": 1}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [ + ], + pipeline=[ + {"$sort": {"_id": 1}}, + { + "$setWindowFields": { + "sortBy": {"_id": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"documents": [0, 0]}, + } + }, + } + }, + {"$project": {"_id": 1, "value": 1, "avg": 1}}, + ], + expected=[ {"_id": 1, "value": 10, "avg": 10.0}, {"_id": 2, "value": 20, "avg": 20.0}, {"_id": 3, "value": 30, "avg": 30.0}, ], msg="$avg with [0,0] window should return current document value", - ) - - -def test_avg_window_with_nulls(collection): - """Test $avg in $setWindowFields ignores null values in window.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + "window_with_nulls", + docs=[ {"_id": 1, "value": 10}, {"_id": 2, "value": None}, {"_id": 3, "value": 30}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$sort": {"_id": 1}}, - { - "$setWindowFields": { - "sortBy": {"_id": 1}, - "output": { - "avg": { - "$avg": "$value", - "window": {"documents": ["unbounded", "unbounded"]}, - } - }, - } - }, - {"$project": {"_id": 1, "value": 1, "avg": 1}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [ + ], + pipeline=[ + {"$sort": {"_id": 1}}, + { + "$setWindowFields": { + "sortBy": {"_id": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"documents": ["unbounded", "unbounded"]}, + } + }, + } + }, + {"$project": {"_id": 1, "value": 1, "avg": 1}}, + ], + expected=[ {"_id": 1, "value": 10, "avg": 20.0}, {"_id": 2, "value": None, "avg": 20.0}, {"_id": 3, "value": 30, "avg": 20.0}, ], msg="$avg in window should ignore null values", - ) - - -# -- $project / $addFields context -- - - -def 
test_avg_in_project_array_literal(collection): - """Test $avg in $project with array of literal values.""" - result = execute_command( - collection, - { - "aggregate": 1, - "pipeline": [ - {"$documents": [{}]}, - {"$project": {"_id": 0, "avg": {"$avg": [10, 20, 30]}}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [{"avg": 20.0}], - msg="$avg in $project with literal array should average values", - ) - - -def test_avg_in_addfields(collection): - """Test $avg in $addFields context.""" - collection.insert_many( - [ - {"_id": 1, "scores": [80, 90, 100]}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$addFields": {"avg_score": {"$avg": "$scores"}}}, - {"$project": {"_id": 0, "avg_score": 1}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [{"avg_score": 90.0}], - msg="$avg in $addFields should traverse array field and average", - ) - - -def test_avg_in_match_expr(collection): - """Test $avg used inside $expr in $match stage.""" - collection.insert_many( - [ - {"_id": 1, "scores": [80, 90, 100]}, - {"_id": 2, "scores": [40, 50, 60]}, - {"_id": 3, "scores": [70, 80, 90]}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$match": {"$expr": {"$gt": [{"$avg": "$scores"}, 75]}}}, - {"$project": {"_id": 1}}, - {"$sort": {"_id": 1}}, - ], - "cursor": {}, - }, - ) - # avg([80,90,100])=90 > 75 ✓, avg([40,50,60])=50 < 75 ✗, avg([70,80,90])=80 > 75 ✓ - assertSuccess( - result, - [{"_id": 1}, {"_id": 3}], - msg="$avg in $match $expr should filter based on computed average", - ) - - -# --- 19. 
Pipeline Interaction --- - - -def test_avg_bucketauto(collection): - """Test $avg in $bucketAuto output specification.""" - collection.insert_many( - [ - {"_id": 1, "score": 10, "value": 100}, - {"_id": 2, "score": 20, "value": 200}, - {"_id": 3, "score": 30, "value": 300}, - {"_id": 4, "score": 40, "value": 400}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - { - "$bucketAuto": { - "groupBy": "$score", - "buckets": 2, - "output": {"avg_value": {"$avg": "$value"}}, - } - }, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [ - {"_id": {"min": 10, "max": 30}, "avg_value": 150.0}, - {"_id": {"min": 30, "max": 40}, "avg_value": 350.0}, - ], - msg="$avg in $bucketAuto should compute average per auto-bucket", - ) - - -def test_avg_window_range_based(collection): - """Test $avg with range-based window on numeric sort key.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + "window_range_based", + docs=[ {"_id": 1, "pos": 0, "value": 10}, {"_id": 2, "pos": 5, "value": 20}, {"_id": 3, "pos": 10, "value": 30}, {"_id": 4, "pos": 15, "value": 40}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$sort": {"pos": 1}}, - { - "$setWindowFields": { - "sortBy": {"pos": 1}, - "output": { - "avg": { - "$avg": "$value", - "window": {"range": [-5, 5]}, - } - }, - } - }, - {"$project": {"_id": 1, "pos": 1, "value": 1, "avg": 1}}, - ], - "cursor": {}, - }, - ) - # pos=0: range [-5,5] includes pos 0,5 → avg(10,20)=15 - # pos=5: range [0,10] includes pos 0,5,10 → avg(10,20,30)=20 - # pos=10: range [5,15] includes pos 5,10,15 → avg(20,30,40)=30 - # pos=15: range [10,20] includes pos 10,15 → avg(30,40)=35 - assertSuccess( - result, - [ + ], + pipeline=[ + {"$sort": {"pos": 1}}, + { + "$setWindowFields": { + "sortBy": {"pos": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"range": [-5, 5]}, + } + }, + } + }, + {"$project": {"_id": 1, "pos": 1, 
"value": 1, "avg": 1}}, + ], + # pos=0: range [-5,5] includes pos 0,5 -> avg(10,20)=15 + # pos=5: range [0,10] includes pos 0,5,10 -> avg(10,20,30)=20 + # pos=10: range [5,15] includes pos 5,10,15 -> avg(20,30,40)=30 + # pos=15: range [10,20] includes pos 10,15 -> avg(30,40)=35 + expected=[ {"_id": 1, "pos": 0, "value": 10, "avg": 15.0}, {"_id": 2, "pos": 5, "value": 20, "avg": 20.0}, {"_id": 3, "pos": 10, "value": 30, "avg": 30.0}, {"_id": 4, "pos": 15, "value": 40, "avg": 35.0}, ], msg="$avg with range-based window should compute average within range", - ) - - -def test_avg_window_multiple_partitions(collection): - """Test $avg in $setWindowFields with multiple partitions of different sizes.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + "window_multiple_partitions", + docs=[ {"_id": 1, "group": "A", "value": 10}, {"_id": 2, "group": "A", "value": 20}, {"_id": 3, "group": "A", "value": 30}, {"_id": 4, "group": "B", "value": 100}, {"_id": 5, "group": "B", "value": 200}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$sort": {"_id": 1}}, - { - "$setWindowFields": { - "partitionBy": "$group", - "sortBy": {"_id": 1}, - "output": { - "avg": { - "$avg": "$value", - "window": {"documents": ["unbounded", "unbounded"]}, - } - }, - } - }, - {"$project": {"_id": 1, "group": 1, "avg": 1}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [ + ], + pipeline=[ + {"$sort": {"_id": 1}}, + { + "$setWindowFields": { + "partitionBy": "$group", + "sortBy": {"_id": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"documents": ["unbounded", "unbounded"]}, + } + }, + } + }, + {"$project": {"_id": 1, "group": 1, "avg": 1}}, + ], + expected=[ {"_id": 1, "group": "A", "avg": 20.0}, {"_id": 2, "group": "A", "avg": 20.0}, {"_id": 3, "group": "A", "avg": 20.0}, @@ -516,128 +320,167 @@ def test_avg_window_multiple_partitions(collection): {"_id": 5, "group": "B", "avg": 150.0}, ], msg="$avg should 
compute independent averages per partition", - ) + ), +] +# --- Expression contexts ($project, $addFields, $match+$expr) --- -def test_avg_group_after_unwind(collection): - """Test $avg in $group after $unwind averages unwound values.""" - collection.insert_many( - [ +# Property [Expression Context]: $avg used in expression contexts. +AVG_EXPRESSION_CONTEXT_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "in_addfields", + docs=[ + {"_id": 1, "scores": [80, 90, 100]}, + ], + pipeline=[ + {"$addFields": {"avg_score": {"$avg": "$scores"}}}, + {"$project": {"_id": 0, "avg_score": 1}}, + ], + expected=[{"avg_score": 90.0}], + msg="$avg in $addFields should traverse array field and average", + ), + AccumulatorTestCase( + "in_match_expr", + docs=[ + {"_id": 1, "scores": [80, 90, 100]}, + {"_id": 2, "scores": [40, 50, 60]}, + {"_id": 3, "scores": [70, 80, 90]}, + ], + pipeline=[ + {"$match": {"$expr": {"$gt": [{"$avg": "$scores"}, 75]}}}, + {"$project": {"_id": 1}}, + {"$sort": {"_id": 1}}, + ], + # avg([80,90,100])=90 > 75, avg([40,50,60])=50 < 75, avg([70,80,90])=80 > 75 + expected=[{"_id": 1}, {"_id": 3}], + msg="$avg in $match $expr should filter based on computed average", + ), +] + +# --- Pipeline interaction patterns --- + +# Property [Pipeline Interaction]: $avg combined with other pipeline stages. 
+AVG_PIPELINE_INTERACTION_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "group_after_unwind", + docs=[ {"_id": 1, "category": "A", "values": [10, 20]}, {"_id": 2, "category": "A", "values": [30]}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$unwind": "$values"}, - {"$group": {"_id": "$category", "avg": {"$avg": "$values"}}}, - ], - "cursor": {}, - }, - ) - # Unwound: 10, 20, 30 → avg = 20 - assertSuccess( - result, - [{"_id": "A", "avg": 20.0}], + ], + pipeline=[ + {"$unwind": "$values"}, + {"$group": {"_id": "$category", "avg": {"$avg": "$values"}}}, + ], + # Unwound: 10, 20, 30 -> avg = 20 + expected=[{"_id": "A", "avg": 20.0}], msg="$avg after $unwind should average all unwound values", - ) - - -def test_avg_group_after_match(collection): - """Test $avg in $group after $match filters documents.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + "group_after_match", + docs=[ {"_id": 1, "category": "A", "value": 10, "active": True}, {"_id": 2, "category": "A", "value": 20, "active": False}, {"_id": 3, "category": "A", "value": 30, "active": True}, - ] - ) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": [ - {"$match": {"active": True}}, - {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, - ], - "cursor": {}, - }, - ) - # Only active docs: avg(10, 30) = 20 - assertSuccess( - result, - [{"_id": "A", "avg": 20.0}], + ], + pipeline=[ + {"$match": {"active": True}}, + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + ], + # Only active docs: avg(10, 30) = 20 + expected=[{"_id": "A", "avg": 20.0}], msg="$avg after $match should only average filtered documents", - ) - - -def test_avg_in_project_after_group(collection): - """Test $avg in $project after $group uses grouped results.""" - collection.insert_many( - [ + ), + AccumulatorTestCase( + "project_after_group", + docs=[ {"_id": 1, "category": "A", "value": 10}, 
{"_id": 2, "category": "A", "value": 20}, {"_id": 3, "category": "B", "value": 30}, {"_id": 4, "category": "B", "value": 40}, - ] - ) + ], + pipeline=[ + { + "$group": { + "_id": "$category", + "sum": {"$sum": "$value"}, + "count": {"$sum": 1}, + } + }, + {"$sort": {"_id": 1}}, + { + "$project": { + "_id": 1, + "manual_avg": {"$divide": ["$sum", "$count"]}, + } + }, + ], + expected=[ + {"_id": "A", "manual_avg": 15.0}, + {"_id": "B", "manual_avg": 35.0}, + ], + msg="Manual average via $divide after $group should work", + ), + AccumulatorTestCase( + "group_after_project_rename", + docs=[ + {"_id": 1, "cat": "A", "val": 10}, + {"_id": 2, "cat": "A", "val": 20}, + ], + pipeline=[ + {"$project": {"category": "$cat", "value": "$val"}}, + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + ], + expected=[{"_id": "A", "avg": 15.0}], + msg="$avg should work on renamed fields from $project", + ), +] + +# --- Combined list --- + +AVG_PIPELINE_CONTEXT_TESTS: list[AccumulatorTestCase] = ( + AVG_GROUP_COMPUTED_ID_TESTS + + AVG_BUCKET_TESTS + + AVG_WINDOW_TESTS + + AVG_EXPRESSION_CONTEXT_TESTS + + AVG_PIPELINE_INTERACTION_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(AVG_PIPELINE_CONTEXT_TESTS)) +def test_avg_pipeline_contexts(collection, test_case: AccumulatorTestCase): + """Test $avg in various pipeline contexts.""" + if test_case.docs: + collection.insert_many(test_case.docs) result = execute_command( collection, { "aggregate": collection.name, - "pipeline": [ - { - "$group": { - "_id": "$category", - "sum": {"$sum": "$value"}, - "count": {"$sum": 1}, - } - }, - {"$sort": {"_id": 1}}, - { - "$project": { - "_id": 1, - "manual_avg": {"$divide": ["$sum", "$count"]}, - } - }, - ], + "pipeline": test_case.pipeline, "cursor": {}, }, ) - assertSuccess( - result, - [ - {"_id": "A", "manual_avg": 15.0}, - {"_id": "B", "manual_avg": 35.0}, - ], - msg="Manual average via $divide after $group should work", - ) + assertResult(result, 
expected=test_case.expected, msg=test_case.msg) -def test_avg_group_after_project_rename(collection): - """Test $avg in $group after $project that renames fields.""" - collection.insert_many( - [ - {"_id": 1, "cat": "A", "val": 10}, - {"_id": 2, "cat": "A", "val": 20}, - ] - ) +def test_avg_in_project_array_literal(collection): + """Test $avg in $project with array of literal values. + + This test uses ``aggregate: 1`` with ``$documents`` instead of a + collection, so it is kept as a standalone test. + """ result = execute_command( collection, { - "aggregate": collection.name, + "aggregate": 1, "pipeline": [ - {"$project": {"category": "$cat", "value": "$val"}}, - {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + {"$documents": [{}]}, + {"$project": {"_id": 0, "avg": {"$avg": [10, 20, 30]}}}, ], "cursor": {}, }, ) assertSuccess( result, - [{"_id": "A", "avg": 15.0}], - msg="$avg should work on renamed fields from $project", + [{"avg": 20.0}], + msg="$avg in $project with literal array should average values", ) From de7b0c058ffd68742f12d2b860ba2f9404d2ab8b Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Fri, 15 May 2026 12:08:43 -0700 Subject: [PATCH 4/9] Add init.py Signed-off-by: Alina (Xi) Li --- .../tests/core/operator/accumulators/avg/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/__init__.py diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/__init__.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/__init__.py new file mode 100644 index 00000000..e69de29b From b993dd52ee0d850564f28c7ff294516992f7dca7 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Fri, 15 May 2026 12:36:22 -0700 Subject: [PATCH 5/9] split into smaller test files Signed-off-by: Alina (Xi) Li --- .../accumulators/avg/test_accumulator_avg.py | 1248 ----------------- .../accumulators/avg/test_avg_errors.py | 189 +++ 
.../avg/test_avg_group_boundaries.py | 383 ++++- .../avg/test_avg_group_context.py | 39 +- .../accumulators/avg/test_avg_group_types.py | 287 ++-- .../accumulators/avg/test_avg_non_numeric.py | 255 ++++ .../accumulators/avg/test_avg_null_missing.py | 118 ++ .../avg/test_avg_special_numeric.py | 154 ++ 8 files changed, 1202 insertions(+), 1471 deletions(-) delete mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_non_numeric.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_null_missing.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_special_numeric.py diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg.py deleted file mode 100644 index 857013c2..00000000 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg.py +++ /dev/null @@ -1,1248 +0,0 @@ -"""Tests for $avg accumulator in $group context.""" - -from __future__ import annotations - -import math -from datetime import datetime, timezone - -import pytest -from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp - -from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( - AccumulatorTestCase, -) -from documentdb_tests.framework.assertions import assertResult, assertSuccess -from documentdb_tests.framework.error_codes import ( - CONVERSION_FAILURE_ERROR, - DIVIDE_BY_ZERO_V2_ERROR, - GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - MODULO_BY_ZERO_V2_ERROR, -) -from documentdb_tests.framework.executor import execute_command -from 
documentdb_tests.framework.parametrize import pytest_params -from documentdb_tests.framework.test_constants import ( - DECIMAL128_INFINITY, - DECIMAL128_INT64_OVERFLOW, - DECIMAL128_MAX, - DECIMAL128_MIN_POSITIVE, - DECIMAL128_NAN, - DECIMAL128_NEGATIVE_INFINITY, - DECIMAL128_NEGATIVE_ZERO, - DECIMAL128_TRAILING_ZERO, - DECIMAL128_ZERO, - DOUBLE_FROM_INT64_MAX, - DOUBLE_MAX, - DOUBLE_MAX_SAFE_INTEGER, - DOUBLE_MIN_NEGATIVE_SUBNORMAL, - DOUBLE_MIN_NORMAL, - DOUBLE_MIN_SUBNORMAL, - DOUBLE_NEAR_MAX, - DOUBLE_NEAR_MIN, - DOUBLE_NEGATIVE_ZERO, - DOUBLE_ZERO, - FLOAT_INFINITY, - FLOAT_NAN, - FLOAT_NEGATIVE_INFINITY, - INT32_MAX, - INT32_MAX_MINUS_1, - INT32_MIN, - INT64_MAX, - INT64_MAX_MINUS_1, - INT64_MIN, - INT64_MIN_PLUS_1, - INT64_ZERO, -) - -# Property [Null and Missing Ignored]: null values, missing fields, and -# $$REMOVE are treated as non-numeric and excluded from both the sum and -# count, producing null when no numeric values remain. -AVG_NULL_MISSING_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "null_all_null", - docs=[{"v": None}, {"v": None}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should return null when all values in the group are null", - ), - AccumulatorTestCase( - "null_all_missing", - docs=[{"x": 1}, {"x": 2}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should return null when all values reference missing fields", - ), - AccumulatorTestCase( - "null_single_null", - docs=[{"v": None}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should return null when the only value is null", - ), - AccumulatorTestCase( - "null_single_missing", - docs=[{"x": 1}], - pipeline=[ - {"$group": {"_id": None, "result": 
{"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should return null when the only value is a missing field", - ), - AccumulatorTestCase( - "null_mixed_null_and_missing", - docs=[{"v": None}, {"x": 1}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should return null when values are a mix of null and missing", - ), - AccumulatorTestCase( - "null_with_numerics", - docs=[{"v": None}, {"v": 10}, {"v": 20}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": 15.0}], - msg="$avg should exclude null from both sum and count", - ), - AccumulatorTestCase( - "null_missing_with_numerics", - docs=[{"x": 1}, {"v": 10}, {"v": 20}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": 15.0}], - msg="$avg should exclude missing fields from both sum and count", - ), - AccumulatorTestCase( - "null_mixed_null_missing_with_numerics", - docs=[{"v": None}, {"x": 1}, {"v": 30}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": 30.0}], - msg="$avg should exclude both null and missing, averaging only numerics", - ), - AccumulatorTestCase( - "null_remove_only", - docs=[{"v": 5}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": {"$cond": [False, 1, "$$REMOVE"]}}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should treat $$REMOVE as missing and return null", - ), -] - -# Property [Non-Numeric Types Ignored]: all non-numeric BSON types are -# silently ignored and excluded from both sum and count, producing null -# when no numeric values remain. 
-AVG_NON_NUMERIC_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "non_numeric_string", - docs=[{"v": "hello"}, {"v": "world"}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should ignore string values and return null", - ), - AccumulatorTestCase( - "non_numeric_boolean_true", - docs=[{"v": True}, {"v": True}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should ignore boolean true without coercing to numeric", - ), - AccumulatorTestCase( - "non_numeric_boolean_false", - docs=[{"v": False}, {"v": False}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should ignore boolean false without coercing to numeric", - ), - AccumulatorTestCase( - "non_numeric_object", - docs=[{"v": {"x": 1}}, {"v": {"y": 2}}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should ignore plain objects", - ), - AccumulatorTestCase( - "non_numeric_empty_object", - docs=[{"v": {}}, {"v": {}}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should ignore empty objects", - ), - AccumulatorTestCase( - "non_numeric_objectid", - docs=[{"v": ObjectId()}, {"v": ObjectId()}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should ignore ObjectId values", - ), - AccumulatorTestCase( - "non_numeric_datetime", - docs=[ - {"v": datetime(2023, 1, 1, tzinfo=timezone.utc)}, - {"v": datetime(2024, 1, 1, tzinfo=timezone.utc)}, - ], - pipeline=[ - 
{"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should ignore datetime values", - ), - AccumulatorTestCase( - "non_numeric_timestamp", - docs=[{"v": Timestamp(1, 1)}, {"v": Timestamp(2, 1)}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should ignore Timestamp values", - ), - AccumulatorTestCase( - "non_numeric_binary", - docs=[{"v": Binary(b"\x01")}, {"v": Binary(b"\x02")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should ignore Binary values", - ), - AccumulatorTestCase( - "non_numeric_regex", - docs=[{"v": Regex("abc")}, {"v": Regex("def")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should ignore Regex values", - ), - AccumulatorTestCase( - "non_numeric_code", - docs=[{"v": Code("x")}, {"v": Code("y")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should ignore Code values", - ), - AccumulatorTestCase( - "non_numeric_minkey", - docs=[{"v": MinKey()}, {"v": MinKey()}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should ignore MinKey values", - ), - AccumulatorTestCase( - "non_numeric_maxkey", - docs=[{"v": MaxKey()}, {"v": MaxKey()}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should ignore MaxKey values", - ), - AccumulatorTestCase( - "non_numeric_array", - docs=[{"v": [1, 2, 3]}, {"v": [4, 5]}], - 
pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should ignore arrays without unwrapping", - ), - AccumulatorTestCase( - "non_numeric_single_element_array", - docs=[{"v": [42]}, {"v": [7]}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should not unwrap single-element numeric arrays", - ), - AccumulatorTestCase( - "non_numeric_empty_array", - docs=[{"v": []}, {"v": []}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should ignore empty arrays", - ), - AccumulatorTestCase( - "non_numeric_nested_array", - docs=[{"v": [[1, 2]]}, {"v": [[3]]}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should ignore nested arrays", - ), - AccumulatorTestCase( - "non_numeric_mixed_with_numerics", - docs=[{"v": "hello"}, {"v": 10}, {"v": True}, {"v": 20}, {"v": [5]}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": 15.0}], - msg="$avg should compute average only over numeric values, ignoring non-numerics", - ), - AccumulatorTestCase( - "non_numeric_array_from_expression", - docs=[{"v": 1}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": {"$literal": [1, 2, 3]}}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should treat array expressions as non-numeric", - ), -] - -# Property [Special Numeric Values]: NaN is numeric and propagates to the -# result, Infinity dominates finite values, Infinity + -Infinity produces -# NaN, and negative zero is not preserved. 
-AVG_SPECIAL_NUMERIC_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "special_nan_propagates", - docs=[{"v": FLOAT_NAN}, {"v": 5.0}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": pytest.approx(math.nan, nan_ok=True)}], - msg="$avg should return NaN when any value is NaN", - ), - AccumulatorTestCase( - "special_nan_over_infinity", - docs=[{"v": FLOAT_NAN}, {"v": FLOAT_INFINITY}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": pytest.approx(math.nan, nan_ok=True)}], - msg="$avg should return NaN when group contains both NaN and Infinity", - ), - AccumulatorTestCase( - "special_infinity_dominates", - docs=[{"v": FLOAT_INFINITY}, {"v": 5.0}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": FLOAT_INFINITY}], - msg="$avg should return Infinity when Infinity dominates finite values", - ), - AccumulatorTestCase( - "special_neg_infinity_dominates", - docs=[{"v": FLOAT_NEGATIVE_INFINITY}, {"v": 5.0}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": FLOAT_NEGATIVE_INFINITY}], - msg="$avg should return -Infinity when -Infinity dominates finite values", - ), - AccumulatorTestCase( - "special_inf_plus_neg_inf", - docs=[{"v": FLOAT_INFINITY}, {"v": FLOAT_NEGATIVE_INFINITY}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": pytest.approx(math.nan, nan_ok=True)}], - msg="$avg should return NaN when group contains Infinity and -Infinity", - ), - AccumulatorTestCase( - "special_neg_zero_not_preserved", - docs=[{"v": DOUBLE_NEGATIVE_ZERO}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 
0, "result": 1}}, - ], - expected=[{"result": DOUBLE_ZERO}], - msg="$avg should not preserve negative zero", - ), - AccumulatorTestCase( - "special_decimal_neg_zero_not_preserved", - docs=[{"v": DECIMAL128_NEGATIVE_ZERO}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DECIMAL128_ZERO}], - msg="$avg should not preserve Decimal128 negative zero", - ), - AccumulatorTestCase( - "special_decimal_nan_propagates", - docs=[{"v": DECIMAL128_NAN}, {"v": Decimal128("5")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": Decimal128("NaN")}], - msg="$avg should return Decimal128 NaN when any Decimal128 value is NaN", - ), - AccumulatorTestCase( - "special_decimal_nan_over_infinity", - docs=[{"v": DECIMAL128_NAN}, {"v": DECIMAL128_INFINITY}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": Decimal128("NaN")}], - msg="$avg should return Decimal128 NaN when group contains Decimal128 NaN and Infinity", - ), - AccumulatorTestCase( - "special_decimal_infinity", - docs=[{"v": DECIMAL128_INFINITY}, {"v": Decimal128("5")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DECIMAL128_INFINITY}], - msg="$avg should return Decimal128 Infinity when Decimal128 Infinity is present", - ), - AccumulatorTestCase( - "special_decimal_neg_infinity_dominates", - docs=[{"v": DECIMAL128_NEGATIVE_INFINITY}, {"v": Decimal128("5")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DECIMAL128_NEGATIVE_INFINITY}], - msg="$avg should return Decimal128 -Infinity when Decimal128 -Infinity dominates", - ), - AccumulatorTestCase( - "special_decimal_inf_plus_neg_inf", - 
docs=[{"v": DECIMAL128_INFINITY}, {"v": DECIMAL128_NEGATIVE_INFINITY}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": Decimal128("NaN")}], - msg="$avg should return Decimal128 NaN for Decimal128 Infinity + -Infinity", - ), -] - -# Property [Integer Boundaries]: int32 boundary values produce exact double -# results, and int64 boundary values produce double results with potential -# precision loss. -AVG_INTEGER_BOUNDARY_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "int_boundary_int32_zeros", - docs=[{"v": 0}, {"v": 0}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DOUBLE_ZERO}], - msg="$avg should return 0.0 for two int32 zeros", - ), - AccumulatorTestCase( - "int_boundary_int32_one_neg_one", - docs=[{"v": 1}, {"v": -1}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DOUBLE_ZERO}], - msg="$avg should return 0.0 for int32 1 and -1", - ), - AccumulatorTestCase( - "int_boundary_int32_max", - docs=[{"v": INT32_MAX}, {"v": 0}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": 1_073_741_823.5}], - msg="$avg should handle int32 MAX correctly", - ), - AccumulatorTestCase( - "int_boundary_int32_min", - docs=[{"v": INT32_MIN}, {"v": INT32_MIN}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": float(INT32_MIN)}], - msg="$avg should handle int32 MIN correctly", - ), - AccumulatorTestCase( - "int_boundary_int32_max_and_min", - docs=[{"v": INT32_MAX}, {"v": INT32_MIN}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": -0.5}], - 
msg="$avg should handle int32 MAX and MIN together", - ), - AccumulatorTestCase( - "int_boundary_int32_adjacent_max", - docs=[{"v": INT32_MAX_MINUS_1}, {"v": INT32_MAX}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": 2_147_483_646.5}], - msg="$avg of adjacent int32 MAX values should produce exact double", - ), - AccumulatorTestCase( - "int_boundary_int32_adjacent_min", - docs=[{"v": INT32_MIN}, {"v": INT32_MIN + 1}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": -2_147_483_647.5}], - msg="$avg of adjacent int32 MIN values should produce exact double", - ), - AccumulatorTestCase( - "int_boundary_int64_max", - docs=[{"v": INT64_MAX}, {"v": INT64_ZERO}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DOUBLE_FROM_INT64_MAX / 2}], - msg="$avg should handle int64 MAX with precision loss in double", - ), - AccumulatorTestCase( - "int_boundary_int64_min", - docs=[{"v": INT64_MIN}, {"v": INT64_MIN}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": -DOUBLE_FROM_INT64_MAX}], - msg="$avg should handle int64 MIN with precision loss in double", - ), - AccumulatorTestCase( - "int_boundary_int64_max_and_min", - docs=[{"v": INT64_MAX}, {"v": INT64_MIN}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": -0.5}], - msg="$avg should handle int64 MAX and MIN together", - ), - AccumulatorTestCase( - "int_boundary_int64_adjacent_max", - docs=[{"v": INT64_MAX_MINUS_1}, {"v": INT64_MAX}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DOUBLE_FROM_INT64_MAX}], - 
msg="$avg of adjacent int64 MAX values should produce double with precision loss", - ), - AccumulatorTestCase( - "int_boundary_int64_adjacent_min", - docs=[{"v": INT64_MIN_PLUS_1}, {"v": INT64_MIN}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": -DOUBLE_FROM_INT64_MAX}], - msg="$avg of adjacent int64 MIN values should produce double with precision loss", - ), -] - -# Property [Float Boundaries]: subnormal, minimum normal, maximum finite, -# near-precision-limit, and whole-number double values are averaged correctly. -AVG_FLOAT_BOUNDARY_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "float_boundary_whole_number", - docs=[{"v": 3.0}, {"v": 5.0}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": 4.0}], - msg="$avg should produce correct average for whole-number floats", - ), - AccumulatorTestCase( - "float_boundary_subnormal_positive", - docs=[{"v": DOUBLE_MIN_SUBNORMAL}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DOUBLE_MIN_SUBNORMAL}], - msg="$avg should handle positive subnormal value correctly", - ), - AccumulatorTestCase( - "float_boundary_subnormal_negative", - docs=[{"v": DOUBLE_MIN_NEGATIVE_SUBNORMAL}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DOUBLE_MIN_NEGATIVE_SUBNORMAL}], - msg="$avg should handle negative subnormal value correctly", - ), - AccumulatorTestCase( - "float_boundary_subnormal_avg", - docs=[{"v": DOUBLE_MIN_SUBNORMAL}, {"v": DOUBLE_MIN_SUBNORMAL}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DOUBLE_MIN_SUBNORMAL}], - msg="$avg of two identical subnormal values should return 
that value", - ), - AccumulatorTestCase( - "float_boundary_min_normal", - docs=[{"v": DOUBLE_MIN_NORMAL}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DOUBLE_MIN_NORMAL}], - msg="$avg should handle smallest positive normal double correctly", - ), - AccumulatorTestCase( - "float_boundary_max_single", - docs=[{"v": DOUBLE_MAX}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DOUBLE_MAX}], - msg="$avg should handle DBL_MAX as a single value correctly", - ), - AccumulatorTestCase( - "float_boundary_max_safe_integer", - docs=[{"v": float(DOUBLE_MAX_SAFE_INTEGER)}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": float(DOUBLE_MAX_SAFE_INTEGER)}], - msg="$avg should handle max safe integer value correctly", - ), - AccumulatorTestCase( - "float_boundary_max_safe_integer_avg", - docs=[ - {"v": float(DOUBLE_MAX_SAFE_INTEGER)}, - {"v": float(DOUBLE_MAX_SAFE_INTEGER)}, - ], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": float(DOUBLE_MAX_SAFE_INTEGER)}], - msg="$avg of two max safe integer values should return that value", - ), - AccumulatorTestCase( - "float_boundary_near_min", - docs=[{"v": DOUBLE_NEAR_MIN}, {"v": DOUBLE_NEAR_MIN}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DOUBLE_NEAR_MIN}], - msg="$avg should handle values near minimum normal correctly", - ), - AccumulatorTestCase( - "float_boundary_near_max_single", - docs=[{"v": DOUBLE_NEAR_MAX}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DOUBLE_NEAR_MAX}], - msg="$avg should 
handle values near maximum finite correctly", - ), -] - -# Property [Decimal128 Behavior]: full 34-digit precision and trailing zeros -# are preserved, subnormal and near-maximum values are handled correctly, and -# values exceeding int64 range produce Decimal128 results. -AVG_DECIMAL128_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "decimal128_full_precision", - docs=[ - {"v": Decimal128("1.000000000000000000000000000000001")}, - {"v": Decimal128("1.000000000000000000000000000000003")}, - ], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": Decimal128("1.000000000000000000000000000000002")}], - msg="$avg should preserve full 34-digit Decimal128 precision", - ), - AccumulatorTestCase( - "decimal128_34_digit_integer", - docs=[ - {"v": Decimal128("1234567890123456789012345678901234")}, - {"v": Decimal128("1234567890123456789012345678901234")}, - ], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": Decimal128("1234567890123456789012345678901234")}], - msg="$avg should preserve 34-digit integer Decimal128 values", - ), - AccumulatorTestCase( - "decimal128_trailing_zeros", - docs=[{"v": Decimal128("2.00")}, {"v": Decimal128("4.00")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": Decimal128("3.00")}], - msg="$avg should preserve trailing zeros in Decimal128 results", - ), - AccumulatorTestCase( - "decimal128_trailing_zeros_single_digit", - docs=[{"v": DECIMAL128_TRAILING_ZERO}, {"v": Decimal128("3.0")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": Decimal128("2.0")}], - msg="$avg should preserve single trailing zero in Decimal128 results", - ), - AccumulatorTestCase( - "decimal128_subnormal", - 
docs=[{"v": DECIMAL128_MIN_POSITIVE}, {"v": DECIMAL128_MIN_POSITIVE}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DECIMAL128_MIN_POSITIVE}], - msg="$avg should handle Decimal128 subnormal values correctly", - ), - AccumulatorTestCase( - "decimal128_subnormal_single", - docs=[{"v": DECIMAL128_MIN_POSITIVE}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DECIMAL128_MIN_POSITIVE}], - msg="$avg should handle a single Decimal128 subnormal value", - ), - AccumulatorTestCase( - "decimal128_near_max_single", - docs=[{"v": DECIMAL128_MAX}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DECIMAL128_MAX}], - msg="$avg should handle a single near-maximum Decimal128 value", - ), - AccumulatorTestCase( - "decimal128_near_max_with_small", - docs=[{"v": DECIMAL128_MAX}, {"v": Decimal128("1")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": Decimal128("5.000000000000000000000000000000000E+6144")}], - msg="$avg should handle near-maximum Decimal128 averaged with a small value", - ), - AccumulatorTestCase( - "decimal128_exceeds_int64", - docs=[ - {"v": DECIMAL128_INT64_OVERFLOW}, - {"v": DECIMAL128_INT64_OVERFLOW}, - ], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DECIMAL128_INT64_OVERFLOW}], - msg="$avg should produce Decimal128 for values exceeding int64 range", - ), -] - -# Property [Overflow]: sum overflow during accumulation produces Infinity for -# doubles and Decimal128, and int32/int64 overflow is handled via type -# promotion without error. 
-AVG_OVERFLOW_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "overflow_double_max", - docs=[{"v": DOUBLE_MAX}, {"v": DOUBLE_MAX}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": FLOAT_INFINITY}], - msg="$avg should return Infinity when two DBL_MAX values overflow the sum", - ), - AccumulatorTestCase( - "overflow_decimal128_max", - docs=[{"v": DECIMAL128_MAX}, {"v": DECIMAL128_MAX}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DECIMAL128_INFINITY}], - msg="$avg should return Decimal128 Infinity when two Decimal128 max values overflow", - ), - AccumulatorTestCase( - "overflow_int32_sum", - docs=[{"v": INT32_MAX}, {"v": INT32_MAX}, {"v": INT32_MAX}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": float(INT32_MAX)}], - msg="$avg should handle int32 sum overflow via type promotion without error", - ), - AccumulatorTestCase( - "overflow_int64_sum", - docs=[{"v": INT64_MAX}, {"v": INT64_MAX}, {"v": INT64_MAX}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": DOUBLE_FROM_INT64_MAX}], - msg="$avg should handle int64 sum overflow by converting to double", - ), -] - -# Property [Expression Arguments]: $avg accepts any expression as its operand, -# evaluating it per-document before accumulation. 
-AVG_EXPRESSION_ARGS_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "expr_constant_literal", - docs=[{"x": 1}, {"x": 2}, {"x": 3}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": 5}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": 5.0}], - msg="$avg should return the constant value when expression is a numeric literal", - ), - AccumulatorTestCase( - "expr_nested_add", - docs=[{"a": 2, "b": 3}, {"a": 4, "b": 6}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": {"$add": ["$a", "$b"]}}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": 7.5}], - msg="$avg should evaluate nested $add expression per-document before averaging", - ), -] - -# Property [Edge Cases]: a single-document group returns the value itself -# (as double or Decimal128), a single non-numeric document returns null, and -# an empty collection produces no group output. -AVG_EDGE_CASE_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "edge_single_int32", - docs=[{"v": 7}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": 7.0}], - msg="$avg should return the value as double for a single int32 document", - ), - AccumulatorTestCase( - "edge_single_int64", - docs=[{"v": Int64(42)}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": 42.0}], - msg="$avg should return the value as double for a single int64 document", - ), - AccumulatorTestCase( - "edge_single_non_numeric", - docs=[{"v": "hello"}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should return null for a single non-numeric document", - ), -] - -AVG_SUCCESS_TESTS = ( - AVG_NULL_MISSING_TESTS - + AVG_NON_NUMERIC_TESTS - + AVG_SPECIAL_NUMERIC_TESTS - + AVG_INTEGER_BOUNDARY_TESTS - 
+ AVG_FLOAT_BOUNDARY_TESTS - + AVG_DECIMAL128_TESTS - + AVG_OVERFLOW_TESTS - + AVG_EXPRESSION_ARGS_TESTS - + AVG_EDGE_CASE_TESTS -) - -# Property [Expression Error Propagation]: errors from sub-expressions -# propagate through $avg without being caught or suppressed. -AVG_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "error_prop_toint_non_convertible", - docs=[{"v": "hello"}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": {"$toInt": "$v"}}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - error_code=CONVERSION_FAILURE_ERROR, - msg="$avg should propagate $toInt conversion error for non-convertible value", - ), - AccumulatorTestCase( - "error_prop_divide_by_zero", - docs=[{"v": 10}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": {"$divide": ["$v", 0]}}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - error_code=DIVIDE_BY_ZERO_V2_ERROR, - msg="$avg should propagate $divide by zero error", - ), - AccumulatorTestCase( - "error_prop_mod_by_zero", - docs=[{"v": 10}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": {"$mod": ["$v", 0]}}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - error_code=MODULO_BY_ZERO_V2_ERROR, - msg="$avg should propagate $mod by zero error", - ), -] - -AVG_TESTS = AVG_SUCCESS_TESTS + AVG_EXPRESSION_ERROR_TESTS - - -@pytest.mark.parametrize("test_case", pytest_params(AVG_TESTS)) -def test_accumulator_avg(collection, test_case: AccumulatorTestCase): - """Test $avg accumulator behavior.""" - collection.insert_many(test_case.docs) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": test_case.pipeline, - "cursor": {}, - }, - ) - assertResult( - result, - expected=test_case.expected, - error_code=test_case.error_code, - msg=test_case.msg, - ) - - -def test_accumulator_avg_empty_collection(collection): - """Test $avg returns no documents for an empty collection.""" - result = execute_command( - collection, - { - "aggregate": 
collection.name, - "pipeline": [ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [], - msg="$avg should produce no group output for an empty collection", - ) - - -# Property [Return Type]: the result is double by default, but Decimal128 if -# any input value is Decimal128. -AVG_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "type_int32_only", - docs=[{"v": 2}, {"v": 4}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "type": {"$type": "$result"}}}, - ], - expected=[{"type": "double"}], - msg="$avg should return double when all inputs are int32", - ), - AccumulatorTestCase( - "type_int64_only", - docs=[{"v": Int64(2)}, {"v": Int64(4)}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "type": {"$type": "$result"}}}, - ], - expected=[{"type": "double"}], - msg="$avg should return double when all inputs are int64", - ), - AccumulatorTestCase( - "type_int32_int64", - docs=[{"v": 2}, {"v": Int64(4)}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "type": {"$type": "$result"}}}, - ], - expected=[{"type": "double"}], - msg="$avg should return double for int32 and int64 mix", - ), - AccumulatorTestCase( - "type_int32_double", - docs=[{"v": 2}, {"v": 4.0}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "type": {"$type": "$result"}}}, - ], - expected=[{"type": "double"}], - msg="$avg should return double for int32 and double mix", - ), - AccumulatorTestCase( - "type_int64_double", - docs=[{"v": Int64(2)}, {"v": 4.0}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "type": {"$type": "$result"}}}, - ], - expected=[{"type": "double"}], - msg="$avg should return double for int64 and double mix", - ), - 
AccumulatorTestCase( - "type_int32_decimal128", - docs=[{"v": 2}, {"v": Decimal128("4")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "type": {"$type": "$result"}}}, - ], - expected=[{"type": "decimal"}], - msg="$avg should return Decimal128 when any input is Decimal128", - ), - AccumulatorTestCase( - "type_int64_decimal128", - docs=[{"v": Int64(2)}, {"v": Decimal128("4")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "type": {"$type": "$result"}}}, - ], - expected=[{"type": "decimal"}], - msg="$avg should return Decimal128 for int64 and Decimal128 mix", - ), - AccumulatorTestCase( - "type_double_decimal128", - docs=[{"v": 2.0}, {"v": Decimal128("4")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "type": {"$type": "$result"}}}, - ], - expected=[{"type": "decimal"}], - msg="$avg should return Decimal128 for double and Decimal128 mix", - ), - AccumulatorTestCase( - "type_decimal128_before_int32", - docs=[{"v": Decimal128("4")}, {"v": 2}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "type": {"$type": "$result"}}}, - ], - expected=[{"type": "decimal"}], - msg="$avg should return Decimal128 regardless of document order", - ), -] - - -@pytest.mark.parametrize("test_case", pytest_params(AVG_RETURN_TYPE_TESTS)) -def test_accumulator_avg_return_type(collection, test_case: AccumulatorTestCase): - """Test $avg accumulator return type.""" - collection.insert_many(test_case.docs) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": test_case.pipeline, - "cursor": {}, - }, - ) - assertSuccess(result, test_case.expected, msg=test_case.msg) - - -# Property [Arity]: $avg in accumulator context is a unary operator and -# rejects array syntax in $group, $bucket, and $bucketAuto. 
-AVG_ARITY_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "arity_multi_element_group", - pipeline=[{"$group": {"_id": None, "result": {"$avg": ["$v", "$v"]}}}], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$avg should reject multi-element array syntax in $group", - ), - AccumulatorTestCase( - "arity_empty_array_group", - pipeline=[{"$group": {"_id": None, "result": {"$avg": []}}}], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$avg should reject empty array syntax in $group", - ), - AccumulatorTestCase( - "arity_single_element_group", - pipeline=[{"$group": {"_id": None, "result": {"$avg": ["$v"]}}}], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$avg should reject single-element array syntax in $group", - ), - AccumulatorTestCase( - "arity_multi_element_bucket", - pipeline=[ - { - "$bucket": { - "groupBy": "$v", - "boundaries": [0, 10], - "output": {"result": {"$avg": ["$v", "$v"]}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$avg should reject multi-element array syntax in $bucket", - ), - AccumulatorTestCase( - "arity_empty_array_bucket", - pipeline=[ - { - "$bucket": { - "groupBy": "$v", - "boundaries": [0, 10], - "output": {"result": {"$avg": []}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$avg should reject empty array syntax in $bucket", - ), - AccumulatorTestCase( - "arity_single_element_bucket", - pipeline=[ - { - "$bucket": { - "groupBy": "$v", - "boundaries": [0, 10], - "output": {"result": {"$avg": ["$v"]}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$avg should reject single-element array syntax in $bucket", - ), - AccumulatorTestCase( - "arity_multi_element_bucket_auto", - pipeline=[ - { - "$bucketAuto": { - "groupBy": "$v", - "buckets": 1, - "output": {"result": {"$avg": ["$v", "$v"]}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$avg should reject multi-element array syntax 
in $bucketAuto", - ), - AccumulatorTestCase( - "arity_empty_array_bucket_auto", - pipeline=[ - { - "$bucketAuto": { - "groupBy": "$v", - "buckets": 1, - "output": {"result": {"$avg": []}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$avg should reject empty array syntax in $bucketAuto", - ), - AccumulatorTestCase( - "arity_single_element_bucket_auto", - pipeline=[ - { - "$bucketAuto": { - "groupBy": "$v", - "buckets": 1, - "output": {"result": {"$avg": ["$v"]}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$avg should reject single-element array syntax in $bucketAuto", - ), -] - - -@pytest.mark.parametrize("test_case", pytest_params(AVG_ARITY_TESTS)) -def test_accumulator_avg_arity(collection, test_case: AccumulatorTestCase): - """Test $avg rejects array syntax in accumulator context.""" - collection.insert_one({"v": 1}) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": test_case.pipeline, - "cursor": {}, - }, - ) - assertResult( - result, - error_code=test_case.error_code, - msg=test_case.msg, - ) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_errors.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_errors.py new file mode 100644 index 00000000..d163850f --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_errors.py @@ -0,0 +1,189 @@ +""" +Tests for $avg accumulator error handling. + +Covers arity validation (rejects array syntax in $group, $bucket, $bucketAuto) +and expression error propagation ($toInt, $divide, $mod). 
+""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + CONVERSION_FAILURE_ERROR, + DIVIDE_BY_ZERO_V2_ERROR, + GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + MODULO_BY_ZERO_V2_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Arity]: $avg in accumulator context is a unary operator and +# rejects array syntax in $group, $bucket, and $bucketAuto. +AVG_ARITY_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "arity_multi_element_group", + pipeline=[{"$group": {"_id": None, "result": {"$avg": ["$v", "$v"]}}}], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$avg should reject multi-element array syntax in $group", + ), + AccumulatorTestCase( + "arity_empty_array_group", + pipeline=[{"$group": {"_id": None, "result": {"$avg": []}}}], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$avg should reject empty array syntax in $group", + ), + AccumulatorTestCase( + "arity_single_element_group", + pipeline=[{"$group": {"_id": None, "result": {"$avg": ["$v"]}}}], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$avg should reject single-element array syntax in $group", + ), + AccumulatorTestCase( + "arity_multi_element_bucket", + pipeline=[ + { + "$bucket": { + "groupBy": "$v", + "boundaries": [0, 10], + "output": {"result": {"$avg": ["$v", "$v"]}}, + } + } + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$avg should reject multi-element array syntax in $bucket", + ), + AccumulatorTestCase( + "arity_empty_array_bucket", + pipeline=[ + { + "$bucket": { + "groupBy": "$v", + "boundaries": [0, 10], + "output": {"result": {"$avg": []}}, + } + } + ], + 
error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$avg should reject empty array syntax in $bucket", + ), + AccumulatorTestCase( + "arity_single_element_bucket", + pipeline=[ + { + "$bucket": { + "groupBy": "$v", + "boundaries": [0, 10], + "output": {"result": {"$avg": ["$v"]}}, + } + } + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$avg should reject single-element array syntax in $bucket", + ), + AccumulatorTestCase( + "arity_multi_element_bucket_auto", + pipeline=[ + { + "$bucketAuto": { + "groupBy": "$v", + "buckets": 1, + "output": {"result": {"$avg": ["$v", "$v"]}}, + } + } + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$avg should reject multi-element array syntax in $bucketAuto", + ), + AccumulatorTestCase( + "arity_empty_array_bucket_auto", + pipeline=[ + { + "$bucketAuto": { + "groupBy": "$v", + "buckets": 1, + "output": {"result": {"$avg": []}}, + } + } + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$avg should reject empty array syntax in $bucketAuto", + ), + AccumulatorTestCase( + "arity_single_element_bucket_auto", + pipeline=[ + { + "$bucketAuto": { + "groupBy": "$v", + "buckets": 1, + "output": {"result": {"$avg": ["$v"]}}, + } + } + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$avg should reject single-element array syntax in $bucketAuto", + ), +] + +# Property [Expression Error Propagation]: errors from sub-expressions +# propagate through $avg without being caught or suppressed. 
+AVG_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "error_prop_toint_non_convertible", + docs=[{"v": "hello"}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": {"$toInt": "$v"}}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + error_code=CONVERSION_FAILURE_ERROR, + msg="$avg should propagate $toInt conversion error for non-convertible value", + ), + AccumulatorTestCase( + "error_prop_divide_by_zero", + docs=[{"v": 10}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": {"$divide": ["$v", 0]}}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + error_code=DIVIDE_BY_ZERO_V2_ERROR, + msg="$avg should propagate $divide by zero error", + ), + AccumulatorTestCase( + "error_prop_mod_by_zero", + docs=[{"v": 10}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": {"$mod": ["$v", 0]}}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + error_code=MODULO_BY_ZERO_V2_ERROR, + msg="$avg should propagate $mod by zero error", + ), +] + +AVG_ERROR_TESTS: list[AccumulatorTestCase] = AVG_ARITY_TESTS + AVG_EXPRESSION_ERROR_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(AVG_ERROR_TESTS)) +def test_avg_errors(collection, test_case: AccumulatorTestCase): + """Test $avg accumulator error handling.""" + if test_case.docs: + collection.insert_many(test_case.docs) + else: + collection.insert_one({"v": 1}) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py index d50329b0..ad590544 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py +++ 
b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py @@ -1,9 +1,8 @@ """ -Tests for $avg accumulator overflow, boundary values, and decimal128 precision -in $group context. +Tests for $avg accumulator boundary values and overflow in $group context. -These test the accumulator's running sum behavior across documents, -which differs from expression-context evaluation on a single array. +Covers int32/int64 boundary values, double boundary values (subnormal, normal, +near-max), Decimal128 precision and boundary values, and sum overflow behavior. """ from __future__ import annotations @@ -18,22 +17,57 @@ from documentdb_tests.framework.executor import execute_command from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import ( + DECIMAL128_INFINITY, + DECIMAL128_INT64_OVERFLOW, DECIMAL128_LARGE_EXPONENT, DECIMAL128_MAX, DECIMAL128_MIN, + DECIMAL128_MIN_POSITIVE, DECIMAL128_SMALL_EXPONENT, + DECIMAL128_TRAILING_ZERO, + DOUBLE_FROM_INT64_MAX, + DOUBLE_MAX, + DOUBLE_MAX_SAFE_INTEGER, + DOUBLE_MIN_NEGATIVE_SUBNORMAL, + DOUBLE_MIN_NORMAL, DOUBLE_MIN_SUBNORMAL, DOUBLE_NEAR_MAX, + DOUBLE_NEAR_MIN, + DOUBLE_ZERO, + FLOAT_INFINITY, INT32_MAX, + INT32_MAX_MINUS_1, INT32_MIN, INT64_MAX, + INT64_MAX_MINUS_1, INT64_MIN, + INT64_MIN_PLUS_1, + INT64_ZERO, ) # Property [Integer Boundaries]: $avg handles int32 and int64 boundary values -# including MAX, MIN, and overflow combinations. - +# including MAX, MIN, adjacent values, and overflow combinations. 
AVG_INT_BOUNDARY_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + id="int32_zeros", + docs=[{"v": 0}, {"v": 0}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_ZERO}], + msg="$avg should return 0.0 for two int32 zeros", + ), + AccumulatorTestCase( + id="int32_one_neg_one", + docs=[{"v": 1}, {"v": -1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_ZERO}], + msg="$avg should return 0.0 for int32 1 and -1", + ), AccumulatorTestCase( id="int32_max_pair", docs=[{"_id": 0, "v": INT32_MAX}, {"_id": 1, "v": INT32_MAX}], @@ -56,6 +90,26 @@ expected=[{"_id": None, "avg": -0.5}], msg="avg of INT32_MAX and INT32_MIN should be -0.5", ), + AccumulatorTestCase( + id="int32_adjacent_max", + docs=[{"v": INT32_MAX_MINUS_1}, {"v": INT32_MAX}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 2_147_483_646.5}], + msg="$avg of adjacent int32 MAX values should produce exact double", + ), + AccumulatorTestCase( + id="int32_adjacent_min", + docs=[{"v": INT32_MIN}, {"v": INT32_MIN + 1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": -2_147_483_647.5}], + msg="$avg of adjacent int32 MIN values should produce exact double", + ), AccumulatorTestCase( id="int64_max_pair", docs=[{"_id": 0, "v": INT64_MAX}, {"_id": 1, "v": INT64_MAX}], @@ -70,6 +124,26 @@ expected=[{"_id": None, "avg": -9.223372036854776e18}], msg="avg of two INT64_MIN should handle overflow", ), + AccumulatorTestCase( + id="int64_max_and_zero", + docs=[{"v": INT64_MAX}, {"v": INT64_ZERO}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_FROM_INT64_MAX / 
2}], + msg="$avg should handle int64 MAX with precision loss in double", + ), + AccumulatorTestCase( + id="int64_max_and_min", + docs=[{"v": INT64_MAX}, {"v": INT64_MIN}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": -0.5}], + msg="$avg should handle int64 MAX and MIN together", + ), AccumulatorTestCase( id="int64_max_and_one", docs=[{"_id": 0, "v": INT64_MAX}, {"_id": 1, "v": Int64(1)}], @@ -77,21 +151,63 @@ expected=[{"_id": None, "avg": 4.611686018427388e18}], msg="avg of INT64_MAX and 1", ), + AccumulatorTestCase( + id="int64_adjacent_max", + docs=[{"v": INT64_MAX_MINUS_1}, {"v": INT64_MAX}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_FROM_INT64_MAX}], + msg="$avg of adjacent int64 MAX values should produce double with precision loss", + ), + AccumulatorTestCase( + id="int64_adjacent_min", + docs=[{"v": INT64_MIN_PLUS_1}, {"v": INT64_MIN}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": -DOUBLE_FROM_INT64_MAX}], + msg="$avg of adjacent int64 MIN values should produce double with precision loss", + ), ] # Property [Double Boundaries]: $avg handles double boundary values -# including near-max overflow and subnormal values. - +# including subnormal, minimum normal, near-max, and max safe integer. 
AVG_DOUBLE_BOUNDARY_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( - id="double_near_max_pair", - docs=[{"_id": 0, "v": DOUBLE_NEAR_MAX}, {"_id": 1, "v": DOUBLE_NEAR_MAX}], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": float("inf")}], - msg="avg of two DOUBLE_NEAR_MAX overflows sum to inf", + id="double_whole_number", + docs=[{"v": 3.0}, {"v": 5.0}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 4.0}], + msg="$avg should produce correct average for whole-number floats", ), AccumulatorTestCase( - id="double_subnormal", + id="double_subnormal_positive", + docs=[{"v": DOUBLE_MIN_SUBNORMAL}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_MIN_SUBNORMAL}], + msg="$avg should handle positive subnormal value correctly", + ), + AccumulatorTestCase( + id="double_subnormal_negative", + docs=[{"v": DOUBLE_MIN_NEGATIVE_SUBNORMAL}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_MIN_NEGATIVE_SUBNORMAL}], + msg="$avg should handle negative subnormal value correctly", + ), + AccumulatorTestCase( + id="double_subnormal_pair", docs=[ {"_id": 0, "v": DOUBLE_MIN_SUBNORMAL}, {"_id": 1, "v": DOUBLE_MIN_SUBNORMAL}, @@ -100,12 +216,94 @@ expected=[{"_id": None, "avg": DOUBLE_MIN_SUBNORMAL}], msg="avg of two subnormal doubles should return subnormal", ), + AccumulatorTestCase( + id="double_min_normal", + docs=[{"v": DOUBLE_MIN_NORMAL}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_MIN_NORMAL}], + msg="$avg should handle smallest positive normal double correctly", + ), + AccumulatorTestCase( + id="double_max_single", + docs=[{"v": DOUBLE_MAX}], + 
pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_MAX}], + msg="$avg should handle DBL_MAX as a single value correctly", + ), + AccumulatorTestCase( + id="double_max_safe_integer", + docs=[{"v": float(DOUBLE_MAX_SAFE_INTEGER)}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": float(DOUBLE_MAX_SAFE_INTEGER)}], + msg="$avg should handle max safe integer value correctly", + ), + AccumulatorTestCase( + id="double_max_safe_integer_pair", + docs=[ + {"v": float(DOUBLE_MAX_SAFE_INTEGER)}, + {"v": float(DOUBLE_MAX_SAFE_INTEGER)}, + ], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": float(DOUBLE_MAX_SAFE_INTEGER)}], + msg="$avg of two max safe integer values should return that value", + ), + AccumulatorTestCase( + id="double_near_min_pair", + docs=[{"v": DOUBLE_NEAR_MIN}, {"v": DOUBLE_NEAR_MIN}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_NEAR_MIN}], + msg="$avg should handle values near minimum normal correctly", + ), + AccumulatorTestCase( + id="double_near_max_single", + docs=[{"v": DOUBLE_NEAR_MAX}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_NEAR_MAX}], + msg="$avg should handle values near maximum finite correctly", + ), + AccumulatorTestCase( + id="double_near_max_pair", + docs=[{"_id": 0, "v": DOUBLE_NEAR_MAX}, {"_id": 1, "v": DOUBLE_NEAR_MAX}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": float("inf")}], + msg="avg of two DOUBLE_NEAR_MAX overflows sum to inf", + ), ] # Property [Decimal128 Precision]: $avg preserves Decimal128 precision -# across 
extreme exponent differences and boundary values. - -AVG_DECIMAL128_PRECISION_TESTS: list[AccumulatorTestCase] = [ +# across extreme exponent differences, trailing zeros, and boundary values. +AVG_DECIMAL128_BOUNDARY_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + id="decimal128_full_precision", + docs=[ + {"v": Decimal128("1.000000000000000000000000000000001")}, + {"v": Decimal128("1.000000000000000000000000000000003")}, + ], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": Decimal128("1.000000000000000000000000000000002")}], + msg="$avg should preserve full 34-digit Decimal128 precision", + ), AccumulatorTestCase( id="decimal128_high_precision", docs=[ @@ -122,6 +320,86 @@ expected=[{"_id": None, "avg": Decimal128("2.000000000000000000000000000000000")}], msg="decimal128 avg should preserve high precision", ), + AccumulatorTestCase( + id="decimal128_34_digit_integer", + docs=[ + {"v": Decimal128("1234567890123456789012345678901234")}, + {"v": Decimal128("1234567890123456789012345678901234")}, + ], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": Decimal128("1234567890123456789012345678901234")}], + msg="$avg should preserve 34-digit integer Decimal128 values", + ), + AccumulatorTestCase( + id="decimal128_trailing_zeros", + docs=[{"v": Decimal128("2.00")}, {"v": Decimal128("4.00")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": Decimal128("3.00")}], + msg="$avg should preserve trailing zeros in Decimal128 results", + ), + AccumulatorTestCase( + id="decimal128_trailing_zeros_single_digit", + docs=[{"v": DECIMAL128_TRAILING_ZERO}, {"v": Decimal128("3.0")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + 
expected=[{"result": Decimal128("2.0")}], + msg="$avg should preserve single trailing zero in Decimal128 results", + ), + AccumulatorTestCase( + id="decimal128_subnormal_pair", + docs=[{"v": DECIMAL128_MIN_POSITIVE}, {"v": DECIMAL128_MIN_POSITIVE}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DECIMAL128_MIN_POSITIVE}], + msg="$avg should handle Decimal128 subnormal values correctly", + ), + AccumulatorTestCase( + id="decimal128_subnormal_single", + docs=[{"v": DECIMAL128_MIN_POSITIVE}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DECIMAL128_MIN_POSITIVE}], + msg="$avg should handle a single Decimal128 subnormal value", + ), + AccumulatorTestCase( + id="decimal128_near_max_single", + docs=[{"v": DECIMAL128_MAX}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DECIMAL128_MAX}], + msg="$avg should handle a single near-maximum Decimal128 value", + ), + AccumulatorTestCase( + id="decimal128_near_max_with_small", + docs=[{"v": DECIMAL128_MAX}, {"v": Decimal128("1")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": Decimal128("5.000000000000000000000000000000000E+6144")}], + msg="$avg should handle near-maximum Decimal128 averaged with a small value", + ), + AccumulatorTestCase( + id="decimal128_max_and_min", + docs=[{"_id": 0, "v": DECIMAL128_MAX}, {"_id": 1, "v": DECIMAL128_MIN}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": Decimal128("0")}], + msg="avg of DECIMAL128_MAX and DECIMAL128_MIN", + ), AccumulatorTestCase( id="decimal128_large_exponent", docs=[ @@ -142,13 +420,6 @@ expected=[{"_id": None, "avg": DECIMAL128_SMALL_EXPONENT}], msg="avg of two identical 
small exponent values should return same value", ), - AccumulatorTestCase( - id="decimal128_max_and_min", - docs=[{"_id": 0, "v": DECIMAL128_MAX}, {"_id": 1, "v": DECIMAL128_MIN}], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": Decimal128("0")}], - msg="avg of DECIMAL128_MAX and DECIMAL128_MIN", - ), AccumulatorTestCase( id="decimal128_extreme_exponent_diff", docs=[ @@ -164,10 +435,72 @@ ], msg="avg with extreme exponent difference", ), + AccumulatorTestCase( + id="decimal128_exceeds_int64", + docs=[ + {"v": DECIMAL128_INT64_OVERFLOW}, + {"v": DECIMAL128_INT64_OVERFLOW}, + ], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DECIMAL128_INT64_OVERFLOW}], + msg="$avg should produce Decimal128 for values exceeding int64 range", + ), +] + +# Property [Overflow]: sum overflow during accumulation produces Infinity for +# doubles and Decimal128, and int32/int64 overflow is handled via type +# promotion without error. 
+AVG_OVERFLOW_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + id="overflow_double_max", + docs=[{"v": DOUBLE_MAX}, {"v": DOUBLE_MAX}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": FLOAT_INFINITY}], + msg="$avg should return Infinity when two DBL_MAX values overflow the sum", + ), + AccumulatorTestCase( + id="overflow_decimal128_max", + docs=[{"v": DECIMAL128_MAX}, {"v": DECIMAL128_MAX}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DECIMAL128_INFINITY}], + msg="$avg should return Decimal128 Infinity when two Decimal128 max values overflow", + ), + AccumulatorTestCase( + id="overflow_int32_sum", + docs=[{"v": INT32_MAX}, {"v": INT32_MAX}, {"v": INT32_MAX}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": float(INT32_MAX)}], + msg="$avg should handle int32 sum overflow via type promotion without error", + ), + AccumulatorTestCase( + id="overflow_int64_sum", + docs=[{"v": INT64_MAX}, {"v": INT64_MAX}, {"v": INT64_MAX}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DOUBLE_FROM_INT64_MAX}], + msg="$avg should handle int64 sum overflow by converting to double", + ), ] AVG_GROUP_BOUNDARY_TESTS: list[AccumulatorTestCase] = ( - AVG_INT_BOUNDARY_TESTS + AVG_DOUBLE_BOUNDARY_TESTS + AVG_DECIMAL128_PRECISION_TESTS + AVG_INT_BOUNDARY_TESTS + + AVG_DOUBLE_BOUNDARY_TESTS + + AVG_DECIMAL128_BOUNDARY_TESTS + + AVG_OVERFLOW_TESTS ) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py index e9189f0b..2bfd0ee2 100644 --- 
a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py @@ -2,7 +2,8 @@ Tests for $avg accumulator in $group context. Covers numeric equivalence in grouping, single/empty groups, -precision edge cases, multiple groups, and comparison with $sum. +precision edge cases, multiple groups, comparison with $sum, +and expression argument handling. """ from __future__ import annotations @@ -130,6 +131,16 @@ expected=[{"_id": None, "avg": 20.0}], msg="$avg with _id: null should average entire collection", ), + AccumulatorTestCase( + id="single_document_int64", + docs=[{"v": Int64(42)}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 42.0}], + msg="$avg should return the value as double for a single int64 document", + ), ] # Property [Precision]: $avg produces correct fractional and repeating @@ -292,12 +303,38 @@ ), ] +# Property [Expression Arguments]: $avg accepts any expression as its operand, +# evaluating it per-document before accumulation. 
+EXPRESSION_ARGS_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + id="expr_constant_literal", + docs=[{"x": 1}, {"x": 2}, {"x": 3}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": 5}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 5.0}], + msg="$avg should return the constant value when expression is a numeric literal", + ), + AccumulatorTestCase( + id="expr_nested_add", + docs=[{"a": 2, "b": 3}, {"a": 4, "b": 6}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": {"$add": ["$a", "$b"]}}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 7.5}], + msg="$avg should evaluate nested $add expression per-document before averaging", + ), +] + AVG_GROUP_CONTEXT_TESTS: list[AccumulatorTestCase] = ( NUMERIC_EQUIVALENCE_TESTS + SINGLE_EMPTY_GROUP_TESTS + PRECISION_EDGE_TESTS + MULTIPLE_GROUPS_TESTS + COMPARISON_WITH_RELATED_TESTS + + EXPRESSION_ARGS_TESTS ) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py index c9e81c84..5397303e 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py @@ -1,14 +1,12 @@ """ -Tests for $avg accumulator data type handling in $group context. +Tests for $avg accumulator type promotion and return type in $group context. -Covers type promotion rules, NaN/Infinity propagation, null/missing handling, -and non-numeric type ignoring when accumulating across documents. +Covers type promotion rules (int32, int64, double, Decimal128), return type +verification via $type, and negative zero normalization. 
""" from __future__ import annotations -import math - import pytest from bson import Decimal128, Int64 @@ -19,15 +17,10 @@ from documentdb_tests.framework.executor import execute_command from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import ( - DECIMAL128_INFINITY, - DECIMAL128_NAN, - DECIMAL128_NEGATIVE_INFINITY, DECIMAL128_NEGATIVE_ZERO, DECIMAL128_ZERO, DOUBLE_NEGATIVE_ZERO, DOUBLE_ZERO, - FLOAT_INFINITY, - FLOAT_NEGATIVE_INFINITY, ) # Property [Type Promotion]: $avg returns double for integer and double inputs, @@ -125,235 +118,135 @@ ), ] -# Property [NaN Propagation]: NaN is numeric and propagates to the result; -# NaN dominates Infinity and cross-type NaN promotes to Decimal128. -AVG_NAN_PROPAGATION_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "nan_propagates", - docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": float("nan")}, {"_id": 2, "v": 30}], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], - msg="NaN in group should propagate to result", - ), - AccumulatorTestCase( - "all_nan", - docs=[{"_id": 0, "v": float("nan")}, {"_id": 1, "v": float("nan")}], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], - msg="All NaN in group should return NaN", - ), +# Property [Negative Zero]: $avg normalizes negative zero to positive zero +# for both double and Decimal128. 
+AVG_NEGATIVE_ZERO_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( - "decimal128_nan", + "negative_zero_double", docs=[ - {"_id": 0, "v": Decimal128("10")}, - {"_id": 1, "v": DECIMAL128_NAN}, - {"_id": 2, "v": Decimal128("30")}, + {"_id": 0, "v": DOUBLE_NEGATIVE_ZERO}, + {"_id": 1, "v": DOUBLE_NEGATIVE_ZERO}, ], pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": DECIMAL128_NAN}], - msg="Decimal128 NaN in group should propagate", - ), - AccumulatorTestCase( - "nan_dominates_infinity", - docs=[{"_id": 0, "v": float("nan")}, {"_id": 1, "v": FLOAT_INFINITY}], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], - msg="NaN should dominate Infinity in group", - ), - AccumulatorTestCase( - "cross_type_nan", - docs=[{"_id": 0, "v": float("nan")}, {"_id": 1, "v": Decimal128("5")}], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": DECIMAL128_NAN}], - msg="double NaN + Decimal128 should return Decimal128 NaN", - ), -] - -# Property [Infinity]: Infinity dominates finite values, and -# Infinity + -Infinity cancels to NaN. 
-AVG_INFINITY_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "infinity", - docs=[{"_id": 0, "v": FLOAT_INFINITY}, {"_id": 1, "v": 10}], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": FLOAT_INFINITY}], - msg="Infinity in group should propagate", - ), - AccumulatorTestCase( - "negative_infinity", - docs=[{"_id": 0, "v": FLOAT_NEGATIVE_INFINITY}, {"_id": 1, "v": 10}], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": FLOAT_NEGATIVE_INFINITY}], - msg="-Infinity in group should propagate", - ), - AccumulatorTestCase( - "inf_neg_inf_cancel", - docs=[{"_id": 0, "v": FLOAT_INFINITY}, {"_id": 1, "v": FLOAT_NEGATIVE_INFINITY}], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], - msg="Infinity + -Infinity in group should return NaN", - ), - AccumulatorTestCase( - "decimal128_infinity", - docs=[{"_id": 0, "v": DECIMAL128_INFINITY}, {"_id": 1, "v": Decimal128("10")}], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": DECIMAL128_INFINITY}], - msg="Decimal128 Infinity in group should propagate", + expected=[{"_id": None, "avg": DOUBLE_ZERO}], + msg="Double -0.0 avg should normalize to 0.0", ), AccumulatorTestCase( - "decimal128_inf_neg_inf_cancel", + "negative_zero_decimal128", docs=[ - {"_id": 0, "v": DECIMAL128_INFINITY}, - {"_id": 1, "v": DECIMAL128_NEGATIVE_INFINITY}, + {"_id": 0, "v": DECIMAL128_NEGATIVE_ZERO}, + {"_id": 1, "v": DECIMAL128_NEGATIVE_ZERO}, ], pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": DECIMAL128_NAN}], - msg="Decimal128 Inf + -Inf in group should return Decimal128 NaN", + expected=[{"_id": None, "avg": DECIMAL128_ZERO}], + msg="Decimal128 -0 avg should normalize to 0", ), ] -# Property [Null and Missing]: null values and missing fields are excluded -# from both the sum 
and count, producing null when no numeric values remain. -AVG_NULL_MISSING_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "all_null", - docs=[{"_id": 0, "v": None}, {"_id": 1, "v": None}, {"_id": 2, "v": None}], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": None}], - msg="All null in group should return null", - ), - AccumulatorTestCase( - "some_null", - docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": None}, {"_id": 2, "v": 30}], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": 20.0}], - msg="Null docs should be ignored, avg of 10 and 30 is 20", - ), - AccumulatorTestCase( - "all_missing", - docs=[{"_id": 0, "other": 0}, {"_id": 1, "other": 1}, {"_id": 2, "other": 2}], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": None}], - msg="All missing fields should return null", - ), - AccumulatorTestCase( - "some_missing", - docs=[{"_id": 0, "v": 10}, {"_id": 1}, {"_id": 2, "v": 30}], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": 20.0}], - msg="Missing field docs should be ignored", - ), - AccumulatorTestCase( - "mix_null_missing_numeric", - docs=[ - {"_id": 0, "v": 10}, - {"_id": 1, "v": None}, - {"_id": 2}, - {"_id": 3, "v": 30}, +# Property [Return Type]: the result is double by default, but Decimal128 if +# any input value is Decimal128. 
+AVG_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "type_int32_only", + docs=[{"v": 2}, {"v": 4}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, ], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": 20.0}], - msg="Only numeric values should contribute to average", + expected=[{"type": "double"}], + msg="$avg should return double when all inputs are int32", ), -] - -# Property [Non-Numeric Types Ignored]: non-numeric BSON types are silently -# ignored and excluded from both sum and count. -AVG_NON_NUMERIC_IGNORED_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( - "ignores_strings", - docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": "hello"}, {"_id": 2, "v": 30}], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": 20.0}], - msg="String values should be ignored in group avg", + "type_int64_only", + docs=[{"v": Int64(2)}, {"v": Int64(4)}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "double"}], + msg="$avg should return double when all inputs are int64", ), AccumulatorTestCase( - "ignores_booleans", - docs=[ - {"_id": 0, "v": 10}, - {"_id": 1, "v": True}, - {"_id": 2, "v": False}, - {"_id": 3, "v": 30}, + "type_int32_int64", + docs=[{"v": 2}, {"v": Int64(4)}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, ], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": 20.0}], - msg="Boolean values should be ignored in group avg", + expected=[{"type": "double"}], + msg="$avg should return double for int32 and int64 mix", ), AccumulatorTestCase( - "ignores_arrays", - docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": [1, 2, 3]}, {"_id": 2, "v": 30}], - 
pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": 20.0}], - msg="Array values should be ignored in group avg", + "type_int32_double", + docs=[{"v": 2}, {"v": 4.0}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "double"}], + msg="$avg should return double for int32 and double mix", ), AccumulatorTestCase( - "ignores_objects", - docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": {"nested": 99}}, {"_id": 2, "v": 30}], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": 20.0}], - msg="Object values should be ignored in group avg", + "type_int64_double", + docs=[{"v": Int64(2)}, {"v": 4.0}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "double"}], + msg="$avg should return double for int64 and double mix", ), AccumulatorTestCase( - "all_non_numeric", - docs=[ - {"_id": 0, "v": "a"}, - {"_id": 1, "v": True}, - {"_id": 2, "v": [1]}, - {"_id": 3, "v": {"x": 1}}, + "type_int32_decimal128", + docs=[{"v": 2}, {"v": Decimal128("4")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, ], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": None}], - msg="All non-numeric values should return null", + expected=[{"type": "decimal"}], + msg="$avg should return Decimal128 when any input is Decimal128", ), AccumulatorTestCase( - "boolean_not_numeric", - docs=[{"_id": 0, "v": False}, {"_id": 1, "v": True}], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": None}], - msg="Booleans should not be treated as 0/1 in avg", + "type_int64_decimal128", + docs=[{"v": Int64(2)}, {"v": Decimal128("4")}], + pipeline=[ + {"$group": {"_id": None, 
"result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, + ], + expected=[{"type": "decimal"}], + msg="$avg should return Decimal128 for int64 and Decimal128 mix", ), -] - -# Property [Negative Zero]: $avg normalizes negative zero to positive zero -# for both double and Decimal128. -AVG_NEGATIVE_ZERO_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( - "negative_zero_double", - docs=[ - {"_id": 0, "v": DOUBLE_NEGATIVE_ZERO}, - {"_id": 1, "v": DOUBLE_NEGATIVE_ZERO}, + "type_double_decimal128", + docs=[{"v": 2.0}, {"v": Decimal128("4")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, ], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": DOUBLE_ZERO}], - msg="Double -0.0 avg should normalize to 0.0", + expected=[{"type": "decimal"}], + msg="$avg should return Decimal128 for double and Decimal128 mix", ), AccumulatorTestCase( - "negative_zero_decimal128", - docs=[ - {"_id": 0, "v": DECIMAL128_NEGATIVE_ZERO}, - {"_id": 1, "v": DECIMAL128_NEGATIVE_ZERO}, + "type_decimal128_before_int32", + docs=[{"v": Decimal128("4")}, {"v": 2}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "type": {"$type": "$result"}}}, ], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": DECIMAL128_ZERO}], - msg="Decimal128 -0 avg should normalize to 0", + expected=[{"type": "decimal"}], + msg="$avg should return Decimal128 regardless of document order", ), ] AVG_GROUP_TYPE_TESTS: list[AccumulatorTestCase] = ( - AVG_TYPE_PROMOTION_TESTS - + AVG_NAN_PROPAGATION_TESTS - + AVG_INFINITY_TESTS - + AVG_NULL_MISSING_TESTS - + AVG_NON_NUMERIC_IGNORED_TESTS - + AVG_NEGATIVE_ZERO_TESTS + AVG_TYPE_PROMOTION_TESTS + AVG_NEGATIVE_ZERO_TESTS + AVG_RETURN_TYPE_TESTS ) @pytest.mark.parametrize("test_case", pytest_params(AVG_GROUP_TYPE_TESTS)) def 
test_avg_group_types(collection, test_case: AccumulatorTestCase): - """Test $avg data type handling in $group context.""" - if test_case.docs: - collection.insert_many(test_case.docs) + """Test $avg type promotion and return type in $group context.""" + collection.insert_many(test_case.docs) result = execute_command( collection, { diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_non_numeric.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_non_numeric.py new file mode 100644 index 00000000..831216cb --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_non_numeric.py @@ -0,0 +1,255 @@ +""" +Tests for $avg accumulator non-numeric type handling in $group context. + +Covers all non-numeric BSON types (string, boolean, object, ObjectId, datetime, +Timestamp, Binary, Regex, Code, MinKey, MaxKey, arrays) and verifies they are +silently ignored and excluded from both sum and count. +""" + +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import Binary, Code, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Non-Numeric Types Ignored]: all non-numeric BSON types are +# silently ignored and excluded from both sum and count, producing null +# when no numeric values remain. 
+AVG_NON_NUMERIC_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "string", + docs=[{"v": "hello"}, {"v": "world"}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should ignore string values and return null", + ), + AccumulatorTestCase( + "boolean_true", + docs=[{"v": True}, {"v": True}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should ignore boolean true without coercing to numeric", + ), + AccumulatorTestCase( + "boolean_false", + docs=[{"v": False}, {"v": False}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should ignore boolean false without coercing to numeric", + ), + AccumulatorTestCase( + "boolean_not_numeric", + docs=[{"_id": 0, "v": False}, {"_id": 1, "v": True}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": None}], + msg="Booleans should not be treated as 0/1 in avg", + ), + AccumulatorTestCase( + "object", + docs=[{"v": {"x": 1}}, {"v": {"y": 2}}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should ignore plain objects", + ), + AccumulatorTestCase( + "empty_object", + docs=[{"v": {}}, {"v": {}}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should ignore empty objects", + ), + AccumulatorTestCase( + "objectid", + docs=[{"v": ObjectId()}, {"v": ObjectId()}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should ignore ObjectId 
values", + ), + AccumulatorTestCase( + "datetime", + docs=[ + {"v": datetime(2023, 1, 1, tzinfo=timezone.utc)}, + {"v": datetime(2024, 1, 1, tzinfo=timezone.utc)}, + ], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should ignore datetime values", + ), + AccumulatorTestCase( + "timestamp", + docs=[{"v": Timestamp(1, 1)}, {"v": Timestamp(2, 1)}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should ignore Timestamp values", + ), + AccumulatorTestCase( + "binary", + docs=[{"v": Binary(b"\x01")}, {"v": Binary(b"\x02")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should ignore Binary values", + ), + AccumulatorTestCase( + "regex", + docs=[{"v": Regex("abc")}, {"v": Regex("def")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should ignore Regex values", + ), + AccumulatorTestCase( + "code", + docs=[{"v": Code("x")}, {"v": Code("y")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should ignore Code values", + ), + AccumulatorTestCase( + "minkey", + docs=[{"v": MinKey()}, {"v": MinKey()}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should ignore MinKey values", + ), + AccumulatorTestCase( + "maxkey", + docs=[{"v": MaxKey()}, {"v": MaxKey()}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should ignore 
MaxKey values", + ), + AccumulatorTestCase( + "array", + docs=[{"v": [1, 2, 3]}, {"v": [4, 5]}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should ignore arrays without unwrapping", + ), + AccumulatorTestCase( + "single_element_array", + docs=[{"v": [42]}, {"v": [7]}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should not unwrap single-element numeric arrays", + ), + AccumulatorTestCase( + "empty_array", + docs=[{"v": []}, {"v": []}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should ignore empty arrays", + ), + AccumulatorTestCase( + "nested_array", + docs=[{"v": [[1, 2]]}, {"v": [[3]]}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should ignore nested arrays", + ), + AccumulatorTestCase( + "array_from_expression", + docs=[{"v": 1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": {"$literal": [1, 2, 3]}}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should treat array expressions as non-numeric", + ), + AccumulatorTestCase( + "mixed_with_numerics", + docs=[{"v": "hello"}, {"v": 10}, {"v": True}, {"v": 20}, {"v": [5]}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 15.0}], + msg="$avg should compute average only over numeric values, ignoring non-numerics", + ), + AccumulatorTestCase( + "all_non_numeric", + docs=[ + {"_id": 0, "v": "a"}, + {"_id": 1, "v": True}, + {"_id": 2, "v": [1]}, + {"_id": 3, "v": {"x": 1}}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": 
"$v"}}}], + expected=[{"_id": None, "avg": None}], + msg="All non-numeric values should return null", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(AVG_NON_NUMERIC_TESTS)) +def test_avg_non_numeric(collection, test_case: AccumulatorTestCase): + """Test $avg non-numeric type handling in $group context.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult(result, expected=test_case.expected, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_null_missing.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_null_missing.py new file mode 100644 index 00000000..e1a222aa --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_null_missing.py @@ -0,0 +1,118 @@ +""" +Tests for $avg accumulator null and missing value handling in $group context. + +Covers null values, missing fields, $$REMOVE, and combinations with numeric values. +""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Null and Missing Ignored]: null values, missing fields, and +# $$REMOVE are treated as non-numeric and excluded from both the sum and +# count, producing null when no numeric values remain. 
+AVG_NULL_MISSING_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "all_null", + docs=[{"_id": 0, "v": None}, {"_id": 1, "v": None}, {"_id": 2, "v": None}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": None}], + msg="$avg should return null when all values in the group are null", + ), + AccumulatorTestCase( + "single_null", + docs=[{"v": None}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should return null when the only value is null", + ), + AccumulatorTestCase( + "some_null", + docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": None}, {"_id": 2, "v": 30}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 20.0}], + msg="$avg should exclude null from both sum and count", + ), + AccumulatorTestCase( + "all_missing", + docs=[{"_id": 0, "other": 0}, {"_id": 1, "other": 1}, {"_id": 2, "other": 2}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": None}], + msg="$avg should return null when all values reference missing fields", + ), + AccumulatorTestCase( + "single_missing", + docs=[{"x": 1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should return null when the only value is a missing field", + ), + AccumulatorTestCase( + "some_missing", + docs=[{"_id": 0, "v": 10}, {"_id": 1}, {"_id": 2, "v": 30}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 20.0}], + msg="$avg should exclude missing fields from both sum and count", + ), + AccumulatorTestCase( + "mixed_null_and_missing_no_numerics", + docs=[{"v": None}, {"x": 1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + 
expected=[{"result": None}], + msg="$avg should return null when values are a mix of null and missing", + ), + AccumulatorTestCase( + "mix_null_missing_numeric", + docs=[ + {"_id": 0, "v": 10}, + {"_id": 1, "v": None}, + {"_id": 2}, + {"_id": 3, "v": 30}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 20.0}], + msg="Only numeric values should contribute to average", + ), + AccumulatorTestCase( + "remove_only", + docs=[{"v": 5}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": {"$cond": [False, 1, "$$REMOVE"]}}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$avg should treat $$REMOVE as missing and return null", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(AVG_NULL_MISSING_TESTS)) +def test_avg_null_missing(collection, test_case: AccumulatorTestCase): + """Test $avg null and missing value handling in $group context.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult(result, expected=test_case.expected, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_special_numeric.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_special_numeric.py new file mode 100644 index 00000000..5cf5a41e --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_special_numeric.py @@ -0,0 +1,154 @@ +""" +Tests for $avg accumulator special numeric value handling in $group context. + +Covers NaN propagation, Infinity dominance, and cross-type interactions +for both double and Decimal128 types. 
+""" + +from __future__ import annotations + +import math + +import pytest +from bson import Decimal128 + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import ( + DECIMAL128_INFINITY, + DECIMAL128_NAN, + DECIMAL128_NEGATIVE_INFINITY, + FLOAT_INFINITY, + FLOAT_NEGATIVE_INFINITY, +) + +# Property [NaN Propagation]: NaN is numeric and propagates to the result; +# NaN dominates Infinity and cross-type NaN promotes to Decimal128. +AVG_NAN_PROPAGATION_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "nan_propagates", + docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": float("nan")}, {"_id": 2, "v": 30}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], + msg="NaN in group should propagate to result", + ), + AccumulatorTestCase( + "all_nan", + docs=[{"_id": 0, "v": float("nan")}, {"_id": 1, "v": float("nan")}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], + msg="All NaN in group should return NaN", + ), + AccumulatorTestCase( + "nan_dominates_infinity", + docs=[{"_id": 0, "v": float("nan")}, {"_id": 1, "v": FLOAT_INFINITY}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], + msg="NaN should dominate Infinity in group", + ), + AccumulatorTestCase( + "decimal128_nan", + docs=[ + {"_id": 0, "v": Decimal128("10")}, + {"_id": 1, "v": DECIMAL128_NAN}, + {"_id": 2, "v": Decimal128("30")}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": DECIMAL128_NAN}], + 
msg="Decimal128 NaN in group should propagate", + ), + AccumulatorTestCase( + "decimal128_nan_over_infinity", + docs=[{"v": DECIMAL128_NAN}, {"v": DECIMAL128_INFINITY}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": Decimal128("NaN")}], + msg="$avg should return Decimal128 NaN when group contains Decimal128 NaN and Infinity", + ), + AccumulatorTestCase( + "cross_type_nan", + docs=[{"_id": 0, "v": float("nan")}, {"_id": 1, "v": Decimal128("5")}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": DECIMAL128_NAN}], + msg="double NaN + Decimal128 should return Decimal128 NaN", + ), +] + +# Property [Infinity]: Infinity dominates finite values, and +# Infinity + -Infinity cancels to NaN. +AVG_INFINITY_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "infinity", + docs=[{"_id": 0, "v": FLOAT_INFINITY}, {"_id": 1, "v": 10}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": FLOAT_INFINITY}], + msg="Infinity in group should propagate", + ), + AccumulatorTestCase( + "negative_infinity", + docs=[{"_id": 0, "v": FLOAT_NEGATIVE_INFINITY}, {"_id": 1, "v": 10}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": FLOAT_NEGATIVE_INFINITY}], + msg="-Infinity in group should propagate", + ), + AccumulatorTestCase( + "inf_neg_inf_cancel", + docs=[{"_id": 0, "v": FLOAT_INFINITY}, {"_id": 1, "v": FLOAT_NEGATIVE_INFINITY}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], + msg="Infinity + -Infinity in group should return NaN", + ), + AccumulatorTestCase( + "decimal128_infinity", + docs=[{"_id": 0, "v": DECIMAL128_INFINITY}, {"_id": 1, "v": Decimal128("10")}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": 
DECIMAL128_INFINITY}], + msg="Decimal128 Infinity in group should propagate", + ), + AccumulatorTestCase( + "decimal128_neg_infinity_dominates", + docs=[{"v": DECIMAL128_NEGATIVE_INFINITY}, {"v": Decimal128("5")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DECIMAL128_NEGATIVE_INFINITY}], + msg="$avg should return Decimal128 -Infinity when Decimal128 -Infinity dominates", + ), + AccumulatorTestCase( + "decimal128_inf_neg_inf_cancel", + docs=[ + {"_id": 0, "v": DECIMAL128_INFINITY}, + {"_id": 1, "v": DECIMAL128_NEGATIVE_INFINITY}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": DECIMAL128_NAN}], + msg="Decimal128 Inf + -Inf in group should return Decimal128 NaN", + ), +] + +AVG_SPECIAL_NUMERIC_TESTS: list[AccumulatorTestCase] = ( + AVG_NAN_PROPAGATION_TESTS + AVG_INFINITY_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(AVG_SPECIAL_NUMERIC_TESTS)) +def test_avg_special_numeric(collection, test_case: AccumulatorTestCase): + """Test $avg special numeric value handling in $group context.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult(result, expected=test_case.expected, msg=test_case.msg) From 7df661ee16b5515c82443686c4a53c415f436c12 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Fri, 15 May 2026 14:09:14 -0700 Subject: [PATCH 6/9] remove duplicate tests Signed-off-by: Alina (Xi) Li --- .../accumulators/avg/test_avg_field_lookup.py | 12 -------- .../avg/test_avg_group_boundaries.py | 14 ++++----- .../avg/test_avg_group_context.py | 29 +------------------ 3 files changed, 8 insertions(+), 47 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py 
b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py index ece6da7f..45173106 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py @@ -132,18 +132,6 @@ expected=[{"_id": None, "avg": None}], msg="$avg with non-existent field should return null", ), - AccumulatorTestCase( - "some_missing_field", - docs=[ - {"_id": 1, "value": 10}, - {"_id": 2}, - {"_id": 3, "value": 30}, - ], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$value"}}}], - # Missing values are ignored: (10 + 30) / 2 = 20 - expected=[{"_id": None, "avg": 20.0}], - msg="$avg should ignore documents with missing field", - ), AccumulatorTestCase( "field_resolves_to_array", docs=[ diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py index ad590544..a1ed3c86 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py @@ -279,13 +279,6 @@ expected=[{"result": DOUBLE_NEAR_MAX}], msg="$avg should handle values near maximum finite correctly", ), - AccumulatorTestCase( - id="double_near_max_pair", - docs=[{"_id": 0, "v": DOUBLE_NEAR_MAX}, {"_id": 1, "v": DOUBLE_NEAR_MAX}], - pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], - expected=[{"_id": None, "avg": float("inf")}], - msg="avg of two DOUBLE_NEAR_MAX overflows sum to inf", - ), ] # Property [Decimal128 Precision]: $avg preserves Decimal128 precision @@ -454,6 +447,13 @@ # doubles and Decimal128, and int32/int64 overflow is handled via type # promotion without error. 
AVG_OVERFLOW_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + id="overflow_double_near_max_pair", + docs=[{"_id": 0, "v": DOUBLE_NEAR_MAX}, {"_id": 1, "v": DOUBLE_NEAR_MAX}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": float("inf")}], + msg="avg of two DOUBLE_NEAR_MAX overflows sum to inf", + ), AccumulatorTestCase( id="overflow_double_max", docs=[{"v": DOUBLE_MAX}, {"v": DOUBLE_MAX}], diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py index 2bfd0ee2..b2a2ca4d 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py @@ -2,8 +2,7 @@ Tests for $avg accumulator in $group context. Covers numeric equivalence in grouping, single/empty groups, -precision edge cases, multiple groups, comparison with $sum, -and expression argument handling. +precision edge cases, multiple groups, and comparison with $sum. """ from __future__ import annotations @@ -303,38 +302,12 @@ ), ] -# Property [Expression Arguments]: $avg accepts any expression as its operand, -# evaluating it per-document before accumulation. 
-EXPRESSION_ARGS_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - id="expr_constant_literal", - docs=[{"x": 1}, {"x": 2}, {"x": 3}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": 5}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": 5.0}], - msg="$avg should return the constant value when expression is a numeric literal", - ), - AccumulatorTestCase( - id="expr_nested_add", - docs=[{"a": 2, "b": 3}, {"a": 4, "b": 6}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": {"$add": ["$a", "$b"]}}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": 7.5}], - msg="$avg should evaluate nested $add expression per-document before averaging", - ), -] - AVG_GROUP_CONTEXT_TESTS: list[AccumulatorTestCase] = ( NUMERIC_EQUIVALENCE_TESTS + SINGLE_EMPTY_GROUP_TESTS + PRECISION_EDGE_TESTS + MULTIPLE_GROUPS_TESTS + COMPARISON_WITH_RELATED_TESTS - + EXPRESSION_ARGS_TESTS ) From 616c4fe8934219a91c34330c11c99c1dbe6d9920 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Fri, 15 May 2026 14:26:52 -0700 Subject: [PATCH 7/9] rename to make tests clearer Signed-off-by: Alina (Xi) Li --- .../avg/test_avg_special_numeric.py | 60 +++++++++---------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_special_numeric.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_special_numeric.py index 5cf5a41e..05325e38 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_special_numeric.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_special_numeric.py @@ -1,7 +1,7 @@ """ Tests for $avg accumulator special numeric value handling in $group context. -Covers NaN propagation, Infinity dominance, and cross-type interactions +Covers NaN behavior, Infinity behavior, and cross-type interactions for both double and Decimal128 types. 
""" @@ -26,32 +26,32 @@ FLOAT_NEGATIVE_INFINITY, ) -# Property [NaN Propagation]: NaN is numeric and propagates to the result; -# NaN dominates Infinity and cross-type NaN promotes to Decimal128. -AVG_NAN_PROPAGATION_TESTS: list[AccumulatorTestCase] = [ +# Property [NaN]: NaN is numeric and produces NaN in the result; +# NaN with Infinity produces NaN; cross-type NaN promotes to Decimal128. +AVG_NAN_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( - "nan_propagates", + "nan_with_finite", docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": float("nan")}, {"_id": 2, "v": 30}], pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], - msg="NaN in group should propagate to result", + msg="NaN among finite values should produce NaN result", ), AccumulatorTestCase( "all_nan", docs=[{"_id": 0, "v": float("nan")}, {"_id": 1, "v": float("nan")}], pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], - msg="All NaN in group should return NaN", + msg="All NaN values should return NaN", ), AccumulatorTestCase( - "nan_dominates_infinity", + "nan_with_infinity", docs=[{"_id": 0, "v": float("nan")}, {"_id": 1, "v": FLOAT_INFINITY}], pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], - msg="NaN should dominate Infinity in group", + msg="NaN with Infinity should produce NaN", ), AccumulatorTestCase( - "decimal128_nan", + "decimal128_nan_with_finite", docs=[ {"_id": 0, "v": Decimal128("10")}, {"_id": 1, "v": DECIMAL128_NAN}, @@ -59,83 +59,81 @@ ], pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], expected=[{"_id": None, "avg": DECIMAL128_NAN}], - msg="Decimal128 NaN in group should propagate", + msg="Decimal128 NaN among finite values should produce Decimal128 NaN", ), AccumulatorTestCase( - "decimal128_nan_over_infinity", + 
"decimal128_nan_with_infinity", docs=[{"v": DECIMAL128_NAN}, {"v": DECIMAL128_INFINITY}], pipeline=[ {"$group": {"_id": None, "result": {"$avg": "$v"}}}, {"$project": {"_id": 0, "result": 1}}, ], expected=[{"result": Decimal128("NaN")}], - msg="$avg should return Decimal128 NaN when group contains Decimal128 NaN and Infinity", + msg="Decimal128 NaN with Decimal128 Infinity should produce Decimal128 NaN", ), AccumulatorTestCase( "cross_type_nan", docs=[{"_id": 0, "v": float("nan")}, {"_id": 1, "v": Decimal128("5")}], pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], expected=[{"_id": None, "avg": DECIMAL128_NAN}], - msg="double NaN + Decimal128 should return Decimal128 NaN", + msg="double NaN with Decimal128 should return Decimal128 NaN", ), ] -# Property [Infinity]: Infinity dominates finite values, and -# Infinity + -Infinity cancels to NaN. +# Property [Infinity]: Infinity with finite values produces Infinity; +# Infinity with -Infinity produces NaN. AVG_INFINITY_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( - "infinity", + "infinity_with_finite", docs=[{"_id": 0, "v": FLOAT_INFINITY}, {"_id": 1, "v": 10}], pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], expected=[{"_id": None, "avg": FLOAT_INFINITY}], - msg="Infinity in group should propagate", + msg="Infinity with finite value should produce Infinity", ), AccumulatorTestCase( - "negative_infinity", + "negative_infinity_with_finite", docs=[{"_id": 0, "v": FLOAT_NEGATIVE_INFINITY}, {"_id": 1, "v": 10}], pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], expected=[{"_id": None, "avg": FLOAT_NEGATIVE_INFINITY}], - msg="-Infinity in group should propagate", + msg="-Infinity with finite value should produce -Infinity", ), AccumulatorTestCase( - "inf_neg_inf_cancel", + "inf_and_neg_inf", docs=[{"_id": 0, "v": FLOAT_INFINITY}, {"_id": 1, "v": FLOAT_NEGATIVE_INFINITY}], pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], expected=[{"_id": None, "avg": 
pytest.approx(math.nan, nan_ok=True)}], - msg="Infinity + -Infinity in group should return NaN", + msg="Infinity with -Infinity should produce NaN", ), AccumulatorTestCase( - "decimal128_infinity", + "decimal128_infinity_with_finite", docs=[{"_id": 0, "v": DECIMAL128_INFINITY}, {"_id": 1, "v": Decimal128("10")}], pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], expected=[{"_id": None, "avg": DECIMAL128_INFINITY}], - msg="Decimal128 Infinity in group should propagate", + msg="Decimal128 Infinity with finite value should produce Decimal128 Infinity", ), AccumulatorTestCase( - "decimal128_neg_infinity_dominates", + "decimal128_neg_infinity_with_finite", docs=[{"v": DECIMAL128_NEGATIVE_INFINITY}, {"v": Decimal128("5")}], pipeline=[ {"$group": {"_id": None, "result": {"$avg": "$v"}}}, {"$project": {"_id": 0, "result": 1}}, ], expected=[{"result": DECIMAL128_NEGATIVE_INFINITY}], - msg="$avg should return Decimal128 -Infinity when Decimal128 -Infinity dominates", + msg="Decimal128 -Infinity with finite value should produce Decimal128 -Infinity", ), AccumulatorTestCase( - "decimal128_inf_neg_inf_cancel", + "decimal128_inf_and_neg_inf", docs=[ {"_id": 0, "v": DECIMAL128_INFINITY}, {"_id": 1, "v": DECIMAL128_NEGATIVE_INFINITY}, ], pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], expected=[{"_id": None, "avg": DECIMAL128_NAN}], - msg="Decimal128 Inf + -Inf in group should return Decimal128 NaN", + msg="Decimal128 Infinity with -Infinity should produce Decimal128 NaN", ), ] -AVG_SPECIAL_NUMERIC_TESTS: list[AccumulatorTestCase] = ( - AVG_NAN_PROPAGATION_TESTS + AVG_INFINITY_TESTS -) +AVG_SPECIAL_NUMERIC_TESTS: list[AccumulatorTestCase] = AVG_NAN_TESTS + AVG_INFINITY_TESTS @pytest.mark.parametrize("test_case", pytest_params(AVG_SPECIAL_NUMERIC_TESTS)) From b51ae3128458d3560c6193cfeba2bbf848c58e7e Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Fri, 15 May 2026 14:32:26 -0700 Subject: [PATCH 8/9] Avg integration tests Signed-off-by: Alina (Xi) Li --- 
.../test_accumulators_avg_integration.py | 372 ++++++++++++++++++ 1 file changed, 372 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py new file mode 100644 index 00000000..e851d31a --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py @@ -0,0 +1,372 @@ +"""Tests for $avg accumulator composed with sibling accumulators in the same $group.""" + +from __future__ import annotations + +import pytest +from bson import Decimal128, Int64 + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils.accumulator_test_case import ( # noqa: E501 + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Avg with Sum]: $avg and $sum coexist in the same $group and +# independently compute the mean and the total. $avg always returns double +# for integer inputs; $sum returns int32 when all inputs are int32. 
+AVG_WITH_SUM_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "avg_sum_single_group", + docs=[ + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + {"cat": "a", "v": 30}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "mean": {"$avg": "$v"}, + "total": {"$sum": "$v"}, + } + } + ], + expected=[{"_id": "a", "mean": 20.0, "total": 60}], + msg="$avg and $sum should independently produce mean and total", + ), + AccumulatorTestCase( + "avg_sum_multiple_groups", + docs=[ + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + {"cat": "b", "v": 5}, + {"cat": "b", "v": 15}, + {"cat": "b", "v": 25}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "mean": {"$avg": "$v"}, + "total": {"$sum": "$v"}, + } + } + ], + expected=[ + {"_id": "a", "mean": 15.0, "total": 30}, + {"_id": "b", "mean": 15.0, "total": 45}, + ], + msg="$avg and $sum should produce correct results across multiple groups", + ), + AccumulatorTestCase( + "avg_sum_null_handling_diverges", + docs=[ + {"cat": "a", "v": None}, + {"cat": "a", "v": 10}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "mean": {"$avg": "$v"}, + "total": {"$sum": "$v"}, + } + } + ], + expected=[{"_id": "a", "mean": 10.0, "total": 10}], + msg="$avg and $sum should both ignore null (avg=10.0 from one value, sum=10)", + ), + AccumulatorTestCase( + "avg_sum_all_null_diverges", + docs=[ + {"cat": "a", "v": None}, + {"cat": "a", "v": None}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "mean": {"$avg": "$v"}, + "total": {"$sum": "$v"}, + } + } + ], + expected=[{"_id": "a", "mean": None, "total": 0}], + msg="$avg returns null but $sum returns 0 when all values are null", + ), +] + +# Property [Avg with Count]: $avg of a field and $sum with constant 1 (count +# pattern) coexist, independently computing a mean and a document count. 
+AVG_WITH_COUNT_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "avg_count_basic", + docs=[ + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + {"cat": "b", "v": 5}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "mean": {"$avg": "$v"}, + "count": {"$sum": 1}, + } + } + ], + expected=[ + {"_id": "a", "mean": 15.0, "count": 2}, + {"_id": "b", "mean": 5.0, "count": 1}, + ], + msg="$avg of field and $sum(1) should independently compute mean and count", + ), + AccumulatorTestCase( + "avg_count_non_numeric_ignored_but_counted", + docs=[ + {"cat": "a", "v": "hello"}, + {"cat": "a", "v": 10}, + {"cat": "a", "v": True}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "mean": {"$avg": "$v"}, + "count": {"$sum": 1}, + } + } + ], + expected=[{"_id": "a", "mean": 10.0, "count": 3}], + msg="$avg ignores non-numeric values but $sum(1) counts all documents", + ), +] + +# Property [Avg with Min/Max]: $avg, $min, and $max coexist in the same +# $group, each independently computing the mean, minimum, and maximum. 
+AVG_WITH_MIN_MAX_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "avg_min_max_basic",
+        docs=[
+            {"cat": "a", "v": 30},
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},
+                    "lo": {"$min": "$v"},
+                    "hi": {"$max": "$v"},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "mean": 20.0, "lo": 10, "hi": 30}],  # mean = 60 / 3
+        msg="$avg, $min, and $max should independently compute mean, min, and max",
+    ),
+    AccumulatorTestCase(
+        "avg_min_max_mixed_types",
+        docs=[
+            {"cat": "a", "v": 5},
+            {"cat": "a", "v": Int64(100)},  # expected back unchanged as "hi"
+            {"cat": "a", "v": 2.5},  # expected back unchanged as "lo"
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},  # (5 + 100 + 2.5) / 3
+                    "lo": {"$min": "$v"},
+                    "hi": {"$max": "$v"},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "mean": 35.833333333333336, "lo": 2.5, "hi": Int64(100)}],
+        msg="$avg should return double while $min/$max preserve original types",
+    ),
+]
+
+# Property [Avg with First/Last]: $avg computes the mean while $first/$last
+# pick positional values from the group. A preceding $sort establishes order
+# for $first and $last; $avg is order-independent.
+AVG_WITH_FIRST_LAST_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "avg_first_last_with_sort",
+        docs=[
+            {"cat": "a", "v": 30},  # inserted out of order on purpose; $sort fixes it
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},  # ascending v: first_v expects 10, last_v expects 30
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},
+                    "first_v": {"$first": "$v"},
+                    "last_v": {"$last": "$v"},
+                }
+            },
+        ],
+        expected=[{"_id": "a", "mean": 20.0, "first_v": 10, "last_v": 30}],  # mean = 60 / 3
+        msg="$avg should compute mean while $first/$last pick sorted extremes",
+    ),
+]
+
+# Property [Avg with Push/AddToSet]: $avg computes the mean while $push
+# collects all values and $addToSet collects unique values.
+AVG_WITH_PUSH_ADDTOSET_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "avg_push_addtoset",
+        docs=[
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+            {"cat": "a", "v": 10},  # duplicate: kept in all_vals, absent from unique_vals
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},  # ascending v, matching the all_vals order below
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},
+                    "all_vals": {"$push": "$v"},
+                    "unique_vals": {"$addToSet": "$v"},
+                }
+            },
+        ],
+        expected=[
+            {
+                "_id": "a",
+                "mean": 13.333333333333334,  # (10 + 20 + 10) / 3
+                "all_vals": [10, 10, 20],
+                "unique_vals": [10, 20],  # listed in ignore_order_in by the test function
+            },
+        ],
+        msg="$avg computes mean while $push keeps all values and $addToSet keeps unique values",
+    ),
+]
+
+# Property [Avg with MergeObjects]: $avg computes the mean while
+# $mergeObjects combines per-document metadata into a single object.
+AVG_WITH_MERGE_OBJECTS_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "avg_merge_objects",
+        docs=[
+            {"cat": "a", "v": 10, "meta": {"src": "x"}},
+            {"cat": "a", "v": 20, "meta": {"quality": "high"}},  # key disjoint from doc 1
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},
+                    "merged": {"$mergeObjects": "$meta"},
+                }
+            }
+        ],
+        expected=[
+            {"_id": "a", "mean": 15.0, "merged": {"src": "x", "quality": "high"}},
+        ],
+        msg="$avg computes mean while $mergeObjects combines metadata objects",
+    ),
+]
+
+# Property [Multiple Avg Expressions]: multiple $avg accumulators in the same
+# $group independently average different fields or expressions.
+MULTIPLE_AVG_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "multiple_avg_different_fields",
+        docs=[
+            {"cat": "a", "price": 100, "qty": 2},
+            {"cat": "a", "price": 200, "qty": 3},
+            {"cat": "b", "price": 50, "qty": 10},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "avg_price": {"$avg": "$price"},
+                    "avg_qty": {"$avg": "$qty"},
+                }
+            }
+        ],
+        expected=[
+            {"_id": "a", "avg_price": 150.0, "avg_qty": 2.5},  # (100 + 200) / 2 and (2 + 3) / 2
+            {"_id": "b", "avg_price": 50.0, "avg_qty": 10.0},  # single document
+        ],
+        msg="Multiple $avg accumulators should independently average different fields",
+    ),
+    AccumulatorTestCase(
+        "multiple_avg_field_and_expression",
+        docs=[
+            {"cat": "a", "price": 100, "qty": 2},
+            {"cat": "a", "price": 200, "qty": 3},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "avg_price": {"$avg": "$price"},
+                    "avg_revenue": {"$avg": {"$multiply": ["$price", "$qty"]}},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "avg_price": 150.0, "avg_revenue": 400.0}],
+        msg="$avg should independently average a field and a computed expression",
+    ),
+]
+
+# Property [Avg Type Promotion with Sibling]: $avg promoting to Decimal128
+# does not interfere with sibling accumulators that return simpler types.
+AVG_TYPE_PROMOTION_WITH_SIBLING_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "avg_decimal128_with_int_count",
+        docs=[
+            {"cat": "a", "v": Decimal128("1.5")},
+            {"cat": "a", "v": Decimal128("2.5")},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},
+                    "count": {"$sum": 1},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "mean": Decimal128("2.0"), "count": 2}],  # (1.5 + 2.5) / 2
+        msg="$avg promoting to Decimal128 should not affect sibling $sum(1) returning int32",
+    ),
+]
+
+AVG_INTEGRATION_TESTS = (
+    AVG_WITH_SUM_TESTS
+    + AVG_WITH_COUNT_TESTS
+    + AVG_WITH_MIN_MAX_TESTS
+    + AVG_WITH_FIRST_LAST_TESTS
+    + AVG_WITH_PUSH_ADDTOSET_TESTS
+    + AVG_WITH_MERGE_OBJECTS_TESTS
+    + MULTIPLE_AVG_TESTS
+    + AVG_TYPE_PROMOTION_WITH_SIBLING_TESTS
+)
+
+
+@pytest.mark.parametrize("test_case", pytest_params(AVG_INTEGRATION_TESTS))
+def test_accumulators_avg_integration(collection, test_case: AccumulatorTestCase):
+    """Test $avg accumulator composed with sibling accumulators in the same $group."""
+    if test_case.docs:  # skip the insert when a case supplies no documents
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline or [], "cursor": {}},
+    )
+    assertResult(
+        result,
+        expected=test_case.expected,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+        ignore_doc_order=True,  # no pipeline in AVG_INTEGRATION_TESTS sorts after $group
+        ignore_order_in=["unique_vals"],  # the $addToSet output field of avg_push_addtoset
+    )
From 0f86d6e6b03ac72e2542af9e9e08374842e7325e Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li"
Date: Fri, 15 May 2026 14:40:11 -0700
Subject: [PATCH 9/9] style changes

Signed-off-by: Alina (Xi) Li
---
 .../accumulators/test_accumulators_avg_integration.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py
index e851d31a..5b4b9666 100644
--- 
a/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py
@@ -299,22 +299,22 @@
         msg="Multiple $avg accumulators should independently average different fields",
     ),
     AccumulatorTestCase(
-        "multiple_avg_field_and_expression",
+        "multiple_avg_different_expressions",
         docs=[
-            {"cat": "a", "price": 100, "qty": 2},
-            {"cat": "a", "price": 200, "qty": 3},
+            {"cat": "a", "price": 100, "qty": 2, "revenue": 200},  # revenue = price * qty
+            {"cat": "a", "price": 200, "qty": 3, "revenue": 600},  # revenue = price * qty
         ],
         pipeline=[
             {
                 "$group": {
                     "_id": "$cat",
                     "avg_price": {"$avg": "$price"},
-                    "avg_revenue": {"$avg": {"$multiply": ["$price", "$qty"]}},
+                    "avg_revenue": {"$avg": "$revenue"},  # stored field replaces $multiply
                 }
             }
         ],
         expected=[{"_id": "a", "avg_price": 150.0, "avg_revenue": 400.0}],
-        msg="$avg should independently average a field and a computed expression",
+        msg="Multiple $avg accumulators should independently average price and stored revenue",
     ),
 ]