diff --git a/CHANGELOG.md b/CHANGELOG.md index 6182531869..118077e1e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ * [ENHANCEMENT] Tenant Federation: Add a local cache to regex resolver. #7363 * [ENHANCEMENT] Query Scheduler: Add `cortex_query_scheduler_tracked_requests` metric to track the current number of requests held by the scheduler. #7355 * [BUGFIX] Fix nil when ingester_query_max_attempts > 1. #7369 +* [BUGFIX] Querier: Fix queryWithRetry and labelsWithRetry returning (nil, nil) on cancelled context by propagating ctx.Err(). #7370 ## 1.21.0 in progress diff --git a/pkg/querier/distributor_queryable.go b/pkg/querier/distributor_queryable.go index 6ab0b592bf..8d82cd7887 100644 --- a/pkg/querier/distributor_queryable.go +++ b/pkg/querier/distributor_queryable.go @@ -166,16 +166,6 @@ func (q *distributorQuerier) streamingSelect(ctx context.Context, sortSeries, pa return storage.ErrSeriesSet(err) } - // Guard against nil results. This can happen when queryWithRetry's - // backoff loop exits without executing (e.g., context cancelled before - // the first attempt), returning (nil, nil). See #7364. - if results == nil { - if err != nil { - return storage.ErrSeriesSet(err) - } - return storage.EmptySeriesSet() - } - serieses := make([]storage.Series, 0, len(results.Chunkseries)) for _, result := range results.Chunkseries { // Sometimes the ingester can send series that have no data. @@ -236,6 +226,13 @@ func (q *distributorQuerier) queryWithRetry(ctx context.Context, queryFunc func( retries.Wait() } + // If the loop never executed (e.g. context cancelled before the first + // attempt), result and err are both nil. Return the context error so + // callers don't receive a nil result with no error. + if err == nil { + err = ctx.Err() + } + return result, err } @@ -299,7 +296,7 @@ func (q *distributorQuerier) LabelNames(ctx context.Context, hints *storage.Labe } func (q *distributorQuerier) labelsWithRetry(ctx context.Context, labelsFunc func() ([]string, error)) ([]string, error) { - if q.ingesterQueryMaxAttempts == 1 { + if q.ingesterQueryMaxAttempts <= 1 { return labelsFunc() } @@ -322,6 +319,13 @@ func (q *distributorQuerier) labelsWithRetry(ctx context.Context, labelsFunc fun retries.Wait() } + // If the loop never executed (e.g. context cancelled before the first + // attempt), result and err are both nil. Return the context error so + // callers don't receive a nil result with no error. + if err == nil { + err = ctx.Err() + } + return result, err } diff --git a/pkg/querier/distributor_queryable_test.go b/pkg/querier/distributor_queryable_test.go index cbec09d8bc..7ec3f583a1 100644 --- a/pkg/querier/distributor_queryable_test.go +++ b/pkg/querier/distributor_queryable_test.go @@ -418,9 +418,8 @@ func TestDistributorQuerier_Select_CancelledContext_NoRetry(t *testing.T) { // // When ingesterQueryMaxAttempts > 1 and the context is cancelled before the // retry loop starts (e.g. query timeout or another querier goroutine failing), -// backoff.Ongoing() returns false immediately. The result variable stays nil, -// queryWithRetry returns (nil, nil), and streamingSelect dereferences the nil -// result at line 169 → panic. +// backoff.Ongoing() returns false immediately. queryWithRetry now propagates +// ctx.Err() so callers always see a non-nil error. func TestDistributorQuerier_Select_CancelledContext(t *testing.T) { t.Parallel() @@ -440,14 +439,37 @@ func TestDistributorQuerier_Select_CancelledContext(t *testing.T) { querier, err := queryable.Querier(mint, maxt) require.NoError(t, err) - // This should NOT panic. Before the fix, the cancelled context causes - // queryWithRetry to return (nil, nil), and streamingSelect dereferences - // the nil result: panic: runtime error: invalid memory address or nil - // pointer dereference [signal SIGSEGV ... addr=0x8] - require.NotPanics(t, func() { - seriesSet := querier.Select(ctx, true, &storage.SelectHints{Start: mint, End: maxt}) - // With a cancelled context, we expect either an error or an empty result. - _ = seriesSet.Err() + seriesSet := querier.Select(ctx, true, &storage.SelectHints{Start: mint, End: maxt}) + require.ErrorIs(t, seriesSet.Err(), context.Canceled) +} + +// TestDistributorQuerier_Labels_CancelledContext verifies that labelsWithRetry +// propagates ctx.Err() when the context is cancelled before the retry loop +// executes. +func TestDistributorQuerier_Labels_CancelledContext(t *testing.T) { + t.Parallel() + + ctx := user.InjectOrgID(context.Background(), "0") + ctx, cancel := context.WithCancel(ctx) + cancel() + + d := &MockDistributor{} + + ingesterQueryMaxAttempts := 2 + queryable := newDistributorQueryable(d, true, true, batch.NewChunkMergeIterator, 0, func(string) bool { + return true + }, ingesterQueryMaxAttempts) + querier, err := queryable.Querier(mint, maxt) + require.NoError(t, err) + + t.Run("LabelNames", func(t *testing.T) { + _, _, err := querier.LabelNames(ctx, nil) + require.ErrorIs(t, err, context.Canceled) + }) + + t.Run("LabelValues", func(t *testing.T) { + _, _, err := querier.LabelValues(ctx, "foo", nil) + require.ErrorIs(t, err, context.Canceled) }) }