Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* [ENHANCEMENT] Tenant Federation: Add a local cache to regex resolver. #7363
* [ENHANCEMENT] Query Scheduler: Add `cortex_query_scheduler_tracked_requests` metric to track the current number of requests held by the scheduler. #7355
* [BUGFIX] Fix nil when ingester_query_max_attempts > 1. #7369
* [BUGFIX] Querier: Fix queryWithRetry and labelsWithRetry returning (nil, nil) on cancelled context by propagating ctx.Err(). #7370

## 1.21.0 in progress

Expand Down
26 changes: 15 additions & 11 deletions pkg/querier/distributor_queryable.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,16 +166,6 @@ func (q *distributorQuerier) streamingSelect(ctx context.Context, sortSeries, pa
return storage.ErrSeriesSet(err)
}

// Guard against nil results. This can happen when queryWithRetry's
// backoff loop exits without executing (e.g., context cancelled before
// the first attempt), returning (nil, nil). See #7364.
if results == nil {
if err != nil {
return storage.ErrSeriesSet(err)
}
return storage.EmptySeriesSet()
}

serieses := make([]storage.Series, 0, len(results.Chunkseries))
for _, result := range results.Chunkseries {
// Sometimes the ingester can send series that have no data.
Expand Down Expand Up @@ -236,6 +226,13 @@ func (q *distributorQuerier) queryWithRetry(ctx context.Context, queryFunc func(
retries.Wait()
}

// If the loop never executed (e.g. context cancelled before the first
// attempt), result and err are both nil. Return the context error so
// callers don't receive a nil result with no error.
if err == nil {
err = ctx.Err()
}

return result, err
}

Expand Down Expand Up @@ -299,7 +296,7 @@ func (q *distributorQuerier) LabelNames(ctx context.Context, hints *storage.Labe
}

func (q *distributorQuerier) labelsWithRetry(ctx context.Context, labelsFunc func() ([]string, error)) ([]string, error) {
if q.ingesterQueryMaxAttempts == 1 {
if q.ingesterQueryMaxAttempts <= 1 {
return labelsFunc()
}

Expand All @@ -322,6 +319,13 @@ func (q *distributorQuerier) labelsWithRetry(ctx context.Context, labelsFunc fun
retries.Wait()
}

// If the loop never executed (e.g. context cancelled before the first
// attempt), result and err are both nil. Return the context error so
// callers don't receive a nil result with no error.
if err == nil {
err = ctx.Err()
}

return result, err
}

Expand Down
44 changes: 33 additions & 11 deletions pkg/querier/distributor_queryable_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -418,9 +418,8 @@ func TestDistributorQuerier_Select_CancelledContext_NoRetry(t *testing.T) {
//
// When ingesterQueryMaxAttempts > 1 and the context is cancelled before the
// retry loop starts (e.g. query timeout or another querier goroutine failing),
// backoff.Ongoing() returns false immediately. The result variable stays nil,
// queryWithRetry returns (nil, nil), and streamingSelect dereferences the nil
// result at line 169 → panic.
// backoff.Ongoing() returns false immediately. queryWithRetry now propagates
// ctx.Err() so callers always see a non-nil error.
func TestDistributorQuerier_Select_CancelledContext(t *testing.T) {
t.Parallel()

Expand All @@ -440,14 +439,37 @@ func TestDistributorQuerier_Select_CancelledContext(t *testing.T) {
querier, err := queryable.Querier(mint, maxt)
require.NoError(t, err)

// This should NOT panic. Before the fix, the cancelled context causes
// queryWithRetry to return (nil, nil), and streamingSelect dereferences
// the nil result: panic: runtime error: invalid memory address or nil
// pointer dereference [signal SIGSEGV ... addr=0x8]
require.NotPanics(t, func() {
seriesSet := querier.Select(ctx, true, &storage.SelectHints{Start: mint, End: maxt})
// With a cancelled context, we expect either an error or an empty result.
_ = seriesSet.Err()
seriesSet := querier.Select(ctx, true, &storage.SelectHints{Start: mint, End: maxt})
require.ErrorIs(t, seriesSet.Err(), context.Canceled)
}

// TestDistributorQuerier_Labels_CancelledContext verifies that labelsWithRetry
// propagates ctx.Err() when the context is cancelled before the retry loop
// executes.
func TestDistributorQuerier_Labels_CancelledContext(t *testing.T) {
t.Parallel()

ctx := user.InjectOrgID(context.Background(), "0")
ctx, cancel := context.WithCancel(ctx)
cancel()

d := &MockDistributor{}

ingesterQueryMaxAttempts := 2
queryable := newDistributorQueryable(d, true, true, batch.NewChunkMergeIterator, 0, func(string) bool {
return true
}, ingesterQueryMaxAttempts)
querier, err := queryable.Querier(mint, maxt)
require.NoError(t, err)

t.Run("LabelNames", func(t *testing.T) {
_, _, err := querier.LabelNames(ctx, nil)
require.ErrorIs(t, err, context.Canceled)
})

t.Run("LabelValues", func(t *testing.T) {
_, _, err := querier.LabelValues(ctx, "foo", nil)
require.ErrorIs(t, err, context.Canceled)
})
}

Expand Down
Loading