Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions src/platform/endpoint/node/automodeService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import { IExperimentationService } from '../../telemetry/common/nullExperimentat
import { ITelemetryService } from '../../telemetry/common/telemetry';
import { ICAPIClientService } from '../common/capiClient';
import { AutoChatEndpoint } from './autoChatEndpoint';
import { RouterDecisionFetcher } from './routerDecisionFetcher';
import { RouterDecisionFetcher, RoutingContextSignals } from './routerDecisionFetcher';

interface AutoModeAPIResponse {
available_models: string[];
Expand Down Expand Up @@ -145,7 +145,7 @@ export interface IAutomodeService {

export class AutomodeService extends Disposable implements IAutomodeService {
readonly _serviceBrand: undefined;
private readonly _autoModelCache: Map<string, { endpoint: AutoChatEndpoint; tokenBank: AutoModeTokenBank; lastSessionToken?: string; lastRoutedPrompt?: string }> = new Map();
private readonly _autoModelCache: Map<string, { endpoint: AutoChatEndpoint; tokenBank: AutoModeTokenBank; lastSessionToken?: string; lastRoutedPrompt?: string; turnCount: number }> = new Map();
private _reserveTokens: DisposableMap<ChatLocation, AutoModeTokenBank> = new DisposableMap();
private readonly _routerDecisionFetcher: RouterDecisionFetcher;

Expand Down Expand Up @@ -230,7 +230,14 @@ export class AutomodeService extends Disposable implements IAutomodeService {
// Router fallback reason isn't set here because we don't want telemetry for this case
} else {
try {
const result = await this._routerDecisionFetcher.getRouterDecision(prompt, token.session_token, token.available_models);
const contextSignals: RoutingContextSignals = {
session_id: conversationId !== 'unknown' ? conversationId : undefined,
reference_count: chatRequest?.references?.length,
Comment thread
aashna marked this conversation as resolved.
prompt_char_count: prompt.length,
previous_model: entry?.endpoint?.model,
turn_number: (entry?.turnCount ?? 0) + 1,
Comment thread
aashna marked this conversation as resolved.
};
Comment thread
aashna marked this conversation as resolved.
const result = await this._routerDecisionFetcher.getRouterDecision(prompt, token.session_token, token.available_models, undefined, contextSignals);
Comment thread
aashna marked this conversation as resolved.
if (!result.candidate_models.length) {
routerFallbackReason = 'emptyCandidateList';
} else if (entry?.endpoint) {
Expand Down Expand Up @@ -289,7 +296,8 @@ export class AutomodeService extends Disposable implements IAutomodeService {
? cachedEndpoint
: this._instantiationService.createInstance(AutoChatEndpoint, selectedModel, token.session_token, token.discounted_costs?.[selectedModel.model] || 0, this._calculateDiscountRange(token.discounted_costs));

this._autoModelCache.set(conversationId, { endpoint: autoEndpoint, tokenBank, lastSessionToken: token.session_token, lastRoutedPrompt });
const isNewTurn = !entry || lastRoutedPrompt !== entry.lastRoutedPrompt;
this._autoModelCache.set(conversationId, { endpoint: autoEndpoint, tokenBank, lastSessionToken: token.session_token, lastRoutedPrompt, turnCount: (entry?.turnCount ?? 0) + (isNewTurn ? 1 : 0) });
return autoEndpoint;
}

Expand Down
11 changes: 9 additions & 2 deletions src/platform/endpoint/node/routerDecisionFetcher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@ export interface RouterDecisionResponse {
sticky_override?: boolean;
}

/**
 * Optional per-request context signals sent to the model-routing
 * classification API alongside the prompt, giving the router
 * conversation-level information for its model choice.
 *
 * All fields are optional; the caller spreads this object into the
 * request body, so absent fields are simply omitted from the payload.
 */
export interface RoutingContextSignals {
	/** 1-based index of the current routed turn within the conversation. */
	turn_number?: number;
	/** Conversation/session identifier, when known (omitted for 'unknown'). */
	session_id?: string;
	/** Model id the router selected on the previous turn, if any was cached. */
	previous_model?: string;
	/** Number of references attached to the chat request. */
	reference_count?: number;
	/** Length of the prompt, in characters. */
	prompt_char_count?: number;
}
Comment thread
aashna marked this conversation as resolved.

/**
* Fetches routing decisions from a classification API to determine which model should handle a query.
Expand All @@ -41,9 +48,9 @@ export class RouterDecisionFetcher {
) {
}

async getRouterDecision(query: string, autoModeToken: string, availableModels: string[], stickyThreshold?: number): Promise<RouterDecisionResponse> {
async getRouterDecision(query: string, autoModeToken: string, availableModels: string[], stickyThreshold?: number, contextSignals?: RoutingContextSignals): Promise<RouterDecisionResponse> {
const startTime = Date.now();
const requestBody: Record<string, unknown> = { prompt: query, available_models: availableModels };
const requestBody: Record<string, unknown> = { prompt: query, available_models: availableModels, ...contextSignals };
Comment thread
aashna marked this conversation as resolved.
if (stickyThreshold !== undefined) {
requestBody.sticky_threshold = stickyThreshold;
}
Expand Down
53 changes: 53 additions & 0 deletions src/platform/endpoint/node/test/automodeService.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,59 @@ describe('AutomodeService', () => {
expect(result.model).toBe('gpt-4o');
});

	// Verifies that resolveAutoModeEndpoint forwards conversation context
	// signals (turn number, session id, reference count, prompt length) in
	// the router request body, alongside the prompt itself.
	it('should include context signals in router request body', async () => {
		enableRouter();

		const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');

		// Capture the raw body of the ModelRouter request so its JSON payload
		// can be asserted on below; any other request gets a canned auto-mode
		// token response instead.
		let capturedBody: string | undefined;
		(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((req: any, opts: any) => {
			if (opts?.type === RequestType.ModelRouter) {
				capturedBody = req.body;
				return Promise.resolve({
					ok: true,
					text: vi.fn().mockResolvedValue(JSON.stringify({
						predicted_label: 'needs_reasoning',
						confidence: 0.85,
						latency_ms: 50,
						chosen_model: 'gpt-4o',
						candidate_models: ['gpt-4o', 'gpt-4o-mini'],
						scores: { needs_reasoning: 0.85, no_reasoning: 0.15 },
						sticky_override: false
					}))
				});
			}
			// Non-router request: return a valid auto-mode session token.
			return Promise.resolve({
				ok: true,
				json: vi.fn().mockResolvedValue({
					available_models: ['gpt-4o', 'gpt-4o-mini'],
					expires_at: Math.floor(Date.now() / 1000) + 3600,
					session_token: 'test-token'
				})
			});
		});

		automodeService = createService();

		// One attached reference and a known session id so the derived
		// signals (reference_count, session_id) have predictable values.
		const chatRequest: Partial<ChatRequest> = {
			location: ChatLocation.Panel,
			prompt: 'test prompt',
			references: [{ id: 'ref1', value: 'some ref' } as any],
			sessionId: 'test-session-123',
		};

		await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [mockChatEndpoint, gpt4oEndpoint]);

		expect(capturedBody).toBeDefined();
		const parsed = JSON.parse(capturedBody!);
		expect(parsed.prompt).toBe('test prompt');
		expect(parsed.prompt_char_count).toBe('test prompt'.length);
		expect(parsed.reference_count).toBe(1);
		// First routed turn in a fresh conversation.
		expect(parsed.turn_number).toBe(1);
		expect(parsed.session_id).toBe('test-session-123');
		// No prior routing decision is cached, so no previous_model signal.
		expect(parsed.previous_model).toBeUndefined();
	});

it('should not use router when routing is not enabled', async () => {
// Routing not enabled via UseAutoModeRouting config
automodeService = createService();
Expand Down
Loading