diff --git a/Cotabby/Models/SuggestionModels.swift b/Cotabby/Models/SuggestionModels.swift
index 9daa148..761c892 100644
--- a/Cotabby/Models/SuggestionModels.swift
+++ b/Cotabby/Models/SuggestionModels.swift
@@ -43,20 +43,22 @@ enum SuggestionWordCountPreset: String, CaseIterable, Equatable, Hashable, Senda
     }
 
     /// Token budget is the sole governor of completion length on the local model (the in-prompt
-    /// word-range cue was removed), so it must track the upper word bound closely. Sized at
-    /// ~1.5x the upper word count to leave headroom for multi-token words (contractions, proper
-    /// nouns, punctuation) without overrunning the preset. The earlier 50% bump (17/27/45) let
-    /// completions blow past the setting, e.g. ~12 words on the shortest preset (#271).
+    /// word-range cue was removed), so it must track the upper word bound closely. English BPE
+    /// averages ~1.3 tokens per word, so these are sized at ~1.25x the upper word count: the cap
+    /// lands at or just under the upper bound instead of past it. History: a ~1.5x sizing
+    /// (6/11/18/30) still overran because real prose uses fewer tokens per word than that assumed,
+    /// and an earlier 50% bump overran further, e.g. ~12 words on the shortest preset (#271). When
+    /// unsure, bias shorter; a clipped suggestion is cheaper than one that blows past the setting.
     var suggestedPredictionTokenBudget: Int {
         switch self {
         case .twoToFour:
-            return 6
+            return 5
         case .fourToSeven:
-            return 11
+            return 9
         case .sevenToTwelve:
-            return 18
+            return 15
         case .twelveToTwenty:
-            return 30
+            return 25
         }
     }
 }
@@ -101,8 +103,10 @@ struct SuggestionConfiguration: Equatable, Sendable {
     /// The configuration shipped by the app today.
     /// These are product defaults, not temporary debug overrides.
     static let standard = SuggestionConfiguration(
-        // Keep completions short so ghost text stays fast and easy to accept.
-        maxPredictionTokens: 8,
+        // Floor for the per-request token budget (see SuggestionRequestFactory.activeMaxPredictionTokens).
+        // Held at the smallest word-count preset (2-4 words) so that preset's budget governs instead
+        // of being silently raised; keeps ghost text short, fast, and easy to accept.
+        maxPredictionTokens: 5,
         // Aggressive debounce: 20ms keeps time-to-first-suggestion low while still collapsing
         // bursts (superseded generations are cancelled; the host-publish poll absorbs AX lag).
         debounceMilliseconds: 20,
diff --git a/CotabbyTests/ModelAndPresentationValueTests.swift b/CotabbyTests/ModelAndPresentationValueTests.swift
index 77db606..fd576f8 100644
--- a/CotabbyTests/ModelAndPresentationValueTests.swift
+++ b/CotabbyTests/ModelAndPresentationValueTests.swift
@@ -40,16 +40,16 @@ final class SuggestionTextColorCodecTests: XCTestCase {
 final class SuggestionModelValueTests: XCTestCase {
     func test_wordCountPresetsExposeMatchingPromptInstructionsAndTokenBudgets() {
         XCTAssertEqual(SuggestionWordCountPreset.twoToFour.promptInstruction, "Return only the next 2 to 4 words.")
-        XCTAssertEqual(SuggestionWordCountPreset.twoToFour.suggestedPredictionTokenBudget, 6)
+        XCTAssertEqual(SuggestionWordCountPreset.twoToFour.suggestedPredictionTokenBudget, 5)
 
         XCTAssertEqual(SuggestionWordCountPreset.fourToSeven.promptInstruction, "Return only the next 4 to 7 words.")
-        XCTAssertEqual(SuggestionWordCountPreset.fourToSeven.suggestedPredictionTokenBudget, 11)
+        XCTAssertEqual(SuggestionWordCountPreset.fourToSeven.suggestedPredictionTokenBudget, 9)
 
         XCTAssertEqual(SuggestionWordCountPreset.sevenToTwelve.promptInstruction, "Return only the next 7 to 12 words.")
-        XCTAssertEqual(SuggestionWordCountPreset.sevenToTwelve.suggestedPredictionTokenBudget, 18)
+        XCTAssertEqual(SuggestionWordCountPreset.sevenToTwelve.suggestedPredictionTokenBudget, 15)
 
         XCTAssertEqual(SuggestionWordCountPreset.twelveToTwenty.promptInstruction, "Return only the next 12 to 20 words.")
-        XCTAssertEqual(SuggestionWordCountPreset.twelveToTwenty.suggestedPredictionTokenBudget, 30)
+        XCTAssertEqual(SuggestionWordCountPreset.twelveToTwenty.suggestedPredictionTokenBudget, 25)
     }
 
     func test_activeSuggestionSession_clampsConsumedCountAndSlicesByCharacters() {
diff --git a/CotabbyTests/SuggestionRequestFactoryTests.swift b/CotabbyTests/SuggestionRequestFactoryTests.swift
index ad8abe0..9b635a1 100644
--- a/CotabbyTests/SuggestionRequestFactoryTests.swift
+++ b/CotabbyTests/SuggestionRequestFactoryTests.swift
@@ -167,7 +167,7 @@ final class SuggestionRequestFactoryTests: XCTestCase {
             result.request.completionLengthInstruction,
             "Return only the next 12 to 20 words."
         )
-        XCTAssertEqual(result.request.maxPredictionTokens, 30)
+        XCTAssertEqual(result.request.maxPredictionTokens, 25)
         XCTAssertEqual(result.promptPreview, result.request.prompt)
     }
 