From da4925a5dd2c8a38d74d97d3cd12d56ef83495da Mon Sep 17 00:00:00 2001 From: Jacob Fu <141651335+FuJacob@users.noreply.github.com> Date: Sun, 31 May 2026 12:03:42 -0700 Subject: [PATCH 1/2] Wire generation-time quality controls into the app Point the CotabbyInference package at the engine branch that adds the token masks, mid-word continuation, and KV snapshot APIs, and use them: - The always-on nonprintable token mask now applies automatically (control, chat-template, and unused tokens can no longer be emitted as visible text), with no app code beyond the pin bump. - single_line is set from the focused field (LlamaGenerationOptions.singleLine = !isMultiLineEnabled) so single-line fields never receive a multi-line completion at the source instead of being truncated after the fact. - forceWordContinuation fires only when the caret sits strictly inside a word (MidWordContinuationPolicy), so the engine constrains the first token to continue that word without affecting ordinary next-word predictions. Threads singleLine / forceWordContinuation through LlamaGenerationOptions into LlamaRuntimeCore (sampling config + setForceWordContinuation before each decodePrompt, fresh and reuse paths). Adds MidWordContinuationPolicy + tests. 
--- Cotabby.xcodeproj/project.pbxproj | 10 +++++- Cotabby/Models/LlamaRuntimeModels.swift | 5 +++ .../Services/Runtime/LlamaRuntimeCore.swift | 10 +++++- .../Runtime/LlamaSuggestionEngine.swift | 7 ++++- .../Support/MidWordContinuationPolicy.swift | 27 ++++++++++++++++ .../MidWordContinuationPolicyTests.swift | 31 +++++++++++++++++++ project.yml | 2 +- 7 files changed, 88 insertions(+), 4 deletions(-) create mode 100644 Cotabby/Support/MidWordContinuationPolicy.swift create mode 100644 CotabbyTests/MidWordContinuationPolicyTests.swift diff --git a/Cotabby.xcodeproj/project.pbxproj b/Cotabby.xcodeproj/project.pbxproj index 713eea1f..e7aae8c3 100644 --- a/Cotabby.xcodeproj/project.pbxproj +++ b/Cotabby.xcodeproj/project.pbxproj @@ -121,6 +121,7 @@ 76FD91607794883F8E121450 /* CaretGeometrySelector.swift in Sources */ = {isa = PBXBuildFile; fileRef = E3C84377F352140759B448C9 /* CaretGeometrySelector.swift */; }; 78FAE5DB691A1B71042B9D20 /* AboutPaneView.swift in Sources */ = {isa = PBXBuildFile; fileRef = A3FA53BBC3D81503C1D17477 /* AboutPaneView.swift */; }; 7B6A63F5DCC2C163CDFD2A5C /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = BC4F887528AE74AC0DD30314 /* Assets.xcassets */; }; + 7C36DBA762E19C8C31676D44 /* MidWordContinuationPolicyTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1274F897631B1B3A835D157F /* MidWordContinuationPolicyTests.swift */; }; 7C94725B4837DEC9ECF1BC54 /* CompletionRenderMode.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5A03E565A11581FD2150B142 /* CompletionRenderMode.swift */; }; 7D6BB9AF72F7076A4E5EE96F /* DownloadableModelCatalogView.swift in Sources */ = {isa = PBXBuildFile; fileRef = BB5C2AE9A7E55495D26AD074 /* DownloadableModelCatalogView.swift */; }; 7E9413CE7C999C4612348248 /* SuggestionSessionReconcilerTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9C8F07AC52C7A482F5FE34C5 /* SuggestionSessionReconcilerTests.swift */; }; @@ -150,6 +151,7 @@ 98E2E14A069384C1088CDB44 /* 
PromptContextSanitizer.swift in Sources */ = {isa = PBXBuildFile; fileRef = FA4B45B91D4DEAC979C3113E /* PromptContextSanitizer.swift */; }; 9ABF75CDA78B27453C3F5B34 /* WelcomeView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 264CA64B2AB1611F82E5B760 /* WelcomeView.swift */; }; 9ADFFF634912F638D079E1C7 /* SentenceBoundaryClassifier.swift in Sources */ = {isa = PBXBuildFile; fileRef = D4B56C250DDEF3E81F9DCBD7 /* SentenceBoundaryClassifier.swift */; }; + 9CEBD6AF4405F1BBE0E3D16C /* MidWordContinuationPolicy.swift in Sources */ = {isa = PBXBuildFile; fileRef = 357C18383B047F24A531BDCD /* MidWordContinuationPolicy.swift */; }; 9F2FDCABCC941CBECAA3B4AB /* CotabbyInference in Frameworks */ = {isa = PBXBuildFile; productRef = 48A46AD6B613CF06072603E4 /* CotabbyInference */; }; A0657CE0488F69F0BD559CBC /* SuggestionCoordinator+Acceptance.swift in Sources */ = {isa = PBXBuildFile; fileRef = 72B13136DF7318F3E96DF0D3 /* SuggestionCoordinator+Acceptance.swift */; }; A0BB87E3665EF6C209034798 /* GhostSuggestionLayoutTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5AD3F4F9FBE82007E4E15F58 /* GhostSuggestionLayoutTests.swift */; }; @@ -255,6 +257,7 @@ 0D80CC2CCAAFE3F23FB8C37A /* PromptContextSanitizerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PromptContextSanitizerTests.swift; sourceTree = ""; }; 0F5E263AB69029D5E13D5EE8 /* FocusDebugOverlayController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FocusDebugOverlayController.swift; sourceTree = ""; }; 110CB0B53016644EF7840301 /* HuggingFaceAPIClient.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HuggingFaceAPIClient.swift; sourceTree = ""; }; + 1274F897631B1B3A835D157F /* MidWordContinuationPolicyTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MidWordContinuationPolicyTests.swift; sourceTree = ""; }; 12DD19BCE610808F1E38702D /* PermissionOverlayTrackerTests.swift 
*/ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PermissionOverlayTrackerTests.swift; sourceTree = ""; }; 1441B2D89DAE6878DAD11F17 /* EmojiMatcher.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EmojiMatcher.swift; sourceTree = ""; }; 18D990E515E1AE4F312F4E95 /* BundledRuntimeLocatorTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BundledRuntimeLocatorTests.swift; sourceTree = ""; }; @@ -287,6 +290,7 @@ 335BF59EE80F3A0143B79740 /* GhostFontSizeStabilizerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GhostFontSizeStabilizerTests.swift; sourceTree = ""; }; 3384FD33776960103D6E22A9 /* EmojiPickerView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EmojiPickerView.swift; sourceTree = ""; }; 352AF5B2834FEE1F597394E4 /* ApplicationBundleMetadata.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ApplicationBundleMetadata.swift; sourceTree = ""; }; + 357C18383B047F24A531BDCD /* MidWordContinuationPolicy.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MidWordContinuationPolicy.swift; sourceTree = ""; }; 3609CC88A5280B3AA40414DF /* SuggestionAvailabilityEvaluator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionAvailabilityEvaluator.swift; sourceTree = ""; }; 384FBCF5D7A3A446C5BE2B8D /* SuggestionEngineRouter.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionEngineRouter.swift; sourceTree = ""; }; 386C98FFCF76EC1C8C7E82BB /* SuggestionModels.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionModels.swift; sourceTree = ""; }; @@ -704,6 +708,7 @@ 5807E8508D9355D0271A00C5 /* LaunchAtLoginStateTests.swift */, 3009812A35A1CDEF16295AB7 /* LlamaPromptRendererTests.swift */, 52BAFA2F989C3C4F7FB892B5 /* 
MarkerSelectionSynthesizerTests.swift */, + 1274F897631B1B3A835D157F /* MidWordContinuationPolicyTests.swift */, FC83D14A7557BC0196E59007 /* MirrorOverlayLayoutTests.swift */, 03766F6253FF17639230C0F6 /* ModelAndPresentationValueTests.swift */, A829F28F01FAE76CA7244BBC /* ModelFileValidatorTests.swift */, @@ -849,6 +854,7 @@ B5679E08C9A09065531C37B5 /* LlamaPromptRenderer.swift */, 8D610FCA3A97249DCCE7D0B8 /* LLMIOFileHandler.swift */, A863F41C0C03D7B4AC5DC002 /* MarkerSelectionSynthesizer.swift */, + 357C18383B047F24A531BDCD /* MidWordContinuationPolicy.swift */, 54150A507B03221F137D539B /* MirrorOverlayLayout.swift */, 24F613F0E2F7046E6532A09C /* OnboardingTemplateFeatureList.swift */, FA878B447441BB4F3E327CC8 /* OnboardingTemplateRecommender.swift */, @@ -1074,6 +1080,7 @@ 0333B3CE8F189DD1BEC4AD26 /* MenuBarSections.swift in Sources */, AECC7289DA796B071B4FE3C0 /* MenuBarStatusLabelView.swift in Sources */, 5E92E3C1EB41D482FC06BC52 /* MenuBarView.swift in Sources */, + 9CEBD6AF4405F1BBE0E3D16C /* MidWordContinuationPolicy.swift in Sources */, 31515DDD173535C4AC777853 /* MirrorOverlayLayout.swift in Sources */, 2F227738D7834B1A7A81D1D6 /* ModelDownloadManager.swift in Sources */, 317883210D1D1D5CD654E562 /* ModelFileValidator.swift in Sources */, @@ -1184,6 +1191,7 @@ E27E6377D36D4981301568DD /* LaunchAtLoginStateTests.swift in Sources */, 190C571B3CDFE117F4D15484 /* LlamaPromptRendererTests.swift in Sources */, 87806DE08881D11F2608A13D /* MarkerSelectionSynthesizerTests.swift in Sources */, + 7C36DBA762E19C8C31676D44 /* MidWordContinuationPolicyTests.swift in Sources */, 14D77F0B8A195AC2FA8D24A9 /* MirrorOverlayLayoutTests.swift in Sources */, 25D4FC8D191A50F63E6391F9 /* ModelAndPresentationValueTests.swift in Sources */, 65478B0DABF5460C32D4C458 /* ModelFileValidatorTests.swift in Sources */, @@ -1519,7 +1527,7 @@ isa = XCRemoteSwiftPackageReference; repositoryURL = "https://github.com/FuJacob/cotabbyinference.git"; requirement = { - branch = main; + branch = 
"feat/generation-quality-controls"; kind = branch; }; }; diff --git a/Cotabby/Models/LlamaRuntimeModels.swift b/Cotabby/Models/LlamaRuntimeModels.swift index f20f6763..7cfc1e52 100644 --- a/Cotabby/Models/LlamaRuntimeModels.swift +++ b/Cotabby/Models/LlamaRuntimeModels.swift @@ -201,6 +201,11 @@ struct LlamaGenerationOptions: Equatable, Sendable { let repetitionPenalty: Double var seed: UInt32? + /// Masks line-break tokens so single-line fields never receive a multi-line completion. + var singleLine: Bool = false + /// Constrains the first generated token to continue the current word (mid-word carets only). + var forceWordContinuation: Bool = false + static func summary(maxPredictionTokens: Int, temperature: Double) -> LlamaGenerationOptions { LlamaGenerationOptions( maxPredictionTokens: maxPredictionTokens, diff --git a/Cotabby/Services/Runtime/LlamaRuntimeCore.swift b/Cotabby/Services/Runtime/LlamaRuntimeCore.swift index a1e19769..8baf2274 100644 --- a/Cotabby/Services/Runtime/LlamaRuntimeCore.swift +++ b/Cotabby/Services/Runtime/LlamaRuntimeCore.swift @@ -387,6 +387,9 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable { let remaining = Array(promptTokens[reusableTokenCount...]) if !remaining.isEmpty { + // Seed for the reuse path is sampled at the end of this decodePrompt; apply + // the word-continuation constraint to it just like the fresh path does. + engine.setForceWordContinuation(autocompleteSequenceID, options.forceWordContinuation) var mutableRemaining = remaining let status = engine.decodePrompt( autocompleteSequenceID, @@ -423,6 +426,10 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable { throw LlamaRuntimeError.generationFailed("Unable to create inference sequence.") } + // The engine samples the first (seed) token at the end of decodePrompt, so set the + // word-continuation constraint here, before decoding. 
+ engine.setForceWordContinuation(seqID, options.forceWordContinuation) + var tokens = promptTokens let status = engine.decodePrompt(seqID, &tokens, Int32(tokens.count), 0) guard status == .ok else { @@ -460,7 +467,8 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable { top_p: Float(options.topP), min_p: Float(options.minP), repetition_penalty: Float(options.repetitionPenalty), - seed: options.seed ?? 0 + seed: options.seed ?? 0, + single_line: options.singleLine ) } diff --git a/Cotabby/Services/Runtime/LlamaSuggestionEngine.swift b/Cotabby/Services/Runtime/LlamaSuggestionEngine.swift index 03266932..f7935310 100644 --- a/Cotabby/Services/Runtime/LlamaSuggestionEngine.swift +++ b/Cotabby/Services/Runtime/LlamaSuggestionEngine.swift @@ -45,7 +45,12 @@ final class LlamaSuggestionEngine { topP: request.topP, minP: request.minP, repetitionPenalty: request.repetitionPenalty, - seed: request.randomSeed + seed: request.randomSeed, + singleLine: !request.isMultiLineEnabled, + forceWordContinuation: MidWordContinuationPolicy.shouldForceContinuation( + precedingText: request.context.precedingText, + trailingText: request.context.trailingText + ) ) ) try Task.checkCancellation() diff --git a/Cotabby/Support/MidWordContinuationPolicy.swift b/Cotabby/Support/MidWordContinuationPolicy.swift new file mode 100644 index 00000000..e7e99177 --- /dev/null +++ b/Cotabby/Support/MidWordContinuationPolicy.swift @@ -0,0 +1,27 @@ +import Foundation + +/// File overview: +/// Decides whether the first generated token should be constrained to continue the current word. +/// +/// Why this file exists: +/// The engine can force the first sampled token to be a word continuation (no leading whitespace), +/// which heals mid-word completions. But forcing it at a normal word boundary would break the +/// common "predict the next word" case, where a leading space is exactly what we want. 
This policy +/// keeps the trigger deliberately narrow: it only fires when the caret sits strictly inside a word +/// (a word character on both sides). At a word end (nothing or a non-word character after the +/// caret) it returns false so ordinary next-word predictions are untouched. +enum MidWordContinuationPolicy { + static func shouldForceContinuation(precedingText: String, trailingText: String) -> Bool { + guard let before = precedingText.last, isWordCharacter(before) else { + return false + } + guard let after = trailingText.first, isWordCharacter(after) else { + return false + } + return true + } + + private static func isWordCharacter(_ character: Character) -> Bool { + character.isLetter || character.isNumber + } +} diff --git a/CotabbyTests/MidWordContinuationPolicyTests.swift b/CotabbyTests/MidWordContinuationPolicyTests.swift new file mode 100644 index 00000000..17be7616 --- /dev/null +++ b/CotabbyTests/MidWordContinuationPolicyTests.swift @@ -0,0 +1,31 @@ +import XCTest +@testable import Cotabby + +/// Pure-function tests for the mid-word continuation trigger. +final class MidWordContinuationPolicyTests: XCTestCase { + + func test_caretInsideWord_forcesContinuation() { + XCTAssertTrue( + MidWordContinuationPolicy.shouldForceContinuation(precedingText: "I am wri", trailingText: "ting") + ) + } + + func test_caretAtWordEnd_doesNotForce() { + // Nothing after the caret: a normal word boundary, where next-word predictions belong. + XCTAssertFalse( + MidWordContinuationPolicy.shouldForceContinuation(precedingText: "The quick brown fox", trailingText: "") + ) + } + + func test_spaceBeforeCaret_doesNotForce() { + XCTAssertFalse( + MidWordContinuationPolicy.shouldForceContinuation(precedingText: "hello ", trailingText: "world") + ) + } + + func test_punctuationAfterCaret_doesNotForce() { + XCTAssertFalse( + MidWordContinuationPolicy.shouldForceContinuation(precedingText: "done", trailingText: ". 
Next") + ) + } +} diff --git a/project.yml b/project.yml index 273be9a2..b1727e8c 100644 --- a/project.yml +++ b/project.yml @@ -13,7 +13,7 @@ packages: exactVersion: 2.9.1 CotabbyInference: url: https://github.com/FuJacob/cotabbyinference.git - branch: main + branch: feat/generation-quality-controls swift-log: url: https://github.com/apple/swift-log.git from: 1.12.1 From ca104196ff9bf38943ba3c50d94b8220543db9d2 Mon Sep 17 00:00:00 2001 From: Jacob Fu <141651335+FuJacob@users.noreply.github.com> Date: Sun, 31 May 2026 12:19:36 -0700 Subject: [PATCH 2/2] Add confidence-based suppression (off by default) Use the engine's new per-token logprob to drop completions the model itself was unsure about. LlamaRuntimeCore accumulates the average per-token log-probability and, when LlamaGenerationOptions.confidenceFloor is raised above its default of -infinity, suppresses completions below the floor. ConfidenceSuppressionPolicy holds the pure decision and is unit-tested. Disabled by default, so behavior is unchanged until a caller opts in; wiring a Settings control and full multi-candidate N-best ranking remain follow-ups. 
--- Cotabby.xcodeproj/project.pbxproj | 8 +++++ Cotabby/Models/LlamaRuntimeModels.swift | 4 +++ .../Services/Runtime/LlamaRuntimeCore.swift | 19 ++++++++++++ .../Support/ConfidenceSuppressionPolicy.swift | 21 +++++++++++++ .../ConfidenceSuppressionPolicyTests.swift | 31 +++++++++++++++++++ 5 files changed, 83 insertions(+) create mode 100644 Cotabby/Support/ConfidenceSuppressionPolicy.swift create mode 100644 CotabbyTests/ConfidenceSuppressionPolicyTests.swift diff --git a/Cotabby.xcodeproj/project.pbxproj b/Cotabby.xcodeproj/project.pbxproj index e7aae8c3..4860e7c0 100644 --- a/Cotabby.xcodeproj/project.pbxproj +++ b/Cotabby.xcodeproj/project.pbxproj @@ -70,6 +70,7 @@ 3CF1A4E39F24917DF0470A7D /* PromptPolicyTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4696A84D17890B154533A08F /* PromptPolicyTests.swift */; }; 4134ADBE464D00BB748BD9AE /* GeneralPaneView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 07480CE96ED0EBD94817C6B1 /* GeneralPaneView.swift */; }; 4190F8A76196B16ED94D0A55 /* VisualContextModels.swift in Sources */ = {isa = PBXBuildFile; fileRef = BE97A8169438D593C6C23412 /* VisualContextModels.swift */; }; + 429CE592897D8A952F2916C3 /* ConfidenceSuppressionPolicy.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1BD71ECC2AE4821B643E0935 /* ConfidenceSuppressionPolicy.swift */; }; 42D40F37086294D0E58200C5 /* GhostFontSizeStabilizer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9458F0820B3161FE9CF1DDAF /* GhostFontSizeStabilizer.swift */; }; 4531645066A73971EB2A5FA1 /* EmojiCatalog.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AC3BF78835C8F2C315932F1 /* EmojiCatalog.swift */; }; 46F341472191BC451B6BF6B5 /* SuggestionRequestFactory.swift in Sources */ = {isa = PBXBuildFile; fileRef = DDE858CB1E687E3CEB8FDD5B /* SuggestionRequestFactory.swift */; }; @@ -143,6 +144,7 @@ 90DC9508F27F712EB61EEB06 /* PermissionReminderView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 656F58E56FE9BC087B6F1D33 /* 
PermissionReminderView.swift */; }; 91C27021750AC03AA4A0115A /* HuggingFaceAPIClient.swift in Sources */ = {isa = PBXBuildFile; fileRef = 110CB0B53016644EF7840301 /* HuggingFaceAPIClient.swift */; }; 91D1F16B8C5DA281D4B7F699 /* CustomRulesTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = AD752451330486FE270018B0 /* CustomRulesTests.swift */; }; + 91D8189EFCD1BA992EA6F038 /* ConfidenceSuppressionPolicyTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 06FF2B0A3094A952A8EBA9B5 /* ConfidenceSuppressionPolicyTests.swift */; }; 924489CEE8171F7AD8579D71 /* FocusDebugOverlayController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0F5E263AB69029D5E13D5EE8 /* FocusDebugOverlayController.swift */; }; 934885ACC2DEA20B27F10948 /* PromptContextSanitizerTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0D80CC2CCAAFE3F23FB8C37A /* PromptContextSanitizerTests.swift */; }; 96498E097A5899AFC9F0C853 /* EmojiCatalogMatcherTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 292DC9D4D9D5D26AE882E39B /* EmojiCatalogMatcherTests.swift */; }; @@ -248,6 +250,7 @@ 04D853218B0A77B0CE090828 /* BrowserAppDetectorTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BrowserAppDetectorTests.swift; sourceTree = ""; }; 04E25414C307A20B6F9F20EC /* FocusSnapshotResolver.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FocusSnapshotResolver.swift; sourceTree = ""; }; 050D929E13BE52E6282B64D2 /* VisualContextStartCoalescerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VisualContextStartCoalescerTests.swift; sourceTree = ""; }; + 06FF2B0A3094A952A8EBA9B5 /* ConfidenceSuppressionPolicyTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConfidenceSuppressionPolicyTests.swift; sourceTree = ""; }; 07480CE96ED0EBD94817C6B1 /* GeneralPaneView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = 
GeneralPaneView.swift; sourceTree = ""; }; 0850B07CCDBA67C756C6EC59 /* ShortcutConflictTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ShortcutConflictTests.swift; sourceTree = ""; }; 09FADF683BE7B3558377FA76 /* FocusPollBackoff.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FocusPollBackoff.swift; sourceTree = ""; }; @@ -264,6 +267,7 @@ 19BE12C28A4AB8A4A58C2FF7 /* SettingsPaneScaffold.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsPaneScaffold.swift; sourceTree = ""; }; 19DB9558F4D3AFB108D71649 /* SuggestionStateHelperTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionStateHelperTests.swift; sourceTree = ""; }; 1A8414BEB7E34F57607E37FE /* EmojiVariantResolver.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EmojiVariantResolver.swift; sourceTree = ""; }; + 1BD71ECC2AE4821B643E0935 /* ConfidenceSuppressionPolicy.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConfidenceSuppressionPolicy.swift; sourceTree = ""; }; 1CE61E74928C221B8BB261C6 /* SuggestionTextColorCodec.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionTextColorCodec.swift; sourceTree = ""; }; 1D00A031C0D9CF2A7A2330D9 /* PermissionDragSourceView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PermissionDragSourceView.swift; sourceTree = ""; }; 1E0513E3B23937B099A3CFF2 /* WordCountFormatterTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WordCountFormatterTests.swift; sourceTree = ""; }; @@ -684,6 +688,7 @@ EFD89799BB82AF7A92559AEB /* ClipboardContentDistillerTests.swift */, 90B0D133AB77A2503FB08827 /* ClipboardRelevanceFilterTests.swift */, D504BEB224E0C176F5FCFF6E /* CompletionRenderModePolicyTests.swift */, + 06FF2B0A3094A952A8EBA9B5 /* 
ConfidenceSuppressionPolicyTests.swift */, AF1E065C7FFB697FCEB2FA5C /* CotabbyTestFixtures.swift */, AD752451330486FE270018B0 /* CustomRulesTests.swift */, C1C5DE0F3FF63545000E2453 /* DisplayCoordinateConverterTests.swift */, @@ -832,6 +837,7 @@ 96495E4147D828C0B1B22765 /* ClipboardContentDistiller.swift */, D3A2AC525DC664DB540D4F19 /* ClipboardRelevanceFilter.swift */, 53CF416511099C6818110F01 /* CompletionRenderModePolicy.swift */, + 1BD71ECC2AE4821B643E0935 /* ConfidenceSuppressionPolicy.swift */, C7B2D34A6F3AC9DFD61350F7 /* CotabbyDebugOptions.swift */, 29ED42C4BDD0C521101AF95E /* DeviceInfo.swift */, 74BD1D4DB27D5D96D1E06096 /* DisplayCoordinateConverter.swift */, @@ -1016,6 +1022,7 @@ 157A55EB796BEB7819B90D5D /* ClipboardRelevanceFilter.swift in Sources */, 7C94725B4837DEC9ECF1BC54 /* CompletionRenderMode.swift in Sources */, 3985F0F2B3178DBB945B1064 /* CompletionRenderModePolicy.swift in Sources */, + 429CE592897D8A952F2916C3 /* ConfidenceSuppressionPolicy.swift in Sources */, 8B2DFC860803C0A7C4D34A36 /* ContextBuffer.swift in Sources */, AA2E09FF7E430D66ECA8ECD5 /* CotabbyApp.swift in Sources */, FCC571EC239846F06007BFCA /* CotabbyAppEnvironment.swift in Sources */, @@ -1167,6 +1174,7 @@ 8865B95FE84198D70390DF80 /* ClipboardContentDistillerTests.swift in Sources */, BFCA7FAFDAEBF586AB615567 /* ClipboardRelevanceFilterTests.swift in Sources */, 25F91CEF38400FD1ADB6B1AF /* CompletionRenderModePolicyTests.swift in Sources */, + 91D8189EFCD1BA992EA6F038 /* ConfidenceSuppressionPolicyTests.swift in Sources */, 5E10EFC426217CB7218A5847 /* CotabbyTestFixtures.swift in Sources */, 91D1F16B8C5DA281D4B7F699 /* CustomRulesTests.swift in Sources */, 56611BA0087710277140E9E6 /* DisplayCoordinateConverterTests.swift in Sources */, diff --git a/Cotabby/Models/LlamaRuntimeModels.swift b/Cotabby/Models/LlamaRuntimeModels.swift index 7cfc1e52..fe162d6c 100644 --- a/Cotabby/Models/LlamaRuntimeModels.swift +++ b/Cotabby/Models/LlamaRuntimeModels.swift @@ -206,6 +206,10 @@ 
struct LlamaGenerationOptions: Equatable, Sendable { /// Constrains the first generated token to continue the current word (mid-word carets only). var forceWordContinuation: Bool = false + /// Average per-token log-probability below which a completion is suppressed as low-confidence. + /// Defaults to -infinity, which disables suppression entirely. + var confidenceFloor: Double = -.infinity + static func summary(maxPredictionTokens: Int, temperature: Double) -> LlamaGenerationOptions { LlamaGenerationOptions( maxPredictionTokens: maxPredictionTokens, diff --git a/Cotabby/Services/Runtime/LlamaRuntimeCore.swift b/Cotabby/Services/Runtime/LlamaRuntimeCore.swift index 8baf2274..5403e6e8 100644 --- a/Cotabby/Services/Runtime/LlamaRuntimeCore.swift +++ b/Cotabby/Services/Runtime/LlamaRuntimeCore.swift @@ -190,6 +190,7 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable { var generatedText = "" var tokensGenerated = 0 + var sumLogprob = 0.0 var stopReason = "budget_exhausted" for _ in 0 ..< options.maxPredictionTokens { @@ -216,6 +217,7 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable { let piece = Self.extractPiece(result) generatedText += piece tokensGenerated += 1 + sumLogprob += Double(result.logprob) } CotabbyLogger.runtime.debug( @@ -228,6 +230,23 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable { ] ) + // Confidence suppression: drop completions the model itself was unsure about. Disabled by + // default (confidenceFloor == -infinity); the KV-trim defer above still runs on early return. 
+        if tokensGenerated > 0,
+           ConfidenceSuppressionPolicy.shouldSuppress(
+               averageLogprob: sumLogprob / Double(tokensGenerated),
+               floor: options.confidenceFloor
+           ) {
+            CotabbyLogger.runtime.debug(
+                "Suppressed low-confidence completion",
+                metadata: [
+                    "tokens_generated": .stringConvertible(tokensGenerated),
+                    "avg_logprob": .stringConvertible(sumLogprob / Double(tokensGenerated))
+                ]
+            )
+            return ""
+        }
+
         return generatedText
     }
 
diff --git a/Cotabby/Support/ConfidenceSuppressionPolicy.swift b/Cotabby/Support/ConfidenceSuppressionPolicy.swift
new file mode 100644
index 00000000..0491ca77
--- /dev/null
+++ b/Cotabby/Support/ConfidenceSuppressionPolicy.swift
@@ -0,0 +1,30 @@
+import Foundation
+
+/// File overview:
+/// Decides whether a completion is too low-confidence to show, based on the model's own
+/// per-token log-probabilities.
+///
+/// Why this file exists:
+/// The guiding principle is that a suppressed completion beats a wrong one. The engine now reports
+/// a per-token log-probability, so we can drop completions the model itself was unsure about
+/// instead of showing a confident-looking guess. The policy is pure and isolated so the threshold
+/// is easy to test and tune. A floor of negative infinity (the default) disables suppression, so
+/// this is a no-op until a caller opts in by raising the floor.
+enum ConfidenceSuppressionPolicy {
+    /// Returns whether a completion should be dropped as low-confidence.
+    ///
+    /// - Parameters:
+    ///   - averageLogprob: The completion's average per-token log-probability.
+    ///   - floor: The lowest acceptable average. Negative infinity or NaN disables suppression.
+    /// - Returns: `true` when suppression is enabled and `averageLogprob` is below `floor` or is
+    ///   NaN; `false` otherwise, including when `averageLogprob` equals `floor`.
+    static func shouldSuppress(averageLogprob: Double, floor: Double) -> Bool {
+        // A NaN floor also fails this comparison, so it is treated like the disabled default.
+        guard floor > -.infinity else {
+            return false
+        }
+        // Negated `>=` instead of `<`: every comparison with NaN is false, so `<` would let an
+        // undefined average through. With a floor set, unknown confidence is suppressed too.
+        return !(averageLogprob >= floor)
+    }
+}
diff --git a/CotabbyTests/ConfidenceSuppressionPolicyTests.swift b/CotabbyTests/ConfidenceSuppressionPolicyTests.swift
new file mode 100644
index 00000000..d4d4fe2d
--- /dev/null
+++ b/CotabbyTests/ConfidenceSuppressionPolicyTests.swift
@@ -0,0 +1,31 @@
+import XCTest
+@testable import Cotabby
+
+/// Pure-function tests for confidence-based suppression.
+final class ConfidenceSuppressionPolicyTests: XCTestCase {
+
+    /// Shorthand for the policy call so each test reads as a single expectation.
+    private func suppressed(_ averageLogprob: Double, floor: Double) -> Bool {
+        ConfidenceSuppressionPolicy.shouldSuppress(averageLogprob: averageLogprob, floor: floor)
+    }
+
+    /// A floor of -infinity (the default) switches suppression off, even for very low confidence.
+    func test_disabledFloor_neverSuppresses() {
+        XCTAssertFalse(suppressed(-50.0, floor: -.infinity))
+    }
+
+    /// An average below the floor is suppressed.
+    func test_belowFloor_suppresses() {
+        XCTAssertTrue(suppressed(-3.0, floor: -2.0))
+    }
+
+    /// An average above the floor is kept.
+    func test_aboveFloor_doesNotSuppress() {
+        XCTAssertFalse(suppressed(-1.0, floor: -2.0))
+    }
+
+    /// An average exactly at the floor is kept.
+    func test_atFloor_doesNotSuppress() {
+        XCTAssertFalse(suppressed(-2.0, floor: -2.0))
+    }
+}