diff --git a/Cotabby.xcodeproj/project.pbxproj b/Cotabby.xcodeproj/project.pbxproj index f9b4f9c7..601cf7bc 100644 --- a/Cotabby.xcodeproj/project.pbxproj +++ b/Cotabby.xcodeproj/project.pbxproj @@ -38,10 +38,8 @@ 156E6AB3D24134EEC29FDB93 /* FocusSnapshotResolverSelectionTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = BA705EDFE1C41294F0E381F1 /* FocusSnapshotResolverSelectionTests.swift */; }; 157A55EB796BEB7819B90D5D /* ClipboardRelevanceFilter.swift in Sources */ = {isa = PBXBuildFile; fileRef = D3A2AC525DC664DB540D4F19 /* ClipboardRelevanceFilter.swift */; }; 15FA56CEF6FB5FF54C2FBA6F /* PermissionAndContextModelTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E7F42112F14026E6253BB865 /* PermissionAndContextModelTests.swift */; }; - 190C571B3CDFE117F4D15484 /* LlamaPromptRendererTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3009812A35A1CDEF16295AB7 /* LlamaPromptRendererTests.swift */; }; 19CB55B62977376E9AE8D428 /* VisualContextStartCoalescer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2F01FAC4F57EB08471521196 /* VisualContextStartCoalescer.swift */; }; 1B3FFCB9A979F49BF86EAAD4 /* ScreenshotContextGeneratorTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = B2BFD19A159680A495EE02FD /* ScreenshotContextGeneratorTests.swift */; }; - 1C4A2BAB2CCADF0A70B70AC6 /* LlamaPromptRenderer.swift in Sources */ = {isa = PBXBuildFile; fileRef = B5679E08C9A09065531C37B5 /* LlamaPromptRenderer.swift */; }; 1D1C6FF0B8F50AC14A1000F4 /* SentenceBoundaryClassifierTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2D7360A6D4261989A66658ED /* SentenceBoundaryClassifierTests.swift */; }; 1F8CC88AFFE67C08944CF506 /* WindowScreenshotService.swift in Sources */ = {isa = PBXBuildFile; fileRef = 77B0121E7BB173F8A2B0B108 /* WindowScreenshotService.swift */; }; 2197B68F1E4D0C3497DAC061 /* LlamaSuggestionEngine.swift in Sources */ = {isa = PBXBuildFile; fileRef = BE04620C905041680116BE80 /* LlamaSuggestionEngine.swift */; }; @@ -221,6 +219,7 @@ E17CAA453B1F534D284F0D89 /* PermissionHostApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = B6ACCB12E4DB32D2F2BEA567 /* PermissionHostApp.swift */; }; E27E6377D36D4981301568DD /* LaunchAtLoginStateTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5807E8508D9355D0271A00C5 /* LaunchAtLoginStateTests.swift */; }; E313639E71AE1374D2B9A956 /* SuggestionWorkController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6B2D97BAA3618A7D0357AC44 /* SuggestionWorkController.swift */; }; + E38801433B99E65BD7E45A0E /* LlamaPromptCacheHintTrackerTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0CA88BB29BC8727878C99E95 /* LlamaPromptCacheHintTrackerTests.swift */; }; E4382BEA8A8551612E5966B9 /* BaseCompletionPromptRenderer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 85EF79E6144D6C6AD062B569 /* BaseCompletionPromptRenderer.swift */; }; E51FA12B690428CA431328FC /* WritingPaneView.swift in Sources */ = {isa = PBXBuildFile; fileRef = D48B95B6665109B6C6A63B42 /* WritingPaneView.swift */; }; E6EE3C13FA31F261CD734C69 /* DownloadOutcomeClassifier.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3DE1975F3B5F4A70478DBF41 /* DownloadOutcomeClassifier.swift */; }; @@ -277,6 +276,7 @@ 0A3D1125B962CBE0269EEDDB /* SuggestionInserter.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionInserter.swift; sourceTree = ""; }; 0AC3BF78835C8F2C315932F1 /* EmojiCatalog.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EmojiCatalog.swift; sourceTree = ""; }; 0C383AE85B971A9605787358 /* FocusModels.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FocusModels.swift; sourceTree = ""; }; + 0CA88BB29BC8727878C99E95 /* LlamaPromptCacheHintTrackerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaPromptCacheHintTrackerTests.swift; sourceTree = ""; }; 0D80CC2CCAAFE3F23FB8C37A /* PromptContextSanitizerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PromptContextSanitizerTests.swift; sourceTree = ""; }; 0F5E263AB69029D5E13D5EE8 /* FocusDebugOverlayController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FocusDebugOverlayController.swift; sourceTree = ""; }; 110CB0B53016644EF7840301 /* HuggingFaceAPIClient.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HuggingFaceAPIClient.swift; sourceTree = ""; }; @@ -309,7 +309,6 @@ 2D1F9CEBAB0F330F8E7B61D8 /* InputSuppressionController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = InputSuppressionController.swift; sourceTree = ""; }; 2D7360A6D4261989A66658ED /* SentenceBoundaryClassifierTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SentenceBoundaryClassifierTests.swift; sourceTree = ""; }; 2F01FAC4F57EB08471521196 /* VisualContextStartCoalescer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VisualContextStartCoalescer.swift; sourceTree = ""; }; - 3009812A35A1CDEF16295AB7 /* LlamaPromptRendererTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaPromptRendererTests.swift; sourceTree = ""; }; 312C7306D916963F519CE0D9 /* EmojiTriggerStateMachine.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EmojiTriggerStateMachine.swift; sourceTree = ""; }; 328847A0F494360033366791 /* TextDirectionDetector.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TextDirectionDetector.swift; sourceTree = ""; }; 3350EDE01ED5125520C79D53 /* SettingsCoordinator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsCoordinator.swift; sourceTree = ""; }; @@ -423,7 +422,6 @@ B2F95847D76893C8A5B504B4 /* SuggestionOverlayStabilityGate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionOverlayStabilityGate.swift; sourceTree = ""; }; B424E2AC97C99D335B0D5751 /* SuggestionTextNormalizer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionTextNormalizer.swift; sourceTree = ""; }; B4B4A2E2DD6733658EC05BD8 /* DownloadFileRescuer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DownloadFileRescuer.swift; sourceTree = ""; }; - B5679E08C9A09065531C37B5 /* LlamaPromptRenderer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaPromptRenderer.swift; sourceTree = ""; }; B6ACCB12E4DB32D2F2BEA567 /* PermissionHostApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PermissionHostApp.swift; sourceTree = ""; }; B6D42CD456B4B3C988B148A6 /* FocusTrackingModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FocusTrackingModel.swift; sourceTree = ""; }; B7B185BA246A526CBA85E581 /* EmojiPickerPanelLayoutTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EmojiPickerPanelLayoutTests.swift; sourceTree = ""; }; @@ -753,7 +751,7 @@ 43D627C4A55359EAF4676FF7 /* InsertionSafetyGateTests.swift */, 4793D4EA5D36D7E5CC216C27 /* LanguageSupportTests.swift */, 5807E8508D9355D0271A00C5 /* LaunchAtLoginStateTests.swift */, - 3009812A35A1CDEF16295AB7 /* LlamaPromptRendererTests.swift */, + 0CA88BB29BC8727878C99E95 /* LlamaPromptCacheHintTrackerTests.swift */, 52BAFA2F989C3C4F7FB892B5 /* MarkerSelectionSynthesizerTests.swift */, 1274F897631B1B3A835D157F /* MidWordContinuationPolicyTests.swift */, FC83D14A7557BC0196E59007 /* MirrorOverlayLayoutTests.swift */, @@ -907,7 +905,6 @@ 41BBD5A4BA08CABE77860886 /* HardwareCapabilityProbe.swift */, 7D472F9F396672E57873303B /* InsertionSafetyGate.swift */, EAAE6B395FAB604DF059280A /* KeyCodeLabels.swift */, - B5679E08C9A09065531C37B5 /* LlamaPromptRenderer.swift */, 8D610FCA3A97249DCCE7D0B8 /* LLMIOFileHandler.swift */, A863F41C0C03D7B4AC5DC002 /* MarkerSelectionSynthesizer.swift */, 357C18383B047F24A531BDCD /* MidWordContinuationPolicy.swift */, @@ -1134,7 +1131,6 @@ 0A2DDD946654076675AC0FC6 /* LanguageCatalog.swift in Sources */, 51C069603DA16830868F1628 /* LanguageTagsEditor.swift in Sources */, F0DEEE8A866ABB560E7A7E6A /* LaunchAtLoginService.swift in Sources */, - 1C4A2BAB2CCADF0A70B70AC6 /* LlamaPromptRenderer.swift in Sources */, 66D9E37B12A9265D4733E72E /* LlamaRuntimeCore.swift in Sources */, 54BDF0D9C3DC7175555BD0F6 /* LlamaRuntimeManager.swift in Sources */, 4CAFD8F3444FEDC9ACAFF529 /* LlamaRuntimeModels.swift in Sources */, @@ -1263,7 +1259,7 @@ 83EC3543DC45B1601F119BF9 /* InsertionSafetyGateTests.swift in Sources */, E912D4617AE1376061DF1F00 /* LanguageSupportTests.swift in Sources */, E27E6377D36D4981301568DD /* LaunchAtLoginStateTests.swift in Sources */, - 190C571B3CDFE117F4D15484 /* LlamaPromptRendererTests.swift in Sources */, + E38801433B99E65BD7E45A0E /* LlamaPromptCacheHintTrackerTests.swift in Sources */, 87806DE08881D11F2608A13D /* MarkerSelectionSynthesizerTests.swift in Sources */, 7C36DBA762E19C8C31676D44 /* MidWordContinuationPolicyTests.swift in Sources */, 14D77F0B8A195AC2FA8D24A9 /* MirrorOverlayLayoutTests.swift in Sources */, diff --git a/Cotabby/Models/LlamaRuntimeModels.swift b/Cotabby/Models/LlamaRuntimeModels.swift index fe162d6c..864775c2 100644 --- a/Cotabby/Models/LlamaRuntimeModels.swift +++ b/Cotabby/Models/LlamaRuntimeModels.swift @@ -95,70 +95,61 @@ struct DownloadableRuntimeModel: Equatable, Hashable, Sendable, Identifiable { enum RuntimeModelCatalog { static func displayName(for filename: String) -> String { switch filename { - case "Qwen3-0.6B-Q4_K_M.gguf": - return "tabby-1-mini" - case "gemma-4-E2B-it-Q4_K_M.gguf": - return "tabby-1-base" - case "gemma-4-E4B-it-Q4_K_M.gguf": - return "tabby-1-pro" - case "SmolLM2-135M-Instruct-q8_0.gguf": - return "tabby-1-nano" + case "Qwen3.5-0.8B-Base.i1-Q6_K.gguf": + return "tabby-2-mini" + case "Qwen3.5-2B-Base.i1-Q4_K_M.gguf": + return "tabby-2-base" + case "Qwen3.5-4B-Base.i1-Q4_K_M.gguf": + return "tabby-2-pro" + case "gemma-4-E2B.i1-Q6_K.gguf": + return "tabby-2-gemma-mini" + case "gemma-4-E4B.i1-Q4_K_M.gguf": + return "tabby-2-gemma-pro" default: return filename } } - /// Canonical downloadable GGUF model list shown in Welcome and menu UI. - /// - /// `expectedSizeBytes` and `sha256` were captured from HuggingFace's CDN - /// response headers (`x-linked-size` and `x-linked-etag` respectively). - /// To refresh after a model is updated upstream: - /// - /// curl -sIL "" | grep -iE "^(x-linked-size|x-linked-etag):" + /// Builds a HuggingFace direct-download URL from a repo and file path. + private static func hfURL(_ repo: String, _ file: String) -> URL { + // Force-unwrap is safe: inputs are compile-time literals forming a valid URL. + URL(string: "https://huggingface.co/\(repo)/resolve/main/\(file)?download=true")! + } + + /// Canonical downloadable base GGUF models for Cotabby 2's base-model continuation path. + /// Qwen3.5 / Gemma base checkpoints from mradermacher's i1 GGUF repos. `expectedSizeBytes` and + /// `sha256` stay nil pending CDN-header capture; the download manager skips size/hash + /// validation when they are nil. Old instruct GGUFs are intentionally no longer listed. static let downloadableModels: [DownloadableRuntimeModel] = [ DownloadableRuntimeModel( - filename: "SmolLM2-135M-Instruct-q8_0.gguf", - displayName: displayName(for: "SmolLM2-135M-Instruct-q8_0.gguf"), - downloadURL: URL( - string: - "https://huggingface.co/Mungert/SmolLM2-135M-Instruct-GGUF/resolve/main/SmolLM2-135M-Instruct-q8_0.gguf?download=true" - )!, - approximateSizeInGigabytes: 0.1, - expectedSizeBytes: 144_811_552, - sha256: "bc64cce8e1c11e4ed870633b557e04af718249c817c4cf8a6784116144ec3e28" + filename: "Qwen3.5-0.8B-Base.i1-Q6_K.gguf", + displayName: displayName(for: "Qwen3.5-0.8B-Base.i1-Q6_K.gguf"), + downloadURL: hfURL("mradermacher/Qwen3.5-0.8B-Base-i1-GGUF", "Qwen3.5-0.8B-Base.i1-Q6_K.gguf"), + approximateSizeInGigabytes: 0.8 + ), + DownloadableRuntimeModel( + filename: "Qwen3.5-2B-Base.i1-Q4_K_M.gguf", + displayName: displayName(for: "Qwen3.5-2B-Base.i1-Q4_K_M.gguf"), + downloadURL: hfURL("mradermacher/Qwen3.5-2B-Base-i1-GGUF", "Qwen3.5-2B-Base.i1-Q4_K_M.gguf"), + approximateSizeInGigabytes: 1.4 ), DownloadableRuntimeModel( - filename: "Qwen3-0.6B-Q4_K_M.gguf", - displayName: displayName(for: "Qwen3-0.6B-Q4_K_M.gguf"), - downloadURL: URL( - string: - "https://huggingface.co/unsloth/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q4_K_M.gguf?download=true" - )!, - approximateSizeInGigabytes: 0.4, - expectedSizeBytes: 396_705_472, - sha256: "ac2d97712095a558e31573f62f466a3f9d93990898b0ec79d7c974c1780d524a" + filename: "Qwen3.5-4B-Base.i1-Q4_K_M.gguf", + displayName: displayName(for: "Qwen3.5-4B-Base.i1-Q4_K_M.gguf"), + downloadURL: hfURL("mradermacher/Qwen3.5-4B-Base-i1-GGUF", "Qwen3.5-4B-Base.i1-Q4_K_M.gguf"), + approximateSizeInGigabytes: 2.6 ), DownloadableRuntimeModel( - filename: "gemma-4-E2B-it-Q4_K_M.gguf", - displayName: displayName(for: "gemma-4-E2B-it-Q4_K_M.gguf"), - downloadURL: URL( - string: - "https://huggingface.co/unsloth/gemma-4-E2B-it-GGUF/resolve/main/gemma-4-E2B-it-Q4_K_M.gguf?download=true" - )!, - approximateSizeInGigabytes: 3.1, - expectedSizeBytes: 3_106_736_256, - sha256: "9378bc471710229ef165709b62e34bfb62231420ddaf6d729e727305b5b8672d" + filename: "gemma-4-E2B.i1-Q6_K.gguf", + displayName: displayName(for: "gemma-4-E2B.i1-Q6_K.gguf"), + downloadURL: hfURL("mradermacher/gemma-4-E2B-i1-GGUF", "gemma-4-E2B.i1-Q6_K.gguf"), + approximateSizeInGigabytes: 4.5 ), DownloadableRuntimeModel( - filename: "gemma-4-E4B-it-Q4_K_M.gguf", - displayName: displayName(for: "gemma-4-E4B-it-Q4_K_M.gguf"), - downloadURL: URL( - string: - "https://huggingface.co/unsloth/gemma-4-E4B-it-GGUF/resolve/main/gemma-4-E4B-it-Q4_K_M.gguf?download=true" - )!, - approximateSizeInGigabytes: 5.0, - expectedSizeBytes: 4_977_169_568, - sha256: "519b9793ed6ce0ff530f1b7c96e848e08e49e7af4d57bb97f76215963a54146d" + filename: "gemma-4-E4B.i1-Q4_K_M.gguf", + displayName: displayName(for: "gemma-4-E4B.i1-Q4_K_M.gguf"), + downloadURL: hfURL("mradermacher/gemma-4-E4B-i1-GGUF", "gemma-4-E4B.i1-Q4_K_M.gguf"), + approximateSizeInGigabytes: 5.0 ) ] } @@ -176,10 +167,11 @@ struct LlamaRuntimeConfiguration: Equatable, Sendable { static let `default` = LlamaRuntimeConfiguration( runtimeDirectoryPath: nil, preferredModelNames: [ - "gemma-4-E4B-it-Q4_K_M.gguf", - "gemma-4-E2B-it-Q4_K_M.gguf", - "Qwen3-0.6B-Q4_K_M.gguf", - "SmolLM2-135M-Instruct-q8_0.gguf" + "Qwen3.5-2B-Base.i1-Q4_K_M.gguf", + "Qwen3.5-0.8B-Base.i1-Q6_K.gguf", + "Qwen3.5-4B-Base.i1-Q4_K_M.gguf", + "gemma-4-E2B.i1-Q6_K.gguf", + "gemma-4-E4B.i1-Q4_K_M.gguf" ], contextWindowTokens: 2048, batchSize: 512, diff --git a/Cotabby/Models/OnboardingTemplate.swift b/Cotabby/Models/OnboardingTemplate.swift index 8c4d07eb..67f05a84 100644 --- a/Cotabby/Models/OnboardingTemplate.swift +++ b/Cotabby/Models/OnboardingTemplate.swift @@ -97,11 +97,11 @@ enum OnboardingTemplate: String, CaseIterable, Identifiable, Equatable, Sendable var openSourceModelFilename: String { switch self { case .quick: - return "SmolLM2-135M-Instruct-q8_0.gguf" + return "Qwen3.5-0.8B-Base.i1-Q6_K.gguf" case .everyday: - return "gemma-4-E2B-it-Q4_K_M.gguf" + return "Qwen3.5-2B-Base.i1-Q4_K_M.gguf" case .powerful: - return "gemma-4-E4B-it-Q4_K_M.gguf" + return "Qwen3.5-4B-Base.i1-Q4_K_M.gguf" } } } diff --git a/Cotabby/Models/SuggestionEngineModels.swift b/Cotabby/Models/SuggestionEngineModels.swift index 0269e2e4..75f66e15 100644 --- a/Cotabby/Models/SuggestionEngineModels.swift +++ b/Cotabby/Models/SuggestionEngineModels.swift @@ -63,7 +63,7 @@ struct SuggestionSettingsSnapshot: Equatable, Sendable { let selectedEngine: SuggestionEngineKind let selectedWordCountPreset: SuggestionWordCountPreset let isClipboardContextEnabled: Bool - /// User-authored profile data for Cotabby's single instruction-rendered completion prompt. + /// User-authored profile data for Cotabby's base-model completion prompt. /// This travels in the snapshot so generation uses the same value the Settings UI shows. let userName: String /// User-authored style rules, carried in the snapshot so generation uses the same value the @@ -86,10 +86,6 @@ struct SuggestionSettingsSnapshot: Equatable, Sendable { /// When true, the screenshot/OCR visual-context pipeline is skipped entirely for lower-latency /// suggestions. Defaults to false. Only affects visual context — predictions still run. let isFastModeEnabled: Bool - /// Experimental: when true and the Open Source engine is selected, the local path uses the - /// base-model continuation prompt (no instruction preamble, prefix last) instead of the - /// instruction-rendered prompt. Default false, so existing installs are byte-for-byte unchanged. - let useBaseCompletionPipeline: Bool /// User preference for how suggestions are presented (inline ghost text vs popup card vs auto /// based on caret geometry quality). Travels in the snapshot so consumers can react to changes /// without subscribing to the settings model directly. diff --git a/Cotabby/Models/SuggestionSettingsModel.swift b/Cotabby/Models/SuggestionSettingsModel.swift index 0aacf199..c773e947 100644 --- a/Cotabby/Models/SuggestionSettingsModel.swift +++ b/Cotabby/Models/SuggestionSettingsModel.swift @@ -39,9 +39,6 @@ final class SuggestionSettingsModel: ObservableObject { @Published private(set) var selectedWordCountPreset: SuggestionWordCountPreset @Published private(set) var isClipboardContextEnabled: Bool @Published private(set) var isFastModeEnabled: Bool - /// Experimental, opt-in via the `cotabbyBaseCompletionPipelineEnabled` default. Routes the local - /// llama path through the base-model continuation prompt. No UI yet; read at launch. - @Published private(set) var useBaseCompletionPipeline: Bool /// Whether the Performance pane is recording per-request latency. Defaults to false so the /// default user never pays any extra storage or write cost — recording only kicks in once the /// user opts in from Settings. @@ -96,7 +93,6 @@ final class SuggestionSettingsModel: ObservableObject { private static let legacyShortPresetRawValue = "3-7" private static let clipboardContextEnabledDefaultsKey = "cotabbyClipboardContextEnabled" private static let fastModeEnabledDefaultsKey = "cotabbyFastModeEnabled" - private static let baseCompletionPipelineEnabledDefaultsKey = "cotabbyBaseCompletionPipelineEnabled" private static let performanceTrackingEnabledDefaultsKey = "cotabbyPerformanceTrackingEnabled" private static let menuBarWordCountVisibleDefaultsKey = "cotabbyMenuBarWordCountVisible" private static let mirrorPreferenceDefaultsKey = "cotabbyMirrorPreference" @@ -194,10 +190,6 @@ final class SuggestionSettingsModel: ObservableObject { // into fast mode turns it off. let resolvedFastModeEnabled = userDefaults.object(forKey: Self.fastModeEnabledDefaultsKey) as? Bool ?? false - // Experimental base-model pipeline. Defaults to false so the merged-but-dark path changes - // nothing for existing users until the flag is explicitly set. - let resolvedBaseCompletionPipelineEnabled = - userDefaults.object(forKey: Self.baseCompletionPipelineEnabledDefaultsKey) as? Bool ?? false // Defaults to false so the metrics ring buffer stays empty until the user explicitly opts // in from the Performance pane. let resolvedPerformanceTrackingEnabled = @@ -326,7 +318,6 @@ final class SuggestionSettingsModel: ObservableObject { selectedWordCountPreset = resolvedWordCountPreset isClipboardContextEnabled = resolvedClipboardContextEnabled isFastModeEnabled = resolvedFastModeEnabled - useBaseCompletionPipeline = resolvedBaseCompletionPipelineEnabled isPerformanceTrackingEnabled = resolvedPerformanceTrackingEnabled isMenuBarWordCountVisible = resolvedMenuBarWordCountVisible mirrorPreference = resolvedMirrorPreference @@ -362,7 +353,6 @@ final class SuggestionSettingsModel: ObservableObject { persistSelectedWordCountPreset(resolvedWordCountPreset) persistClipboardContextEnabled(resolvedClipboardContextEnabled) persistFastModeEnabled(resolvedFastModeEnabled) - userDefaults.set(resolvedBaseCompletionPipelineEnabled, forKey: Self.baseCompletionPipelineEnabledDefaultsKey) persistPerformanceTrackingEnabled(resolvedPerformanceTrackingEnabled) persistMenuBarWordCountVisible(resolvedMenuBarWordCountVisible) persistMirrorPreference(resolvedMirrorPreference) @@ -420,7 +410,6 @@ final class SuggestionSettingsModel: ObservableObject { isMultiLineEnabled: isMultiLineEnabled, autoAcceptTrailingPunctuation: autoAcceptTrailingPunctuation, isFastModeEnabled: isFastModeEnabled, - useBaseCompletionPipeline: useBaseCompletionPipeline, mirrorPreference: mirrorPreference, acceptanceGranularity: acceptanceGranularity ) @@ -1120,8 +1109,8 @@ extension SuggestionSettingsModel: SuggestionSettingsProviding { // The outer CombineLatest stack is already at Combine's per-operator cap, so each new // top-level setting gets layered above via another `CombineLatest`. `extendedContext` joins // alongside `acceptanceGranularity` here for the same reason. - return Publishers.CombineLatest4(primary, $acceptanceGranularity, $extendedContext, $useBaseCompletionPipeline) - .map { primaryTuple, granularity, extendedContext, baseCompletionEnabled in + return Publishers.CombineLatest3(primary, $acceptanceGranularity, $extendedContext) + .map { primaryTuple, granularity, extendedContext in let (combinedSettings, presentationToggles, profile, timing) = primaryTuple let (globallyEnabled, disabledAppRules, engine, wordCountPreset) = combinedSettings let (clipboardContextEnabled, fastModeEnabled, mirrorPreference) = presentationToggles @@ -1142,7 +1131,6 @@ extension SuggestionSettingsModel: SuggestionSettingsProviding { isMultiLineEnabled: multiLine, autoAcceptTrailingPunctuation: autoAcceptPunctuation, isFastModeEnabled: fastModeEnabled, - useBaseCompletionPipeline: baseCompletionEnabled, mirrorPreference: mirrorPreference, acceptanceGranularity: granularity ) diff --git a/Cotabby/Support/BaseCompletionPromptRenderer.swift b/Cotabby/Support/BaseCompletionPromptRenderer.swift index d700127b..a7665564 100644 --- a/Cotabby/Support/BaseCompletionPromptRenderer.swift +++ b/Cotabby/Support/BaseCompletionPromptRenderer.swift @@ -1,14 +1,11 @@ import Foundation /// File overview: -/// Renders the prompt for the experimental base-model completion pipeline (Open Source engine with -/// `useBaseCompletionPipeline` enabled). +/// Renders the prompt for Cotabby's base-model completion pipeline (the Open Source / llama path). /// -/// Why this exists separately from `LlamaPromptRenderer`: -/// `LlamaPromptRenderer` wraps the user's text in an instruction blob ("Task: ... do not answer the -/// user ...") for instruction-tuned models. A *base* model has no instruction-following channel and -/// will happily continue a bare "Task:" line as if it were the document, so that prompt shape leaks -/// scaffolding into the ghost text. This renderer instead treats the model as a pure text continuer: +/// Design: a *base* model has no instruction-following channel and will happily continue a bare +/// "Task:" line as if it were the document, so an instruction-blob prompt would leak scaffolding into +/// the ghost text. This renderer instead treats the model as a pure text continuer: /// /// - No task preamble and no standalone `Label:` lines. /// - Custom instructions work by *conditioning*, not obedience: persona, voice, and language are diff --git a/Cotabby/Support/FoundationModelPromptRenderer.swift b/Cotabby/Support/FoundationModelPromptRenderer.swift index c5cc4b3e..bf362492 100644 --- a/Cotabby/Support/FoundationModelPromptRenderer.swift +++ b/Cotabby/Support/FoundationModelPromptRenderer.swift @@ -48,7 +48,7 @@ enum FoundationModelPromptRenderer { // We intentionally do NOT inject the user's name here. On the chat-tuned system model a // stated name is the single biggest trigger for breaking character ("Jacob, how are - // you"). The llama backend still personalizes via `LlamaPromptRenderer`; Apple's model + // you"). The llama backend personalizes via `BaseCompletionPromptRenderer`; Apple's model // does not get the name until we can scope it to contexts that actually need it. // Two few-shot examples (down from five) carry the heavy anti-drift signal. The first diff --git a/Cotabby/Support/LlamaPromptRenderer.swift b/Cotabby/Support/LlamaPromptRenderer.swift deleted file mode 100644 index 854005ff..00000000 --- a/Cotabby/Support/LlamaPromptRenderer.swift +++ /dev/null @@ -1,105 +0,0 @@ -import Foundation - -/// File overview: -/// Renders the single prompt string consumed by the local llama runtime. -/// -/// Why this file exists: -/// llama.cpp does not give us a separate "instructions" channel the way Foundation Models does. -/// That means all base behavior, user preferences, and request context must be composed into one -/// prompt string. Keeping that composition isolated here prevents prompt policy from leaking into -/// `SuggestionRequestFactory` or the runtime lifecycle layer. -enum LlamaPromptRenderer { - /// Renders Cotabby's local-model prompt. - /// - /// Cotabby always uses the instruction-rendered path so profile context and base autocomplete - /// rules travel through one prompt contract instead of drifting across separate modes. - static func prompt( - prefixText: String, - applicationName: String, - completionLengthInstruction: String, - userName: String?, - customRules: [String] = [], - extendedContext: String? = nil, - languageInstruction: String? = nil, - clipboardContext: String? = nil, - visualContextSummary: String? = nil - ) -> String { - var sections = [ - "Task:", - "- Continue the user's existing text exactly at the caret position.", - "- This is autocomplete, not chat. Do not answer the user or start a conversation.", - "- Never repeat, restate, or quote the text before the caret.", - "- Use clipboard context only when it directly helps the inline continuation.", - "- Return plain text only with no thinking, labels, bullets, markdown, quotes, or explanation." - ] - - var profileSections: [String] = [] - if let name = userName, !name.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { - profileSections.append("- The user's name is \(name).") - } - if !profileSections.isEmpty { - sections.append("") - sections.append("User Profile Context:") - sections.append(contentsOf: profileSections) - } - - // User style rules render after the base task rules and profile, with an explicit - // subordination line so a user "rule" can never override the autocomplete/output contract - // above (prompt-injection guard). - let trimmedRules = customRules - .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } - .filter { !$0.isEmpty } - if !trimmedRules.isEmpty { - sections.append("") - sections.append("Your style preferences:") - sections.append(contentsOf: trimmedRules.map { "- \($0)" }) - sections.append("Apply these only when they fit the continuation naturally; never break the rules above.") - } - - // Free-form user-authored reference notes (glossary, jargon, style guide). Rendered as a - // verbatim block rather than line-by-line bullets so the user's structure (lists, headings, - // examples) is preserved. The subordination line is the same prompt-injection guard used - // for style preferences above: this is reference material, not an override of the base - // autocomplete contract. - if let extendedContext, !extendedContext.isEmpty { - sections.append("") - sections.append("Reference notes from the user:") - sections.append(extendedContext) - sections.append("Use these notes only when they fit the continuation naturally; never break the rules above.") - } - - sections.append("") - sections.append("Screen context:") - sections.append("User is on \(applicationName).") - if let summary = visualContextSummary, !summary.isEmpty { - sections.append("Screen content:") - sections.append(summary) - } - if let clipboardContext, !clipboardContext.isEmpty { - sections.append("User's clipboard:") - sections.append(clipboardContext) - } - - // The final task cue sits immediately before the prefix so small instruct models see the - // current length policy right before the text they must continue, while the prefix itself - // still remains the last payload in the prompt. - sections.append("") - sections.append("Final instruction:") - // The declared-language hint sits in the late, high-attention block right before the prefix - // so small instruct models actually weigh it — without it they tend to drift to English when - // the surrounding text is short or ambiguous. - if let languageInstruction, !languageInstruction.isEmpty { - sections.append("- \(languageInstruction)") - } - // Experiment: the explicit word-range line (`completionLengthInstruction`) is intentionally - // omitted from the local-model prompt so length is governed purely by the token budget - // (`SuggestionWordCountPreset.suggestedPredictionTokenBudget`). The parameter stays wired so - // re-enabling the in-prompt cue is a one-line change. Apple Intelligence still gets the cue. - _ = completionLengthInstruction - sections.append("- The next line must begin directly with the continuation text.") - sections.append("Text before caret:") - sections.append(prefixText) - - return sections.joined(separator: "\n") - } -} diff --git a/Cotabby/Support/SuggestionRequestFactory.swift b/Cotabby/Support/SuggestionRequestFactory.swift index b13223ee..0ec7dd3c 100644 --- a/Cotabby/Support/SuggestionRequestFactory.swift +++ b/Cotabby/Support/SuggestionRequestFactory.swift @@ -62,33 +62,21 @@ enum SuggestionRequestFactory { let boundedVisualContextSummary = activeVisualContextSummary( rawSummary: visualContextSummary ) - let prompt: String - if settings.useBaseCompletionPipeline, settings.selectedEngine == .llamaOpenSource { - // Base-model continuation path: no instruction blob, prefix last, trailing-trimmed. - // Custom instructions/persona condition the output rather than being obeyed. - prompt = BaseCompletionPromptRenderer.prompt( - prefixText: prefixText, - applicationName: context.applicationName, - userName: userName, - customRules: customRules, - extendedContext: activeExtendedContext, - languageInstruction: languageInstruction, - clipboardContext: boundedClipboardContext, - visualContextSummary: boundedVisualContextSummary - ) - } else { - prompt = LlamaPromptRenderer.prompt( - prefixText: prefixText, - applicationName: context.applicationName, - completionLengthInstruction: completionLengthInstruction, - userName: userName, - customRules: customRules, - extendedContext: activeExtendedContext, - languageInstruction: languageInstruction, - clipboardContext: boundedClipboardContext, - visualContextSummary: boundedVisualContextSummary - ) - } + // Cotabby 2 is a base-model continuation product on the Open Source path, so the local + // prompt is always the base render: no instruction blob, prefix last, trailing-trimmed. + // Custom instructions and persona condition the output rather than being obeyed. The + // Foundation Models path builds its own messages from these same request fields, so this + // prompt string is only consumed by the llama engine. + let prompt = BaseCompletionPromptRenderer.prompt( + prefixText: prefixText, + applicationName: context.applicationName, + userName: userName, + customRules: customRules, + extendedContext: activeExtendedContext, + languageInstruction: languageInstruction, + clipboardContext: boundedClipboardContext, + visualContextSummary: boundedVisualContextSummary + ) let request = SuggestionRequest( context: context, diff --git a/CotabbyTests/CotabbyTestFixtures.swift b/CotabbyTests/CotabbyTestFixtures.swift index f11410e8..80a6ba6e 100644 --- a/CotabbyTests/CotabbyTestFixtures.swift +++ b/CotabbyTests/CotabbyTestFixtures.swift @@ -224,7 +224,6 @@ enum CotabbyTestFixtures { isMultiLineEnabled: Bool = false, autoAcceptTrailingPunctuation: Bool = true, isFastModeEnabled: Bool = false, - useBaseCompletionPipeline: Bool = false, mirrorPreference: MirrorPreference = .auto, acceptanceGranularity: AcceptanceGranularity = .word ) -> SuggestionSettingsSnapshot { @@ -243,7 +242,6 @@ enum CotabbyTestFixtures { isMultiLineEnabled: isMultiLineEnabled, autoAcceptTrailingPunctuation: autoAcceptTrailingPunctuation, isFastModeEnabled: isFastModeEnabled, - useBaseCompletionPipeline: useBaseCompletionPipeline, mirrorPreference: mirrorPreference, acceptanceGranularity: acceptanceGranularity ) diff --git a/CotabbyTests/CustomRulesTests.swift b/CotabbyTests/CustomRulesTests.swift index afcc866b..dbce38f0 100644 --- a/CotabbyTests/CustomRulesTests.swift +++ b/CotabbyTests/CustomRulesTests.swift @@ -35,42 +35,6 @@ final class CustomRulesTests: XCTestCase { XCTAssertEqual(CustomRulesCatalog.normalize(many).count, CustomRulesCatalog.maxRules) } - // MARK: - llama rendering - - func test_llamaRenderer_emitsRulesAfterBaseRulesWithSubordination() { - let prompt = LlamaPromptRenderer.prompt( - prefixText: "Hello", - applicationName: "Notes", - completionLengthInstruction: "Return only the next few words.", - userName: nil, - customRules: ["Use British spelling", "Never use em dashes"] - ) - - XCTAssertTrue(prompt.contains("Your style preferences:")) - XCTAssertTrue(prompt.contains("- Use British spelling")) - XCTAssertTrue(prompt.contains("- Never use em dashes")) - XCTAssertTrue(prompt.contains("never break the rules above")) - - // The base task rules must precede the user style section. - let baseIndex = try? XCTUnwrap(prompt.range(of: "Task:")) - let rulesIndex = try? XCTUnwrap(prompt.range(of: "Your style preferences:")) - if let baseIndex, let rulesIndex { - XCTAssertLessThan(baseIndex.lowerBound, rulesIndex.lowerBound) - } - } - - func test_llamaRenderer_emitsNoRuleSectionWhenEmpty() { - let prompt = LlamaPromptRenderer.prompt( - prefixText: "Hello", - applicationName: "Notes", - completionLengthInstruction: "Return only the next few words.", - userName: nil, - customRules: [] - ) - - XCTAssertFalse(prompt.contains("Your style preferences:")) - } - // MARK: - foundation model rendering func test_foundationModelInstructions_includeRules() { diff --git a/CotabbyTests/ExtendedContextTests.swift b/CotabbyTests/ExtendedContextTests.swift index a0e374a0..f7745e7b 100644 --- a/CotabbyTests/ExtendedContextTests.swift +++ b/CotabbyTests/ExtendedContextTests.swift @@ -113,48 +113,10 @@ final class ExtendedContextTests: XCTestCase { configuration: .standard ) - XCTAssertTrue(result.promptPreview.contains("Reference notes from the user:")) + XCTAssertTrue(result.promptPreview.contains("Notes the writer keeps in mind:")) XCTAssertTrue(result.promptPreview.contains("RULE: Every other word should be 'meow'")) } - // MARK: - llama rendering - - func test_llamaRenderer_emitsReferenceNotesAfterCustomRulesWithSubordination() { - let prompt = LlamaPromptRenderer.prompt( - prefixText: "Hello", - applicationName: "Notes", - completionLengthInstruction: "Return only the next few words.", - userName: nil, - customRules: ["Use British spelling"], - extendedContext: "Project codenames: Aurora = the iOS app. Borealis = the macOS app." - ) - - XCTAssertTrue(prompt.contains("Reference notes from the user:")) - XCTAssertTrue(prompt.contains("Project codenames: Aurora = the iOS app.")) - XCTAssertTrue(prompt.contains("never break the rules above")) - - // Reference notes must follow custom rules, which must themselves follow the base task block. - guard let baseRange = prompt.range(of: "Task:"), - let rulesRange = prompt.range(of: "Your style preferences:"), - let notesRange = prompt.range(of: "Reference notes from the user:") - else { - return XCTFail("expected base/rules/notes sections to be present") - } - XCTAssertLessThan(baseRange.lowerBound, rulesRange.lowerBound) - XCTAssertLessThan(rulesRange.lowerBound, notesRange.lowerBound) - } - - func test_llamaRenderer_emitsNoReferenceNotesSectionWhenNil() { - let prompt = LlamaPromptRenderer.prompt( - prefixText: "Hello", - applicationName: "Notes", - completionLengthInstruction: "Return only the next few words.", - userName: nil - ) - - XCTAssertFalse(prompt.contains("Reference notes from the user:")) - } - // MARK: - foundation model rendering /// Reference notes live in the cached instructions channel so they're not re-tokenized on diff --git a/CotabbyTests/LanguageSupportTests.swift b/CotabbyTests/LanguageSupportTests.swift index 3761e5c9..0ff2c580 100644 --- a/CotabbyTests/LanguageSupportTests.swift +++ b/CotabbyTests/LanguageSupportTests.swift @@ -69,38 +69,6 @@ final class LanguageSupportTests: XCTestCase { // MARK: - rendering - func test_llamaRenderer_placesLanguageHintInFinalBlock() { - // The length cue is no longer rendered (token-budget-only experiment), so this guards that - // the language hint still lands in the late, high-attention final-instruction block. - let prompt = LlamaPromptRenderer.prompt( - prefixText: "Hola", - applicationName: "Notes", - completionLengthInstruction: "UNIQUE_LENGTH_CUE", - userName: nil, - languageInstruction: LanguageCatalog.promptInstruction(for: ["Spanish"]) - ) - - XCTAssertFalse(prompt.contains("UNIQUE_LENGTH_CUE")) - - guard let finalRange = prompt.range(of: "Final instruction:"), - let langRange = prompt.range(of: "Spanish") else { - XCTFail("Expected final instruction header and language hint in the prompt") - return - } - XCTAssertLessThan(finalRange.lowerBound, langRange.lowerBound) - } - - func test_llamaRenderer_emitsNoLanguageLineWhenNoneDeclared() { - let prompt = LlamaPromptRenderer.prompt( - prefixText: "Hello", - applicationName: "Notes", - completionLengthInstruction: "Return only the next few words.", - userName: nil, - languageInstruction: LanguageCatalog.promptInstruction(for: []) - ) - XCTAssertFalse(prompt.contains("usually writes in")) - } - func test_foundationModelInstructions_includeLanguageHint() { let request = CotabbyTestFixtures.suggestionRequest( languageInstruction: LanguageCatalog.promptInstruction(for: ["Japanese"]) diff --git a/CotabbyTests/LlamaPromptCacheHintTrackerTests.swift b/CotabbyTests/LlamaPromptCacheHintTrackerTests.swift new file mode 100644 index 00000000..4f406965 --- /dev/null +++ b/CotabbyTests/LlamaPromptCacheHintTrackerTests.swift @@ -0,0 +1,108 @@ +import CoreGraphics +import XCTest +@testable import Cotabby + +/// Tests for `LlamaPromptCacheHintTracker`, the conservative byte-prefix hint the llama engine +/// passes into the runtime to reuse KV state across keystrokes. Pure-function and deterministic: +/// the tracker only advertises reuse for the same focused field and sampling fingerprint. +final class LlamaPromptCacheHintTrackerTests: XCTestCase { + + // MARK: - cache hints + + func test_cacheHint_nilBeforeSuccessfulRequestIsRecorded() { + var tracker = LlamaPromptCacheHintTracker() + + XCTAssertNil(tracker.cachedPrefixBytes(for: makeRequest(prompt: "hello"))) + } + + func test_cacheHint_returnsCommonPrefixBytesForSameFocusedField() { + var tracker = LlamaPromptCacheHintTracker() + tracker.recordSuccessfulRequest(makeRequest(prompt: "hello")) + + XCTAssertEqual( + tracker.cachedPrefixBytes(for: makeRequest(prompt: "hello!")), + "hello".utf8.count + ) + } + + func test_cacheHint_invalidatesWhenFocusedFieldChanges() { + var tracker = LlamaPromptCacheHintTracker() + tracker.recordSuccessfulRequest(makeRequest(prompt: "hello", elementIdentifier: "field-a")) + + XCTAssertNil( + tracker.cachedPrefixBytes(for: makeRequest(prompt: "hello!", elementIdentifier: "field-b")) + ) + } + + func test_cacheHint_prefersStableInputFrameOverUnstableElementIdentifier() { + var tracker = LlamaPromptCacheHintTracker() + let fieldFrame = CGRect(x: 10, y: 20, width: 300, height: 44) + tracker.recordSuccessfulRequest( + makeRequest(prompt: "hello", elementIdentifier: "field-a", inputFrameRect: fieldFrame) + ) + + XCTAssertEqual( + tracker.cachedPrefixBytes( + for: makeRequest(prompt: "hello!", elementIdentifier: "field-b", inputFrameRect: fieldFrame) + ), + "hello".utf8.count + ) + } + + func test_cacheHint_invalidatesWhenSamplingFingerprintChanges() { + var tracker = LlamaPromptCacheHintTracker() + tracker.recordSuccessfulRequest(makeRequest(prompt: "hello", topK: 20)) + + XCTAssertNil(tracker.cachedPrefixBytes(for: makeRequest(prompt: "hello!", topK: 40))) + } + + // MARK: - helpers + + private func makeRequest( + prompt: String, + elementIdentifier: String = "field", + topK: Int = 20, + inputFrameRect: CGRect? = nil + ) -> SuggestionRequest { + let snapshot = FocusedInputSnapshot( + applicationName: "TestApp", + bundleIdentifier: "com.example.TestApp", + processIdentifier: 123, + elementIdentifier: elementIdentifier, + role: "AXTextField", + subrole: nil, + caretRect: .zero, + inputFrameRect: inputFrameRect, + caretSource: "test", + caretQuality: .exact, + observedCharWidth: nil, + precedingText: prompt, + trailingText: "", + selection: NSRange(location: prompt.count, length: 0), + isSecure: false + ) + let context = FocusedInputContext(snapshot: snapshot, generation: 1) + + return SuggestionRequest( + context: context, + prefixText: prompt, + prompt: prompt, + generation: context.generation, + maxPredictionTokens: 8, + temperature: 0.1, + topK: topK, + topP: 0.7, + minP: 0.08, + repetitionPenalty: 1.05, + randomSeed: 42, + maxSuffixCharacters: 192, + completionLengthInstruction: "Return only the next few words.", + userName: nil, + customRules: [], + languageInstruction: nil, + clipboardContext: nil, + visualContextSummary: nil, + isMultiLineEnabled: false + ) + } +} diff --git a/CotabbyTests/LlamaPromptRendererTests.swift b/CotabbyTests/LlamaPromptRendererTests.swift deleted file mode 100644 index 1ac9b039..00000000 --- a/CotabbyTests/LlamaPromptRendererTests.swift +++ /dev/null @@ -1,240 +0,0 @@ -import CoreGraphics -import XCTest -@testable import Cotabby - -/// Tests for the prompt-rendering boundary between DECIDE and GENERATE. -/// -/// These are pure-function tests — no mocks, no I/O. The whole point of -/// LlamaPromptRenderer is that given the same inputs, it returns the exact -/// same string, so every assertion here is deterministic. -final class LlamaPromptRendererTests: XCTestCase { - - // MARK: - cache hints - - func test_cacheHint_nilBeforeSuccessfulRequestIsRecorded() { - var tracker = LlamaPromptCacheHintTracker() - - XCTAssertNil(tracker.cachedPrefixBytes(for: makeRequest(prompt: "hello"))) - } - - func test_cacheHint_returnsCommonPrefixBytesForSameFocusedField() { - var tracker = LlamaPromptCacheHintTracker() - tracker.recordSuccessfulRequest(makeRequest(prompt: "hello")) - - XCTAssertEqual( - tracker.cachedPrefixBytes(for: makeRequest(prompt: "hello!")), - "hello".utf8.count - ) - } - - func test_cacheHint_invalidatesWhenFocusedFieldChanges() { - var tracker = LlamaPromptCacheHintTracker() - tracker.recordSuccessfulRequest(makeRequest(prompt: "hello", elementIdentifier: "field-a")) - - XCTAssertNil( - tracker.cachedPrefixBytes(for: makeRequest(prompt: "hello!", elementIdentifier: "field-b")) - ) - } - - func test_cacheHint_prefersStableInputFrameOverUnstableElementIdentifier() { - var tracker = LlamaPromptCacheHintTracker() - let fieldFrame = CGRect(x: 10, y: 20, width: 300, height: 44) - tracker.recordSuccessfulRequest( - makeRequest(prompt: "hello", elementIdentifier: "field-a", inputFrameRect: fieldFrame) - ) - - XCTAssertEqual( - tracker.cachedPrefixBytes( - for: makeRequest(prompt: "hello!", elementIdentifier: "field-b", inputFrameRect: fieldFrame) - ), - "hello".utf8.count - ) - } - - func test_cacheHint_invalidatesWhenSamplingFingerprintChanges() { - var tracker = LlamaPromptCacheHintTracker() - tracker.recordSuccessfulRequest(makeRequest(prompt: "hello", topK: 20)) - - XCTAssertNil(tracker.cachedPrefixBytes(for: makeRequest(prompt: "hello!", topK: 40))) - } - - // MARK: - instruction prompt - - /// The structural contract for local instruct models: stable task rules first, supporting - /// context in the middle, then a late length cue right before the prefix the model must - /// continue. Losing one of these sections tends to degrade prompt-following without throwing. - func test_instructionPrompt_containsTaskScreenContextAndFinalInstruction() { - let prompt = LlamaPromptRenderer.prompt( - prefixText: "Once upon", - applicationName: "Messages", - completionLengthInstruction: "Keep completion short.", - userName: nil - ) - - XCTAssertTrue(prompt.contains("Task:"), "instruction prompt should include Task section") - XCTAssertTrue( - prompt.contains("Screen context:"), - "instruction prompt should include Screen context section" - ) - XCTAssertTrue( - prompt.contains("Final instruction:"), - "instruction prompt should include a late final instruction section" - ) - XCTAssertTrue(prompt.contains("Text before caret:"), "instruction prompt should include the prefix header") - } - - func test_instructionPrompt_includesApplicationNameAndPrefix() { - let prompt = LlamaPromptRenderer.prompt( - prefixText: "My prefix text here", - applicationName: "Slack", - completionLengthInstruction: "Short.", - userName: nil - ) - - XCTAssertTrue(prompt.contains("User is on Slack.")) - XCTAssertTrue(prompt.contains("My prefix text here")) - } - - /// Length is enforced by the token budget, not by an in-prompt word range, so the - /// completion-length cue must never reach the local-model prompt even if a caller passes one. - func test_instructionPrompt_omitsCompletionLengthInstruction() { - // Experiment: the local-model prompt no longer carries the word-range cue; length is - // governed solely by the token budget. The cue must not leak into the prompt even when a - // caller still passes one. - let prompt = LlamaPromptRenderer.prompt( - prefixText: "PREFIX_BODY_XYZ", - applicationName: "App", - completionLengthInstruction: "UNIQUE_LENGTH_MARKER_7_TO_12_WORDS", - userName: nil - ) - - XCTAssertFalse(prompt.contains("UNIQUE_LENGTH_MARKER_7_TO_12_WORDS")) - - guard let finalInstructionRange = prompt.range(of: "Final instruction:"), - let prefixRange = prompt.range(of: "PREFIX_BODY_XYZ") else { - XCTFail("Expected final instruction header and prefix in the prompt") - return - } - - XCTAssertLessThan(finalInstructionRange.lowerBound, prefixRange.lowerBound) - } - - func test_instructionPrompt_includesProfileContextWhenProvided() { - let prompt = LlamaPromptRenderer.prompt( - prefixText: "x", - applicationName: "App", - completionLengthInstruction: "Short.", - userName: "UNIQUE_NAME_MARKER_ZQRT" - ) - - XCTAssertTrue(prompt.contains("UNIQUE_NAME_MARKER_ZQRT"), - "instruction prompt should carry user-provided profile name") - } - - /// The prefix remains the last payload in the prompt so the model still ends on the actual - /// text it must continue, even though the length cue is moved later in the prompt. - func test_instructionPrompt_prefixAppearsAfterScreenContextAndEndsPrompt() { - let prompt = LlamaPromptRenderer.prompt( - prefixText: "PREFIX_BODY_XYZ", - applicationName: "App", - completionLengthInstruction: "Short.", - userName: nil - ) - - guard let contextRange = prompt.range(of: "Screen context:"), - let prefixRange = prompt.range(of: "PREFIX_BODY_XYZ") else { - XCTFail("Expected both Screen context: and PREFIX_BODY_XYZ in the prompt") - return - } - - XCTAssertLessThan(contextRange.lowerBound, prefixRange.lowerBound, - "prefix must appear after the Screen context header") - XCTAssertTrue(prompt.hasSuffix("PREFIX_BODY_XYZ")) - } - - func test_instructionPrompt_includesVisualContextSummaryWhenProvided() { - let prompt = LlamaPromptRenderer.prompt( - prefixText: "PREFIX", - applicationName: "App", - completionLengthInstruction: "Short.", - userName: nil, - visualContextSummary: "A window describing a cat." - ) - - XCTAssertTrue(prompt.contains("Screen content:")) - XCTAssertTrue(prompt.contains("A window describing a cat.")) - } - - func test_instructionPrompt_includesClipboardContextWhenProvided() { - let prompt = LlamaPromptRenderer.prompt( - prefixText: "PREFIX", - applicationName: "App", - completionLengthInstruction: "Short.", - userName: nil, - clipboardContext: "UNIQUE_CLIPBOARD_MARKER" - ) - - XCTAssertTrue(prompt.contains("User's clipboard:")) - XCTAssertTrue(prompt.contains("UNIQUE_CLIPBOARD_MARKER")) - } - - func test_instructionPrompt_omitsVisualContextSummaryWhenNil() { - let prompt = LlamaPromptRenderer.prompt( - prefixText: "PREFIX", - applicationName: "App", - completionLengthInstruction: "Short.", - userName: nil, - visualContextSummary: nil - ) - - XCTAssertFalse(prompt.contains("Screen content:")) - } - - private func makeRequest( - prompt: String, - elementIdentifier: String = "field", - topK: Int = 20, - inputFrameRect: CGRect? = nil - ) -> SuggestionRequest { - let snapshot = FocusedInputSnapshot( - applicationName: "TestApp", - bundleIdentifier: "com.example.TestApp", - processIdentifier: 123, - elementIdentifier: elementIdentifier, - role: "AXTextField", - subrole: nil, - caretRect: .zero, - inputFrameRect: inputFrameRect, - caretSource: "test", - caretQuality: .exact, - observedCharWidth: nil, - precedingText: prompt, - trailingText: "", - selection: NSRange(location: prompt.count, length: 0), - isSecure: false - ) - let context = FocusedInputContext(snapshot: snapshot, generation: 1) - - return SuggestionRequest( - context: context, - prefixText: prompt, - prompt: prompt, - generation: context.generation, - maxPredictionTokens: 8, - temperature: 0.1, - topK: topK, - topP: 0.7, - minP: 0.08, - repetitionPenalty: 1.05, - randomSeed: 42, - maxSuffixCharacters: 192, - completionLengthInstruction: "Return only the next few words.", - userName: nil, - customRules: [], - languageInstruction: nil, - clipboardContext: nil, - visualContextSummary: nil, - isMultiLineEnabled: false - ) - } -} diff --git a/CotabbyTests/ModelAndPresentationValueTests.swift b/CotabbyTests/ModelAndPresentationValueTests.swift index fd576f8a..013fc491 100644 --- a/CotabbyTests/ModelAndPresentationValueTests.swift +++ b/CotabbyTests/ModelAndPresentationValueTests.swift @@ -147,12 +147,12 @@ final class RuntimeAndInputModelValueTests: XCTestCase { func test_runtimeModelCatalogMapsKnownNamesAndLeavesCustomNamesAlone() { XCTAssertEqual( - RuntimeModelCatalog.displayName(for: "Qwen3-0.6B-Q4_K_M.gguf"), - "tabby-1-mini" + RuntimeModelCatalog.displayName(for: "Qwen3.5-2B-Base.i1-Q4_K_M.gguf"), + "tabby-2-base" ) XCTAssertEqual( - RuntimeModelCatalog.displayName(for: "gemma-4-E2B-it-Q4_K_M.gguf"), - "tabby-1-base" + RuntimeModelCatalog.displayName(for: "Qwen3.5-0.8B-Base.i1-Q6_K.gguf"), + "tabby-2-mini" ) // Retired models fall back to their raw filename like any unknown local GGUF. XCTAssertEqual( diff --git a/CotabbyTests/OnboardingTemplateRecommenderTests.swift b/CotabbyTests/OnboardingTemplateRecommenderTests.swift index 2385debb..1ede3f37 100644 --- a/CotabbyTests/OnboardingTemplateRecommenderTests.swift +++ b/CotabbyTests/OnboardingTemplateRecommenderTests.swift @@ -46,9 +46,9 @@ final class OnboardingTemplateRecommenderTests: XCTestCase { func testOpenSourceTiersMapToTheirLocalModels() { let expected: [OnboardingTemplate: String] = [ - .quick: "SmolLM2-135M-Instruct-q8_0.gguf", - .everyday: "gemma-4-E2B-it-Q4_K_M.gguf", - .powerful: "gemma-4-E4B-it-Q4_K_M.gguf" + .quick: "Qwen3.5-0.8B-Base.i1-Q6_K.gguf", + .everyday: "Qwen3.5-2B-Base.i1-Q4_K_M.gguf", + .powerful: "Qwen3.5-4B-Base.i1-Q4_K_M.gguf" ] for (template, filename) in expected { let plan = OnboardingTemplateRecommender.resolvePlan(for: template, engine: .llamaOpenSource) diff --git a/CotabbyTests/SuggestionRequestFactoryTests.swift b/CotabbyTests/SuggestionRequestFactoryTests.swift index 9b635a10..eb8e7642 100644 --- a/CotabbyTests/SuggestionRequestFactoryTests.swift +++ b/CotabbyTests/SuggestionRequestFactoryTests.swift @@ -239,7 +239,7 @@ final class SuggestionRequestFactoryTests: XCTestCase { ) XCTAssertEqual(result.request.clipboardContext, "Copied project notes.") - XCTAssertTrue(result.promptPreview.contains("User's clipboard:")) + XCTAssertTrue(result.promptPreview.contains("On the clipboard:")) XCTAssertTrue(result.promptPreview.contains("Copied project notes.")) } @@ -272,7 +272,7 @@ final class SuggestionRequestFactoryTests: XCTestCase { ) XCTAssertNil(result.request.clipboardContext) - XCTAssertFalse(result.promptPreview.contains("User's clipboard:")) + XCTAssertFalse(result.promptPreview.contains("On the clipboard:")) XCTAssertFalse(result.promptPreview.contains("Copied project notes.")) }