diff --git a/Cotabby.xcodeproj/project.pbxproj b/Cotabby.xcodeproj/project.pbxproj
index f9b4f9c7..601cf7bc 100644
--- a/Cotabby.xcodeproj/project.pbxproj
+++ b/Cotabby.xcodeproj/project.pbxproj
@@ -38,10 +38,8 @@
 		156E6AB3D24134EEC29FDB93 /* FocusSnapshotResolverSelectionTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = BA705EDFE1C41294F0E381F1 /* FocusSnapshotResolverSelectionTests.swift */; };
 		157A55EB796BEB7819B90D5D /* ClipboardRelevanceFilter.swift in Sources */ = {isa = PBXBuildFile; fileRef = D3A2AC525DC664DB540D4F19 /* ClipboardRelevanceFilter.swift */; };
 		15FA56CEF6FB5FF54C2FBA6F /* PermissionAndContextModelTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E7F42112F14026E6253BB865 /* PermissionAndContextModelTests.swift */; };
-		190C571B3CDFE117F4D15484 /* LlamaPromptRendererTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3009812A35A1CDEF16295AB7 /* LlamaPromptRendererTests.swift */; };
 		19CB55B62977376E9AE8D428 /* VisualContextStartCoalescer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2F01FAC4F57EB08471521196 /* VisualContextStartCoalescer.swift */; };
 		1B3FFCB9A979F49BF86EAAD4 /* ScreenshotContextGeneratorTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = B2BFD19A159680A495EE02FD /* ScreenshotContextGeneratorTests.swift */; };
-		1C4A2BAB2CCADF0A70B70AC6 /* LlamaPromptRenderer.swift in Sources */ = {isa = PBXBuildFile; fileRef = B5679E08C9A09065531C37B5 /* LlamaPromptRenderer.swift */; };
 		1D1C6FF0B8F50AC14A1000F4 /* SentenceBoundaryClassifierTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2D7360A6D4261989A66658ED /* SentenceBoundaryClassifierTests.swift */; };
 		1F8CC88AFFE67C08944CF506 /* WindowScreenshotService.swift in Sources */ = {isa = PBXBuildFile; fileRef = 77B0121E7BB173F8A2B0B108 /* WindowScreenshotService.swift */; };
 		2197B68F1E4D0C3497DAC061 /* LlamaSuggestionEngine.swift in Sources */ = {isa = PBXBuildFile; fileRef = BE04620C905041680116BE80 /* LlamaSuggestionEngine.swift */; };
@@ -221,6 +219,7 @@
 		E17CAA453B1F534D284F0D89 /* PermissionHostApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = B6ACCB12E4DB32D2F2BEA567 /* PermissionHostApp.swift */; };
 		E27E6377D36D4981301568DD /* LaunchAtLoginStateTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5807E8508D9355D0271A00C5 /* LaunchAtLoginStateTests.swift */; };
 		E313639E71AE1374D2B9A956 /* SuggestionWorkController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6B2D97BAA3618A7D0357AC44 /* SuggestionWorkController.swift */; };
+		E38801433B99E65BD7E45A0E /* LlamaPromptCacheHintTrackerTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0CA88BB29BC8727878C99E95 /* LlamaPromptCacheHintTrackerTests.swift */; };
 		E4382BEA8A8551612E5966B9 /* BaseCompletionPromptRenderer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 85EF79E6144D6C6AD062B569 /* BaseCompletionPromptRenderer.swift */; };
 		E51FA12B690428CA431328FC /* WritingPaneView.swift in Sources */ = {isa = PBXBuildFile; fileRef = D48B95B6665109B6C6A63B42 /* WritingPaneView.swift */; };
 		E6EE3C13FA31F261CD734C69 /* DownloadOutcomeClassifier.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3DE1975F3B5F4A70478DBF41 /* DownloadOutcomeClassifier.swift */; };
@@ -277,6 +276,7 @@
 		0A3D1125B962CBE0269EEDDB /* SuggestionInserter.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionInserter.swift; sourceTree = "<group>"; };
 		0AC3BF78835C8F2C315932F1 /* EmojiCatalog.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EmojiCatalog.swift; sourceTree = "<group>"; };
 		0C383AE85B971A9605787358 /* FocusModels.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FocusModels.swift; sourceTree = "<group>"; };
+		0CA88BB29BC8727878C99E95 /* LlamaPromptCacheHintTrackerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaPromptCacheHintTrackerTests.swift; sourceTree = "<group>"; };
 		0D80CC2CCAAFE3F23FB8C37A /* PromptContextSanitizerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PromptContextSanitizerTests.swift; sourceTree = "<group>"; };
 		0F5E263AB69029D5E13D5EE8 /* FocusDebugOverlayController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FocusDebugOverlayController.swift; sourceTree = "<group>"; };
 		110CB0B53016644EF7840301 /* HuggingFaceAPIClient.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HuggingFaceAPIClient.swift; sourceTree = "<group>"; };
@@ -309,7 +309,6 @@
 		2D1F9CEBAB0F330F8E7B61D8 /* InputSuppressionController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = InputSuppressionController.swift; sourceTree = "<group>"; };
 		2D7360A6D4261989A66658ED /* SentenceBoundaryClassifierTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SentenceBoundaryClassifierTests.swift; sourceTree = "<group>"; };
 		2F01FAC4F57EB08471521196 /* VisualContextStartCoalescer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VisualContextStartCoalescer.swift; sourceTree = "<group>"; };
-		3009812A35A1CDEF16295AB7 /* LlamaPromptRendererTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaPromptRendererTests.swift; sourceTree = "<group>"; };
 		312C7306D916963F519CE0D9 /* EmojiTriggerStateMachine.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EmojiTriggerStateMachine.swift; sourceTree = "<group>"; };
 		328847A0F494360033366791 /* TextDirectionDetector.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TextDirectionDetector.swift; sourceTree = "<group>"; };
 		3350EDE01ED5125520C79D53 /* SettingsCoordinator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsCoordinator.swift; sourceTree = "<group>"; };
@@ -423,7 +422,6 @@
 		B2F95847D76893C8A5B504B4 /* SuggestionOverlayStabilityGate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionOverlayStabilityGate.swift; sourceTree = "<group>"; };
 		B424E2AC97C99D335B0D5751 /* SuggestionTextNormalizer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionTextNormalizer.swift; sourceTree = "<group>"; };
 		B4B4A2E2DD6733658EC05BD8 /* DownloadFileRescuer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DownloadFileRescuer.swift; sourceTree = "<group>"; };
-		B5679E08C9A09065531C37B5 /* LlamaPromptRenderer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaPromptRenderer.swift; sourceTree = "<group>"; };
 		B6ACCB12E4DB32D2F2BEA567 /* PermissionHostApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PermissionHostApp.swift; sourceTree = "<group>"; };
 		B6D42CD456B4B3C988B148A6 /* FocusTrackingModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FocusTrackingModel.swift; sourceTree = "<group>"; };
 		B7B185BA246A526CBA85E581 /* EmojiPickerPanelLayoutTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EmojiPickerPanelLayoutTests.swift; sourceTree = "<group>"; };
@@ -753,7 +751,7 @@
 				43D627C4A55359EAF4676FF7 /* InsertionSafetyGateTests.swift */,
 				4793D4EA5D36D7E5CC216C27 /* LanguageSupportTests.swift */,
 				5807E8508D9355D0271A00C5 /* LaunchAtLoginStateTests.swift */,
-				3009812A35A1CDEF16295AB7 /* LlamaPromptRendererTests.swift */,
+				0CA88BB29BC8727878C99E95 /* LlamaPromptCacheHintTrackerTests.swift */,
 				52BAFA2F989C3C4F7FB892B5 /* MarkerSelectionSynthesizerTests.swift */,
 				1274F897631B1B3A835D157F /* MidWordContinuationPolicyTests.swift */,
 				FC83D14A7557BC0196E59007 /* MirrorOverlayLayoutTests.swift */,
@@ -907,7 +905,6 @@
 				41BBD5A4BA08CABE77860886 /* HardwareCapabilityProbe.swift */,
 				7D472F9F396672E57873303B /* InsertionSafetyGate.swift */,
 				EAAE6B395FAB604DF059280A /* KeyCodeLabels.swift */,
-				B5679E08C9A09065531C37B5 /* LlamaPromptRenderer.swift */,
 				8D610FCA3A97249DCCE7D0B8 /* LLMIOFileHandler.swift */,
 				A863F41C0C03D7B4AC5DC002 /* MarkerSelectionSynthesizer.swift */,
 				357C18383B047F24A531BDCD /* MidWordContinuationPolicy.swift */,
@@ -1134,7 +1131,6 @@
 				0A2DDD946654076675AC0FC6 /* LanguageCatalog.swift in Sources */,
 				51C069603DA16830868F1628 /* LanguageTagsEditor.swift in Sources */,
 				F0DEEE8A866ABB560E7A7E6A /* LaunchAtLoginService.swift in Sources */,
-				1C4A2BAB2CCADF0A70B70AC6 /* LlamaPromptRenderer.swift in Sources */,
 				66D9E37B12A9265D4733E72E /* LlamaRuntimeCore.swift in Sources */,
 				54BDF0D9C3DC7175555BD0F6 /* LlamaRuntimeManager.swift in Sources */,
 				4CAFD8F3444FEDC9ACAFF529 /* LlamaRuntimeModels.swift in Sources */,
@@ -1263,7 +1259,7 @@
 				83EC3543DC45B1601F119BF9 /* InsertionSafetyGateTests.swift in Sources */,
 				E912D4617AE1376061DF1F00 /* LanguageSupportTests.swift in Sources */,
 				E27E6377D36D4981301568DD /* LaunchAtLoginStateTests.swift in Sources */,
-				190C571B3CDFE117F4D15484 /* LlamaPromptRendererTests.swift in Sources */,
+				E38801433B99E65BD7E45A0E /* LlamaPromptCacheHintTrackerTests.swift in Sources */,
 				87806DE08881D11F2608A13D /* MarkerSelectionSynthesizerTests.swift in Sources */,
 				7C36DBA762E19C8C31676D44 /* MidWordContinuationPolicyTests.swift in Sources */,
 				14D77F0B8A195AC2FA8D24A9 /* MirrorOverlayLayoutTests.swift in Sources */,
diff --git a/Cotabby/Models/LlamaRuntimeModels.swift b/Cotabby/Models/LlamaRuntimeModels.swift
index fe162d6c..864775c2 100644
--- a/Cotabby/Models/LlamaRuntimeModels.swift
+++ b/Cotabby/Models/LlamaRuntimeModels.swift
@@ -95,70 +95,61 @@ struct DownloadableRuntimeModel: Equatable, Hashable, Sendable, Identifiable {
 enum RuntimeModelCatalog {
     static func displayName(for filename: String) -> String {
         switch filename {
-        case "Qwen3-0.6B-Q4_K_M.gguf":
-            return "tabby-1-mini"
-        case "gemma-4-E2B-it-Q4_K_M.gguf":
-            return "tabby-1-base"
-        case "gemma-4-E4B-it-Q4_K_M.gguf":
-            return "tabby-1-pro"
-        case "SmolLM2-135M-Instruct-q8_0.gguf":
-            return "tabby-1-nano"
+        case "Qwen3.5-0.8B-Base.i1-Q6_K.gguf":
+            return "tabby-2-mini"
+        case "Qwen3.5-2B-Base.i1-Q4_K_M.gguf":
+            return "tabby-2-base"
+        case "Qwen3.5-4B-Base.i1-Q4_K_M.gguf":
+            return "tabby-2-pro"
+        case "gemma-4-E2B.i1-Q6_K.gguf":
+            return "tabby-2-gemma-mini"
+        case "gemma-4-E4B.i1-Q4_K_M.gguf":
+            return "tabby-2-gemma-pro"
         default:
             return filename
         }
     }
 
-    /// Canonical downloadable GGUF model list shown in Welcome and menu UI.
-    ///
-    /// `expectedSizeBytes` and `sha256` were captured from HuggingFace's CDN
-    /// response headers (`x-linked-size` and `x-linked-etag` respectively).
-    /// To refresh after a model is updated upstream:
-    ///
-    ///   curl -sIL "<URL>" | grep -iE "^(x-linked-size|x-linked-etag):"
+    /// Builds a HuggingFace direct-download URL from a repo and file path.
+    private static func hfURL(_ repo: String, _ file: String) -> URL {
+        // Force-unwrap is safe only while callers pass hard-coded, URL-safe literals (never runtime input).
+        URL(string: "https://huggingface.co/\(repo)/resolve/main/\(file)?download=true")!
+    }
+
+    /// Canonical downloadable base GGUF models for Cotabby 2's base-model continuation path: Qwen3.5 /
+    /// Gemma base checkpoints from mradermacher's i1 GGUF repos. `expectedSizeBytes` and `sha256` stay nil
+    /// until captured from HuggingFace's CDN headers (`x-linked-size`, `x-linked-etag`); the download manager
+    /// skips size/hash validation while they are nil. Old instruct GGUFs are intentionally no longer listed.
     static let downloadableModels: [DownloadableRuntimeModel] = [
         DownloadableRuntimeModel(
-            filename: "SmolLM2-135M-Instruct-q8_0.gguf",
-            displayName: displayName(for: "SmolLM2-135M-Instruct-q8_0.gguf"),
-            downloadURL: URL(
-                string:
-                    "https://huggingface.co/Mungert/SmolLM2-135M-Instruct-GGUF/resolve/main/SmolLM2-135M-Instruct-q8_0.gguf?download=true"
-            )!,
-            approximateSizeInGigabytes: 0.1,
-            expectedSizeBytes: 144_811_552,
-            sha256: "bc64cce8e1c11e4ed870633b557e04af718249c817c4cf8a6784116144ec3e28"
+            filename: "Qwen3.5-0.8B-Base.i1-Q6_K.gguf",
+            displayName: displayName(for: "Qwen3.5-0.8B-Base.i1-Q6_K.gguf"),
+            downloadURL: hfURL("mradermacher/Qwen3.5-0.8B-Base-i1-GGUF", "Qwen3.5-0.8B-Base.i1-Q6_K.gguf"),
+            approximateSizeInGigabytes: 0.8
+        ),
+        DownloadableRuntimeModel(
+            filename: "Qwen3.5-2B-Base.i1-Q4_K_M.gguf",
+            displayName: displayName(for: "Qwen3.5-2B-Base.i1-Q4_K_M.gguf"),
+            downloadURL: hfURL("mradermacher/Qwen3.5-2B-Base-i1-GGUF", "Qwen3.5-2B-Base.i1-Q4_K_M.gguf"),
+            approximateSizeInGigabytes: 1.4
         ),
         DownloadableRuntimeModel(
-            filename: "Qwen3-0.6B-Q4_K_M.gguf",
-            displayName: displayName(for: "Qwen3-0.6B-Q4_K_M.gguf"),
-            downloadURL: URL(
-                string:
-                    "https://huggingface.co/unsloth/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q4_K_M.gguf?download=true"
-            )!,
-            approximateSizeInGigabytes: 0.4,
-            expectedSizeBytes: 396_705_472,
-            sha256: "ac2d97712095a558e31573f62f466a3f9d93990898b0ec79d7c974c1780d524a"
+            filename: "Qwen3.5-4B-Base.i1-Q4_K_M.gguf",
+            displayName: displayName(for: "Qwen3.5-4B-Base.i1-Q4_K_M.gguf"),
+            downloadURL: hfURL("mradermacher/Qwen3.5-4B-Base-i1-GGUF", "Qwen3.5-4B-Base.i1-Q4_K_M.gguf"),
+            approximateSizeInGigabytes: 2.6
         ),
         DownloadableRuntimeModel(
-            filename: "gemma-4-E2B-it-Q4_K_M.gguf",
-            displayName: displayName(for: "gemma-4-E2B-it-Q4_K_M.gguf"),
-            downloadURL: URL(
-                string:
-                    "https://huggingface.co/unsloth/gemma-4-E2B-it-GGUF/resolve/main/gemma-4-E2B-it-Q4_K_M.gguf?download=true"
-            )!,
-            approximateSizeInGigabytes: 3.1,
-            expectedSizeBytes: 3_106_736_256,
-            sha256: "9378bc471710229ef165709b62e34bfb62231420ddaf6d729e727305b5b8672d"
+            filename: "gemma-4-E2B.i1-Q6_K.gguf",
+            displayName: displayName(for: "gemma-4-E2B.i1-Q6_K.gguf"),
+            downloadURL: hfURL("mradermacher/gemma-4-E2B-i1-GGUF", "gemma-4-E2B.i1-Q6_K.gguf"),
+            approximateSizeInGigabytes: 4.5
         ),
         DownloadableRuntimeModel(
-            filename: "gemma-4-E4B-it-Q4_K_M.gguf",
-            displayName: displayName(for: "gemma-4-E4B-it-Q4_K_M.gguf"),
-            downloadURL: URL(
-                string:
-                    "https://huggingface.co/unsloth/gemma-4-E4B-it-GGUF/resolve/main/gemma-4-E4B-it-Q4_K_M.gguf?download=true"
-            )!,
-            approximateSizeInGigabytes: 5.0,
-            expectedSizeBytes: 4_977_169_568,
-            sha256: "519b9793ed6ce0ff530f1b7c96e848e08e49e7af4d57bb97f76215963a54146d"
+            filename: "gemma-4-E4B.i1-Q4_K_M.gguf",
+            displayName: displayName(for: "gemma-4-E4B.i1-Q4_K_M.gguf"),
+            downloadURL: hfURL("mradermacher/gemma-4-E4B-i1-GGUF", "gemma-4-E4B.i1-Q4_K_M.gguf"),
+            approximateSizeInGigabytes: 5.0
         )
     ]
 }
@@ -176,10 +167,11 @@ struct LlamaRuntimeConfiguration: Equatable, Sendable {
     static let `default` = LlamaRuntimeConfiguration(
         runtimeDirectoryPath: nil,
         preferredModelNames: [
-            "gemma-4-E4B-it-Q4_K_M.gguf",
-            "gemma-4-E2B-it-Q4_K_M.gguf",
-            "Qwen3-0.6B-Q4_K_M.gguf",
-            "SmolLM2-135M-Instruct-q8_0.gguf"
+            "Qwen3.5-2B-Base.i1-Q4_K_M.gguf",
+            "Qwen3.5-0.8B-Base.i1-Q6_K.gguf",
+            "Qwen3.5-4B-Base.i1-Q4_K_M.gguf",
+            "gemma-4-E2B.i1-Q6_K.gguf",
+            "gemma-4-E4B.i1-Q4_K_M.gguf"
         ],
         contextWindowTokens: 2048,
         batchSize: 512,
diff --git a/Cotabby/Models/OnboardingTemplate.swift b/Cotabby/Models/OnboardingTemplate.swift
index 8c4d07eb..67f05a84 100644
--- a/Cotabby/Models/OnboardingTemplate.swift
+++ b/Cotabby/Models/OnboardingTemplate.swift
@@ -97,11 +97,11 @@ enum OnboardingTemplate: String, CaseIterable, Identifiable, Equatable, Sendable
     var openSourceModelFilename: String {
         switch self {
         case .quick:
-            return "SmolLM2-135M-Instruct-q8_0.gguf"
+            return "Qwen3.5-0.8B-Base.i1-Q6_K.gguf"
         case .everyday:
-            return "gemma-4-E2B-it-Q4_K_M.gguf"
+            return "Qwen3.5-2B-Base.i1-Q4_K_M.gguf"
         case .powerful:
-            return "gemma-4-E4B-it-Q4_K_M.gguf"
+            return "Qwen3.5-4B-Base.i1-Q4_K_M.gguf"
         }
     }
 }
diff --git a/Cotabby/Models/SuggestionEngineModels.swift b/Cotabby/Models/SuggestionEngineModels.swift
index 0269e2e4..75f66e15 100644
--- a/Cotabby/Models/SuggestionEngineModels.swift
+++ b/Cotabby/Models/SuggestionEngineModels.swift
@@ -63,7 +63,7 @@ struct SuggestionSettingsSnapshot: Equatable, Sendable {
     let selectedEngine: SuggestionEngineKind
     let selectedWordCountPreset: SuggestionWordCountPreset
     let isClipboardContextEnabled: Bool
-    /// User-authored profile data for Cotabby's single instruction-rendered completion prompt.
+    /// User-authored profile data for Cotabby's base-model completion prompt.
     /// This travels in the snapshot so generation uses the same value the Settings UI shows.
     let userName: String
     /// User-authored style rules, carried in the snapshot so generation uses the same value the
@@ -86,10 +86,6 @@ struct SuggestionSettingsSnapshot: Equatable, Sendable {
     /// When true, the screenshot/OCR visual-context pipeline is skipped entirely for lower-latency
     /// suggestions. Defaults to false. Only affects visual context — predictions still run.
     let isFastModeEnabled: Bool
-    /// Experimental: when true and the Open Source engine is selected, the local path uses the
-    /// base-model continuation prompt (no instruction preamble, prefix last) instead of the
-    /// instruction-rendered prompt. Default false, so existing installs are byte-for-byte unchanged.
-    let useBaseCompletionPipeline: Bool
     /// User preference for how suggestions are presented (inline ghost text vs popup card vs auto
     /// based on caret geometry quality). Travels in the snapshot so consumers can react to changes
     /// without subscribing to the settings model directly.
diff --git a/Cotabby/Models/SuggestionSettingsModel.swift b/Cotabby/Models/SuggestionSettingsModel.swift
index 0aacf199..c773e947 100644
--- a/Cotabby/Models/SuggestionSettingsModel.swift
+++ b/Cotabby/Models/SuggestionSettingsModel.swift
@@ -39,9 +39,6 @@ final class SuggestionSettingsModel: ObservableObject {
     @Published private(set) var selectedWordCountPreset: SuggestionWordCountPreset
     @Published private(set) var isClipboardContextEnabled: Bool
     @Published private(set) var isFastModeEnabled: Bool
-    /// Experimental, opt-in via the `cotabbyBaseCompletionPipelineEnabled` default. Routes the local
-    /// llama path through the base-model continuation prompt. No UI yet; read at launch.
-    @Published private(set) var useBaseCompletionPipeline: Bool
     /// Whether the Performance pane is recording per-request latency. Defaults to false so the
     /// default user never pays any extra storage or write cost — recording only kicks in once the
     /// user opts in from Settings.
@@ -96,7 +93,6 @@ final class SuggestionSettingsModel: ObservableObject {
     private static let legacyShortPresetRawValue = "3-7"
     private static let clipboardContextEnabledDefaultsKey = "cotabbyClipboardContextEnabled"
     private static let fastModeEnabledDefaultsKey = "cotabbyFastModeEnabled"
-    private static let baseCompletionPipelineEnabledDefaultsKey = "cotabbyBaseCompletionPipelineEnabled"
     private static let performanceTrackingEnabledDefaultsKey = "cotabbyPerformanceTrackingEnabled"
     private static let menuBarWordCountVisibleDefaultsKey = "cotabbyMenuBarWordCountVisible"
     private static let mirrorPreferenceDefaultsKey = "cotabbyMirrorPreference"
@@ -194,10 +190,6 @@ final class SuggestionSettingsModel: ObservableObject {
         // into fast mode turns it off.
         let resolvedFastModeEnabled =
             userDefaults.object(forKey: Self.fastModeEnabledDefaultsKey) as? Bool ?? false
-        // Experimental base-model pipeline. Defaults to false so the merged-but-dark path changes
-        // nothing for existing users until the flag is explicitly set.
-        let resolvedBaseCompletionPipelineEnabled =
-            userDefaults.object(forKey: Self.baseCompletionPipelineEnabledDefaultsKey) as? Bool ?? false
         // Defaults to false so the metrics ring buffer stays empty until the user explicitly opts
         // in from the Performance pane.
         let resolvedPerformanceTrackingEnabled =
@@ -326,7 +318,6 @@ final class SuggestionSettingsModel: ObservableObject {
         selectedWordCountPreset = resolvedWordCountPreset
         isClipboardContextEnabled = resolvedClipboardContextEnabled
         isFastModeEnabled = resolvedFastModeEnabled
-        useBaseCompletionPipeline = resolvedBaseCompletionPipelineEnabled
         isPerformanceTrackingEnabled = resolvedPerformanceTrackingEnabled
         isMenuBarWordCountVisible = resolvedMenuBarWordCountVisible
         mirrorPreference = resolvedMirrorPreference
@@ -362,7 +353,6 @@ final class SuggestionSettingsModel: ObservableObject {
         persistSelectedWordCountPreset(resolvedWordCountPreset)
         persistClipboardContextEnabled(resolvedClipboardContextEnabled)
         persistFastModeEnabled(resolvedFastModeEnabled)
-        userDefaults.set(resolvedBaseCompletionPipelineEnabled, forKey: Self.baseCompletionPipelineEnabledDefaultsKey)
         persistPerformanceTrackingEnabled(resolvedPerformanceTrackingEnabled)
         persistMenuBarWordCountVisible(resolvedMenuBarWordCountVisible)
         persistMirrorPreference(resolvedMirrorPreference)
@@ -420,7 +410,6 @@ final class SuggestionSettingsModel: ObservableObject {
             isMultiLineEnabled: isMultiLineEnabled,
             autoAcceptTrailingPunctuation: autoAcceptTrailingPunctuation,
             isFastModeEnabled: isFastModeEnabled,
-            useBaseCompletionPipeline: useBaseCompletionPipeline,
             mirrorPreference: mirrorPreference,
             acceptanceGranularity: acceptanceGranularity
         )
@@ -1120,8 +1109,8 @@ extension SuggestionSettingsModel: SuggestionSettingsProviding {
         // The outer CombineLatest stack is already at Combine's per-operator cap, so each new
         // top-level setting gets layered above via another `CombineLatest`. `extendedContext` joins
         // alongside `acceptanceGranularity` here for the same reason.
-        return Publishers.CombineLatest4(primary, $acceptanceGranularity, $extendedContext, $useBaseCompletionPipeline)
-            .map { primaryTuple, granularity, extendedContext, baseCompletionEnabled in
+        return Publishers.CombineLatest3(primary, $acceptanceGranularity, $extendedContext)
+            .map { primaryTuple, granularity, extendedContext in
                 let (combinedSettings, presentationToggles, profile, timing) = primaryTuple
                 let (globallyEnabled, disabledAppRules, engine, wordCountPreset) = combinedSettings
                 let (clipboardContextEnabled, fastModeEnabled, mirrorPreference) = presentationToggles
@@ -1142,7 +1131,6 @@ extension SuggestionSettingsModel: SuggestionSettingsProviding {
                     isMultiLineEnabled: multiLine,
                     autoAcceptTrailingPunctuation: autoAcceptPunctuation,
                     isFastModeEnabled: fastModeEnabled,
-                    useBaseCompletionPipeline: baseCompletionEnabled,
                     mirrorPreference: mirrorPreference,
                     acceptanceGranularity: granularity
                 )
diff --git a/Cotabby/Support/BaseCompletionPromptRenderer.swift b/Cotabby/Support/BaseCompletionPromptRenderer.swift
index d700127b..a7665564 100644
--- a/Cotabby/Support/BaseCompletionPromptRenderer.swift
+++ b/Cotabby/Support/BaseCompletionPromptRenderer.swift
@@ -1,14 +1,11 @@
 import Foundation
 
 /// File overview:
-/// Renders the prompt for the experimental base-model completion pipeline (Open Source engine with
-/// `useBaseCompletionPipeline` enabled).
+/// Renders the prompt for Cotabby's base-model completion pipeline (the Open Source / llama path).
 ///
-/// Why this exists separately from `LlamaPromptRenderer`:
-/// `LlamaPromptRenderer` wraps the user's text in an instruction blob ("Task: ... do not answer the
-/// user ...") for instruction-tuned models. A *base* model has no instruction-following channel and
-/// will happily continue a bare "Task:" line as if it were the document, so that prompt shape leaks
-/// scaffolding into the ghost text. This renderer instead treats the model as a pure text continuer:
+/// Design: a *base* model has no instruction-following channel and will happily continue a bare
+/// "Task:" line as if it were the document, so an instruction-blob prompt would leak scaffolding into
+/// the ghost text. This renderer instead treats the model as a pure text continuer:
 ///
 /// - No task preamble and no standalone `Label:` lines.
 /// - Custom instructions work by *conditioning*, not obedience: persona, voice, and language are
diff --git a/Cotabby/Support/FoundationModelPromptRenderer.swift b/Cotabby/Support/FoundationModelPromptRenderer.swift
index c5cc4b3e..bf362492 100644
--- a/Cotabby/Support/FoundationModelPromptRenderer.swift
+++ b/Cotabby/Support/FoundationModelPromptRenderer.swift
@@ -48,7 +48,7 @@ enum FoundationModelPromptRenderer {
 
         // We intentionally do NOT inject the user's name here. On the chat-tuned system model a
         // stated name is the single biggest trigger for breaking character ("Jacob, how are
-        // you"). The llama backend still personalizes via `LlamaPromptRenderer`; Apple's model
+        // you"). The llama backend personalizes via `BaseCompletionPromptRenderer`; Apple's model
         // does not get the name until we can scope it to contexts that actually need it.
 
         // Two few-shot examples (down from five) carry the heavy anti-drift signal. The first
diff --git a/Cotabby/Support/LlamaPromptRenderer.swift b/Cotabby/Support/LlamaPromptRenderer.swift
deleted file mode 100644
index 854005ff..00000000
--- a/Cotabby/Support/LlamaPromptRenderer.swift
+++ /dev/null
@@ -1,105 +0,0 @@
-import Foundation
-
-/// File overview:
-/// Renders the single prompt string consumed by the local llama runtime.
-///
-/// Why this file exists:
-/// llama.cpp does not give us a separate "instructions" channel the way Foundation Models does.
-/// That means all base behavior, user preferences, and request context must be composed into one
-/// prompt string. Keeping that composition isolated here prevents prompt policy from leaking into
-/// `SuggestionRequestFactory` or the runtime lifecycle layer.
-enum LlamaPromptRenderer {
-    /// Renders Cotabby's local-model prompt.
-    ///
-    /// Cotabby always uses the instruction-rendered path so profile context and base autocomplete
-    /// rules travel through one prompt contract instead of drifting across separate modes.
-    static func prompt(
-        prefixText: String,
-        applicationName: String,
-        completionLengthInstruction: String,
-        userName: String?,
-        customRules: [String] = [],
-        extendedContext: String? = nil,
-        languageInstruction: String? = nil,
-        clipboardContext: String? = nil,
-        visualContextSummary: String? = nil
-    ) -> String {
-        var sections = [
-            "Task:",
-            "- Continue the user's existing text exactly at the caret position.",
-            "- This is autocomplete, not chat. Do not answer the user or start a conversation.",
-            "- Never repeat, restate, or quote the text before the caret.",
-            "- Use clipboard context only when it directly helps the inline continuation.",
-            "- Return plain text only with no thinking, labels, bullets, markdown, quotes, or explanation."
-        ]
-
-        var profileSections: [String] = []
-        if let name = userName, !name.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
-            profileSections.append("- The user's name is \(name).")
-        }
-        if !profileSections.isEmpty {
-            sections.append("")
-            sections.append("User Profile Context:")
-            sections.append(contentsOf: profileSections)
-        }
-
-        // User style rules render after the base task rules and profile, with an explicit
-        // subordination line so a user "rule" can never override the autocomplete/output contract
-        // above (prompt-injection guard).
-        let trimmedRules = customRules
-            .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
-            .filter { !$0.isEmpty }
-        if !trimmedRules.isEmpty {
-            sections.append("")
-            sections.append("Your style preferences:")
-            sections.append(contentsOf: trimmedRules.map { "- \($0)" })
-            sections.append("Apply these only when they fit the continuation naturally; never break the rules above.")
-        }
-
-        // Free-form user-authored reference notes (glossary, jargon, style guide). Rendered as a
-        // verbatim block rather than line-by-line bullets so the user's structure (lists, headings,
-        // examples) is preserved. The subordination line is the same prompt-injection guard used
-        // for style preferences above: this is reference material, not an override of the base
-        // autocomplete contract.
-        if let extendedContext, !extendedContext.isEmpty {
-            sections.append("")
-            sections.append("Reference notes from the user:")
-            sections.append(extendedContext)
-            sections.append("Use these notes only when they fit the continuation naturally; never break the rules above.")
-        }
-
-        sections.append("")
-        sections.append("Screen context:")
-        sections.append("User is on \(applicationName).")
-        if let summary = visualContextSummary, !summary.isEmpty {
-            sections.append("Screen content:")
-            sections.append(summary)
-        }
-        if let clipboardContext, !clipboardContext.isEmpty {
-            sections.append("User's clipboard:")
-            sections.append(clipboardContext)
-        }
-
-        // The final task cue sits immediately before the prefix so small instruct models see the
-        // current length policy right before the text they must continue, while the prefix itself
-        // still remains the last payload in the prompt.
-        sections.append("")
-        sections.append("Final instruction:")
-        // The declared-language hint sits in the late, high-attention block right before the prefix
-        // so small instruct models actually weigh it — without it they tend to drift to English when
-        // the surrounding text is short or ambiguous.
-        if let languageInstruction, !languageInstruction.isEmpty {
-            sections.append("- \(languageInstruction)")
-        }
-        // Experiment: the explicit word-range line (`completionLengthInstruction`) is intentionally
-        // omitted from the local-model prompt so length is governed purely by the token budget
-        // (`SuggestionWordCountPreset.suggestedPredictionTokenBudget`). The parameter stays wired so
-        // re-enabling the in-prompt cue is a one-line change. Apple Intelligence still gets the cue.
-        _ = completionLengthInstruction
-        sections.append("- The next line must begin directly with the continuation text.")
-        sections.append("Text before caret:")
-        sections.append(prefixText)
-
-        return sections.joined(separator: "\n")
-    }
-}
diff --git a/Cotabby/Support/SuggestionRequestFactory.swift b/Cotabby/Support/SuggestionRequestFactory.swift
index b13223ee..0ec7dd3c 100644
--- a/Cotabby/Support/SuggestionRequestFactory.swift
+++ b/Cotabby/Support/SuggestionRequestFactory.swift
@@ -62,33 +62,21 @@ enum SuggestionRequestFactory {
         let boundedVisualContextSummary = activeVisualContextSummary(
             rawSummary: visualContextSummary
         )
-        let prompt: String
-        if settings.useBaseCompletionPipeline, settings.selectedEngine == .llamaOpenSource {
-            // Base-model continuation path: no instruction blob, prefix last, trailing-trimmed.
-            // Custom instructions/persona condition the output rather than being obeyed.
-            prompt = BaseCompletionPromptRenderer.prompt(
-                prefixText: prefixText,
-                applicationName: context.applicationName,
-                userName: userName,
-                customRules: customRules,
-                extendedContext: activeExtendedContext,
-                languageInstruction: languageInstruction,
-                clipboardContext: boundedClipboardContext,
-                visualContextSummary: boundedVisualContextSummary
-            )
-        } else {
-            prompt = LlamaPromptRenderer.prompt(
-                prefixText: prefixText,
-                applicationName: context.applicationName,
-                completionLengthInstruction: completionLengthInstruction,
-                userName: userName,
-                customRules: customRules,
-                extendedContext: activeExtendedContext,
-                languageInstruction: languageInstruction,
-                clipboardContext: boundedClipboardContext,
-                visualContextSummary: boundedVisualContextSummary
-            )
-        }
+        // Cotabby 2 is a base-model continuation product on the Open Source path, so the local
+        // prompt is always the base render: no instruction blob, prefix last, trailing-trimmed.
+        // Custom instructions and persona condition the output rather than being obeyed. The
+        // Foundation Models path builds its own messages from these same request fields, so this
+        // prompt string is only consumed by the llama engine.
+        let prompt = BaseCompletionPromptRenderer.prompt(
+            prefixText: prefixText,
+            applicationName: context.applicationName,
+            userName: userName,
+            customRules: customRules,
+            extendedContext: activeExtendedContext,
+            languageInstruction: languageInstruction,
+            clipboardContext: boundedClipboardContext,
+            visualContextSummary: boundedVisualContextSummary
+        )
 
         let request = SuggestionRequest(
             context: context,
diff --git a/CotabbyTests/CotabbyTestFixtures.swift b/CotabbyTests/CotabbyTestFixtures.swift
index f11410e8..80a6ba6e 100644
--- a/CotabbyTests/CotabbyTestFixtures.swift
+++ b/CotabbyTests/CotabbyTestFixtures.swift
@@ -224,7 +224,6 @@ enum CotabbyTestFixtures {
         isMultiLineEnabled: Bool = false,
         autoAcceptTrailingPunctuation: Bool = true,
         isFastModeEnabled: Bool = false,
-        useBaseCompletionPipeline: Bool = false,
         mirrorPreference: MirrorPreference = .auto,
         acceptanceGranularity: AcceptanceGranularity = .word
     ) -> SuggestionSettingsSnapshot {
@@ -243,7 +242,6 @@ enum CotabbyTestFixtures {
             isMultiLineEnabled: isMultiLineEnabled,
             autoAcceptTrailingPunctuation: autoAcceptTrailingPunctuation,
             isFastModeEnabled: isFastModeEnabled,
-            useBaseCompletionPipeline: useBaseCompletionPipeline,
             mirrorPreference: mirrorPreference,
             acceptanceGranularity: acceptanceGranularity
         )
diff --git a/CotabbyTests/CustomRulesTests.swift b/CotabbyTests/CustomRulesTests.swift
index afcc866b..dbce38f0 100644
--- a/CotabbyTests/CustomRulesTests.swift
+++ b/CotabbyTests/CustomRulesTests.swift
@@ -35,42 +35,6 @@ final class CustomRulesTests: XCTestCase {
         XCTAssertEqual(CustomRulesCatalog.normalize(many).count, CustomRulesCatalog.maxRules)
     }
 
-    // MARK: - llama rendering
-
-    func test_llamaRenderer_emitsRulesAfterBaseRulesWithSubordination() {
-        let prompt = LlamaPromptRenderer.prompt(
-            prefixText: "Hello",
-            applicationName: "Notes",
-            completionLengthInstruction: "Return only the next few words.",
-            userName: nil,
-            customRules: ["Use British spelling", "Never use em dashes"]
-        )
-
-        XCTAssertTrue(prompt.contains("Your style preferences:"))
-        XCTAssertTrue(prompt.contains("- Use British spelling"))
-        XCTAssertTrue(prompt.contains("- Never use em dashes"))
-        XCTAssertTrue(prompt.contains("never break the rules above"))
-
-        // The base task rules must precede the user style section.
-        let baseIndex = try? XCTUnwrap(prompt.range(of: "Task:"))
-        let rulesIndex = try? XCTUnwrap(prompt.range(of: "Your style preferences:"))
-        if let baseIndex, let rulesIndex {
-            XCTAssertLessThan(baseIndex.lowerBound, rulesIndex.lowerBound)
-        }
-    }
-
-    func test_llamaRenderer_emitsNoRuleSectionWhenEmpty() {
-        let prompt = LlamaPromptRenderer.prompt(
-            prefixText: "Hello",
-            applicationName: "Notes",
-            completionLengthInstruction: "Return only the next few words.",
-            userName: nil,
-            customRules: []
-        )
-
-        XCTAssertFalse(prompt.contains("Your style preferences:"))
-    }
-
     // MARK: - foundation model rendering
 
     func test_foundationModelInstructions_includeRules() {
diff --git a/CotabbyTests/ExtendedContextTests.swift b/CotabbyTests/ExtendedContextTests.swift
index a0e374a0..f7745e7b 100644
--- a/CotabbyTests/ExtendedContextTests.swift
+++ b/CotabbyTests/ExtendedContextTests.swift
@@ -113,48 +113,10 @@ final class ExtendedContextTests: XCTestCase {
             configuration: .standard
         )
 
-        XCTAssertTrue(result.promptPreview.contains("Reference notes from the user:"))
+        XCTAssertTrue(result.promptPreview.contains("Notes the writer keeps in mind:"))
         XCTAssertTrue(result.promptPreview.contains("RULE: Every other word should be 'meow'"))
     }
 
-    // MARK: - llama rendering
-
-    func test_llamaRenderer_emitsReferenceNotesAfterCustomRulesWithSubordination() {
-        let prompt = LlamaPromptRenderer.prompt(
-            prefixText: "Hello",
-            applicationName: "Notes",
-            completionLengthInstruction: "Return only the next few words.",
-            userName: nil,
-            customRules: ["Use British spelling"],
-            extendedContext: "Project codenames: Aurora = the iOS app. Borealis = the macOS app."
-        )
-
-        XCTAssertTrue(prompt.contains("Reference notes from the user:"))
-        XCTAssertTrue(prompt.contains("Project codenames: Aurora = the iOS app."))
-        XCTAssertTrue(prompt.contains("never break the rules above"))
-
-        // Reference notes must follow custom rules, which must themselves follow the base task block.
-        guard let baseRange = prompt.range(of: "Task:"),
-              let rulesRange = prompt.range(of: "Your style preferences:"),
-              let notesRange = prompt.range(of: "Reference notes from the user:")
-        else {
-            return XCTFail("expected base/rules/notes sections to be present")
-        }
-        XCTAssertLessThan(baseRange.lowerBound, rulesRange.lowerBound)
-        XCTAssertLessThan(rulesRange.lowerBound, notesRange.lowerBound)
-    }
-
-    func test_llamaRenderer_emitsNoReferenceNotesSectionWhenNil() {
-        let prompt = LlamaPromptRenderer.prompt(
-            prefixText: "Hello",
-            applicationName: "Notes",
-            completionLengthInstruction: "Return only the next few words.",
-            userName: nil
-        )
-
-        XCTAssertFalse(prompt.contains("Reference notes from the user:"))
-    }
-
     // MARK: - foundation model rendering
 
     /// Reference notes live in the cached instructions channel so they're not re-tokenized on
diff --git a/CotabbyTests/LanguageSupportTests.swift b/CotabbyTests/LanguageSupportTests.swift
index 3761e5c9..0ff2c580 100644
--- a/CotabbyTests/LanguageSupportTests.swift
+++ b/CotabbyTests/LanguageSupportTests.swift
@@ -69,38 +69,6 @@ final class LanguageSupportTests: XCTestCase {
 
     // MARK: - rendering
 
-    func test_llamaRenderer_placesLanguageHintInFinalBlock() {
-        // The length cue is no longer rendered (token-budget-only experiment), so this guards that
-        // the language hint still lands in the late, high-attention final-instruction block.
-        let prompt = LlamaPromptRenderer.prompt(
-            prefixText: "Hola",
-            applicationName: "Notes",
-            completionLengthInstruction: "UNIQUE_LENGTH_CUE",
-            userName: nil,
-            languageInstruction: LanguageCatalog.promptInstruction(for: ["Spanish"])
-        )
-
-        XCTAssertFalse(prompt.contains("UNIQUE_LENGTH_CUE"))
-
-        guard let finalRange = prompt.range(of: "Final instruction:"),
-              let langRange = prompt.range(of: "Spanish") else {
-            XCTFail("Expected final instruction header and language hint in the prompt")
-            return
-        }
-        XCTAssertLessThan(finalRange.lowerBound, langRange.lowerBound)
-    }
-
-    func test_llamaRenderer_emitsNoLanguageLineWhenNoneDeclared() {
-        let prompt = LlamaPromptRenderer.prompt(
-            prefixText: "Hello",
-            applicationName: "Notes",
-            completionLengthInstruction: "Return only the next few words.",
-            userName: nil,
-            languageInstruction: LanguageCatalog.promptInstruction(for: [])
-        )
-        XCTAssertFalse(prompt.contains("usually writes in"))
-    }
-
     func test_foundationModelInstructions_includeLanguageHint() {
         let request = CotabbyTestFixtures.suggestionRequest(
             languageInstruction: LanguageCatalog.promptInstruction(for: ["Japanese"])
diff --git a/CotabbyTests/LlamaPromptCacheHintTrackerTests.swift b/CotabbyTests/LlamaPromptCacheHintTrackerTests.swift
new file mode 100644
index 00000000..4f406965
--- /dev/null
+++ b/CotabbyTests/LlamaPromptCacheHintTrackerTests.swift
@@ -0,0 +1,121 @@
+import CoreGraphics
+import XCTest
+@testable import Cotabby
+
+/// Tests for `LlamaPromptCacheHintTracker`, the conservative byte-prefix hint the llama engine
+/// passes into the runtime to reuse KV state across keystrokes. Pure-function and deterministic:
+/// the tracker only advertises reuse for the same focused field and sampling fingerprint.
+final class LlamaPromptCacheHintTrackerTests: XCTestCase {
+
+    // MARK: - cache hints
+
+    /// A fresh tracker has recorded nothing, so it offers no cached prefix.
+    func test_cacheHint_nilBeforeSuccessfulRequestIsRecorded() {
+        var tracker = LlamaPromptCacheHintTracker()
+
+        XCTAssertNil(tracker.cachedPrefixBytes(for: makeRequest(prompt: "hello")))
+    }
+
+    /// After "hello" is recorded, a request for "hello!" reuses the shared "hello" bytes.
+    func test_cacheHint_returnsCommonPrefixBytesForSameFocusedField() {
+        var tracker = LlamaPromptCacheHintTracker()
+        tracker.recordSuccessfulRequest(makeRequest(prompt: "hello"))
+
+        XCTAssertEqual(
+            tracker.cachedPrefixBytes(for: makeRequest(prompt: "hello!")),
+            "hello".utf8.count
+        )
+    }
+
+    /// A different element identifier (with no input frame) yields no hint.
+    func test_cacheHint_invalidatesWhenFocusedFieldChanges() {
+        var tracker = LlamaPromptCacheHintTracker()
+        tracker.recordSuccessfulRequest(makeRequest(prompt: "hello", elementIdentifier: "field-a"))
+
+        XCTAssertNil(
+            tracker.cachedPrefixBytes(for: makeRequest(prompt: "hello!", elementIdentifier: "field-b"))
+        )
+    }
+
+    /// When both requests carry the same input frame, the hint survives a changed element
+    /// identifier.
+    func test_cacheHint_prefersStableInputFrameOverUnstableElementIdentifier() {
+        var tracker = LlamaPromptCacheHintTracker()
+        let fieldFrame = CGRect(x: 10, y: 20, width: 300, height: 44)
+        tracker.recordSuccessfulRequest(
+            makeRequest(prompt: "hello", elementIdentifier: "field-a", inputFrameRect: fieldFrame)
+        )
+
+        XCTAssertEqual(
+            tracker.cachedPrefixBytes(
+                for: makeRequest(prompt: "hello!", elementIdentifier: "field-b", inputFrameRect: fieldFrame)
+            ),
+            "hello".utf8.count
+        )
+    }
+
+    /// A different `topK` yields no hint even though the prompt shares a prefix.
+    func test_cacheHint_invalidatesWhenSamplingFingerprintChanges() {
+        var tracker = LlamaPromptCacheHintTracker()
+        tracker.recordSuccessfulRequest(makeRequest(prompt: "hello", topK: 20))
+
+        XCTAssertNil(tracker.cachedPrefixBytes(for: makeRequest(prompt: "hello!", topK: 40)))
+    }
+
+    // MARK: - helpers
+
+    /// Builds a `SuggestionRequest` for a caret placed at the end of `prompt`.
+    ///
+    /// `prompt` doubles as the request's prompt, its prefix text, and the snapshot's preceding
+    /// text. Only the element identifier, `topK`, and input frame vary per call; all other
+    /// fields are fixed test values.
+    private func makeRequest(
+        prompt: String,
+        elementIdentifier: String = "field",
+        topK: Int = 20,
+        inputFrameRect: CGRect? = nil
+    ) -> SuggestionRequest {
+        let snapshot = FocusedInputSnapshot(
+            applicationName: "TestApp",
+            bundleIdentifier: "com.example.TestApp",
+            processIdentifier: 123,
+            elementIdentifier: elementIdentifier,
+            role: "AXTextField",
+            subrole: nil,
+            caretRect: .zero,
+            inputFrameRect: inputFrameRect,
+            caretSource: "test",
+            caretQuality: .exact,
+            observedCharWidth: nil,
+            precedingText: prompt,
+            trailingText: "",
+            // `NSRange` counts UTF-16 code units, so use the UTF-16 length rather than the
+            // `Character` count to keep the caret at the end of non-ASCII prompts.
+            selection: NSRange(location: prompt.utf16.count, length: 0),
+            isSecure: false
+        )
+        let context = FocusedInputContext(snapshot: snapshot, generation: 1)
+
+        return SuggestionRequest(
+            context: context,
+            prefixText: prompt,
+            prompt: prompt,
+            generation: context.generation,
+            maxPredictionTokens: 8,
+            temperature: 0.1,
+            topK: topK,
+            topP: 0.7,
+            minP: 0.08,
+            repetitionPenalty: 1.05,
+            randomSeed: 42,
+            maxSuffixCharacters: 192,
+            completionLengthInstruction: "Return only the next few words.",
+            userName: nil,
+            customRules: [],
+            languageInstruction: nil,
+            clipboardContext: nil,
+            visualContextSummary: nil,
+            isMultiLineEnabled: false
+        )
+    }
+}
diff --git a/CotabbyTests/LlamaPromptRendererTests.swift b/CotabbyTests/LlamaPromptRendererTests.swift
deleted file mode 100644
index 1ac9b039..00000000
--- a/CotabbyTests/LlamaPromptRendererTests.swift
+++ /dev/null
@@ -1,240 +0,0 @@
-import CoreGraphics
-import XCTest
-@testable import Cotabby
-
-/// Tests for the prompt-rendering boundary between DECIDE and GENERATE.
-///
-/// These are pure-function tests — no mocks, no I/O. The whole point of
-/// LlamaPromptRenderer is that given the same inputs, it returns the exact
-/// same string, so every assertion here is deterministic.
-final class LlamaPromptRendererTests: XCTestCase {
-
-    // MARK: - cache hints
-
-    func test_cacheHint_nilBeforeSuccessfulRequestIsRecorded() {
-        var tracker = LlamaPromptCacheHintTracker()
-
-        XCTAssertNil(tracker.cachedPrefixBytes(for: makeRequest(prompt: "hello")))
-    }
-
-    func test_cacheHint_returnsCommonPrefixBytesForSameFocusedField() {
-        var tracker = LlamaPromptCacheHintTracker()
-        tracker.recordSuccessfulRequest(makeRequest(prompt: "hello"))
-
-        XCTAssertEqual(
-            tracker.cachedPrefixBytes(for: makeRequest(prompt: "hello!")),
-            "hello".utf8.count
-        )
-    }
-
-    func test_cacheHint_invalidatesWhenFocusedFieldChanges() {
-        var tracker = LlamaPromptCacheHintTracker()
-        tracker.recordSuccessfulRequest(makeRequest(prompt: "hello", elementIdentifier: "field-a"))
-
-        XCTAssertNil(
-            tracker.cachedPrefixBytes(for: makeRequest(prompt: "hello!", elementIdentifier: "field-b"))
-        )
-    }
-
-    func test_cacheHint_prefersStableInputFrameOverUnstableElementIdentifier() {
-        var tracker = LlamaPromptCacheHintTracker()
-        let fieldFrame = CGRect(x: 10, y: 20, width: 300, height: 44)
-        tracker.recordSuccessfulRequest(
-            makeRequest(prompt: "hello", elementIdentifier: "field-a", inputFrameRect: fieldFrame)
-        )
-
-        XCTAssertEqual(
-            tracker.cachedPrefixBytes(
-                for: makeRequest(prompt: "hello!", elementIdentifier: "field-b", inputFrameRect: fieldFrame)
-            ),
-            "hello".utf8.count
-        )
-    }
-
-    func test_cacheHint_invalidatesWhenSamplingFingerprintChanges() {
-        var tracker = LlamaPromptCacheHintTracker()
-        tracker.recordSuccessfulRequest(makeRequest(prompt: "hello", topK: 20))
-
-        XCTAssertNil(tracker.cachedPrefixBytes(for: makeRequest(prompt: "hello!", topK: 40)))
-    }
-
-    // MARK: - instruction prompt
-
-    /// The structural contract for local instruct models: stable task rules first, supporting
-    /// context in the middle, then a late length cue right before the prefix the model must
-    /// continue. Losing one of these sections tends to degrade prompt-following without throwing.
-    func test_instructionPrompt_containsTaskScreenContextAndFinalInstruction() {
-        let prompt = LlamaPromptRenderer.prompt(
-            prefixText: "Once upon",
-            applicationName: "Messages",
-            completionLengthInstruction: "Keep completion short.",
-            userName: nil
-        )
-
-        XCTAssertTrue(prompt.contains("Task:"), "instruction prompt should include Task section")
-        XCTAssertTrue(
-            prompt.contains("Screen context:"),
-            "instruction prompt should include Screen context section"
-        )
-        XCTAssertTrue(
-            prompt.contains("Final instruction:"),
-            "instruction prompt should include a late final instruction section"
-        )
-        XCTAssertTrue(prompt.contains("Text before caret:"), "instruction prompt should include the prefix header")
-    }
-
-    func test_instructionPrompt_includesApplicationNameAndPrefix() {
-        let prompt = LlamaPromptRenderer.prompt(
-            prefixText: "My prefix text here",
-            applicationName: "Slack",
-            completionLengthInstruction: "Short.",
-            userName: nil
-        )
-
-        XCTAssertTrue(prompt.contains("User is on Slack."))
-        XCTAssertTrue(prompt.contains("My prefix text here"))
-    }
-
-    /// Length is enforced by the token budget, not by an in-prompt word range, so the
-    /// completion-length cue must never reach the local-model prompt even if a caller passes one.
-    func test_instructionPrompt_omitsCompletionLengthInstruction() {
-        // Experiment: the local-model prompt no longer carries the word-range cue; length is
-        // governed solely by the token budget. The cue must not leak into the prompt even when a
-        // caller still passes one.
-        let prompt = LlamaPromptRenderer.prompt(
-            prefixText: "PREFIX_BODY_XYZ",
-            applicationName: "App",
-            completionLengthInstruction: "UNIQUE_LENGTH_MARKER_7_TO_12_WORDS",
-            userName: nil
-        )
-
-        XCTAssertFalse(prompt.contains("UNIQUE_LENGTH_MARKER_7_TO_12_WORDS"))
-
-        guard let finalInstructionRange = prompt.range(of: "Final instruction:"),
-              let prefixRange = prompt.range(of: "PREFIX_BODY_XYZ") else {
-            XCTFail("Expected final instruction header and prefix in the prompt")
-            return
-        }
-
-        XCTAssertLessThan(finalInstructionRange.lowerBound, prefixRange.lowerBound)
-    }
-
-    func test_instructionPrompt_includesProfileContextWhenProvided() {
-        let prompt = LlamaPromptRenderer.prompt(
-            prefixText: "x",
-            applicationName: "App",
-            completionLengthInstruction: "Short.",
-            userName: "UNIQUE_NAME_MARKER_ZQRT"
-        )
-
-        XCTAssertTrue(prompt.contains("UNIQUE_NAME_MARKER_ZQRT"),
-                      "instruction prompt should carry user-provided profile name")
-    }
-
-    /// The prefix remains the last payload in the prompt so the model still ends on the actual
-    /// text it must continue, even though the length cue is moved later in the prompt.
-    func test_instructionPrompt_prefixAppearsAfterScreenContextAndEndsPrompt() {
-        let prompt = LlamaPromptRenderer.prompt(
-            prefixText: "PREFIX_BODY_XYZ",
-            applicationName: "App",
-            completionLengthInstruction: "Short.",
-            userName: nil
-        )
-
-        guard let contextRange = prompt.range(of: "Screen context:"),
-              let prefixRange = prompt.range(of: "PREFIX_BODY_XYZ") else {
-            XCTFail("Expected both Screen context: and PREFIX_BODY_XYZ in the prompt")
-            return
-        }
-
-        XCTAssertLessThan(contextRange.lowerBound, prefixRange.lowerBound,
-                          "prefix must appear after the Screen context header")
-        XCTAssertTrue(prompt.hasSuffix("PREFIX_BODY_XYZ"))
-    }
-
-    func test_instructionPrompt_includesVisualContextSummaryWhenProvided() {
-        let prompt = LlamaPromptRenderer.prompt(
-            prefixText: "PREFIX",
-            applicationName: "App",
-            completionLengthInstruction: "Short.",
-            userName: nil,
-            visualContextSummary: "A window describing a cat."
-        )
-
-        XCTAssertTrue(prompt.contains("Screen content:"))
-        XCTAssertTrue(prompt.contains("A window describing a cat."))
-    }
-
-    func test_instructionPrompt_includesClipboardContextWhenProvided() {
-        let prompt = LlamaPromptRenderer.prompt(
-            prefixText: "PREFIX",
-            applicationName: "App",
-            completionLengthInstruction: "Short.",
-            userName: nil,
-            clipboardContext: "UNIQUE_CLIPBOARD_MARKER"
-        )
-
-        XCTAssertTrue(prompt.contains("User's clipboard:"))
-        XCTAssertTrue(prompt.contains("UNIQUE_CLIPBOARD_MARKER"))
-    }
-
-    func test_instructionPrompt_omitsVisualContextSummaryWhenNil() {
-        let prompt = LlamaPromptRenderer.prompt(
-            prefixText: "PREFIX",
-            applicationName: "App",
-            completionLengthInstruction: "Short.",
-            userName: nil,
-            visualContextSummary: nil
-        )
-
-        XCTAssertFalse(prompt.contains("Screen content:"))
-    }
-
-    private func makeRequest(
-        prompt: String,
-        elementIdentifier: String = "field",
-        topK: Int = 20,
-        inputFrameRect: CGRect? = nil
-    ) -> SuggestionRequest {
-        let snapshot = FocusedInputSnapshot(
-            applicationName: "TestApp",
-            bundleIdentifier: "com.example.TestApp",
-            processIdentifier: 123,
-            elementIdentifier: elementIdentifier,
-            role: "AXTextField",
-            subrole: nil,
-            caretRect: .zero,
-            inputFrameRect: inputFrameRect,
-            caretSource: "test",
-            caretQuality: .exact,
-            observedCharWidth: nil,
-            precedingText: prompt,
-            trailingText: "",
-            selection: NSRange(location: prompt.count, length: 0),
-            isSecure: false
-        )
-        let context = FocusedInputContext(snapshot: snapshot, generation: 1)
-
-        return SuggestionRequest(
-            context: context,
-            prefixText: prompt,
-            prompt: prompt,
-            generation: context.generation,
-            maxPredictionTokens: 8,
-            temperature: 0.1,
-            topK: topK,
-            topP: 0.7,
-            minP: 0.08,
-            repetitionPenalty: 1.05,
-            randomSeed: 42,
-            maxSuffixCharacters: 192,
-            completionLengthInstruction: "Return only the next few words.",
-            userName: nil,
-            customRules: [],
-            languageInstruction: nil,
-            clipboardContext: nil,
-            visualContextSummary: nil,
-            isMultiLineEnabled: false
-        )
-    }
-}
diff --git a/CotabbyTests/ModelAndPresentationValueTests.swift b/CotabbyTests/ModelAndPresentationValueTests.swift
index fd576f8a..013fc491 100644
--- a/CotabbyTests/ModelAndPresentationValueTests.swift
+++ b/CotabbyTests/ModelAndPresentationValueTests.swift
@@ -147,12 +147,12 @@ final class RuntimeAndInputModelValueTests: XCTestCase {
 
     func test_runtimeModelCatalogMapsKnownNamesAndLeavesCustomNamesAlone() {
         XCTAssertEqual(
-            RuntimeModelCatalog.displayName(for: "Qwen3-0.6B-Q4_K_M.gguf"),
-            "tabby-1-mini"
+            RuntimeModelCatalog.displayName(for: "Qwen3.5-2B-Base.i1-Q4_K_M.gguf"),
+            "tabby-2-base"
         )
         XCTAssertEqual(
-            RuntimeModelCatalog.displayName(for: "gemma-4-E2B-it-Q4_K_M.gguf"),
-            "tabby-1-base"
+            RuntimeModelCatalog.displayName(for: "Qwen3.5-0.8B-Base.i1-Q6_K.gguf"),
+            "tabby-2-mini"
         )
         // Retired models fall back to their raw filename like any unknown local GGUF.
         XCTAssertEqual(
diff --git a/CotabbyTests/OnboardingTemplateRecommenderTests.swift b/CotabbyTests/OnboardingTemplateRecommenderTests.swift
index 2385debb..1ede3f37 100644
--- a/CotabbyTests/OnboardingTemplateRecommenderTests.swift
+++ b/CotabbyTests/OnboardingTemplateRecommenderTests.swift
@@ -46,9 +46,9 @@ final class OnboardingTemplateRecommenderTests: XCTestCase {
 
     func testOpenSourceTiersMapToTheirLocalModels() {
         let expected: [OnboardingTemplate: String] = [
-            .quick: "SmolLM2-135M-Instruct-q8_0.gguf",
-            .everyday: "gemma-4-E2B-it-Q4_K_M.gguf",
-            .powerful: "gemma-4-E4B-it-Q4_K_M.gguf"
+            .quick: "Qwen3.5-0.8B-Base.i1-Q6_K.gguf",
+            .everyday: "Qwen3.5-2B-Base.i1-Q4_K_M.gguf",
+            .powerful: "Qwen3.5-4B-Base.i1-Q4_K_M.gguf"
         ]
         for (template, filename) in expected {
             let plan = OnboardingTemplateRecommender.resolvePlan(for: template, engine: .llamaOpenSource)
diff --git a/CotabbyTests/SuggestionRequestFactoryTests.swift b/CotabbyTests/SuggestionRequestFactoryTests.swift
index 9b635a10..eb8e7642 100644
--- a/CotabbyTests/SuggestionRequestFactoryTests.swift
+++ b/CotabbyTests/SuggestionRequestFactoryTests.swift
@@ -239,7 +239,7 @@ final class SuggestionRequestFactoryTests: XCTestCase {
         )
 
         XCTAssertEqual(result.request.clipboardContext, "Copied project notes.")
-        XCTAssertTrue(result.promptPreview.contains("User's clipboard:"))
+        XCTAssertTrue(result.promptPreview.contains("On the clipboard:"))
         XCTAssertTrue(result.promptPreview.contains("Copied project notes."))
     }
 
@@ -272,7 +272,7 @@ final class SuggestionRequestFactoryTests: XCTestCase {
         )
 
         XCTAssertNil(result.request.clipboardContext)
-        XCTAssertFalse(result.promptPreview.contains("User's clipboard:"))
+        XCTAssertFalse(result.promptPreview.contains("On the clipboard:"))
         XCTAssertFalse(result.promptPreview.contains("Copied project notes."))
     }