FuJacob · FuJacob · Jun 1, 2026 · May 31, 2026 · Jun 1, 2026 · Jun 1, 2026
diff --git a/Cotabby.xcodeproj/project.pbxproj b/Cotabby.xcodeproj/project.pbxproj
diff --git a/Cotabby/Models/LlamaRuntimeModels.swift b/Cotabby/Models/LlamaRuntimeModels.swift
@@ -201,6 +201,15 @@ struct LlamaGenerationOptions: Equatable, Sendable {
     let repetitionPenalty: Double
     var seed: UInt32?
 
+    /// Masks line-break tokens so single-line fields never receive a multi-line completion.
+    var singleLine: Bool = false
+    /// Constrains the first generated token to continue the current word (mid-word carets only).
+    var forceWordContinuation: Bool = false
+
+    /// Average per-token log-probability below which a completion is suppressed as low-confidence.
+    /// Defaults to -infinity, which disables suppression entirely.
+    var confidenceFloor: Double = -.infinity
+
     static func summary(maxPredictionTokens: Int, temperature: Double) -> LlamaGenerationOptions {
         LlamaGenerationOptions(
             maxPredictionTokens: maxPredictionTokens,

diff --git a/Cotabby/Services/Runtime/LlamaRuntimeCore.swift b/Cotabby/Services/Runtime/LlamaRuntimeCore.swift
@@ -190,6 +190,7 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
 
         var generatedText = ""
         var tokensGenerated = 0
+        var sumLogprob = 0.0
         var stopReason = "budget_exhausted"
 
         for _ in 0 ..< options.maxPredictionTokens {
@@ -216,6 +217,7 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
             let piece = Self.extractPiece(result)
             generatedText += piece
             tokensGenerated += 1
+            sumLogprob += Double(result.logprob)
         }
 
         CotabbyLogger.runtime.debug(
@@ -228,6 +230,23 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
             ]
         )
 
+        // Confidence suppression: drop completions the model itself was unsure about. Disabled by
+        // default (confidenceFloor == -infinity); the KV-trim defer above still runs on early return.
+        if tokensGenerated > 0,
+           ConfidenceSuppressionPolicy.shouldSuppress(
+               averageLogprob: sumLogprob / Double(tokensGenerated),
+               floor: options.confidenceFloor
+           ) {
+            CotabbyLogger.runtime.debug(
+                "Suppressed low-confidence completion",
+                metadata: [
+                    "tokens_generated": .stringConvertible(tokensGenerated),
+                    "avg_logprob": .stringConvertible(sumLogprob / Double(tokensGenerated))
+                ]
+            )
+            return ""
+        }
+
         return generatedText
     }
 
@@ -387,6 +406,9 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
 
                     let remaining = Array(promptTokens[reusableTokenCount...])
                     if !remaining.isEmpty {
+                        // Seed for the reuse path is sampled at the end of this decodePrompt; apply
+                        // the word-continuation constraint to it just like the fresh path does.
+                        engine.setForceWordContinuation(autocompleteSequenceID, options.forceWordContinuation)
                         var mutableRemaining = remaining
                         let status = engine.decodePrompt(
                             autocompleteSequenceID,
@@ -423,6 +445,10 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
             throw LlamaRuntimeError.generationFailed("Unable to create inference sequence.")
         }
 
+        // The engine samples the first (seed) token at the end of decodePrompt, so set the
+        // word-continuation constraint here, before decoding.
+        engine.setForceWordContinuation(seqID, options.forceWordContinuation)
+
         var tokens = promptTokens
         let status = engine.decodePrompt(seqID, &tokens, Int32(tokens.count), 0)
         guard status == .ok else {
@@ -460,7 +486,8 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
             top_p: Float(options.topP),
             min_p: Float(options.minP),
             repetition_penalty: Float(options.repetitionPenalty),
-            seed: options.seed ?? 0
+            seed: options.seed ?? 0,
+            single_line: options.singleLine
         )
     }
 

diff --git a/Cotabby/Services/Runtime/LlamaSuggestionEngine.swift b/Cotabby/Services/Runtime/LlamaSuggestionEngine.swift
@@ -45,7 +45,12 @@ final class LlamaSuggestionEngine {
                     topP: request.topP,
                     minP: request.minP,
                     repetitionPenalty: request.repetitionPenalty,
-                    seed: request.randomSeed
+                    seed: request.randomSeed,
+                    singleLine: !request.isMultiLineEnabled,
+                    forceWordContinuation: MidWordContinuationPolicy.shouldForceContinuation(
+                        precedingText: request.context.precedingText,
+                        trailingText: request.context.trailingText
+                    )
                 )
             )
             try Task.checkCancellation()

diff --git a/Cotabby/Support/ConfidenceSuppressionPolicy.swift b/Cotabby/Support/ConfidenceSuppressionPolicy.swift
@@ -0,0 +1,21 @@
+import Foundation
+
+/// File overview:
+/// Decides whether a completion is too low-confidence to show, based on the model's own
+/// per-token log-probabilities.
+///
+/// Why this file exists:
+/// The guiding principle is that a suppressed completion beats a wrong one. The engine now reports
+/// a per-token log-probability, so we can drop completions the model itself was unsure about
+/// instead of showing a confident-looking guess. The policy is pure and isolated so the threshold
+/// is easy to test and tune. A floor of negative infinity (the default) disables suppression, so
+/// this is a no-op until a caller opts in by raising the floor.
+enum ConfidenceSuppressionPolicy {
+    /// Reports whether a completion should be dropped for low confidence.
+    ///
+    /// Suppression applies only when `floor` is above negative infinity and `averageLogprob`
+    /// is strictly below it; a `floor` of negative infinity always yields `false`.
+    static func shouldSuppress(averageLogprob: Double, floor: Double) -> Bool {
+        floor > -.infinity && averageLogprob < floor
+    }
+}
diff --git a/Cotabby/Support/InsertionSafetyGate.swift b/Cotabby/Support/InsertionSafetyGate.swift
@@ -0,0 +1,41 @@
+import Foundation
+
+/// File overview:
+/// Rejects completions that are technically non-empty but would insert nothing a user wants.
+///
+/// Why this file exists:
+/// `SuggestionInserter` previously only refused a fully empty string, so a completion carrying an
+/// interior control character or a U+FFFD replacement glyph (from lossy detokenization) could reach
+/// ghost text and be committed on Tab. This gate is the single predicate for "is this safe to put
+/// on screen and insert."
+///
+/// Scope note: this intentionally does NOT reject punctuation-only output. A lone ")", ".", or "?"
+/// is a legitimate inline completion (closing a bracket, ending a sentence), so judging punctuation
+/// here would suppress useful suggestions. The gate is limited to unambiguous junk.
+enum InsertionSafetyGate {
+    /// Reports whether `completion` may be shown as ghost text and inserted.
+    ///
+    /// The result is `false` when the string is empty, contains U+FFFD, contains a C0 control
+    /// character (below U+0020) or DEL (U+007F), or consists solely of whitespace. Punctuation-only
+    /// strings pass, since no rule here looks at punctuation.
+    ///
+    /// NOTE(review): line feed and tab sit in the rejected C0 range, so this relies on line breaks
+    /// being dealt with before the gate runs; confirm that holds for multi-line requests.
+    static func isSafeToInsert(_ completion: String) -> Bool {
+        let scalars = completion.unicodeScalars
+
+        // Any single junk scalar disqualifies the whole completion.
+        if scalars.contains(where: isJunk) {
+            return false
+        }
+
+        // An empty or whitespace-only string has no visible scalar and is not a completion.
+        return scalars.contains { !CharacterSet.whitespacesAndNewlines.contains($0) }
+    }
+
+    /// True for the replacement character (undecodable detokenizer output), C0 controls, and DEL.
+    private static func isJunk(_ scalar: Unicode.Scalar) -> Bool {
+        let replacementCharacter: Unicode.Scalar = "\u{FFFD}"
+        return scalar == replacementCharacter || scalar.value < 0x20 || scalar.value == 0x7F
+    }
+}
diff --git a/Cotabby/Support/MidWordContinuationPolicy.swift b/Cotabby/Support/MidWordContinuationPolicy.swift
@@ -0,0 +1,27 @@
+import Foundation
+
+/// File overview:
+/// Decides whether the first generated token should be constrained to continue the current word.
+///
+/// Why this file exists:
+/// The engine can force the first sampled token to be a word continuation (no leading whitespace),
+/// which heals mid-word completions. But forcing it at a normal word boundary would break the
+/// common "predict the next word" case, where a leading space is exactly what we want. This policy
+/// keeps the trigger deliberately narrow: it only fires when the caret sits strictly inside a word
+/// (a word character on both sides). At a word end (nothing or a non-word character after the
+/// caret) it returns false so ordinary next-word predictions are untouched.
+enum MidWordContinuationPolicy {
+    /// Returns true only when the last character of `precedingText` and the first character of
+    /// `trailingText` are both word characters; an empty string on either side yields false.
+    static func shouldForceContinuation(precedingText: String, trailingText: String) -> Bool {
+        guard let left = precedingText.last, let right = trailingText.first else {
+            return false
+        }
+        return isWordCharacter(left) && isWordCharacter(right)
+    }
+
+    /// A word character is a letter or a number.
+    private static func isWordCharacter(_ character: Character) -> Bool {
+        character.isLetter || character.isNumber
+    }
+}
diff --git a/Cotabby/Support/SentenceBoundaryClassifier.swift b/Cotabby/Support/SentenceBoundaryClassifier.swift
@@ -0,0 +1,65 @@
+import Foundation
+
+/// File overview:
+/// Decides whether a period actually ends a sentence, so phrase-level acceptance does not stop
+/// early on decimals, list numbers, single-letter initials, or common abbreviations.
+///
+/// Why this file exists:
+/// Phrase acceptance treats any `.` as a sentence terminator. That breaks "version 1.2", "U.S.",
+/// "e.g.", and a numbered "1." mid-tail. A purely structural scanner cannot resolve every case, but
+/// it can resolve the frequent ones with a few local rules. `!` and `?` are always terminal and do
+/// not need this; only the period is ambiguous.
+enum SentenceBoundaryClassifier {
+    /// Letter runs (stored lowercased) that keep their trailing period as part of the word.
+    private static let abbreviations: Set<String> = [
+        "mr", "mrs", "ms", "dr", "st", "vs", "eg", "ie", "etc", "no", "fig", "approx", "inc", "ltd"
+    ]
+
+    /// Reports whether the period at `periodIndex` closes a sentence.
+    ///
+    /// Only the characters before the period are examined; what follows it plays no part.
+    /// Rules, in the order they are applied:
+    /// 1. Nothing before the period: terminal.
+    /// 2. A number character directly before it ("1.", "3.14"): not terminal.
+    /// 3. A letter directly before it that stands alone ("U.", the "S." in "U.S."): not terminal.
+    /// 4. A letter run that case-insensitively matches a known abbreviation: not terminal.
+    /// 5. Anything else: terminal.
+    ///
+    /// - Parameters:
+    ///   - text: The string containing the period.
+    ///   - periodIndex: Index of the period; the caller guarantees `text[periodIndex]` is ".".
+    /// - Returns: `true` when the period is treated as a sentence end.
+    static func isTerminalPeriod(in text: String, at periodIndex: String.Index) -> Bool {
+        let head = text[..<periodIndex]
+        // Rule 1: a leading period has no preceding word to qualify it.
+        guard let lastBefore = head.last else {
+            return true
+        }
+
+        // Rule 2: decimals, version numbers, and list/ordinal markers.
+        if lastBefore.isNumber {
+            return false
+        }
+
+        // Rule 5 for punctuation, whitespace, and every other non-letter.
+        guard lastBefore.isLetter else {
+            return true
+        }
+
+        // Rule 3: the letter is an initial when the character before it is absent or a non-letter.
+        let standsAlone = !(head.dropLast().last?.isLetter ?? false)
+        if standsAlone {
+            return false
+        }
+
+        // Rule 4: compare the whole letter run against the abbreviation table.
+        let word = trailingLetters(in: text, endingBefore: periodIndex).lowercased()
+        return !abbreviations.contains(word)
+    }
+
+    /// The run of consecutive letters in `text` that ends just before `index`, in reading order.
+    private static func trailingLetters(in text: String, endingBefore index: String.Index) -> String {
+        let lettersNearestFirst = text[..<index].reversed().prefix(while: { $0.isLetter })
+        return String(lettersNearestFirst.reversed())
+    }
+}
diff --git a/Cotabby/Support/SuggestionSessionReconciler.swift b/Cotabby/Support/SuggestionSessionReconciler.swift
@@ -270,10 +270,11 @@ enum SuggestionSessionReconciler {
     /// quoted-prose case (`"done." Next` → stop after the closing quote). Without the walk-back,
     /// the chunk's last character would be `"` rather than `.` and phrase mode would over-accept
     /// the next sentence. Token-interior punctuation like the dots in `U.S.A` does NOT trigger
-    /// an early break because the chunk's tail (after walking) is `A`, not `.`. The known
-    /// false-positive is when the tail itself ends with `U.S.A.` — the trailing period reads as
-    /// a sentence terminator and the user has to press once more for the next phrase. Rule-based
-    /// scanners can't disambiguate that without NLP; Cursor and Copilot behave the same way.
+    /// an early break because the chunk's tail (after walking) is `A`, not `.`. Periods are further
+    /// disambiguated by `SentenceBoundaryClassifier`, so decimals ("1.2"), list numbers ("1."),
+    /// single-letter initials, and common abbreviations ("e.g.", "U.S.") do not end a phrase. Truly
+    /// ambiguous cases (a real sentence ending in an abbreviation) lean toward continuing, which is
+    /// the safe default for phrase acceptance.
     ///
     /// The `autoAcceptTrailingPunctuation` flag is passed through to each underlying chunk call
     /// but does not change the final phrase output: a tail like `you?` with the flag off yields
@@ -333,7 +334,16 @@ enum SuggestionSessionReconciler {
             return false
         }
         let prev = text.index(before: index)
-        return text[prev].isPhraseSentenceTerminator
+        guard text[prev].isPhraseSentenceTerminator else {
+            return false
+        }
+        // `!` and `?` always end a sentence. A period is ambiguous: decimals, list/ordinal numbers,
+        // single-letter initials, and common abbreviations are not sentence ends, so consult the
+        // classifier rather than treating every "." as terminal.
+        if text[prev] == "." {
+            return SentenceBoundaryClassifier.isTerminalPeriod(in: text, at: prev)
+        }
+        return true
     }
 
     /// Returns the index just past a word token's final alphanumeric character when that token has

diff --git a/Cotabby/Support/SuggestionTextNormalizer.swift b/Cotabby/Support/SuggestionTextNormalizer.swift
@@ -20,14 +20,9 @@ enum SuggestionTextNormalizer {
         normalized = normalized.replacingOccurrences(of: "<|im_end|>", with: "")
         normalized = normalized.replacingOccurrences(of: "<|im_start|>", with: "")
 
-        // Thinking-capable models may emit <think>…</think> reasoning blocks. Strip complete
-        // blocks first, then any trailing open tag left when generation hit the token limit.
-        if let thinkRange = normalized.range(of: "<think>[\\s\\S]*?</think>", options: .regularExpression) {
-            normalized.replaceSubrange(thinkRange, with: "")
-        }
-        if let openTag = normalized.range(of: "<think>[\\s\\S]*", options: .regularExpression) {
-            normalized.replaceSubrange(openTag, with: "")
-        }
+        // Thinking-capable models may emit <think>…</think> reasoning blocks. Strip them here so
+        // the reasoning text never reaches the continuation logic below.
+        normalized = stripThinkBlocks(normalized)
 
         for prompt in [request.prompt] + promptEchoCandidates {
             if !prompt.isEmpty, normalized.hasPrefix(prompt) {
@@ -82,8 +77,10 @@ enum SuggestionTextNormalizer {
         // If the model starts by repeating text that already exists after the caret, we treat the
         // suggestion as unusable. Showing only the remainder often produces confusing mid-word
         // ghosts, so the coordinator should regenerate instead.
-        if !request.context.trailingText.isEmpty,
-            normalized.hasPrefix(request.context.trailingText) {
+        if TrailingDuplicationFilter.duplicatesTrailingText(
+            normalized,
+            trailingText: request.context.trailingText
+        ) {
             return ""
         }
 
@@ -104,9 +101,29 @@ enum SuggestionTextNormalizer {
             normalized = String(normalized.drop(while: { $0.isWhitespace }))
         }
 
+        // Final safety gate: never surface control characters, replacement glyphs, or
+        // whitespace-only output as ghost text. Returning empty makes the coordinator treat this
+        // as "no suggestion" and regenerate rather than insert junk on Tab.
+        guard InsertionSafetyGate.isSafeToInsert(normalized) else {
+            return ""
+        }
+
         return normalized
     }
 
+    /// Removes `<think>…</think>` reasoning blocks: every complete block first, then any dangling
+    /// open tag left when generation hit the token limit before the block was closed. Clearing all
+    /// complete blocks first keeps text that follows a later closed block from being discarded.
+    private static func stripThinkBlocks(_ text: String) -> String {
+        // `replacingOccurrences` rewrites every match; `range(of:)` would stop at the first one.
+        let withoutClosedBlocks = text.replacingOccurrences(
+            of: "<think>[\\s\\S]*?</think>", with: "", options: .regularExpression
+        )
+        return withoutClosedBlocks.replacingOccurrences(
+            of: "<think>[\\s\\S]*", with: "", options: .regularExpression
+        )
+    }
+
     /// Finds the longest suffix of `precedingText` (at any word offset) that matches a prefix
     /// of `suggestion`, then strips that overlap. Returns empty if the entire suggestion is echoed.
     ///