Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 41 additions & 1 deletion Cotabby.xcodeproj/project.pbxproj

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions Cotabby/Models/LlamaRuntimeModels.swift
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,15 @@ struct LlamaGenerationOptions: Equatable, Sendable {
let repetitionPenalty: Double
var seed: UInt32?

/// Masks line-break tokens so single-line fields never receive a multi-line completion.
var singleLine: Bool = false
/// Constrains the first generated token to continue the current word (mid-word carets only).
var forceWordContinuation: Bool = false

/// Average per-token log-probability below which a completion is suppressed as low-confidence.
/// Defaults to -infinity, which disables suppression entirely.
var confidenceFloor: Double = -.infinity

static func summary(maxPredictionTokens: Int, temperature: Double) -> LlamaGenerationOptions {
LlamaGenerationOptions(
maxPredictionTokens: maxPredictionTokens,
Expand Down
29 changes: 28 additions & 1 deletion Cotabby/Services/Runtime/LlamaRuntimeCore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {

var generatedText = ""
var tokensGenerated = 0
var sumLogprob = 0.0
var stopReason = "budget_exhausted"

for _ in 0 ..< options.maxPredictionTokens {
Expand All @@ -216,6 +217,7 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
let piece = Self.extractPiece(result)
generatedText += piece
tokensGenerated += 1
sumLogprob += Double(result.logprob)
}

CotabbyLogger.runtime.debug(
Expand All @@ -228,6 +230,23 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
]
)

// Confidence suppression: drop completions the model itself was unsure about. Disabled by
// default (confidenceFloor == -infinity); the KV-trim defer above still runs on early return.
if tokensGenerated > 0,
ConfidenceSuppressionPolicy.shouldSuppress(
averageLogprob: sumLogprob / Double(tokensGenerated),
floor: options.confidenceFloor
) {
CotabbyLogger.runtime.debug(
"Suppressed low-confidence completion",
metadata: [
"tokens_generated": .stringConvertible(tokensGenerated),
"avg_logprob": .stringConvertible(sumLogprob / Double(tokensGenerated))
]
)
return ""
}

return generatedText
}

Expand Down Expand Up @@ -387,6 +406,9 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {

let remaining = Array(promptTokens[reusableTokenCount...])
if !remaining.isEmpty {
// Seed for the reuse path is sampled at the end of this decodePrompt; apply
// the word-continuation constraint to it just like the fresh path does.
engine.setForceWordContinuation(autocompleteSequenceID, options.forceWordContinuation)
var mutableRemaining = remaining
let status = engine.decodePrompt(
autocompleteSequenceID,
Expand Down Expand Up @@ -423,6 +445,10 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
throw LlamaRuntimeError.generationFailed("Unable to create inference sequence.")
}

// The engine samples the first (seed) token at the end of decodePrompt, so set the
// word-continuation constraint here, before decoding.
engine.setForceWordContinuation(seqID, options.forceWordContinuation)

var tokens = promptTokens
let status = engine.decodePrompt(seqID, &tokens, Int32(tokens.count), 0)
guard status == .ok else {
Expand Down Expand Up @@ -460,7 +486,8 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
top_p: Float(options.topP),
min_p: Float(options.minP),
repetition_penalty: Float(options.repetitionPenalty),
seed: options.seed ?? 0
seed: options.seed ?? 0,
single_line: options.singleLine
)
}

Expand Down
7 changes: 6 additions & 1 deletion Cotabby/Services/Runtime/LlamaSuggestionEngine.swift
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,12 @@ final class LlamaSuggestionEngine {
topP: request.topP,
minP: request.minP,
repetitionPenalty: request.repetitionPenalty,
seed: request.randomSeed
seed: request.randomSeed,
singleLine: !request.isMultiLineEnabled,
forceWordContinuation: MidWordContinuationPolicy.shouldForceContinuation(
precedingText: request.context.precedingText,
trailingText: request.context.trailingText
)
)
)
try Task.checkCancellation()
Expand Down
21 changes: 21 additions & 0 deletions Cotabby/Support/ConfidenceSuppressionPolicy.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import Foundation

/// File overview:
/// Decides whether a completion is too low-confidence to show, based on the model's own
/// per-token log-probabilities.
///
/// Why this file exists:
/// The guiding principle is that a suppressed completion beats a wrong one. The engine now reports
/// a per-token log-probability, so we can drop completions the model itself was unsure about
/// instead of showing a confident-looking guess. The policy is pure and isolated so the threshold
/// is easy to test and tune. A floor of negative infinity (the default) disables suppression, so
/// this is a no-op until a caller opts in by raising the floor.
enum ConfidenceSuppressionPolicy {
    /// Reports whether a completion should be dropped for low confidence.
    ///
    /// - Parameters:
    ///   - averageLogprob: Mean per-token log-probability of the completion.
    ///   - floor: Threshold to compare against. Negative infinity turns the check off.
    /// - Returns: `true` only when `floor` is greater than negative infinity and
    ///   `averageLogprob` is strictly less than `floor`. An average equal to the floor is kept.
    ///   A NaN on either side makes the comparisons false, so the result is `false`.
    static func shouldSuppress(averageLogprob: Double, floor: Double) -> Bool {
        let suppressionEnabled = floor > -.infinity
        return suppressionEnabled && averageLogprob < floor
    }
}
41 changes: 41 additions & 0 deletions Cotabby/Support/InsertionSafetyGate.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import Foundation

/// File overview:
/// Rejects completions that are technically non-empty but would insert nothing a user wants.
///
/// Why this file exists:
/// `SuggestionInserter` previously only refused a fully empty string, so a completion carrying an
/// interior control character or a U+FFFD replacement glyph (from lossy detokenization) could reach
/// ghost text and be committed on Tab. This gate is the single predicate for "is this safe to put
/// on screen and insert."
///
/// Scope note: this intentionally does NOT reject punctuation-only output. A lone ")", ".", or "?"
/// is a legitimate inline completion (closing a bracket, ending a sentence), so judging punctuation
/// here would suppress useful suggestions. The gate is limited to unambiguous junk.
enum InsertionSafetyGate {
    /// Returns true when `completion` is safe to display and insert.
    ///
    /// A completion is rejected when it is empty, when any of its Unicode scalars is
    /// unambiguous junk (see `isUnambiguousJunk`), or when every scalar is whitespace.
    static func isSafeToInsert(_ completion: String) -> Bool {
        let scalars = completion.unicodeScalars

        // One bad scalar anywhere poisons the whole completion.
        if scalars.contains(where: { isUnambiguousJunk($0) }) {
            return false
        }

        // Require at least one visible scalar. This also covers the empty string, which has
        // no scalars at all and therefore no non-whitespace one.
        return scalars.contains { !CharacterSet.whitespacesAndNewlines.contains($0) }
    }

    /// True for scalars that are never legitimate completion text: the C0 control range
    /// (U+0000 through U+001F), DEL (U+007F), and the U+FFFD replacement character that marks
    /// bytes the detokenizer could not decode.
    ///
    /// NOTE(review): the C0 range includes tab, LF and CR, so any completion still containing a
    /// line break is rejected here. Confirm that newlines are always removed upstream before
    /// this gate runs, including when multi-line completions are enabled.
    private static func isUnambiguousJunk(_ scalar: Unicode.Scalar) -> Bool {
        switch scalar.value {
        case 0x00 ... 0x1F, 0x7F, 0xFFFD:
            return true
        default:
            return false
        }
    }
}
27 changes: 27 additions & 0 deletions Cotabby/Support/MidWordContinuationPolicy.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import Foundation

/// File overview:
/// Decides whether the first generated token should be constrained to continue the current word.
///
/// Why this file exists:
/// The engine can force the first sampled token to be a word continuation (no leading whitespace),
/// which heals mid-word completions. But forcing it at a normal word boundary would break the
/// common "predict the next word" case, where a leading space is exactly what we want. This policy
/// keeps the trigger deliberately narrow: it only fires when the caret sits strictly inside a word
/// (a word character on both sides). At a word end (nothing or a non-word character after the
/// caret) it returns false so ordinary next-word predictions are untouched.
enum MidWordContinuationPolicy {
    /// Reports whether the caret sits strictly inside a word.
    ///
    /// - Parameters:
    ///   - precedingText: Text before the caret; only its last character is inspected.
    ///   - trailingText: Text after the caret; only its first character is inspected.
    /// - Returns: `true` when both neighbouring characters exist and are word characters;
    ///   `false` if either side is empty or is not a letter or number.
    static func shouldForceContinuation(precedingText: String, trailingText: String) -> Bool {
        if let leftNeighbor = precedingText.last, let rightNeighbor = trailingText.first {
            return isWordCharacter(leftNeighbor) && isWordCharacter(rightNeighbor)
        }
        return false
    }

    /// A word character is any letter or number; punctuation such as "_" or "'" does not count.
    private static func isWordCharacter(_ character: Character) -> Bool {
        character.isLetter || character.isNumber
    }
}
65 changes: 65 additions & 0 deletions Cotabby/Support/SentenceBoundaryClassifier.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import Foundation

/// File overview:
/// Decides whether a period actually ends a sentence, so phrase-level acceptance does not stop
/// early on decimals, list numbers, single-letter initials, or common abbreviations.
///
/// Why this file exists:
/// Phrase acceptance treats any `.` as a sentence terminator. That breaks "version 1.2", "U.S.",
/// "e.g.", and a numbered "1." mid-tail. A purely structural scanner cannot resolve every case, but
/// it can resolve the frequent ones with a few local rules. `!` and `?` are always terminal and do
/// not need this; only the period is ambiguous.
enum SentenceBoundaryClassifier {
    /// Lowercased abbreviations whose trailing period belongs to the word rather than ending a
    /// sentence. Matching is done against the lowercased run of letters before the period.
    private static let abbreviations: Set<String> = [
        "mr", "mrs", "ms", "dr", "st", "vs", "eg", "ie", "etc", "no", "fig", "approx", "inc", "ltd"
    ]

    /// Whether the period at `periodIndex` in `text` ends a sentence.
    ///
    /// The caller guarantees that `text[periodIndex]` is ".". Rules, in order:
    /// 1. Nothing before the period: terminal.
    /// 2. A number directly before it ("1.", "3.14"): not terminal.
    /// 3. A single standalone letter directly before it ("U.", the "S." in "U.S."): not terminal.
    /// 4. A run of letters that is a known abbreviation ("Dr.", "etc."): not terminal.
    /// 5. Everything else: terminal.
    static func isTerminalPeriod(in text: String, at periodIndex: String.Index) -> Bool {
        // The character immediately left of the period decides which rule applies. A period at
        // the very start of the text has no such character and is treated as terminal.
        guard let precedingCharacter = text[..<periodIndex].last else {
            return true
        }

        if precedingCharacter.isNumber {
            return false
        }

        // Any other non-letter (space, quote, another period, ...) leaves the period terminal.
        guard precedingCharacter.isLetter else {
            return true
        }

        let word = trailingLetters(in: text, endingBefore: periodIndex)

        // A one-letter run means the letter stands alone: an initial, not a sentence end.
        if word.count == 1 {
            return false
        }

        return !abbreviations.contains(word.lowercased())
    }

    /// The run of letters in `text` ending just before `index`, in reading order.
    private static func trailingLetters(in text: String, endingBefore index: String.Index) -> String {
        // Walk backwards from `index` while characters are letters, then restore the order.
        let reversedRun = text[..<index].reversed().prefix(while: { $0.isLetter })
        return String(reversedRun.reversed())
    }
}
20 changes: 15 additions & 5 deletions Cotabby/Support/SuggestionSessionReconciler.swift
Original file line number Diff line number Diff line change
Expand Up @@ -270,10 +270,11 @@ enum SuggestionSessionReconciler {
/// quoted-prose case (`"done." Next` → stop after the closing quote). Without the walk-back,
/// the chunk's last character would be `"` rather than `.` and phrase mode would over-accept
/// the next sentence. Token-interior punctuation like the dots in `U.S.A` does NOT trigger
/// an early break because the chunk's tail (after walking) is `A`, not `.`. The known
/// false-positive is when the tail itself ends with `U.S.A.` — the trailing period reads as
/// a sentence terminator and the user has to press once more for the next phrase. Rule-based
/// scanners can't disambiguate that without NLP; Cursor and Copilot behave the same way.
/// an early break because the chunk's tail (after walking) is `A`, not `.`. Periods are further
/// disambiguated by `SentenceBoundaryClassifier`, so decimals ("1.2"), list numbers ("1."),
/// single-letter initials, and common abbreviations ("e.g.", "U.S.") do not end a phrase. Truly
/// ambiguous cases (a real sentence ending in an abbreviation) lean toward continuing, which is
/// the safe default for phrase acceptance.
///
/// The `autoAcceptTrailingPunctuation` flag is passed through to each underlying chunk call
/// but does not change the final phrase output: a tail like `you?` with the flag off yields
Expand Down Expand Up @@ -333,7 +334,16 @@ enum SuggestionSessionReconciler {
return false
}
let prev = text.index(before: index)
return text[prev].isPhraseSentenceTerminator
guard text[prev].isPhraseSentenceTerminator else {
return false
}
// `!` and `?` always end a sentence. A period is ambiguous: decimals, list/ordinal numbers,
// single-letter initials, and common abbreviations are not sentence ends, so consult the
// classifier rather than treating every "." as terminal.
if text[prev] == "." {
return SentenceBoundaryClassifier.isTerminalPeriod(in: text, at: prev)
}
return true
}

/// Returns the index just past a word token's final alphanumeric character when that token has
Expand Down
37 changes: 27 additions & 10 deletions Cotabby/Support/SuggestionTextNormalizer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,9 @@ enum SuggestionTextNormalizer {
normalized = normalized.replacingOccurrences(of: "<|im_end|>", with: "")
normalized = normalized.replacingOccurrences(of: "<|im_start|>", with: "")

// Thinking-capable models may emit <think>…</think> reasoning blocks. Strip complete
// blocks first, then any trailing open tag left when generation hit the token limit.
if let thinkRange = normalized.range(of: "<think>[\\s\\S]*?</think>", options: .regularExpression) {
normalized.replaceSubrange(thinkRange, with: "")
}
if let openTag = normalized.range(of: "<think>[\\s\\S]*", options: .regularExpression) {
normalized.replaceSubrange(openTag, with: "")
}
// Thinking-capable models may emit <think>…</think> reasoning blocks. Strip them here so
// the reasoning text never reaches the continuation logic below.
normalized = stripThinkBlocks(normalized)

for prompt in [request.prompt] + promptEchoCandidates {
if !prompt.isEmpty, normalized.hasPrefix(prompt) {
Expand Down Expand Up @@ -82,8 +77,10 @@ enum SuggestionTextNormalizer {
// If the model starts by repeating text that already exists after the caret, we treat the
// suggestion as unusable. Showing only the remainder often produces confusing mid-word
// ghosts, so the coordinator should regenerate instead.
if !request.context.trailingText.isEmpty,
normalized.hasPrefix(request.context.trailingText) {
if TrailingDuplicationFilter.duplicatesTrailingText(
normalized,
trailingText: request.context.trailingText
) {
return ""
}

Expand All @@ -104,9 +101,29 @@ enum SuggestionTextNormalizer {
normalized = String(normalized.drop(while: { $0.isWhitespace }))
}

// Final safety gate: never surface control characters, replacement glyphs, or
// whitespace-only output as ghost text. Returning empty makes the coordinator treat this
// as "no suggestion" and regenerate rather than insert junk on Tab.
guard InsertionSafetyGate.isSafeToInsert(normalized) else {
return ""
}

return normalized
}

/// Removes `<think>…</think>` reasoning blocks from `text`.
///
/// Every complete block is removed first. If an opening `<think>` tag is still present after
/// that, it was never closed (for example because generation hit the token limit), so the tag
/// and everything following it is dropped.
///
/// Removing all complete blocks, not just the first, matters: a second complete block left
/// behind would be matched by the dangling-tag pattern, which would then delete that block
/// together with all the real completion text that follows it.
///
/// - Parameter text: Raw model output that may contain reasoning blocks.
/// - Returns: `text` with all reasoning blocks removed.
private static func stripThinkBlocks(_ text: String) -> String {
    // Non-greedy match so each block ends at its own closing tag.
    let withoutCompleteBlocks = text.replacingOccurrences(
        of: "<think>[\\s\\S]*?</think>",
        with: "",
        options: .regularExpression
    )

    // Any opening tag that survives is unterminated; the greedy pattern runs to the end of the
    // string, so keeping the prefix before the match is the same as deleting the match.
    guard let dangling = withoutCompleteBlocks.range(of: "<think>[\\s\\S]*", options: .regularExpression) else {
        return withoutCompleteBlocks
    }
    return String(withoutCompleteBlocks[..<dangling.lowerBound])
}

/// Finds the longest suffix of `precedingText` (at any word offset) that matches a prefix
/// of `suggestion`, then strips that overlap. Returns empty if the entire suggestion is echoed.
///
Expand Down
Loading