Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions Cotabby/Support/SuggestionTextNormalizer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,18 @@ enum SuggestionTextNormalizer {
// continuation that followed.
normalized = normalized.trimmingCharacters(in: .newlines)

// Backstop for prompt-scaffolding hallucination. Small instruct models sometimes parrot the
// prompt's section headers ("App:", "Text before caret:", "Continuation:") as the first
// thing they emit: sometimes as their own line, sometimes inline before the real text, and
// sometimes as labels the model invents that were never in our prompt at all. None of these
// are valid ghost text. Stripping a leading run of known labels runs before the single-line
// collapse so a model that stacks "Task:\nText before caret:\nreal continuation" still
// surfaces the real continuation instead of collapsing to the first label line. This is a
// best-effort catch, not the fix: the durable fix is feeding instruct models their own chat
// template so instructions never read as content in the first place.
normalized = stripLeadingScaffoldingLabels(normalized)
normalized = normalized.trimmingCharacters(in: .newlines)

if request.isMultiLineEnabled {
// Multi-line mode: keep content up to the first blank-line boundary (double newline)
// to prevent runaway paragraph generation while still allowing multi-line completions.
Expand Down Expand Up @@ -144,4 +156,47 @@ enum SuggestionTextNormalizer {
let afterLastEchoed = lastEchoedWord.endIndex
return String(suggestion[afterLastEchoed...])
}

/// Section-header labels Cotabby's prompts use, plus close variants small models tend to
/// hallucinate. Matching is anchored to this known set so legitimate user text that merely
/// contains a colon ("Note: buy milk", "TODO: ship it") is never treated as scaffolding.
///
/// Every entry carries its trailing colon, so a bare word such as "Task" without the colon is
/// never a match. Declaration order does not decide which label wins:
/// `stripLeadingScaffoldingLabels` tries the labels longest-first and compares them
/// case-insensitively, so if one label is ever a prefix of another the longer one is matched
/// first.
private static let scaffoldingLabels: [String] = [
"Text before the caret:",
"Text before caret:",
"Text after the caret:",
"Text after caret:",
"User Profile Context:",
"Your style preferences:",
"Final instruction:",
"Screen context:",
"Screen content:",
"User's clipboard:",
"Continuation:",
"Application:",
"Task:",
"App:"
]

/// `scaffoldingLabels` ordered longest-first, so a longer label is always tried before any
/// shorter label that could be its prefix. Stored as a static constant so the sort runs once,
/// on first access, instead of on every normalization call.
private static let scaffoldingLabelsLongestFirst: [String] =
    scaffoldingLabels.sorted { $0.count > $1.count }

/// Removes a leading run of known prompt-scaffolding labels (see `scaffoldingLabels`), whether
/// each sits on its own line or inline before the continuation.
///
/// Only labels at the very start are stripped; a label appearing later in the text is left
/// alone because by then it is far more likely to be real user content than echoed
/// scaffolding. Labels are matched case-insensitively.
///
/// - Parameter text: The model output to clean.
/// - Returns: `text` unchanged (leading whitespace included) when it does not begin with a
///   known label; otherwise everything after the last label of the leading run, with any
///   whitespace that follows that label left in place for the caller to handle.
private static func stripLeadingScaffoldingLabels(_ text: String) -> String {
    var working = text

    while true {
        // Look past leading whitespace/newlines to find the first real token. We only commit
        // to dropping that whitespace if a label actually matches; otherwise `working` is
        // returned untouched so the caller's existing leading-space handling still sees the
        // original.
        let leading = working.drop(while: { $0.isWhitespace })

        // `.anchored` restricts the search to the very start of `leading`; `.lazy` stops at
        // the first (longest) label that matches.
        let firstMatch = scaffoldingLabelsLongestFirst.lazy
            .compactMap { leading.range(of: $0, options: [.caseInsensitive, .anchored]) }
            .first

        guard let matchedRange = firstMatch else {
            return working
        }

        // Cut at the end of the span that actually matched rather than dropping
        // `label.count` characters: under case-insensitive comparison the matched text is not
        // guaranteed to contain the same number of Characters as the label, and a count-based
        // drop could then eat real content or leave label residue behind.
        working = String(leading[matchedRange.upperBound...])
    }
}
}
82 changes: 82 additions & 0 deletions CotabbyTests/SuggestionTextNormalizerTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -134,4 +134,86 @@ final class SuggestionTextNormalizerTests: XCTestCase {

XCTAssertEqual(normalized, "")
}

func test_normalize_stripsLeadingInlineScaffoldingLabel() {
    // The text before the caret already ends in a space, so once the echoed
    // "Text before caret:" header is removed the space it exposes is dropped as well and only
    // the continuation remains.
    let modelOutput = "Text before caret: going to the store"
    let request = CotabbyTestFixtures.suggestionRequest(
        prefixText: "I am ",
        prompt: "PROMPT",
        precedingText: "I am "
    )

    XCTAssertEqual(
        SuggestionTextNormalizer.normalize(modelOutput, for: request),
        "going to the store"
    )
}

func test_normalize_stripsHallucinatedAppLabel() {
    // A leading "App:" label is scaffolding, not ghost text; only what follows it should
    // survive normalization.
    let modelOutput = "App: report by Friday"
    let request = CotabbyTestFixtures.suggestionRequest(
        prefixText: "send the ",
        prompt: "PROMPT",
        precedingText: "send the "
    )

    XCTAssertEqual(
        SuggestionTextNormalizer.normalize(modelOutput, for: request),
        "report by Friday"
    )
}

func test_normalize_stripsStackedScaffoldingLabelLines() {
    // Labels stacked on separate lines have to be peeled off before the single-line collapse
    // runs; otherwise the collapse would keep just the first label line ("Task:") and throw
    // the real continuation away.
    let modelOutput = "Task:\nText before caret:\nquick brown fox"
    let request = CotabbyTestFixtures.suggestionRequest(
        prefixText: "The ",
        prompt: "PROMPT",
        precedingText: "The "
    )

    XCTAssertEqual(
        SuggestionTextNormalizer.normalize(modelOutput, for: request),
        "quick brown fox"
    )
}

func test_normalize_keepsLegitimateNonLabelColon() {
    // "TODO:" ends in a colon but is not one of the known scaffolding labels, so it counts as
    // real user content and must pass through unchanged.
    let modelOutput = "TODO: buy milk"
    let request = CotabbyTestFixtures.suggestionRequest(
        prefixText: "my list ",
        prompt: "PROMPT",
        precedingText: "my list "
    )

    XCTAssertEqual(
        SuggestionTextNormalizer.normalize(modelOutput, for: request),
        "TODO: buy milk"
    )
}

func test_normalize_keepsLabelLikeTextWhenNotLeading() {
    // Here "Task:" shows up in the middle of the continuation rather than at its start, so it
    // is treated as ordinary text and left in place.
    let modelOutput = "first Task: review"
    let request = CotabbyTestFixtures.suggestionRequest(
        prefixText: "finish the ",
        prompt: "PROMPT",
        precedingText: "finish the "
    )

    XCTAssertEqual(
        SuggestionTextNormalizer.normalize(modelOutput, for: request),
        "first Task: review"
    )
}
}