diff --git a/README.md b/README.md index fd492bd..f179bed 100644 --- a/README.md +++ b/README.md @@ -71,37 +71,35 @@ This isn't just a terminal in the cloud. Running coding agents on Databricks giv ## MLflow Tracing -Every Claude Code session is **automatically traced** to a Databricks MLflow experiment — zero configuration required. +Claude Code and Codex sessions can both be **automatically traced** to a single Databricks MLflow experiment — flip one switch to turn them on. + +### Turning it on + +Set **`MLFLOW_TRACING_ENABLED=true`** in `app.yaml` (or your shell for local dev). That single variable enables tracing for both CLIs. Tracing is **off by default** to keep deploys lightweight — opt in when you want it. + +```yaml +# app.yaml +env: + - name: MLFLOW_TRACING_ENABLED + value: "true" +``` ### How it works ``` -Claude Code session starts +MLFLOW_TRACING_ENABLED=true │ - ▼ - Environment vars set automatically: - MLFLOW_TRACKING_URI=databricks - MLFLOW_EXPERIMENT_NAME=/Users/{you}/{app-name} + ├──► Claude Code: Stop hook fires on session end → + │ mlflow.claude_code.hooks.stop_hook_handler() logs the transcript │ - ▼ - You work normally — code, debug, deploy - │ - ▼ - Session ends → Stop hook fires - │ - ▼ - Full session transcript logged as an MLflow trace - at /Users/{you}/{app-name} in your workspace + └──► Codex: @mlflow/codex notify hook fires after each turn → + trace appended to the experiment ``` -### What gets traced - -When a Claude Code session ends, the **Stop hook** automatically calls `mlflow.claude_code.hooks.stop_hook_handler()`, which captures the full session transcript — your prompts, agent actions, tool calls, and outputs — and logs it as an MLflow trace. +Both land in the same MLflow experiment, so you can compare runs across agents side by side. 
### Where traces live -Traces are stored in a Databricks MLflow experiment at: - ``` /Users/{your-email}/{app-name} ``` @@ -116,17 +114,17 @@ View them in the Databricks UI: **Workspace > Machine Learning > Experiments**. ### Configuration -Tracing is configured during app startup by `setup_mlflow.py`, which merges the following into `~/.claude/settings.json`: +Tracing is wired up during app startup: | Setting | Value | Purpose | |---------|-------|---------| -| `MLFLOW_CLAUDE_TRACING_ENABLED` | `true` | Enables Claude Code tracing | -| `MLFLOW_TRACKING_URI` | `databricks` | Routes traces to Databricks backend | +| `MLFLOW_TRACING_ENABLED` | `true`/`false` (default `false`) | Master switch for Claude + Codex | +| `MLFLOW_CLAUDE_TRACING_ENABLED` | mirrors `MLFLOW_TRACING_ENABLED` | Gates Claude's Stop hook at runtime | +| `MLFLOW_TRACKING_URI` | `databricks` | Routes traces to the Databricks backend | | `MLFLOW_EXPERIMENT_NAME` | `/Users/{owner}/{app}` | Target experiment path | -| `OTEL_EXPORTER_OTLP_ENDPOINT` | `""` | Overrides container OTEL to prevent trace loss | -| Stop hook | `uv run python -c "from mlflow.claude_code.hooks import stop_hook_handler; stop_hook_handler()"` | Fires on session end | +| `MLFLOW_EXPERIMENT_ID` | resolved from name | Set in `~/.codex/.env` (Codex needs an ID) | -Tracing is skipped gracefully if `APP_OWNER` is not set (e.g., local dev without Databricks). +Tracing setup is skipped gracefully when `APP_OWNER` is not set (e.g., local dev without Databricks) or when `MLFLOW_TRACING_ENABLED` is left at its default `false`. --- @@ -288,6 +286,7 @@ This template repo opens that vision up for every Databricks user — no IDE set | `CODEX_MODEL` | No | Codex model name (default: `databricks-gpt-5-5`) | | `GEMINI_MODEL` | No | Gemini model name (default: `databricks-gemini-2-5-pro`) | | `DATABRICKS_GATEWAY_HOST` | No | AI Gateway URL override. 
Auto-discovered from `DATABRICKS_WORKSPACE_ID` if unset |
+| `MLFLOW_TRACING_ENABLED` | No | Set to `"true"` to enable MLflow tracing for Claude and Codex in one switch (default `"false"`) |
 
 ### Security Model
 
diff --git a/app.yaml.template b/app.yaml.template
index 3edd241..3eb9965 100644
--- a/app.yaml.template
+++ b/app.yaml.template
@@ -26,3 +26,8 @@ env:
   # completes the rollout and auto memory is on by default, this can be removed entirely.
   - name: CLAUDE_CODE_DISABLE_AUTO_MEMORY
     value: 0
+  # Set MLFLOW_TRACING_ENABLED=true to auto-trace Claude and Codex
+  # sessions into /Users/{app_owner}/{app_name}. Default off to keep installs
+  # lightweight — opt in when you want observability across both agents.
+  - name: MLFLOW_TRACING_ENABLED
+    value: "false"
diff --git a/setup_codex.py b/setup_codex.py
index 6be864f..f2fdd20 100644
--- a/setup_codex.py
+++ b/setup_codex.py
@@ -8,12 +8,19 @@
 Config: ~/.codex/config.toml with custom model_providers for Databricks.
 Auth: Bearer token via DATABRICKS_TOKEN environment variable.
""" +import json import os import shutil import subprocess from pathlib import Path -from utils import adapt_instructions_file, ensure_https, get_gateway_host, get_npm_version +from utils import ( + adapt_instructions_file, + ensure_https, + get_gateway_host, + get_npm_version, + resolve_mlflow_experiment_id, +) # Set HOME if not properly set if not os.environ.get("HOME") or os.environ["HOME"] == "/": @@ -101,6 +108,12 @@ shutil.copyfile(catalog_src, catalog_dst) print(f"Codex model catalog copied: {catalog_dst}") +# Optional: MLflow tracing notify hook (one switch enables Claude + Codex) +tracing_enabled = os.environ.get("MLFLOW_TRACING_ENABLED", "false").lower() == "true" +notify_line = "" +if tracing_enabled: + notify_line = 'notify = ["mlflow-codex", "notify-hook"]\n' + # Codex CLI uses TOML config with custom model_providers config_content = f"""# Databricks Model Serving Configuration for Codex CLI # Generated by setup_codex.py @@ -113,6 +126,7 @@ # Disable web_search - not supported by Databricks Responses API web_search = "disabled" +{notify_line} # Databricks custom provider [model_providers.databricks] name = "Databricks Model Serving" @@ -128,12 +142,56 @@ # 4. 
Write OPENAI_API_KEY to shell profile for Codex to pick up # Codex reads from env_key specified in config (OPENAI_API_KEY) # We set this via the environment, but also write a .env file as backup -env_content = f"""# Databricks token for Codex CLI (OpenAI-compatible endpoint) -OPENAI_API_KEY={auth_token} -""" +env_lines = [ + "# Databricks token for Codex CLI (OpenAI-compatible endpoint)", + f"OPENAI_API_KEY={auth_token}", +] + +# MLflow tracing env vars (read by @mlflow/codex notify hook) +app_owner = os.environ.get("APP_OWNER", "") +app_name = os.environ.get("DATABRICKS_APP_NAME", "coding-agents") +experiment_name = f"/Users/{app_owner}/{app_name}" if app_owner else "" + +if tracing_enabled and experiment_name: + experiment_id = resolve_mlflow_experiment_id(host, token, experiment_name) + + # Install @mlflow/codex (provides the `mlflow-codex` binary used by the notify hook) + mlflow_codex_bin = local_bin / "mlflow-codex" + if not mlflow_codex_bin.exists(): + npm_prefix = str(home / ".local") + print("Installing @mlflow/codex for MLflow tracing...") + result = subprocess.run( + ["npm", "install", "-g", f"--prefix={npm_prefix}", "@mlflow/codex"], + capture_output=True, text=True, + env={**os.environ, "HOME": str(home)}, + ) + if result.returncode == 0: + print(f"@mlflow/codex installed to {mlflow_codex_bin}") + else: + print(f"WARNING: @mlflow/codex install failed (rc={result.returncode}): {result.stderr.strip()[:300]}") + + # Pass MLflow connection details via env (override mlflow-tracing.json). + # DATABRICKS_HOST/TOKEN are inherited from the app process — no need to + # re-write them here (and that avoids stale-token bugs on PAT rotation). 
+ env_lines.extend([ + "", + "# MLflow tracing (enabled by MLFLOW_TRACING_ENABLED=true)", + "MLFLOW_TRACKING_URI=databricks", + ]) + if experiment_id: + env_lines.append(f"MLFLOW_EXPERIMENT_ID={experiment_id}") + else: + env_lines.append(f"MLFLOW_EXPERIMENT_NAME={experiment_name}") + + # mlflow-tracing.json as a fallback for the notify hook when env isn't loaded + tracing_cfg = {"trackingUri": "databricks"} + if experiment_id: + tracing_cfg["experimentId"] = experiment_id + (codex_dir / "mlflow-tracing.json").write_text(json.dumps(tracing_cfg, indent=2)) + print(f"Codex MLflow tracing configured: experiment_id={experiment_id or 'unresolved'}") env_path = codex_dir / ".env" -env_path.write_text(env_content) +env_path.write_text("\n".join(env_lines) + "\n") env_path.chmod(0o600) print(f"Codex CLI env configured: {env_path}") diff --git a/setup_mlflow.py b/setup_mlflow.py index 9d305d3..9a3ac03 100644 --- a/setup_mlflow.py +++ b/setup_mlflow.py @@ -1,8 +1,8 @@ -"""Configure MLflow tracing for Claude Code sessions. +"""Wire up Claude Code's Stop hook for MLflow tracing. -Merges MLflow env vars and a Stop hook into ~/.claude/settings.json so that -every Claude Code session automatically logs traces to a Databricks MLflow -experiment at /Users/{app_owner}/{app_name}. +Gated on MLFLOW_TRACING_ENABLED — the same switch enables Codex and Gemini +tracing in their respective setup scripts. Traces land in +/Users/{app_owner}/{app_name}. """ import os @@ -31,17 +31,23 @@ experiment_name = f"/Users/{app_owner}/{app_name}" -# Merge MLflow env vars +# Single switch that controls tracing for Claude, Codex, and Gemini. +# Defaults to "false" so opt-in requires explicit configuration. +tracing_enabled = os.environ.get("MLFLOW_TRACING_ENABLED", "false").lower() == "true" + +# Merge MLflow env vars (always written so flipping the flag at runtime works +# without rerunning setup — Claude reads MLFLOW_CLAUDE_TRACING_ENABLED on launch). 
 settings.setdefault("env", {})
-settings["env"]["MLFLOW_CLAUDE_TRACING_ENABLED"] = "false"
+settings["env"]["MLFLOW_CLAUDE_TRACING_ENABLED"] = "true" if tracing_enabled else "false"
 settings["env"]["MLFLOW_TRACKING_URI"] = "databricks"
 settings["env"]["MLFLOW_EXPERIMENT_NAME"] = experiment_name
 
 # Override container-level OTEL endpoint so MLflow uses its native MlflowV3SpanExporter
 # instead of sending traces to a non-existent localhost:4314 OTLP collector
 settings["env"]["OTEL_EXPORTER_OTLP_ENDPOINT"] = ""
 
-# Add Stop hook (processes full transcript at session end)
-# Use `uv run python` so mlflow resolves correctly regardless of venv paths
+# Add Stop hook (processes full transcript at session end).
+# The hook is harmless when MLFLOW_CLAUDE_TRACING_ENABLED=false — mlflow's
+# stop_hook_handler short-circuits if tracing isn't enabled.
 python_cmd = "uv run python"
 mlflow_hook = {
     "hooks": [
@@ -54,11 +60,19 @@
 
 existing_hooks = settings.get("hooks", {})
 stop_hooks = existing_hooks.get("Stop", [])
-stop_hooks.append(mlflow_hook)
+# Avoid duplicating the hook if setup runs multiple times
+already_present = any(
+    any("stop_hook_handler" in hk.get("command", "") for hk in h.get("hooks", []) if isinstance(hk, dict))
+    for h in stop_hooks if isinstance(h, dict)
+)
+if not already_present:
+    stop_hooks.append(mlflow_hook)
 existing_hooks["Stop"] = stop_hooks
 settings["hooks"] = existing_hooks
 
 settings_path.write_text(json.dumps(settings, indent=2))
-print(f"MLflow tracing enabled: experiment={experiment_name}")
+print(f"MLflow tracing {'ENABLED' if tracing_enabled else 'disabled'}: experiment={experiment_name}")
 print(f"  Tracking URI: databricks")
 print(f"  Settings updated: {settings_path}")
+if not tracing_enabled:
+    print("  Set MLFLOW_TRACING_ENABLED=true (in app.yaml) to enable Claude + Codex tracing.")
diff --git a/tests/test_mlflow_tracing.py b/tests/test_mlflow_tracing.py
index 02a6eb1..fb6e975 100644
--- a/tests/test_mlflow_tracing.py
+++ b/tests/test_mlflow_tracing.py
@@ -60,13 +60,23 @@ def
read_settings(tmp_path): class TestMlflowEnvVars: """Verify MLflow environment variables are added to settings.json.""" - def test_tracing_enabled(self, tmp_path): + def test_tracing_disabled_by_default(self, tmp_path): write_existing_settings(tmp_path, {"env": {"ANTHROPIC_MODEL": "test"}}) result = run_setup_mlflow(tmp_path, {"APP_OWNER": "jane@company.com"}) assert result.returncode == 0 settings = read_settings(tmp_path) assert settings["env"]["MLFLOW_CLAUDE_TRACING_ENABLED"] == "false" + def test_tracing_enabled_via_master_switch(self, tmp_path): + write_existing_settings(tmp_path, {"env": {"ANTHROPIC_MODEL": "test"}}) + result = run_setup_mlflow(tmp_path, { + "APP_OWNER": "jane@company.com", + "MLFLOW_TRACING_ENABLED": "true", + }) + assert result.returncode == 0 + settings = read_settings(tmp_path) + assert settings["env"]["MLFLOW_CLAUDE_TRACING_ENABLED"] == "true" + def test_tracking_uri(self, tmp_path): write_existing_settings(tmp_path, {"env": {}}) result = run_setup_mlflow(tmp_path, {"APP_OWNER": "jane@company.com"}) diff --git a/utils.py b/utils.py index f93605d..8270693 100644 --- a/utils.py +++ b/utils.py @@ -226,3 +226,30 @@ def ensure_https(url: str) -> str: if not url.startswith(("http://", "https://")): return f"https://{url}" return url + + +def resolve_mlflow_experiment_id(host: str, token: str, experiment_name: str) -> str | None: + """Look up (or create) a Databricks MLflow experiment by name and return its ID. + + Used by Codex and Gemini CLI tracing setup — both need an experiment *ID*, + not name, in their config files / OTLP headers. + + Returns None on any failure so callers can degrade gracefully. 
+ """ + if not host or not token or not experiment_name: + return None + try: + from databricks.sdk import WorkspaceClient + from databricks.sdk.errors import ResourceDoesNotExist + + w = WorkspaceClient(host=ensure_https(host.rstrip("/")), token=token) + try: + exp = w.experiments.get_by_name(experiment_name=experiment_name) + if exp and exp.experiment: + return exp.experiment.experiment_id + except ResourceDoesNotExist: + pass # fall through to create + return w.experiments.create_experiment(name=experiment_name).experiment_id + except Exception as exc: + logger.warning(f"Could not resolve MLflow experiment '{experiment_name}': {exc}") + return None