supermodeltools · jonathanpopham · Apr 13, 2026 · Apr 13, 2026 · coderabbitai · Apr 13, 2026
@@ -14,6 +14,11 @@ func init() {
 	var opts analyze.Options
 	var noShards bool
 	var threeFile bool
+	var narrate bool
+	var tour bool
+	var tourStrategy string
+	var tourSeed string
+	var tourBudget int
 
 	c := &cobra.Command{
 		Use:   "analyze [path]",
@@ -25,7 +30,16 @@ Results are cached locally by content hash. Subsequent commands
 (dead-code, blast-radius, graph) reuse the cache automatically.
 
 By default, .graph.* shard files are written next to each source file.
-Use --no-shards to skip writing graph files.`,
+Use --no-shards to skip writing graph files.
+
+Linearization flags:
+  --narrate          prefix each shard with a prose narrative preamble
+  --tour             also emit .supermodel/TOUR.md (the reading spine)
+  --tour-strategy    topo | bfs-seed | dfs-seed | centrality (default: topo)
+  --tour-seed        seed file for bfs-seed/dfs-seed
+  --tour-budget      chunk tour into chapters of this token budget
+
+See docs/linearization.md for design.`,
 		Args: cobra.MaximumNArgs(1),
 		RunE: func(cmd *cobra.Command, args []string) error {
 			cfg, err := config.Load()
@@ -38,6 +52,9 @@ Use --no-shards to skip writing graph files.`,
 			if noShards && threeFile {
 				return fmt.Errorf("--three-file cannot be used with --no-shards")
 			}
+			if noShards && (narrate || tour) {
+				return fmt.Errorf("--narrate and --tour require shards (cannot combine with --no-shards)")
+			}
 			dir := "."
 			if len(args) > 0 {
 				dir = args[0]
@@ -46,7 +63,15 @@ Use --no-shards to skip writing graph files.`,
 				// Shard mode: Generate handles the full pipeline (API call +
 				// cache + shards) in a single upload. Running analyze.Run
 				// first would duplicate the API call.
-				return shards.Generate(cmd.Context(), cfg, dir, shards.GenerateOptions{Force: opts.Force, ThreeFile: threeFile})
+				return shards.Generate(cmd.Context(), cfg, dir, shards.GenerateOptions{
+					Force:        opts.Force,
+					ThreeFile:    threeFile,
+					Narrate:      narrate,
+					Tour:         tour,
+					TourStrategy: tourStrategy,
+					TourSeed:     tourSeed,
+					TourBudget:   tourBudget,
+				})
 			}
 			return analyze.Run(cmd.Context(), cfg, dir, opts)
 		},
@@ -56,6 +81,11 @@ Use --no-shards to skip writing graph files.`,
 	c.Flags().StringVarP(&opts.Output, "output", "o", "", "output format: human|json")
 	c.Flags().BoolVar(&noShards, "no-shards", false, "skip writing .graph.* shard files")
 	c.Flags().BoolVar(&threeFile, "three-file", false, "generate .calls/.deps/.impact files instead of single .graph")
+	c.Flags().BoolVar(&narrate, "narrate", false, "prefix each shard with a prose narrative preamble")
+	c.Flags().BoolVar(&tour, "tour", false, "also emit .supermodel/TOUR.md — the linear reading spine")
+	c.Flags().StringVar(&tourStrategy, "tour-strategy", "topo", "tour ordering: topo | bfs-seed | dfs-seed | centrality")
+	c.Flags().StringVar(&tourSeed, "tour-seed", "", "seed file for bfs-seed / dfs-seed strategies")
+	c.Flags().IntVar(&tourBudget, "tour-budget", 0, "chunk tour into chapters of this token budget (0 = single file)")
 
 	rootCmd.AddCommand(c)
 }
@@ -0,0 +1,103 @@
+package cmd
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/spf13/cobra"
+
+	"github.com/supermodeltools/cli/internal/api"
+	"github.com/supermodeltools/cli/internal/shards"
+	"github.com/supermodeltools/cli/internal/ui"
+)
+
+func init() { // registers the "tour" subcommand and its flags on rootCmd
+	var strategyName string // bound to --strategy (default "topo")
+	var seed string         // bound to --seed
+	var narrate bool        // bound to --narrate
+	var budgetTokens int    // bound to --budget-tokens (0 = single file)
+	var dryRun bool         // bound to --dry-run
+
+	c := &cobra.Command{
+		Use:   "tour [path]",
+		Short: "Emit a linearized reading order over the code graph",
+		Long: `Generates .supermodel/TOUR.md — a single-file reading spine that walks the
+repository in a strategy-chosen order, grouped by domain/subdomain, with each
+entry linking to its per-file shard. This gives agents a deterministic path
+through the codebase instead of N independent shards with no order.
+
+Strategies:
+  topo         reverse-topological over imports (leaves first, roots last)
+  bfs-seed     breadth-first from --seed outward (focused tours)
+  dfs-seed     depth-first from --seed outward
+  centrality   files with the largest blast radius first
+
+When --narrate is set, each existing .graph.* shard is rewritten with a prose
+preamble describing the file's role as sentences (rather than only structured
+arrows). Same data, different rendering targeted at LLM reading style.
+
+When --budget-tokens is set and the tour exceeds the budget, TOUR.md becomes an
+index linking to TOUR.01.md, TOUR.02.md, ... sized to fit one chapter per turn.
+
+Reads .supermodel/shards.json produced by 'supermodel analyze'. No API call.
+See docs/linearization.md for the design rationale.`,
+		Args: cobra.MaximumNArgs(1),
+		RunE: func(_ *cobra.Command, args []string) error {
+			dir := "." // default to the current directory when no path argument is given
+			if len(args) > 0 {
+				dir = args[0]
+			}
+			repoDir, err := filepath.Abs(dir)
+			if err != nil {
+				return fmt.Errorf("resolving path: %w", err)
+			}
+			cacheFile := filepath.Join(repoDir, ".supermodel", "shards.json") // <repo>/.supermodel/shards.json
+			data, err := os.ReadFile(cacheFile)
+			if err != nil { // unreadable or missing cache: the error text points the user at `supermodel analyze`
+				return fmt.Errorf("reading cache %s: %w (run `supermodel analyze` first)", cacheFile, err)
+			}
+			var ir api.ShardIR
+			if err := json.Unmarshal(data, &ir); err != nil {
+				return fmt.Errorf("parsing cache: %w", err)
+			}
+			cache := shards.NewCache()
+			cache.Build(&ir)
+
+			strategy, err := shards.ResolveStrategy(strategyName, seed) // NOTE(review): resolved only after the cache loads, so a bad --strategy is reported after cache errors
+			if err != nil {
+				return err
+			}
+
+			out, err := shards.WriteTour(repoDir, cache, strategy, budgetTokens, dryRun)
+			if err != nil {
+				return err
+			}
+			if !dryRun { // no success message on a dry run
+				ui.Success("Wrote tour to %s (strategy: %s)", out, strategy.Name())
+			}
+
+			if narrate { // optional second pass, run after the tour has been written
+				files := cache.SourceFiles()
+				written, rerr := shards.RenderAll(repoDir, cache, files, true, dryRun) // NOTE(review): assumes 4th arg is the narrate switch and 5th is dry-run — confirm against RenderAll's signature
+				if rerr != nil {
+					return fmt.Errorf("re-rendering shards with narrative: %w", rerr)
+				}
+				if !dryRun {
+					ui.Success("Re-wrote %d shards with narrative preamble", written)
+				}
+			}
+			return nil
+		},
+	}
+
+	c.Flags().StringVar(&strategyName, "strategy", "topo",
+		"linearization strategy: topo | bfs-seed | dfs-seed | centrality")
+	c.Flags().StringVar(&seed, "seed", "", "seed file path (required for bfs-seed / dfs-seed)")
+	c.Flags().BoolVar(&narrate, "narrate", false, "also rewrite existing .graph.* shards with a prose narrative preamble")
+	c.Flags().IntVar(&budgetTokens, "budget-tokens", 0, "chunk tour into chapters of this token budget (0 = single file)")
+	c.Flags().BoolVar(&dryRun, "dry-run", false, "print what would be written without touching disk")
+
+	rootCmd.AddCommand(c)
+}
@@ -0,0 +1,180 @@
+# Graph Linearization for Sharding
+
+## Thesis
+
+LLMs are one-dimensional. They consume a token stream and attend to positions
+within it. Graphs are multi-dimensional: nodes are connected by edges that
+don't live on the token axis. A model handed a blob of JSON nodes and edges has
+to do pointer-chasing on UUIDs inside a single attention pass — work that scales
+badly with graph size and burns context.
+
+**Graph linearization** is the deliberate serialization of a graph into a
+reading order the model can consume left-to-right, with local neighborhoods
+kept close in the token stream and adjacency rendered as prose rather than
+identifiers. See Xypolopoulos et al., *Graph Linearization Methods for Reasoning
+on Graphs with Large Language Models* (arXiv:2410.19494) for the underlying
+principles: centrality and degeneracy-based orderings substantially beat random
+serialization on LLM graph-reasoning tasks.
+
+## Where the CLI stands today
+
+`supermodel analyze` already writes per-file sidecar shards (`.graph.ext` or
+`.calls / .deps / .impact`). Those shards are **file-level linearization**:
+each sidecar collapses a subgraph into a `[deps] / [calls] / [impact]` text
+layout the model reads before touching the source file.
+
+Two things are missing:
+
+1. **No reading order across files.** Agents see N independent shards and have
+   to guess which to read first. There is no spine.
+2. **No prose adjacency inside a shard.** Call relationships are rendered as
+   `name ← other    path:line` arrows. Accurate and terse, but the model
+   reconstructs sentences on the fly every time.
+
+Sharding produces the units. Linearization produces the **order and
+narrative** over those units.
+
+## Design: the Tour
+
+A *tour* is a single markdown file — `.supermodel/TOUR.md` — that serializes
+the whole repository graph into a linear walk. It is the spine that makes the
+existing shards navigable.
+
+```
+TOUR.md                         ← linear walk (this feature)
+src/auth/session.go             ← source file
+src/auth/session.graph.go       ← existing shard (per-file linearization)
+```
+
+Agents read `TOUR.md` once to get the layout, then open shards + source in the
+order the tour presents them.
+
+### Structure of TOUR.md
+
+```markdown
+# Repository Tour — supermodel-cli
+
+**Strategy:** reverse-topological over the import graph
+(leaves → roots). Read top-to-bottom to see dependencies before dependents.
+
+## Domain: Analyze
+### Subdomain: Pipeline
+- **internal/analyze/handler.go** — orchestrates upload + render
+  reads: api, config, shards · read by: cmd/analyze.go
+  risk: MEDIUM · [shard](../internal/analyze/handler.graph.go)
+
+## Domain: Shards
+### Subdomain: Rendering
+- **internal/shards/render.go** — emits .graph sidecars per source file
+  reads: api · read by: internal/shards/handler.go
+  risk: LOW · [shard](../internal/shards/render.graph.go)
+...
+```
+
+One prose entry per file — name, role, adjacency, risk, shard pointer. Linear
+order is the strategy's output. The agent reads prefix-to-suffix.
+
+### Linearization strategies
+
+Strategies are interchangeable. The default is `topo` because it matches how
+humans read codebases ("what are the leaves, then what depends on them").
+
+| Strategy     | Ordering                                                | Best for                                |
+|--------------|---------------------------------------------------------|-----------------------------------------|
+| `topo`       | reverse-topological over imports (leaves first)         | whole-codebase onboarding               |
+| `bfs-seed`   | BFS from `--seed <file>` outward                        | focused tasks, blast radius walks       |
+| `dfs-seed`   | DFS from `--seed <file>` — depth-first exploration      | tracing a request through layers        |
+| `centrality` | PageRank-like over importers (most-depended-on first)   | "what's the core of this codebase"      |
+
+Cycles are broken by file-path lexicographic order (deterministic, boring).
+
+### Prose narrative preamble (opt-in)
+
+Tour generation also lets you inject a prose preamble into each existing shard
+with `--narrate`:
+
+```go
+// @generated supermodel-shard — do not edit
+//
+// Narrative: parseConfig (Domain Config / Loading) is called by main
+// (cmd/root.go:42) and serverInit (cmd/server.go:18). It calls readFile
+// and json.Unmarshal. Imports: os, encoding/json. Risk: LOW.
+//
+// [deps]
+// imports     os
+// imports     encoding/json
+// ...
+```
+
+The preamble is a one-paragraph summary derived from the same cache used for
+the structured sections — no new data, just a second rendering targeted at the
+model's native reading style. Flag-gated so users can A/B test it.
+
+## CLI surface (implemented)
+
+Standalone:
+
+```
+supermodel tour [--strategy topo|bfs-seed|dfs-seed|centrality]
+                [--seed <file>]
+                [--narrate]
+                [--budget-tokens <N>]
+                [--dry-run]
+                [path]
+```
+
+Integrated with `analyze` so a single command emits shards + spine:
+
+```
+supermodel analyze [--tour]
+                   [--tour-strategy topo|bfs-seed|dfs-seed|centrality]
+                   [--tour-seed <file>]
+                   [--tour-budget <N>]
+                   [--narrate]
+                   [path]
+```
+
+- `tour` reads `.supermodel/shards.json` (errors if absent, telling you to run `analyze` first).
+- Writes `.supermodel/TOUR.md`.
+- With `--narrate`, rewrites existing `.graph.*` shards in place to include a
+  prose narrative preamble.
+- `--budget-tokens` chunks the tour into `TOUR.01.md`, `TOUR.02.md`, ... with
+  `TOUR.md` becoming an index. Each chapter has prev/next cross-links.
+
+Standalone `tour` makes no API call and builds no new cache: it purely reshapes what `analyze` already produced.
+
+## Why this shape
+
+- **Same vertical slice.** Tour lives inside `internal/shards/` — it consumes
+  the shard cache and emits a companion artifact. No cross-slice dependency.
+- **Additive.** Default behavior of `analyze` is unchanged. Tour is opt-in.
+- **Deterministic.** Lexicographic tiebreaks, stable sort; tour file is safe to
+  commit or diff.
+- **Strategy-pluggable.** The `Strategy` interface is small (`Name()` plus
+  `Order(cache) []string`), so we can add more orderings without touching the
+  renderer.
+
+## Open questions
+
+- Should tour output default-render inline snippets of each shard, or strictly
+  link to them? Inline is self-contained (one file to read) but duplicates
+  content; linked is DRY but requires the agent to follow pointers.
+- Should there be a `--focus <glob>` filter so tours scope to a subtree?
+- Does `arch-docs` want to consume TOUR.md as its entry point (replacing its
+  own traversal)?
+- Should running `supermodel tour` with a different `--budget-tokens` clean up
+  stale `TOUR.NN.md` files from a prior chunked run? (Cosmetic.)
+- Benchmark: we need numbers. Plan to wire through
+  `supermodeltools/supermodel-benchmarks/shard-ab-test/` to measure
+  agent performance with/without TOUR + narrate.
+
+## References
+
+- Xypolopoulos et al., *Graph Linearization Methods for Reasoning on Graphs
+  with Large Language Models*, arXiv:2410.19494
+- `supermodeltools/codegraph-graphrag` — BFS narrative walks, the thesis doc
+  in the org
+- `supermodeltools/graph2md` — per-node markdown emission (another
+  linearization strategy)
+- `supermodeltools/mcp/src/tools/explore-function.ts` — `describeNode()`
+  prose format, cross-subsystem markers
@@ -183,7 +183,7 @@ func (d *Daemon) loadOrGenerate(ctx context.Context) error {
 			d.mu.Unlock()
 
 			files := d.cache.SourceFiles()
-			written, renderErr := RenderAll(d.cfg.RepoDir, d.cache, files, false)
+			written, renderErr := RenderAll(d.cfg.RepoDir, d.cache, files, false, false)
 			if renderErr != nil {
 				return renderErr
 			}
@@ -226,7 +226,7 @@ func (d *Daemon) fullGenerate(ctx context.Context) error {
 	d.mu.Unlock()
 
 	files := d.cache.SourceFiles()
-	written, err := RenderAll(d.cfg.RepoDir, d.cache, files, false)
+	written, err := RenderAll(d.cfg.RepoDir, d.cache, files, false, false)
 	if err != nil {
 		return err
 	}
@@ -301,7 +301,7 @@ func (d *Daemon) incrementalUpdate(ctx context.Context, changedFiles []string) {
 
 	d.logf("Re-rendering %d affected shards", len(affected))
 
-	written, err := RenderAll(d.cfg.RepoDir, cacheSnapshot, affected, false)
+	written, err := RenderAll(d.cfg.RepoDir, cacheSnapshot, affected, false, false)
 	if err != nil {
 		d.logf("Render error: %v", err)
 		return