Changes from all commits (15 commits)
15 changes: 15 additions & 0 deletions .gitignore
@@ -4,3 +4,18 @@ npm/bin/
/supermodel
/cli
.supermodel/

# macOS
.DS_Store

# Python
__pycache__/
*.pyc
*.pyo

# Go test coverage
cover*.out

# Supermodel graph sidecars
*.graph.go
*.graph.py
22 changes: 22 additions & 0 deletions benchmark/CLAUDE.md
@@ -0,0 +1,22 @@
# Django Source — supermodel context enabled

This is the Django framework source. The auth package is at `django/contrib/auth/`.

## Graph shard files

`supermodel analyze` has run on this repo. Every source file has a `.graph.py`
sidecar with pre-computed context: imports, exports, callers, callees, and
cross-module relationships.

**Read the `.graph.py` file before the source file.** It shows you the full
picture in far fewer tokens. For example:

- Wondering what `django/contrib/auth/__init__.py` exports and what it calls?
→ read `django/contrib/auth/__init__.graph.py` first
- Need to know what signals the auth system emits and where they're defined?
→ check `django/contrib/auth/signals.graph.py`
- Want to see what depends on a given module?
→ the `.graph.py` file lists known callers

Use the graph files to navigate efficiently. Only drop into the source when you
need implementation details the graph doesn't cover.
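For orientation, a sidecar might look roughly like this (an illustrative sketch; the real format is whatever `supermodel analyze` emits, and the field names simply mirror the description above). The signal names are Django's real auth signals:

```
# django/contrib/auth/signals.graph.py (hypothetical contents)
imports:   django.dispatch
exports:   user_logged_in, user_logged_out, user_login_failed
callers:   django/contrib/auth/__init__.py:login() -> user_logged_in.send
callees:   django.dispatch.Signal()
```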
10 changes: 10 additions & 0 deletions benchmark/CLAUDE.skill.md
@@ -0,0 +1,10 @@
This repository has .graph.* files next to source files containing code relationship data from Supermodel.

The naming convention: for src/Foo.py the graph file is src/Foo.graph.py (insert .graph before the extension). Each graph file has up to three sections:
- [deps] — what this file imports and what imports it
- [calls] — function call relationships with file paths and line numbers
- [impact] — blast radius: risk level, affected domains, direct/transitive dependents

**Read the .graph file before the source file.** It shows the full dependency and call picture in far fewer tokens. Construct the path directly — don't ls the directory to discover it.

Before grepping to understand how code connects, check the relevant .graph files. They already answer most structural navigation questions: what calls what, what imports what, and what breaks if you change something. When you grep for a function name, .graph files appear in results showing every caller and callee — use this to navigate instead of searching for each one individually.
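The path rule is mechanical enough to sketch. A minimal Python helper, assuming plain string paths (`graph_path` is an illustrative name, not part of Supermodel):

```python
from pathlib import PurePosixPath

def graph_path(source_path: str) -> str:
    """Return the sidecar path: insert .graph before the extension.

    src/Foo.py -> src/Foo.graph.py
    """
    p = PurePosixPath(source_path)
    # with_suffix replaces the final suffix, so ".py" becomes ".graph.py".
    return str(p.with_suffix(".graph" + p.suffix))
```

So `graph_path("django/contrib/auth/signals.py")` yields `django/contrib/auth/signals.graph.py`, matching the convention above.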
31 changes: 31 additions & 0 deletions benchmark/Dockerfile.naked
@@ -0,0 +1,31 @@
# Benchmark container: naked Claude Code on django/django
FROM python:3.12-slim

# System deps + Node.js 20
RUN apt-get update && apt-get install -y curl ca-certificates git && \
    curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
    apt-get install -y nodejs && \
    rm -rf /var/lib/apt/lists/*

# Install Claude Code
RUN npm install -g @anthropic-ai/claude-code

# Clone Django source at a fixed tag
RUN git clone --depth=1 --branch 5.0.6 \
    https://github.com/django/django.git /app

# Install Django in editable mode
RUN pip install --no-cache-dir -e /app

# Drop in the change_tracking test app
COPY change_tracking/ /app/tests/change_tracking/

# Copy task
COPY task.md /benchmark/task.md

# Non-root user (Claude refuses to run as root)
RUN useradd -m -s /bin/bash bench && chown -R bench:bench /app /benchmark
USER bench

COPY entrypoint.naked.sh /entrypoint.sh
ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]
46 changes: 46 additions & 0 deletions benchmark/Dockerfile.supermodel
@@ -0,0 +1,46 @@
# Benchmark container: Claude Code + supermodel on django/django
# Build from repo root: docker build -f benchmark/Dockerfile.supermodel -t bench-supermodel .

# Stage 1: Build supermodel binary
FROM golang:alpine AS supermodel-builder
ENV GOTOOLCHAIN=auto
WORKDIR /build
COPY . .
RUN go build \
    -ldflags="-s -w -X github.com/supermodeltools/cli/internal/build.Version=benchmark" \
    -o /build/supermodel \
    .

# Stage 2: Runtime
FROM python:3.12-slim

# System deps + Node.js 20
RUN apt-get update && apt-get install -y curl ca-certificates git && \
    curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
    apt-get install -y nodejs && \
    rm -rf /var/lib/apt/lists/*

# Install Claude Code + supermodel
RUN npm install -g @anthropic-ai/claude-code
COPY --from=supermodel-builder /build/supermodel /usr/local/bin/supermodel

# Clone Django source at a fixed tag
RUN git clone --depth=1 --branch 5.0.6 \
    https://github.com/django/django.git /app

# Install Django in editable mode
RUN pip install --no-cache-dir -e /app

# Drop in the change_tracking test app
COPY benchmark/change_tracking/ /app/tests/change_tracking/

# Copy task + CLAUDE.md
COPY benchmark/task.md /benchmark/task.md
COPY benchmark/CLAUDE.md /app/CLAUDE.md

# Non-root user
RUN useradd -m -s /bin/bash bench && chown -R bench:bench /app /benchmark
USER bench

COPY benchmark/entrypoint.supermodel.sh /entrypoint.sh
ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]
Empty file.
1 change: 1 addition & 0 deletions benchmark/change_tracking/models.py
@@ -0,0 +1 @@
# Implement your solution here.
68 changes: 68 additions & 0 deletions benchmark/change_tracking/tests.py
@@ -0,0 +1,68 @@
from django.test import TestCase
from django.contrib.auth import get_user_model

User = get_user_model()


class EmailChangeTrackingTest(TestCase):

    def test_change_is_recorded(self):
        from change_tracking.models import EmailChangeRecord
        user = User.objects.create_user('alice', email='alice@old.com', password='pass')
        user.email = 'alice@new.com'
        user.save()
        self.assertEqual(EmailChangeRecord.objects.filter(user=user).count(), 1)

    def test_old_email_recorded(self):
        from change_tracking.models import EmailChangeRecord
        user = User.objects.create_user('bob', email='bob@old.com', password='pass')
        user.email = 'bob@new.com'
        user.save()
        self.assertEqual(EmailChangeRecord.objects.get(user=user).old_email, 'bob@old.com')

    def test_new_email_recorded(self):
        from change_tracking.models import EmailChangeRecord
        user = User.objects.create_user('carol', email='carol@old.com', password='pass')
        user.email = 'carol@new.com'
        user.save()
        self.assertEqual(EmailChangeRecord.objects.get(user=user).new_email, 'carol@new.com')

    def test_timestamp_recorded(self):
        from change_tracking.models import EmailChangeRecord
        from django.utils import timezone
        user = User.objects.create_user('dave', email='dave@old.com', password='pass')
        before = timezone.now()
        user.email = 'dave@new.com'
        user.save()
        after = timezone.now()
        ts = EmailChangeRecord.objects.get(user=user).changed_at
        self.assertTrue(before <= ts <= after)

    def test_no_record_on_create(self):
        from change_tracking.models import EmailChangeRecord
        User.objects.create_user('eve', email='eve@example.com', password='pass')
        self.assertEqual(EmailChangeRecord.objects.count(), 0)

    def test_no_record_when_email_unchanged(self):
        from change_tracking.models import EmailChangeRecord
        user = User.objects.create_user('frank', email='frank@example.com', password='pass')
        user.first_name = 'Frank'
        user.save()
        self.assertEqual(EmailChangeRecord.objects.count(), 0)

    def test_multiple_changes_all_recorded(self):
        from change_tracking.models import EmailChangeRecord
        user = User.objects.create_user('grace', email='grace@v1.com', password='pass')
        user.email = 'grace@v2.com'
        user.save()
        user.email = 'grace@v3.com'
        user.save()
        self.assertEqual(EmailChangeRecord.objects.filter(user=user).count(), 2)

    def test_records_deleted_with_user(self):
        from change_tracking.models import EmailChangeRecord
        user = User.objects.create_user('henry', email='henry@old.com', password='pass')
        user.email = 'henry@new.com'
        user.save()
        user.delete()
        self.assertEqual(EmailChangeRecord.objects.count(), 0)
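The suite above pins down the required behavior: record a change only when an existing user's email actually differs, never on create. Stripped of Django, the core compare-on-save logic reduces to something like this (a minimal framework-free sketch; `users` and `change_records` are plain in-memory stand-ins, not the Django model the tests import):

```python
import datetime

# In-memory stand-ins for the database tables.
users = {}           # pk -> current email
change_records = []  # appended dicts mimic EmailChangeRecord rows

def save_user(pk, email):
    """Persist a user, recording a change only when an existing
    user's email actually changes (never on initial create)."""
    old_email = users.get(pk)
    if old_email is not None and old_email != email:
        change_records.append({
            "user": pk,
            "old_email": old_email,
            "new_email": email,
            "changed_at": datetime.datetime.now(datetime.timezone.utc),
        })
    users[pk] = email
```

In Django terms the same comparison typically lives in a `pre_save` signal handler or an overridden `Model.save()` that re-reads the old row before writing.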
99 changes: 99 additions & 0 deletions benchmark/compare.sh
@@ -0,0 +1,99 @@
#!/bin/bash
# Usage: ./benchmark/compare.sh results/naked.txt results/supermodel.txt
# Can also be run standalone after a benchmark run.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
NAKED_LOG="${1:-$SCRIPT_DIR/results/naked.txt}"
SUPERMODEL_LOG="${2:-$SCRIPT_DIR/results/supermodel.txt}"

if [[ ! -f "$NAKED_LOG" ]]; then
echo "error: naked log not found: $NAKED_LOG" >&2
exit 1
fi
if [[ ! -f "$SUPERMODEL_LOG" ]]; then
echo "error: supermodel log not found: $SUPERMODEL_LOG" >&2
exit 1
fi

# ── Helpers ───────────────────────────────────────────────────────────────────

extract_tests() {
    local log="$1"
    # Django test runner outputs lines like:
    #   Ran 15 tests in 0.123s
    #   OK   or   FAILED (failures=2, errors=1)
    local ran errors failures
    ran=$(grep -oP 'Ran \K[0-9]+' "$log" 2>/dev/null | tail -1 || echo "?")
    failures=$(grep -oP 'failures=\K[0-9]+' "$log" 2>/dev/null | tail -1 || echo "0")
    errors=$(grep -oP 'errors=\K[0-9]+' "$log" 2>/dev/null | tail -1 || echo "0")
    local status="PASS"
    if grep -q 'FAILED' "$log" 2>/dev/null; then
        status="FAIL"
    fi
    echo "$ran tests | $status | failures=$failures errors=$errors"
}

extract_cost() {
    local log="$1"
    # Claude Code stream-json emits a final result object with costUSD.
    # Try several patterns in order of specificity.
    local cost
    cost=$(grep -oP '"costUSD"\s*:\s*\K[0-9.]+' "$log" 2>/dev/null | tail -1) && { echo "\$$cost"; return; }
    cost=$(grep -oP '"cost_usd"\s*:\s*\K[0-9.]+' "$log" 2>/dev/null | tail -1) && { echo "\$$cost"; return; }
    cost=$(grep -oP 'Total cost[^0-9]*\K[0-9.]+' "$log" 2>/dev/null | tail -1) && { echo "\$$cost"; return; }
    echo "(not found — check log for token counts)"
}

extract_tokens() {
    local log="$1"
    local input output
    # "|| true" guards the unmatched case: with pipefail a no-match grep
    # fails the whole pipeline, which would kill the script under set -e.
    input=$(grep -oP '"input_tokens"\s*:\s*\K[0-9]+' "$log" 2>/dev/null | \
        awk '{s+=$1} END {print s+0}' || true)
    output=$(grep -oP '"output_tokens"\s*:\s*\K[0-9]+' "$log" 2>/dev/null | \
        awk '{s+=$1} END {print s+0}' || true)
    echo "in=${input:-?} out=${output:-?}"
}

# ── Report ────────────────────────────────────────────────────────────────────

printf '\n'
printf '%-26s %-20s %-20s\n' "" "naked" "supermodel"
printf '%-26s %-20s %-20s\n' "$(printf '%0.s─' {1..26})" "$(printf '%0.s─' {1..20})" "$(printf '%0.s─' {1..20})"
printf '%-26s %-20s %-20s\n' "Tests" "$(extract_tests "$NAKED_LOG")" "$(extract_tests "$SUPERMODEL_LOG")"
printf '%-26s %-20s %-20s\n' "API cost" "$(extract_cost "$NAKED_LOG")" "$(extract_cost "$SUPERMODEL_LOG")"
printf '%-26s %-20s %-20s\n' "Tokens" "$(extract_tokens "$NAKED_LOG")" "$(extract_tokens "$SUPERMODEL_LOG")"
printf '\n'

# ── Diff feature test outcomes ────────────────────────────────────────────────

# grep -c prints a count even when nothing matches but exits nonzero, so the
# old "|| second grep" fallback emitted two counts; guard with || true instead.
naked_priority_pass=$(grep -cE 'PriorityFeature.*ok|ok.*PriorityFeature' "$NAKED_LOG" 2>/dev/null || true)
sm_priority_pass=$(grep -cE 'PriorityFeature.*ok|ok.*PriorityFeature' "$SUPERMODEL_LOG" 2>/dev/null || true)

echo "Priority feature tests (naked): $naked_priority_pass / 8 passing"
echo "Priority feature tests (supermodel): $sm_priority_pass / 8 passing"
echo

# ── Show cost delta ───────────────────────────────────────────────────────────

naked_cost=$(grep -oP '"costUSD"\s*:\s*\K[0-9.]+' "$NAKED_LOG" 2>/dev/null | tail -1 || echo "")
sm_cost=$(grep -oP '"costUSD"\s*:\s*\K[0-9.]+' "$SUPERMODEL_LOG" 2>/dev/null | tail -1 || echo "")

if [[ -n "$naked_cost" && -n "$sm_cost" ]]; then
python3 - <<PYEOF
naked = float("$naked_cost")
sm = float("$sm_cost")
delta = naked - sm
pct = (delta / naked * 100) if naked > 0 else 0
sign = "cheaper" if delta > 0 else "more expensive"
print(f"supermodel was \${abs(delta):.4f} ({abs(pct):.1f}%) {sign} than naked")
PYEOF
fi

echo
echo "Full logs:"
echo " naked: $NAKED_LOG"
echo " supermodel: $SUPERMODEL_LOG"
22 changes: 22 additions & 0 deletions benchmark/django_app/manage.py
@@ -0,0 +1,22 @@
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys


def main():
    """Run administrative tasks."""
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mysite.settings')
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        raise ImportError(
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        ) from exc
    execute_from_command_line(sys.argv)


if __name__ == '__main__':
    main()
Empty file.
30 changes: 30 additions & 0 deletions benchmark/django_app/mysite/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from pathlib import Path

BASE_DIR = Path(__file__).resolve().parent.parent

SECRET_KEY = 'django-insecure-benchmark-key-not-for-production'

DEBUG = True

ALLOWED_HOSTS = ['*']

INSTALLED_APPS = [
    'django.contrib.contenttypes',
    'django.contrib.auth',
    'todos',
]

MIDDLEWARE = [
    'django.middleware.common.CommonMiddleware',
]

ROOT_URLCONF = 'mysite.urls'

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': BASE_DIR / 'db.sqlite3',
    }
}

DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
5 changes: 5 additions & 0 deletions benchmark/django_app/mysite/urls.py
@@ -0,0 +1,5 @@
from django.urls import path, include

urlpatterns = [
    path('api/', include('todos.urls')),
]
5 changes: 5 additions & 0 deletions benchmark/django_app/mysite/wsgi.py
@@ -0,0 +1,5 @@
import os
from django.core.wsgi import get_wsgi_application

os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mysite.settings')
application = get_wsgi_application()
1 change: 1 addition & 0 deletions benchmark/django_app/requirements.txt
@@ -0,0 +1 @@
Django==5.0.4
Empty file.
4 changes: 4 additions & 0 deletions benchmark/django_app/todos/admin.py
@@ -0,0 +1,4 @@
from django.contrib import admin
from .models import Todo

admin.site.register(Todo)
6 changes: 6 additions & 0 deletions benchmark/django_app/todos/apps.py
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class TodosConfig(AppConfig):
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'todos'
13 changes: 13 additions & 0 deletions benchmark/django_app/todos/models.py
@@ -0,0 +1,13 @@
from django.db import models


class Todo(models.Model):
    title = models.CharField(max_length=200)
    completed = models.BooleanField(default=False)
    created_at = models.DateTimeField(auto_now_add=True)

Comment on lines +4 to +8
⚠️ Potential issue | 🔴 Critical

priority field required by tests is missing.

Lines 4-8 define Todo without priority, but benchmark/django_app/todos/tests.py (lines 72-91) expects low|medium|high with default medium. This will break the priority feature path.

Suggested fix
 class Todo(models.Model):
+    PRIORITY_LOW = 'low'
+    PRIORITY_MEDIUM = 'medium'
+    PRIORITY_HIGH = 'high'
+    PRIORITY_CHOICES = [
+        (PRIORITY_LOW, 'Low'),
+        (PRIORITY_MEDIUM, 'Medium'),
+        (PRIORITY_HIGH, 'High'),
+    ]
+
     title = models.CharField(max_length=200)
     completed = models.BooleanField(default=False)
+    priority = models.CharField(
+        max_length=10,
+        choices=PRIORITY_CHOICES,
+        default=PRIORITY_MEDIUM,
+    )
     created_at = models.DateTimeField(auto_now_add=True)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@benchmark/django_app/todos/models.py` around lines 4 - 8, The Todo model is
missing the required priority field that tests expect; add a CharField named
priority to the Todo class with choices set to ('low','medium','high') (use a
PRIORITY_CHOICES tuple) and default 'medium' (e.g., priority =
models.CharField(max_length=6, choices=PRIORITY_CHOICES, default='medium')),
then create and apply a Django migration so the DB schema matches the model and
tests pass.

    def __str__(self):
        return self.title

    class Meta:
        ordering = ['-created_at']