From 1d51414f2434f51938fb2499676d8208073feed0 Mon Sep 17 00:00:00 2001 From: Jaap de Haan <261428+jdehaan@users.noreply.github.com> Date: Sun, 17 May 2026 15:15:03 +0200 Subject: [PATCH] fix: Generate and check dependencies --- .gitignore | 1 + DEPENDENCIES.md | 4 +- Makefile | 9 +- tools/gen_deps_doc.py | 127 --------------- tools/hah-deps/Cargo.toml | 16 ++ tools/hah-deps/src/main.rs | 313 +++++++++++++++++++++++++++++++++++++ 6 files changed, 338 insertions(+), 132 deletions(-) delete mode 100644 tools/gen_deps_doc.py create mode 100644 tools/hah-deps/Cargo.toml create mode 100644 tools/hah-deps/src/main.rs diff --git a/.gitignore b/.gitignore index e27ef70..f247f64 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target /tools/hah-metrics/target +/tools/hah-deps/target Cargo.lock diff --git a/DEPENDENCIES.md b/DEPENDENCIES.md index be949e0..8788aab 100644 --- a/DEPENDENCIES.md +++ b/DEPENDENCIES.md @@ -12,12 +12,12 @@ _Generated by `make doc-dependencies` — do not edit by hand._ | [clap](https://crates.io/crates/clap) | 4 | MIT OR Apache-2.0 | A simple to use, efficient, and full-featured Command Line Argument Parser | | [colored](https://crates.io/crates/colored) | 2 | MPL-2.0 | The most simple way to add colors in your terminal | | [dirs](https://crates.io/crates/dirs) | 5 | MIT OR Apache-2.0 | A tiny low-level library that provides platform-specific standard locations of directories for config, cache and other data on Linux, Windows, macOS and Redox by leveraging the mechanisms defined by the XDG base/user directory specifications on Linux, the Known Folder API on Windows, and the Standard Directory guidelines on macOS | -| [regex](https://crates.io/crates/regex) | 1 | MIT OR Apache-2.0 | An implementation of regular expressions for Rust. This implementation uses -finite automata and guarantees linear time matching on all inputs | +| [regex](https://crates.io/crates/regex) | 1 | MIT OR Apache-2.0 | An implementation of regular expressions for Rust. This implementation uses finite automata and guarantees linear time matching on all inputs | | [serde](https://crates.io/crates/serde) | 1 | MIT OR Apache-2.0 | A generic serialization/deserialization framework | | [serde_json](https://crates.io/crates/serde_json) | 1 | MIT OR Apache-2.0 | A JSON serialization file format | | [serde_yaml_ng](https://crates.io/crates/serde_yaml_ng) | 0.9 | MIT OR Apache-2.0 | YAML data format for Serde | | [walkdir](https://crates.io/crates/walkdir) | 2 | Unlicense/MIT | Recursively walk a directory | +| [winnow](https://crates.io/crates/winnow) | 1 | MIT | A byte-oriented, zero-copy, parser combinators library | ## Development-only Dependencies diff --git a/Makefile b/Makefile index 5fee1f9..a58c8c9 100644 --- a/Makefile +++ b/Makefile @@ -12,8 +12,8 @@ setup: rustup component add llvm-tools-preview clippy rustfmt cargo install cargo-audit cargo install cargo-llvm-cov - python3 --version cargo build --manifest-path tools/hah-metrics/Cargo.toml --release + cargo build --manifest-path tools/hah-deps/Cargo.toml --release fmt: cargo fmt --all @@ -42,7 +42,10 @@ metrics: --max-length $(METRIC_MAX_LENGTH) doc-dependencies: - python3 tools/gen_deps_doc.py > DEPENDENCIES.md + cargo run --manifest-path tools/hah-deps/Cargo.toml --release --quiet > DEPENDENCIES.md -check: fmt-check lint test audit coverage-ci metrics +check-dependencies: + cargo run --manifest-path tools/hah-deps/Cargo.toml --release --quiet -- --check + +check: fmt-check lint test audit coverage-ci metrics check-dependencies diff --git a/tools/gen_deps_doc.py b/tools/gen_deps_doc.py deleted file mode 100644 index 43e56f0..0000000 --- a/tools/gen_deps_doc.py +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env python3 -"""Generate DEPENDENCIES.md from cargo metadata. - -Writes to stdout; redirect to DEPENDENCIES.md: - - python3 tools/gen_deps_doc.py > DEPENDENCIES.md - -or via the Makefile target: - - make doc-dependencies -""" - -import json -import subprocess -import sys -from collections import defaultdict - - -def req_display(req: str) -> str: - """Convert a cargo version requirement to a short display string. - - '^0.9.35' -> '0.9', '^1' -> '1', '^0.0.12' -> '0.0' - """ - s = req.lstrip("^~=>< ") - # Drop any trailing comma-separated constraints (e.g. ">=1.0, <2.0") - s = s.split(",")[0].strip() - parts = s.split(".") - if parts[0] == "0" and len(parts) >= 2: - return f"0.{parts[1]}" - return parts[0] - - -def main() -> None: - meta = json.loads( - subprocess.check_output( - ["cargo", "metadata", "--format-version", "1"], - stderr=subprocess.DEVNULL, - ) - ) - - workspace_member_ids: set[str] = set(meta["workspace_members"]) - pkgs_by_id: dict[str, dict] = {p["id"]: p for p in meta["packages"]} - workspace_names: set[str] = {pkgs_by_id[wid]["name"] for wid in workspace_member_ids} - - # Which deps are declared optional in each workspace member - optional_in: dict[str, set[str]] = {} - req_in: dict[str, dict[str, str]] = {} - for wid in workspace_member_ids: - pkg = pkgs_by_id[wid] - wm_name = pkg["name"] - optional_in[wm_name] = {d["name"] for d in pkg["dependencies"] if d.get("optional", False)} - req_in[wm_name] = {d["name"]: d["req"] for d in pkg["dependencies"]} - - # Collect direct external deps across all workspace members. - # A dep is "runtime" if it appears as kind=None (normal) AND is NOT optional - # in the declaring member. Everything else is "dev-only". - direct: dict[str, dict] = {} - - for node in meta["resolve"]["nodes"]: - if node["id"] not in workspace_member_ids: - continue - wm_name = pkgs_by_id[node["id"]]["name"] - - for dep in node["deps"]: - dep_pkg = pkgs_by_id[dep["pkg"]] - dep_name = dep_pkg["name"] - if dep_name in workspace_names: - continue # skip internal workspace crates - - if dep_name not in direct: - direct[dep_name] = { - "runtime": False, - "req": req_in[wm_name].get(dep_name, "?"), - "pkg": dep_pkg, - } - - for kind_info in dep["dep_kinds"]: - if kind_info["kind"] is None and dep_name not in optional_in[wm_name]: - direct[dep_name]["runtime"] = True - - runtime = sorted( - [(n, d) for n, d in direct.items() if d["runtime"]], - key=lambda x: x[0].lower(), - ) - dev_only = sorted( - [(n, d) for n, d in direct.items() if not d["runtime"]], - key=lambda x: x[0].lower(), - ) - - def row(name: str, info: dict) -> str: - pkg = info["pkg"] - ver = req_display(info["req"]) - lic = (pkg.get("license") or "unknown").strip() - desc = (pkg.get("description") or "").strip().rstrip(".") - url = f"https://crates.io/crates/{name}" - return f"| [{name}]({url}) | {ver} | {lic} | {desc} |" - - col_header = "| Crate | Version | License | Purpose |\n| ----- | ------- | ------- | ------- |" - - lines = [ - "# Dependencies", - "", - "Direct dependencies of the HaH workspace crates.", - "_Generated by `make doc-dependencies` — do not edit by hand._", - "", - "## Runtime Dependencies", - "", - col_header, - ] - for name, info in runtime: - lines.append(row(name, info)) - - lines += [ - "", - "## Development-only Dependencies", - "", - col_header, - ] - for name, info in dev_only: - lines.append(row(name, info)) - - lines.append("") - sys.stdout.write("\n".join(lines)) - - -if __name__ == "__main__": - main() diff --git a/tools/hah-deps/Cargo.toml b/tools/hah-deps/Cargo.toml new file mode 100644 index 0000000..77281a6 --- /dev/null +++ b/tools/hah-deps/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "hah-deps" +version = "0.1.0" +edition = "2021" + +[workspace] + +[[bin]] +name = "hah-deps" +path = "src/main.rs" + +[dependencies] +anyhow = "1" +clap = { version = "4", features = ["derive"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" diff --git a/tools/hah-deps/src/main.rs b/tools/hah-deps/src/main.rs new file mode 100644 index 0000000..931001b --- /dev/null +++ b/tools/hah-deps/src/main.rs @@ -0,0 +1,313 @@ +//! Generate DEPENDENCIES.md from `cargo metadata`. +//! +//! Usage: +//! hah-deps # writes to stdout +//! hah-deps --check # exits 1 if output differs from DEPENDENCIES.md + +use std::collections::{HashMap, HashSet}; +use std::process::Command; + +use anyhow::{Context, Result, anyhow}; +use clap::Parser; +use serde::Deserialize; + +// ── CLI ─────────────────────────────────────────────────────────────────────── + +#[derive(Parser)] +#[command(name = "hah-deps", about = "Generate DEPENDENCIES.md from cargo metadata")] +struct Args { + /// Instead of printing, verify that DEPENDENCIES.md matches the generated output. + /// Exits with code 1 and a diff summary if they differ. + #[arg(long)] + check: bool, + + /// Path to DEPENDENCIES.md (only used with --check) + #[arg(long, default_value = "DEPENDENCIES.md")] + file: String, +} + +// ── Cargo metadata types ────────────────────────────────────────────────────── + +#[derive(Deserialize)] +struct Metadata { + workspace_members: Vec, + packages: Vec, + resolve: Resolve, +} + +#[derive(Deserialize)] +struct Package { + id: String, + name: String, + license: Option, + description: Option, + dependencies: Vec, +} + +#[derive(Deserialize)] +struct Dependency { + name: String, + req: String, + #[serde(default)] + optional: bool, +} + +#[derive(Deserialize)] +struct Resolve { + nodes: Vec, +} + +#[derive(Deserialize)] +struct Node { + id: String, + deps: Vec, +} + +#[derive(Deserialize)] +struct NodeDep { + pkg: String, + dep_kinds: Vec, +} + +#[derive(Deserialize)] +struct DepKind { + kind: Option, // null = normal, "dev", "build" +} + +// ── Version display ─────────────────────────────────────────────────────────── + +/// Convert a cargo version requirement to a short display string. +/// `^0.9.35` → `0.9`, `^1` → `1`, `^0.0.12` → `0.0` +fn req_display(req: &str) -> String { + let s = req.trim_start_matches(|c: char| "^~=><".contains(c) || c == ' '); + let s = s.split(',').next().unwrap_or(s).trim(); + let parts: Vec<&str> = s.split('.').collect(); + if parts.first().copied() == Some("0") && parts.len() >= 2 { + format!("0.{}", parts[1]) + } else { + parts[0].to_string() + } +} + +// ── Description sanitization ────────────────────────────────────────────────── + +/// Collapse whitespace/newlines and strip a trailing period so the description +/// fits cleanly in a single Markdown table cell. +fn sanitize_description(raw: &str) -> String { + raw.split_whitespace() + .collect::>() + .join(" ") + .trim_end_matches('.') + .to_string() +} + +// ── Dependency collection ───────────────────────────────────────────────────── + +struct DepInfo { + runtime: bool, + req: String, + license: String, + description: String, +} + +fn collect_deps(meta: &Metadata) -> HashMap { + let workspace_ids: HashSet<&str> = meta.workspace_members.iter().map(String::as_str).collect(); + let pkgs_by_id: HashMap<&str, &Package> = + meta.packages.iter().map(|p| (p.id.as_str(), p)).collect(); + let workspace_names: HashSet<&str> = workspace_ids + .iter() + .filter_map(|id| pkgs_by_id.get(id)) + .map(|p| p.name.as_str()) + .collect(); + + // Per-workspace-member: which dep names are optional / what req was declared + let mut optional_in: HashMap<&str, HashSet<&str>> = HashMap::new(); + let mut req_in: HashMap<&str, HashMap<&str, &str>> = HashMap::new(); + for id in &workspace_ids { + if let Some(pkg) = pkgs_by_id.get(id) { + optional_in.insert( + pkg.name.as_str(), + pkg.dependencies + .iter() + .filter(|d| d.optional) + .map(|d| d.name.as_str()) + .collect(), + ); + req_in.insert( + pkg.name.as_str(), + pkg.dependencies + .iter() + .map(|d| (d.name.as_str(), d.req.as_str())) + .collect(), + ); + } + } + + let mut direct: HashMap = HashMap::new(); + + for node in &meta.resolve.nodes { + if !workspace_ids.contains(node.id.as_str()) { + continue; + } + let wm_name = match pkgs_by_id.get(node.id.as_str()) { + Some(p) => p.name.as_str(), + None => continue, + }; + + for dep in &node.deps { + let dep_pkg = match pkgs_by_id.get(dep.pkg.as_str()) { + Some(p) => p, + None => continue, + }; + if workspace_names.contains(dep_pkg.name.as_str()) { + continue; // skip internal workspace crates + } + + let is_optional = optional_in + .get(wm_name) + .map(|s| s.contains(dep_pkg.name.as_str())) + .unwrap_or(false); + + let is_normal_runtime = dep + .dep_kinds + .iter() + .any(|k| k.kind.is_none() && !is_optional); + + let entry = direct.entry(dep_pkg.name.clone()).or_insert_with(|| { + let req = req_in + .get(wm_name) + .and_then(|m| m.get(dep_pkg.name.as_str())) + .copied() + .unwrap_or("?"); + DepInfo { + runtime: false, + req: req_display(req), + license: dep_pkg + .license + .as_deref() + .unwrap_or("unknown") + .trim() + .to_string(), + description: sanitize_description( + dep_pkg.description.as_deref().unwrap_or(""), + ), + } + }); + + if is_normal_runtime { + entry.runtime = true; + } + } + } + + direct +} + +// ── Markdown rendering ──────────────────────────────────────────────────────── + +fn table_header() -> &'static str { + "| Crate | Version | License | Purpose |\n| ----- | ------- | ------- | ------- |" +} + +fn table_row(name: &str, info: &DepInfo) -> String { + format!( + "| [{}](https://crates.io/crates/{}) | {} | {} | {} |", + name, name, info.req, info.license, info.description + ) +} + +fn render(deps: &HashMap) -> String { + let mut runtime: Vec<(&str, &DepInfo)> = deps + .iter() + .filter(|(_, d)| d.runtime) + .map(|(n, d)| (n.as_str(), d)) + .collect(); + runtime.sort_by_key(|(n, _)| n.to_ascii_lowercase()); + + let mut dev_only: Vec<(&str, &DepInfo)> = deps + .iter() + .filter(|(_, d)| !d.runtime) + .map(|(n, d)| (n.as_str(), d)) + .collect(); + dev_only.sort_by_key(|(n, _)| n.to_ascii_lowercase()); + + let mut lines: Vec = vec![ + "# Dependencies".into(), + String::new(), + "Direct dependencies of the HaH workspace crates.".into(), + "_Generated by `make doc-dependencies` — do not edit by hand._".into(), + String::new(), + "## Runtime Dependencies".into(), + String::new(), + table_header().into(), + ]; + for (name, info) in &runtime { + lines.push(table_row(name, info)); + } + + lines.push(String::new()); + lines.push("## Development-only Dependencies".into()); + lines.push(String::new()); + lines.push(table_header().into()); + for (name, info) in &dev_only { + lines.push(table_row(name, info)); + } + + lines.push(String::new()); + lines.join("\n") +} + +// ── Entry point ─────────────────────────────────────────────────────────────── + +fn main() -> Result<()> { + let args = Args::parse(); + + let output = Command::new("cargo") + .args(["metadata", "--format-version", "1"]) + .stderr(std::process::Stdio::null()) + .output() + .context("failed to run `cargo metadata`")?; + + if !output.status.success() { + return Err(anyhow!("`cargo metadata` exited with {}", output.status)); + } + + let meta: Metadata = + serde_json::from_slice(&output.stdout).context("failed to parse cargo metadata JSON")?; + + let deps = collect_deps(&meta); + let generated = render(&deps); + + if args.check { + let on_disk = std::fs::read_to_string(&args.file) + .with_context(|| format!("failed to read {}", args.file))?; + if generated == on_disk { + eprintln!("{} is up to date.", args.file); + return Ok(()); + } + eprintln!( + "ERROR: {} is out of date. Run `make doc-dependencies` to regenerate.", + args.file + ); + // Print a simple line-diff summary + let old: Vec<&str> = on_disk.lines().collect(); + let new: Vec<&str> = generated.lines().collect(); + for (i, (a, b)) in old.iter().zip(new.iter()).enumerate() { + if a != b { + eprintln!(" line {}: -{}", i + 1, a); + eprintln!(" line {}: +{}", i + 1, b); + } + } + if old.len() != new.len() { + eprintln!( + " (line count differs: {} vs {})", + old.len(), + new.len() + ); + } + std::process::exit(1); + } + + print!("{}", generated); + Ok(()) +}