From 6a4c8eeb83c0e1888d73f6a9e3adc69fc9c3c9ae Mon Sep 17 00:00:00 2001 From: Austin Gill Date: Thu, 19 Feb 2026 17:54:15 -0600 Subject: [PATCH 1/7] Add depquery tool --- crates/csvizmo-depgraph/src/algorithm/mod.rs | 1 + .../src/algorithm/query/edges.rs | 346 +++++++++++ .../src/algorithm/query/metrics.rs | 300 ++++++++++ .../src/algorithm/query/mod.rs | 18 + .../src/algorithm/query/nodes.rs | 538 ++++++++++++++++++ crates/csvizmo-depgraph/src/bin/depquery.rs | 117 ++++ crates/csvizmo-depgraph/tests/depquery.rs | 466 +++++++++++++++ 7 files changed, 1786 insertions(+) create mode 100644 crates/csvizmo-depgraph/src/algorithm/query/edges.rs create mode 100644 crates/csvizmo-depgraph/src/algorithm/query/metrics.rs create mode 100644 crates/csvizmo-depgraph/src/algorithm/query/mod.rs create mode 100644 crates/csvizmo-depgraph/src/algorithm/query/nodes.rs create mode 100644 crates/csvizmo-depgraph/src/bin/depquery.rs create mode 100644 crates/csvizmo-depgraph/tests/depquery.rs diff --git a/crates/csvizmo-depgraph/src/algorithm/mod.rs b/crates/csvizmo-depgraph/src/algorithm/mod.rs index e816e11..4ef2c4a 100644 --- a/crates/csvizmo-depgraph/src/algorithm/mod.rs +++ b/crates/csvizmo-depgraph/src/algorithm/mod.rs @@ -3,6 +3,7 @@ pub mod cycles; pub mod filter; pub mod flatten; pub mod merge; +pub mod query; pub mod reverse; pub mod select; pub mod shorten; diff --git a/crates/csvizmo-depgraph/src/algorithm/query/edges.rs b/crates/csvizmo-depgraph/src/algorithm/query/edges.rs new file mode 100644 index 0000000..1db9333 --- /dev/null +++ b/crates/csvizmo-depgraph/src/algorithm/query/edges.rs @@ -0,0 +1,346 @@ +use clap::Parser; + +use super::OutputFields; +use crate::DepGraph; +use crate::algorithm::{MatchKey, build_globset}; + +#[derive(Debug, Default, Clone, Copy, clap::ValueEnum)] +pub enum EdgeSort { + #[default] + None, + Source, + Target, +} + +impl std::fmt::Display for EdgeSort { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use clap::ValueEnum; + + f.write_str(self.to_possible_value().unwrap().get_name()) + } +} + +#[derive(Clone, Debug, Default, Parser)] +pub struct EdgesArgs { + /// Include edges where source OR target matches (repeatable, OR by default) + #[clap(short = 'g', long)] + pub include: Vec, + + /// Exclude edges where source OR target matches (repeatable, OR) + #[clap(short = 'x', long)] + pub exclude: Vec, + + /// Combine include patterns with AND instead of OR + #[clap(long)] + pub and: bool, + + /// What patterns match against + #[clap(long, default_value_t = MatchKey::default())] + pub key: MatchKey, + + /// Sort order + #[clap(long, default_value_t = EdgeSort::None)] + pub sort: EdgeSort, + + /// Reverse the sort order + #[clap(short = 'r', long)] + pub reverse: bool, + + /// Show only first N results (applied after sort) + #[clap(short = 'n', long)] + pub limit: Option, + + /// What to print for endpoints + #[clap(long, default_value_t = OutputFields::Label)] + pub format: OutputFields, +} + +/// Returns (source_display, target_display, edge_label) tuples. +pub fn edges( + graph: &DepGraph, + args: &EdgesArgs, +) -> eyre::Result)>> { + let all_nodes = graph.all_nodes(); + let all_edges = graph.all_edges(); + + let include_set = if !args.include.is_empty() { + Some(build_globset(&args.include)?) + } else { + None + }; + + let exclude_set = if !args.exclude.is_empty() { + Some(build_globset(&args.exclude)?) + } else { + None + }; + + let mut result: Vec<(String, String, Option)> = Vec::new(); + + for edge in all_edges { + let from_info = all_nodes.get(&edge.from); + let to_info = all_nodes.get(&edge.to); + + // Skip edges with dangling endpoints + let (from_info, to_info) = match (from_info, to_info) { + (Some(f), Some(t)) => (f, t), + _ => continue, + }; + + let from_text = match args.key { + MatchKey::Id => edge.from.as_str(), + MatchKey::Label => from_info.label.as_str(), + }; + let to_text = match args.key { + MatchKey::Id => edge.to.as_str(), + MatchKey::Label => to_info.label.as_str(), + }; + + // Include filter: edge included if source OR target matches + if let Some(ref include) = include_set { + let source_match = if args.and { + include.matches(from_text).len() == args.include.len() + } else { + include.is_match(from_text) + }; + let target_match = if args.and { + include.matches(to_text).len() == args.include.len() + } else { + include.is_match(to_text) + }; + if !source_match && !target_match { + continue; + } + } + + // Exclude filter: edge excluded if source OR target matches + if let Some(ref exclude) = exclude_set + && (exclude.is_match(from_text) || exclude.is_match(to_text)) + { + continue; + } + + let source_display = match args.format { + OutputFields::Id => edge.from.clone(), + OutputFields::Label => from_info.label.clone(), + }; + let target_display = match args.format { + OutputFields::Id => edge.to.clone(), + OutputFields::Label => to_info.label.clone(), + }; + + result.push((source_display, target_display, edge.label.clone())); + } + + // Sort + match args.sort { + EdgeSort::None => { + if args.reverse { + result.reverse(); + } + } + EdgeSort::Source => { + result.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1))); + if args.reverse { + result.reverse(); + } + } + EdgeSort::Target => { + result.sort_by(|a, b| a.1.cmp(&b.1).then_with(|| a.0.cmp(&b.0))); + if args.reverse { + result.reverse(); + } + } + } + + // Limit + if let Some(limit) = args.limit { + result.truncate(limit); + } + + Ok(result) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{Edge, NodeInfo}; + + fn make_graph(nodes: &[(&str, &str)], edges: &[(&str, &str)]) -> DepGraph { + DepGraph { + nodes: nodes + .iter() + .map(|(id, label)| (id.to_string(), NodeInfo::new(*label))) + .collect(), + edges: edges + .iter() + .map(|(from, to)| Edge { + from: from.to_string(), + to: to.to_string(), + ..Default::default() + }) + .collect(), + ..Default::default() + } + } + + fn make_graph_with_labels( + nodes: &[(&str, &str)], + edge_specs: &[(&str, &str, Option<&str>)], + ) -> DepGraph { + DepGraph { + nodes: nodes + .iter() + .map(|(id, label)| (id.to_string(), NodeInfo::new(*label))) + .collect(), + edges: edge_specs + .iter() + .map(|(from, to, label)| Edge { + from: from.to_string(), + to: to.to_string(), + label: label.map(|l| l.to_string()), + ..Default::default() + }) + .collect(), + ..Default::default() + } + } + + fn pairs(result: &[(String, String, Option)]) -> Vec<(&str, &str)> { + result + .iter() + .map(|(s, t, _)| (s.as_str(), t.as_str())) + .collect() + } + + #[test] + fn all_edges_default() { + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C")], + &[("a", "b"), ("b", "c")], + ); + let result = edges(&g, &EdgesArgs::default()).unwrap(); + assert_eq!(pairs(&result), vec![("A", "B"), ("B", "C")]); + } + + #[test] + fn include_filter() { + let g = make_graph( + &[("a", "alpha"), ("b", "beta"), ("c", "gamma")], + &[("a", "b"), ("b", "c"), ("a", "c")], + ); + let args = EdgesArgs { + include: vec!["alpha".to_string()], + ..Default::default() + }; + let result = edges(&g, &args).unwrap(); + // edges where source or target is "alpha": a->b, a->c + assert_eq!(pairs(&result), vec![("alpha", "beta"), ("alpha", "gamma")]); + } + + #[test] + fn exclude_filter() { + let g = make_graph( + &[("a", "alpha"), ("b", "beta"), ("c", "gamma")], + &[("a", "b"), ("b", "c"), ("a", "c")], + ); + let args = EdgesArgs { + exclude: vec!["beta".to_string()], + ..Default::default() + }; + let result = edges(&g, &args).unwrap(); + // exclude edges touching beta: a->b and b->c removed, only a->c remains + assert_eq!(pairs(&result), vec![("alpha", "gamma")]); + } + + #[test] + fn sort_by_source() { + let g = make_graph( + &[("a", "C"), ("b", "A"), ("c", "B")], + &[("a", "b"), ("c", "a"), ("b", "c")], + ); + let args = EdgesArgs { + sort: EdgeSort::Source, + ..Default::default() + }; + let result = edges(&g, &args).unwrap(); + assert_eq!(pairs(&result), vec![("A", "B"), ("B", "C"), ("C", "A")]); + } + + #[test] + fn sort_by_target() { + let g = make_graph( + &[("a", "C"), ("b", "A"), ("c", "B")], + &[("a", "b"), ("c", "a"), ("b", "c")], + ); + let args = EdgesArgs { + sort: EdgeSort::Target, + ..Default::default() + }; + let result = edges(&g, &args).unwrap(); + assert_eq!(pairs(&result), vec![("C", "A"), ("A", "B"), ("B", "C")]); + } + + #[test] + fn format_id() { + let g = make_graph(&[("a", "Alpha"), ("b", "Beta")], &[("a", "b")]); + let args = EdgesArgs { + format: OutputFields::Id, + ..Default::default() + }; + let result = edges(&g, &args).unwrap(); + assert_eq!(pairs(&result), vec![("a", "b")]); + } + + #[test] + fn edge_label_included() { + let g = make_graph_with_labels(&[("a", "A"), ("b", "B")], &[("a", "b", Some("depends"))]); + let result = edges(&g, &EdgesArgs::default()).unwrap(); + assert_eq!(result[0].2.as_deref(), Some("depends")); + } + + #[test] + fn limit_edges() { + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C")], + &[("a", "b"), ("b", "c"), ("a", "c")], + ); + let args = EdgesArgs { + limit: Some(2), + ..Default::default() + }; + let result = edges(&g, &args).unwrap(); + assert_eq!(result.len(), 2); + } + + #[test] + fn reverse_edges() { + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C")], + &[("a", "b"), ("b", "c")], + ); + let args = EdgesArgs { + reverse: true, + ..Default::default() + }; + let result = edges(&g, &args).unwrap(); + assert_eq!(pairs(&result), vec![("B", "C"), ("A", "B")]); + } + + #[test] + fn skips_dangling_edges() { + let g = DepGraph { + nodes: [("a".to_string(), NodeInfo::new("A"))] + .into_iter() + .collect(), + edges: vec![Edge { + from: "a".to_string(), + to: "missing".to_string(), + ..Default::default() + }], + ..Default::default() + }; + let result = edges(&g, &EdgesArgs::default()).unwrap(); + assert!(result.is_empty()); + } +} diff --git a/crates/csvizmo-depgraph/src/algorithm/query/metrics.rs b/crates/csvizmo-depgraph/src/algorithm/query/metrics.rs new file mode 100644 index 0000000..2cee7e9 --- /dev/null +++ b/crates/csvizmo-depgraph/src/algorithm/query/metrics.rs @@ -0,0 +1,300 @@ +use std::collections::HashSet; + +use petgraph::Direction; +use petgraph::algo::{connected_components, tarjan_scc, toposort}; + +use crate::{DepGraph, FlatGraphView}; + +#[derive(Debug)] +pub struct GraphMetrics { + pub nodes: usize, + pub edges: usize, + pub roots: usize, + pub leaves: usize, + pub max_depth: Option, + pub max_fan_out: usize, + pub max_fan_in: usize, + pub avg_fan_out: f64, + pub density: f64, + pub cycles: usize, + pub diamonds: usize, + pub components: usize, +} + +pub fn metrics(graph: &DepGraph) -> GraphMetrics { + let view = FlatGraphView::new(graph); + let node_count = view.pg.node_count(); + let edge_count = view.pg.edge_count(); + + let mut roots = 0usize; + let mut leaves = 0usize; + let mut max_fan_out = 0usize; + let mut max_fan_in = 0usize; + + for idx in view.pg.node_indices() { + let in_deg = view.pg.neighbors_directed(idx, Direction::Incoming).count(); + let out_deg = view.pg.neighbors_directed(idx, Direction::Outgoing).count(); + if in_deg == 0 { + roots += 1; + } + if out_deg == 0 { + leaves += 1; + } + max_fan_in = max_fan_in.max(in_deg); + max_fan_out = max_fan_out.max(out_deg); + } + + let avg_fan_out = if node_count > 0 { + edge_count as f64 / node_count as f64 + } else { + 0.0 + }; + + let density = if node_count > 1 { + edge_count as f64 / (node_count as f64 * (node_count as f64 - 1.0)) + } else { + 0.0 + }; + + // Cycles: count SCCs with 2+ nodes + let sccs = tarjan_scc(&view.pg); + let cycle_count = sccs.iter().filter(|scc| scc.len() >= 2).count(); + + // Max depth: longest path from any root to any leaf via topo-order DP + let max_depth = if cycle_count > 0 { + None + } else { + match toposort(&view.pg, None) { + Ok(sorted) => { + let mut dist = vec![0usize; node_count]; + let mut max_d = 0usize; + for &node in &sorted { + let d = dist[node.index()]; + for neighbor in view.pg.neighbors_directed(node, Direction::Outgoing) { + let nd = d + 1; + if nd > dist[neighbor.index()] { + dist[neighbor.index()] = nd; + } + max_d = max_d.max(nd); + } + } + Some(max_d) + } + Err(_) => None, + } + }; + + // Weakly connected components + let components = connected_components(&view.pg); + + // Diamonds: nodes with in-degree >= 2 whose parents share a common ancestor + let diamonds = count_diamonds(&view); + + GraphMetrics { + nodes: node_count, + edges: edge_count, + roots, + leaves, + max_depth, + max_fan_out, + max_fan_in, + avg_fan_out, + density, + cycles: cycle_count, + diamonds, + components, + } +} + +// Count "merge points" -- nodes with 2+ parents that share a common ancestor. +fn count_diamonds(view: &FlatGraphView) -> usize { + let mut count = 0; + for idx in view.pg.node_indices() { + let parents: Vec<_> = view + .pg + .neighbors_directed(idx, Direction::Incoming) + .collect(); + if parents.len() < 2 { + continue; + } + + // For each parent, compute the set of ancestors (excluding the node itself). + // If any two parents share an ancestor, this node is a diamond. + let mut is_diamond = false; + let mut seen_ancestors: HashSet<_> = HashSet::new(); + for &parent in &parents { + let ancestors = view.bfs([parent], Direction::Incoming, None); + for ancestor in &ancestors { + if !seen_ancestors.insert(*ancestor) { + is_diamond = true; + break; + } + } + if is_diamond { + break; + } + } + if is_diamond { + count += 1; + } + } + count +} + +impl std::fmt::Display for GraphMetrics { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "nodes\t{}", self.nodes)?; + writeln!(f, "edges\t{}", self.edges)?; + writeln!(f, "roots\t{}", self.roots)?; + writeln!(f, "leaves\t{}", self.leaves)?; + match self.max_depth { + Some(d) => writeln!(f, "max_depth\t{d}")?, + None => writeln!(f, "max_depth\t")?, + } + writeln!(f, "max_fan_out\t{}", self.max_fan_out)?; + writeln!(f, "max_fan_in\t{}", self.max_fan_in)?; + writeln!(f, "avg_fan_out\t{:.2}", self.avg_fan_out)?; + writeln!(f, "density\t{:.6}", self.density)?; + writeln!(f, "cycles\t{}", self.cycles)?; + writeln!(f, "diamonds\t{}", self.diamonds)?; + writeln!(f, "components\t{}", self.components)?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{Edge, NodeInfo}; + + fn make_graph(nodes: &[(&str, &str)], edges: &[(&str, &str)]) -> DepGraph { + DepGraph { + nodes: nodes + .iter() + .map(|(id, label)| (id.to_string(), NodeInfo::new(*label))) + .collect(), + edges: edges + .iter() + .map(|(from, to)| Edge { + from: from.to_string(), + to: to.to_string(), + ..Default::default() + }) + .collect(), + ..Default::default() + } + } + + #[test] + fn empty_graph() { + let g = DepGraph::default(); + let m = metrics(&g); + assert_eq!(m.nodes, 0); + assert_eq!(m.edges, 0); + assert_eq!(m.roots, 0); + assert_eq!(m.leaves, 0); + assert_eq!(m.max_depth, Some(0)); + assert_eq!(m.max_fan_out, 0); + assert_eq!(m.max_fan_in, 0); + assert_eq!(m.avg_fan_out, 0.0); + assert_eq!(m.density, 0.0); + assert_eq!(m.cycles, 0); + assert_eq!(m.diamonds, 0); + assert_eq!(m.components, 0); + } + + #[test] + fn single_node() { + let g = make_graph(&[("a", "A")], &[]); + let m = metrics(&g); + assert_eq!(m.nodes, 1); + assert_eq!(m.edges, 0); + assert_eq!(m.roots, 1); + assert_eq!(m.leaves, 1); + assert_eq!(m.max_depth, Some(0)); + assert_eq!(m.density, 0.0); + assert_eq!(m.components, 1); + } + + #[test] + fn chain() { + // a -> b -> c + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C")], + &[("a", "b"), ("b", "c")], + ); + let m = metrics(&g); + assert_eq!(m.nodes, 3); + assert_eq!(m.edges, 2); + assert_eq!(m.roots, 1); + assert_eq!(m.leaves, 1); + assert_eq!(m.max_depth, Some(2)); + assert_eq!(m.max_fan_out, 1); + assert_eq!(m.max_fan_in, 1); + assert!((m.avg_fan_out - 2.0 / 3.0).abs() < 1e-9); + assert!((m.density - 2.0 / 6.0).abs() < 1e-9); + assert_eq!(m.cycles, 0); + assert_eq!(m.diamonds, 0); + assert_eq!(m.components, 1); + } + + #[test] + fn diamond() { + // a -> b, a -> c, b -> d, c -> d + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C"), ("d", "D")], + &[("a", "b"), ("a", "c"), ("b", "d"), ("c", "d")], + ); + let m = metrics(&g); + assert_eq!(m.nodes, 4); + assert_eq!(m.edges, 4); + assert_eq!(m.roots, 1); + assert_eq!(m.leaves, 1); + assert_eq!(m.max_depth, Some(2)); + assert_eq!(m.max_fan_out, 2); + assert_eq!(m.max_fan_in, 2); + assert_eq!(m.cycles, 0); + assert_eq!(m.diamonds, 1); + assert_eq!(m.components, 1); + } + + #[test] + fn cycle_graph() { + // a -> b -> c -> a + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C")], + &[("a", "b"), ("b", "c"), ("c", "a")], + ); + let m = metrics(&g); + assert_eq!(m.cycles, 1); + assert!(m.max_depth.is_none()); + assert_eq!(m.roots, 0); + assert_eq!(m.leaves, 0); + } + + #[test] + fn disjoint_components() { + // a -> b, c -> d (two components) + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C"), ("d", "D")], + &[("a", "b"), ("c", "d")], + ); + let m = metrics(&g); + assert_eq!(m.components, 2); + assert_eq!(m.roots, 2); + assert_eq!(m.leaves, 2); + } + + #[test] + fn display_format() { + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C")], + &[("a", "b"), ("b", "c")], + ); + let m = metrics(&g); + let output = m.to_string(); + assert!(output.contains("nodes\t3\n")); + assert!(output.contains("edges\t2\n")); + assert!(output.contains("max_depth\t2\n")); + } +} diff --git a/crates/csvizmo-depgraph/src/algorithm/query/mod.rs b/crates/csvizmo-depgraph/src/algorithm/query/mod.rs new file mode 100644 index 0000000..bc4c467 --- /dev/null +++ b/crates/csvizmo-depgraph/src/algorithm/query/mod.rs @@ -0,0 +1,18 @@ +pub mod edges; +pub mod metrics; +pub mod nodes; + +#[derive(Debug, Default, Clone, Copy, clap::ValueEnum)] +pub enum OutputFields { + Id, + #[default] + Label, +} + +impl std::fmt::Display for OutputFields { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use clap::ValueEnum; + + f.write_str(self.to_possible_value().unwrap().get_name()) + } +} diff --git a/crates/csvizmo-depgraph/src/algorithm/query/nodes.rs b/crates/csvizmo-depgraph/src/algorithm/query/nodes.rs new file mode 100644 index 0000000..6df8aa2 --- /dev/null +++ b/crates/csvizmo-depgraph/src/algorithm/query/nodes.rs @@ -0,0 +1,538 @@ +use std::collections::HashSet; + +use clap::Parser; +use petgraph::Direction; +use petgraph::algo::toposort; +use petgraph::graph::NodeIndex; + +use super::OutputFields; +use crate::algorithm::{MatchKey, build_globset}; +use crate::{DepGraph, FlatGraphView}; + +#[derive(Debug, Default, Clone, Copy, clap::ValueEnum)] +pub enum NodeSelection { + #[default] + All, + Roots, + Leaves, +} + +impl std::fmt::Display for NodeSelection { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use clap::ValueEnum; + + f.write_str(self.to_possible_value().unwrap().get_name()) + } +} + +#[derive(Debug, Default, Clone, Copy, clap::ValueEnum)] +pub enum NodeSort { + #[default] + None, + Topo, + InDegree, + OutDegree, + Ancestors, + Descendants, +} + +impl std::fmt::Display for NodeSort { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use clap::ValueEnum; + + f.write_str(self.to_possible_value().unwrap().get_name()) + } +} + +#[derive(Clone, Debug, Default, Parser)] +pub struct NodesArgs { + /// Which nodes to start from + #[clap(long, default_value_t = NodeSelection::All)] + pub select: NodeSelection, + + /// Include pattern (repeatable, OR by default) + #[clap(short = 'g', long)] + pub include: Vec, + + /// Exclude pattern (repeatable, OR) + #[clap(short = 'x', long)] + pub exclude: Vec, + + /// Combine include patterns with AND instead of OR + #[clap(long)] + pub and: bool, + + /// What patterns match against + #[clap(long, default_value_t = MatchKey::default())] + pub key: MatchKey, + + /// Sort order + #[clap(long, default_value_t = NodeSort::None)] + pub sort: NodeSort, + + /// Reverse the sort order + #[clap(short = 'r', long)] + pub reverse: bool, + + /// Show only first N results (applied after sort) + #[clap(short = 'n', long)] + pub limit: Option, + + /// What to print + #[clap(long, default_value_t = OutputFields::Label)] + pub format: OutputFields, + + /// Include dependencies of selected nodes + #[clap(long, alias = "children")] + pub deps: bool, + + /// Include reverse dependencies (ancestors) of selected nodes + #[clap(long, alias = "parents")] + pub rdeps: bool, + + /// Max traversal depth (implies --deps) + #[clap(long)] + pub depth: Option, +} + +/// Returns (id, label, count) tuples for matching nodes. +/// +/// The count is `Some(n)` when the sort order is numeric (in-degree, +/// out-degree, ancestors, descendants) and `None` otherwise. +pub fn nodes( + graph: &DepGraph, + args: &NodesArgs, +) -> eyre::Result)>> { + let view = FlatGraphView::new(graph); + let all_nodes = graph.all_nodes(); + + // 1. Select initial set + let mut selected: Vec = match args.select { + NodeSelection::All => view.pg.node_indices().collect(), + NodeSelection::Roots => view.roots().collect(), + NodeSelection::Leaves => view + .pg + .node_indices() + .filter(|&idx| { + view.pg + .neighbors_directed(idx, Direction::Outgoing) + .next() + .is_none() + }) + .collect(), + }; + + // 2. Apply include patterns + if !args.include.is_empty() { + let include_set = build_globset(&args.include)?; + selected.retain(|&idx| { + let id = view.idx_to_id[idx.index()]; + let info = &all_nodes[id]; + let text = match args.key { + MatchKey::Id => id, + MatchKey::Label => info.label.as_str(), + }; + if args.and { + include_set.matches(text).len() == args.include.len() + } else { + include_set.is_match(text) + } + }); + } + + // 3. Apply exclude patterns + if !args.exclude.is_empty() { + let exclude_set = build_globset(&args.exclude)?; + selected.retain(|&idx| { + let id = view.idx_to_id[idx.index()]; + let info = &all_nodes[id]; + let text = match args.key { + MatchKey::Id => id, + MatchKey::Label => info.label.as_str(), + }; + !exclude_set.is_match(text) + }); + } + + // 4. Expand with --deps / --rdeps / --depth + let deps = args.deps || args.depth.is_some(); + if deps || args.rdeps { + let seeds: HashSet = selected.iter().copied().collect(); + let mut expanded = HashSet::new(); + + if deps { + expanded.extend(view.bfs(seeds.iter().copied(), Direction::Outgoing, args.depth)); + } + if args.rdeps { + expanded.extend(view.bfs(seeds.iter().copied(), Direction::Incoming, args.depth)); + } + + // Keep seeds in all cases + expanded.extend(seeds); + selected = expanded.into_iter().collect(); + } + + // 5. Sort (returns sorted indices with optional counts) + let sorted = sort_nodes(&selected, &args.sort, args.reverse, &view); + + // 6. Apply limit + let sorted = if let Some(limit) = args.limit { + &sorted[..limit.min(sorted.len())] + } else { + &sorted + }; + + // 7. Map to output + let result = sorted + .iter() + .map(|&(idx, count)| { + let id = view.idx_to_id[idx.index()]; + let info = &all_nodes[id]; + (id.to_string(), info.label.clone(), count) + }) + .collect(); + + Ok(result) +} + +fn sort_nodes( + nodes: &[NodeIndex], + sort: &NodeSort, + reverse: bool, + view: &FlatGraphView, +) -> Vec<(NodeIndex, Option)> { + let mut result: Vec<(NodeIndex, Option)> = match sort { + NodeSort::None => nodes.iter().map(|&idx| (idx, None)).collect(), + NodeSort::Topo => { + if let Ok(sorted) = toposort(&view.pg, Option::None) { + let node_set: HashSet = nodes.iter().copied().collect(); + sorted + .into_iter() + .filter(|idx| node_set.contains(idx)) + .map(|idx| (idx, None)) + .collect() + } else { + // Graph has cycles, fall back to insertion order + nodes.iter().map(|&idx| (idx, None)).collect() + } + } + NodeSort::InDegree => { + let mut v: Vec<(NodeIndex, Option)> = nodes + .iter() + .map(|&idx| { + let count = view.pg.neighbors_directed(idx, Direction::Incoming).count(); + (idx, Some(count)) + }) + .collect(); + v.sort_by(|a, b| { + b.1.cmp(&a.1) + .then_with(|| view.idx_to_id[a.0.index()].cmp(view.idx_to_id[b.0.index()])) + }); + v + } + NodeSort::OutDegree => { + let mut v: Vec<(NodeIndex, Option)> = nodes + .iter() + .map(|&idx| { + let count = view.pg.neighbors_directed(idx, Direction::Outgoing).count(); + (idx, Some(count)) + }) + .collect(); + v.sort_by(|a, b| { + b.1.cmp(&a.1) + .then_with(|| view.idx_to_id[a.0.index()].cmp(view.idx_to_id[b.0.index()])) + }); + v + } + NodeSort::Ancestors => { + let mut v: Vec<(NodeIndex, Option)> = nodes + .iter() + .map(|&idx| { + let count = view.bfs([idx], Direction::Incoming, None).len() - 1; + (idx, Some(count)) + }) + .collect(); + v.sort_by(|a, b| { + b.1.cmp(&a.1) + .then_with(|| view.idx_to_id[a.0.index()].cmp(view.idx_to_id[b.0.index()])) + }); + v + } + NodeSort::Descendants => { + let mut v: Vec<(NodeIndex, Option)> = nodes + .iter() + .map(|&idx| { + let count = view.bfs([idx], Direction::Outgoing, None).len() - 1; + (idx, Some(count)) + }) + .collect(); + v.sort_by(|a, b| { + b.1.cmp(&a.1) + .then_with(|| view.idx_to_id[a.0.index()].cmp(view.idx_to_id[b.0.index()])) + }); + v + } + }; + if reverse { + result.reverse(); + } + result +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{Edge, NodeInfo}; + + fn make_graph(nodes: &[(&str, &str)], edges: &[(&str, &str)]) -> DepGraph { + DepGraph { + nodes: nodes + .iter() + .map(|(id, label)| (id.to_string(), NodeInfo::new(*label))) + .collect(), + edges: edges + .iter() + .map(|(from, to)| Edge { + from: from.to_string(), + to: to.to_string(), + ..Default::default() + }) + .collect(), + ..Default::default() + } + } + + fn labels(result: &[(String, String, Option)]) -> Vec<&str> { + result.iter().map(|(_, l, _)| l.as_str()).collect() + } + + fn ids(result: &[(String, String, Option)]) -> Vec<&str> { + result.iter().map(|(id, _, _)| id.as_str()).collect() + } + + fn counts(result: &[(String, String, Option)]) -> Vec> { + result.iter().map(|(_, _, c)| *c).collect() + } + + #[test] + fn all_nodes_default() { + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C")], + &[("a", "b"), ("b", "c")], + ); + let result = nodes(&g, &NodesArgs::default()).unwrap(); + assert_eq!(ids(&result), vec!["a", "b", "c"]); + } + + #[test] + fn select_roots() { + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C")], + &[("a", "b"), ("b", "c")], + ); + let args = NodesArgs { + select: NodeSelection::Roots, + ..Default::default() + }; + let result = nodes(&g, &args).unwrap(); + assert_eq!(ids(&result), vec!["a"]); + } + + #[test] + fn select_leaves() { + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C")], + &[("a", "b"), ("b", "c")], + ); + let args = NodesArgs { + select: NodeSelection::Leaves, + ..Default::default() + }; + let result = nodes(&g, &args).unwrap(); + assert_eq!(ids(&result), vec!["c"]); + } + + #[test] + fn include_pattern() { + let g = make_graph(&[("a", "alpha"), ("b", "beta"), ("c", "gamma")], &[]); + let args = NodesArgs { + include: vec!["al*".to_string()], + ..Default::default() + }; + let result = nodes(&g, &args).unwrap(); + assert_eq!(labels(&result), vec!["alpha"]); + } + + #[test] + fn exclude_pattern() { + let g = make_graph(&[("a", "alpha"), ("b", "beta"), ("c", "gamma")], &[]); + let args = NodesArgs { + exclude: vec!["b*".to_string()], + ..Default::default() + }; + let result = nodes(&g, &args).unwrap(); + assert_eq!(labels(&result), vec!["alpha", "gamma"]); + } + + #[test] + fn include_and_mode() { + let g = make_graph( + &[("a", "foo-alpha"), ("b", "foo-beta"), ("c", "bar-alpha")], + &[], + ); + let args = NodesArgs { + include: vec!["foo*".to_string(), "*alpha".to_string()], + and: true, + ..Default::default() + }; + let result = nodes(&g, &args).unwrap(); + assert_eq!(labels(&result), vec!["foo-alpha"]); + } + + #[test] + fn sort_topo() { + // a -> c, b -> c (a and b are roots, c is leaf) + let g = make_graph( + &[("c", "C"), ("a", "A"), ("b", "B")], + &[("a", "c"), ("b", "c")], + ); + let args = NodesArgs { + sort: NodeSort::Topo, + ..Default::default() + }; + let result = nodes(&g, &args).unwrap(); + // c must come after a and b + let c_pos = ids(&result).iter().position(|&x| x == "c").unwrap(); + let a_pos = ids(&result).iter().position(|&x| x == "a").unwrap(); + let b_pos = ids(&result).iter().position(|&x| x == "b").unwrap(); + assert!(a_pos < c_pos); + assert!(b_pos < c_pos); + } + + #[test] + fn sort_out_degree() { + // a -> b, a -> c, b -> c + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C")], + &[("a", "b"), ("a", "c"), ("b", "c")], + ); + let args = NodesArgs { + sort: NodeSort::OutDegree, + ..Default::default() + }; + let result = nodes(&g, &args).unwrap(); + // a has out-degree 2, b has 1, c has 0 (descending) + assert_eq!(ids(&result), vec!["a", "b", "c"]); + assert_eq!(counts(&result), vec![Some(2), Some(1), Some(0)]); + } + + #[test] + fn sort_in_degree() { + // a -> c, b -> c + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C")], + &[("a", "c"), ("b", "c")], + ); + let args = NodesArgs { + sort: NodeSort::InDegree, + ..Default::default() + }; + let result = nodes(&g, &args).unwrap(); + // c has in-degree 2, a and b have 0 + assert_eq!(ids(&result)[0], "c"); + assert_eq!(counts(&result), vec![Some(2), Some(0), Some(0)]); + } + + #[test] + fn limit() { + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C")], + &[("a", "b"), ("b", "c")], + ); + let args = NodesArgs { + sort: NodeSort::Topo, + limit: Some(2), + ..Default::default() + }; + let result = nodes(&g, &args).unwrap(); + assert_eq!(result.len(), 2); + } + + #[test] + fn deps_expansion() { + // a -> b -> c: select roots, then expand deps + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C")], + &[("a", "b"), ("b", "c")], + ); + let args = NodesArgs { + select: NodeSelection::Roots, + deps: true, + sort: NodeSort::Topo, + ..Default::default() + }; + let result = nodes(&g, &args).unwrap(); + assert_eq!(ids(&result), vec!["a", "b", "c"]); + } + + #[test] + fn rdeps_expansion() { + // a -> b -> c: select leaves, then expand rdeps + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C")], + &[("a", "b"), ("b", "c")], + ); + let args = NodesArgs { + select: NodeSelection::Leaves, + rdeps: true, + sort: NodeSort::Topo, + ..Default::default() + }; + let result = nodes(&g, &args).unwrap(); + assert_eq!(ids(&result), vec!["a", "b", "c"]); + } + + #[test] + fn depth_limited_expansion() { + // a -> b -> c -> d: select root a, deps with depth 1 + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C"), ("d", "D")], + &[("a", "b"), ("b", "c"), ("c", "d")], + ); + let args = NodesArgs { + select: NodeSelection::Roots, + depth: Some(1), + sort: NodeSort::Topo, + ..Default::default() + }; + let result = nodes(&g, &args).unwrap(); + assert_eq!(ids(&result), vec!["a", "b"]); + } + + #[test] + fn reverse_sort() { + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C")], + &[("a", "b"), ("b", "c")], + ); + let args = NodesArgs { + sort: NodeSort::Topo, + reverse: true, + ..Default::default() + }; + let result = nodes(&g, &args).unwrap(); + assert_eq!(ids(&result), vec!["c", "b", "a"]); + } + + #[test] + fn match_by_id() { + let g = make_graph(&[("node1", "Alpha"), ("node2", "Beta")], &[]); + let args = NodesArgs { + include: vec!["node1".to_string()], + key: MatchKey::Id, + ..Default::default() + }; + let result = nodes(&g, &args).unwrap(); + assert_eq!(ids(&result), vec!["node1"]); + } +} diff --git a/crates/csvizmo-depgraph/src/bin/depquery.rs b/crates/csvizmo-depgraph/src/bin/depquery.rs new file mode 100644 index 0000000..84f97d0 --- /dev/null +++ b/crates/csvizmo-depgraph/src/bin/depquery.rs @@ -0,0 +1,117 @@ +use std::io::{IsTerminal, Read, Write}; +use std::path::PathBuf; + +use clap::{Parser, Subcommand}; +use csvizmo_depgraph::algorithm::query::edges::EdgesArgs; +use csvizmo_depgraph::algorithm::query::nodes::NodesArgs; +use csvizmo_depgraph::algorithm::query::{OutputFields, metrics}; +use csvizmo_depgraph::parse::InputFormat; +use csvizmo_utils::stdio::get_input_reader; + +/// Query properties of dependency graphs. +/// +/// Produces plain text output (not graph output) answering +/// "what's in this graph?" -- listing nodes, edges, and computing metrics. +#[derive(Debug, Parser)] +#[clap(version, verbatim_doc_comment)] +struct Args { + /// Logging level + #[clap(long, default_value_t = tracing::Level::INFO)] + log_level: tracing::Level, + + /// Input file (stdin if '-' or omitted) + #[clap(short, long, global = true)] + input: Option, + + /// Input format (auto-detected from extension/content if omitted) + #[clap(short = 'I', long, global = true)] + input_format: Option, + + #[clap(subcommand)] + command: Command, +} + +#[derive(Debug, Subcommand)] +enum Command { + /// List nodes with optional filtering and sorting + Nodes(NodesArgs), + /// List edges with optional filtering and sorting + Edges(EdgesArgs), + /// Compute and display graph metrics + Metrics, +} + +fn main() -> eyre::Result<()> { + let use_color = std::io::stderr().is_terminal(); + if use_color { + color_eyre::install()?; + } + + let args = Args::parse(); + + let filter = tracing_subscriber::EnvFilter::builder() + .with_default_directive(args.log_level.into()) + .with_env_var("CSV_LOG") + .from_env_lossy(); + tracing_subscriber::fmt() + .with_env_filter(filter) + .with_ansi(use_color) + .with_writer(std::io::stderr) + .init(); + + // Normalize `-` to None -- it means stdio, not a file path. + let is_stdio = |p: &PathBuf| p.as_os_str() == "-"; + let input_path = args.input.filter(|p| !is_stdio(p)); + + let mut input = get_input_reader(&input_path)?; + let mut input_text = String::new(); + input.read_to_string(&mut input_text)?; + + let input_format = csvizmo_depgraph::parse::resolve_input_format( + args.input_format, + input_path.as_deref(), + &input_text, + )?; + + let graph = csvizmo_depgraph::parse::parse(input_format, &input_text)?; + tracing::info!( + "Parsed graph with {} nodes, {} edges, and {} subgraphs", + graph.all_nodes().len(), + graph.all_edges().len(), + graph.subgraphs.len() + ); + + let stdout = std::io::stdout(); + let mut out = stdout.lock(); + + match &args.command { + Command::Nodes(nodes_args) => { + let result = csvizmo_depgraph::algorithm::query::nodes::nodes(&graph, nodes_args)?; + for (id, label, count) in &result { + let field = match nodes_args.format { + OutputFields::Id => id.as_str(), + OutputFields::Label => label.as_str(), + }; + match count { + Some(n) => writeln!(out, "{field}\t{n}")?, + None => writeln!(out, "{field}")?, + } + } + } + Command::Edges(edges_args) => { + let result = csvizmo_depgraph::algorithm::query::edges::edges(&graph, edges_args)?; + for (source, target, label) in &result { + match label { + Some(l) if !l.is_empty() => writeln!(out, "{source}\t{target}\t{l}")?, + _ => writeln!(out, "{source}\t{target}")?, + } + } + } + Command::Metrics => { + let m = metrics::metrics(&graph); + write!(out, "{m}")?; + } + } + + Ok(()) +} diff --git a/crates/csvizmo-depgraph/tests/depquery.rs b/crates/csvizmo-depgraph/tests/depquery.rs new file mode 100644 index 0000000..8fcc0ae --- /dev/null +++ b/crates/csvizmo-depgraph/tests/depquery.rs @@ -0,0 +1,466 @@ +use csvizmo_test::{CommandExt, tool}; +use pretty_assertions::assert_eq; + +// Test graph: a -> b -> c, a -> c +const SIMPLE_GRAPH: &str = "1\talpha\n2\tbeta\n3\tgamma\n#\n1\t2\n2\t3\n1\t3\n"; + +// Chain: a -> b -> c -> d +const CHAIN_GRAPH: &str = "a\nb\nc\nd\n#\na\tb\nb\tc\nc\td\n"; + +// Diamond: a -> b, a -> c, b -> d, c -> d +const DIAMOND_GRAPH: &str = "a\nb\nc\nd\n#\na\tb\na\tc\nb\td\nc\td\n"; + +// -- nodes subcommand -- + +#[test] +fn nodes_all_default() { + let output = tool!("depquery") + .args(["nodes", "--input-format", "tgf"]) + .write_stdin(SIMPLE_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "alpha\nbeta\ngamma\n"); +} + +#[test] +fn nodes_format_id() { + let output = tool!("depquery") + .args(["nodes", "--format", "id", "--input-format", "tgf"]) + .write_stdin(SIMPLE_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "1\n2\n3\n"); +} + +#[test] +fn nodes_select_roots() { + let output = tool!("depquery") + .args(["nodes", "--select", "roots", "--input-format", "tgf"]) + .write_stdin(CHAIN_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\n"); +} + +#[test] +fn nodes_select_leaves() { + let output = tool!("depquery") + .args(["nodes", "--select", "leaves", "--input-format", "tgf"]) + .write_stdin(CHAIN_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "d\n"); +} + +#[test] +fn nodes_include_pattern() { + let output = tool!("depquery") + .args(["nodes", "-g", "al*", "--input-format", "tgf"]) + .write_stdin(SIMPLE_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "alpha\n"); +} + +#[test] +fn nodes_exclude_pattern() { + let output = tool!("depquery") + .args(["nodes", "-x", "b*", "--input-format", "tgf"]) + .write_stdin(SIMPLE_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "alpha\ngamma\n"); +} + +#[test] +fn nodes_sort_topo() { + let output = tool!("depquery") + .args(["nodes", "--sort", "topo", "--input-format", "tgf"]) + .write_stdin(CHAIN_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\nb\nc\nd\n"); +} + +#[test] +fn nodes_sort_topo_reverse() { + let output = tool!("depquery") + .args([ + "nodes", + "--sort", + "topo", + "--reverse", + "--input-format", + "tgf", + ]) + .write_stdin(CHAIN_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "d\nc\nb\na\n"); +} + +#[test] +fn nodes_sort_out_degree() { + let output = tool!("depquery") + .args(["nodes", "--sort", "out-degree", "--input-format", "tgf"]) + .write_stdin(SIMPLE_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + // alpha(id=1) has out-degree 2, beta(id=2) has 1, gamma(id=3) has 0 + assert_eq!(stdout, "alpha\t2\nbeta\t1\ngamma\t0\n"); +} + +#[test] +fn nodes_sort_in_degree() { + let output = tool!("depquery") + .args(["nodes", "--sort", "in-degree", "--input-format", "tgf"]) + .write_stdin(SIMPLE_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + // gamma(id=3) has in-degree 2, beta(id=2) has 1, alpha(id=1) has 0 + assert_eq!(stdout, "gamma\t2\nbeta\t1\nalpha\t0\n"); +} + +#[test] +fn nodes_limit() { + let output = tool!("depquery") + .args([ + "nodes", + "--sort", + "topo", + "--limit", + "2", + "--input-format", + "tgf", + ]) + .write_stdin(CHAIN_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\nb\n"); +} + +#[test] +fn nodes_deps_from_roots() { + let output = tool!("depquery") + .args([ + "nodes", + "--select", + "roots", + "--deps", + "--depth", + "1", + "--sort", + "topo", + "--input-format", + "tgf", + ]) + .write_stdin(CHAIN_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\nb\n"); +} + +#[test] +fn nodes_rdeps_from_leaves() { + let output = tool!("depquery") + .args([ + "nodes", + "--select", + "leaves", + "--rdeps", + "--depth", + "1", + "--sort", + "topo", + "--input-format", + "tgf", + ]) + .write_stdin(CHAIN_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "c\nd\n"); +} + +#[test] +fn nodes_match_by_id() { + let output = tool!("depquery") + .args(["nodes", "-g", "1", "--key", "id", "--input-format", "tgf"]) + .write_stdin(SIMPLE_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "alpha\n"); +} + +#[test] +fn nodes_include_and_mode() { + let graph = "foo-alpha\nfoo-beta\nbar-alpha\n#\n"; + let output = tool!("depquery") + .args([ + "nodes", + "-g", + "foo*", + "-g", + "*alpha", + "--and", + "--input-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "foo-alpha\n"); +} + +// -- edges subcommand -- + +#[test] +fn edges_all_default() { + let output = tool!("depquery") + .args(["edges", "--input-format", "tgf"]) + .write_stdin(SIMPLE_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + // Edge order follows TGF input: 1->2, 2->3, 1->3 + assert_eq!(stdout, "alpha\tbeta\nbeta\tgamma\nalpha\tgamma\n"); +} + +#[test] +fn edges_format_id() { + let output = tool!("depquery") + .args(["edges", "--format", "id", "--input-format", "tgf"]) + .write_stdin(SIMPLE_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + // Edge order follows TGF input: 1->2, 2->3, 1->3 + assert_eq!(stdout, "1\t2\n2\t3\n1\t3\n"); +} + +#[test] +fn edges_include_filter() { + let output = tool!("depquery") + .args(["edges", "-g", "alpha", "--input-format", "tgf"]) + .write_stdin(SIMPLE_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + // edges touching "alpha": alpha->beta, alpha->gamma + assert_eq!(stdout, "alpha\tbeta\nalpha\tgamma\n"); +} + +#[test] +fn edges_exclude_filter() { + let output = tool!("depquery") + .args(["edges", "-x", "gamma", "--input-format", "tgf"]) + .write_stdin(SIMPLE_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + // exclude edges touching gamma: only alpha->beta remains + assert_eq!(stdout, "alpha\tbeta\n"); +} + +#[test] +fn edges_sort_by_source() { + let graph = "a\tC\nb\tA\nc\tB\n#\na\tb\nc\ta\nb\tc\n"; + let output = tool!("depquery") + .args(["edges", "--sort", "source", "--input-format", "tgf"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "A\tB\nB\tC\nC\tA\n"); +} + +#[test] +fn edges_limit() { + let output = tool!("depquery") + .args(["edges", "--limit", "1", "--input-format", "tgf"]) + .write_stdin(SIMPLE_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "alpha\tbeta\n"); +} + +// -- metrics subcommand -- + +#[test] +fn metrics_chain() { + let output = tool!("depquery") + .args(["metrics", "--input-format", "tgf"]) + .write_stdin(CHAIN_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!( + stdout, + "\ +nodes\t4 +edges\t3 +roots\t1 +leaves\t1 +max_depth\t3 +max_fan_out\t1 +max_fan_in\t1 +avg_fan_out\t0.75 +density\t0.250000 +cycles\t0 +diamonds\t0 +components\t1 +" + ); +} + +#[test] +fn metrics_diamond() { + let output = tool!("depquery") + .args(["metrics", "--input-format", "tgf"]) + .write_stdin(DIAMOND_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!( + stdout, + "\ +nodes\t4 +edges\t4 +roots\t1 +leaves\t1 +max_depth\t2 +max_fan_out\t2 +max_fan_in\t2 +avg_fan_out\t1.00 +density\t0.333333 +cycles\t0 +diamonds\t1 +components\t1 +" + ); +} + +#[test] +fn metrics_cycle() { + let graph = "a\nb\nc\n#\na\tb\nb\tc\nc\ta\n"; + let output = tool!("depquery") + .args(["metrics", "--input-format", "tgf"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!( + stdout, + "\ +nodes\t3 +edges\t3 +roots\t0 +leaves\t0 +max_depth\t +max_fan_out\t1 +max_fan_in\t1 +avg_fan_out\t1.00 +density\t0.500000 +cycles\t1 +diamonds\t0 +components\t1 +" + ); +} + +#[test] +fn metrics_empty() { + let output = tool!("depquery") + .args(["metrics", "--input-format", "tgf"]) + .write_stdin("#\n") + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!( + stdout, + "\ +nodes\t0 +edges\t0 +roots\t0 +leaves\t0 +max_depth\t0 +max_fan_out\t0 +max_fan_in\t0 +avg_fan_out\t0.00 +density\t0.000000 +cycles\t0 +diamonds\t0 +components\t0 +" + ); +} + +#[test] +fn metrics_disjoint() { + // Two separate components: a -> b, c -> d + let graph = "a\nb\nc\nd\n#\na\tb\nc\td\n"; + let output = tool!("depquery") + .args(["metrics", "--input-format", "tgf"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!( + stdout, + "\ +nodes\t4 +edges\t2 +roots\t2 +leaves\t2 +max_depth\t1 +max_fan_out\t1 +max_fan_in\t1 +avg_fan_out\t0.50 +density\t0.166667 +cycles\t0 +diamonds\t0 +components\t2 +" + ); +} From 2f30ed90d3953e094bc0877d61fdb09089d1f119 Mon Sep 17 00:00:00 2001 From: Austin Gill Date: Thu, 19 Feb 2026 18:16:15 -0600 Subject: [PATCH 2/7] Remove redundant type annotations --- crates/csvizmo-depgraph/src/algorithm/cycles.rs | 2 +- crates/csvizmo-depgraph/src/algorithm/filter.rs | 2 +- crates/csvizmo-depgraph/src/algorithm/merge.rs | 4 ++-- crates/csvizmo-depgraph/src/algorithm/query/edges.rs | 2 +- crates/csvizmo-depgraph/src/algorithm/shorten.rs | 4 ++-- crates/csvizmo-depgraph/src/algorithm/simplify.rs | 2 +- crates/csvizmo-depgraph/src/algorithm/sub.rs | 4 ++-- crates/csvizmo-depgraph/src/emit/walk.rs | 4 ++-- crates/csvizmo-depgraph/src/graph.rs | 2 +- crates/csvizmo-depgraph/src/parse/dot.rs | 2 +- crates/csvizmo-depgraph/src/parse/mermaid.rs | 2 +- crates/csvizmo-depgraph/src/parse/tree.rs | 2 +- 12 files changed, 16 insertions(+), 16 deletions(-) diff --git a/crates/csvizmo-depgraph/src/algorithm/cycles.rs b/crates/csvizmo-depgraph/src/algorithm/cycles.rs index 94abb2c..86f17b3 100644 --- a/crates/csvizmo-depgraph/src/algorithm/cycles.rs +++ b/crates/csvizmo-depgraph/src/algorithm/cycles.rs @@ -33,7 +33,7 @@ pub fn cycles(graph: &DepGraph, _args: &CyclesArgs) -> eyre::Result { } // Map each cycle node to its SCC index. - let mut node_to_scc: HashMap = HashMap::new(); + let mut node_to_scc = HashMap::new(); for (i, scc) in cycle_sccs.iter().enumerate() { for &node in scc { node_to_scc.insert(node, i); diff --git a/crates/csvizmo-depgraph/src/algorithm/filter.rs b/crates/csvizmo-depgraph/src/algorithm/filter.rs index facc775..907b1b9 100644 --- a/crates/csvizmo-depgraph/src/algorithm/filter.rs +++ b/crates/csvizmo-depgraph/src/algorithm/filter.rs @@ -116,7 +116,7 @@ pub fn filter(graph: &DepGraph, args: &FilterArgs) -> eyre::Result { .map(|e| (e.from.clone(), e.to.clone())) .collect(); - let mut bypass_edges: Vec<(String, String)> = Vec::new(); + let mut bypass_edges = Vec::new(); for &idx in &matched { let preds = surviving_neighbors(&view.pg, idx, Direction::Incoming, &keep); let succs = surviving_neighbors(&view.pg, idx, Direction::Outgoing, &keep); diff --git a/crates/csvizmo-depgraph/src/algorithm/merge.rs b/crates/csvizmo-depgraph/src/algorithm/merge.rs index a542321..ef8bc73 100644 --- a/crates/csvizmo-depgraph/src/algorithm/merge.rs +++ b/crates/csvizmo-depgraph/src/algorithm/merge.rs @@ -13,7 +13,7 @@ pub fn merge(graphs: &[DepGraph]) -> DepGraph { let mut nodes: IndexMap = IndexMap::new(); let mut edge_map: IndexMap<(String, String), Edge> = IndexMap::new(); let mut named_subgraphs: IndexMap> = IndexMap::new(); - let mut unnamed_subgraphs: Vec = Vec::new(); + let mut unnamed_subgraphs = Vec::new(); for graph in graphs { for (id, info) in &graph.nodes { @@ -46,7 +46,7 @@ pub fn merge(graphs: &[DepGraph]) -> DepGraph { } } - let mut subgraphs: Vec = Vec::new(); + let mut subgraphs = Vec::new(); for (id, sgs) in named_subgraphs { let mut merged = merge(&sgs); merged.id = Some(id); diff --git a/crates/csvizmo-depgraph/src/algorithm/query/edges.rs b/crates/csvizmo-depgraph/src/algorithm/query/edges.rs index 1db9333..6aee77b 100644 --- a/crates/csvizmo-depgraph/src/algorithm/query/edges.rs +++ b/crates/csvizmo-depgraph/src/algorithm/query/edges.rs @@ -75,7 +75,7 @@ pub fn edges( None }; - let mut result: Vec<(String, String, Option)> = Vec::new(); + let mut result = Vec::new(); for edge in all_edges { let from_info = all_nodes.get(&edge.from); diff --git a/crates/csvizmo-depgraph/src/algorithm/shorten.rs b/crates/csvizmo-depgraph/src/algorithm/shorten.rs index 48cd88c..4394f77 100644 --- a/crates/csvizmo-depgraph/src/algorithm/shorten.rs +++ b/crates/csvizmo-depgraph/src/algorithm/shorten.rs @@ -161,7 +161,7 @@ fn remap_graph( id_map: &IndexMap, label_map: &IndexMap, ) -> DepGraph { - let mut placed: HashSet = HashSet::new(); + let mut placed = HashSet::new(); remap_inner(graph, id_map, label_map, &mut placed) } @@ -194,7 +194,7 @@ fn remap_inner( .filter(|sg| !sg.nodes.is_empty() || !sg.subgraphs.is_empty()) .collect(); - let mut seen_edges: HashSet<(String, String, Option)> = HashSet::new(); + let mut seen_edges = HashSet::new(); let edges: Vec = graph .edges .iter() diff --git a/crates/csvizmo-depgraph/src/algorithm/simplify.rs b/crates/csvizmo-depgraph/src/algorithm/simplify.rs index 40c4ca0..c64f076 100644 --- a/crates/csvizmo-depgraph/src/algorithm/simplify.rs +++ b/crates/csvizmo-depgraph/src/algorithm/simplify.rs @@ -26,7 +26,7 @@ pub fn simplify(graph: &DepGraph) -> eyre::Result { let (reduction, _closure) = dag_transitive_reduction_closure(&adj); // Build set of edges to keep: (from_id, to_id) pairs present in the reduction. - let mut keep_edges: HashSet<(&str, &str)> = HashSet::new(); + let mut keep_edges = HashSet::new(); for from_topo in 0..reduction.node_count() { let from_id = view.idx_to_id[sorted[from_topo].index()]; for to_topo in reduction.neighbors(from_topo as u32) { diff --git a/crates/csvizmo-depgraph/src/algorithm/sub.rs b/crates/csvizmo-depgraph/src/algorithm/sub.rs index 2a2f53f..9091bc6 100644 --- a/crates/csvizmo-depgraph/src/algorithm/sub.rs +++ b/crates/csvizmo-depgraph/src/algorithm/sub.rs @@ -128,7 +128,7 @@ pub fn sub(graph: &DepGraph, substitution: &Substitution, key: &SubKey) -> DepGr fn sub_id(graph: &DepGraph, substitution: &Substitution) -> DepGraph { // Build old->new ID mapping from all nodes across all subgraphs. let all_nodes = graph.all_nodes(); - let mut id_map: IndexMap = IndexMap::new(); + let mut id_map = IndexMap::new(); for old_id in all_nodes.keys() { let new_id = substitution.apply(old_id); id_map.insert(old_id.clone(), new_id); @@ -184,7 +184,7 @@ fn remap_subgraph( .collect(); // Remap edges, remove self-loops, deduplicate. - let mut seen_edges: HashSet<(String, String, Option)> = HashSet::new(); + let mut seen_edges = HashSet::new(); let edges: Vec = graph .edges .iter() diff --git a/crates/csvizmo-depgraph/src/emit/walk.rs b/crates/csvizmo-depgraph/src/emit/walk.rs index e47c695..cfe4c4e 100644 --- a/crates/csvizmo-depgraph/src/emit/walk.rs +++ b/crates/csvizmo-depgraph/src/emit/walk.rs @@ -63,8 +63,8 @@ pub fn walk(graph: &DepGraph, visitor: &mut dyn TreeVisitor) -> eyre::Result<()> .filter(|n| !targets.contains(n)) .collect(); - let mut visited: HashSet<&str> = HashSet::new(); - let mut in_progress: HashSet<&str> = HashSet::new(); + let mut visited = HashSet::new(); + let mut in_progress = HashSet::new(); let root_count = roots.len(); for (i, root) in roots.iter().enumerate() { diff --git a/crates/csvizmo-depgraph/src/graph.rs b/crates/csvizmo-depgraph/src/graph.rs index 1e456ee..e9de384 100644 --- a/crates/csvizmo-depgraph/src/graph.rs +++ b/crates/csvizmo-depgraph/src/graph.rs @@ -202,7 +202,7 @@ impl<'a> FlatGraphView<'a> { max_depth: Option, ) -> HashSet { let mut visited = HashSet::new(); - let mut queue: VecDeque<(NodeIndex, usize)> = VecDeque::new(); + let mut queue = VecDeque::new(); for seed in seeds { if visited.insert(seed) { queue.push_back((seed, 0)); diff --git a/crates/csvizmo-depgraph/src/parse/dot.rs b/crates/csvizmo-depgraph/src/parse/dot.rs index b684e65..8c0253e 100644 --- a/crates/csvizmo-depgraph/src/parse/dot.rs +++ b/crates/csvizmo-depgraph/src/parse/dot.rs @@ -250,7 +250,7 @@ fn add_edges(edge_stmt: &AstEdgeStmt, dep: &mut DepGraph) { } // Collect all endpoints in the chain: from -> to1 -> to2 -> ... - let mut endpoints: Vec> = Vec::new(); + let mut endpoints = Vec::new(); endpoints.push(edge_stmt.from.as_ref()); let mut rhs = &edge_stmt.next; loop { diff --git a/crates/csvizmo-depgraph/src/parse/mermaid.rs b/crates/csvizmo-depgraph/src/parse/mermaid.rs index 9a7700e..05daa16 100644 --- a/crates/csvizmo-depgraph/src/parse/mermaid.rs +++ b/crates/csvizmo-depgraph/src/parse/mermaid.rs @@ -65,7 +65,7 @@ pub fn parse(input: &str) -> eyre::Result { }); // Collect which nodes belong to subgraphs so we can partition them. - let mut subgraph_node_ids: std::collections::HashSet<&str> = std::collections::HashSet::new(); + let mut subgraph_node_ids = std::collections::HashSet::new(); for sg in &graph.subgraphs { for node_id in &sg.nodes { subgraph_node_ids.insert(node_id.as_str()); diff --git a/crates/csvizmo-depgraph/src/parse/tree.rs b/crates/csvizmo-depgraph/src/parse/tree.rs index e78a1f2..80c60a7 100644 --- a/crates/csvizmo-depgraph/src/parse/tree.rs +++ b/crates/csvizmo-depgraph/src/parse/tree.rs @@ -58,7 +58,7 @@ fn is_summary(line: &str) -> bool { pub fn parse(input: &str) -> eyre::Result { let mut graph = DepGraph::default(); // stack[i] = node ID (full path) of the most recent node at depth i - let mut stack: Vec = Vec::new(); + let mut stack = Vec::new(); for raw_line in input.lines() { // Some `tree` builds use NO-BREAK SPACE (U+00A0) in continuation prefixes. From 563d5fdab03c10ae0b55b993800c29e0ae12ef20 Mon Sep 17 00:00:00 2001 From: Austin Gill Date: Thu, 19 Feb 2026 18:41:58 -0600 Subject: [PATCH 3/7] Fix tests not compiling or passing with --no-default-features --- crates/csvizmo-depgraph/src/emit/dot.rs | 1 + crates/csvizmo-depgraph/tests/deptransform.rs | 2 ++ 2 files changed, 3 insertions(+) diff --git a/crates/csvizmo-depgraph/src/emit/dot.rs b/crates/csvizmo-depgraph/src/emit/dot.rs index 6bb993c..bcb0e45 100644 --- a/crates/csvizmo-depgraph/src/emit/dot.rs +++ b/crates/csvizmo-depgraph/src/emit/dot.rs @@ -223,6 +223,7 @@ mod tests { } #[test] + #[cfg(feature = "dot")] fn quote_unquote_roundtrip() { let cases = [ "hello", diff --git a/crates/csvizmo-depgraph/tests/deptransform.rs b/crates/csvizmo-depgraph/tests/deptransform.rs index c74c8e1..20dc794 100644 --- a/crates/csvizmo-depgraph/tests/deptransform.rs +++ b/crates/csvizmo-depgraph/tests/deptransform.rs @@ -488,6 +488,7 @@ fn merge_requires_two_files() { } #[test] +#[cfg(feature = "dot")] fn merge_preserves_subgraphs() { let mut f1 = NamedTempFile::new().unwrap(); write!( @@ -547,6 +548,7 @@ digraph { // -- flatten integration tests -- #[test] +#[cfg(feature = "dot")] fn flatten_removes_subgraphs() { let graph = "\ digraph { From c257d7a6c232731d03510d9a7bb7f691ed1b0a3e Mon Sep 17 00:00:00 2001 From: Austin Gill Date: Thu, 19 Feb 2026 18:42:23 -0600 Subject: [PATCH 4/7] Add depquery blurb to README --- README.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/README.md b/README.md index 13f5444..20f3f5d 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ Gizmos for working with CSVs * [canspam](#canspam) -- generate random CAN traffic * [canstruct](#canstruct) -- reconstruct NMEA 2000 Fast Packet / ISO 11783-3 Transport Protocol sessions +* [depquery](#depquery) -- query properties of dependency graphs * [bbclasses](#bbclasses) -- generate BitBake recipe inheritance diagrams # Philosophy @@ -315,6 +316,30 @@ $ cat data/depconv/bitbake.curl.task-depends.dot | > The `deptransform` tool shares the same GPL-2.0 license caveat as `depconv` with respect to DOT > parsing. +## depquery + +Query properties of dependency graphs. Lists nodes, edges, and computes graph metrics. Supports the +same input formats as `depconv`, and is designed to be used in pipelines. + +```sh +# Show the 5 crates with the most dependencies: +$ cargo metadata --format-version=1 | + depquery nodes --sort out-degree --limit 5 +csvizmo-depgraph 20 +csvizmo-stats 16 +csvizmo-can 12 +csvizmo-minpath 11 +tracing-subscriber 10 +``` + +The `depquery` tool supports outputting `nodes`, `edges`, and `metrics`. The output is intended to +be machine-readable, and is tab-separated. + +> [!NOTE] +> +> The `depquery` tool shares the same GPL-2.0 license caveat as `depconv` with respect to DOT +> parsing. + ## can2csv Parse basic data from a CAN frame into a CSV record. Faster than `sed`, and also parses the canid. From 4520189e5b964c06f6a1690be18123956d111049 Mon Sep 17 00:00:00 2001 From: Austin Gill Date: Thu, 19 Feb 2026 18:46:28 -0600 Subject: [PATCH 5/7] Rename --ancestors to --rdeps for consistency with --deps --- .../csvizmo-depgraph/src/algorithm/filter.rs | 36 +++++++++---------- .../csvizmo-depgraph/src/algorithm/select.rs | 36 +++++++++---------- crates/csvizmo-depgraph/src/bin/depfilter.rs | 4 +-- crates/csvizmo-depgraph/tests/depfilter.rs | 16 ++++----- 4 files changed, 44 insertions(+), 48 deletions(-) diff --git a/crates/csvizmo-depgraph/src/algorithm/filter.rs b/crates/csvizmo-depgraph/src/algorithm/filter.rs index 907b1b9..07d7367 100644 --- a/crates/csvizmo-depgraph/src/algorithm/filter.rs +++ b/crates/csvizmo-depgraph/src/algorithm/filter.rs @@ -22,12 +22,12 @@ pub struct FilterArgs { pub key: MatchKey, /// Also remove all dependencies of matched nodes (cascade) - #[clap(long)] + #[clap(long, alias = "children")] pub deps: bool, - /// Also remove all ancestors of matched nodes (cascade) - #[clap(long)] - pub ancestors: bool, + /// Also remove all reverse dependencies of matched nodes (cascade) + #[clap(long, alias = "parents", alias = "ancestors")] + pub rdeps: bool, /// Preserve graph connectivity when removing nodes /// (creates direct edges, no self-loops or parallel edges) @@ -56,8 +56,8 @@ impl FilterArgs { self } - pub fn ancestors(mut self) -> Self { - self.ancestors = true; + pub fn rdeps(mut self) -> Self { + self.rdeps = true; self } @@ -90,12 +90,12 @@ pub fn filter(graph: &DepGraph, args: &FilterArgs) -> eyre::Result { } } - // Cascade removal via BFS if --deps or --ancestors is set. - if args.deps && args.ancestors { + // Cascade removal via BFS if --deps or --rdeps is set. + if args.deps && args.rdeps { let seeds = matched.clone(); matched = view.bfs(seeds.clone(), Direction::Outgoing, None); matched.extend(view.bfs(seeds, Direction::Incoming, None)); - } else if args.ancestors { + } else if args.rdeps { matched = view.bfs(matched, Direction::Incoming, None); } else if args.deps { matched = view.bfs(matched, Direction::Outgoing, None); @@ -312,43 +312,43 @@ mod tests { } #[test] - fn with_ancestors_cascade() { + fn with_rdeps_cascade() { // a -> b -> c let g = make_graph( &[("a", "a"), ("b", "b"), ("c", "c")], &[("a", "b"), ("b", "c")], vec![], ); - let args = FilterArgs::default().pattern("c").ancestors(); + let args = FilterArgs::default().pattern("c").rdeps(); let result = filter(&g, &args).unwrap(); assert!(node_ids(&result).is_empty()); } #[test] - fn with_deps_and_ancestors_cascade() { - // a -> b -> c -> d: filter b with both deps and ancestors + fn with_deps_and_rdeps_cascade() { + // a -> b -> c -> d: filter b with both deps and rdeps let g = make_graph( &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], &[("a", "b"), ("b", "c"), ("c", "d")], vec![], ); - let args = FilterArgs::default().pattern("b").deps().ancestors(); + let args = FilterArgs::default().pattern("b").deps().rdeps(); let result = filter(&g, &args).unwrap(); - // b + ancestors (a) + deps (c, d) = all removed + // b + rdeps (a) + deps (c, d) = all removed assert!(node_ids(&result).is_empty()); } #[test] - fn with_deps_and_ancestors_cascade_partial() { + fn with_deps_and_rdeps_cascade_partial() { // a -> b -> c, d -> c: filter b with both directions let g = make_graph( &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], &[("a", "b"), ("b", "c"), ("d", "c")], vec![], ); - let args = FilterArgs::default().pattern("b").deps().ancestors(); + let args = FilterArgs::default().pattern("b").deps().rdeps(); let result = filter(&g, &args).unwrap(); - // b removed, ancestors (a) removed, deps (c) removed, but d survives + // b removed, rdeps (a) removed, deps (c) removed, but d survives assert_eq!(node_ids(&result), vec!["d"]); assert!(edge_pairs(&result).is_empty()); } diff --git a/crates/csvizmo-depgraph/src/algorithm/select.rs b/crates/csvizmo-depgraph/src/algorithm/select.rs index 53fcfbe..c500207 100644 --- a/crates/csvizmo-depgraph/src/algorithm/select.rs +++ b/crates/csvizmo-depgraph/src/algorithm/select.rs @@ -21,12 +21,12 @@ pub struct SelectArgs { pub key: MatchKey, /// Include all dependencies of selected nodes - #[clap(long)] + #[clap(long, alias = "children")] pub deps: bool, - /// Include all ancestors of selected nodes - #[clap(long)] - pub ancestors: bool, + /// Include all reverse dependencies of selected nodes + #[clap(long, alias = "parents", alias = "ancestors")] + pub rdeps: bool, /// Traverse up to N layers (implies --deps if no direction given) #[clap(long)] @@ -54,8 +54,8 @@ impl SelectArgs { self } - pub fn ancestors(mut self) -> Self { - self.ancestors = true; + pub fn rdeps(mut self) -> Self { + self.rdeps = true; self } @@ -70,7 +70,7 @@ pub fn select(graph: &DepGraph, args: &SelectArgs) -> eyre::Result { let view = FlatGraphView::new(graph); // No filters at all -> pass through the entire graph unchanged. - let no_traversal = !args.deps && !args.ancestors && args.depth.is_none(); + let no_traversal = !args.deps && !args.rdeps && args.depth.is_none(); if args.pattern.is_empty() && no_traversal { return Ok(graph.clone()); } @@ -101,11 +101,11 @@ pub fn select(graph: &DepGraph, args: &SelectArgs) -> eyre::Result { // --depth without an explicit direction implies --deps let deps = args.deps || args.depth.is_some(); - if deps && args.ancestors { + if deps && args.rdeps { let seeds = keep.clone(); keep = view.bfs(seeds.clone(), Direction::Outgoing, args.depth); keep.extend(view.bfs(seeds, Direction::Incoming, args.depth)); - } else if args.ancestors { + } else if args.rdeps { keep = view.bfs(keep, Direction::Incoming, args.depth); } else if deps { keep = view.bfs(keep, Direction::Outgoing, args.depth); @@ -251,14 +251,14 @@ mod tests { } #[test] - fn with_ancestors() { + fn with_rdeps() { // a -> b -> c let g = make_graph( &[("a", "a"), ("b", "b"), ("c", "c")], &[("a", "b"), ("b", "c")], vec![], ); - let args = SelectArgs::default().pattern("c").ancestors(); + let args = SelectArgs::default().pattern("c").rdeps(); let result = select(&g, &args).unwrap(); assert_eq!(node_ids(&result), vec!["a", "b", "c"]); } @@ -278,14 +278,14 @@ mod tests { } #[test] - fn with_deps_and_ancestors() { - // a -> b -> c -> d: select b with both deps and ancestors + fn with_deps_and_rdeps() { + // a -> b -> c -> d: select b with both deps and rdeps let g = make_graph( &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], &[("a", "b"), ("b", "c"), ("c", "d")], vec![], ); - let args = SelectArgs::default().pattern("b").deps().ancestors(); + let args = SelectArgs::default().pattern("b").deps().rdeps(); let result = select(&g, &args).unwrap(); assert_eq!(node_ids(&result), vec!["a", "b", "c", "d"]); assert_eq!( @@ -295,18 +295,14 @@ mod tests { } #[test] - fn with_deps_and_ancestors_depth_limited() { + fn with_deps_and_rdeps_depth_limited() { // a -> b -> c -> d -> e: select c with both directions, depth 1 let g = make_graph( &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d"), ("e", "e")], &[("a", "b"), ("b", "c"), ("c", "d"), ("d", "e")], vec![], ); - let args = SelectArgs::default() - .pattern("c") - .deps() - .ancestors() - .depth(1); + let args = SelectArgs::default().pattern("c").deps().rdeps().depth(1); let result = select(&g, &args).unwrap(); assert_eq!(node_ids(&result), vec!["b", "c", "d"]); assert_eq!(edge_pairs(&result), vec![("b", "c"), ("c", "d")]); diff --git a/crates/csvizmo-depgraph/src/bin/depfilter.rs b/crates/csvizmo-depgraph/src/bin/depfilter.rs index 29190c8..85e4d8f 100644 --- a/crates/csvizmo-depgraph/src/bin/depfilter.rs +++ b/crates/csvizmo-depgraph/src/bin/depfilter.rs @@ -44,9 +44,9 @@ struct Args { #[derive(Debug, Subcommand)] enum Command { - /// Select nodes matching patterns and optionally their dependencies/ancestors + /// Select nodes matching patterns and optionally their deps/rdeps Select(SelectArgs), - /// Remove nodes matching patterns and optionally cascade to dependencies/ancestors + /// Remove nodes matching patterns and optionally cascade to deps/rdeps Filter(FilterArgs), /// Extract the subgraph of all directed paths between matched query nodes Between(BetweenArgs), diff --git a/crates/csvizmo-depgraph/tests/depfilter.rs b/crates/csvizmo-depgraph/tests/depfilter.rs index 5518e30..21612e5 100644 --- a/crates/csvizmo-depgraph/tests/depfilter.rs +++ b/crates/csvizmo-depgraph/tests/depfilter.rs @@ -71,14 +71,14 @@ fn select_with_deps() { } #[test] -fn select_with_ancestors() { +fn select_with_rdeps() { // Select libbar and include all nodes that depend on it let output = tool!("depfilter") .args([ "select", "--pattern", "libbar", - "--ancestors", + "--rdeps", "--input-format", "tgf", "--output-format", @@ -169,7 +169,7 @@ fn select_multiple_patterns_and() { } #[test] -fn select_with_deps_and_ancestors() { +fn select_with_deps_and_rdeps() { // a -> b -> c -> d: select b with both directions gets everything let graph = "a\nb\nc\nd\n#\na\tb\nb\tc\nc\td\n"; let output = tool!("depfilter") @@ -178,7 +178,7 @@ fn select_with_deps_and_ancestors() { "--pattern", "b", "--deps", - "--ancestors", + "--rdeps", "--input-format", "tgf", "--output-format", @@ -260,13 +260,13 @@ fn filter_with_deps() { } #[test] -fn filter_with_ancestors() { +fn filter_with_rdeps() { let output = tool!("depfilter") .args([ "filter", "--pattern", "libbar", - "--ancestors", + "--rdeps", "--input-format", "tgf", "--output-format", @@ -281,7 +281,7 @@ fn filter_with_ancestors() { } #[test] -fn filter_with_deps_and_ancestors() { +fn filter_with_deps_and_rdeps() { // a -> b -> c, d -> c: filter b with both directions removes a, b, c but keeps d let graph = "a\nb\nc\nd\n#\na\tb\nb\tc\nd\tc\n"; let output = tool!("depfilter") @@ -290,7 +290,7 @@ fn filter_with_deps_and_ancestors() { "--pattern", "b", "--deps", - "--ancestors", + "--rdeps", "--input-format", "tgf", "--output-format", From dcebecf6277f6ac618ea277aa3bd7ac5bfef40cd Mon Sep 17 00:00:00 2001 From: Austin Gill Date: Thu, 19 Feb 2026 19:04:53 -0600 Subject: [PATCH 6/7] Rename --pattern to --include and --exclude; refactor depfilter --- README.md | 12 +- .../csvizmo-depgraph/src/algorithm/between.rs | 80 ++- .../csvizmo-depgraph/src/algorithm/filter.rs | 552 ------------------ crates/csvizmo-depgraph/src/algorithm/mod.rs | 1 - .../csvizmo-depgraph/src/algorithm/select.rs | 432 +++++++++++++- crates/csvizmo-depgraph/src/bin/depfilter.rs | 8 +- crates/csvizmo-depgraph/tests/depfilter.rs | 198 +++---- 7 files changed, 549 insertions(+), 734 deletions(-) delete mode 100644 crates/csvizmo-depgraph/src/algorithm/filter.rs diff --git a/README.md b/README.md index 20f3f5d..60efbc1 100644 --- a/README.md +++ b/README.md @@ -263,19 +263,17 @@ recover lost metadata. Filter or select subsets of dependency graphs. Works on the same formats as `depconv`, and is designed to be chained with pipes. -* `depfilter select` keeps only nodes matching the given patterns -* `depfilter filter` removes nodes matching the given patterns +* `depfilter select` keeps nodes matching `--include` patterns and/or removes `--exclude` patterns * `depfilter between` select nodes connecting multiple sets of query nodes * `depfilter cycles` select any cycles in the graph -Both subcommands have extra options to tune their behavior. +Each subcommand has extra options to tune its behavior. ```sh -# From a cargo dependency tree, select the subtree rooted at "clap", then filter out -# all the proc-macro crates and their dependencies: +# From a cargo dependency tree, select the subtree rooted at "clap", excluding +# all the proc-macro crates: $ cargo tree --depth 10 \ - | depfilter select -p "clap*" --deps -I cargo-tree -O tgf \ - | depfilter filter -p "*derive*" -p "*proc*" --deps -I tgf -O dot + | depfilter select -g "clap*" --deps -x "*derive*" -x "*proc*" -I cargo-tree -O dot digraph { clap [label="v4.5.57 clap"]; clap_builder [label="v4.5.57 clap_builder"]; diff --git a/crates/csvizmo-depgraph/src/algorithm/between.rs b/crates/csvizmo-depgraph/src/algorithm/between.rs index 47502ae..ce9b900 100644 --- a/crates/csvizmo-depgraph/src/algorithm/between.rs +++ b/crates/csvizmo-depgraph/src/algorithm/between.rs @@ -10,8 +10,12 @@ use crate::{DepGraph, FlatGraphView}; #[derive(Clone, Debug, Default, Parser)] pub struct BetweenArgs { /// Glob pattern selecting query endpoints (can be repeated, OR logic) - #[clap(short, long)] - pub pattern: Vec, + #[clap(short = 'g', long)] + pub include: Vec, + + /// Glob pattern to exclude nodes from result (can be repeated, OR logic) + #[clap(short = 'x', long)] + pub exclude: Vec, /// Match patterns against 'id' or 'label' #[clap(long, default_value_t = MatchKey::default())] @@ -19,8 +23,13 @@ pub struct BetweenArgs { } impl BetweenArgs { - pub fn pattern(mut self, p: impl Into) -> Self { - self.pattern.push(p.into()); + pub fn include(mut self, p: impl Into) -> Self { + self.include.push(p.into()); + self + } + + pub fn exclude(mut self, p: impl Into) -> Self { + self.exclude.push(p.into()); self } @@ -36,7 +45,7 @@ impl BetweenArgs { /// then for each ordered pair (qi, qj) collects nodes on directed paths from qi to qj /// via `forward(qi) & backward(qj)`. The union of all pairwise results is the keep set. pub fn between(graph: &DepGraph, args: &BetweenArgs) -> eyre::Result { - let globset = build_globset(&args.pattern)?; + let globset = build_globset(&args.include)?; let view = FlatGraphView::new(graph); // Match query nodes by glob pattern (OR logic). @@ -87,6 +96,22 @@ pub fn between(graph: &DepGraph, args: &BetweenArgs) -> eyre::Result { } } + // Remove nodes matching --exclude patterns from keep set. + if !args.exclude.is_empty() { + let exclude_globset = build_globset(&args.exclude)?; + for (id, info) in graph.all_nodes() { + let text = match args.key { + MatchKey::Id => id.as_str(), + MatchKey::Label => info.label.as_str(), + }; + if exclude_globset.is_match(text) + && let Some(&idx) = view.id_to_idx.get(id.as_str()) + { + keep.remove(&idx); + } + } + } + Ok(view.filter(&keep)) } @@ -138,7 +163,7 @@ mod tests { fn direct_path() { // a -> b: between a and b yields both let g = make_graph(&[("a", "a"), ("b", "b")], &[("a", "b")], vec![]); - let args = BetweenArgs::default().pattern("a").pattern("b"); + let args = BetweenArgs::default().include("a").include("b"); let result = between(&g, &args).unwrap(); assert_eq!(sorted_node_ids(&result), vec!["a", "b"]); assert_eq!(sorted_edge_pairs(&result), vec![("a", "b")]); @@ -152,7 +177,7 @@ mod tests { &[("a", "b"), ("b", "c")], vec![], ); - let args = BetweenArgs::default().pattern("a").pattern("c"); + let args = BetweenArgs::default().include("a").include("c"); let result = between(&g, &args).unwrap(); assert_eq!(sorted_node_ids(&result), vec!["a", "b", "c"]); assert_eq!(sorted_edge_pairs(&result), vec![("a", "b"), ("b", "c")]); @@ -166,7 +191,7 @@ mod tests { &[("a", "b"), ("c", "d")], vec![], ); - let args = BetweenArgs::default().pattern("a").pattern("c"); + let args = BetweenArgs::default().include("a").include("c"); let result = between(&g, &args).unwrap(); assert!(result.nodes.is_empty()); assert!(result.edges.is_empty()); @@ -180,7 +205,7 @@ mod tests { &[("a", "b"), ("a", "c"), ("b", "d"), ("c", "d")], vec![], ); - let args = BetweenArgs::default().pattern("a").pattern("d"); + let args = BetweenArgs::default().include("a").include("d"); let result = between(&g, &args).unwrap(); assert_eq!(sorted_node_ids(&result), vec!["a", "b", "c", "d"]); assert_eq!( @@ -198,9 +223,9 @@ mod tests { vec![], ); let args = BetweenArgs::default() - .pattern("a") - .pattern("b") - .pattern("d"); + .include("a") + .include("b") + .include("d"); let result = between(&g, &args).unwrap(); assert_eq!(sorted_node_ids(&result), vec!["a", "b", "c", "d"]); } @@ -208,7 +233,7 @@ mod tests { #[test] fn no_match_returns_empty() { let g = make_graph(&[("a", "a"), ("b", "b")], &[("a", "b")], vec![]); - let args = BetweenArgs::default().pattern("nonexistent"); + let args = BetweenArgs::default().include("nonexistent"); let result = between(&g, &args).unwrap(); assert!(result.nodes.is_empty()); assert!(result.edges.is_empty()); @@ -218,7 +243,7 @@ mod tests { fn single_match_returns_empty() { // Only one node matches -- need at least 2 for a path let g = make_graph(&[("a", "a"), ("b", "b")], &[("a", "b")], vec![]); - let args = BetweenArgs::default().pattern("a"); + let args = BetweenArgs::default().include("a"); let result = between(&g, &args).unwrap(); assert!(result.nodes.is_empty()); assert!(result.edges.is_empty()); @@ -232,7 +257,7 @@ mod tests { &[("a", "b"), ("b", "c"), ("c", "a")], vec![], ); - let args = BetweenArgs::default().pattern("a").pattern("c"); + let args = BetweenArgs::default().include("a").include("c"); let result = between(&g, &args).unwrap(); assert_eq!(sorted_node_ids(&result), vec!["a", "b", "c"]); } @@ -241,8 +266,8 @@ mod tests { fn match_by_id() { let g = make_graph(&[("1", "libfoo"), ("2", "libbar")], &[("1", "2")], vec![]); let args = BetweenArgs::default() - .pattern("1") - .pattern("2") + .include("1") + .include("2") .key(MatchKey::Id); let result = between(&g, &args).unwrap(); assert_eq!(sorted_node_ids(&result), vec!["1", "2"]); @@ -257,7 +282,7 @@ mod tests { &[("a", "b"), ("b", "c"), ("d", "e")], vec![], ); - let args = BetweenArgs::default().pattern("a").pattern("c"); + let args = BetweenArgs::default().include("a").include("c"); let result = between(&g, &args).unwrap(); assert_eq!(sorted_node_ids(&result), vec!["a", "b", "c"]); assert_eq!(sorted_edge_pairs(&result), vec![("a", "b"), ("b", "c")]); @@ -271,9 +296,26 @@ mod tests { &[("a", "b"), ("b", "c")], vec![], ); - let args = BetweenArgs::default().pattern("?"); + let args = BetweenArgs::default().include("?"); let result = between(&g, &args).unwrap(); assert_eq!(sorted_node_ids(&result), vec!["a", "b", "c"]); assert_eq!(sorted_edge_pairs(&result), vec![("a", "b"), ("b", "c")]); } + + #[test] + fn exclude_removes_from_result() { + // a -> b -> c: between a and c, exclude b + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c")], + &[("a", "b"), ("b", "c")], + vec![], + ); + let args = BetweenArgs::default() + .include("a") + .include("c") + .exclude("b"); + let result = between(&g, &args).unwrap(); + assert_eq!(sorted_node_ids(&result), vec!["a", "c"]); + assert!(sorted_edge_pairs(&result).is_empty()); + } } diff --git a/crates/csvizmo-depgraph/src/algorithm/filter.rs b/crates/csvizmo-depgraph/src/algorithm/filter.rs deleted file mode 100644 index 07d7367..0000000 --- a/crates/csvizmo-depgraph/src/algorithm/filter.rs +++ /dev/null @@ -1,552 +0,0 @@ -use std::collections::{HashSet, VecDeque}; - -use clap::Parser; -use petgraph::Direction; -use petgraph::graph::NodeIndex; - -use super::{MatchKey, build_globset}; -use crate::{DepGraph, Edge, FlatGraphView}; - -#[derive(Clone, Debug, Default, Parser)] -pub struct FilterArgs { - /// Glob pattern to remove nodes (can be repeated) - #[clap(short, long)] - pub pattern: Vec, - - /// Combine multiple patterns with AND instead of OR - #[clap(long)] - pub and: bool, - - /// Match patterns against 'id' or 'label' - #[clap(long, default_value_t = MatchKey::default())] - pub key: MatchKey, - - /// Also remove all dependencies of matched nodes (cascade) - #[clap(long, alias = "children")] - pub deps: bool, - - /// Also remove all reverse dependencies of matched nodes (cascade) - #[clap(long, alias = "parents", alias = "ancestors")] - pub rdeps: bool, - - /// Preserve graph connectivity when removing nodes - /// (creates direct edges, no self-loops or parallel edges) - #[clap(long)] - pub preserve_connectivity: bool, -} - -impl FilterArgs { - pub fn pattern(mut self, p: impl Into) -> Self { - self.pattern.push(p.into()); - self - } - - pub fn and(mut self) -> Self { - self.and = true; - self - } - - pub fn key(mut self, k: MatchKey) -> Self { - self.key = k; - self - } - - pub fn deps(mut self) -> Self { - self.deps = true; - self - } - - pub fn rdeps(mut self) -> Self { - self.rdeps = true; - self - } - - pub fn preserve_connectivity(mut self) -> Self { - self.preserve_connectivity = true; - self - } -} - -pub fn filter(graph: &DepGraph, args: &FilterArgs) -> eyre::Result { - let globset = build_globset(&args.pattern)?; - let view = FlatGraphView::new(graph); - - // Find nodes that match the patterns (these will be removed). - let mut matched = HashSet::new(); - for (id, info) in graph.all_nodes() { - let text = match args.key { - MatchKey::Id => id.as_str(), - MatchKey::Label => info.label.as_str(), - }; - - let is_match = if args.and { - globset.matches(text).len() == args.pattern.len() - } else { - globset.is_match(text) - }; - - if is_match && let Some(&idx) = view.id_to_idx.get(id.as_str()) { - matched.insert(idx); - } - } - - // Cascade removal via BFS if --deps or --rdeps is set. - if args.deps && args.rdeps { - let seeds = matched.clone(); - matched = view.bfs(seeds.clone(), Direction::Outgoing, None); - matched.extend(view.bfs(seeds, Direction::Incoming, None)); - } else if args.rdeps { - matched = view.bfs(matched, Direction::Incoming, None); - } else if args.deps { - matched = view.bfs(matched, Direction::Outgoing, None); - } - - // Keep set = all nodes minus matched nodes. - let all_nodes: HashSet<_> = view.id_to_idx.values().copied().collect(); - let keep: HashSet<_> = all_nodes.difference(&matched).copied().collect(); - - let mut result = view.filter(&keep); - - // Bypass removed nodes: connect their surviving predecessors to surviving successors. - // BFS through chains of removed nodes so that A->B->C->D with B,C removed produces A->D. - if args.preserve_connectivity { - let mut existing: HashSet<(String, String)> = result - .all_edges() - .iter() - .map(|e| (e.from.clone(), e.to.clone())) - .collect(); - - let mut bypass_edges = Vec::new(); - for &idx in &matched { - let preds = surviving_neighbors(&view.pg, idx, Direction::Incoming, &keep); - let succs = surviving_neighbors(&view.pg, idx, Direction::Outgoing, &keep); - - for &pred in &preds { - let from = view.idx_to_id[pred.index()]; - for &succ in &succs { - let to = view.idx_to_id[succ.index()]; - if from != to && existing.insert((from.to_string(), to.to_string())) { - bypass_edges.push((from.to_string(), to.to_string())); - } - } - } - } - - for (from, to) in bypass_edges { - insert_edge(&mut result, &from, &to); - } - - // Bypass edges were inserted directly into the graph fields, so any - // caches populated earlier (e.g. by the `existing` set above) are stale. - result.clear_caches(); - } - - Ok(result) -} - -/// Insert a bypass edge into the deepest subgraph that contains both endpoints. -/// Falls back to the root graph if the endpoints are in different subgraphs. -fn insert_edge(graph: &mut DepGraph, from: &str, to: &str) { - for sg in &mut graph.subgraphs { - let has_from = sg.all_nodes().contains_key(from); - let has_to = sg.all_nodes().contains_key(to); - if has_from && has_to { - return insert_edge(sg, from, to); - } - } - graph.edges.push(Edge { - from: from.to_string(), - to: to.to_string(), - ..Default::default() - }); -} - -/// BFS from `start` in `direction`, traversing through removed nodes (those not in `keep`), -/// returning surviving nodes found at the boundary. -fn surviving_neighbors( - pg: &petgraph::Graph<(), ()>, - start: NodeIndex, - direction: Direction, - keep: &HashSet, -) -> Vec { - let mut result = Vec::new(); - let mut visited = HashSet::new(); - let mut queue = VecDeque::new(); - visited.insert(start); - queue.push_back(start); - - while let Some(node) = queue.pop_front() { - for neighbor in pg.neighbors_directed(node, direction) { - if !visited.insert(neighbor) { - continue; - } - if keep.contains(&neighbor) { - result.push(neighbor); - } else { - queue.push_back(neighbor); - } - } - } - - result -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{DepGraph, Edge, NodeInfo}; - - fn make_graph( - nodes: &[(&str, &str)], - edges: &[(&str, &str)], - subgraphs: Vec, - ) -> DepGraph { - DepGraph { - nodes: nodes - .iter() - .map(|(id, label)| (id.to_string(), NodeInfo::new(*label))) - .collect(), - edges: edges - .iter() - .map(|(from, to)| Edge { - from: from.to_string(), - to: to.to_string(), - ..Default::default() - }) - .collect(), - subgraphs, - ..Default::default() - } - } - - fn node_ids(graph: &DepGraph) -> Vec<&str> { - graph.nodes.keys().map(|s| s.as_str()).collect() - } - - fn edge_pairs(graph: &DepGraph) -> Vec<(&str, &str)> { - graph - .edges - .iter() - .map(|e| (e.from.as_str(), e.to.as_str())) - .collect() - } - - // -- pattern matching -- - - #[test] - fn single_pattern() { - // myapp -> libfoo -> libbar, myapp -> libbar - let g = make_graph( - &[ - ("libfoo", "libfoo"), - ("libbar", "libbar"), - ("myapp", "myapp"), - ], - &[ - ("myapp", "libfoo"), - ("myapp", "libbar"), - ("libfoo", "libbar"), - ], - vec![], - ); - let args = FilterArgs::default().pattern("libfoo"); - let result = filter(&g, &args).unwrap(); - assert_eq!(node_ids(&result), vec!["libbar", "myapp"]); - assert_eq!(edge_pairs(&result), vec![("myapp", "libbar")]); - } - - #[test] - fn multiple_patterns_or() { - let g = make_graph( - &[("a", "a"), ("b", "b"), ("c", "c")], - &[("a", "b"), ("b", "c")], - vec![], - ); - let args = FilterArgs::default().pattern("a").pattern("b"); - let result = filter(&g, &args).unwrap(); - assert_eq!(node_ids(&result), vec!["c"]); - assert!(edge_pairs(&result).is_empty()); - } - - #[test] - fn multiple_patterns_and() { - let g = make_graph( - &[ - ("libfoo-alpha", "libfoo-alpha"), - ("libfoo-beta", "libfoo-beta"), - ("libbar-alpha", "libbar-alpha"), - ], - &[], - vec![], - ); - let args = FilterArgs::default() - .pattern("libfoo*") - .pattern("*alpha") - .and(); - let result = filter(&g, &args).unwrap(); - assert_eq!(node_ids(&result), vec!["libfoo-beta", "libbar-alpha"]); - } - - #[test] - fn no_match_returns_unchanged() { - let g = make_graph(&[("a", "a"), ("b", "b")], &[("a", "b")], vec![]); - let args = FilterArgs::default().pattern("nonexistent"); - let result = filter(&g, &args).unwrap(); - assert_eq!(node_ids(&result), vec!["a", "b"]); - assert_eq!(edge_pairs(&result), vec![("a", "b")]); - } - - // -- traversal -- - - #[test] - fn with_deps_cascade() { - // a -> b -> c, a -> c - let g = make_graph( - &[("a", "a"), ("b", "b"), ("c", "c")], - &[("a", "b"), ("b", "c"), ("a", "c")], - vec![], - ); - let args = FilterArgs::default().pattern("a").deps(); - let result = filter(&g, &args).unwrap(); - assert!(node_ids(&result).is_empty()); - } - - #[test] - fn with_rdeps_cascade() { - // a -> b -> c - let g = make_graph( - &[("a", "a"), ("b", "b"), ("c", "c")], - &[("a", "b"), ("b", "c")], - vec![], - ); - let args = FilterArgs::default().pattern("c").rdeps(); - let result = filter(&g, &args).unwrap(); - assert!(node_ids(&result).is_empty()); - } - - #[test] - fn with_deps_and_rdeps_cascade() { - // a -> b -> c -> d: filter b with both deps and rdeps - let g = make_graph( - &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], - &[("a", "b"), ("b", "c"), ("c", "d")], - vec![], - ); - let args = FilterArgs::default().pattern("b").deps().rdeps(); - let result = filter(&g, &args).unwrap(); - // b + rdeps (a) + deps (c, d) = all removed - assert!(node_ids(&result).is_empty()); - } - - #[test] - fn with_deps_and_rdeps_cascade_partial() { - // a -> b -> c, d -> c: filter b with both directions - let g = make_graph( - &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], - &[("a", "b"), ("b", "c"), ("d", "c")], - vec![], - ); - let args = FilterArgs::default().pattern("b").deps().rdeps(); - let result = filter(&g, &args).unwrap(); - // b removed, rdeps (a) removed, deps (c) removed, but d survives - assert_eq!(node_ids(&result), vec!["d"]); - assert!(edge_pairs(&result).is_empty()); - } - - // -- preserve connectivity -- - - #[test] - fn preserve_connectivity_bypass() { - // a -> b -> c: remove b, get a -> c - let g = make_graph( - &[("a", "a"), ("b", "b"), ("c", "c")], - &[("a", "b"), ("b", "c")], - vec![], - ); - let args = FilterArgs::default().pattern("b").preserve_connectivity(); - let result = filter(&g, &args).unwrap(); - assert_eq!(node_ids(&result), vec!["a", "c"]); - assert_eq!(edge_pairs(&result), vec![("a", "c")]); - } - - #[test] - fn preserve_connectivity_chain() { - // a -> b -> c -> d: remove b and c, get a -> d - let g = make_graph( - &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], - &[("a", "b"), ("b", "c"), ("c", "d")], - vec![], - ); - let args = FilterArgs::default() - .pattern("b") - .pattern("c") - .preserve_connectivity(); - let result = filter(&g, &args).unwrap(); - assert_eq!(node_ids(&result), vec!["a", "d"]); - assert_eq!(edge_pairs(&result), vec![("a", "d")]); - } - - #[test] - fn preserve_connectivity_diamond_through_removed() { - // a -> b -> d, a -> c -> d: remove b and c, get single a -> d - let g = make_graph( - &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], - &[("a", "b"), ("a", "c"), ("b", "d"), ("c", "d")], - vec![], - ); - let args = FilterArgs::default() - .pattern("b") - .pattern("c") - .preserve_connectivity(); - let result = filter(&g, &args).unwrap(); - assert_eq!(node_ids(&result), vec!["a", "d"]); - assert_eq!(edge_pairs(&result), vec![("a", "d")]); - } - - #[test] - fn preserve_connectivity_no_self_loops() { - // a -> b -> a: remove b, should not create a -> a - let g = make_graph(&[("a", "a"), ("b", "b")], &[("a", "b"), ("b", "a")], vec![]); - let args = FilterArgs::default().pattern("b").preserve_connectivity(); - let result = filter(&g, &args).unwrap(); - assert_eq!(node_ids(&result), vec!["a"]); - assert!(edge_pairs(&result).is_empty()); - } - - #[test] - fn preserve_connectivity_no_parallel_edges() { - // a -> b -> c, a -> c: remove b, should not duplicate a -> c - let g = make_graph( - &[("a", "a"), ("b", "b"), ("c", "c")], - &[("a", "b"), ("b", "c"), ("a", "c")], - vec![], - ); - let args = FilterArgs::default().pattern("b").preserve_connectivity(); - let result = filter(&g, &args).unwrap(); - assert_eq!(node_ids(&result), vec!["a", "c"]); - assert_eq!(edge_pairs(&result), vec![("a", "c")]); - } - - // -- subgraphs -- - - #[test] - fn preserves_subgraph_structure() { - // root: a, subgraph: { b, c, b->c }, edge a->b at root - // filter a keeps b, c in subgraph with their edge - let g = make_graph( - &[("a", "a")], - &[("a", "b")], - vec![make_graph(&[("b", "b"), ("c", "c")], &[("b", "c")], vec![])], - ); - let args = FilterArgs::default().pattern("a"); - let result = filter(&g, &args).unwrap(); - assert!(result.nodes.is_empty()); - assert!(result.edges.is_empty()); - assert_eq!(result.subgraphs.len(), 1); - assert_eq!(node_ids(&result.subgraphs[0]), vec!["b", "c"]); - assert_eq!(edge_pairs(&result.subgraphs[0]), vec![("b", "c")]); - } - - // -- preserve connectivity + cascade -- - - #[test] - fn preserve_connectivity_with_deps_cascade() { - // a -> b -> c -> d: remove b with --deps --preserve-connectivity - // b, c, d removed; a survives with no edges (nothing to bypass to) - let g = make_graph( - &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], - &[("a", "b"), ("b", "c"), ("c", "d")], - vec![], - ); - let args = FilterArgs::default() - .pattern("b") - .deps() - .preserve_connectivity(); - let result = filter(&g, &args).unwrap(); - assert_eq!(node_ids(&result), vec!["a"]); - assert!(edge_pairs(&result).is_empty()); - } - - #[test] - fn preserve_connectivity_with_deps_cascade_bypass() { - // a -> b -> c, a -> d, d -> c: remove b with --deps --preserve-connectivity - // b, c removed (deps of b); a, d survive; a had edge to c via b, bypass a -> (nothing, - // c is removed). d -> c is also removed. Only a, d remain with no edges. - let g = make_graph( - &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], - &[("a", "b"), ("b", "c"), ("a", "d"), ("d", "c")], - vec![], - ); - let args = FilterArgs::default() - .pattern("b") - .deps() - .preserve_connectivity(); - let result = filter(&g, &args).unwrap(); - assert_eq!(node_ids(&result), vec!["a", "d"]); - assert_eq!(edge_pairs(&result), vec![("a", "d")]); - } - - // -- preserve connectivity with subgraphs -- - - #[test] - fn preserve_connectivity_bypass_in_subgraph() { - // subgraph { a -> b -> c }: remove b, bypass a -> c should be in the subgraph - let g = make_graph( - &[], - &[], - vec![make_graph( - &[("a", "a"), ("b", "b"), ("c", "c")], - &[("a", "b"), ("b", "c")], - vec![], - )], - ); - let args = FilterArgs::default().pattern("b").preserve_connectivity(); - let result = filter(&g, &args).unwrap(); - // bypass a -> c should be inside the subgraph, not at root - assert!(result.edges.is_empty()); - assert_eq!(result.subgraphs.len(), 1); - let sg = &result.subgraphs[0]; - assert_eq!(node_ids(sg), vec!["a", "c"]); - assert_eq!(edge_pairs(sg), vec![("a", "c")]); - } - - #[test] - fn preserve_connectivity_no_parallel_edges_in_subgraph() { - // subgraph { a -> b -> c, a -> c }: remove b, should not duplicate a -> c - let g = make_graph( - &[], - &[], - vec![make_graph( - &[("a", "a"), ("b", "b"), ("c", "c")], - &[("a", "b"), ("b", "c"), ("a", "c")], - vec![], - )], - ); - let args = FilterArgs::default().pattern("b").preserve_connectivity(); - let result = filter(&g, &args).unwrap(); - assert!(result.edges.is_empty()); - assert_eq!(result.subgraphs.len(), 1); - let sg = &result.subgraphs[0]; - assert_eq!(node_ids(sg), vec!["a", "c"]); - assert_eq!(edge_pairs(sg), vec![("a", "c")]); - } - - #[test] - fn preserve_connectivity_cross_subgraph_bypass_at_root() { - // subgraph1 { a }, subgraph2 { c }, root: b, edges a->b, b->c at root - // remove b, bypass a->c should be at root - let g = make_graph( - &[("b", "b")], - &[("a", "b"), ("b", "c")], - vec![ - make_graph(&[("a", "a")], &[], vec![]), - make_graph(&[("c", "c")], &[], vec![]), - ], - ); - let args = FilterArgs::default().pattern("b").preserve_connectivity(); - let result = filter(&g, &args).unwrap(); - assert!(result.nodes.is_empty()); - assert_eq!(edge_pairs(&result), vec![("a", "c")]); - assert_eq!(result.subgraphs.len(), 2); - } -} diff --git a/crates/csvizmo-depgraph/src/algorithm/mod.rs b/crates/csvizmo-depgraph/src/algorithm/mod.rs index 4ef2c4a..a8e9ebb 100644 --- a/crates/csvizmo-depgraph/src/algorithm/mod.rs +++ b/crates/csvizmo-depgraph/src/algorithm/mod.rs @@ -1,6 +1,5 @@ pub mod between; pub mod cycles; -pub mod filter; pub mod flatten; pub mod merge; pub mod query; diff --git a/crates/csvizmo-depgraph/src/algorithm/select.rs b/crates/csvizmo-depgraph/src/algorithm/select.rs index c500207..6debee8 100644 --- a/crates/csvizmo-depgraph/src/algorithm/select.rs +++ b/crates/csvizmo-depgraph/src/algorithm/select.rs @@ -1,18 +1,23 @@ -use std::collections::HashSet; +use std::collections::{HashSet, VecDeque}; use clap::Parser; use petgraph::Direction; +use petgraph::graph::NodeIndex; use super::{MatchKey, build_globset}; -use crate::{DepGraph, FlatGraphView}; +use crate::{DepGraph, Edge, FlatGraphView}; #[derive(Clone, Debug, Default, Parser)] pub struct SelectArgs { - /// Glob pattern to select nodes (can be repeated) - #[clap(short, long)] - pub pattern: Vec, + /// Glob pattern to include nodes (can be repeated) + #[clap(short = 'g', long)] + pub include: Vec, - /// Combine multiple patterns with AND instead of OR + /// Glob pattern to exclude nodes (can be repeated, always OR) + #[clap(short = 'x', long)] + pub exclude: Vec, + + /// Combine multiple include patterns with AND instead of OR #[clap(long)] pub and: bool, @@ -31,11 +36,21 @@ pub struct SelectArgs { /// Traverse up to N layers (implies --deps if no direction given) #[clap(long)] pub depth: Option, + + /// Preserve graph connectivity when excluding nodes + /// (creates direct edges, no self-loops or parallel edges) + #[clap(long)] + pub preserve_connectivity: bool, } impl SelectArgs { - pub fn pattern(mut self, p: impl Into) -> Self { - self.pattern.push(p.into()); + pub fn include(mut self, p: impl Into) -> Self { + self.include.push(p.into()); + self + } + + pub fn exclude(mut self, p: impl Into) -> Self { + self.exclude.push(p.into()); self } @@ -63,21 +78,34 @@ impl SelectArgs { self.depth = Some(n); self } + + pub fn preserve_connectivity(mut self) -> Self { + self.preserve_connectivity = true; + self + } } pub fn select(graph: &DepGraph, args: &SelectArgs) -> eyre::Result { - let globset = build_globset(&args.pattern)?; let view = FlatGraphView::new(graph); // No filters at all -> pass through the entire graph unchanged. let no_traversal = !args.deps && !args.rdeps && args.depth.is_none(); - if args.pattern.is_empty() && no_traversal { + if args.include.is_empty() && args.exclude.is_empty() && no_traversal { return Ok(graph.clone()); } - // If no patterns given, seed from root nodes; otherwise match by pattern. - let mut keep: HashSet<_> = if args.pattern.is_empty() { - view.roots().collect() + let include_globset = build_globset(&args.include)?; + let exclude_globset = build_globset(&args.exclude)?; + + // Build the initial keep set from --include patterns (or all nodes / roots). + let has_traversal = args.deps || args.rdeps || args.depth.is_some(); + let mut keep: HashSet<_> = if args.include.is_empty() { + if has_traversal { + view.roots().collect() + } else { + // Only --exclude given, no traversal: start with all nodes. + view.id_to_idx.values().copied().collect() + } } else { let mut matched = HashSet::new(); for (id, info) in graph.all_nodes() { @@ -87,9 +115,9 @@ pub fn select(graph: &DepGraph, args: &SelectArgs) -> eyre::Result { }; let is_match = if args.and { - globset.matches(text).len() == args.pattern.len() + include_globset.matches(text).len() == args.include.len() } else { - globset.is_match(text) + include_globset.is_match(text) }; if is_match && let Some(&idx) = view.id_to_idx.get(id.as_str()) { @@ -99,6 +127,7 @@ pub fn select(graph: &DepGraph, args: &SelectArgs) -> eyre::Result { matched }; + // Expand keep set via --deps/--rdeps/--depth (only applies to include). // --depth without an explicit direction implies --deps let deps = args.deps || args.depth.is_some(); if deps && args.rdeps { @@ -111,7 +140,112 @@ pub fn select(graph: &DepGraph, args: &SelectArgs) -> eyre::Result { keep = view.bfs(keep, Direction::Outgoing, args.depth); } - Ok(view.filter(&keep)) + // Remove nodes matching --exclude patterns from keep set. + let excluded: HashSet = if !args.exclude.is_empty() { + let mut matched = HashSet::new(); + for (id, info) in graph.all_nodes() { + let text = match args.key { + MatchKey::Id => id.as_str(), + MatchKey::Label => info.label.as_str(), + }; + + if exclude_globset.is_match(text) + && let Some(&idx) = view.id_to_idx.get(id.as_str()) + { + matched.insert(idx); + } + } + let excluded = keep.intersection(&matched).copied().collect::>(); + for &idx in &excluded { + keep.remove(&idx); + } + excluded + } else { + HashSet::new() + }; + + let mut result = view.filter(&keep); + + // Bypass excluded nodes: connect their surviving predecessors to surviving successors. + // BFS through chains of excluded nodes so that A->B->C->D with B,C excluded produces A->D. + if args.preserve_connectivity && !excluded.is_empty() { + let mut existing: HashSet<(String, String)> = result + .all_edges() + .iter() + .map(|e| (e.from.clone(), e.to.clone())) + .collect(); + + let mut bypass_edges = Vec::new(); + for &idx in &excluded { + let preds = surviving_neighbors(&view.pg, idx, Direction::Incoming, &keep); + let succs = surviving_neighbors(&view.pg, idx, Direction::Outgoing, &keep); + + for &pred in &preds { + let from = view.idx_to_id[pred.index()]; + for &succ in &succs { + let to = view.idx_to_id[succ.index()]; + if from != to && existing.insert((from.to_string(), to.to_string())) { + bypass_edges.push((from.to_string(), to.to_string())); + } + } + } + } + + for (from, to) in bypass_edges { + insert_edge(&mut result, &from, &to); + } + + result.clear_caches(); + } + + Ok(result) +} + +/// Insert a bypass edge into the deepest subgraph that contains both endpoints. +/// Falls back to the root graph if the endpoints are in different subgraphs. +fn insert_edge(graph: &mut DepGraph, from: &str, to: &str) { + for sg in &mut graph.subgraphs { + let has_from = sg.all_nodes().contains_key(from); + let has_to = sg.all_nodes().contains_key(to); + if has_from && has_to { + return insert_edge(sg, from, to); + } + } + graph.edges.push(Edge { + from: from.to_string(), + to: to.to_string(), + ..Default::default() + }); +} + +/// BFS from `start` in `direction`, traversing through removed nodes (those not in `keep`), +/// returning surviving nodes found at the boundary. +fn surviving_neighbors( + pg: &petgraph::Graph<(), ()>, + start: NodeIndex, + direction: Direction, + keep: &HashSet, +) -> Vec { + let mut result = Vec::new(); + let mut visited = HashSet::new(); + let mut queue = VecDeque::new(); + visited.insert(start); + queue.push_back(start); + + while let Some(node) = queue.pop_front() { + for neighbor in pg.neighbors_directed(node, direction) { + if !visited.insert(neighbor) { + continue; + } + if keep.contains(&neighbor) { + result.push(neighbor); + } else { + queue.push_back(neighbor); + } + } + } + + result } #[cfg(test)] @@ -154,7 +288,7 @@ mod tests { .collect() } - // -- pattern matching -- + // -- include pattern matching -- #[test] fn single_glob_pattern() { @@ -172,7 +306,7 @@ mod tests { ], vec![], ); - let args = SelectArgs::default().pattern("lib*"); + let args = SelectArgs::default().include("lib*"); let result = select(&g, &args).unwrap(); assert_eq!(node_ids(&result), vec!["libfoo", "libbar"]); assert_eq!(edge_pairs(&result), vec![("libfoo", "libbar")]); @@ -181,7 +315,7 @@ mod tests { #[test] fn match_by_id() { let g = make_graph(&[("1", "libfoo"), ("2", "libbar")], &[("1", "2")], vec![]); - let args = SelectArgs::default().pattern("1").key(MatchKey::Id); + let args = SelectArgs::default().include("1").key(MatchKey::Id); let result = select(&g, &args).unwrap(); assert_eq!(node_ids(&result), vec!["1"]); } @@ -189,7 +323,7 @@ mod tests { #[test] fn match_by_label() { let g = make_graph(&[("1", "libfoo"), ("2", "libbar")], &[("1", "2")], vec![]); - let args = SelectArgs::default().pattern("libbar"); + let args = SelectArgs::default().include("libbar"); let result = select(&g, &args).unwrap(); assert_eq!(node_ids(&result), vec!["2"]); } @@ -201,7 +335,7 @@ mod tests { &[("a", "b"), ("b", "c")], vec![], ); - let args = SelectArgs::default().pattern("a").pattern("c"); + let args = SelectArgs::default().include("a").include("c"); let result = select(&g, &args).unwrap(); assert_eq!(node_ids(&result), vec!["a", "c"]); assert!(edge_pairs(&result).is_empty()); @@ -219,8 +353,8 @@ mod tests { vec![], ); let args = SelectArgs::default() - .pattern("libfoo*") - .pattern("*alpha") + .include("libfoo*") + .include("*alpha") .and(); let result = select(&g, &args).unwrap(); assert_eq!(node_ids(&result), vec!["libfoo-alpha"]); @@ -229,7 +363,7 @@ mod tests { #[test] fn no_match_produces_empty_graph() { let g = make_graph(&[("a", "a"), ("b", "b")], &[("a", "b")], vec![]); - let args = SelectArgs::default().pattern("nonexistent"); + let args = SelectArgs::default().include("nonexistent"); let result = select(&g, &args).unwrap(); assert!(result.nodes.is_empty()); assert!(result.edges.is_empty()); @@ -245,7 +379,7 @@ mod tests { &[("a", "b"), ("b", "c"), ("a", "c")], vec![], ); - let args = SelectArgs::default().pattern("a").deps(); + let args = SelectArgs::default().include("a").deps(); let result = select(&g, &args).unwrap(); assert_eq!(node_ids(&result), vec!["a", "b", "c"]); } @@ -258,7 +392,7 @@ mod tests { &[("a", "b"), ("b", "c")], vec![], ); - let args = SelectArgs::default().pattern("c").rdeps(); + let args = SelectArgs::default().include("c").rdeps(); let result = select(&g, &args).unwrap(); assert_eq!(node_ids(&result), vec!["a", "b", "c"]); } @@ -271,7 +405,7 @@ mod tests { &[("a", "b"), ("b", "c"), ("c", "d")], vec![], ); - let args = SelectArgs::default().pattern("a").deps().depth(1); + let args = SelectArgs::default().include("a").deps().depth(1); let result = select(&g, &args).unwrap(); assert_eq!(node_ids(&result), vec!["a", "b"]); assert_eq!(edge_pairs(&result), vec![("a", "b")]); @@ -285,7 +419,7 @@ mod tests { &[("a", "b"), ("b", "c"), ("c", "d")], vec![], ); - let args = SelectArgs::default().pattern("b").deps().rdeps(); + let args = SelectArgs::default().include("b").deps().rdeps(); let result = select(&g, &args).unwrap(); assert_eq!(node_ids(&result), vec!["a", "b", "c", "d"]); assert_eq!( @@ -302,7 +436,7 @@ mod tests { &[("a", "b"), ("b", "c"), ("c", "d"), ("d", "e")], vec![], ); - let args = SelectArgs::default().pattern("c").deps().rdeps().depth(1); + let args = SelectArgs::default().include("c").deps().rdeps().depth(1); let result = select(&g, &args).unwrap(); assert_eq!(node_ids(&result), vec!["b", "c", "d"]); assert_eq!(edge_pairs(&result), vec![("b", "c"), ("c", "d")]); @@ -362,10 +496,248 @@ mod tests { &[("a", "b"), ("b", "c")], vec![make_graph(&[("c", "c")], &[], vec![])], ); - let args = SelectArgs::default().pattern("a").deps(); + let args = SelectArgs::default().include("a").deps(); let result = select(&g, &args).unwrap(); assert_eq!(node_ids(&result), vec!["a", "b"]); assert_eq!(result.subgraphs.len(), 1); assert_eq!(node_ids(&result.subgraphs[0]), vec!["c"]); } + + // -- exclude pattern matching -- + + #[test] + fn exclude_single_pattern() { + // myapp -> libfoo -> libbar, myapp -> libbar + let g = make_graph( + &[ + ("libfoo", "libfoo"), + ("libbar", "libbar"), + ("myapp", "myapp"), + ], + &[ + ("myapp", "libfoo"), + ("myapp", "libbar"), + ("libfoo", "libbar"), + ], + vec![], + ); + let args = SelectArgs::default().exclude("libfoo"); + let result = select(&g, &args).unwrap(); + assert_eq!(node_ids(&result), vec!["libbar", "myapp"]); + assert_eq!(edge_pairs(&result), vec![("myapp", "libbar")]); + } + + #[test] + fn exclude_multiple_patterns_or() { + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c")], + &[("a", "b"), ("b", "c")], + vec![], + ); + let args = SelectArgs::default().exclude("a").exclude("b"); + let result = select(&g, &args).unwrap(); + assert_eq!(node_ids(&result), vec!["c"]); + assert!(edge_pairs(&result).is_empty()); + } + + #[test] + fn exclude_no_match_returns_unchanged() { + let g = make_graph(&[("a", "a"), ("b", "b")], &[("a", "b")], vec![]); + let args = SelectArgs::default().exclude("nonexistent"); + let result = select(&g, &args).unwrap(); + assert_eq!(node_ids(&result), vec!["a", "b"]); + assert_eq!(edge_pairs(&result), vec![("a", "b")]); + } + + // -- exclude with preserve connectivity -- + + #[test] + fn preserve_connectivity_bypass() { + // a -> b -> c: exclude b, get a -> c + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c")], + &[("a", "b"), ("b", "c")], + vec![], + ); + let args = SelectArgs::default().exclude("b").preserve_connectivity(); + let result = select(&g, &args).unwrap(); + assert_eq!(node_ids(&result), vec!["a", "c"]); + assert_eq!(edge_pairs(&result), vec![("a", "c")]); + } + + #[test] + fn preserve_connectivity_chain() { + // a -> b -> c -> d: exclude b and c, get a -> d + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], + &[("a", "b"), ("b", "c"), ("c", "d")], + vec![], + ); + let args = SelectArgs::default() + .exclude("b") + .exclude("c") + .preserve_connectivity(); + let result = select(&g, &args).unwrap(); + assert_eq!(node_ids(&result), vec!["a", "d"]); + assert_eq!(edge_pairs(&result), vec![("a", "d")]); + } + + #[test] + fn preserve_connectivity_diamond_through_excluded() { + // a -> b -> d, a -> c -> d: exclude b and c, get single a -> d + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], + &[("a", "b"), ("a", "c"), ("b", "d"), ("c", "d")], + vec![], + ); + let args = SelectArgs::default() + .exclude("b") + .exclude("c") + .preserve_connectivity(); + let result = select(&g, &args).unwrap(); + assert_eq!(node_ids(&result), vec!["a", "d"]); + assert_eq!(edge_pairs(&result), vec![("a", "d")]); + } + + #[test] + fn preserve_connectivity_no_self_loops() { + // a -> b -> a: exclude b, should not create a -> a + let g = make_graph(&[("a", "a"), ("b", "b")], &[("a", "b"), ("b", "a")], vec![]); + let args = SelectArgs::default().exclude("b").preserve_connectivity(); + let result = select(&g, &args).unwrap(); + assert_eq!(node_ids(&result), vec!["a"]); + assert!(edge_pairs(&result).is_empty()); + } + + #[test] + fn preserve_connectivity_no_parallel_edges() { + // a -> b -> c, a -> c: exclude b, should not duplicate a -> c + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c")], + &[("a", "b"), ("b", "c"), ("a", "c")], + vec![], + ); + let args = SelectArgs::default().exclude("b").preserve_connectivity(); + let result = select(&g, &args).unwrap(); + assert_eq!(node_ids(&result), vec!["a", "c"]); + assert_eq!(edge_pairs(&result), vec![("a", "c")]); + } + + // -- exclude with subgraphs -- + + #[test] + fn exclude_preserves_subgraph_structure() { + // root: a, subgraph: { b, c, b->c }, edge a->b at root + // exclude a keeps b, c in subgraph with their edge + let g = make_graph( + &[("a", "a")], + &[("a", "b")], + vec![make_graph(&[("b", "b"), ("c", "c")], &[("b", "c")], vec![])], + ); + let args = SelectArgs::default().exclude("a"); + let result = select(&g, &args).unwrap(); + assert!(result.nodes.is_empty()); + assert!(result.edges.is_empty()); + assert_eq!(result.subgraphs.len(), 1); + assert_eq!(node_ids(&result.subgraphs[0]), vec!["b", "c"]); + assert_eq!(edge_pairs(&result.subgraphs[0]), vec![("b", "c")]); + } + + // -- preserve connectivity with subgraphs -- + + #[test] + fn preserve_connectivity_bypass_in_subgraph() { + // subgraph { a -> b -> c }: exclude b, bypass a -> c should be in the subgraph + let g = make_graph( + &[], + &[], + vec![make_graph( + &[("a", "a"), ("b", "b"), ("c", "c")], + &[("a", "b"), ("b", "c")], + vec![], + )], + ); + let args = SelectArgs::default().exclude("b").preserve_connectivity(); + let result = select(&g, &args).unwrap(); + // bypass a -> c should be inside the subgraph, not at root + assert!(result.edges.is_empty()); + assert_eq!(result.subgraphs.len(), 1); + let sg = &result.subgraphs[0]; + assert_eq!(node_ids(sg), vec!["a", "c"]); + assert_eq!(edge_pairs(sg), vec![("a", "c")]); + } + + #[test] + fn preserve_connectivity_no_parallel_edges_in_subgraph() { + // subgraph { a -> b -> c, a -> c }: exclude b, should not duplicate a -> c + let g = make_graph( + &[], + &[], + vec![make_graph( + &[("a", "a"), ("b", "b"), ("c", "c")], + &[("a", "b"), ("b", "c"), ("a", "c")], + vec![], + )], + ); + let args = SelectArgs::default().exclude("b").preserve_connectivity(); + let result = select(&g, &args).unwrap(); + assert!(result.edges.is_empty()); + assert_eq!(result.subgraphs.len(), 1); + let sg = &result.subgraphs[0]; + assert_eq!(node_ids(sg), vec!["a", "c"]); + assert_eq!(edge_pairs(sg), vec![("a", "c")]); + } + + #[test] + fn preserve_connectivity_cross_subgraph_bypass_at_root() { + // subgraph1 { a }, subgraph2 { c }, root: b, edges a->b, b->c at root + // exclude b, bypass a->c should be at root + let g = make_graph( + &[("b", "b")], + &[("a", "b"), ("b", "c")], + vec![ + make_graph(&[("a", "a")], &[], vec![]), + make_graph(&[("c", "c")], &[], vec![]), + ], + ); + let args = SelectArgs::default().exclude("b").preserve_connectivity(); + let result = select(&g, &args).unwrap(); + assert!(result.nodes.is_empty()); + assert_eq!(edge_pairs(&result), vec![("a", "c")]); + assert_eq!(result.subgraphs.len(), 2); + } + + // -- combined include + exclude -- + + #[test] + fn include_with_exclude() { + // a -> b -> c -> d: include a with deps, then exclude c + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], + &[("a", "b"), ("b", "c"), ("c", "d")], + vec![], + ); + let args = SelectArgs::default().include("a").deps().exclude("c"); + let result = select(&g, &args).unwrap(); + assert_eq!(node_ids(&result), vec!["a", "b", "d"]); + assert_eq!(edge_pairs(&result), vec![("a", "b")]); + } + + #[test] + fn include_with_exclude_preserve_connectivity() { + // a -> b -> c -> d: include a with deps, exclude c with preserve connectivity + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], + &[("a", "b"), ("b", "c"), ("c", "d")], + vec![], + ); + let args = SelectArgs::default() + .include("a") + .deps() + .exclude("c") + .preserve_connectivity(); + let result = select(&g, &args).unwrap(); + assert_eq!(node_ids(&result), vec!["a", "b", "d"]); + assert_eq!(edge_pairs(&result), vec![("a", "b"), ("b", "d")]); + } } diff --git a/crates/csvizmo-depgraph/src/bin/depfilter.rs b/crates/csvizmo-depgraph/src/bin/depfilter.rs index 85e4d8f..0234dd2 100644 --- a/crates/csvizmo-depgraph/src/bin/depfilter.rs +++ b/crates/csvizmo-depgraph/src/bin/depfilter.rs @@ -5,15 +5,14 @@ use clap::{Parser, Subcommand}; use csvizmo_depgraph::algorithm; use csvizmo_depgraph::algorithm::between::BetweenArgs; use csvizmo_depgraph::algorithm::cycles::CyclesArgs; -use csvizmo_depgraph::algorithm::filter::FilterArgs; use csvizmo_depgraph::algorithm::select::SelectArgs; use csvizmo_depgraph::emit::OutputFormat; use csvizmo_depgraph::parse::InputFormat; use csvizmo_utils::stdio::{get_input_reader, get_output_writer}; -/// Filter or select nodes from dependency graphs. +/// Select or exclude nodes from dependency graphs. /// -/// Operations are performed via select or filter subcommands. +/// Operations are performed via select, between, or cycles subcommands. /// Chain operations by piping: depfilter ... | depfilter ... #[derive(Debug, Parser)] #[clap(version, verbatim_doc_comment)] @@ -46,8 +45,6 @@ struct Args { enum Command { /// Select nodes matching patterns and optionally their deps/rdeps Select(SelectArgs), - /// Remove nodes matching patterns and optionally cascade to deps/rdeps - Filter(FilterArgs), /// Extract the subgraph of all directed paths between matched query nodes Between(BetweenArgs), /// Detect cycles (strongly connected components) and output each as a subgraph @@ -99,7 +96,6 @@ fn main() -> eyre::Result<()> { let graph = match &args.command { Command::Select(select_args) => algorithm::select::select(&graph, select_args)?, - Command::Filter(filter_args) => algorithm::filter::filter(&graph, filter_args)?, Command::Between(between_args) => algorithm::between::between(&graph, between_args)?, Command::Cycles(cycles_args) => algorithm::cycles::cycles(&graph, cycles_args)?, }; diff --git a/crates/csvizmo-depgraph/tests/depfilter.rs b/crates/csvizmo-depgraph/tests/depfilter.rs index 21612e5..8e13e36 100644 --- a/crates/csvizmo-depgraph/tests/depfilter.rs +++ b/crates/csvizmo-depgraph/tests/depfilter.rs @@ -10,7 +10,7 @@ fn select_single_pattern() { let output = tool!("depfilter") .args([ "select", - "--pattern", + "--include", "lib*", "--input-format", "tgf", @@ -30,7 +30,7 @@ fn select_by_id() { let output = tool!("depfilter") .args([ "select", - "--pattern", + "--include", "1", "--key", "id", @@ -53,7 +53,7 @@ fn select_with_deps() { let output = tool!("depfilter") .args([ "select", - "--pattern", + "--include", "myapp", "--deps", "--input-format", @@ -76,7 +76,7 @@ fn select_with_rdeps() { let output = tool!("depfilter") .args([ "select", - "--pattern", + "--include", "libbar", "--rdeps", "--input-format", @@ -101,7 +101,7 @@ fn select_with_depth() { let output = tool!("depfilter") .args([ "select", - "--pattern", + "--include", "a", "--deps", "--depth", @@ -149,9 +149,9 @@ fn select_multiple_patterns_and() { let output = tool!("depfilter") .args([ "select", - "--pattern", + "--include", "libfoo*", - "--pattern", + "--include", "*alpha", "--and", "--input-format", @@ -175,7 +175,7 @@ fn select_with_deps_and_rdeps() { let output = tool!("depfilter") .args([ "select", - "--pattern", + "--include", "b", "--deps", "--rdeps", @@ -192,14 +192,14 @@ fn select_with_deps_and_rdeps() { assert_eq!(stdout, "a\nb\nc\nd\n#\na\tb\nb\tc\nc\td\n"); } -// -- filter integration tests: one per CLI flag -- +// -- exclude integration tests -- #[test] -fn filter_single_pattern() { +fn exclude_single_pattern() { let output = tool!("depfilter") .args([ - "filter", - "--pattern", + "select", + "--exclude", "libfoo", "--input-format", "tgf", @@ -215,58 +215,14 @@ fn filter_single_pattern() { } #[test] -fn filter_with_and() { - let graph = "libfoo-alpha\nlibfoo-beta\nlibbar-alpha\n#\n"; - let output = tool!("depfilter") - .args([ - "filter", - "--pattern", - "libfoo*", - "--pattern", - "*alpha", - "--and", - "--input-format", - "tgf", - "--output-format", - "tgf", - ]) - .write_stdin(graph) - .captured_output() - .unwrap(); - assert!(output.status.success()); - let stdout = String::from_utf8_lossy(&output.stdout); - assert_eq!(stdout, "libfoo-beta\nlibbar-alpha\n#\n"); -} - -#[test] -fn filter_with_deps() { - let output = tool!("depfilter") - .args([ - "filter", - "--pattern", - "myapp", - "--deps", - "--input-format", - "tgf", - "--output-format", - "tgf", - ]) - .write_stdin(SIMPLE_GRAPH) - .captured_output() - .unwrap(); - assert!(output.status.success()); - let stdout = String::from_utf8_lossy(&output.stdout); - assert_eq!(stdout, "#\n"); -} - -#[test] -fn filter_with_rdeps() { +fn exclude_by_id() { let output = tool!("depfilter") .args([ - "filter", - "--pattern", - "libbar", - "--rdeps", + "select", + "--exclude", + "1", + "--key", + "id", "--input-format", "tgf", "--output-format", @@ -277,75 +233,52 @@ fn filter_with_rdeps() { .unwrap(); assert!(output.status.success()); let stdout = String::from_utf8_lossy(&output.stdout); - assert_eq!(stdout, "#\n"); + assert_eq!(stdout, "2\tlibbar\n3\tmyapp\n#\n3\t2\n"); } #[test] -fn filter_with_deps_and_rdeps() { - // a -> b -> c, d -> c: filter b with both directions removes a, b, c but keeps d - let graph = "a\nb\nc\nd\n#\na\tb\nb\tc\nd\tc\n"; +fn exclude_with_preserve_connectivity() { + let chain_graph = "a\nb\nc\n#\na\tb\nb\tc\n"; let output = tool!("depfilter") .args([ - "filter", - "--pattern", + "select", + "--exclude", "b", - "--deps", - "--rdeps", - "--input-format", - "tgf", - "--output-format", - "tgf", - ]) - .write_stdin(graph) - .captured_output() - .unwrap(); - assert!(output.status.success()); - let stdout = String::from_utf8_lossy(&output.stdout); - assert_eq!(stdout, "d\n#\n"); -} - -#[test] -fn filter_by_id() { - let output = tool!("depfilter") - .args([ - "filter", - "--pattern", - "1", - "--key", - "id", + "--preserve-connectivity", "--input-format", "tgf", "--output-format", "tgf", ]) - .write_stdin(SIMPLE_GRAPH) + .write_stdin(chain_graph) .captured_output() .unwrap(); assert!(output.status.success()); let stdout = String::from_utf8_lossy(&output.stdout); - assert_eq!(stdout, "2\tlibbar\n3\tmyapp\n#\n3\t2\n"); + assert_eq!(stdout, "a\nc\n#\na\tc\n"); } #[test] -fn filter_with_preserve_connectivity() { - let chain_graph = "a\nb\nc\n#\na\tb\nb\tc\n"; +fn exclude_multiple_patterns() { + let graph = "a\nb\nc\nd\n#\na\tb\nb\tc\nc\td\n"; let output = tool!("depfilter") .args([ - "filter", - "--pattern", + "select", + "--exclude", "b", - "--preserve-connectivity", + "--exclude", + "c", "--input-format", "tgf", "--output-format", "tgf", ]) - .write_stdin(chain_graph) + .write_stdin(graph) .captured_output() .unwrap(); assert!(output.status.success()); let stdout = String::from_utf8_lossy(&output.stdout); - assert_eq!(stdout, "a\nc\n#\na\tc\n"); + assert_eq!(stdout, "a\nd\n#\n"); } #[test] @@ -353,7 +286,7 @@ fn select_dot_output() { let output = tool!("depfilter") .args([ "select", - "--pattern", + "--include", "lib*", "--input-format", "tgf", @@ -379,8 +312,8 @@ digraph { #[cfg(feature = "dot")] #[test] -fn filter_preserve_connectivity_subgraph() { - // subgraph { a -> b -> c }: remove b, bypass a -> c stays in subgraph +fn exclude_preserve_connectivity_subgraph() { + // subgraph { a -> b -> c }: exclude b, bypass a -> c stays in subgraph let dot_input = "\ digraph { subgraph cluster_0 { @@ -394,8 +327,8 @@ digraph { "; let output = tool!("depfilter") .args([ - "filter", - "--pattern", + "select", + "--exclude", "b", "--preserve-connectivity", "--input-format", @@ -424,7 +357,7 @@ digraph { #[cfg(feature = "dot")] #[test] -fn filter_dot_input() { +fn exclude_dot_input() { let dot_input = "\ digraph { \"1\" [label=\"libfoo\"]; @@ -437,8 +370,8 @@ digraph { "; let output = tool!("depfilter") .args([ - "filter", - "--pattern", + "select", + "--exclude", "libfoo", "--input-format", "dot", @@ -453,6 +386,33 @@ digraph { assert_eq!(stdout, "2\tlibbar\n3\tmyapp\n#\n3\t2\n"); } +// -- include + exclude combined integration test -- + +#[test] +fn include_with_exclude() { + // a -> b -> c -> d: include a with deps, exclude c + let graph = "a\nb\nc\nd\n#\na\tb\nb\tc\nc\td\n"; + let output = tool!("depfilter") + .args([ + "select", + "-g", + "a", + "--deps", + "-x", + "c", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\nb\nd\n#\na\tb\n"); +} + // -- between integration tests -- #[test] @@ -462,9 +422,9 @@ fn between_two_nodes() { let output = tool!("depfilter") .args([ "between", - "-p", + "-g", "a", - "-p", + "-g", "c", "--input-format", "tgf", @@ -484,9 +444,9 @@ fn between_by_id() { let output = tool!("depfilter") .args([ "between", - "-p", + "-g", "1", - "-p", + "-g", "2", "--key", "id", @@ -510,7 +470,7 @@ fn between_glob_multiple_nodes() { let output = tool!("depfilter") .args([ "between", - "-p", + "-g", "?", "--input-format", "tgf", @@ -531,7 +491,7 @@ fn between_no_matching_patterns() { let output = tool!("depfilter") .args([ "between", - "-p", + "-g", "nonexistent", "--input-format", "tgf", @@ -553,9 +513,9 @@ fn between_no_path() { let output = tool!("depfilter") .args([ "between", - "-p", + "-g", "a", - "-p", + "-g", "c", "--input-format", "tgf", @@ -580,9 +540,9 @@ fn between_cargo_metadata_fixture() { let output = tool!("depfilter") .args([ "between", - "-p", + "-g", "csvizmo-depgraph", - "-p", + "-g", "clap*", "--input-format", "cargo-metadata", From 297676a054ebda5e510c7ea899b394656900b7bf Mon Sep 17 00:00:00 2001 From: Austin Gill Date: Thu, 19 Feb 2026 19:06:46 -0600 Subject: [PATCH 7/7] Release csvizmo 0.7.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index cd8bc8f..28fe422 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ members = ["crates/*"] resolver = "3" [workspace.package] -version = "0.6.0" +version = "0.7.0" edition = "2024" license = "MIT" rust-version = "1.93"