diff --git a/Cargo.toml b/Cargo.toml index 1bb355e..cd8bc8f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,7 @@ itertools-num = "0.1.3" kernel-density-estimation = "0.2.0" ordered-float = "5.0.0" petgraph = "0.7" +regex = "1" pathdiff = "0.2.3" serde = { version = "1.0.217", features = ["derive"] } serde_json = "1" diff --git a/README.md b/README.md index 85dc855..ff06f31 100644 --- a/README.md +++ b/README.md @@ -270,6 +270,33 @@ digraph { > The `depfilter` tool shares the same GPL-2.0 license caveat as `depconv` with respect to DOT > parsing. +## deptransform + +Structural transformations on dependency graphs. Works on the same formats as `depconv`, and is +designed to be chained with pipes. + +The `deptransform` tool supports the following subcommands: + +* `deptransform reverse` - reverse the direction of all edges in the graph +* `deptransform simplify` - remove redundant edges (e.g. if A->B and B->C, then A->C is redundant) +* `deptransform shorten` - shorten node IDs that look like paths (`minpath`, but for node IDs) +* `deptransform sub` - `sed`, but for node IDs and node / edge attributes +* `deptransform merge` - merge multiple graphs into one +* `deptransform flatten` - recursively flatten subgraphs into the parent graph + +```sh +# Collapse bitbake task-level node IDs (acl-native.do_compile -> acl-native), then remove the +# now-misleading node labels +$ cat data/depconv/bitbake.curl.task-depends.dot | + deptransform sub --key=id 's/\.do_.*//' | + deptransform sub --key=node:label 's/.*//' +``` + +> [!NOTE] +> +> The `deptransform` tool shares the same GPL-2.0 license caveat as `depconv` with respect to DOT +> parsing. + ## can2csv Parse basic data from a CAN frame into a CSV record. Faster than `sed`, and also parses the canid. 
diff --git a/crates/csvizmo-depgraph/Cargo.toml b/crates/csvizmo-depgraph/Cargo.toml index d453493..7d9015a 100644 --- a/crates/csvizmo-depgraph/Cargo.toml +++ b/crates/csvizmo-depgraph/Cargo.toml @@ -18,7 +18,9 @@ either = { workspace = true, optional = true } eyre.workspace = true globset.workspace = true indexmap.workspace = true +csvizmo-minpath.workspace = true petgraph.workspace = true +regex.workspace = true mermaid-rs-renderer.workspace = true serde.workspace = true serde_json.workspace = true @@ -32,3 +34,4 @@ dot = ["dep:dot-parser", "dep:either"] [dev-dependencies] csvizmo-test.workspace = true pretty_assertions.workspace = true +tempfile.workspace = true diff --git a/crates/csvizmo-depgraph/src/algorithm/flatten.rs b/crates/csvizmo-depgraph/src/algorithm/flatten.rs new file mode 100644 index 0000000..91ef61b --- /dev/null +++ b/crates/csvizmo-depgraph/src/algorithm/flatten.rs @@ -0,0 +1,71 @@ +use crate::DepGraph; + +/// Flatten a graph by moving all nodes and edges from subgraphs to the top level. 
+pub fn flatten(graph: &DepGraph) -> DepGraph { + DepGraph { + id: graph.id.clone(), + attrs: graph.attrs.clone(), + nodes: graph.all_nodes().clone(), + edges: graph.all_edges().clone(), + ..Default::default() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{Edge, NodeInfo}; + + #[test] + fn flat_graph_unchanged() { + let g = DepGraph { + nodes: [("a", "A"), ("b", "B")] + .into_iter() + .map(|(id, label)| (id.to_string(), NodeInfo::new(label))) + .collect(), + edges: vec![Edge { + from: "a".to_string(), + to: "b".to_string(), + ..Default::default() + }], + ..Default::default() + }; + let result = flatten(&g); + assert_eq!(result.nodes.len(), 2); + assert_eq!(result.edges.len(), 1); + assert!(result.subgraphs.is_empty()); + } + + #[test] + fn flattens_subgraph_nodes_and_edges() { + let sub = DepGraph { + nodes: [("c", "C")] + .into_iter() + .map(|(id, label)| (id.to_string(), NodeInfo::new(label))) + .collect(), + edges: vec![Edge { + from: "b".to_string(), + to: "c".to_string(), + ..Default::default() + }], + ..Default::default() + }; + let g = DepGraph { + nodes: [("a", "A"), ("b", "B")] + .into_iter() + .map(|(id, label)| (id.to_string(), NodeInfo::new(label))) + .collect(), + edges: vec![Edge { + from: "a".to_string(), + to: "b".to_string(), + ..Default::default() + }], + subgraphs: vec![sub], + ..Default::default() + }; + let result = flatten(&g); + assert_eq!(result.nodes.len(), 3); + assert_eq!(result.edges.len(), 2); + assert!(result.subgraphs.is_empty()); + } +} diff --git a/crates/csvizmo-depgraph/src/algorithm/merge.rs b/crates/csvizmo-depgraph/src/algorithm/merge.rs new file mode 100644 index 0000000..a542321 --- /dev/null +++ b/crates/csvizmo-depgraph/src/algorithm/merge.rs @@ -0,0 +1,256 @@ +use indexmap::IndexMap; + +use crate::{DepGraph, Edge, NodeInfo}; + +/// Merge multiple dependency graphs into one. +/// +/// Nodes are unioned by ID (later graphs overwrite on collision). 
+/// Edges are deduplicated by (from, to): the first non-`None` label wins, and +/// attributes are merged with earlier values taking precedence. +/// Named subgraphs with the same ID are recursively merged; +/// unnamed subgraphs are kept as-is and emitted after the named ones. +/// +/// The result's own `id` and `attrs` are left at their defaults; the inputs' +/// top-level `id` and `attrs` are not carried over. +pub fn merge(graphs: &[DepGraph]) -> DepGraph { + let mut nodes: IndexMap = IndexMap::new(); + let mut edge_map: IndexMap<(String, String), Edge> = IndexMap::new(); + let mut named_subgraphs: IndexMap> = IndexMap::new(); + let mut unnamed_subgraphs: Vec = Vec::new(); + + for graph in graphs { + for (id, info) in &graph.nodes { + nodes.insert(id.clone(), info.clone()); + } + for edge in &graph.edges { + let key = (edge.from.clone(), edge.to.clone()); + match edge_map.get_mut(&key) { + Some(existing) => { + if existing.label.is_none() { + existing.label.clone_from(&edge.label); + } + for (k, v) in &edge.attrs { + existing.attrs.entry(k.clone()).or_insert_with(|| v.clone()); + } + } + None => { + edge_map.insert(key, edge.clone()); + } + } + } + for sg in &graph.subgraphs { + match &sg.id { + Some(id) => named_subgraphs + .entry(id.clone()) + .or_default() + .push(sg.clone()), + None => unnamed_subgraphs.push(sg.clone()), + } + } + } + + let mut subgraphs: Vec = Vec::new(); + for (id, sgs) in named_subgraphs { + let mut merged = merge(&sgs); + merged.id = Some(id); + subgraphs.push(merged); + } + subgraphs.extend(unnamed_subgraphs); + + DepGraph { + nodes, + edges: edge_map.into_values().collect(), + subgraphs, + ..Default::default() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_graph(nodes: &[(&str, &str)], edges: &[(&str, &str)]) -> DepGraph { + DepGraph { + nodes: nodes + .iter() + .map(|(id, label)| (id.to_string(), NodeInfo::new(*label))) + .collect(), + edges: edges + .iter() + .map(|(from, to)| Edge { + from: from.to_string(), + to: to.to_string(), + ..Default::default() + }) + .collect(), + ..Default::default() + } + } + + fn node_ids(graph: &DepGraph) -> Vec<&str> { + graph.nodes.keys().map(|s| 
s.as_str()).collect() + } + + fn edge_pairs(graph: &DepGraph) -> Vec<(&str, &str)> { + graph + .edges + .iter() + .map(|e| (e.from.as_str(), e.to.as_str())) + .collect() + } + + #[test] + fn merge_disjoint() { + let g1 = make_graph(&[("a", "A")], &[]); + let g2 = make_graph(&[("b", "B")], &[]); + let result = merge(&[g1, g2]); + assert_eq!(node_ids(&result), vec!["a", "b"]); + } + + #[test] + fn merge_overlapping_nodes() { + let g1 = make_graph(&[("a", "A1")], &[]); + let g2 = make_graph(&[("a", "A2")], &[]); + let result = merge(&[g1, g2]); + assert_eq!(node_ids(&result), vec!["a"]); + // Later graph overwrites + assert_eq!(result.nodes["a"].label, "A2"); + } + + #[test] + fn merge_edges() { + let g1 = make_graph(&[("a", "A"), ("b", "B")], &[("a", "b")]); + let g2 = make_graph(&[("b", "B"), ("c", "C")], &[("b", "c")]); + let result = merge(&[g1, g2]); + assert_eq!(edge_pairs(&result), vec![("a", "b"), ("b", "c")]); + } + + #[test] + fn merge_deduplicates_edges() { + let g1 = make_graph(&[("a", "A"), ("b", "B")], &[("a", "b")]); + let g2 = make_graph(&[("a", "A"), ("b", "B")], &[("a", "b")]); + let result = merge(&[g1, g2]); + assert_eq!(edge_pairs(&result), vec![("a", "b")]); + } + + #[test] + fn merge_edges_first_label_wins() { + let mut g1 = make_graph(&[("a", "A"), ("b", "B")], &[]); + g1.edges.push(Edge { + from: "a".to_string(), + to: "b".to_string(), + label: Some("uses".to_string()), + ..Default::default() + }); + let mut g2 = make_graph(&[("a", "A"), ("b", "B")], &[]); + g2.edges.push(Edge { + from: "a".to_string(), + to: "b".to_string(), + label: Some("depends_on".to_string()), + ..Default::default() + }); + let result = merge(&[g1, g2]); + assert_eq!(result.edges.len(), 1); + assert_eq!(result.edges[0].label.as_deref(), Some("uses")); + } + + #[test] + fn merge_edges_attrs_merged() { + let mut g1 = make_graph(&[("a", "A"), ("b", "B")], &[]); + let mut e1 = Edge { + from: "a".to_string(), + to: "b".to_string(), + ..Default::default() + }; + 
e1.attrs.insert("color".to_string(), "red".to_string()); + g1.edges.push(e1); + + let mut g2 = make_graph(&[("a", "A"), ("b", "B")], &[]); + let mut e2 = Edge { + from: "a".to_string(), + to: "b".to_string(), + ..Default::default() + }; + e2.attrs.insert("color".to_string(), "blue".to_string()); + e2.attrs.insert("style".to_string(), "dashed".to_string()); + g2.edges.push(e2); + + let result = merge(&[g1, g2]); + assert_eq!(result.edges.len(), 1); + // First wins for conflicting attrs + assert_eq!(result.edges[0].attrs["color"], "red"); + // New attrs from later graph are added + assert_eq!(result.edges[0].attrs["style"], "dashed"); + } + + #[test] + fn merge_empty() { + let result = merge(&[DepGraph::default(), DepGraph::default()]); + assert!(result.nodes.is_empty()); + assert!(result.edges.is_empty()); + } + + fn make_subgraph(id: &str, nodes: &[(&str, &str)], edges: &[(&str, &str)]) -> DepGraph { + DepGraph { + id: Some(id.to_string()), + nodes: nodes + .iter() + .map(|(id, label)| (id.to_string(), NodeInfo::new(*label))) + .collect(), + edges: edges + .iter() + .map(|(from, to)| Edge { + from: from.to_string(), + to: to.to_string(), + ..Default::default() + }) + .collect(), + ..Default::default() + } + } + + #[test] + fn merge_preserves_subgraphs() { + let g1 = DepGraph { + nodes: [("a", "A")] + .into_iter() + .map(|(id, label)| (id.to_string(), NodeInfo::new(label))) + .collect(), + subgraphs: vec![make_subgraph("cluster_0", &[("c", "C")], &[])], + ..Default::default() + }; + let g2 = make_graph(&[("b", "B")], &[]); + let result = merge(&[g1, g2]); + assert_eq!(node_ids(&result), vec!["a", "b"]); + assert_eq!(result.subgraphs.len(), 1); + assert_eq!(result.subgraphs[0].id.as_deref(), Some("cluster_0")); + assert_eq!(node_ids(&result.subgraphs[0]), vec!["c"]); + } + + #[test] + fn merge_named_subgraphs_by_id() { + let g1 = DepGraph { + subgraphs: vec![make_subgraph("cluster_0", &[("a", "A")], &[])], + ..Default::default() + }; + let g2 = DepGraph { + 
subgraphs: vec![make_subgraph("cluster_0", &[("b", "B")], &[])], + ..Default::default() + }; + let result = merge(&[g1, g2]); + assert_eq!(result.subgraphs.len(), 1); + assert_eq!(result.subgraphs[0].id.as_deref(), Some("cluster_0")); + assert_eq!(node_ids(&result.subgraphs[0]), vec!["a", "b"]); + } + + #[test] + fn merge_disjoint_subgraphs() { + let g1 = DepGraph { + subgraphs: vec![make_subgraph("cluster_a", &[("a", "A")], &[])], + ..Default::default() + }; + let g2 = DepGraph { + subgraphs: vec![make_subgraph("cluster_b", &[("b", "B")], &[])], + ..Default::default() + }; + let result = merge(&[g1, g2]); + assert_eq!(result.subgraphs.len(), 2); + } +} diff --git a/crates/csvizmo-depgraph/src/algorithm/mod.rs b/crates/csvizmo-depgraph/src/algorithm/mod.rs index d43e1b5..e816e11 100644 --- a/crates/csvizmo-depgraph/src/algorithm/mod.rs +++ b/crates/csvizmo-depgraph/src/algorithm/mod.rs @@ -1,7 +1,13 @@ pub mod between; pub mod cycles; pub mod filter; +pub mod flatten; +pub mod merge; +pub mod reverse; pub mod select; +pub mod shorten; +pub mod simplify; +pub mod sub; use globset::{Glob, GlobSet, GlobSetBuilder}; diff --git a/crates/csvizmo-depgraph/src/algorithm/reverse.rs b/crates/csvizmo-depgraph/src/algorithm/reverse.rs new file mode 100644 index 0000000..9eb880e --- /dev/null +++ b/crates/csvizmo-depgraph/src/algorithm/reverse.rs @@ -0,0 +1,115 @@ +use crate::DepGraph; + +/// Reverse the direction of all edges in the graph. 
+pub fn reverse(graph: &DepGraph) -> DepGraph { + reverse_inner(graph) +} + +fn reverse_inner(graph: &DepGraph) -> DepGraph { + DepGraph { + id: graph.id.clone(), + attrs: graph.attrs.clone(), + nodes: graph.nodes.clone(), + edges: graph + .edges + .iter() + .map(|e| crate::Edge { + from: e.to.clone(), + to: e.from.clone(), + label: e.label.clone(), + attrs: e.attrs.clone(), + }) + .collect(), + subgraphs: graph.subgraphs.iter().map(reverse_inner).collect(), + ..Default::default() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{Edge, NodeInfo}; + + fn make_graph( + nodes: &[(&str, &str)], + edges: &[(&str, &str)], + subgraphs: Vec, + ) -> DepGraph { + DepGraph { + nodes: nodes + .iter() + .map(|(id, label)| (id.to_string(), NodeInfo::new(*label))) + .collect(), + edges: edges + .iter() + .map(|(from, to)| Edge { + from: from.to_string(), + to: to.to_string(), + ..Default::default() + }) + .collect(), + subgraphs, + ..Default::default() + } + } + + fn edge_pairs(graph: &DepGraph) -> Vec<(&str, &str)> { + graph + .edges + .iter() + .map(|e| (e.from.as_str(), e.to.as_str())) + .collect() + } + + #[test] + fn reverses_edges() { + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C")], + &[("a", "b"), ("b", "c")], + vec![], + ); + let result = reverse(&g); + assert_eq!(edge_pairs(&result), vec![("b", "a"), ("c", "b")]); + } + + #[test] + fn preserves_nodes() { + let g = make_graph(&[("a", "A"), ("b", "B")], &[("a", "b")], vec![]); + let result = reverse(&g); + assert_eq!(result.nodes.len(), 2); + assert_eq!(result.nodes["a"].label, "A"); + assert_eq!(result.nodes["b"].label, "B"); + } + + #[test] + fn reverses_subgraph_edges() { + let sub = make_graph(&[("c", "C")], &[("c", "d")], vec![]); + let g = make_graph(&[("a", "A"), ("d", "D")], &[("a", "d")], vec![sub]); + let result = reverse(&g); + assert_eq!(edge_pairs(&result), vec![("d", "a")]); + assert_eq!(edge_pairs(&result.subgraphs[0]), vec![("d", "c")]); + } + + #[test] + fn empty_graph() { + 
let g = DepGraph::default(); + let result = reverse(&g); + assert!(result.nodes.is_empty()); + assert!(result.edges.is_empty()); + } + + #[test] + fn preserves_edge_attrs() { + let mut g = make_graph(&[("a", "A"), ("b", "B")], &[], vec![]); + g.edges.push(Edge { + from: "a".to_string(), + to: "b".to_string(), + label: Some("dep".to_string()), + ..Default::default() + }); + let result = reverse(&g); + assert_eq!(result.edges[0].from, "b"); + assert_eq!(result.edges[0].to, "a"); + assert_eq!(result.edges[0].label.as_deref(), Some("dep")); + } +} diff --git a/crates/csvizmo-depgraph/src/algorithm/shorten.rs b/crates/csvizmo-depgraph/src/algorithm/shorten.rs new file mode 100644 index 0000000..48cd88c --- /dev/null +++ b/crates/csvizmo-depgraph/src/algorithm/shorten.rs @@ -0,0 +1,409 @@ +use std::collections::HashSet; +use std::path::PathBuf; + +use csvizmo_minpath::PathTransforms; +use indexmap::IndexMap; + +use crate::{DepGraph, Edge, NodeInfo}; + +/// Which fields to shorten. +#[derive(Debug, Clone, Copy, Default, clap::ValueEnum)] +pub enum ShortenKey { + Id, + Label, + #[default] + Both, +} + +impl std::fmt::Display for ShortenKey { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use clap::ValueEnum; + + f.write_str(self.to_possible_value().unwrap().get_name()) + } +} + +/// Shorten node IDs and/or labels using minpath transforms. +pub fn shorten( + graph: &DepGraph, + separator: &str, + key: ShortenKey, + transforms: &PathTransforms, +) -> DepGraph { + let all_nodes = graph.all_nodes(); + + let shorten_ids = matches!(key, ShortenKey::Id | ShortenKey::Both); + let shorten_labels = matches!(key, ShortenKey::Label | ShortenKey::Both); + + // Build ID mapping. + let id_map: IndexMap = if shorten_ids { + build_mapping(all_nodes.keys().map(|s| s.as_str()), separator, transforms) + } else { + IndexMap::new() + }; + + // Build label mapping. 
+ let label_map: IndexMap = if shorten_labels { + let labels: Vec<&str> = all_nodes.values().map(|n| n.label.as_str()).collect(); + build_mapping(labels.into_iter(), separator, transforms) + } else { + IndexMap::new() + }; + + remap_graph(graph, &id_map, &label_map) +} + +/// Convert strings to path form, apply minpath transforms, convert back. +fn build_mapping<'a>( + values: impl Iterator, + separator: &str, + transforms: &PathTransforms, +) -> IndexMap { + let originals: Vec<&str> = values.collect(); + + // Convert to path form by replacing separator with '/'. + let paths: Vec = originals + .iter() + .map(|s| s.replace(separator, "/")) + .collect(); + + let shortened = transforms.build(&paths); + + let mut mapping = IndexMap::new(); + for (i, original) in originals.iter().enumerate() { + let short = shortened + .shorten(&paths[i]) + .to_string_lossy() + .replace('/', separator); + if short != *original { + mapping.insert(original.to_string(), short); + } + } + mapping +} + +/// Build a [`PathTransforms`] from CLI arguments. +/// +/// If no transform flags are set, defaults to `strip_common_prefix + minimal_unique_suffix`. +pub fn build_transforms(args: &ShortenArgs) -> PathTransforms { + let any_set = args.home_dir + || args.resolve_relative + || args.relative_to.is_some() + || !args.strip_prefix.is_empty() + || args.smart_abbreviate + || args.strip_common_prefix + || args.minimal_unique_suffix + || args.single_letter; + + if any_set { + PathTransforms::new() + .strip_prefix(args.strip_prefix.clone()) + .home_dir(args.home_dir) + .resolve_relative(args.resolve_relative) + .relative_to(args.relative_to.as_ref()) + .smart_abbreviate(args.smart_abbreviate) + .strip_common_prefix(args.strip_common_prefix) + .minimal_unique_suffix(args.minimal_unique_suffix) + .single_letter(args.single_letter) + } else { + PathTransforms::new() + .strip_common_prefix(true) + .minimal_unique_suffix(true) + } +} + +/// CLI arguments for the shorten subcommand. 
+#[derive(Clone, Debug, Default, clap::Parser)] +pub struct ShortenArgs { + /// Character used to split node IDs into path components + #[clap(long, default_value = "/")] + pub separator: String, + + /// Which fields to shorten: id, label, or both + #[clap(long, default_value_t = ShortenKey::default())] + pub key: ShortenKey, + + /// Replace `/home/$USER` with `~` + #[clap(long)] + pub home_dir: bool, + + /// Normalize . and .. path components + #[clap(long)] + pub resolve_relative: bool, + + /// Make paths relative to a base path + #[clap(long)] + pub relative_to: Option, + + /// Remove the given prefix (can be repeated) + #[clap(long)] + pub strip_prefix: Vec, + + /// Abbreviate common directory names (Documents -> docs, etc.) + #[clap(long)] + pub smart_abbreviate: bool, + + /// Remove the prefix shared by all paths + #[clap(long)] + pub strip_common_prefix: bool, + + /// Shorten to the minimal unique suffix + #[clap(long)] + pub minimal_unique_suffix: bool, + + /// Abbreviate directory components to single letters + #[clap(long)] + pub single_letter: bool, +} + +fn remap_graph( + graph: &DepGraph, + id_map: &IndexMap, + label_map: &IndexMap, +) -> DepGraph { + let mut placed: HashSet = HashSet::new(); + remap_inner(graph, id_map, label_map, &mut placed) +} + +fn remap_inner( + graph: &DepGraph, + id_map: &IndexMap, + label_map: &IndexMap, + placed: &mut HashSet, +) -> DepGraph { + let nodes: IndexMap = graph + .nodes + .iter() + .filter_map(|(id, info)| { + let new_id = id_map.get(id).unwrap_or(id).clone(); + if !placed.insert(new_id.clone()) { + return None; // already placed (ID collision from shortening) + } + let mut info = info.clone(); + if let Some(new_label) = label_map.get(&info.label) { + info.label = new_label.clone(); + } + Some((new_id, info)) + }) + .collect(); + + let subgraphs: Vec = graph + .subgraphs + .iter() + .map(|sg| remap_inner(sg, id_map, label_map, placed)) + .filter(|sg| !sg.nodes.is_empty() || !sg.subgraphs.is_empty()) + .collect(); 
+ + let mut seen_edges: HashSet<(String, String, Option)> = HashSet::new(); + let edges: Vec = graph + .edges + .iter() + .filter_map(|e| { + let from = id_map.get(&e.from).unwrap_or(&e.from).clone(); + let to = id_map.get(&e.to).unwrap_or(&e.to).clone(); + if from == to { + return None; // self-loop from collision + } + let key = (from.clone(), to.clone(), e.label.clone()); + if !seen_edges.insert(key) { + return None; // duplicate + } + Some(Edge { + from, + to, + label: e.label.clone(), + attrs: e.attrs.clone(), + }) + }) + .collect(); + + DepGraph { + id: graph.id.clone(), + attrs: graph.attrs.clone(), + nodes, + edges, + subgraphs, + ..Default::default() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_graph(nodes: &[(&str, &str)], edges: &[(&str, &str)]) -> DepGraph { + DepGraph { + nodes: nodes + .iter() + .map(|(id, label)| (id.to_string(), NodeInfo::new(*label))) + .collect(), + edges: edges + .iter() + .map(|(from, to)| Edge { + from: from.to_string(), + to: to.to_string(), + ..Default::default() + }) + .collect(), + ..Default::default() + } + } + + fn node_ids(graph: &DepGraph) -> Vec<&str> { + graph.nodes.keys().map(|s| s.as_str()).collect() + } + + fn node_labels(graph: &DepGraph) -> Vec<&str> { + graph.nodes.values().map(|n| n.label.as_str()).collect() + } + + fn default_transforms() -> PathTransforms { + PathTransforms::new() + .strip_common_prefix(true) + .minimal_unique_suffix(true) + } + + #[test] + fn shorten_strips_common_prefix() { + let g = make_graph( + &[ + ("src/foo/bar.rs", "src/foo/bar.rs"), + ("src/foo/baz.rs", "src/foo/baz.rs"), + ], + &[("src/foo/bar.rs", "src/foo/baz.rs")], + ); + let result = shorten(&g, "/", ShortenKey::Both, &default_transforms()); + assert_eq!(node_ids(&result), vec!["bar.rs", "baz.rs"]); + assert_eq!(node_labels(&result), vec!["bar.rs", "baz.rs"]); + } + + #[test] + fn shorten_with_dot_separator() { + let g = make_graph( + &[ + ("com.example.foo", "com.example.foo"), + ("com.example.bar", 
"com.example.bar"), + ], + &[], + ); + let result = shorten(&g, ".", ShortenKey::Both, &default_transforms()); + assert_eq!(node_ids(&result), vec!["foo", "bar"]); + } + + #[test] + fn shorten_id_only() { + let g = make_graph( + &[ + ("src/foo/bar.rs", "Original Label 1"), + ("src/foo/baz.rs", "Original Label 2"), + ], + &[], + ); + let result = shorten(&g, "/", ShortenKey::Id, &default_transforms()); + assert_eq!(node_ids(&result), vec!["bar.rs", "baz.rs"]); + // Labels unchanged + assert_eq!( + node_labels(&result), + vec!["Original Label 1", "Original Label 2"] + ); + } + + #[test] + fn shorten_label_only() { + let g = make_graph(&[("id1", "src/foo/bar.rs"), ("id2", "src/foo/baz.rs")], &[]); + let result = shorten(&g, "/", ShortenKey::Label, &default_transforms()); + // IDs unchanged + assert_eq!(node_ids(&result), vec!["id1", "id2"]); + assert_eq!(node_labels(&result), vec!["bar.rs", "baz.rs"]); + } + + #[test] + fn shorten_updates_edge_endpoints() { + let g = make_graph( + &[("src/foo/bar.rs", "bar"), ("src/foo/baz.rs", "baz")], + &[("src/foo/bar.rs", "src/foo/baz.rs")], + ); + let result = shorten(&g, "/", ShortenKey::Id, &default_transforms()); + assert_eq!(result.edges[0].from, "bar.rs"); + assert_eq!(result.edges[0].to, "baz.rs"); + } + + #[test] + fn shorten_empty_graph() { + let g = DepGraph::default(); + let result = shorten(&g, "/", ShortenKey::Both, &default_transforms()); + assert!(result.nodes.is_empty()); + } + + #[test] + fn build_transforms_defaults() { + // No flags set -> strip_common_prefix + minimal_unique_suffix + let args = ShortenArgs::default(); + let transforms = build_transforms(&args); + let g = make_graph( + &[ + ("src/foo/bar.rs", "src/foo/bar.rs"), + ("src/foo/baz.rs", "src/foo/baz.rs"), + ], + &[], + ); + let result = shorten(&g, "/", ShortenKey::Id, &transforms); + assert_eq!(node_ids(&result), vec!["bar.rs", "baz.rs"]); + } + + #[test] + fn build_transforms_explicit_single_letter() { + let args = ShortenArgs { + single_letter: 
true, + ..Default::default() + }; + let transforms = build_transforms(&args); + // single_letter only, no strip_common_prefix or minimal_unique_suffix + let g = make_graph(&[("src/foo/bar.rs", "src/foo/bar.rs")], &[]); + let result = shorten(&g, "/", ShortenKey::Id, &transforms); + assert_eq!(node_ids(&result), vec!["s/f/bar.rs"]); + } + + fn colliding_transforms() -> PathTransforms { + PathTransforms::new().single_letter(true) + } + + #[test] + fn collision_removes_self_loops() { + // src/utils/parse.rs -> s/u/parse.rs + // src/uber/parse.rs -> s/u/parse.rs (collision) + // Edge between them becomes a self-loop and should be removed. + let g = make_graph( + &[ + ("src/utils/parse.rs", "src/utils/parse.rs"), + ("src/uber/parse.rs", "src/uber/parse.rs"), + ], + &[("src/utils/parse.rs", "src/uber/parse.rs")], + ); + let result = shorten(&g, "/", ShortenKey::Id, &colliding_transforms()); + assert_eq!(node_ids(&result), vec!["s/u/parse.rs"]); + assert!(result.edges.is_empty()); + } + + #[test] + fn collision_deduplicates_edges() { + // Both a.x and a.y collide to the same ID, both have edges to b. + // After remapping, only one edge should remain. 
+ let g = make_graph( + &[ + ("src/utils/parse.rs", "src/utils/parse.rs"), + ("src/uber/parse.rs", "src/uber/parse.rs"), + ("lib/out.rs", "lib/out.rs"), + ], + &[ + ("src/utils/parse.rs", "lib/out.rs"), + ("src/uber/parse.rs", "lib/out.rs"), + ], + ); + let result = shorten(&g, "/", ShortenKey::Id, &colliding_transforms()); + assert_eq!(node_ids(&result), vec!["s/u/parse.rs", "l/out.rs"]); + assert_eq!(result.edges.len(), 1); + assert_eq!(result.edges[0].from, "s/u/parse.rs"); + assert_eq!(result.edges[0].to, "l/out.rs"); + } +} diff --git a/crates/csvizmo-depgraph/src/algorithm/simplify.rs b/crates/csvizmo-depgraph/src/algorithm/simplify.rs new file mode 100644 index 0000000..40c4ca0 --- /dev/null +++ b/crates/csvizmo-depgraph/src/algorithm/simplify.rs @@ -0,0 +1,155 @@ +use std::collections::HashSet; + +use petgraph::algo::toposort; +use petgraph::algo::tred::{dag_to_toposorted_adjacency_list, dag_transitive_reduction_closure}; +use petgraph::visit::{IntoNeighbors, NodeCount}; + +use crate::{DepGraph, FlatGraphView}; + +/// Remove redundant edges via transitive reduction. +/// +/// If A->B->C and A->C exist, the direct A->C edge is redundant and is removed. +/// Only works on DAGs. Returns an error if the graph contains cycles. +pub fn simplify(graph: &DepGraph) -> eyre::Result { + let view = FlatGraphView::new(graph); + + let sorted = toposort(&view.pg, None).map_err(|_| { + eyre::eyre!( + "graph contains cycles; transitive reduction requires a DAG. \ + Use `depfilter cycles` to identify them." + ) + })?; + + // revmap maps original node index -> topo position. + // sorted[topo_position] maps back to the original NodeIndex. + let (adj, _revmap) = dag_to_toposorted_adjacency_list::<_, u32>(&view.pg, &sorted); + let (reduction, _closure) = dag_transitive_reduction_closure(&adj); + + // Build set of edges to keep: (from_id, to_id) pairs present in the reduction. 
+ let mut keep_edges: HashSet<(&str, &str)> = HashSet::new(); + for from_topo in 0..reduction.node_count() { + let from_id = view.idx_to_id[sorted[from_topo].index()]; + for to_topo in reduction.neighbors(from_topo as u32) { + let to_id = view.idx_to_id[sorted[to_topo as usize].index()]; + keep_edges.insert((from_id, to_id)); + } + } + + Ok(filter_edges(graph, &keep_edges)) +} + +fn filter_edges<'a>(graph: &DepGraph, keep: &HashSet<(&'a str, &'a str)>) -> DepGraph { + DepGraph { + id: graph.id.clone(), + attrs: graph.attrs.clone(), + nodes: graph.nodes.clone(), + edges: graph + .edges + .iter() + .filter(|e| keep.contains(&(e.from.as_str(), e.to.as_str()))) + .cloned() + .collect(), + subgraphs: graph + .subgraphs + .iter() + .map(|sg| filter_edges(sg, keep)) + .collect(), + ..Default::default() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{Edge, NodeInfo}; + + fn make_graph(nodes: &[(&str, &str)], edges: &[(&str, &str)]) -> DepGraph { + DepGraph { + nodes: nodes + .iter() + .map(|(id, label)| (id.to_string(), NodeInfo::new(*label))) + .collect(), + edges: edges + .iter() + .map(|(from, to)| Edge { + from: from.to_string(), + to: to.to_string(), + ..Default::default() + }) + .collect(), + ..Default::default() + } + } + + fn edge_pairs(graph: &DepGraph) -> Vec<(&str, &str)> { + let mut pairs: Vec<_> = graph + .edges + .iter() + .map(|e| (e.from.as_str(), e.to.as_str())) + .collect(); + pairs.sort(); + pairs + } + + #[test] + fn removes_redundant_edge() { + // a -> b -> c, a -> c: the direct a->c is redundant + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C")], + &[("a", "b"), ("b", "c"), ("a", "c")], + ); + let result = simplify(&g).unwrap(); + assert_eq!(edge_pairs(&result), vec![("a", "b"), ("b", "c")]); + } + + #[test] + fn keeps_all_edges_when_none_redundant() { + // a -> b -> c: no redundant edges + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C")], + &[("a", "b"), ("b", "c")], + ); + let result = simplify(&g).unwrap(); + 
assert_eq!(edge_pairs(&result), vec![("a", "b"), ("b", "c")]); + } + + #[test] + fn diamond_reduces() { + // a -> b -> d, a -> c -> d, a -> d: a->d is redundant + let g = make_graph( + &[("a", "A"), ("b", "B"), ("c", "C"), ("d", "D")], + &[("a", "b"), ("a", "c"), ("b", "d"), ("c", "d"), ("a", "d")], + ); + let result = simplify(&g).unwrap(); + assert_eq!( + edge_pairs(&result), + vec![("a", "b"), ("a", "c"), ("b", "d"), ("c", "d")] + ); + } + + #[test] + fn errors_on_cycle() { + let g = make_graph(&[("a", "A"), ("b", "B")], &[("a", "b"), ("b", "a")]); + let msg = simplify(&g).err().expect("expected error").to_string(); + assert!(msg.contains("cycles"), "error message: {msg}"); + } + + #[test] + fn empty_graph() { + let g = DepGraph::default(); + let result = simplify(&g).unwrap(); + assert!(result.edges.is_empty()); + } + + #[test] + fn reverse_insertion_order() { + // Nodes inserted in reverse topological order to exercise the topo mapping. + // c -> b -> a with c -> a redundant. + let g = make_graph( + &[("c", "C"), ("b", "B"), ("a", "A")], + &[("c", "b"), ("b", "a"), ("c", "a")], + ); + let result = simplify(&g).unwrap(); + assert_eq!(edge_pairs(&result), vec![("b", "a"), ("c", "b")]); + } +} diff --git a/crates/csvizmo-depgraph/src/algorithm/sub.rs b/crates/csvizmo-depgraph/src/algorithm/sub.rs new file mode 100644 index 0000000..2a2f53f --- /dev/null +++ b/crates/csvizmo-depgraph/src/algorithm/sub.rs @@ -0,0 +1,639 @@ +use std::collections::HashSet; + +use indexmap::IndexMap; +use regex::Regex; + +use crate::{DepGraph, Edge, NodeInfo}; + +/// Which field to apply the substitution to. +#[derive(Debug, Clone)] +pub enum SubKey { + /// Apply to node IDs; merge nodes when IDs collide. + Id, + /// Apply to a named node field (label, or an attribute name). + Node(String), + /// Apply to a named edge field (label, or an attribute name). + Edge(String), +} + +impl SubKey { + /// Parse a `--key` value: `id`, `node:NAME`, or `edge:NAME`. 
+ pub fn parse(s: &str) -> eyre::Result { + match s { + "id" => Ok(Self::Id), + s if s.starts_with("node:") => Ok(Self::Node(s["node:".len()..].to_string())), + s if s.starts_with("edge:") => Ok(Self::Edge(s["edge:".len()..].to_string())), + _ => eyre::bail!( + "invalid --key value: {s:?}. Expected 'id', 'node:NAME', or 'edge:NAME'" + ), + } + } +} + +/// Parsed sed-style substitution: `s/pattern/replacement/`. +pub struct Substitution { + pub regex: Regex, + pub replacement: String, +} + +impl Substitution { + /// Parse a sed-style `s/pattern/replacement/` string. + /// + /// The first character after `s` is the delimiter. Supports any single-byte + /// ASCII delimiter character (e.g. `/`, `|`, `#`, etc.). + pub fn parse(s: &str) -> eyre::Result { + let s = s.as_bytes(); + if s.is_empty() || s[0] != b's' { + eyre::bail!("substitution must start with 's'"); + } + if s.len() < 4 { + eyre::bail!("substitution too short"); + } + + let delim = s[1]; + // Find the second delimiter (end of pattern), skipping escaped delimiters. + let pattern_start = 2; + let pattern_end = find_unescaped(s, delim, pattern_start) + .ok_or_else(|| eyre::eyre!("missing second delimiter in substitution"))?; + let replacement_start = pattern_end + 1; + // The trailing delimiter is optional. Any bytes after it (e.g. sed-style + // flags) are never read, so they are silently ignored. + let replacement_end = find_unescaped(s, delim, replacement_start).unwrap_or(s.len()); + + let pattern = std::str::from_utf8(&s[pattern_start..pattern_end])?; + let replacement = std::str::from_utf8(&s[replacement_start..replacement_end])?; + // Unescape backslash-escaped delimiters in the replacement. The pattern doesn't + // need this because the regex engine already interprets an escaped delimiter as + // a literal match. But the regex crate treats `\` literally in replacements, so + // an escaped delimiter would produce a spurious backslash without unescaping. 
/// Remove backslash escapes before the delimiter character in a string.
///
/// Only `\<delim>` sequences are unescaped; other backslash sequences are left as-is.
/// The scan is left-to-right and non-overlapping, and a trailing lone backslash is kept.
fn unescape_delimiter(s: &str, delim: u8) -> String {
    let bytes = s.as_bytes();
    let mut result = Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        // Checked look-ahead: a backslash at the very end has nothing to escape.
        if bytes[i] == b'\\' && bytes.get(i + 1) == Some(&delim) {
            result.push(delim);
            i += 2;
        } else {
            result.push(bytes[i]);
            i += 1;
        }
    }
    // Invariant: the output is the input with some ASCII backslash bytes removed.
    // Dropping standalone ASCII bytes from valid UTF-8 leaves valid UTF-8.
    String::from_utf8(result).expect("unescape produced invalid UTF-8")
}

/// Find the next unescaped occurrence of `delim` starting at `start`.
///
/// A backslash escapes the byte that follows it, so that byte is never reported
/// as a delimiter. Returns the byte index of the delimiter, or `None` if there is
/// none (including when `start` is at or past the end of `s`).
fn find_unescaped(s: &[u8], delim: u8, start: usize) -> Option<usize> {
    let mut i = start;
    while i < s.len() {
        if s[i] == b'\\' {
            i += 2; // skip the backslash and the character it escapes
        } else if s[i] == delim {
            return Some(i);
        } else {
            i += 1;
        }
    }
    None
}
+ let all_nodes = graph.all_nodes(); + let mut id_map: IndexMap = IndexMap::new(); + for old_id in all_nodes.keys() { + let new_id = substitution.apply(old_id); + id_map.insert(old_id.clone(), new_id); + } + + // Track which new IDs we've already placed (first subgraph wins). + let mut placed: HashSet = HashSet::new(); + + remap_subgraph(graph, &id_map, &mut placed) +} + +/// Recursively remap node IDs in a graph/subgraph, merging colliding nodes. +fn remap_subgraph( + graph: &DepGraph, + id_map: &IndexMap, + placed: &mut HashSet, +) -> DepGraph { + // Remap nodes in this level, merging on collision. + // Nodes whose new ID is empty are removed. + let mut nodes: IndexMap = IndexMap::new(); + for (old_id, info) in &graph.nodes { + let new_id = &id_map[old_id]; + if new_id.is_empty() || placed.contains(new_id) { + continue; + } + match nodes.get_mut(new_id) { + Some(existing) => { + // Merge: keep first label, first non-None node_type, merge attrs (first wins). + if existing.node_type.is_none() { + existing.node_type.clone_from(&info.node_type); + } + for (k, v) in &info.attrs { + existing.attrs.entry(k.clone()).or_insert_with(|| v.clone()); + } + } + None => { + nodes.insert(new_id.clone(), info.clone()); + } + } + } + + // Record all new IDs placed at this level. + for new_id in nodes.keys() { + placed.insert(new_id.clone()); + } + + // Recurse into subgraphs. + let subgraphs: Vec = graph + .subgraphs + .iter() + .map(|sg| remap_subgraph(sg, id_map, placed)) + .filter(|sg| !sg.nodes.is_empty() || !sg.subgraphs.is_empty()) + .collect(); + + // Remap edges, remove self-loops, deduplicate. 
+ let mut seen_edges: HashSet<(String, String, Option)> = HashSet::new(); + let edges: Vec = graph + .edges + .iter() + .filter_map(|e| { + let from = id_map.get(&e.from).unwrap_or(&e.from); + let to = id_map.get(&e.to).unwrap_or(&e.to); + if from.is_empty() || to.is_empty() { + return None; // endpoint was removed + } + if from == to { + return None; // self-loop + } + let key = (from.clone(), to.clone(), e.label.clone()); + if !seen_edges.insert(key) { + return None; // duplicate + } + Some(Edge { + from: from.clone(), + to: to.clone(), + label: e.label.clone(), + attrs: e.attrs.clone(), + }) + }) + .collect(); + + DepGraph { + id: graph.id.clone(), + attrs: graph.attrs.clone(), + nodes, + edges, + subgraphs, + ..Default::default() + } +} + +/// Apply substitution to a node field (label, node_type, or attribute). +/// Empty results are treated as removal: label resets to node ID, +/// Option fields become None, and attributes are deleted. +fn sub_node_field(graph: &DepGraph, substitution: &Substitution, field: &str) -> DepGraph { + DepGraph { + id: graph.id.clone(), + attrs: graph.attrs.clone(), + nodes: graph + .nodes + .iter() + .map(|(id, info)| { + let mut info = info.clone(); + match field { + "label" => { + let new_label = substitution.apply(&info.label); + info.label = if new_label.is_empty() { + id.clone() + } else { + new_label + }; + } + "node_type" => { + if let Some(ref nt) = info.node_type { + let new_nt = substitution.apply(nt); + info.node_type = if new_nt.is_empty() { + None + } else { + Some(new_nt) + }; + } + } + _ => { + if let Some(val) = info.attrs.get(field) { + let new_val = substitution.apply(val); + if new_val.is_empty() { + info.attrs.swap_remove(field); + } else { + info.attrs.insert(field.to_string(), new_val); + } + } + } + } + (id.clone(), info) + }) + .collect(), + edges: graph.edges.clone(), + subgraphs: graph + .subgraphs + .iter() + .map(|sg| sub_node_field(sg, substitution, field)) + .collect(), + ..Default::default() + } +} + 
+/// Apply substitution to an edge field (label or attribute). +fn sub_edge_field(graph: &DepGraph, substitution: &Substitution, field: &str) -> DepGraph { + DepGraph { + id: graph.id.clone(), + attrs: graph.attrs.clone(), + nodes: graph.nodes.clone(), + edges: graph + .edges + .iter() + .map(|e| { + let mut e = e.clone(); + if field == "label" { + if let Some(ref label) = e.label { + let new_label = substitution.apply(label); + e.label = if new_label.is_empty() { + None + } else { + Some(new_label) + }; + } + } else if let Some(val) = e.attrs.get(field) { + let new_val = substitution.apply(val); + if new_val.is_empty() { + e.attrs.swap_remove(field); + } else { + e.attrs.insert(field.to_string(), new_val); + } + } + e + }) + .collect(), + subgraphs: graph + .subgraphs + .iter() + .map(|sg| sub_edge_field(sg, substitution, field)) + .collect(), + ..Default::default() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_graph( + nodes: &[(&str, &str)], + edges: &[(&str, &str)], + subgraphs: Vec, + ) -> DepGraph { + DepGraph { + nodes: nodes + .iter() + .map(|(id, label)| (id.to_string(), NodeInfo::new(*label))) + .collect(), + edges: edges + .iter() + .map(|(from, to)| Edge { + from: from.to_string(), + to: to.to_string(), + ..Default::default() + }) + .collect(), + subgraphs, + ..Default::default() + } + } + + fn node_ids(graph: &DepGraph) -> Vec<&str> { + graph.nodes.keys().map(|s| s.as_str()).collect() + } + + fn edge_pairs(graph: &DepGraph) -> Vec<(&str, &str)> { + graph + .edges + .iter() + .map(|e| (e.from.as_str(), e.to.as_str())) + .collect() + } + + // -- Substitution parsing -- + + #[test] + fn parse_basic() { + let s = Substitution::parse("s/foo/bar/").unwrap(); + assert_eq!(s.apply("foobar"), "barbar"); + } + + #[test] + fn parse_alternate_delimiter() { + let s = Substitution::parse("s|foo|bar|").unwrap(); + assert_eq!(s.apply("foo"), "bar"); + } + + #[test] + fn parse_no_trailing_delimiter() { + let s = 
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a graph from `(id, label)` node pairs, `(from, to)` edge pairs, and subgraphs.
    fn make_graph(
        nodes: &[(&str, &str)],
        edges: &[(&str, &str)],
        subgraphs: Vec<DepGraph>,
    ) -> DepGraph {
        DepGraph {
            nodes: nodes
                .iter()
                .map(|(id, label)| (id.to_string(), NodeInfo::new(*label)))
                .collect(),
            edges: edges
                .iter()
                .map(|(from, to)| Edge {
                    from: from.to_string(),
                    to: to.to_string(),
                    ..Default::default()
                })
                .collect(),
            subgraphs,
            ..Default::default()
        }
    }

    /// Top-level node IDs, in insertion order.
    fn node_ids(graph: &DepGraph) -> Vec<&str> {
        graph.nodes.keys().map(|s| s.as_str()).collect()
    }

    /// Top-level edges as `(from, to)` pairs, in order.
    fn edge_pairs(graph: &DepGraph) -> Vec<(&str, &str)> {
        graph
            .edges
            .iter()
            .map(|e| (e.from.as_str(), e.to.as_str()))
            .collect()
    }

    // -- Substitution parsing --

    #[test]
    fn parse_basic() {
        let s = Substitution::parse("s/foo/bar/").unwrap();
        assert_eq!(s.apply("foobar"), "barbar");
    }

    #[test]
    fn parse_alternate_delimiter() {
        let s = Substitution::parse("s|foo|bar|").unwrap();
        assert_eq!(s.apply("foo"), "bar");
    }

    #[test]
    fn parse_no_trailing_delimiter() {
        let s = Substitution::parse("s/foo/bar").unwrap();
        assert_eq!(s.apply("foo"), "bar");
    }

    #[test]
    fn parse_capture_groups() {
        let s = Substitution::parse("s/([^.]+)\\..*/$1/").unwrap();
        assert_eq!(s.apply("acl-native.do_compile"), "acl-native");
    }

    #[test]
    fn parse_empty_replacement() {
        let s = Substitution::parse("s/\\.do_.*//").unwrap();
        assert_eq!(s.apply("acl-native.do_compile"), "acl-native");
    }

    #[test]
    fn parse_escaped_delimiter_in_replacement() {
        // s/a/b\/c/ -- replacement should be b/c, not b\/c
        let s = Substitution::parse("s/a/b\\/c/").unwrap();
        assert_eq!(s.apply("a"), "b/c");
    }

    #[test]
    fn parse_escaped_alt_delimiter_in_replacement() {
        // s|a|b\|c| -- replacement should be b|c
        let s = Substitution::parse("s|a|b\\|c|").unwrap();
        assert_eq!(s.apply("a"), "b|c");
    }

    #[test]
    fn parse_non_delimiter_backslash_preserved_in_replacement() {
        // s/a/b\nc/ -- \n is not the delimiter, so kept as literal \n
        let s = Substitution::parse("s/a/b\\nc/").unwrap();
        assert_eq!(s.apply("a"), "b\\nc");
    }

    #[test]
    fn parse_invalid_no_s() {
        assert!(Substitution::parse("foo").is_err());
    }

    #[test]
    fn parse_rejects_unusable_delimiters() {
        // Backslash is the escape character and can never terminate the pattern.
        assert!(Substitution::parse("s\\a\\b\\").is_err());
        // A multi-byte character cannot serve as a single-byte delimiter.
        assert!(Substitution::parse("s\u{e9}a\u{e9}b\u{e9}").is_err());
    }

    // -- SubKey parsing --

    #[test]
    fn subkey_id() {
        // `matches!` only yields a bool; it has to be asserted to test anything.
        assert!(matches!(SubKey::parse("id").unwrap(), SubKey::Id));
    }

    #[test]
    fn subkey_node() {
        match SubKey::parse("node:label").unwrap() {
            SubKey::Node(name) => assert_eq!(name, "label"),
            _ => panic!("expected Node"),
        }
    }

    #[test]
    fn subkey_edge() {
        match SubKey::parse("edge:label").unwrap() {
            SubKey::Edge(name) => assert_eq!(name, "label"),
            _ => panic!("expected Edge"),
        }
    }

    #[test]
    fn subkey_invalid() {
        assert!(SubKey::parse("invalid").is_err());
    }

    // -- sub with id key --

    #[test]
    fn sub_id_no_collision() {
        let g = make_graph(
            &[("a.do_compile", "A"), ("b.do_build", "B")],
            &[("a.do_compile", "b.do_build")],
            vec![],
        );
        let s = Substitution::parse("s/\\.do_.*//").unwrap();
        let result = sub(&g, &s, &SubKey::Id);
        assert_eq!(node_ids(&result), vec!["a", "b"]);
        assert_eq!(edge_pairs(&result), vec![("a", "b")]);
    }

    #[test]
    fn sub_id_merges_nodes() {
        let g = make_graph(
            &[("a.do_compile", "A"), ("a.do_build", "B")],
            &[("a.do_compile", "a.do_build")],
            vec![],
        );
        let s = Substitution::parse("s/\\.do_.*//").unwrap();
        let result = sub(&g, &s, &SubKey::Id);
        // Both map to "a", merged into one node
        assert_eq!(node_ids(&result), vec!["a"]);
        // Self-loop removed
        assert!(result.edges.is_empty());
        // First node's label wins
        assert_eq!(result.nodes["a"].label, "A");
    }

    #[test]
    fn sub_id_deduplicates_edges() {
        // a.x -> b, a.y -> b: both map to a -> b, should deduplicate
        let g = make_graph(
            &[("a.x", "A"), ("a.y", "A"), ("b", "B")],
            &[("a.x", "b"), ("a.y", "b")],
            vec![],
        );
        let s = Substitution::parse("s/\\..*//").unwrap();
        let result = sub(&g, &s, &SubKey::Id);
        assert_eq!(node_ids(&result), vec!["a", "b"]);
        assert_eq!(edge_pairs(&result), vec![("a", "b")]);
    }

    #[test]
    fn sub_id_subgraph_first_wins() {
        // The root graph has a.y and the subgraph has a.x; both map to "a".
        // Root-level nodes are placed before subgraphs are visited, so the
        // root node wins and the subgraph's copy is dropped.
        let sub_g = make_graph(&[("a.x", "SubA")], &[], vec![]);
        let g = make_graph(&[("a.y", "RootA")], &[], vec![sub_g]);
        let s = Substitution::parse("s/\\..*//").unwrap();
        let result = sub(&g, &s, &SubKey::Id);
        // Root level is processed first, so "a" appears at root
        assert_eq!(node_ids(&result), vec!["a"]);
        assert_eq!(result.nodes["a"].label, "RootA");
        // Subgraph should be empty and dropped
        assert!(result.subgraphs.is_empty());
    }

    #[test]
    fn sub_id_drops_empty_subgraphs() {
        let sub_g = make_graph(&[("a.x", "A")], &[], vec![]);
        let g = make_graph(&[("a.y", "A")], &[], vec![sub_g]);
        let s = Substitution::parse("s/\\..*//").unwrap();
        let result = sub(&g, &s, &SubKey::Id);
        assert!(result.subgraphs.is_empty());
    }

    // -- sub removing nodes (empty ID) --

    #[test]
    fn sub_id_removes_empty() {
        // b matches the pattern and maps to ""; it should be removed along with its edges.
        let g = make_graph(
            &[("a", "A"), ("b.remove", "B"), ("c", "C")],
            &[("a", "b.remove"), ("b.remove", "c"), ("a", "c")],
            vec![],
        );
        let s = Substitution::parse("s/^b\\..*//").unwrap();
        let result = sub(&g, &s, &SubKey::Id);
        assert_eq!(node_ids(&result), vec!["a", "c"]);
        assert_eq!(edge_pairs(&result), vec![("a", "c")]);
    }

    #[test]
    fn sub_id_removes_all_empty() {
        // All nodes map to "" -- result should be completely empty.
        let g = make_graph(&[("a", "A"), ("b", "B")], &[("a", "b")], vec![]);
        let s = Substitution::parse("s/.*//").unwrap();
        let result = sub(&g, &s, &SubKey::Id);
        assert!(result.nodes.is_empty());
        assert!(result.edges.is_empty());
    }

    // -- sub with node key --

    #[test]
    fn sub_node_label() {
        let g = make_graph(&[("a", "hello world"), ("b", "goodbye world")], &[], vec![]);
        let s = Substitution::parse("s/world/earth/").unwrap();
        let result = sub(&g, &s, &SubKey::Node("label".to_string()));
        assert_eq!(result.nodes["a"].label, "hello earth");
        assert_eq!(result.nodes["b"].label, "goodbye earth");
    }

    #[test]
    fn sub_node_attr() {
        let mut g = make_graph(&[("a", "A")], &[], vec![]);
        g.nodes
            .get_mut("a")
            .unwrap()
            .attrs
            .insert("color".to_string(), "red".to_string());
        let s = Substitution::parse("s/red/blue/").unwrap();
        let result = sub(&g, &s, &SubKey::Node("color".to_string()));
        assert_eq!(result.nodes["a"].attrs["color"], "blue");
    }

    #[test]
    fn sub_node_label_resets_to_id_when_empty() {
        let g = make_graph(&[("a", "hello"), ("b", "goodbye")], &[], vec![]);
        let s = Substitution::parse("s/.*//").unwrap();
        let result = sub(&g, &s, &SubKey::Node("label".to_string()));
        assert_eq!(result.nodes["a"].label, "a");
        assert_eq!(result.nodes["b"].label, "b");
    }

    #[test]
    fn sub_node_type_removes_empty() {
        let mut g = make_graph(&[("a", "A")], &[], vec![]);
        g.nodes.get_mut("a").unwrap().node_type = Some("lib".to_string());
        let s = Substitution::parse("s/.*//").unwrap();
        let result = sub(&g, &s, &SubKey::Node("node_type".to_string()));
        assert_eq!(result.nodes["a"].node_type, None);
    }

    #[test]
    fn sub_node_attr_removes_empty() {
        let mut g = make_graph(&[("a", "A")], &[], vec![]);
        g.nodes
            .get_mut("a")
            .unwrap()
            .attrs
            .insert("color".to_string(), "red".to_string());
        let s = Substitution::parse("s/.*//").unwrap();
        let result = sub(&g, &s, &SubKey::Node("color".to_string()));
        assert!(!result.nodes["a"].attrs.contains_key("color"));
    }

    // -- sub with edge key --

    #[test]
    fn sub_edge_label() {
        let mut g = make_graph(&[("a", "A"), ("b", "B")], &[], vec![]);
        g.edges.push(Edge {
            from: "a".to_string(),
            to: "b".to_string(),
            label: Some("depends_on".to_string()),
            ..Default::default()
        });
        let s = Substitution::parse("s/depends_on/uses/").unwrap();
        let result = sub(&g, &s, &SubKey::Edge("label".to_string()));
        assert_eq!(result.edges[0].label.as_deref(), Some("uses"));
    }

    #[test]
    fn sub_edge_label_removes_empty() {
        let mut g = make_graph(&[("a", "A"), ("b", "B")], &[], vec![]);
        g.edges.push(Edge {
            from: "a".to_string(),
            to: "b".to_string(),
            label: Some("depends_on".to_string()),
            ..Default::default()
        });
        let s = Substitution::parse("s/.*//").unwrap();
        let result = sub(&g, &s, &SubKey::Edge("label".to_string()));
        assert_eq!(result.edges[0].label, None);
    }

    #[test]
    fn sub_edge_attr_removes_empty() {
        let mut g = make_graph(&[("a", "A"), ("b", "B")], &[], vec![]);
        let mut edge = Edge {
            from: "a".to_string(),
            to: "b".to_string(),
            ..Default::default()
        };
        edge.attrs.insert("style".to_string(), "dashed".to_string());
        g.edges.push(edge);
        let s = Substitution::parse("s/.*//").unwrap();
        let result = sub(&g, &s, &SubKey::Edge("style".to_string()));
        assert!(!result.edges[0].attrs.contains_key("style"));
    }
}
algorithm}; +use csvizmo_utils::stdio::{get_input_reader, get_output_writer}; + +/// Arguments for the `sub` subcommand. +#[derive(Debug, clap::Parser)] +struct SubArgs { + /// Sed-style substitution: s/pattern/replacement/ + /// + /// Uses Rust regex syntax: (...) for capture groups, $1/${name} in replacement. + /// Supports alternate delimiters: s|...|...|, s#...#...#, etc. + expr: String, + + /// Field to apply substitution to: id, node:NAME, or edge:NAME + #[clap(long, default_value = "id")] + key: String, +} + +/// Arguments for the `merge` subcommand. +#[derive(Debug, clap::Parser)] +struct MergeArgs { + /// Input files to merge (use '-' for stdin, at most once). + /// The global --input/-i flag, if set, is included as an additional file. + #[clap(required = true)] + files: Vec, +} + +/// Structural transformations on dependency graphs. +/// +/// Operations are performed via subcommands. +/// Chain operations by piping: deptransform ... | deptransform ... +#[derive(Debug, Parser)] +#[clap(version, verbatim_doc_comment)] +struct Args { + /// Logging level + #[clap(long, default_value_t = tracing::Level::INFO)] + log_level: tracing::Level, + + /// Input file (stdin if '-' or omitted) + #[clap(short, long, global = true)] + input: Option, + + /// Input format (auto-detected from extension/content if omitted) + #[clap(short = 'I', long, global = true)] + input_format: Option, + + /// Output file (stdout if '-' or omitted) + #[clap(short, long, global = true)] + output: Option, + + /// Output format (auto-detected from extension, defaults to DOT) + #[clap(short = 'O', long, global = true)] + output_format: Option, + + #[clap(subcommand)] + command: Command, +} + +#[derive(Debug, Subcommand)] +enum Command { + /// Reverse the direction of all edges + Reverse, + /// Remove redundant edges via transitive reduction + Simplify, + /// Shorten node IDs and/or labels using path transforms + Shorten(ShortenArgs), + /// Apply sed-style regex substitution to graph fields + 
/// + /// Uses Rust regex syntax: (...) for capture groups, $1/${name} in replacement. + /// When applied to node IDs, nodes that map to the same ID are merged. + Sub(SubArgs), + /// Merge multiple graphs into one + /// + /// Nodes are unioned by ID (later files overwrite on collision). + /// Edges are deduplicated by (from, to); first label wins, attributes are merged. + /// The global --input/-i flag, if set, is included as the first file. + Merge(MergeArgs), + /// Flatten subgraphs into a single top-level graph + Flatten, +} + +fn main() -> eyre::Result<()> { + let use_color = std::io::stderr().is_terminal(); + if use_color { + color_eyre::install()?; + } + + let args = Args::parse(); + + let filter = tracing_subscriber::EnvFilter::builder() + .with_default_directive(args.log_level.into()) + .with_env_var("CSV_LOG") + .from_env_lossy(); + tracing_subscriber::fmt() + .with_env_filter(filter) + .with_ansi(use_color) + .with_writer(std::io::stderr) + .init(); + + // Normalize `-` to None -- it means stdio, not a file path. 
+ let is_stdio = |p: &PathBuf| p.as_os_str() == "-"; + let output_path = args.output.filter(|p| !is_stdio(p)); + let output_format = + csvizmo_depgraph::emit::resolve_output_format(args.output_format, output_path.as_deref())?; + + let graph = match &args.command { + // Merge can't handle the same input handling as the rest of the commands + Command::Merge(merge_args) => { + let mut files = Vec::new(); + if let Some(input) = &args.input { + files.push(input); + } + files.extend(&merge_args.files); + if files.len() < 2 { + eyre::bail!("merge requires at least 2 input files"); + } + let mut graphs = Vec::new(); + for file in &files { + graphs.push(read_graph(Some(file), args.input_format)?); + } + algorithm::merge::merge(&graphs) + } + command => { + let graph = read_graph(args.input.as_ref(), args.input_format)?; + tracing::info!( + "Parsed graph with {} nodes, {} edges, and {} subgraphs", + graph.all_nodes().len(), + graph.all_edges().len(), + graph.subgraphs.len() + ); + + match command { + Command::Reverse => algorithm::reverse::reverse(&graph), + Command::Simplify => algorithm::simplify::simplify(&graph)?, + Command::Shorten(shorten_args) => { + let transforms = algorithm::shorten::build_transforms(shorten_args); + algorithm::shorten::shorten( + &graph, + &shorten_args.separator, + shorten_args.key, + &transforms, + ) + } + Command::Sub(sub_args) => { + let substitution = Substitution::parse(&sub_args.expr)?; + let key = SubKey::parse(&sub_args.key)?; + algorithm::sub::sub(&graph, &substitution, &key) + } + Command::Flatten => algorithm::flatten::flatten(&graph), + Command::Merge(_) => unreachable!(), + } + } + }; + + let mut output = get_output_writer(&output_path)?; + csvizmo_depgraph::emit::emit(output_format, &graph, &mut output)?; + + Ok(()) +} + +/// Read and parse a graph from a file path (or stdin if None / "-"). 
+fn read_graph(path: Option<&PathBuf>, input_format: Option) -> eyre::Result { + let is_stdio = |p: &PathBuf| p.as_os_str() == "-"; + let file_path: Option = path.filter(|p| !is_stdio(p)).cloned(); + let mut reader = get_input_reader(&file_path)?; + let mut text = String::new(); + reader.read_to_string(&mut text)?; + let fmt = + csvizmo_depgraph::parse::resolve_input_format(input_format, file_path.as_deref(), &text)?; + csvizmo_depgraph::parse::parse(fmt, &text) +} diff --git a/crates/csvizmo-depgraph/tests/deptransform.rs b/crates/csvizmo-depgraph/tests/deptransform.rs new file mode 100644 index 0000000..c74c8e1 --- /dev/null +++ b/crates/csvizmo-depgraph/tests/deptransform.rs @@ -0,0 +1,594 @@ +use std::io::Write; + +use csvizmo_test::{CommandExt, tool}; +use pretty_assertions::assert_eq; +use tempfile::NamedTempFile; + +// -- reverse integration tests -- + +#[test] +fn reverse_simple_chain() { + // a -> b -> c becomes c -> b, b -> a + let graph = "a\nb\nc\n#\na\tb\nb\tc\n"; + let output = tool!("deptransform") + .args(["reverse", "--input-format", "tgf", "--output-format", "tgf"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\nb\nc\n#\nb\ta\nc\tb\n"); +} + +#[test] +fn reverse_preserves_labels() { + let graph = "1\tAlpha\n2\tBeta\n#\n1\t2\n"; + let output = tool!("deptransform") + .args(["reverse", "--input-format", "tgf", "--output-format", "tgf"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "1\tAlpha\n2\tBeta\n#\n2\t1\n"); +} + +#[test] +fn reverse_empty_graph() { + let graph = "#\n"; + let output = tool!("deptransform") + .args(["reverse", "--input-format", "tgf", "--output-format", "tgf"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = 
String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "#\n"); +} + +#[test] +fn reverse_dot_output() { + let graph = "a\nb\n#\na\tb\n"; + let output = tool!("deptransform") + .args(["reverse", "--input-format", "tgf", "--output-format", "dot"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!( + stdout, + "\ +digraph { + a; + b; + b -> a; +} +" + ); +} + +// -- simplify integration tests -- + +#[test] +fn simplify_removes_redundant_edge() { + // a -> b -> c, a -> c: the direct a->c is redundant + let graph = "a\nb\nc\n#\na\tb\nb\tc\na\tc\n"; + let output = tool!("deptransform") + .args([ + "simplify", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\nb\nc\n#\na\tb\nb\tc\n"); +} + +#[test] +fn simplify_diamond() { + // a -> b -> d, a -> c -> d, a -> d: a->d is redundant + let graph = "a\nb\nc\nd\n#\na\tb\na\tc\nb\td\nc\td\na\td\n"; + let output = tool!("deptransform") + .args([ + "simplify", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\nb\nc\nd\n#\na\tb\na\tc\nb\td\nc\td\n"); +} + +#[test] +fn simplify_no_redundant_edges() { + // a -> b -> c: nothing to remove + let graph = "a\nb\nc\n#\na\tb\nb\tc\n"; + let output = tool!("deptransform") + .args([ + "simplify", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\nb\nc\n#\na\tb\nb\tc\n"); +} + +#[test] +fn simplify_errors_on_cycle() { + // a -> b 
-> a: cycle, should fail + let graph = "a\nb\n#\na\tb\nb\ta\n"; + let output = tool!("deptransform") + .args([ + "simplify", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(!output.status.success()); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!(stderr.contains("cycles"), "stderr: {stderr}"); +} + +// -- shorten integration tests -- + +#[test] +fn shorten_default_strips_common_prefix() { + // Nodes share common prefix "src/foo/" -- defaults strip it + let graph = "src/foo/bar.rs\nsrc/foo/baz.rs\n#\nsrc/foo/bar.rs\tsrc/foo/baz.rs\n"; + let output = tool!("deptransform") + .args(["shorten", "--input-format", "tgf", "--output-format", "tgf"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "bar.rs\nbaz.rs\n#\nbar.rs\tbaz.rs\n"); +} + +#[test] +fn shorten_dot_separator() { + let graph = "com.example.foo\ncom.example.bar\n#\ncom.example.foo\tcom.example.bar\n"; + let output = tool!("deptransform") + .args([ + "shorten", + "--separator", + ".", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "foo\nbar\n#\nfoo\tbar\n"); +} + +#[test] +fn shorten_id_only() { + // --key id: shorten IDs but leave labels untouched + let graph = "src/foo/bar.rs\tOriginal\nsrc/foo/baz.rs\tOther\n#\n"; + let output = tool!("deptransform") + .args([ + "shorten", + "--key", + "id", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "bar.rs\tOriginal\nbaz.rs\tOther\n#\n"); +} + +#[test] +fn shorten_single_letter() { + 
// Explicit --single-letter overrides defaults + let graph = "src/foo/bar.rs\n#\n"; + let output = tool!("deptransform") + .args([ + "shorten", + "--single-letter", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "s/f/bar.rs\n#\n"); +} + +// -- sub integration tests -- + +#[test] +fn sub_id_no_collision() { + // Rename node IDs with no collisions; labels are preserved from original + let graph = "a.do_compile\nb.do_build\n#\na.do_compile\tb.do_build\n"; + let output = tool!("deptransform") + .args([ + "sub", + "s/\\.do_.*//", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\ta.do_compile\nb\tb.do_build\n#\na\tb\n"); +} + +#[test] +fn sub_id_merges_and_removes_self_loops() { + // Two nodes map to the same ID; self-loop removed, first label wins + let graph = "a.x\ta.x\na.y\ta.y\n#\na.x\ta.y\n"; + let output = tool!("deptransform") + .args([ + "sub", + "s/\\..*//", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\ta.x\n#\n"); +} + +#[test] +fn sub_id_deduplicates_edges() { + // a.x -> b, a.y -> b both become a -> b; only one edge kept + let graph = "a.x\ta.x\na.y\ta.y\nb\tb\n#\na.x\tb\na.y\tb\n"; + let output = tool!("deptransform") + .args([ + "sub", + "s/\\..*//", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\ta.x\nb\n#\na\tb\n"); 
+} + +#[test] +fn sub_node_label() { + let graph = "a\thello world\nb\tgoodbye world\n#\n"; + let output = tool!("deptransform") + .args([ + "sub", + "--key", + "node:label", + "s/world/earth/", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\thello earth\nb\tgoodbye earth\n#\n"); +} + +#[test] +fn sub_alternate_delimiter() { + let graph = "a/b\nc/d\n#\na/b\tc/d\n"; + let output = tool!("deptransform") + .args([ + "sub", + "s|/|.|", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a.b\ta/b\nc.d\tc/d\n#\na.b\tc.d\n"); +} + +#[test] +fn sub_capture_groups() { + // Use capture group to extract first component + let graph = "foo.bar\nbaz.qux\n#\nfoo.bar\tbaz.qux\n"; + let output = tool!("deptransform") + .args([ + "sub", + "s/([^.]+)\\..*/$1/", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "foo\tfoo.bar\nbaz\tbaz.qux\n#\nfoo\tbaz\n"); +} + +#[test] +fn sub_invalid_expr() { + let graph = "a\n#\n"; + let output = tool!("deptransform") + .args([ + "sub", + "not-a-substitution", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(!output.status.success()); +} + +// -- merge integration tests -- + +#[test] +fn merge_two_files() { + let mut f1 = NamedTempFile::new().unwrap(); + write!(f1, "a\nb\n#\na\tb\n").unwrap(); + + let mut f2 = NamedTempFile::new().unwrap(); + write!(f2, "c\nd\n#\nc\td\n").unwrap(); + + let output = tool!("deptransform") + 
.args(["merge", "--output-format", "tgf", "--input-format", "tgf"]) + .arg(f1.path()) + .arg(f2.path()) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\nb\nc\nd\n#\na\tb\nc\td\n"); +} + +#[test] +fn merge_overlapping_nodes() { + let mut f1 = NamedTempFile::new().unwrap(); + write!(f1, "a\tFirst\nb\n#\na\tb\n").unwrap(); + + let mut f2 = NamedTempFile::new().unwrap(); + write!(f2, "a\tSecond\nc\n#\na\tc\n").unwrap(); + + let output = tool!("deptransform") + .args(["merge", "--output-format", "tgf", "--input-format", "tgf"]) + .arg(f1.path()) + .arg(f2.path()) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + // Later file overwrites node "a" label; edges unioned + assert_eq!(stdout, "a\tSecond\nb\nc\n#\na\tb\na\tc\n"); +} + +#[test] +fn merge_deduplicates_edges() { + let mut f1 = NamedTempFile::new().unwrap(); + write!(f1, "a\nb\n#\na\tb\n").unwrap(); + + let mut f2 = NamedTempFile::new().unwrap(); + write!(f2, "a\nb\n#\na\tb\n").unwrap(); + + let output = tool!("deptransform") + .args(["merge", "--output-format", "tgf", "--input-format", "tgf"]) + .arg(f1.path()) + .arg(f2.path()) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\nb\n#\na\tb\n"); +} + +#[test] +fn merge_with_stdin() { + let mut f1 = NamedTempFile::new().unwrap(); + write!(f1, "a\nb\n#\na\tb\n").unwrap(); + + let output = tool!("deptransform") + .args(["merge", "--output-format", "tgf", "--input-format", "tgf"]) + .arg(f1.path()) + .arg("-") + .write_stdin("c\nd\n#\nc\td\n") + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\nb\nc\nd\n#\na\tb\nc\td\n"); +} + +#[test] +fn merge_input_flag_included() { + let mut f1 = 
NamedTempFile::new().unwrap(); + write!(f1, "a\nb\n#\na\tb\n").unwrap(); + + let mut f2 = NamedTempFile::new().unwrap(); + write!(f2, "c\nd\n#\nc\td\n").unwrap(); + + // --input provides the first file, positional provides the second + let output = tool!("deptransform") + .args(["--input-format", "tgf", "--output-format", "tgf", "-i"]) + .arg(f1.path()) + .arg("merge") + .arg(f2.path()) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\nb\nc\nd\n#\na\tb\nc\td\n"); +} + +#[test] +fn merge_requires_two_files() { + let mut f1 = NamedTempFile::new().unwrap(); + write!(f1, "a\n#\n").unwrap(); + + let output = tool!("deptransform") + .args(["merge", "--input-format", "tgf", "--output-format", "tgf"]) + .arg(f1.path()) + .captured_output() + .unwrap(); + assert!(!output.status.success()); +} + +#[test] +fn merge_preserves_subgraphs() { + let mut f1 = NamedTempFile::new().unwrap(); + write!( + f1, + "\ +digraph {{ + subgraph cluster_0 {{ + a; + b; + a -> b; + }} + c; + b -> c; +}} +" + ) + .unwrap(); + + let mut f2 = NamedTempFile::new().unwrap(); + write!( + f2, + "\ +digraph {{ + d; + c -> d; +}} +" + ) + .unwrap(); + + let output = tool!("deptransform") + .args(["merge", "--input-format", "dot", "--output-format", "dot"]) + .arg(f1.path()) + .arg(f2.path()) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!( + stdout, + "\ +digraph { + subgraph cluster_0 { + a; + b; + a -> b; + } + c; + d; + b -> c; + c -> d; +} +" + ); +} + +// -- flatten integration tests -- + +#[test] +fn flatten_removes_subgraphs() { + let graph = "\ +digraph { + subgraph cluster_0 { + a; + b; + a -> b; + } + c; + b -> c; +} +"; + let output = tool!("deptransform") + .args(["flatten", "--input-format", "dot", "--output-format", "dot"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + 
assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!( + stdout, + "\ +digraph { + c; + a; + b; + b -> c; + a -> b; +} +" + ); +} + +#[test] +fn flatten_no_subgraphs_unchanged() { + let graph = "a\nb\n#\na\tb\n"; + let output = tool!("deptransform") + .args(["flatten", "--input-format", "tgf", "--output-format", "tgf"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\nb\n#\na\tb\n"); +}