Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions graphconstructor/operators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from .knn_selector import KNNSelector
from .locally_adaptive_sparsification import LocallyAdaptiveSparsification
from .marginal_likelihood import MarginalLikelihoodFilter
from .metric_distance import MetricDistanceFilter
from .minimum_spanning_tree import MinimumSpanningTree
from .noise_corrected import NoiseCorrected
from .weight_threshold import WeightThreshold
Expand All @@ -16,6 +17,7 @@
"KNNSelector",
"LocallyAdaptiveSparsification",
"MarginalLikelihoodFilter",
"MetricDistanceFilter",
"MinimumSpanningTree",
"NoiseCorrected",
"WeightThreshold",
Expand Down
114 changes: 114 additions & 0 deletions graphconstructor/operators/metric_distance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
from dataclasses import dataclass
from typing import Literal
import networkx as nx
from distanceclosure.dijkstra import single_source_dijkstra_path_length
from networkx.algorithms.shortest_paths.weighted import _weight_function
from ..graph import Graph
from .base import GraphOperator


Mode = Literal["distance", "similarity"]


@dataclass(slots=True)
class MetricDistanceFilter(GraphOperator):
    """
    Metric Distance Backbone filter for undirected weighted similarity or distance graphs.

    Working on distances, an edge ``(u, v)`` is dropped when the shortest
    path between ``u`` and ``v`` (path length = sum of edge distances) is
    strictly shorter than the direct edge.

    The method follows the distance backbone approach described in:
    Simas, T., Correia, R.B., & Rocha, L.M. (2021).
    "The distance backbone of complex networks."
    Journal of Complex Networks, 9(6), cnab021. https://doi.org/10.1093/comnet/cnab021

    Parameters
    ----------
    weight : str, optional
        Edge property containing distance values, by default 'weight'
    distortion : bool, optional
        Whether to compute and return distortion values, by default False.
        When True, ``apply`` returns a ``(graph, distortions)`` tuple
        instead of a bare graph.
    verbose : bool, optional
        Prints statements as it computes, by default False
    mode : {"distance", "similarity"}, optional
        Mode of the returned graph, by default 'distance'. The input graph's
        own ``mode`` decides whether it is converted to distances before
        filtering; this field only controls the output conversion.
    """

    weight: str = "weight"
    distortion: bool = False
    verbose: bool = False
    mode: Mode = "distance"
    supported_modes = ["distance", "similarity"]

    @staticmethod
    def _compute_distortions(D: nx.Graph, B: nx.Graph, weight: str = "weight", disjunction=sum) -> dict:
        """
        Compute the distortion of every edge of ``D`` that is absent from ``B``.

        Parameters
        ----------
        D : networkx graph
            The original distance graph (left unmodified; a copy is pruned).
        B : networkx graph
            The backbone of ``D``.
        weight : str, optional
            Edge attribute holding the distance values.
        disjunction : callable, optional
            Path-length aggregation passed through to the Dijkstra routine.

        Returns
        -------
        dict
            Maps ``(u, v)`` to ``direct distance / backbone distance``. Each
            removed edge is visited from both endpoints, so both ``(u, v)``
            and ``(v, u)`` are present.
        """
        # Keep only the edges that the backbone dropped.
        G = D.copy()
        G.remove_edges_from(B.edges())
        weight_function = _weight_function(B, weight)

        svals = {}
        for u in G.nodes():
            removed = G.adj[u]
            # No removed edge at u: nothing to record, so skip the Dijkstra run.
            if not removed:
                continue
            metric_dist = single_source_dijkstra_path_length(
                B, source=u, weight_function=weight_function, disjunction=disjunction
            )
            for v, attrs in removed.items():
                svals[(u, v)] = attrs[weight] / metric_dist[v]

        return svals

    def _directed_filter(self, G: Graph) -> Graph:
        """Reject directed graphs; always raises ``NotImplementedError``."""
        raise NotImplementedError("MetricDistanceFilter is defined only for undirected graphs.")

    def _undirected_filter(self, D: Graph) -> "Graph | tuple[Graph, dict]":
        """
        Compute the metric backbone of the undirected graph ``D``.

        Returns the filtered graph, or ``(filtered_graph, distortions)`` when
        ``self.distortion`` is True. Distortions are always computed on the
        distance representation, regardless of the output ``mode``.
        """
        disjunction = sum

        # The backbone algorithm is defined for distances.
        D_distance = D if D.mode == "distance" else D.convert_mode("distance")

        D_nx = D_distance.to_networkx()
        # Prune a copy so that D_nx keeps every original edge for the
        # optional distortion computation below.
        G = D_nx.copy()
        weight_function = _weight_function(G, self.weight)

        total = G.number_of_nodes()
        # Visit nodes in order of increasing weighted degree.
        ordered = sorted(G.degree(weight=self.weight), key=lambda x: x[1])
        for i, (u, _) in enumerate(ordered, start=1):
            if self.verbose:
                per = i / total
                print(f"Backbone: Dijkstra: {i} of {total} ({per:.2%})")

            # A node without incident edges has nothing to prune.
            if not G.adj[u]:
                continue

            metric_dist = single_source_dijkstra_path_length(
                G, source=u, weight_function=weight_function, disjunction=disjunction
            )
            # Iterate over a snapshot: edges are removed during the loop.
            for v in list(G.neighbors(u)):
                if metric_dist[v] < G[u][v][self.weight]:
                    G.remove_edge(u, v)

        sparse_adj = nx.to_scipy_sparse_array(G, weight=self.weight)

        filtered_graph = Graph(
            sparse_adj,
            directed=False,
            weighted=True,
            mode="distance",
            meta=None if D.meta is None else D.meta.copy(),
        )

        # Optional output conversion.
        if self.mode == "similarity":
            filtered_graph = filtered_graph.convert_mode("similarity")

        if not self.distortion:
            return filtered_graph

        svals = self._compute_distortions(D_nx, G, weight=self.weight, disjunction=disjunction)
        return filtered_graph, svals

    def apply(self, G: Graph) -> "Graph | tuple[Graph, dict]":
        """
        Apply the metric backbone filter to ``G``.

        Returns
        -------
        Graph or tuple
            The filtered graph, or ``(filtered_graph, distortions)`` when
            ``distortion`` is True.

        Raises
        ------
        ValueError
            If ``mode`` is not one of ``supported_modes``.
        NotImplementedError
            If ``G`` is directed.
        """
        self._check_mode_supported(G)
        # An unknown output mode would otherwise silently fall back to "distance".
        if self.mode not in self.supported_modes:
            raise ValueError(
                f"Unsupported mode {self.mode!r}; expected one of {self.supported_modes}."
            )
        if G.directed:
            return self._directed_filter(G)
        return self._undirected_filter(G)
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ networkx = ">3.4.0"
pandas = ">=2.1.1"
scipy= ">=1.14.0"
matplotlib= ">=3.8.0"
distanceclosure = ">=0.5.0"

[tool.poetry.group.dev.dependencies]
decorator = "^5.1.1"
Expand Down
196 changes: 196 additions & 0 deletions tests/test_metric_distance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
import networkx as nx
import numpy as np
import pytest
import scipy.sparse as sp
from graphconstructor import Graph
from graphconstructor.operators import MetricDistanceFilter


def _csr(data, rows, cols, n):
return sp.csr_matrix(
(np.asarray(data, float), (np.asarray(rows, int), np.asarray(cols, int))),
shape=(n, n),
)


def simple_undirected_graph():
    """Return a 3-node undirected similarity graph (a weighted triangle)."""
    triangle = [(0, 1, 0.5), (0, 2, 0.3), (1, 2, 0.8)]
    # Store every undirected edge in both orientations.
    entries = [e for u, v, w in triangle for e in ((u, v, w), (v, u, w))]
    rows, cols, data = zip(*entries)
    A = _csr(data=data, rows=rows, cols=cols, n=3)

    return Graph.from_csr(A, directed=False, weighted=True, mode="similarity")


def simple_directed_graph():
    """Return a 3-node directed similarity graph with arcs 0->1, 0->2 and 1->2."""
    arcs = [(0, 1, 0.5), (0, 2, 0.5), (1, 2, 0.3)]
    rows, cols, data = zip(*arcs)
    A = _csr(data=data, rows=rows, cols=cols, n=3)

    return Graph.from_csr(A, directed=True, weighted=True, mode="similarity")


def test_basic_undirected_filtering():
    """The filter returns an undirected, weighted Graph and never adds edges."""
    graph = simple_undirected_graph()

    backbone = MetricDistanceFilter(distortion=False, verbose=False).apply(graph)

    assert isinstance(backbone, Graph)
    assert not backbone.directed
    assert backbone.weighted

    edges_before = graph.to_networkx().number_of_edges()
    edges_after = backbone.to_networkx().number_of_edges()
    assert edges_after <= edges_before


def test_undirected_filtering_distortion():
    """With ``distortion=True`` the filter returns a ``(Graph, dict)`` pair."""
    graph = simple_undirected_graph()

    result = MetricDistanceFilter(distortion=True, verbose=False).apply(graph)

    assert isinstance(result, tuple)
    assert len(result) == 2

    backbone, distortions = result
    assert isinstance(backbone, Graph)
    assert isinstance(distortions, dict)

    # Distortion keys, when present, are (u, v) node pairs.
    if len(distortions) > 0:
        first_key = next(iter(distortions.keys()))
        assert isinstance(first_key, tuple)
        assert len(first_key) == 2


def test_directed_graph_not_implemented():
    """Applying the filter to a directed graph raises NotImplementedError."""
    directed = simple_directed_graph()
    with pytest.raises(NotImplementedError):
        metric_filter = MetricDistanceFilter()
        metric_filter.apply(directed)


def test_edge_removal_logic():
    """Filtering never adds edges and keeps a connected graph connected."""
    graph = simple_undirected_graph()
    backbone = MetricDistanceFilter().apply(graph)

    nx_before = graph.to_networkx()
    nx_after = backbone.to_networkx()

    assert nx_after.number_of_edges() <= nx_before.number_of_edges()

    # Connectivity is only checked when the input was connected.
    if not nx.is_connected(nx_before):
        return
    assert nx.is_connected(nx_after)


def test_isolated_nodes():
    """A node without any edge (node 2) is still present after filtering."""
    # Single undirected edge 0 -- 1; node 2 has no incident edge.
    weights = [0.5, 0.5]
    row_idx = [0, 1]
    col_idx = [1, 0]
    A = _csr(data=weights, rows=row_idx, cols=col_idx, n=3)
    graph = Graph.from_csr(A, directed=False, weighted=True, mode="distance")
    backbone = MetricDistanceFilter().apply(graph)

    assert backbone.to_networkx().number_of_nodes() == 3
    assert 2 in backbone.to_networkx().nodes()


def test_empty_graph():
    """A graph with no edges is returned with all nodes and no edges."""
    A = _csr([], [], [], 3)
    graph = Graph.from_csr(A, directed=False, weighted=True, mode="distance")

    backbone = MetricDistanceFilter().apply(graph)

    assert backbone.to_networkx().number_of_edges() == 0
    assert backbone.to_networkx().number_of_nodes() == 3


def test_distance_mode_removes_semimetric_edge():
    """In distance mode, an edge is removed if an indirect path is shorter."""
    # 0 -- 2 (length 3) is longer than the detour 0 -- 1 -- 2 (length 2).
    undirected = [(0, 1, 1.0), (1, 2, 1.0), (0, 2, 3.0)]
    entries = [e for u, v, w in undirected for e in ((u, v, w), (v, u, w))]
    rows, cols, data = zip(*entries)
    A = _csr(data=data, rows=rows, cols=cols, n=3)

    graph = Graph.from_csr(A, directed=False, weighted=True, mode="distance")
    backbone_nx = MetricDistanceFilter(mode="distance").apply(graph).to_networkx()

    assert backbone_nx.has_edge(0, 1)
    assert backbone_nx.has_edge(1, 2)
    assert not backbone_nx.has_edge(0, 2)


def test_similarity_mode_converts_similarity_before_filtering():
    """
    In similarity mode, strong similarities should behave like short distances.

    The weak edge 0 -- 2 should be removed because 0 -- 1 -- 2 is the
    stronger / closer indirect connection.
    """
    strong, weak = 0.9, 0.1
    similarities = [
        strong, strong,  # 0 -- 1
        strong, strong,  # 1 -- 2
        weak, weak,  # 0 -- 2
    ]
    row_idx = [0, 1, 1, 2, 0, 2]
    col_idx = [1, 0, 2, 1, 2, 0]
    A = _csr(data=similarities, rows=row_idx, cols=col_idx, n=3)

    graph = Graph.from_csr(A, directed=False, weighted=True, mode="similarity")
    backbone = MetricDistanceFilter(mode="similarity").apply(graph)
    backbone_nx = backbone.to_networkx()

    assert backbone_nx.has_edge(0, 1)
    assert backbone_nx.has_edge(1, 2)
    assert not backbone_nx.has_edge(0, 2)


def test_similarity_and_distance_inputs_give_equivalent_backbone_when_consistent():
    """
    A similarity graph and its corresponding distance graph should produce
    the same backbone topology.

    Edges are compared as unordered node pairs so the check does not depend
    on the (u, v) orientation in which networkx reports undirected edges.
    """
    # Same sparsity pattern for both graphs: 0 -- 1, 1 -- 2, 0 -- 2.
    rows = [0, 1, 1, 2, 0, 2]
    cols = [1, 0, 2, 1, 2, 0]

    A_sim = _csr(
        data=[
            0.9, 0.9,
            0.9, 0.9,
            0.1, 0.1,
        ],
        rows=rows,
        cols=cols,
        n=3,
    )

    A_dist = _csr(
        data=[
            0.1, 0.1,
            0.1, 0.1,
            0.9, 0.9,
        ],
        rows=rows,
        cols=cols,
        n=3,
    )

    G_sim = Graph.from_csr(A_sim, directed=False, weighted=True, mode="similarity")
    G_dist = Graph.from_csr(A_dist, directed=False, weighted=True, mode="distance")

    out_sim = MetricDistanceFilter(mode="similarity").apply(G_sim).to_networkx()
    out_dist = MetricDistanceFilter(mode="distance").apply(G_dist).to_networkx()

    sim_edges = {frozenset(edge) for edge in out_sim.edges()}
    dist_edges = {frozenset(edge) for edge in out_dist.edges()}

    assert sim_edges == dist_edges
    assert dist_edges == {frozenset((0, 1)), frozenset((1, 2))}
Loading