Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions graphconstructor/adapters.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from dataclasses import dataclass
from typing import Optional
import numpy as np
from numpy.typing import NDArray
from .types import MatrixMode
Expand All @@ -8,6 +7,7 @@

@dataclass
class MatrixInput:
"""Input for graph constructors that take a dense matrix."""
matrix: NDArray
mode: MatrixMode # "distance" or "similarity"

Expand All @@ -19,6 +19,7 @@ def __post_init__(self) -> None:

@dataclass
class KNNInput:
"""Input for graph constructors that take KNN graphs."""
indices: NDArray[np.int_]
distances: NDArray

Expand All @@ -31,9 +32,7 @@ def __post_init__(self) -> None:

@dataclass
class ANNInput:
# A fitted ANN index, e.g., pynndescent.NNDescent
"""Input for graph constructors that take a fitted ANN index, e.g., pynndescent.NNDescent."""
index: object
# Optionally, a query set to build edges from (defaults to the index's training set)
query_data: Optional[NDArray] = None

# We don't verify protocol strictly at runtime; we use duck typing in constructors.
query_data: NDArray | None = None
17 changes: 9 additions & 8 deletions graphconstructor/graph.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
from collections.abc import Iterable, Sequence
from dataclasses import dataclass
from typing import Iterable, Literal, Optional, Sequence
from typing import Literal
import networkx as nx
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -35,8 +36,8 @@ class Graph:
weighted: bool
mode: str
metadata: pd.DataFrame | None = None
ignore_selfloops: Optional[bool] = None
keep_explicit_zeros: Optional[bool] = None
ignore_selfloops: bool | None = None
keep_explicit_zeros: bool | None = None

def __post_init__(self):
# Check mode
Expand Down Expand Up @@ -171,8 +172,8 @@ def from_edges(
directed: bool = False,
weighted: bool = True,
metadata: pd.DataFrame | None = None,
ignore_selfloops: Optional[bool] = None,
keep_explicit_zeros: Optional[bool] = None,
ignore_selfloops: bool | None = None,
keep_explicit_zeros: bool | None = None,
sym_op: SymOp = "max",
) -> "Graph":
"""Build from an edge list. For undirected graphs (directed=False), we symmetrize later."""
Expand All @@ -188,7 +189,7 @@ def from_edges(
rows = edges[:, 0].astype(int, copy=False)
cols = edges[:, 1].astype(int, copy=False)
else:
rows, cols = map(np.asarray, zip(*edges)) if edges else (np.array([], int), np.array([], int))
rows, cols = map(np.asarray, zip(*edges, strict=True)) if edges else (np.array([], int), np.array([], int))

if weights is None:
data = np.ones_like(rows, dtype=float)
Expand Down Expand Up @@ -468,7 +469,7 @@ def to_igraph(self):
weights = coo.data[mask] if self.weighted else np.ones(mask.sum(), dtype=float)

g = ig.Graph(n=self.n_nodes, directed=self.directed)
g.add_edges(list(zip(rows.tolist(), cols.tolist())))
g.add_edges(list(zip(rows.tolist(), cols.tolist(), strict=True)))
if self.weighted:
g.es["weight"] = weights.tolist()
else:
Expand Down Expand Up @@ -581,7 +582,7 @@ def to_cytoscape(
values = coo.data[edge_mask]

edges = []
for edge_idx, (src, dst, value) in enumerate(zip(rows, cols, values)):
for edge_idx, (src, dst, value) in enumerate(zip(rows, cols, values, strict=True)):
data = {
"id": f"e{edge_idx}",
"source": node_ids[int(src)],
Expand Down
10 changes: 7 additions & 3 deletions graphconstructor/importers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def from_dense(
metadata=None,
sym_op="max"
) -> Graph:
"""Construct a Graph from a dense adjacency matrix."""
return Graph.from_dense(arr, directed=directed, weighted=weighted, mode=mode, metadata=metadata, sym_op=sym_op)


Expand All @@ -28,6 +29,7 @@ def from_csr(
metadata=None,
sym_op="max"
) -> Graph:
"""Construct a Graph from a sparse adjacency matrix."""
return Graph.from_csr(adj, directed=directed, weighted=weighted, mode=mode, metadata=metadata, sym_op=sym_op)


Expand All @@ -40,6 +42,7 @@ def from_knn(
metadata=None,
sym_op="max"
) -> Graph:
"""Construct a Graph from KNN neighbor indices and distances."""
ind, dist = _coerce_knn_inputs(indices, distances)
n_query, k = ind.shape

Expand Down Expand Up @@ -71,10 +74,11 @@ def from_ann(
metadata=None,
sym_op="max"
) -> Graph:
"""Construct a Graph from a fitted ANN index, e.g., pynndescent.NNDescent."""
idx = ann.index if hasattr(ann, "index") else ann
if hasattr(idx, "indices_") and getattr(idx, "indices_") is not None:
ind = np.asarray(getattr(idx, "indices_"))[:, :k]
dist = np.asarray(getattr(idx, "distances_"))[:, :k]
if hasattr(idx, "indices_") and idx.indices_ is not None:
ind = np.asarray(idx.indices_)[:, :k]
dist = np.asarray(idx.distances_)[:, :k]
else:
if query_data is None:
raise TypeError("from_ann requires query_data when index has no cached neighbors.")
Expand Down
4 changes: 2 additions & 2 deletions graphconstructor/operators/doubly_stochastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def apply(self, G: Graph) -> Graph:
warnings.warn(
"DoublyStochasticNormalize stopped early because scaling factors "
"became very large. Result may not be doubly stochastic.",
RuntimeWarning,
RuntimeWarning, stacklevel=2,
)
break

Expand Down Expand Up @@ -117,7 +117,7 @@ def apply(self, G: Graph) -> Graph:
if not converged:
warnings.warn(
"DoublyStochasticNormalize did not converge within max_iter.",
RuntimeWarning,
RuntimeWarning, stacklevel=2,
)

# Apply scaling once: A' = diag(r) * A * diag(c) (CSR-friendly)
Expand Down
6 changes: 3 additions & 3 deletions graphconstructor/operators/enhanced_configuration_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ def _undirected(self, G: Graph) -> Graph:
import warnings
warnings.warn(
f"ECM optimisation did not converge: {res.message}",
RuntimeWarning,
RuntimeWarning, stacklevel=2,
)

# ---- p-value matrix ----------------------------------------------
Expand All @@ -381,11 +381,11 @@ def _undirected(self, G: Graph) -> Graph:
W_lower_original = sp.tril(W_original, k=-1).tocoo()
original_lookup = {
(int(i), int(j)): w
for i, j, w in zip(W_lower_original.row, W_lower_original.col, W_lower_original.data)
for i, j, w in zip(W_lower_original.row, W_lower_original.col, W_lower_original.data, strict=True)
}

original_weights = np.array(
[original_lookup[(int(i), int(j))] for i, j in zip(row, col)],
[original_lookup[(int(i), int(j))] for i, j in zip(row, col, strict=True)],
dtype=W_original.dtype,
)

Expand Down
4 changes: 2 additions & 2 deletions graphconstructor/operators/knn_selector.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dataclasses import dataclass
from typing import Literal, Optional
from typing import Literal
import numpy as np
import scipy.sparse as sp
from ..graph import Graph
Expand Down Expand Up @@ -30,7 +30,7 @@ class KNNSelector(GraphOperator):
"""
k: int
mutual: bool = False
mutual_k: Optional[int] = None
mutual_k: int | None = None
mode: Mode = "distance"
supported_modes = ["similarity", "distance"]

Expand Down
2 changes: 1 addition & 1 deletion graphconstructor/operators/metric_distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def _compute_distortions(D: GraphOperator, B, weight="weight", disjunction=sum):
G.remove_edges_from(B.edges())
weight_function = _weight_function(B, weight)

svals = dict()
svals = {}
for u in G.nodes():
metric_dist = single_source_dijkstra_path_length(
B, source=u, weight_function=weight_function, disjunction=disjunction
Expand Down
8 changes: 4 additions & 4 deletions graphconstructor/types.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from __future__ import annotations
from typing import Literal, Optional, Protocol, Tuple
from typing import Literal, Protocol
import numpy as np
from numpy.typing import ArrayLike, NDArray
from scipy.sparse import csr_matrix
Expand All @@ -14,9 +14,9 @@ class ANNLike(Protocol):
The minimal surface we rely on mirrors PyNNDescent and similar libraries.
"""

def query(self, X: ArrayLike, k: int) -> Tuple[NDArray[np.int_], NDArray[np.floating]]: # indices, distances
def query(self, X: ArrayLike, k: int) -> tuple[NDArray[np.int_], NDArray[np.floating]]: # indices, distances
...

# Optional attributes commonly present on fitted ANN indexes
indices_: Optional[NDArray[np.int_]]
distances_: Optional[NDArray[np.floating]]
indices_: NDArray[np.int_] | None
distances_: NDArray[np.floating] | None
16 changes: 7 additions & 9 deletions graphconstructor/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Callable, Literal, Tuple, Union
from collections.abc import Callable
from typing import Literal
import numpy as np
import scipy.sparse as sp
from numpy.typing import NDArray
Expand All @@ -8,10 +9,7 @@

# Type aliases for clarity
Mode = Literal["distance", "similarity"]
ConversionMethod = Union[
Literal["reciprocal", "negative", "exp", "gaussian"],
Callable[[np.ndarray], np.ndarray]
]
ConversionMethod = Literal["reciprocal", "negative", "exp", "gaussian"] | Callable[[np.ndarray], np.ndarray]


def _validate_square_matrix(M: np.ndarray) -> None:
Expand Down Expand Up @@ -40,7 +38,7 @@ def _drop_diagonal(A: sp.csr_matrix) -> sp.csr_matrix:
return sp.csr_matrix((coo.data[mask], (coo.row[mask], coo.col[mask])), shape=A.shape)


def _coerce_knn_inputs(indices, distances) -> Tuple[np.ndarray, np.ndarray]:
def _coerce_knn_inputs(indices, distances) -> tuple[np.ndarray, np.ndarray]:
ind = _to_numpy(indices)
dist = _to_numpy(distances)
if ind.shape != dist.shape:
Expand All @@ -60,7 +58,7 @@ def _csr_from_edges(n: int, rows: np.ndarray, cols: np.ndarray, weights: np.ndar
return csr_matrix((weights, (rows, cols)), shape=(n, n))


def _as_csr_square(M: NDArray | spmatrix) -> Tuple[sp.csr_matrix, int]:
def _as_csr_square(M: NDArray | spmatrix) -> tuple[sp.csr_matrix, int]:
"""Return (CSR, n) for a square matrix without densifying.

If `M` is dense, convert to CSR. If `M` is sparse, convert format to CSR
Expand All @@ -78,7 +76,7 @@ def _as_csr_square(M: NDArray | spmatrix) -> Tuple[sp.csr_matrix, int]:
return sp.csr_matrix(arr), arr.shape[0]


def _topk_per_row_sparse(csr: sp.csr_matrix, k: int, *, largest: bool) -> Tuple[np.ndarray, np.ndarray]:
def _topk_per_row_sparse(csr: sp.csr_matrix, k: int, *, largest: bool) -> tuple[np.ndarray, np.ndarray]:
"""Return (indices, values) of top-k entries per row from CSR matrix.

This operates strictly on the row's nonzeros without densifying.
Expand Down Expand Up @@ -120,7 +118,7 @@ def _topk_per_row_sparse(csr: sp.csr_matrix, k: int, *, largest: bool) -> Tuple[
return ind, vals


def _knn_from_matrix(M: NDArray | spmatrix, k: int, *, mode: MatrixMode) -> Tuple[np.ndarray, np.ndarray]:
def _knn_from_matrix(M: NDArray | spmatrix, k: int, *, mode: MatrixMode) -> tuple[np.ndarray, np.ndarray]:
"""Compute kNN (indices, values) from a square distance/similarity matrix.

Supports dense and sparse inputs without densifying sparse matrices.
Expand Down
12 changes: 6 additions & 6 deletions graphconstructor/visualization/graph_statistics.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Iterable, Optional, Tuple
from collections.abc import Iterable
import matplotlib.pyplot as plt
import numpy as np

Expand All @@ -8,13 +8,13 @@ def plot_degree_distribution(
*,
x_scale: str = "log",
y_scale: str = "log",
ax: Optional[plt.Axes] = None,
ax: plt.Axes | None = None,
normalize: bool = True,
include_zero_degree: bool = False,
label: Optional[str] = None,
label: str | None = None,
marker: str = "o",
markersize: float = 5.0,
) -> Tuple[plt.Figure, plt.Axes]:
) -> tuple[plt.Figure, plt.Axes]:
"""
Plot the degree distribution p(k) vs k for a single graph.

Expand Down Expand Up @@ -128,11 +128,11 @@ def plot_degree_distributions_grid(
y_scale: str = "log",
normalize: bool = True,
include_zero_degree: bool = False,
figsize: Optional[Tuple[float, float]] = None,
figsize: tuple[float, float] | None = None,
tight_layout: bool = True,
sharex: bool = False,
sharey: bool = False,
) -> Tuple[plt.Figure, np.ndarray]:
) -> tuple[plt.Figure, np.ndarray]:
"""
Plot a grid of degree distribution plots for multiple graphs.

Expand Down
41 changes: 19 additions & 22 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "graphconstructor"
version = "0.3.0"
version = "0.4.0"
description = "Python library for graph construction from similarity or distance metrics"
authors = ["Florian Huber <florian.huber@hs-duesseldorf.de>"]

Expand Down Expand Up @@ -56,28 +56,25 @@ line-ending = "lf"

[tool.ruff.lint]
# TODO: add some rules in the future, e.g. W291/292
extend-select = ["D", "E", "I"]
# TODO: some of these rules should be enforced in the future, eg. E713
extend-select = ["D", "E", "I", "F", "Q", "UP", "B", "C4"]
# TODO: some of these rules should be enforced in the future
ignore = [
"D100",
"D101",
"D102",
"D103",
"D104",
"D105",
"D107",
"D200",
"D201",
"D202",
"D203",
"D204",
"D205",
"D209",
"D210",
"D212",
"D213",
"D4",
"E713"
"D100",
"D102",
"D104",
"D105",
"D107",
"D200",
"D201",
"D202",
"D203",
"D204",
"D205",
"D209",
"D210",
"D212",
"D213",
"D4",
]

[tool.ruff.lint.isort]
Expand Down
8 changes: 4 additions & 4 deletions tests/test_disparity.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ def test_disparity_directed_min_out_in_formula():

alpha = 0.2
expect_keep = np.minimum(pv_out, pv_in) <= alpha
expected = set(zip(rows[expect_keep].tolist(), cols[expect_keep].tolist()))
expected = set(zip(rows[expect_keep].tolist(), cols[expect_keep].tolist(), strict=True))

out = DisparityFilter(alpha=alpha).apply(G0)
got = set(zip(*out.adj.nonzero()))
got = set(zip(*out.adj.nonzero(), strict=True))
assert got == expected
assert out.directed and out.weighted
assert np.isfinite(out.adj.data).all()
Expand All @@ -74,8 +74,8 @@ def test_disparity_undirected_or_superset_and():
assert (G_or.adj != G_or.adj.T).nnz == 0
assert (G_and.adj != G_and.adj.T).nnz == 0

e_or = set(zip(*sp.triu(G_or.adj, k=1).nonzero()))
e_and = set(zip(*sp.triu(G_and.adj, k=1).nonzero()))
e_or = set(zip(*sp.triu(G_or.adj, k=1).nonzero(), strict=True))
e_and = set(zip(*sp.triu(G_and.adj, k=1).nonzero(), strict=True))
assert e_and.issubset(e_or)


Expand Down
Loading
Loading