diff --git a/graphconstructor/adapters.py b/graphconstructor/adapters.py index ca9347f..5da4ab9 100644 --- a/graphconstructor/adapters.py +++ b/graphconstructor/adapters.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from typing import Optional import numpy as np from numpy.typing import NDArray from .types import MatrixMode @@ -8,6 +7,7 @@ @dataclass class MatrixInput: + """Input for graph constructors that take a dense matrix.""" matrix: NDArray mode: MatrixMode # "distance" or "similarity" @@ -19,6 +19,7 @@ def __post_init__(self) -> None: @dataclass class KNNInput: + """Input for graph constructors that take KNN graphs.""" indices: NDArray[np.int_] distances: NDArray @@ -31,9 +32,7 @@ def __post_init__(self) -> None: @dataclass class ANNInput: - # A fitted ANN index, e.g., pynndescent.NNDescent + """A fitted ANN index, e.g., pynndescent.NNDescent""" index: object # Optionally, a query set to build edges from (defaults to the index's training set) - query_data: Optional[NDArray] = None - - # We don't verify protocol strictly at runtime; we use duck typing in constructors. + query_data: NDArray | None = None diff --git a/graphconstructor/graph.py b/graphconstructor/graph.py index fe55fe1..fd4fd49 100644 --- a/graphconstructor/graph.py +++ b/graphconstructor/graph.py @@ -1,6 +1,7 @@ import json +from collections.abc import Iterable, Sequence from dataclasses import dataclass -from typing import Iterable, Literal, Optional, Sequence +from typing import Literal import networkx as nx import numpy as np import pandas as pd @@ -35,8 +36,8 @@ class Graph: weighted: bool mode: str metadata: pd.DataFrame | None = None - ignore_selfloops: Optional[bool] = None - keep_explicit_zeros: Optional[bool] = None + ignore_selfloops: bool | None = None + keep_explicit_zeros: bool | None = None def __post_init__(self): # Check mode @@ -171,8 +172,8 @@ def from_edges( directed: bool = False, weighted: bool = True, metadata: pd.DataFrame | None = None, - ignore_selfloops: Optional[bool] = None, - keep_explicit_zeros: Optional[bool] = None, + ignore_selfloops: bool | None = None, + keep_explicit_zeros: bool | None = None, sym_op: SymOp = "max", ) -> "Graph": """Build from an edge list. For undirected=True, we symmetrize later.""" @@ -188,7 +189,7 @@ def from_edges( rows = edges[:, 0].astype(int, copy=False) cols = edges[:, 1].astype(int, copy=False) else: - rows, cols = map(np.asarray, zip(*edges)) if edges else (np.array([], int), np.array([], int)) + rows, cols = map(np.asarray, zip(*edges, strict=True)) if edges else (np.array([], int), np.array([], int)) if weights is None: data = np.ones_like(rows, dtype=float) @@ -468,7 +469,7 @@ def to_igraph(self): weights = coo.data[mask] if self.weighted else np.ones(mask.sum(), dtype=float) g = ig.Graph(n=self.n_nodes, directed=self.directed) - g.add_edges(list(zip(rows.tolist(), cols.tolist()))) + g.add_edges(list(zip(rows.tolist(), cols.tolist(), strict=True))) if self.weighted: g.es["weight"] = weights.tolist() else: @@ -581,7 +582,7 @@ def to_cytoscape( values = coo.data[edge_mask] edges = [] - for edge_idx, (src, dst, value) in enumerate(zip(rows, cols, values)): + for edge_idx, (src, dst, value) in enumerate(zip(rows, cols, values, strict=True)): data = { "id": f"e{edge_idx}", "source": node_ids[int(src)], diff --git a/graphconstructor/importers.py b/graphconstructor/importers.py index 71bc602..b48e3f5 100644 --- a/graphconstructor/importers.py +++ b/graphconstructor/importers.py @@ -16,6 +16,7 @@ def from_dense( metadata=None, sym_op="max" ) -> Graph: + """Construct a Graph from a dense adjacency matrix.""" return Graph.from_dense(arr, directed=directed, weighted=weighted, mode=mode, metadata=metadata, sym_op=sym_op) @@ -28,6 +29,7 @@ def from_csr( metadata=None, sym_op="max" ) -> Graph: + """Construct a Graph from a sparse adjacency matrix.""" return Graph.from_csr(adj, directed=directed, weighted=weighted, mode=mode, metadata=metadata, sym_op=sym_op) @@ -40,6 +42,7 @@ def from_knn( metadata=None, sym_op="max" ) -> Graph: + """Construct a Graph from KNN neighbor indices and distances.""" ind, dist = _coerce_knn_inputs(indices, distances) n_query, k = ind.shape @@ -71,10 +74,11 @@ def from_ann( metadata=None, sym_op="max" ) -> Graph: + """Construct a Graph from a fitted ANN index, e.g., pynndescent.NNDescent.""" idx = ann.index if hasattr(ann, "index") else ann - if hasattr(idx, "indices_") and getattr(idx, "indices_") is not None: - ind = np.asarray(getattr(idx, "indices_"))[:, :k] - dist = np.asarray(getattr(idx, "distances_"))[:, :k] + if hasattr(idx, "indices_") and idx.indices_ is not None: + ind = np.asarray(idx.indices_)[:, :k] + dist = np.asarray(idx.distances_)[:, :k] else: if query_data is None: raise TypeError("from_ann requires query_data when index has no cached neighbors.") diff --git a/graphconstructor/operators/doubly_stochastic.py b/graphconstructor/operators/doubly_stochastic.py index 9c07900..b804bd4 100644 --- a/graphconstructor/operators/doubly_stochastic.py +++ b/graphconstructor/operators/doubly_stochastic.py @@ -85,7 +85,7 @@ def apply(self, G: Graph) -> Graph: warnings.warn( "DoublyStochasticNormalize stopped early because scaling factors " "became very large. Result may not be doubly stochastic.", - RuntimeWarning, + RuntimeWarning, stacklevel=2, ) break @@ -117,7 +117,7 @@ def apply(self, G: Graph) -> Graph: if not converged: warnings.warn( "DoublyStochasticNormalize did not converge within max_iter.", - RuntimeWarning, + RuntimeWarning, stacklevel=2, ) # Apply scaling once: A' = diag(r) * A * diag(c) (CSR-friendly) diff --git a/graphconstructor/operators/enhanced_configuration_model.py b/graphconstructor/operators/enhanced_configuration_model.py index 7262ff6..9758010 100644 --- a/graphconstructor/operators/enhanced_configuration_model.py +++ b/graphconstructor/operators/enhanced_configuration_model.py @@ -362,7 +362,7 @@ def _undirected(self, G: Graph) -> Graph: import warnings warnings.warn( f"ECM optimisation did not converge: {res.message}", - RuntimeWarning, + RuntimeWarning, stacklevel=2, ) # ---- p-value matrix ---------------------------------------------- @@ -381,11 +381,11 @@ def _undirected(self, G: Graph) -> Graph: W_lower_original = sp.tril(W_original, k=-1).tocoo() original_lookup = { (int(i), int(j)): w - for i, j, w in zip(W_lower_original.row, W_lower_original.col, W_lower_original.data) + for i, j, w in zip(W_lower_original.row, W_lower_original.col, W_lower_original.data, strict=True) } original_weights = np.array( - [original_lookup[(int(i), int(j))] for i, j in zip(row, col)], + [original_lookup[(int(i), int(j))] for i, j in zip(row, col, strict=True)], dtype=W_original.dtype, ) diff --git a/graphconstructor/operators/knn_selector.py b/graphconstructor/operators/knn_selector.py index 25a9b69..ec21b21 100644 --- a/graphconstructor/operators/knn_selector.py +++ b/graphconstructor/operators/knn_selector.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Literal, Optional +from typing import Literal import numpy as np import scipy.sparse as sp from ..graph import Graph @@ -30,7 +30,7 @@ class KNNSelector(GraphOperator): """ k: int mutual: bool = False - mutual_k: Optional[int] = None + mutual_k: int | None = None mode: Mode = "distance" supported_modes = ["similarity", "distance"] diff --git a/graphconstructor/operators/metric_distance.py b/graphconstructor/operators/metric_distance.py index ff7fa18..aa30487 100644 --- a/graphconstructor/operators/metric_distance.py +++ b/graphconstructor/operators/metric_distance.py @@ -43,7 +43,7 @@ def _compute_distortions(D: GraphOperator, B, weight="weight", disjunction=sum): G.remove_edges_from(B.edges()) weight_function = _weight_function(B, weight) - svals = dict() + svals = {} for u in G.nodes(): metric_dist = single_source_dijkstra_path_length( B, source=u, weight_function=weight_function, disjunction=disjunction diff --git a/graphconstructor/types.py b/graphconstructor/types.py index 9685fcd..41eed55 100644 --- a/graphconstructor/types.py +++ b/graphconstructor/types.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import Literal, Optional, Protocol, Tuple +from typing import Literal, Protocol import numpy as np from numpy.typing import ArrayLike, NDArray from scipy.sparse import csr_matrix @@ -14,9 +14,9 @@ class ANNLike(Protocol): The minimal surface we rely on mirrors PyNNDescent and similar libraries. """ - def query(self, X: ArrayLike, k: int) -> Tuple[NDArray[np.int_], NDArray[np.floating]]: # indices, distances + def query(self, X: ArrayLike, k: int) -> tuple[NDArray[np.int_], NDArray[np.floating]]: # indices, distances ... # Optional attributes commonly present on fitted ANN indexes - indices_: Optional[NDArray[np.int_]] - distances_: Optional[NDArray[np.floating]] + indices_: NDArray[np.int_] | None + distances_: NDArray[np.floating] | None diff --git a/graphconstructor/utils.py b/graphconstructor/utils.py index 74721b4..65b4c8f 100644 --- a/graphconstructor/utils.py +++ b/graphconstructor/utils.py @@ -1,4 +1,5 @@ -from typing import Callable, Literal, Tuple, Union +from collections.abc import Callable +from typing import Literal import numpy as np import scipy.sparse as sp from numpy.typing import NDArray @@ -8,10 +9,7 @@ # Type aliases for clarity Mode = Literal["distance", "similarity"] -ConversionMethod = Union[ - Literal["reciprocal", "negative", "exp", "gaussian"], - Callable[[np.ndarray], np.ndarray] - ] +ConversionMethod = Literal["reciprocal", "negative", "exp", "gaussian"] | Callable[[np.ndarray], np.ndarray] def _validate_square_matrix(M: np.ndarray) -> None: @@ -40,7 +38,7 @@ def _drop_diagonal(A: sp.csr_matrix) -> sp.csr_matrix: return sp.csr_matrix((coo.data[mask], (coo.row[mask], coo.col[mask])), shape=A.shape) -def _coerce_knn_inputs(indices, distances) -> Tuple[np.ndarray, np.ndarray]: +def _coerce_knn_inputs(indices, distances) -> tuple[np.ndarray, np.ndarray]: ind = _to_numpy(indices) dist = _to_numpy(distances) if ind.shape != dist.shape: @@ -60,7 +58,7 @@ def _csr_from_edges(n: int, rows: np.ndarray, cols: np.ndarray, weights: np.ndar return csr_matrix((weights, (rows, cols)), shape=(n, n)) -def _as_csr_square(M: NDArray | spmatrix) -> Tuple[sp.csr_matrix, int]: +def _as_csr_square(M: NDArray | spmatrix) -> tuple[sp.csr_matrix, int]: """Return (CSR, n) for a square matrix without densifying. If `M` is dense, convert to CSR. If `M` is sparse, convert format to CSR @@ -78,7 +76,7 @@ def _as_csr_square(M: NDArray | spmatrix) -> Tuple[sp.csr_matrix, int]: return sp.csr_matrix(arr), arr.shape[0] -def _topk_per_row_sparse(csr: sp.csr_matrix, k: int, *, largest: bool) -> Tuple[np.ndarray, np.ndarray]: +def _topk_per_row_sparse(csr: sp.csr_matrix, k: int, *, largest: bool) -> tuple[np.ndarray, np.ndarray]: """Return (indices, values) of top-k entries per row from CSR matrix. This operates strictly on the row's nonzeros without densifying. @@ -120,7 +118,7 @@ def _topk_per_row_sparse(csr: sp.csr_matrix, k: int, *, largest: bool) -> Tuple[ return ind, vals -def _knn_from_matrix(M: NDArray | spmatrix, k: int, *, mode: MatrixMode) -> Tuple[np.ndarray, np.ndarray]: +def _knn_from_matrix(M: NDArray | spmatrix, k: int, *, mode: MatrixMode) -> tuple[np.ndarray, np.ndarray]: """Compute kNN (indices, values) from a square distance/similarity matrix. Supports dense and sparse inputs without densifying sparse matrices. diff --git a/graphconstructor/visualization/graph_statistics.py b/graphconstructor/visualization/graph_statistics.py index b3a98b6..32d9992 100644 --- a/graphconstructor/visualization/graph_statistics.py +++ b/graphconstructor/visualization/graph_statistics.py @@ -1,4 +1,4 @@ -from typing import Iterable, Optional, Tuple +from collections.abc import Iterable import matplotlib.pyplot as plt import numpy as np @@ -8,13 +8,13 @@ def plot_degree_distribution( *, x_scale: str = "log", y_scale: str = "log", - ax: Optional[plt.Axes] = None, + ax: plt.Axes | None = None, normalize: bool = True, include_zero_degree: bool = False, - label: Optional[str] = None, + label: str | None = None, marker: str = "o", markersize: float = 5.0, -) -> Tuple[plt.Figure, plt.Axes]: +) -> tuple[plt.Figure, plt.Axes]: """ Plot the degree distribution p(k) vs k for a single graph. @@ -128,11 +128,11 @@ def plot_degree_distributions_grid( y_scale: str = "log", normalize: bool = True, include_zero_degree: bool = False, - figsize: Optional[Tuple[float, float]] = None, + figsize: tuple[float, float] | None = None, tight_layout: bool = True, sharex: bool = False, sharey: bool = False, -) -> Tuple[plt.Figure, np.ndarray]: +) -> tuple[plt.Figure, np.ndarray]: """ Plot a grid of degree distribution plots for multiple graphs. diff --git a/pyproject.toml b/pyproject.toml index 84a4171..a4806d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "graphconstructor" -version = "0.3.0" +version = "0.4.0" description = "Python library for graph construction from similarity or distance metrics" authors = ["Florian Huber "] @@ -56,28 +56,25 @@ line-ending = "lf" [tool.ruff.lint] # TODO: add some rules in future, eg. W291/292 -extend-select = ["D", "E", "I"] -# TODO: some of these rules should be enforced in the future, eg. E713 +extend-select = ["D", "E", "I", "F", "Q", "UP", "B", "C4"] +# TODO: some of these rules should be enforced in the future ignore = [ - "D100", - "D101", - "D102", - "D103", - "D104", - "D105", - "D107", - "D200", - "D201", - "D202", - "D203", - "D204", - "D205", - "D209", - "D210", - "D212", - "D213", - "D4", - "E713" + "D100", + "D102", + "D104", + "D105", + "D107", + "D200", + "D201", + "D202", + "D203", + "D204", + "D205", + "D209", + "D210", + "D212", + "D213", + "D4", ] [tool.ruff.lint.isort] diff --git a/tests/test_disparity.py b/tests/test_disparity.py index 98f7e75..082e285 100644 --- a/tests/test_disparity.py +++ b/tests/test_disparity.py @@ -47,10 +47,10 @@ def test_disparity_directed_min_out_in_formula(): alpha = 0.2 expect_keep = np.minimum(pv_out, pv_in) <= alpha - expected = set(zip(rows[expect_keep].tolist(), cols[expect_keep].tolist())) + expected = set(zip(rows[expect_keep].tolist(), cols[expect_keep].tolist(), strict=True)) out = DisparityFilter(alpha=alpha).apply(G0) - got = set(zip(*out.adj.nonzero())) + got = set(zip(*out.adj.nonzero(), strict=True)) assert got == expected assert out.directed and out.weighted assert np.isfinite(out.adj.data).all() @@ -74,8 +74,8 @@ def test_disparity_undirected_or_superset_and(): assert (G_or.adj != G_or.adj.T).nnz == 0 assert (G_and.adj != G_and.adj.T).nnz == 0 - e_or = set(zip(*sp.triu(G_or.adj, k=1).nonzero())) - e_and = set(zip(*sp.triu(G_and.adj, k=1).nonzero())) + e_or = set(zip(*sp.triu(G_or.adj, k=1).nonzero(), strict=True)) + e_and = set(zip(*sp.triu(G_and.adj, k=1).nonzero(), strict=True)) assert e_and.issubset(e_or) diff --git a/tests/test_locally_adaptive_sparsification.py b/tests/test_locally_adaptive_sparsification.py index f66ae74..58c2d19 100644 --- a/tests/test_locally_adaptive_sparsification.py +++ b/tests/test_locally_adaptive_sparsification.py @@ -53,15 +53,15 @@ def test_lans_undirected_and_subset_or_and_alpha_monotonicity(): G_or = LocallyAdaptiveSparsification(alpha=0.30, rule="or").apply(G0) G_and = LocallyAdaptiveSparsification(alpha=0.30, rule="and").apply(G0) - e_or = set(zip(*sp.triu(G_or.adj, k=1).nonzero())) - e_and = set(zip(*sp.triu(G_and.adj, k=1).nonzero())) + e_or = set(zip(*sp.triu(G_or.adj, k=1).nonzero(), strict=True)) + e_and = set(zip(*sp.triu(G_and.adj, k=1).nonzero(), strict=True)) assert e_and.issubset(e_or) # "and" ⊆ "or" # Monotonicity in alpha: larger alpha keeps (weakly) more edges G_lo = LocallyAdaptiveSparsification(alpha=0.10, rule="or").apply(G0) G_hi = LocallyAdaptiveSparsification(alpha=0.40, rule="or").apply(G0) - e_lo = set(zip(*sp.triu(G_lo.adj, k=1).nonzero())) - e_hi = set(zip(*sp.triu(G_hi.adj, k=1).nonzero())) + e_lo = set(zip(*sp.triu(G_lo.adj, k=1).nonzero(), strict=True)) + e_hi = set(zip(*sp.triu(G_hi.adj, k=1).nonzero(), strict=True)) assert e_lo.issubset(e_hi) # Symmetry guaranteed diff --git a/tests/test_marginal_likelihood_filter.py b/tests/test_marginal_likelihood_filter.py index f9da3f8..9280429 100644 --- a/tests/test_marginal_likelihood_filter.py +++ b/tests/test_marginal_likelihood_filter.py @@ -46,14 +46,14 @@ def test_mlf_undirected_matches_binomial_tail(): alpha = 0.05 keep = pvals <= alpha - expected_edges = set(zip(Au.row[keep].tolist(), Au.col[keep].tolist())) + expected_edges = set(zip(Au.row[keep].tolist(), Au.col[keep].tolist(), strict=True)) # Run operator out = MarginalLikelihoodFilter(alpha=alpha).apply(G0) A2 = out.adj # Extract kept undirected edges from result (upper triangle) kept_u = sp.triu(A2, k=1).tocoo() - got_edges = set(zip(kept_u.row.tolist(), kept_u.col.tolist())) + got_edges = set(zip(kept_u.row.tolist(), kept_u.col.tolist(), strict=True)) assert got_edges == expected_edges # Symmetry and flags preserved @@ -90,11 +90,11 @@ def test_mlf_directed_uses_out_in_degrees(): alpha = 0.10 keep = pvals <= alpha - expected_arcs = set(zip(coo.row[keep].tolist(), coo.col[keep].tolist())) + expected_arcs = set(zip(coo.row[keep].tolist(), coo.col[keep].tolist(), strict=True)) out = MarginalLikelihoodFilter(alpha=alpha).apply(G0) coo2 = out.adj.tocoo() - got_arcs = set(zip(coo2.row.tolist(), coo2.col.tolist())) + got_arcs = set(zip(coo2.row.tolist(), coo2.col.tolist(), strict=True)) assert got_arcs == expected_arcs assert out.directed and out.weighted @@ -116,8 +116,8 @@ def test_mlf_alpha_monotonicity(): G_small = MarginalLikelihoodFilter(alpha=0.01).apply(G0) G_large = MarginalLikelihoodFilter(alpha=0.2).apply(G0) - e_small = set(zip(*sp.triu(G_small.adj, k=1).nonzero())) - e_large = set(zip(*sp.triu(G_large.adj, k=1).nonzero())) + e_small = set(zip(*sp.triu(G_small.adj, k=1).nonzero(), strict=True)) + e_large = set(zip(*sp.triu(G_large.adj, k=1).nonzero(), strict=True)) assert e_small.issubset(e_large) diff --git a/tests/test_minimum_spanning_tree.py b/tests/test_minimum_spanning_tree.py index 449a256..1ac5e64 100644 --- a/tests/test_minimum_spanning_tree.py +++ b/tests/test_minimum_spanning_tree.py @@ -184,7 +184,7 @@ def test_mst_similarity_preserves_original_weights_on_tree_edges(): A_tree = T.adj.toarray() rows, cols = np.where(A_tree > 0) - for r, c in zip(rows, cols): + for r, c in zip(rows, cols, strict=True): assert A_tree[r, c] == pytest.approx(A_orig[r, c]) diff --git a/tests/test_noise_corrected.py b/tests/test_noise_corrected.py index 7ceedab..8ce1d03 100644 --- a/tests/test_noise_corrected.py +++ b/tests/test_noise_corrected.py @@ -32,8 +32,8 @@ def test_nc_undirected_symmetry_and_monotonicity(): assert (G_lo.adj != G_lo.adj.T).nnz == 0 assert (G_hi.adj != G_hi.adj.T).nnz == 0 - e_lo = set(zip(*sp.triu(G_lo.adj, k=1).nonzero())) - e_hi = set(zip(*sp.triu(G_hi.adj, k=1).nonzero())) + e_lo = set(zip(*sp.triu(G_lo.adj, k=1).nonzero(), strict=True)) + e_hi = set(zip(*sp.triu(G_hi.adj, k=1).nonzero(), strict=True)) assert e_hi.issubset(e_lo) @@ -50,8 +50,8 @@ def test_nc_directed_monotonicity_and_no_negatives(): G1 = NoiseCorrected(delta=1.0).apply(G0) G2 = NoiseCorrected(delta=2.0).apply(G0) - e1 = set(zip(*G1.adj.nonzero())) - e2 = set(zip(*G2.adj.nonzero())) + e1 = set(zip(*G1.adj.nonzero(), strict=True)) + e2 = set(zip(*G2.adj.nonzero(), strict=True)) assert e2.issubset(e1) # No NaNs/Inf diff --git a/tests/test_utils.py b/tests/test_utils.py index 07cb10a..b7a9604 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -17,7 +17,7 @@ # ---- helpers ---- def _rowwise_pairset(indices_row, values_row): """Create a set of (idx, val) pairs ignoring order for a single row.""" - return set(zip(indices_row.tolist(), values_row.tolist())) + return set(zip(indices_row.tolist(), values_row.tolist(), strict=True)) # ---- _validate_square_matrix ----