matchms · florian-huber · May 28, 2026 · May 28, 2026 · May 28, 2026 · May 28, 2026
diff --git a/graphconstructor/adapters.py b/graphconstructor/adapters.py
@@ -1,5 +1,4 @@
 from dataclasses import dataclass
-from typing import Optional
 import numpy as np
 from numpy.typing import NDArray
 from .types import MatrixMode
@@ -8,6 +7,7 @@
 
 @dataclass
 class MatrixInput:
+    """Input for graph constructors that take a dense matrix."""
     matrix: NDArray
     mode: MatrixMode  # "distance" or "similarity"
 
@@ -19,6 +19,7 @@ def __post_init__(self) -> None:
 
 @dataclass
 class KNNInput:
+    """Input for graph constructors that take KNN graphs."""
     indices: NDArray[np.int_]
     distances: NDArray
 
@@ -31,9 +32,7 @@ def __post_init__(self) -> None:
 
 @dataclass
 class ANNInput:
-    # A fitted ANN index, e.g., pynndescent.NNDescent
+    """Input for graph constructors that take a fitted ANN index, e.g., pynndescent.NNDescent."""
     index: object
     # Optionally, a query set to build edges from (defaults to the index's training set)
-    query_data: Optional[NDArray] = None
-
-    # We don't verify protocol strictly at runtime; we use duck typing in constructors.
+    query_data: NDArray | None = None
diff --git a/graphconstructor/graph.py b/graphconstructor/graph.py
@@ -1,6 +1,7 @@
 import json
+from collections.abc import Iterable, Sequence
 from dataclasses import dataclass
-from typing import Iterable, Literal, Optional, Sequence
+from typing import Literal
 import networkx as nx
 import numpy as np
 import pandas as pd
@@ -35,8 +36,8 @@ class Graph:
     weighted: bool
     mode: str
     metadata: pd.DataFrame | None = None
-    ignore_selfloops: Optional[bool] = None
-    keep_explicit_zeros: Optional[bool] = None
+    ignore_selfloops: bool | None = None
+    keep_explicit_zeros: bool | None = None
 
     def __post_init__(self):
         # Check mode
@@ -171,8 +172,8 @@ def from_edges(
             directed: bool = False,
             weighted: bool = True,
             metadata: pd.DataFrame | None = None,
-            ignore_selfloops: Optional[bool] = None,
-            keep_explicit_zeros: Optional[bool] = None,
+            ignore_selfloops: bool | None = None,
+            keep_explicit_zeros: bool | None = None,
             sym_op: SymOp = "max",
         ) -> "Graph":
         """Build from an edge list. For undirected=True, we symmetrize later."""
@@ -188,7 +189,7 @@ def from_edges(
             rows = edges[:, 0].astype(int, copy=False)
             cols = edges[:, 1].astype(int, copy=False)
         else:
-            rows, cols = map(np.asarray, zip(*edges)) if edges else (np.array([], int), np.array([], int))
+            rows, cols = map(np.asarray, zip(*edges, strict=True)) if edges else (np.array([], int), np.array([], int))
 
         if weights is None:
             data = np.ones_like(rows, dtype=float)
@@ -468,7 +469,7 @@ def to_igraph(self):
         weights = coo.data[mask] if self.weighted else np.ones(mask.sum(), dtype=float)
 
         g = ig.Graph(n=self.n_nodes, directed=self.directed)
-        g.add_edges(list(zip(rows.tolist(), cols.tolist())))
+        g.add_edges(list(zip(rows.tolist(), cols.tolist(), strict=True)))
         if self.weighted:
             g.es["weight"] = weights.tolist()
         else:
@@ -581,7 +582,7 @@ def to_cytoscape(
         values = coo.data[edge_mask]
 
         edges = []
-        for edge_idx, (src, dst, value) in enumerate(zip(rows, cols, values)):
+        for edge_idx, (src, dst, value) in enumerate(zip(rows, cols, values, strict=True)):
             data = {
                 "id": f"e{edge_idx}",
                 "source": node_ids[int(src)],

diff --git a/graphconstructor/importers.py b/graphconstructor/importers.py
@@ -16,6 +16,7 @@ def from_dense(
         metadata=None,
         sym_op="max"
         ) -> Graph:
+    """Construct a Graph from a dense adjacency matrix."""
     return Graph.from_dense(arr, directed=directed, weighted=weighted, mode=mode, metadata=metadata, sym_op=sym_op)
 
 
@@ -28,6 +29,7 @@ def from_csr(
         metadata=None,
         sym_op="max"
         ) -> Graph:
+    """Construct a Graph from a sparse adjacency matrix."""
     return Graph.from_csr(adj, directed=directed, weighted=weighted, mode=mode, metadata=metadata, sym_op=sym_op)
 
 
@@ -40,6 +42,7 @@ def from_knn(
         metadata=None,
         sym_op="max"
         ) -> Graph:
+    """Construct a Graph from KNN neighbor indices and distances."""
     ind, dist = _coerce_knn_inputs(indices, distances)
     n_query, k = ind.shape
 
@@ -71,10 +74,11 @@ def from_ann(
         metadata=None,
         sym_op="max"
         ) -> Graph:
+    """Construct a Graph from a fitted ANN index, e.g., pynndescent.NNDescent."""
     idx = ann.index if hasattr(ann, "index") else ann
-    if hasattr(idx, "indices_") and getattr(idx, "indices_") is not None:
-        ind = np.asarray(getattr(idx, "indices_"))[:, :k]
-        dist = np.asarray(getattr(idx, "distances_"))[:, :k]
+    if hasattr(idx, "indices_") and idx.indices_ is not None:
+        ind = np.asarray(idx.indices_)[:, :k]
+        dist = np.asarray(idx.distances_)[:, :k]
     else:
         if query_data is None:
             raise TypeError("from_ann requires query_data when index has no cached neighbors.")

diff --git a/graphconstructor/operators/doubly_stochastic.py b/graphconstructor/operators/doubly_stochastic.py
@@ -85,7 +85,7 @@ def apply(self, G: Graph) -> Graph:
                 warnings.warn(
                     "DoublyStochasticNormalize stopped early because scaling factors "
                     "became very large. Result may not be doubly stochastic.",
-                    RuntimeWarning,
+                    RuntimeWarning, stacklevel=2,
                 )
                 break
 
@@ -117,7 +117,7 @@ def apply(self, G: Graph) -> Graph:
         if not converged:
             warnings.warn(
                 "DoublyStochasticNormalize did not converge within max_iter.",
-                RuntimeWarning,
+                RuntimeWarning, stacklevel=2,
             )
 
         # Apply scaling once: A' = diag(r) * A * diag(c)  (CSR-friendly)

diff --git a/graphconstructor/operators/enhanced_configuration_model.py b/graphconstructor/operators/enhanced_configuration_model.py
@@ -362,7 +362,7 @@ def _undirected(self, G: Graph) -> Graph:
             import warnings
             warnings.warn(
                 f"ECM optimisation did not converge: {res.message}",
-                RuntimeWarning,
+                RuntimeWarning, stacklevel=2,
             )
 
         # ---- p-value matrix ----------------------------------------------
@@ -381,11 +381,11 @@ def _undirected(self, G: Graph) -> Graph:
         W_lower_original = sp.tril(W_original, k=-1).tocoo()
         original_lookup = {
             (int(i), int(j)): w
-            for i, j, w in zip(W_lower_original.row, W_lower_original.col, W_lower_original.data)
+            for i, j, w in zip(W_lower_original.row, W_lower_original.col, W_lower_original.data, strict=True)
         }
 
         original_weights = np.array(
-            [original_lookup[(int(i), int(j))] for i, j in zip(row, col)],
+            [original_lookup[(int(i), int(j))] for i, j in zip(row, col, strict=True)],
             dtype=W_original.dtype,
         )
 

diff --git a/graphconstructor/operators/knn_selector.py b/graphconstructor/operators/knn_selector.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import Literal, Optional
+from typing import Literal
 import numpy as np
 import scipy.sparse as sp
 from ..graph import Graph
@@ -30,7 +30,7 @@ class KNNSelector(GraphOperator):
     """
     k: int
     mutual: bool = False
-    mutual_k: Optional[int] = None
+    mutual_k: int | None = None
     mode: Mode = "distance"
     supported_modes = ["similarity", "distance"]
 

diff --git a/graphconstructor/operators/metric_distance.py b/graphconstructor/operators/metric_distance.py
@@ -43,7 +43,7 @@ def _compute_distortions(D: GraphOperator, B, weight="weight", disjunction=sum):
         G.remove_edges_from(B.edges())
         weight_function = _weight_function(B, weight)
 
-        svals = dict()
+        svals = {}
         for u in G.nodes():
             metric_dist = single_source_dijkstra_path_length(
                 B, source=u, weight_function=weight_function, disjunction=disjunction

diff --git a/graphconstructor/types.py b/graphconstructor/types.py
@@ -1,5 +1,5 @@
 from __future__ import annotations
-from typing import Literal, Optional, Protocol, Tuple
+from typing import Literal, Protocol
 import numpy as np
 from numpy.typing import ArrayLike, NDArray
 from scipy.sparse import csr_matrix
@@ -14,9 +14,9 @@ class ANNLike(Protocol):
     The minimal surface we rely on mirrors PyNNDescent and similar libraries.
     """
 
-    def query(self, X: ArrayLike, k: int) -> Tuple[NDArray[np.int_], NDArray[np.floating]]:  # indices, distances
+    def query(self, X: ArrayLike, k: int) -> tuple[NDArray[np.int_], NDArray[np.floating]]:  # indices, distances
         ...
 
     # Optional attributes commonly present on fitted ANN indexes
-    indices_: Optional[NDArray[np.int_]]
-    distances_: Optional[NDArray[np.floating]]
+    indices_: NDArray[np.int_] | None
+    distances_: NDArray[np.floating] | None
diff --git a/graphconstructor/utils.py b/graphconstructor/utils.py
@@ -1,4 +1,5 @@
-from typing import Callable, Literal, Tuple, Union
+from collections.abc import Callable
+from typing import Literal
 import numpy as np
 import scipy.sparse as sp
 from numpy.typing import NDArray
@@ -8,10 +9,7 @@
 
 # Type aliases for clarity
 Mode = Literal["distance", "similarity"]
-ConversionMethod = Union[
-    Literal["reciprocal", "negative", "exp", "gaussian"],
-    Callable[[np.ndarray], np.ndarray]
-    ]
+ConversionMethod = Literal["reciprocal", "negative", "exp", "gaussian"] | Callable[[np.ndarray], np.ndarray]
 
 
 def _validate_square_matrix(M: np.ndarray) -> None:
@@ -40,7 +38,7 @@ def _drop_diagonal(A: sp.csr_matrix) -> sp.csr_matrix:
     return sp.csr_matrix((coo.data[mask], (coo.row[mask], coo.col[mask])), shape=A.shape)
 
 
-def _coerce_knn_inputs(indices, distances) -> Tuple[np.ndarray, np.ndarray]:
+def _coerce_knn_inputs(indices, distances) -> tuple[np.ndarray, np.ndarray]:
     ind = _to_numpy(indices)
     dist = _to_numpy(distances)
     if ind.shape != dist.shape:
@@ -60,7 +58,7 @@ def _csr_from_edges(n: int, rows: np.ndarray, cols: np.ndarray, weights: np.ndar
     return csr_matrix((weights, (rows, cols)), shape=(n, n))
 
 
-def _as_csr_square(M: NDArray | spmatrix) -> Tuple[sp.csr_matrix, int]:
+def _as_csr_square(M: NDArray | spmatrix) -> tuple[sp.csr_matrix, int]:
     """Return (CSR, n) for a square matrix without densifying.
 
     If `M` is dense, convert to CSR. If `M` is sparse, convert format to CSR
@@ -78,7 +76,7 @@ def _as_csr_square(M: NDArray | spmatrix) -> Tuple[sp.csr_matrix, int]:
     return sp.csr_matrix(arr), arr.shape[0]
 
 
-def _topk_per_row_sparse(csr: sp.csr_matrix, k: int, *, largest: bool) -> Tuple[np.ndarray, np.ndarray]:
+def _topk_per_row_sparse(csr: sp.csr_matrix, k: int, *, largest: bool) -> tuple[np.ndarray, np.ndarray]:
     """Return (indices, values) of top-k entries per row from CSR matrix.
 
     This operates strictly on the row's nonzeros without densifying.
@@ -120,7 +118,7 @@ def _topk_per_row_sparse(csr: sp.csr_matrix, k: int, *, largest: bool) -> Tuple[
     return ind, vals
 
 
-def _knn_from_matrix(M: NDArray | spmatrix, k: int, *, mode: MatrixMode) -> Tuple[np.ndarray, np.ndarray]:
+def _knn_from_matrix(M: NDArray | spmatrix, k: int, *, mode: MatrixMode) -> tuple[np.ndarray, np.ndarray]:
     """Compute kNN (indices, values) from a square distance/similarity matrix.
 
     Supports dense and sparse inputs without densifying sparse matrices.

diff --git a/graphconstructor/visualization/graph_statistics.py b/graphconstructor/visualization/graph_statistics.py
@@ -1,4 +1,4 @@
-from typing import Iterable, Optional, Tuple
+from collections.abc import Iterable
 import matplotlib.pyplot as plt
 import numpy as np
 
@@ -8,13 +8,13 @@ def plot_degree_distribution(
     *,
     x_scale: str = "log",
     y_scale: str = "log",
-    ax: Optional[plt.Axes] = None,
+    ax: plt.Axes | None = None,
     normalize: bool = True,
     include_zero_degree: bool = False,
-    label: Optional[str] = None,
+    label: str | None = None,
     marker: str = "o",
     markersize: float = 5.0,
-) -> Tuple[plt.Figure, plt.Axes]:
+) -> tuple[plt.Figure, plt.Axes]:
     """
     Plot the degree distribution p(k) vs k for a single graph.
 
@@ -128,11 +128,11 @@ def plot_degree_distributions_grid(
     y_scale: str = "log",
     normalize: bool = True,
     include_zero_degree: bool = False,
-    figsize: Optional[Tuple[float, float]] = None,
+    figsize: tuple[float, float] | None = None,
     tight_layout: bool = True,
     sharex: bool = False,
     sharey: bool = False,
-) -> Tuple[plt.Figure, np.ndarray]:
+) -> tuple[plt.Figure, np.ndarray]:
     """
     Plot a grid of degree distribution plots for multiple graphs.
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "graphconstructor"
-version = "0.3.0"
+version = "0.4.0"
 description = "Python library for graph construction from similarity or distance metrics"
 authors = ["Florian Huber <florian.huber@hs-duesseldorf.de>"]
 
@@ -56,28 +56,25 @@ line-ending = "lf"
 
 [tool.ruff.lint]
 # TODO: add some rules in future, eg. W291/292
-extend-select = ["D", "E", "I"]
-# TODO: some of these rules should be enforced in the future, eg. E713
+extend-select = ["D", "E", "I", "F", "Q", "UP", "B", "C4"]
+# TODO: some of these rules should be enforced in the future
 ignore = [
-    "D100",
-    "D101",
-    "D102",
-    "D103",
-    "D104",
-    "D105",
-    "D107",
-    "D200",
-    "D201",
-    "D202",
-    "D203",
-    "D204",
-    "D205",
-    "D209",
-    "D210",
-    "D212",
-    "D213",
-    "D4",
-    "E713"
+  "D100",
+  "D102",
+  "D104",
+  "D105",
+  "D107",
+  "D200",
+  "D201",
+  "D202",
+  "D203",
+  "D204",
+  "D205",
+  "D209",
+  "D210",
+  "D212",
+  "D213",
+  "D4",
 ]
 
 [tool.ruff.lint.isort]

diff --git a/tests/test_disparity.py b/tests/test_disparity.py
@@ -47,10 +47,10 @@ def test_disparity_directed_min_out_in_formula():
 
     alpha = 0.2
     expect_keep = np.minimum(pv_out, pv_in) <= alpha
-    expected = set(zip(rows[expect_keep].tolist(), cols[expect_keep].tolist()))
+    expected = set(zip(rows[expect_keep].tolist(), cols[expect_keep].tolist(), strict=True))
 
     out = DisparityFilter(alpha=alpha).apply(G0)
-    got = set(zip(*out.adj.nonzero()))
+    got = set(zip(*out.adj.nonzero(), strict=True))
     assert got == expected
     assert out.directed and out.weighted
     assert np.isfinite(out.adj.data).all()
@@ -74,8 +74,8 @@ def test_disparity_undirected_or_superset_and():
     assert (G_or.adj != G_or.adj.T).nnz == 0
     assert (G_and.adj != G_and.adj.T).nnz == 0
 
-    e_or = set(zip(*sp.triu(G_or.adj, k=1).nonzero()))
-    e_and = set(zip(*sp.triu(G_and.adj, k=1).nonzero()))
+    e_or = set(zip(*sp.triu(G_or.adj, k=1).nonzero(), strict=True))
+    e_and = set(zip(*sp.triu(G_and.adj, k=1).nonzero(), strict=True))
     assert e_and.issubset(e_or)