Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 41 additions & 41 deletions graphconstructor/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,15 @@ class Graph:
- `directed`: True if directed, else undirected (stored symmetric)
- `weighted`: True if edge weights are meaningful; if False, all edges are 1.0
- `mode`: "distance" or "similarity" (for interpretation of weights)
- `meta`: pandas DataFrame with n rows (optional). May have a 'name' column.
- `metadata`: pandas DataFrame with n rows (optional). May have a 'name' column.
- `ignore_selfloops`: If True, self-loops are ignored/removed (default for undirected graphs)
- `keep_explicit_zeros`: If True, explicit zeros in adjacency are kept (default for distance graphs)
"""
adj: sp.csr_matrix
directed: bool
weighted: bool
mode: str
meta: pd.DataFrame | None = None
metadata: pd.DataFrame | None = None
ignore_selfloops: Optional[bool] = None
keep_explicit_zeros: Optional[bool] = None

Expand Down Expand Up @@ -109,7 +109,7 @@ def from_csr(
*,
directed: bool = False,
weighted: bool = True,
meta: pd.DataFrame | None = None,
metadata: pd.DataFrame | None = None,
ignore_selfloops: bool = None,
keep_explicit_zeros: bool = None,
sym_op: SymOp = "max",
Expand Down Expand Up @@ -139,15 +139,15 @@ def from_csr(
A = _drop_diagonal(A)

n = A.shape[0]
if meta is not None:
if len(meta) != n:
raise ValueError(f"meta has {len(meta)} rows but adjacency is {n}x{n}.")
meta = meta.reset_index(drop=True)
if metadata is not None:
if len(metadata) != n:
raise ValueError(f"metadata has {len(metadata)} rows but adjacency is {n}x{n}.")
metadata = metadata.reset_index(drop=True)
return cls(
adj=A.astype(float, copy=False),
directed=directed, weighted=weighted,
mode=mode, ignore_selfloops=ignore_selfloops,
meta=meta,
metadata=metadata,
keep_explicit_zeros=keep_explicit_zeros,
)

Expand All @@ -170,7 +170,7 @@ def from_edges(
*,
directed: bool = False,
weighted: bool = True,
meta: pd.DataFrame | None = None,
metadata: pd.DataFrame | None = None,
ignore_selfloops: Optional[bool] = None,
keep_explicit_zeros: Optional[bool] = None,
sym_op: SymOp = "max",
Expand Down Expand Up @@ -210,7 +210,7 @@ def from_edges(
weighted=weighted_eff,
mode=mode,
ignore_selfloops=ignore_selfloops,
meta=meta, sym_op=sym_op,
metadata=metadata, sym_op=sym_op,
keep_explicit_zeros=keep_explicit_zeros,
)

Expand Down Expand Up @@ -296,17 +296,17 @@ def from_graphml(
attrs = G_nx.nodes[node]
row = {col: attrs.get(col, None) for col in all_cols}
rows.append(row)
meta = pd.DataFrame(rows)
metadata = pd.DataFrame(rows)
else:
meta = None
metadata = None

# Build Graph via from_csr to respect the usual symmetrization / defaults
return cls.from_csr(
A,
mode=mode,
directed=directed,
weighted=weighted,
meta=meta,
metadata=metadata,
ignore_selfloops=ignore_selfloops,
keep_explicit_zeros=keep_explicit_zeros,
)
Expand All @@ -331,8 +331,8 @@ def has_self_loops(self) -> bool:

@property
def node_names(self) -> list[str] | list[int]:
if self.meta is not None and "name" in self.meta.columns:
return self.meta["name"].tolist()
if self.metadata is not None and "name" in self.metadata.columns:
return self.metadata["name"].tolist()
return list(range(self.n_nodes))

# -------- Editing --------
Expand All @@ -344,8 +344,8 @@ def drop(self, nodes: Iterable[int | str]) -> "Graph":
nodes = [nodes]

to_drop_idx: set[int] = set()
if self.meta is not None and "name" in self.meta.columns:
name_to_idx = {name: i for i, name in enumerate(self.meta["name"].tolist())}
if self.metadata is not None and "name" in self.metadata.columns:
name_to_idx = {name: i for i, name in enumerate(self.metadata["name"].tolist())}
else:
name_to_idx = {}

Expand All @@ -366,8 +366,8 @@ def drop(self, nodes: Iterable[int | str]) -> "Graph":
keep_mask[list(to_drop_idx)] = False

A2 = self.adj[keep_mask][:, keep_mask].tocsr(copy=False)
meta2 = self.meta.loc[keep_mask].reset_index(drop=True) if self.meta is not None else None
return Graph(adj=A2, directed=self.directed, weighted=self.weighted, mode=self.mode, meta=meta2)
metadata2 = self.metadata.loc[keep_mask].reset_index(drop=True) if self.metadata is not None else None
return Graph(adj=A2, directed=self.directed, weighted=self.weighted, mode=self.mode, metadata=metadata2)

# ----- Convert distance/similarity -----
def convert_mode(
Expand Down Expand Up @@ -432,7 +432,7 @@ def convert_mode(
mode=target_mode,
directed=self.directed,
weighted=self.weighted,
meta=None if self.meta is None else self.meta.copy(),
metadata=None if self.metadata is None else self.metadata.copy(),
)

# -------- Exporters --------
Expand All @@ -445,10 +445,10 @@ def to_networkx(self):

create_using = nx.DiGraph if self.directed else nx.Graph
G = nx.from_scipy_sparse_array(self.adj, create_using=create_using)
# attach node attributes from meta
if self.meta is not None:
for col in self.meta.columns:
nx.set_node_attributes(G, {i: self.meta.iloc[i, self.meta.columns.get_loc(col)]
# attach node attributes from metadata
if self.metadata is not None:
for col in self.metadata.columns:
nx.set_node_attributes(G, {i: self.metadata.iloc[i, self.metadata.columns.get_loc(col)]
for i in range(self.n_nodes)}, name=col)
return G

Expand All @@ -475,9 +475,9 @@ def to_igraph(self):
g.es["weight"] = [1.0] * len(rows)

# node attributes
if self.meta is not None:
for col in self.meta.columns:
g.vs[col] = self.meta[col].tolist()
if self.metadata is not None:
for col in self.metadata.columns:
g.vs[col] = self.metadata[col].tolist()
return g

def to_graphml(self, path, *, include_graph_attrs: bool = True) -> None:
Expand Down Expand Up @@ -536,9 +536,9 @@ def to_cytoscape(
"""

if node_id_col is not None:
if self.meta is None or node_id_col not in self.meta.columns:
if self.metadata is None or node_id_col not in self.metadata.columns:
raise KeyError(f"Column '{node_id_col}' not found in metadata.")
node_ids = self.meta[node_id_col].astype(str).tolist()
node_ids = self.metadata[node_id_col].astype(str).tolist()
else:
node_ids = [str(i) for i in range(self.n_nodes)]

Expand All @@ -554,14 +554,14 @@ def to_cytoscape(

if (
node_label_col is not None
and self.meta is not None
and node_label_col in self.meta.columns
and self.metadata is not None
and node_label_col in self.metadata.columns
):
data["label"] = self.meta.iloc[i][node_label_col]
data["label"] = self.metadata.iloc[i][node_label_col]

if self.meta is not None:
for col in self.meta.columns:
value = self.meta.iloc[i][col]
if self.metadata is not None:
for col in self.metadata.columns:
value = self.metadata.iloc[i][col]
if pd.isna(value):
value = None
data[col] = value
Expand Down Expand Up @@ -624,17 +624,17 @@ def copy(self) -> "Graph":
directed=self.directed,
weighted=self.weighted,
mode=self.mode,
meta=None if self.meta is None else self.meta.copy(),
metadata=None if self.metadata is None else self.metadata.copy(),
)

def sorted_by(self, col: str) -> "Graph":
    """Return a new graph with nodes permuted by ascending metadata[col].

    Parameters
    ----------
    col : str
        Name of the metadata column whose values define the new node order.

    Returns
    -------
    Graph
        New graph whose adjacency rows and columns and whose metadata rows
        are reordered by the same permutation. The metadata index is reset
        to ``0 .. n-1``.

    Raises
    ------
    KeyError
        If the graph has no metadata or `col` is not one of its columns.
    """
    if self.metadata is None or col not in self.metadata.columns:
        raise KeyError(f"Column '{col}' not found in metadata.")

    # Stable sort: nodes with equal values keep their current relative order,
    # so the permutation is reproducible when the column contains ties.
    order = np.argsort(self.metadata[col].to_numpy(), kind="stable")

    # Apply the same permutation to rows and columns of the adjacency.
    A2 = self.adj[order][:, order]
    metadata2 = self.metadata.iloc[order].reset_index(drop=True)

    # Forward the self-loop / explicit-zero settings. If they are omitted the
    # new instance receives the field default (None) instead of this graph's
    # values. NOTE(review): assumes the constructor derives its own defaults
    # from None -- confirm against Graph.__post_init__.
    return Graph(
        adj=A2,
        directed=self.directed,
        weighted=self.weighted,
        mode=self.mode,
        metadata=metadata2,
        ignore_selfloops=self.ignore_selfloops,
        keep_explicit_zeros=self.keep_explicit_zeros,
    )

def degree(self, ignore_weights: bool = False) -> np.ndarray | tuple[np.ndarray, np.ndarray]:
"""Return node degree(s).
Expand Down
49 changes: 41 additions & 8 deletions graphconstructor/importers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,39 @@

Mode = Literal["distance", "similarity"]

def from_dense(
    arr,
    mode,
    *,
    directed=False,
    weighted=True,
    metadata=None,
    sym_op="max"
) -> Graph:
    """Create a Graph from `arr` by delegating to ``Graph.from_dense``.

    Thin convenience wrapper: `arr` is passed positionally and every other
    argument is forwarded unchanged by keyword. See ``Graph.from_dense`` for
    the meaning of the individual options.
    """
    options = {
        "mode": mode,
        "directed": directed,
        "weighted": weighted,
        "metadata": metadata,
        "sym_op": sym_op,
    }
    return Graph.from_dense(arr, **options)


def from_csr(
    adj,
    mode,
    *,
    directed=False,
    weighted=True,
    metadata=None,
    sym_op="max"
) -> Graph:
    """Create a Graph from `adj` by delegating to ``Graph.from_csr``.

    Thin convenience wrapper: `adj` is passed positionally and every other
    argument is forwarded unchanged by keyword. See ``Graph.from_csr`` for
    the meaning of the individual options.
    """
    options = {
        "mode": mode,
        "directed": directed,
        "weighted": weighted,
        "metadata": metadata,
        "sym_op": sym_op,
    }
    return Graph.from_csr(adj, **options)


def from_knn(indices, distances, *, store_weights=True, directed=False, meta=None, sym_op="max") -> Graph:
def from_knn(
indices,
distances,
*,
store_weights=True,
directed=False,
metadata=None,
sym_op="max"
) -> Graph:
ind, dist = _coerce_knn_inputs(indices, distances)
n_query, k = ind.shape

Expand All @@ -34,10 +58,19 @@ def from_knn(indices, distances, *, store_weights=True, directed=False, meta=Non
n_full = _infer_n_from_indices(ind)
A = sp.csr_matrix((weights, (rows, cols)), shape=(n_full, n_full))
return Graph.from_csr(A, directed=directed, weighted=store_weights, mode="distance",
meta=meta, sym_op=sym_op)
metadata=metadata, sym_op=sym_op)


def from_ann(ann, query_data, k: int, *, store_weights=True, directed=False, meta=None, sym_op="max") -> Graph:
def from_ann(
ann,
query_data,
k: int,
*,
store_weights=True,
directed=False,
metadata=None,
sym_op="max"
) -> Graph:
idx = ann.index if hasattr(ann, "index") else ann
if hasattr(idx, "indices_") and getattr(idx, "indices_") is not None:
ind = np.asarray(getattr(idx, "indices_"))[:, :k]
Expand All @@ -47,7 +80,7 @@ def from_ann(ann, query_data, k: int, *, store_weights=True, directed=False, met
raise TypeError("from_ann requires query_data when index has no cached neighbors.")
ind, dist = idx.query(query_data, k=k)
return from_knn(ind, dist, store_weights=store_weights, directed=directed,
meta=meta, sym_op=sym_op)
metadata=metadata, sym_op=sym_op)


# helper functions ---------------------------------------------
Expand Down
34 changes: 19 additions & 15 deletions graphconstructor/operators/disparity.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,23 +38,24 @@ class DisparityFilter(GraphOperator):
For undirected graphs only. If "or" (default), keep if either endpoint
finds the edge significant (this matches the R backbone code).
If "and", require both endpoints to be significant (stricter).
copy_meta : bool
copy_metadata : bool
Copy metadata (True) or keep reference (False).
"""
alpha: float = 0.05
rule: UndirectedRule = "or"
copy_meta: bool = True
copy_metadata: bool = True
supported_modes = ["similarity"]

def _undirected(self, G: Graph) -> Graph:
A = G.adj.tocsr(copy=False)
if (A.data < 0).any():
raise ValueError("DisparityFilter requires nonnegative weights.")
if A.nnz == 0:
return Graph.from_csr(A.copy(), directed=False, weighted=G.weighted,
meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta),
mode="similarity",
sym_op="max")
return Graph.from_csr(
A.copy(), directed=False, weighted=G.weighted,
metadata=(G.metadata.copy() if (self.copy_metadata and G.metadata is not None) else G.metadata),
mode="similarity",
sym_op="max")

# strengths and degrees (row-wise)
strength = np.asarray(A.sum(axis=1)).ravel()
Expand Down Expand Up @@ -92,18 +93,20 @@ def _undirected(self, G: Graph) -> Graph:
A_f = sp.csr_matrix((w[keep], (rows[keep], cols[keep])), shape=A.shape)
# Symmetrize to be safe (weights preserved as in input)
A_f = A_f.maximum(A_f.T)
return Graph.from_csr(A_f, directed=False, weighted=G.weighted,
mode=G.mode,
meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta),
sym_op="max")
return Graph.from_csr(
A_f, directed=False, weighted=G.weighted,
mode=G.mode,
metadata=(G.metadata.copy() if (self.copy_metadata and G.metadata is not None) else G.metadata),
sym_op="max")

def _directed(self, G: Graph) -> Graph:
A = G.adj.tocsr(copy=False)
if (A.data < 0).any():
raise ValueError("DisparityFilter requires nonnegative weights.")
if A.nnz == 0:
return Graph.from_csr(A.copy(), directed=True, weighted=G.weighted,
meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta))
return Graph.from_csr(
A.copy(), directed=True, weighted=G.weighted,
metadata=(G.metadata.copy() if (self.copy_metadata and G.metadata is not None) else G.metadata))


s_out = np.asarray(A.sum(axis=1)).ravel()
Expand Down Expand Up @@ -132,9 +135,10 @@ def _directed(self, G: Graph) -> Graph:

keep = np.minimum(pval_out, pval_in) <= self.alpha
A_f = sp.csr_matrix((w[keep], (rows[keep], cols[keep])), shape=A.shape)
return Graph.from_csr(A_f, directed=True, weighted=G.weighted,
mode=G.mode,
meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta))
return Graph.from_csr(
A_f, directed=True, weighted=G.weighted,
mode=G.mode,
metadata=(G.metadata.copy() if (self.copy_metadata and G.metadata is not None) else G.metadata))

def apply(self, G: Graph) -> Graph:
self._check_mode_supported(G)
Expand Down
Loading
Loading