From 63354923e91dfa4cb0b411ca31b924ccd676fc37 Mon Sep 17 00:00:00 2001 From: Michael Zargham Date: Sat, 21 Mar 2026 01:57:31 -0400 Subject: [PATCH] fix load + add another example --- README.md | 19 +- docs/index.md | 2 +- docs/tutorial.md | 341 ++++ .../11_local_partition/local_partition.py | 117 ++ examples/README.md | 1 + knowledgecomplex/schema.py | 138 +- mkdocs.yml | 1 + site/404.html | 35 +- site/api/analysis/index.html | 35 +- site/api/clique/index.html | 35 +- site/api/codecs/index.html | 35 +- site/api/diff/index.html | 35 +- site/api/exceptions/index.html | 35 +- site/api/filtration/index.html | 35 +- site/api/graph/index.html | 47 +- site/api/io/index.html | 35 +- site/api/schema/index.html | 521 +++++- site/api/viz/index.html | 35 +- site/index.html | 100 +- site/ontology/index.html | 37 +- site/search/search_index.json | 2 +- site/sitemap.xml | 4 + site/sitemap.xml.gz | Bin 286 -> 294 bytes site/tutorial/index.html | 1456 +++++++++++++++++ 24 files changed, 2942 insertions(+), 159 deletions(-) create mode 100644 docs/tutorial.md create mode 100644 examples/11_local_partition/local_partition.py create mode 100644 site/tutorial/index.html diff --git a/README.md b/README.md index ab4f06f..b3b03d6 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ print(df) print(kc.dump_graph()) # Turtle string ``` -See [`examples/`](examples/) for 10 runnable examples covering all features below. +See [`examples/`](examples/) for 11 runnable examples covering all features below. 
## Topological queries @@ -132,6 +132,23 @@ chi = euler_characteristic(kc) # V - E + F pr = edge_pagerank(kc, "e1") # personalized edge PageRank vector ``` +## Local partitioning + +Find clusters via diffusion — spread probability from a seed and sweep to find natural bottlenecks: + +```python +from knowledgecomplex.analysis import local_partition, edge_local_partition + +# Vertex clusters via PageRank or heat kernel diffusion +cut = local_partition(kc, seed="alice", method="pagerank") +cut.vertices # vertex IDs on the small side +cut.conductance # lower = cleaner partition + +# Edge clusters via Hodge Laplacian diffusion +edge_cut = edge_local_partition(kc, seed_edge="e1", method="hodge_pagerank") +edge_cut.edges # relationship cluster around e1 +``` + ## Filtrations and time-varying complexes Filtrations model strictly growing subcomplexes. Diffs model arbitrary add/remove sequences: diff --git a/docs/index.md b/docs/index.md index 77f326b..45695c8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -64,7 +64,7 @@ from knowledgecomplex.ontologies import operations, brand, research sb = brand.schema() # audience/theme with resonance, interplay, overlap ``` -See the [examples/](https://github.com/blockscience/knowledgecomplex/tree/main/examples) directory for 10 runnable examples. +See the [examples/](https://github.com/blockscience/knowledgecomplex/tree/main/examples) directory for 11 runnable examples. ## API Reference diff --git a/docs/tutorial.md b/docs/tutorial.md new file mode 100644 index 0000000..b62382b --- /dev/null +++ b/docs/tutorial.md @@ -0,0 +1,341 @@ +# Tutorial + +A progressive walkthrough of knowledgecomplex, from schema definition through algebraic topology. + +## 1. Define a schema + +A schema declares vertex, edge, and face types with attributes. The `SchemaBuilder` generates OWL and SHACL automatically. 
+ +```python +from knowledgecomplex import SchemaBuilder, vocab, text + +sb = SchemaBuilder(namespace="vv") + +# Vertex types (subclass of kc:Vertex) +sb.add_vertex_type("requirement", attributes={"title": text()}) +sb.add_vertex_type("test_case", attributes={"title": text()}) + +# Edge type with controlled vocabulary (enforced via sh:in) +sb.add_edge_type("verifies", attributes={ + "status": vocab("passing", "failing", "pending"), +}) + +# Face type +sb.add_face_type("coverage") +``` + +### Attribute descriptors + +| Descriptor | What it generates | Example | +|---|---|---| +| `text()` | `xsd:string`, required, single-valued | `title: text()` | +| `text(required=False)` | `xsd:string`, optional | `notes: text(required=False)` | +| `text(multiple=True)` | `xsd:string`, required, multi-valued | `tags: text(multiple=True)` | +| `vocab("a", "b")` | `sh:in ("a" "b")`, required, single-valued | `status: vocab("pass", "fail")` | + +### Type inheritance and binding + +Types can inherit from other user-defined types. Child types can bind inherited attributes to fixed values: + +```python +sb.add_vertex_type("document", attributes={"title": text(), "category": text()}) +sb.add_vertex_type("specification", parent="document", + attributes={"format": text()}, + bind={"category": "structural"}) +``` + +### Introspection + +```python +sb.describe_type("specification") +# {'name': 'specification', 'kind': 'vertex', 'parent': 'document', +# 'own_attributes': {'format': text()}, +# 'inherited_attributes': {'title': text(), 'category': text()}, +# 'all_attributes': {'title': text(), 'category': text(), 'format': text()}, +# 'bound': {'category': 'structural'}} + +sb.type_names(kind="vertex") # ['document', 'specification'] +``` + +## 2. Build a complex + +A `KnowledgeComplex` manages instances. Every `add_*` write triggers SHACL verification, so the graph stays valid after each insertion — the exceptions are `remove_element` and code inside `deferred_verification()` (see Gotchas). 
+ +```python +from knowledgecomplex import KnowledgeComplex + +kc = KnowledgeComplex(schema=sb) + +# Vertices have no boundary — always valid +kc.add_vertex("req-001", type="requirement", title="Boot time < 5s") +kc.add_vertex("tc-001", type="test_case", title="Boot smoke test") +kc.add_vertex("tc-002", type="test_case", title="Boot regression") + +# Edges need their boundary vertices to already exist (slice rule) +kc.add_edge("ver-001", type="verifies", + vertices={"req-001", "tc-001"}, status="passing") +kc.add_edge("ver-002", type="verifies", + vertices={"req-001", "tc-002"}, status="pending") +kc.add_edge("ver-003", type="verifies", + vertices={"tc-001", "tc-002"}, status="passing") + +# Faces need 3 boundary edges forming a closed triangle +kc.add_face("cov-001", type="coverage", + boundary=["ver-001", "ver-002", "ver-003"]) +``` + +### What gets enforced + +| Constraint | When | What happens | +|---|---|---| +| Type must be registered | Before RDF assertions | `ValidationError` | +| Boundary cardinality (2 for edges, 3 for faces) | Before SHACL | `ValueError` | +| Boundary elements must exist in complex (slice rule) | SHACL on write | `ValidationError` + rollback | +| Vocab values must be in allowed set | SHACL on write | `ValidationError` + rollback | +| Face boundary edges must form closed triangle | SHACL on write | `ValidationError` + rollback | + +### Element handles + +```python +elem = kc.element("req-001") +elem.id # "req-001" +elem.type # "requirement" +elem.attrs # {"title": "Boot time < 5s"} + +kc.element_ids(type="test_case") # ["tc-001", "tc-002"] +kc.elements(type="test_case") # [Element('tc-001', ...), Element('tc-002', ...)] +``` + +## 3. Topological queries + +Most queries return `set[str]` for natural set algebra (`degree` returns an integer and `is_subcomplex` a boolean). All accept an optional `type=` filter. 
+ +```python +# Boundary operator ∂ +kc.boundary("ver-001") # {'req-001', 'tc-001'} (edge → vertices) +kc.boundary("cov-001") # {'ver-001', 'ver-002', 'ver-003'} (face → edges) +kc.boundary("req-001") # set() (vertex → empty) + +# Coboundary (inverse boundary) +kc.coboundary("req-001") # {'ver-001', 'ver-002'} (vertex → incident edges) + +# Star: all simplices containing σ as a face +kc.star("req-001") # req-001 + incident edges + incident faces + +# Closure: smallest subcomplex containing σ +kc.closure("cov-001") # cov-001 + 3 edges + 3 vertices + +# Link: Cl(St(σ)) \ St(σ) +kc.link("req-001") + +# Skeleton: elements up to dimension k +kc.skeleton(0) # vertices only +kc.skeleton(1) # vertices + edges + +# Degree +kc.degree("req-001") # 2 + +# Subcomplex check +kc.is_subcomplex({"req-001", "tc-001", "ver-001"}) # True +kc.is_subcomplex({"ver-001"}) # False (missing vertices) + +# Set algebra composes naturally +shared = kc.star("req-001") & kc.star("tc-001") +``` + +## 4. Local partitioning + +The topological queries above use combinatorial adjacency — boundary, star, and closure walk the simplicial structure directly. Local partitioning uses **diffusion** instead: spread probability from a seed and sweep the result to find a natural cluster boundary. This finds structure that combinatorial queries miss. + +Requires `pip install knowledgecomplex[analysis]`. 
+ +### Graph partitioning (vertex clusters) + +Diffuse from a seed vertex using personalized PageRank or the heat kernel, then sweep the resulting distribution to find a cut with low conductance: + +```python +from knowledgecomplex.analysis import ( + approximate_pagerank, heat_kernel_pagerank, + sweep_cut, local_partition, +) + +# Approximate PageRank: push-based diffusion (Andersen-Chung-Lang) +p, r = approximate_pagerank(kc, seed="req-001", alpha=0.15) +# p is a sparse dict of vertex → probability; more mass near seed + +# Heat kernel PageRank: exponential diffusion (Fan Chung) +rho = heat_kernel_pagerank(kc, seed="req-001", t=5.0) +# t controls locality: small t = tight cluster, large t = broad spread + +# Sweep either distribution to find a low-conductance cut +cut = sweep_cut(kc, p) +cut.vertices # set of vertex IDs on the small side +cut.conductance # Cheeger ratio — lower means cleaner partition + +# Or use local_partition for the full pipeline in one call +cut = local_partition(kc, seed="req-001", method="pagerank") +cut = local_partition(kc, seed="req-001", method="heat_kernel") +``` + +### Edge partitioning (simplicial clusters) + +The simplicial version replaces the graph Laplacian with the **Hodge Laplacian** on edges. Instead of partitioning vertices, it partitions edges — finding clusters of relationships: + +```python +from knowledgecomplex.analysis import edge_local_partition + +# Hodge PageRank: (βI + L₁)⁻¹ χ_e — diffusion on the edge space +cut = edge_local_partition(kc, seed_edge="ver-001", method="hodge_pagerank") + +# Hodge heat kernel: e^{-tL₁} χ_e — exponential diffusion on edges +cut = edge_local_partition(kc, seed_edge="ver-001", method="hodge_heat", t=5.0) + +cut.edges # set of edge IDs in the cluster +cut.conductance # edge conductance +``` + +The key difference: graph partitioning asks "which vertices are near this vertex?" while edge partitioning asks "which relationships are near this relationship?" 
— a question that only makes sense in a simplicial complex, not in a plain graph. + +## 5. Algebraic topology + +Requires `pip install knowledgecomplex[analysis]`. + +```python +from knowledgecomplex.analysis import ( + boundary_matrices, betti_numbers, euler_characteristic, + hodge_laplacian, edge_pagerank, hodge_decomposition, hodge_analysis, +) + +# Boundary matrices (sparse) +bm = boundary_matrices(kc) +# bm.B1: (n_vertices × n_edges), bm.B2: (n_edges × n_faces) +# Invariant: B1 @ B2 = 0 (∂₁ ∘ ∂₂ = 0) + +# Betti numbers +betti = betti_numbers(kc) # [β₀, β₁, β₂] +chi = euler_characteristic(kc) # V - E + F = β₀ - β₁ + β₂ + +# Hodge Laplacian +L1 = hodge_laplacian(kc) # B1ᵀB1 + B2B2ᵀ (symmetric PSD) +# dim(ker L₁) = β₁ + +# Edge PageRank +pr = edge_pagerank(kc, "ver-001", beta=0.1) # (βI + L₁)⁻¹ χ_e + +# Hodge decomposition: flow = gradient + curl + harmonic +decomp = hodge_decomposition(kc, pr) +# decomp.gradient — im(B1ᵀ), vertex-driven flow +# decomp.curl — im(B2), face-driven circulation +# decomp.harmonic — ker(L₁), topological cycles + +# Full analysis in one call +results = hodge_analysis(kc, beta=0.1) +``` + +All analysis functions accept an optional `weights` dict mapping element IDs to scalar weights, which factor into the Laplacian as diagonal weight matrices. + +## 6. Filtrations + +A filtration is a nested sequence of valid subcomplexes: C₀ ⊆ C₁ ⊆ ... ⊆ Cₘ. + +```python +from knowledgecomplex import Filtration + +filt = Filtration(kc) +filt.append({"req-001"}) # must be valid subcomplex +filt.append_closure({"ver-001"}) # auto-closes + unions with previous +filt.append_closure({"cov-001"}) # adds face + all boundary + +filt.birth("cov-001") # index where element first appears +filt.new_at(2) # elements added at step 2 (Cₚ \ Cₚ₋₁) +filt[1] # set of element IDs at step 1 + +# Build from a scoring function +filt2 = Filtration.from_function(kc, lambda eid: some_score(eid)) +``` + +## 7. 
Clique inference + +Discover higher-order structure hiding in the edge graph: + +```python +from knowledgecomplex import find_cliques, infer_faces + +# Pure query — what triangles exist? +triangles = find_cliques(kc, k=3) + +# Fill in all triangles as typed faces +added = infer_faces(kc, "coverage") + +# Preview without modifying +preview = infer_faces(kc, "coverage", dry_run=True) +``` + +## 8. Export and load + +```python +# Export schema + instance to a directory +kc.export("output/my_complex") +# Creates: ontology.ttl, shapes.ttl, instance.ttl, queries/*.sparql + +# Reconstruct from exported files +kc2 = KnowledgeComplex.load("output/my_complex") +kc2.audit().conforms # True +``` + +Multi-format serialization: + +```python +from knowledgecomplex import save_graph, load_graph + +save_graph(kc, "data.jsonld", format="json-ld") +load_graph(kc, "data.ttl") # additive loading +``` + +## 9. Verification and audit + +```python +# Throwing verification +kc.verify() # raises ValidationError on failure + +# Non-throwing audit +report = kc.audit() +report.conforms # bool +report.violations # list[AuditViolation] +print(report) # human-readable summary + +# Deferred verification for bulk construction +with kc.deferred_verification(): + for item in big_dataset: + kc.add_vertex(item.id, type=item.type, **item.attrs) + # ... add edges, faces ... +# Single SHACL pass runs on exit + +# Static file verification (no Python objects needed) +from knowledgecomplex import audit_file +report = audit_file("data/instance.ttl", shapes="data/shapes.ttl", + ontology="data/ontology.ttl") +``` + +## 10. 
Pre-built ontologies + +Three ontologies ship with the package: + +```python +from knowledgecomplex.ontologies import operations, brand, research + +sb = operations.schema() # actor, activity, resource +sb = brand.schema() # audience, theme +sb = research.schema() # paper, concept, note +``` + +## Gotchas + +| Issue | Detail | +|---|---| +| **Slice rule** | Boundary elements must exist before the element that references them. Add vertices → edges → faces. | +| **Closed triangle** | A face's 3 edges must span exactly 3 vertices in a cycle. An open fan or 4-vertex path will fail. | +| **`remove_element`** | No post-removal verification. Remove faces before their edges, edges before their vertices. | +| **Schema after `load()`** | `load()` recovers type names, kinds, attributes, and parent relationships from OWL + SHACL. Full `describe_type()` introspection works after loading. | +| **Deferred verification** | Inside the context manager, intermediate states need not be valid. Verification runs once on exit. | +| **Face orientation** | Boundary matrix signs are computed internally to guarantee ∂₁∘∂₂ = 0. The orientation is consistent but not guaranteed to match external conventions. | diff --git a/examples/11_local_partition/local_partition.py b/examples/11_local_partition/local_partition.py new file mode 100644 index 0000000..3ed0263 --- /dev/null +++ b/examples/11_local_partition/local_partition.py @@ -0,0 +1,117 @@ +""" +local_partition.py — Find clusters using diffusion, not just adjacency. + +Builds a "barbell" complex: two triangles joined by a single bridge edge. +Uses graph partitioning (PageRank and heat kernel) to find the two clusters, +then uses edge partitioning (Hodge Laplacian) to cluster the relationships. + +The key insight: topological queries (boundary, star, closure) walk the +combinatorial structure directly. Partitioning uses *diffusion* — spreading +probability from a seed and sweeping the result to find where the flow +bottlenecks. 
This finds natural cluster boundaries that adjacency alone +can't identify. + +Run: + pip install knowledgecomplex[analysis] + python examples/11_local_partition/local_partition.py +""" + +from knowledgecomplex import SchemaBuilder, KnowledgeComplex +from knowledgecomplex.analysis import ( + approximate_pagerank, + heat_kernel_pagerank, + sweep_cut, + local_partition, + edge_local_partition, + betti_numbers, +) + +# ── Build a barbell: two triangles joined by a bridge ──────────────────── + +sb = SchemaBuilder(namespace="net") +sb.add_vertex_type("Node") +sb.add_edge_type("Link") +sb.add_face_type("Cell") + +kc = KnowledgeComplex(schema=sb) + +# Left triangle +kc.add_vertex("L1", type="Node") +kc.add_vertex("L2", type="Node") +kc.add_vertex("L3", type="Node") +kc.add_edge("eL12", type="Link", vertices={"L1", "L2"}) +kc.add_edge("eL23", type="Link", vertices={"L2", "L3"}) +kc.add_edge("eL13", type="Link", vertices={"L1", "L3"}) +kc.add_face("fL", type="Cell", boundary=["eL12", "eL23", "eL13"]) + +# Bridge +kc.add_vertex("B", type="Node") +kc.add_edge("bridge", type="Link", vertices={"L2", "B"}) + +# Right triangle +kc.add_vertex("R1", type="Node") +kc.add_vertex("R2", type="Node") +kc.add_edge("eR1B", type="Link", vertices={"B", "R1"}) +kc.add_edge("eR12", type="Link", vertices={"R1", "R2"}) +kc.add_edge("eRB2", type="Link", vertices={"B", "R2"}) +kc.add_face("fR", type="Cell", boundary=["eR1B", "eR12", "eRB2"]) + +print(f"Complex: {len(kc.element_ids())} elements") +print(f"Betti numbers: {betti_numbers(kc)}") +print() + +# ── Graph partitioning: vertex clusters via PageRank ───────────────────── + +print("=== Approximate PageRank from L1 ===") +p, r = approximate_pagerank(kc, seed="L1", alpha=0.15) +for v in sorted(p, key=lambda v: -p[v]): + print(f" {v:4s} {p[v]:.4f}") +print() + +cut = sweep_cut(kc, p) +print(f"Best sweep cut: {sorted(cut.vertices)}") +print(f" conductance: {cut.conductance:.4f}") +print() + +# ── Heat kernel: different diffusion profile 
───────────────────────────── + +print("=== Heat Kernel PageRank from L1 (t=3) ===") +rho = heat_kernel_pagerank(kc, seed="L1", t=3.0) +for v in sorted(rho, key=lambda v: -rho[v]): + print(f" {v:4s} {rho[v]:.4f}") +print() + +cut_hk = sweep_cut(kc, rho) +print(f"Heat kernel cut: {sorted(cut_hk.vertices)}") +print(f" conductance: {cut_hk.conductance:.4f}") +print() + +# ── local_partition: one-call convenience ──────────────────────────────── + +print("=== local_partition (one call) ===") +cut_pr = local_partition(kc, seed="L1", method="pagerank") +cut_hk2 = local_partition(kc, seed="L1", method="heat_kernel") +print(f"PageRank method: {sorted(cut_pr.vertices)}, conductance={cut_pr.conductance:.4f}") +print(f"Heat kernel method: {sorted(cut_hk2.vertices)}, conductance={cut_hk2.conductance:.4f}") +print() + +# ── Edge partitioning: relationship clusters via Hodge Laplacian ───────── + +print("=== Edge local partition (Hodge) ===") +print("Starting from edge eL12 (inside left triangle):") +edge_cut_pr = edge_local_partition(kc, seed_edge="eL12", method="hodge_pagerank") +print(f" Hodge PageRank: {sorted(edge_cut_pr.edges)}, conductance={edge_cut_pr.conductance:.4f}") + +edge_cut_hk = edge_local_partition(kc, seed_edge="eL12", method="hodge_heat", t=3.0) +print(f" Hodge heat: {sorted(edge_cut_hk.edges)}, conductance={edge_cut_hk.conductance:.4f}") +print() + +# ── Compare: combinatorial vs diffusion ────────────────────────────────── + +print("=== Combinatorial star vs diffusion partition ===") +star_L1 = kc.star("L1") +print(f"Star(L1): {sorted(star_L1)}") +print(f" → includes everything L1 touches, regardless of cluster structure") +print() +print(f"Partition(L1): {sorted(cut_pr.vertices)}") +print(f" → finds the natural cluster boundary via diffusion bottleneck") diff --git a/examples/README.md b/examples/README.md index 38bb1a5..ccbd08d 100644 --- a/examples/README.md +++ b/examples/README.md @@ -15,6 +15,7 @@ Generated outputs (PNGs, temp files) go into an 
`output/` subdirectory. | 08 | `08_temporal_sweep/` | Non-filtration time slicing: elements with active_from/active_until, parameterized queries | | 09 | `09_diff_sequence/` | ComplexDiff and ComplexSequence: sprint-by-sprint evolution with SPARQL UPDATE export/import | | 10 | `10_markdown_codec/` | MarkdownCodec: round-trip KC elements to YAML+markdown files, verify consistency | +| 11 | `11_local_partition/` | Local partitioning via diffusion: PageRank, heat kernel, Hodge edge partition on a barbell graph | ## Running diff --git a/knowledgecomplex/schema.py b/knowledgecomplex/schema.py index 871d5a4..7b4d046 100644 --- a/knowledgecomplex/schema.py +++ b/knowledgecomplex/schema.py @@ -1092,51 +1092,151 @@ def load(cls, path: str | Path) -> "SchemaBuilder": # Discover model namespace: find a namespace binding that is not # one of the well-known prefixes (kc, kcs, sh, owl, rdfs, rdf, xsd) - well_known = { - str(_KC), str(_KCS), str(_SH), - str(OWL), str(RDFS), str(RDF), str(XSD), - } + # Discover model namespace: find types that are subclasses of + # kc:Vertex, kc:Edge, or kc:Face, then resolve their namespace. 
+ kc_bases = {_KC.Vertex, _KC.Edge, _KC.Face} + candidate_iris: set[URIRef] = set() + for kc_class in kc_bases: + for subj in owl_graph.subjects(RDFS.subClassOf, kc_class): + if subj not in kc_bases and subj != _KC.Element: + candidate_iris.add(subj) + + # Also find types that subclass the candidates (inheritance) + changed = True + while changed: + changed = False + for parent in list(candidate_iris): + for child in owl_graph.subjects(RDFS.subClassOf, parent): + if child not in candidate_iris and child not in kc_bases: + candidate_iris.add(child) + changed = True + + # Resolve namespace from the first candidate IRI namespace = None ns_obj = None - for prefix, uri in owl_graph.namespaces(): - uri_str = str(uri) - if prefix and uri_str not in well_known and uri_str.startswith("https://example.org/"): - # Skip shape namespaces (ending with /shape#) - if "/shape#" in uri_str: - continue - namespace = prefix - ns_obj = Namespace(uri_str) - break + if candidate_iris: + sample_iri = str(next(iter(candidate_iris))) + # IRI is like https://example.org/test#document → namespace is "test" + for prefix, uri in owl_graph.namespaces(): + uri_str = str(uri) + if sample_iri.startswith(uri_str) and prefix: + namespace = prefix + ns_obj = Namespace(uri_str) + break if namespace is None: raise ValueError( f"Could not detect model namespace in {p / 'ontology.ttl'}. " - "Expected a namespace binding like 'aaa: '." + "No user-defined types (subclasses of kc:Vertex/Edge/Face) found." ) + # Derive shape namespace from model namespace + # e.g. 
https://example.org/ex# → https://example.org/ex/shape# + base_iri = str(ns_obj) + shape_base = base_iri.rstrip("#") + "/shape#" + # Build instance without calling __init__ sb = object.__new__(cls) sb._namespace = namespace - sb._base_iri = str(ns_obj) + sb._base_iri = base_iri sb._ns = ns_obj - sb._nss = Namespace(f"https://example.org/{namespace}/shape#") + sb._nss = Namespace(shape_base) sb._owl_graph = owl_graph sb._shacl_graph = shacl_graph sb._attr_domains = {} sb._queries = {} # Reconstruct _types registry from OWL subclass triples + # Two passes: first find direct subclasses of kc:Vertex/Edge/Face, + # then find user types that subclass other user types (inheritance). sb._types = {} kind_map = { _KC.Vertex: "vertex", _KC.Edge: "edge", _KC.Face: "face", } + kc_base_iris = set(kind_map.keys()) | {_KC.Element, _KC.Complex} + user_type_iris: dict[str, URIRef] = {} # local_name → IRI + + # Pass 1: direct subclasses of kc base types for kc_class, kind in kind_map.items(): for type_iri in owl_graph.subjects(RDFS.subClassOf, kc_class): - # Extract local name from IRI + if type_iri in kc_base_iris: + continue local_name = str(type_iri).replace(sb._base_iri, "") - if local_name: - sb._types[local_name] = {"kind": kind} + if local_name and not local_name.startswith("http"): + sb._types[local_name] = { + "kind": kind, + "attributes": {}, + "parent": None, + "bind": {}, + } + user_type_iris[local_name] = type_iri + + # Pass 2: find user types that subclass other user types (inheritance) + # Keep discovering until no new types are found + iri_to_name = {v: k for k, v in user_type_iris.items()} + changed = True + while changed: + changed = False + for parent_iri, parent_name in list(iri_to_name.items()): + for child_iri in owl_graph.subjects(RDFS.subClassOf, parent_iri): + if child_iri in kc_base_iris: + continue + child_name = str(child_iri).replace(sb._base_iri, "") + if child_name and not child_name.startswith("http") and child_name not in sb._types: + 
sb._types[child_name] = { + "kind": sb._types[parent_name]["kind"], + "attributes": {}, + "parent": parent_name, + "bind": {}, + } + user_type_iris[child_name] = child_iri + iri_to_name[child_iri] = child_name + changed = True + + # Recover attributes from SHACL property shapes + for name, type_iri in user_type_iris.items(): + shape_iri = sb._nss[f"{name}Shape"] + attrs = {} + bind = {} + + for _, _, prop_node in shacl_graph.triples((shape_iri, _SH.property, None)): + path = shacl_graph.value(prop_node, _SH.path) + if path is None: + continue + path_str = str(path) + if not path_str.startswith(sb._base_iri): + continue + attr_name = path_str[len(sb._base_iri):] + + # Check for sh:hasValue → bind + has_value = shacl_graph.value(prop_node, _SH.hasValue) + if has_value is not None: + bind[attr_name] = str(has_value) + continue + + # Check for sh:in → VocabDescriptor + in_list = shacl_graph.value(prop_node, _SH["in"]) + if in_list is not None: + values = list(Collection(shacl_graph, in_list)) + vocab_values = tuple(str(v) for v in values) + max_count = shacl_graph.value(prop_node, _SH.maxCount) + multiple = max_count is None + attrs[attr_name] = VocabDescriptor( + values=vocab_values, multiple=multiple + ) + else: + # TextDescriptor + min_count = shacl_graph.value(prop_node, _SH.minCount) + max_count = shacl_graph.value(prop_node, _SH.maxCount) + required = min_count is not None and int(min_count) >= 1 + multiple = max_count is None + attrs[attr_name] = TextDescriptor( + required=required, multiple=multiple + ) + + sb._types[name]["attributes"] = attrs + sb._types[name]["bind"] = bind return sb diff --git a/mkdocs.yml b/mkdocs.yml index 95b7ebe..a92b11e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -21,6 +21,7 @@ plugins: nav: - Home: index.md + - Tutorial: tutorial.md - Ontology: ontology.md - API Reference: - Schema: api/schema.md diff --git a/site/404.html b/site/404.html index f2b16b3..a879d28 100644 --- a/site/404.html +++ b/site/404.html @@ -239,6 +239,33 @@ +
  • + + + + + + + + Tutorial + + + + + + + + +
  • + + + + + + + + +
  • @@ -280,10 +307,10 @@ - + -