From 4432cb97bc8084abfb47b5cfe189361eeef9a269 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 9 May 2026 08:03:01 -0400 Subject: [PATCH 1/9] feat: Add --remote support to fluree drop Wires the existing POST /drop endpoint into the CLI's drop command so remote/auto-routed drops work the same way as list, reindex, and the other admin operations. The server-side endpoint already handled the ledger -> graph-source fallback; this just exposes it through the CLI. --- docs/cli/server-integration.md | 13 ++++- fluree-db-cli/src/cli.rs | 4 ++ fluree-db-cli/src/commands/drop.rs | 76 +++++++++++++++++++++++++++++- fluree-db-cli/src/lib.rs | 8 +++- 4 files changed, 97 insertions(+), 4 deletions(-) diff --git a/docs/cli/server-integration.md b/docs/cli/server-integration.md index 6470dcd0e..790a028c6 100644 --- a/docs/cli/server-integration.md +++ b/docs/cli/server-integration.md @@ -94,6 +94,16 @@ Required endpoints: The `--remote-name` flag allows publishing under a different name on the remote (e.g., `fluree publish origin mydb --remote-name production-db`). +### `fluree drop --remote ` (admin-protected) + +- `POST {api_base_url}/drop` with `{"ledger": "", "hard": true}` + +Drops a ledger or graph source on the remote server. The CLI sends `hard: true` (no soft-drop surface today). The server resolves `name` as a ledger first, then as a graph source — see the [`fluree drop` graph source fallback](#fluree-drop-name-graph-source-fallback) section below for the resolution order and response shape. + +When `--remote` is omitted, the CLI auto-routes through a locally running `fluree server start` if `server.meta.json` is present and the PID is alive, falling back to direct local execution otherwise. Pass `--direct` to skip auto-routing. The `--force` flag is required in all modes to confirm deletion. + +`--remote` does not affect local state: dropping a ledger remotely never touches the local active-ledger pointer or local storage. 
+ ### `fluree create --from .flpack` (native ledger import) - No server endpoint required (local-only operation) @@ -1283,5 +1293,6 @@ fluree iceberg map my-gs \ fluree list # should show mydb (Ledger) + my-gs (Iceberg) fluree info my-gs # should show Iceberg config + R2RML mapping fluree show t:1 --remote origin # should show decoded commit with resolved IRIs -fluree drop my-gs --force # should drop the graph source +fluree drop my-gs --force # should drop the graph source locally +fluree drop local-db --remote origin --force # should drop the published ledger on the remote ``` diff --git a/fluree-db-cli/src/cli.rs b/fluree-db-cli/src/cli.rs index d20535460..bd2bbb4f3 100644 --- a/fluree-db-cli/src/cli.rs +++ b/fluree-db-cli/src/cli.rs @@ -293,6 +293,10 @@ pub enum Commands { /// Required flag to confirm deletion #[arg(long)] force: bool, + + /// Execute against a remote server (by remote name, e.g., "origin") + #[arg(long)] + remote: Option, }, /// Insert data into a ledger diff --git a/fluree-db-cli/src/commands/drop.rs b/fluree-db-cli/src/commands/drop.rs index 5ee43bcd1..45f706ae7 100644 --- a/fluree-db-cli/src/commands/drop.rs +++ b/fluree-db-cli/src/commands/drop.rs @@ -1,16 +1,90 @@ use crate::config; use crate::context; use crate::error::{CliError, CliResult}; +use crate::remote_client::RemoteLedgerClient; use fluree_db_api::admin::DropStatus; use fluree_db_api::server_defaults::FlureeDir; -pub async fn run(name: &str, force: bool, dirs: &FlureeDir) -> CliResult<()> { +pub async fn run( + name: &str, + force: bool, + dirs: &FlureeDir, + remote_flag: Option<&str>, + direct: bool, +) -> CliResult<()> { if !force { return Err(CliError::Usage(format!( "use --force to confirm deletion of '{name}'" ))); } + if let Some(remote_name) = remote_flag { + let client = context::build_remote_client(remote_name, dirs).await?; + let result = run_remote(name, &client).await; + context::persist_refreshed_tokens(&client, remote_name, dirs).await; + return result; + } + + if 
!direct { + if let Some(client) = context::try_server_route_client(dirs) { + let result = run_remote(name, &client).await; + context::persist_refreshed_tokens(&client, context::LOCAL_SERVER_REMOTE, dirs).await; + return result; + } + } + + run_local(name, dirs).await +} + +async fn run_remote(name: &str, client: &RemoteLedgerClient) -> CliResult<()> { + let response = client + .drop_resource(name, true) + .await + .map_err(|e| CliError::Remote(format!("failed to drop '{name}': {e}")))?; + + let status = response + .get("status") + .and_then(|v| v.as_str()) + .ok_or_else(|| CliError::Remote("unexpected drop response: missing status".into()))?; + let ledger_id = response + .get("ledger_id") + .and_then(|v| v.as_str()) + .unwrap_or(name); + + match status { + "dropped" => { + if let Some(files) = response + .get("files_deleted") + .and_then(serde_json::Value::as_u64) + { + if files > 0 { + println!("Dropped '{ledger_id}' (deleted {files} artifacts)"); + } else { + println!("Dropped '{ledger_id}'"); + } + } else { + println!("Dropped '{ledger_id}'"); + } + } + "already_retracted" => println!("'{ledger_id}' was already dropped"), + "not_found" => return Err(CliError::NotFound(format!("'{name}' not found"))), + other => { + return Err(CliError::Remote(format!( + "unexpected drop status '{other}'" + ))) + } + } + + if let Some(warnings) = response.get("warnings").and_then(|v| v.as_array()) { + for warning in warnings.iter().filter_map(|v| v.as_str()) { + eprintln!(" warning: {warning}"); + } + } + + Ok(()) +} + +async fn run_local(name: &str, dirs: &FlureeDir) -> CliResult<()> { let fluree = context::build_fluree(dirs)?; // Try dropping as a ledger first diff --git a/fluree-db-cli/src/lib.rs b/fluree-db-cli/src/lib.rs index 6bc0d7791..daa539430 100644 --- a/fluree-db-cli/src/lib.rs +++ b/fluree-db-cli/src/lib.rs @@ -128,9 +128,13 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { commands::branch::run(action, &fluree_dir, direct).await } - Commands::Drop { name, 
force } => { + Commands::Drop { + name, + force, + remote, + } => { let fluree_dir = config::require_fluree_dir(config_path)?; - commands::drop::run(&name, force, &fluree_dir).await + commands::drop::run(&name, force, &fluree_dir, remote.as_deref(), direct).await } Commands::Insert { From 2ac2ebc9b19049db89b47e3e8c7d3b9003a7b988 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 9 May 2026 10:03:59 -0400 Subject: [PATCH 2/9] feat: Add log + RDF export remote support, fix drop auto-route MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `fluree log --remote` and `fluree export --remote` so users can browse a remote's commit history and export RDF directly without cloning. Both follow the same three-mode dispatch (explicit remote, auto-route via local server, local execution) used by list/reindex/ iceberg drop. New endpoints: - GET /v1/fluree/log/*ledger — paginated CommitSummary list, read-auth. - POST /v1/fluree/export/*ledger — RDF export (Turtle/NT/NQuads/TriG/ JSON-LD), admin-protected. Export bypasses per-flake policy filtering today, so it lives alongside /create, /drop, /reindex rather than the data-read bracket of /query and /show. Fix drop auto-route: when `fluree drop` ran via auto-route to a local server, the active-ledger pointer was not cleared, leaving CLI state pointing at a deleted ledger. Explicit `--remote ` still leaves local state untouched (remote storage is separate). Docs: contracts in server-integration.md plus full endpoint entries in api/endpoints.md. 
--- docs/api/endpoints.md | 112 ++++++++++++++ docs/cli/server-integration.md | 201 ++++++++++++++++++++++++++ fluree-db-api/src/lib.rs | 37 +++++ fluree-db-cli/src/cli.rs | 8 + fluree-db-cli/src/commands/drop.rs | 9 ++ fluree-db-cli/src/commands/export.rs | 124 ++++++++++++++-- fluree-db-cli/src/commands/log.rs | 107 +++++++++++++- fluree-db-cli/src/lib.rs | 14 +- fluree-db-cli/src/remote_client.rs | 63 ++++++++ fluree-db-server/src/routes/export.rs | 167 +++++++++++++++++++++ fluree-db-server/src/routes/log.rs | 134 +++++++++++++++++ fluree-db-server/src/routes/mod.rs | 9 +- 12 files changed, 970 insertions(+), 15 deletions(-) create mode 100644 fluree-db-server/src/routes/export.rs create mode 100644 fluree-db-server/src/routes/log.rs diff --git a/docs/api/endpoints.md b/docs/api/endpoints.md index 5dc154fee..0ff654e0c 100644 --- a/docs/api/endpoints.md +++ b/docs/api/endpoints.md @@ -382,6 +382,60 @@ Each flake is a tuple: `[subject, predicate, object, datatype, operation]`. Oper **Peer mode:** Forwards to the transactor. +### GET /log/*ledger + +Return a paginated list of lightweight commit summaries (newest-first by `t`). Server-side equivalent of `fluree log`. Read-auth — does **not** require storage-replication permissions, unlike `/commits`. + +**URL:** + +``` +GET /log/?limit= +``` + +**Query Parameters:** + +- `limit` (optional, default `100`): Number of summaries to return. Server clamps to a hard maximum (reference: `5000`). + +**Request Headers:** + +```http +Authorization: Bearer (when data auth is enabled) +``` + +**Response Body (200 OK):** + +```json +{ + "ledger_id": "mydb:main", + "commits": [ + { + "t": 12, + "commit_id": "bafy...", + "time": "2026-04-25T12:00:00Z", + "asserts": 3, + "retracts": 0, + "flake_count": 3, + "message": null + } + ], + "count": 12, + "truncated": false +} +``` + +`commits` is strictly newest-first by `t` and capped by `limit`. `count` is the full chain length; `truncated == count > commits.len()`. 
`message` is extracted from `txn_meta` when an `f:message` entry with a string value is present, otherwise `null`. Each summary mirrors `fluree_db_core::CommitSummary`. + +**Branch-aware walk:** The walk loads commit envelopes via a branch-aware content store so it can cross fork points — pre-fork commits live under the source branch's namespace. + +**Responses:** + +- `200 OK`: Summaries returned (possibly empty array when the ledger has no commits) +- `401 Unauthorized`: Bearer token required but missing +- `404 Not Found`: Ledger does not exist; or the bearer cannot `can_read` +- `5xx`: Storage / nameservice errors during walk + +**Peer mode:** Forwards to the transactor. + ### GET /commits/*ledger Export commit blobs from a ledger using stable cursors. Pages walk backward via each commit's `parents` — O(limit) per page regardless of ledger size. Used by `fluree pull` and `fluree clone`. @@ -2233,6 +2287,64 @@ curl -X POST http://localhost:8090/v1/fluree/reindex \ When triggering indexing through the Rust API instead, see `Fluree::reindex` and `ReindexOptions`. For background incremental indexing (which runs automatically as commits are made), see [Background indexing](../indexing-and-search/background-indexing.md). +### POST /export/*ledger + +Return ledger data as RDF in the requested format (Turtle, N-Triples, N-Quads, TriG, or JSON-LD). Server-side equivalent of `fluree export`. + +**Auth bracket: admin-protected** — same middleware as `/create`, `/drop`, `/reindex`, and the branch admin endpoints. Today's implementation reads from the binary index without per-flake policy filtering, so it does not live in the data-read bracket alongside `/query` and `/show`. Adding policy-filtered streaming export would let it move to read-auth in the future. 
+ +**URL:** + +``` +POST /export/ +``` + +**Request Body:** + +```json +{ + "format": "turtle", + "all_graphs": false, + "graph": "http://example.org/people", + "context": { "ex": "http://example.org/" }, + "at": "t:42" +} +``` + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `format` | string | No | `"turtle"` | One of `turtle`/`ttl`, `ntriples`/`nt`, `nquads`/`n-quads`, `trig`, `jsonld`/`json-ld`/`json`. Case-insensitive. | +| `all_graphs` | bool | No | `false` | Export every named graph as a dataset. Requires `format` ∈ `trig` / `nquads`. Mutually exclusive with `graph`. | +| `graph` | string | No | — | IRI of a single named graph to export. Mutually exclusive with `all_graphs`. | +| `context` | object | No | ledger default | Prefix map for Turtle/TriG/JSON-LD output. Either a bare object or `{"@context": {…}}`. | +| `at` | string | No | latest | Time spec — integer (`"42"`), ISO-8601 datetime, or commit CID prefix. | + +An empty body is treated as all-default (Turtle export at HEAD). + +**Response Headers:** + +| Format | Content-Type | +|--------|--------------| +| Turtle | `text/turtle; charset=utf-8` | +| N-Triples | `application/n-triples; charset=utf-8` | +| N-Quads | `application/n-quads; charset=utf-8` | +| TriG | `application/trig; charset=utf-8` | +| JSON-LD | `application/ld+json; charset=utf-8` | + +**Response Body (200 OK):** + +The raw RDF for the requested format. The reference server today buffers the full export in memory before responding; implementations are free to stream chunked bodies, and clients MUST be prepared to read until EOF. 
+ +**Status Codes:** + +- `200 OK` — export complete +- `400 Bad Request` — unknown format; conflicting `all_graphs` + `graph`; `all_graphs` with non-dataset format; unknown graph IRI; malformed JSON; ledger not indexed (`ApiError::Config`) +- `401` / `403` — admin token required and absent/invalid +- `404 Not Found` — ledger does not exist +- `5xx` — storage / nameservice / encoding errors + +**Peer mode:** Forwards to the transactor. + ## Admin Authentication Administrative endpoints (`/create`, `/drop`, `/reindex`, branch operations, and Iceberg mapping when enabled) can be protected with Bearer token authentication. diff --git a/docs/cli/server-integration.md b/docs/cli/server-integration.md index 790a028c6..e79790f05 100644 --- a/docs/cli/server-integration.md +++ b/docs/cli/server-integration.md @@ -73,6 +73,22 @@ The `commit` query parameter accepts the same identifiers as the local `fluree s - `404 Not Found` — ledger or commit not found - `501 Not Implemented` — proxy storage mode (no local index available for decoding) +### `fluree log --remote` + +- `GET {api_base_url}/log/*ledger?limit=` + +Returns lightweight per-commit summaries newest-first by `t`. Read-auth (same bracket as `/show`) — does **not** require storage-replication permissions, unlike `/commits`. See [Commit Log Contract](#commit-log-contract) for the response shape and required server semantics. + +When `--remote` is omitted, the CLI auto-routes through a locally running `fluree server start` if one is detected; pass `--direct` to skip auto-routing and use the local commit-chain walker. + +### `fluree export --remote` (admin-protected) + +- `POST {api_base_url}/export/*ledger` + +Returns ledger data as RDF in the requested format (Turtle, N-Triples, N-Quads, TriG, or JSON-LD). **Admin-protected** — same bracket as `/create`, `/drop`, `/reindex`. 
RDF export today reads from the binary index without per-flake policy filtering, which is why it does not live in the data-read bracket alongside `/query` and `/show`. See [RDF Export Contract](#rdf-export-contract) for the request body fields and content-type mapping. + +When `--remote` is omitted, the CLI auto-routes through a locally running server when one is detected; pass `--direct` to bypass routing and use the local binary index. Tracked ledgers (no local data) require `--remote`. + ### `fluree publish [ledger]` (create + push) Creates a ledger on the remote and pushes all local commits in a single operation. @@ -893,6 +909,189 @@ These endpoints exist so a client can fetch bytes by CID without knowing storage `/storage/block` is only required for query peers that need server-mediated index-leaf access. +## Commit Log Contract + +`fluree log --remote` issues a single read-only request: + +``` +GET {api_base_url}/log/{ledger}?limit={n} +``` + +| Parameter | Type | Required | Server default | Description | +|-----------|------|----------|----------------|-------------| +| `ledger` (path) | string | Yes | — | Ledger ID, including branch suffix (`org/mydb` and `org/mydb:main` both work via the greedy `*ledger` capture) | +| `limit` | integer | No | `100` | Number of summaries to return (newest-first by `t`). Server clamps to a hard maximum (reference: `5000`). | + +### Auth + +Read-only. Requires a Bearer token when `data_auth.mode == required`; gates on +`can_read(ledger)`; returns `404` (not `403`) when the bearer cannot read the +ledger so it doesn't leak existence. Admin tokens are NOT required. + +### Response (`200 OK`) + +```jsonc +{ + "ledger_id": "mydb:main", + "commits": [ + { + "t": 12, + "commit_id": "bafy...", + "time": "2026-04-25T12:00:00Z", + "asserts": 3, + "retracts": 0, + "flake_count": 3, + "message": null + } + // ... 
newest-first by t + ], + "count": 12, + "truncated": false +} +``` + +| Field | Type | Notes | +|-------|------|-------| +| `ledger_id` | string | Ledger ID echoed from the request path. | +| `commits` | array | Per-commit summaries, **strictly newest-first by `t`**, capped at the resolved limit. | +| `count` | integer | Total commits in the chain (uncapped). `truncated == count > commits.len()`. | +| `truncated` | bool | `true` when the chain is longer than the returned page. | + +Each `commits[i]` mirrors `fluree_db_core::CommitSummary`: + +| Field | Type | Notes | +|-------|------|-------| +| `t` | integer | Transaction number. | +| `commit_id` | string | Content ID (CID) of the commit blob. | +| `time` | string \| null | ISO-8601 commit time, or `null` for legacy commits without a timestamp. | +| `asserts` | integer | Asserted flakes in this commit. | +| `retracts` | integer | Retracted flakes. | +| `flake_count` | integer | Total flakes (`asserts + retracts`). | +| `message` | string \| null | Extracted from `txn_meta` when an `f:message` entry with a string value is present. Returns `null` otherwise. | + +### Required semantics + +1. **Branch-aware walk.** The walk **must** load commit envelopes via a + branch-aware content store (the reference server uses + `branched_content_store_for_record`). Pre-fork commits live under the + source branch's namespace, so a flat per-branch store cannot reach them + and the response would be incomplete. +2. **Newest-first ordering.** `commits` is sorted strictly descending by + `t`. The CLI prints in this order without re-sorting. +3. **Empty ledger.** When the ledger exists but has no commits, return + `200 OK` with `commits: []` and `count: 0`. +4. **Hard cap.** Servers MUST enforce a hard maximum independent of the + client's `limit` (reference: `5000`). The CLI assumes the server caps + the response, and unbounded responses must not be reachable. 
+ +### Error responses + +| Status | When | +|--------|------| +| `401` | Bearer required and absent/invalid. | +| `404` | Ledger does not exist; or the bearer cannot `can_read`. | +| `5xx` | Storage / nameservice errors during walk. | + +### Reference implementation + +| Concern | Canonical location | +|---------|-------------------| +| HTTP route + auth | `fluree-db-server/src/routes/log.rs::log_ledger_tail` | +| Underlying API | `fluree_db_api::Fluree::commit_log` | +| Walk + summary | `fluree_db_core::commit::walk_commit_summaries` | + +## RDF Export Contract + +`fluree export --remote` issues: + +``` +POST {api_base_url}/export/{ledger} +Content-Type: application/json + +{ + "format": "turtle", + "all_graphs": false, + "graph": "http://example.org/people", + "context": { "ex": "http://example.org/" }, + "at": "t:42" +} +``` + +| Field | Type | Required | Server default | Description | +|-------|------|----------|----------------|-------------| +| `format` | string | No | `"turtle"` | One of: `turtle`/`ttl`, `ntriples`/`nt`, `nquads`/`n-quads`, `trig`, `jsonld`/`json-ld`/`json`. Case-insensitive. | +| `all_graphs` | bool | No | `false` | Export every named graph as a dataset. Requires `format` ∈ `trig` / `nquads`. Mutually exclusive with `graph`. | +| `graph` | string | No | — | IRI of a single named graph to export. Mutually exclusive with `all_graphs`. | +| `context` | object | No | ledger default | Prefix map for Turtle/TriG/JSON-LD output. Either a bare object (`{ "ex": "..." }`) or `{ "@context": {...} }`. Falls back to the ledger's stored default context when absent. | +| `at` | string | No | latest | Time spec — integer (`"42"`), ISO-8601 datetime (`"2026-01-15T10:30:00Z"`), or commit CID prefix (`"bafy…"`). Identical to the local `--at` flag. | + +An empty body is accepted and treated as all-default (Turtle export at HEAD). 
+ +### Auth + +**Admin-protected.** Same middleware as `/create`, `/drop`, `/reindex`, +and the branch admin endpoints — registered through +`v1_admin_protected_routes` in `fluree-db-server/src/routes/mod.rs`. + +Export today does **not** apply per-flake policy filtering: it reads +straight from the binary index. Putting it in the data-read bracket +alongside `/query` and `/show` would be a bulk policy bypass for any +bearer with `can_read(ledger)`. Adding policy-filtered streaming export +would let it move to read-auth in the future. + +### Response (`200 OK`) + +The body is the raw RDF for the requested format. `Content-Type` reflects +the chosen format: + +| Format | Content-Type | +|--------|--------------| +| Turtle | `text/turtle; charset=utf-8` | +| N-Triples | `application/n-triples; charset=utf-8` | +| N-Quads | `application/n-quads; charset=utf-8` | +| TriG | `application/trig; charset=utf-8` | +| JSON-LD | `application/ld+json; charset=utf-8` | + +The reference server today buffers the full export in memory before responding +(simple, sufficient for moderate-size ledgers). Implementations are free to +stream chunked bodies; clients MUST be prepared to read until EOF. + +### Required semantics + +1. **Format validation.** Reject unknown format strings with `400`. +2. **Dataset/format coupling.** When `all_graphs == true`, `format` must be + `trig` or `nquads`; otherwise return `400` with a message that mentions + the dataset format requirement (the local CLI surfaces the same error). +3. **Time spec parsing.** Same rules as the merge-preview / show + contracts: parse as integer first (`t`), then as ISO-8601 if it + contains both `-` and `:`, else as a commit CID prefix. +4. **Graph IRI resolution.** When `graph` is set, resolve via the ledger's + graph registry; an unknown IRI is a `400` (or `5xx` if you treat it as + a config error — the reference returns `400` via `ApiError::Config`). +5. **Index requirement.** Export reads from the binary index. 
If the + ledger has no index, the reference server surfaces `ApiError::Config` + ("no binary index available for export (is the ledger indexed?)"), + which the error mapper returns as `400 Bad Request`. Document that + shape if you implement equivalently — the CLI surfaces the message + verbatim. + +### Error responses + +| Status | When | +|--------|------| +| `400` | Unknown format; conflicting `all_graphs` + `graph`; `all_graphs` with non-dataset format; unknown graph IRI; malformed JSON; ledger not indexed. | +| `401` / `403` | Admin token required and absent/invalid. | +| `404` | Ledger does not exist. | +| `5xx` | Storage / nameservice / encoding errors during walk. | + +### Reference implementation + +| Concern | Canonical location | +|---------|-------------------| +| HTTP route + auth | `fluree-db-server/src/routes/export.rs::export_ledger_tail` | +| Builder | `fluree_db_api::export_builder::ExportBuilder` | +| Format encoders | `fluree_db_api::export` | + ## `/create` Contract - Endpoint: `POST {api_base_url}/create` @@ -1293,6 +1492,8 @@ fluree iceberg map my-gs \ fluree list # should show mydb (Ledger) + my-gs (Iceberg) fluree info my-gs # should show Iceberg config + R2RML mapping fluree show t:1 --remote origin # should show decoded commit with resolved IRIs +fluree log mydb --remote origin --oneline # should print the remote's commit chain newest-first +fluree export mydb --remote origin --format turtle > mydb-remote.ttl # should write Turtle to disk fluree drop my-gs --force # should drop the graph source locally fluree drop local-db --remote origin --force # should drop the published ledger on the remote ``` diff --git a/fluree-db-api/src/lib.rs b/fluree-db-api/src/lib.rs index 0e5c0713b..e8f54f88c 100644 --- a/fluree-db-api/src/lib.rs +++ b/fluree-db-api/src/lib.rs @@ -3244,6 +3244,43 @@ impl Fluree { export_builder::ExportBuilder::new(self, ledger_id.to_string()) } + /// Walk the commit chain for a ledger and return per-commit summaries. 
+ /// + /// `limit` caps the number of returned summaries (newest-first by `t`). + /// The returned `total` reflects the full chain length regardless of cap; + /// truncation is implied by `summaries.len() < total`. + /// + /// Uses a branch-aware content store so the walk crosses fork points — + /// pre-fork commits live under the source branch's namespace, not the + /// current branch's. + pub async fn commit_log( + &self, + ledger_id: &str, + limit: Option, + ) -> Result<(Vec, usize)> { + let record = self + .nameservice() + .lookup(ledger_id) + .await? + .ok_or_else(|| ApiError::NotFound(ledger_id.to_string()))?; + + let head = match record.commit_head_id.as_ref() { + Some(id) => id.clone(), + None => return Ok((Vec::new(), 0)), + }; + + let store = fluree_db_nameservice::branched_content_store_for_record( + self.backend(), + self.nameservice(), + &record, + ) + .await?; + + let (summaries, total) = + fluree_db_core::walk_commit_summaries(&store, &head, 0, limit).await?; + Ok((summaries, total)) + } + /// Get the default JSON-LD context for a ledger. 
/// /// Reads the context CID from nameservice config and fetches the blob diff --git a/fluree-db-cli/src/cli.rs b/fluree-db-cli/src/cli.rs index bd2bbb4f3..88d06f2c1 100644 --- a/fluree-db-cli/src/cli.rs +++ b/fluree-db-cli/src/cli.rs @@ -543,6 +543,10 @@ pub enum Commands { /// Query at a specific point in time #[arg(long)] at: Option, + + /// Execute against a remote server (by remote name, e.g., "origin") + #[arg(long)] + remote: Option, }, /// Show commit log for a ledger @@ -557,6 +561,10 @@ pub enum Commands { /// Maximum number of commits to show #[arg(short = 'n', long)] count: Option, + + /// Execute against a remote server (by remote name, e.g., "origin") + #[arg(long)] + remote: Option, }, /// Show the contents of a commit (decoded flakes with resolved IRIs) diff --git a/fluree-db-cli/src/commands/drop.rs b/fluree-db-cli/src/commands/drop.rs index 45f706ae7..e82a3d6da 100644 --- a/fluree-db-cli/src/commands/drop.rs +++ b/fluree-db-cli/src/commands/drop.rs @@ -29,6 +29,15 @@ pub async fn run( if let Some(client) = context::try_server_route_client(dirs) { let result = run_remote(name, &client).await; context::persist_refreshed_tokens(&client, context::LOCAL_SERVER_REMOTE, dirs).await; + // Auto-route operates against the same on-disk storage as `--direct`, + // so a successful drop must also clear the local active-ledger pointer + // to avoid leaving CLI state pointing at a deleted ledger. 
+ if result.is_ok() { + let active = config::read_active_ledger(dirs.data_dir()); + if active.as_deref() == Some(name) { + config::clear_active_ledger(dirs.data_dir())?; + } + } return result; } } diff --git a/fluree-db-cli/src/commands/export.rs b/fluree-db-cli/src/commands/export.rs index 3fb05398a..8daf3e19c 100644 --- a/fluree-db-cli/src/commands/export.rs +++ b/fluree-db-cli/src/commands/export.rs @@ -2,9 +2,10 @@ use crate::context; use crate::error::{CliError, CliResult}; +use crate::remote_client::RemoteLedgerClient; use fluree_db_api::export::ExportFormat; use fluree_db_api::server_defaults::FlureeDir; -use std::io::{self, BufWriter}; +use std::io::{self, BufWriter, Write}; use std::path::Path; #[allow(clippy::too_many_arguments)] @@ -17,40 +18,140 @@ pub async fn run( context_file: Option<&Path>, at: Option<&str>, dirs: &FlureeDir, + remote_flag: Option<&str>, + direct: bool, ) -> CliResult<()> { - // Check for tracked ledger — export requires local data - let store = crate::config::TomlSyncConfigStore::new(dirs.config_dir().to_path_buf()); let alias = context::resolve_ledger(explicit_ledger, dirs)?; - // Reject ledger#fragment syntax — use --graph instead if alias.contains('#') { return Err(CliError::Usage( "export does not support 'ledger#fragment' syntax; use --graph to export a specific named graph" .to_string(), )); } - if all_graphs && graph.is_some() { return Err(CliError::Usage( "cannot use both --all-graphs and --graph; choose one".to_string(), )); } - if store.get_tracked(&alias).is_some() - || store.get_tracked(&context::to_ledger_id(&alias)).is_some() + if let Some(remote_name) = remote_flag { + let client = context::build_remote_client(remote_name, dirs).await?; + let result = run_remote( + &alias, + format_str, + all_graphs, + graph, + context_expr, + context_file, + at, + &client, + ) + .await; + context::persist_refreshed_tokens(&client, remote_name, dirs).await; + return result; + } + + if !direct { + if let Some(client) = 
context::try_server_route_client(dirs) { + let result = run_remote( + &alias, + format_str, + all_graphs, + graph, + context_expr, + context_file, + at, + &client, + ) + .await; + context::persist_refreshed_tokens(&client, context::LOCAL_SERVER_REMOTE, dirs).await; + return result; + } + } + + run_local( + &alias, + format_str, + all_graphs, + graph, + context_expr, + context_file, + at, + dirs, + ) + .await +} + +#[allow(clippy::too_many_arguments)] +async fn run_remote( + alias: &str, + format_str: &str, + all_graphs: bool, + graph: Option<&str>, + context_expr: Option<&str>, + context_file: Option<&Path>, + at: Option<&str>, + client: &RemoteLedgerClient, +) -> CliResult<()> { + let context_override = resolve_context_override(context_expr, context_file)?; + + let mut body = serde_json::json!({ "format": format_str }); + if all_graphs { + body["all_graphs"] = serde_json::Value::Bool(true); + } + if let Some(iri) = graph { + body["graph"] = serde_json::Value::String(iri.to_string()); + } + if let Some(at_str) = at { + body["at"] = serde_json::Value::String(at_str.to_string()); + } + if let Some(ctx) = context_override { + body["context"] = ctx; + } + + let bytes = client + .export_rdf(alias, &body) + .await + .map_err(|e| CliError::Remote(format!("failed to export '{alias}': {e}")))?; + + let stdout = io::stdout().lock(); + let mut writer = BufWriter::new(stdout); + writer + .write_all(&bytes) + .map_err(|e| CliError::Config(format!("failed to write export to stdout: {e}")))?; + writer + .flush() + .map_err(|e| CliError::Config(format!("failed to flush stdout: {e}")))?; + Ok(()) +} + +#[allow(clippy::too_many_arguments)] +async fn run_local( + alias: &str, + format_str: &str, + all_graphs: bool, + graph: Option<&str>, + context_expr: Option<&str>, + context_file: Option<&Path>, + at: Option<&str>, + dirs: &FlureeDir, +) -> CliResult<()> { + let store = crate::config::TomlSyncConfigStore::new(dirs.config_dir().to_path_buf()); + if store.get_tracked(alias).is_some() 
+ || store.get_tracked(&context::to_ledger_id(alias)).is_some() { return Err(CliError::Usage( - "export is not available for tracked ledgers (no local data).".to_string(), + "export is not available for tracked ledgers (no local data); pass --remote to export from the upstream." + .to_string(), )); } let fluree = context::build_fluree(dirs)?; - // Parse format string → ExportFormat let format = parse_format(format_str)?; - // Build the export - let mut builder = fluree.export(&alias).format(format); + let mut builder = fluree.export(alias).format(format); if all_graphs { builder = builder.all_graphs(); @@ -64,7 +165,6 @@ pub async fn run( builder = builder.as_of(crate::commands::query::parse_time_spec(at_str)); } - // Resolve context override (--context or --context-file) if let Some(ctx) = resolve_context_override(context_expr, context_file)? { builder = builder.context(&ctx); } diff --git a/fluree-db-cli/src/commands/log.rs b/fluree-db-cli/src/commands/log.rs index 7df3ebf13..799a8b8f4 100644 --- a/fluree-db-cli/src/commands/log.rs +++ b/fluree-db-cli/src/commands/log.rs @@ -1,5 +1,6 @@ use crate::context; use crate::error::{CliError, CliResult}; +use crate::remote_client::RemoteLedgerClient; use fluree_db_api::server_defaults::FlureeDir; use futures::StreamExt; @@ -8,6 +9,110 @@ pub async fn run( oneline: bool, count: Option, dirs: &FlureeDir, + remote_flag: Option<&str>, + direct: bool, +) -> CliResult<()> { + if let Some(remote_name) = remote_flag { + let alias = context::resolve_ledger(ledger, dirs)?; + let client = context::build_remote_client(remote_name, dirs).await?; + let result = run_remote(&alias, oneline, count, &client).await; + context::persist_refreshed_tokens(&client, remote_name, dirs).await; + return result; + } + + if !direct { + if let Some(client) = context::try_server_route_client(dirs) { + let alias = context::resolve_ledger(ledger, dirs)?; + let result = run_remote(&alias, oneline, count, &client).await; + 
context::persist_refreshed_tokens(&client, context::LOCAL_SERVER_REMOTE, dirs).await; + return result; + } + } + + run_local(ledger, oneline, count, dirs).await +} + +async fn run_remote( + alias: &str, + oneline: bool, + count: Option, + client: &RemoteLedgerClient, +) -> CliResult<()> { + let response = client + .commit_log(alias, count) + .await + .map_err(|e| CliError::Remote(format!("failed to fetch log for '{alias}': {e}")))?; + + let commits = response + .get("commits") + .and_then(|v| v.as_array()) + .ok_or_else(|| CliError::Remote("unexpected log response: missing commits array".into()))?; + + if commits.is_empty() { + println!("No commits found for ledger '{alias}'"); + return Ok(()); + } + + for commit in commits { + let t = commit + .get("t") + .and_then(serde_json::Value::as_i64) + .unwrap_or(0); + let commit_id = commit + .get("commit_id") + .and_then(|v| v.as_str()) + .unwrap_or(""); + let time = commit.get("time").and_then(|v| v.as_str()).unwrap_or(""); + let asserts = commit + .get("asserts") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let retracts = commit + .get("retracts") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let flake_count = commit + .get("flake_count") + .and_then(serde_json::Value::as_u64) + .unwrap_or(asserts + retracts); + let short = abbreviate_hash(commit_id); + + if oneline { + println!("t={t:<4} {short} {time}"); + } else { + println!("commit {short}"); + if !time.is_empty() { + println!("Date: {time}"); + } + println!("t: {t}"); + println!("Flakes: {flake_count}"); + println!(); + } + } + + if let Some(true) = response + .get("truncated") + .and_then(serde_json::Value::as_bool) + { + let total = response + .get("count") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + eprintln!( + "(showing {} of {} commits — pass -n to widen)", + commits.len(), + total + ); + } + + Ok(()) +} + +async fn run_local( + ledger: Option<&str>, + oneline: bool, + count: Option, + dirs: &FlureeDir, ) -> 
CliResult<()> { // Check for tracked ledger — log requires local commit chain access let store = crate::config::TomlSyncConfigStore::new(dirs.config_dir().to_path_buf()); @@ -17,7 +122,7 @@ pub async fn run( { return Err(CliError::Usage( "commit log is not available for tracked ledgers (no local commit chain).\n \ - Use `fluree track status` to check remote state instead." + Use `fluree track status` to check remote state instead, or pass `--remote `." .to_string(), )); } diff --git a/fluree-db-cli/src/lib.rs b/fluree-db-cli/src/lib.rs index daa539430..f6ce771bd 100644 --- a/fluree-db-cli/src/lib.rs +++ b/fluree-db-cli/src/lib.rs @@ -284,6 +284,7 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { context, context_file, at, + remote, } => { let fluree_dir = config::require_fluree_dir_or_global(config_path)?; commands::export::run( @@ -295,6 +296,8 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { context_file.as_deref(), at.as_deref(), &fluree_dir, + remote.as_deref(), + direct, ) .await } @@ -303,9 +306,18 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { ledger, oneline, count, + remote, } => { let fluree_dir = config::require_fluree_dir_or_global(config_path)?; - commands::log::run(ledger.as_deref(), oneline, count, &fluree_dir).await + commands::log::run( + ledger.as_deref(), + oneline, + count, + &fluree_dir, + remote.as_deref(), + direct, + ) + .await } Commands::Show { diff --git a/fluree-db-cli/src/remote_client.rs b/fluree-db-cli/src/remote_client.rs index 0239dd673..68d9b2896 100644 --- a/fluree-db-cli/src/remote_client.rs +++ b/fluree-db-cli/src/remote_client.rs @@ -1098,6 +1098,69 @@ impl RemoteLedgerClient { .await } + // ========================================================================= + // RDF export + // ========================================================================= + + /// Fetch an RDF export of a ledger from the remote. 
+ /// + /// Calls `POST {base_url}/export/` with the JSON body documented + /// in the `/export` Contract. Returns the raw response bytes (the + /// requested RDF format) — the caller is responsible for writing them + /// to the desired sink. + pub async fn export_rdf( + &self, + ledger: &str, + body: &serde_json::Value, + ) -> Result { + let url = self.op_url("export", ledger); + let req_body = Some(RequestBody::Json(body)); + + let resp = self + .build_request(reqwest::Method::POST, &url, "application/json", &req_body) + .send() + .await + .map_err(Self::map_network_error)?; + + let resp = if resp.status() == StatusCode::UNAUTHORIZED && self.try_refresh().await { + self.build_request(reqwest::Method::POST, &url, "application/json", &req_body) + .send() + .await + .map_err(Self::map_network_error)? + } else { + resp + }; + + if !resp.status().is_success() { + return Err(Self::map_error(resp).await); + } + + resp.bytes() + .await + .map_err(|e| RemoteLedgerError::InvalidResponse(format!("read body: {e}"))) + } + + // ========================================================================= + // Commit log + // ========================================================================= + + /// Fetch lightweight commit summaries from the remote. + /// + /// Calls `GET {base_url}/log/?limit=`. The server returns + /// summaries newest-first by `t`, capped at the server's hard maximum. 
+ pub async fn commit_log( + &self, + ledger: &str, + limit: Option, + ) -> Result { + let mut url = self.op_url("log", ledger); + if let Some(n) = limit { + url.push_str(&format!("?limit={n}")); + } + self.send_json(reqwest::Method::GET, &url, "application/json", None) + .await + } + // ========================================================================= // Reindex // ========================================================================= diff --git a/fluree-db-server/src/routes/export.rs b/fluree-db-server/src/routes/export.rs new file mode 100644 index 000000000..b538c3175 --- /dev/null +++ b/fluree-db-server/src/routes/export.rs @@ -0,0 +1,167 @@ +//! RDF export endpoint: `POST /v1/fluree/export/*ledger`. +//! +//! Returns ledger data as RDF in the requested format. The response body is +//! the raw RDF bytes; `Content-Type` reflects the format. +//! +//! **Auth bracket: admin-protected.** RDF export today reads from the binary +//! index without applying per-flake policy filtering, so it lives in the same +//! bracket as `/create`, `/drop`, and `/reindex` rather than the data-read +//! bracket of `/query` and `/show`. Adding policy-filtered streaming export +//! would let it move to read-auth in the future. + +use crate::config::ServerRole; +use crate::error::{Result, ServerError}; +use crate::extract::FlureeHeaders; +use crate::state::AppState; +use crate::telemetry::{create_request_span, extract_request_id, extract_trace_id}; +use axum::body::Body; +use axum::extract::{Path, Request, State}; +use axum::http::header; +use axum::response::{IntoResponse, Response}; +use fluree_db_api::export::ExportFormat; +use fluree_db_api::TimeSpec; +use serde::Deserialize; +use std::sync::Arc; +use tracing::Instrument; + +#[derive(Deserialize, Default)] +pub struct ExportRequest { + /// One of: `turtle`/`ttl`, `ntriples`/`nt`, `nquads`/`n-quads`, `trig`, + /// `jsonld`/`json-ld`/`json`. Default: `turtle`. + pub format: Option, + /// Export all named graphs. 
Requires a dataset format (`trig` or `nquads`). + #[serde(default)] + pub all_graphs: bool, + /// Export a single named graph by IRI. Mutually exclusive with `all_graphs`. + pub graph: Option, + /// Override the JSON-LD prefix context. Either a bare object (`{ "ex": "..." }`) + /// or a `{ "@context": {...} }` wrapper. + pub context: Option, + /// Time spec — transaction number, ISO-8601 datetime, or commit CID prefix. + pub at: Option, +} + +/// `POST /v1/fluree/export/` +pub async fn export_ledger_tail( + State(state): State>, + Path(ledger): Path, + headers: FlureeHeaders, + request: Request, +) -> Response { + if state.config.server_role == ServerRole::Peer { + let client = match state.forwarding_client.as_ref() { + Some(c) => c, + None => { + return ServerError::internal("Forwarding client not configured").into_response() + } + }; + return match client.forward(request).await { + Ok(resp) => resp, + Err(e) => e.into_response(), + }; + } + + export_local(state, ledger, headers, request) + .await + .into_response() +} + +async fn export_local( + state: Arc, + ledger: String, + headers: FlureeHeaders, + request: Request, +) -> Result { + let request_id = extract_request_id(&headers.raw, &state.telemetry_config); + let trace_id = extract_trace_id(&headers.raw); + + let span = create_request_span( + "ledger:export", + request_id.as_deref(), + trace_id.as_deref(), + Some(&ledger), + None, + None, + ); + async move { + tracing::info!(status = "start", "ledger export requested"); + + let body_bytes = axum::body::to_bytes(request.into_body(), 1024 * 1024) + .await + .map_err(|e| ServerError::bad_request(format!("Failed to read body: {e}")))?; + let req: ExportRequest = if body_bytes.is_empty() { + ExportRequest::default() + } else { + serde_json::from_slice(&body_bytes) + .map_err(|e| ServerError::bad_request(format!("Invalid JSON: {e}")))? 
+ }; + + let format = parse_format(req.format.as_deref().unwrap_or("turtle"))?; + + let mut builder = state.fluree.export(&ledger).format(format); + if req.all_graphs { + builder = builder.all_graphs(); + } + if let Some(iri) = req.graph.as_deref() { + builder = builder.graph(iri); + } + if let Some(at_str) = req.at.as_deref() { + builder = builder.as_of(parse_time_spec(at_str)); + } + if let Some(ctx) = req.context.as_ref() { + builder = builder.context(ctx); + } + + let mut buf: Vec = Vec::new(); + let stats = builder.write_to(&mut buf).await.map_err(ServerError::Api)?; + tracing::info!( + status = "success", + triples = stats.triples_written, + bytes = buf.len(), + "ledger export complete" + ); + + let content_type = content_type_for(format); + let resp = Response::builder() + .status(200) + .header(header::CONTENT_TYPE, content_type) + .body(Body::from(buf)) + .map_err(|e| ServerError::internal(format!("failed to build response: {e}")))?; + Ok(resp) + } + .instrument(span) + .await +} + +fn parse_format(s: &str) -> Result { + match s.to_ascii_lowercase().as_str() { + "turtle" | "ttl" => Ok(ExportFormat::Turtle), + "ntriples" | "nt" => Ok(ExportFormat::NTriples), + "nquads" | "n-quads" => Ok(ExportFormat::NQuads), + "trig" => Ok(ExportFormat::TriG), + "jsonld" | "json-ld" | "json" => Ok(ExportFormat::JsonLd), + other => Err(ServerError::bad_request(format!( + "unknown export format '{other}'" + ))), + } +} + +fn parse_time_spec(at: &str) -> TimeSpec { + if let Ok(t) = at.parse::() { + TimeSpec::at_t(t) + } else if at.contains('-') && at.contains(':') { + TimeSpec::at_time(at.to_string()) + } else { + TimeSpec::at_commit(at.to_string()) + } +} + +fn content_type_for(format: ExportFormat) -> &'static str { + match format { + ExportFormat::Turtle => "text/turtle; charset=utf-8", + ExportFormat::NTriples => "application/n-triples; charset=utf-8", + ExportFormat::NQuads => "application/n-quads; charset=utf-8", + ExportFormat::TriG => "application/trig; 
charset=utf-8", + ExportFormat::JsonLd => "application/ld+json; charset=utf-8", + } +} diff --git a/fluree-db-server/src/routes/log.rs b/fluree-db-server/src/routes/log.rs new file mode 100644 index 000000000..d1990aafd --- /dev/null +++ b/fluree-db-server/src/routes/log.rs @@ -0,0 +1,134 @@ +//! Commit log endpoint: `GET /v1/fluree/log/*ledger`. +//! +//! Returns lightweight per-commit summaries (newest-first by `t`) for use by +//! `fluree log` and similar history views. Unlike `/commits`, this endpoint +//! uses normal data-read auth — it does not return raw commit blobs. + +use crate::config::ServerRole; +use crate::error::{Result, ServerError}; +use crate::extract::{FlureeHeaders, MaybeDataBearer}; +use crate::state::AppState; +use crate::telemetry::{ + create_request_span, extract_request_id, extract_trace_id, set_span_error_code, +}; +use axum::extract::{Path, Query, Request, State}; +use axum::response::{IntoResponse, Response}; +use axum::Json; +use fluree_db_api::CommitSummary; +use serde::{Deserialize, Serialize}; +use std::sync::Arc; +use tracing::Instrument; + +/// Hard cap on the number of summaries returned in one response, regardless +/// of the client's `limit` parameter. Mirrors the merge-preview hard cap. +const LOG_HARD_MAX_COMMITS: usize = 5_000; + +/// Default cap when the client omits `limit`. +const LOG_DEFAULT_LIMIT: usize = 100; + +#[derive(Deserialize)] +pub struct LogQuery { + /// Maximum summaries to return (newest-first). Server clamps to a hard max. + pub limit: Option, +} + +#[derive(Serialize)] +pub struct LogResponse { + pub ledger_id: String, + pub commits: Vec, + /// Total commits in the chain, regardless of `limit`. `truncated == count > + /// commits.len()`. 
+ pub count: usize, + pub truncated: bool, +} + +/// `GET /v1/fluree/log/?limit=N` +pub async fn log_ledger_tail( + State(state): State>, + Path(ledger): Path, + headers: FlureeHeaders, + bearer: MaybeDataBearer, + Query(query): Query, + request: Request, +) -> Response { + if state.config.server_role == ServerRole::Peer { + let client = match state.forwarding_client.as_ref() { + Some(c) => c, + None => { + return ServerError::internal("Forwarding client not configured").into_response() + } + }; + return match client.forward(request).await { + Ok(resp) => resp, + Err(e) => e.into_response(), + }; + } + + log_local(state, ledger, headers, bearer, query) + .await + .into_response() +} + +async fn log_local( + state: Arc, + ledger: String, + headers: FlureeHeaders, + bearer: MaybeDataBearer, + query: LogQuery, +) -> Result { + let request_id = extract_request_id(&headers.raw, &state.telemetry_config); + let trace_id = extract_trace_id(&headers.raw); + + let span = create_request_span( + "commit:log", + request_id.as_deref(), + trace_id.as_deref(), + Some(&ledger), + None, + None, + ); + async move { + let span = tracing::Span::current(); + tracing::info!(status = "start", "commit log requested"); + + let data_auth = state.config.data_auth(); + if data_auth.mode == crate::config::DataAuthMode::Required && bearer.0.is_none() { + set_span_error_code(&span, "error:Unauthorized"); + return Err(ServerError::unauthorized("Bearer token required")); + } + if let Some(p) = bearer.0.as_ref() { + if !p.can_read(&ledger) { + set_span_error_code(&span, "error:Forbidden"); + return Err(ServerError::not_found("Ledger not found")); + } + } + + let limit = query + .limit + .unwrap_or(LOG_DEFAULT_LIMIT) + .min(LOG_HARD_MAX_COMMITS); + + let (commits, count) = state + .fluree + .commit_log(&ledger, Some(limit)) + .await + .map_err(ServerError::Api)?; + + let truncated = count > commits.len(); + tracing::info!( + status = "success", + count, + returned = commits.len(), + "commit log 
complete" + ); + Ok(Json(LogResponse { + ledger_id: ledger, + commits, + count, + truncated, + }) + .into_response()) + } + .instrument(span) + .await +} diff --git a/fluree-db-server/src/routes/mod.rs b/fluree-db-server/src/routes/mod.rs index fe7d91d81..3b9481992 100644 --- a/fluree-db-server/src/routes/mod.rs +++ b/fluree-db-server/src/routes/mod.rs @@ -5,9 +5,11 @@ mod admin_auth; mod commits; mod context; mod events; +mod export; #[cfg(feature = "iceberg")] mod iceberg; mod ledger; +mod log; mod nameservice_refs; mod pack; mod policy_auth; @@ -43,7 +45,10 @@ pub fn build_router(state: Arc) -> Router { .route("/drop-branch", post(ledger::drop_branch)) .route("/rebase", post(ledger::rebase)) .route("/merge", post(ledger::merge)) - .route("/revert", post(ledger::revert)); + .route("/revert", post(ledger::revert)) + // RDF export bypasses per-flake policy filtering today, so it lives in + // the admin-protected bracket alongside other root-level operations. + .route("/export/*ledger", post(export::export_ledger_tail)); #[cfg(feature = "iceberg")] let v1_admin_protected_routes = @@ -96,6 +101,8 @@ pub fn build_router(state: Arc) -> Router { .route("/push/*ledger", post(push::push_ledger_tail)) // Commit show endpoint (decoded commit with resolved IRIs) .route("/show/*ledger", get(show::show_ledger_tail)) + // Commit log endpoint (lightweight per-commit summaries) + .route("/log/*ledger", get(log::log_ledger_tail)) // Commit export endpoint (paginated, replication-grade auth) .route("/commits/*ledger", get(commits::commits_ledger_tail)) // Binary pack stream endpoint (efficient clone/pull) From 571101081d51a84518cf0f954622ca21b92029e4 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 9 May 2026 10:05:54 -0400 Subject: [PATCH 3/9] feat: Add `fluree export --format ledger` for `.flpack` archives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `docs/cli/server-integration.md` has long claimed `fluree export --format ledger -o 
mydb.flpack` exists, and `fluree create --from .flpack` already imports the format, but the export side was never wired up — `parse_format` only accepted RDF formats and there was no `-o` flag. This closes that gap. API: - `pack::stream_archive` mirrors `stream_pack` but injects a `phase: "nameservice"` manifest frame before End. Unlike `stream_pack`, on producer failure it drops the sender and returns `Err(message)` instead of emitting an Error frame so the caller never persists a partial archive. - `Fluree::archive_ledger(ledger_id, include_indexes, writer)` resolves the ledger record, sources the manifest *and* pack heads from the same `LedgerView` snapshot (so they cannot disagree under cache lag), and writes frames to any `AsyncWrite` sink. The manifest's `index_head_id` / `index_t` are emitted only when index artifacts are actually archived, so `--no-indexes` no longer produces an archive that points at missing index data. CLI: - `fluree export` accepts `--format ledger` (alias `flpack`) and a new `-o, --output ` flag that works for any format. `--no-indexes` produces a smaller archive that the importer reindexes on load. - Refuses TTY stdout for binary archives and rejects `--remote`, `--at`, `--all-graphs`, `--graph`, and `--context*` for `--format ledger` since they don't apply to whole-ledger archives. - On producer-side archive failure, the partial output file is removed before the error is returned. Docs: - `docs/cli/server-integration.md`: `fluree export --format ledger` section now reflects what's implemented. - `docs/operations/pack-archive-restore.md`: replaces the "no dedicated command" stub with the actual CLI invocation; the Rust API section continues to cover non-CLI use cases (S3 upload, etc.). Round-trip verified: `fluree create flptest && fluree insert ... && fluree export flptest --format ledger -o flptest.flpack && fluree create restored --from flptest.flpack && fluree query restored ...` returns the original triple. 
Same with `--no-indexes`. Remote archive (`--format ledger --remote `) is intentionally deferred: it requires fetching the remote nameservice record and intercepting the `/pack` stream's End frame to inject the manifest. --- docs/cli/server-integration.md | 4 +- docs/operations/pack-archive-restore.md | 13 +- fluree-db-api/src/lib.rs | 103 +++++++++++ fluree-db-api/src/pack.rs | 54 ++++++ fluree-db-cli/src/cli.rs | 16 +- fluree-db-cli/src/commands/export.rs | 235 +++++++++++++++++++++--- fluree-db-cli/src/lib.rs | 4 + 7 files changed, 395 insertions(+), 34 deletions(-) diff --git a/docs/cli/server-integration.md b/docs/cli/server-integration.md index e79790f05..39788e5e6 100644 --- a/docs/cli/server-integration.md +++ b/docs/cli/server-integration.md @@ -128,9 +128,9 @@ Imports a `.flpack` file (native ledger pack) into a new local ledger. The `.flp ### `fluree export --format ledger` (native ledger export) -- No server endpoint required (local-only operation) +- No server endpoint required (local-only operation today) -Exports a full local ledger (all commits, indexes, dictionaries) as a `.flpack` file. See [Ledger portability](#ledger-portability-flpack-files) below. +Exports a full local ledger (all commits, txn blobs, and — unless `--no-indexes` — binary index artifacts) as a `.flpack` archive. The archive contains a `phase: "nameservice"` manifest frame so the importer can reconstruct the head pointers. Pass `-o ` to write to disk (required when stdout is a TTY). `--remote` is not yet supported for `--format ledger`. See [Ledger portability](#ledger-portability-flpack-files) below. 
### `fluree query`, `fluree insert`, `fluree upsert`, `fluree update`, `fluree track`, `fluree info`, `fluree exists` diff --git a/docs/operations/pack-archive-restore.md b/docs/operations/pack-archive-restore.md index ab4de398f..799966b66 100644 --- a/docs/operations/pack-archive-restore.md +++ b/docs/operations/pack-archive-restore.md @@ -29,9 +29,18 @@ A pack can include just commits + txn blobs (compact, sufficient for full restor ### Archive (export to `.flpack`) -The CLI does not yet have a dedicated `fluree export --format flpack` command. To produce a `.flpack` file today, use the pack HTTP endpoint directly or the Rust API (see below). +```bash +fluree export mydb --format ledger -o mydb.flpack + +# Smaller archive without binary index artifacts (importer will reindex): +fluree export mydb --format ledger --no-indexes -o mydb.flpack +``` + +`--format ledger` (alias `--format flpack`) writes the full `fluree-pack-v1` archive — commits, txn blobs, and (unless `--no-indexes`) index artifacts — plus a `phase: "nameservice"` manifest frame that lets the importer reconstruct commit/index head pointers. + +`-o FILE` is required when stdout is a TTY (the archive is binary). Pipe-friendly forms work too: `fluree export mydb --format ledger > mydb.flpack`. Local-only today; `--remote` is not yet supported for `--format ledger`. -From the CLI, the closest equivalent is `fluree clone` which uses the pack protocol internally for transfer, then writes objects to local CAS. +Under the hood this calls `Fluree::archive_ledger` (see [Rust API usage](#rust-api-usage) below), which is also what consumers should use for non-CLI archive flows like S3 upload. 
### Restore (import from `.flpack`) diff --git a/fluree-db-api/src/lib.rs b/fluree-db-api/src/lib.rs index e8f54f88c..27aafed12 100644 --- a/fluree-db-api/src/lib.rs +++ b/fluree-db-api/src/lib.rs @@ -3244,6 +3244,109 @@ impl Fluree { export_builder::ExportBuilder::new(self, ledger_id.to_string()) } + /// Stream a self-contained ledger archive (`.flpack`) for `ledger_id`. + /// + /// This is the export side of the `fluree create --from .flpack` + /// pipeline. Frame bytes (header → commits → optional indexes → + /// nameservice manifest → end) are written to `writer` in order, so the + /// caller can target a file, stdout, or any `AsyncWrite` sink without + /// buffering the full archive in memory. + /// + /// `include_indexes` controls whether binary index artifacts ride along + /// (`true` → instantly queryable on import; `false` → smaller archive, + /// import will need to reindex). When the ledger has no index root, the + /// flag is silently downgraded to commits-only. + pub async fn archive_ledger( + &self, + ledger_id: &str, + include_indexes: bool, + writer: &mut W, + ) -> Result { + use tokio::io::AsyncWriteExt as _; + + let record = self + .nameservice() + .lookup(ledger_id) + .await? + .ok_or_else(|| ApiError::NotFound(ledger_id.to_string()))?; + + let canonical_id = record.ledger_id.clone(); + let handle = self.ledger_cached(&canonical_id).await?; + + // Source the manifest *and* the pack request from the same view, so + // the archive bytes and the manifest pointers always agree. Reading + // the heads from the nameservice record while reading the pack + // contents from the cached handle could disagree if the cache is + // behind a freshly committed head. + let view = handle.snapshot().await; + + let head_commit_id = view.head_commit_id.clone().ok_or_else(|| { + ApiError::internal(format!("ledger {canonical_id} has no head commit to pack")) + })?; + + // `full_ledger_pack_request` silently drops the index when the + // ledger has none. 
Mirror that decision here so we never advertise + // an `index_head_id` we did not archive. + let archived_index = if include_indexes { + view.head_index_id.clone() + } else { + None + }; + let request = match archived_index.clone() { + Some(index_root) => pack::PackRequest::with_indexes( + vec![head_commit_id.clone()], + vec![], + index_root, + None, + ), + None => pack::PackRequest::commits(vec![head_commit_id.clone()], vec![]), + }; + + let mut manifest = serde_json::json!({ + "phase": "nameservice", + "ledger_id": canonical_id, + "name": record.name, + "branch": record.branch, + "commit_head_id": head_commit_id.to_string(), + "commit_t": view.t, + }); + if let Some(cid) = archived_index.as_ref() { + manifest["index_head_id"] = serde_json::Value::String(cid.to_string()); + manifest["index_t"] = serde_json::Value::from(view.index_t()); + } + + let (tx, mut rx) = tokio::sync::mpsc::channel::(64); + + // Run producer and consumer concurrently in the same task: the + // producer is borrowing `&self`, so we cannot `tokio::spawn` it + // without an owning handle. The bounded channel still gives us + // backpressure as long as the consumer keeps draining. + let producer = pack::stream_archive(self, &handle, &request, manifest, tx); + let consumer = async { + while let Some(chunk) = rx.recv().await { + let bytes = chunk.map_err(|e| ApiError::internal(format!("pack stream: {e}")))?; + writer + .write_all(&bytes) + .await + .map_err(|e| ApiError::internal(format!("archive write: {e}")))?; + } + writer + .flush() + .await + .map_err(|e| ApiError::internal(format!("archive flush: {e}")))?; + Ok::<_, ApiError>(()) + }; + + let (producer_result, consumer_result) = tokio::join!(producer, consumer); + // Surface a producer-side failure even if the consumer drained + // cleanly. Without this, a corrupt or empty archive would land on + // disk and `archive_ledger` would still report success. 
+ let stats = producer_result + .map_err(|e| ApiError::internal(format!("archive generation failed: {e}")))?; + consumer_result?; + Ok(stats) + } + /// Walk the commit chain for a ledger and return per-commit summaries. /// /// `limit` caps the number of returned summaries (newest-first by `t`). diff --git a/fluree-db-api/src/pack.rs b/fluree-db-api/src/pack.rs index 1a10b3641..95d66b247 100644 --- a/fluree-db-api/src/pack.rs +++ b/fluree-db-api/src/pack.rs @@ -344,6 +344,60 @@ pub async fn stream_pack( } } +/// Generate a self-contained ledger archive stream (`.flpack`). +/// +/// On success, frames are: header → data frames → optional index manifest + +/// artifacts → `phase: "nameservice"` manifest → End. That manifest is what +/// allows `fluree create --from .flpack` to reconstruct the +/// nameservice record without contacting any remote — see +/// `docs/operations/pack-archive-restore.md` for the full format. +/// +/// `nameservice_manifest` should be a JSON object containing at least +/// `phase: "nameservice"`, `ledger_id`, `name`, `branch`, `commit_head_id`, +/// and `commit_t`. `index_head_id` / `index_t` should only be included when +/// the archive actually carries those artifacts. +/// +/// Unlike [`stream_pack`], on producer failure this **does not** emit an +/// Error frame followed by End. Instead it drops the sender and returns +/// `Err(message)`. The consumer sees the channel close cleanly, and callers +/// (e.g. `Fluree::archive_ledger`) propagate the error rather than persisting +/// a corrupt partial archive on disk. 
+pub async fn stream_archive( + fluree: &crate::Fluree, + handle: &LedgerHandle, + request: &PackRequest, + nameservice_manifest: serde_json::Value, + frame_tx: mpsc::Sender, +) -> std::result::Result { + let result = stream_pack_inner(fluree, handle, request, &frame_tx).await; + + match result { + Ok(stats) => { + let mut manifest_buf = Vec::with_capacity(512); + encode_manifest_frame(&nameservice_manifest, &mut manifest_buf); + frame_tx + .send(Ok(manifest_buf)) + .await + .map_err(|_| "client disconnected before manifest".to_string())?; + + let mut end_buf = Vec::new(); + encode_end_frame(&mut end_buf); + frame_tx + .send(Ok(end_buf)) + .await + .map_err(|_| "client disconnected before end".to_string())?; + Ok(stats) + } + Err(err_msg) => { + warn!(error = %err_msg, "archive stream error"); + // Drop the sender by returning; the consumer sees a clean close + // and we surface the error to the caller. No Error/End frames + // are emitted so we never persist a partial archive. + Err(err_msg) + } + } +} + async fn stream_pack_inner( fluree: &crate::Fluree, handle: &LedgerHandle, diff --git a/fluree-db-cli/src/cli.rs b/fluree-db-cli/src/cli.rs index 88d06f2c1..fd5a2a717 100644 --- a/fluree-db-cli/src/cli.rs +++ b/fluree-db-cli/src/cli.rs @@ -506,18 +506,30 @@ pub enum Commands { action: ContextAction, }, - /// Export ledger data as Turtle, N-Triples, N-Quads, TriG, or JSON-LD + /// Export ledger data as RDF (Turtle, N-Triples, N-Quads, TriG, JSON-LD) or as a `.flpack` archive Export { /// Ledger name (defaults to active ledger) ledger: Option, - /// Output format: turtle (ttl), ntriples (nt), jsonld, trig, or nquads (default: turtle) + /// Output format: turtle (ttl), ntriples (nt), jsonld, trig, nquads, + /// or ledger (`.flpack` archive — full ledger including commits and + /// indexes, importable via `fluree create --from .flpack`). /// /// Note: exporting all graphs requires a dataset-capable format /// (`trig` or `nquads`). 
#[arg(long, default_value = "turtle")] format: String, + /// Write output to FILE instead of stdout. Required for --format ledger + /// when stdout is a TTY (the archive is binary). + #[arg(long, short = 'o', value_name = "FILE")] + output: Option, + + /// For --format ledger only: skip binary index artifacts (smaller archive, + /// the importer will need to reindex before queries are efficient). + #[arg(long)] + no_indexes: bool, + /// Export all named graphs (dataset export), including system graphs. /// /// Use `--format trig` or `--format nquads` when this flag is set. diff --git a/fluree-db-cli/src/commands/export.rs b/fluree-db-cli/src/commands/export.rs index 8daf3e19c..050b2c75a 100644 --- a/fluree-db-cli/src/commands/export.rs +++ b/fluree-db-cli/src/commands/export.rs @@ -1,17 +1,26 @@ -//! `fluree export` — streaming RDF export via the API builder. +//! `fluree export` — streaming RDF export via the API builder, plus +//! `--format ledger` for full `.flpack` archive export. use crate::context; use crate::error::{CliError, CliResult}; use crate::remote_client::RemoteLedgerClient; +use colored::Colorize; use fluree_db_api::export::ExportFormat; use fluree_db_api::server_defaults::FlureeDir; -use std::io::{self, BufWriter, Write}; -use std::path::Path; +use std::io::{self, BufWriter, IsTerminal, Write}; +use std::path::{Path, PathBuf}; + +/// Whether the user requested the full ledger archive format. 
+fn is_ledger_format(s: &str) -> bool { + matches!(s.to_ascii_lowercase().as_str(), "ledger" | "flpack") +} #[allow(clippy::too_many_arguments)] pub async fn run( explicit_ledger: Option<&str>, format_str: &str, + output: Option<&Path>, + no_indexes: bool, all_graphs: bool, graph: Option<&str>, context_expr: Option<&str>, @@ -35,11 +44,28 @@ pub async fn run( )); } + if is_ledger_format(format_str) { + return run_ledger_archive( + &alias, + output, + no_indexes, + at, + all_graphs, + graph, + context_expr, + context_file, + dirs, + remote_flag, + ) + .await; + } + if let Some(remote_name) = remote_flag { let client = context::build_remote_client(remote_name, dirs).await?; - let result = run_remote( + let result = run_remote_rdf( &alias, format_str, + output, all_graphs, graph, context_expr, @@ -54,9 +80,10 @@ pub async fn run( if !direct { if let Some(client) = context::try_server_route_client(dirs) { - let result = run_remote( + let result = run_remote_rdf( &alias, format_str, + output, all_graphs, graph, context_expr, @@ -70,9 +97,10 @@ pub async fn run( } } - run_local( + run_local_rdf( &alias, format_str, + output, all_graphs, graph, context_expr, @@ -83,10 +111,130 @@ pub async fn run( .await } +// ============================================================================= +// Ledger archive (--format ledger / flpack) +// ============================================================================= + +#[allow(clippy::too_many_arguments)] +async fn run_ledger_archive( + alias: &str, + output: Option<&Path>, + no_indexes: bool, + at: Option<&str>, + all_graphs: bool, + graph: Option<&str>, + context_expr: Option<&str>, + context_file: Option<&Path>, + dirs: &FlureeDir, + remote_flag: Option<&str>, +) -> CliResult<()> { + if remote_flag.is_some() { + return Err(CliError::Usage( + "fluree export --format ledger does not yet support --remote; \ + run it against a local ledger or use the Rust API. \ + See docs/operations/pack-archive-restore.md." 
+ .to_string(), + )); + } + if at.is_some() { + return Err(CliError::Usage( + "fluree export --format ledger does not support --at — archives capture the current head; \ + use a TimeTravel restore on import instead." + .to_string(), + )); + } + if all_graphs || graph.is_some() { + return Err(CliError::Usage( + "fluree export --format ledger archives the entire ledger; --all-graphs / --graph apply only to RDF formats" + .to_string(), + )); + } + if context_expr.is_some() || context_file.is_some() { + return Err(CliError::Usage( + "fluree export --format ledger does not use --context / --context-file (the archive is binary)" + .to_string(), + )); + } + + let store = crate::config::TomlSyncConfigStore::new(dirs.config_dir().to_path_buf()); + if store.get_tracked(alias).is_some() + || store.get_tracked(&context::to_ledger_id(alias)).is_some() + { + return Err(CliError::Usage( + "fluree export --format ledger requires local data and is not available for tracked ledgers" + .to_string(), + )); + } + + let fluree = context::build_fluree(dirs)?; + let ledger_id = context::to_ledger_id(alias); + + match output { + Some(path) => { + let path: PathBuf = path.to_path_buf(); + let file = tokio::fs::File::create(&path).await.map_err(|e| { + CliError::Config(format!("failed to create '{}': {e}", path.display())) + })?; + let mut writer = tokio::io::BufWriter::new(file); + let archive_result = fluree.archive_ledger(&ledger_id, !no_indexes, &mut writer).await; + // Drop writer before we touch the file again on the error path, + // so the underlying file handle is closed. + drop(writer); + + let stats = match archive_result { + Ok(stats) => stats, + Err(e) => { + // Don't leave a corrupt or empty .flpack on disk for the + // user to discover later — clean up and surface the error. 
+ let _ = std::fs::remove_file(&path); + return Err(e.into()); + } + }; + eprintln!( + "{} Archived '{}' → {} ({} commits, {} txn blobs, {} index artifacts)", + "✓".green(), + alias, + path.display(), + stats.commits_sent, + stats.txn_blobs_sent, + stats.index_artifacts_sent, + ); + } + None => { + if io::stdout().is_terminal() { + return Err(CliError::Usage( + "refusing to write a binary .flpack archive to a TTY; pass -o or redirect stdout" + .to_string(), + )); + } + let stdout = tokio::io::stdout(); + let mut writer = tokio::io::BufWriter::new(stdout); + let stats = fluree + .archive_ledger(&ledger_id, !no_indexes, &mut writer) + .await?; + // stdout already owns its bytes; nothing to clean up on failure. + eprintln!( + "{} Archived '{}' to stdout ({} commits, {} txn blobs, {} index artifacts)", + "✓".green(), + alias, + stats.commits_sent, + stats.txn_blobs_sent, + stats.index_artifacts_sent, + ); + } + } + Ok(()) +} + +// ============================================================================= +// RDF formats (turtle, ntriples, nquads, trig, jsonld) +// ============================================================================= + #[allow(clippy::too_many_arguments)] -async fn run_remote( +async fn run_remote_rdf( alias: &str, format_str: &str, + output: Option<&Path>, all_graphs: bool, graph: Option<&str>, context_expr: Option<&str>, @@ -115,21 +263,14 @@ async fn run_remote( .await .map_err(|e| CliError::Remote(format!("failed to export '{alias}': {e}")))?; - let stdout = io::stdout().lock(); - let mut writer = BufWriter::new(stdout); - writer - .write_all(&bytes) - .map_err(|e| CliError::Config(format!("failed to write export to stdout: {e}")))?; - writer - .flush() - .map_err(|e| CliError::Config(format!("failed to flush stdout: {e}")))?; - Ok(()) + write_bytes_to_sink(&bytes, output) } #[allow(clippy::too_many_arguments)] -async fn run_local( +async fn run_local_rdf( alias: &str, format_str: &str, + output: Option<&Path>, all_graphs: bool, graph: 
Option<&str>, context_expr: Option<&str>, @@ -148,36 +289,74 @@ async fn run_local( } let fluree = context::build_fluree(dirs)?; - - let format = parse_format(format_str)?; + let format = parse_rdf_format(format_str)?; let mut builder = fluree.export(alias).format(format); if all_graphs { builder = builder.all_graphs(); } - if let Some(iri) = graph { builder = builder.graph(iri); } - if let Some(at_str) = at { builder = builder.as_of(crate::commands::query::parse_time_spec(at_str)); } - if let Some(ctx) = resolve_context_override(context_expr, context_file)? { builder = builder.context(&ctx); } - let stdout = io::stdout().lock(); - let mut writer = BufWriter::new(stdout); - builder.write_to(&mut writer).await?; + match output { + Some(path) => { + let file = std::fs::File::create(path).map_err(|e| { + CliError::Config(format!("failed to create '{}': {e}", path.display())) + })?; + let mut writer = BufWriter::new(file); + builder.write_to(&mut writer).await?; + writer + .flush() + .map_err(|e| CliError::Config(format!("failed to flush output: {e}")))?; + } + None => { + let stdout = io::stdout().lock(); + let mut writer = BufWriter::new(stdout); + builder.write_to(&mut writer).await?; + } + } Ok(()) } -/// Parse a CLI format string into an `ExportFormat`. 
-fn parse_format(s: &str) -> CliResult { +fn write_bytes_to_sink(bytes: &[u8], output: Option<&Path>) -> CliResult<()> { + match output { + Some(path) => { + let file = std::fs::File::create(path).map_err(|e| { + CliError::Config(format!("failed to create '{}': {e}", path.display())) + })?; + let mut writer = BufWriter::new(file); + writer + .write_all(bytes) + .map_err(|e| CliError::Config(format!("failed to write export: {e}")))?; + writer + .flush() + .map_err(|e| CliError::Config(format!("failed to flush output: {e}")))?; + } + None => { + let stdout = io::stdout().lock(); + let mut writer = BufWriter::new(stdout); + writer + .write_all(bytes) + .map_err(|e| CliError::Config(format!("failed to write export to stdout: {e}")))?; + writer + .flush() + .map_err(|e| CliError::Config(format!("failed to flush stdout: {e}")))?; + } + } + Ok(()) +} + +/// Parse a CLI format string into an `ExportFormat` (RDF formats only). +fn parse_rdf_format(s: &str) -> CliResult { match s.to_lowercase().as_str() { "turtle" | "ttl" => Ok(ExportFormat::Turtle), "ntriples" | "nt" => Ok(ExportFormat::NTriples), @@ -185,7 +364,7 @@ fn parse_format(s: &str) -> CliResult { "trig" => Ok(ExportFormat::TriG), "jsonld" | "json-ld" | "json" => Ok(ExportFormat::JsonLd), other => Err(CliError::Usage(format!( - "unknown export format '{other}'; valid formats: turtle, ntriples, nquads, trig, jsonld" + "unknown export format '{other}'; valid formats: turtle, ntriples, nquads, trig, jsonld, ledger" ))), } } diff --git a/fluree-db-cli/src/lib.rs b/fluree-db-cli/src/lib.rs index f6ce771bd..f48d8dc89 100644 --- a/fluree-db-cli/src/lib.rs +++ b/fluree-db-cli/src/lib.rs @@ -279,6 +279,8 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { Commands::Export { ledger, format, + output, + no_indexes, all_graphs, graph, context, @@ -290,6 +292,8 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { commands::export::run( ledger.as_deref(), &format, + output.as_deref(), + no_indexes, all_graphs, 
graph.as_deref(), context.as_deref(), From eb94f16dc61960599cc3dac08e2232592ff80a5d Mon Sep 17 00:00:00 2001 From: bplatz Date: Sun, 10 May 2026 07:42:29 -0400 Subject: [PATCH 4/9] feat: Add --remote to context/history/create + fix query --at scoping Closes the remaining gaps from the original `--remote` audit: - `fluree context get|set --remote` rides the existing `GET`/`PUT /context/*ledger` endpoints. New `RemoteLedgerClient::get_context` / `set_context` methods, three-mode dispatch in `commands/context_cmd.rs`. - `fluree history --remote` posts the existing JSON-LD history body to `POST /query/{ledger}` (ledger-scoped, not connection-level) so scoped read tokens authorize. Compact-IRI expansion still happens client-side; the body's `@context` is preserved for response display. - `fluree create --remote ` calls `POST /create` for the empty-ledger case. Refuses combinations with `--from`/`--memory` (those need local data ingestion) and points at `fluree publish` for the create-and-push workflow. Falls back to global config so the command works without a project-local `.fluree/`. Also addresses several reviewer findings from this branch: - `fluree query --remote --at ` now uses ledger-scoped query/explain endpoints (`POST /query/{ledger}`, `POST /explain/{ledger}`). The path drives `can_read`, the body's `from`/SPARQL `FROM` carries the `@t:N` suffix for snapshot resolution. Posting to the connection- level endpoint forced auth to derive the ledger ID from `from` and rejected scoped tokens. - `build_remote_mode` canonicalizes `ledger_alias` via `to_ledger_id` before storing as `LedgerMode::Tracked.remote_alias`, so one-shot `--remote` always sends the full `name:branch` form on the URL path. A token scoped to `mydb:main` would 404 if we sent `mydb`. - `--at --explain --remote` is refused outright rather than silently returning a HEAD-snapshot plan: the server's explain handler loads the ledger at HEAD regardless of any time-travel `from`. 
Run with `--direct` for a local time-travel explain, or drop `--at` to explain the HEAD plan against the remote. Open server-side items (out of scope here): - Both `/explain` and `/explain/{ledger}` need to honor body's `from` time-travel (delegate to the same `execute_dataset_query`-style path the regular query uses). Once that lands, the CLI's `--at --explain --remote` bail-out can be lifted. - Ledger-scoped `/explain` rejects SPARQL `FROM/FROM NAMED` outright; relaxing to accept same-ledger time-travel `FROM` is needed for the SPARQL flavor of the same fix. --- docs/cli/server-integration.md | 47 ++++++++++- fluree-db-cli/src/cli.rs | 18 +++++ fluree-db-cli/src/commands/context_cmd.rs | 98 +++++++++++++++++++++-- fluree-db-cli/src/commands/create.rs | 23 ++++++ fluree-db-cli/src/commands/export.rs | 4 +- fluree-db-cli/src/commands/history.rs | 96 ++++++++++++++++------ fluree-db-cli/src/commands/query.rs | 77 ++++++------------ fluree-db-cli/src/context.rs | 7 +- fluree-db-cli/src/lib.rs | 45 +++++++++-- fluree-db-cli/src/remote_client.rs | 40 +++++++++ 10 files changed, 360 insertions(+), 95 deletions(-) diff --git a/docs/cli/server-integration.md b/docs/cli/server-integration.md index 39788e5e6..9973046cd 100644 --- a/docs/cli/server-integration.md +++ b/docs/cli/server-integration.md @@ -73,6 +73,31 @@ The `commit` query parameter accepts the same identifiers as the local `fluree s - `404 Not Found` — ledger or commit not found - `501 Not Implemented` — proxy storage mode (no local index available for decoding) +### `fluree create --remote <name>` (admin-protected, empty ledger only) + +- `POST {api_base_url}/create` with `{"ledger": "<name>"}` + +Creates an **empty** ledger on the remote server. The CLI rejects `--remote` together with `--from` / `--memory` (those import paths require local data ingestion); the suggested workflow is to create + populate locally, then run `fluree publish <remote>` which calls `/exists`, `/create`, and `/push` in sequence. 
+ +`--remote` does not touch local state — neither the active-ledger pointer nor the local storage tree. The CLI does not require a project-local `.fluree/` for `create --remote`; it falls back to global config (`$FLUREE_HOME` or the platform default) for remote registration lookups. Auto-routing through a local server is **not** done for `create`; you must pass `--remote <name>` explicitly. Without `--remote`, `fluree create` is local-only and does require a project `.fluree/`. + +### `fluree context get|set --remote` + +- `GET {api_base_url}/context/*ledger` (read) +- `PUT {api_base_url}/context/*ledger` (write) + +Read or replace the default JSON-LD context for a ledger. `get` returns the context as JSON; the unwrapped object is what the CLI prints. `set` accepts either a bare object (`{"ex": "http://example.org/"}`) or a `{"@context": {...}}` wrapper, and replies with `{"status": "updated"}` (or `409 Conflict` after CAS retries). + +`get` uses normal data-read auth (Bearer required when `data_auth.mode == required`, gates on `can_read(ledger)`). `set` uses normal write auth (`can_write(ledger)`). Auto-routing behaves the same way as other read/write commands — pass `--direct` to skip. + +### `fluree history --remote` + +- `POST {api_base_url}/query/*ledger` + +Server-side history queries via JSON-LD: the CLI builds the same `from`/`to`/`select`/`where` body it would send locally and POSTs it to the **ledger-scoped** query endpoint (`/query/{ledger}`). The path carries the bare ledger ID (e.g. `mydb:main`) so the server's `can_read` check matches normal scoped read tokens; the body's `from` carries the time-travel suffix (`mydb:main@t:N`) which the query engine uses to resolve the snapshot. Posting to the connection-level `/query` instead would force auth to read `from` for the ledger ID and reject any token not scoped to the time-travel form. 
+ +Entity and predicate compact IRIs (`ex:alice` → `http://example.org/alice`) are expanded **client-side** using the project's stored prefix map before the request leaves the CLI, so the server never has to consult the local prefix table. The query body still ships its `@context` (also derived from local prefixes) so the server can compact response IRIs back into the user's preferred form for display. + ### `fluree log --remote` - `GET {api_base_url}/log/*ledger?limit=` @@ -148,6 +173,21 @@ listed below and, for JSON-LD bodies, also injects them into `opts`. To be CLI-compatible, your server must implement the contract in [Policy Enforcement Contract](#policy-enforcement-contract). +**Remote time travel (`--at`)** routes through the **ledger-scoped** endpoints +(`POST /query/{ledger}`, etc.): the URL path drives the bearer's +`can_read` check (so a token scoped to `mydb:main` matches), and the +time-travel suffix rides in the body's `from` (`mydb:main@t:N` for JSON-LD) +or in an injected `FROM ` clause (for SPARQL). Posting to +the connection-level endpoint instead would force auth to derive the +ledger ID from `from` and reject scoped tokens. + +**Known limitation: `--at` + `--explain` over `--remote` is refused.** The +server's explain handler (both connection- and ledger-scoped) loads the +ledger at HEAD regardless of any time-travel `from`, so a remote +`--at --explain` would silently return the HEAD plan. The CLI rejects the +combination outright; pass `--direct` for a local time-travel explain, or +drop `--at` to explain the HEAD plan against the remote. + ### `fluree branch list` (read-only) - `GET {api_base_url}/branch/{ledger}` — note **singular** `branch`, ledger is a @@ -1101,7 +1141,7 @@ stream chunked bodies; clients MUST be prepared to read until EOF. If no branch suffix is provided (e.g., `"mydb"`), the server MUST normalize to `"mydb:main"`. 
-Used by `fluree publish` (and potentially future `fluree create --remote`) to create a ledger on a remote server before pushing commits. +Used by `fluree publish` (which calls `/create` after `/exists` returns false) and by `fluree create --remote <name>` (empty-ledger creation on a remote server). ## `/reindex` Contract @@ -1494,6 +1534,11 @@ fluree info my-gs # should show Iceberg config + R2RML mapping fluree show t:1 --remote origin # should show decoded commit with resolved IRIs fluree log mydb --remote origin --oneline # should print the remote's commit chain newest-first fluree export mydb --remote origin --format turtle > mydb-remote.ttl # should write Turtle to disk +fluree context get mydb --remote origin # should print the remote ledger's default context +fluree context set mydb --remote origin -e '{"ex": "http://example.org/"}' # admin: replace context +fluree history http://example.org/alice --ledger mydb --remote origin --format json # remote history +fluree query mydb 'SELECT * WHERE { ?s ?p ?o }' --remote origin --at 1 # time-travel via /query/{ledger} +fluree create empty-db --remote origin # should create an empty ledger on the remote fluree drop my-gs --force # should drop the graph source locally fluree drop local-db --remote origin --force # should drop the published ledger on the remote ``` diff --git a/fluree-db-cli/src/cli.rs b/fluree-db-cli/src/cli.rs index fd5a2a717..81348ed80 100644 --- a/fluree-db-cli/src/cli.rs +++ b/fluree-db-cli/src/cli.rs @@ -248,6 +248,12 @@ pub enum Commands { /// Larger values produce fewer leaf files (shallower tree, bigger reads). #[arg(long, default_value_t = 10)] leaflets_per_leaf: usize, + + /// Create the ledger on a remote server (by remote name, e.g., "origin"). + /// Only valid with empty creates — incompatible with --from/--memory. + /// Use `fluree publish` if you also need to push local commits. 
+ #[arg(long)] + remote: Option, }, /// Set the active ledger @@ -498,6 +504,10 @@ pub enum Commands { /// Output format (json, table, csv, or tsv) #[arg(long, default_value = "table")] format: String, + + /// Execute against a remote server (by remote name, e.g., "origin") + #[arg(long)] + remote: Option, }, /// Manage the default JSON-LD context for a ledger @@ -1295,6 +1305,10 @@ pub enum ContextAction { Get { /// Ledger name (defaults to active ledger) ledger: Option, + + /// Read from a remote server (by remote name, e.g., "origin") + #[arg(long)] + remote: Option, }, /// Set (replace) the default JSON-LD context for a ledger @@ -1314,6 +1328,10 @@ pub enum ContextAction { /// Read context from a JSON file #[arg(long, short = 'f')] file: Option, + + /// Write to a remote server (by remote name, e.g., "origin") + #[arg(long)] + remote: Option, }, } diff --git a/fluree-db-cli/src/commands/context_cmd.rs b/fluree-db-cli/src/commands/context_cmd.rs index b07eb60a8..b1862a1f3 100644 --- a/fluree-db-cli/src/commands/context_cmd.rs +++ b/fluree-db-cli/src/commands/context_cmd.rs @@ -2,15 +2,36 @@ use crate::context; use crate::error::{CliError, CliResult}; +use crate::remote_client::RemoteLedgerClient; use fluree_db_api::server_defaults::FlureeDir; use std::path::PathBuf; -/// `fluree context get [ledger]` -pub async fn get(explicit_ledger: Option<&str>, dirs: &FlureeDir) -> CliResult<()> { +/// `fluree context get [ledger] [--remote ]` +pub async fn get( + explicit_ledger: Option<&str>, + dirs: &FlureeDir, + remote_flag: Option<&str>, + direct: bool, +) -> CliResult<()> { let alias = context::resolve_ledger(explicit_ledger, dirs)?; - let fluree = context::build_fluree(dirs)?; let ledger_id = context::to_ledger_id(&alias); + if let Some(remote_name) = remote_flag { + let client = context::build_remote_client(remote_name, dirs).await?; + let result = run_remote_get(&alias, &ledger_id, &client).await; + context::persist_refreshed_tokens(&client, remote_name, 
dirs).await; + return result; + } + + if !direct { + if let Some(client) = context::try_server_route_client(dirs) { + let result = run_remote_get(&alias, &ledger_id, &client).await; + context::persist_refreshed_tokens(&client, context::LOCAL_SERVER_REMOTE, dirs).await; + return result; + } + } + + let fluree = context::build_fluree(dirs)?; match fluree.get_default_context(&ledger_id).await? { Some(ctx) => { println!( @@ -29,15 +50,38 @@ pub async fn get(explicit_ledger: Option<&str>, dirs: &FlureeDir) -> CliResult<( Ok(()) } -/// `fluree context set [ledger] -e '...' | -f file.json` +async fn run_remote_get( + alias: &str, + ledger_id: &str, + client: &RemoteLedgerClient, +) -> CliResult<()> { + let ctx = client + .get_context(ledger_id) + .await + .map_err(|e| CliError::Remote(format!("failed to get context for '{alias}': {e}")))?; + + if ctx.is_null() { + println!("null"); + eprintln!("No default context set for '{alias}'."); + } else { + println!( + "{}", + serde_json::to_string_pretty(&ctx).unwrap_or_else(|_| ctx.to_string()) + ); + } + Ok(()) +} + +/// `fluree context set [ledger] -e '...' 
| -f file.json [--remote ]` pub async fn set( explicit_ledger: Option<&str>, expr: Option<&str>, file: Option<&PathBuf>, dirs: &FlureeDir, + remote_flag: Option<&str>, + direct: bool, ) -> CliResult<()> { let alias = context::resolve_ledger(explicit_ledger, dirs)?; - let fluree = context::build_fluree(dirs)?; let ledger_id = context::to_ledger_id(&alias); // Read context from expr, file, or stdin @@ -63,14 +107,14 @@ pub async fn set( buf }; - let context: serde_json::Value = serde_json::from_str(&json_str) + let parsed: serde_json::Value = serde_json::from_str(&json_str) .map_err(|e| CliError::Usage(format!("invalid JSON: {e}")))?; // Accept either { "@context": {...} } wrapper or bare object - let ctx_value = if let Some(inner) = context.get("@context") { + let ctx_value = if let Some(inner) = parsed.get("@context") { inner.clone() } else { - context + parsed }; if !ctx_value.is_object() { @@ -79,6 +123,22 @@ pub async fn set( )); } + if let Some(remote_name) = remote_flag { + let client = context::build_remote_client(remote_name, dirs).await?; + let result = run_remote_set(&alias, &ledger_id, &ctx_value, &client).await; + context::persist_refreshed_tokens(&client, remote_name, dirs).await; + return result; + } + + if !direct { + if let Some(client) = context::try_server_route_client(dirs) { + let result = run_remote_set(&alias, &ledger_id, &ctx_value, &client).await; + context::persist_refreshed_tokens(&client, context::LOCAL_SERVER_REMOTE, dirs).await; + return result; + } + } + + let fluree = context::build_fluree(dirs)?; match fluree.set_default_context(&ledger_id, &ctx_value).await? 
{ fluree_db_api::SetContextResult::Updated => { eprintln!("Default context updated for '{alias}'."); @@ -92,3 +152,25 @@ Ok(()) } + +async fn run_remote_set( + alias: &str, + ledger_id: &str, + ctx_value: &serde_json::Value, + client: &RemoteLedgerClient, +) -> CliResult<()> { + let response = client + .set_context(ledger_id, ctx_value) + .await + .map_err(|e| CliError::Remote(format!("failed to set context for '{alias}': {e}")))?; + + let status = response + .get("status") + .and_then(|v| v.as_str()) + .unwrap_or("updated"); + match status { + "updated" => eprintln!("Default context updated for '{alias}'."), + other => eprintln!("Default context update returned status '{other}' for '{alias}'."), + } + Ok(()) +} diff --git a/fluree-db-cli/src/commands/create.rs b/fluree-db-cli/src/commands/create.rs index 5e233198a..ede8f7dc0 100644 --- a/fluree-db-cli/src/commands/create.rs +++ b/fluree-db-cli/src/commands/create.rs @@ -15,6 +15,29 @@ pub struct ImportOpts { pub leaflets_per_leaf: usize, } +/// `fluree create --remote <name>` — create an empty ledger on the +/// remote server. Only the empty-create case is supported; bulk imports +/// (`--from`, `--memory`) require local data ingestion and are dispatched +/// before this is reached. Active-ledger pointer is **not** touched — +/// remote storage is separate from local. 
+pub async fn run_remote(ledger: &str, remote_name: &str, dirs: &FlureeDir) -> CliResult<()> { + let client = context::build_remote_client(remote_name, dirs).await?; + let ledger_id = context::to_ledger_id(ledger); + let response = client.create_ledger(&ledger_id).await.map_err(|e| { + CliError::Remote(format!( + "failed to create '{ledger}' on remote '{remote_name}': {e}" + )) + })?; + context::persist_refreshed_tokens(&client, remote_name, dirs).await; + + let resolved = response + .get("ledger") + .and_then(|v| v.as_str()) + .unwrap_or(&ledger_id); + println!("Created ledger '{resolved}' on remote '{remote_name}'"); + Ok(()) +} + pub async fn run( ledger: &str, from: Option<&Path>, diff --git a/fluree-db-cli/src/commands/export.rs b/fluree-db-cli/src/commands/export.rs index 050b2c75a..78a6b6149 100644 --- a/fluree-db-cli/src/commands/export.rs +++ b/fluree-db-cli/src/commands/export.rs @@ -176,7 +176,9 @@ async fn run_ledger_archive( CliError::Config(format!("failed to create '{}': {e}", path.display())) })?; let mut writer = tokio::io::BufWriter::new(file); - let archive_result = fluree.archive_ledger(&ledger_id, !no_indexes, &mut writer).await; + let archive_result = fluree + .archive_ledger(&ledger_id, !no_indexes, &mut writer) + .await; // Drop writer before we touch the file again on the error path, // so the underlying file handle is closed. 
drop(writer); diff --git a/fluree-db-cli/src/commands/history.rs b/fluree-db-cli/src/commands/history.rs index 4a66244e0..bb2ba1587 100644 --- a/fluree-db-cli/src/commands/history.rs +++ b/fluree-db-cli/src/commands/history.rs @@ -2,9 +2,11 @@ use crate::config; use crate::context; use crate::error::{CliError, CliResult}; use crate::output::OutputFormatKind; +use crate::remote_client::RemoteLedgerClient; use fluree_db_api::server_defaults::FlureeDir; use std::path::Path; +#[allow(clippy::too_many_arguments)] pub async fn run( entity: &str, ledger: Option<&str>, @@ -13,27 +15,28 @@ pub async fn run( predicate: Option<&str>, format_str: &str, dirs: &FlureeDir, + remote_flag: Option<&str>, + direct: bool, ) -> CliResult<()> { - // Check for tracked ledger — history requires local query execution - let store = crate::config::TomlSyncConfigStore::new(dirs.config_dir().to_path_buf()); let alias = context::resolve_ledger(ledger, dirs)?; - if store.get_tracked(&alias).is_some() - || store.get_tracked(&context::to_ledger_id(&alias)).is_some() - { - return Err(CliError::Usage( - "history is not available for tracked ledgers (no server endpoint).\n \ - Use `fluree track status` to check remote state instead." - .to_string(), - )); - } - let fluree = context::build_fluree(dirs)?; + // Parse output format up-front so all paths share the validation. + let output_format = match format_str.to_lowercase().as_str() { + "json" => OutputFormatKind::Json, + "table" => OutputFormatKind::Table, + "csv" => OutputFormatKind::Csv, + other => { + return Err(CliError::Usage(format!( + "unknown output format '{other}'; valid formats: json, table, csv" + ))); + } + }; - // Expand compact IRIs using stored prefixes + // Expand compact IRIs using stored prefixes — done locally since prefixes + // are stored in the project's config and aren't available on the remote. 
let entity_iri = config::expand_iri(dirs.data_dir(), entity); let predicate_iri = predicate.map(|p| config::expand_iri(dirs.data_dir(), p)); - // Build the history query let query = build_history_query( &alias, &entity_iri, @@ -43,30 +46,71 @@ pub async fn run( dirs.data_dir(), ); - // Parse output format - let output_format = match format_str.to_lowercase().as_str() { - "json" => OutputFormatKind::Json, - "table" => OutputFormatKind::Table, - "csv" => OutputFormatKind::Csv, - other => { - return Err(CliError::Usage(format!( - "unknown output format '{other}'; valid formats: json, table, csv" - ))); + // Bare ledger ID (e.g. "mydb:main") for the auth-driving path segment. + // The body's `from` carries the time-travel suffix ("mydb:main@t:N"); + // the server's auth check uses the path, the query engine uses the body. + let ledger_id = context::to_ledger_id(&alias); + + if let Some(remote_name) = remote_flag { + let client = context::build_remote_client(remote_name, dirs).await?; + let result = run_remote(&alias, &ledger_id, &query, output_format, &client).await; + context::persist_refreshed_tokens(&client, remote_name, dirs).await; + return result; + } + + if !direct { + if let Some(client) = context::try_server_route_client(dirs) { + let result = run_remote(&alias, &ledger_id, &query, output_format, &client).await; + context::persist_refreshed_tokens(&client, context::LOCAL_SERVER_REMOTE, dirs).await; + return result; } - }; + } - // Execute the query via connection (required for from/to history support) + // Local path: tracked ledgers have no commit chain, so history can't run. + let store = crate::config::TomlSyncConfigStore::new(dirs.config_dir().to_path_buf()); + if store.get_tracked(&alias).is_some() + || store.get_tracked(&context::to_ledger_id(&alias)).is_some() + { + return Err(CliError::Usage( + "history is not available locally for tracked ledgers (no commit chain).\n \ + Use `fluree track status`, or pass `--remote ` to query the upstream." 
+ .to_string(), + )); + } + + let fluree = context::build_fluree(dirs)?; let ledger_view = fluree.ledger(&alias).await?; let result = fluree.query_connection(&query).await?; let json = result.to_jsonld(&ledger_view.snapshot)?; - // Format output let output = format_history_result(&json, output_format)?; println!("{output}"); Ok(()) } +async fn run_remote( + alias: &str, + ledger_id: &str, + query: &serde_json::Value, + output_format: OutputFormatKind, + client: &RemoteLedgerClient, +) -> CliResult<()> { + // Use the ledger-scoped query path (`POST /query/{ledger}`) rather than + // connection-level. The server's auth check derives the ledger ID from + // the path when present, so a token scoped to `mydb:main` matches; if we + // posted to `/query` instead, auth would see body.from = `mydb:main@t:N` + // and reject scoped tokens. + let json = client + .query_jsonld(ledger_id, query) + .await + .map_err(|e| CliError::Remote(format!("failed to query history for '{alias}': {e}")))?; + + let output = format_history_result(&json, output_format)?; + println!("{output}"); + Ok(()) +} + /// Build a JSON-LD history query for an entity. fn build_history_query( alias: &str, diff --git a/fluree-db-cli/src/commands/query.rs b/fluree-db-cli/src/commands/query.rs index 140276ee5..6afef1223 100644 --- a/fluree-db-cli/src/commands/query.rs +++ b/fluree-db-cli/src/commands/query.rs @@ -244,49 +244,20 @@ pub async fn run( // Execute query via remote HTTP let timer = Instant::now(); let result = match (query_format, at, explain) { - (detect::QueryFormat::Sparql, Some(at_str), true) => { - // Remote time travel explain uses connection-scoped SPARQL: - // server requires FROM clause to identify the ledger/time. 
- if fluree_db_api::sparql_dataset_ledger_ids(&content) - .map(|v| !v.is_empty()) - .unwrap_or(false) - { - return Err(CliError::Usage( - "SPARQL query already contains FROM/FROM NAMED; \ - for remote time travel, encode time travel in the FROM IRI \ - (e.g., FROM ) instead of using --at" - .to_string(), - )); - } - let spec = parse_time_spec(at_str); - let suffix = time_spec_to_suffix(&spec); - let from_iri = attach_time_suffix_preserving_fragment(&remote_alias, &suffix); - let injected = inject_sparql_from_before_where(&content, &from_iri).ok_or_else( - || { - CliError::Usage( - "unable to inject SPARQL FROM clause for remote time travel; \ - please write the query as `SELECT ... WHERE { ... }` or include an explicit FROM" - .to_string(), - ) - }, - )?; - client.explain_connection_sparql(&injected).await? - } - (detect::QueryFormat::JsonLd, Some(at_str), true) => { - // Remote time travel explain uses connection-scoped JSON-LD: - // inject `"from": "@t:..."` and POST to /explain. - let spec = parse_time_spec(at_str); - let suffix = time_spec_to_suffix(&spec); - let from_id = attach_time_suffix_preserving_fragment(&remote_alias, &suffix); - let mut json_query: serde_json::Value = serde_json::from_str(&content)?; - if let Some(obj) = json_query.as_object_mut() { - obj.insert("from".to_string(), serde_json::Value::String(from_id)); - } else { - return Err(CliError::Input( - "JSON-LD query must be a JSON object".to_string(), - )); - } - client.explain_connection_jsonld(&json_query).await? + (detect::QueryFormat::Sparql | detect::QueryFormat::JsonLd, Some(_), true) => { + // Time-travel + explain isn't honored on the server side: + // both `/explain` and `/explain/{ledger}` load the ledger + // at HEAD and run explain there, so a remote --at --explain + // would silently return the HEAD plan. Refuse rather than + // mislead. Run with `--direct` for a local time-travel + // explain, or drop `--at` to explain the HEAD plan. 
+ return Err(CliError::Usage( + "remote --at --explain is not supported: the server's explain handler \ + loads the ledger at HEAD regardless of any time-travel `from`. \ + Use `--direct` for a local time-travel explain, or drop `--at` to \ + explain the HEAD plan on the remote." + .to_string(), + )); } (detect::QueryFormat::Sparql, None, true) => { client.explain_sparql(&remote_alias, &content).await? @@ -296,12 +267,12 @@ pub async fn run( client.explain_jsonld(&remote_alias, &json_query).await? } (detect::QueryFormat::Sparql, Some(at_str), false) => { - // Remote time travel uses connection-scoped SPARQL: - // server requires FROM clause to identify the ledger/time. - // - // We inject a single FROM before WHERE for the common SELECT shape. - // If the query already has FROM/FROM NAMED, require the user to encode - // time travel there (avoid ambiguous semantics). + // Remote time-travel via ledger-scoped SPARQL: path drives + // auth (`can_read("mydb:main")` matches scoped tokens), + // injected FROM carries the @t:N suffix for snapshot + // resolution. We inject a single FROM before WHERE for + // the common SELECT shape; queries with their own + // FROM/FROM NAMED must encode time travel there. if fluree_db_api::sparql_dataset_ledger_ids(&content) .map(|v| !v.is_empty()) .unwrap_or(false) @@ -325,11 +296,11 @@ pub async fn run( ) }, )?; - client.query_connection_sparql(&injected).await? + client.query_sparql(&remote_alias, &injected).await? } (detect::QueryFormat::JsonLd, Some(at_str), false) => { - // Remote time travel uses connection-scoped JSON-LD: - // inject `"from": "@t:..."` and POST to /query. + // Remote time-travel via ledger-scoped JSON-LD: path + // drives auth, body's `from` carries the @t:N suffix. 
let spec = parse_time_spec(at_str); let suffix = time_spec_to_suffix(&spec); let from_id = attach_time_suffix_preserving_fragment(&remote_alias, &suffix); @@ -341,7 +312,7 @@ pub async fn run( "JSON-LD query must be a JSON object".to_string(), )); } - client.query_connection_jsonld(&json_query).await? + client.query_jsonld(&remote_alias, &json_query).await? } (detect::QueryFormat::Sparql, None, false) => { client.query_sparql(&remote_alias, &content).await? diff --git a/fluree-db-cli/src/context.rs b/fluree-db-cli/src/context.rs index e7bf6aa8c..d5308c17a 100644 --- a/fluree-db-cli/src/context.rs +++ b/fluree-db-cli/src/context.rs @@ -231,9 +231,14 @@ pub async fn build_remote_mode( }; let client = build_client_from_auth(&base_url, &remote.auth); + // Canonicalize the remote alias so the URL path carries the full + // `name:branch` form. The server's `can_read` check is a literal string + // match against the path, so a token scoped to `mydb:main` would 404 if + // we sent `mydb` here. + let remote_alias = to_ledger_id(ledger_alias); Ok(LedgerMode::Tracked { client: Box::new(client), - remote_alias: ledger_alias.to_string(), + remote_alias, local_alias: ledger_alias.to_string(), remote_name: remote_name_str.to_string(), }) diff --git a/fluree-db-cli/src/lib.rs b/fluree-db-cli/src/lib.rs index f48d8dc89..58ab7a6f6 100644 --- a/fluree-db-cli/src/lib.rs +++ b/fluree-db-cli/src/lib.rs @@ -50,15 +50,34 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { parallelism, leaflet_rows, leaflets_per_leaf, + remote, } => { - let fluree_dir = config::require_fluree_dir(config_path)?; - if from.is_some() && memory.is_some() { return Err(error::CliError::Usage( "--from and --memory are mutually exclusive".into(), )); } + // `--remote` doesn't write any local state, so it must work even + // when the user has no project-local `.fluree/` directory — fall + // back to global config for remote registration lookups. 
+ if let Some(remote_name) = remote { + if from.is_some() || memory.is_some() { + return Err(error::CliError::Usage( + "--remote can only create empty ledgers; \ + use `fluree publish ` to push local commits to a remote, \ + or run `fluree create` locally first then publish." + .to_string(), + )); + } + let fluree_dir = config::require_fluree_dir_or_global(config_path)?; + return commands::create::run_remote(&ledger, &remote_name, &fluree_dir).await; + } + + // Local-create paths still require a project `.fluree/` so the + // new ledger lands in a discoverable place rather than $FLUREE_HOME. + let fluree_dir = config::require_fluree_dir(config_path)?; + if let Some(memory_path) = memory { return commands::create::run_memory_import( &ledger, @@ -244,6 +263,7 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { to, predicate, format, + remote, } => { let fluree_dir = config::require_fluree_dir_or_global(config_path)?; commands::history::run( @@ -254,6 +274,8 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { predicate.as_deref(), &format, &fluree_dir, + remote.as_deref(), + direct, ) .await } @@ -261,15 +283,28 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { Commands::Context { action } => { let fluree_dir = config::require_fluree_dir_or_global(config_path)?; match action { - cli::ContextAction::Get { ledger } => { - commands::context_cmd::get(ledger.as_deref(), &fluree_dir).await + cli::ContextAction::Get { ledger, remote } => { + commands::context_cmd::get( + ledger.as_deref(), + &fluree_dir, + remote.as_deref(), + direct, + ) + .await } - cli::ContextAction::Set { ledger, expr, file } => { + cli::ContextAction::Set { + ledger, + expr, + file, + remote, + } => { commands::context_cmd::set( ledger.as_deref(), expr.as_deref(), file.as_ref(), &fluree_dir, + remote.as_deref(), + direct, ) .await } diff --git a/fluree-db-cli/src/remote_client.rs b/fluree-db-cli/src/remote_client.rs index 68d9b2896..0379105e4 100644 --- 
a/fluree-db-cli/src/remote_client.rs +++ b/fluree-db-cli/src/remote_client.rs @@ -1140,6 +1140,46 @@ impl RemoteLedgerClient { .map_err(|e| RemoteLedgerError::InvalidResponse(format!("read body: {e}"))) } + // ========================================================================= + // Default context + // ========================================================================= + + /// Fetch the default JSON-LD context for a ledger. + /// + /// Calls `GET {base_url}/context/`. Server returns + /// `{ "@context": }`. Returns the unwrapped context value + /// (object or `Null`). + pub async fn get_context(&self, ledger: &str) -> Result { + let url = self.op_url("context", ledger); + let resp = self + .send_json(reqwest::Method::GET, &url, "application/json", None) + .await?; + Ok(resp + .get("@context") + .cloned() + .unwrap_or(serde_json::Value::Null)) + } + + /// Replace the default JSON-LD context for a ledger. + /// + /// Calls `PUT {base_url}/context/` with `context` as the body. + /// `context` should be the bare prefix→IRI object; the server also + /// accepts a `{ "@context": {...} }` wrapper. 
+ pub async fn set_context( + &self, + ledger: &str, + context: &serde_json::Value, + ) -> Result { + let url = self.op_url("context", ledger); + self.send_json( + reqwest::Method::PUT, + &url, + "application/json", + Some(RequestBody::Json(context)), + ) + .await + } + // ========================================================================= // Commit log // ========================================================================= From 46532702a3946627bf40d6fdbd602faa3eee57db Mon Sep 17 00:00:00 2001 From: bplatz Date: Sun, 10 May 2026 08:36:03 -0400 Subject: [PATCH 5/9] feat: Add --remote support to fluree export --format ledger MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `fluree export --format ledger -o file.flpack` already worked locally (via `Fluree::archive_ledger`); this lifts the remote sub-gap so the same command also archives remote ledgers, e.g. cold-archiving a production ledger to local disk. Implementation: - `RemoteLedgerClient::archive_ledger_to_writer` fetches the remote pack stream via the existing `fetch_pack_response` (`POST /pack/...`), decodes it frame-by-frame as bytes arrive, forwards Header/Data/inner Manifest frames to the user's writer verbatim, and **swaps the terminal End frame** for a synthesized `phase: "nameservice"` manifest + End. The manifest is built from the supplied `NsRecord` so the on-disk byte stream is byte-compatible with `Fluree::archive_ledger`'s local output. Server `Error` frames are surfaced as a `RemoteLedgerError` and stop the copy without writing the End — the CLI cleans up the partial file. - `commands/export.rs::run_ledger_archive_remote` orchestrates the remote path: fetch the NsRecord (so we know the head CIDs and `t` values), build a `PackRequest` mirroring `Fluree::archive_ledger`'s index policy (commits-only when `--no-indexes` or the remote has no index root), then drive the streaming copy. On error the partial output file is removed. 
Both endpoints sit in the replication-grade auth bracket (`fluree.storage.*`), same as `fluree clone` / `pull`. Without those permissions the server returns `404 Not Found` for the NsRecord lookup to avoid existence leaks; the CLI surfaces this as `not found: ledger '...' not found on remote '...'`. Docs: - `server-integration.md`: replaces the "remote not yet supported" caveat with a section spelling out the two endpoints, the auth bracket, and the byte-compat guarantee. - `pack-archive-restore.md`: drops the "Local-only today" note and adds the `--remote` example. Rust API section continues to cover non-CLI flows (S3 upload, etc.). - Validation script gains an `export --remote ... --format ledger` line. --- docs/cli/server-integration.md | 22 +++- docs/operations/pack-archive-restore.md | 10 +- fluree-db-cli/src/commands/export.rs | 137 ++++++++++++++++++++--- fluree-db-cli/src/remote_client.rs | 142 +++++++++++++++++++++++- 4 files changed, 292 insertions(+), 19 deletions(-) diff --git a/docs/cli/server-integration.md b/docs/cli/server-integration.md index 9973046cd..08fc859f3 100644 --- a/docs/cli/server-integration.md +++ b/docs/cli/server-integration.md @@ -151,11 +151,26 @@ When `--remote` is omitted, the CLI auto-routes through a locally running `flure Imports a `.flpack` file (native ledger pack) into a new local ledger. The `.flpack` format uses the same `fluree-pack-v1` wire format as `POST /pack`. See [Ledger portability](#ledger-portability-flpack-files) below. -### `fluree export --format ledger` (native ledger export) +### `fluree export --format ledger` -- No server endpoint required (local-only operation today) +Exports a full ledger (all commits, txn blobs, and — unless `--no-indexes` — binary index artifacts) as a `.flpack` archive. The archive contains a `phase: "nameservice"` manifest frame so the importer can reconstruct the head pointers. Pass `-o ` to write to disk (required when stdout is a TTY). 
-Exports a full local ledger (all commits, txn blobs, and — unless `--no-indexes` — binary index artifacts) as a `.flpack` archive. The archive contains a `phase: "nameservice"` manifest frame so the importer can reconstruct the head pointers. Pass `-o <FILE>` to write to disk (required when stdout is a TTY). `--remote` is not yet supported for `--format ledger`. See [Ledger portability](#ledger-portability-flpack-files) below.
+**Local mode (default):**
+
+- No server endpoint required.
+
+Streams from the local ledger via the `Fluree::archive_ledger` API.
+
+**Remote mode (`--remote <name>`):**
+
+- `GET {api_base_url}/storage/ns/:ledger-id` (NsRecord lookup)
+- `POST {api_base_url}/pack/*ledger` (binary `fluree-pack-v1` stream)
+
+The CLI fetches the remote `NsRecord` to learn the head CIDs and `t` values, then streams the pack response into the user's writer, swapping the terminal End frame for a synthesized `phase: "nameservice"` manifest + End. The resulting `.flpack` is byte-compatible with a locally-generated archive — `fluree create --from .flpack` doesn't care which side produced it.
+
+**Auth:** Both endpoints sit in the replication-grade bracket and require a Bearer token with `fluree.storage.*` permissions (same auth as `fluree clone`/`pull`). Without those permissions the server returns `404 Not Found` for `/storage/ns/:ledger-id` to avoid existence leaks; the CLI surfaces this as `not found: ledger '...' not found on remote '...'`.
+
+See [Ledger portability](#ledger-portability-flpack-files) below for the on-disk format and [Replication Auth Contract](#replication-auth-contract) for the auth semantics. 
### `fluree query`, `fluree insert`, `fluree upsert`, `fluree update`, `fluree track`, `fluree info`, `fluree exists` @@ -1539,6 +1554,7 @@ fluree context set mydb --remote origin -e '{"ex": "http://example.org/"}' # ad fluree history http://example.org/alice --ledger mydb --remote origin --format json # remote history fluree query mydb 'SELECT * WHERE { ?s ?p ?o }' --remote origin --at 1 # time-travel via /query/{ledger} fluree create empty-db --remote origin # should create an empty ledger on the remote +fluree export mydb --remote origin --format ledger -o mydb-remote.flpack # archive remote ledger fluree drop my-gs --force # should drop the graph source locally fluree drop local-db --remote origin --force # should drop the published ledger on the remote ``` diff --git a/docs/operations/pack-archive-restore.md b/docs/operations/pack-archive-restore.md index 799966b66..cf23a5743 100644 --- a/docs/operations/pack-archive-restore.md +++ b/docs/operations/pack-archive-restore.md @@ -30,17 +30,23 @@ A pack can include just commits + txn blobs (compact, sufficient for full restor ### Archive (export to `.flpack`) ```bash +# Local ledger fluree export mydb --format ledger -o mydb.flpack # Smaller archive without binary index artifacts (importer will reindex): fluree export mydb --format ledger --no-indexes -o mydb.flpack + +# Remote ledger (cold-archive a production ledger to local disk): +fluree export mydb --remote prod --format ledger -o mydb.flpack ``` `--format ledger` (alias `--format flpack`) writes the full `fluree-pack-v1` archive — commits, txn blobs, and (unless `--no-indexes`) index artifacts — plus a `phase: "nameservice"` manifest frame that lets the importer reconstruct commit/index head pointers. -`-o FILE` is required when stdout is a TTY (the archive is binary). Pipe-friendly forms work too: `fluree export mydb --format ledger > mydb.flpack`. Local-only today; `--remote` is not yet supported for `--format ledger`. 
+`-o FILE` is required when stdout is a TTY (the archive is binary). Pipe-friendly forms work too: `fluree export mydb --format ledger > mydb.flpack`. + +The local path calls `Fluree::archive_ledger`. The `--remote` path calls `GET /storage/ns/:ledger-id` to fetch the remote NsRecord, then streams `POST /pack/*ledger` and substitutes the nameservice manifest in place of the terminal End frame on the fly — so a remote-sourced archive is byte-compatible with a locally-generated one. Both endpoints require a Bearer token with `fluree.storage.*` permissions (same auth bracket as `fluree clone` / `pull`). -Under the hood this calls `Fluree::archive_ledger` (see [Rust API usage](#rust-api-usage) below), which is also what consumers should use for non-CLI archive flows like S3 upload. +For non-CLI archive flows (S3 upload, custom storage), use `Fluree::archive_ledger` directly — see [Rust API usage](#rust-api-usage) below. ### Restore (import from `.flpack`) diff --git a/fluree-db-cli/src/commands/export.rs b/fluree-db-cli/src/commands/export.rs index 78a6b6149..94303ffba 100644 --- a/fluree-db-cli/src/commands/export.rs +++ b/fluree-db-cli/src/commands/export.rs @@ -128,14 +128,6 @@ async fn run_ledger_archive( dirs: &FlureeDir, remote_flag: Option<&str>, ) -> CliResult<()> { - if remote_flag.is_some() { - return Err(CliError::Usage( - "fluree export --format ledger does not yet support --remote; \ - run it against a local ledger or use the Rust API. \ - See docs/operations/pack-archive-restore.md." 
- .to_string(), - )); - } if at.is_some() { return Err(CliError::Usage( "fluree export --format ledger does not support --at — archives capture the current head; \ @@ -156,18 +148,23 @@ async fn run_ledger_archive( )); } + let ledger_id = context::to_ledger_id(alias); + + if let Some(remote_name) = remote_flag { + return run_ledger_archive_remote(alias, &ledger_id, output, no_indexes, dirs, remote_name) + .await; + } + let store = crate::config::TomlSyncConfigStore::new(dirs.config_dir().to_path_buf()); - if store.get_tracked(alias).is_some() - || store.get_tracked(&context::to_ledger_id(alias)).is_some() - { + if store.get_tracked(alias).is_some() || store.get_tracked(&ledger_id).is_some() { return Err(CliError::Usage( - "fluree export --format ledger requires local data and is not available for tracked ledgers" + "this alias points at a tracked ledger (no local data); \ + pass `--remote ` to archive the upstream copy." .to_string(), )); } let fluree = context::build_fluree(dirs)?; - let ledger_id = context::to_ledger_id(alias); match output { Some(path) => { @@ -228,6 +225,120 @@ async fn run_ledger_archive( Ok(()) } +/// Remote variant of `run_ledger_archive`. +/// +/// Fetches the remote `NsRecord` (so we can synthesize the trailing +/// nameservice manifest), then issues `POST /pack/{ledger}` and copies the +/// pack stream through the user's writer. The remote client swaps the +/// terminal End frame for `manifest + End` on the fly, producing a byte +/// stream that's identical in shape to a local archive. +async fn run_ledger_archive_remote( + alias: &str, + ledger_id: &str, + output: Option<&Path>, + no_indexes: bool, + dirs: &FlureeDir, + remote_name: &str, +) -> CliResult<()> { + use fluree_db_core::pack::PackRequest; + + let client = context::build_remote_client(remote_name, dirs).await?; + + // Pull the NsRecord first; we need its head CIDs and t values both to + // build the pack request and to construct the trailing manifest. 
+ let record = client + .fetch_ns_record(ledger_id) + .await + .map_err(|e| { + CliError::Remote(format!( + "failed to fetch NsRecord for '{ledger_id}' on '{remote_name}': {e}" + )) + })? + .ok_or_else(|| { + CliError::NotFound(format!( + "ledger '{ledger_id}' not found on remote '{remote_name}'" + )) + })?; + + let head_commit_id = record.commit_head_id.clone().ok_or_else(|| { + CliError::Remote(format!( + "remote ledger '{ledger_id}' has no head commit to archive" + )) + })?; + + // Mirror `Fluree::archive_ledger`: only request indexes when the user + // wants them AND the remote actually has an index root. Otherwise the + // archive degrades to commits-only and the manifest will omit + // `index_head_id` accordingly. + let include_indexes = !no_indexes; + let request = match (include_indexes, record.index_head_id.clone()) { + (true, Some(index_root)) => { + PackRequest::with_indexes(vec![head_commit_id], vec![], index_root, None) + } + _ => PackRequest::commits(vec![head_commit_id], vec![]), + }; + + match output { + Some(path) => { + let path: PathBuf = path.to_path_buf(); + let file = tokio::fs::File::create(&path).await.map_err(|e| { + CliError::Config(format!("failed to create '{}': {e}", path.display())) + })?; + let mut writer = tokio::io::BufWriter::new(file); + let result = client + .archive_ledger_to_writer(ledger_id, &request, &record, &mut writer) + .await; + drop(writer); + context::persist_refreshed_tokens(&client, remote_name, dirs).await; + + let frames = match result { + Ok(frames) => frames, + Err(e) => { + let _ = std::fs::remove_file(&path); + return Err(CliError::Remote(format!( + "failed to archive '{alias}' from '{remote_name}': {e}" + ))); + } + }; + eprintln!( + "{} Archived '{}' from '{}' → {} ({} pack frames forwarded)", + "✓".green(), + alias, + remote_name, + path.display(), + frames, + ); + } + None => { + if io::stdout().is_terminal() { + return Err(CliError::Usage( + "refusing to write a binary .flpack archive to a TTY; pass -o or 
redirect stdout" + .to_string(), + )); + } + let stdout = tokio::io::stdout(); + let mut writer = tokio::io::BufWriter::new(stdout); + let frames = client + .archive_ledger_to_writer(ledger_id, &request, &record, &mut writer) + .await + .map_err(|e| { + CliError::Remote(format!( + "failed to archive '{alias}' from '{remote_name}': {e}" + )) + })?; + context::persist_refreshed_tokens(&client, remote_name, dirs).await; + eprintln!( + "{} Archived '{}' from '{}' to stdout ({} pack frames forwarded)", + "✓".green(), + alias, + remote_name, + frames, + ); + } + } + Ok(()) +} + // ============================================================================= // RDF formats (turtle, ntriples, nquads, trig, jsonld) // ============================================================================= diff --git a/fluree-db-cli/src/remote_client.rs b/fluree-db-cli/src/remote_client.rs index 0379105e4..79aa767e1 100644 --- a/fluree-db-cli/src/remote_client.rs +++ b/fluree-db-cli/src/remote_client.rs @@ -53,7 +53,10 @@ fn encode_ledger_segment(s: &str) -> Cow<'_, str> { use crate::cli::PolicyArgs; use fluree_db_api::{ExportCommitsResponse, PushCommitsResponse}; -use fluree_db_core::pack::PackRequest; +use fluree_db_core::pack::{ + decode_frame, encode_end_frame, encode_manifest_frame, read_stream_preamble, PackFrame, + PackRequest, DEFAULT_MAX_PAYLOAD, +}; use fluree_db_nameservice::NsRecord; /// Build the set of HTTP headers that carry policy enforcement options to a @@ -1635,6 +1638,143 @@ impl RemoteLedgerClient { } } + /// Stream a `.flpack` archive of a remote ledger to `writer`. + /// + /// Builds a `PackRequest` for the ledger's current head and POSTs to + /// `/pack/{ledger}`. As frames arrive, they are written through to + /// `writer` unchanged **except** for the terminating End frame: we + /// swap that for a synthesized `phase: "nameservice"` manifest frame + /// (constructed from the supplied `ns_record`) followed by End. 
The + /// resulting byte stream is byte-compatible with `Fluree::archive_ledger` + /// and importable via `fluree create --from .flpack`. + /// + /// Surfaces a server `Error` frame as a `RemoteLedgerError` and stops + /// without writing the End — callers should clean up partial output. + /// Returns the count of pack frames forwarded (header / data / inner + /// manifest), excluding the synthesized nameservice manifest and End. + pub async fn archive_ledger_to_writer( + &self, + ledger: &str, + request: &PackRequest, + ns_record: &NsRecord, + writer: &mut W, + ) -> Result { + use futures::StreamExt as _; + use tokio::io::AsyncWriteExt as _; + + let resp = self.fetch_pack_response(ledger, request).await?; + let resp = resp.ok_or_else(|| { + RemoteLedgerError::ServerError(format!( + "remote does not support /pack for '{ledger}' (404/405/406/501)" + )) + })?; + + // Frame-by-frame stream copy. We accumulate response bytes into a + // sliding buffer and decode frames as they become complete; that + // lets us recognize the terminal End frame and substitute the + // nameservice manifest in its place without buffering the whole + // archive in memory. + let mut stream = resp.bytes_stream(); + let mut buf: Vec = Vec::with_capacity(64 * 1024); + let mut preamble_consumed = false; + let mut frames_forwarded: usize = 0; + let mut end_seen = false; + + while let Some(chunk) = stream.next().await { + let bytes = + chunk.map_err(|e| RemoteLedgerError::Network(format!("pack stream: {e}")))?; + buf.extend_from_slice(&bytes); + + // Drain any complete preamble + frames out of the buffer. + loop { + if !preamble_consumed { + match read_stream_preamble(&buf) { + Ok(consumed) => { + // Forward the preamble bytes verbatim. 
+ writer.write_all(&buf[..consumed]).await.map_err(|e| { + RemoteLedgerError::Network(format!("archive write: {e}")) + })?; + buf.drain(..consumed); + preamble_consumed = true; + } + Err(_) => break, // need more bytes + } + } + + if end_seen { + // Defensive: nothing should arrive after End. + break; + } + + match decode_frame(&buf, DEFAULT_MAX_PAYLOAD) { + Ok((frame, consumed)) => match frame { + PackFrame::End => { + // Don't forward End; we'll write manifest + End below. + buf.drain(..consumed); + end_seen = true; + } + PackFrame::Error(msg) => { + return Err(RemoteLedgerError::ServerError(format!( + "remote pack error: {msg}" + ))); + } + PackFrame::Header(_) | PackFrame::Data { .. } | PackFrame::Manifest(_) => { + // Forward verbatim. + writer.write_all(&buf[..consumed]).await.map_err(|e| { + RemoteLedgerError::Network(format!("archive write: {e}")) + })?; + buf.drain(..consumed); + frames_forwarded += 1; + } + }, + Err(_) => break, // need more bytes + } + } + } + + if !end_seen { + return Err(RemoteLedgerError::InvalidResponse( + "pack stream ended before End frame".to_string(), + )); + } + + // Synthesize the nameservice manifest from the NsRecord, mirroring + // `Fluree::archive_ledger`. Index fields ride along only when the + // pack request actually includes index artifacts — otherwise the + // restored ledger would point at index data we never archived. 
+ let mut manifest = serde_json::json!({ + "phase": "nameservice", + "ledger_id": ns_record.ledger_id, + "name": ns_record.name, + "branch": ns_record.branch, + "commit_t": ns_record.commit_t, + }); + if let Some(cid) = ns_record.commit_head_id.as_ref() { + manifest["commit_head_id"] = serde_json::Value::String(cid.to_string()); + } + let archived_index = request.want_index_root_id.is_some(); + if archived_index { + if let Some(cid) = ns_record.index_head_id.as_ref() { + manifest["index_head_id"] = serde_json::Value::String(cid.to_string()); + manifest["index_t"] = serde_json::Value::from(ns_record.index_t); + } + } + + let mut tail = Vec::with_capacity(512); + encode_manifest_frame(&manifest, &mut tail); + encode_end_frame(&mut tail); + writer + .write_all(&tail) + .await + .map_err(|e| RemoteLedgerError::Network(format!("archive write: {e}")))?; + writer + .flush() + .await + .map_err(|e| RemoteLedgerError::Network(format!("archive flush: {e}")))?; + + Ok(frames_forwarded) + } + /// Fetch the NsRecord via the storage proxy. /// /// Returns `Ok(Some(record))` on 200, `Ok(None)` on 404. From 5591965be44aa9d4a1b9e405ac546fb358c4d540 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sun, 10 May 2026 09:11:44 -0400 Subject: [PATCH 6/9] =?UTF-8?q?fix(cli):=20tighten=20remote=20ledger=20arc?= =?UTF-8?q?hive=20=E2=80=94=20error=20handling,=20tracked=20aliases,=20tes?= =?UTF-8?q?ts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three follow-ups on the remote `--format ledger` archive added in the previous commit: - Distinguish `PackError::Incomplete` from fatal pack-decoder errors in the archive splicer. Previously every decoder error was treated as "need more bytes", so a corrupt FPK1 magic, an oversize payload, or an invalid frame type would buffer until EOF and surface as a misleading "ended before End frame". Now Incomplete loops, every other variant returns `InvalidResponse` immediately and the max-payload guard actually fires. 
- Resolve tracked aliases for `fluree export --remote --format ledger`. If `` is tracked at ``, archive the upstream copy under its `tracked.remote_alias`. Without this, a ledger tracked as `local -> upstream:main` would look up `local:main` on the remote and 404. Falls back to using the alias literally when it isn't tracked or `--remote` points elsewhere — matches the existing `resolve_ledger_mode` semantics. - Split the splicer out as `splice_archive_stream` and `build_archive_manifest` so the End-frame substitution and manifest synthesis are unit-testable without a live server. Five new tests cover: End → manifest+End substitution, chunk boundaries inside frames (single chunk vs many small ones produce identical output), index fields omitted when `archived_index = false`, server `Error` frame surfaced as `ServerError`, and corrupt magic surfaced as `InvalidResponse` rather than buffered until EOF. --- fluree-db-cli/src/commands/export.rs | 29 +- fluree-db-cli/src/remote_client.rs | 455 ++++++++++++++++++++------- 2 files changed, 374 insertions(+), 110 deletions(-) diff --git a/fluree-db-cli/src/commands/export.rs b/fluree-db-cli/src/commands/export.rs index 94303ffba..025fb2715 100644 --- a/fluree-db-cli/src/commands/export.rs +++ b/fluree-db-cli/src/commands/export.rs @@ -148,13 +148,36 @@ async fn run_ledger_archive( )); } - let ledger_id = context::to_ledger_id(alias); + let local_ledger_id = context::to_ledger_id(alias); if let Some(remote_name) = remote_flag { - return run_ledger_archive_remote(alias, &ledger_id, output, no_indexes, dirs, remote_name) - .await; + // When the alias is tracked AND points at this same remote, archive + // the upstream copy under its tracked `remote_alias`. This mirrors + // how every other --remote command resolves tracked aliases via + // `resolve_ledger_mode` -> `build_tracked_mode`. If --remote points + // at a different remote (or the alias isn't tracked), fall back to + // using the alias literally on that remote. 
+ let store = crate::config::TomlSyncConfigStore::new(dirs.config_dir().to_path_buf()); + let tracked = store + .get_tracked(alias) + .or_else(|| store.get_tracked(&local_ledger_id)); + let remote_ledger_id = match tracked.as_ref() { + Some(t) if t.remote == remote_name => t.remote_alias.clone(), + _ => local_ledger_id.clone(), + }; + return run_ledger_archive_remote( + alias, + &remote_ledger_id, + output, + no_indexes, + dirs, + remote_name, + ) + .await; } + let ledger_id = local_ledger_id; + let store = crate::config::TomlSyncConfigStore::new(dirs.config_dir().to_path_buf()); if store.get_tracked(alias).is_some() || store.get_tracked(&ledger_id).is_some() { return Err(CliError::Usage( diff --git a/fluree-db-cli/src/remote_client.rs b/fluree-db-cli/src/remote_client.rs index 79aa767e1..e9fcee762 100644 --- a/fluree-db-cli/src/remote_client.rs +++ b/fluree-db-cli/src/remote_client.rs @@ -54,8 +54,8 @@ fn encode_ledger_segment(s: &str) -> Cow<'_, str> { use crate::cli::PolicyArgs; use fluree_db_api::{ExportCommitsResponse, PushCommitsResponse}; use fluree_db_core::pack::{ - decode_frame, encode_end_frame, encode_manifest_frame, read_stream_preamble, PackFrame, - PackRequest, DEFAULT_MAX_PAYLOAD, + decode_frame, encode_end_frame, encode_manifest_frame, read_stream_preamble, PackError, + PackFrame, PackRequest, DEFAULT_MAX_PAYLOAD, }; use fluree_db_nameservice::NsRecord; @@ -1660,7 +1660,6 @@ impl RemoteLedgerClient { writer: &mut W, ) -> Result { use futures::StreamExt as _; - use tokio::io::AsyncWriteExt as _; let resp = self.fetch_pack_response(ledger, request).await?; let resp = resp.ok_or_else(|| { @@ -1669,110 +1668,11 @@ impl RemoteLedgerClient { )) })?; - // Frame-by-frame stream copy. We accumulate response bytes into a - // sliding buffer and decode frames as they become complete; that - // lets us recognize the terminal End frame and substitute the - // nameservice manifest in its place without buffering the whole - // archive in memory. 
- let mut stream = resp.bytes_stream(); - let mut buf: Vec = Vec::with_capacity(64 * 1024); - let mut preamble_consumed = false; - let mut frames_forwarded: usize = 0; - let mut end_seen = false; - - while let Some(chunk) = stream.next().await { - let bytes = - chunk.map_err(|e| RemoteLedgerError::Network(format!("pack stream: {e}")))?; - buf.extend_from_slice(&bytes); - - // Drain any complete preamble + frames out of the buffer. - loop { - if !preamble_consumed { - match read_stream_preamble(&buf) { - Ok(consumed) => { - // Forward the preamble bytes verbatim. - writer.write_all(&buf[..consumed]).await.map_err(|e| { - RemoteLedgerError::Network(format!("archive write: {e}")) - })?; - buf.drain(..consumed); - preamble_consumed = true; - } - Err(_) => break, // need more bytes - } - } - - if end_seen { - // Defensive: nothing should arrive after End. - break; - } - - match decode_frame(&buf, DEFAULT_MAX_PAYLOAD) { - Ok((frame, consumed)) => match frame { - PackFrame::End => { - // Don't forward End; we'll write manifest + End below. - buf.drain(..consumed); - end_seen = true; - } - PackFrame::Error(msg) => { - return Err(RemoteLedgerError::ServerError(format!( - "remote pack error: {msg}" - ))); - } - PackFrame::Header(_) | PackFrame::Data { .. } | PackFrame::Manifest(_) => { - // Forward verbatim. - writer.write_all(&buf[..consumed]).await.map_err(|e| { - RemoteLedgerError::Network(format!("archive write: {e}")) - })?; - buf.drain(..consumed); - frames_forwarded += 1; - } - }, - Err(_) => break, // need more bytes - } - } - } - - if !end_seen { - return Err(RemoteLedgerError::InvalidResponse( - "pack stream ended before End frame".to_string(), - )); - } - - // Synthesize the nameservice manifest from the NsRecord, mirroring - // `Fluree::archive_ledger`. Index fields ride along only when the - // pack request actually includes index artifacts — otherwise the - // restored ledger would point at index data we never archived. 
- let mut manifest = serde_json::json!({ - "phase": "nameservice", - "ledger_id": ns_record.ledger_id, - "name": ns_record.name, - "branch": ns_record.branch, - "commit_t": ns_record.commit_t, - }); - if let Some(cid) = ns_record.commit_head_id.as_ref() { - manifest["commit_head_id"] = serde_json::Value::String(cid.to_string()); - } - let archived_index = request.want_index_root_id.is_some(); - if archived_index { - if let Some(cid) = ns_record.index_head_id.as_ref() { - manifest["index_head_id"] = serde_json::Value::String(cid.to_string()); - manifest["index_t"] = serde_json::Value::from(ns_record.index_t); - } - } - - let mut tail = Vec::with_capacity(512); - encode_manifest_frame(&manifest, &mut tail); - encode_end_frame(&mut tail); - writer - .write_all(&tail) - .await - .map_err(|e| RemoteLedgerError::Network(format!("archive write: {e}")))?; - writer - .flush() - .await - .map_err(|e| RemoteLedgerError::Network(format!("archive flush: {e}")))?; - - Ok(frames_forwarded) + let manifest = build_archive_manifest(ns_record, request.want_index_root_id.is_some()); + let stream = resp + .bytes_stream() + .map(|r| r.map(|b| b.to_vec()).map_err(|e| e.to_string())); + splice_archive_stream(stream, writer, &manifest).await } /// Fetch the NsRecord via the storage proxy. @@ -1841,6 +1741,143 @@ enum RequestBody<'a> { Text(&'a str), } +/// Build the `phase: "nameservice"` manifest emitted at the end of a +/// `.flpack` archive. Mirrors `Fluree::archive_ledger`'s synthesis: index +/// fields ride along only when index artifacts are actually archived. 
+pub(crate) fn build_archive_manifest( + ns_record: &NsRecord, + archived_index: bool, +) -> serde_json::Value { + let mut manifest = serde_json::json!({ + "phase": "nameservice", + "ledger_id": ns_record.ledger_id, + "name": ns_record.name, + "branch": ns_record.branch, + "commit_t": ns_record.commit_t, + }); + if let Some(cid) = ns_record.commit_head_id.as_ref() { + manifest["commit_head_id"] = serde_json::Value::String(cid.to_string()); + } + if archived_index { + if let Some(cid) = ns_record.index_head_id.as_ref() { + manifest["index_head_id"] = serde_json::Value::String(cid.to_string()); + manifest["index_t"] = serde_json::Value::from(ns_record.index_t); + } + } + manifest +} + +/// Drive a pack-stream copy from `stream` to `writer`, swapping the +/// terminal End frame for `manifest_frame` + End. Splitting this out from +/// `archive_ledger_to_writer` lets us test the frame-walking logic without +/// a real HTTP server: feed in pre-encoded chunks and compare the writer's +/// captured bytes byte-for-byte. +/// +/// The `stream` yields chunked archive bytes; chunk boundaries are +/// arbitrary (frames may straddle them). Returns the count of pack frames +/// forwarded, excluding the synthesized manifest and End. +pub(crate) async fn splice_archive_stream( + stream: S, + writer: &mut W, + manifest_frame: &serde_json::Value, +) -> Result +where + S: futures::Stream, String>> + Unpin, + W: tokio::io::AsyncWrite + Unpin + Send, +{ + use futures::StreamExt as _; + use tokio::io::AsyncWriteExt as _; + + let mut stream = stream; + let mut buf: Vec = Vec::with_capacity(64 * 1024); + let mut preamble_consumed = false; + let mut frames_forwarded: usize = 0; + let mut end_seen = false; + + while let Some(chunk) = stream.next().await { + let bytes = chunk.map_err(|e| RemoteLedgerError::Network(format!("pack stream: {e}")))?; + buf.extend_from_slice(&bytes); + + // Drain any complete preamble + frames out of the buffer. 
+ // Distinguish `PackError::Incomplete` (need more bytes — not a + // protocol error) from every other variant. Without that split, + // a corrupt magic / oversize payload / invalid frame type would + // be swallowed as "need more" and the loop would buffer until + // EOF, defeating the decoder's max-payload guard and surfacing + // a misleading "ended before End frame" error. + loop { + if !preamble_consumed { + match read_stream_preamble(&buf) { + Ok(consumed) => { + writer.write_all(&buf[..consumed]).await.map_err(|e| { + RemoteLedgerError::Network(format!("archive write: {e}")) + })?; + buf.drain(..consumed); + preamble_consumed = true; + } + Err(PackError::Incomplete(_)) => break, + Err(e) => { + return Err(RemoteLedgerError::InvalidResponse(format!( + "invalid pack stream preamble: {e}" + ))); + } + } + } + + if end_seen { + break; + } + + match decode_frame(&buf, DEFAULT_MAX_PAYLOAD) { + Ok((frame, consumed)) => match frame { + PackFrame::End => { + buf.drain(..consumed); + end_seen = true; + } + PackFrame::Error(msg) => { + return Err(RemoteLedgerError::ServerError(format!( + "remote pack error: {msg}" + ))); + } + PackFrame::Header(_) | PackFrame::Data { .. 
} | PackFrame::Manifest(_) => { + writer.write_all(&buf[..consumed]).await.map_err(|e| { + RemoteLedgerError::Network(format!("archive write: {e}")) + })?; + buf.drain(..consumed); + frames_forwarded += 1; + } + }, + Err(PackError::Incomplete(_)) => break, + Err(e) => { + return Err(RemoteLedgerError::InvalidResponse(format!( + "invalid pack frame: {e}" + ))); + } + } + } + } + + if !end_seen { + return Err(RemoteLedgerError::InvalidResponse( + "pack stream ended before End frame".to_string(), + )); + } + + let mut tail = Vec::with_capacity(512); + encode_manifest_frame(manifest_frame, &mut tail); + encode_end_frame(&mut tail); + writer + .write_all(&tail) + .await + .map_err(|e| RemoteLedgerError::Network(format!("archive write: {e}")))?; + writer + .flush() + .await + .map_err(|e| RemoteLedgerError::Network(format!("archive flush: {e}")))?; + + Ok(frames_forwarded) +} + fn extract_error_message(body: &str) -> String { let trimmed = body.trim(); if trimmed.is_empty() { @@ -2004,4 +2041,208 @@ mod tests { let debug = format!("{client:?}"); assert!(debug.contains("has_refresh: true")); } + + // ========================================================================= + // splice_archive_stream — frame substitution tests + // ========================================================================= + // + // These exercise the End → manifest+End swap without needing a real + // server: build a valid pack stream in-memory, feed it as one or many + // chunks to `splice_archive_stream`, and assert the writer sees + // [preamble][header][data...][synthesized manifest][End], with the + // original End dropped. 
+ + use fluree_db_core::pack::{ + encode_data_frame, encode_end_frame, encode_error_frame, encode_header_frame, + write_stream_preamble, PackHeader, PREAMBLE_SIZE, + }; + use fluree_db_core::{ContentId, ContentKind}; + use futures::stream; + + fn sample_ns_record() -> NsRecord { + let mut record = NsRecord::new("mydb".to_string(), "main".to_string()); + record.commit_head_id = Some(ContentId::new(ContentKind::Commit, b"head")); + record.commit_t = 7; + record.index_head_id = Some(ContentId::new(ContentKind::IndexRoot, b"idx")); + record.index_t = 5; + record + } + + /// Build a minimal valid pack stream: + /// [preamble][header frame][N data frames][end]. + fn build_pack_stream(data_payloads: &[&[u8]]) -> Vec { + let mut buf = Vec::new(); + write_stream_preamble(&mut buf); + let header = PackHeader::commits_only(Some(data_payloads.len() as u32), true); + encode_header_frame(&header, &mut buf); + for (i, payload) in data_payloads.iter().enumerate() { + let cid = ContentId::new(ContentKind::Commit, format!("commit-{i}").as_bytes()); + encode_data_frame(&cid, payload, &mut buf); + } + encode_end_frame(&mut buf); + buf + } + + fn drive_splice( + chunks: Vec>, + manifest: serde_json::Value, + ) -> Result<(usize, Vec), RemoteLedgerError> { + let stream = stream::iter(chunks.into_iter().map(Ok::, String>)); + let mut output: Vec = Vec::new(); + let frames = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap() + .block_on(super::splice_archive_stream(stream, &mut output, &manifest))?; + Ok((frames, output)) + } + + #[test] + fn splice_drops_end_and_appends_manifest_then_end() { + let pack = build_pack_stream(&[b"commit-bytes-1", b"commit-bytes-2"]); + let manifest = build_archive_manifest(&sample_ns_record(), true); + let (frames, output) = drive_splice(vec![pack.clone()], manifest.clone()).unwrap(); + + // Header + 2 data frames forwarded; manifest + End synthesized below. 
+ assert_eq!(frames, 3); + + // Output must NOT match the input bytes verbatim — End was replaced. + assert_ne!(output, pack); + + // First (PREAMBLE_SIZE + header_frame_len) bytes of input must + // appear unchanged at the start of the output. + assert!(output.starts_with(&pack[..PREAMBLE_SIZE])); + + // The original End is one byte (FRAME_END = 0xFF). The output's + // last byte should also be that same End byte, but preceded by an + // injected manifest frame rather than appearing immediately after + // the last data frame. + let last = *output.last().expect("output not empty"); + assert_eq!(last, 0xFF, "trailing byte must still be End frame"); + + // Decode the output and verify the new manifest sits where the End + // used to be. + let mut pos = read_stream_preamble(&output).expect("valid preamble"); + let mut frames_seen: Vec<&'static str> = Vec::new(); + let mut last_manifest: Option = None; + loop { + let (frame, consumed) = + decode_frame(&output[pos..], DEFAULT_MAX_PAYLOAD).expect("decodable"); + pos += consumed; + match frame { + PackFrame::Header(_) => frames_seen.push("header"), + PackFrame::Data { .. } => frames_seen.push("data"), + PackFrame::Manifest(json) => { + last_manifest = Some(json); + frames_seen.push("manifest"); + } + PackFrame::End => { + frames_seen.push("end"); + break; + } + PackFrame::Error(_) => panic!("unexpected error frame"), + } + } + assert_eq!( + frames_seen, + vec!["header", "data", "data", "manifest", "end"] + ); + let m = last_manifest.expect("manifest frame present"); + assert_eq!(m.get("phase").and_then(|v| v.as_str()), Some("nameservice")); + assert_eq!(m.get("ledger_id"), manifest.get("ledger_id")); + assert_eq!(m.get("commit_t"), manifest.get("commit_t")); + assert_eq!(m.get("index_head_id"), manifest.get("index_head_id")); + } + + #[test] + fn splice_handles_chunk_boundaries_inside_frames() { + // Same pack, but split across many small chunks so that frame + // boundaries fall inside individual chunks. 
The buffered decode + // path must still produce identical output. + let pack = build_pack_stream(&[b"first-commit", b"second-commit"]); + let manifest = build_archive_manifest(&sample_ns_record(), true); + + let (frames_one, output_one) = drive_splice(vec![pack.clone()], manifest.clone()).unwrap(); + let chunked: Vec> = pack.chunks(7).map(<[u8]>::to_vec).collect(); + let (frames_many, output_many) = drive_splice(chunked, manifest).unwrap(); + + assert_eq!(frames_one, frames_many); + assert_eq!(output_one, output_many); + } + + #[test] + fn splice_omits_index_fields_when_archived_index_is_false() { + let pack = build_pack_stream(&[b"commit"]); + let manifest = build_archive_manifest(&sample_ns_record(), /* archived_index */ false); + assert!(manifest.get("index_head_id").is_none()); + assert!(manifest.get("index_t").is_none()); + + let (_, output) = drive_splice(vec![pack], manifest).unwrap(); + let mut pos = read_stream_preamble(&output).unwrap(); + let mut found_manifest_without_index = false; + loop { + let (frame, consumed) = decode_frame(&output[pos..], DEFAULT_MAX_PAYLOAD).unwrap(); + pos += consumed; + match frame { + PackFrame::Manifest(json) + if json.get("phase").and_then(|v| v.as_str()) == Some("nameservice") => + { + assert!(json.get("index_head_id").is_none()); + assert!(json.get("index_t").is_none()); + found_manifest_without_index = true; + } + PackFrame::End => break, + _ => {} + } + } + assert!( + found_manifest_without_index, + "nameservice manifest must be present without index fields" + ); + } + + #[test] + fn splice_propagates_server_error_frame() { + // Build a stream that emits an Error frame instead of End — the + // server signals failure mid-pack. We must surface this rather + // than silently truncating the archive. 
+ let mut buf = Vec::new(); + write_stream_preamble(&mut buf); + let header = PackHeader::commits_only(Some(0), true); + encode_header_frame(&header, &mut buf); + encode_error_frame("simulated remote pack failure", &mut buf); + encode_end_frame(&mut buf); + + let manifest = build_archive_manifest(&sample_ns_record(), false); + let result = drive_splice(vec![buf], manifest); + match result { + Err(RemoteLedgerError::ServerError(msg)) => { + assert!(msg.contains("simulated remote pack failure")); + } + other => panic!("expected ServerError, got {other:?}"), + } + } + + #[test] + fn splice_rejects_invalid_magic_promptly_not_as_incomplete() { + // First 4 bytes should be the FPK1 magic; corrupt them and feed + // the rest of a valid stream. The decoder's preamble check must + // surface as a fatal `InvalidResponse` rather than being swallowed + // as "need more bytes" until EOF. + let mut bad = build_pack_stream(&[b"commit"]); + bad[0] = 0x00; // break the magic + bad[1] = 0x00; + + let manifest = build_archive_manifest(&sample_ns_record(), false); + let result = drive_splice(vec![bad], manifest); + match result { + Err(RemoteLedgerError::InvalidResponse(msg)) => { + assert!( + msg.contains("preamble") || msg.contains("magic"), + "expected magic/preamble error, got: {msg}" + ); + } + other => panic!("expected InvalidResponse for bad magic, got {other:?}"), + } + } } From 3a538b789118f3af66495f69c685bd14a279aa0f Mon Sep 17 00:00:00 2001 From: bplatz Date: Mon, 11 May 2026 06:04:23 -0400 Subject: [PATCH 7/9] feat(cli): lift --at --explain --remote bail-out The defensive refusal added when remote explain silently dropped time-travel `from` is no longer needed: the server-side fix in `fix(server): accept time-travel from in explain endpoints` (parent commit on this branch) accepts the request and routes it through the dataset-aware explain path. 
Both SPARQL `--at --explain --remote` and JSON-LD `--at --explain --remote` now flow through the same ledger-scoped paths the non-explain `--at` cases already use: - SPARQL: injects `FROM ` before WHERE, POSTs to `/explain/{ledger}` (which now accepts same-ledger time-travel FROM rather than rejecting all FROM clauses). - JSON-LD: injects `from: "ledger@t:N"` into the body, POSTs to `/explain/{ledger}`. Plan content for a given query text is largely independent of `t` because Fluree maintains a single set of index stats (latest), and the planner uses them regardless of query `t`. The value here is consistency with the query path and honoring an explicit request parameter, not producing meaningfully different plans. Doc updates in `docs/cli/server-integration.md`: replace the "known limitation: refused" callout with a note explaining the actual flow and the stats-singularity reality. --- docs/cli/server-integration.md | 16 +++++--- fluree-db-cli/src/commands/query.rs | 64 ++++++++++++++++++++++------- 2 files changed, 60 insertions(+), 20 deletions(-) diff --git a/docs/cli/server-integration.md b/docs/cli/server-integration.md index 08fc859f3..3a8b95650 100644 --- a/docs/cli/server-integration.md +++ b/docs/cli/server-integration.md @@ -196,12 +196,16 @@ or in an injected `FROM ` clause (for SPARQL). Posting to the connection-level endpoint instead would force auth to derive the ledger ID from `from` and reject scoped tokens. -**Known limitation: `--at` + `--explain` over `--remote` is refused.** The -server's explain handler (both connection- and ledger-scoped) loads the -ledger at HEAD regardless of any time-travel `from`, so a remote -`--at --explain` would silently return the HEAD plan. The CLI rejects the -combination outright; pass `--direct` for a local time-travel explain, or -drop `--at` to explain the HEAD plan against the remote. 
+**Remote `--at --explain` flows through the same ledger-scoped path.** The +CLI injects the time-travel suffix into `from` (JSON-LD) or as a `FROM +` clause (SPARQL), then POSTs to `POST /explain/{ledger}`. +The server's explain handlers route those requests through a +dataset-aware path so the request is processed against a view at the +requested `t`. Note that Fluree maintains one set of index stats +(latest), so explain plans for a given query text are largely +independent of `t` — the value of `--at --explain` is in honoring the +contract and consistency with the query path, not in producing +materially different plans. ### `fluree branch list` (read-only) diff --git a/fluree-db-cli/src/commands/query.rs b/fluree-db-cli/src/commands/query.rs index 6afef1223..fd198621f 100644 --- a/fluree-db-cli/src/commands/query.rs +++ b/fluree-db-cli/src/commands/query.rs @@ -244,20 +244,56 @@ pub async fn run( // Execute query via remote HTTP let timer = Instant::now(); let result = match (query_format, at, explain) { - (detect::QueryFormat::Sparql | detect::QueryFormat::JsonLd, Some(_), true) => { - // Time-travel + explain isn't honored on the server side: - // both `/explain` and `/explain/{ledger}` load the ledger - // at HEAD and run explain there, so a remote --at --explain - // would silently return the HEAD plan. Refuse rather than - // mislead. Run with `--direct` for a local time-travel - // explain, or drop `--at` to explain the HEAD plan. - return Err(CliError::Usage( - "remote --at --explain is not supported: the server's explain handler \ - loads the ledger at HEAD regardless of any time-travel `from`. \ - Use `--direct` for a local time-travel explain, or drop `--at` to \ - explain the HEAD plan on the remote." - .to_string(), - )); + (detect::QueryFormat::Sparql, Some(at_str), true) => { + // Remote `--at --explain` over SPARQL: inject the time + // suffix as a FROM and POST to the ledger-scoped explain + // endpoint. 
Same shape as the non-explain SPARQL `--at` + // case below — the server's `/explain/{ledger}` accepts + // same-ledger FROM with time travel (see the + // explain-time-travel fix). Queries with their own + // FROM/FROM NAMED must encode time travel there. + if fluree_db_api::sparql_dataset_ledger_ids(&content) + .map(|v| !v.is_empty()) + .unwrap_or(false) + { + return Err(CliError::Usage( + "SPARQL query already contains FROM/FROM NAMED; \ + for remote time travel, encode time travel in the FROM IRI \ + (e.g., FROM ) instead of using --at" + .to_string(), + )); + } + let spec = parse_time_spec(at_str); + let suffix = time_spec_to_suffix(&spec); + let from_iri = attach_time_suffix_preserving_fragment(&remote_alias, &suffix); + let injected = inject_sparql_from_before_where(&content, &from_iri).ok_or_else( + || { + CliError::Usage( + "unable to inject SPARQL FROM clause for remote time travel; \ + please write the query as `SELECT ... WHERE { ... }` or include an explicit FROM" + .to_string(), + ) + }, + )?; + client.explain_sparql(&remote_alias, &injected).await? + } + (detect::QueryFormat::JsonLd, Some(at_str), true) => { + // Remote `--at --explain` over JSON-LD: inject the + // time-suffixed `from` into the body and POST to the + // ledger-scoped explain endpoint. Path drives auth, + // body's `from` drives snapshot selection. + let spec = parse_time_spec(at_str); + let suffix = time_spec_to_suffix(&spec); + let from_id = attach_time_suffix_preserving_fragment(&remote_alias, &suffix); + let mut json_query: serde_json::Value = serde_json::from_str(&content)?; + if let Some(obj) = json_query.as_object_mut() { + obj.insert("from".to_string(), serde_json::Value::String(from_id)); + } else { + return Err(CliError::Input( + "JSON-LD query must be a JSON object".to_string(), + )); + } + client.explain_jsonld(&remote_alias, &json_query).await? } (detect::QueryFormat::Sparql, None, true) => { client.explain_sparql(&remote_alias, &content).await? 
From 89c25d38a50c1d5a0583bce239ce6f3357506438 Mon Sep 17 00:00:00 2001 From: bplatz Date: Mon, 11 May 2026 06:07:04 -0400 Subject: [PATCH 8/9] docs(cli/server-integration): sweep for terminology + add active-ledger detail - Expand the "Data API" intro list to reflect what's actually supported via --remote now (log, history, context, explain, etc.) plus the admin operations. - Drop the "resolve the snapshot" phrasing in the history --remote section; Fluree builds a historical *view* at the requested t, not a point-in-time snapshot (singular index, view does the time-traveling). - Spell out the active-ledger-pointer behavior on `fluree drop` more precisely: explicit --remote leaves local state alone; auto-route and --direct both clear the pointer when it matches the dropped ledger. - Add a `fluree query --remote --at --explain` line to the validation script to exercise the now-working combination. --- docs/cli/server-integration.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/cli/server-integration.md b/docs/cli/server-integration.md index 3a8b95650..992dfb0f7 100644 --- a/docs/cli/server-integration.md +++ b/docs/cli/server-integration.md @@ -4,8 +4,8 @@ This document is for implementers building a custom server (for example in `../s The CLI supports two broad categories of remote operations: -- **Data API**: query/update/insert/upsert/info/exists/show (normal ledger operations). -- **Replication / sync**: clone/pull/fetch (content-addressed replication by CID, via pack + storage proxy). +- **Data API**: query / update / insert / upsert / info / exists / show / log / history / context / explain, plus admin operations like create / drop / reindex / branch (create / drop / rebase / merge) / publish / export. +- **Replication / sync**: clone / pull / fetch (content-addressed replication by CID, via pack + storage proxy) and ledger-archive (`export --format ledger`). 
## Base URL And Discovery @@ -94,7 +94,7 @@ Read or replace the default JSON-LD context for a ledger. `get` returns the cont - `POST {api_base_url}/query/*ledger` -Server-side history queries via JSON-LD: the CLI builds the same `from`/`to`/`select`/`where` body it would send locally and POSTs it to the **ledger-scoped** query endpoint (`/query/{ledger}`). The path carries the bare ledger ID (e.g. `mydb:main`) so the server's `can_read` check matches normal scoped read tokens; the body's `from` carries the time-travel suffix (`mydb:main@t:N`) which the query engine uses to resolve the snapshot. Posting to the connection-level `/query` instead would force auth to read `from` for the ledger ID and reject any token not scoped to the time-travel form. +Server-side history queries via JSON-LD: the CLI builds the same `from`/`to`/`select`/`where` body it would send locally and POSTs it to the **ledger-scoped** query endpoint (`/query/{ledger}`). The path carries the bare ledger ID (e.g. `mydb:main`) so the server's `can_read` check matches normal scoped read tokens; the body's `from` carries the time-travel suffix (`mydb:main@t:N`) which the query engine uses to build a historical view at that `t`. Posting to the connection-level `/query` instead would force auth to read `from` for the ledger ID and reject any token not scoped to the time-travel form. Entity and predicate compact IRIs (`ex:alice` → `http://example.org/alice`) are expanded **client-side** using the project's stored prefix map before the request leaves the CLI, so the server never has to consult the local prefix table. The query body still ships its `@context` (also derived from local prefixes) so the server can compact response IRIs back into the user's preferred form for display. @@ -143,7 +143,11 @@ Drops a ledger or graph source on the remote server. 
The CLI sends `hard: true` When `--remote` is omitted, the CLI auto-routes through a locally running `fluree server start` if `server.meta.json` is present and the PID is alive, falling back to direct local execution otherwise. Pass `--direct` to skip auto-routing. The `--force` flag is required in all modes to confirm deletion. -`--remote` does not affect local state: dropping a ledger remotely never touches the local active-ledger pointer or local storage. +Active-ledger handling: + +- **`--remote `** (explicit): never touches local state. Remote storage is separate; the local active-ledger pointer and local storage are unaffected. +- **Auto-route** (no `--remote`, server running): same on-disk storage as `--direct`, so a successful drop also clears the local active-ledger pointer if it matched the dropped name. +- **`--direct`** (no `--remote`, no server): clears the active-ledger pointer if it matched. ### `fluree create --from .flpack` (native ledger import) @@ -1557,6 +1561,7 @@ fluree context get mydb --remote origin # should print the remote ledger's defa fluree context set mydb --remote origin -e '{"ex": "http://example.org/"}' # admin: replace context fluree history http://example.org/alice --ledger mydb --remote origin --format json # remote history fluree query mydb 'SELECT * WHERE { ?s ?p ?o }' --remote origin --at 1 # time-travel via /query/{ledger} +fluree query mydb 'SELECT * WHERE { ?s ?p ?o }' --remote origin --at 1 --explain --format json # time-travel explain via /explain/{ledger} fluree create empty-db --remote origin # should create an empty ledger on the remote fluree export mydb --remote origin --format ledger -o mydb-remote.flpack # archive remote ledger fluree drop my-gs --force # should drop the graph source locally From d3c3f8e2109615f7efa8e11c9f97773651d29ef4 Mon Sep 17 00:00:00 2001 From: bplatz Date: Mon, 11 May 2026 06:08:31 -0400 Subject: [PATCH 9/9] =?UTF-8?q?docs(cli):=20clarify=20`fluree=20drop`=20he?= 
=?UTF-8?q?lp=20=E2=80=94=20covers=20graph=20sources=20too?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `fluree drop ` resolves the name as a ledger first and falls back to a graph source, both locally and against `--remote` (the server's `/drop` does the same). The CLI's top-line help still said "Drop (delete) a ledger", giving no hint that the same command works for an Iceberg/BM25/etc. graph source — users were reaching for `fluree iceberg drop` instead. Update the about text and the arg help to mention graph sources, and point at `fluree iceberg drop` as the explicit variant. --- fluree-db-cli/src/cli.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fluree-db-cli/src/cli.rs b/fluree-db-cli/src/cli.rs index 81348ed80..059d05d5e 100644 --- a/fluree-db-cli/src/cli.rs +++ b/fluree-db-cli/src/cli.rs @@ -291,9 +291,11 @@ pub enum Commands { action: BranchAction, }, - /// Drop (delete) a ledger + /// Drop (delete) a ledger or graph source Drop { - /// Ledger name to drop + /// Ledger or graph source name to drop. The server resolves as a ledger + /// first, then as a graph source — `fluree iceberg drop` is the + /// explicit graph-source variant. name: String, /// Required flag to confirm deletion