diff --git a/docs/src/fragments/commands/explore.md b/docs/src/fragments/commands/explore.md index 0767c41a6..e926a9541 100644 --- a/docs/src/fragments/commands/explore.md +++ b/docs/src/fragments/commands/explore.md @@ -44,10 +44,26 @@ sentry explore my-org/cli -F span.op -F "count()" \ ### Metrics +Use `--metric` (`-m`) to query metrics by name. The CLI auto-resolves the metric's type and unit. + +```bash +# Sum a custom metric (e.g., LLM token usage) across an org +sentry explore my-org/ -m llm.token_usage --dataset metrics --period 7d + +# Break down by a tag column (e.g., model name) +sentry explore my-org/seer -F gen_ai.request.model \ + -m llm.token_usage --dataset metrics --period 7d + +# Use a different aggregation (default is sum) +sentry explore my-org/ -m cache.hit_rate --agg avg --dataset metrics +``` + +You can also use the raw tracemetrics format: `aggregation(value,metric_name,metric_type,unit)`. + ```bash -# Custom metric aggregations -sentry explore my-org/cli -F transaction -F "avg(measurements.fcp)" \ - --dataset metrics --period 24h +sentry explore my-org/ \ + -F "sum(value,llm.token_usage,distribution,none)" \ + --dataset metrics --period 7d ``` ### Logs diff --git a/plugins/sentry-cli/skills/sentry-cli/references/explore.md b/plugins/sentry-cli/skills/sentry-cli/references/explore.md index 33a521ad2..b222dba65 100644 --- a/plugins/sentry-cli/skills/sentry-cli/references/explore.md +++ b/plugins/sentry-cli/skills/sentry-cli/references/explore.md @@ -17,6 +17,8 @@ Query aggregate event data (Explore) **Flags:** - `-F, --field ... - API field or aggregate (repeatable). E.g., title, "count()", "p50(transaction.duration)"` +- `-m, --metric - Metric name for --dataset metrics. Auto-resolves type/unit via API.` +- `--agg - Aggregation for --metric (sum, avg, count, p50, p95, etc.) 
- (default: "sum")` - `-d, --dataset - Dataset to query (errors, spans, metrics, logs, replays) - (default: "errors")` - `-q, --query - Search query (Sentry search syntax)` - `-s, --sort - Sort field (prefix with - for desc, e.g., "-count()")` @@ -57,9 +59,19 @@ sentry explore my-org/cli -F span.op -F "p50(span.duration)" \ sentry explore my-org/cli -F span.op -F "count()" \ --dataset spans --sort "-count()" -# Custom metric aggregations -sentry explore my-org/cli -F transaction -F "avg(measurements.fcp)" \ - --dataset metrics --period 24h +# Sum a custom metric (e.g., LLM token usage) across an org +sentry explore my-org/ -m llm.token_usage --dataset metrics --period 7d + +# Break down by a tag column (e.g., model name) +sentry explore my-org/seer -F gen_ai.request.model \ + -m llm.token_usage --dataset metrics --period 7d + +# Use a different aggregation (default is sum) +sentry explore my-org/ -m cache.hit_rate --agg avg --dataset metrics + +sentry explore my-org/ \ + -F "sum(value,llm.token_usage,distribution,none)" \ + --dataset metrics --period 7d # Log severity counts in the last hour sentry explore my-org/cli -F severity -F "count()" \ diff --git a/src/commands/explore.ts b/src/commands/explore.ts index 61b6b411e..ca02b9140 100644 --- a/src/commands/explore.ts +++ b/src/commands/explore.ts @@ -11,6 +11,7 @@ import { isReplaySortValue, listReplays, queryEvents, + queryMetricsMeta, } from "../lib/api-client.js"; import { buildProjectQuery, validateLimit } from "../lib/arg-parsing.js"; import { @@ -33,6 +34,7 @@ import { paginationHint, } from "../lib/list-command.js"; import { logger } from "../lib/logger.js"; +import { resolveMetricField } from "../lib/metrics-transform.js"; import { withProgress } from "../lib/polling.js"; import { DEFAULT_REPLAY_EXPLORE_FIELDS, @@ -123,6 +125,8 @@ const API_TO_USER_DATASET = new Map( type ExploreFlags = { readonly field?: string[]; + readonly metric?: string; + readonly agg: string; readonly dataset: string; readonly environment?: readonly string[]; readonly query?: string; @@ -306,12 +310,53 @@ function defaultFieldsForDataset(dataset: string): readonly string[] { return dataset === "replays" ? DEFAULT_REPLAY_EXPLORE_FIELDS : DEFAULT_FIELDS; } +/** Append --metric / --agg flags to hint parts */ +function appendMetricHints( + parts: string[], + metric: string | undefined, + agg: string +): void { + if (metric) { + parts.push(`-m "${metric}"`); + if (agg !== "sum") { + parts.push(`--agg ${agg}`); + } + } +} + +/** Append non-default --field flags to hint parts */ +function appendFieldHints( + parts: string[], + rawFields: string[] | undefined, + dataset: string, + metricActive: boolean +): void { + const fields = rawFields ?? []; + const fieldList = metricActive + ? fields.filter((f) => !isAggregate(f)) + : fields; + const defaults = defaultFieldsForDataset(dataset).join(","); + if (fieldList.join(",") !== defaults && fieldList.length > 0) { + for (const f of fieldList) { + parts.push(`-F "${f}"`); + } + } +} + /** Append active non-default flags to a base command string */ function appendFlagHints( base: string, flags: Pick< ExploreFlags, - "dataset" | "environment" | "sort" | "query" | "period" | "field" | "limit" + | "dataset" + | "environment" + | "sort" + | "query" + | "period" + | "field" + | "limit" + | "metric" + | "agg" > ): string { const parts: string[] = []; @@ -323,19 +368,10 @@ function appendFlagHints( API_TO_USER_DATASET.get(flags.dataset) ?? 
flags.dataset; parts.push(`--dataset ${displayDataset}`); } + appendMetricHints(parts, flags.metric, flags.agg); appendSortHint(parts, flags.sort, defaultSort); appendQueryHint(parts, flags.query); - // Include --field flags when non-default - const fieldList = flags.field ?? []; - const currentFieldStr = fieldList.join(","); - if ( - currentFieldStr !== defaultFieldsForDataset(flags.dataset).join(",") && - fieldList.length > 0 - ) { - for (const f of fieldList) { - parts.push(`-F "${f}"`); - } - } + appendFieldHints(parts, flags.field, flags.dataset, !!flags.metric); if (flags.limit !== DEFAULT_LIMIT) { parts.push(`--limit ${flags.limit}`); } @@ -356,6 +392,53 @@ function findFirstAggregate(fieldList: string[]): string | undefined { return fieldList.find((f) => f.includes("(") && f.includes(")")); } +/** True when the field looks like an aggregate call: `fn(...)`. */ +function isAggregate(field: string): boolean { + return field.includes("(") && field.endsWith(")"); +} + +/** + * True when the aggregate uses the tracemetrics comma-separated format: + * `aggregation(value,metric_name,metric_type,unit)`. + */ +function isTracemetricsAggregate(aggregate: string): boolean { + const parenIdx = aggregate.indexOf("("); + if (parenIdx < 0) { + return false; + } + const inner = aggregate.slice(parenIdx + 1, -1); + return inner.startsWith("value,") && inner.split(",").length === 4; +} + +/** + * Validate that aggregate fields use the tracemetrics format when querying + * the `metricsEnhanced` dataset. Standard aggregates like `count()` or + * `avg(measurements.fcp)` are invalid — the API requires the four-part + * comma-separated format: `aggregation(value,metric_name,metric_type,unit)`. + */ +function validateMetricsFields(fieldList: string[]): void { + const badAggs = fieldList.filter( + (f) => isAggregate(f) && !isTracemetricsAggregate(f) + ); + if (badAggs.length === 0) { + return; + } + + throw new ValidationError( + `Invalid metrics aggregate${badAggs.length > 1 ? 
"s" : ""}: ${badAggs.join(", ")}\n\n` + + "The metrics dataset requires the format: aggregation(value,metric_name,metric_type,unit)\n\n" + + "Examples:\n" + + ' sentry explore my-org/ -F "sum(value,llm.token_usage,distribution,none)" --dataset metrics\n' + + ' sentry explore my-org/ -F gen_ai.request.model -F "avg(value,cache.hit_rate,distribution,none)" --dataset metrics\n\n' + + "Parameters:\n" + + ' - value: literal string "value"\n' + + " - metric_name: the metric name emitted by the SDK (e.g., llm.token_usage)\n" + + " - metric_type: distribution, gauge, counter, or set\n" + + " - unit: none, byte, second, millisecond, etc.", + "field" + ); +} + // --------------------------------------------------------------------------- // Dataset configuration // --------------------------------------------------------------------------- @@ -508,7 +591,7 @@ export const exploreCommand = buildListCommand("explore", { "Datasets:\n" + " errors Error events (default)\n" + " spans Span data\n" + - " metrics Custom metrics\n" + + " metrics Custom metrics (tracemetrics format)\n" + " logs Log entries\n" + " replays Session replay search\n\n" + "Targets:\n" + @@ -523,7 +606,11 @@ export const exploreCommand = buildListCommand("explore", { "--dataset spans\n" + " sentry explore my-org/cli --dataset replays -F id -F user.email -F count_errors\n" + ' sentry explore -F span.op -F "count()" --dataset spans --period 1h\n' + - " sentry explore --json", + " sentry explore --json\n\n" + + "Metrics (auto mode — resolves type/unit automatically):\n" + + " sentry explore my-org/ -m llm.token_usage --dataset metrics\n" + + " sentry explore my-org/seer -F gen_ai.request.model -m llm.token_usage --dataset metrics --period 7d\n" + + " sentry explore my-org/ -m cache.hit_rate --agg avg --dataset metrics", }, output: { human: formatExploreHuman, @@ -551,6 +638,19 @@ export const exploreCommand = buildListCommand("explore", { variadic: true, optional: true, }, + metric: { + kind: "parsed", + parse: String, + brief: + "Metric name for --dataset metrics. 
Auto-resolves type/unit via API.", + optional: true, + }, + agg: { + kind: "parsed", + parse: String, + brief: "Aggregation for --metric (sum, avg, count, p50, p95, etc.)", + default: "sum", + }, dataset: { kind: "parsed", parse: parseDataset, @@ -594,6 +694,7 @@ export const exploreCommand = buildListCommand("explore", { ...PERIOD_ALIASES, e: "environment", F: "field", + m: "metric", d: "dataset", q: "query", s: "sort", @@ -608,14 +709,57 @@ export const exploreCommand = buildListCommand("explore", { "explore" ); - const dataset = flags.dataset; + let dataset = flags.dataset; + const userSuppliedFields = flags.field && flags.field.length > 0; let fieldList = [...defaultFieldsForDataset(dataset)]; - if (flags.field && flags.field.length > 0) { + if (userSuppliedFields) { fieldList = flags.field; } const timeRange = flags.period; const environment = parseReplayEnvironmentFilter(flags.environment); + // --metric auto mode: resolve metric name → tracemetrics aggregate + if (flags.metric) { + if (dataset !== "metricsEnhanced") { + log.warn("--metric implies --dataset metrics; switching dataset."); + dataset = "metricsEnhanced"; + } + + // Use the user's --period for metadata discovery so older metrics are found + const metaParams = timeRangeToApiParams(timeRange); + const metrics = await withProgress( + { + message: `Discovering metric '${flags.metric}'...`, + json: flags.json, + }, + () => + queryMetricsMeta(org, { + ...metaParams, + project, + }) + ); + + const aggField = resolveMetricField(flags.metric, flags.agg, metrics); + // Prepend any user-supplied grouping fields, then the resolved aggregate + const groupByFields = userSuppliedFields + ? fieldList.filter((f) => !isAggregate(f)) + : []; + fieldList = [...groupByFields, aggField]; + } else if (dataset === "metricsEnhanced") { + if (!userSuppliedFields) { + throw new ValidationError( + "The metrics dataset requires --metric or explicit --field flags.\n\n" + + "Auto mode (recommended):\n" + + " sentry explore my-org/ -m llm.token_usage --dataset metrics\n" + + " sentry explore my-org/ -m llm.token_usage --agg avg --dataset metrics\n\n" + + "Manual mode (tracemetrics format):\n" + + ' sentry explore my-org/ -F "sum(value,llm.token_usage,distribution,none)" --dataset metrics', + "field" + ); + } + validateMetricsFields(fieldList); + } + const config = resolveDatasetConfig({ dataset, fieldList, @@ -656,11 +800,18 @@ export const exploreCommand = buildListCommand("explore", { const hasMore = !!nextCursor; const baseTarget = project ? 
`${org}/${project}` : `${org}/`;
+    const hintFlags = { ...flags, dataset };
     const nav = paginationHint({
       hasPrev,
       hasMore,
-      prevHint: appendFlagHints(`sentry explore ${baseTarget} -c prev`, flags),
-      nextHint: appendFlagHints(`sentry explore ${baseTarget} -c next`, flags),
+      prevHint: appendFlagHints(
+        `sentry explore ${baseTarget} -c prev`,
+        hintFlags
+      ),
+      nextHint: appendFlagHints(
+        `sentry explore ${baseTarget} -c next`,
+        hintFlags
+      ),
     });
     const hint = buildResultHint(response.data.length, nav);
diff --git a/src/lib/api-client.ts b/src/lib/api-client.ts
index fa59e9d67..c58beef66 100644
--- a/src/lib/api-client.ts
+++ b/src/lib/api-client.ts
@@ -27,7 +27,8 @@ export {
   queryAllWidgets,
   updateDashboard,
 } from "./api/dashboards.js";
-export { queryEvents } from "./api/discover.js";
+export type { MetricMeta } from "./api/discover.js";
+export { queryEvents, queryMetricsMeta } from "./api/discover.js";
 export {
   findEventAcrossOrgs,
   getEvent,
diff --git a/src/lib/api/discover.ts b/src/lib/api/discover.ts
index afb6f47a3..cdd73522a 100644
--- a/src/lib/api/discover.ts
+++ b/src/lib/api/discover.ts
@@ -86,6 +86,72 @@ async function fetchEventsPage(
   return { data, nextCursor };
 }
 
+/** Metric metadata returned by {@link queryMetricsMeta}. */
+export type MetricMeta = {
+  name: string;
+  type: string;
+  unit: string;
+};
+
+/**
+ * Discover available metrics for an org via the Events API.
+ *
+ * Queries `dataset=metricsEnhanced` with meta-fields (`metric.name`, etc.)
+ * — the same technique the Sentry Explore Metrics UI uses.
+ *
+ * Auto-paginates to collect all available metrics (bounded by
+ * {@link MAX_PAGINATION_PAGES} to prevent runaway loops).
+ */
+export async function queryMetricsMeta(
+  orgSlug: string,
+  options?: {
+    statsPeriod?: string;
+    start?: string;
+    end?: string;
+    project?: string;
+  }
+): Promise<MetricMeta[]> {
+  const regionUrl = await resolveOrgRegion(orgSlug);
+  const query = options?.project ? `project:${options.project}` : undefined;
+
+  const baseOptions: ExploreQueryOptions = {
+    fields: ["metric.name", "metric.type", "metric.unit"],
+    dataset: "metricsEnhanced",
+    query,
+    statsPeriod:
+      options?.start || options?.end
+        ? undefined
+        : (options?.statsPeriod ?? "7d"),
+    start: options?.start,
+    end: options?.end,
+  };
+
+  const allRows: Record<string, unknown>[] = [];
+  let cursor: string | undefined;
+
+  for (let page = 0; page < MAX_PAGINATION_PAGES; page += 1) {
+    const result = await fetchEventsPage(
+      regionUrl,
+      orgSlug,
+      { ...baseOptions, cursor },
+      API_MAX_PER_PAGE
+    );
+
+    allRows.push(...result.data.data);
+
+    if (!result.nextCursor) {
+      break;
+    }
+    cursor = result.nextCursor;
+  }
+
+  return allRows.map((row) => ({
+    name: String(row["metric.name"] ?? ""),
+    type: String(row["metric.type"] ?? "distribution"),
+    unit: String(row["metric.unit"] ?? "none"),
+  }));
+}
+
 /**
  * Query the Explore/Events endpoint for aggregate or tabular event data.
  *
diff --git a/src/lib/metrics-transform.ts b/src/lib/metrics-transform.ts
new file mode 100644
index 000000000..4215ab930
--- /dev/null
+++ b/src/lib/metrics-transform.ts
@@ -0,0 +1,77 @@
+/**
+ * Tracemetrics aggregate construction from simple metric names.
+ *
+ * Transforms user-friendly metric names (e.g., `llm.token_usage`) into the
+ * four-part tracemetrics format required by the Sentry Events API when
+ * querying `dataset=metricsEnhanced`: `aggregation(value,name,type,unit)`.
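+ *
+ * For example, `sum(value,llm.token_usage,distribution,none)` sums the
+ * `llm.token_usage` distribution metric (unit `none`).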
+ */
+
+import type { MetricMeta } from "./api/discover.js";
+import { ResolutionError } from "./errors.js";
+
+/** Valid tracemetrics aggregation functions. */
+const VALID_AGGS = new Set([
+  "sum",
+  "avg",
+  "count",
+  "min",
+  "max",
+  "p50",
+  "p75",
+  "p90",
+  "p95",
+  "p99",
+  "count_unique",
+]);
+
+/** Build a tracemetrics aggregate string from parts. */
+export function makeTracemetricsAggregate(
+  agg: string,
+  name: string,
+  type: string,
+  unit: string
+): string {
+  return `${agg}(value,${name},${type},${unit})`;
+}
+
+/**
+ * Resolve a simple metric name against discovered metadata and build
+ * the tracemetrics aggregate field.
+ *
+ * @throws {ResolutionError} when the metric name isn't found
+ */
+export function resolveMetricField(
+  metricName: string,
+  agg: string,
+  metrics: MetricMeta[]
+): string {
+  if (!VALID_AGGS.has(agg)) {
+    throw new ResolutionError(
+      `Aggregation '${agg}'`,
+      `not recognized. Valid aggregations: ${[...VALID_AGGS].join(", ")}`,
+      `sentry explore my-org/ -m ${metricName} --agg sum --dataset metrics`
+    );
+  }
+
+  const match = metrics.find((m) => m.name === metricName);
+  if (!match) {
+    const suggestions = metrics
+      .filter((m) => m.name.includes(metricName) || metricName.includes(m.name))
+      .slice(0, 5)
+      .map((m) => m.name);
+
+    throw new ResolutionError(
+      `Metric '${metricName}'`,
+      "not found in this project",
+      `sentry explore my-org/ -m ${metricName} --dataset metrics --period 7d`,
+      suggestions.length > 0
+        ? [`Similar metrics: ${suggestions.join(", ")}`]
+        : ["Use a wider --period to search for older metrics"]
+    );
+  }
+
+  return makeTracemetricsAggregate(agg, match.name, match.type, match.unit);
+}
diff --git a/test/commands/explore.test.ts b/test/commands/explore.test.ts
index ff3922486..0e248cd1b 100644
--- a/test/commands/explore.test.ts
+++ b/test/commands/explore.test.ts
@@ -91,12 +91,18 @@ const MOCK_REPLAYS_RESPONSE = [
 ];
 
 let queryEventsSpy: ReturnType<typeof spyOn>;
+let queryMetricsMetaSpy: ReturnType<typeof spyOn>;
 let listReplaysSpy: ReturnType<typeof spyOn>;
 let resolveTargetSpy: ReturnType<typeof spyOn>;
 let resolveCursorSpy: ReturnType<typeof spyOn>;
 let advancePaginationStateSpy: ReturnType<typeof spyOn>;
 let hasPreviousPageSpy: ReturnType<typeof spyOn>;
 
+const MOCK_METRICS_META = [
+  { name: "llm.token_usage", type: "distribution", unit: "none" },
+  { name: "cache.hit_rate", type: "distribution", unit: "none" },
+];
+
 beforeEach(async () => {
   func = (await exploreCommand.loader()) as unknown as ExploreFunc;
 
@@ -105,6 +111,8 @@ beforeEach(async () => {
     data: MOCK_EVENTS_RESPONSE,
     nextCursor: undefined,
   });
+  queryMetricsMetaSpy = spyOn(apiClient, "queryMetricsMeta");
+  queryMetricsMetaSpy.mockResolvedValue(MOCK_METRICS_META);
   listReplaysSpy = spyOn(apiClient, "listReplays");
   listReplaysSpy.mockResolvedValue({
     data: MOCK_REPLAYS_RESPONSE,
@@ -130,6 +138,7 @@ beforeEach(async () => {
 
 afterEach(() => {
   queryEventsSpy.mockRestore();
+  queryMetricsMetaSpy.mockRestore();
   listReplaysSpy.mockRestore();
   resolveTargetSpy.mockRestore();
   resolveCursorSpy.mockRestore();
@@ -139,6 +148,7 @@ afterEach(() => {
 
 const DEFAULT_FLAGS = {
   limit: 25,
+  agg: "sum",
   dataset: "errors",
   period: parsePeriod("24h"),
   json: false,
@@ -491,6 +501,181 @@ describe("sentry explore", () => {
     });
   });
 
+  describe("metrics dataset validation", () => {
+    test("rejects standard aggregates on metrics dataset", async () => {
+      resolveTargetSpy.mockResolvedValue({ org: "test-org" });
+      const { context } = createContext();
+
+      const promise = func.call(
+        context,
+        {
+          ...DEFAULT_FLAGS,
+          dataset: "metricsEnhanced",
+          field: ["title", "count()"],
+        },
+        
"test-org/" + ); + + await expect(promise).rejects.toThrow(ValidationError); + await expect(promise).rejects.toThrow(/Invalid metrics aggregate/); + }); + + test("accepts valid tracemetrics aggregate format", async () => { + resolveTargetSpy.mockResolvedValue({ org: "test-org" }); + const { context } = createContext(); + + await func.call( + context, + { + ...DEFAULT_FLAGS, + dataset: "metricsEnhanced", + field: [ + "gen_ai.request.model", + "sum(value,llm.token_usage,distribution,none)", + ], + }, + "test-org/" + ); + + expect(queryEventsSpy).toHaveBeenCalledWith( + "test-org", + expect.objectContaining({ dataset: "metricsEnhanced" }) + ); + }); + + test("requires --metric or --field for metrics dataset", async () => { + resolveTargetSpy.mockResolvedValue({ org: "test-org" }); + const { context } = createContext(); + + const promise = func.call( + context, + { ...DEFAULT_FLAGS, dataset: "metricsEnhanced" }, + "test-org/" + ); + + await expect(promise).rejects.toThrow(ValidationError); + await expect(promise).rejects.toThrow( + /requires --metric or explicit --field/ + ); + }); + + test("allows non-aggregate fields without tracemetrics format", async () => { + resolveTargetSpy.mockResolvedValue({ org: "test-org" }); + const { context } = createContext(); + + await func.call( + context, + { + ...DEFAULT_FLAGS, + dataset: "metricsEnhanced", + field: ["gen_ai.request.model"], + }, + "test-org/" + ); + + expect(queryEventsSpy).toHaveBeenCalled(); + }); + + test("--metric auto-resolves metric type and unit", async () => { + resolveTargetSpy.mockResolvedValue({ org: "test-org" }); + const { context } = createContext(); + + await func.call( + context, + { + ...DEFAULT_FLAGS, + dataset: "metricsEnhanced", + metric: "llm.token_usage", + }, + "test-org/" + ); + + expect(queryMetricsMetaSpy).toHaveBeenCalledWith("test-org", { + statsPeriod: "24h", + project: undefined, + }); + expect(queryEventsSpy).toHaveBeenCalledWith( + "test-org", + expect.objectContaining({ + fields: ["sum(value,llm.token_usage,distribution,none)"], + dataset: "metricsEnhanced", + }) + ); + }); + + test("--metric with -F preserves grouping fields", async () => { + resolveTargetSpy.mockResolvedValue({ org: "test-org" }); + const { context } = createContext(); + + await func.call( + context, + { + ...DEFAULT_FLAGS, + dataset: "metricsEnhanced", + metric: "llm.token_usage", + field: ["gen_ai.request.model"], + }, + "test-org/" + ); + + expect(queryEventsSpy).toHaveBeenCalledWith( + "test-org", + expect.objectContaining({ + fields: [ + "gen_ai.request.model", + "sum(value,llm.token_usage,distribution,none)", + ], + }) + ); + }); + + test("--metric with --agg uses specified aggregation", async () => { + resolveTargetSpy.mockResolvedValue({ org: "test-org" }); + const { context } = createContext(); + + await func.call( + context, + { + ...DEFAULT_FLAGS, + dataset: "metricsEnhanced", + metric: "cache.hit_rate", + agg: "avg", + }, + "test-org/" + ); + + expect(queryEventsSpy).toHaveBeenCalledWith( + "test-org", + expect.objectContaining({ + fields: ["avg(value,cache.hit_rate,distribution,none)"], + }) + ); + }); + + test("--metric without --dataset metrics auto-switches to metricsEnhanced", async () => { + resolveTargetSpy.mockResolvedValue({ org: "test-org" }); + const { context } = createContext(); + + await func.call( + context, + { + ...DEFAULT_FLAGS, + dataset: "errors", + metric: "llm.token_usage", + }, + "test-org/" + ); + + expect(queryEventsSpy).toHaveBeenCalledWith( + "test-org", + expect.objectContaining({ + dataset: 
"metricsEnhanced", + fields: ["sum(value,llm.token_usage,distribution,none)"], + }) + ); + }); + }); + describe("output", () => { test("renders human-readable table with results", async () => { resolveTargetSpy.mockResolvedValue({ org: "test-org" }); diff --git a/test/lib/metrics-transform.test.ts b/test/lib/metrics-transform.test.ts new file mode 100644 index 000000000..f258f3753 --- /dev/null +++ b/test/lib/metrics-transform.test.ts @@ -0,0 +1,92 @@ +import { describe, expect, test } from "bun:test"; +import type { MetricMeta } from "../../src/lib/api/discover.js"; +import { ResolutionError } from "../../src/lib/errors.js"; +import { + makeTracemetricsAggregate, + resolveMetricField, +} from "../../src/lib/metrics-transform.js"; + +const SAMPLE_METRICS: MetricMeta[] = [ + { name: "llm.token_usage", type: "distribution", unit: "none" }, + { name: "cache.hit_rate", type: "distribution", unit: "none" }, + { name: "http.response_time", type: "distribution", unit: "millisecond" }, + { name: "request.count", type: "counter", unit: "none" }, +]; + +describe("makeTracemetricsAggregate", () => { + test("builds standard format", () => { + expect( + makeTracemetricsAggregate( + "sum", + "llm.token_usage", + "distribution", + "none" + ) + ).toBe("sum(value,llm.token_usage,distribution,none)"); + }); + + test("preserves unit", () => { + expect( + makeTracemetricsAggregate( + "avg", + "http.response_time", + "distribution", + "millisecond" + ) + ).toBe("avg(value,http.response_time,distribution,millisecond)"); + }); + + test("works with p50 aggregation", () => { + expect( + makeTracemetricsAggregate("p50", "cache.hit_rate", "distribution", "none") + ).toBe("p50(value,cache.hit_rate,distribution,none)"); + }); +}); + +describe("resolveMetricField", () => { + test("resolves known metric with default agg", () => { + expect(resolveMetricField("llm.token_usage", "sum", SAMPLE_METRICS)).toBe( + "sum(value,llm.token_usage,distribution,none)" + ); + }); + + test("resolves with custom agg", () => { + expect(resolveMetricField("cache.hit_rate", "avg", SAMPLE_METRICS)).toBe( + "avg(value,cache.hit_rate,distribution,none)" + ); + }); + + test("preserves metric unit from metadata", () => { + expect( + resolveMetricField("http.response_time", "p95", SAMPLE_METRICS) + ).toBe("p95(value,http.response_time,distribution,millisecond)"); + }); + + test("throws ResolutionError for unknown metric", () => { + expect(() => + resolveMetricField("nonexistent.metric", "sum", SAMPLE_METRICS) + ).toThrow(ResolutionError); + }); + + test("suggests similar metrics when not found", () => { + try { + resolveMetricField("llm.token", "sum", SAMPLE_METRICS); + expect.unreachable("should have thrown"); + } catch (err) { + expect(err).toBeInstanceOf(ResolutionError); + expect((err as ResolutionError).message).toContain("llm.token_usage"); + } + }); + + test("throws ResolutionError for invalid aggregation", () => { + expect(() => + resolveMetricField("llm.token_usage", "invalid_agg", SAMPLE_METRICS) + ).toThrow(ResolutionError); + }); + + test("resolves counter-type metric", () => { + expect(resolveMetricField("request.count", "sum", SAMPLE_METRICS)).toBe( + "sum(value,request.count,counter,none)" + ); + }); +});