diff --git a/KERNEL_REV b/KERNEL_REV index 37b717a45..696572aef 100644 --- a/KERNEL_REV +++ b/KERNEL_REV @@ -1 +1 @@ -cbeaf44c66ebc57c5b3eed2a5c5d0290d4bf035b +f4ee6fec78aabce8c0ea9c1ff47fc11b8191d013 diff --git a/src/databricks/sql/backend/kernel/_errors.py b/src/databricks/sql/backend/kernel/_errors.py index 151bac96b..334866a37 100644 --- a/src/databricks/sql/backend/kernel/_errors.py +++ b/src/databricks/sql/backend/kernel/_errors.py @@ -38,6 +38,8 @@ from __future__ import annotations +import logging + from databricks.sql.exc import ( DatabaseError, Error, @@ -57,6 +59,31 @@ "(into the same venv as databricks-sql-connector)." ) from exc +# Route the kernel's Rust-side logs into Python's ``logging`` as soon as +# the extension loads. The kernel emits under the ``databricks.sql.kernel`` +# logger (a child of the connector's ``databricks.sql`` namespace), so a +# customer who configures ``databricks.sql`` logging gets kernel logs for +# free with no extra setup. +# +# This is a best-effort, non-essential feature: it must never take down +# ``use_kernel=True`` for a process. ``getattr`` guards against an older +# kernel wheel that predates the function. The ``try`` guards against the +# call itself throwing — note ``except BaseException`` is deliberate: a +# panic raised across the PyO3 boundary surfaces as +# ``pyo3_runtime.PanicException``, which derives from ``BaseException`` +# (not ``Exception``), so a narrower clause would let it escape module +# import and fail every kernel-backed connection. The kernel side is +# idempotent and returns rather than panics on a double install, but we +# do not rely on that here — the guard holds regardless of the Rust impl. +_kernel_init_logging = getattr(_kernel, "init_logging", None) +if _kernel_init_logging is not None: + try: + _kernel_init_logging() + except BaseException as exc: # noqa: BLE001 - see comment above re: PanicException + logging.getLogger(__name__).debug( + "kernel log bridge init failed; continuing without it: %r", exc + ) + # Map a kernel `code` slug to the PEP 249 exception class that best # captures it. The match isn't a perfect 1:1 — PEP 249 has a diff --git a/tests/e2e/test_kernel_backend.py b/tests/e2e/test_kernel_backend.py index 95d7d942e..8d988646d 100644 --- a/tests/e2e/test_kernel_backend.py +++ b/tests/e2e/test_kernel_backend.py @@ -127,6 +127,91 @@ def test_fetchall_arrow(conn): assert table.column_names == ["a", "b"] +# ─── Logging (Rust kernel -> Python logging bridge) ────────────────────────── +# +# Layer 3 of the logger-name drift guard (see also the Rust tests +# `klog::tests::klog_emits_contract_target` and +# `logging::tests::kernel_target_matches_contract` in the kernel repo). +# Asserts the *customer-facing* contract end-to-end: kernel logs reach +# Python `logging` under the `databricks.sql.kernel` logger, respect the +# level set on it, and the pyo3 boundary surfaces under +# `databricks.sql.kernel.pyo3`. If the kernel's tracing target or the +# pyo3-log wiring ever drifts, these fail. + +import logging + + +def test_kernel_logs_reach_python_logging(kernel_conn_params, caplog): + """A query at DEBUG produces records on the `databricks.sql.kernel` + logger — proving the tracing -> log -> pyo3-log -> logging chain.""" + with caplog.at_level(logging.DEBUG, logger="databricks.sql.kernel"): + c = sql.connect(**kernel_conn_params) + try: + with c.cursor() as cur: + cur.execute("SELECT 1 AS a") + cur.fetchall() + finally: + c.close() + + kernel_records = [ + r for r in caplog.records if r.name.startswith("databricks.sql.kernel") + ] + assert kernel_records, ( + "expected log records under the 'databricks.sql.kernel' logger; " + "the kernel tracing -> Python logging bridge did not deliver any" + ) + # The core kernel logger (not just any child) must be present — this + # is the customer-facing contract. + assert any( + r.name == "databricks.sql.kernel" for r in kernel_records + ), "expected core kernel records on the exact 'databricks.sql.kernel' logger" + # The pyo3-boundary breadcrumb (`databricks.sql.kernel.pyo3`) is a + # nice-to-have, but the exact sub-target is a kernel-internal naming + # detail — assert softly so a benign kernel change to the boundary + # breadcrumbs doesn't break the connector e2e suite. + if not any(r.name == "databricks.sql.kernel.pyo3" for r in kernel_records): + import warnings + + warnings.warn( + "no 'databricks.sql.kernel.pyo3' boundary records seen; " + "the kernel may have changed its pyo3 breadcrumb target", + stacklevel=2, + ) + + +def test_kernel_log_level_is_respected(kernel_conn_params, caplog): + """At WARNING on the kernel logger, no DEBUG/INFO kernel records reach + `caplog.records` — i.e. level control on `databricks.sql.kernel` + behaves like any other Python logger. + + Scope note: `caplog.at_level` sets the logger's level and attaches a + handler, so this asserts the *effective* outcome a customer sees + (sub-threshold records don't surface), not specifically that the Rust + side suppressed them at source. A DEBUG record that crossed the FFI + would still be dropped by Python's level check before reaching + `caplog`. Source-side suppression (and its per-record FFI cost + avoidance) is covered by the kernel-side filtering, not asserted + here.""" + with caplog.at_level(logging.WARNING, logger="databricks.sql.kernel"): + c = sql.connect(**kernel_conn_params) + try: + with c.cursor() as cur: + cur.execute("SELECT 1 AS a") + cur.fetchall() + finally: + c.close() + + debug_records = [ + r + for r in caplog.records + if r.name.startswith("databricks.sql.kernel") and r.levelno < logging.WARNING + ] + assert not debug_records, ( + "DEBUG/INFO kernel records leaked through at WARNING level: " + f"{[(r.name, r.levelname, r.getMessage()) for r in debug_records]}" + ) + + # ── Metadata ──────────────────────────────────────────────────────