From 75c9260b16d2838335d2c202270d448cf77449aa Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Thu, 4 Jun 2026 12:14:50 +0000 Subject: [PATCH 1/3] fix(kernel): stop premature sync statement close (H4); bump KERNEL_REV to batch 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Connector half of the kernel batch-2 fixes (kernel PR #123). Bumps KERNEL_REV to pick up the batch-2 kernel surface. H4 — don't close the kernel Statement at sync execute-return: execute_command's finally used to `stmt.close()` immediately after `stmt.execute()` succeeded. For a large CloudFetch result with paginated chunk links (all_fetched=false), the kernel fetches later links lazily (get_result_chunks against the LIVE statement) during consumption, so a premature CloseStatement broke those fetches. The kernel now auto-closes the server statement when its result stream drains (ExecutedStatement::next_batch end-of-stream), with the executed-handle Drop as the backstop for partial/abandoned reads. So the connector now flips close_stmt=False on a successful execute and only closes on the error path (no executed handle was produced). The other batch-2 fixes (cancelled-class -> OperationalError, U2M refresh fail-fast, metadata statement close-on-drop, per-binding OAuth client_id) are entirely kernel-side and need no connector code beyond the KERNEL_REV bump. Tests: unit (sync execute does-not-close on success / does-close on failure) + e2e (large multi-chunk result drains without premature close + cursor reuse; server cancel maps to OperationalError not ProgrammingError). All e2e verified live against dogfood. Co-authored-by: Isaac Signed-off-by: Vikrant Puppala --- KERNEL_REV | 2 +- src/databricks/sql/backend/kernel/client.py | 21 +++++-- tests/e2e/test_kernel_backend.py | 58 +++++++++++++++++- tests/unit/test_kernel_client.py | 68 +++++++++++++++++++++ 4 files changed, 143 insertions(+), 6 deletions(-) diff --git a/KERNEL_REV b/KERNEL_REV index 37b717a45..977f5af41 100644 --- a/KERNEL_REV +++ b/KERNEL_REV @@ -1 +1 @@ -cbeaf44c66ebc57c5b3eed2a5c5d0290d4bf035b +eb7da65552c8e7e10b192a7a2577910102e4dc6b diff --git a/src/databricks/sql/backend/kernel/client.py b/src/databricks/sql/backend/kernel/client.py index 08bb0d36b..a57a7a821 100644 --- a/src/databricks/sql/backend/kernel/client.py +++ b/src/databricks/sql/backend/kernel/client.py @@ -473,16 +473,29 @@ def execute_command( # Canceller is best-effort; never block execute on it. pass executed = stmt.execute() + # Execute succeeded: the kernel now owns the statement + # lifecycle. It auto-closes the server statement when the + # result stream is fully drained (``ExecutedStatement:: + # next_batch`` end-of-stream), with the executed handle's + # ``Drop`` as the backstop for partial/abandoned reads. + # So we must NOT close ``stmt`` here: a premature + # ``CloseStatement`` at execute-return broke lazy + # CloudFetch chunk-link fetches (``get_result_chunks`` + # against the live statement) for large paginated-link + # results — the H4 gap. Closing here is left ONLY for the + # error path below, where no executed handle / result set + # was produced to reap it. + close_stmt = False except Exception as exc: raise _wrap_kernel_exception("execute_command", exc) from exc finally: with self._sync_cancellers_lock: self._sync_cancellers.pop(id(cursor), None) if close_stmt: - # Sync path: ``Statement`` is a lifecycle owner separate - # from the executed handle. Drop it here so the parent - # doesn't outlive its caller. Swallow close errors — - # they're not actionable. + # Reached only when ``stmt.execute()`` did not succeed + # (or async, which flipped the flag earlier): no executed + # handle owns the statement, so close it here to avoid a + # leak. Swallow close errors — not actionable. try: stmt.close() except Exception: diff --git a/tests/e2e/test_kernel_backend.py b/tests/e2e/test_kernel_backend.py index 95d7d942e..b0d289979 100644 --- a/tests/e2e/test_kernel_backend.py +++ b/tests/e2e/test_kernel_backend.py @@ -24,7 +24,12 @@ import pytest import databricks.sql as sql -from databricks.sql.exc import DatabaseError, NotSupportedError, ServerOperationError +from databricks.sql.exc import ( + DatabaseError, + NotSupportedError, + OperationalError, + ServerOperationError, +) # Skip the whole module unless the kernel wheel is genuinely installed. # ``pytest.importorskip`` alone isn't enough: the kernel unit tests inject a @@ -478,3 +483,54 @@ def cancel_after_delay(): finally: t.join() cur.close() + + +# ── Batch 2 ──────────────────────────────────────────────────────── + + +def test_large_result_drains_without_premature_close(conn): + """H4: a large multi-chunk result fully drains even though the + connector no longer closes the statement at execute-return — the + kernel auto-closes on drain. Guards the regression where a premature + CloseStatement broke lazy CloudFetch chunk-link fetches.""" + n = 5_000_000 + with conn.cursor() as cur: + cur.execute(f"SELECT id, cast(id AS string) s FROM range({n})") + rows = cur.fetchall() + assert len(rows) == n + # Cursor is reusable after the auto-close fired on the prior result. + cur.execute("SELECT 42 AS n") + assert cur.fetchall()[0][0] == 42 + + +def test_server_cancel_maps_to_operational_error(conn): + """A server-side cancel surfaces as OperationalError (cancelled + class), not ProgrammingError. We trigger it via a cross-thread + cancel of a running query; the raised exception must be in the + OperationalError family, not ProgrammingError.""" + import threading + import time + + from databricks.sql.exc import ProgrammingError + + cur = conn.cursor() + + def cancel_after_delay(): + time.sleep(15.0) + cur.cancel() + + t = threading.Thread(target=cancel_after_delay) + t.start() + try: + with pytest.raises(Exception) as exc_info: + cur.execute( + "SELECT count(*) FROM range(0, 1000000000000) " + "WHERE pow(rand(), 2) < 0.5 AND sqrt(id) > 1" + ) + # The cancellation must not masquerade as a caller-argument + # (ProgrammingError) error. It should be operational. + assert not isinstance(exc_info.value, ProgrammingError) + assert isinstance(exc_info.value, (OperationalError, DatabaseError)) + finally: + t.join() + cur.close() diff --git a/tests/unit/test_kernel_client.py b/tests/unit/test_kernel_client.py index 3cbf55540..0553f6109 100644 --- a/tests/unit/test_kernel_client.py +++ b/tests/unit/test_kernel_client.py @@ -572,6 +572,74 @@ def fake_execute(): assert id(cursor) not in c._sync_cancellers +def test_sync_execute_does_not_close_statement_on_success(): + """H4: on a successful sync execute(), the connector must NOT close + the parent kernel Statement — the kernel now auto-closes the server + statement when the result stream drains (with the executed handle's + Drop as backstop). A premature close() here broke lazy CloudFetch + chunk-link fetches for large paginated-link results.""" + c = _make_client() + c._kernel_session = MagicMock() + cursor = MagicMock() + cursor.arraysize = 100 + cursor.buffer_size_bytes = 1024 + + stmt = MagicMock() + stmt.execute.return_value = MagicMock( + statement_id="stmt-id", + arrow_schema=MagicMock(return_value=pa.schema([("x", pa.int64())])), + ) + c._kernel_session.statement.return_value = stmt + + c.execute_command( + operation="SELECT 1", + session_id=MagicMock(), + max_rows=1, + max_bytes=1, + lz4_compression=False, + cursor=cursor, + use_cloud_fetch=False, + parameters=[], + async_op=False, + enforce_embedded_schema_correctness=False, + ) + + # The kernel owns the statement lifecycle post-execute; connector + # leaves it alone (kernel auto-close-on-drain + Drop backstop). + stmt.close.assert_not_called() + + +def test_sync_execute_closes_statement_on_failure(): + """On the error path (execute raised, no executed handle / result + set produced), the connector still closes the parent Statement so + it isn't leaked.""" + c = _make_client() + c._kernel_session = MagicMock() + cursor = MagicMock() + cursor.arraysize = 100 + cursor.buffer_size_bytes = 1024 + + stmt = MagicMock() + stmt.execute.side_effect = RuntimeError("boom") + c._kernel_session.statement.return_value = stmt + + with pytest.raises(Exception): + c.execute_command( + operation="SELECT 1", + session_id=MagicMock(), + max_rows=1, + max_bytes=1, + lz4_compression=False, + cursor=cursor, + use_cloud_fetch=False, + parameters=[], + async_op=False, + enforce_embedded_schema_correctness=False, + ) + + stmt.close.assert_called_once_with() + + def test_get_columns_accepts_none_catalog(): """The kernel's `list_columns` honours `catalog=None` by issuing `SHOW COLUMNS IN ALL CATALOGS` server-side. The connector should From 6f56f117737c529769805d91c5e1fb63d8cae3d0 Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Thu, 4 Jun 2026 12:21:16 +0000 Subject: [PATCH 2/3] chore: re-pin KERNEL_REV to #123 HEAD after cancelled-test fix #123 picked up a follow-up commit fixing the wiremock cancelled-state assertions (ErrorCode::Cancelled). Bump the placeholder pin so the connector CI builds the corrected kernel. Still to be re-pinned to the squash-merge SHA before #830 merges. Co-authored-by: Isaac Signed-off-by: Vikrant Puppala --- KERNEL_REV | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/KERNEL_REV b/KERNEL_REV index 977f5af41..682e6c2ec 100644 --- a/KERNEL_REV +++ b/KERNEL_REV @@ -1 +1 @@ -eb7da65552c8e7e10b192a7a2577910102e4dc6b +4f7fbe700050a363adc87ae1b94c217df23fe5c9 From 97fbbbd6aedb36224e26ac4aca9436c879b86017 Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Fri, 5 Jun 2026 05:44:21 +0000 Subject: [PATCH 3/3] chore: re-pin KERNEL_REV to merged kernel #123 (f4ee6fe) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kernel batch-2 (#123) is merged to kernel main (f4ee6fe, current main HEAD). Re-pin from the orphaned branch HEAD (4f7fbe7) to the merged SHA — reachable from main, no orphan-SHA risk. Verified against a wheel built from f4ee6fe: connector unit (102) + kernel e2e (H4 large-result drain + reuse, server-cancel -> OperationalError, staging fail-loud, diagnostic-info) all pass against the real merged kernel. Co-authored-by: Isaac Signed-off-by: Vikrant Puppala --- KERNEL_REV | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/KERNEL_REV b/KERNEL_REV index 682e6c2ec..696572aef 100644 --- a/KERNEL_REV +++ b/KERNEL_REV @@ -1 +1 @@ -4f7fbe700050a363adc87ae1b94c217df23fe5c9 +f4ee6fec78aabce8c0ea9c1ff47fc11b8191d013