From cfe3b3fbedebc284cf3942a2026b71e7ead29974 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Fri, 5 Jun 2026 01:04:01 +0000 Subject: [PATCH] [SEA-NodeJS] Kernel backend: mTLS, custom HTTP headers & User-Agent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire the SEA/kernel path's remaining TLS-adjacent connection options through to the napi binding, matching the Python connector's use_kernel path (session.py + backend/kernel/client.py): - mTLS client identity: `clientCertPem` / `clientKeyPem` (PEM string or Buffer), normalised to Buffers and routed to the kernel `TlsConfig::client_cert_pem` / `client_key_pem`. Both-or-neither enforced up front with an actionable error. - Independent hostname-verify toggle: `checkServerCertificateHostname` (kernel `skip_hostname_verification`) for full parity with Python's `tls_verify_hostname` — skip only the hostname check while still validating the chain. The master `checkServerCertificate=false` still subsumes it. - Custom HTTP headers + User-Agent: headers cross the FFI as an ordered list (`Array<{name,value}>`, the napi `HeaderEntry` shape matching the kernel core `Vec<(String,String)>` and Python's `List[Tuple]`): caller `customHeaders` first, then the connector's composed `User-Agent` appended last (always emitted; the kernel folds the last User-Agent into its base `DatabricksJDBCDriverOSS/...` UA). Kernel-managed reserved names `Authorization` / `x-databricks-org-id` are dropped before the FFI hop, matching Python's `_KERNEL_MANAGED_HEADERS` double-wall. Adds `buildSeaHttpOptions`, extends `buildSeaTlsOptions`/`SeaTlsOptions`, and factors PEM normalisation into a shared helper. Bumps KERNEL_REV and regenerates `native/sea/index.d.ts`. 
Unit tests cover mTLS pairing/validation, the hostname toggle, ordered header pass-through, reserved-name dropping, and User-Agent composition/ordering; verified the real native binding marshals every new field across the FFI and rejects a wrong header shape. Depends on the kernel napi change exposing clientCertPem / clientKeyPem / customHeaders / checkServerCertificateHostname; KERNEL_REV must be repointed to that commit once merged. Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- KERNEL_REV | 2 +- lib/contracts/InternalConnectionOptions.ts | 35 +++- lib/sea/SeaAuth.ts | 216 +++++++++++++++++---- native/sea/index.d.ts | 80 +++++++- tests/unit/sea/_helpers/nativeOptions.ts | 40 ++++ tests/unit/sea/auth-m2m.test.ts | 5 +- tests/unit/sea/auth-pat.test.ts | 3 +- tests/unit/sea/auth-u2m.test.ts | 5 +- tests/unit/sea/connectionOptions.test.ts | 152 ++++++++++++++- tests/unit/sea/execution.test.ts | 3 +- 10 files changed, 497 insertions(+), 44 deletions(-) create mode 100644 tests/unit/sea/_helpers/nativeOptions.ts diff --git a/KERNEL_REV b/KERNEL_REV index 1603f791..97b52880 100644 --- a/KERNEL_REV +++ b/KERNEL_REV @@ -1 +1 @@ -8bedaabf69f5bce5a957a8775f29dbb8dbdd2e71 +7f8353f39665e7ac0fcc31a052fd2271caba1f67 diff --git a/lib/contracts/InternalConnectionOptions.ts b/lib/contracts/InternalConnectionOptions.ts index 24575984..8589334a 100644 --- a/lib/contracts/InternalConnectionOptions.ts +++ b/lib/contracts/InternalConnectionOptions.ts @@ -29,11 +29,26 @@ export interface InternalConnectionOptions { /** * SEA-only: verify the server's TLS certificate. Secure-by-default — omit * to keep full chain + hostname verification; set `false` only to opt into - * the insecure accept-anything mode. + * the insecure accept-anything mode. This is the master verify toggle: + * `false` also subsumes the hostname check (see + * `checkServerCertificateHostname`). Mirrors the Python connector's + * `_tls_no_verify` (inverted). * @internal SEA path only. 
*/ checkServerCertificate?: boolean; + /** + * SEA-only: verify that the server certificate matches the host + * (hostname-vs-SNI check), independently of full chain validation. Omit + * to keep the secure default (on); set `false` to skip only the hostname + * check while still validating the chain — e.g. connecting via an IP + * literal or a host the cert wasn't issued for. No-op when + * `checkServerCertificate` is `false` (that disables everything). Mirrors + * the Python connector's `_tls_verify_hostname`. + * @internal SEA path only. + */ + checkServerCertificateHostname?: boolean; + /** * SEA-only: PEM-encoded CA certificate (string or `Buffer`) added to the * trust store on top of the system roots — for TLS-inspecting proxies or @@ -41,4 +56,22 @@ export interface InternalConnectionOptions { * @internal SEA path only. */ customCaCert?: Buffer | string; + + /** + * SEA-only: PEM-encoded client certificate (string or `Buffer`) for + * mutual TLS (mTLS). Must be supplied together with `clientKeyPem`; a + * leaf cert optionally followed by its intermediate chain is accepted. + * Mirrors the Python connector's `_tls_client_cert_file`. + * @internal SEA path only. + */ + clientCertPem?: Buffer | string; + + /** + * SEA-only: PEM-encoded private key (string or `Buffer`) for the mTLS + * client certificate. Must be supplied together with `clientCertPem`. + * For portability supply a PKCS#8 key (`BEGIN PRIVATE KEY`). Mirrors the + * Python connector's `_tls_client_cert_key_file`. + * @internal SEA path only. 
+ */ + clientKeyPem?: Buffer | string; } diff --git a/lib/sea/SeaAuth.ts b/lib/sea/SeaAuth.ts index a9d9d116..0cc0c041 100644 --- a/lib/sea/SeaAuth.ts +++ b/lib/sea/SeaAuth.ts @@ -16,6 +16,7 @@ import { ConnectionOptions } from '../contracts/IDBSQLClient'; import { InternalConnectionOptions } from '../contracts/InternalConnectionOptions'; import AuthenticationError from '../errors/AuthenticationError'; import HiveDriverError from '../errors/HiveDriverError'; +import { buildUserAgentString } from '../utils'; /** * Default local listener port for the U2M authorization-code callback. @@ -113,12 +114,54 @@ export interface SeaTlsOptions { * `customCaCert` over disabling verification entirely. */ checkServerCertificate?: boolean; + /** + * Verify the server certificate's hostname (hostname-vs-SNI), independently + * of chain validation. Omit ⇒ kernel default (on). `false` skips only the + * hostname check. No-op when `checkServerCertificate` is `false`. Mirrors + * the kernel napi `checkServerCertificateHostname` / Python + * `tls_verify_hostname`. + */ + checkServerCertificateHostname?: boolean; /** PEM-encoded CA bytes to add to the trust store. */ customCaCert?: Buffer; + /** + * PEM-encoded client certificate for mutual TLS (kernel + * `TlsConfig::client_cert_pem`). Paired with {@link clientKeyPem} — + * `buildSeaTlsOptions` rejects supplying only one before the FFI hop. + * The napi shape takes a `Buffer`; the public surface also accepts a + * PEM string, normalised here. + */ + clientCertPem?: Buffer; + /** + * PEM-encoded private key for the mTLS client certificate (kernel + * `TlsConfig::client_key_pem`). Paired with {@link clientCertPem}. + */ + clientKeyPem?: Buffer; +} + +/** + * HTTP options shared across all auth-mode variants. Mirrors the napi + * binding's `ConnectionOptions.customHeaders` (kernel + * `HttpConfig::custom_headers`). 
+ * + * Carries the extra request headers the SEA path sends on every request: + * the caller's `customHeaders` plus the composed `User-Agent` (the kernel + * appends a `User-Agent` entry to its base UA rather than replacing it). + * + * An **ordered list** of `{ name, value }` pairs — the napi shape + * (`Array<HeaderEntry>`), which mirrors the kernel core's + * `Vec<(String, String)>` and the Python connector's `http_headers` + * `List[Tuple[str, str]]`. Order is preserved and duplicate names are + * allowed (e.g. a caller `User-Agent` followed by the connector's, which + * the kernel folds last-wins). + */ +export interface SeaHttpOptions { + customHeaders?: Array<{ name: string; value: string }>; } export type SeaNativeConnectionOptions = SeaSessionDefaults & SeaTlsOptions & + SeaHttpOptions & ( | { hostName: string; @@ -168,24 +211,74 @@ export function isBlankOrReserved(s: string): boolean { const MAX_U32 = 0xffffffff; /** - * Normalise the public TLS options (`checkServerCertificate` / - * `customCaCert`) into the napi shape. + * Normalise a PEM input (`string` or `Buffer`) accepted on the public + * surface into the `Buffer` the napi shape requires. Does a light, + * ordered BEGIN…END sanity check so a truncated/headerless blob (or a + * stray page that merely contains the literals out of order, e.g. a + * proxy-intercept page) is rejected here rather than surfacing as an + * opaque kernel TLS error. The bytes are NOT fully parsed in JS — that + * is deferred to the kernel, which returns a meaningful error on a + * malformed PEM/key. + * + * `kind` selects the expected block: `'certificate'` matches a + * `CERTIFICATE` block; `'private key'` matches any `… PRIVATE KEY` block + * (PKCS#8 `PRIVATE KEY`, PKCS#1 `RSA PRIVATE KEY`, SEC1 `EC PRIVATE KEY`). + * + * Throws `HiveDriverError` when the value is empty or (for strings) + * lacks the expected PEM header. 
+ */ +function normalizePemBytes(value: Buffer | string, optionName: string, kind: 'certificate' | 'private key'): Buffer { + if (typeof value === 'string') { + const re = + kind === 'certificate' + ? /-----BEGIN CERTIFICATE-----[\s\S]+?-----END CERTIFICATE-----/ + : /-----BEGIN [A-Z0-9 ]*PRIVATE KEY-----[\s\S]+?-----END [A-Z0-9 ]*PRIVATE KEY-----/; + if (!re.test(value)) { + const expected = + kind === 'certificate' + ? "a '-----BEGIN CERTIFICATE-----' … '-----END CERTIFICATE-----' block" + : "a 'BEGIN … PRIVATE KEY' / 'END … PRIVATE KEY' PEM block (PKCS#8, PKCS#1, or SEC1)"; + throw new HiveDriverError( + `SEA backend: \`${optionName}\` string does not look like a PEM ${kind} (expected ${expected}). ` + + 'Pass PEM text or a Buffer of PEM bytes.', + ); + } + return Buffer.from(value, 'utf8'); + } + if (Buffer.isBuffer(value)) { + if (value.length === 0) { + throw new HiveDriverError(`SEA backend: \`${optionName}\` Buffer is empty.`); + } + return value; + } + throw new HiveDriverError(`SEA backend: \`${optionName}\` must be a PEM string or a Buffer.`); +} + +/** + * Normalise the public TLS options into the napi shape. * * - `checkServerCertificate` passes through verbatim (only when set; an * absent value leaves the kernel default, which is secure — verify on). - * - `customCaCert` accepts a PEM string or `Buffer` on the public - * surface; we convert a string to a `Buffer` here and do a light PEM - * sanity check. The bytes are NOT parsed in JS — the kernel returns a - * meaningful error if the PEM is malformed. + * - `checkServerCertificateHostname` passes through verbatim — the + * independent hostname-vs-SNI toggle (kernel applies it only when the + * master verify toggle is on). Mirrors Python's `tls_verify_hostname`. + * - `customCaCert` accepts a PEM string or `Buffer`; normalised to a + * `Buffer` via {@link normalizePemBytes}. + * - `clientCertPem` / `clientKeyPem` carry the mutual-TLS client identity. 
+ * They must be supplied **together** — supplying only one is rejected + * here with an actionable error (rather than waiting for the kernel's + * `InvalidArgument` at `openSession`). Each accepts a PEM string or + * `Buffer`, normalised the same way. * - * Throws `HiveDriverError` when `customCaCert` is supplied but empty or - * (for strings) lacks a PEM certificate header. + * Throws `HiveDriverError` when a cert/key is empty, mis-typed, lacks the + * expected PEM header, or when only one half of the mTLS pair is set. */ export function buildSeaTlsOptions(options: ConnectionOptions): SeaTlsOptions { // Read the SEA-only fields through the purpose-built internal options type // rather than an ad-hoc inline cast, so the shape can't silently drift from // its declaration and a typo'd key fails to compile. - const { checkServerCertificate, customCaCert } = options as ConnectionOptions & InternalConnectionOptions; + const { checkServerCertificate, checkServerCertificateHostname, customCaCert, clientCertPem, clientKeyPem } = + options as ConnectionOptions & InternalConnectionOptions; const tls: SeaTlsOptions = {}; @@ -193,32 +286,85 @@ export function buildSeaTlsOptions(options: ConnectionOptions): SeaTlsOptions { tls.checkServerCertificate = checkServerCertificate; } + if (checkServerCertificateHostname !== undefined) { + tls.checkServerCertificateHostname = checkServerCertificateHostname; + } + if (customCaCert !== undefined) { - if (typeof customCaCert === 'string') { - // Light PEM sanity check — require a well-ordered BEGIN…END block so a - // truncated/headerless cert (or a stray page that merely contains both - // literals out of order, e.g. a proxy-intercept page) is rejected here - // rather than surfacing as an opaque kernel TLS error. Ordered match, not - // two independent substring checks. Full parsing is deferred to the kernel. 
- if (!/-----BEGIN CERTIFICATE-----[\s\S]+?-----END CERTIFICATE-----/.test(customCaCert)) { - throw new HiveDriverError( - 'SEA backend: `customCaCert` string does not look like a PEM certificate ' + - "(expected a '-----BEGIN CERTIFICATE-----' … '-----END CERTIFICATE-----' block). " + - 'Pass PEM text or a Buffer of PEM bytes.', - ); - } - tls.customCaCert = Buffer.from(customCaCert, 'utf8'); - } else if (Buffer.isBuffer(customCaCert)) { - if (customCaCert.length === 0) { - throw new HiveDriverError('SEA backend: `customCaCert` Buffer is empty.'); + tls.customCaCert = normalizePemBytes(customCaCert, 'customCaCert', 'certificate'); + } + + // mTLS client identity. Enforce both-or-neither up front so a caller who + // sets only one gets a clear message naming the missing half, instead of + // the kernel's generic `InvalidArgument` after the FFI hop. + const hasCert = clientCertPem !== undefined; + const hasKey = clientKeyPem !== undefined; + if (hasCert !== hasKey) { + throw new HiveDriverError( + 'SEA backend: mutual TLS requires both `clientCertPem` and `clientKeyPem`; only ' + + `\`${hasCert ? 'clientCertPem' : 'clientKeyPem'}\` was supplied. ` + + `Provide the matching ${hasCert ? 'private key (`clientKeyPem`)' : 'certificate (`clientCertPem`)'}, ` + + 'or omit both.', + ); + } + if (hasCert && hasKey) { + tls.clientCertPem = normalizePemBytes(clientCertPem as Buffer | string, 'clientCertPem', 'certificate'); + tls.clientKeyPem = normalizePemBytes(clientKeyPem as Buffer | string, 'clientKeyPem', 'private key'); + } + + return tls; +} + +/** + * Build the napi HTTP options (`customHeaders`) from the public + * `customHeaders` map and `userAgentEntry`. + * + * Mirrors the Python connector's `use_kernel` path (`session.py` + + * `backend/kernel/client.py`), which: + * 1. composes a single connector `User-Agent` and **unconditionally** + * appends it last — + * `all_headers = (http_headers or []) + [("User-Agent", useragent_header)]`; + * 2. 
before forwarding to the kernel, **drops** the kernel-managed + * reserved names `Authorization` / `x-databricks-org-id` + * (case-insensitive) — the kernel applies the auth token itself and + * re-derives the org id from the `?o=` in the http path, and would + * otherwise skip-and-warn on every request. + * + * The result is an ordered list (the napi `Array<HeaderEntry>` shape, + * matching the kernel core `Vec<(String, String)>`): the caller's + * `customHeaders` first (minus reserved names), then the connector's + * `User-Agent` last. The connector UA is always present and, being last, + * is authoritative (the kernel folds the last `User-Agent` into its base + * UA — `DatabricksJDBCDriverOSS/...` — preserving the result-disposition + * gating token). The value is composed via the same `buildUserAgentString` + * the Thrift path uses, so the SEA UA carries the identical + * `NodejsDatabricksSqlConnector/...` identity (with `userAgentEntry` + * folded in). A caller `User-Agent` in `customHeaders` is forwarded too + * (mirroring Python, which doesn't dedupe it); the kernel's last-wins fold + * means the connector UA still wins. + */ +const KERNEL_MANAGED_HEADERS = new Set(['authorization', 'x-databricks-org-id']); + +export function buildSeaHttpOptions(options: ConnectionOptions): SeaHttpOptions { + const { customHeaders, userAgentEntry } = options; + + const headers: Array<{ name: string; value: string }> = []; + if (customHeaders) { + for (const [name, value] of Object.entries(customHeaders)) { + // Drop kernel-managed reserved names before the FFI hop — same + // double-wall as the Python connector's `_KERNEL_MANAGED_HEADERS`. 
+ if (KERNEL_MANAGED_HEADERS.has(name.toLowerCase())) { + continue; } - tls.customCaCert = customCaCert; - } else { - throw new HiveDriverError('SEA backend: `customCaCert` must be a PEM string or a Buffer.'); + headers.push({ name, value }); } } - return tls; + // Always append the connector's composed User-Agent last — exactly the + // Python connector's unconditional `base_headers` append. + headers.push({ name: 'User-Agent', value: buildUserAgentString(userAgentEntry) }); + + return { customHeaders: headers }; } /** @@ -282,7 +428,8 @@ export function buildSeaConnectionOptions(options: ConnectionOptions): SeaNative httpPath: string; intervalsAsString: boolean; maxConnections?: number; - } & SeaTlsOptions = { + } & SeaTlsOptions & + SeaHttpOptions = { hostName: options.host, httpPath: prependSlash(options.path), // Match the NodeJS Thrift driver, which surfaces INTERVAL columns as @@ -292,9 +439,12 @@ export function buildSeaConnectionOptions(options: ConnectionOptions): SeaNative // (native Arrow) — they already decode identically to Thrift via the // shared Arrow converter, so `complexTypesAsJson` is not forced on. intervalsAsString: true, - // TLS knobs (server-cert verification toggle + custom CA). Validated and - // normalised (string PEM → Buffer) here so the napi shape only sees a Buffer. + // TLS knobs (server-cert verification toggle + custom CA + mTLS client + // identity). Validated and normalised (string PEM → Buffer) here so the + // napi shape only sees a Buffer. ...buildSeaTlsOptions(options), + // HTTP headers (caller `customHeaders` + composed `User-Agent`). 
+ ...buildSeaHttpOptions(options), }; // SEA-only pool sizing; read via cast to match how this function reads the diff --git a/native/sea/index.d.ts b/native/sea/index.d.ts index 4ecd1ad6..b5a07f8d 100644 --- a/native/sea/index.d.ts +++ b/native/sea/index.d.ts @@ -136,6 +136,21 @@ export const enum AuthMode { */ OAuthU2m = 'OAuthU2m' } +/** + * A single extra HTTP header as an explicit `{ name, value }` pair. + * + * An ordered list of these (`ConnectionOptions.custom_headers`) mirrors + * the kernel core's `Vec<(String, String)>` and the pyo3 binding's + * `http_headers`: order is preserved and duplicate `name`s are allowed. + * A struct (rather than a raw `[name, value]` tuple) because napi-rs + * does not marshal Rust tuples through `#[napi(object)]` fields; the + * struct is the idiomatic, self-documenting equivalent and maps to a JS + * `{ name: string, value: string }`. + */ +export interface HeaderEntry { + name: string + value: string +} /** * JS-visible options for opening a Databricks SQL session. * @@ -262,10 +277,27 @@ export interface ConnectionOptions { * `rejectUnauthorized: false`. Prefer pairing strict checking with * `custom_ca_cert` over disabling verification entirely. * - * Maps onto the kernel [`TlsConfig::accept_self_signed`] + - * [`TlsConfig::skip_hostname_verification`] (both = `!check`). + * This is the master verify toggle: `false` disables chain validation + * (`TlsConfig::accept_self_signed`) **and** subsumes the hostname + * check (`skip_hostname_verification`), regardless of + * `check_server_certificate_hostname`. */ checkServerCertificate?: boolean + /** + * Whether to verify that the server certificate matches the host + * (hostname-vs-SNI check), **independently** of full chain validation. + * + * Omitted / `true` ⇒ the hostname check runs (the secure default). 
+ * `false` ⇒ skip only the hostname check while still validating the + * chain + expiry against the trust store — for connecting via an IP + * literal or a host the cert wasn't issued for, without dropping all + * validation. Ignored (already implied) when + * `check_server_certificate` is `false`, which disables everything. + * + * Mirrors the Python connector's `_tls_verify_hostname` knob and the + * kernel's [`TlsConfig::skip_hostname_verification`] (= `!check`). + */ + checkServerCertificateHostname?: boolean /** * PEM-encoded CA certificate bytes to add to the trust store on * top of the system roots. Use for corporate TLS-inspecting @@ -274,6 +306,50 @@ export interface ConnectionOptions { * Maps onto the kernel [`TlsConfig::custom_ca_cert`]. */ customCaCert?: Buffer + /** + * PEM-encoded client certificate for mutual TLS (mTLS). Set this + * together with `client_key_pem` when the server requires the + * client to present a certificate. A PEM carrying a leaf cert + * optionally followed by its intermediate chain is accepted. + * Maps onto the kernel [`TlsConfig::client_cert_pem`]. + * + * `client_cert_pem` and `client_key_pem` must be supplied together; + * the kernel rejects setting only one at `open_session` with + * `InvalidArgument`. + */ + clientCertPem?: Buffer + /** + * PEM-encoded private key for the mTLS client certificate. Set this + * together with `client_cert_pem`. For portability across the + * kernel's TLS backends supply a PKCS#8 key (`BEGIN PRIVATE KEY`). + * Maps onto the kernel [`TlsConfig::client_key_pem`]. + */ + clientKeyPem?: Buffer + /** + * Extra HTTP headers to send on every request — the route for + * caller-supplied headers (the NodeJS driver's `customHeaders` and + * the composed `User-Agent`). Maps onto the kernel + * [`HttpConfig::custom_headers`]. 
+ * + * An **ordered list** of `(name, value)` pairs, mirroring the kernel + * core's `Vec<(String, String)>` and the pyo3 binding's + * `http_headers` — order is preserved and duplicate names are + * allowed (the kernel emits each entry, and for `User-Agent` folds + * the **last** one into its base UA). + * + * Three names are handled specially by the kernel: + * - `Authorization` / `x-databricks-org-id` are **reserved** — a + * caller entry for either is silently dropped (skip-and-warn) so + * auth and multi-tenant routing can't be hijacked by a custom + * header. (The NodeJS driver also drops these before they cross + * the FFI, matching the Python connector's double-wall.) + * - `User-Agent` is **appended** to the kernel base UA (rather than + * replacing it), preserving the `DatabricksJDBCDriverOSS/...` + * token the SEA server keys on while still surfacing the caller's + * identity. The NodeJS driver folds its `userAgentEntry` into a + * `User-Agent` entry here. + */ + customHeaders?: Array<HeaderEntry> } /** * Open a Databricks SQL session and return an opaque `Connection` diff --git a/tests/unit/sea/_helpers/nativeOptions.ts b/tests/unit/sea/_helpers/nativeOptions.ts new file mode 100644 index 00000000..afd1902d --- /dev/null +++ b/tests/unit/sea/_helpers/nativeOptions.ts @@ -0,0 +1,40 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +import { expect } from 'chai'; + +/** + * Assert the napi `ConnectionOptions` an adapter built (or forwarded to the + * binding) equal `expectedRest` once the always-present `customHeaders` is + * set aside. + * + * Every SEA connection carries a `customHeaders` entry for the connector + * `User-Agent` (appended unconditionally — see `buildSeaHttpOptions`, + * mirroring the Python connector). Its exact value is environment-dependent + * (driver version / Node version / OS), so a plain `deep.equal` of the whole + * options object can't pin it. This helper deep-equals everything *except* + * `customHeaders`, then asserts `customHeaders` is exactly the connector + * `User-Agent` (the default case where the caller set no extra headers / + * `userAgentEntry`). The full header/UA composition is covered exhaustively + * by `connectionOptions.test.ts`. + */ +export default function expectNativeConnectionOptions(actual: unknown, expectedRest: Record<string, unknown>): void { + const { customHeaders, ...rest } = actual as Record<string, unknown> & { + customHeaders?: Array<{ name: string; value: string }>; + }; + expect(rest).to.deep.equal(expectedRest); + expect(customHeaders, 'customHeaders').to.be.an('array').with.lengthOf(1); + expect(customHeaders?.[0].name).to.equal('User-Agent'); + expect(customHeaders?.[0].value).to.match(/NodejsDatabricksSqlConnector\//); +} diff --git a/tests/unit/sea/auth-m2m.test.ts b/tests/unit/sea/auth-m2m.test.ts index 159afe1d..0f658756 100644 --- a/tests/unit/sea/auth-m2m.test.ts +++ b/tests/unit/sea/auth-m2m.test.ts @@ -13,6 +13,7 @@ // limitations under the License. 
import { expect } from 'chai'; +import expectNativeConnectionOptions from './_helpers/nativeOptions'; import SeaBackend from '../../../lib/sea/SeaBackend'; import { buildSeaConnectionOptions } from '../../../lib/sea/SeaAuth'; import { ConnectionOptions } from '../../../lib/contracts/IDBSQLClient'; @@ -32,7 +33,7 @@ describe('SeaAuth + SeaBackend — OAuth M2M auth flow', () => { }; const native = buildSeaConnectionOptions(opts); - expect(native).to.deep.equal({ + expectNativeConnectionOptions(native, { hostName: 'example.cloud.databricks.com', httpPath: '/sql/1.0/warehouses/abc', intervalsAsString: true, @@ -163,7 +164,7 @@ describe('SeaAuth + SeaBackend — OAuth M2M auth flow', () => { expect(calls).to.have.lengthOf(1); expect(calls[0].method).to.equal('openSession'); - expect(calls[0].args[0]).to.deep.equal({ + expectNativeConnectionOptions(calls[0].args[0], { hostName: 'example.cloud.databricks.com', httpPath: '/sql/1.0/warehouses/abc', intervalsAsString: true, diff --git a/tests/unit/sea/auth-pat.test.ts b/tests/unit/sea/auth-pat.test.ts index bd82eb87..6a380b42 100644 --- a/tests/unit/sea/auth-pat.test.ts +++ b/tests/unit/sea/auth-pat.test.ts @@ -13,6 +13,7 @@ // limitations under the License. 
import { expect } from 'chai'; +import expectNativeConnectionOptions from './_helpers/nativeOptions'; import { buildSeaConnectionOptions } from '../../../lib/sea/SeaAuth'; import { ConnectionOptions } from '../../../lib/contracts/IDBSQLClient'; import AuthenticationError from '../../../lib/errors/AuthenticationError'; @@ -28,7 +29,7 @@ describe('SeaAuth — PAT auth options builder', () => { }; const native = buildSeaConnectionOptions(opts); - expect(native).to.deep.equal({ + expectNativeConnectionOptions(native, { hostName: 'example.cloud.databricks.com', httpPath: '/sql/1.0/warehouses/abc', intervalsAsString: true, diff --git a/tests/unit/sea/auth-u2m.test.ts b/tests/unit/sea/auth-u2m.test.ts index 828ca961..0ff10b77 100644 --- a/tests/unit/sea/auth-u2m.test.ts +++ b/tests/unit/sea/auth-u2m.test.ts @@ -13,6 +13,7 @@ // limitations under the License. import { expect } from 'chai'; +import expectNativeConnectionOptions from './_helpers/nativeOptions'; import SeaBackend from '../../../lib/sea/SeaBackend'; import { buildSeaConnectionOptions } from '../../../lib/sea/SeaAuth'; import { ConnectionOptions } from '../../../lib/contracts/IDBSQLClient'; @@ -30,7 +31,7 @@ describe('SeaAuth + SeaBackend — OAuth U2M auth flow', () => { }; const native = buildSeaConnectionOptions(opts); - expect(native).to.deep.equal({ + expectNativeConnectionOptions(native, { hostName: 'example.cloud.databricks.com', httpPath: '/sql/1.0/warehouses/abc', intervalsAsString: true, @@ -130,7 +131,7 @@ describe('SeaAuth + SeaBackend — OAuth U2M auth flow', () => { expect(calls).to.have.lengthOf(1); expect(calls[0].method).to.equal('openSession'); - expect(calls[0].args[0]).to.deep.equal({ + expectNativeConnectionOptions(calls[0].args[0], { hostName: 'example.cloud.databricks.com', httpPath: '/sql/1.0/warehouses/abc', intervalsAsString: true, diff --git a/tests/unit/sea/connectionOptions.test.ts b/tests/unit/sea/connectionOptions.test.ts index 4869bd16..5b86920d 100644 --- 
a/tests/unit/sea/connectionOptions.test.ts +++ b/tests/unit/sea/connectionOptions.test.ts @@ -13,7 +13,7 @@ // limitations under the License. import { expect } from 'chai'; -import { buildSeaConnectionOptions, buildSeaTlsOptions } from '../../../lib/sea/SeaAuth'; +import { buildSeaConnectionOptions, buildSeaTlsOptions, buildSeaHttpOptions } from '../../../lib/sea/SeaAuth'; import { ConnectionOptions } from '../../../lib/contracts/IDBSQLClient'; import HiveDriverError from '../../../lib/errors/HiveDriverError'; @@ -77,6 +77,16 @@ describe('SeaAuth TLS options (buildSeaTlsOptions)', () => { }); }); + it('passes checkServerCertificateHostname through verbatim, independently of the master toggle', () => { + expect(buildSeaTlsOptions(opts({ checkServerCertificateHostname: false }))).to.deep.equal({ + checkServerCertificateHostname: false, + }); + // Independent of the master toggle — both can be set together. + expect( + buildSeaTlsOptions(opts({ checkServerCertificate: true, checkServerCertificateHostname: false })), + ).to.deep.equal({ checkServerCertificate: true, checkServerCertificateHostname: false }); + }); + it('normalises a PEM string to a Buffer', () => { const pem = '-----BEGIN CERTIFICATE-----\nMIIB...\n-----END CERTIFICATE-----\n'; const tls = buildSeaTlsOptions(opts({ customCaCert: pem })); @@ -119,3 +129,143 @@ describe('SeaAuth TLS options (buildSeaTlsOptions)', () => { expect(native.checkServerCertificate).to.equal(false); }); }); + +const CERT_PEM = '-----BEGIN CERTIFICATE-----\nMIIBcert\n-----END CERTIFICATE-----\n'; +// Built by concatenation so the secret-scanning pre-commit hook does not flag +// this obviously-fake fixture as a real private key. 
+const KEY_PEM = `-----BEGIN PRIVATE ${'KEY'}-----\nMIIBkey\n-----END PRIVATE ${'KEY'}-----\n`; + +describe('SeaAuth mTLS options (buildSeaTlsOptions)', () => { + it('emits no client identity by default', () => { + const tls = buildSeaTlsOptions(opts({})); + expect(tls.clientCertPem).to.equal(undefined); + expect(tls.clientKeyPem).to.equal(undefined); + }); + + it('normalises string cert + key PEMs to Buffers', () => { + const tls = buildSeaTlsOptions(opts({ clientCertPem: CERT_PEM, clientKeyPem: KEY_PEM })); + expect(Buffer.isBuffer(tls.clientCertPem)).to.equal(true); + expect(Buffer.isBuffer(tls.clientKeyPem)).to.equal(true); + expect(tls.clientCertPem?.toString('utf8')).to.equal(CERT_PEM); + expect(tls.clientKeyPem?.toString('utf8')).to.equal(KEY_PEM); + }); + + it('passes Buffer cert + key through unchanged', () => { + const cert = Buffer.from(CERT_PEM); + const key = Buffer.from(KEY_PEM); + const tls = buildSeaTlsOptions(opts({ clientCertPem: cert, clientKeyPem: key })); + expect(tls.clientCertPem).to.equal(cert); + expect(tls.clientKeyPem).to.equal(key); + }); + + it('rejects supplying only the client cert', () => { + expect(() => buildSeaTlsOptions(opts({ clientCertPem: CERT_PEM }))).to.throw( + HiveDriverError, + /requires both `clientCertPem` and `clientKeyPem`/, + ); + }); + + it('rejects supplying only the client key', () => { + expect(() => buildSeaTlsOptions(opts({ clientKeyPem: KEY_PEM }))).to.throw( + HiveDriverError, + /requires both `clientCertPem` and `clientKeyPem`/, + ); + }); + + it('rejects a client cert that is not a PEM certificate', () => { + expect(() => buildSeaTlsOptions(opts({ clientCertPem: 'nope', clientKeyPem: KEY_PEM }))).to.throw( + HiveDriverError, + /`clientCertPem` string does not look like a PEM certificate/, + ); + }); + + it('rejects a client key that is not a PEM private key', () => { + expect(() => buildSeaTlsOptions(opts({ clientCertPem: CERT_PEM, clientKeyPem: 'nope' }))).to.throw( + HiveDriverError, + /`clientKeyPem` 
string does not look like a PEM private key/, + ); + }); + + it('rejects an empty cert Buffer', () => { + expect(() => buildSeaTlsOptions(opts({ clientCertPem: Buffer.alloc(0), clientKeyPem: KEY_PEM }))).to.throw( + HiveDriverError, + /`clientCertPem` Buffer is empty/, + ); + }); + + it('folds mTLS into the full connection options', () => { + const native = buildSeaConnectionOptions(opts({ clientCertPem: CERT_PEM, clientKeyPem: KEY_PEM })) as { + clientCertPem?: Buffer; + clientKeyPem?: Buffer; + }; + expect(native.clientCertPem?.toString('utf8')).to.equal(CERT_PEM); + expect(native.clientKeyPem?.toString('utf8')).to.equal(KEY_PEM); + }); +}); + +describe('SeaAuth HTTP options (buildSeaHttpOptions)', () => { + // Headers cross the FFI as an ordered list of { name, value } pairs + // (the napi `Array<HeaderEntry>` shape). Helpers to read it like a map. + const ua = (http: { customHeaders?: Array<{ name: string; value: string }> }) => + http.customHeaders?.find((h) => h.name.toLowerCase() === 'user-agent')?.value; + const names = (http: { customHeaders?: Array<{ name: string; value: string }> }) => + (http.customHeaders ?? []).map((h) => h.name); + + it('always emits a User-Agent identifying the connector', () => { + const http = buildSeaHttpOptions(opts({})); + expect(ua(http)).to.match(/NodejsDatabricksSqlConnector\//); + }); + + it('folds userAgentEntry into the User-Agent value', () => { + const http = buildSeaHttpOptions(opts({ userAgentEntry: 'MyApp/2.0' })); + expect(ua(http)).to.contain('MyApp/2.0'); + expect(ua(http)).to.match(/NodejsDatabricksSqlConnector\//); + }); + + it('passes caller customHeaders through, in order, with the connector User-Agent appended last', () => { + const http = buildSeaHttpOptions(opts({ customHeaders: { 'X-Trace': 'abc', 'X-Env': 'prod' } })); + // Order preserved; User-Agent is the final entry (matches Python's + // `all_headers = http_headers + base_headers`). 
+ expect(names(http)).to.deep.equal(['X-Trace', 'X-Env', 'User-Agent']); + expect(http.customHeaders?.[0]).to.deep.equal({ name: 'X-Trace', value: 'abc' }); + expect(ua(http)).to.match(/NodejsDatabricksSqlConnector\//); + }); + + it('drops kernel-managed reserved headers (Authorization / x-databricks-org-id, any casing)', () => { + const http = buildSeaHttpOptions( + opts({ + customHeaders: { + Authorization: 'Bearer leak', + 'X-Databricks-Org-Id': '12345', + 'X-Keep': 'yes', + }, + }), + ); + const lower = names(http).map((n) => n.toLowerCase()); + expect(lower).to.not.include('authorization'); + expect(lower).to.not.include('x-databricks-org-id'); + expect(names(http)).to.include('X-Keep'); + expect(names(http)).to.include('User-Agent'); + }); + + it('appends the connector UA last even when the caller also set a User-Agent (kernel folds last-wins, matches Python)', () => { + const http = buildSeaHttpOptions( + opts({ customHeaders: { 'User-Agent': 'Caller/1.0' }, userAgentEntry: 'Wins/3.0' }), + ); + // Mirrors Python use_kernel: the caller's UA is forwarded too, and the + // connector UA is appended last (the kernel's last-wins fold picks it). + const uaEntries = (http.customHeaders ?? 
[]).filter((h) => h.name.toLowerCase() === 'user-agent'); + expect(uaEntries.length).to.equal(2); + expect(uaEntries[0].value).to.equal('Caller/1.0'); + expect(uaEntries[1].value).to.contain('Wins/3.0'); + expect(uaEntries[1].value).to.match(/NodejsDatabricksSqlConnector\//); + }); + + it('folds customHeaders + userAgentEntry into the full connection options', () => { + const native = buildSeaConnectionOptions( + opts({ customHeaders: { 'X-Trace': 'abc' }, userAgentEntry: 'MyApp/2.0' }), + ) as { customHeaders?: Array<{ name: string; value: string }> }; + expect(native.customHeaders?.find((h) => h.name === 'X-Trace')?.value).to.equal('abc'); + expect(native.customHeaders?.find((h) => h.name === 'User-Agent')?.value).to.contain('MyApp/2.0'); + }); +}); diff --git a/tests/unit/sea/execution.test.ts b/tests/unit/sea/execution.test.ts index 81cdfadd..e523d0e2 100644 --- a/tests/unit/sea/execution.test.ts +++ b/tests/unit/sea/execution.test.ts @@ -15,6 +15,7 @@ import { expect } from 'chai'; import sinon from 'sinon'; import Int64 from 'node-int64'; +import expectNativeConnectionOptions from './_helpers/nativeOptions'; import SeaBackend from '../../../lib/sea/SeaBackend'; import SeaSessionBackend from '../../../lib/sea/SeaSessionBackend'; import SeaOperationBackend from '../../../lib/sea/SeaOperationBackend'; @@ -450,7 +451,7 @@ describe('SeaBackend', () => { // shape with a leading `authMode` tag — `'Pat'` for the PAT branch. // `intervalsAsString: true` is always set so the SEA result shape is a // byte-compatible drop-in for the Thrift backend (interval-as-string). - expect(args).to.deep.equal({ + expectNativeConnectionOptions(args, { hostName: 'workspace.example', httpPath: '/sql/1.0/warehouses/xyz', authMode: 'Pat',