Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion KERNEL_REV
Original file line number Diff line number Diff line change
@@ -1 +1 @@
8bedaabf69f5bce5a957a8775f29dbb8dbdd2e71
7f8353f39665e7ac0fcc31a052fd2271caba1f67
35 changes: 34 additions & 1 deletion lib/contracts/InternalConnectionOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,49 @@ export interface InternalConnectionOptions {
/**
* SEA-only: verify the server's TLS certificate. Secure-by-default — omit
* to keep full chain + hostname verification; set `false` only to opt into
* the insecure accept-anything mode. This is the master verify toggle:
* `false` also subsumes the hostname check (see
* `checkServerCertificateHostname`). Mirrors the Python connector's
* `_tls_no_verify` (inverted).
* @internal SEA path only.
*/
checkServerCertificate?: boolean;

/**
* SEA-only: verify that the server certificate matches the host
* (hostname-vs-SNI check), independently of full chain validation. Omit
* to keep the secure default (on); set `false` to skip only the hostname
* check while still validating the chain — e.g. connecting via an IP
* literal or a host the cert wasn't issued for. No-op when
* `checkServerCertificate` is `false` (that disables everything). Mirrors
* the Python connector's `_tls_verify_hostname`.
* @internal SEA path only.
*/
checkServerCertificateHostname?: boolean;

/**
* SEA-only: PEM-encoded CA certificate (string or `Buffer`) added to the
* trust store on top of the system roots — for TLS-inspecting proxies or
* on-prem internal CAs. Honoured regardless of `checkServerCertificate`.
* @internal SEA path only.
*/
customCaCert?: Buffer | string;

/**
* SEA-only: PEM-encoded client certificate (string or `Buffer`) for
* mutual TLS (mTLS). Must be supplied together with `clientKeyPem`; a
* leaf cert optionally followed by its intermediate chain is accepted.
* Mirrors the Python connector's `_tls_client_cert_file`.
* @internal SEA path only.
*/
clientCertPem?: Buffer | string;

/**
* SEA-only: PEM-encoded private key (string or `Buffer`) for the mTLS
* client certificate. Must be supplied together with `clientCertPem`.
* For portability supply a PKCS#8 key (`BEGIN PRIVATE KEY`). Mirrors the
* Python connector's `_tls_client_cert_key_file`.
* @internal SEA path only.
*/
clientKeyPem?: Buffer | string;
}
216 changes: 183 additions & 33 deletions lib/sea/SeaAuth.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import { ConnectionOptions } from '../contracts/IDBSQLClient';
import { InternalConnectionOptions } from '../contracts/InternalConnectionOptions';
import AuthenticationError from '../errors/AuthenticationError';
import HiveDriverError from '../errors/HiveDriverError';
import { buildUserAgentString } from '../utils';

/**
* Default local listener port for the U2M authorization-code callback.
Expand Down Expand Up @@ -113,12 +114,54 @@ export interface SeaTlsOptions {
* `customCaCert` over disabling verification entirely.
*/
checkServerCertificate?: boolean;
/**
* Verify the server certificate's hostname (hostname-vs-SNI), independently
* of chain validation. Omit ⇒ kernel default (on). `false` skips only the
* hostname check. No-op when `checkServerCertificate` is `false`. Mirrors
* the kernel napi `checkServerCertificateHostname` / Python
* `tls_verify_hostname`.
*/
checkServerCertificateHostname?: boolean;
/** PEM-encoded CA bytes to add to the trust store. */
customCaCert?: Buffer;
/**
* PEM-encoded client certificate for mutual TLS (kernel
* `TlsConfig::client_cert_pem`). Paired with {@link clientKeyPem} —
* `buildSeaTlsOptions` rejects supplying only one before the FFI hop.
* The napi shape takes a `Buffer`; the public surface also accepts a
* PEM string, normalised here.
*/
clientCertPem?: Buffer;
/**
* PEM-encoded private key for the mTLS client certificate (kernel
* `TlsConfig::client_key_pem`). Paired with {@link clientCertPem}.
*/
clientKeyPem?: Buffer;
}

/**
 * HTTP-level options common to every auth-mode variant. Corresponds to the
 * napi binding's `ConnectionOptions.customHeaders` (kernel
 * `HttpConfig::custom_headers`).
 *
 * `customHeaders` holds the additional headers the SEA path attaches to
 * each request: whatever the caller passed as `customHeaders`, plus the
 * composed `User-Agent` (which the kernel appends to its own base UA
 * instead of replacing it).
 *
 * The headers are an **ordered list** of `{ name, value }` pairs. This is
 * the napi `Array<HeaderEntry>` shape, mirroring the kernel core's
 * `Vec<(String, String)>` and the Python connector's `http_headers`
 * (`List[Tuple[str, str]]`). Ordering is kept and repeated names are
 * permitted — e.g. a caller-supplied `User-Agent` followed by the
 * connector's, which the kernel folds last-wins.
 */
export interface SeaHttpOptions {
  /** Ordered extra request headers; duplicate names are allowed. */
  customHeaders?: { name: string; value: string }[];
}

export type SeaNativeConnectionOptions = SeaSessionDefaults &
SeaTlsOptions &
SeaHttpOptions &
(
| {
hostName: string;
Expand Down Expand Up @@ -168,57 +211,160 @@ export function isBlankOrReserved(s: string): boolean {
const MAX_U32 = 0xffffffff;

/**
 * Normalise a PEM input (`string` or `Buffer`) accepted on the public
 * surface into the `Buffer` the napi shape requires.
 *
 * String inputs get a light, ordered BEGIN…END sanity check so a
 * truncated/headerless blob (or a stray page that merely contains the
 * literals out of order, e.g. a proxy-intercept page) is rejected here
 * rather than surfacing as an opaque kernel TLS error. For private keys the
 * END label must repeat the BEGIN label exactly, so a block that opens as
 * `RSA PRIVATE KEY` and closes as `EC PRIVATE KEY` is rejected too.
 * `Buffer` inputs are only checked for being non-empty and are returned
 * as-is (same instance, no copy).
 *
 * The bytes are NOT fully parsed in JS — that is deferred to the kernel,
 * which returns a meaningful error on a malformed PEM/key.
 *
 * @param value - PEM text or a `Buffer` of PEM bytes.
 * @param optionName - Public option name, used only in error messages.
 * @param kind - Expected block: `'certificate'` matches a `CERTIFICATE`
 *   block; `'private key'` matches any `… PRIVATE KEY` block (PKCS#8
 *   `PRIVATE KEY`, PKCS#1 `RSA PRIVATE KEY`, SEC1 `EC PRIVATE KEY`).
 * @returns The PEM bytes as a `Buffer` (UTF-8 encoded for string input).
 * @throws HiveDriverError when a string lacks the expected PEM block, a
 *   `Buffer` is empty, or the value is neither a string nor a `Buffer`.
 */
function normalizePemBytes(value: Buffer | string, optionName: string, kind: 'certificate' | 'private key'): Buffer {
  if (typeof value === 'string') {
    // Ordered match (BEGIN before END), not two independent substring checks.
    // The private-key pattern captures the BEGIN label and back-references it
    // in the END marker so mismatched labels do not slip through.
    const re =
      kind === 'certificate'
        ? /-----BEGIN CERTIFICATE-----[\s\S]+?-----END CERTIFICATE-----/
        : /-----BEGIN ([A-Z0-9 ]*PRIVATE KEY)-----[\s\S]+?-----END \1-----/;
    if (!re.test(value)) {
      const expected =
        kind === 'certificate'
          ? "a '-----BEGIN CERTIFICATE-----' … '-----END CERTIFICATE-----' block"
          : "a 'BEGIN … PRIVATE KEY' / 'END … PRIVATE KEY' PEM block (PKCS#8, PKCS#1, or SEC1)";
      throw new HiveDriverError(
        `SEA backend: \`${optionName}\` string does not look like a PEM ${kind} (expected ${expected}). ` +
          'Pass PEM text or a Buffer of PEM bytes.',
      );
    }
    return Buffer.from(value, 'utf8');
  }
  if (Buffer.isBuffer(value)) {
    if (value.length === 0) {
      throw new HiveDriverError(`SEA backend: \`${optionName}\` Buffer is empty.`);
    }
    return value;
  }
  // Reachable from untyped JS callers (e.g. `null`, a number, a plain object).
  throw new HiveDriverError(`SEA backend: \`${optionName}\` must be a PEM string or a Buffer.`);
}

/**
 * Normalise the public TLS options into the napi shape.
 *
 * - `checkServerCertificate` passes through verbatim (only when set; an
 *   absent value leaves the kernel default, which is secure — verify on).
 * - `checkServerCertificateHostname` passes through verbatim — the
 *   independent hostname-vs-SNI toggle (kernel applies it only when the
 *   master verify toggle is on). Mirrors Python's `tls_verify_hostname`.
 * - `customCaCert` accepts a PEM string or `Buffer`; normalised to a
 *   `Buffer` via {@link normalizePemBytes}.
 * - `clientCertPem` / `clientKeyPem` carry the mutual-TLS client identity.
 *   They must be supplied **together** — supplying only one is rejected
 *   here with an actionable error (rather than waiting for the kernel's
 *   `InvalidArgument` at `openSession`). Each accepts a PEM string or
 *   `Buffer`, normalised the same way.
 *
 * Only keys the caller actually set appear on the result, so unset options
 * keep the kernel defaults.
 *
 * @param options - Public connection options; the SEA-only TLS fields are
 *   read through `InternalConnectionOptions`.
 * @returns The TLS subset of the napi connection options.
 * @throws HiveDriverError when a cert/key is empty, mis-typed, lacks the
 *   expected PEM header, or when only one half of the mTLS pair is set.
 */
export function buildSeaTlsOptions(options: ConnectionOptions): SeaTlsOptions {
  // Read the SEA-only fields through the purpose-built internal options type
  // rather than an ad-hoc inline cast, so the shape can't silently drift from
  // its declaration and a typo'd key fails to compile.
  const { checkServerCertificate, checkServerCertificateHostname, customCaCert, clientCertPem, clientKeyPem } =
    options as ConnectionOptions & InternalConnectionOptions;

  const tls: SeaTlsOptions = {};

  if (checkServerCertificate !== undefined) {
    tls.checkServerCertificate = checkServerCertificate;
  }

  if (checkServerCertificateHostname !== undefined) {
    tls.checkServerCertificateHostname = checkServerCertificateHostname;
  }

  if (customCaCert !== undefined) {
    tls.customCaCert = normalizePemBytes(customCaCert, 'customCaCert', 'certificate');
  }

  // mTLS client identity. Enforce both-or-neither up front so a caller who
  // sets only one gets a clear message naming the missing half, instead of
  // the kernel's generic `InvalidArgument` after the FFI hop.
  const hasCert = clientCertPem !== undefined;
  const hasKey = clientKeyPem !== undefined;
  if (hasCert !== hasKey) {
    throw new HiveDriverError(
      'SEA backend: mutual TLS requires both `clientCertPem` and `clientKeyPem`; only ' +
        `\`${hasCert ? 'clientCertPem' : 'clientKeyPem'}\` was supplied. ` +
        `Provide the matching ${hasCert ? 'private key (`clientKeyPem`)' : 'certificate (`clientCertPem`)'}, ` +
        'or omit both.',
    );
  }
  // Test the values themselves (not the derived booleans) so the compiler
  // narrows both to `Buffer | string` without type assertions.
  if (clientCertPem !== undefined && clientKeyPem !== undefined) {
    tls.clientCertPem = normalizePemBytes(clientCertPem, 'clientCertPem', 'certificate');
    tls.clientKeyPem = normalizePemBytes(clientKeyPem, 'clientKeyPem', 'private key');
  }

  return tls;
}

/**
 * Header names (lower-cased, for case-insensitive matching) that the kernel
 * manages itself: it applies the auth token and re-derives the org id from
 * the `?o=` in the http path, and would otherwise skip-and-warn on every
 * request. Same set as the Python connector's `_KERNEL_MANAGED_HEADERS`.
 */
const KERNEL_MANAGED_HEADERS = new Set(['authorization', 'x-databricks-org-id']);

/**
 * Build the napi HTTP options (`customHeaders`) from the public
 * `customHeaders` map and `userAgentEntry`.
 *
 * Mirrors the Python connector's `use_kernel` path (`session.py` +
 * `backend/kernel/client.py`), which:
 *   1. composes a single connector `User-Agent` and **unconditionally**
 *      appends it last —
 *      `all_headers = (http_headers or []) + [("User-Agent", useragent_header)]`;
 *   2. before forwarding to the kernel, **drops** the kernel-managed
 *      reserved names `Authorization` / `x-databricks-org-id`
 *      (case-insensitive).
 *
 * The result is an ordered list (the napi `Array<HeaderEntry>` shape,
 * matching the kernel core `Vec<(String, String)>`): the caller's
 * `customHeaders` first (minus reserved names), then the connector's
 * `User-Agent` last. The connector UA is always present and, being last,
 * is authoritative (the kernel folds the last `User-Agent` into its base
 * UA — `DatabricksJDBCDriverOSS/...` — preserving the result-disposition
 * gating token). The value is composed via the same `buildUserAgentString`
 * the Thrift path uses, so the SEA UA carries the identical
 * `NodejsDatabricksSqlConnector/...` identity (with `userAgentEntry`
 * folded in). A caller `User-Agent` in `customHeaders` is forwarded too
 * (mirroring Python, which doesn't dedupe it); the kernel's last-wins fold
 * means the connector UA still wins.
 *
 * @param options - Public connection options; only `customHeaders` and
 *   `userAgentEntry` are read.
 * @returns An object whose `customHeaders` list always contains at least
 *   the connector `User-Agent` entry.
 */
export function buildSeaHttpOptions(options: ConnectionOptions): SeaHttpOptions {
  const { customHeaders, userAgentEntry } = options;

  const headers: Array<{ name: string; value: string }> = [];
  if (customHeaders) {
    for (const [name, value] of Object.entries(customHeaders)) {
      // Drop kernel-managed reserved names before the FFI hop — same
      // double-wall as the Python connector's `_KERNEL_MANAGED_HEADERS`.
      if (KERNEL_MANAGED_HEADERS.has(name.toLowerCase())) {
        continue;
      }
      headers.push({ name, value });
    }
  }

  // Always append the connector's composed User-Agent last — exactly the
  // Python connector's unconditional `base_headers` append.
  headers.push({ name: 'User-Agent', value: buildUserAgentString(userAgentEntry) });

  return { customHeaders: headers };
}

/**
Expand Down Expand Up @@ -282,7 +428,8 @@ export function buildSeaConnectionOptions(options: ConnectionOptions): SeaNative
httpPath: string;
intervalsAsString: boolean;
maxConnections?: number;
} & SeaTlsOptions = {
} & SeaTlsOptions &
SeaHttpOptions = {
hostName: options.host,
httpPath: prependSlash(options.path),
// Match the NodeJS Thrift driver, which surfaces INTERVAL columns as
Expand All @@ -292,9 +439,12 @@ export function buildSeaConnectionOptions(options: ConnectionOptions): SeaNative
// (native Arrow) — they already decode identically to Thrift via the
// shared Arrow converter, so `complexTypesAsJson` is not forced on.
intervalsAsString: true,
// TLS knobs (server-cert verification toggle + custom CA). Validated and
// normalised (string PEM → Buffer) here so the napi shape only sees a Buffer.
// TLS knobs (server-cert verification toggle + custom CA + mTLS client
// identity). Validated and normalised (string PEM → Buffer) here so the
// napi shape only sees a Buffer.
...buildSeaTlsOptions(options),
// HTTP headers (caller `customHeaders` + composed `User-Agent`).
...buildSeaHttpOptions(options),
};

// SEA-only pool sizing; read via cast to match how this function reads the
Expand Down
Loading
Loading