From 3477416576e258d6457b0c2db14c4c8e07433a23 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Fri, 5 Jun 2026 01:06:20 +0000 Subject: [PATCH 1/2] [SEA-NodeJS] map kernel SqlError to OperationStateError for Thrift parity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Server-reported SQL execution failures (kernel `SqlError` — bad query, missing table, divide-by-zero, invalid cast, param type mismatch) were surfaced as the base `HiveDriverError` on the SEA/kernel path, while the Thrift backend raises `OperationStateError(Error)` when the operation reaches ERROR_STATE. The comparator flagged every error path as a class mismatch (Thrift `OperationStateError` vs SEA `HiveDriverError`), and the Python kernel connector matches Thrift here — so the divergence was in this mapping, not the kernel. Map `SqlError` -> `OperationStateError(OperationStateErrorCode.Error)`, preserving the kernel message. `OperationStateError extends HiveDriverError`, so existing `instanceof HiveDriverError` catches are unaffected. Other code mappings (InvalidArgument -> ParameterError, auth, network, etc.) are unchanged. Verified against the comparator warehouse: all ERROR_PATHS cases (table-not-found, syntax error, unresolved column, divide-by-zero, invalid cast, param mismatch) now match the Thrift backend. Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- lib/sea/SeaErrorMapping.ts | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/lib/sea/SeaErrorMapping.ts b/lib/sea/SeaErrorMapping.ts index b17d594a..1dcd693a 100644 --- a/lib/sea/SeaErrorMapping.ts +++ b/lib/sea/SeaErrorMapping.ts @@ -147,6 +147,20 @@ export function mapKernelErrorToJsError(kErr: KernelErrorShape): ErrorWithSqlSta error = new ParameterError(message); break; + case 'SqlError': { + // A server-reported SQL execution failure (kernel `SqlError`, e.g. a + // bad query, missing table, divide-by-zero, invalid cast). The Thrift + // backend surfaces the same situation as `OperationStateError(Error)` + // when the operation reaches ERROR_STATE (see DBSQLOperation), so map + // SqlError to the same class for backend parity. OperationStateError + // extends HiveDriverError, so existing `instanceof HiveDriverError` + // catches are unaffected. + const stateError = new OperationStateError(OperationStateErrorCode.Error); + stateError.message = message; + error = stateError; + break; + } + // All remaining kernel ErrorCode variants map to the base driver error class. // M0 intentionally does not introduce new error classes; M1 may add nuance. case 'NotFound': @@ -156,7 +170,6 @@ export function mapKernelErrorToJsError(kErr: KernelErrorShape): ErrorWithSqlSta case 'Internal': case 'InvalidStatementHandle': case 'NetworkError': - case 'SqlError': error = new HiveDriverError(message); break; From ebb51c0d0bf6263a162bb9f1b2d6b0bab51395bc Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Fri, 5 Jun 2026 01:21:10 +0000 Subject: [PATCH 2/2] [SEA-NodeJS] report INTERVAL columns as STRING_TYPE (Thrift / Python kernel parity) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SEA Arrow→Thrift type synthesis surfaced interval columns with the true INTERVAL_YEAR_MONTH / INTERVAL_DAY_TIME type ids, while the Thrift backend and the Python kernel connector both report interval columns with a STRING type code. The comparator flagged every interval column as a type-code mismatch. Map INTERVAL (via databricks.type_name, the rewritten-duration Int64 path, and the native Arrow interval fallback) to STRING_TYPE. The cell value is already rendered to the canonical interval string ("2-6" / "3 12:30:15.000000000") by ArrowResultConverter, which keys off the Arrow value type — not this synthesized TTypeId — so value formatting is unchanged. Verified against the comparator warehouse: STATEMENT_SELECT / EXTREME_VALUES interval columns now match the Thrift backend (type 7 + identical string value). Signed-off-by: Madhavendra Rathore --- lib/sea/SeaArrowIpc.ts | 23 ++++++++++++++++------- tests/unit/sea/SeaIntervalParity.test.ts | 9 ++++++--- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/lib/sea/SeaArrowIpc.ts b/lib/sea/SeaArrowIpc.ts index 95071895..6e09dcb4 100644 --- a/lib/sea/SeaArrowIpc.ts +++ b/lib/sea/SeaArrowIpc.ts @@ -162,6 +162,12 @@ function arrowTypeToTTypeId(field: Field): TTypeId { return TTypeId.TIMESTAMP_TYPE; case 'DECIMAL': return TTypeId.DECIMAL_TYPE; + // INTERVAL — surface as STRING_TYPE to match the Thrift backend and the + // Python kernel connector, both of which report interval columns with a + // string type code. The cell value is already rendered to the canonical + // interval string (e.g. "2-6" / "3 12:30:15.000000000") by + // ArrowResultConverter, which keys off the Arrow value type (not this + // synthesized TTypeId), so value formatting is unaffected. case 'INTERVAL': case 'INTERVAL DAY': case 'INTERVAL DAY TO HOUR': @@ -173,11 +179,10 @@ function arrowTypeToTTypeId(field: Field): TTypeId { case 'INTERVAL MINUTE': case 'INTERVAL MINUTE TO SECOND': case 'INTERVAL SECOND': - return TTypeId.INTERVAL_DAY_TIME_TYPE; case 'INTERVAL YEAR': case 'INTERVAL YEAR TO MONTH': case 'INTERVAL MONTH': - return TTypeId.INTERVAL_YEAR_MONTH_TYPE; + return TTypeId.STRING_TYPE; case 'ARRAY': return TTypeId.ARRAY_TYPE; case 'MAP': @@ -198,10 +203,12 @@ function arrowTypeToTTypeId(field: Field): TTypeId { if (DataType.isInt(arrowType)) { // Duration columns are rewritten to Int64 with a // `databricks.arrow.duration_unit` metadata marker (see - // `SeaArrowIpcDurationFix.ts`). Surface them as INTERVAL_DAY_TIME - // so the converter formats them back into the thrift string form. + // `SeaArrowIpcDurationFix.ts`). Surface them as STRING_TYPE (matching the + // Thrift backend and Python kernel) — the converter still formats the + // value into the thrift INTERVAL DAY-TIME string via the duration_unit + // metadata, independent of this type code. if (arrowType.bitWidth === 64 && field.metadata.has(DURATION_UNIT_METADATA_KEY)) { - return TTypeId.INTERVAL_DAY_TIME_TYPE; + return TTypeId.STRING_TYPE; } switch (arrowType.bitWidth) { case 8: @@ -233,8 +240,10 @@ function arrowTypeToTTypeId(field: Field): TTypeId { // pairs which the converter formats to thrift's `"Y-M"` / day-time // strings. if (DataType.isInterval(arrowType)) { - // unit 0 = YEAR_MONTH, unit 1 = DAY_TIME, unit 2 = MONTH_DAY_NANO - return arrowType.unit === 0 ? TTypeId.INTERVAL_YEAR_MONTH_TYPE : TTypeId.INTERVAL_DAY_TIME_TYPE; + // Surface native Arrow interval types as STRING_TYPE too (Thrift / Python + // kernel parity). The converter formats the value to the thrift "Y-M" / + // day-time string from the Arrow value, independent of this type code. + return TTypeId.STRING_TYPE; } if (DataType.isList(arrowType)) return TTypeId.ARRAY_TYPE; if (DataType.isMap(arrowType)) return TTypeId.MAP_TYPE; diff --git a/tests/unit/sea/SeaIntervalParity.test.ts b/tests/unit/sea/SeaIntervalParity.test.ts index bee5a5b9..c0cab289 100644 --- a/tests/unit/sea/SeaIntervalParity.test.ts +++ b/tests/unit/sea/SeaIntervalParity.test.ts @@ -403,12 +403,15 @@ describe('SeaOperationBackend — INTERVAL parity with thrift', () => { const backend = new SeaOperationBackend({ statement: stub, context: new ClientContextStub() }); // Round-trip the metadata to confirm we synthesise the right TTypeId. + // Interval columns are surfaced as STRING_TYPE — matching the Thrift + // backend and the Python kernel connector, both of which report interval + // columns with a string type code. The value is still rendered to the + // canonical interval string (asserted below), which is what makes this + // "interval parity with thrift". const metadata = await backend.getResultMetadata(); expect(metadata.schema?.columns?.[0]?.typeDesc.types?.[0]?.primitiveEntry?.type).to.equal( - // INTERVAL_DAY_TIME_TYPE = 30 in TCLIService_types - // We assert by importing the enum below to avoid magic numbers. // eslint-disable-next-line global-require, @typescript-eslint/no-var-requires - require('../../../thrift/TCLIService_types').TTypeId.INTERVAL_DAY_TIME_TYPE, + require('../../../thrift/TCLIService_types').TTypeId.STRING_TYPE, ); const rows = await backend.fetchChunk({ limit: 100 });