@@ -23,6 +23,145 @@ const { isArrowBigNumSymbol, bigNumToBigInt } = arrowUtils;
2323type ArrowSchema = Schema < TypeMap > ;
2424type ArrowSchemaField = Field < DataType < Type , TypeMap > > ;
2525
26+ /**
27+ * Metadata key carrying the original Arrow `Duration` time unit on
28+ * fields that were rewritten to `Int64` by the SEA IPC pre-processor
29+ * (`lib/sea/SeaArrowIpcDurationFix.ts`). We re-declare the constant
30+ * here (rather than importing it) so the converter has no compile-time
31+ * dependency on the SEA module — it's reused unchanged by the
32+ * thrift-path which has no SEA awareness.
33+ */
34+ const DURATION_UNIT_METADATA_KEY = 'databricks.arrow.duration_unit' ;
35+ const ZERO_BIGINT = BigInt ( 0 ) ;
36+ const NS_PER_MICRO = BigInt ( 1_000 ) ;
37+ const NS_PER_MILLI = BigInt ( 1_000_000 ) ;
38+ const NS_PER_SEC = BigInt ( 1_000_000_000 ) ;
39+ const MS_PER_DAY = BigInt ( 86_400_000 ) ;
40+ const NS_PER_MIN = NS_PER_SEC * BigInt ( 60 ) ;
41+ const NS_PER_HOUR = NS_PER_MIN * BigInt ( 60 ) ;
42+ const NS_PER_DAY = NS_PER_HOUR * BigInt ( 24 ) ;
43+
44+ /**
45+ * Format an Arrow `Interval[YearMonth]` or `Interval[DayTime]` value
46+ * into the canonical thrift string the JDBC/ODBC server emits:
47+ * YEAR-MONTH → `"Y-M"` (e.g. 1 year 2 months → `"1-2"`)
48+ * DAY-TIME → `"D HH:mm:ss.fffffffff"`
49+ * (e.g. 1 day 02:03:04 → `"1 02:03:04.000000000"`)
50+ *
51+ * Arrow surfaces these as `Int32Array(2)` via the `GetVisitor`
52+ * (`apache-arrow/visitor/get.js:177-185`):
53+ * YEAR-MONTH: `[years, months]` (years/months derived from a single
54+ * int32 holding total months)
55+ * DAY-TIME: `[days, milliseconds]` (legacy two-int32 form)
56+ *
57+ * Negative intervals: the FULL interval is emitted with a leading `-`
58+ * (Spark convention), and individual fields are unsigned. We mirror
59+ * Spark's display.
60+ */
61+ function formatArrowInterval ( value : any , valueType : any ) : string {
62+ // `value` is an Int32Array of length 2.
63+ const a = Number ( value [ 0 ] ) ;
64+ const b = Number ( value [ 1 ] ) ;
65+ // unit 0 = YEAR_MONTH, unit 1 = DAY_TIME, unit 2 = MONTH_DAY_NANO
66+ const unit = valueType ?. unit ;
67+ if ( unit === 0 ) {
68+ return formatYearMonth ( a , b ) ;
69+ }
70+ // DAY_TIME: a = days, b = milliseconds (within the day, can be ≥0 or <0)
71+ // We re-normalise: total milliseconds = a * 86_400_000 + b, then split into
72+ // days, hours, minutes, seconds, nanoseconds (nanoseconds is always 0
73+ // because the legacy IntervalDayTime carries only millisecond precision).
74+ const totalMs = BigInt ( a ) * MS_PER_DAY + BigInt ( b ) ;
75+ return formatDayTimeFromTotal ( totalMs * NS_PER_MILLI /* → ns */ , 'NANOSECOND' ) ;
76+ }
77+
78+ /**
79+ * Format the (years, months) decomposition into `"Y-M"` (or `"-Y-M"`
80+ * for negative intervals). Arrow's `getIntervalYearMonth` (in
81+ * `apache-arrow/visitor/get.js:179`) decomposes a signed total-months
82+ * int32 via integer truncation, so years and months always share the
83+ * same sign. We render the absolute values with a single leading `-`
84+ * to match the Spark display format used on the thrift path.
85+ */
86+ function formatYearMonth ( years : number , months : number ) : string {
87+ const total = years * 12 + months ;
88+ if ( total < 0 ) {
89+ const abs = - total ;
90+ const y = Math . trunc ( abs / 12 ) ;
91+ const m = abs % 12 ;
92+ return `-${ y } -${ m } ` ;
93+ }
94+ return `${ years } -${ months } ` ;
95+ }
96+
97+ /**
98+ * Format an Arrow `Duration` value (rewritten by the SEA IPC
99+ * pre-processor to `Int64`) into the thrift INTERVAL DAY-TIME string.
100+ *
101+ * @param value the duration value as `bigint` (signed nanos/micros/
102+ * millis/seconds depending on `unit`)
103+ * @param unit one of `SECOND` / `MILLISECOND` / `MICROSECOND` /
104+ * `NANOSECOND` (the original Arrow time unit, captured
105+ * by `SeaArrowIpcDurationFix.ts`)
106+ */
107+ function formatDurationToIntervalDayTime ( value : bigint | number , unit : string ) : string {
108+ const bi = typeof value === 'bigint' ? value : BigInt ( value ) ;
109+ const nanos = toNanoseconds ( bi , unit ) ;
110+ return formatDayTimeFromTotal ( nanos , unit ) ;
111+ }
112+
113+ /**
114+ * Scale a duration value to nanoseconds based on its unit.
115+ *
116+ * SECOND → ×1_000_000_000
117+ * MILLISECOND → × 1_000_000
118+ * MICROSECOND → × 1_000
119+ * NANOSECOND → × 1
120+ */
121+ function toNanoseconds ( value : bigint , unit : string ) : bigint {
122+ switch ( unit ) {
123+ case 'SECOND' :
124+ return value * NS_PER_SEC ;
125+ case 'MILLISECOND' :
126+ return value * NS_PER_MILLI ;
127+ case 'MICROSECOND' :
128+ return value * NS_PER_MICRO ;
129+ case 'NANOSECOND' :
130+ default :
131+ return value ;
132+ }
133+ }
134+
135+ /**
136+ * Format a signed total-nanoseconds value as `"D HH:mm:ss.fffffffff"`.
137+ * Always emits 9 fractional digits to match the thrift driver's wire
138+ * format (`"1 02:03:04.000000000"` — 9 digits regardless of the
139+ * server-side storage precision). Negative values get a single
140+ * leading `-`.
141+ *
142+ * The `unit` parameter is currently unused for formatting (the value
143+ * is already in nanoseconds by the time we get here) but is retained
144+ * for future use if a unit-aware precision is ever needed.
145+ */
146+ function formatDayTimeFromTotal ( totalNanos : bigint , _unit : string ) : string {
147+ const sign = totalNanos < ZERO_BIGINT ? '-' : '' ;
148+ const abs = totalNanos < ZERO_BIGINT ? - totalNanos : totalNanos ;
149+
150+ const days = abs / NS_PER_DAY ;
151+ let rem = abs % NS_PER_DAY ;
152+ const hours = rem / NS_PER_HOUR ;
153+ rem %= NS_PER_HOUR ;
154+ const minutes = rem / NS_PER_MIN ;
155+ rem %= NS_PER_MIN ;
156+ const seconds = rem / NS_PER_SEC ;
157+ const subSeconds = rem % NS_PER_SEC ;
158+
159+ const pad2 = ( n : bigint ) : string => n . toString ( ) . padStart ( 2 , '0' ) ;
160+ const fraction = `.${ subSeconds . toString ( ) . padStart ( 9 , '0' ) } ` ;
161+
162+ return `${ sign } ${ days . toString ( ) } ${ pad2 ( hours ) } :${ pad2 ( minutes ) } :${ pad2 ( seconds ) } ${ fraction } ` ;
163+ }
164+
26165export default class ArrowResultConverter implements IResultsProvider < Array < any > > {
27166 private readonly context : IClientContext ;
28167
@@ -147,37 +286,52 @@ export default class ArrowResultConverter implements IResultsProvider<Array<any>
147286 private getRows ( schema : ArrowSchema , rows : Array < StructRow | MapRow > ) : Array < any > {
148287 return rows . map ( ( row ) => {
149288 // First, convert native Arrow values to corresponding plain JS objects
150- const record = this . convertArrowTypes ( row , undefined , schema . fields ) ;
289+ const record = this . convertArrowTypes ( row , undefined , schema . fields , undefined ) ;
151290 // Second, cast all the values to original Thrift types
152291 return this . convertThriftTypes ( record ) ;
153292 } ) ;
154293 }
155294
156- private convertArrowTypes ( value : any , valueType : DataType | undefined , fields : Array < ArrowSchemaField > = [ ] ) : any {
295+ private convertArrowTypes (
296+ value : any ,
297+ valueType : DataType | undefined ,
298+ fields : Array < ArrowSchemaField > = [ ] ,
299+ field ?: ArrowSchemaField ,
300+ ) : any {
157301 if ( value === null ) {
158302 return value ;
159303 }
160304
161305 const fieldsMap : Record < string , ArrowSchemaField > = { } ;
162- for ( const field of fields ) {
163- fieldsMap [ field . name ] = field ;
306+ for ( const f of fields ) {
307+ fieldsMap [ f . name ] = f ;
164308 }
165309
166310 // Convert structures to plain JS object and process all its fields recursively
167311 if ( value instanceof StructRow ) {
168312 const result = value . toJSON ( ) ;
169313 for ( const key of Object . keys ( result ) ) {
170- const field : ArrowSchemaField | undefined = fieldsMap [ key ] ;
171- result [ key ] = this . convertArrowTypes ( result [ key ] , field ?. type , field ?. type . children || [ ] ) ;
314+ const childField : ArrowSchemaField | undefined = fieldsMap [ key ] ;
315+ result [ key ] = this . convertArrowTypes (
316+ result [ key ] ,
317+ childField ?. type ,
318+ childField ?. type . children || [ ] ,
319+ childField ,
320+ ) ;
172321 }
173322 return result ;
174323 }
175324 if ( value instanceof MapRow ) {
176325 const result = value . toJSON ( ) ;
177326 // Map type consists of its key and value types. We need only value type here, key will be cast to string anyway
178- const field = fieldsMap . entries ?. type . children . find ( ( item ) => item . name === 'value' ) ;
327+ const valueField = fieldsMap . entries ?. type . children . find ( ( item ) => item . name === 'value' ) ;
179328 for ( const key of Object . keys ( result ) ) {
180- result [ key ] = this . convertArrowTypes ( result [ key ] , field ?. type , field ?. type . children || [ ] ) ;
329+ result [ key ] = this . convertArrowTypes (
330+ result [ key ] ,
331+ valueField ?. type ,
332+ valueField ?. type . children || [ ] ,
333+ valueField ,
334+ ) ;
181335 }
182336 return result ;
183337 }
@@ -186,31 +340,67 @@ export default class ArrowResultConverter implements IResultsProvider<Array<any>
186340 if ( value instanceof Vector ) {
187341 const result = value . toJSON ( ) ;
188342 // Array type contains the only child which defines a type of each array's element
189- const field = fieldsMap . element ;
190- return result . map ( ( item ) => this . convertArrowTypes ( item , field ?. type , field ?. type . children || [ ] ) ) ;
343+ const elementField = fieldsMap . element ;
344+ return result . map ( ( item ) =>
345+ this . convertArrowTypes ( item , elementField ?. type , elementField ?. type . children || [ ] , elementField ) ,
346+ ) ;
191347 }
192348
193349 if ( DataType . isTimestamp ( valueType ) ) {
194350 return new Date ( value ) ;
195351 }
196352
353+ // INTERVAL — Spark/Databricks SEA emits two flavours: native Arrow
354+ // `Interval[YearMonth]` / `Interval[DayTime]` (handled here) and
355+ // `Duration` (transparently rewritten to `Int64` upstream by
356+ // `SeaArrowIpcDurationFix.ts`; handled in the bigint/Int64 branch
357+ // below). In every case we coerce to the canonical thrift string
358+ // form so the SEA path is byte-identical with the thrift path:
359+ // YEAR-MONTH → `"Y-M"`
360+ // DAY-TIME → `"D HH:mm:ss.fffffffff"`
361+ if ( DataType . isInterval ( valueType ) ) {
362+ return formatArrowInterval ( value , valueType ) ;
363+ }
364+
197365 // Convert big number values to BigInt
198366 // Decimals are also represented as big numbers in Arrow, so additionally process them (convert to float)
199367 if ( value instanceof Object && value [ isArrowBigNumSymbol ] ) {
200368 const result = bigNumToBigInt ( value ) ;
201369 if ( DataType . isDecimal ( valueType ) ) {
202370 return Number ( result ) / 10 ** valueType . scale ;
203371 }
372+ // Duration columns rewritten to Int64 — detect via metadata.
373+ const durationUnit = field ?. metadata . get ( DURATION_UNIT_METADATA_KEY ) ;
374+ if ( durationUnit ) {
375+ return formatDurationToIntervalDayTime ( result , durationUnit ) ;
376+ }
204377 return result ;
205378 }
206379
207380 // Convert binary data to Buffer
208381 if ( value instanceof Uint8Array ) {
382+ // INTERVAL DAY-TIME / YEAR-MONTH that apache-arrow surfaced as
383+ // an Int32Array (size 2). `Uint8Array.isInstanceOf` is true for
384+ // every TypedArray subclass, so we have to check the parent type
385+ // first. The `DataType.isInterval` branch above already handles
386+ // the case where Arrow knew the field was an interval — this
387+ // fallback covers schemas where the interval surfaced as bare
388+ // bytes (defensive; not exercised in M0).
209389 return Buffer . from ( value ) ;
210390 }
211391
392+ // Bigint fallback — for raw bigints (not BigNum wrappers), the
393+ // duration_unit metadata also gates the INTERVAL DAY-TIME format.
394+ if ( typeof value === 'bigint' ) {
395+ const durationUnit = field ?. metadata . get ( DURATION_UNIT_METADATA_KEY ) ;
396+ if ( durationUnit ) {
397+ return formatDurationToIntervalDayTime ( value , durationUnit ) ;
398+ }
399+ return Number ( value ) ;
400+ }
401+
212402 // Return other values as is
213- return typeof value === 'bigint' ? Number ( value ) : value ;
403+ return value ;
214404 }
215405
216406 private convertThriftTypes ( record : Record < string , any > ) : any {
0 commit comments