From 76dbe78f5a9a80c46dadbb2c0a37ce1ff922c760 Mon Sep 17 00:00:00 2001 From: peancored Date: Wed, 8 Apr 2026 12:14:25 +0100 Subject: [PATCH 1/4] Add useBigIntTimestamp option --- src/batch-type.js | 4 ++-- src/types.ts | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/batch-type.js b/src/batch-type.js index 642a282..e7547f2 100644 --- a/src/batch-type.js +++ b/src/batch-type.js @@ -12,7 +12,7 @@ import { invalidDataType } from './data-types.js'; */ export function batchType(type, options = {}) { const { typeId, bitWidth, mode, precision, unit } = /** @type {any} */(type); - const { useBigInt, useDate, useDecimalInt, useMap, useProxy } = options; + const { useBigInt, useBigIntTimestamp, useDate, useDecimalInt, useMap, useProxy } = options; switch (typeId) { case Type.Null: return NullBatch; @@ -24,7 +24,7 @@ export function batchType(type, options = {}) { case Type.Float: return precision ? DirectBatch : Float16Batch; case Type.Date: - return wrap( + return useBigIntTimestamp ? DirectBatch : wrap( unit === DateUnit.DAY ? DateDayBatch : DateDayMillisecondBatch, useDate && DateBatch ); diff --git a/src/types.ts b/src/types.ts index a02939e..0ed5d4f 100644 --- a/src/types.ts +++ b/src/types.ts @@ -358,6 +358,11 @@ export interface ExtractionOptions { * Otherwise, coerce long integers to JavaScript number values (default). */ useBigInt?: boolean; + /** + * If true, extract 64-bit timestamps as JavaScript `BigInt` values in the column's time unit. + * Otherwise, coerce timestamps to floating-point millisecond values (default). + */ + useBigIntTimestamp?: boolean; /** * If true, extract Arrow 'Map' values as JavaScript `Map` instances. 
* Otherwise, return an array of [key, value] pairs compatible with From 30ae8391a19fef820c3e2cce157696478a40893e Mon Sep 17 00:00:00 2001 From: peancored Date: Thu, 9 Apr 2026 13:35:41 +0100 Subject: [PATCH 2/4] Update documentation --- README.md | 11 ++++++----- docs/api/data-types.md | 5 +++-- docs/api/index.md | 1 + docs/index.md | 11 ++++++----- package.json | 2 +- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 0a3e0b0..af684e3 100644 --- a/README.md +++ b/README.md @@ -114,11 +114,12 @@ Data extraction can be customized using options provided to table generation met ```js const table = tableFromIPC(ipc, { - useDate: true, // map dates and timestamps to Date objects - useDecimalInt: true, // use BigInt for decimals, do not coerce to number - useBigInt: true, // use BigInt for 64-bit ints, do not coerce to number - useMap: true, // create Map objects for [key, value] pair lists - useProxy: true // use zero-copy proxies for struct and table row objects + useDate: true, // map dates and timestamps to Date objects + useDecimalInt: true, // use BigInt for decimals, do not coerce to number + useBigInt: true, // use BigInt for 64-bit ints, do not coerce to number + useBigIntTimestamp: true, // use BigInt for timestamps, do not coerce to float + useMap: true, // create Map objects for [key, value] pair lists + useProxy: true // use zero-copy proxies for struct and table row objects }); ``` diff --git a/docs/api/data-types.md b/docs/api/data-types.md index de30239..c7a3adb 100644 --- a/docs/api/data-types.md +++ b/docs/api/data-types.md @@ -21,7 +21,7 @@ The table below provides an overview of all data types supported by the Apache A | 7 | [Decimal](#decimal) | ✅ | ✅ | ✅ | `number`, or scaled integers via the `useDecimalInt` flag | | 8 | [Date](#date) | ✅ | ✅ | ✅ | `number`, or `Date` via the `useDate` flag. 
| | 9 | [Time](#time) | ✅ | ✅ | ✅ | `number`, or `bigint` for 64-bit values via the `useBigInt` flag | -| 10 | [Timestamp](#timestamp) | ✅ | ✅ | ✅ | `number`, or `Date` via the `useDate` flag. | +| 10 | [Timestamp](#timestamp) | ✅ | ✅ | ✅ | `number`, `bigint` via the `useBigIntTimestamp` flag, or `Date` via the `useDate` flag. | | 11 | [Interval](#interval) | ✅ | ✅ | ✅ | depends on the interval unit | | 12 | [List](#list) | ✅ | ✅ | ✅ | `Array` or `TypedArray` of child type | | 13 | [Struct](#struct) | ✅ | ✅ | ✅ | `object`, properties depend on child types | @@ -398,7 +398,8 @@ timeNanosecond() Create a Timestamp data type instance. Timestamp values are 64-bit signed integers representing an elapsed time since a fixed epoch, stored in either of four *unit*s: seconds, milliseconds, microseconds or nanoseconds, and are optionally annotated with a *timezone*. Timestamp values do not include any leap seconds (in other words, all days are considered 86400 seconds long). -Timestamp values are stored in a `BigInt64Array` and converted to millisecond-based JavaScript `number` values (potentially with fractional digits) upon extraction. An error is raised if a value exceeds either `Number.MIN_SAFE_INTEGER` or `Number.MAX_SAFE_INTEGER`. Pass the `useDate` extraction option (e.g., to [`tableFromIPC`](/flechette/api/#tableFromIPC) or [`tableFromArrays`](/flechette/api/#tableFromArrays)) to instead extract timestamp values as JavaScript `Date` objects. +Timestamp values are stored in a `BigInt64Array` and converted to millisecond-based JavaScript `number` values (potentially with fractional digits) upon extraction. An error is raised if a value exceeds either `Number.MIN_SAFE_INTEGER` or `Number.MAX_SAFE_INTEGER`. Pass the `useDate` extraction option (e.g., to [`tableFromIPC`](/flechette/api/#tableFromIPC) or [`tableFromArrays`](/flechette/api/#tableFromArrays)) to instead extract timestamp values as JavaScript `Date` objects. 
Alternatively, pass the `useBigIntTimestamp` extraction option to extract timestamp values as JavaScript `bigint` values in the column's own time unit, bypassing the conversion to floating-point milliseconds. If both options are set, `useBigIntTimestamp` takes precedence over `useDate`. + * *unit* (`number`): The time unit, one of `TimeUnit.SECOND`, `TimeUnit.MILLISECOND` (default), `TimeUnit.MICROSECOND`, or `TimeUnit.NANOSECOND`. * *timezone* (`string`): An optional string for the name of a timezone. If provided, the value should either be a string as used in the Olson timezone database (the "tz database" or "tzdata"), such as "America/New_York", or an absolute timezone offset of the form "+XX:XX" or "-XX:XX", such as "+07:30". Whether a timezone string is present indicates different semantics about the data. That said, Flechette does not process the timezone information. diff --git a/docs/api/index.md b/docs/api/index.md index b34b180..47b3e07 100644 --- a/docs/api/index.md +++ b/docs/api/index.md @@ -25,6 +25,7 @@ By default Flechette assumes input data is uncompressed. If input IPC data conta * *data* (`ArrayBuffer` \| `Uint8Array` \| `Uint8Array[]`): The source byte buffer, or an array of buffers. If an array, each byte array may contain one or more self-contained messages. Messages may NOT span multiple byte arrays. * *options* (`ExtractionOptions`): Options for controlling how values are transformed when extracted from an Arrow binary representation. * *useBigInt* (`boolean`): If true, extract 64-bit integers as JavaScript `BigInt` values. Otherwise, coerce long integers to JavaScript number values (default `false`), raising an error if the integer can not be represented as a double precision floating point number. + * *useBigIntTimestamp* (`boolean`): If true, extract timestamps as JavaScript `BigInt` values in the column's time unit, taking precedence over *useDate* for timestamp columns. Otherwise, coerce timestamps to floating-point millisecond values (default `false`). * *useDate* (`boolean`): If true, extract dates and timestamps as JavaScript `Date` objects. Otherwise, return numerical timestamp values (default `false`). 
* *useDecimalInt* (`boolean`): If true, extract decimal-type data as scaled integer values, where fractional digits are scaled to integer positions. Returned integers are `BigInt` values for decimal bit widths of 64 bits or higher and 32-bit integers (as JavaScript `number`) otherwise. If false, decimals are lossily converted to floating-point numbers (default). * *useMap* (`boolean`): If true, extract Arrow 'Map' values as JavaScript `Map` instances. Otherwise, return an array of [key, value] pairs compatible with both `Map` and `Object.fromEntries` (default `false`). diff --git a/docs/index.md b/docs/index.md index 5000bca..d0e8e0f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -114,11 +114,12 @@ Data extraction can be customized using options provided to table generation met ```js const table = tableFromIPC(ipc, { - useDate: true, // map dates and timestamps to Date objects - useDecimalInt: true, // use scaled ints for decimals, not floating point - useBigInt: true, // use BigInt for 64-bit ints, do not coerce to number - useMap: true, // create Map objects for [key, value] pair lists - useProxy: true // use zero-copy proxies for struct and table row objects + useDate: true, // map dates and timestamps to Date objects + useDecimalInt: true, // use scaled ints for decimals, not floating point + useBigInt: true, // use BigInt for 64-bit ints, do not coerce to number + useBigIntTimestamp: true, // use BigInt for timestamps, do not coerce to float + useMap: true, // create Map objects for [key, value] pair lists + useProxy: true // use zero-copy proxies for struct and table row objects }); ``` diff --git a/package.json b/package.json index 781f7ee..619b693 100644 --- a/package.json +++ b/package.json @@ -30,7 +30,7 @@ "postbuild": "npm run types", "lint": "eslint src test", "test:unit": "vitest", - "test": "vitest --run", + "test": "vitest --run", "prepublishOnly": "npm run test && npm run lint && npm run build" }, "devDependencies": { From 
e9d942d7aa3e60523d14a77f01bce037791e106a Mon Sep 17 00:00:00 2001 From: peancored Date: Thu, 9 Apr 2026 14:10:47 +0100 Subject: [PATCH 3/4] Move the flag to Type.Timestamp from Type.Date --- src/batch-type.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/batch-type.js b/src/batch-type.js index e7547f2..2a05266 100644 --- a/src/batch-type.js +++ b/src/batch-type.js @@ -24,12 +24,12 @@ export function batchType(type, options = {}) { case Type.Float: return precision ? DirectBatch : Float16Batch; case Type.Date: - return useBigIntTimestamp ? DirectBatch : wrap( + return wrap( unit === DateUnit.DAY ? DateDayBatch : DateDayMillisecondBatch, useDate && DateBatch ); case Type.Timestamp: - return wrap( + return useBigIntTimestamp ? DirectBatch : wrap( unit === TimeUnit.SECOND ? TimestampSecondBatch : unit === TimeUnit.MILLISECOND ? TimestampMillisecondBatch : unit === TimeUnit.MICROSECOND ? TimestampMicrosecondBatch From 6787f48fda7a2f0b1d3b030a68e8e8c6a73fc899 Mon Sep 17 00:00:00 2001 From: peancored Date: Thu, 9 Apr 2026 14:56:54 +0100 Subject: [PATCH 4/4] Add tests --- test/column-from-array.test.js | 16 +++++++++++++ test/table-from-ipc.test.js | 6 ++++- test/util/data.js | 42 +++++++++++++++++++++++++++++----- 3 files changed, 57 insertions(+), 7 deletions(-) diff --git a/test/column-from-array.test.js b/test/column-from-array.test.js index ff30e1a..09b64f4 100644 --- a/test/column-from-array.test.js +++ b/test/column-from-array.test.js @@ -179,6 +179,22 @@ describe('columnFromArray', () => { test(ms, timestamp(TimeUnit.MILLISECOND)); test(ms.map(ts => ts + 0.001), timestamp(TimeUnit.MICROSECOND)); test(ms.map(ts => ts + 0.000001), timestamp(TimeUnit.NANOSECOND)); + + // bigint timestamps + const msBigInt = ms.map(BigInt); + test(msBigInt, timestamp(TimeUnit.MILLISECOND), { useBigIntTimestamp: true }); + test([...msBigInt, null], timestamp(TimeUnit.MILLISECOND), { useBigIntTimestamp: true }); + + const secCol = columnFromArray(ms, 
timestamp(TimeUnit.SECOND), { useBigIntTimestamp: true }); + expect(Array.from(secCol)).toStrictEqual(ms.map(t => BigInt(t) / 1000n)); + + const usMs = ms.map(t => t + 0.001); + const usCol = columnFromArray(usMs, timestamp(TimeUnit.MICROSECOND), { useBigIntTimestamp: true }); + expect(Array.from(usCol)).toStrictEqual(usMs.map(t => BigInt(Math.round(t * 1000)))); + + const ns = [0, 8640000, -8640000]; + const nsCol = columnFromArray(ns, timestamp(TimeUnit.NANOSECOND), { useBigIntTimestamp: true }); + expect(Array.from(nsCol)).toStrictEqual(ns.map(t => BigInt(t) * 1000000n)); }); it('builds interval year-month columns', () => { diff --git a/test/table-from-ipc.test.js b/test/table-from-ipc.test.js index 303fa5f..21b3af6 100644 --- a/test/table-from-ipc.test.js +++ b/test/table-from-ipc.test.js @@ -1,7 +1,7 @@ import { describe, it, expect } from "vitest"; import { readFile } from 'node:fs/promises'; import { tableFromIPC } from '../src/index.js'; -import { binaryView, bool, dateDay, decimal, decimal32, decimal128, decimal256, decimal64, empty, fixedListInt32, fixedListUtf8, float32, float64, int16, int32, int64, int8, intervalMonthDayNano, largeListView, listInt32, listUtf8, listView, map, runEndEncoded32, runEndEncoded64, struct, timestampMicrosecond, timestampMillisecond, timestampNanosecond, timestampSecond, uint16, uint32, uint64, uint8, union, utf8, utf8View } from './util/data.js'; +import { binaryView, bool, dateDay, decimal, decimal32, decimal128, decimal256, decimal64, empty, fixedListInt32, fixedListUtf8, float32, float64, int16, int32, int64, int8, intervalMonthDayNano, largeListView, listInt32, listUtf8, listView, map, runEndEncoded32, runEndEncoded64, struct, timestampMicrosecond, timestampMillisecond, timestampNanosecond, timestampSecond, uint16, uint32, uint64, uint8, union, utf8, utf8View, timestampNanosecondBigInt, timestampMicrosecondBigInt, timestampMillisecondBigInt, timestampSecondBigInt } from './util/data.js'; import { RowIndex } from 
'../src/util/struct.js'; const toBigInt = v => BigInt(v); @@ -95,6 +95,10 @@ describe('tableFromIPC', () => { it('decodes timestamp microsecond data to dates', () => test(timestampMicrosecond, Array, { useDate: true }, toDate)); it('decodes timestamp millisecond data to dates', () => test(timestampMillisecond, Array, { useDate: true }, toDate)); it('decodes timestamp second data to dates', () => test(timestampSecond, Array, { useDate: true }, toDate)); + it('decodes timestamp nanosecond data to bigint', () => test(timestampNanosecondBigInt, BigInt64Array, { useBigIntTimestamp: true })); + it('decodes timestamp microsecond data to bigint', () => test(timestampMicrosecondBigInt, BigInt64Array, { useBigIntTimestamp: true })); + it('decodes timestamp millisecond data to bigint', () => test(timestampMillisecondBigInt, BigInt64Array, { useBigIntTimestamp: true })); + it('decodes timestamp second data to bigint', () => test(timestampSecondBigInt, BigInt64Array, { useBigIntTimestamp: true })); it('decodes interval year/month/nano data', () => test(intervalMonthDayNano)); diff --git a/test/util/data.js b/test/util/data.js index 5a6ac9d..4bde27f 100644 --- a/test/util/data.js +++ b/test/util/data.js @@ -151,6 +151,36 @@ export function timestampSecond() { return loadData(data, 'timestampSecond', vals); } +export function timestampNanosecondBigInt() { + const ns = [456789n, 738209n]; + const ts = ['1992-09-20T11:30:00.123456789Z', '2002-12-13T07:28:56.564738209Z']; + const data = [ts, ts.concat(null)]; + const vals = data.map(v => v.map((d, i) => d === null ? null : BigInt(+new Date(d)) * 1000000n + ns[i])); + return loadData(data, 'timestampNanosecond', vals); +} + +export function timestampMicrosecondBigInt() { + const us = [457000n, 738000n]; + const ts = ['1992-09-20T11:30:00.123457Z', '2002-12-13T07:28:56.564738Z']; + const data = [ts, ts.concat(null)]; + const vals = data.map(v => v.map((d, i) => d === null ? 
null : BigInt(+new Date(d)) * 1000000n + us[i])); + return loadData(data, 'timestampMicrosecond', vals); +} + +export function timestampMillisecondBigInt() { + const ts = ['1992-09-20T11:30:00.123Z', '2002-12-13T07:28:56.565Z']; + const data = [ts, ts.concat(null)]; + const vals = data.map(v => v.map(d => d === null ? null : BigInt(+new Date(d)))); + return loadData(data, 'timestampMillisecond', vals); +} + +export function timestampSecondBigInt() { + const ts = ['1992-09-20T11:30:00Z', '2002-12-13T07:28:57Z']; + const data = [ts, ts.concat(null)]; + const vals = data.map(v => v.map(d => d === null ? null : BigInt(+new Date(d)) / 1000n)); + return loadData(data, 'timestampSecond', vals); +} + export function intervalMonthDayNano() { return loadData([ ['2 years', null, '12 years 2 month 1 day 5 seconds', '1 microsecond'] @@ -215,18 +245,18 @@ export function union() { export function map() { return loadData([ [ - new Map([ ['foo', 1], ['bar', 2] ]), - new Map([ ['foo', null], ['baz', 3] ]) + new Map([['foo', 1], ['bar', 2]]), + new Map([['foo', null], ['baz', 3]]) ] ], 'map'); } export function struct() { return loadData([ - [ {a: 1, b: 'foo'}, {a: 2, b: 'baz'} ], - [ {a: 1, b: 'foo'}, null, {a: 2, b: 'baz'} ], - [ {a: null, b: 'foo'}, {a: 2, b: null} ], - [ {a: ['a', 'b'], b: Math.E}, {a: ['c', 'd'], b: Math.PI} ] + [{ a: 1, b: 'foo' }, { a: 2, b: 'baz' }], + [{ a: 1, b: 'foo' }, null, { a: 2, b: 'baz' }], + [{ a: null, b: 'foo' }, { a: 2, b: null }], + [{ a: ['a', 'b'], b: Math.E }, { a: ['c', 'd'], b: Math.PI }] ], 'struct'); }