From 568d95c0f1a68b366f483acb0bed859e4bc27020 Mon Sep 17 00:00:00 2001 From: Jeff Raymakers Date: Sat, 19 Oct 2024 21:49:46 -0700 Subject: [PATCH] api: implement varint, fix timestamp_tz vector type --- api/src/DuckDBVector.ts | 98 ++++++++++++++++++++++++++++++++++++++--- api/test/api.test.ts | 32 ++++---------- 2 files changed, 101 insertions(+), 29 deletions(-) diff --git a/api/src/DuckDBVector.ts b/api/src/DuckDBVector.ts index 26a3dd09..dd3c4a2e 100644 --- a/api/src/DuckDBVector.ts +++ b/api/src/DuckDBVector.ts @@ -1,5 +1,5 @@ -import os from 'os'; import duckdb from '@duckdb/node-bindings'; +import os from 'os'; import { DuckDBLogicalType } from './DuckDBLogicalType'; import { DuckDBArrayType, @@ -25,6 +25,7 @@ import { DuckDBTimestampMillisecondsType, DuckDBTimestampNanosecondsType, DuckDBTimestampSecondsType, + DuckDBTimestampTZType, DuckDBTimestampType, DuckDBTinyIntType, DuckDBType, @@ -36,6 +37,7 @@ import { DuckDBUUIDType, DuckDBUnionType, DuckDBVarCharType, + DuckDBVarIntType, } from './DuckDBType'; import { DuckDBTypeId } from './DuckDBTypeId'; @@ -122,6 +124,26 @@ function getBuffer(dataView: DataView, offset: number): Buffer | null { return Buffer.from(stringBytes); } +function getVarIntFromBytes(bytes: Uint8Array): bigint { + const firstByte = bytes[0]; + const positive = (firstByte & 0x80) > 0; + const uint64Mask = positive ? 0n : 0xffffffffffffffffn; + const uint8Mask = positive ? 0 : 0xff; + const dv = new DataView(bytes.buffer, bytes.byteOffset + 3, bytes.byteLength - 3); + const lastUint64Offset = dv.byteLength - 8; + let offset = 0; + let result = 0n; + while (offset <= lastUint64Offset) { + result = (result << 64n) | (dv.getBigUint64(offset) ^ uint64Mask); + offset += 8; + } + while (offset < dv.byteLength) { + result = (result << 8n) | BigInt(dv.getUint8(offset) ^ uint8Mask); + offset += 1; + } + return positive ? result : -result; +} + function getBoolean1(dataView: DataView, offset: number): boolean { return getUInt8(dataView, offset) !== 0 } @@ -360,13 +382,13 @@ export abstract class DuckDBVector { case DuckDBTypeId.TIME_TZ: return DuckDBTimeTZVector.fromRawVector(vector, itemCount); case DuckDBTypeId.TIMESTAMP_TZ: - return DuckDBTimestampVector.fromRawVector(vector, itemCount); + return DuckDBTimestampTZVector.fromRawVector(vector, itemCount); case DuckDBTypeId.ANY: - throw new Error(`Vector not implemented for ANY type`); + throw new Error(`Invalid vector type: ANY`); case DuckDBTypeId.VARINT: - return DuckDBBlobVector.fromRawVector(vector, itemCount); // TODO: VARINT + return DuckDBVarIntVector.fromRawVector(vector, itemCount); case DuckDBTypeId.SQLNULL: - throw new Error(`Vector not implemented for SQLNULL type`); + throw new Error(`Invalid vector type: SQLNULL`); default: throw new Error(`Invalid type id: ${vectorType.typeId}`); } @@ -1774,3 +1796,69 @@ export class DuckDBTimeTZVector extends DuckDBVector { return new DuckDBTimeTZVector(this.items.slice(offset, offset + length), this.validity.slice(offset)); } } + +export class DuckDBTimestampTZVector extends DuckDBVector { + private readonly items: BigInt64Array; + private readonly validity: DuckDBValidity; + constructor(items: BigInt64Array, validity: DuckDBValidity) { + super(); + this.items = items; + this.validity = validity; + } + static fromRawVector(vector: duckdb.Vector, itemCount: number): DuckDBTimestampTZVector { + const data = vectorData(vector, itemCount * BigInt64Array.BYTES_PER_ELEMENT); + const items = new BigInt64Array(data.buffer, data.byteOffset, itemCount); + const validity = DuckDBValidity.fromVector(vector, itemCount); + return new DuckDBTimestampTZVector(items, validity); + } + public override get type(): DuckDBTimestampType { + return DuckDBTimestampTZType.instance; + } + public override get itemCount(): number { + return this.items.length; + } + public override getItem(itemIndex: number): bigint | null { // microseconds + return this.validity.itemValid(itemIndex) ? this.items[itemIndex] : null; + } + public override slice(offset: number, length: number): DuckDBTimestampTZVector { + return new DuckDBTimestampTZVector(this.items.slice(offset, offset + length), this.validity.slice(offset)); + } +} + +export class DuckDBVarIntVector extends DuckDBVector { + private readonly dataView: DataView; + private readonly validity: DuckDBValidity; + private readonly _itemCount: number; + constructor(dataView: DataView, validity: DuckDBValidity, itemCount: number) { + super(); + this.dataView = dataView; + this.validity = validity; + this._itemCount = itemCount; + } + static fromRawVector(vector: duckdb.Vector, itemCount: number): DuckDBVarIntVector { + const data = vectorData(vector, itemCount * 16); + const dataView = new DataView(data.buffer, data.byteOffset, data.byteLength); + const validity = DuckDBValidity.fromVector(vector, itemCount); + return new DuckDBVarIntVector(dataView, validity, itemCount); + } + public override get type(): DuckDBVarIntType { + return DuckDBVarIntType.instance; + } + public override get itemCount(): number { + return this._itemCount; + } + public override getItem(itemIndex: number): bigint | null { + if (!this.validity.itemValid(itemIndex)) { + return null; + } + const bytes = getStringBytes(this.dataView, itemIndex * 16); + return bytes ? getVarIntFromBytes(bytes) : null; + } + public override slice(offset: number, length: number): DuckDBVarIntVector { + return new DuckDBVarIntVector( + new DataView(this.dataView.buffer, this.dataView.byteOffset + offset * 16, length * 16), + this.validity.slice(offset), + length, + ); + } +} diff --git a/api/test/api.test.ts b/api/test/api.test.ts index f5fa6ab3..2222e0e4 100644 --- a/api/test/api.test.ts +++ b/api/test/api.test.ts @@ -62,6 +62,7 @@ import { DuckDBTimestampSecondsType, DuckDBTimestampSecondsVector, DuckDBTimestampTZType, + DuckDBTimestampTZVector, DuckDBTimestampType, DuckDBTimestampVector, DuckDBTinyIntType, @@ -85,6 +86,7 @@ import { DuckDBVarCharType, DuckDBVarCharVector, DuckDBVarIntType, + DuckDBVarIntVector, DuckDBVector, configurationOptionDescriptions, version @@ -145,7 +147,7 @@ const MinTimeTZOffset = -MaxTimeTZOffset; const MinTimeTZ = new DuckDBTimeTZValue(MinTimeTZMicroseconds, MaxTimeTZOffset); const MaxTimeTZ = new DuckDBTimeTZValue(MaxTimeTZMicroseconds, MinTimeTZOffset); const MinTS_S = BigInt(-9223372022400); // from test_all_types() select epoch(timestamp_s)::bigint; -const MaxTS_S = BigInt(9223372036854); +const MaxTS_S = BigInt( 9223372036854); const MinTS_MS = MinTS_S * BI_1000; const MaxTS_MS = (MaxInt64 - BI_1) / BI_1000; const MinTS_US = MinTS_MS * BI_1000; @@ -154,31 +156,13 @@ const TS_US_Inf = MaxInt64; const MinTS_NS = -9223286400000000000n; const MaxTS_NS = MaxInt64 - BI_1; const MinFloat32 = Math.fround(-3.4028235e+38); -const MaxFloat32 = Math.fround(3.4028235e+38); +const MaxFloat32 = Math.fround( 3.4028235e+38); const MinFloat64 = -Number.MAX_VALUE; const MaxFloat64 = Number.MAX_VALUE; const MinUUID = MinInt128; const MaxUUID = MaxInt128; -const MinVarInt = new Uint8Array([0x7F, 0xFF, 0x7F, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, -]); -const MaxVarInt = new Uint8Array([0x80, 0x00, 0x80, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -]); +const MinVarInt = -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368n +const MaxVarInt = 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368n; async function sleep(ms: number): Promise { return new Promise((resolve) => { @@ -544,7 +528,7 @@ describe('api', () => { assertValues(chunk, 8, DuckDBUSmallIntVector, [MinUInt16, MaxUInt16, null]); assertValues(chunk, 9, DuckDBUIntegerVector, [MinUInt32, MaxUInt32, null]); assertValues(chunk, 10, DuckDBUBigIntVector, [MinUInt64, MaxUInt64, null]); - assertValues(chunk, 11, DuckDBBlobVector, [MinVarInt, MaxVarInt, null]); + assertValues(chunk, 11, DuckDBVarIntVector, [MinVarInt, MaxVarInt, null]); assertValues(chunk, 12, DuckDBDateVector, [MinDate, MaxDate, null]); assertValues(chunk, 13, DuckDBTimeVector, [MinTime, MaxTime, null]); assertValues(chunk, 14, DuckDBTimestampVector, [MinTS_US, MaxTS_US, null]); @@ -552,7 +536,7 @@ describe('api', () => { assertValues(chunk, 16, DuckDBTimestampMillisecondsVector, [MinTS_MS, MaxTS_MS, null]); assertValues(chunk, 17, DuckDBTimestampNanosecondsVector, [MinTS_NS, MaxTS_NS, null]); assertValues(chunk, 18, DuckDBTimeTZVector, [MinTimeTZ, MaxTimeTZ, null]); - assertValues(chunk, 19, DuckDBTimestampVector, [MinTS_US, MaxTS_US, null]); + assertValues(chunk, 19, DuckDBTimestampTZVector, [MinTS_US, MaxTS_US, null]); assertValues(chunk, 20, DuckDBFloatVector, [MinFloat32, MaxFloat32, null]); assertValues(chunk, 21, DuckDBDoubleVector, [MinFloat64, MaxFloat64, null]); assertValues(chunk, 22, DuckDBDecimal2Vector, [