Skip to content

api: implement varint, fix timestamp_tz vector type #28

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 93 additions & 5 deletions api/src/DuckDBVector.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os from 'os';
import duckdb from '@duckdb/node-bindings';
import os from 'os';
import { DuckDBLogicalType } from './DuckDBLogicalType';
import {
DuckDBArrayType,
Expand All @@ -25,6 +25,7 @@ import {
DuckDBTimestampMillisecondsType,
DuckDBTimestampNanosecondsType,
DuckDBTimestampSecondsType,
DuckDBTimestampTZType,
DuckDBTimestampType,
DuckDBTinyIntType,
DuckDBType,
Expand All @@ -36,6 +37,7 @@ import {
DuckDBUUIDType,
DuckDBUnionType,
DuckDBVarCharType,
DuckDBVarIntType,
} from './DuckDBType';
import { DuckDBTypeId } from './DuckDBTypeId';

Expand Down Expand Up @@ -122,6 +124,26 @@ function getBuffer(dataView: DataView, offset: number): Buffer | null {
return Buffer.from(stringBytes);
}

/**
 * Decodes a VARINT byte sequence into a bigint.
 *
 * Layout as consumed here: a 3-byte header followed by the magnitude in
 * big-endian byte order. Only the top bit of the first header byte is
 * inspected: set means non-negative, clear means negative. For negative
 * values the magnitude bytes are stored bitwise-complemented, so each chunk
 * is XOR-ed with an all-ones mask before being accumulated. The remaining
 * header bits are not read by this function.
 *
 * @param bytes The encoded value, including its 3-byte header.
 * @returns The decoded integer; a header with no magnitude bytes decodes to 0n.
 * @throws RangeError if `bytes` is shorter than the 3-byte header.
 */
function getVarIntFromBytes(bytes: Uint8Array): bigint {
  const headerSize = 3;
  // Without this guard a short input fails later with an opaque
  // "Invalid DataView length" error; fail early with a descriptive one.
  if (bytes.byteLength < headerSize) {
    throw new RangeError(
      `Invalid VARINT: expected at least ${headerSize} header bytes, got ${bytes.byteLength}`,
    );
  }

  const firstByte = bytes[0];
  const positive = (firstByte & 0x80) > 0;
  // XOR masks undo the complement applied to negative magnitudes.
  const uint64Mask = positive ? 0n : 0xffffffffffffffffn;
  const uint8Mask = positive ? 0 : 0xff;

  // View over the magnitude bytes only (header skipped).
  const dv = new DataView(
    bytes.buffer,
    bytes.byteOffset + headerSize,
    bytes.byteLength - headerSize,
  );
  const lastUint64Offset = dv.byteLength - 8;

  let offset = 0;
  let result = 0n;
  // Consume the magnitude eight bytes at a time while a full word remains
  // (DataView reads are big-endian by default).
  while (offset <= lastUint64Offset) {
    result = (result << 64n) | (dv.getBigUint64(offset) ^ uint64Mask);
    offset += 8;
  }
  // Then fold in the trailing 0-7 bytes one at a time.
  while (offset < dv.byteLength) {
    result = (result << 8n) | BigInt(dv.getUint8(offset) ^ uint8Mask);
    offset += 1;
  }
  return positive ? result : -result;
}

/**
 * Reads one unsigned byte at `offset` (via `getUInt8`) and interprets it as a
 * boolean: any non-zero byte is `true`, zero is `false`.
 */
function getBoolean1(dataView: DataView, offset: number): boolean {
  const rawByte = getUInt8(dataView, offset);
  return rawByte !== 0;
}
Expand Down Expand Up @@ -360,13 +382,13 @@ export abstract class DuckDBVector<T> {
case DuckDBTypeId.TIME_TZ:
return DuckDBTimeTZVector.fromRawVector(vector, itemCount);
case DuckDBTypeId.TIMESTAMP_TZ:
return DuckDBTimestampVector.fromRawVector(vector, itemCount);
return DuckDBTimestampTZVector.fromRawVector(vector, itemCount);
case DuckDBTypeId.ANY:
throw new Error(`Vector not implemented for ANY type`);
throw new Error(`Invalid vector type: ANY`);
case DuckDBTypeId.VARINT:
return DuckDBBlobVector.fromRawVector(vector, itemCount); // TODO: VARINT
return DuckDBVarIntVector.fromRawVector(vector, itemCount);
case DuckDBTypeId.SQLNULL:
throw new Error(`Vector not implemented for SQLNULL type`);
throw new Error(`Invalid vector type: SQLNULL`);
default:
throw new Error(`Invalid type id: ${vectorType.typeId}`);
}
Expand Down Expand Up @@ -1774,3 +1796,69 @@ export class DuckDBTimeTZVector extends DuckDBVector<DuckDBTimeTZValue> {
return new DuckDBTimeTZVector(this.items.slice(offset, offset + length), this.validity.slice(offset));
}
}

/**
 * Vector of TIMESTAMP_TZ values, exposed as 64-bit signed integers
 * (microseconds, per the note on `getItem`) with a validity mask for NULLs.
 */
export class DuckDBTimestampTZVector extends DuckDBVector<bigint> {
  private readonly items: BigInt64Array;
  private readonly validity: DuckDBValidity;

  constructor(items: BigInt64Array, validity: DuckDBValidity) {
    super();
    this.items = items;
    this.validity = validity;
  }

  /**
   * Builds a vector from a raw DuckDB vector by viewing `itemCount` 8-byte
   * entries of its data as a `BigInt64Array` and reading its validity mask.
   */
  static fromRawVector(vector: duckdb.Vector, itemCount: number): DuckDBTimestampTZVector {
    const data = vectorData(vector, itemCount * BigInt64Array.BYTES_PER_ELEMENT);
    const items = new BigInt64Array(data.buffer, data.byteOffset, itemCount);
    const validity = DuckDBValidity.fromVector(vector, itemCount);
    return new DuckDBTimestampTZVector(items, validity);
  }

  /** The logical type of this vector: the TIMESTAMP_TZ type singleton. */
  public override get type(): DuckDBTimestampTZType {
    return DuckDBTimestampTZType.instance;
  }

  /** Number of items held by this vector. */
  public override get itemCount(): number {
    return this.items.length;
  }

  /**
   * Returns the value at `itemIndex`, or `null` when the validity mask marks
   * the item as invalid (NULL).
   */
  public override getItem(itemIndex: number): bigint | null { // microseconds
    return this.validity.itemValid(itemIndex) ? this.items[itemIndex] : null;
  }

  /**
   * Returns a new vector holding a copy of `length` items starting at
   * `offset`, paired with the validity mask sliced from the same offset.
   */
  public override slice(offset: number, length: number): DuckDBTimestampTZVector {
    return new DuckDBTimestampTZVector(this.items.slice(offset, offset + length), this.validity.slice(offset));
  }
}

/**
 * Vector of VARINT values decoded to `bigint`.
 *
 * Each entry occupies 16 bytes in the underlying data view (presumably
 * DuckDB's string entry layout, as read by `getStringBytes` — confirm against
 * that helper); the entry's bytes are decoded with `getVarIntFromBytes`.
 */
export class DuckDBVarIntVector extends DuckDBVector<bigint> {
  private readonly dataView: DataView;
  private readonly validity: DuckDBValidity;
  private readonly _itemCount: number;

  constructor(dataView: DataView, validity: DuckDBValidity, itemCount: number) {
    super();
    this.dataView = dataView;
    this.validity = validity;
    this._itemCount = itemCount;
  }

  /**
   * Wraps the raw vector's data (16 bytes per item) in a `DataView` and pairs
   * it with the vector's validity mask.
   */
  static fromRawVector(vector: duckdb.Vector, itemCount: number): DuckDBVarIntVector {
    const raw = vectorData(vector, itemCount * 16);
    return new DuckDBVarIntVector(
      new DataView(raw.buffer, raw.byteOffset, raw.byteLength),
      DuckDBValidity.fromVector(vector, itemCount),
      itemCount,
    );
  }

  /** The logical type of this vector: the VARINT type singleton. */
  public override get type(): DuckDBVarIntType {
    return DuckDBVarIntType.instance;
  }

  /** Number of items held by this vector. */
  public override get itemCount(): number {
    return this._itemCount;
  }

  /**
   * Returns the decoded value at `itemIndex`, or `null` when the item is
   * marked invalid or `getStringBytes` yields no bytes for it.
   */
  public override getItem(itemIndex: number): bigint | null {
    if (this.validity.itemValid(itemIndex)) {
      const encoded = getStringBytes(this.dataView, itemIndex * 16);
      if (encoded) {
        return getVarIntFromBytes(encoded);
      }
    }
    return null;
  }

  /**
   * Returns a vector over `length` entries starting at `offset`. The new
   * vector's data view is created over the same underlying buffer.
   */
  public override slice(offset: number, length: number): DuckDBVarIntVector {
    const entriesView = new DataView(
      this.dataView.buffer,
      this.dataView.byteOffset + offset * 16,
      length * 16,
    );
    const entriesValidity = this.validity.slice(offset);
    return new DuckDBVarIntVector(entriesView, entriesValidity, length);
  }
}
32 changes: 8 additions & 24 deletions api/test/api.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ import {
DuckDBTimestampSecondsType,
DuckDBTimestampSecondsVector,
DuckDBTimestampTZType,
DuckDBTimestampTZVector,
DuckDBTimestampType,
DuckDBTimestampVector,
DuckDBTinyIntType,
Expand All @@ -85,6 +86,7 @@ import {
DuckDBVarCharType,
DuckDBVarCharVector,
DuckDBVarIntType,
DuckDBVarIntVector,
DuckDBVector,
configurationOptionDescriptions,
version
Expand Down Expand Up @@ -145,7 +147,7 @@ const MinTimeTZOffset = -MaxTimeTZOffset;
const MinTimeTZ = new DuckDBTimeTZValue(MinTimeTZMicroseconds, MaxTimeTZOffset);
const MaxTimeTZ = new DuckDBTimeTZValue(MaxTimeTZMicroseconds, MinTimeTZOffset);
const MinTS_S = BigInt(-9223372022400); // from test_all_types() select epoch(timestamp_s)::bigint;
const MaxTS_S = BigInt(9223372036854);
const MaxTS_S = BigInt( 9223372036854);
const MinTS_MS = MinTS_S * BI_1000;
const MaxTS_MS = (MaxInt64 - BI_1) / BI_1000;
const MinTS_US = MinTS_MS * BI_1000;
Expand All @@ -154,31 +156,13 @@ const TS_US_Inf = MaxInt64;
const MinTS_NS = -9223286400000000000n;
const MaxTS_NS = MaxInt64 - BI_1;
const MinFloat32 = Math.fround(-3.4028235e+38);
const MaxFloat32 = Math.fround(3.4028235e+38);
const MaxFloat32 = Math.fround( 3.4028235e+38);
const MinFloat64 = -Number.MAX_VALUE;
const MaxFloat64 = Number.MAX_VALUE;
const MinUUID = MinInt128;
const MaxUUID = MaxInt128;
const MinVarInt = new Uint8Array([0x7F, 0xFF, 0x7F,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
]);
const MaxVarInt = new Uint8Array([0x80, 0x00, 0x80,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
]);
const MinVarInt = -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368n
const MaxVarInt = 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368n;

async function sleep(ms: number): Promise<void> {
return new Promise((resolve) => {
Expand Down Expand Up @@ -544,15 +528,15 @@ describe('api', () => {
assertValues(chunk, 8, DuckDBUSmallIntVector, [MinUInt16, MaxUInt16, null]);
assertValues(chunk, 9, DuckDBUIntegerVector, [MinUInt32, MaxUInt32, null]);
assertValues(chunk, 10, DuckDBUBigIntVector, [MinUInt64, MaxUInt64, null]);
assertValues(chunk, 11, DuckDBBlobVector, [MinVarInt, MaxVarInt, null]);
assertValues(chunk, 11, DuckDBVarIntVector, [MinVarInt, MaxVarInt, null]);
assertValues(chunk, 12, DuckDBDateVector, [MinDate, MaxDate, null]);
assertValues(chunk, 13, DuckDBTimeVector, [MinTime, MaxTime, null]);
assertValues(chunk, 14, DuckDBTimestampVector, [MinTS_US, MaxTS_US, null]);
assertValues(chunk, 15, DuckDBTimestampSecondsVector, [MinTS_S, MaxTS_S, null]);
assertValues(chunk, 16, DuckDBTimestampMillisecondsVector, [MinTS_MS, MaxTS_MS, null]);
assertValues(chunk, 17, DuckDBTimestampNanosecondsVector, [MinTS_NS, MaxTS_NS, null]);
assertValues(chunk, 18, DuckDBTimeTZVector, [MinTimeTZ, MaxTimeTZ, null]);
assertValues(chunk, 19, DuckDBTimestampVector, [MinTS_US, MaxTS_US, null]);
assertValues(chunk, 19, DuckDBTimestampTZVector, [MinTS_US, MaxTS_US, null]);
assertValues(chunk, 20, DuckDBFloatVector, [MinFloat32, MaxFloat32, null]);
assertValues(chunk, 21, DuckDBDoubleVector, [MinFloat64, MaxFloat64, null]);
assertValues(chunk, 22, DuckDBDecimal2Vector, [
Expand Down