|
| 1 | +import { DuckDBDataChunk } from './DuckDBDataChunk'; |
| 2 | +import { DuckDBLogicalType } from './DuckDBLogicalType'; |
| 3 | +import { DuckDBResult } from './DuckDBResult'; |
| 4 | +import { DuckDBType } from './DuckDBType'; |
| 5 | +import { DuckDBTypeId } from './DuckDBTypeId'; |
| 6 | +import { DuckDBVector } from './DuckDBVector'; |
| 7 | +import { ResultReturnType, StatementType } from './enums'; |
| 8 | +import { DuckDBValue } from './values'; |
| 9 | + |
/**
 * A run of consecutively fetched chunks that all contain the same number of rows.
 *
 * Describing the fetched chunks as a short list of runs lets a global row index
 * be mapped to a chunk with simple arithmetic instead of a per-chunk scan.
 */
interface ChunkSizeRun {
  /** Number of rows in every chunk of this run. */
  chunkSize: number;
  /** Number of chunks in this run. */
  chunkCount: number;
  /** Total rows in this run; always `chunkCount * chunkSize`, stored to avoid recomputing it on lookup. */
  rowCount: number;
}
| 15 | + |
/**
 * Reads the rows of a `DuckDBResult` chunk by chunk and keeps every fetched
 * chunk in memory, so values can be looked up by (column, row) or exported as
 * whole columns or rows.
 *
 * Rows only become available after `readAll` or `readUntil` has been awaited.
 */
export class DuckDBResultReader {
  /** The result that chunks are fetched from and that metadata calls delegate to. */
  private readonly result: DuckDBResult;
  /** Every non-empty chunk fetched so far, in fetch order. */
  private readonly chunks: DuckDBDataChunk[] = [];
  /** Sizes of `chunks`, grouped into runs of equal-sized consecutive chunks. */
  private readonly chunkSizeRuns: ChunkSizeRun[] = [];
  /** Sum of the row counts of all chunks in `chunks`. */
  private currentRowCount_ = 0;
  /** Set once a fetch returns a chunk with zero rows. */
  private done_ = false;

  /**
   * @param result The result to read from. No rows are fetched until
   *   `readAll` or `readUntil` is called.
   */
  constructor(result: DuckDBResult) {
    this.result = result;
  }

  /** The return type reported by the underlying result. */
  public get returnType(): ResultReturnType {
    return this.result.returnType;
  }

  /** The statement type reported by the underlying result. */
  public get statementType(): StatementType {
    return this.result.statementType;
  }

  /** Number of columns in the underlying result. */
  public get columnCount(): number {
    return this.result.columnCount;
  }

  /** Name of the column at `columnIndex`, as reported by the underlying result. */
  public columnName(columnIndex: number): string {
    return this.result.columnName(columnIndex);
  }

  /** Names of all columns, as reported by the underlying result. */
  public columnNames(): string[] {
    return this.result.columnNames();
  }

  /** Type id of the column at `columnIndex`, as reported by the underlying result. */
  public columnTypeId(columnIndex: number): DuckDBTypeId {
    return this.result.columnTypeId(columnIndex);
  }

  /** Logical type of the column at `columnIndex`, as reported by the underlying result. */
  public columnLogicalType(columnIndex: number): DuckDBLogicalType {
    return this.result.columnLogicalType(columnIndex);
  }

  /** Type of the column at `columnIndex`, as reported by the underlying result. */
  public columnType(columnIndex: number): DuckDBType {
    return this.result.columnType(columnIndex);
  }

  /** Types of all columns, as reported by the underlying result. */
  public columnTypes(): DuckDBType[] {
    return this.result.columnTypes();
  }

  /** The `rowsChanged` value reported by the underlying result. */
  public get rowsChanged(): number {
    return this.result.rowsChanged;
  }

  /** Total number of rows read so far. Call `readAll` or `readUntil` to read rows. */
  public get currentRowCount(): number {
    return this.currentRowCount_;
  }

  /** Whether reading is done, that is, there are no more rows to read. */
  public get done(): boolean {
    return this.done_;
  }

  /**
   * Returns the value for the given column and row. Both are zero-indexed.
   *
   * @param columnIndex Zero-based column index; passed through to the chunk unchecked.
   * @param rowIndex Zero-based index into the rows read so far.
   * @returns The value stored at that position.
   * @throws Error if no rows have been read yet, if `rowIndex` is not a
   *   non-negative integer, or if `rowIndex` is greater than or equal to
   *   `currentRowCount`.
   */
  public value(columnIndex: number, rowIndex: number): DuckDBValue {
    if (this.currentRowCount_ === 0) {
      throw new Error(`No rows have been read`);
    }
    // A negative or fractional index would otherwise produce a bogus chunk or
    // item index below and fail with an unrelated TypeError (or read garbage).
    if (!Number.isInteger(rowIndex) || rowIndex < 0) {
      throw new Error(`Row index ${rowIndex} is not a non-negative integer.`);
    }
    let chunkIndex = 0;
    let rowIndexInRun = rowIndex;
    // Walk the runs until we reach the one containing the row. The list is
    // expected to stay very short (the original author notes it should not
    // exceed two entries), so this is effectively constant time.
    for (const run of this.chunkSizeRuns) {
      if (rowIndexInRun < run.rowCount) {
        // All chunks in a run have the same size, so plain division locates
        // the chunk within the run and the remainder locates the row in it.
        chunkIndex += Math.floor(rowIndexInRun / run.chunkSize);
        const rowIndexInChunk = rowIndexInRun % run.chunkSize;
        return this.chunks[chunkIndex]
          .getColumnVector(columnIndex)
          .getItem(rowIndexInChunk);
      }
      // Not in this run: skip all of its chunks and rows.
      chunkIndex += run.chunkCount;
      rowIndexInRun -= run.rowCount;
    }
    // Ran past the last run, so the row has not been read (or does not exist).
    throw new Error(
      `Row index ${rowIndex} requested, but only ${this.currentRowCount_} row have been read so far.`,
    );
  }

  /** Read all rows. Resolves once a fetch yields an empty chunk. */
  public async readAll(): Promise<void> {
    return this.fetchChunks();
  }

  /**
   * Read rows until at least the given target row count has been met, or
   * until there are no more rows.
   *
   * Note that the resulting row count could be greater than the target, since
   * rows are read in chunks, typically of 2048 rows each.
   *
   * @param targetRowCount Minimum number of rows that should be available afterwards.
   */
  public async readUntil(targetRowCount: number): Promise<void> {
    return this.fetchChunks(targetRowCount);
  }

  /**
   * Fetches chunks from the result until it is exhausted or, when
   * `targetRowCount` is given, until at least that many rows have been read.
   */
  private async fetchChunks(targetRowCount?: number): Promise<void> {
    while (!this.done_) {
      if (
        targetRowCount !== undefined &&
        this.currentRowCount_ >= targetRowCount
      ) {
        return;
      }
      const chunk = await this.result.fetchChunk();
      if (chunk.rowCount > 0) {
        this.updateChunkSizeRuns(chunk);
        this.chunks.push(chunk);
        this.currentRowCount_ += chunk.rowCount;
      } else {
        // An empty chunk marks the end of the result.
        this.done_ = true;
      }
    }
  }

  /**
   * Records the size of a newly fetched chunk: extends the last run when the
   * size matches, otherwise starts a new run.
   */
  private updateChunkSizeRuns(chunk: DuckDBDataChunk): void {
    if (this.chunkSizeRuns.length > 0) {
      const lastRun = this.chunkSizeRuns[this.chunkSizeRuns.length - 1];
      if (lastRun.chunkSize === chunk.rowCount) {
        // Same size as the previous chunk: grow the current run in place.
        lastRun.chunkCount += 1;
        lastRun.rowCount += lastRun.chunkSize;
        return;
      }
    }
    // First chunk, or a chunk of a different size: start a new run.
    this.chunkSizeRuns.push({
      chunkCount: 1,
      chunkSize: chunk.rowCount,
      rowCount: chunk.rowCount,
    });
  }

  /**
   * Returns all rows read so far in column-major form: one array per column,
   * each holding that column's values in row order.
   *
   * Returns an empty array (no columns) when no chunks have been read.
   */
  public getColumns(): DuckDBValue[][] {
    if (this.chunks.length === 0) {
      return [];
    }
    const columnCount = this.columnCount;
    const firstChunk = this.chunks[0];
    const columns: DuckDBValue[][] = [];
    for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
      // Copy the first chunk's values: later chunks are appended to this
      // array, and we must not grow an array the chunk may still own or share.
      columns.push([...firstChunk.getColumnValues(columnIndex)]);
    }
    for (let chunkIndex = 1; chunkIndex < this.chunks.length; chunkIndex++) {
      const chunk = this.chunks[chunkIndex];
      for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
        const vector = chunk.getColumnVector(columnIndex);
        const column = columns[columnIndex];
        for (let itemIndex = 0; itemIndex < vector.itemCount; itemIndex++) {
          column.push(vector.getItem(itemIndex));
        }
      }
    }
    return columns;
  }

  /**
   * Returns all rows read so far in row-major form: one array per row, each
   * holding that row's values in column order.
   */
  public getRows(): DuckDBValue[][] {
    const rows: DuckDBValue[][] = [];
    for (const chunk of this.chunks) {
      // Look up each column vector once per chunk rather than once per cell.
      const columnCount = chunk.columnCount;
      const chunkVectors: DuckDBVector[] = [];
      for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
        chunkVectors.push(chunk.getColumnVector(columnIndex));
      }
      const rowCount = chunk.rowCount;
      for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) {
        const row: DuckDBValue[] = [];
        for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
          row.push(chunkVectors[columnIndex].getItem(rowIndex));
        }
        rows.push(row);
      }
    }
    return rows;
  }
}
0 commit comments