Skip to content

Commit ffb7643

Browse files
chenyangfb authored and sdruzkin committed
orc_output_buffer_chunk_size
1 parent 092ba79 commit ffb7643

File tree

5 files changed

+81
-6
lines changed

5 files changed

+81
-6
lines changed

presto-orc/src/main/java/com/facebook/presto/orc/ChunkedSliceOutput.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ public final class ChunkedSliceOutput
3838
extends SliceOutput
3939
{
4040
private static final int INSTANCE_SIZE = ClassLayout.parseClass(ChunkedSliceOutput.class).instanceSize();
41-
private static final int MINIMUM_CHUNK_SIZE = 4096;
41+
private static final int MINIMUM_CHUNK_SIZE = 256;
4242
private static final int MAXIMUM_CHUNK_SIZE = 16 * 1024 * 1024;
4343
// This must not be larger than MINIMUM_CHUNK_SIZE/2
4444
private static final int MAX_UNUSED_BUFFER_SIZE = 128;
@@ -371,8 +371,8 @@ public byte[] get()
371371
{
372372
byte[] buffer;
373373
if (bufferPool.isEmpty()) {
374-
currentSize = min(multiplyExact(currentSize, 2), maxChunkSize);
375374
buffer = new byte[currentSize];
375+
currentSize = min(multiplyExact(currentSize, 2), maxChunkSize);
376376
}
377377
else {
378378
buffer = bufferPool.remove(0);

presto-orc/src/main/java/com/facebook/presto/orc/ColumnWriterOptions.java

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,9 @@
2424

2525
import static com.facebook.presto.orc.OrcWriterOptions.DEFAULT_MAX_COMPRESSION_BUFFER_SIZE;
2626
import static com.facebook.presto.orc.OrcWriterOptions.DEFAULT_MAX_FLATTENED_MAP_KEY_COUNT;
27+
import static com.facebook.presto.orc.OrcWriterOptions.DEFAULT_MAX_OUTPUT_BUFFER_CHUNK_SIZE;
2728
import static com.facebook.presto.orc.OrcWriterOptions.DEFAULT_MAX_STRING_STATISTICS_LIMIT;
29+
import static com.facebook.presto.orc.OrcWriterOptions.DEFAULT_MIN_OUTPUT_BUFFER_CHUNK_SIZE;
2830
import static com.facebook.presto.orc.OrcWriterOptions.DEFAULT_PRESERVE_DIRECT_ENCODING_STRIPE_COUNT;
2931
import static com.google.common.base.Preconditions.checkArgument;
3032
import static io.airlift.units.DataSize.Unit.BYTE;
@@ -36,6 +38,8 @@ public class ColumnWriterOptions
3638
private final CompressionKind compressionKind;
3739
private final OptionalInt compressionLevel;
3840
private final int compressionMaxBufferSize;
41+
private final int minOutputBufferChunkSize;
42+
private final int maxOutputBufferChunkSize;
3943
private final DataSize stringStatisticsLimit;
4044
private final boolean integerDictionaryEncodingEnabled;
4145
private final boolean stringDictionarySortingEnabled;
@@ -51,6 +55,8 @@ public ColumnWriterOptions(
5155
CompressionKind compressionKind,
5256
OptionalInt compressionLevel,
5357
DataSize compressionMaxBufferSize,
58+
DataSize minOutputBufferChunkSize,
59+
DataSize maxOutputBufferChunkSize,
5460
DataSize stringStatisticsLimit,
5561
boolean integerDictionaryEncodingEnabled,
5662
boolean stringDictionarySortingEnabled,
@@ -68,6 +74,8 @@ public ColumnWriterOptions(
6874
this.compressionKind = requireNonNull(compressionKind, "compressionKind is null");
6975
this.compressionLevel = requireNonNull(compressionLevel, "compressionLevel is null");
7076
this.compressionMaxBufferSize = toIntExact(compressionMaxBufferSize.toBytes());
77+
this.minOutputBufferChunkSize = toIntExact(minOutputBufferChunkSize.toBytes());
78+
this.maxOutputBufferChunkSize = toIntExact(maxOutputBufferChunkSize.toBytes());
7179
this.stringStatisticsLimit = requireNonNull(stringStatisticsLimit, "stringStatisticsLimit is null");
7280
this.integerDictionaryEncodingEnabled = integerDictionaryEncodingEnabled;
7381
this.stringDictionarySortingEnabled = stringDictionarySortingEnabled;
@@ -95,6 +103,16 @@ public int getCompressionMaxBufferSize()
95103
return compressionMaxBufferSize;
96104
}
97105

106+
public int getMinOutputBufferChunkSize()
107+
{
108+
return minOutputBufferChunkSize;
109+
}
110+
111+
public int getMaxOutputBufferChunkSize()
112+
{
113+
return maxOutputBufferChunkSize;
114+
}
115+
98116
public int getStringStatisticsLimit()
99117
{
100118
return toIntExact(stringStatisticsLimit.toBytes());
@@ -162,6 +180,8 @@ public Builder toBuilder()
162180
.setCompressionKind(getCompressionKind())
163181
.setCompressionLevel(getCompressionLevel())
164182
.setCompressionMaxBufferSize(new DataSize(getCompressionMaxBufferSize(), BYTE))
183+
.setMinOutputBufferChunkSize(new DataSize(getMinOutputBufferChunkSize(), BYTE))
184+
.setMaxOutputBufferChunkSize(new DataSize(getMaxOutputBufferChunkSize(), BYTE))
165185
.setStringStatisticsLimit(new DataSize(getStringStatisticsLimit(), BYTE))
166186
.setIntegerDictionaryEncodingEnabled(isIntegerDictionaryEncodingEnabled())
167187
.setStringDictionarySortingEnabled(isStringDictionarySortingEnabled())
@@ -184,6 +204,8 @@ public static class Builder
184204
private CompressionKind compressionKind;
185205
private OptionalInt compressionLevel = OptionalInt.empty();
186206
private DataSize compressionMaxBufferSize = DEFAULT_MAX_COMPRESSION_BUFFER_SIZE;
207+
private DataSize minOutputBufferChunkSize = DEFAULT_MIN_OUTPUT_BUFFER_CHUNK_SIZE;
208+
private DataSize maxOutputBufferChunkSize = DEFAULT_MAX_OUTPUT_BUFFER_CHUNK_SIZE;
187209
private DataSize stringStatisticsLimit = DEFAULT_MAX_STRING_STATISTICS_LIMIT;
188210
private boolean integerDictionaryEncodingEnabled;
189211
private boolean stringDictionarySortingEnabled = true;
@@ -215,6 +237,18 @@ public Builder setCompressionMaxBufferSize(DataSize compressionMaxBufferSize)
215237
return this;
216238
}
217239

240+
public Builder setMinOutputBufferChunkSize(DataSize minOutputBufferChunkSize)
241+
{
242+
this.minOutputBufferChunkSize = minOutputBufferChunkSize;
243+
return this;
244+
}
245+
246+
public Builder setMaxOutputBufferChunkSize(DataSize maxOutputBufferChunkSize)
247+
{
248+
this.maxOutputBufferChunkSize = maxOutputBufferChunkSize;
249+
return this;
250+
}
251+
218252
public Builder setStringStatisticsLimit(DataSize stringStatisticsLimit)
219253
{
220254
this.stringStatisticsLimit = stringStatisticsLimit;
@@ -281,6 +315,8 @@ public ColumnWriterOptions build()
281315
compressionKind,
282316
compressionLevel,
283317
compressionMaxBufferSize,
318+
minOutputBufferChunkSize,
319+
maxOutputBufferChunkSize,
284320
stringStatisticsLimit,
285321
integerDictionaryEncodingEnabled,
286322
stringDictionarySortingEnabled,

presto-orc/src/main/java/com/facebook/presto/orc/OrcOutputBuffer.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,9 @@ public class OrcOutputBuffer
5353
private static final int INSTANCE_SIZE = ClassLayout.parseClass(OrcOutputBuffer.class).instanceSize();
5454
private static final int PAGE_HEADER_SIZE = 3; // ORC spec 3 byte header
5555
private static final int INITIAL_BUFFER_SIZE = 256;
56-
private static final int MINIMUM_OUTPUT_BUFFER_CHUNK_SIZE = 4 * 1024;
57-
private static final int MAXIMUM_OUTPUT_BUFFER_CHUNK_SIZE = 1024 * 1024;
58-
5956
private final int maxBufferSize;
57+
private final int minOutputBufferChunkSize;
58+
private final int maxOutputBufferChunkSize;
6059
private final int minCompressibleSize;
6160

6261
private final CompressionBufferPool compressionBufferPool;
@@ -86,6 +85,8 @@ public OrcOutputBuffer(ColumnWriterOptions columnWriterOptions, Optional<DwrfDat
8685

8786
CompressionKind compressionKind = columnWriterOptions.getCompressionKind();
8887
this.maxBufferSize = compressionKind == CompressionKind.NONE ? maxBufferSize : maxBufferSize - PAGE_HEADER_SIZE;
88+
this.minOutputBufferChunkSize = columnWriterOptions.getMinOutputBufferChunkSize();
89+
this.maxOutputBufferChunkSize = columnWriterOptions.getMaxOutputBufferChunkSize();
8990
this.minCompressibleSize = compressionKind.getMinCompressibleSize();
9091

9192
this.buffer = new byte[INITIAL_BUFFER_SIZE];
@@ -470,7 +471,7 @@ private void flushBufferToOutputStream()
470471
private void initCompressedOutputStream()
471472
{
472473
checkState(compressedOutputStream == null, "compressedOutputStream is already initialized");
473-
compressedOutputStream = new ChunkedSliceOutput(MINIMUM_OUTPUT_BUFFER_CHUNK_SIZE, MAXIMUM_OUTPUT_BUFFER_CHUNK_SIZE);
474+
compressedOutputStream = new ChunkedSliceOutput(minOutputBufferChunkSize, maxOutputBufferChunkSize);
474475
}
475476

476477
private void writeChunkToOutputStream(byte[] chunk, int offset, int length)

presto-orc/src/main/java/com/facebook/presto/orc/OrcWriter.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,8 @@ public OrcWriter(
224224
.setCompressionKind(compressionKind)
225225
.setCompressionLevel(options.getCompressionLevel())
226226
.setCompressionMaxBufferSize(options.getMaxCompressionBufferSize())
227+
.setMinOutputBufferChunkSize(options.getMinOutputBufferChunkSize())
228+
.setMaxOutputBufferChunkSize(options.getMaxOutputBufferChunkSize())
227229
.setStringStatisticsLimit(options.getMaxStringStatisticsLimit())
228230
.setIntegerDictionaryEncodingEnabled(options.isIntegerDictionaryEncodingEnabled())
229231
.setStringDictionarySortingEnabled(options.isStringDictionarySortingEnabled())

presto-orc/src/main/java/com/facebook/presto/orc/OrcWriterOptions.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ public class OrcWriterOptions
4040
public static final DataSize DEFAULT_DICTIONARY_USEFUL_CHECK_COLUMN_SIZE = new DataSize(6, MEGABYTE);
4141
public static final DataSize DEFAULT_MAX_STRING_STATISTICS_LIMIT = new DataSize(64, BYTE);
4242
public static final DataSize DEFAULT_MAX_COMPRESSION_BUFFER_SIZE = new DataSize(256, KILOBYTE);
43+
public static final DataSize DEFAULT_MIN_OUTPUT_BUFFER_CHUNK_SIZE = new DataSize(8, KILOBYTE);
44+
public static final DataSize DEFAULT_MAX_OUTPUT_BUFFER_CHUNK_SIZE = new DataSize(1024, KILOBYTE);
4345
public static final DataSize DEFAULT_DWRF_STRIPE_CACHE_MAX_SIZE = new DataSize(8, MEGABYTE);
4446
public static final DwrfStripeCacheMode DEFAULT_DWRF_STRIPE_CACHE_MODE = INDEX_AND_FOOTER;
4547
public static final int DEFAULT_PRESERVE_DIRECT_ENCODING_STRIPE_COUNT = 0;
@@ -56,6 +58,8 @@ public class OrcWriterOptions
5658
private final DataSize dictionaryUsefulCheckColumnSize;
5759
private final DataSize maxStringStatisticsLimit;
5860
private final DataSize maxCompressionBufferSize;
61+
private final DataSize minOutputBufferChunkSize;
62+
private final DataSize maxOutputBufferChunkSize;
5963
private final OptionalInt compressionLevel;
6064
private final StreamLayoutFactory streamLayoutFactory;
6165
private final boolean integerDictionaryEncodingEnabled;
@@ -85,6 +89,8 @@ private OrcWriterOptions(
8589
DataSize dictionaryUsefulCheckColumnSize,
8690
DataSize maxStringStatisticsLimit,
8791
DataSize maxCompressionBufferSize,
92+
DataSize minOutputBufferChunkSize,
93+
DataSize maxOutputBufferChunkSize,
8894
OptionalInt compressionLevel,
8995
StreamLayoutFactory streamLayoutFactory,
9096
boolean integerDictionaryEncodingEnabled,
@@ -104,6 +110,8 @@ private OrcWriterOptions(
104110
requireNonNull(dictionaryUsefulCheckColumnSize, "dictionaryUsefulCheckColumnSize is null");
105111
requireNonNull(maxStringStatisticsLimit, "maxStringStatisticsLimit is null");
106112
requireNonNull(maxCompressionBufferSize, "maxCompressionBufferSize is null");
113+
requireNonNull(minOutputBufferChunkSize, "minOutputBufferChunkSize is null");
114+
requireNonNull(maxOutputBufferChunkSize, "maxOutputBufferChunkSize is null");
107115
requireNonNull(compressionLevel, "compressionLevel is null");
108116
requireNonNull(streamLayoutFactory, "streamLayoutFactory is null");
109117
requireNonNull(dwrfWriterOptions, "dwrfWriterOptions is null");
@@ -118,6 +126,8 @@ private OrcWriterOptions(
118126
this.dictionaryUsefulCheckColumnSize = dictionaryUsefulCheckColumnSize;
119127
this.maxStringStatisticsLimit = maxStringStatisticsLimit;
120128
this.maxCompressionBufferSize = maxCompressionBufferSize;
129+
this.minOutputBufferChunkSize = minOutputBufferChunkSize;
130+
this.maxOutputBufferChunkSize = maxOutputBufferChunkSize;
121131
this.compressionLevel = compressionLevel;
122132
this.streamLayoutFactory = streamLayoutFactory;
123133
this.integerDictionaryEncodingEnabled = integerDictionaryEncodingEnabled;
@@ -171,6 +181,16 @@ public DataSize getMaxCompressionBufferSize()
171181
return maxCompressionBufferSize;
172182
}
173183

184+
public DataSize getMinOutputBufferChunkSize()
185+
{
186+
return minOutputBufferChunkSize;
187+
}
188+
189+
public DataSize getMaxOutputBufferChunkSize()
190+
{
191+
return maxOutputBufferChunkSize;
192+
}
193+
174194
public OptionalInt getCompressionLevel()
175195
{
176196
return compressionLevel;
@@ -272,6 +292,8 @@ public static class Builder
272292
private DataSize dictionaryUsefulCheckColumnSize = DEFAULT_DICTIONARY_USEFUL_CHECK_COLUMN_SIZE;
273293
private DataSize maxStringStatisticsLimit = DEFAULT_MAX_STRING_STATISTICS_LIMIT;
274294
private DataSize maxCompressionBufferSize = DEFAULT_MAX_COMPRESSION_BUFFER_SIZE;
295+
private DataSize minOutputBufferChunkSize = DEFAULT_MIN_OUTPUT_BUFFER_CHUNK_SIZE;
296+
private DataSize maxOutputBufferChunkSize = DEFAULT_MAX_OUTPUT_BUFFER_CHUNK_SIZE;
275297
private OptionalInt compressionLevel = OptionalInt.empty();
276298
private StreamLayoutFactory streamLayoutFactory = new ColumnSizeLayoutFactory();
277299
private boolean integerDictionaryEncodingEnabled = DEFAULT_INTEGER_DICTIONARY_ENCODING_ENABLED;
@@ -336,6 +358,18 @@ public Builder withMaxCompressionBufferSize(DataSize maxCompressionBufferSize)
336358
return this;
337359
}
338360

361+
public Builder withMinOutputBufferChunkSize(DataSize minOutputBufferChunkSize)
362+
{
363+
this.minOutputBufferChunkSize = requireNonNull(minOutputBufferChunkSize, "minOutputBufferChunkSize is null");
364+
return this;
365+
}
366+
367+
public Builder withMaxOutputBufferChunkSize(DataSize maxOutputBufferChunkSize)
368+
{
369+
this.maxOutputBufferChunkSize = requireNonNull(maxOutputBufferChunkSize, "maxOutputBufferChunkSize is null");
370+
return this;
371+
}
372+
339373
public Builder withCompressionLevel(OptionalInt compressionLevel)
340374
{
341375
this.compressionLevel = requireNonNull(compressionLevel, "compressionLevel is null");
@@ -433,6 +467,8 @@ public OrcWriterOptions build()
433467
dictionaryUsefulCheckColumnSize,
434468
maxStringStatisticsLimit,
435469
maxCompressionBufferSize,
470+
minOutputBufferChunkSize,
471+
maxOutputBufferChunkSize,
436472
compressionLevel,
437473
streamLayoutFactory,
438474
integerDictionaryEncodingEnabled,

0 commit comments

Comments
 (0)