16
16
#include < algorithm>
17
17
18
18
#include " velox/core/PlanNode.h"
19
+ #include " velox/exec/tests/utils/AssertQueryBuilder.h"
20
+ #include " velox/exec/tests/utils/OperatorTestBase.h"
19
21
#include " velox/vector/BaseVector.h"
20
22
#include " velox/vector/tests/utils/VectorMaker.h"
21
23
#include " velox/vector/tests/utils/VectorTestBase.h"
24
26
25
27
namespace facebook ::presto::operators::test {
26
28
namespace {
29
// Compares two serialized keys byte by byte, treating every byte as unsigned
// (this follows the Cosco test suite's semantic).
//
// Returns -1 if key1 < key2, 0 if key1 == key2, 1 if key1 > key2. A key that
// is a strict prefix of the other one is the smaller key.
int lexicographicalCompare(const std::string& key1, const std::string& key2) {
  const auto* begin1 = reinterpret_cast<const unsigned char*>(key1.data());
  const auto* end1 = begin1 + key1.size();
  const auto* begin2 = reinterpret_cast<const unsigned char*>(key2.data());
  const auto* end2 = begin2 + key2.size();

  // A single pass locates the first differing byte (or the end of the shorter
  // key), which is all that is needed to decide the three-way result.
  const auto firstDiff = std::mismatch(begin1, end1, begin2, end2);
  const bool key1Exhausted = firstDiff.first == end1;
  const bool key2Exhausted = firstDiff.second == end2;

  if (key1Exhausted) {
    // Either both keys are identical or key1 is a strict prefix of key2.
    return key2Exhausted ? 0 : -1;
  }
  if (key2Exhausted) {
    // key2 is a strict prefix of key1.
    return 1;
  }
  return *firstDiff.first < *firstDiff.second ? -1 : 1;
}
42
+
43
+ void serializeRow (
44
+ BinarySortableSerializer binarySortableSerializer,
45
+ size_t index,
46
+ velox::StringVectorBuffer* out) {
47
+ binarySortableSerializer.serialize (/* rowId=*/ index, out);
48
+ out->flushRow (index);
49
+ }
27
50
28
51
class BinarySortableSerializerTest : public ::testing::Test,
29
52
public velox::test::VectorTestBase {
@@ -32,28 +55,6 @@ class BinarySortableSerializerTest : public ::testing::Test,
32
55
velox::memory::MemoryManager::testingSetInstance ({});
33
56
}
34
57
35
- // return -1 if key1 < key2, 0 if key1 == key2, 1 if key1 > key 2
36
- int lexicographicalCompare (std::string key1, std::string key2) {
37
- // doing unsinged byte comparison following the Cosco test suite's semantic.
38
- auto begin1 = reinterpret_cast <unsigned char *>(key1.data ());
39
- auto end1 = begin1 + key1.size ();
40
- auto begin2 = reinterpret_cast <unsigned char *>(key2.data ());
41
- auto end2 = begin2 + key2.size ();
42
- bool lessThan = std::lexicographical_compare (begin1, end1, begin2, end2);
43
-
44
- bool equal = std::equal (begin1, end1, begin2, end2);
45
-
46
- return lessThan ? -1 : (equal ? 0 : 1 );
47
- }
48
-
49
- void serializeRow (
50
- BinarySortableSerializer binarySortableSerializer,
51
- size_t index,
52
- velox::StringVectorBuffer* out) {
53
- binarySortableSerializer.serialize (/* rowId=*/ index, out);
54
- out->flushRow (index);
55
- }
56
-
57
58
int compareRowVector (
58
59
const velox::RowVectorPtr& rowVector,
59
60
const std::vector<
@@ -136,8 +137,148 @@ class BinarySortableSerializerTest : public ::testing::Test,
136
137
std::unique_ptr<velox::StreamArena> streamArena_ =
137
138
std::make_unique<velox::StreamArena>(pool_.get());
138
139
};
140
+
141
+ class BinarySortableSerializerFuzzerTest : public ::testing::Test,
142
+ public velox::test::VectorTestBase {
143
+ protected:
144
+ static void SetUpTestCase () {
145
+ velox::memory::MemoryManager::testingSetInstance ({});
146
+ }
147
+
148
+ void runFuzzerTest (const velox::RowTypePtr& rowType) {
149
+ const auto seed = 1 ;
150
+ // Create a random number generator and seed it
151
+ std::mt19937 rng (seed);
152
+
153
+ const boost::random::uniform_int_distribution<int > distribution (
154
+ 0 , sortingOrders_.size () - 1 );
155
+
156
+ // Generate random sort ordering
157
+ std::vector<velox::core::SortOrder> testOrdering;
158
+ for (uint32_t i = 0 ; i < rowType->size (); ++i) {
159
+ const int randomIndex = distribution (rng);
160
+ testOrdering.push_back (sortingOrders_[randomIndex]);
161
+ }
162
+
163
+ const auto rowVector = makeData (rowType);
164
+ const auto fields = getFields (rowType);
165
+
166
+ ensureSorted (rowVector, fields, testOrdering);
167
+ }
168
+
169
+ bool sortedAfterSerialization (
170
+ const velox::RowVectorPtr& rowVector,
171
+ const std::vector<
172
+ std::shared_ptr<const velox::core::FieldAccessTypedExpr>>& fields,
173
+ const std::vector<velox::core::SortOrder>& ordering,
174
+ int vectorSize) {
175
+ BinarySortableSerializer binarySortableSerializer (
176
+ rowVector, ordering, fields);
177
+
178
+ auto vec = velox::BaseVector::create<velox::FlatVector<velox::StringView>>(
179
+ velox::VARBINARY (), vectorSize, pool_.get ());
180
+ // Create a ResizableVectorBuffer with initial and max capacity.
181
+ velox::StringVectorBuffer buffer (vec.get (), 1024 , 1 << 20 );
182
+ for (size_t i = 0 ; i < vectorSize; ++i) {
183
+ serializeRow (binarySortableSerializer, /* index=*/ i, &buffer);
184
+ }
185
+
186
+ for (velox::vector_size_t i = 0 ; i < vec->size () - 1 ; ++i) {
187
+ if (lexicographicalCompare (vec->valueAt (i), vec->valueAt (i + 1 )) > 0 ) {
188
+ return false ;
189
+ }
190
+ }
191
+ return true ; // ensure all elements are in non-descending order
192
+ }
193
+
194
+ void ensureSorted (
195
+ const velox::RowVectorPtr& input,
196
+ const std::vector<
197
+ std::shared_ptr<const velox::core::FieldAccessTypedExpr>>& keys,
198
+ const std::vector<velox::core::SortOrder>& ordering) {
199
+ const auto planNode = std::make_shared<velox::core::OrderByNode>(
200
+ " orderBy" ,
201
+ keys,
202
+ ordering,
203
+ false , // isPartial
204
+ std::make_shared<velox::core::ValuesNode>(
205
+ " values" , std::vector<velox::RowVectorPtr>{input}));
206
+
207
+ velox::exec::test::AssertQueryBuilder builder (planNode);
208
+ const auto sortedVector = builder.copyResults (pool_.get ());
209
+ // Ensure that sorting order is preserved after serialization.
210
+ EXPECT_TRUE (sortedAfterSerialization (
211
+ sortedVector, keys, ordering, sortedVector->size ()));
212
+ }
213
+
214
+ velox::RowVectorPtr makeData (const velox::RowTypePtr& rowType) {
215
+ velox::VectorFuzzer::Options options;
216
+ options.vectorSize = 1'000 ;
217
+ options.allowDictionaryVector = true ;
218
+
219
+ const auto seed = 1 ; // For reproducibility.
220
+ velox::VectorFuzzer fuzzer (options, pool_.get (), seed);
221
+
222
+ return fuzzer.fuzzInputRow (rowType);
223
+ }
224
+
225
+ std::vector<std::shared_ptr<const velox::core::FieldAccessTypedExpr>>
226
+ getFields (const velox::RowTypePtr& rowType) {
227
+ std::vector<std::shared_ptr<const velox::core::FieldAccessTypedExpr>>
228
+ fieldAccessExprs;
229
+ for (const auto & fieldName : rowType->names ()) {
230
+ auto fieldExpr = std::make_shared<velox::core::FieldAccessTypedExpr>(
231
+ rowType->findChild (fieldName), fieldName);
232
+ fieldAccessExprs.push_back (std::move (fieldExpr));
233
+ }
234
+ return fieldAccessExprs;
235
+ }
236
+
237
+ const std::vector<velox::core::SortOrder> sortingOrders_ = {
238
+ velox::core::kAscNullsFirst ,
239
+ velox::core::kAscNullsLast ,
240
+ velox::core::kDescNullsFirst ,
241
+ velox::core::kDescNullsLast };
242
+ std::shared_ptr<velox::memory::MemoryPool> pool_ =
243
+ velox::memory::deprecatedAddDefaultLeafMemoryPool ();
244
+ velox::test::VectorMaker vectorMaker_{pool_.get ()};
245
+ };
139
246
} // namespace
140
247
248
+ TEST_F (BinarySortableSerializerFuzzerTest, fuzzerTestString) {
249
+ const auto rowType = velox::ROW ({" c1" , " c2" }, {velox::BIGINT (), velox::VARCHAR ()});
250
+ runFuzzerTest (rowType);
251
+ }
252
+
253
+ TEST_F (BinarySortableSerializerFuzzerTest, fuzzerTestArray) {
254
+ const auto rowType = velox::ROW (
255
+ {" c1" , " c2" }, {velox::BIGINT (), velox::ARRAY (velox::BIGINT ())});
256
+ runFuzzerTest (rowType);
257
+ }
258
+
259
+ TEST_F (BinarySortableSerializerFuzzerTest, fuzzerTestStruct) {
260
+ const auto rowType = velox::ROW (
261
+ {" c1" , " c2" },
262
+ {velox::BIGINT (),
263
+ velox::ROW (
264
+ {velox::BIGINT (),
265
+ velox::DOUBLE (),
266
+ velox::BOOLEAN (),
267
+ velox::TINYINT (),
268
+ velox::REAL ()})});
269
+ runFuzzerTest (rowType);
270
+ }
271
+
272
+ TEST_F (BinarySortableSerializerFuzzerTest, fuzzerTestNestedStruct) {
273
+ const auto innerRowType = velox::ROW (
274
+ {velox::BIGINT (),
275
+ velox::DOUBLE (),
276
+ velox::BOOLEAN (),
277
+ velox::ROW ({velox::TINYINT (), velox::REAL ()})});
278
+ const auto rowType = velox::ROW ({" c0" , " c1" }, {velox::BIGINT (), innerRowType});
279
+ runFuzzerTest (rowType);
280
+ }
281
+
141
282
TEST_F (BinarySortableSerializerTest, LongTypeAllFields) {
142
283
auto ordering = {
143
284
velox::core::SortOrder (velox::core::kAscNullsFirst ),
0 commit comments