Skip to content

Commit 7a97ef8

Browse files
emilysun201309facebook-github-bot
authored and committed
Use VectorFuzzer for BinarySortableSerializer test (prestodb#24954)
Summary: - Use VectorFuzzer for test so that we can generate larger input data. - These tests work by first using OrderBy operator in Velox to sort the input data. Then serialize the sorted result, and ensure that the sorting order is preserved after serialization. Reviewed By: xiaoxmeng Differential Revision: D73075362
1 parent ee6bc00 commit 7a97ef8

File tree

1 file changed

+163
-22
lines changed

1 file changed

+163
-22
lines changed

presto-native-execution/presto_cpp/main/operators/tests/BinarySortableSerializerTest.cpp

Lines changed: 163 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
#include <algorithm>
1717

1818
#include "velox/core/PlanNode.h"
19+
#include "velox/exec/tests/utils/AssertQueryBuilder.h"
20+
#include "velox/exec/tests/utils/OperatorTestBase.h"
1921
#include "velox/vector/BaseVector.h"
2022
#include "velox/vector/tests/utils/VectorMaker.h"
2123
#include "velox/vector/tests/utils/VectorTestBase.h"
@@ -24,6 +26,27 @@
2426

2527
namespace facebook::presto::operators::test {
2628
namespace {
29+
/// Compares two serialized keys byte by byte, treating every byte as an
/// unsigned value (following the Cosco test suite's semantic).
///
/// @param key1 Left-hand key.
/// @param key2 Right-hand key.
/// @return -1 if key1 < key2, 0 if key1 == key2, 1 if key1 > key2. A key that
/// is a strict prefix of the other one compares as the smaller key.
int lexicographicalCompare(const std::string& key1, const std::string& key2) {
  // Single pass over both keys: find the first position where they differ.
  const auto [it1, it2] =
      std::mismatch(key1.begin(), key1.end(), key2.begin(), key2.end());

  if (it1 == key1.end()) {
    // key1 is exhausted: the keys are either equal or key1 is a prefix of
    // key2.
    return it2 == key2.end() ? 0 : -1;
  }
  if (it2 == key2.end()) {
    // key2 is a strict prefix of key1.
    return 1;
  }

  // 'char' may be signed, so cast explicitly to get unsigned byte ordering.
  return static_cast<unsigned char>(*it1) < static_cast<unsigned char>(*it2)
      ? -1
      : 1;
}
42+
43+
void serializeRow(
44+
BinarySortableSerializer binarySortableSerializer,
45+
size_t index,
46+
velox::StringVectorBuffer* out) {
47+
binarySortableSerializer.serialize(/*rowId=*/index, out);
48+
out->flushRow(index);
49+
}
2750

2851
class BinarySortableSerializerTest : public ::testing::Test,
2952
public velox::test::VectorTestBase {
@@ -32,28 +55,6 @@ class BinarySortableSerializerTest : public ::testing::Test,
3255
velox::memory::MemoryManager::testingSetInstance({});
3356
}
3457

35-
// return -1 if key1 < key2, 0 if key1 == key2, 1 if key1 > key 2
36-
int lexicographicalCompare(std::string key1, std::string key2) {
37-
// doing unsinged byte comparison following the Cosco test suite's semantic.
38-
auto begin1 = reinterpret_cast<unsigned char*>(key1.data());
39-
auto end1 = begin1 + key1.size();
40-
auto begin2 = reinterpret_cast<unsigned char*>(key2.data());
41-
auto end2 = begin2 + key2.size();
42-
bool lessThan = std::lexicographical_compare(begin1, end1, begin2, end2);
43-
44-
bool equal = std::equal(begin1, end1, begin2, end2);
45-
46-
return lessThan ? -1 : (equal ? 0 : 1);
47-
}
48-
49-
void serializeRow(
50-
BinarySortableSerializer binarySortableSerializer,
51-
size_t index,
52-
velox::StringVectorBuffer* out) {
53-
binarySortableSerializer.serialize(/*rowId=*/index, out);
54-
out->flushRow(index);
55-
}
56-
5758
int compareRowVector(
5859
const velox::RowVectorPtr& rowVector,
5960
const std::vector<
@@ -136,8 +137,148 @@ class BinarySortableSerializerTest : public ::testing::Test,
136137
std::unique_ptr<velox::StreamArena> streamArena_ =
137138
std::make_unique<velox::StreamArena>(pool_.get());
138139
};
140+
141+
class BinarySortableSerializerFuzzerTest : public ::testing::Test,
142+
public velox::test::VectorTestBase {
143+
protected:
144+
static void SetUpTestCase() {
145+
velox::memory::MemoryManager::testingSetInstance({});
146+
}
147+
148+
void runFuzzerTest(const velox::RowTypePtr& rowType) {
149+
const auto seed = 1;
150+
// Create a random number generator and seed it
151+
std::mt19937 rng(seed);
152+
153+
const boost::random::uniform_int_distribution<int> distribution(
154+
0, sortingOrders_.size() - 1);
155+
156+
// Generate random sort ordering
157+
std::vector<velox::core::SortOrder> testOrdering;
158+
for (uint32_t i = 0; i < rowType->size(); ++i) {
159+
const int randomIndex = distribution(rng);
160+
testOrdering.push_back(sortingOrders_[randomIndex]);
161+
}
162+
163+
const auto rowVector = makeData(rowType);
164+
const auto fields = getFields(rowType);
165+
166+
ensureSorted(rowVector, fields, testOrdering);
167+
}
168+
169+
bool sortedAfterSerialization(
170+
const velox::RowVectorPtr& rowVector,
171+
const std::vector<
172+
std::shared_ptr<const velox::core::FieldAccessTypedExpr>>& fields,
173+
const std::vector<velox::core::SortOrder>& ordering,
174+
int vectorSize) {
175+
BinarySortableSerializer binarySortableSerializer(
176+
rowVector, ordering, fields);
177+
178+
auto vec = velox::BaseVector::create<velox::FlatVector<velox::StringView>>(
179+
velox::VARBINARY(), vectorSize, pool_.get());
180+
// Create a ResizableVectorBuffer with initial and max capacity.
181+
velox::StringVectorBuffer buffer(vec.get(), 1024, 1 << 20);
182+
for (size_t i = 0; i < vectorSize; ++i) {
183+
serializeRow(binarySortableSerializer, /*index=*/i, &buffer);
184+
}
185+
186+
for (velox::vector_size_t i = 0; i < vec->size() - 1; ++i) {
187+
if (lexicographicalCompare(vec->valueAt(i), vec->valueAt(i + 1)) > 0) {
188+
return false;
189+
}
190+
}
191+
return true; // ensure all elements are in non-descending order
192+
}
193+
194+
void ensureSorted(
195+
const velox::RowVectorPtr& input,
196+
const std::vector<
197+
std::shared_ptr<const velox::core::FieldAccessTypedExpr>>& keys,
198+
const std::vector<velox::core::SortOrder>& ordering) {
199+
const auto planNode = std::make_shared<velox::core::OrderByNode>(
200+
"orderBy",
201+
keys,
202+
ordering,
203+
false, // isPartial
204+
std::make_shared<velox::core::ValuesNode>(
205+
"values", std::vector<velox::RowVectorPtr>{input}));
206+
207+
velox::exec::test::AssertQueryBuilder builder(planNode);
208+
const auto sortedVector = builder.copyResults(pool_.get());
209+
// Ensure that sorting order is preserved after serialization.
210+
EXPECT_TRUE(sortedAfterSerialization(
211+
sortedVector, keys, ordering, sortedVector->size()));
212+
}
213+
214+
velox::RowVectorPtr makeData(const velox::RowTypePtr& rowType) {
215+
velox::VectorFuzzer::Options options;
216+
options.vectorSize = 1'000;
217+
options.allowDictionaryVector = true;
218+
219+
const auto seed = 1; // For reproducibility.
220+
velox::VectorFuzzer fuzzer(options, pool_.get(), seed);
221+
222+
return fuzzer.fuzzInputRow(rowType);
223+
}
224+
225+
std::vector<std::shared_ptr<const velox::core::FieldAccessTypedExpr>>
226+
getFields(const velox::RowTypePtr& rowType) {
227+
std::vector<std::shared_ptr<const velox::core::FieldAccessTypedExpr>>
228+
fieldAccessExprs;
229+
for (const auto& fieldName : rowType->names()) {
230+
auto fieldExpr = std::make_shared<velox::core::FieldAccessTypedExpr>(
231+
rowType->findChild(fieldName), fieldName);
232+
fieldAccessExprs.push_back(std::move(fieldExpr));
233+
}
234+
return fieldAccessExprs;
235+
}
236+
237+
const std::vector<velox::core::SortOrder> sortingOrders_ = {
238+
velox::core::kAscNullsFirst,
239+
velox::core::kAscNullsLast,
240+
velox::core::kDescNullsFirst,
241+
velox::core::kDescNullsLast};
242+
std::shared_ptr<velox::memory::MemoryPool> pool_ =
243+
velox::memory::deprecatedAddDefaultLeafMemoryPool();
244+
velox::test::VectorMaker vectorMaker_{pool_.get()};
245+
};
139246
} // namespace
140247

248+
// Fuzzes rows made of a BIGINT column and a VARCHAR column.
TEST_F(BinarySortableSerializerFuzzerTest, fuzzerTestString) {
  runFuzzerTest(velox::ROW({"c1", "c2"}, {velox::BIGINT(), velox::VARCHAR()}));
}
252+
253+
// Fuzzes rows made of a BIGINT column and an ARRAY(BIGINT) column.
TEST_F(BinarySortableSerializerFuzzerTest, fuzzerTestArray) {
  runFuzzerTest(velox::ROW(
      {"c1", "c2"}, {velox::BIGINT(), velox::ARRAY(velox::BIGINT())}));
}
258+
259+
// Fuzzes rows made of a BIGINT column and a flat struct column.
TEST_F(BinarySortableSerializerFuzzerTest, fuzzerTestStruct) {
  const auto structType = velox::ROW(
      {velox::BIGINT(),
       velox::DOUBLE(),
       velox::BOOLEAN(),
       velox::TINYINT(),
       velox::REAL()});
  runFuzzerTest(velox::ROW({"c1", "c2"}, {velox::BIGINT(), structType}));
}
271+
272+
// Fuzzes rows made of a BIGINT column and a struct column that itself
// contains another struct.
TEST_F(BinarySortableSerializerFuzzerTest, fuzzerTestNestedStruct) {
  const auto nestedStructType = velox::ROW(
      {velox::BIGINT(),
       velox::DOUBLE(),
       velox::BOOLEAN(),
       velox::ROW({velox::TINYINT(), velox::REAL()})});
  runFuzzerTest(velox::ROW({"c0", "c1"}, {velox::BIGINT(), nestedStructType}));
}
281+
141282
TEST_F(BinarySortableSerializerTest, LongTypeAllFields) {
142283
auto ordering = {
143284
velox::core::SortOrder(velox::core::kAscNullsFirst),

0 commit comments

Comments
 (0)