Skip to content

Commit 341a342

Browse files
committed
Initial tests work
1 parent 399a152 commit 341a342

File tree

2 files changed

+59
-43
lines changed

2 files changed

+59
-43
lines changed

cpp/src/cmdline/lub.cpp

Lines changed: 37 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -15,45 +15,53 @@ ::size_t findLeastUpperBound(std::vector<std::unique_ptr<cudf::table>> const &ta
1515
auto action = [&tables, &colNo]<typename T>() {
1616
using CudfScalarType = cudf::scalar_type_t<T>;
1717
::size_t lubTableIndex = 0;
18-
std::unique_ptr<cudf::scalar> currentLub =
19-
cudf::get_element(tables.front()->get_column(colNo), tables.front()->get_column(colNo).size() - 1);
18+
std::unique_ptr<cudf::scalar> currentLub;
2019
// Loop over each table view, grab the last element in the sort column and find the lowest
2120
for (::size_t idx = 0; std::unique_ptr<cudf::table> const &table : tables) {
22-
23-
std::unique_ptr<cudf::scalar> lastElement =
24-
cudf::get_element(table->view().column(colNo), table->view().column(colNo).size() - 1);
25-
auto const lub_ptr = static_cast<CudfScalarType *>(currentLub.get());
26-
auto const lastElement_ptr = static_cast<CudfScalarType *>(lastElement.get());
27-
28-
// Branch on template type if it's a string column or numeric column
29-
if constexpr (std::is_same_v<T, cudf::string_view>) {
30-
auto const lub = lub_ptr->to_string();
31-
auto const last = lastElement_ptr->to_string();
32-
// Perform string compare
33-
if (last < lub) {
21+
if (table->num_rows() > 0) {
22+
std::unique_ptr<cudf::scalar> lastElement =
23+
cudf::get_element(table->view().column(colNo), table->view().column(colNo).size() - 1);
24+
// Skip rest of checks if this is the first table with data
25+
if (!currentLub) {
26+
SPDLOG_INFO("Table {:d} is first with data", idx);
3427
currentLub = std::move(lastElement);
3528
lubTableIndex = idx;
36-
SPDLOG_INFO("Current least bound '{}' candidate '{}' is lower", lub, last);
3729
} else {
38-
SPDLOG_INFO("Current least bound '{}' candidate '{}'", lub, last);
39-
}
40-
} else if constexpr (cudf::is_integral_not_bool<T>()) {
41-
auto const lub = lub_ptr->value();
42-
auto const last = lastElement_ptr->value();
43-
// Perform numeric compare
44-
if (std::cmp_less(last, lub)) {
45-
currentLub = std::move(lastElement);
46-
lubTableIndex = idx;
47-
SPDLOG_INFO("Current least bound '{}' candidate '{}' is lower", lub, last);
48-
} else {
49-
SPDLOG_INFO("Current least bound '{}' candidate '{}'", lub, last);
30+
auto const lub_ptr = static_cast<CudfScalarType *>(currentLub.get());
31+
auto const lastElement_ptr = static_cast<CudfScalarType *>(lastElement.get());
32+
33+
// Branch on template type if it's a string column or numeric column
34+
if constexpr (std::is_same_v<T, cudf::string_view>) {
35+
auto const lub = lub_ptr->to_string();
36+
auto const last = lastElement_ptr->to_string();
37+
// Perform string compare
38+
if (last < lub) {
39+
currentLub = std::move(lastElement);
40+
lubTableIndex = idx;
41+
SPDLOG_INFO("Current least bound '{}' candidate '{}' is lower", lub, last);
42+
} else {
43+
SPDLOG_INFO("Current least bound '{}' candidate '{}'", lub, last);
44+
}
45+
} else if constexpr (cudf::is_integral_not_bool<T>()) {
46+
auto const lub = lub_ptr->value();
47+
auto const last = lastElement_ptr->value();
48+
// Perform numeric compare
49+
if (std::cmp_less(last, lub)) {
50+
currentLub = std::move(lastElement);
51+
lubTableIndex = idx;
52+
SPDLOG_INFO("Current least bound '{}' candidate '{}' is lower", lub, last);
53+
} else {
54+
SPDLOG_INFO("Current least bound '{}' candidate '{}'", lub, last);
55+
}
56+
} else {
57+
CUDF_FAIL("Column type not supported");
58+
}
5059
}
51-
} else {
52-
CUDF_FAIL("Column type not supported");
5360
}
5461
idx++;
5562
}
5663
SPDLOG_INFO("Found least upper bound on file no {:d}", lubTableIndex);
64+
CUDF_EXPECTS(currentLub, "No table contained any data!");
5765
return lubTableIndex;
5866
};
5967

cpp/src/cmdline/slice.cpp

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -39,20 +39,28 @@ std::pair<std::vector<cudf::table_view>, std::vector<cudf::table_view>> splitAtN
3939

4040
// Split each table at the point of that needle
4141
for (::size_t idx = 0; auto const &table : haystacks) {
42-
// Find needle in each table view, table is "haystack"
43-
std::unique_ptr<cudf::column> splitPoint =
44-
cudf::upper_bound(table->select({ 0 }), needle, { cudf::order::ASCENDING }, { cudf::null_order::AFTER });
45-
CUDF_EXPECTS(splitPoint->size() == 1, "Split result should be single row");
46-
// Get this index back to host
47-
std::unique_ptr<cudf::scalar> splitIndex = cudf::get_element(*splitPoint, 0);
48-
int const splitPos = convertInteger(*splitIndex);
49-
// Now split this table at that index
50-
std::vector<cudf::table_view> splitTables = cudf::split(*table, { splitPos });
51-
CUDF_EXPECTS(splitTables.size() == 2, "Should be two tables from split");
52-
SPDLOG_INFO(
53-
"File {:d} Table size after split {:d} and {:d}", idx, splitTables[0].num_rows(), splitTables[1].num_rows());
54-
lists.first.push_back(std::move(splitTables[0]));
55-
lists.second.push_back(std::move(splitTables[1]));
42+
// Empty table? Just push the empty table and skip
43+
if (table->num_rows() == 0) {
44+
lists.first.push_back(table->view());
45+
lists.second.push_back(table->view());
46+
} else {
47+
// Find needle in each table view, table is "haystack"
48+
std::unique_ptr<cudf::column> splitPoint =
49+
cudf::upper_bound(table->select({ 0 }), needle, { cudf::order::ASCENDING }, { cudf::null_order::AFTER });
50+
CUDF_EXPECTS(splitPoint->size() == 1, "Split result should be single row");
51+
// Get this index back to host
52+
std::unique_ptr<cudf::scalar> splitIndex = cudf::get_element(*splitPoint, 0);
53+
int const splitPos = convertInteger(*splitIndex);
54+
// Now split this table at that index
55+
std::vector<cudf::table_view> splitTables = cudf::split(*table, { splitPos });
56+
CUDF_EXPECTS(splitTables.size() == 2, "Should be two tables from split");
57+
SPDLOG_INFO("File {:d} Table size after split {:d} and {:d}",
58+
idx,
59+
splitTables[0].num_rows(),
60+
splitTables[1].num_rows());
61+
lists.first.push_back(std::move(splitTables[0]));
62+
lists.second.push_back(std::move(splitTables[1]));
63+
}
5664
idx++;
5765
}
5866
return lists;

0 commit comments

Comments
 (0)