Skip to content

Commit 2f03492

Browse files
committed
Refactor to remove a vector
1 parent 81cca48 commit 2f03492

File tree

5 files changed

+20
-24
lines changed

5 files changed

+20
-24
lines changed

cpp/include/cmdline/lub.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#pragma once
2-
#include <cudf/table/table_view.hpp>
2+
#include <cudf/io/types.hpp>
33

44
#include <cstddef>
55
#include <vector>
66

7-
::size_t findLeastUpperBound(std::vector<cudf::table_view> const &views, ::size_t const colNo = 0);
7+
::size_t findLeastUpperBound(std::vector<cudf::io::table_with_metadata> const &tables, ::size_t const colNo = 0);

cpp/include/cmdline/slice.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22

3+
#include <cudf/io/types.hpp>
34
#include <cudf/scalar/scalar.hpp>
45
#include <cudf/table/table_view.hpp>
56

@@ -9,4 +10,4 @@
910
int convertInteger(cudf::scalar const &scalar);
1011

1112
std::pair<std::vector<cudf::table_view>, std::vector<cudf::table_view>> splitAtNeedle(cudf::table_view const &needle,
12-
std::vector<cudf::table_view> const &haystacks);
13+
std::vector<cudf::io::table_with_metadata> const &haystacks);

cpp/src/cmdline/chunk_reader.cpp

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ ::size_t calcRowsWritten(auto const &readers) noexcept {
4747
}
4848

4949
[[nodiscard]] cudf::io::table_metadata grabMetaData(std::string const &file) {
50-
auto opts = cudf::io::parquet_reader_options::builder(cudf::io::source_info(file)).build();
50+
auto opts = cudf::io::parquet_reader_options::builder(cudf::io::source_info(file)).num_rows(1).build();
5151
return cudf::io::read_parquet(opts).metadata;
5252
}
5353

@@ -141,21 +141,16 @@ int main(int argc, char **argv) {
141141

142142
// Merge and write tables
143143
if (lastTotalRowCount > 0) {
144-
std::vector<cudf::table_view> views;
145-
views.reserve(tables.size());
146-
for (auto const &table : tables) { views.push_back(*table.tbl); }
147-
148144
// Find the least upper bound in sort column across these tables
149-
auto const leastUpperBound = findLeastUpperBound(views, 0);
145+
auto const leastUpperBound = findLeastUpperBound(tables, 0);
150146

151147
// Now take the search "needle" from the last row of the table with the LUB
152-
auto const lubTable = views[leastUpperBound].select({ 0 });
148+
auto const lubTable = tables[leastUpperBound].tbl->select({ 0 });
153149
auto const needle = cudf::split(lubTable, { lubTable.num_rows() - 1 })[1];
154-
auto const tableVectors = splitAtNeedle(needle, views);
150+
auto const tableVectors = splitAtNeedle(needle, tables);
155151

156152
SPDLOG_INFO("Merging {:d} rows", lastTotalRowCount);
157-
auto merged = cudf::merge(views, { 0 }, { cudf::order::ASCENDING });
158-
views.clear();
153+
auto merged = cudf::merge(tableVectors.first, { 0 }, { cudf::order::ASCENDING });
159154
tables.clear();
160155
writer.write(*merged);
161156
auto const elapsedTime = std::chrono::duration_cast<std::chrono::seconds>(timestamp() - startTime);

cpp/src/cmdline/lub.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,18 @@
1111
#include <type_traits>
1212
#include <utility>
1313

14-
::size_t findLeastUpperBound(std::vector<cudf::table_view> const &views, ::size_t const colNo) {
14+
::size_t findLeastUpperBound(std::vector<cudf::io::table_with_metadata> const &tables, ::size_t const colNo) {
1515

16-
auto action = [&views, &colNo]<typename T>() {
16+
auto action = [&tables, &colNo]<typename T>() {
1717
using CudfScalarType = cudf::scalar_type_t<T>;
1818
::size_t lubTableIndex = 0;
1919
std::unique_ptr<cudf::scalar> currentLub =
20-
cudf::get_element(views.front().column(colNo), views.front().column(colNo).size() - 1);
20+
cudf::get_element(tables.front().tbl->get_column(colNo), tables.front().tbl->get_column(colNo).size() - 1);
2121
// Loop over each table view, grab the last element in the sort column and find the lowest
22-
for (::size_t idx = 0; cudf::table_view const &view : views) {
22+
for (::size_t idx = 0; cudf::io::table_with_metadata const &table : tables) {
2323

2424
std::unique_ptr<cudf::scalar> lastElement =
25-
cudf::get_element(view.column(colNo), view.column(colNo).size() - 1);
25+
cudf::get_element(table.tbl->view().column(colNo), table.tbl->view().column(colNo).size() - 1);
2626
auto const lub_ptr = static_cast<CudfScalarType *>(currentLub.get());
2727
auto const lastElement_ptr = static_cast<CudfScalarType *>(lastElement.get());
2828

@@ -58,6 +58,6 @@ ::size_t findLeastUpperBound(std::vector<cudf::table_view> const &views, ::size_
5858
return lubTableIndex;
5959
};
6060

61-
CUDF_EXPECTS(!views.empty(), "vector of tables cannot be empty");
62-
return cudf::type_dispatcher(views.front().column(colNo).type(), action);
61+
CUDF_EXPECTS(!tables.empty(), "vector of tables cannot be empty");
62+
return cudf::type_dispatcher(tables.front().tbl->get_column(colNo).type(), action);
6363
}

cpp/src/cmdline/slice.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,23 +30,23 @@ int convertInteger(cudf::scalar const &scalar) {
3030
}
3131

3232
std::pair<std::vector<cudf::table_view>, std::vector<cudf::table_view>> splitAtNeedle(cudf::table_view const &needle,
33-
std::vector<cudf::table_view> const &haystacks) {
33+
std::vector<cudf::io::table_with_metadata> const &haystacks) {
3434
std::vector<cudf::table_view> tablesToMerge;
3535
std::vector<cudf::table_view> remainingFragments;
3636
tablesToMerge.reserve(haystacks.size());
3737
remainingFragments.reserve(haystacks.size());
3838

3939
// Split each table at the point of that needle
40-
for (::size_t idx = 0; auto const &view : haystacks) {
40+
for (::size_t idx = 0; auto const &table : haystacks) {
4141
// Find needle in each table view, table is "haystack"
4242
std::unique_ptr<cudf::column> splitPoint =
43-
cudf::upper_bound(view.select({ 0 }), needle, { cudf::order::ASCENDING }, { cudf::null_order::AFTER });
43+
cudf::upper_bound(table.tbl->select({ 0 }), needle, { cudf::order::ASCENDING }, { cudf::null_order::AFTER });
4444
CUDF_EXPECTS(splitPoint->size() == 1, "Split result should be single row");
4545
// Get this index back to host
4646
std::unique_ptr<cudf::scalar> splitIndex = cudf::get_element(*splitPoint, 0);
4747
int const splitPos = convertInteger(*splitIndex);
4848
// Now split this table at that index
49-
std::vector<cudf::table_view> splitTables = cudf::split(view, { splitPos });
49+
std::vector<cudf::table_view> splitTables = cudf::split(*table.tbl, { splitPos });
5050
CUDF_EXPECTS(splitTables.size() == 2, "Should be two tables from split");
5151
SPDLOG_INFO(
5252
"File {:d} Table size after split {:d} and {:d}", idx, splitTables[0].num_rows(), splitTables[1].num_rows());

0 commit comments

Comments
 (0)