Skip to content

Commit 4c5ec7d

Browse files
authored
Improve writing large arrays (#18)
Merged FesapiHdfProxy::createSubArrayNd() into FesapiHdfProxy::writeSubArrayNd()
1 parent 58161f8 commit 4c5ec7d

File tree

2 files changed

+61
-84
lines changed

2 files changed

+61
-84
lines changed

src/etp/fesapi/FesapiHdfProxy.cpp

Lines changed: 56 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -115,68 +115,6 @@ std::vector<uint32_t> FesapiHdfProxy::getElementCountPerDimension(const std::str
115115
return result;
116116
}
117117

118-
template<typename T>
119-
void FesapiHdfProxy::writeSubArrayNd(
120-
const std::string& uri,
121-
const std::string& pathInResource,
122-
std::vector<int64_t>& totalCounts,
123-
std::vector<int64_t> starts,
124-
std::vector<int64_t> counts,
125-
const void* values)
126-
{
127-
// Calculate array size
128-
size_t totalCount{ 1 };
129-
130-
for (const auto& count : counts) {
131-
totalCount *= count;
132-
}
133-
134-
// [Base Condition] Array size is OK to be transmitted.
135-
if ((totalCount * sizeof(T)) <= maxArraySize_) {
136-
137-
// PUT DATA SUBARRAYS
138-
Energistics::Etp::v12::Protocol::DataArray::PutDataSubarrays pdsa{};
139-
pdsa.dataSubarrays["0"].uid.uri = uri;
140-
pdsa.dataSubarrays["0"].uid.pathInResource = pathInResource;
141-
pdsa.dataSubarrays["0"].starts = starts;
142-
pdsa.dataSubarrays["0"].counts = counts;
143-
144-
// Cast values in T values.
145-
const T* typeValues{ static_cast<const T*>(values) };
146-
147-
// Create 1D Array for Sub Values.
148-
T* subValues = new T[totalCount];
149-
size_t valueIndex{ 0 };
150-
151-
// Recursively populate subValues starting from first dimension.
152-
populateSubValuesNd<T>(
153-
0,
154-
totalCounts, starts, counts,
155-
valueIndex, typeValues, subValues);
156-
157-
// Create AVRO Array
158-
Energistics::Etp::v12::Datatypes::AnyArray data;
159-
createAnyArray<T>(data, totalCount, subValues); // Type-specific code is written in explicit specializations for createAnyArray().
160-
pdsa.dataSubarrays["0"].data = data;
161-
162-
std::cout << "Writing subarray..." << std::endl;
163-
164-
// Send putDataSubarrays Message
165-
session_->sendAndBlock(pdsa, 0, 0x02);
166-
167-
// Delete Array
168-
delete[] subValues;
169-
}
170-
// [Divide and Conquer Approach] If sub array is still large, partition it into more sub arrays.
171-
else {
172-
// Recursively divide all dimensions starting from first dimension.
173-
createSubArrayNd<T>(
174-
0,
175-
uri, pathInResource, totalCounts,
176-
starts, counts, values);
177-
}
178-
}
179-
180118
template<typename T>
181119
void FesapiHdfProxy::populateSubValuesNd(
182120
size_t dimensionIndex,
@@ -245,7 +183,7 @@ int64_t FesapiHdfProxy::getCountsProduct(
245183
}
246184

247185
template<typename T>
248-
void FesapiHdfProxy::createSubArrayNd(
186+
void FesapiHdfProxy::writeSubArrayNd(
249187
size_t dimensionIndex,
250188
const std::string& uri,
251189
const std::string& pathInResource,
@@ -254,15 +192,56 @@ void FesapiHdfProxy::createSubArrayNd(
254192
std::vector<int64_t> counts,
255193
const void* values)
256194
{
257-
// [Base Condition] If dimensionIndex exceeds the last dimension.
258-
if (dimensionIndex >= starts.size()) {
259-
// Recursively Write Subarray.
195+
// Calculate array size
196+
size_t totalCount{ 1 };
197+
198+
for (const auto& count : counts) {
199+
totalCount *= count;
200+
}
201+
202+
// [Base Condition] If subarray can be transmitted.
203+
if ((totalCount * sizeof(T)) <= maxArraySize_) {
204+
// PUT DATA SUBARRAYS
205+
Energistics::Etp::v12::Protocol::DataArray::PutDataSubarrays pdsa{};
206+
pdsa.dataSubarrays["0"].uid.uri = uri;
207+
pdsa.dataSubarrays["0"].uid.pathInResource = pathInResource;
208+
pdsa.dataSubarrays["0"].starts = starts;
209+
pdsa.dataSubarrays["0"].counts = counts;
210+
211+
// Cast values in T values.
212+
const T* typeValues{ static_cast<const T*>(values) };
213+
214+
// Create 1D Array for Sub Values.
215+
T* subValues = new T[totalCount];
216+
size_t valueIndex{ 0 };
217+
218+
// Recursively populate subValues starting from first dimension.
219+
populateSubValuesNd<T>(
220+
0,
221+
totalCounts, starts, counts,
222+
valueIndex, typeValues, subValues);
223+
224+
// Create AVRO Array
225+
Energistics::Etp::v12::Datatypes::AnyArray data;
226+
createAnyArray<T>(data, totalCount, subValues); // Type-specific code is written in explicit specializations for createAnyArray().
227+
pdsa.dataSubarrays["0"].data = data;
228+
229+
std::cout << "Writing subarray..." << std::endl;
230+
231+
// Send putDataSubarrays Message
232+
session_->sendAndBlock(pdsa, 0, 0x02);
233+
234+
// Delete Array
235+
delete[] subValues;
236+
}
237+
// Again divide all dimensions starting from first dimension.
238+
else if (dimensionIndex >= starts.size()) {
260239
writeSubArrayNd<T>(
240+
0,
261241
uri, pathInResource, totalCounts,
262-
starts,
263-
counts,
264-
values);
242+
starts, counts, values);
265243
}
244+
// Divide the values of current dimension in halves.
266245
else {
267246
int64_t numberOfValues = counts[dimensionIndex];
268247

@@ -273,7 +252,7 @@ void FesapiHdfProxy::createSubArrayNd(
273252
newCounts[dimensionIndex] = firstHalfValues;
274253

275254
// Recursively divide next dimension.
276-
createSubArrayNd<T>(
255+
writeSubArrayNd<T>(
277256
dimensionIndex + 1,
278257
uri, pathInResource, totalCounts,
279258
starts,
@@ -285,7 +264,7 @@ void FesapiHdfProxy::createSubArrayNd(
285264
newCounts[dimensionIndex] = secondHalfValues;
286265

287266
// Recursively divide next dimension.
288-
createSubArrayNd<T>(
267+
writeSubArrayNd<T>(
289268
dimensionIndex + 1,
290269
uri, pathInResource, totalCounts,
291270
newStarts,
@@ -543,43 +522,41 @@ void FesapiHdfProxy::writeArrayNd(const std::string & groupName,
543522

544523
// Recursively Write Subarrays
545524
if (datatype == COMMON_NS::AbstractObject::numericalDatatypeEnum::DOUBLE) {
546-
writeSubArrayNd<double>(uri, pathInResource, counts,
525+
writeSubArrayNd<double>(0, uri, pathInResource, counts,
547526
starts,
548527
counts,
549528
values);
550529
}
551530
else if (datatype == COMMON_NS::AbstractObject::numericalDatatypeEnum::FLOAT) {
552-
writeSubArrayNd<float>(uri, pathInResource, counts,
531+
writeSubArrayNd<float>(0, uri, pathInResource, counts,
553532
starts,
554533
counts,
555534
values);
556535
}
557536
else if (datatype == COMMON_NS::AbstractObject::numericalDatatypeEnum::INT64 ||
558537
datatype == COMMON_NS::AbstractObject::numericalDatatypeEnum::UINT64) {
559-
writeSubArrayNd<int64_t>(uri, pathInResource, counts,
538+
writeSubArrayNd<int64_t>(0, uri, pathInResource, counts,
560539
starts,
561540
counts,
562541
values);
563542
}
564543
else if (datatype == COMMON_NS::AbstractObject::numericalDatatypeEnum::INT32 ||
565544
datatype == COMMON_NS::AbstractObject::numericalDatatypeEnum::UINT32) {
566-
writeSubArrayNd<int32_t>(uri, pathInResource, counts,
545+
writeSubArrayNd<int32_t>(0, uri, pathInResource, counts,
567546
starts,
568547
counts,
569548
values);
570549
}
571550
else if (datatype == COMMON_NS::AbstractObject::numericalDatatypeEnum::INT16 ||
572551
datatype == COMMON_NS::AbstractObject::numericalDatatypeEnum::UINT16) {
573-
writeSubArrayNd<short>(
574-
uri, pathInResource, counts,
552+
writeSubArrayNd<short>(0, uri, pathInResource, counts,
575553
starts,
576554
counts,
577555
values);
578556
}
579557
else if (datatype == COMMON_NS::AbstractObject::numericalDatatypeEnum::INT8 ||
580558
datatype == COMMON_NS::AbstractObject::numericalDatatypeEnum::UINT8) {
581-
writeSubArrayNd<char>(
582-
uri, pathInResource, counts,
559+
writeSubArrayNd<char>(0, uri, pathInResource, counts,
583560
starts,
584561
counts,
585562
values);

src/etp/fesapi/FesapiHdfProxy.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -139,14 +139,14 @@ namespace ETP_NS
139139
* @param counts The number of values in each dimension of the subarray to be written.
140140
* @param values 1d array of specific datatype ordered firstly by fastest direction.
141141
*/
142-
template<typename T>
142+
/*template<typename T>
143143
void writeSubArrayNd(
144144
const std::string& uri,
145145
const std::string& pathInResource,
146146
std::vector<int64_t>& totalCounts,
147147
std::vector<int64_t> starts,
148148
std::vector<int64_t> counts,
149-
const void* values);
149+
const void* values);*/
150150

151151
/**
152152
* Recursively populate subValues array from original values array.
@@ -201,7 +201,7 @@ namespace ETP_NS
201201
std::vector<int64_t>& totalCounts);
202202

203203
/**
204-
* Recursively divide each dimension into half and create a new nD subarray.
204+
* Recursively write sub arrays (potentially with 2 dimensions) of a specific datatype into the HDF file by means of a single dataset.
205205
* @param dimensionIndex The index of dimension in nD array.
206206
* @param uri The uri of the original array.
207207
* @param pathInResource The path of the original array.
@@ -211,7 +211,7 @@ namespace ETP_NS
211211
* @param values 1d array of specific datatype ordered firstly by fastest direction.
212212
*/
213213
template<typename T>
214-
void createSubArrayNd(
214+
void writeSubArrayNd(
215215
size_t dimensionIndex,
216216
const std::string& uri,
217217
const std::string& pathInResource,
@@ -575,7 +575,7 @@ namespace ETP_NS
575575
AbstractSession* session_;
576576
unsigned int compressionLevel;
577577
std::string xmlNs_;
578-
int maxArraySize_{ 4000000 }; // Bytes
578+
int maxArraySize_{ 12000000 }; // Bytes
579579

580580
Energistics::Etp::v12::Datatypes::DataArrayTypes::DataArrayIdentifier buildDataArrayIdentifier(const std::string & datasetName) const;
581581
Energistics::Etp::v12::Protocol::DataArray::GetDataArrays buildGetDataArraysMessage(const std::string & datasetName) const;

0 commit comments

Comments
 (0)