Skip to content

Commit 44ba0d6

Browse files
authored
Additional HBM support
1 parent e6b4c1d commit 44ba0d6

File tree

3 files changed

+201
-115
lines changed

3 files changed

+201
-115
lines changed

include/hlslib/common/OpenCL.h

Lines changed: 148 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,27 @@ cl_mem_flags BankToFlag(MemoryBank memoryBank, bool failIfUnspecified,
255255
return 0;
256256
}
257257

258+
/// Convert a (storage type, bank index) pair into the corresponding
/// MemoryBank enumerator.
///
/// @param storage Storage type of the bank. Only StorageType::DDR is
///                accepted; anything else is reported via ThrowRuntimeError.
/// @param bank    Bank index. Indices 0 through 3 map to MemoryBank::bank0
///                through MemoryBank::bank3; any other value is reported via
///                ThrowRuntimeError.
/// @return The MemoryBank enumerator matching `bank`.
MemoryBank StorageTypeToMemoryBank(StorageType storage, int bank) {
  if (storage != StorageType::DDR) {
    ThrowRuntimeError("Only DDR bank identifiers can be converted to memory bank flags.");
  }
  // The switch doubles as the range check: every value outside [0, 3] falls
  // through to the error report below, so no separate (and previously
  // unreachable) "unsupported" branch is needed.
  switch (bank) {
    case 0:
      return MemoryBank::bank0;
    case 1:
      return MemoryBank::bank1;
    case 2:
      return MemoryBank::bank2;
    case 3:
      return MemoryBank::bank3;
    default:
      break;
  }
  ThrowRuntimeError("Bank identifier is out of range (must be [0-3]).");
  // NOTE(review): other call sites in this file follow ThrowRuntimeError with
  // an explicit return, so it is treated here as possibly returning. Without
  // this statement control would flow off the end of a non-void function.
  return MemoryBank::unspecified;
}
278+
258279
cl_uint NumEvents(cl::Event const *const eventsBegin,
259280
cl::Event const *const eventsEnd) {
260281
if (eventsBegin != nullptr) {
@@ -426,57 +447,7 @@ class Buffer {
426447
Buffer(Context &context, MemoryBank memoryBank, IteratorType begin,
427448
IteratorType end)
428449
: context_(&context), nElements_(std::distance(begin, end)) {
429-
#ifndef HLSLIB_SIMULATE_OPENCL
430-
431-
void *hostPtr = nullptr;
432-
433-
cl_mem_flags flags;
434-
435-
switch (access) {
436-
case Access::read:
437-
flags = CL_MEM_READ_ONLY;
438-
break;
439-
case Access::write:
440-
flags = CL_MEM_WRITE_ONLY;
441-
break;
442-
case Access::readWrite:
443-
flags = CL_MEM_READ_WRITE;
444-
break;
445-
}
446-
447-
#ifdef HLSLIB_XILINX
448-
hostPtr = const_cast<T *>(&(*begin));
449-
flags |= CL_MEM_USE_HOST_PTR;
450-
// Allow specifying memory bank
451-
ExtendedMemoryPointer extendedHostPointer;
452-
if (memoryBank != MemoryBank::unspecified) {
453-
extendedHostPointer =
454-
CreateExtendedPointer(hostPtr, memoryBank, context.DDRFlags_);
455-
// Replace hostPtr with Xilinx extended pointer
456-
hostPtr = &extendedHostPointer;
457-
flags |= kXilinxMemPointer;
458-
}
459-
#endif
460-
461-
#ifdef HLSLIB_INTEL
462-
flags |= BankToFlag(memoryBank, false, context.DDRFlags_);
463-
#endif
464-
465-
cl_int errorCode;
466-
devicePtr_ = cl::Buffer(context.context(), flags, sizeof(T) * nElements_,
467-
hostPtr, &errorCode);
468-
#ifdef HLSLIB_INTEL
469-
CopyFromHost(begin);
470-
#endif
471-
472-
if (errorCode != CL_SUCCESS) {
473-
ThrowRuntimeError("Failed to initialize and copy to device memory.");
474-
return;
475-
}
476-
#else
477-
devicePtr_ = std::make_unique<T[]>(nElements_);
478-
std::copy(begin, end, devicePtr_.get());
479-
#endif
450+
AllocateDDR(memoryBank, begin, end);
480451
}
481452

482453
template <typename IteratorType, typename = typename std::enable_if<
@@ -485,66 +456,27 @@ class Buffer {
485456
Buffer(Context &context, IteratorType begin, IteratorType end)
486457
: Buffer(context, MemoryBank::unspecified, begin, end) {}
487458

488-
/// Allocate device memory but don't perform any transfers.
459+
/// Allocate device memory on the requested bank without performing any transfers.
489460
Buffer(Context &context, MemoryBank memoryBank, size_t nElements)
490461
: context_(&context), nElements_(nElements) {
491-
#ifndef HLSLIB_SIMULATE_OPENCL
492-
493-
cl_mem_flags flags;
494-
switch (access) {
495-
case Access::read:
496-
flags = CL_MEM_READ_ONLY;
497-
break;
498-
case Access::write:
499-
flags = CL_MEM_WRITE_ONLY;
500-
break;
501-
case Access::readWrite:
502-
flags = CL_MEM_READ_WRITE;
503-
break;
504-
}
505-
506-
void *hostPtr = nullptr;
507-
#ifdef HLSLIB_XILINX
508-
ExtendedMemoryPointer extendedHostPointer;
509-
if (memoryBank != MemoryBank::unspecified) {
510-
extendedHostPointer =
511-
CreateExtendedPointer(nullptr, memoryBank, context.DDRFlags_);
512-
// Becomes a pointer to the Xilinx extended memory pointer if a memory
513-
// bank is specified
514-
hostPtr = &extendedHostPointer;
515-
flags |= kXilinxMemPointer;
516-
}
517-
#endif
518-
#ifdef HLSLIB_INTEL
519-
flags |= BankToFlag(memoryBank, false, context.DDRFlags_);
520-
#endif
521-
522-
cl_int errorCode;
523-
{
524-
std::lock_guard<std::mutex> lock(context_->memcopyMutex());
525-
devicePtr_ = cl::Buffer(context_->context(), flags,
526-
sizeof(T) * nElements_, hostPtr, &errorCode);
527-
}
528-
529-
if (errorCode != CL_SUCCESS) {
530-
ThrowRuntimeError("Failed to initialize device memory.");
531-
return;
532-
}
533-
#else
534-
devicePtr_ = std::make_unique<T[]>(nElements_);
535-
#endif
462+
AllocateDDRNoTransfer(memoryBank);
536463
}
537464

538465
Buffer(Context &context, size_t nElements)
539466
: Buffer(context, MemoryBank::unspecified, nElements) {}
540467

541-
#ifdef HLSLIB_XILINX
542468
/// Allocate DDR or HBM but don't perform any transfers.
543469
Buffer(Context &context, StorageType storageType, int bankIndex,
544470
size_t nElements)
545471
: context_(&context), nElements_(nElements) {
546472
#ifndef HLSLIB_SIMULATE_OPENCL
547-
473+
#ifdef HLSLIB_INTEL
474+
if (storageType != StorageType::DDR) {
475+
ThrowRuntimeError("Only DDR memory is supported for Intel FPGA.");
476+
}
477+
AllocateDDRNoTransfer(StorageTypeToMemoryBank(storageType, bankIndex));
478+
#endif
479+
#ifdef HLSLIB_XILINX
548480
ExtendedMemoryPointer extendedHostPointer = CreateExtendedPointer(
549481
nullptr, storageType, bankIndex, context.DDRFlags_);
550482
void *hostPtr = &extendedHostPointer;
@@ -561,6 +493,7 @@ class Buffer {
561493
ThrowRuntimeError("Failed to initialize device memory.");
562494
return;
563495
}
496+
#endif
564497
#else
565498
devicePtr_ = std::make_unique<T[]>(nElements_);
566499
#endif
@@ -574,7 +507,13 @@ class Buffer {
574507
IteratorType begin, IteratorType end)
575508
: context_(&context), nElements_(std::distance(begin, end)) {
576509
#ifndef HLSLIB_SIMULATE_OPENCL
577-
510+
#ifdef HLSLIB_INTEL
511+
if (storageType != StorageType::DDR) {
512+
ThrowRuntimeError("Only DDR memory is supported for Intel FPGA.");
513+
}
514+
AllocateDDR(StorageTypeToMemoryBank(storageType, bankIndex), begin, end);
515+
#endif
516+
#ifdef HLSLIB_XILINX
578517
void *hostPtr = const_cast<T *>(&(*begin));
579518
ExtendedMemoryPointer extendedHostPointer = CreateExtendedPointer(
580519
hostPtr, storageType, bankIndex, context.DDRFlags_);
@@ -589,14 +528,13 @@ class Buffer {
589528
ThrowRuntimeError("Failed to initialize and copy to device memory.");
590529
return;
591530
}
531+
#endif
592532
#else
593533
devicePtr_ = std::make_unique<T[]>(nElements_);
594534
std::copy(begin, end, devicePtr_.get());
595535
#endif
596536
}
597537

598-
#endif // HLSLIB_XILINX
599-
600538
friend void swap(Buffer<T, access> &first, Buffer<T, access> &second) {
601539
std::swap(first.context_, second.context_);
602540
std::swap(first.devicePtr_, second.devicePtr_);
@@ -1021,6 +959,114 @@ class Buffer {
1021959
}
1022960
#endif // HLSLIB_XILINX
1023961

962+
/// Allocate and copy to device.
963+
template <typename IteratorType, typename = typename std::enable_if<
964+
IsIteratorOfType<IteratorType, T>() &&
965+
IsRandomAccess<IteratorType>()>::type>
966+
void AllocateDDR(MemoryBank memoryBank, IteratorType begin,
967+
IteratorType end) {
968+
#ifndef HLSLIB_SIMULATE_OPENCL
969+
970+
void *hostPtr = nullptr;
971+
972+
cl_mem_flags flags;
973+
974+
switch (access) {
975+
case Access::read:
976+
flags = CL_MEM_READ_ONLY;
977+
break;
978+
case Access::write:
979+
flags = CL_MEM_WRITE_ONLY;
980+
break;
981+
case Access::readWrite:
982+
flags = CL_MEM_READ_WRITE;
983+
break;
984+
}
985+
986+
#ifdef HLSLIB_XILINX
987+
hostPtr = const_cast<T *>(&(*begin));
988+
flags |= CL_MEM_USE_HOST_PTR;
989+
// Allow specifying memory bank
990+
ExtendedMemoryPointer extendedHostPointer;
991+
if (memoryBank != MemoryBank::unspecified) {
992+
extendedHostPointer =
993+
CreateExtendedPointer(hostPtr, memoryBank, context_->DDRFlags_);
994+
// Replace hostPtr with Xilinx extended pointer
995+
hostPtr = &extendedHostPointer;
996+
flags |= kXilinxMemPointer;
997+
}
998+
#endif
999+
1000+
#ifdef HLSLIB_INTEL
1001+
flags |= BankToFlag(memoryBank, false, context_->DDRFlags_);
1002+
#endif
1003+
1004+
cl_int errorCode;
1005+
devicePtr_ = cl::Buffer(context_->context(), flags, sizeof(T) * nElements_,
1006+
hostPtr, &errorCode);
1007+
#ifdef HLSLIB_INTEL
1008+
CopyFromHost(begin);
1009+
#endif
1010+
1011+
if (errorCode != CL_SUCCESS) {
1012+
ThrowRuntimeError("Failed to initialize and copy to device memory.");
1013+
return;
1014+
}
1015+
#else
1016+
devicePtr_ = std::make_unique<T[]>(nElements_);
1017+
std::copy(begin, end, devicePtr_.get());
1018+
#endif
1019+
}
1020+
1021+
/// Allocate device memory but don't perform any transfers.
1022+
void AllocateDDRNoTransfer(MemoryBank memoryBank) {
1023+
#ifndef HLSLIB_SIMULATE_OPENCL
1024+
1025+
cl_mem_flags flags;
1026+
switch (access) {
1027+
case Access::read:
1028+
flags = CL_MEM_READ_ONLY;
1029+
break;
1030+
case Access::write:
1031+
flags = CL_MEM_WRITE_ONLY;
1032+
break;
1033+
case Access::readWrite:
1034+
flags = CL_MEM_READ_WRITE;
1035+
break;
1036+
}
1037+
1038+
void *hostPtr = nullptr;
1039+
#ifdef HLSLIB_XILINX
1040+
ExtendedMemoryPointer extendedHostPointer;
1041+
if (memoryBank != MemoryBank::unspecified) {
1042+
extendedHostPointer =
1043+
CreateExtendedPointer(nullptr, memoryBank, context_->DDRFlags_);
1044+
// Becomes a pointer to the Xilinx extended memory pointer if a memory
1045+
// bank is specified
1046+
hostPtr = &extendedHostPointer;
1047+
flags |= kXilinxMemPointer;
1048+
}
1049+
#endif
1050+
#ifdef HLSLIB_INTEL
1051+
flags |= BankToFlag(memoryBank, false, context_->DDRFlags_);
1052+
#endif
1053+
1054+
cl_int errorCode;
1055+
{
1056+
std::lock_guard<std::mutex> lock(context_->memcopyMutex());
1057+
devicePtr_ = cl::Buffer(context_->context(), flags,
1058+
sizeof(T) * nElements_, hostPtr, &errorCode);
1059+
}
1060+
1061+
if (errorCode != CL_SUCCESS) {
1062+
ThrowRuntimeError("Failed to initialize device memory.");
1063+
return;
1064+
}
1065+
#else
1066+
devicePtr_ = std::make_unique<T[]>(nElements_);
1067+
#endif
1068+
}
1069+
10241070
#ifndef HLSLIB_SIMULATE_OPENCL
10251071
/*
10261072
Transform the inputs of the CopyBlockXXX functions to arguments for the

intel_test/CMakeLists.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,10 @@ function(opencl_target KERNEL_NAME)
4242
add_custom_target(run_${KERNEL_NAME}_emulator COMMAND CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA=1 ${CMAKE_CURRENT_BINARY_DIR}/Run${KERNEL_NAME}.exe emulator)
4343
endfunction()
4444

45-
opencl_target("Jacobi2D")
45+
opencl_target("Jacobi2D")
46+
# Extra run targets for Jacobi2D: for every mode passed as the second
# command-line argument, create one target that runs on hardware and one that
# runs in the Intel FPGA emulator.
foreach(JACOBI2D_MODE oldapi_copy newapi_copy newapi_notransfer)
  add_custom_target(run_Jacobi2D_hardware_${JACOBI2D_MODE} COMMAND ${CMAKE_CURRENT_BINARY_DIR}/RunJacobi2D.exe hardware ${JACOBI2D_MODE})
  add_custom_target(run_Jacobi2D_emulator_${JACOBI2D_MODE} COMMAND CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA=1 ${CMAKE_CURRENT_BINARY_DIR}/RunJacobi2D.exe emulator ${JACOBI2D_MODE})
endforeach()

0 commit comments

Comments
 (0)