Skip to content

Commit e092f84

Browse files
authored
Merge pull request #421 from ohearnk/hip-f-func-porting-3
HIP and MPI+HIP Updates Part 3
2 parents b93b395 + 41d209d commit e092f84

File tree

5 files changed

+39
-3
lines changed

5 files changed

+39
-3
lines changed

quick-cmake/QUICKCudaConfig.cmake

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -406,13 +406,13 @@ if(HIP)
406406
# check ROCm version (as reported by hipcc),
407407
# as the QUICK HIP codes trigger a known scalar register fill/spill bug
408408
# in several ROCm versions
409-
if (${HIP_VERSION} VERSION_GREATER_EQUAL 5.4.3)
409+
if ((${HIP_VERSION} VERSION_GREATER_EQUAL 5.4.3) AND (${HIP_VERSION} VERSION_LESS 6.2.1))
410410
message(STATUS "")
411411
message("************************************************************")
412412
message("Error: Incompatible ROCm/HIP version: ${HIP_VERSION}")
413413
message(" The QUICK HIP codes trigger a known compiler scalar register ")
414-
message(" fill/spill bug in ROCm >= v5.4.3.")
415-
message(" Please build QUICK with a known working ROCm version.")
414+
message(" fill/spill bug in ROCm (>= v5.4.3, < v6.2.1).")
415+
message(" Please build QUICK with a tested working ROCm version.")
416416
message("************************************************************")
417417
message(STATUS "")
418418
message(FATAL_ERROR)

src/gpu/cuda/gpu.cu

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,15 @@ extern "C" void gpu_init_device_(int* ierr)
345345
status = cudaGetDeviceProperties(&deviceProp, device);
346346
PRINTERROR(status, "cudaGetDeviceProperties gpu_init failed!");
347347

348+
#if defined(HIP) || defined(HIP_MPIV)
349+
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
350+
/* NOTE: setting the stack size limit to 8K is required for correctness
351+
* in HIP/MPI+HIP codes to work around GPU kernel issues for recent ROCm versions (>= v6.2.1);
352+
* ideally, this could be dropped in the future if ROCm properly addresses
353+
* these issues internally */
354+
cudaDeviceSetLimit(cudaLimitStackSize, 8192);
355+
#endif
356+
348357
#if defined(DEBUG)
349358
size_t val;
350359

src/gpu/cuda/mgpu.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,15 @@ extern "C" void mgpu_init_device_(int *mpirank, int *mpisize, int *device, int*
8383
status = cudaGetDeviceProperties(&deviceProp, gpu->gpu_dev_id);
8484
PRINTERROR(status, "cudaGetDeviceProperties gpu_init failed!");
8585

86+
#if defined(HIP) || defined(HIP_MPIV)
87+
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
88+
/* NOTE: setting the stack size limit to 8K is required for correctness
89+
* in HIP/MPI+HIP codes to work around GPU kernel issues for recent ROCm versions (>= v6.2.1);
90+
* ideally, this could be dropped in the future if ROCm properly addresses
91+
* these issues internally */
92+
cudaDeviceSetLimit(cudaLimitStackSize, 8192);
93+
#endif
94+
8695
size_t val;
8796
cudaDeviceGetLimit(&val, cudaLimitStackSize);
8897
#ifdef DEBUG

src/gpu/hip/gpu.cu

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,15 @@ extern "C" void gpu_init_device_(int* ierr)
345345
status = hipGetDeviceProperties(&deviceProp, device);
346346
PRINTERROR(status, "hipGetDeviceProperties gpu_init failed!");
347347

348+
#if defined(HIP) || defined(HIP_MPIV)
349+
hipDeviceSetCacheConfig(hipFuncCachePreferL1);
350+
/* NOTE: setting the stack size limit to 8K is required for correctness
351+
* in HIP/MPI+HIP codes to work around GPU kernel issues for recent ROCm versions (>= v6.2.1);
352+
* ideally, this could be dropped in the future if ROCm properly addresses
353+
* these issues internally */
354+
hipDeviceSetLimit(hipLimitStackSize, 8192);
355+
#endif
356+
348357
#if defined(DEBUG)
349358
size_t val;
350359

src/gpu/hip/mgpu.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,15 @@ extern "C" void mgpu_init_device_(int *mpirank, int *mpisize, int *device, int*
8383
status = hipGetDeviceProperties(&deviceProp, gpu->gpu_dev_id);
8484
PRINTERROR(status, "hipGetDeviceProperties gpu_init failed!");
8585

86+
#if defined(HIP) || defined(HIP_MPIV)
87+
hipDeviceSetCacheConfig(hipFuncCachePreferL1);
88+
/* NOTE: setting the stack size limit to 8K is required for correctness
89+
* in HIP/MPI+HIP codes to work around GPU kernel issues for recent ROCm versions (>= v6.2.1);
90+
* ideally, this could be dropped in the future if ROCm properly addresses
91+
* these issues internally */
92+
hipDeviceSetLimit(hipLimitStackSize, 8192);
93+
#endif
94+
8695
size_t val;
8796
hipDeviceGetLimit(&val, hipLimitStackSize);
8897
#ifdef DEBUG

0 commit comments

Comments
 (0)