Skip to content

Commit 73805f0

Browse files
committed
gpu_cmode.lua: add missing initializations in slurm_spank_task_exit()
1 parent 3eb9901 commit 73805f0

File tree

1 file changed

+14
-0
lines changed

1 file changed

+14
-0
lines changed

gpu_cmode.lua

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -155,6 +155,20 @@ function slurm_spank_task_exit(spank)
155155
return SPANK.SUCCESS
156156
end
157157

158+
-- get GPU ids from CUDA_VISIBLE_DEVICES
159+
device_ids = spank:getenv("CUDA_VISIBLE_DEVICES")
160+
if device_ids == nil or device_ids == "" then
161+
SPANK.log_error(myname .. ": CUDA_VISIBLE_DEVICES not set.")
162+
return SPANK.FAILURE
163+
end
164+
165+
-- check for nvidia-smi
166+
nvs_path = exec("which nvidia-smi")
167+
if nvs_path:match("nvidia%-smi$") == nil then
168+
SPANK.log_error(myname .. ": can't find nvidia-smi in PATH.")
169+
return SPANK.FAILURE
170+
end
171+
158172
-- reset compute mode on GPUs
159173
SPANK.log_info(myname .. ": resetting compute mode to default '%s'" ..
160174
" on GPU(s): %s\n", default_cmode, device_ids)

0 commit comments

Comments
 (0)