File tree Expand file tree Collapse file tree 1 file changed +14
-0
lines changed Expand file tree Collapse file tree 1 file changed +14
-0
lines changed Original file line number Diff line number Diff line change @@ -155,6 +155,20 @@ function slurm_spank_task_exit(spank)
155
155
return SPANK .SUCCESS
156
156
end
157
157
158
+ -- get GPU ids from CUDA_VISIBLE_DEVICES
159
+ device_ids = spank :getenv (" CUDA_VISIBLE_DEVICES" )
160
+ if device_ids == nil or device_ids == " " then
161
+ SPANK .log_error (myname .. " : CUDA_VISIBLE_DEVICES not set." )
162
+ return SPANK .FAILURE
163
+ end
164
+
165
+ -- check for nvidia-smi
166
+ nvs_path = exec (" which nvidia-smi" )
167
+ if nvs_path :match (" nvidia%-smi$" ) == nil then
168
+ SPANK .log_error (myname .. " : can't find nvidia-smi in PATH." )
169
+ return SPANK .FAILURE
170
+ end
171
+
158
172
-- reset compute mode on GPUs
159
173
SPANK .log_info (myname .. " : resetting compute mode to default '%s'" ..
160
174
" on GPU(s): %s\n " , default_cmode , device_ids )
You can’t perform that action at this time.
0 commit comments