Skip to content

Commit 8dcefbd

Browse files
authored
Merge pull request #4 from kpouget/timeout
remoting: improve the loading timeout
2 parents 9e11af3 + d2b60a2 commit 8dcefbd

File tree

5 files changed

+75
-12
lines changed

5 files changed

+75
-12
lines changed

ggml/src/ggml-remotingbackend/shared/apir_backend.h

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,29 +83,36 @@ struct timer_data {
8383
extern struct timer_data graph_compute_timer;
8484
extern struct timer_data get_tensor_timer;
8585
extern struct timer_data set_tensor_timer;
86+
extern struct timer_data wait_host_reply_timer;
87+
extern struct timer_data get_tensor_from_ptr_timer;
88+
extern struct timer_data set_tensor_from_ptr_timer;
8689

8790
static inline void start_timer(struct timer_data *timer) {
8891
struct timespec ts;
8992
clock_gettime(CLOCK_REALTIME, &ts); // Use CLOCK_MONOTONIC for elapsed time
9093
timer->start = (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec;
9194
}
9295

93-
static inline void stop_timer(struct timer_data *timer) {
96+
// returns the duration in ns
97+
static inline long long stop_timer(struct timer_data *timer) {
9498
struct timespec ts;
9599
clock_gettime(CLOCK_REALTIME, &ts); // Use CLOCK_MONOTONIC for elapsed time
96100
long long timer_end = (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec;
97101

98-
timer->total += (timer_end - timer->start);
102+
long long duration = (timer_end - timer->start);
103+
timer->total += duration;
99104
timer->count += 1;
105+
106+
return duration;
100107
}
101108

102109
static inline void show_timer(struct timer_data *timer) {
103110
double ms = timer->total/1000000;
104111
double itl = ms/timer->count;
105112
double speed = 1/itl * 1000;
106113

107-
INFO("%14s [%9.0f] ms for %4ld invocations | ITL %2.2f ms | throughput = %4.2f t/s",
108-
timer->name, ms, timer->count, itl, speed);
114+
INFO("%15s [%9.0f] ms for %4ld invocations | ITL %2.2f ms | throughput = %4.2f t/s (%4.2f ms/call)",
115+
timer->name, ms, timer->count, itl, speed, ms/timer->count);
109116
}
110117

111118
static const char *apir_backend_initialize_error(int code) {

ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,12 @@ static void showTime() {
121121
show_timer(&graph_compute_timer);
122122
show_timer(&get_tensor_timer);
123123
show_timer(&set_tensor_timer);
124+
show_timer(&wait_host_reply_timer);
125+
126+
if (get_tensor_from_ptr_timer.count) {
127+
show_timer(&get_tensor_from_ptr_timer);
128+
show_timer(&set_tensor_from_ptr_timer);
129+
}
124130
}
125131

126132
ggml_backend_reg_t ggml_backend_remoting_frontend_reg() {

ggml/src/ggml-remotingfrontend/virtgpu-utils.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ void breakpoint();
3434
#ifndef NDEBUG
3535
inline void
3636
INFO(const char *format, ...) {
37+
fprintf(stderr, "INFO: ");
38+
3739
va_list argptr;
3840
va_start(argptr, format);
3941
vfprintf(stderr, format, argptr);
@@ -56,6 +58,17 @@ WARNING(const char *format, ...) {
5658
va_end(argptr);
5759
}
5860

61+
inline void
62+
ERROR(const char *format, ...) {
63+
fprintf(stderr, "ERROR: ");
64+
65+
va_list argptr;
66+
va_start(argptr, format);
67+
vfprintf(stderr, format, argptr);
68+
fprintf(stderr, "\n");
69+
va_end(argptr);
70+
}
71+
5972
inline void
6073
FATAL(const char *format, ...) {
6174
fprintf(stderr, "FATAL: ");

ggml/src/ggml-remotingfrontend/virtgpu.cpp

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ virtgpu_ioctl_get_caps(struct virtgpu *gpu,
2626
static uint64_t virtgpu_ioctl_getparam(struct virtgpu *gpu, uint64_t param);
2727
static void virtgpu_init_renderer_info(struct virtgpu *gpu);
2828

29+
struct timer_data wait_host_reply_timer = {0, 0, 0, "wait_host_reply"};
30+
2931
static inline void
3032
virtgpu_init_shmem_blob_mem(struct virtgpu *gpu)
3133
{
@@ -98,8 +100,10 @@ create_virtgpu() {
98100
if (!encoder) {
99101
FATAL("%s: failed to prepare the remote call encoder :/", __func__);
100102
}
101-
const uint64_t MAX_WAIT_US = 500000; // 5s (some conversions are wrong down the stack)
102-
decoder = remote_call(gpu, encoder, MAX_WAIT_US);
103+
104+
const uint64_t MAX_WAIT_MS = 3000;
105+
decoder = remote_call(gpu, encoder, MAX_WAIT_MS);
106+
103107
if (!decoder) {
104108
FATAL("%s: failed to initialize the API remoting libraries. :/", __func__);
105109
return NULL;
@@ -447,7 +451,7 @@ struct vn_cs_decoder *
447451
remote_call(
448452
struct virtgpu *gpu,
449453
struct vn_cs_encoder *encoder,
450-
uint64_t max_us
454+
float max_wait_ms
451455
)
452456
{
453457
/*
@@ -483,18 +487,51 @@ remote_call(
483487
if (ret != 0) {
484488
FATAL("%s: the virtgpu EXECBUFFER ioctl failed (%d) :/ \n", ret);
485489
}
490+
486491
/*
487492
* Wait for the response notification
488493
*/
489494

490-
uint32_t total_us = 0;
495+
start_timer(&wait_host_reply_timer);
496+
497+
struct timespec ts_start, ts_end;
498+
clock_gettime(CLOCK_MONOTONIC, &ts_start);
499+
long long start_time = (long long)ts_start.tv_sec * 1000000000LL + ts_start.tv_nsec;
500+
501+
bool timeout = false;
491502
while (std::atomic_load_explicit(atomic_reply_notif, std::memory_order_acquire) == 0) {
492503
int64_t base_sleep_us = 15;
493504

494505
os_time_sleep(base_sleep_us);
495-
total_us += base_sleep_us;
496-
if (max_us && total_us > max_us) {
497-
WARNING("%s: timed out waiting for the API remoting answer...", __func__);
506+
507+
if (max_wait_ms) {
508+
clock_gettime(CLOCK_MONOTONIC, &ts_end);
509+
long long end_time = (long long)ts_end.tv_sec * 1000000000LL + ts_end.tv_nsec;
510+
float duration_ms = (end_time - start_time) / 1000000;
511+
512+
if (duration_ms > max_wait_ms) {
513+
timeout = true;
514+
break;
515+
}
516+
}
517+
}
518+
519+
long long duration_ns = stop_timer(&wait_host_reply_timer);
520+
521+
if (max_wait_ms) {
522+
double duration_ms = (double) duration_ns / 1e6; // 1 millisecond = 1e6 nanoseconds
523+
double duration_s = (double) duration_ns / 1e9; // 1 second = 1e9 nanoseconds
524+
525+
if (duration_s > 1) {
526+
INFO("%s: waited %.2fs for the host reply...", __func__, duration_s);
527+
} else if (duration_ms > 1) {
528+
INFO("%s: waited %.2fms for the host reply...", __func__, duration_ms);
529+
} else {
530+
INFO("%s: waited %lldns for the host reply...", __func__, duration_ns);
531+
}
532+
533+
if (timeout) {
534+
ERROR("timeout waiting for the host answer...");
498535
return NULL;
499536
}
500537
}

ggml/src/ggml-remotingfrontend/virtgpu.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,5 +113,5 @@ struct vn_cs_encoder *remote_call_prepare(
113113
struct virtgpu *gpu,
114114
int32_t cmd_type,
115115
int32_t cmd_flags);
116-
struct vn_cs_decoder *remote_call(struct virtgpu *gpu, struct vn_cs_encoder *enc, uint64_t max_ns);
116+
struct vn_cs_decoder *remote_call(struct virtgpu *gpu, struct vn_cs_encoder *enc, float max_wait_ms);
117117
int32_t remote_call_finish(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec);

0 commit comments

Comments
 (0)