Skip to content

Commit 766c9a4

Browse files
authored
Merge pull request #7 from kpouget/remoting
remoting: improve the frontend<>backend error handling
2 parents 10c2a9f + 6f2500c commit 766c9a4

16 files changed

+436
-185
lines changed

ggml/src/ggml-remotingbackend/backend-dispatched.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ uint32_t backend_dispatch_initialize(void *ggml_backend_reg_fct_p, void *ggml_ba
4141

4242
size_t free, total;
4343
dev->iface.get_memory(dev, &free, &total);
44-
WARNING("%s: free memory: %ld MB\n", __func__, (size_t) free/1024/1024);
44+
INFO("%s: free memory: %ld MB", __func__, (size_t) free/1024/1024);
4545

46-
return APIR_BACKEND_INITIALIZE_SUCCESSS;
46+
return APIR_BACKEND_INITIALIZE_SUCCESS;
4747
}

ggml/src/ggml-remotingbackend/backend-internal.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "ggml-impl.h"
66
#include "ggml-backend-impl.h"
77
#include "ggml-backend.h"
8+
#include "shared/api_remoting.h"
89

910
extern ggml_backend_reg_t reg;
1011
extern ggml_backend_dev_t dev;
@@ -20,7 +21,7 @@ extern ggml_backend_t bck;
2021
} while(0)
2122

2223
extern "C" {
23-
uint32_t apir_backend_initialize();
24+
ApirLoadLibraryReturnCode apir_backend_initialize();
2425
void apir_backend_deinit(void);
2526
uint32_t apir_backend_dispatcher(uint32_t cmd_type, struct virgl_apir_context *ctx,
2627
char *dec_cur, const char *dec_end,

ggml/src/ggml-remotingbackend/backend.cpp

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "backend-internal.h"
88
#include "backend-dispatched.h"
99

10+
#include "shared/api_remoting.h"
1011
#include "shared/apir_backend.h"
1112
#include "shared/venus_cs.h"
1213

@@ -26,9 +27,11 @@ extern "C" {
2627
buffer->iface.free_buffer(buffer);
2728
}
2829

29-
size_t free, total;
30-
dev->iface.get_memory(dev, &free, &total);
31-
WARNING("%s: free memory: %ld MB\n", __func__, (size_t) free/1024/1024);
30+
if (dev) {
31+
size_t free, total;
32+
dev->iface.get_memory(dev, &free, &total);
33+
INFO("%s: free memory: %ld MB", __func__, (size_t) free/1024/1024);
34+
}
3235

3336
show_timer(&graph_compute_timer);
3437
show_timer(&set_tensor_timer);
@@ -43,7 +46,7 @@ extern "C" {
4346
INFO("%s: bye-bye", __func__);
4447
}
4548

46-
uint32_t apir_backend_initialize() {
49+
ApirLoadLibraryReturnCode apir_backend_initialize() {
4750
const char* dlsym_error;
4851

4952
const char* library_name = getenv(GGML_BACKEND_LIBRARY_PATH_ENV);
@@ -53,56 +56,61 @@ extern "C" {
5356
INFO("%s: loading %s (%s|%s)", __func__, library_name, library_reg, library_init);
5457

5558
if (!library_name) {
56-
ERROR("Cannot open library: env var '%s' not defined\n", GGML_BACKEND_LIBRARY_PATH_ENV);
59+
ERROR("cannot open the GGML library: env var '%s' not defined\n", GGML_BACKEND_LIBRARY_PATH_ENV);
5760

58-
return APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY;
61+
return APIR_LOAD_LIBRARY_ENV_VAR_MISSING;
5962
}
6063

6164
backend_library_handle = dlopen(library_name, RTLD_LAZY);
6265

6366
if (!backend_library_handle) {
64-
ERROR("Cannot open library: %s\n", dlerror());
67+
ERROR("cannot open the GGML library: %s", dlerror());
6568

66-
return APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY;
69+
return APIR_LOAD_LIBRARY_CANNOT_OPEN;
6770
}
6871

6972
if (!library_reg) {
70-
ERROR("Cannot register library: env var '%s' not defined\n", GGML_BACKEND_LIBRARY_REG_ENV);
73+
ERROR("cannot register the GGML library: env var '%s' not defined", GGML_BACKEND_LIBRARY_REG_ENV);
7174

72-
return APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY;
75+
return APIR_LOAD_LIBRARY_ENV_VAR_MISSING;
7376
}
7477

7578
void *ggml_backend_reg_fct = dlsym(backend_library_handle, library_reg);
7679
dlsym_error = dlerror();
7780
if (dlsym_error) {
78-
ERROR("Cannot load symbol: %s\n", dlsym_error);
81+
ERROR("cannot find the GGML backend registration symbol '%s' (from %s): %s",
82+
library_reg, GGML_BACKEND_LIBRARY_REG_ENV, dlsym_error);
7983

80-
return APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS;
84+
return APIR_LOAD_LIBRARY_SYMBOL_MISSING;
8185
}
8286

8387
if (!library_init) {
84-
ERROR("Cannot initialize library: env var '%s' not defined\n", library_init);
88+
ERROR("cannot initialize the GGML library: env var '%s' not defined", library_init);
8589

86-
return APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY;
90+
return APIR_LOAD_LIBRARY_ENV_VAR_MISSING;
8791
}
8892

8993
void *ggml_backend_init_fct = dlsym(backend_library_handle, library_init);
9094
dlsym_error = dlerror();
9195
if (dlsym_error) {
92-
ERROR("Cannot load symbol: %s\n", dlsym_error);
96+
ERROR("cannot find the GGML backend init symbol '%s' (from %s): %s",
97+
library_init, GGML_BACKEND_LIBRARY_INIT_ENV, dlsym_error);
9398

94-
return APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS;
99+
return APIR_LOAD_LIBRARY_SYMBOL_MISSING;
95100
}
96101

97102
ggml_backend_metal_get_device_context_fct = (void (*)(ggml_backend_dev_t, bool *, bool *, bool *)) dlsym(backend_library_handle, GGML_BACKEND_LIBRARY_METAL_DEVICE_CONTEXT);
98103
dlsym_error = dlerror();
99104
if (dlsym_error) {
100-
ERROR("Cannot load symbol: %s\n", dlsym_error);
105+
ERROR("cannot find the GGML device context symbol '%s': %s\n",
106+
GGML_BACKEND_LIBRARY_METAL_DEVICE_CONTEXT, dlsym_error);
101107

102-
return APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS;
108+
return APIR_LOAD_LIBRARY_SYMBOL_MISSING;
103109
}
104110

105-
return backend_dispatch_initialize(ggml_backend_reg_fct, ggml_backend_init_fct);
111+
uint32_t ret = backend_dispatch_initialize(ggml_backend_reg_fct, ggml_backend_init_fct);
112+
113+
return (ApirLoadLibraryReturnCode) (APIR_LOAD_LIBRARY_INIT_BASE_INDEX + ret);
106114
}
107115

108116
uint32_t apir_backend_dispatcher(uint32_t cmd_type, struct virgl_apir_context *ctx,
@@ -122,8 +130,8 @@ extern "C" {
122130
struct vn_cs_decoder *dec = &_dec;
123131

124132

125-
if (cmd_type > APIR_BACKEND_DISPATCH_TABLE_COUNT) {
126-
ERROR("Received an invalid dispatch index (%d > %d)\n",
133+
if (cmd_type >= APIR_BACKEND_DISPATCH_TABLE_COUNT) {
134+
ERROR("Received an invalid dispatch index (%d >= %d)\n",
127135
cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT);
128136
return APIR_BACKEND_FORWARD_INDEX_INVALID;
129137
}
Lines changed: 82 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,88 @@
1-
#define VIRGL_APIR_COMMAND_TYPE_LoadLibrary 331
2-
#define VIRGL_APIR_COMMAND_TYPE_Forward 332
1+
#pragma once
32

4-
static inline const char *api_remoting_command_name(int32_t type)
3+
/* the rest of this file must match virglrenderer/src/apir-protocol.h */
4+
5+
#include <unistd.h>
6+
7+
#define VENUS_COMMAND_TYPE_LENGTH 331
8+
9+
#define APIR_PROTOCOL_MAJOR 0
10+
#define APIR_PROTOCOL_MINOR 1
11+
12+
#define APIR_HANDSHAKE_MAGIC 0xab1e
13+
14+
typedef enum {
15+
APIR_COMMAND_TYPE_HandShake = 0,
16+
APIR_COMMAND_TYPE_LoadLibrary = 1,
17+
APIR_COMMAND_TYPE_Forward = 2,
18+
19+
APIR_COMMAND_TYPE_LENGTH = 3,
20+
} ApirCommandType;
21+
22+
typedef uint64_t ApirCommandFlags;
23+
24+
typedef enum {
25+
APIR_LOAD_LIBRARY_SUCCESS = 0,
26+
APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR = 1,
27+
APIR_LOAD_LIBRARY_ALREADY_LOADED = 2,
28+
APIR_LOAD_LIBRARY_ENV_VAR_MISSING = 3,
29+
APIR_LOAD_LIBRARY_CANNOT_OPEN = 4,
30+
APIR_LOAD_LIBRARY_SYMBOL_MISSING = 5,
31+
APIR_LOAD_LIBRARY_INIT_BASE_INDEX = 6, // anything above this is a APIR backend library initialization return code
32+
} ApirLoadLibraryReturnCode;
33+
34+
typedef enum {
35+
APIR_FORWARD_SUCCESS = 0,
36+
APIR_FORWARD_NO_DISPATCH_FCT = 1,
37+
APIR_FORWARD_TIMEOUT = 2,
38+
39+
APIR_FORWARD_BASE_INDEX = 3, // anything above this is a APIR backend library forward return code
40+
} ApirForwardReturnCode;
41+
42+
__attribute__((unused))
43+
static inline const char *apir_command_name(ApirCommandType type)
544
{
645
switch (type) {
7-
case VIRGL_APIR_COMMAND_TYPE_LoadLibrary: return "LoadLibrary";
8-
case VIRGL_APIR_COMMAND_TYPE_Forward: return "Forward";
46+
case APIR_COMMAND_TYPE_HandShake: return "HandShake";
47+
case APIR_COMMAND_TYPE_LoadLibrary: return "LoadLibrary";
48+
case APIR_COMMAND_TYPE_Forward: return "Forward";
949
default: return "unknown";
1050
}
1151
}
52+
53+
__attribute__((unused))
54+
static const char *apir_load_library_error(ApirLoadLibraryReturnCode code) {
55+
#define APIR_LOAD_LIBRARY_ERROR(code_name) \
56+
do { \
57+
if (code == code_name) return #code_name; \
58+
} while (0) \
59+
60+
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_SUCCESS);
61+
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR);
62+
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_ALREADY_LOADED);
63+
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_ENV_VAR_MISSING);
64+
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_CANNOT_OPEN);
65+
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_SYMBOL_MISSING);
66+
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_INIT_BASE_INDEX);
67+
68+
return "Unknown APIR_COMMAND_TYPE_LoadLibrary error";
69+
70+
#undef APIR_LOAD_LIBRARY_ERROR
71+
}
72+
73+
__attribute__((unused))
74+
static const char *apir_forward_error(ApirForwardReturnCode code) {
75+
#define APIR_FORWARD_ERROR(code_name) \
76+
do { \
77+
if (code == code_name) return #code_name; \
78+
} while (0) \
79+
80+
APIR_FORWARD_ERROR(APIR_FORWARD_SUCCESS);
81+
APIR_FORWARD_ERROR(APIR_FORWARD_NO_DISPATCH_FCT);
82+
APIR_FORWARD_ERROR(APIR_FORWARD_TIMEOUT);
83+
APIR_FORWARD_ERROR(APIR_FORWARD_BASE_INDEX);
84+
85+
return "Unknown APIR_COMMAND_TYPE_Forward error";
86+
87+
#undef APIR_FORWARD_ERROR
88+
}

ggml/src/ggml-remotingbackend/shared/apir_backend.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
#pragma once
22

3-
#define APIR_BACKEND_INITIALIZE_SUCCESSS 0
3+
#define APIR_BACKEND_INITIALIZE_SUCCESS 0
44
#define APIR_BACKEND_INITIALIZE_CANNOT_OPEN_BACKEND_LIBRARY 1
55
#define APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY 2
66
#define APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS 3
77
#define APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS 4
8+
89
#define APIR_BACKEND_INITIALIZE_BACKEND_FAILED 5
910
// new entries here need to be added to the apir_backend_initialize_error function below
1011

@@ -89,14 +90,14 @@ extern struct timer_data set_tensor_from_ptr_timer;
8990

9091
static inline void start_timer(struct timer_data *timer) {
9192
struct timespec ts;
92-
clock_gettime(CLOCK_REALTIME, &ts); // Use CLOCK_MONOTONIC for elapsed time
93+
clock_gettime(CLOCK_MONOTONIC, &ts);
9394
timer->start = (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec;
9495
}
9596

9697
// returns the duration in ns
9798
static inline long long stop_timer(struct timer_data *timer) {
9899
struct timespec ts;
99-
clock_gettime(CLOCK_REALTIME, &ts); // Use CLOCK_MONOTONIC for elapsed time
100+
clock_gettime(CLOCK_MONOTONIC, &ts);
100101
long long timer_end = (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec;
101102

102103
long long duration = (timer_end - timer->start);
@@ -111,6 +112,10 @@ static inline void show_timer(struct timer_data *timer) {
111112
double itl = ms/timer->count;
112113
double speed = 1/itl * 1000;
113114

115+
if (!timer->total) {
116+
return;
117+
}
118+
114119
INFO("%15s [%9.0f] ms for %4ld invocations | ITL %2.2f ms | throughput = %4.2f t/s (%4.2f ms/call)",
115120
timer->name, ms, timer->count, itl, speed, ms/timer->count);
116121
}
@@ -121,7 +126,7 @@ static const char *apir_backend_initialize_error(int code) {
121126
if (code == code_name) return #code_name; \
122127
} while (0) \
123128

124-
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_SUCCESSS);
129+
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_SUCCESS);
125130
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_CANNOT_OPEN_BACKEND_LIBRARY);
126131
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY);
127132
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS);

ggml/src/ggml-remotingfrontend/ggml-metal-remoting.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ const struct ggml_backend_metal_device_context *get_metal_dev_context(const ggml
99
return &metal_dev_ctx;
1010
}
1111

12+
has_metal_dev_ctx = true;
1213
struct virtgpu *gpu = DEV_TO_GPU(dev);
1314

1415
apir_metal_get_device_context(gpu, &metal_dev_ctx);

ggml/src/ggml-remotingfrontend/ggml-remoting.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,6 @@ ggml_backend_dev_t ggml_backend_remoting_get_device(size_t device);
117117
ggml_backend_buffer_type_t ggml_backend_remoting_host_buffer_type();
118118
ggml_backend_t ggml_backend_remoting_device_init(ggml_backend_dev_t dev, const char * params);
119119
ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev);
120-
ggml_backend_t ggml_backend_remoting_device_init(ggml_backend_dev_t dev, const char * params);
121120

122121
struct remoting_buffer_struct;
123122
typedef std::shared_ptr<remoting_buffer_struct> remoting_buffer;

ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ ggml_status
1010
apir_backend_graph_compute(struct virtgpu *gpu, ggml_cgraph *cgraph) {
1111
struct vn_cs_encoder *encoder;
1212
struct vn_cs_decoder *decoder;
13+
ApirForwardReturnCode ret;
1314

1415
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE);
1516

@@ -37,13 +38,13 @@ apir_backend_graph_compute(struct virtgpu *gpu, ggml_cgraph *cgraph) {
3738

3839
vn_encode_cgraph_data(&secondary_enc, cgraph_data);
3940

40-
REMOTE_CALL(gpu, encoder, decoder);
41+
REMOTE_CALL(gpu, encoder, decoder, ret);
4142

4243
ggml_status status = GGML_STATUS_ABORTED;
4344
vn_decode_ggml_status(decoder, &status);
4445
//INFO("Received status %u", status);
4546

46-
REMOTE_CALL_FINISH(gpu, encoder, decoder);
47+
remote_call_finish(gpu, encoder, decoder);
4748

4849
if (shmem != gpu->data_shmem) {
4950
virtgpu_shmem_destroy(gpu, shmem->shmem);

0 commit comments

Comments
 (0)