Skip to content

Commit 82d675a

Browse files
Prevent dockerd from restarting after runtime error (#198)
1 parent 47bc495 commit 82d675a

File tree

2 files changed

+86
-43
lines changed

2 files changed

+86
-43
lines changed

README.md

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -240,28 +240,31 @@ Following are the possible values of `Status`:
240240
```text
241241
-1 NOT STARTED The application is not started.
242242
0 RUNNING The application is started and dockerd is running.
243-
1 TLS CERT MISSING Use TLS is selected but there but certificates are missing on the device.
243+
1 DOCKERD STOPPED Dockerd was stopped successfully and will soon be restarted.
244+
2 DOCKERD RUNTIME ERROR Dockerd has reported an error during runtime that needs to be resolved by the operator.
245+
Change at least one parameter or restart the application in order to start dockerd again.
246+
3 TLS CERT MISSING Use TLS is selected but certificates are missing on the device.
244247
The application is running but dockerd is stopped.
245248
Upload certificates and restart the application or de-select Use TLS.
246-
2 NO SOCKET Neither TCP Socket or IPC Socket are selected.
249+
4 NO SOCKET Neither TCP Socket nor IPC Socket is selected.
247250
The application is running but dockerd is stopped.
248251
Select one or both sockets.
249-
3 NO SD CARD Use SD Card is selected but no SD Card is mounted in the device.
252+
5 NO SD CARD Use SD Card is selected but no SD Card is mounted in the device.
250253
The application is running but dockerd is stopped.
251-
Insert and mount a SD Card.
252-
4 SD CARD WRONG FS Use SD Card is selected but the mounted SD Card has the wrong file system.
254+
Insert and mount an SD Card.
255+
6 SD CARD WRONG FS Use SD Card is selected but the mounted SD Card has the wrong file system.
253256
The application is running but dockerd is stopped.
254257
Format the SD Card with the correct file system.
255-
5 SD CARD WRONG PERMISSION Use SD Card is selected but the application user does not have the correct file
258+
7 SD CARD WRONG PERMISSION Use SD Card is selected but the application user does not have the correct file
256259
permissions to use it.
257260
The application is running but dockerd is stopped.
258261
Make sure no directories with the wrong user permissions are left on the
259-
SD Card. Then restart the application.
260-
6 SD CARD MIGRATION FAILED Use SD Card is selected but migrating data from the old data root location to the
262+
SD Card, then restart the application.
263+
8 SD CARD MIGRATION FAILED Use SD Card is selected but migrating data from the old data root location to the
261264
new one has failed.
262265
The application is running but dockerd is stopped.
263-
Manually back up and remove either the old data root folder, or the new
264-
data root folder, from the SD card. Then restart the application.
266+
Manually back up and remove either the old or the new data root folder from the SD card,
267+
then restart the application.
265268
```
266269

267270
## Building the Docker ACAP

app/dockerdwrapper.c

Lines changed: 73 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,10 @@
4040
#define PARAM_STATUS "Status"
4141

4242
typedef enum {
43-
STATUS_NOT_STARTED = 0,
43+
STATUS_NOT_STARTED = 0, // Index in the array, not the actual status code
4444
STATUS_RUNNING,
45+
STATUS_DOCKERD_STOPPED,
46+
STATUS_DOCKERD_RUNTIME_ERROR,
4547
STATUS_TLS_CERT_MISSING,
4648
STATUS_NO_SOCKET,
4749
STATUS_NO_SD_CARD,
@@ -53,12 +55,14 @@ typedef enum {
5355

5456
// Human-readable status strings. Each string starts with the status code shown to
// the user, followed by a short name. The leading number is NOT the array index:
// the first entry is "-1 NOT STARTED" at index 0.
// NOTE(review): presumably indexed by status_code_t, so the order here must match
// the order of the enumerators — confirm against the enum definition.
static const char* const status_code_strs[STATUS_CODE_COUNT] = {"-1 NOT STARTED",
5557
"0 RUNNING",
56-
"1 TLS CERT MISSING",
57-
"2 NO SOCKET",
58-
"3 NO SD CARD",
59-
"4 SD CARD WRONG FS",
60-
"5 SD CARD WRONG PERMISSION",
61-
"6 SD CARD MIGRATION FAILED"};
58+
"1 DOCKERD STOPPED",
59+
"2 DOCKERD RUNTIME ERROR",
60+
"3 TLS CERT MISSING",
61+
"4 NO SOCKET",
62+
"5 NO SD CARD",
63+
"6 SD CARD WRONG FS",
64+
"7 SD CARD WRONG PERMISSION",
65+
"8 SD CARD MIGRATION FAILED"};
6266

6367
struct settings {
6468
char* data_root;
@@ -68,10 +72,18 @@ struct settings {
6872
};
6973

7074
// Application state that main() hands to the registered callbacks.
struct app_state {
75+
// Checked by main() before dockerd is started. Set to false when dockerd exits
// with a non-zero exit code, and set back to true on any parameter change.
bool allow_dockerd_to_start;
7176
// Heap-allocated string, released with free() at the end of main().
// NOTE(review): presumably the path of the SD card storage area — confirm.
char* sd_card_area;
7277
// Handle returned by setup_axparameter(); used when updating the Status parameter.
AXParameter* param_handle;
7378
};
7479

80+
// Describes how a child process terminated: by exit code or by signal.
// If the process was terminated by a signal, code will be -1.
81+
// If the process exited with an exit code, signal will be 0.
82+
struct exit_cause {
83+
int code;
84+
int signal;
85+
};
86+
7587
/**
7688
* @brief Callback called when the dockerd process exits.
7789
*/
@@ -653,22 +665,43 @@ static void stop_dockerd(void) {
653665
log_info("Stopped dockerd.");
654666
}
655667

668+
/**
 * @brief Translate the wait status of a child process into an exit cause.
 *
 * @param status Wait status of the child process.
 * @param error  Unused. The pointer is received by value, so nothing stored
 *               through it could ever reach the caller. It is kept only so
 *               that existing call sites continue to compile.
 *
 * @return code >= 0 and signal == 0 if the process exited with an exit code;
 *         code == -1 and signal == status if GLib reported G_SPAWN_ERROR_FAILED;
 *         code == -1 and signal == 0 for anything else.
 */
static struct exit_cause child_process_exit_cause(int status, GError* error) {
    (void)error;

    struct exit_cause result = {.code = -1, .signal = 0};

    // Own the GError locally so that it is always released before returning.
    GError* wait_error = NULL;

    if (g_spawn_check_wait_status(status, &wait_error)) {
        result.code = 0;
    } else if (wait_error != NULL) {
        if (wait_error->domain == G_SPAWN_EXIT_ERROR) {
            result.code = wait_error->code;
        } else if (wait_error->domain == G_SPAWN_ERROR &&
                   wait_error->code == G_SPAWN_ERROR_FAILED) {
            // NOTE(review): this stores the raw wait status rather than
            // WTERMSIG(status); the two may differ (e.g. core-dump flag) — confirm.
            result.signal = status;
        }
    }

    g_clear_error(&wait_error);
    return result;
}
680+
656681
/**
 * @brief Log, at debug level, how a child process terminated.
 *
 * @param name   Name of the child process; used only in the log message.
 * @param pid    Process ID of the child; used only in the log message.
 * @param status Wait status of the child process.
 */
static void log_child_process_exit_cause(const char* name, GPid pid, int status) {
    // child_process_exit_cause() takes its GError pointer by value and therefore
    // cannot hand an error back; the error text is fetched separately below.
    struct exit_cause exit_cause = child_process_exit_cause(status, NULL);

    char msg[128];
    int prefix_len = g_snprintf(msg, sizeof(msg), "Child process %s (%d)", name, pid);
    if (prefix_len < 0) {
        msg[0] = '\0';
        prefix_len = 0;
    }
    // g_snprintf() returns the length the output would have had without
    // truncation, so clamp it to keep the write position inside the buffer.
    size_t used = (size_t)prefix_len < sizeof(msg) ? (size_t)prefix_len : sizeof(msg) - 1;
    char* ptr = msg + used;
    size_t room = sizeof(msg) - used;

    if (exit_cause.code >= 0) {
        g_snprintf(ptr, room, " exited with exit code %d", exit_cause.code);
    } else if (exit_cause.signal > 0) {
        g_snprintf(ptr, room, " was killed by signal %d", exit_cause.signal);
    } else {
        // Ask GLib again to obtain a description of what went wrong.
        GError* error = NULL;
        g_spawn_check_wait_status(status, &error);
        g_snprintf(ptr,
                   room,
                   " terminated in an unexpected way: %s",
                   error ? error->message : "unknown cause");
        g_clear_error(&error);
    }
    log_debug("%s", msg);
}
671697

698+
static bool child_process_exited_with_error(int status) {
699+
GError* error = NULL;
700+
struct exit_cause exit_cause = child_process_exit_cause(status, error);
701+
g_clear_error(&error);
702+
return exit_cause.code > 0;
703+
}
704+
672705
/**
673706
* @brief Callback called when the dockerd process exits.
674707
*/
@@ -677,6 +710,11 @@ static void dockerd_process_exited_callback(GPid pid, gint status, gpointer app_
677710

678711
struct app_state* app_state = app_state_void_ptr;
679712

713+
bool runtime_error = child_process_exited_with_error(status);
714+
app_state->allow_dockerd_to_start = !runtime_error;
715+
status_code_t s = runtime_error ? STATUS_DOCKERD_RUNTIME_ERROR : STATUS_DOCKERD_STOPPED;
716+
set_status_parameter(app_state->param_handle, s);
717+
680718
dockerd_process_pid = -1;
681719
g_spawn_close_pid(pid);
682720

@@ -700,26 +738,26 @@ static gboolean quit_main_loop(void*) {
700738
* @param name Name of the updated parameter.
701739
* @param value Value of the updated parameter.
702740
*/
703-
static void parameter_changed_callback(const gchar* name,
704-
const gchar* value,
705-
__attribute__((unused)) gpointer data) {
706-
log_debug("Parameter %s changed to %s", name, value);
741+
static void
742+
parameter_changed_callback(const gchar* name, const gchar* value, gpointer app_state_void_ptr) {
707743
const gchar* parname = name += strlen("root." APP_NAME ".");
708744

709-
for (size_t i = 0; i < sizeof(ax_parameters) / sizeof(ax_parameters[0]); ++i) {
710-
if (strcmp(parname, ax_parameters[i]) == 0) {
711-
log_info("%s changed to: %s", ax_parameters[i], value);
712-
// Trigger a restart of dockerd from main(), but delay it 1 second.
713-
// When there are multiple AXParameter callbacks in a queue, such as
714-
// during the first parameter change after installation, any parameter
715-
// usage, even outside a callback, will cause a 20 second deadlock per
716-
// queued callback.
717-
g_timeout_add_seconds(1, quit_main_loop, NULL);
718-
}
719-
}
745+
log_info("%s changed to %s", parname, value);
746+
747+
struct app_state* app_state = app_state_void_ptr;
748+
749+
// If dockerd has failed before, this parameter change may have resolved the problem.
750+
app_state->allow_dockerd_to_start = true;
751+
752+
// Trigger a restart of dockerd from main(), but delay it 1 second.
753+
// When there are multiple AXParameter callbacks in a queue, such as
754+
// during the first parameter change after installation, any parameter
755+
// usage, even outside a callback, will cause a 20 second deadlock per
756+
// queued callback.
757+
g_timeout_add_seconds(1, quit_main_loop, NULL);
720758
}
721759

722-
static AXParameter* setup_axparameter(void) {
760+
static AXParameter* setup_axparameter(struct app_state* app_state) {
723761
bool success = false;
724762
GError* error = NULL;
725763
AXParameter* ax_parameter = ax_parameter_new(APP_NAME, &error);
@@ -733,7 +771,7 @@ static AXParameter* setup_axparameter(void) {
733771
gboolean geresult = ax_parameter_register_callback(ax_parameter,
734772
parameter_path,
735773
parameter_changed_callback,
736-
NULL,
774+
app_state,
737775
&error);
738776
free(parameter_path);
739777

@@ -784,7 +822,9 @@ int main(int argc, char** argv) {
784822
parse_command_line(argc, argv, &log_settings);
785823
log_init(&log_settings);
786824

787-
app_state.param_handle = setup_axparameter();
825+
app_state.allow_dockerd_to_start = true;
826+
827+
app_state.param_handle = setup_axparameter(&app_state);
788828
if (!app_state.param_handle) {
789829
log_error("Error in setup_axparameter");
790830
return EX_SOFTWARE;
@@ -797,17 +837,15 @@ int main(int argc, char** argv) {
797837
struct sd_disk_storage* sd_disk_storage = sd_disk_storage_init(sd_card_callback, &app_state);
798838

799839
while (application_exit_code == EX_KEEP_RUNNING) {
800-
if (dockerd_process_pid == -1)
840+
if (dockerd_process_pid == -1 && app_state.allow_dockerd_to_start)
801841
read_settings_and_start_dockerd(&app_state);
802842

803843
main_loop_run();
804844

805845
log_settings.debug = is_app_log_level_debug(app_state.param_handle);
806846

807847
stop_dockerd();
808-
set_status_parameter(app_state.param_handle, STATUS_NOT_STARTED);
809848
}
810-
811849
main_loop_unref();
812850

813851
if (app_state.param_handle != NULL) {
@@ -821,5 +859,7 @@ int main(int argc, char** argv) {
821859

822860
sd_disk_storage_free(sd_disk_storage);
823861
free(app_state.sd_card_area);
862+
863+
set_status_parameter(app_state.param_handle, STATUS_NOT_STARTED);
824864
return application_exit_code;
825865
}

0 commit comments

Comments
 (0)