Projects STRLCPY criu Commits 204c1ef9
🤬
  • restore: Fix deadlock when helper's child dies

    Since commit ced9c529f687 ("restore: fix race with helpers' kids dying
    too early"), we block SIGCHLD in helper tasks before CR_STATE_RESTORE.
    This way we avoided default criu sighandler as it doesn't expect that
    childs may die.
    
    This is very racy as we wait on futex for another stage to be started,
    but the next stage may start only when all the tasks complete previous
    stage. If some children of helper dies, the helper may already have
    blocked SIGCHLD and have started sleeping on the futex. Then the next
    stage never comes and no one reads a pending SIGCHLD for helper.
    
    A customer met this situation on the node, where the following
    (non-related) problem has occured:
    Unable to send a fin packet: libnet_write_raw_ipv6(): -1 bytes written (Network is unreachable)
    Then child criu of the helper has exited with error-code and the
    lockup has happened.
    
    While we could fix it by aborting futex in the end of
    restore_task_with_children() for each (non-root also) tasks,
    that would be not completely correct:
    1. All futex-waiting tasks will wake up after that and they
       may not expect that some tasks are on the previous stage,
       so they will spam into logs with unrelated errors and may
       also die painfully.
    2. Child may die and miss aborting of the futex due to:
       o segfault
       o OOM killer
       o User-sended SIGKILL
       o Other error-path we forgot to cover with abort futex
    
    To fix this deadlock in TASK_HELPER, as suggested-by Kirill,
    let's check if there are children deaths expected - if there
    isn't any, don't block SIGCHLD, otherwise wait() and check if
    death was on expected stage of restore (not CR_STATE_RESTORE).
    
    Reviewed-by: Kirill Tkhai <[email protected]>
    Signed-off-by: Dmitry Safonov <[email protected]>
    Signed-off-by: Andrei Vagin <[email protected]>
    
    Conflicts:
    	criu/cr-restore.c
  • Loading...
  • Dmitry Safonov committed with Pavel Emelyanov 7 years ago
    204c1ef9
    1 parent 8a270e58
Revision indexing in progress... (symbol navigation in revisions will be accurate after indexed)
  • ■ ■ ■ ■ ■
    criu/cr-restore.c
    skipped 1001 lines
    1002 1002   return ret;
    1003 1003  }
    1004 1004   
     1005 +/*
     1006 + * Find if there are children which are zombies or helpers - processes
     1007 + * which are expected to die during the restore.
     1008 + */
     1009 +static bool child_death_expected(void)
     1010 +{
     1011 + struct pstree_item *pi;
     1012 + 
     1013 + list_for_each_entry(pi, &current->children, sibling) {
     1014 + switch (pi->pid->state) {
     1015 + case TASK_DEAD:
     1016 + case TASK_HELPER:
     1017 + return true;
     1018 + }
     1019 + }
     1020 + 
     1021 + return false;
     1022 +}
     1023 + 
     1024 +/*
     1025 + * Restore a helper process - artificially created by criu
     1026 + * to restore attributes of process tree.
     1027 + * - sessions for each leaders are dead
     1028 + * - process groups with dead leaders
     1029 + * - dead tasks for which /proc/<pid>/... is opened by restoring task
     1030 + * - whatnot
     1031 + */
     1032 +static int restore_one_helper(void)
     1033 +{
     1034 + siginfo_t info;
     1035 + 
     1036 + if (!child_death_expected()) {
     1037 + /*
     1038 + * Restoree has no children that should die, during restore,
     1039 + * wait for the next stage on futex.
     1040 + * The default SIGCHLD handler will handle an unexpected
     1041 + * child's death and abort the restore if someone dies.
     1042 + */
     1043 + restore_finish_stage(task_entries, CR_STATE_RESTORE);
     1044 + return 0;
     1045 + }
     1046 + 
     1047 + /*
     1048 + * The restoree has children which will die - decrement itself from
     1049 + * nr. of tasks processing the stage and wait for anyone to die.
     1050 + * Tasks may die only when they're on the following stage.
     1051 + * If one dies earlier - that's unexpected - treat it as an error
     1052 + * and abort the restore.
     1053 + */
     1054 + if (block_sigmask(NULL, SIGCHLD))
     1055 + return -1;
     1056 + 
     1057 + /* Finish CR_STATE_RESTORE, but do not wait for the next stage. */
     1058 + futex_dec_and_wake(&task_entries->nr_in_progress);
     1059 + 
     1060 + if (waitid(P_ALL, 0, &info, WEXITED | WNOWAIT)) {
     1061 + pr_perror("Failed to wait\n");
     1062 + return -1;
     1063 + }
     1064 + 
     1065 + if (futex_get(&task_entries->start) == CR_STATE_RESTORE) {
     1066 + pr_err("Child %d died too early\n", info.si_pid);
     1067 + return -1;
     1068 + }
     1069 + 
     1070 + if (wait_on_helpers_zombies()) {
     1071 + pr_err("Failed to wait on helpers and zombies\n");
     1072 + return -1;
     1073 + }
     1074 + 
     1075 + return 0;
     1076 +}
     1077 + 
    1005 1078  static int restore_one_task(int pid, CoreEntry *core)
    1006 1079  {
    1007 1080   int ret;
    skipped 5 lines
    1013 1086   else if (current->pid->state == TASK_DEAD)
    1014 1087   ret = restore_one_zombie(core);
    1015 1088   else if (current->pid->state == TASK_HELPER) {
    1016  - sigset_t blockmask, oldmask;
    1017  - 
    1018  - sigemptyset(&blockmask);
    1019  - sigaddset(&blockmask, SIGCHLD);
    1020  - 
    1021  - if (sigprocmask(SIG_BLOCK, &blockmask, &oldmask) == -1) {
    1022  - pr_perror("Can not set mask of blocked signals");
    1023  - return -1;
    1024  - }
    1025  - 
    1026  - restore_finish_stage(task_entries, CR_STATE_RESTORE);
    1027  - if (wait_on_helpers_zombies()) {
    1028  - pr_err("failed to wait on helpers and zombies\n");
    1029  - ret = -1;
    1030  - } else {
    1031  - ret = 0;
    1032  - }
     1089 + ret = restore_one_helper();
    1033 1090   } else {
    1034 1091   pr_err("Unknown state in code %d\n", (int)core->tc->task_state);
    1035 1092   ret = -1;
    skipped 2254 lines
Please wait...
Page is in error, reload to recover