From b5c727dc1a55323f02e5f60a50bcecb866dd51ea Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 8 Feb 2010 14:08:13 +1100 Subject: [PATCH] mdmon: remove switch-root functionality. Using switch-root and then creating files in /var/run once that location is writable is racy as most distros clean out /var/run shortly after it is mounted. This can cause the .pid and .sock files to be deleted shortly after they are created. This option doesn't seem to be used at all any more, and the alternative of communicating the pid etc in some preserved directory and then restarting mdmon once all is settled seems simpler. So remove the code for supporting switchroot and update man page accordingly. Signed-off-by: NeilBrown --- mdmon.8 | 90 ++++++++++++++++++------------------------------------- mdmon.c | 92 ++++++++++++++------------------------------------------- 2 files changed, 51 insertions(+), 131 deletions(-) diff --git a/mdmon.8 b/mdmon.8 index 9be0327..2720b45 100644 --- a/mdmon.8 +++ b/mdmon.8 @@ -5,7 +5,7 @@ mdmon \- monitor MD external metadata arrays .SH SYNOPSIS -.BI mdmon " CONTAINER [NEWROOT | PID]" +.BI mdmon " [--all] [--takeover] CONTAINER" .SH OVERVIEW The 2.6.27 kernel brings the ability to support external metadata arrays. @@ -128,66 +128,35 @@ device>/" if the array is to remain readonly. CONTAINER The .B container -device to monitor. It can be a full path like /dev/md/container, a simple md -device name like md127, or -.I \-\-all - which tells -.I mdmon -to scan for containers and launch an -.I mdmon -instance for each one found. +device to monitor. It can be a full path like /dev/md/container, or a +simple md device name like md127. .TP -[NEWROOT] -In order to support an external metadata raid array as the rootfs +.B \-\-takeover +This instructs .I mdmon -needs to be started in the initramfs environment. Once the initramfs -environment mounts the final rootfs +to replace any active .I mdmon -needs to be restarted in the new namespace. 
When NEWROOT is specified +which is currently monitoring the array. This is primarily used late +in the boot process to replace any .I mdmon -will terminate any +which was started from an +.B initramfs +before the root filesystem was mounted. This avoids holding a +reference on that +.B initramfs +indefinitely and ensures that the +.I pid +and +.I sock +files used to communicate with .I mdmon -instances that are running in the current namespace, -.IR chroot (2) -to NEWROOT, and continue monitoring the container. - -Alternately the new -.I mdmon -can be started after the new root has been installed with -.I pivotroot -by passing -.B / -as the NEWROOT. For -.I mdmon -to be able to find and kill the old -.I mdmon -there must be some part of the filesystem that persists from before -the -.I pivotroot -to afterwards. This can be -.B /var/run/mdadm -or some other directory. The default other directory is -.B /lib/init/rw/mdadm -but this can easily be changed when -.I mdmon -is compiled. -If -.I mdmon -cannot store the pid file in -.B /var/run/mdadm -it will store it in the other directory until -.B /var/run/mdadm -becomes available, or until it is killed. +are in a standard place. .TP -[PID] -If the second argument to -.I mdmon -does not start with a slash it is assumed to be the pid of a previous -instance of -.I mdmon -to kill. This is used internally by -.I mdmon -to pass the pid across a chroot/exec and should not be used otherwise. +.B \-\-all +This tells mdmon to find any active containers and start monitoring +each of them if appropriate. This is normally used with +.B \-\-takeover +late in the boot sequence. .PP Note that @@ -200,17 +169,16 @@ RAID arrays. The only times it is run other that by is when the boot scripts need to restart it after mounting the new root filesystem. -.SH EXMAPLES +.SH EXAMPLES -.B " mdmon --all /" +.B " mdmon \-\-all \-\-takeover" .br Any .I mdmon which is currently running is killed and a new instance is started. 
-This should be run early in the boot sequence after a -.I pivotroot -to the final root filesystem, but before that filesystem is remounted -read-write. +This should be run late in the boot sequence and particularly after +.B /var +is mounted and writable. .SH SEE ALSO .IR mdadm (8), .IR md (4). diff --git a/mdmon.c b/mdmon.c index 3747460..c590fb2 100644 --- a/mdmon.c +++ b/mdmon.c @@ -254,28 +254,31 @@ void usage(void) exit(2); } -static int mdmon(char *devname, int devnum, int must_fork, char *switchroot); +static int mdmon(char *devname, int devnum, int must_fork, int takeover); int main(int argc, char *argv[]) { char *container_name = NULL; - char *switchroot = NULL; int devnum; char *devname; int status = 0; + int arg; + int all = 0; + int takeover = 0; - switch (argc) { - case 3: - switchroot = argv[2]; - case 2: - container_name = argv[1]; - break; - default: - usage(); + for (arg = 1; arg < argc; arg++) { + if (strcmp(argv[arg], "--all") == 0 || + strcmp(argv[arg], "/proc/mdstat") == 0) + all = 1; + else if (strcmp(argv[arg], "--takeover") == 0) + takeover = 1; + else if (container_name == NULL) + container_name = argv[arg]; + else + usage(); } - if (strcmp(container_name, "/proc/mdstat") == 0 || - strcmp(container_name, "--all") == 0) { + if (all) { struct mdstat_ent *mdstat, *e; /* launch an mdmon instance for each container found */ @@ -292,7 +295,7 @@ int main(int argc, char *argv[]) sprintf(container_name, "%s", devname); } status |= mdmon(devname, e->devnum, 1, - switchroot); + takeover); } } free_mdstat(mdstat); @@ -320,10 +323,10 @@ int main(int argc, char *argv[]) container_name); exit(1); } - return mdmon(devname, devnum, do_fork(), switchroot); + return mdmon(devname, devnum, do_fork(), takeover); } -static int mdmon(char *devname, int devnum, int must_fork, char *switchroot) +static int mdmon(char *devname, int devnum, int must_fork, int takeover) { int mdfd; struct mdinfo *mdi, *di; @@ -336,50 +339,7 @@ static int mdmon(char *devname, int 
devnum, int must_fork, char *switchroot) pid_t victim = -1; int victim_sock = -1; - dprintf("starting mdmon for %s in %s\n", - devname, switchroot ? : "/"); - - /* switchroot is either a path name starting with '/', or a - * pid of the original mdmon (we have already done the chroot). - * In the latter case, stdin is a socket connected to the original - * mdmon. - */ - - /* try to spawn mdmon instances from the target file system */ - if (switchroot && switchroot[0] == '/' && - strcmp(switchroot, "/") != 0) { - pid_t pid; - char buf[20]; - - switch (fork()) { - case 0: - victim = mdmon_pid(devnum); - victim_sock = connect_monitor(devname); - if (chroot(switchroot) != 0) { - fprintf(stderr, "mdmon: failed to chroot to '%s': %s\n", - switchroot, strerror(errno)); - exit(4); - } - ignore = chdir("/"); - sprintf(buf, "%d", victim); - if (victim_sock) { - close(0); - dup(victim_sock); - close(victim_sock); - } - execl("/sbin/mdmon", "mdmon", devname, buf, NULL); - exit(1); - case -1: - return 1; - default: - pid = wait(&status); - if (pid > -1 && WIFEXITED(status) && - WEXITSTATUS(status) == 0) - return 0; - else - return 1; - } - } + dprintf("starting mdmon for %s\n", devname); mdfd = open_dev(devnum); if (mdfd < 0) { @@ -486,17 +446,9 @@ static int mdmon(char *devname, int devnum, int must_fork, char *switchroot) act.sa_handler = SIG_IGN; sigaction(SIGPIPE, &act, NULL); - if (switchroot) { - /* we assume we assume that /sys /proc /dev are available in - * the new root - */ - if (switchroot[0] == '/') { - victim = mdmon_pid(container->devnum); - victim_sock = connect_monitor(container->devname); - } else { - victim = atoi(switchroot); - victim_sock = 0; - } + if (takeover) { + victim = mdmon_pid(container->devnum); + victim_sock = connect_monitor(container->devname); } ignore = chdir("/");