diff --git a/Makefile b/Makefile index 5636392..1836b4b 100644 --- a/Makefile +++ b/Makefile @@ -58,7 +58,13 @@ CONFFILE = $(SYSCONFDIR)/mdadm.conf CONFFILE2 = $(SYSCONFDIR)/mdadm/mdadm.conf MAILCMD =/usr/sbin/sendmail -t CONFFILEFLAGS = -DCONFFILE=\"$(CONFFILE)\" -DCONFFILE2=\"$(CONFFILE2)\" -CFLAGS = $(CWFLAGS) $(CXFLAGS) -DSendmail=\""$(MAILCMD)"\" $(CONFFILEFLAGS) +# ALT_RUN should be somewhere that persists across the pivotroot +# from early boot to late boot. +# If you don't have /lib/init/rw you might want to use /dev/.something +# e.g. make ALT_RUN=/dev/.mdadm +ALT_RUN = /lib/init/rw/mdadm +ALTFLAGS = -DALT_RUN=\"$(ALT_RUN)\" +CFLAGS = $(CWFLAGS) $(CXFLAGS) -DSendmail=\""$(MAILCMD)"\" $(CONFFILEFLAGS) $(ALTFLAGS) # If you want a static binary, you might uncomment these # LDFLAGS = -static diff --git a/managemon.c b/managemon.c index a4e9a8f..2a73d4b 100644 --- a/managemon.c +++ b/managemon.c @@ -702,14 +702,29 @@ void do_manager(struct supertype *container) read_sock(container); - if (container->sock < 0 || socket_hup_requested) { - /* If this fails, we hope it already exists - * pid file lives in /var/run/mdadm/mdXX.pid + if (socket_hup_requested) { + /* Try to create pid file and socket in + * main or alternate RUN directory. 
*/ - mkdir("/var/run/mdadm", 0600); - close(container->sock); - container->sock = make_control_sock(container->devname); - make_pidfile(container->devname, 0); + char *dir = VAR_RUN; + if (mkdir(dir, 0600) < 0 && errno != EEXIST) { + dir = ALT_RUN; + if (mkdir(dir, 0600) < 0 && errno != EEXIST) + dir = NULL; + } else { + if (proc_fd >= 0) + close(proc_fd); + proc_fd = -1; + } + if (dir && !sigterm && + (container->sock < 0 || + strcmp(dir, pid_dir) != 0)) { + close(container->sock); + remove_pidfile(container->devname); + pid_dir = dir; + container->sock = make_control_sock(container->devname); + make_pidfile(container->devname); + } socket_hup_requested = 0; } if (container->sock < 0) @@ -726,12 +741,9 @@ void do_manager(struct supertype *container) if (sigterm) wakeup_monitor(); - if (update_queue == NULL) { - if (container->sock < 0) - mdstat_wait_fd(proc_fd, &set); - else - mdstat_wait_fd(container->sock, &set); - } else + if (update_queue == NULL) + mdstat_wait_fd(container->sock, proc_fd, &set); + else /* If an update is happening, just wait for signal */ pselect(0, NULL, NULL, NULL, NULL, &set); } while(1); diff --git a/mdadm.h b/mdadm.h index eec5c0c..f65a462 100644 --- a/mdadm.h +++ b/mdadm.h @@ -68,6 +68,15 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence)); #define DEFAULT_BITMAP_DELAY 5 #define DEFAULT_MAX_WRITE_BEHIND 256 +#define VAR_RUN "/var/run/mdadm" +/* ALT_RUN should be somewhere that persists across the pivotroot + * from early boot to late boot. 
+ * If you don't have /lib/init/rw you might want to use /dev/.something + */ +#ifndef ALT_RUN +#define ALT_RUN "/lib/init/rw/mdadm" +#endif /* ALT_RUN */ + #include "md_u.h" #include "md_p.h" #include "bitmap.h" @@ -332,7 +341,7 @@ struct mdstat_ent { extern struct mdstat_ent *mdstat_read(int hold, int start); extern void free_mdstat(struct mdstat_ent *ms); extern void mdstat_wait(int seconds); -extern void mdstat_wait_fd(int fd, const sigset_t *sigmask); +extern void mdstat_wait_fd(int fd, int fd2, const sigset_t *sigmask); extern int mddev_busy(int devnum); struct map_ent { @@ -882,6 +891,7 @@ extern int create_mddev(char *dev, char *name, int autof, int trustworthy, extern int open_mddev(char *dev, int report_errors); extern int open_container(int fd); +extern char *pid_dir; extern int mdmon_running(int devnum); extern int mdmon_pid(int devnum); extern int check_env(char *name); diff --git a/mdmon.8 b/mdmon.8 index 05aaa50..1dc7844 100644 --- a/mdmon.8 +++ b/mdmon.8 @@ -5,7 +5,7 @@ mdmon \- monitor MD external metadata arrays .SH SYNOPSIS -.BI mdmon " CONTAINER [NEWROOT]" +.BI mdmon " CONTAINER [NEWROOT | PID]" .SH OVERVIEW The 2.6.27 kernel brings the ability to support external metadata arrays. @@ -148,6 +148,45 @@ will terminate any instances that are running in the current namespace, .IR chroot (2) to NEWROOT, and continue monitoring the container. + +Alternately the new +.I mdmon +can be started after the new root has been installed with +.I pivotroot +by passing +.B / +as the NEWROOT. For +.I mdmon +to be able to find and kill the old +.I mdmon +there must be some part of the filesystem that persists from before +the +.I pivotroot +to afterwards. This can be +.B /var/run/mdadm +or some other directory. The default other directory is +.B /lib/init/rw/mdadm +but this can easily be changed when +.I mdmon +is compiled. 
+If +.I mdmon +cannot store the pid file in +.B /var/run/mdadm +it will store it in the other directory until +.B /var/run/mdadm +becomes available, or until it is killed. +.TP +[PID] +If the second argument to +.I mdmon +does not start with a slash it is assumed to be the pid of a previous +instance of +.I mdmon +to kill. This is used internally by +.I mdmon +to pass the pid across a chroot/exec and should not be used otherwise. + .PP Note that .I mdmon @@ -159,6 +198,17 @@ RAID arrays. The only times it is run other that by is when the boot scripts need to restart it after mounting the new root filesystem. +.SH EXAMPLES + +.B " mdmon /proc/mdstat /" +.br +Any +.I mdmon +which is currently running is killed and a new instance is started. +This should be run early in the boot sequence after a +.I pivotroot +to the final root filesystem, but before that filesystem is remounted +read-write. .SH SEE ALSO .IR mdadm (8), .IR md (4). diff --git a/mdmon.c b/mdmon.c index d20bb3e..fa49706 100644 --- a/mdmon.c +++ b/mdmon.c @@ -113,19 +113,16 @@ static struct superswitch *find_metadata_methods(char *vers) return NULL; } -int make_pidfile(char *devname, int o_excl) +int make_pidfile(char *devname) { char path[100]; char pid[10]; int fd; int n; - if (sigterm) - return -1; + sprintf(path, "%s/%s.pid", pid_dir, devname); - sprintf(path, "/var/run/mdadm/%s.pid", devname); - - fd = open(path, O_RDWR|O_CREAT|o_excl, 0600); + fd = open(path, O_RDWR|O_CREAT|O_EXCL, 0600); if (fd < 0) return -errno; sprintf(pid, "%d\n", getpid()); @@ -186,13 +183,13 @@ void remove_pidfile(char *devname) { char buf[100]; - if (sigterm) - return; - - sprintf(buf, "/var/run/mdadm/%s.pid", devname); + sprintf(buf, "%s/%s.pid", pid_dir, devname); unlink(buf); - sprintf(buf, "/var/run/mdadm/%s.sock", devname); + sprintf(buf, "%s/%s.sock", pid_dir, devname); unlink(buf); + if (strcmp(pid_dir, ALT_RUN) == 0) + /* try to clean up when we are finished with this dir */ + rmdir(pid_dir); } int 
make_control_sock(char *devname) @@ -205,7 +202,7 @@ int make_control_sock(char *devname) if (sigterm) return -1; - sprintf(path, "/var/run/mdadm/%s.sock", devname); + sprintf(path, "%s/%s.sock", pid_dir, devname); unlink(path); sfd = socket(PF_LOCAL, SOCK_STREAM, 0); if (sfd < 0) diff --git a/mdmon.h b/mdmon.h index 4494085..a03686f 100644 --- a/mdmon.h +++ b/mdmon.h @@ -66,7 +66,7 @@ void remove_pidfile(char *devname); void do_monitor(struct supertype *container); void do_manager(struct supertype *container); int make_control_sock(char *devname); -int make_pidfile(char *devname, int o_excl); +int make_pidfile(char *devname); extern int socket_hup_requested; extern int sigterm; diff --git a/mdstat.c b/mdstat.c index 4d2f473..4b3f6fe 100644 --- a/mdstat.c +++ b/mdstat.c @@ -266,16 +266,20 @@ void mdstat_wait(int seconds) select(maxfd + 1, NULL, NULL, &fds, &tm); } -void mdstat_wait_fd(int fd, const sigset_t *sigmask) +void mdstat_wait_fd(int fd, int fd2, const sigset_t *sigmask) { fd_set fds, rfds; - int maxfd = fd; + int maxfd = 0; FD_ZERO(&fds); FD_ZERO(&rfds); if (mdstat_fd >= 0) FD_SET(mdstat_fd, &fds); - if (fd >= 0) { + + if (fd < 0) + fd = fd2, fd2 = -1; + + while (fd >= 0) { struct stat stb; fstat(fd, &stb); if ((stb.st_mode & S_IFMT) == S_IFREG) @@ -286,6 +290,12 @@ void mdstat_wait_fd(int fd, const sigset_t *sigmask) FD_SET(fd, &fds); else FD_SET(fd, &rfds); + + if (fd > maxfd) + maxfd = fd; + + fd = fd2; + fd2 = -1; } if (mdstat_fd > maxfd) maxfd = mdstat_fd; diff --git a/msg.c b/msg.c index 8d52b94..cc07b96 100644 --- a/msg.c +++ b/msg.c @@ -147,7 +147,7 @@ int connect_monitor(char *devname) int pos; char *c; - pos = sprintf(path, "/var/run/mdadm/"); + pos = sprintf(path, "%s/", VAR_RUN); if (is_subarray(devname)) { devname++; c = strchr(devname, '/'); diff --git a/util.c b/util.c index e5b0c1f..68f048d 100644 --- a/util.c +++ b/util.c @@ -1463,13 +1463,15 @@ int fd2devnum(int fd) return NoMdDev; } +char *pid_dir = VAR_RUN; + int mdmon_pid(int devnum) { 
char path[100]; char pid[10]; int fd; int n; - sprintf(path, "/var/run/mdadm/%s.pid", devnum2devname(devnum)); + sprintf(path, "%s/%s.pid", pid_dir, devnum2devname(devnum)); fd = open(path, O_RDONLY | O_NOATIME, 0); if (fd < 0)