From 773135f5bdf57219bf7f957b86964ba40d041180 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Mon, 5 Dec 2005 05:55:56 +0000 Subject: [PATCH] syslog support for monitor mode From: ross@jose.lug.udel.edu (Ross Vandegrift) Hi Neil, While adding the text message mode, I saw a FIXME asking for syslog support in monitor mode. This patch adds exactly that. Signed-off-by: Neil Brown --- ChangeLog | 2 ++ Monitor.c | 63 +++++++++++++++++++++++++++++++++++++++---------------- ReadMe.c | 6 ++++-- mdadm.8 | 25 +++++++++++++++------- mdadm.c | 8 ++++++- mdadm.h | 5 ++++- 6 files changed, 80 insertions(+), 29 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8b358c9..7f03ead 100644 --- a/ChangeLog +++ b/ChangeLog @@ -10,6 +10,8 @@ Changes Prior to this release - Allow scanning of devices listed in /proc/partitions even if they don't appear in /dev. - Support --assume-clean in --create mode as well as --build + - Add support for --monitor to report to syslog: -y or --syslog. + Thanks to Ross Vandegrift - --monitor now reports which device failed in a 'Fail' message This broke with 2.6 diff --git a/Monitor.c b/Monitor.c index 2b30df1..1ad57be 100644 --- a/Monitor.c +++ b/Monitor.c @@ -33,8 +33,10 @@ #include #include #include +#include -static void alert(char *event, char *dev, char *disc, char *mailaddr, char *cmd); +static void alert(char *event, char *dev, char *disc, char *mailaddr, char *cmd, + int dosyslog); static char *percentalerts[] = { "RebuildStarted", @@ -47,7 +49,7 @@ static char *percentalerts[] = { int Monitor(mddev_dev_t devlist, char *mailaddr, char *alert_cmd, int period, int daemonise, int scan, int oneshot, - char *config, int test, char* pidfile) + int dosyslog, char *config, int test, char* pidfile) { /* * Every few seconds, scan every md device looking for changes @@ -212,12 +214,12 @@ int Monitor(mddev_dev_t devlist, unsigned int i; if (test) - alert("TestMessage", dev, NULL, mailaddr, alert_cmd); + alert("TestMessage", dev, NULL, mailaddr, alert_cmd, 
dosyslog); fd = open(dev, O_RDONLY); if (fd < 0) { if (!st->err) alert("DeviceDisappeared", dev, NULL, - mailaddr, alert_cmd); + mailaddr, alert_cmd, dosyslog); /* fprintf(stderr, Name ": cannot open %s: %s\n", dev, strerror(errno)); */ st->err=1; @@ -226,7 +228,7 @@ int Monitor(mddev_dev_t devlist, if (ioctl(fd, GET_ARRAY_INFO, &array)<0) { if (!st->err) alert("DeviceDisappeared", dev, NULL, - mailaddr, alert_cmd); + mailaddr, alert_cmd, dosyslog); /* fprintf(stderr, Name ": cannot get array info for %s: %s\n", dev, strerror(errno)); */ st->err=1; @@ -237,7 +239,7 @@ int Monitor(mddev_dev_t devlist, array.level != 6 && array.level != 10) { if (!st->err) alert("DeviceDisappeared", dev, "Wrong-Level", - mailaddr, alert_cmd); + mailaddr, alert_cmd, dosyslog); st->err = 1; close(fd); continue; @@ -274,27 +276,27 @@ int Monitor(mddev_dev_t devlist, mse && /* is in /proc/mdstat */ mse->pattern && strchr(mse->pattern, '_') /* degraded */ ) - alert("DegradedArray", dev, NULL, mailaddr, alert_cmd); + alert("DegradedArray", dev, NULL, mailaddr, alert_cmd, dosyslog); if (st->utime == 0 && /* new array */ st->expected_spares > 0 && array.spare_disks < st->expected_spares) - alert("SparesMissing", dev, NULL, mailaddr, alert_cmd); + alert("SparesMissing", dev, NULL, mailaddr, alert_cmd, dosyslog); if (mse && st->percent == -1 && mse->percent >= 0) - alert("RebuildStarted", dev, NULL, mailaddr, alert_cmd); + alert("RebuildStarted", dev, NULL, mailaddr, alert_cmd, dosyslog); if (mse && st->percent >= 0 && mse->percent >= 0 && (mse->percent / 20) > (st->percent / 20)) alert(percentalerts[mse->percent/20], - dev, NULL, mailaddr, alert_cmd); + dev, NULL, mailaddr, alert_cmd, dosyslog); if (mse && mse->percent == -1 && st->percent >= 0) - alert("RebuildFinished", dev, NULL, mailaddr, alert_cmd); + alert("RebuildFinished", dev, NULL, mailaddr, alert_cmd, dosyslog); if (mse) st->percent = mse->percent; @@ -323,19 +325,19 @@ int Monitor(mddev_dev_t devlist, 
((st->devstate[i]&change)&(1<devstate[i]&change)&(1<= (unsigned)array.raid_disks && (disc.major || disc.minor) && st->devid[i] == makedev(disc.major, disc.minor) && ((newstate&change)&(1<devstate[i]&change)&(1<devstate[i] = disc.state; st->devid[i] = makedev(disc.major, disc.minor); @@ -381,7 +383,7 @@ int Monitor(mddev_dev_t devlist, st->spare_group = NULL; st->expected_spares = -1; statelist = st; - alert("NewArray", st->devname, NULL, mailaddr, alert_cmd); + alert("NewArray", st->devname, NULL, mailaddr, alert_cmd, dosyslog); new_found = 1; } } @@ -422,7 +424,7 @@ int Monitor(mddev_dev_t devlist, (unsigned long)dev) == 0) { if (ioctl(fd1, HOT_ADD_DISK, (unsigned long)dev) == 0) { - alert("MoveSpare", st->devname, st2->devname, mailaddr, alert_cmd); + alert("MoveSpare", st->devname, st2->devname, mailaddr, alert_cmd, dosyslog); close(fd1); close(fd2); break; @@ -448,8 +450,11 @@ int Monitor(mddev_dev_t devlist, } -static void alert(char *event, char *dev, char *disc, char *mailaddr, char *cmd) +static void alert(char *event, char *dev, char *disc, char *mailaddr, char *cmd, + int dosyslog) { + int priority; + if (!cmd && !mailaddr) { time_t now = time(0); @@ -494,5 +499,27 @@ static void alert(char *event, char *dev, char *disc, char *mailaddr, char *cmd) } } - /* FIXME log the event to syslog maybe */ + + /* log the event to syslog maybe */ + if (dosyslog) { + /* Log at a different severity depending on the event. 
+ * + * These are the critical events: */ + if (strncmp(event, "Fail", 4)==0 || + strncmp(event, "Degrade", 7)==0 || + strncmp(event, "DeviceDisappeared", 17)==0) + priority = LOG_CRIT; + /* Good to know about, but are not failures: */ + else if (strncmp(event, "Rebuild", 7)==0 || + strncmp(event, "MoveSpare", 9)==0) + priority = LOG_WARNING; + /* Everything else: */ + else + priority = LOG_INFO; + + if (disc) + syslog(priority, "%s event detected on md device %s, component device %s", event, dev, disc); + else + syslog(priority, "%s event detected on md device %s", event, dev); + } } diff --git a/ReadMe.c b/ReadMe.c index 81766dd..98d91cc 100644 --- a/ReadMe.c +++ b/ReadMe.c @@ -1,3 +1,4 @@ + /* * mdadm - manage Linux "md" devices aka RAID arrays. * @@ -91,8 +92,9 @@ char Version[] = Name " - v2.1 - 12 September 2005\n"; * At the time if writing, there is only minimal support. */ -char short_options[]="-ABCDEFGQhVXvqbc:i:l:p:m:n:x:u:c:d:z:U:sarfRSow1te:"; -char short_bitmap_auto_options[]="-ABCDEFGQhVXvqb:c:i:l:p:m:n:x:u:c:d:z:U:sa:rfRSow1te:"; +char short_options[]="-ABCDEFGQhVXvqbc:i:l:p:m:n:x:u:c:d:z:U:sarfRSow1tye:"; +char short_bitmap_auto_options[]="-ABCDEFGQhVXvqb:c:i:l:p:m:n:x:u:c:d:z:U:sa:rfRSow1tye:"; + struct option long_options[] = { {"manage", 0, 0, '@'}, {"misc", 0, 0, '#'}, diff --git a/mdadm.8 b/mdadm.8 index 8e19195..e4f60c1 100644 --- a/mdadm.8 +++ b/mdadm.8 @@ -750,6 +750,11 @@ Give a mail address to send alerts to. .BR -p ", " --program ", " --alert Give a program to be run whenever an event is detected. +.TP +.BR -y ", " --syslog +Cause all events to be reported through 'syslog'. The messages have +facility of 'daemon' and varying priorities. + .TP .BR -d ", " --delay Give a delay in seconds. @@ -1189,7 +1194,7 @@ The different events are: .TP .B DeviceDisappeared An md array which previously was configured appears to no longer be -configured. +configured. 
(syslog priority: Critical) If .I mdadm @@ -1203,39 +1208,41 @@ hot-spare and resync operations which are monitored. .TP .B RebuildStarted -An md array started reconstruction. +An md array started reconstruction. (syslog priority: Warning) .TP .BI Rebuild NN Where .I NN is 20, 40, 60, or 80, this indicates that rebuild has passed that many -percentage of the total. +percentage of the total. (syslog priority: Warning) .TP .B RebuildFinished An md array that was rebuilding, isn't any more, either because it -finished normally or was aborted. +finished normally or was aborted. (syslog priority: Warning) .TP .B Fail -An active component device of an array has been marked as faulty. +An active component device of an array has been marked as +faulty. (syslog priority: Critical) .TP .B FailSpare A spare component device which was being rebuilt to replace a faulty -device has failed. +device has failed. (syslog priority: Critical) .TP .B SpareActive A spare component device which was being rebuilt to replace a faulty device as been successfully rebuild and has been made active. +(syslog priority: Info) .TP .B NewArray A new md array has been detected in the .B /proc/mdstat -file. +file. (syslog priority: Info) .TP .B DegradedArray @@ -1245,12 +1252,14 @@ generated when notices a drive failure which causes degradation, but only when .I mdadm notices that an array is degraded when it first sees the array. +(syslog priority: Critical) .TP .B MoveSpare A spare drive has been moved from one array in a .B spare-group to another to allow a failed drive to be replaced. +(syslog priority: Warning) .TP .B SparesMissing @@ -1263,12 +1272,14 @@ detects that it has fewer that this number when it first sees the array, it will report a .B SparesMissing message. +(syslog priority: Info) .TP .B TestMessage An array was found at startup, and the .B --test flag was given. 
+(syslog priority: Info) .RE Only diff --git a/mdadm.c b/mdadm.c index c808799..844f6d4 100644 --- a/mdadm.c +++ b/mdadm.c @@ -94,6 +94,7 @@ int main(int argc, char *argv[]) int writemostly = 0; int re_add = 0; char *shortopt = short_options; + int dosyslog = 0; int copies; @@ -674,6 +675,10 @@ int main(int argc, char *argv[]) case O(MONITOR,'t'): /* test */ test = 1; continue; + case O(MONITOR,'y'): /* log messages to syslog */ + openlog("mdadm", 0, SYSLOG_FACILITY); + dosyslog = 1; + continue; /* now the general management options. Some are applicable * to other modes. None have arguments. @@ -1116,7 +1121,8 @@ int main(int argc, char *argv[]) break; } rv= Monitor(devlist, mailaddr, program, - delay?delay:60, daemonise, scan, oneshot, configfile, test, pidfile); + delay?delay:60, daemonise, scan, oneshot, + dosyslog, configfile, test, pidfile); break; case GROW: diff --git a/mdadm.h b/mdadm.h index 8c5f2f2..32b3e24 100644 --- a/mdadm.h +++ b/mdadm.h @@ -43,6 +43,7 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence)); #include #include #include +#include #ifdef __dietlibc__NONO int strncmp(const char *s1, const char *s2, size_t n) __THROW __pure__; char *strncpy(char *dest, const char *src, size_t n) __THROW; @@ -167,6 +168,8 @@ extern void mdstat_wait(int seconds); #define Sendmail "/usr/lib/sendmail -t" #endif +#define SYSLOG_FACILITY LOG_DAEMON + extern char *map_num(mapping_t *map, int num); extern int map_name(mapping_t *map, char *name); extern mapping_t r5layout[], pers[], modes[], faultylayout[]; @@ -260,7 +263,7 @@ extern int Examine(mddev_dev_t devlist, int brief, int scan, int SparcAdjust, extern int Monitor(mddev_dev_t devlist, char *mailaddr, char *alert_cmd, int period, int daemonise, int scan, int oneshot, - char *config, int test, char *pidfile); + int dosyslog, char *config, int test, char *pidfile); extern int Kill(char *dev, int force);