syslog support for monitor mode

From: ross@jose.lug.udel.edu (Ross Vandegrift)

Hi Neil,

While adding the text message mode, I saw a FIXME asking for syslog
support in monitor mode.

This patch adds exactly that.
Signed-off-by: Neil Brown <neilb@suse.de>
This commit is contained in:
Neil Brown 2005-12-05 05:55:56 +00:00
parent fe394e5e42
commit 773135f5bd
6 changed files with 80 additions and 29 deletions

View File

@ -10,6 +10,8 @@ Changes Prior to this release
- Allow scanning of devices listed in /proc/partitions even - Allow scanning of devices listed in /proc/partitions even
if they don't appear in /dev. if they don't appear in /dev.
- Support --assume-clean in --create mode as well as --build - Support --assume-clean in --create mode as well as --build
- Add support for --monitor to report to syslog: -y or --syslog.
Thanks to Ross Vandegrift
- --monitor now reports which device failed in a 'Fail' message - --monitor now reports which device failed in a 'Fail' message
This broke with 2.6 This broke with 2.6

View File

@ -33,8 +33,10 @@
#include <sys/wait.h> #include <sys/wait.h>
#include <sys/signal.h> #include <sys/signal.h>
#include <values.h> #include <values.h>
#include <syslog.h>
static void alert(char *event, char *dev, char *disc, char *mailaddr, char *cmd); static void alert(char *event, char *dev, char *disc, char *mailaddr, char *cmd,
int dosyslog);
static char *percentalerts[] = { static char *percentalerts[] = {
"RebuildStarted", "RebuildStarted",
@ -47,7 +49,7 @@ static char *percentalerts[] = {
int Monitor(mddev_dev_t devlist, int Monitor(mddev_dev_t devlist,
char *mailaddr, char *alert_cmd, char *mailaddr, char *alert_cmd,
int period, int daemonise, int scan, int oneshot, int period, int daemonise, int scan, int oneshot,
char *config, int test, char* pidfile) int dosyslog, char *config, int test, char* pidfile)
{ {
/* /*
* Every few seconds, scan every md device looking for changes * Every few seconds, scan every md device looking for changes
@ -212,12 +214,12 @@ int Monitor(mddev_dev_t devlist,
unsigned int i; unsigned int i;
if (test) if (test)
alert("TestMessage", dev, NULL, mailaddr, alert_cmd); alert("TestMessage", dev, NULL, mailaddr, alert_cmd, dosyslog);
fd = open(dev, O_RDONLY); fd = open(dev, O_RDONLY);
if (fd < 0) { if (fd < 0) {
if (!st->err) if (!st->err)
alert("DeviceDisappeared", dev, NULL, alert("DeviceDisappeared", dev, NULL,
mailaddr, alert_cmd); mailaddr, alert_cmd, dosyslog);
/* fprintf(stderr, Name ": cannot open %s: %s\n", /* fprintf(stderr, Name ": cannot open %s: %s\n",
dev, strerror(errno)); dev, strerror(errno));
*/ st->err=1; */ st->err=1;
@ -226,7 +228,7 @@ int Monitor(mddev_dev_t devlist,
if (ioctl(fd, GET_ARRAY_INFO, &array)<0) { if (ioctl(fd, GET_ARRAY_INFO, &array)<0) {
if (!st->err) if (!st->err)
alert("DeviceDisappeared", dev, NULL, alert("DeviceDisappeared", dev, NULL,
mailaddr, alert_cmd); mailaddr, alert_cmd, dosyslog);
/* fprintf(stderr, Name ": cannot get array info for %s: %s\n", /* fprintf(stderr, Name ": cannot get array info for %s: %s\n",
dev, strerror(errno)); dev, strerror(errno));
*/ st->err=1; */ st->err=1;
@ -237,7 +239,7 @@ int Monitor(mddev_dev_t devlist,
array.level != 6 && array.level != 10) { array.level != 6 && array.level != 10) {
if (!st->err) if (!st->err)
alert("DeviceDisappeared", dev, "Wrong-Level", alert("DeviceDisappeared", dev, "Wrong-Level",
mailaddr, alert_cmd); mailaddr, alert_cmd, dosyslog);
st->err = 1; st->err = 1;
close(fd); close(fd);
continue; continue;
@ -274,27 +276,27 @@ int Monitor(mddev_dev_t devlist,
mse && /* is in /proc/mdstat */ mse && /* is in /proc/mdstat */
mse->pattern && strchr(mse->pattern, '_') /* degraded */ mse->pattern && strchr(mse->pattern, '_') /* degraded */
) )
alert("DegradedArray", dev, NULL, mailaddr, alert_cmd); alert("DegradedArray", dev, NULL, mailaddr, alert_cmd, dosyslog);
if (st->utime == 0 && /* new array */ if (st->utime == 0 && /* new array */
st->expected_spares > 0 && st->expected_spares > 0 &&
array.spare_disks < st->expected_spares) array.spare_disks < st->expected_spares)
alert("SparesMissing", dev, NULL, mailaddr, alert_cmd); alert("SparesMissing", dev, NULL, mailaddr, alert_cmd, dosyslog);
if (mse && if (mse &&
st->percent == -1 && st->percent == -1 &&
mse->percent >= 0) mse->percent >= 0)
alert("RebuildStarted", dev, NULL, mailaddr, alert_cmd); alert("RebuildStarted", dev, NULL, mailaddr, alert_cmd, dosyslog);
if (mse && if (mse &&
st->percent >= 0 && st->percent >= 0 &&
mse->percent >= 0 && mse->percent >= 0 &&
(mse->percent / 20) > (st->percent / 20)) (mse->percent / 20) > (st->percent / 20))
alert(percentalerts[mse->percent/20], alert(percentalerts[mse->percent/20],
dev, NULL, mailaddr, alert_cmd); dev, NULL, mailaddr, alert_cmd, dosyslog);
if (mse && if (mse &&
mse->percent == -1 && mse->percent == -1 &&
st->percent >= 0) st->percent >= 0)
alert("RebuildFinished", dev, NULL, mailaddr, alert_cmd); alert("RebuildFinished", dev, NULL, mailaddr, alert_cmd, dosyslog);
if (mse) if (mse)
st->percent = mse->percent; st->percent = mse->percent;
@ -323,19 +325,19 @@ int Monitor(mddev_dev_t devlist,
((st->devstate[i]&change)&(1<<MD_DISK_ACTIVE)) || ((st->devstate[i]&change)&(1<<MD_DISK_ACTIVE)) ||
((st->devstate[i]&change)&(1<<MD_DISK_SYNC))) ((st->devstate[i]&change)&(1<<MD_DISK_SYNC)))
) )
alert("Fail", dev, dv, mailaddr, alert_cmd); alert("Fail", dev, dv, mailaddr, alert_cmd, dosyslog);
else if (i >= (unsigned)array.raid_disks && else if (i >= (unsigned)array.raid_disks &&
(disc.major || disc.minor) && (disc.major || disc.minor) &&
st->devid[i] == makedev(disc.major, disc.minor) && st->devid[i] == makedev(disc.major, disc.minor) &&
((newstate&change)&(1<<MD_DISK_FAULTY)) ((newstate&change)&(1<<MD_DISK_FAULTY))
) )
alert("FailSpare", dev, dv, mailaddr, alert_cmd); alert("FailSpare", dev, dv, mailaddr, alert_cmd, dosyslog);
else if (i < (unsigned)array.raid_disks && else if (i < (unsigned)array.raid_disks &&
(((st->devstate[i]&change)&(1<<MD_DISK_FAULTY)) || (((st->devstate[i]&change)&(1<<MD_DISK_FAULTY)) ||
((newstate&change)&(1<<MD_DISK_ACTIVE)) || ((newstate&change)&(1<<MD_DISK_ACTIVE)) ||
((newstate&change)&(1<<MD_DISK_SYNC))) ((newstate&change)&(1<<MD_DISK_SYNC)))
) )
alert("SpareActive", dev, dv, mailaddr, alert_cmd); alert("SpareActive", dev, dv, mailaddr, alert_cmd, dosyslog);
} }
st->devstate[i] = disc.state; st->devstate[i] = disc.state;
st->devid[i] = makedev(disc.major, disc.minor); st->devid[i] = makedev(disc.major, disc.minor);
@ -381,7 +383,7 @@ int Monitor(mddev_dev_t devlist,
st->spare_group = NULL; st->spare_group = NULL;
st->expected_spares = -1; st->expected_spares = -1;
statelist = st; statelist = st;
alert("NewArray", st->devname, NULL, mailaddr, alert_cmd); alert("NewArray", st->devname, NULL, mailaddr, alert_cmd, dosyslog);
new_found = 1; new_found = 1;
} }
} }
@ -422,7 +424,7 @@ int Monitor(mddev_dev_t devlist,
(unsigned long)dev) == 0) { (unsigned long)dev) == 0) {
if (ioctl(fd1, HOT_ADD_DISK, if (ioctl(fd1, HOT_ADD_DISK,
(unsigned long)dev) == 0) { (unsigned long)dev) == 0) {
alert("MoveSpare", st->devname, st2->devname, mailaddr, alert_cmd); alert("MoveSpare", st->devname, st2->devname, mailaddr, alert_cmd, dosyslog);
close(fd1); close(fd1);
close(fd2); close(fd2);
break; break;
@ -448,8 +450,11 @@ int Monitor(mddev_dev_t devlist,
} }
static void alert(char *event, char *dev, char *disc, char *mailaddr, char *cmd) static void alert(char *event, char *dev, char *disc, char *mailaddr, char *cmd,
int dosyslog)
{ {
int priority;
if (!cmd && !mailaddr) { if (!cmd && !mailaddr) {
time_t now = time(0); time_t now = time(0);
@ -494,5 +499,27 @@ static void alert(char *event, char *dev, char *disc, char *mailaddr, char *cmd)
} }
} }
/* FIXME log the event to syslog maybe */
/* log the event to syslog maybe */
if (dosyslog) {
/* Log at a different severity depending on the event.
*
* These are the critical events: */
if (strncmp(event, "Fail", 4)==0 ||
strncmp(event, "Degrade", 7)==0 ||
strncmp(event, "DeviceDisappeared", 17)==0)
priority = LOG_CRIT;
/* Good to know about, but are not failures: */
else if (strncmp(event, "Rebuild", 7)==0 ||
strncmp(event, "MoveSpare", 9)==0)
priority = LOG_WARNING;
/* Everything else: */
else
priority = LOG_INFO;
if (disc)
syslog(priority, "%s event detected on md device %s, component device %s", event, dev, disc);
else
syslog(priority, "%s event detected on md device %s", event, dev);
}
} }

View File

@ -1,3 +1,4 @@
/* /*
* mdadm - manage Linux "md" devices aka RAID arrays. * mdadm - manage Linux "md" devices aka RAID arrays.
* *
@ -91,8 +92,9 @@ char Version[] = Name " - v2.1 - 12 September 2005\n";
* At the time if writing, there is only minimal support. * At the time if writing, there is only minimal support.
*/ */
char short_options[]="-ABCDEFGQhVXvqbc:i:l:p:m:n:x:u:c:d:z:U:sarfRSow1te:"; char short_options[]="-ABCDEFGQhVXvqbc:i:l:p:m:n:x:u:c:d:z:U:sarfRSow1tye:";
char short_bitmap_auto_options[]="-ABCDEFGQhVXvqb:c:i:l:p:m:n:x:u:c:d:z:U:sa:rfRSow1te:"; char short_bitmap_auto_options[]="-ABCDEFGQhVXvqb:c:i:l:p:m:n:x:u:c:d:z:U:sa:rfRSow1tye:";
struct option long_options[] = { struct option long_options[] = {
{"manage", 0, 0, '@'}, {"manage", 0, 0, '@'},
{"misc", 0, 0, '#'}, {"misc", 0, 0, '#'},

25
mdadm.8
View File

@ -750,6 +750,11 @@ Give a mail address to send alerts to.
.BR -p ", " --program ", " --alert .BR -p ", " --program ", " --alert
Give a program to be run whenever an event is detected. Give a program to be run whenever an event is detected.
.TP
.BR -y ", " --syslog
Cause all events to be reported through 'syslog'. The messages have
facility of 'daemon' and varying priorities.
.TP .TP
.BR -d ", " --delay .BR -d ", " --delay
Give a delay in seconds. Give a delay in seconds.
@ -1189,7 +1194,7 @@ The different events are:
.TP .TP
.B DeviceDisappeared .B DeviceDisappeared
An md array which previously was configured appears to no longer be An md array which previously was configured appears to no longer be
configured. configured. (syslog priority: Critical)
If If
.I mdadm .I mdadm
@ -1203,39 +1208,41 @@ hot-spare and resync operations which are monitored.
.TP .TP
.B RebuildStarted .B RebuildStarted
An md array started reconstruction. An md array started reconstruction. (syslog priority: Warning)
.TP .TP
.BI Rebuild NN .BI Rebuild NN
Where Where
.I NN .I NN
is 20, 40, 60, or 80, this indicates that rebuild has passed that many is 20, 40, 60, or 80, this indicates that rebuild has passed that many
percentage of the total. percentage of the total. (syslog priority: Warning)
.TP .TP
.B RebuildFinished .B RebuildFinished
An md array that was rebuilding, isn't any more, either because it An md array that was rebuilding, isn't any more, either because it
finished normally or was aborted. finished normally or was aborted. (syslog priority: Warning)
.TP .TP
.B Fail .B Fail
An active component device of an array has been marked as faulty. An active component device of an array has been marked as
faulty. (syslog priority: Critical)
.TP .TP
.B FailSpare .B FailSpare
A spare component device which was being rebuilt to replace a faulty A spare component device which was being rebuilt to replace a faulty
device has failed. device has failed. (syslog priority: Critical)
.TP .TP
.B SpareActive .B SpareActive
A spare component device which was being rebuilt to replace a faulty A spare component device which was being rebuilt to replace a faulty
device has been successfully rebuilt and has been made active. device has been successfully rebuilt and has been made active.
(syslog priority: Info)
.TP .TP
.B NewArray .B NewArray
A new md array has been detected in the A new md array has been detected in the
.B /proc/mdstat .B /proc/mdstat
file. file. (syslog priority: Info)
.TP .TP
.B DegradedArray .B DegradedArray
@ -1245,12 +1252,14 @@ generated when
notices a drive failure which causes degradation, but only when notices a drive failure which causes degradation, but only when
.I mdadm .I mdadm
notices that an array is degraded when it first sees the array. notices that an array is degraded when it first sees the array.
(syslog priority: Critical)
.TP .TP
.B MoveSpare .B MoveSpare
A spare drive has been moved from one array in a A spare drive has been moved from one array in a
.B spare-group .B spare-group
to another to allow a failed drive to be replaced. to another to allow a failed drive to be replaced.
(syslog priority: Warning)
.TP .TP
.B SparesMissing .B SparesMissing
@ -1263,12 +1272,14 @@ detects that it has fewer that this number when it first sees the
array, it will report a array, it will report a
.B SparesMissing .B SparesMissing
message. message.
(syslog priority: Info)
.TP .TP
.B TestMessage .B TestMessage
An array was found at startup, and the An array was found at startup, and the
.B --test .B --test
flag was given. flag was given.
(syslog priority: Info)
.RE .RE
Only Only

View File

@ -94,6 +94,7 @@ int main(int argc, char *argv[])
int writemostly = 0; int writemostly = 0;
int re_add = 0; int re_add = 0;
char *shortopt = short_options; char *shortopt = short_options;
int dosyslog = 0;
int copies; int copies;
@ -674,6 +675,10 @@ int main(int argc, char *argv[])
case O(MONITOR,'t'): /* test */ case O(MONITOR,'t'): /* test */
test = 1; test = 1;
continue; continue;
case O(MONITOR,'y'): /* log messages to syslog */
openlog("mdadm", 0, SYSLOG_FACILITY);
dosyslog = 1;
continue;
/* now the general management options. Some are applicable /* now the general management options. Some are applicable
* to other modes. None have arguments. * to other modes. None have arguments.
@ -1116,7 +1121,8 @@ int main(int argc, char *argv[])
break; break;
} }
rv= Monitor(devlist, mailaddr, program, rv= Monitor(devlist, mailaddr, program,
delay?delay:60, daemonise, scan, oneshot, configfile, test, pidfile); delay?delay:60, daemonise, scan, oneshot,
dosyslog, configfile, test, pidfile);
break; break;
case GROW: case GROW:

View File

@ -43,6 +43,7 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
#include <stdio.h> #include <stdio.h>
#include <errno.h> #include <errno.h>
#include <string.h> #include <string.h>
#include <syslog.h>
#ifdef __dietlibc__NONO #ifdef __dietlibc__NONO
int strncmp(const char *s1, const char *s2, size_t n) __THROW __pure__; int strncmp(const char *s1, const char *s2, size_t n) __THROW __pure__;
char *strncpy(char *dest, const char *src, size_t n) __THROW; char *strncpy(char *dest, const char *src, size_t n) __THROW;
@ -167,6 +168,8 @@ extern void mdstat_wait(int seconds);
#define Sendmail "/usr/lib/sendmail -t" #define Sendmail "/usr/lib/sendmail -t"
#endif #endif
#define SYSLOG_FACILITY LOG_DAEMON
extern char *map_num(mapping_t *map, int num); extern char *map_num(mapping_t *map, int num);
extern int map_name(mapping_t *map, char *name); extern int map_name(mapping_t *map, char *name);
extern mapping_t r5layout[], pers[], modes[], faultylayout[]; extern mapping_t r5layout[], pers[], modes[], faultylayout[];
@ -260,7 +263,7 @@ extern int Examine(mddev_dev_t devlist, int brief, int scan, int SparcAdjust,
extern int Monitor(mddev_dev_t devlist, extern int Monitor(mddev_dev_t devlist,
char *mailaddr, char *alert_cmd, char *mailaddr, char *alert_cmd,
int period, int daemonise, int scan, int oneshot, int period, int daemonise, int scan, int oneshot,
char *config, int test, char *pidfile); int dosyslog, char *config, int test, char *pidfile);
extern int Kill(char *dev, int force); extern int Kill(char *dev, int force);