Manage: support --fail set-X and --remove set-X

A RAID10 array can have 'sets' of devices which are reported by
--detail.
They can now be collectively failed or removed.

Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
NeilBrown 2013-03-05 12:08:43 +11:00
parent 276be5147e
commit 64a78416e3
2 changed files with 89 additions and 6 deletions

View File

@ -452,6 +452,40 @@ static void add_detached(struct mddev_dev *dv, int fd, char disp)
}
}
/*
 * add_set() - expand a RAID10 "set-X" request into its member devices.
 *
 * @dv:       device-list entry that named the set.  Each matching member
 *            is handed to add_one() as a "major:minor" string together
 *            with dv's disposition; the value returned by add_one()
 *            becomes the anchor for the next call.
 * @fd:       descriptor on which GET_ARRAY_INFO / GET_DISK_INFO are issued.
 * @set_char: letter of the wanted set ('A' is set 0, 'B' is set 1, ...).
 *
 * Returns nothing.  The function silently does nothing when the array
 * info cannot be read, the array is not level 10, the layout yields a
 * copy count of zero, or raid_disks is not a multiple of the copy count.
 */
static void add_set(struct mddev_dev *dv, int fd, char set_char)
{
	mdu_array_info_t array;
	mdu_disk_info_t disk;
	int remaining_disks;
	int copies, set;
	int i;

	if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
		return;
	if (array.level != 10)
		return;

	/*
	 * The copy count is the product of the two low bytes of the layout
	 * word (presumably the near and far copy counts - see md(4)).
	 */
	copies = ((array.layout & 0xff) *
		  ((array.layout >> 8) & 0xff));

	/*
	 * A zero byte in the layout would make the modulo below a division
	 * by zero, so reject it before testing divisibility.
	 */
	if (copies == 0 || array.raid_disks % copies)
		return;

	/* Stop scanning slots once every disk the array reports was seen. */
	remaining_disks = array.nr_disks;
	for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
		char buf[40];

		disk.number = i;
		if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
			continue;
		/* 0:0 is treated as an empty slot and is not counted. */
		if (disk.major == 0 && disk.minor == 0)
			continue;
		remaining_disks--;

		/*
		 * NOTE(review): no range check on raid_disk here; confirm
		 * how spares are reported so they cannot land in a set.
		 */
		set = disk.raid_disk % copies;
		if (set_char != set + 'A')
			continue;

		snprintf(buf, sizeof(buf), "%d:%d", disk.major, disk.minor);
		dv = add_one(dv, buf, dv->disposition);
	}
}
int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
struct supertype *dev_st, struct supertype *tst,
unsigned long rdev,
@ -1195,6 +1229,35 @@ int Manage_subdevs(char *devname, int fd,
continue;
}
if (strncmp(dv->devname, "set-", 4) == 0 &&
strlen(dv->devname) == 5) {
int copies;
if (dv->disposition != 'r' &&
dv->disposition != 'f') {
pr_err("'%s' only meaningful with -r or -f\n",
dv->devname);
goto abort;
}
if (array.level != 10) {
pr_err("'%s' only meaningful with RAID10 arrays\n",
dv->devname);
goto abort;
}
copies = ((array.layout & 0xff) *
((array.layout >> 8) & 0xff));
if (array.raid_disks % copies != 0 ||
dv->devname[4] < 'A' ||
dv->devname[4] >= 'A' + copies ||
copies > 26) {
pr_err("'%s' not meaningful with this array\n",
dv->devname);
goto abort;
}
add_set(dv, fd, dv->devname[4]);
continue;
}
if (strchr(dv->devname, '/') == NULL &&
strchr(dv->devname, ':') == NULL &&
strlen(dv->devname) < 50) {

View File

@ -1273,30 +1273,50 @@ resolved.
.TP
.BR \-r ", " \-\-remove
remove listed devices. They must not be active. i.e. they should
be failed or spare devices. As well as the name of a device file
be failed or spare devices.
As well as the name of a device file
(e.g.
.BR /dev/sda1 )
the words
.B failed
and
.BR failed ,
.B detached
and names like
.B set-A
can be given to
.BR \-\-remove .
The first causes all failed devices to be removed. The second causes
any device which is no longer connected to the system (i.e. an 'open'
returns
.BR ENXIO )
to be removed. This will only succeed for devices that are spares or
have already been marked as failed.
to be removed.
The third will remove a set as described below under
.BR \-\-fail .
.TP
.BR \-f ", " \-\-fail
Mark listed devices as faulty.
As well as the name of a device file, the word
.B detached
can be given. This will cause any device that has been detached from
or a set name like
.B set\-A
can be given. The former will cause any device that has been detached from
the system to be marked as failed. It can then be removed.
For RAID10 arrays where the number of copies evenly divides the number
of devices, the devices can be conceptually divided into sets where
each set contains a single complete copy of the data on the array.
Sometimes a RAID10 array will be configured so that these sets are on
separate controllers. In this case all the devices in one set can be
failed by giving a name like
.B set\-A
or
.B set\-B
to
.BR \-\-fail .
The appropriate set names are reported by
.BR \-\-detail .
.TP
.BR \-\-set\-faulty
same as