Add support for --replace and --with
--replace can be used to replace a device without completely failing it. Once the replacement completes the device will be failed. --with can indicate which of several spares to use. Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
parent
1dc837e4ed
commit
70c55e36b7
147
Manage.c
147
Manage.c
|
@ -954,6 +954,111 @@ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
|
|||
return 1;
|
||||
}
|
||||
|
||||
int Manage_replace(struct supertype *tst, int fd, struct mddev_dev *dv,
|
||||
unsigned long rdev, int verbose, char *devname)
|
||||
{
|
||||
struct mdinfo *mdi, *di;
|
||||
if (tst->ss->external) {
|
||||
pr_err("--replace only supported for native metadata (0.90 or 1.x)\n");
|
||||
return -1;
|
||||
}
|
||||
/* Need to find the device in sysfs and add 'want_replacement' to the
|
||||
* status.
|
||||
*/
|
||||
mdi = sysfs_read(fd, -1, GET_DEVS);
|
||||
if (!mdi || !mdi->devs) {
|
||||
pr_err("Cannot find status of %s to enable replacement - strange\n",
|
||||
devname);
|
||||
return -1;
|
||||
}
|
||||
for (di = mdi->devs; di; di = di->next)
|
||||
if (di->disk.major == (int)major(rdev) &&
|
||||
di->disk.minor == (int)minor(rdev))
|
||||
break;
|
||||
if (di) {
|
||||
int rv;
|
||||
if (di->disk.raid_disk < 0) {
|
||||
pr_err("%s is not active and so cannot be replaced.\n",
|
||||
dv->devname);
|
||||
sysfs_free(mdi);
|
||||
return -1;
|
||||
}
|
||||
rv = sysfs_set_str(mdi, di,
|
||||
"state", "want_replacement");
|
||||
if (rv) {
|
||||
sysfs_free(mdi);
|
||||
pr_err("Failed to request replacement for %s\n",
|
||||
dv->devname);
|
||||
return -1;
|
||||
}
|
||||
if (verbose >= 0)
|
||||
pr_err("Marked %s (device %d in %s) for replacement\n",
|
||||
dv->devname, di->disk.raid_disk, devname);
|
||||
/* If there is a matching 'with', we need to tell it which
|
||||
* raid disk
|
||||
*/
|
||||
while (dv && dv->disposition != 'W')
|
||||
dv = dv->next;
|
||||
if (dv) {
|
||||
dv->disposition = 'w';
|
||||
dv->used = di->disk.raid_disk;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
sysfs_free(mdi);
|
||||
pr_err("%s not found in %s so cannot --replace it\n",
|
||||
dv->devname, devname);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int Manage_with(struct supertype *tst, int fd, struct mddev_dev *dv,
|
||||
unsigned long rdev, int verbose, char *devname)
|
||||
{
|
||||
struct mdinfo *mdi, *di;
|
||||
/* try to set 'slot' for 'rdev' in 'fd' to 'dv->used' */
|
||||
mdi = sysfs_read(fd, -1, GET_DEVS|GET_STATE);
|
||||
if (!mdi || !mdi->devs) {
|
||||
pr_err("Cannot find status of %s to enable replacement - strange\n",
|
||||
devname);
|
||||
return -1;
|
||||
}
|
||||
for (di = mdi->devs; di; di = di->next)
|
||||
if (di->disk.major == (int)major(rdev) &&
|
||||
di->disk.minor == (int)minor(rdev))
|
||||
break;
|
||||
if (di) {
|
||||
int rv;
|
||||
if (di->disk.state & (1<<MD_DISK_FAULTY)) {
|
||||
pr_err("%s is faulty and cannot be a replacement\n",
|
||||
dv->devname);
|
||||
sysfs_free(mdi);
|
||||
return -1;
|
||||
}
|
||||
if (di->disk.raid_disk >= 0) {
|
||||
pr_err("%s is active and cannot be a replacement\n",
|
||||
dv->devname);
|
||||
sysfs_free(mdi);
|
||||
return -1;
|
||||
}
|
||||
rv = sysfs_set_num(mdi, di,
|
||||
"slot", dv->used);
|
||||
if (rv) {
|
||||
sysfs_free(mdi);
|
||||
pr_err("Failed to %s as preferred replacement.\n",
|
||||
dv->devname);
|
||||
return -1;
|
||||
}
|
||||
if (verbose >= 0)
|
||||
pr_err("Marked %s in %s as replacement for device %d\n",
|
||||
dv->devname, devname, dv->used);
|
||||
return 1;
|
||||
}
|
||||
sysfs_free(mdi);
|
||||
pr_err("%s not found in %s so cannot make it preferred replacement\n",
|
||||
dv->devname, devname);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int Manage_subdevs(char *devname, int fd,
|
||||
struct mddev_dev *devlist, int verbose, int test,
|
||||
char *update, int force)
|
||||
|
@ -970,6 +1075,16 @@ int Manage_subdevs(char *devname, int fd,
|
|||
* 'f' - set the device faulty SET_DISK_FAULTY
|
||||
* device can be 'detached' in which case any device that
|
||||
* is inaccessible will be marked faulty.
|
||||
* 'R' - mark this device as wanting replacement.
|
||||
* 'W' - this device is added if necessary and activated as
|
||||
* a replacement for a previous 'R' device.
|
||||
* -----
|
||||
* 'w' - 'W' will be changed to 'w' when it is paired with
|
||||
* an 'R' device. If a 'W' is found while walking the list
|
||||
* it must be unpaired, and is an error.
|
||||
* 'M' - this is created by a 'missing' target. It is a slight
|
||||
* variant on 'A'
|
||||
*
|
||||
* For 'f' and 'r', the device can also be a kernel-internal
|
||||
* name such as 'sdb'.
|
||||
*/
|
||||
|
@ -1209,6 +1324,38 @@ int Manage_subdevs(char *devname, int fd,
|
|||
pr_err("set %s faulty in %s\n",
|
||||
dv->devname, devname);
|
||||
break;
|
||||
case 'R': /* Mark as replaceable */
|
||||
if (subarray) {
|
||||
pr_err("Cannot replace disks in a"
|
||||
" \'member\' array, perform this"
|
||||
" operation on the parent container\n");
|
||||
rv = -1;
|
||||
} else {
|
||||
if (!frozen) {
|
||||
if (sysfs_freeze_array(&info) == 1)
|
||||
frozen = 1;
|
||||
else
|
||||
frozen = -1;
|
||||
}
|
||||
rv = Manage_replace(tst, fd, dv,
|
||||
stb.st_rdev, verbose,
|
||||
devname);
|
||||
}
|
||||
if (rv < 0)
|
||||
goto abort;
|
||||
if (rv > 0)
|
||||
count++;
|
||||
break;
|
||||
case 'W': /* --with device that doesn't match */
|
||||
pr_err("No matching --replace device for --with %s\n",
|
||||
dv->devname);
|
||||
goto abort;
|
||||
case 'w': /* --with device which was matched */
|
||||
rv = Manage_with(tst, fd, dv,
|
||||
stb.st_rdev, verbose, devname);
|
||||
if (rv < 0)
|
||||
goto abort;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (frozen > 0)
|
||||
|
|
7
ReadMe.c
7
ReadMe.c
|
@ -146,6 +146,8 @@ struct option long_options[] = {
|
|||
{"remove", 0, 0, Remove},
|
||||
{"fail", 0, 0, Fail},
|
||||
{"set-faulty",0, 0, Fail},
|
||||
{"replace", 0, 0, Replace},
|
||||
{"with", 0, 0, With},
|
||||
{"run", 0, 0, 'R'},
|
||||
{"stop", 0, 0, 'S'},
|
||||
{"readonly", 0, 0, 'o'},
|
||||
|
@ -309,6 +311,7 @@ char OptionHelp[] =
|
|||
" --remove -r : remove subsequent devices\n"
|
||||
" --fail -f : mark subsequent devices as faulty\n"
|
||||
" --set-faulty : same as --fail\n"
|
||||
" --replace : mark a device for replacement\n"
|
||||
" --run -R : start a partially built array\n"
|
||||
" --stop -S : deactivate array, releasing all resources\n"
|
||||
" --readonly -o : mark array as readonly\n"
|
||||
|
@ -462,6 +465,10 @@ char Help_manage[] =
|
|||
" --remove -r : remove subsequent devices, which must not be active\n"
|
||||
" --fail -f : mark subsequent devices a faulty\n"
|
||||
" --set-faulty : same as --fail\n"
|
||||
" --replace : mark device(s) to be replaced by spares. Once\n"
|
||||
" : replacement completes, device will be marked faulty\n"
|
||||
" --with : Indicate which spare a previous '--replace' should\n"
|
||||
" : prefer to use\n"
|
||||
" --run -R : start a partially built array\n"
|
||||
" --stop -S : deactivate array, releasing all resources\n"
|
||||
" --readonly -o : mark array as readonly\n"
|
||||
|
|
24
mdadm.8.in
24
mdadm.8.in
|
@ -216,8 +216,9 @@ to detect and assemble arrays \(em possibly in an
|
|||
If a device is given before any options, or if the first option is
|
||||
.BR \-\-add ,
|
||||
.BR \-\-fail ,
|
||||
or
|
||||
.BR \-\-remove ,
|
||||
or
|
||||
.BR \-\-replace ,
|
||||
then the MANAGE mode is assumed.
|
||||
Anything other than these will cause the
|
||||
.B Misc
|
||||
|
@ -1283,7 +1284,7 @@ have already been marked as failed.
|
|||
|
||||
.TP
|
||||
.BR \-f ", " \-\-fail
|
||||
mark listed devices as faulty.
|
||||
Mark listed devices as faulty.
|
||||
As well as the name of a device file, the word
|
||||
.B detached
|
||||
can be given. This will cause any device that has been detached from
|
||||
|
@ -1294,6 +1295,25 @@ the system to be marked as failed. It can then be removed.
|
|||
same as
|
||||
.BR \-\-fail .
|
||||
|
||||
.TP
|
||||
.B \-\-replace
|
||||
Mark listed devices as requiring replacement. As soon as a spare is
|
||||
available, it will be rebuilt and will replace the marked device.
|
||||
This is similar to marking a device as faulty, but the device remains
|
||||
in service during the recovery process to increase resilience against
|
||||
multiple failures. When the replacement process finishes, the
|
||||
replaced device will be marked as faulty.
|
||||
|
||||
.TP
|
||||
.B \-\-with
|
||||
This can follow a list of
|
||||
.B \-\-replace
|
||||
devices. The devices listed after
|
||||
.B \-\-with
|
||||
will be preferentially used to replace the devices listed after
|
||||
.BR \-\-replace .
|
||||
These devices must already be spare devices in the array.
|
||||
|
||||
.TP
|
||||
.BR \-\-write\-mostly
|
||||
Subsequent devices that are added or re\-added will have the 'write-mostly'
|
||||
|
|
14
mdadm.c
14
mdadm.c
|
@ -195,6 +195,8 @@ int main(int argc, char *argv[])
|
|||
case Add:
|
||||
case 'r':
|
||||
case Remove:
|
||||
case Replace:
|
||||
case With:
|
||||
case 'f':
|
||||
case Fail:
|
||||
case ReAdd: /* re-add */
|
||||
|
@ -928,6 +930,18 @@ int main(int argc, char *argv[])
|
|||
* remove the device */
|
||||
devmode = 'f';
|
||||
continue;
|
||||
case O(MANAGE,Replace):
|
||||
/* Mark these devices for replacement */
|
||||
devmode = 'R';
|
||||
continue;
|
||||
case O(MANAGE,With):
|
||||
/* These are the replacements to use */
|
||||
if (devmode != 'R') {
|
||||
pr_err("--with must follow --replace\n");
|
||||
exit(2);
|
||||
}
|
||||
devmode = 'W';
|
||||
continue;
|
||||
case O(INCREMENTAL,'R'):
|
||||
case O(MANAGE,'R'):
|
||||
case O(ASSEMBLE,'R'):
|
||||
|
|
4
mdadm.h
4
mdadm.h
|
@ -312,6 +312,8 @@ enum special_options {
|
|||
Add,
|
||||
Remove,
|
||||
Fail,
|
||||
Replace,
|
||||
With,
|
||||
MiscOpt,
|
||||
WaitOpt,
|
||||
ConfigFile,
|
||||
|
@ -432,7 +434,7 @@ struct mddev_dev {
|
|||
* Not set for names read from .config
|
||||
*/
|
||||
char writemostly; /* 1 for 'set writemostly', 2 for 'clear writemostly' */
|
||||
char used; /* set when used */
|
||||
int used; /* set when used */
|
||||
long long data_offset;
|
||||
struct mddev_dev *next;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue