Add support for --replace and --with
--replace can be used to replace a device without completely failing it. Once the replacement completes the device will be failed. --with can indicate which of several spares to use. Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
parent
1dc837e4ed
commit
70c55e36b7
147
Manage.c
147
Manage.c
|
@ -954,6 +954,111 @@ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int Manage_replace(struct supertype *tst, int fd, struct mddev_dev *dv,
|
||||||
|
unsigned long rdev, int verbose, char *devname)
|
||||||
|
{
|
||||||
|
struct mdinfo *mdi, *di;
|
||||||
|
if (tst->ss->external) {
|
||||||
|
pr_err("--replace only supported for native metadata (0.90 or 1.x)\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
/* Need to find the device in sysfs and add 'want_replacement' to the
|
||||||
|
* status.
|
||||||
|
*/
|
||||||
|
mdi = sysfs_read(fd, -1, GET_DEVS);
|
||||||
|
if (!mdi || !mdi->devs) {
|
||||||
|
pr_err("Cannot find status of %s to enable replacement - strange\n",
|
||||||
|
devname);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
for (di = mdi->devs; di; di = di->next)
|
||||||
|
if (di->disk.major == (int)major(rdev) &&
|
||||||
|
di->disk.minor == (int)minor(rdev))
|
||||||
|
break;
|
||||||
|
if (di) {
|
||||||
|
int rv;
|
||||||
|
if (di->disk.raid_disk < 0) {
|
||||||
|
pr_err("%s is not active and so cannot be replaced.\n",
|
||||||
|
dv->devname);
|
||||||
|
sysfs_free(mdi);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
rv = sysfs_set_str(mdi, di,
|
||||||
|
"state", "want_replacement");
|
||||||
|
if (rv) {
|
||||||
|
sysfs_free(mdi);
|
||||||
|
pr_err("Failed to request replacement for %s\n",
|
||||||
|
dv->devname);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (verbose >= 0)
|
||||||
|
pr_err("Marked %s (device %d in %s) for replacement\n",
|
||||||
|
dv->devname, di->disk.raid_disk, devname);
|
||||||
|
/* If there is a matching 'with', we need to tell it which
|
||||||
|
* raid disk
|
||||||
|
*/
|
||||||
|
while (dv && dv->disposition != 'W')
|
||||||
|
dv = dv->next;
|
||||||
|
if (dv) {
|
||||||
|
dv->disposition = 'w';
|
||||||
|
dv->used = di->disk.raid_disk;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
sysfs_free(mdi);
|
||||||
|
pr_err("%s not found in %s so cannot --replace it\n",
|
||||||
|
dv->devname, devname);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int Manage_with(struct supertype *tst, int fd, struct mddev_dev *dv,
|
||||||
|
unsigned long rdev, int verbose, char *devname)
|
||||||
|
{
|
||||||
|
struct mdinfo *mdi, *di;
|
||||||
|
/* try to set 'slot' for 'rdev' in 'fd' to 'dv->used' */
|
||||||
|
mdi = sysfs_read(fd, -1, GET_DEVS|GET_STATE);
|
||||||
|
if (!mdi || !mdi->devs) {
|
||||||
|
pr_err("Cannot find status of %s to enable replacement - strange\n",
|
||||||
|
devname);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
for (di = mdi->devs; di; di = di->next)
|
||||||
|
if (di->disk.major == (int)major(rdev) &&
|
||||||
|
di->disk.minor == (int)minor(rdev))
|
||||||
|
break;
|
||||||
|
if (di) {
|
||||||
|
int rv;
|
||||||
|
if (di->disk.state & (1<<MD_DISK_FAULTY)) {
|
||||||
|
pr_err("%s is faulty and cannot be a replacement\n",
|
||||||
|
dv->devname);
|
||||||
|
sysfs_free(mdi);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (di->disk.raid_disk >= 0) {
|
||||||
|
pr_err("%s is active and cannot be a replacement\n",
|
||||||
|
dv->devname);
|
||||||
|
sysfs_free(mdi);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
rv = sysfs_set_num(mdi, di,
|
||||||
|
"slot", dv->used);
|
||||||
|
if (rv) {
|
||||||
|
sysfs_free(mdi);
|
||||||
|
pr_err("Failed to %s as preferred replacement.\n",
|
||||||
|
dv->devname);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (verbose >= 0)
|
||||||
|
pr_err("Marked %s in %s as replacement for device %d\n",
|
||||||
|
dv->devname, devname, dv->used);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
sysfs_free(mdi);
|
||||||
|
pr_err("%s not found in %s so cannot make it preferred replacement\n",
|
||||||
|
dv->devname, devname);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
int Manage_subdevs(char *devname, int fd,
|
int Manage_subdevs(char *devname, int fd,
|
||||||
struct mddev_dev *devlist, int verbose, int test,
|
struct mddev_dev *devlist, int verbose, int test,
|
||||||
char *update, int force)
|
char *update, int force)
|
||||||
|
@ -970,6 +1075,16 @@ int Manage_subdevs(char *devname, int fd,
|
||||||
* 'f' - set the device faulty SET_DISK_FAULTY
|
* 'f' - set the device faulty SET_DISK_FAULTY
|
||||||
* device can be 'detached' in which case any device that
|
* device can be 'detached' in which case any device that
|
||||||
* is inaccessible will be marked faulty.
|
* is inaccessible will be marked faulty.
|
||||||
|
* 'R' - mark this device as wanting replacement.
|
||||||
|
* 'W' - this device is added if necessary and activated as
|
||||||
|
* a replacement for a previous 'R' device.
|
||||||
|
* -----
|
||||||
|
* 'w' - 'W' will be changed to 'w' when it is paired with
|
||||||
|
* a 'R' device. If a 'W' is found while walking the list
|
||||||
|
* it must be unpaired, and is an error.
|
||||||
|
* 'M' - this is created by a 'missing' target. It is a slight
|
||||||
|
* variant on 'A'
|
||||||
|
*
|
||||||
* For 'f' and 'r', the device can also be a kernel-internal
|
* For 'f' and 'r', the device can also be a kernel-internal
|
||||||
* name such as 'sdb'.
|
* name such as 'sdb'.
|
||||||
*/
|
*/
|
||||||
|
@ -1209,6 +1324,38 @@ int Manage_subdevs(char *devname, int fd,
|
||||||
pr_err("set %s faulty in %s\n",
|
pr_err("set %s faulty in %s\n",
|
||||||
dv->devname, devname);
|
dv->devname, devname);
|
||||||
break;
|
break;
|
||||||
|
case 'R': /* Mark as replaceable */
|
||||||
|
if (subarray) {
|
||||||
|
pr_err("Cannot replace disks in a"
|
||||||
|
" \'member\' array, perform this"
|
||||||
|
" operation on the parent container\n");
|
||||||
|
rv = -1;
|
||||||
|
} else {
|
||||||
|
if (!frozen) {
|
||||||
|
if (sysfs_freeze_array(&info) == 1)
|
||||||
|
frozen = 1;
|
||||||
|
else
|
||||||
|
frozen = -1;
|
||||||
|
}
|
||||||
|
rv = Manage_replace(tst, fd, dv,
|
||||||
|
stb.st_rdev, verbose,
|
||||||
|
devname);
|
||||||
|
}
|
||||||
|
if (rv < 0)
|
||||||
|
goto abort;
|
||||||
|
if (rv > 0)
|
||||||
|
count++;
|
||||||
|
break;
|
||||||
|
case 'W': /* --with device that doesn't match */
|
||||||
|
pr_err("No matching --replace device for --with %s\n",
|
||||||
|
dv->devname);
|
||||||
|
goto abort;
|
||||||
|
case 'w': /* --with device which was matched */
|
||||||
|
rv = Manage_with(tst, fd, dv,
|
||||||
|
stb.st_rdev, verbose, devname);
|
||||||
|
if (rv < 0)
|
||||||
|
goto abort;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (frozen > 0)
|
if (frozen > 0)
|
||||||
|
|
7
ReadMe.c
7
ReadMe.c
|
@ -146,6 +146,8 @@ struct option long_options[] = {
|
||||||
{"remove", 0, 0, Remove},
|
{"remove", 0, 0, Remove},
|
||||||
{"fail", 0, 0, Fail},
|
{"fail", 0, 0, Fail},
|
||||||
{"set-faulty",0, 0, Fail},
|
{"set-faulty",0, 0, Fail},
|
||||||
|
{"replace", 0, 0, Replace},
|
||||||
|
{"with", 0, 0, With},
|
||||||
{"run", 0, 0, 'R'},
|
{"run", 0, 0, 'R'},
|
||||||
{"stop", 0, 0, 'S'},
|
{"stop", 0, 0, 'S'},
|
||||||
{"readonly", 0, 0, 'o'},
|
{"readonly", 0, 0, 'o'},
|
||||||
|
@ -309,6 +311,7 @@ char OptionHelp[] =
|
||||||
" --remove -r : remove subsequent devices\n"
|
" --remove -r : remove subsequent devices\n"
|
||||||
" --fail -f : mark subsequent devices as faulty\n"
|
" --fail -f : mark subsequent devices as faulty\n"
|
||||||
" --set-faulty : same as --fail\n"
|
" --set-faulty : same as --fail\n"
|
||||||
|
" --replace : mark a device for replacement\n"
|
||||||
" --run -R : start a partially built array\n"
|
" --run -R : start a partially built array\n"
|
||||||
" --stop -S : deactivate array, releasing all resources\n"
|
" --stop -S : deactivate array, releasing all resources\n"
|
||||||
" --readonly -o : mark array as readonly\n"
|
" --readonly -o : mark array as readonly\n"
|
||||||
|
@ -462,6 +465,10 @@ char Help_manage[] =
|
||||||
" --remove -r : remove subsequent devices, which must not be active\n"
|
" --remove -r : remove subsequent devices, which must not be active\n"
|
||||||
" --fail -f : mark subsequent devices a faulty\n"
|
" --fail -f : mark subsequent devices a faulty\n"
|
||||||
" --set-faulty : same as --fail\n"
|
" --set-faulty : same as --fail\n"
|
||||||
|
" --replace : mark device(s) to be replaced by spares. Once\n"
|
||||||
|
" : replacement completes, device will be marked faulty\n"
|
||||||
|
" --with : Indicate which spare a previous '--replace' should\n"
|
||||||
|
" : prefer to use\n"
|
||||||
" --run -R : start a partially built array\n"
|
" --run -R : start a partially built array\n"
|
||||||
" --stop -S : deactivate array, releasing all resources\n"
|
" --stop -S : deactivate array, releasing all resources\n"
|
||||||
" --readonly -o : mark array as readonly\n"
|
" --readonly -o : mark array as readonly\n"
|
||||||
|
|
24
mdadm.8.in
24
mdadm.8.in
|
@ -216,8 +216,9 @@ to detect and assemble arrays \(em possibly in an
|
||||||
If a device is given before any options, or if the first option is
|
If a device is given before any options, or if the first option is
|
||||||
.BR \-\-add ,
|
.BR \-\-add ,
|
||||||
.BR \-\-fail ,
|
.BR \-\-fail ,
|
||||||
or
|
|
||||||
.BR \-\-remove ,
|
.BR \-\-remove ,
|
||||||
|
or
|
||||||
|
.BR \-\-replace ,
|
||||||
then the MANAGE mode is assumed.
|
then the MANAGE mode is assumed.
|
||||||
Anything other than these will cause the
|
Anything other than these will cause the
|
||||||
.B Misc
|
.B Misc
|
||||||
|
@ -1283,7 +1284,7 @@ have already been marked as failed.
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.BR \-f ", " \-\-fail
|
.BR \-f ", " \-\-fail
|
||||||
mark listed devices as faulty.
|
Mark listed devices as faulty.
|
||||||
As well as the name of a device file, the word
|
As well as the name of a device file, the word
|
||||||
.B detached
|
.B detached
|
||||||
can be given. This will cause any device that has been detached from
|
can be given. This will cause any device that has been detached from
|
||||||
|
@ -1294,6 +1295,25 @@ the system to be marked as failed. It can then be removed.
|
||||||
same as
|
same as
|
||||||
.BR \-\-fail .
|
.BR \-\-fail .
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B \-\-replace
|
||||||
|
Mark listed devices as requiring replacement. As soon as a spare is
|
||||||
|
available, it will be rebuilt and will replace the marked device.
|
||||||
|
This is similar to marking a device as faulty, but the device remains
|
||||||
|
in service during the recovery process to increase resilience against
|
||||||
|
multiple failures. When the replacement process finishes, the
|
||||||
|
replaced device will be marked as faulty.
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B \-\-with
|
||||||
|
This can follow a list of
|
||||||
|
.B \-\-replace
|
||||||
|
devices. The devices listed after
|
||||||
|
.B \-\-with
|
||||||
|
will be preferentially used to replace the devices listed after
|
||||||
|
.BR \-\-replace .
|
||||||
|
These devices must already be spare devices in the array.
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.BR \-\-write\-mostly
|
.BR \-\-write\-mostly
|
||||||
Subsequent devices that are added or re\-added will have the 'write-mostly'
|
Subsequent devices that are added or re\-added will have the 'write-mostly'
|
||||||
|
|
14
mdadm.c
14
mdadm.c
|
@ -195,6 +195,8 @@ int main(int argc, char *argv[])
|
||||||
case Add:
|
case Add:
|
||||||
case 'r':
|
case 'r':
|
||||||
case Remove:
|
case Remove:
|
||||||
|
case Replace:
|
||||||
|
case With:
|
||||||
case 'f':
|
case 'f':
|
||||||
case Fail:
|
case Fail:
|
||||||
case ReAdd: /* re-add */
|
case ReAdd: /* re-add */
|
||||||
|
@ -928,6 +930,18 @@ int main(int argc, char *argv[])
|
||||||
* remove the device */
|
* remove the device */
|
||||||
devmode = 'f';
|
devmode = 'f';
|
||||||
continue;
|
continue;
|
||||||
|
case O(MANAGE,Replace):
|
||||||
|
/* Mark these devices for replacement */
|
||||||
|
devmode = 'R';
|
||||||
|
continue;
|
||||||
|
case O(MANAGE,With):
|
||||||
|
/* These are the replacements to use */
|
||||||
|
if (devmode != 'R') {
|
||||||
|
pr_err("--with must follow --replace\n");
|
||||||
|
exit(2);
|
||||||
|
}
|
||||||
|
devmode = 'W';
|
||||||
|
continue;
|
||||||
case O(INCREMENTAL,'R'):
|
case O(INCREMENTAL,'R'):
|
||||||
case O(MANAGE,'R'):
|
case O(MANAGE,'R'):
|
||||||
case O(ASSEMBLE,'R'):
|
case O(ASSEMBLE,'R'):
|
||||||
|
|
4
mdadm.h
4
mdadm.h
|
@ -312,6 +312,8 @@ enum special_options {
|
||||||
Add,
|
Add,
|
||||||
Remove,
|
Remove,
|
||||||
Fail,
|
Fail,
|
||||||
|
Replace,
|
||||||
|
With,
|
||||||
MiscOpt,
|
MiscOpt,
|
||||||
WaitOpt,
|
WaitOpt,
|
||||||
ConfigFile,
|
ConfigFile,
|
||||||
|
@ -432,7 +434,7 @@ struct mddev_dev {
|
||||||
* Not set for names read from .config
|
* Not set for names read from .config
|
||||||
*/
|
*/
|
||||||
char writemostly; /* 1 for 'set writemostly', 2 for 'clear writemostly' */
|
char writemostly; /* 1 for 'set writemostly', 2 for 'clear writemostly' */
|
||||||
char used; /* set when used */
|
int used; /* set when used */
|
||||||
long long data_offset;
|
long long data_offset;
|
||||||
struct mddev_dev *next;
|
struct mddev_dev *next;
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue