Add support for "--re-add missing"

If the device name "missing" is given for --re-add, then mdadm will
attempt to find any device which should be a member of the array but
currently isn't and will --re-add it to the array.
This can be useful if a device disappeared due to a cabling problem,
and was then re-connected.
The appropriate sequence would be
  mdadm /dev/mdX --fail detached
  mdadm /dev/mdX --remove detached
  mdadm /dev/mdX --re-add missing

Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
NeilBrown 2010-07-05 15:06:27 +10:00
parent 3a6ec29ad5
commit a4e13010df
2 changed files with 81 additions and 20 deletions

View File

@ -342,6 +342,7 @@ int Manage_subdevs(char *devname, int fd,
* For 'f' and 'r', the device can also be a kernel-internal
* name such as 'sdb'.
*/
mddev_dev_t add_devlist = NULL;
mdu_array_info_t array;
mdu_disk_info_t disc;
unsigned long long array_size;
@ -381,6 +382,7 @@ int Manage_subdevs(char *devname, int fd,
unsigned long long ldsize;
char dvname[20];
char *dnprintable = dv->devname;
char *add_dev = dv->devname;
int err;
next = dv->next;
@ -458,6 +460,24 @@ int Manage_subdevs(char *devname, int fd,
}
if (jnext == 0)
continue;
} else if (strcmp(dv->devname, "missing") == 0) {
if (dv->disposition != 'a' || dv->re_add == 0) {
fprintf(stderr, Name ": 'missing' only meaningful "
"with --re-add\n");
return 1;
}
if (add_devlist == NULL)
add_devlist = conf_get_devs();
if (add_devlist == NULL) {
fprintf(stderr, Name ": no devices to scan for missing members.");
continue;
}
add_dev = add_devlist->devname;
add_devlist = add_devlist->next;
if (add_devlist != NULL)
next = dv;
if (stat(add_dev, &stb) < 0)
continue;
} else if (strchr(dv->devname, '/') == NULL &&
strlen(dv->devname) < 50) {
/* Assume this is a kernel-internal name like 'sda1' */
@ -533,39 +553,44 @@ int Manage_subdevs(char *devname, int fd,
return 1;
}
/* Make sure it isn't in use (in 2.6 or later) */
tfd = dev_open(dv->devname, O_RDONLY|O_EXCL|O_DIRECT);
tfd = dev_open(add_dev, O_RDONLY|O_EXCL|O_DIRECT);
if (tfd < 0 && add_dev != dv->devname)
continue;
if (tfd < 0) {
fprintf(stderr, Name ": Cannot open %s: %s\n",
dv->devname, strerror(errno));
return 1;
}
remove_partitions(tfd);
st = dup_super(tst);
if (array.not_persistent==0)
st->ss->load_super(st, tfd, NULL);
if (!get_dev_size(tfd, dv->devname, &ldsize)) {
if (add_dev == dv->devname) {
if (!get_dev_size(tfd, dv->devname, &ldsize)) {
close(tfd);
return 1;
}
} else if (!get_dev_size(tfd, NULL, &ldsize)) {
close(tfd);
return 1;
continue;
}
close(tfd);
if (!tst->ss->external &&
array.major_version == 0 &&
md_get_version(fd)%100 < 2) {
close(tfd);
if (ioctl(fd, HOT_ADD_DISK,
(unsigned long)stb.st_rdev)==0) {
if (verbose >= 0)
fprintf(stderr, Name ": hot added %s\n",
dv->devname);
add_dev);
continue;
}
fprintf(stderr, Name ": hot add failed for %s: %s\n",
dv->devname, strerror(errno));
add_dev, strerror(errno));
return 1;
}
@ -576,7 +601,9 @@ int Manage_subdevs(char *devname, int fd,
* For 'external' array (well, container based),
* We can just load the metadata for the array.
*/
if (tst->ss->external) {
if (tst->sb)
/* already loaded */;
else if (tst->ss->external) {
tst->ss->load_super(tst, fd, NULL);
} else for (j = 0; j < tst->max_devs; j++) {
char *dev;
@ -602,6 +629,7 @@ int Manage_subdevs(char *devname, int fd,
}
/* FIXME this is a bad test to be using */
if (!tst->sb) {
close(tfd);
fprintf(stderr, Name ": cannot find valid superblock in this array - HELP\n");
return 1;
}
@ -609,6 +637,9 @@ int Manage_subdevs(char *devname, int fd,
/* Make sure device is large enough */
if (tst->ss->avail_size(tst, ldsize/512) <
array_size) {
close(tfd);
if (add_dev != dv->devname)
continue;
fprintf(stderr, Name ": %s not large enough to join array\n",
dv->devname);
return 1;
@ -644,23 +675,40 @@ int Manage_subdevs(char *devname, int fd,
disc.state |= 1 << MD_DISK_WRITEMOSTLY;
if (dv->writemostly == 2)
disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
remove_partitions(tfd);
close(tfd);
tfd = -1;
/* don't even try if disk is marked as faulty */
errno = 0;
if ((disc.state & 1) == 0 &&
ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
if (verbose >= 0)
fprintf(stderr, Name ": re-added %s\n", dv->devname);
fprintf(stderr, Name ": re-added %s\n", add_dev);
continue;
}
if (errno == ENOMEM || errno == EROFS) {
close(tfd);
fprintf(stderr, Name ": add new device failed for %s: %s\n",
dv->devname, strerror(errno));
add_dev, strerror(errno));
if (add_dev != dv->devname)
continue;
return 1;
}
/* fall back on normal-add */
}
}
if (add_dev != dv->devname) {
if (verbose > 0)
fprintf(stderr, Name
": --re-add for %s to %s is not possible\n",
add_dev, devname);
if (tfd >= 0)
close(tfd);
continue;
}
if (dv->re_add) {
if (tfd >= 0)
close(tfd);
fprintf(stderr, Name
": --re-add for %s to %s is not possible\n",
dv->devname, devname);
@ -676,6 +724,11 @@ int Manage_subdevs(char *devname, int fd,
return 1;
}
}
/* committed to really trying this device now*/
if (tfd >= 0) {
remove_partitions(tfd);
close(tfd);
}
/* in 2.6.17 and earlier, version-1 superblocks won't
* use the number we write, but will choose a free number.
* we must choose the same free number, which requires

View File

@ -1018,16 +1018,24 @@ immediately start recovering data on to one of these spares.
.TP
.BR \-\-re\-add
re-add a device that was recently removed from an array. This is only
needed for arrays that have be built (i.e. with
re\-add a device that was recently removed from an array. This is
normally only needed for arrays that have been built (i.e. with
.BR \-\-build ).
For created arrays, devices are always re-added if that is possible.
When re-adding a device, if nothing has changed on the array since the
For created arrays, devices are always re\-added if that is possible;
however, using \-\-re\-add will ensure the device isn't made into a
spare if the \-\-re\-add fails.
When re\-adding a device, if nothing has changed on the array since the
device was removed, no recovery is performed. Also, if the array has
a write-intent bitmap, then the recovery performed after a re-add will
a write-intent bitmap, then the recovery performed after a re\-add will
be limited to those blocks which, according to the bitmap, might have
changed since the device was removed.
If the device name given is
.B missing
then mdadm will try to find any device that looks like it should be
part of the array but isn't and will try to re\-add all such devices.
.TP
.BR \-r ", " \-\-remove
remove listed devices. They must not be active, i.e. they should
@ -1062,12 +1070,12 @@ same as
.TP
.BR \-\-write\-mostly
Subsequent devices that are added or re-added will have the 'write-mostly'
Subsequent devices that are added or re\-added will have the 'write-mostly'
flag set. This is only valid for RAID1 and means that the 'md' driver
will avoid reading from these devices if possible.
.TP
.BR \-\-readwrite
Subsequent devices that are added or re-added will have the 'write-mostly'
Subsequent devices that are added or re\-added will have the 'write-mostly'
flag cleared.
.P
@ -1082,7 +1090,7 @@ Each operation applies to all devices listed until the next
operation.
If an array is using a write-intent bitmap, then devices which have
been removed can be re-added in a way that avoids a full
been removed can be re\-added in a way that avoids a full
reconstruction but instead just updates the blocks that have changed
since the device was removed. For arrays with persistent metadata
(superblocks) this is done automatically. For arrays created with
@ -1691,7 +1699,7 @@ command.
When a device is added to an active array, mdadm checks to see if it
has metadata on it which suggests that it was recently a member of the
array. If it does, it tried to "re-add" the device. If there have
array. If it does, it tries to "re\-add" the device. If there have
been no changes since the device was removed, or if the array has a
write-intent bitmap which has recorded whatever changes there were,
then the device will immediately become a full member of the array and