Add a new clustered disk

A clustered disk is added by the traditional --add sequence.
However, other nodes need to acknowledge that they can "see"
the device. This is done by --cluster-confirm:

--cluster-confirm SLOTNUM:/dev/whatever (if disk is found)
or
--cluster-confirm SLOTNUM:missing (if disk is not found)

The node initiating the --add has the disk state tagged with
MD_DISK_CLUSTER_ADD, and each confirming node tags the disk with
MD_DISK_CANDIDATE.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
Guoqing Jiang 2015-06-10 13:42:08 +08:00 committed by NeilBrown
parent b98043a2f8
commit 4de9091302
8 changed files with 68 additions and 3 deletions

View File

@ -690,7 +690,8 @@ skip_re_add:
int Manage_add(int fd, int tfd, struct mddev_dev *dv,
struct supertype *tst, mdu_array_info_t *array,
int force, int verbose, char *devname,
char *update, unsigned long rdev, unsigned long long array_size)
char *update, unsigned long rdev, unsigned long long array_size,
int raid_slot)
{
unsigned long long ldsize;
struct supertype *dev_st = NULL;
@ -880,7 +881,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
}
disc.major = major(rdev);
disc.minor = minor(rdev);
disc.number =j;
if (raid_slot < 0)
disc.number = j;
else
disc.number = raid_slot;
disc.state = 0;
if (array->not_persistent==0) {
int dfd;
@ -921,6 +925,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
}
free(used);
}
if (array->state & (1 << MD_SB_CLUSTERED)) {
if (dv->disposition == 'c')
disc.state |= (1 << MD_DISK_CANDIDATE);
else
disc.state |= (1 << MD_DISK_CLUSTER_ADD);
}
if (dv->writemostly == 1)
disc.state |= (1 << MD_DISK_WRITEMOSTLY);
if (tst->ss->external) {
@ -1240,6 +1252,7 @@ int Manage_subdevs(char *devname, int fd,
* variant on 'A'
* 'F' - Another variant of 'A', where the device was faulty
* so must be removed from the array first.
* 'c' - confirm the device as found (for clustered environments)
*
* For 'f' and 'r', the device can also be a kernel-internal
* name such as 'sdb'.
@ -1255,6 +1268,7 @@ int Manage_subdevs(char *devname, int fd,
struct mdinfo info;
int frozen = 0;
int busy = 0;
int raid_slot = -1;
if (ioctl(fd, GET_ARRAY_INFO, &array)) {
pr_err("Cannot get array info for %s\n",
@ -1283,6 +1297,17 @@ int Manage_subdevs(char *devname, int fd,
int rv;
int mj,mn;
raid_slot = -1;
if (dv->disposition == 'c') {
rv = parse_cluster_confirm_arg(dv->devname,
&dv->devname,
&raid_slot);
if (!rv) {
pr_err("Could not get the devname of cluster\n");
goto abort;
}
}
if (strcmp(dv->devname, "failed") == 0 ||
strcmp(dv->devname, "faulty") == 0) {
if (dv->disposition != 'A'
@ -1308,6 +1333,11 @@ int Manage_subdevs(char *devname, int fd,
if (strcmp(dv->devname, "missing") == 0) {
struct mddev_dev *add_devlist = NULL;
struct mddev_dev **dp;
if (dv->disposition == 'c') {
rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
break;
}
if (dv->disposition != 'A') {
pr_err("'missing' only meaningful with --re-add\n");
goto abort;
@ -1438,6 +1468,7 @@ int Manage_subdevs(char *devname, int fd,
case 'A':
case 'M': /* --re-add missing */
case 'F': /* --re-add faulty */
case 'c': /* --cluster-confirm */
/* add the device */
if (subarray) {
pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n");
@ -1471,7 +1502,7 @@ int Manage_subdevs(char *devname, int fd,
}
rv = Manage_add(fd, tfd, dv, tst, &array,
force, verbose, devname, update,
rdev, array_size);
rdev, array_size, raid_slot);
close(tfd);
tfd = -1;
if (rv < 0)

View File

@ -169,6 +169,7 @@ struct option long_options[] = {
{"wait", 0, 0, WaitOpt},
{"wait-clean", 0, 0, Waitclean },
{"action", 1, 0, Action },
{"cluster-confirm", 0, 0, ClusterConfirm},
/* For Detail/Examine */
{"brief", 0, 0, Brief},

7
md_p.h
View File

@ -78,6 +78,12 @@
#define MD_DISK_ACTIVE 1 /* disk is running but may not be in sync */
#define MD_DISK_SYNC 2 /* disk is in sync with the raid set */
#define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */
#define MD_DISK_CLUSTER_ADD 4 /* Initiate a disk add across the cluster
* For clustered environments only.
*/
#define MD_DISK_CANDIDATE 5 /* disk is added as spare (local) until confirmed
* For clustered environments only.
*/
#define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config.
* read requests will only be sent here in
@ -106,6 +112,7 @@ typedef struct mdp_device_descriptor_s {
#define MD_SB_BLOCK_CONTAINER_RESHAPE 3 /* block container wide reshapes */
#define MD_SB_BLOCK_VOLUME 4 /* block activation of array, other arrays
* in container can be activated */
#define MD_SB_CLUSTERED 5 /* MD is clustered */
#define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */
typedef struct mdp_superblock_s {

1
md_u.h
View File

@ -44,6 +44,7 @@
#define STOP_ARRAY _IO (MD_MAJOR, 0x32)
#define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33)
#define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34)
#define CLUSTERED_DISK_NACK _IO (MD_MAJOR, 0x35)
typedef struct mdu_version_s {
int major;

View File

@ -1406,6 +1406,15 @@ will avoid reading from these devices if possible.
.BR \-\-readwrite
Subsequent devices that are added or re\-added will have the 'write-mostly'
flag cleared.
.TP
.BR \-\-cluster\-confirm
Confirm the existence of the device. This is issued in response to an \-\-add
request by a node in a cluster. When a node adds a device it sends a message
to all nodes in the cluster to look for a device with a UUID. This translates
to a udev notification with the UUID of the device to be added and the slot
number. The receiving node must acknowledge this message
with \-\-cluster\-confirm. Valid arguments are <slot>:<devicename> in case
the device is found or <slot>:missing in case the device is not found.
.P
Each of these options requires that the first device listed is the array

View File

@ -196,6 +196,7 @@ int main(int argc, char *argv[])
case 'f':
case Fail:
case ReAdd: /* re-add */
case ClusterConfirm:
if (!mode) {
newmode = MANAGE;
shortopt = short_bitmap_options;
@ -933,6 +934,9 @@ int main(int argc, char *argv[])
* remove the device */
devmode = 'f';
continue;
case O(MANAGE, ClusterConfirm):
devmode = 'c';
continue;
case O(MANAGE,Replace):
/* Mark these devices for replacement */
devmode = 'R';

View File

@ -346,6 +346,7 @@ enum special_options {
Action,
Nodes,
ClusterName,
ClusterConfirm,
};
enum prefix_standard {
@ -1281,6 +1282,7 @@ extern int parse_uuid(char *str, int uuid[4]);
extern int parse_layout_10(char *layout);
extern int parse_layout_faulty(char *layout);
extern long parse_num(char *num);
extern int parse_cluster_confirm_arg(char *inp, char **devname, int *slot);
extern int check_ext2(int fd, char *name);
extern int check_reiser(int fd, char *name);
extern int check_raid(int fd, char *name);

10
util.c
View File

@ -280,6 +280,16 @@ long parse_num(char *num)
}
#endif
/*
 * Parse a --cluster-confirm argument of the form "<slot>:<devname>".
 *
 * input   - argument text, e.g. "2:/dev/sdb" or "2:missing"
 * devname - on success, set to point at the text following the ':'
 *           (it points inside 'input'; nothing is copied or allocated)
 * slot    - on success, set to the decimal slot number
 *
 * Returns 0 on success and -1 on a malformed argument, so callers must
 * treat a non-zero result as the failure case.  Neither output is
 * written on failure.
 */
int parse_cluster_confirm_arg(char *input, char **devname, int *slot)
{
	char *end;
	unsigned long val;

	/*
	 * strtoul() skips leading blanks and accepts a sign, which would
	 * let something like "-1:dev" be taken as a slot number; insist
	 * that the argument starts with a digit.
	 */
	if (input == NULL || input[0] < '0' || input[0] > '9')
		return -1;

	val = strtoul(input, &end, 10);
	if (end[0] != ':')
		return -1;

	/*
	 * The slot is stored in an int: refuse anything above the largest
	 * positive int (~0U >> 1 on the usual two's-complement targets).
	 * This also catches the ULONG_MAX strtoul() returns on overflow.
	 */
	if (val > (unsigned long)(~0U >> 1))
		return -1;

	/* A device name (or "missing") must follow the separator. */
	if (end[1] == '\0')
		return -1;

	*slot = (int)val;
	*devname = end + 1;
	return 0;
}
void remove_partitions(int fd)
{
/* remove partitions from this block devices.