Add a new clustered disk

A clustered disk is added by the traditional --add sequence.
However, other nodes need to acknowledge that they can "see"
the device. This is done by --cluster-confirm:

--cluster-confirm SLOTNUM:/dev/whatever (if disk is found)
or
--cluster-confirm SLOTNUM:missing (if disk is not found)

The node initiating the --add has the disk state tagged with
MD_DISK_CLUSTER_ADD, and each node confirming it tags the disk with
MD_DISK_CANDIDATE.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
Guoqing Jiang 2015-06-10 13:42:08 +08:00 committed by NeilBrown
parent b98043a2f8
commit 4de9091302
8 changed files with 68 additions and 3 deletions

View File

@ -690,7 +690,8 @@ skip_re_add:
int Manage_add(int fd, int tfd, struct mddev_dev *dv, int Manage_add(int fd, int tfd, struct mddev_dev *dv,
struct supertype *tst, mdu_array_info_t *array, struct supertype *tst, mdu_array_info_t *array,
int force, int verbose, char *devname, int force, int verbose, char *devname,
char *update, unsigned long rdev, unsigned long long array_size) char *update, unsigned long rdev, unsigned long long array_size,
int raid_slot)
{ {
unsigned long long ldsize; unsigned long long ldsize;
struct supertype *dev_st = NULL; struct supertype *dev_st = NULL;
@ -880,7 +881,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
} }
disc.major = major(rdev); disc.major = major(rdev);
disc.minor = minor(rdev); disc.minor = minor(rdev);
disc.number =j; if (raid_slot < 0)
disc.number = j;
else
disc.number = raid_slot;
disc.state = 0; disc.state = 0;
if (array->not_persistent==0) { if (array->not_persistent==0) {
int dfd; int dfd;
@ -921,6 +925,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
} }
free(used); free(used);
} }
if (array->state & (1 << MD_SB_CLUSTERED)) {
if (dv->disposition == 'c')
disc.state |= (1 << MD_DISK_CANDIDATE);
else
disc.state |= (1 << MD_DISK_CLUSTER_ADD);
}
if (dv->writemostly == 1) if (dv->writemostly == 1)
disc.state |= (1 << MD_DISK_WRITEMOSTLY); disc.state |= (1 << MD_DISK_WRITEMOSTLY);
if (tst->ss->external) { if (tst->ss->external) {
@ -1240,6 +1252,7 @@ int Manage_subdevs(char *devname, int fd,
* variant on 'A' * variant on 'A'
* 'F' - Another variant of 'A', where the device was faulty * 'F' - Another variant of 'A', where the device was faulty
* so must be removed from the array first. * so must be removed from the array first.
* 'c' - confirm the device as found (for clustered environments)
* *
* For 'f' and 'r', the device can also be a kernel-internal * For 'f' and 'r', the device can also be a kernel-internal
* name such as 'sdb'. * name such as 'sdb'.
@ -1255,6 +1268,7 @@ int Manage_subdevs(char *devname, int fd,
struct mdinfo info; struct mdinfo info;
int frozen = 0; int frozen = 0;
int busy = 0; int busy = 0;
int raid_slot = -1;
if (ioctl(fd, GET_ARRAY_INFO, &array)) { if (ioctl(fd, GET_ARRAY_INFO, &array)) {
pr_err("Cannot get array info for %s\n", pr_err("Cannot get array info for %s\n",
@ -1283,6 +1297,17 @@ int Manage_subdevs(char *devname, int fd,
int rv; int rv;
int mj,mn; int mj,mn;
raid_slot = -1;
if (dv->disposition == 'c') {
rv = parse_cluster_confirm_arg(dv->devname,
&dv->devname,
&raid_slot);
if (!rv) {
pr_err("Could not get the devname of cluster\n");
goto abort;
}
}
if (strcmp(dv->devname, "failed") == 0 || if (strcmp(dv->devname, "failed") == 0 ||
strcmp(dv->devname, "faulty") == 0) { strcmp(dv->devname, "faulty") == 0) {
if (dv->disposition != 'A' if (dv->disposition != 'A'
@ -1308,6 +1333,11 @@ int Manage_subdevs(char *devname, int fd,
if (strcmp(dv->devname, "missing") == 0) { if (strcmp(dv->devname, "missing") == 0) {
struct mddev_dev *add_devlist = NULL; struct mddev_dev *add_devlist = NULL;
struct mddev_dev **dp; struct mddev_dev **dp;
if (dv->disposition == 'c') {
rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
break;
}
if (dv->disposition != 'A') { if (dv->disposition != 'A') {
pr_err("'missing' only meaningful with --re-add\n"); pr_err("'missing' only meaningful with --re-add\n");
goto abort; goto abort;
@ -1438,6 +1468,7 @@ int Manage_subdevs(char *devname, int fd,
case 'A': case 'A':
case 'M': /* --re-add missing */ case 'M': /* --re-add missing */
case 'F': /* --re-add faulty */ case 'F': /* --re-add faulty */
case 'c': /* --cluster-confirm */
/* add the device */ /* add the device */
if (subarray) { if (subarray) {
pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n"); pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n");
@ -1471,7 +1502,7 @@ int Manage_subdevs(char *devname, int fd,
} }
rv = Manage_add(fd, tfd, dv, tst, &array, rv = Manage_add(fd, tfd, dv, tst, &array,
force, verbose, devname, update, force, verbose, devname, update,
rdev, array_size); rdev, array_size, raid_slot);
close(tfd); close(tfd);
tfd = -1; tfd = -1;
if (rv < 0) if (rv < 0)

View File

@ -169,6 +169,7 @@ struct option long_options[] = {
{"wait", 0, 0, WaitOpt}, {"wait", 0, 0, WaitOpt},
{"wait-clean", 0, 0, Waitclean }, {"wait-clean", 0, 0, Waitclean },
{"action", 1, 0, Action }, {"action", 1, 0, Action },
{"cluster-confirm", 0, 0, ClusterConfirm},
/* For Detail/Examine */ /* For Detail/Examine */
{"brief", 0, 0, Brief}, {"brief", 0, 0, Brief},

7
md_p.h
View File

@ -78,6 +78,12 @@
#define MD_DISK_ACTIVE 1 /* disk is running but may not be in sync */ #define MD_DISK_ACTIVE 1 /* disk is running but may not be in sync */
#define MD_DISK_SYNC 2 /* disk is in sync with the raid set */ #define MD_DISK_SYNC 2 /* disk is in sync with the raid set */
#define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */ #define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */
#define MD_DISK_CLUSTER_ADD 4 /* Initiate a disk add across the cluster
* For clustered environments only.
*/
#define MD_DISK_CANDIDATE 5 /* disk is added as spare (local) until confirmed
* For clustered environments only.
*/
#define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config. #define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config.
* read requests will only be sent here in * read requests will only be sent here in
@ -106,6 +112,7 @@ typedef struct mdp_device_descriptor_s {
#define MD_SB_BLOCK_CONTAINER_RESHAPE 3 /* block container wide reshapes */ #define MD_SB_BLOCK_CONTAINER_RESHAPE 3 /* block container wide reshapes */
#define MD_SB_BLOCK_VOLUME 4 /* block activation of array, other arrays #define MD_SB_BLOCK_VOLUME 4 /* block activation of array, other arrays
* in container can be activated */ * in container can be activated */
#define MD_SB_CLUSTERED 5 /* MD is clustered */
#define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */ #define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */
typedef struct mdp_superblock_s { typedef struct mdp_superblock_s {

1
md_u.h
View File

@ -44,6 +44,7 @@
#define STOP_ARRAY _IO (MD_MAJOR, 0x32) #define STOP_ARRAY _IO (MD_MAJOR, 0x32)
#define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) #define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33)
#define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) #define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34)
#define CLUSTERED_DISK_NACK _IO (MD_MAJOR, 0x35)
typedef struct mdu_version_s { typedef struct mdu_version_s {
int major; int major;

View File

@ -1406,6 +1406,15 @@ will avoid reading from these devices if possible.
.BR \-\-readwrite .BR \-\-readwrite
Subsequent devices that are added or re\-added will have the 'write-mostly' Subsequent devices that are added or re\-added will have the 'write-mostly'
flag cleared. flag cleared.
.TP
.BR \-\-cluster\-confirm
Confirm the existence of the device. This is issued in response to an \-\-add
request by a node in a cluster. When a node adds a device it sends a message
to all nodes in the cluster to look for a device with a UUID. This translates
to a udev notification with the UUID of the device to be added and the slot
number. The receiving node must acknowledge this message
with \-\-cluster\-confirm. Valid arguments are <slot>:<devicename> in case
the device is found or <slot>:missing in case the device is not found.
.P .P
Each of these options requires that the first device listed is the array Each of these options requires that the first device listed is the array

View File

@ -196,6 +196,7 @@ int main(int argc, char *argv[])
case 'f': case 'f':
case Fail: case Fail:
case ReAdd: /* re-add */ case ReAdd: /* re-add */
case ClusterConfirm:
if (!mode) { if (!mode) {
newmode = MANAGE; newmode = MANAGE;
shortopt = short_bitmap_options; shortopt = short_bitmap_options;
@ -933,6 +934,9 @@ int main(int argc, char *argv[])
* remove the device */ * remove the device */
devmode = 'f'; devmode = 'f';
continue; continue;
case O(MANAGE, ClusterConfirm):
devmode = 'c';
continue;
case O(MANAGE,Replace): case O(MANAGE,Replace):
/* Mark these devices for replacement */ /* Mark these devices for replacement */
devmode = 'R'; devmode = 'R';

View File

@ -346,6 +346,7 @@ enum special_options {
Action, Action,
Nodes, Nodes,
ClusterName, ClusterName,
ClusterConfirm,
}; };
enum prefix_standard { enum prefix_standard {
@ -1281,6 +1282,7 @@ extern int parse_uuid(char *str, int uuid[4]);
extern int parse_layout_10(char *layout); extern int parse_layout_10(char *layout);
extern int parse_layout_faulty(char *layout); extern int parse_layout_faulty(char *layout);
extern long parse_num(char *num); extern long parse_num(char *num);
extern int parse_cluster_confirm_arg(char *inp, char **devname, int *slot);
extern int check_ext2(int fd, char *name); extern int check_ext2(int fd, char *name);
extern int check_reiser(int fd, char *name); extern int check_reiser(int fd, char *name);
extern int check_raid(int fd, char *name); extern int check_raid(int fd, char *name);

10
util.c
View File

@ -280,6 +280,16 @@ long parse_num(char *num)
} }
#endif #endif
/*
 * Parse a --cluster-confirm argument of the form "<slot>:<devname>".
 *
 * input:   the argument string; it is not modified.
 * devname: on success, set to point at the text following the ':'
 *          (a pointer into 'input', not a copy).
 * slot:    on success, set to the non-negative decimal number that
 *          precedes the ':'.
 *
 * Returns 0 on success and -1 if the argument is malformed (no leading
 * digit, no ':' directly after the number, or a number that does not
 * fit in an int).  On failure neither *devname nor *slot is written.
 */
int parse_cluster_confirm_arg(char *input, char **devname, int *slot)
{
	char *end;
	unsigned long val;

	/* strtoul() skips white space and accepts a sign, which would let
	 * "-1:dev" wrap round to a bogus slot; insist on a plain digit.
	 */
	if (input[0] < '0' || input[0] > '9')
		return -1;

	val = strtoul(input, &end, 10);
	if (end[0] != ':')
		return -1;

	/* ~0U >> 1 is the largest positive int on the usual ABIs; this also
	 * rejects the ULONG_MAX that strtoul() returns on overflow.
	 */
	if (val > (~0U >> 1))
		return -1;

	*slot = (int)val;
	*devname = end + 1;
	return 0;
}
void remove_partitions(int fd) void remove_partitions(int fd)
{ {
/* remove partitions from this block devices. /* remove partitions from this block devices.