diff --git a/Assemble.c b/Assemble.c index 245e213..8940c27 100644 --- a/Assemble.c +++ b/Assemble.c @@ -635,7 +635,13 @@ static int load_devices(struct devs *devices, char *devmap, if (strcmp(c->update, "byteorder") == 0) err = 0; - else + else if (strcmp(c->update, "home-cluster") == 0) { + tst->cluster_name = c->homecluster; + err = tst->ss->write_bitmap(tst, dfd, NameUpdate); /* keep the result in err, as the "nodes" branch does */ + } else if (strcmp(c->update, "nodes") == 0) { + tst->nodes = c->nodes; + err = tst->ss->write_bitmap(tst, dfd, NodeNumUpdate); + } else err = tst->ss->update_super(tst, content, c->update, devname, c->verbose, ident->uuid_set, diff --git a/Create.c b/Create.c index ef28da0..b62d8d4 100644 --- a/Create.c +++ b/Create.c @@ -531,6 +531,8 @@ int Create(struct supertype *st, char *mddev, st->ss->name); warn = 1; } + st->nodes = c->nodes; + st->cluster_name = c->homecluster; if (warn) { if (c->runstop!= 1) { @@ -750,7 +752,8 @@ int Create(struct supertype *st, char *mddev, #endif } - if (s->bitmap_file && strcmp(s->bitmap_file, "internal")==0) { + if (s->bitmap_file && (strcmp(s->bitmap_file, "internal")==0 || + strcmp(s->bitmap_file, "clustered")==0)) { if ((vers%100) < 2) { pr_err("internal bitmaps not supported by this kernel.\n"); goto abort_locked; diff --git a/Grow.c b/Grow.c index a336593..a1f8e4c 100644 --- a/Grow.c +++ b/Grow.c @@ -330,8 +330,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s) } return 0; } - pr_err("Internal bitmap already present on %s\n", - devname); + pr_err("%s bitmap already present on %s\n", s->bitmap_file, devname); return 1; } @@ -375,7 +374,8 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s) free(st); return 1; } - if (strcmp(s->bitmap_file, "internal") == 0) { + if (strcmp(s->bitmap_file, "internal") == 0 || + strcmp(s->bitmap_file, "clustered") == 0) { int rv; int d; int offset_setable = 0; @@ -384,6 +384,8 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s) 
pr_err("Internal bitmaps not supported with %s metadata\n", st->ss->name); return 1; } + st->nodes = c->nodes; + st->cluster_name = c->homecluster; mdi = sysfs_read(fd, NULL, GET_BITMAP_LOCATION); if (mdi) offset_setable = 1; @@ -410,7 +412,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s) bitmapsize, offset_setable, major) ) - st->ss->write_bitmap(st, fd2); + st->ss->write_bitmap(st, fd2, NoUpdate); else { pr_err("failed to create internal bitmap - chunksize problem.\n"); close(fd2); @@ -426,6 +428,8 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s) rv = sysfs_set_num_signed(mdi, NULL, "bitmap/location", mdi->bitmap_offset); } else { + if (strcmp(s->bitmap_file, "clustered") == 0) + array.state |= (1<not_persistent==0) { int dfd; @@ -955,6 +959,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv, } free(used); } + + if (array->state & (1 << MD_SB_CLUSTERED)) { + if (dv->disposition == 'c') + disc.state |= (1 << MD_DISK_CANDIDATE); + else + disc.state |= (1 << MD_DISK_CLUSTER_ADD); + } + if (dv->writemostly == 1) disc.state |= (1 << MD_DISK_WRITEMOSTLY); if (tst->ss->external) { @@ -1274,6 +1286,7 @@ int Manage_subdevs(char *devname, int fd, * variant on 'A' * 'F' - Another variant of 'A', where the device was faulty * so must be removed from the array first. + * 'c' - confirm the device as found (for clustered environments) * * For 'f' and 'r', the device can also be a kernel-internal * name such as 'sdb'. 
@@ -1289,6 +1302,7 @@ int Manage_subdevs(char *devname, int fd, struct mdinfo info; int frozen = 0; int busy = 0; + int raid_slot = -1; if (ioctl(fd, GET_ARRAY_INFO, &array)) { pr_err("Cannot get array info for %s\n", @@ -1317,6 +1331,17 @@ int Manage_subdevs(char *devname, int fd, int rv; int mj,mn; + raid_slot = -1; + if (dv->disposition == 'c') { + rv = parse_cluster_confirm_arg(dv->devname, + &dv->devname, + &raid_slot); + if (rv) { /* parse_cluster_confirm_arg() returns 0 on success, -1 on a malformed argument */ + pr_err("Could not get the devname of cluster\n"); + goto abort; + } + } + if (strcmp(dv->devname, "failed") == 0 || strcmp(dv->devname, "faulty") == 0) { if (dv->disposition != 'A' @@ -1342,6 +1367,11 @@ int Manage_subdevs(char *devname, int fd, if (strcmp(dv->devname, "missing") == 0) { struct mddev_dev *add_devlist = NULL; struct mddev_dev **dp; + if (dv->disposition == 'c') { + rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL); + break; + } + if (dv->disposition != 'A') { pr_err("'missing' only meaningful with --re-add\n"); goto abort; @@ -1472,6 +1502,7 @@ int Manage_subdevs(char *devname, int fd, case 'A': case 'M': /* --re-add missing */ case 'F': /* --re-add faulty */ + case 'c': /* --cluster-confirm */ /* add the device */ if (subarray) { pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n"); @@ -1505,7 +1536,7 @@ int Manage_subdevs(char *devname, int fd, } rv = Manage_add(fd, tfd, dv, tst, &array, force, verbose, devname, update, - rdev, array_size); + rdev, array_size, raid_slot); close(tfd); tfd = -1; if (rv < 0) diff --git a/ReadMe.c b/ReadMe.c index 8af8cd0..f4b4a4f 100644 --- a/ReadMe.c +++ b/ReadMe.c @@ -140,6 +140,8 @@ struct option long_options[] = { {"homehost", 1, 0, HomeHost}, {"symlinks", 1, 0, Symlinks}, {"data-offset",1, 0, DataOffset}, + {"nodes",1, 0, Nodes}, /* also for --assemble */ + {"home-cluster",1, 0, ClusterName}, /* For assemble */ {"uuid", 1, 0, 'u'}, @@ -167,6 +169,7 @@ struct option long_options[] = { {"wait", 0, 0, WaitOpt}, {"wait-clean", 0, 0, Waitclean 
}, {"action", 1, 0, Action }, + {"cluster-confirm", 0, 0, ClusterConfirm}, /* For Detail/Examine */ {"brief", 0, 0, Brief}, diff --git a/bitmap.c b/bitmap.c index bbe9bae..60865bc 100644 --- a/bitmap.c +++ b/bitmap.c @@ -32,6 +32,8 @@ static inline void sb_le_to_cpu(bitmap_super_t *sb) sb->daemon_sleep = __le32_to_cpu(sb->daemon_sleep); sb->sync_size = __le64_to_cpu(sb->sync_size); sb->write_behind = __le32_to_cpu(sb->write_behind); + sb->nodes = __le32_to_cpu(sb->nodes); + sb->sectors_reserved = __le32_to_cpu(sb->sectors_reserved); } static inline void sb_cpu_to_le(bitmap_super_t *sb) @@ -258,7 +260,7 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st) int rv = 1; char buf[64]; int swap; - int fd; + int fd, i; __u32 uuid32[4]; fd = bitmap_file_open(filename, &st); @@ -315,9 +317,13 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st) uuid32[2], uuid32[3]); - printf(" Events : %llu\n", (unsigned long long)sb->events); - printf(" Events Cleared : %llu\n", (unsigned long long)sb->events_cleared); - printf(" State : %s\n", bitmap_state(sb->state)); + if (sb->nodes == 0) { + printf(" Events : %llu\n", (unsigned long long)sb->events); + printf(" Events Cleared : %llu\n", (unsigned long long)sb->events_cleared); + printf(" State : %s\n", bitmap_state(sb->state)); + + } + printf(" Chunksize : %s\n", human_chunksize(sb->chunksize)); printf(" Daemon : %ds flush period\n", sb->daemon_sleep); if (sb->write_behind) @@ -327,11 +333,40 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st) printf(" Write Mode : %s\n", buf); printf(" Sync Size : %llu%s\n", (unsigned long long)sb->sync_size/2, human_size(sb->sync_size * 512)); - if (brief) - goto free_info; - printf(" Bitmap : %llu bits (chunks), %llu dirty (%2.1f%%)\n", - info->total_bits, info->dirty_bits, - 100.0 * info->dirty_bits / (info->total_bits?:1)); + + if (sb->nodes == 0) { + if (brief) + goto free_info; + printf(" Bitmap : %llu bits (chunks), %llu dirty (%2.1f%%)\n", + 
info->total_bits, info->dirty_bits, + 100.0 * info->dirty_bits / (info->total_bits?:1)); + } else { + printf(" Cluster nodes : %d\n", sb->nodes); + printf(" Cluster name : %64s\n", sb->cluster_name); + for (i = 0; i < (int)sb->nodes; i++) { + if (i) { + free(info); + info = bitmap_fd_read(fd, brief); + sb = &info->sb; + } + if (sb->magic != BITMAP_MAGIC) + pr_err("invalid bitmap magic 0x%x, the bitmap file appears to be corrupted\n", sb->magic); + + printf(" Node Slot : %d\n", i); + printf(" Events : %llu\n", + (unsigned long long)sb->events); + printf(" Events Cleared : %llu\n", + (unsigned long long)sb->events_cleared); + printf(" State : %s\n", bitmap_state(sb->state)); + if (brief) + continue; + printf(" Bitmap : %llu bits (chunks), %llu dirty (%2.1f%%)\n", + info->total_bits, info->dirty_bits, + 100.0 * info->dirty_bits / (info->total_bits?:1)); + + } + } + free_info: free(info); return rv; diff --git a/bitmap.h b/bitmap.h index c8725a3..adbf0b4 100644 --- a/bitmap.h +++ b/bitmap.h @@ -154,8 +154,11 @@ typedef struct bitmap_super_s { __u32 chunksize; /* 52 the bitmap chunk size in bytes */ __u32 daemon_sleep; /* 56 seconds between disk flushes */ __u32 write_behind; /* 60 number of outstanding write-behind writes */ - - __u8 pad[256 - 64]; /* set to zero */ + __u32 sectors_reserved; /* 64 number of 512-byte sectors that are + * reserved for the bitmap. */ + __u32 nodes; /* 68 the maximum number of nodes in cluster. 
*/ + __u8 cluster_name[64]; /* 72 cluster name to which this md belongs */ + __u8 pad[256 - 136]; /* set to zero */ } bitmap_super_t; /* notes: diff --git a/config.c b/config.c index a882ed3..c58c8fe 100644 --- a/config.c +++ b/config.c @@ -77,7 +77,7 @@ char DefaultAltConfFile[] = CONFFILE2; char DefaultAltConfDir[] = CONFFILE2 ".d"; enum linetype { Devices, Array, Mailaddr, Mailfrom, Program, CreateDev, - Homehost, AutoMode, Policy, PartPolicy, LTEnd }; + Homehost, HomeCluster, AutoMode, Policy, PartPolicy, LTEnd }; char *keywords[] = { [Devices] = "devices", [Array] = "array", @@ -86,6 +86,7 @@ char *keywords[] = { [Program] = "program", [CreateDev]= "create", [Homehost] = "homehost", + [HomeCluster] = "homecluster", [AutoMode] = "auto", [Policy] = "policy", [PartPolicy]="part-policy", @@ -562,6 +563,21 @@ void homehostline(char *line) } } +static char *home_cluster = NULL; +void homeclusterline(char *line) +{ + char *w; + + for (w=dl_next(line); w != line ; w=dl_next(w)) { + if (home_cluster == NULL) { + if (strcasecmp(w, "")==0) + home_cluster = xstrdup(""); + else + home_cluster = xstrdup(w); + } + } +} + char auto_yes[] = "yes"; char auto_no[] = "no"; char auto_homehost[] = "homehost"; @@ -724,6 +740,9 @@ void conf_file(FILE *f) case Homehost: homehostline(line); break; + case HomeCluster: + homeclusterline(line); + break; case AutoMode: autoline(line); break; @@ -884,6 +903,12 @@ char *conf_get_homehost(int *require_homehostp) return home_host; } +char *conf_get_homecluster(void) +{ + load_conffile(); + return home_cluster; +} + struct createinfo *conf_get_create_info(void) { load_conffile(); diff --git a/md_p.h b/md_p.h index c4846ba..9b6b5f8 100644 --- a/md_p.h +++ b/md_p.h @@ -78,6 +78,12 @@ #define MD_DISK_ACTIVE 1 /* disk is running but may not be in sync */ #define MD_DISK_SYNC 2 /* disk is in sync with the raid set */ #define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */ +#define MD_DISK_CLUSTER_ADD 4 /* Initiate a disk add across the 
cluster + * For clustered enviroments only. + */ +#define MD_DISK_CANDIDATE 5 /* disk is added as spare (local) until confirmed + * For clustered enviroments only. + */ #define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config. * read requests will only be sent here in @@ -106,6 +112,7 @@ typedef struct mdp_device_descriptor_s { #define MD_SB_BLOCK_CONTAINER_RESHAPE 3 /* block container wide reshapes */ #define MD_SB_BLOCK_VOLUME 4 /* block activation of array, other arrays * in container can be activated */ +#define MD_SB_CLUSTERED 5 /* MD is clustered */ #define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */ typedef struct mdp_superblock_s { diff --git a/md_u.h b/md_u.h index be9868a..76068d6 100644 --- a/md_u.h +++ b/md_u.h @@ -44,6 +44,7 @@ #define STOP_ARRAY _IO (MD_MAJOR, 0x32) #define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) #define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) +#define CLUSTERED_DISK_NACK _IO (MD_MAJOR, 0x35) typedef struct mdu_version_s { int major; diff --git a/mdadm.8.in b/mdadm.8.in index 912ee4c..e1ea9a8 100644 --- a/mdadm.8.in +++ b/mdadm.8.in @@ -422,6 +422,12 @@ This functionality is currently only provided by and .BR \-\-monitor . +.TP +.B \-\-home\-cluster= +specifies the cluster name for the md device. The md device can be assembled +only on the cluster which matches the name specified. If this option is not +provided, mdadm tries to detect the cluster name automatically. + .SH For create, build, or grow: .TP @@ -701,7 +707,12 @@ and so is replicated on all devices. If the word .B "none" is given with .B \-\-grow -mode, then any bitmap that is present is removed. +mode, then any bitmap that is present is removed. If the word +.B "clustered" +is given, the array is created for a clustered environment. One bitmap +is created for each node as defined by the +.B \-\-nodes +parameter and are stored internally. 
To help catch typing errors, the filename must contain at least one slash ('/') if it is a real file (not 'internal' or 'none'). @@ -973,6 +984,12 @@ However for RAID0, it is not possible to add spares. So to increase the number of devices in a RAID0, it is necessary to set the new number of devices, and to add the new devices, in the same command. +.TP +.BR \-\-nodes +Only works when the array is for clustered environment. It specifies +the maximum number of nodes in the cluster that will use this device +simultaneously. If not specified, this defaults to 4. + .SH For assemble: .TP @@ -1087,7 +1104,9 @@ argument given to this flag can be one of .BR summaries , .BR uuid , .BR name , +.BR nodes , .BR homehost , +.BR home-cluster , .BR resync , .BR byteorder , .BR devicesize , @@ -1138,6 +1157,13 @@ The .B name option will change the .I name +of the array as stored in the superblock and bitmap. This option only +works for clustered environment. + +The +.B nodes +option will change the +.I nodes of the array as stored in the superblock. This is only supported for version-1 superblocks. @@ -1149,6 +1175,11 @@ as recorded in the superblock. For version-0 superblocks, this is the same as updating the UUID. For version-1 superblocks, this involves updating the name. +The +.B home\-cluster +option will change the cluster name as recorded in the superblock and +bitmap. This option only works for clustered environment. + The .B resync option will cause the array to be marked @@ -1396,6 +1427,15 @@ will avoid reading from these devices if possible. .BR \-\-readwrite Subsequent devices that are added or re\-added will have the 'write-mostly' flag cleared. +.TP +.BR \-\-cluster\-confirm +Confirm the existence of the device. This is issued in response to an \-\-add +request by a node in a cluster. When a node adds a device it sends a message +to all nodes in the cluster to look for a device with a UUID. 
This translates +to a udev notification with the UUID of the device to be added and the slot +number. The receiving node must acknowledge this message +with \-\-cluster\-confirm. Valid arguments are <slot>:<devicename> in case +the device is found or <slot>:missing in case the device is not found. .P Each of these options requires that the first device listed is the array diff --git a/mdadm.c b/mdadm.c index 93732a8..dcb49d0 100644 --- a/mdadm.c +++ b/mdadm.c @@ -196,6 +196,7 @@ int main(int argc, char *argv[]) case 'f': case Fail: case ReAdd: /* re-add */ + case ClusterConfirm: if (!mode) { newmode = MANAGE; shortopt = short_bitmap_options; @@ -588,7 +589,23 @@ int main(int argc, char *argv[]) } ident.raid_disks = s.raiddisks; continue; - + case O(ASSEMBLE, Nodes): + case O(CREATE, Nodes): + c.nodes = parse_num(optarg); + if (c.nodes <= 0) { + pr_err("invalid number for the number of cluster nodes: %s\n", + optarg); + exit(2); + } + continue; + case O(CREATE, ClusterName): + case O(ASSEMBLE, ClusterName): + c.homecluster = optarg; + if (strlen(c.homecluster) > 64) { + pr_err("Cluster name too big.\n"); + exit(ERANGE); + } + continue; case O(CREATE,'x'): /* number of spare (eXtra) disks */ if (s.sparedisks) { pr_err("spare-devices set twice: %d and %s\n", @@ -726,6 +743,10 @@ int main(int argc, char *argv[]) continue; if (strcmp(c.update, "homehost")==0) continue; + if (strcmp(c.update, "home-cluster")==0) + continue; + if (strcmp(c.update, "nodes")==0) + continue; if (strcmp(c.update, "devicesize")==0) continue; if (strcmp(c.update, "no-bitmap")==0) @@ -764,8 +785,8 @@ int main(int argc, char *argv[]) Name, c.update); } fprintf(outf, "Valid --update options are:\n" - " 'sparc2.2', 'super-minor', 'uuid', 'name', 'resync',\n" - " 'summaries', 'homehost', 'byteorder', 'devicesize',\n" + " 'sparc2.2', 'super-minor', 'uuid', 'name', 'nodes', 'resync',\n" + " 'summaries', 'homehost', 'home-cluster', 'byteorder', 'devicesize',\n" " 'no-bitmap', 'metadata', 'revert-reshape'\n" " 'bbl', 
'no-bbl'\n" ); @@ -919,6 +940,9 @@ int main(int argc, char *argv[]) * remove the device */ devmode = 'f'; continue; + case O(MANAGE, ClusterConfirm): + devmode = 'c'; + continue; case O(MANAGE,Replace): /* Mark these devices for replacement */ devmode = 'R'; @@ -1097,6 +1121,15 @@ int main(int argc, char *argv[]) s.bitmap_file = optarg; continue; } + if (strcmp(optarg, "clustered")== 0) { + s.bitmap_file = optarg; + /* Set the default number of cluster nodes + * to 4 if not already set by user + */ + if (c.nodes < 1) + c.nodes = 4; + continue; + } /* probable typo */ pr_err("bitmap file must contain a '/', or be 'internal', or 'none'\n" " not '%s'\n", optarg); @@ -1260,6 +1293,16 @@ int main(int argc, char *argv[]) c.require_homehost = 0; } + if (c.homecluster == NULL && (c.nodes > 0)) { + c.homecluster = conf_get_homecluster(); + if (c.homecluster == NULL) + rv = get_cluster_name(&c.homecluster); + if (rv != 0) { + pr_err("The md can't get cluster name\n"); + exit(1); + } + } + if (c.backup_file && data_offset != INVALID_SECTORS) { pr_err("--backup-file and --data-offset are incompatible\n"); exit(2); @@ -1377,6 +1420,21 @@ int main(int argc, char *argv[]) case CREATE: if (c.delay == 0) c.delay = DEFAULT_BITMAP_DELAY; + + if (c.nodes) { + if (!s.bitmap_file || strcmp(s.bitmap_file, "clustered") != 0) { + pr_err("--nodes argument only compatible with --bitmap=clustered\n"); + rv = 1; + break; + } + + if (s.level != 1) { + pr_err("--bitmap=clustered is currently supported with RAID mirror only\n"); + rv = 1; + break; + } + } + if (s.write_behind && !s.bitmap_file) { pr_err("write-behind mode requires a bitmap.\n"); rv = 1; diff --git a/mdadm.h b/mdadm.h index 9df43d4..54a84ef 100644 --- a/mdadm.h +++ b/mdadm.h @@ -344,6 +344,9 @@ enum special_options { Dump, Restore, Action, + Nodes, + ClusterName, + ClusterConfirm, }; enum prefix_standard { @@ -351,6 +354,12 @@ enum prefix_standard { IEC }; +enum bitmap_update { + NoUpdate, + NameUpdate, + NodeNumUpdate, +}; + /* 
structures read from config file */ /* List of mddevice names and identifiers * Identifiers can be: @@ -418,6 +427,8 @@ struct context { char *backup_file; int invalid_backup; char *action; + int nodes; + char *homecluster; }; struct shape { @@ -844,7 +855,7 @@ extern struct superswitch { /* if add_internal_bitmap succeeded for existing array, this * writes it out. */ - int (*write_bitmap)(struct supertype *st, int fd); + int (*write_bitmap)(struct supertype *st, int fd, enum bitmap_update update); /* Free the superblock and any other allocated data */ void (*free_super)(struct supertype *st); @@ -1028,6 +1039,8 @@ struct supertype { */ int devcnt; int retry_soon; + int nodes; + char *cluster_name; struct mdinfo *devs; @@ -1274,6 +1287,7 @@ extern int parse_uuid(char *str, int uuid[4]); extern int parse_layout_10(char *layout); extern int parse_layout_faulty(char *layout); extern long parse_num(char *num); +extern int parse_cluster_confirm_arg(char *inp, char **devname, int *slot); extern int check_ext2(int fd, char *name); extern int check_reiser(int fd, char *name); extern int check_raid(int fd, char *name); @@ -1304,6 +1318,7 @@ extern char *conf_get_mailaddr(void); extern char *conf_get_mailfrom(void); extern char *conf_get_program(void); extern char *conf_get_homehost(int *require_homehostp); +extern char *conf_get_homecluster(void); extern char *conf_line(FILE *file); extern char *conf_word(FILE *file, int allow_key); extern void print_quoted(char *str); @@ -1412,6 +1427,7 @@ extern char *stat2devnm(struct stat *st); extern char *fd2devnm(int fd); extern int in_initrd(void); +extern int get_cluster_name(char **name); #define _ROUND_UP(val, base) (((val) + (base) - 1) & ~(base - 1)) #define ROUND_UP(val, base) _ROUND_UP(val, (typeof(val))(base)) diff --git a/super0.c b/super0.c index deb5999..6ad9d39 100644 --- a/super0.c +++ b/super0.c @@ -900,7 +900,7 @@ static int write_init_super0(struct supertype *st) rv = store_super0(st, di->fd); if (rv == 0 && 
(sb->state & (1<ss->write_bitmap(st, di->fd); + rv = st->ss->write_bitmap(st, di->fd, NoUpdate); if (rv) pr_err("failed to write superblock to %s\n", @@ -1175,7 +1175,7 @@ static void locate_bitmap0(struct supertype *st, int fd) lseek64(fd, offset, 0); } -static int write_bitmap0(struct supertype *st, int fd) +static int write_bitmap0(struct supertype *st, int fd, enum bitmap_update update) { unsigned long long dsize; unsigned long long offset; diff --git a/super1.c b/super1.c index f0508fe..fda71e3 100644 --- a/super1.c +++ b/super1.c @@ -134,6 +134,20 @@ struct misc_dev_info { |MD_FEATURE_NEW_OFFSET \ ) +/* return how many bytes are needed for bitmap, for cluster-md each node + * should have it's own bitmap */ +static unsigned int calc_bitmap_size(bitmap_super_t *bms, unsigned int boundary) +{ + unsigned long long bits, bytes; + + bits = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9); + bytes = (bits+7) >> 3; + bytes += sizeof(bitmap_super_t); + bytes = ROUND_UP(bytes, boundary); + + return bytes; +} + static unsigned int calc_sb_1_csum(struct mdp_superblock_1 * sb) { unsigned int disk_csum, csum; @@ -256,6 +270,7 @@ static int awrite(struct align_fd *afd, void *buf, int len) static void examine_super1(struct supertype *st, char *homehost) { struct mdp_superblock_1 *sb = st->sb; + bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb)+MAX_SB_SIZE); time_t atime; unsigned int d; int role; @@ -289,6 +304,8 @@ static void examine_super1(struct supertype *st, char *homehost) strncmp(sb->set_name, homehost, l) == 0) printf(" (local to host %s)", homehost); printf("\n"); + if (bms->nodes > 0) + printf("Cluster Name : %s", bms->cluster_name); atime = __le64_to_cpu(sb->ctime) & 0xFFFFFFFFFFULL; printf(" Creation Time : %.24s\n", ctime(&atime)); c=map_num(pers, __le32_to_cpu(sb->level)); @@ -681,12 +698,8 @@ static int copy_metadata1(struct supertype *st, int from, int to) /* have the header, can calculate * correct bitmap bytes */ bitmap_super_t *bms; - 
int bits; bms = (void*)buf; - bits = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9); - bytes = (bits+7) >> 3; - bytes += sizeof(bitmap_super_t); - bytes = ROUND_UP(bytes, 512); + bytes = calc_bitmap_size(bms, 512); if (n > bytes) n = bytes; } @@ -740,6 +753,7 @@ err: static void detail_super1(struct supertype *st, char *homehost) { struct mdp_superblock_1 *sb = st->sb; + bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE); int i; int l = homehost ? strlen(homehost) : 0; @@ -748,6 +762,8 @@ static void detail_super1(struct supertype *st, char *homehost) sb->set_name[l] == ':' && strncmp(sb->set_name, homehost, l) == 0) printf(" (local to host %s)", homehost); + if (bms->nodes > 0) + printf("Cluster Name : %64s", bms->cluster_name); printf("\n UUID : "); for (i=0; i<16; i++) { if ((i&3)==0 && i != 0) printf(":"); @@ -891,6 +907,8 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map) info->array.state = (__le64_to_cpu(sb->resync_offset) == MaxSector) ? 
1 : 0; + if (__le32_to_cpu(bsb->nodes) > 1) + info->array.state |= (1 << MD_SB_CLUSTERED); info->data_offset = __le64_to_cpu(sb->data_offset); info->component_size = __le64_to_cpu(sb->size); @@ -1689,7 +1707,7 @@ static int write_init_super1(struct supertype *st) sb->sb_csum = calc_sb_1_csum(sb); rv = store_super1(st, di->fd); if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1)) - rv = st->ss->write_bitmap(st, di->fd); + rv = st->ss->write_bitmap(st, di->fd, NoUpdate); close(di->fd); di->fd = -1; if (rv) @@ -2054,7 +2072,7 @@ add_internal_bitmap1(struct supertype *st, bbl_size = -bbl_offset; if (!may_change || (room < 3*2 && - __le32_to_cpu(sb->max_dev) <= 384)) { + __le32_to_cpu(sb->max_dev) <= 384)) { room = 3*2; offset = 1*2; bbl_size = 0; @@ -2144,6 +2162,10 @@ add_internal_bitmap1(struct supertype *st, bms->daemon_sleep = __cpu_to_le32(delay); bms->sync_size = __cpu_to_le64(size); bms->write_behind = __cpu_to_le32(write_behind); + bms->nodes = __cpu_to_le32(st->nodes); + if (st->cluster_name) /* copy at most the fixed-size on-disk field, never strlen() of the source */ + strncpy((char *)bms->cluster_name, + st->cluster_name, sizeof(bms->cluster_name)); *chunkp = chunk; return 1; @@ -2169,7 +2191,7 @@ static void locate_bitmap1(struct supertype *st, int fd) lseek64(fd, offset<<9, 0); } -static int write_bitmap1(struct supertype *st, int fd) +static int write_bitmap1(struct supertype *st, int fd, enum bitmap_update update) { struct mdp_superblock_1 *sb = st->sb; bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb)+MAX_SB_SIZE); @@ -2177,6 +2199,43 @@ static int write_bitmap1(struct supertype *st, int fd) void *buf; int towrite, n; struct align_fd afd; + unsigned int i = 0; + unsigned long long total_bm_space, bm_space_per_node; + + switch (update) { + case NameUpdate: + /* update cluster name */ + if (st->cluster_name) { + memset((char *)bms->cluster_name, 0, sizeof(bms->cluster_name)); + strncpy((char *)bms->cluster_name, st->cluster_name, 64); + } + break; + case NodeNumUpdate: + /* cluster md only supports superblock 1.2 now */ + if 
(st->minor_version != 2) { + pr_err("Warning: cluster md only works with superblock 1.2\n"); + return -EINVAL; + } + + /* Each node has an independent bitmap, it is necessary to calculate the + * space is enough or not, first get how many bytes for the total bitmap */ + bm_space_per_node = calc_bitmap_size(bms, 4096); + + total_bm_space = 512 * (__le64_to_cpu(sb->data_offset) - __le64_to_cpu(sb->super_offset)); + total_bm_space = total_bm_space - 4096; /* leave another 4k for superblock */ + + if (bm_space_per_node * st->nodes > total_bm_space) { + pr_err("Warning: The max num of nodes can't exceed %llu\n", + total_bm_space / bm_space_per_node); + return -ENOMEM; + } + + bms->nodes = __cpu_to_le32(st->nodes); + break; + case NoUpdate: + default: + break; + } init_afd(&afd, fd); @@ -2185,27 +2244,37 @@ static int write_bitmap1(struct supertype *st, int fd) if (posix_memalign(&buf, 4096, 4096)) return -ENOMEM; - memset(buf, 0xff, 4096); - memcpy(buf, (char *)bms, sizeof(bitmap_super_t)); - - towrite = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9); - towrite = (towrite+7) >> 3; /* bits to bytes */ - towrite += sizeof(bitmap_super_t); - towrite = ROUND_UP(towrite, 512); - while (towrite > 0) { - n = towrite; - if (n > 4096) - n = 4096; - n = awrite(&afd, buf, n); - if (n > 0) - towrite -= n; + do { + /* Only the bitmap[0] should resync + * whole device on initial assembly + */ + if (i) + memset(buf, 0x00, 4096); else + memset(buf, 0xff, 4096); + memcpy(buf, (char *)bms, sizeof(bitmap_super_t)); + + towrite = calc_bitmap_size(bms, 4096); + while (towrite > 0) { + n = towrite; + if (n > 4096) + n = 4096; + n = awrite(&afd, buf, n); + if (n > 0) + towrite -= n; + else + break; + if (i) + memset(buf, 0x00, 4096); + else + memset(buf, 0xff, 4096); + } + fsync(fd); + if (towrite) { + rv = -2; break; - memset(buf, 0xff, 4096); - } - fsync(fd); - if (towrite) - rv = -2; + } + } while (++i < __le32_to_cpu(bms->nodes)); free(buf); return rv; diff --git 
a/util.c b/util.c index cc98d3b..ea6e688 100644 --- a/util.c +++ b/util.c @@ -34,6 +34,15 @@ #include #include #include +#include <dlfcn.h> +#include <stdint.h> +#ifdef NO_COROSYNC + typedef uint64_t cmap_handle_t; + #define CS_OK 1 +#else + #include <corosync/cmap.h> +#endif + /* * following taken from linux/blkpg.h because they aren't @@ -271,6 +280,16 @@ long parse_num(char *num) } #endif +int parse_cluster_confirm_arg(char *input, char **devname, int *slot) +{ + char *dev; + *slot = strtoul(input, &dev, 10); + if (dev == input || dev[0] != ':') + return -1; + *devname = dev+1; + return 0; +} + void remove_partitions(int fd) { /* remove partitions from this block devices. @@ -1976,3 +1995,51 @@ void reopen_mddev(int mdfd) if (fd >= 0 && fd != mdfd) dup2(fd, mdfd); } + +int get_cluster_name(char **cluster_name) +{ + void *lib_handle = NULL; + int rv = -1; + + cmap_handle_t handle; + static int (*initialize)(cmap_handle_t *handle); + static int (*get_string)(cmap_handle_t handle, + const char *string, + char **name); + static int (*finalize)(cmap_handle_t handle); + + + lib_handle = dlopen("libcmap.so.4", RTLD_NOW | RTLD_LOCAL); + if (!lib_handle) + return rv; + + initialize = dlsym(lib_handle, "cmap_initialize"); + if (!initialize) + goto out; + + get_string = dlsym(lib_handle, "cmap_get_string"); + if (!get_string) + goto out; + + finalize = dlsym(lib_handle, "cmap_finalize"); + if (!finalize) + goto out; + + rv = initialize(&handle); + if (rv != CS_OK) + goto out; + + rv = get_string(handle, "totem.cluster_name", cluster_name); + if (rv != CS_OK) { + *cluster_name = NULL; /* not ours to free: the lookup failed, so hand back no string */ + rv = -1; + goto name_err; + } + + rv = 0; +name_err: + finalize(handle); +out: + dlclose(lib_handle); + return rv; +}