Merge branch 'cluster'

Now that 3.3.3 is out, it is time to include the cluster-support code.

Signed-off-by: NeilBrown <neilb@suse.com>
This commit is contained in:
NeilBrown 2015-07-27 11:01:08 +10:00
commit 653299b699
18 changed files with 433 additions and 56 deletions

View File

@ -635,7 +635,13 @@ static int load_devices(struct devs *devices, char *devmap,
if (strcmp(c->update, "byteorder") == 0)
err = 0;
else
else if (strcmp(c->update, "home-cluster") == 0) {
tst->cluster_name = c->homecluster;
tst->ss->write_bitmap(tst, dfd, NameUpdate);
} else if (strcmp(c->update, "nodes") == 0) {
tst->nodes = c->nodes;
err = tst->ss->write_bitmap(tst, dfd, NodeNumUpdate);
} else
err = tst->ss->update_super(tst, content, c->update,
devname, c->verbose,
ident->uuid_set,

View File

@ -531,6 +531,8 @@ int Create(struct supertype *st, char *mddev,
st->ss->name);
warn = 1;
}
st->nodes = c->nodes;
st->cluster_name = c->homecluster;
if (warn) {
if (c->runstop!= 1) {
@ -750,7 +752,8 @@ int Create(struct supertype *st, char *mddev,
#endif
}
if (s->bitmap_file && strcmp(s->bitmap_file, "internal")==0) {
if (s->bitmap_file && (strcmp(s->bitmap_file, "internal")==0 ||
strcmp(s->bitmap_file, "clustered")==0)) {
if ((vers%100) < 2) {
pr_err("internal bitmaps not supported by this kernel.\n");
goto abort_locked;

12
Grow.c
View File

@ -330,8 +330,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
}
return 0;
}
pr_err("Internal bitmap already present on %s\n",
devname);
pr_err("%s bitmap already present on %s\n", s->bitmap_file, devname);
return 1;
}
@ -375,7 +374,8 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
free(st);
return 1;
}
if (strcmp(s->bitmap_file, "internal") == 0) {
if (strcmp(s->bitmap_file, "internal") == 0 ||
strcmp(s->bitmap_file, "clustered") == 0) {
int rv;
int d;
int offset_setable = 0;
@ -384,6 +384,8 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
pr_err("Internal bitmaps not supported with %s metadata\n", st->ss->name);
return 1;
}
st->nodes = c->nodes;
st->cluster_name = c->homecluster;
mdi = sysfs_read(fd, NULL, GET_BITMAP_LOCATION);
if (mdi)
offset_setable = 1;
@ -410,7 +412,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
bitmapsize, offset_setable,
major)
)
st->ss->write_bitmap(st, fd2);
st->ss->write_bitmap(st, fd2, NoUpdate);
else {
pr_err("failed to create internal bitmap - chunksize problem.\n");
close(fd2);
@ -426,6 +428,8 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
rv = sysfs_set_num_signed(mdi, NULL, "bitmap/location",
mdi->bitmap_offset);
} else {
if (strcmp(s->bitmap_file, "clustered") == 0)
array.state |= (1<<MD_SB_CLUSTERED);
array.state |= (1<<MD_SB_BITMAP_PRESENT);
rv = ioctl(fd, SET_ARRAY_INFO, &array);
}

View File

@ -232,6 +232,11 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
devname);
goto out;
}
/* Skip the clustered ones. This should be started by
* clustering resource agents
*/
if (info.array.state & (1 << MD_SB_CLUSTERED))
goto out;
/* 3a/ if not, check for homehost match. If no match, continue
* but don't trust the 'name' in the array. Thus a 'random' minor

View File

@ -79,10 +79,13 @@ MDMON_DIR = $(RUN_DIR)
# place for autoreplace cookies
FAILED_SLOTS_DIR = $(RUN_DIR)/failed-slots
SYSTEMD_DIR=/lib/systemd/system
COROSYNC:=$(shell [ -d /usr/include/corosync ] || echo -DNO_COROSYNC)
DIRFLAGS = -DMAP_DIR=\"$(MAP_DIR)\" -DMAP_FILE=\"$(MAP_FILE)\"
DIRFLAGS += -DMDMON_DIR=\"$(MDMON_DIR)\"
DIRFLAGS += -DFAILED_SLOTS_DIR=\"$(FAILED_SLOTS_DIR)\"
CFLAGS = $(CWFLAGS) $(CXFLAGS) -DSendmail=\""$(MAILCMD)"\" $(CONFFILEFLAGS) $(DIRFLAGS)
CFLAGS = $(CWFLAGS) $(CXFLAGS) -DSendmail=\""$(MAILCMD)"\" $(CONFFILEFLAGS) $(DIRFLAGS) $(COROSYNC)
VERSION = $(shell [ -d .git ] && git describe HEAD | sed 's/mdadm-//')
VERS_DATE = $(shell [ -d .git ] && date --date="`git log -n1 --format=format:%cd --date=short`" '+%0dth %B %Y' | sed -e 's/1th/1st/' -e 's/2th/2nd/' -e 's/11st/11th/' -e 's/12nd/12th/')
@ -101,6 +104,7 @@ endif
# If you want a static binary, you might uncomment these
# LDFLAGS = -static
# STRIP = -s
LDLIBS=-ldl
INSTALL = /usr/bin/install
DESTDIR =

View File

@ -724,7 +724,8 @@ skip_re_add:
int Manage_add(int fd, int tfd, struct mddev_dev *dv,
struct supertype *tst, mdu_array_info_t *array,
int force, int verbose, char *devname,
char *update, unsigned long rdev, unsigned long long array_size)
char *update, unsigned long rdev, unsigned long long array_size,
int raid_slot)
{
unsigned long long ldsize;
struct supertype *dev_st = NULL;
@ -914,7 +915,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
}
disc.major = major(rdev);
disc.minor = minor(rdev);
disc.number =j;
if (raid_slot < 0)
disc.number = j;
else
disc.number = raid_slot;
disc.state = 0;
if (array->not_persistent==0) {
int dfd;
@ -955,6 +959,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
}
free(used);
}
if (array->state & (1 << MD_SB_CLUSTERED)) {
if (dv->disposition == 'c')
disc.state |= (1 << MD_DISK_CANDIDATE);
else
disc.state |= (1 << MD_DISK_CLUSTER_ADD);
}
if (dv->writemostly == 1)
disc.state |= (1 << MD_DISK_WRITEMOSTLY);
if (tst->ss->external) {
@ -1274,6 +1286,7 @@ int Manage_subdevs(char *devname, int fd,
* variant on 'A'
* 'F' - Another variant of 'A', where the device was faulty
* so must be removed from the array first.
* 'c' - confirm the device as found (for clustered environments)
*
* For 'f' and 'r', the device can also be a kernel-internal
* name such as 'sdb'.
@ -1289,6 +1302,7 @@ int Manage_subdevs(char *devname, int fd,
struct mdinfo info;
int frozen = 0;
int busy = 0;
int raid_slot = -1;
if (ioctl(fd, GET_ARRAY_INFO, &array)) {
pr_err("Cannot get array info for %s\n",
@ -1317,6 +1331,17 @@ int Manage_subdevs(char *devname, int fd,
int rv;
int mj,mn;
raid_slot = -1;
if (dv->disposition == 'c') {
	/* --cluster-confirm takes "<slot>:<devname>".  Split it so that
	 * dv->devname is left pointing at the device part and raid_slot
	 * holds the slot number.  parse_cluster_confirm_arg() returns 0
	 * on success and non-zero for a malformed argument, so a
	 * non-zero result is the error case.
	 */
	rv = parse_cluster_confirm_arg(dv->devname,
				       &dv->devname,
				       &raid_slot);
	if (rv) {
		pr_err("Could not get the devname of cluster\n");
		goto abort;
	}
}
if (strcmp(dv->devname, "failed") == 0 ||
strcmp(dv->devname, "faulty") == 0) {
if (dv->disposition != 'A'
@ -1342,6 +1367,11 @@ int Manage_subdevs(char *devname, int fd,
if (strcmp(dv->devname, "missing") == 0) {
struct mddev_dev *add_devlist = NULL;
struct mddev_dev **dp;
if (dv->disposition == 'c') {
rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
break;
}
if (dv->disposition != 'A') {
pr_err("'missing' only meaningful with --re-add\n");
goto abort;
@ -1472,6 +1502,7 @@ int Manage_subdevs(char *devname, int fd,
case 'A':
case 'M': /* --re-add missing */
case 'F': /* --re-add faulty */
case 'c': /* --cluster-confirm */
/* add the device */
if (subarray) {
pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n");
@ -1505,7 +1536,7 @@ int Manage_subdevs(char *devname, int fd,
}
rv = Manage_add(fd, tfd, dv, tst, &array,
force, verbose, devname, update,
rdev, array_size);
rdev, array_size, raid_slot);
close(tfd);
tfd = -1;
if (rv < 0)

View File

@ -140,6 +140,8 @@ struct option long_options[] = {
{"homehost", 1, 0, HomeHost},
{"symlinks", 1, 0, Symlinks},
{"data-offset",1, 0, DataOffset},
{"nodes",1, 0, Nodes}, /* also for --assemble */
{"home-cluster",1, 0, ClusterName},
/* For assemble */
{"uuid", 1, 0, 'u'},
@ -167,6 +169,7 @@ struct option long_options[] = {
{"wait", 0, 0, WaitOpt},
{"wait-clean", 0, 0, Waitclean },
{"action", 1, 0, Action },
{"cluster-confirm", 0, 0, ClusterConfirm},
/* For Detail/Examine */
{"brief", 0, 0, Brief},

View File

@ -32,6 +32,8 @@ static inline void sb_le_to_cpu(bitmap_super_t *sb)
sb->daemon_sleep = __le32_to_cpu(sb->daemon_sleep);
sb->sync_size = __le64_to_cpu(sb->sync_size);
sb->write_behind = __le32_to_cpu(sb->write_behind);
sb->nodes = __le32_to_cpu(sb->nodes);
sb->sectors_reserved = __le32_to_cpu(sb->sectors_reserved);
}
static inline void sb_cpu_to_le(bitmap_super_t *sb)
@ -258,7 +260,7 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st)
int rv = 1;
char buf[64];
int swap;
int fd;
int fd, i;
__u32 uuid32[4];
fd = bitmap_file_open(filename, &st);
@ -315,9 +317,13 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st)
uuid32[2],
uuid32[3]);
printf(" Events : %llu\n", (unsigned long long)sb->events);
printf(" Events Cleared : %llu\n", (unsigned long long)sb->events_cleared);
printf(" State : %s\n", bitmap_state(sb->state));
if (sb->nodes == 0) {
printf(" Events : %llu\n", (unsigned long long)sb->events);
printf(" Events Cleared : %llu\n", (unsigned long long)sb->events_cleared);
printf(" State : %s\n", bitmap_state(sb->state));
}
printf(" Chunksize : %s\n", human_chunksize(sb->chunksize));
printf(" Daemon : %ds flush period\n", sb->daemon_sleep);
if (sb->write_behind)
@ -327,11 +333,40 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st)
printf(" Write Mode : %s\n", buf);
printf(" Sync Size : %llu%s\n", (unsigned long long)sb->sync_size/2,
human_size(sb->sync_size * 512));
if (brief)
goto free_info;
printf(" Bitmap : %llu bits (chunks), %llu dirty (%2.1f%%)\n",
info->total_bits, info->dirty_bits,
100.0 * info->dirty_bits / (info->total_bits?:1));
if (sb->nodes == 0) {
if (brief)
goto free_info;
printf(" Bitmap : %llu bits (chunks), %llu dirty (%2.1f%%)\n",
info->total_bits, info->dirty_bits,
100.0 * info->dirty_bits / (info->total_bits?:1));
} else {
printf(" Cluster nodes : %d\n", sb->nodes);
printf(" Cluster name : %64s\n", sb->cluster_name);
for (i = 0; i < (int)sb->nodes; i++) {
if (i) {
free(info);
info = bitmap_fd_read(fd, brief);
sb = &info->sb;
}
if (sb->magic != BITMAP_MAGIC)
pr_err("invalid bitmap magic 0x%x, the bitmap file appears to be corrupted\n", sb->magic);
printf(" Node Slot : %d\n", i);
printf(" Events : %llu\n",
(unsigned long long)sb->events);
printf(" Events Cleared : %llu\n",
(unsigned long long)sb->events_cleared);
printf(" State : %s\n", bitmap_state(sb->state));
if (brief)
continue;
printf(" Bitmap : %llu bits (chunks), %llu dirty (%2.1f%%)\n",
info->total_bits, info->dirty_bits,
100.0 * info->dirty_bits / (info->total_bits?:1));
}
}
free_info:
free(info);
return rv;

View File

@ -154,8 +154,11 @@ typedef struct bitmap_super_s {
__u32 chunksize; /* 52 the bitmap chunk size in bytes */
__u32 daemon_sleep; /* 56 seconds between disk flushes */
__u32 write_behind; /* 60 number of outstanding write-behind writes */
__u8 pad[256 - 64]; /* set to zero */
__u32 sectors_reserved; /* 64 number of 512-byte sectors that are
* reserved for the bitmap. */
__u32 nodes; /* 68 the maximum number of nodes in cluster. */
__u8 cluster_name[64]; /* 72 cluster name to which this md belongs */
__u8 pad[256 - 136]; /* set to zero */
} bitmap_super_t;
/* notes:

View File

@ -77,7 +77,7 @@ char DefaultAltConfFile[] = CONFFILE2;
char DefaultAltConfDir[] = CONFFILE2 ".d";
enum linetype { Devices, Array, Mailaddr, Mailfrom, Program, CreateDev,
Homehost, AutoMode, Policy, PartPolicy, LTEnd };
Homehost, HomeCluster, AutoMode, Policy, PartPolicy, LTEnd };
char *keywords[] = {
[Devices] = "devices",
[Array] = "array",
@ -86,6 +86,7 @@ char *keywords[] = {
[Program] = "program",
[CreateDev]= "create",
[Homehost] = "homehost",
[HomeCluster] = "homecluster",
[AutoMode] = "auto",
[Policy] = "policy",
[PartPolicy]="part-policy",
@ -562,6 +563,21 @@ void homehostline(char *line)
}
}
static char *home_cluster = NULL;
/* Process one "homecluster" config line.
 *
 * 'line' is the head of a circular word list walked with dl_next().
 * The first word encountered while home_cluster is still unset becomes
 * the home cluster name; the special word "<none>" (compared
 * case-insensitively) is recorded as the empty string.  Every later
 * word, on this or any subsequent line, is ignored.
 */
void homeclusterline(char *line)
{
	char *word = dl_next(line);

	while (word != line) {
		if (!home_cluster) {
			char *value = word;

			if (strcasecmp(word, "<none>") == 0)
				value = "";
			home_cluster = xstrdup(value);
		}
		word = dl_next(word);
	}
}
char auto_yes[] = "yes";
char auto_no[] = "no";
char auto_homehost[] = "homehost";
@ -724,6 +740,9 @@ void conf_file(FILE *f)
case Homehost:
homehostline(line);
break;
case HomeCluster:
homeclusterline(line);
break;
case AutoMode:
autoline(line);
break;
@ -884,6 +903,12 @@ char *conf_get_homehost(int *require_homehostp)
return home_host;
}
/* Return the cluster name recorded from a "homecluster" config line.
 *
 * load_conffile() is called first so that any such line has been seen.
 * The result is NULL when no homecluster word was given, and the empty
 * string when the word was "<none>".  The pointer refers to the
 * file-level home_cluster variable, not to a fresh copy.
 */
char *conf_get_homecluster(void)
{
load_conffile();
return home_cluster;
}
struct createinfo *conf_get_create_info(void)
{
load_conffile();

7
md_p.h
View File

@ -78,6 +78,12 @@
#define MD_DISK_ACTIVE 1 /* disk is running but may not be in sync */
#define MD_DISK_SYNC 2 /* disk is in sync with the raid set */
#define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */
#define MD_DISK_CLUSTER_ADD 4 /* Initiate a disk add across the cluster
* For clustered environments only.
*/
#define MD_DISK_CANDIDATE 5 /* disk is added as spare (local) until confirmed
* For clustered environments only.
*/
#define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config.
* read requests will only be sent here in
@ -106,6 +112,7 @@ typedef struct mdp_device_descriptor_s {
#define MD_SB_BLOCK_CONTAINER_RESHAPE 3 /* block container wide reshapes */
#define MD_SB_BLOCK_VOLUME 4 /* block activation of array, other arrays
* in container can be activated */
#define MD_SB_CLUSTERED 5 /* MD is clustered */
#define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */
typedef struct mdp_superblock_s {

1
md_u.h
View File

@ -44,6 +44,7 @@
#define STOP_ARRAY _IO (MD_MAJOR, 0x32)
#define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33)
#define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34)
#define CLUSTERED_DISK_NACK _IO (MD_MAJOR, 0x35)
typedef struct mdu_version_s {
int major;

View File

@ -422,6 +422,12 @@ This functionality is currently only provided by
and
.BR \-\-monitor .
.TP
.B \-\-home\-cluster=
specifies the cluster name for the md device. The md device can be assembled
only on the cluster which matches the name specified. If this option is not
provided, mdadm tries to detect the cluster name automatically.
.SH For create, build, or grow:
.TP
@ -701,7 +707,12 @@ and so is replicated on all devices. If the word
.B "none"
is given with
.B \-\-grow
mode, then any bitmap that is present is removed.
mode, then any bitmap that is present is removed. If the word
.B "clustered"
is given, the array is created for a clustered environment. One bitmap
is created for each node as defined by the
.B \-\-nodes
parameter, and these bitmaps are stored internally.
To help catch typing errors, the filename must contain at least one
slash ('/') if it is a real file (not 'internal' or 'none').
@ -973,6 +984,12 @@ However for RAID0, it is not possible to add spares. So to increase
the number of devices in a RAID0, it is necessary to set the new
number of devices, and to add the new devices, in the same command.
.TP
.BR \-\-nodes
Only works when the array is for a clustered environment. It specifies
the maximum number of nodes in the cluster that will use this device
simultaneously. If not specified, this defaults to 4.
.SH For assemble:
.TP
@ -1087,7 +1104,9 @@ argument given to this flag can be one of
.BR summaries ,
.BR uuid ,
.BR name ,
.BR nodes ,
.BR homehost ,
.BR home-cluster ,
.BR resync ,
.BR byteorder ,
.BR devicesize ,
@ -1138,6 +1157,13 @@ The
.B name
option will change the
.I name
of the array as stored in the superblock and bitmap. This option only
works for a clustered environment.
The
.B nodes
option will change the
.I nodes
of the array as stored in the superblock. This is only supported for
version-1 superblocks.
@ -1149,6 +1175,11 @@ as recorded in the superblock. For version-0 superblocks, this is the
same as updating the UUID.
For version-1 superblocks, this involves updating the name.
The
.B home\-cluster
option will change the cluster name as recorded in the superblock and
bitmap. This option only works for a clustered environment.
The
.B resync
option will cause the array to be marked
@ -1396,6 +1427,15 @@ will avoid reading from these devices if possible.
.BR \-\-readwrite
Subsequent devices that are added or re\-added will have the 'write-mostly'
flag cleared.
.TP
.BR \-\-cluster\-confirm
Confirm the existence of the device. This is issued in response to an \-\-add
request by a node in a cluster. When a node adds a device it sends a message
to all nodes in the cluster to look for a device with a UUID. This translates
to a udev notification with the UUID of the device to be added and the slot
number. The receiving node must acknowledge this message
with \-\-cluster\-confirm. Valid arguments are <slot>:<devicename> in case
the device is found or <slot>:missing in case the device is not found.
.P
Each of these options requires that the first device listed is the array

64
mdadm.c
View File

@ -196,6 +196,7 @@ int main(int argc, char *argv[])
case 'f':
case Fail:
case ReAdd: /* re-add */
case ClusterConfirm:
if (!mode) {
newmode = MANAGE;
shortopt = short_bitmap_options;
@ -588,7 +589,23 @@ int main(int argc, char *argv[])
}
ident.raid_disks = s.raiddisks;
continue;
case O(ASSEMBLE, Nodes):
case O(CREATE, Nodes):
c.nodes = parse_num(optarg);
if (c.nodes <= 0) {
pr_err("invalid number for the number of cluster nodes: %s\n",
optarg);
exit(2);
}
continue;
case O(CREATE, ClusterName):
case O(ASSEMBLE, ClusterName):
c.homecluster = optarg;
if (strlen(c.homecluster) > 64) {
pr_err("Cluster name too big.\n");
exit(ERANGE);
}
continue;
case O(CREATE,'x'): /* number of spare (eXtra) disks */
if (s.sparedisks) {
pr_err("spare-devices set twice: %d and %s\n",
@ -726,6 +743,10 @@ int main(int argc, char *argv[])
continue;
if (strcmp(c.update, "homehost")==0)
continue;
if (strcmp(c.update, "home-cluster")==0)
continue;
if (strcmp(c.update, "nodes")==0)
continue;
if (strcmp(c.update, "devicesize")==0)
continue;
if (strcmp(c.update, "no-bitmap")==0)
@ -764,8 +785,8 @@ int main(int argc, char *argv[])
Name, c.update);
}
fprintf(outf, "Valid --update options are:\n"
" 'sparc2.2', 'super-minor', 'uuid', 'name', 'resync',\n"
" 'summaries', 'homehost', 'byteorder', 'devicesize',\n"
" 'sparc2.2', 'super-minor', 'uuid', 'name', 'nodes', 'resync',\n"
" 'summaries', 'homehost', 'home-cluster', 'byteorder', 'devicesize',\n"
" 'no-bitmap', 'metadata', 'revert-reshape'\n"
" 'bbl', 'no-bbl'\n"
);
@ -919,6 +940,9 @@ int main(int argc, char *argv[])
* remove the device */
devmode = 'f';
continue;
case O(MANAGE, ClusterConfirm):
devmode = 'c';
continue;
case O(MANAGE,Replace):
/* Mark these devices for replacement */
devmode = 'R';
@ -1097,6 +1121,15 @@ int main(int argc, char *argv[])
s.bitmap_file = optarg;
continue;
}
if (strcmp(optarg, "clustered")== 0) {
s.bitmap_file = optarg;
/* Set the default number of cluster nodes
* to 4 if not already set by user
*/
if (c.nodes < 1)
c.nodes = 4;
continue;
}
/* probable typo */
pr_err("bitmap file must contain a '/', or be 'internal', or 'none'\n"
" not '%s'\n", optarg);
@ -1260,6 +1293,16 @@ int main(int argc, char *argv[])
c.require_homehost = 0;
}
if (c.homecluster == NULL && (c.nodes > 0)) {
c.homecluster = conf_get_homecluster();
if (c.homecluster == NULL)
rv = get_cluster_name(&c.homecluster);
if (rv != 0) {
pr_err("The md can't get cluster name\n");
exit(1);
}
}
if (c.backup_file && data_offset != INVALID_SECTORS) {
pr_err("--backup-file and --data-offset are incompatible\n");
exit(2);
@ -1377,6 +1420,21 @@ int main(int argc, char *argv[])
case CREATE:
if (c.delay == 0)
c.delay = DEFAULT_BITMAP_DELAY;
if (c.nodes) {
if (!s.bitmap_file || strcmp(s.bitmap_file, "clustered") != 0) {
pr_err("--nodes argument only compatible with --bitmap=clustered\n");
rv = 1;
break;
}
if (s.level != 1) {
pr_err("--bitmap=clustered is currently supported with RAID mirror only\n");
rv = 1;
break;
}
}
if (s.write_behind && !s.bitmap_file) {
pr_err("write-behind mode requires a bitmap.\n");
rv = 1;

18
mdadm.h
View File

@ -344,6 +344,9 @@ enum special_options {
Dump,
Restore,
Action,
Nodes,
ClusterName,
ClusterConfirm,
};
enum prefix_standard {
@ -351,6 +354,12 @@ enum prefix_standard {
IEC
};
/* Selects which field of the bitmap superblock, if any, the
 * write_bitmap method should refresh from the supertype before the
 * bitmap is written out.
 */
enum bitmap_update {
NoUpdate,	/* write the bitmap without touching the cluster fields */
NameUpdate,	/* first store the supertype's cluster_name */
NodeNumUpdate,	/* first store the supertype's node count */
};
/* structures read from config file */
/* List of mddevice names and identifiers
* Identifiers can be:
@ -418,6 +427,8 @@ struct context {
char *backup_file;
int invalid_backup;
char *action;
int nodes;
char *homecluster;
};
struct shape {
@ -844,7 +855,7 @@ extern struct superswitch {
/* if add_internal_bitmap succeeded for existing array, this
* writes it out.
*/
int (*write_bitmap)(struct supertype *st, int fd);
int (*write_bitmap)(struct supertype *st, int fd, enum bitmap_update update);
/* Free the superblock and any other allocated data */
void (*free_super)(struct supertype *st);
@ -1028,6 +1039,8 @@ struct supertype {
*/
int devcnt;
int retry_soon;
int nodes;
char *cluster_name;
struct mdinfo *devs;
@ -1274,6 +1287,7 @@ extern int parse_uuid(char *str, int uuid[4]);
extern int parse_layout_10(char *layout);
extern int parse_layout_faulty(char *layout);
extern long parse_num(char *num);
extern int parse_cluster_confirm_arg(char *inp, char **devname, int *slot);
extern int check_ext2(int fd, char *name);
extern int check_reiser(int fd, char *name);
extern int check_raid(int fd, char *name);
@ -1304,6 +1318,7 @@ extern char *conf_get_mailaddr(void);
extern char *conf_get_mailfrom(void);
extern char *conf_get_program(void);
extern char *conf_get_homehost(int *require_homehostp);
extern char *conf_get_homecluster(void);
extern char *conf_line(FILE *file);
extern char *conf_word(FILE *file, int allow_key);
extern void print_quoted(char *str);
@ -1412,6 +1427,7 @@ extern char *stat2devnm(struct stat *st);
extern char *fd2devnm(int fd);
extern int in_initrd(void);
extern int get_cluster_name(char **name);
#define _ROUND_UP(val, base) (((val) + (base) - 1) & ~(base - 1))
#define ROUND_UP(val, base) _ROUND_UP(val, (typeof(val))(base))

View File

@ -900,7 +900,7 @@ static int write_init_super0(struct supertype *st)
rv = store_super0(st, di->fd);
if (rv == 0 && (sb->state & (1<<MD_SB_BITMAP_PRESENT)))
rv = st->ss->write_bitmap(st, di->fd);
rv = st->ss->write_bitmap(st, di->fd, NoUpdate);
if (rv)
pr_err("failed to write superblock to %s\n",
@ -1175,7 +1175,7 @@ static void locate_bitmap0(struct supertype *st, int fd)
lseek64(fd, offset, 0);
}
static int write_bitmap0(struct supertype *st, int fd)
static int write_bitmap0(struct supertype *st, int fd, enum bitmap_update update)
{
unsigned long long dsize;
unsigned long long offset;

123
super1.c
View File

@ -134,6 +134,20 @@ struct misc_dev_info {
|MD_FEATURE_NEW_OFFSET \
)
/* Return the number of bytes one bitmap occupies on disk: one bit per
 * chunk of the synced area, plus the bitmap superblock, rounded up to a
 * multiple of 'boundary'.  For cluster-md each node has its own bitmap
 * of this size.
 *
 * 'bms' holds little-endian fields; chunksize is in bytes and is
 * converted to 512-byte sectors before dividing sync_size by it.
 * The 64-bit result is narrowed to unsigned int on return.
 */
static unsigned int calc_bitmap_size(bitmap_super_t *bms, unsigned int boundary)
{
	unsigned long long sync_sectors = __le64_to_cpu(bms->sync_size);
	unsigned long long chunk_sectors = __le32_to_cpu(bms->chunksize) >> 9;
	unsigned long long nbits = sync_sectors / chunk_sectors;
	unsigned long long nbytes = ((nbits + 7) >> 3) + sizeof(bitmap_super_t);

	return ROUND_UP(nbytes, boundary);
}
static unsigned int calc_sb_1_csum(struct mdp_superblock_1 * sb)
{
unsigned int disk_csum, csum;
@ -256,6 +270,7 @@ static int awrite(struct align_fd *afd, void *buf, int len)
static void examine_super1(struct supertype *st, char *homehost)
{
struct mdp_superblock_1 *sb = st->sb;
bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb)+MAX_SB_SIZE);
time_t atime;
unsigned int d;
int role;
@ -289,6 +304,8 @@ static void examine_super1(struct supertype *st, char *homehost)
strncmp(sb->set_name, homehost, l) == 0)
printf(" (local to host %s)", homehost);
printf("\n");
if (bms->nodes > 0)
printf("Cluster Name : %s", bms->cluster_name);
atime = __le64_to_cpu(sb->ctime) & 0xFFFFFFFFFFULL;
printf(" Creation Time : %.24s\n", ctime(&atime));
c=map_num(pers, __le32_to_cpu(sb->level));
@ -681,12 +698,8 @@ static int copy_metadata1(struct supertype *st, int from, int to)
/* have the header, can calculate
* correct bitmap bytes */
bitmap_super_t *bms;
int bits;
bms = (void*)buf;
bits = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9);
bytes = (bits+7) >> 3;
bytes += sizeof(bitmap_super_t);
bytes = ROUND_UP(bytes, 512);
bytes = calc_bitmap_size(bms, 512);
if (n > bytes)
n = bytes;
}
@ -740,6 +753,7 @@ err:
static void detail_super1(struct supertype *st, char *homehost)
{
struct mdp_superblock_1 *sb = st->sb;
bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE);
int i;
int l = homehost ? strlen(homehost) : 0;
@ -748,6 +762,8 @@ static void detail_super1(struct supertype *st, char *homehost)
sb->set_name[l] == ':' &&
strncmp(sb->set_name, homehost, l) == 0)
printf(" (local to host %s)", homehost);
if (bms->nodes > 0)
printf("Cluster Name : %64s", bms->cluster_name);
printf("\n UUID : ");
for (i=0; i<16; i++) {
if ((i&3)==0 && i != 0) printf(":");
@ -891,6 +907,8 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
info->array.state =
(__le64_to_cpu(sb->resync_offset) == MaxSector)
? 1 : 0;
if (__le32_to_cpu(bsb->nodes) > 1)
info->array.state |= (1 << MD_SB_CLUSTERED);
info->data_offset = __le64_to_cpu(sb->data_offset);
info->component_size = __le64_to_cpu(sb->size);
@ -1689,7 +1707,7 @@ static int write_init_super1(struct supertype *st)
sb->sb_csum = calc_sb_1_csum(sb);
rv = store_super1(st, di->fd);
if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1))
rv = st->ss->write_bitmap(st, di->fd);
rv = st->ss->write_bitmap(st, di->fd, NoUpdate);
close(di->fd);
di->fd = -1;
if (rv)
@ -2054,7 +2072,7 @@ add_internal_bitmap1(struct supertype *st,
bbl_size = -bbl_offset;
if (!may_change || (room < 3*2 &&
__le32_to_cpu(sb->max_dev) <= 384)) {
__le32_to_cpu(sb->max_dev) <= 384)) {
room = 3*2;
offset = 1*2;
bbl_size = 0;
@ -2144,6 +2162,10 @@ add_internal_bitmap1(struct supertype *st,
bms->daemon_sleep = __cpu_to_le32(delay);
bms->sync_size = __cpu_to_le64(size);
bms->write_behind = __cpu_to_le32(write_behind);
bms->nodes = __cpu_to_le32(st->nodes);
if (st->cluster_name) {
	/* Bound the copy by the destination field, not by the length
	 * of the source: the name may come from the config file or
	 * from corosync and need not fit.  Always leave the field
	 * NUL-terminated so it can be printed as a string.
	 */
	strncpy((char *)bms->cluster_name, st->cluster_name,
		sizeof(bms->cluster_name) - 1);
	bms->cluster_name[sizeof(bms->cluster_name) - 1] = '\0';
}
*chunkp = chunk;
return 1;
@ -2169,7 +2191,7 @@ static void locate_bitmap1(struct supertype *st, int fd)
lseek64(fd, offset<<9, 0);
}
static int write_bitmap1(struct supertype *st, int fd)
static int write_bitmap1(struct supertype *st, int fd, enum bitmap_update update)
{
struct mdp_superblock_1 *sb = st->sb;
bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb)+MAX_SB_SIZE);
@ -2177,6 +2199,43 @@ static int write_bitmap1(struct supertype *st, int fd)
void *buf;
int towrite, n;
struct align_fd afd;
unsigned int i = 0;
unsigned long long total_bm_space, bm_space_per_node;
switch (update) {
case NameUpdate:
/* update cluster name */
if (st->cluster_name) {
memset((char *)bms->cluster_name, 0, sizeof(bms->cluster_name));
strncpy((char *)bms->cluster_name, st->cluster_name, 64);
}
break;
case NodeNumUpdate:
/* cluster md only supports superblock 1.2 now */
if (st->minor_version != 2) {
pr_err("Warning: cluster md only works with superblock 1.2\n");
return -EINVAL;
}
/* Each node has an independent bitmap, it is necessary to calculate the
* space is enough or not, first get how many bytes for the total bitmap */
bm_space_per_node = calc_bitmap_size(bms, 4096);
total_bm_space = 512 * (__le64_to_cpu(sb->data_offset) - __le64_to_cpu(sb->super_offset));
total_bm_space = total_bm_space - 4096; /* leave another 4k for superblock */
if (bm_space_per_node * st->nodes > total_bm_space) {
pr_err("Warning: The max num of nodes can't exceed %llu\n",
total_bm_space / bm_space_per_node);
return -ENOMEM;
}
bms->nodes = __cpu_to_le32(st->nodes);
break;
case NoUpdate:
default:
break;
}
init_afd(&afd, fd);
@ -2185,27 +2244,37 @@ static int write_bitmap1(struct supertype *st, int fd)
if (posix_memalign(&buf, 4096, 4096))
return -ENOMEM;
memset(buf, 0xff, 4096);
memcpy(buf, (char *)bms, sizeof(bitmap_super_t));
towrite = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9);
towrite = (towrite+7) >> 3; /* bits to bytes */
towrite += sizeof(bitmap_super_t);
towrite = ROUND_UP(towrite, 512);
while (towrite > 0) {
n = towrite;
if (n > 4096)
n = 4096;
n = awrite(&afd, buf, n);
if (n > 0)
towrite -= n;
do {
/* Only the bitmap[0] should resync
* whole device on initial assembly
*/
if (i)
memset(buf, 0x00, 4096);
else
memset(buf, 0xff, 4096);
memcpy(buf, (char *)bms, sizeof(bitmap_super_t));
towrite = calc_bitmap_size(bms, 4096);
while (towrite > 0) {
n = towrite;
if (n > 4096)
n = 4096;
n = awrite(&afd, buf, n);
if (n > 0)
towrite -= n;
else
break;
if (i)
memset(buf, 0x00, 4096);
else
memset(buf, 0xff, 4096);
}
fsync(fd);
if (towrite) {
rv = -2;
break;
memset(buf, 0xff, 4096);
}
fsync(fd);
if (towrite)
rv = -2;
}
} while (++i < __le32_to_cpu(bms->nodes));
free(buf);
return rv;

67
util.c
View File

@ -34,6 +34,15 @@
#include <ctype.h>
#include <dirent.h>
#include <signal.h>
#include <dlfcn.h>
#include <stdint.h>
#include <limits.h>
#ifdef NO_COROSYNC
typedef uint64_t cmap_handle_t;
#define CS_OK 1
#else
#include <corosync/cmap.h>
#endif
/*
* following taken from linux/blkpg.h because they aren't
@ -271,6 +280,16 @@ long parse_num(char *num)
}
#endif
/*
 * Split a --cluster-confirm argument of the form "<slot>:<devname>".
 *
 * input:   the argument string; it is not modified.
 * devname: on success, set to point just past the ':' inside 'input'.
 * slot:    on success, set to the decimal slot number.
 *
 * Returns 0 on success.  Returns -1, leaving *devname and *slot
 * untouched, unless 'input' starts with a decimal number in the range
 * 0..INT_MAX that is immediately followed by ':'.
 */
int parse_cluster_confirm_arg(char *input, char **devname, int *slot)
{
	char *end;
	unsigned long val;

	/* strtoul() would silently accept leading white space and a sign
	 * (negating the value), so insist on a leading digit.
	 */
	if (!isdigit((unsigned char)input[0]))
		return -1;

	val = strtoul(input, &end, 10);
	/* Overflow yields ULONG_MAX, which the range check rejects too. */
	if (end == input || end[0] != ':' || val > INT_MAX)
		return -1;

	*slot = (int)val;
	*devname = end + 1;
	return 0;
}
void remove_partitions(int fd)
{
/* remove partitions from this block devices.
@ -1976,3 +1995,51 @@ void reopen_mddev(int mdfd)
if (fd >= 0 && fd != mdfd)
dup2(fd, mdfd);
}
/*
 * Look up the corosync cluster name ("totem.cluster_name") at run time.
 *
 * libcmap is loaded with dlopen() so that there is no link-time
 * dependency on corosync; the three entry points needed are resolved
 * with dlsym().
 *
 * cluster_name: on success, set to the string handed back by
 *               cmap_get_string().  NOTE(review): per the corosync
 *               documentation that string is heap-allocated and the
 *               caller is expected to free() it - confirm.
 *               On failure *cluster_name is left exactly as it was.
 *
 * Returns 0 on success and -1 on any failure (library missing, symbol
 * missing, cmap initialisation failed, or key lookup failed).
 */
int get_cluster_name(char **cluster_name)
{
	void *lib_handle;
	int rv = -1;
	cmap_handle_t handle;
	char *found = NULL;
	int (*initialize)(cmap_handle_t *handle);
	int (*get_string)(cmap_handle_t handle,
			  const char *string,
			  char **name);
	int (*finalize)(cmap_handle_t handle);

	lib_handle = dlopen("libcmap.so.4", RTLD_NOW | RTLD_LOCAL);
	if (!lib_handle)
		return rv;

	initialize = dlsym(lib_handle, "cmap_initialize");
	get_string = dlsym(lib_handle, "cmap_get_string");
	finalize = dlsym(lib_handle, "cmap_finalize");
	if (!initialize || !get_string || !finalize)
		goto out;

	if (initialize(&handle) != CS_OK)
		goto out;

	/* Fetch into a local so the caller's pointer is only touched on
	 * success; it is not ours to free or overwrite otherwise.
	 */
	if (get_string(handle, "totem.cluster_name", &found) == CS_OK) {
		*cluster_name = found;
		rv = 0;
	}

	finalize(handle);
out:
	dlclose(lib_handle);
	return rv;
}