Grow: add raid10 reshape.

RAID10 reshape requires that data_offset be changed.
So we only allow it if the new_data_offset attribute is available,
and we compute a suitable change in data offset.

Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
NeilBrown 2012-10-04 16:34:21 +10:00
parent b48e2e25c4
commit 19ceb16daf
1 changed files with 318 additions and 38 deletions

356
Grow.c
View File

@ -985,7 +985,9 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
* This can be called as part of starting a reshape, or
* when assembling an array that is undergoing reshape.
*/
int near, far, offset, copies;
int new_disks;
int old_chunk, new_chunk;
/* delta_parity records change in number of devices
* caused by level change
*/
@ -1072,38 +1074,90 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
return "Impossibly level change request for RAID1";
case 10:
/* RAID10 can only be converted from near mode to
* RAID0 by removing some devices
/* RAID10 can be converted from near mode to
* RAID0 by removing some devices.
* It can also be reshaped if the kernel supports
* new_data_offset.
*/
if ((info->array.layout & ~0xff) != 0x100)
return "Cannot Grow RAID10 with far/offset layout";
/* number of devices must be multiple of number of copies */
if (info->array.raid_disks % (info->array.layout & 0xff))
return "RAID10 layout too complex for Grow operation";
switch (info->new_level) {
case 0:
if ((info->array.layout & ~0xff) != 0x100)
return "Cannot Grow RAID10 with far/offset layout";
/* number of devices must be multiple of number of copies */
if (info->array.raid_disks % (info->array.layout & 0xff))
return "RAID10 layout too complex for Grow operation";
if (info->new_level != 0)
new_disks = (info->array.raid_disks
/ (info->array.layout & 0xff));
if (info->delta_disks == UnSet)
info->delta_disks = (new_disks
- info->array.raid_disks);
if (info->delta_disks != new_disks - info->array.raid_disks)
return "New number of raid-devices impossible for RAID10";
if (info->new_chunk &&
info->new_chunk != info->array.chunk_size)
return "Cannot change chunk-size with RAID10 Grow";
/* looks good */
re->level = 0;
re->parity = 0;
re->before.data_disks = new_disks;
re->after.data_disks = re->before.data_disks;
re->before.layout = 0;
re->backup_blocks = 0;
return NULL;
case 10:
near = info->array.layout & 0xff;
far = (info->array.layout >> 8) & 0xff;
offset = info->array.layout & 0x10000;
if (far > 1 && !offset)
return "Cannot reshape RAID10 in far-mode";
copies = near * far;
old_chunk = info->array.chunk_size * far;
if (info->new_layout == UnSet)
info->new_layout = info->array.layout;
else {
near = info->new_layout & 0xff;
far = (info->new_layout >> 8) & 0xff;
offset = info->new_layout & 0x10000;
if (far > 1 && !offset)
return "Cannot reshape RAID10 to far-mode";
if (near * far != copies)
return "Cannot change number of copies"
" when reshaping RAID10";
}
if (info->delta_disks == UnSet)
info->delta_disks = 0;
new_disks = (info->array.raid_disks +
info->delta_disks);
new_chunk = info->new_chunk * far;
re->level = 10;
re->parity = 0;
re->before.layout = info->array.layout;
re->before.data_disks = info->array.raid_disks;
re->after.layout = info->new_layout;
re->after.data_disks = new_disks;
/* For RAID10 we don't do backup, and there is
* no need to synchronise stripes on both
* 'old' and 'new'. So the important
* number is the minimum data_offset difference
* which is the larger of (offset copies * chunk).
*/
re->backup_blocks = max(old_chunk, new_chunk) / 512;
re->new_size = (info->component_size * new_disks
/ copies);
return NULL;
default:
return "RAID10 can only be changed to RAID0";
new_disks = (info->array.raid_disks
/ (info->array.layout & 0xff));
if (info->delta_disks == UnSet)
info->delta_disks = (new_disks
- info->array.raid_disks);
if (info->delta_disks != new_disks - info->array.raid_disks)
return "New number of raid-devices impossible for RAID10";
if (info->new_chunk &&
info->new_chunk != info->array.chunk_size)
return "Cannot change chunk-size with RAID10 Grow";
/* looks good */
re->level = 0;
re->parity = 0;
re->before.data_disks = new_disks;
re->after.data_disks = re->before.data_disks;
re->before.layout = 0;
re->backup_blocks = 0;
return NULL;
}
case 0:
/* RAID0 can be converted to RAID10, or to RAID456 */
if (info->new_level == 10) {
@ -1434,6 +1488,7 @@ static int set_array_size(struct supertype *st, struct mdinfo *sra,
/* Forward declaration of reshape_array(); the definition appears
 * further down this file.
 * 'data_offset' is handed on to raid10_reshape() when the array
 * being reshaped is RAID10.
 */
static int reshape_array(char *container, int fd, char *devname,
struct supertype *st, struct mdinfo *info,
int force, struct mddev_dev *devlist,
unsigned long long data_offset,
char *backup_file, int verbose, int forked,
int restart, int freeze_reshape);
static int reshape_container(char *container, char *devname,
@ -1483,16 +1538,16 @@ int Grow_reshape(char *devname, int fd,
struct mdinfo info;
struct mdinfo *sra;
if (data_offset != INVALID_SECTORS) {
fprintf(stderr, Name ": --grow --data-offset not yet supported\n");
return 1;
}
if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
pr_err("%s is not an active md array - aborting\n",
fprintf(stderr, Name ": %s is not an active md array - aborting\n",
devname);
return 1;
}
if (data_offset != INVALID_SECTORS && array.level != 10) {
pr_err("--grow --data-offset not yet supported\n");
return 1;
}
if (s->size > 0 &&
(s->chunk || s->level!= UnSet || s->layout_str || s->raiddisks)) {
@ -2021,7 +2076,8 @@ size_change_error:
}
sync_metadata(st);
rv = reshape_array(container, fd, devname, st, &info, c->force,
devlist, c->backup_file, c->verbose, 0, 0, 0);
devlist, data_offset, c->backup_file, c->verbose,
0, 0, 0);
frozen = 0;
}
release:
@ -2088,9 +2144,219 @@ static int verify_reshape_position(struct mdinfo *info, int level)
return ret_val;
}
/*
 * raid10_reshape() - request a RAID10 reshape by moving data_offset.
 *
 * @container:   unused here.
 * @fd:          open descriptor of the array, used to read its sysfs state.
 * @devname:     array name, used only in error messages.
 * @st:          supertype handle; duplicated to load each member's superblock.
 * @info:        requested change (delta_disks, new_chunk) and current array
 *               geometry/state.
 * @reshape:     result of analysing the change; ->backup_blocks supplies the
 *               minimum data_offset movement and ->after.layout the new layout.
 * @data_offset: new data offset requested by the caller, or 1 when none was
 *               given.
 *               NOTE(review): 1 presumably matches INVALID_SECTORS, which
 *               Grow_reshape() compares data_offset against - confirm.  Other
 *               callers in this file pass 0ULL, which is NOT treated as
 *               "unspecified" here - confirm that is intended.
 * @force:       unused here.
 * @verbose:     unused here.
 *
 * Returns 0 if the reshape was requested successfully, 1 on any failure.
 */
static int raid10_reshape(char *container, int fd, char *devname,
			  struct supertype *st, struct mdinfo *info,
			  struct reshape *reshape,
			  unsigned long long data_offset,
			  int force, int verbose)
{
	/* Changing raid_disks, layout, chunksize or possibly
	 * just data_offset for a RAID10.
	 * We must always change data_offset.
	 * The amount it changes by relates to the minimum copy size.
	 * This is reshape->backup_blocks * copies / raid_disks
	 * where 'raid_disks' is the smaller of 'new' and 'old'.
	 * (The code below uses reshape->backup_blocks directly as the
	 * minimum.)
	 * If raid_disks is increasing, then data_offset must decrease
	 * by at least this copy size.
	 * If raid_disks is unchanged, data_offset must increase or
	 * decrease by at least min-copy-size but preferably by much more.
	 * We choose half of the available space.
	 * If raid_disks is decreasing, data_offset must increase by
	 * at least min-copy-size.
	 *
	 * So we calculate the required minimum and direction, then iterate
	 * through the devices and set the new_data_offset.
	 * If that all works, we set chunk_size, layout, raid_disks, and start
	 * 'reshape'
	 */
	struct mdinfo *sra, *sd;
	unsigned long long min;
	int dir = 0;
	int err = 0;

	sra = sysfs_read(fd, 0,
			 GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|GET_CHUNK
		);
	if (!sra) {
		fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n",
			devname);
		goto release;
	}
	min = reshape->backup_blocks;

	/* When the device count changes the direction is fixed up front;
	 * when it does not, it is chosen inside the loop from the first
	 * usable device.
	 */
	if (info->delta_disks)
		sysfs_set_str(sra, NULL, "reshape_direction",
			      info->delta_disks < 0 ? "backwards" : "forwards");

	for (sd = sra->devs; sd; sd = sd->next) {
		char *dn;
		int dfd;
		int rv;
		struct supertype *st2;
		struct mdinfo info2;

		if (sd->disk.state & (1<<MD_DISK_FAULTY))
			continue;

		/* Load this member's superblock to learn its current
		 * data_offset and the free space on either side of the data.
		 */
		dn = map_dev(sd->disk.major, sd->disk.minor, 0);
		dfd = dev_open(dn, O_RDONLY);
		if (dfd < 0) {
			fprintf(stderr,
				Name ": %s: cannot open component %s\n",
				devname, dn ? dn : "-unknown-");
			goto release;
		}
		st2 = dup_super(st);
		rv = st2->ss->load_super(st2, dfd, NULL);
		close(dfd);
		if (rv) {
			free(st2);
			fprintf(stderr, Name ": %s: cannot get superblock from %s\n",
				devname, dn);
			goto release;
		}
		st2->ss->getinfo_super(st2, &info2, NULL);
		st2->ss->free_super(st2);
		free(st2);

		if (info->delta_disks < 0) {
			/* Don't need any space as array is shrinking
			 * just move data_offset up by min
			 */
			if (data_offset == 1)
				info2.new_data_offset = info2.data_offset + min;
			else {
				if (data_offset < info2.data_offset + min) {
					fprintf(stderr, Name ": --data-offset too small for %s\n",
						dn);
					goto release;
				}
				info2.new_data_offset = data_offset;
			}
		} else if (info->delta_disks > 0) {
			/* need space before */
			if (info2.space_before < min) {
				fprintf(stderr, Name ": Insufficient head-space for reshape on %s\n",
					dn);
				goto release;
			}
			if (data_offset == 1)
				info2.new_data_offset = info2.data_offset - min;
			else {
				if (data_offset > info2.data_offset - min) {
					fprintf(stderr, Name ": --data-offset too large for %s\n",
						dn);
					goto release;
				}
				info2.new_data_offset = data_offset;
			}
		} else {
			if (dir == 0) {
				/* can move up or down. 'data_offset'
				 * might guide us, otherwise choose
				 * direction with most space
				 */
				if (data_offset == 1) {
					if (info2.space_before > info2.space_after)
						dir = -1;
					else
						dir = 1;
				} else if (data_offset < info2.data_offset)
					dir = -1;
				else
					dir = 1;
				sysfs_set_str(sra, NULL, "reshape_direction",
					      dir == 1 ? "backwards" : "forwards");
			}
			switch (dir) {
			case 1: /* Increase data offset */
				if (info2.space_after < min) {
					fprintf(stderr, Name ": Insufficient tail-space for reshape on %s\n",
						dn);
					goto release;
				}
				if (data_offset != 1 &&
				    data_offset < info2.data_offset + min) {
					fprintf(stderr, Name ": --data-offset too small on %s\n",
						dn);
					goto release;
				}
				if (data_offset != 1)
					info2.new_data_offset = data_offset;
				else {
					/* Half the free tail space, rounded
					 * down to a multiple of 8 sectors,
					 * but never less than the minimum.
					 */
					unsigned long long off =
						info2.space_after / 2;
					off &= ~7ULL;
					if (off < min)
						off = min;
					info2.new_data_offset =
						info2.data_offset + off;
				}
				break;
			case -1: /* Decrease data offset */
				if (info2.space_before < min) {
					fprintf(stderr, Name ": insufficient head-room on %s\n",
						dn);
					goto release;
				}
				/* The offset must drop by at least 'min', so
				 * a requested value above data_offset - min
				 * is too large (same rule as the
				 * delta_disks > 0 case above).
				 */
				if (data_offset != 1 &&
				    data_offset > info2.data_offset - min) {
					fprintf(stderr, Name ": --data-offset too large on %s\n",
						dn);
					goto release;
				}
				if (data_offset != 1)
					info2.new_data_offset = data_offset;
				else {
					/* Half the free head space, rounded
					 * down to a multiple of 8 sectors,
					 * but never less than the minimum.
					 */
					unsigned long long off =
						info2.space_before / 2;
					off &= ~7ULL;
					if (off < min)
						off = min;
					info2.new_data_offset =
						info2.data_offset - off;
				}
				break;
			}
		}
		if (sysfs_set_num(sra, sd, "new_offset",
				  info2.new_data_offset) < 0) {
			err = errno;
			fprintf(stderr, Name ": Cannot set new_offset for %s\n",
				dn);
			break;
		}
	}

	/* Only touch the array geometry if every new_offset was accepted;
	 * each step is skipped once an earlier one has failed.
	 */
	if (!err && sysfs_set_num(sra, NULL, "chunk_size", info->new_chunk) < 0)
		err = errno;
	if (!err && sysfs_set_num(sra, NULL, "layout", reshape->after.layout) < 0)
		err = errno;
	if (!err && sysfs_set_num(sra, NULL, "raid_disks",
				  info->array.raid_disks + info->delta_disks) < 0)
		err = errno;
	if (!err && sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0)
		err = errno;
	if (err) {
		fprintf(stderr, Name ": Cannot set array shape for %s\n",
			devname);
		if (err == EBUSY &&
		    (info->array.state & (1<<MD_SB_BITMAP_PRESENT)))
			fprintf(stderr,
				"       Bitmap must be removed before"
				" shape can be changed\n");
		goto release;
	}
	sysfs_free(sra);
	return 0;
release:
	sysfs_free(sra);
	return 1;
}
static int reshape_array(char *container, int fd, char *devname,
struct supertype *st, struct mdinfo *info,
int force, struct mddev_dev *devlist,
unsigned long long data_offset,
char *backup_file, int verbose, int forked,
int restart, int freeze_reshape)
{
@ -2373,7 +2639,6 @@ static int reshape_array(char *container, int fd, char *devname,
* - request the shape change.
* - fork to handle backup etc.
*/
started:
/* Check that we can hold all the data */
get_dev_size(fd, NULL, &array_size);
if (reshape.new_size < (array_size/512)) {
@ -2384,6 +2649,21 @@ started:
goto release;
}
started:
if (array.level == 10) {
/* Reshaping RAID10 does not require any data backup by
* user-space. Instead it requires that the data_offset
* is changed to avoid the need for backup.
* So this is handled very separately
*/
if (restart)
/* Nothing to do. */
return 0;
return raid10_reshape(container, fd, devname, st, info,
&reshape, data_offset,
force, verbose);
}
sra = sysfs_read(fd, 0,
GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|GET_CHUNK|
GET_CACHE);
@ -2848,7 +3128,7 @@ int reshape_container(char *container, char *devname,
flush_mdmon(container);
rv = reshape_array(container, fd, adev, st,
content, force, NULL,
content, force, NULL, 0ULL,
backup_file, verbose, 1, restart,
freeze_reshape);
close(fd);
@ -4187,7 +4467,7 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
0, 1, freeze_reshape);
} else
ret_val = reshape_array(NULL, mdfd, "array", st, info, 1,
NULL, backup_file, 0, 0, 1,
NULL, 0ULL, backup_file, 0, 0, 1,
freeze_reshape);
return ret_val;