Support restarting of a reshape on --assemble

Signed-off-by: Neil Brown <neilb@suse.de>
This commit is contained in:
Neil Brown 2006-03-20 03:17:31 +00:00
parent e86c9dd6d8
commit 353632d927
7 changed files with 220 additions and 22 deletions

View File

@ -551,7 +551,7 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
int fd;
fd = dev_open(devices[chosen_drive].devname, O_RDWR|O_EXCL);
if (fd < 0) {
fprintf(stderr, Name ": Could open %s for write - cannot Assemble array.\n",
fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n",
devices[chosen_drive].devname);
return 1;
}
@ -564,6 +564,37 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
close(fd);
}
/* If we are in the middle of a reshape we may need to restore saved data
* that was moved aside due to the reshape overwriting live data.
* The code for doing this lives in Grow.c
*/
if (info.reshape_active) {
int err = 0;
int *fdlist = malloc(sizeof(int)* bestcnt);
for (i=0; i<bestcnt; i++) {
int j = best[i];
if (j >= 0) {
fdlist[i] = dev_open(devices[j].devname, O_RDWR|O_EXCL);
if (fdlist[i] < 0) {
fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n",
devices[j].devname);
err = 1;
break;
}
} else
fdlist[i] = -1;
}
if (!err)
err = Grow_restart(st, &info, fdlist, bestcnt);
while (i>0) {
i--;
if (fdlist[i]>=0) close(fdlist[i]);
}
if (err) {
fprintf(stderr, Name ": Failed to restore critical section for reshape, sorry.\n");
return err;
}
}
/* count number of in-sync devices according to the superblock.
* We must have this number to start the array without -s or -R
*/

131
Grow.c
View File

@ -605,12 +605,12 @@ int Grow_reshape(char *devname, int fd, int quiet,
* from
*/
nstripe = ostripe = 0;
while (nstripe+ochunk/512 >= ostripe) {
while (nstripe >= ostripe) {
nstripe += nchunk/512;
last_block = nstripe * ndata;
ostripe = last_block / odata;
ostripe = last_block / odata / (ochunk/512) * (ochunk/512);
}
printf("Need to backup to stripe %llu sectors, %lluK\n", nstripe, last_block/2);
printf("mdadm: Need to backup %lluK of critical section..\n", last_block/2);
sra = sysfs_read(fd, 0,
GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE);
@ -625,6 +625,11 @@ int Grow_reshape(char *devname, int fd, int quiet,
devname);
return 1;
}
if (sra->spares == 0) {
fprintf(stderr, Name ": %s: Cannot grow - need a spare to backup critical section\n",
devname);
return 1;
}
nrdisks = array.nr_disks + sra->spares;
/* Now we need to open all these devices so we can read/write.
@ -724,13 +729,13 @@ int Grow_reshape(char *devname, int fd, int quiet,
goto abort_resume;
}
/* FIXME write superblocks */
memcpy(bsb.magic, "md_backups_data-1", 16);
memcpy(bsb.magic, "md_backup_data-1", 16);
st->ss->uuid_from_super((int*)&bsb.set_uuid, super);
bsb.mtime = time(0);
bsb.mtime = __cpu_to_le64(time(0));
bsb.arraystart = 0;
bsb.length = last_block;
bsb.length = __cpu_to_le64(last_block);
for (i=odisks; i<d ; i++) {
bsb.devstart = offsets[i];
bsb.devstart = __cpu_to_le64(offsets[i]);
bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0);
write(fdlist[i], &bsb, sizeof(bsb));
@ -769,6 +774,7 @@ int Grow_reshape(char *devname, int fd, int quiet,
free(fdlist);
free(offsets);
printf("mdadm: ... critical section passed.\n");
break;
}
return 0;
@ -785,3 +791,114 @@ int Grow_reshape(char *devname, int fd, int quiet,
return 1;
}
/*
 * Grow_restart - restore the backed-up critical section of an interrupted
 * reshape.
 *
 * If any spare contains an "md_backup_data-1" backup which is recent wrt
 * mtime, write that data into the array and update the superblocks with
 * the new reshape_progress.
 *
 *   st      superblock handler used to load, inspect, update and store
 *           the per-device superblocks
 *   info    array information; supplies uuid, utime, reshape_progress and
 *           the old/new geometry
 *   fdlist  one open file descriptor per device, -1 where a device is
 *           not available
 *   cnt     number of entries in fdlist
 *
 * Returns 0 if no usable backup was found or if the backup was restored,
 * and non-zero if the reshape is of a kind that cannot be handled, memory
 * could not be allocated, or restoring the saved stripes failed.
 */
int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt)
{
	int i, j;
	int old_disks;
	unsigned long long *offsets;

	if (info->delta_disks < 0)
		return 1; /* cannot handle a shrink */
	if (info->new_level != info->array.level ||
	    info->new_layout != info->array.layout ||
	    info->new_chunk != info->array.chunk_size)
		return 1; /* Can only handle change in disks */

	old_disks = info->array.raid_disks - info->delta_disks;

	for (i = old_disks; i < cnt; i++) {
		void *super = NULL;
		struct mdinfo dinfo;
		struct mddev_ident_s id;
		struct mdp_backup_super bsb;

		/* This was a spare and may have some saved data on it.
		 * Load the superblock, find and load the
		 * backup_super_block.
		 * If either fail, go on to next device.
		 * If the backup contains no new info, go on to next device.
		 * Else restore data and update all superblocks
		 */
		if (fdlist[i] < 0)
			continue;
		if (st->ss->load_super(st, fdlist[i], &super, NULL))
			continue;

		st->ss->getinfo_super(&dinfo, &id, super);
		free(super);
		super = NULL;

		/* The backup superblock is read from 8 sectors before the
		 * end of the component's data area.
		 */
		if (lseek64(fdlist[i],
			    (dinfo.data_offset + dinfo.component_size - 8) << 9,
			    0) < 0)
			continue; /* Cannot seek */
		if (read(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb))
			continue; /* Cannot read */
		if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0)
			continue;
		if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb)))
			continue; /* bad checksum */
		if (memcmp(bsb.set_uuid, info->uuid, 16) != 0)
			continue; /* Wrong uuid */

		/* Only accept a backup written no later than, and at most
		 * an hour before, the array's last update time.
		 */
		if (info->array.utime > __le64_to_cpu(bsb.mtime) + 3600 ||
		    info->array.utime < __le64_to_cpu(bsb.mtime))
			continue; /* time stamp is too bad */

		if (__le64_to_cpu(bsb.arraystart) != 0)
			continue; /* Can only handle backup from start of array */
		if (__le64_to_cpu(bsb.length) <
		    info->reshape_progress)
			continue; /* No new data here */

		if (lseek64(fdlist[i], __le64_to_cpu(bsb.devstart)*512, 0) < 0)
			continue; /* Cannot seek */

		/* Now need the data offsets for all devices.
		 * calloc() is used so that slots we skip below hold a
		 * defined value (0) rather than indeterminate memory.
		 */
		offsets = calloc(info->array.raid_disks, sizeof(*offsets));
		if (offsets == NULL) {
			fprintf(stderr, Name ": out of memory while restoring critical section\n");
			return 1;
		}
		/* NOTE(review): this indexes fdlist[] up to raid_disks and
		 * so assumes cnt >= raid_disks - confirm against the caller.
		 */
		for (j = 0; j < info->array.raid_disks; j++) {
			if (fdlist[j] < 0)
				continue;
			if (st->ss->load_super(st, fdlist[j], &super, NULL))
				/* FIXME should this be an error */
				continue;
			st->ss->getinfo_super(&dinfo, &id, super);
			free(super);
			super = NULL;
			offsets[j] = dinfo.data_offset;
		}

		printf(Name ": restoring critical section\n");

		if (restore_stripes(fdlist, offsets,
				    info->array.raid_disks,
				    info->new_chunk,
				    info->new_level,
				    info->new_layout,
				    fdlist[i], __le64_to_cpu(bsb.devstart)*512,
				    0, __le64_to_cpu(bsb.length)*512)) {
			/* didn't succeed, so give up and tell the caller:
			 * the critical section may be only partly restored,
			 * so this must not be reported as success.
			 */
			free(offsets);
			return 1;
		}
		free(offsets);

		/* Ok, so the data is restored.  Let's update those superblocks. */
		for (j = 0; j < info->array.raid_disks; j++) {
			if (fdlist[j] < 0)
				continue;
			if (st->ss->load_super(st, fdlist[j], &super, NULL))
				continue;
			st->ss->getinfo_super(&dinfo, &id, super);
			dinfo.reshape_progress = __le64_to_cpu(bsb.length);
			st->ss->update_super(&dinfo, super, "_reshape_progress", NULL, 0);
			st->ss->store_super(st, fdlist[j], super);
			free(super);
			super = NULL;
		}

		/* And we are done! */
		return 0;
	}

	/* No spare carried a usable backup; nothing needed restoring. */
	return 0;
}

10
mdadm.h
View File

@ -91,6 +91,11 @@ struct mdinfo {
mdu_disk_info_t disk;
__u64 events;
int uuid[4];
unsigned long long data_offset;
unsigned long long component_size;
int reshape_active;
unsigned long long reshape_progress;
int new_level, delta_disks, new_layout, new_chunk;
};
#define Name "mdadm"
@ -225,6 +230,10 @@ extern int save_stripes(int *source, unsigned long long *offsets,
int raid_disks, int chunk_size, int level, int layout,
int nwrites, int *dest,
unsigned long long start, unsigned long long length);
extern int restore_stripes(int *dest, unsigned long long *offsets,
int raid_disks, int chunk_size, int level, int layout,
int source, unsigned long long read_offset,
unsigned long long start, unsigned long long length);
#ifndef Sendmail
#define Sendmail "/usr/lib/sendmail -t"
@ -302,6 +311,7 @@ extern int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int dela
extern int Grow_reshape(char *devname, int fd, int quiet,
long long size,
int level, int layout, int chunksize, int raid_disks);
extern int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt);
extern int Assemble(struct supertype *st, char *mddev, int mdfd,

View File

@ -162,7 +162,7 @@ int save_stripes(int *source, unsigned long long *offsets,
/* Restore data:
* We are given:
* A list of 'fds' of the active disks. Some may be '-1' for not-available.
* A geometry: raid_disks, chunk_sisze, level, layout
* A geometry: raid_disks, chunk_size, level, layout
* An 'fd' to read from. It is already seeked to the right (Read) location.
* A start and length.
* The length must be a multiple of the stripe size.
@ -172,7 +172,7 @@ int save_stripes(int *source, unsigned long long *offsets,
*/
int restore_stripes(int *dest, unsigned long long *offsets,
int raid_disks, int chunk_size, int level, int layout,
int source,
int source, unsigned long long read_offset,
unsigned long long start, unsigned long long length)
{
char *stripe_buf = malloc(raid_disks * chunk_size);
@ -199,8 +199,11 @@ int restore_stripes(int *dest, unsigned long long *offsets,
int disk = geo_map(i, start/chunk_size/data_disks,
raid_disks, level, layout);
blocks[i] = stripes[disk];
if (lseek64(source, read_offset, 0) != read_offset)
return -1;
if (read(source, stripes[disk], chunk_size) != chunk_size)
return -1;
read_offset += chunk_size;
}
/* We have the data, now do the parity */
offset = (start/chunk_size/data_disks) * chunk_size;
@ -311,7 +314,7 @@ main(int argc, char *argv[])
} else {
int rv = restore_stripes(fds, offsets,
raid_disks, chunk_size, level, layout,
storefd,
storefd, 0ULL,
start, length);
if (rv != 0) {
fprintf(stderr, "test_stripe: restore_stripes returned %d\n", rv);

View File

@ -279,6 +279,9 @@ static void getinfo_super0(struct mdinfo *info, mddev_ident_t ident, void *sbv)
info->array.layout = sb->layout;
info->array.md_minor = sb->md_minor;
info->array.ctime = sb->ctime;
info->array.utime = sb->utime;
info->array.chunk_size = sb->chunk_size;
info->component_size = sb->size*2;
info->disk.state = sb->this_disk.state;
info->disk.major = sb->this_disk.major;
@ -287,9 +290,20 @@ static void getinfo_super0(struct mdinfo *info, mddev_ident_t ident, void *sbv)
info->disk.number = sb->this_disk.number;
info->events = md_event(sb);
info->data_offset = 0;
uuid_from_super0(info->uuid, sbv);
if (sb->minor_version > 90 && (sb->reshape_position+1) != 0) {
info->reshape_active = 1;
info->reshape_progress = sb->reshape_position;
info->new_level = sb->new_level;
info->delta_disks = sb->delta_disks;
info->new_layout = sb->new_layout;
info->new_chunk = sb->new_chunk;
} else
info->reshape_active = 0;
ident->name[0] = 0;
/* work_disks is calculated rather than read directly */
for (i=0; i < MD_SB_DISKS; i++)
@ -403,6 +417,8 @@ static int update_super0(struct mdinfo *info, void *sbv, char *update, char *dev
sb->set_uuid2 = info->uuid[2];
sb->set_uuid3 = info->uuid[3];
}
if (strcmp(update, "_reshape_progress")==0)
sb->reshape_position = info->reshape_progress;
sb->sb_csum = calc_sb0_csum(sb);
return rv;

View File

@ -368,6 +368,11 @@ static void getinfo_super1(struct mdinfo *info, mddev_ident_t ident, void *sbv)
info->array.layout = __le32_to_cpu(sb->layout);
info->array.md_minor = -1;
info->array.ctime = __le64_to_cpu(sb->ctime);
info->array.utime = __le64_to_cpu(sb->utime);
info->array.chunk_size = __le32_to_cpu(sb->chunksize)/512;
info->data_offset = __le64_to_cpu(sb->data_offset);
info->component_size = __le64_to_cpu(sb->size);
info->disk.major = 0;
info->disk.minor = 0;
@ -397,6 +402,16 @@ static void getinfo_super1(struct mdinfo *info, mddev_ident_t ident, void *sbv)
strncpy(ident->name, sb->set_name, 32);
ident->name[32] = 0;
if (sb->feature_map & __le32_to_cpu(MD_FEATURE_RESHAPE_ACTIVE)) {
info->reshape_active = 1;
info->reshape_progress = __le64_to_cpu(sb->reshape_position);
info->new_level = __le32_to_cpu(sb->new_level);
info->delta_disks = __le32_to_cpu(sb->delta_disks);
info->new_layout = __le32_to_cpu(sb->new_layout);
info->new_chunk = __le32_to_cpu(sb->new_chunk);
} else
info->reshape_active = 0;
for (i=0; i< __le32_to_cpu(sb->max_dev); i++) {
role = __le16_to_cpu(sb->dev_roles[i]);
if (/*role == 0xFFFF || */role < info->array.raid_disks)
@ -453,6 +468,8 @@ static int update_super1(struct mdinfo *info, void *sbv, char *update, char *dev
}
if (strcmp(update, "uuid") == 0)
memcpy(sb->set_uuid, info->uuid, 16);
if (strcmp(update, "_reshape_progress")==0)
sb->reshape_position = __cpu_to_le64(info->reshape_progress);
sb->sb_csum = calc_sb_1_csum(sb);
return rv;

View File

@ -98,6 +98,8 @@ struct sysarray *sysfs_read(int fd, int devnum, unsigned long options)
if (load_sys(fname, buf))
goto abort;
sra->component_size = strtoull(buf, NULL, 0);
/* sysfs reports "K", but we want sectors */
sra->component_size *= 2;
}
if (options & GET_CHUNK) {
strcpy(base, "chunk_size");
@ -192,6 +194,8 @@ unsigned long long get_component_size(int fd)
* We cannot trust GET_ARRAY_INFO ioctl as its
* size field is only 32bits.
* So look in /sys/block/mdXXX/md/component_size
*
* WARNING: this returns in units of Kilobytes.
*/
struct stat stb;
char fname[50];