Support restarting of a reshape on --assemble
Signed-off-by: Neil Brown <neilb@suse.de>
This commit is contained in:
parent
e86c9dd6d8
commit
353632d927
33
Assemble.c
33
Assemble.c
|
@ -551,7 +551,7 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
|
|||
int fd;
|
||||
fd = dev_open(devices[chosen_drive].devname, O_RDWR|O_EXCL);
|
||||
if (fd < 0) {
|
||||
fprintf(stderr, Name ": Could open %s for write - cannot Assemble array.\n",
|
||||
fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n",
|
||||
devices[chosen_drive].devname);
|
||||
return 1;
|
||||
}
|
||||
|
@ -564,6 +564,37 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
|
|||
close(fd);
|
||||
}
|
||||
|
||||
/* If we are in the middle of a reshape we may need to restore saved data
|
||||
* that was moved aside due to the reshape overwriting live data
|
||||
* The code for doing this lives in Grow.c
|
||||
*/
|
||||
if (info.reshape_active) {
|
||||
int err = 0;
|
||||
int *fdlist = malloc(sizeof(int)* bestcnt);
|
||||
for (i=0; i<bestcnt; i++) {
|
||||
int j = best[i];
|
||||
if (j >= 0) {
|
||||
fdlist[i] = dev_open(devices[j].devname, O_RDWR|O_EXCL);
|
||||
if (fdlist[i] < 0) {
|
||||
fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n",
|
||||
devices[j].devname);
|
||||
err = 1;
|
||||
break;
|
||||
}
|
||||
} else
|
||||
fdlist[i] = -1;
|
||||
}
|
||||
if (!err)
|
||||
err = Grow_restart(st, &info, fdlist, bestcnt);
|
||||
while (i>0) {
|
||||
i--;
|
||||
if (fdlist[i]>=0) close(fdlist[i]);
|
||||
}
|
||||
if (err) {
|
||||
fprintf(stderr, Name ": Failed to restore critical section for reshape, sorry.\n");
|
||||
return err;
|
||||
}
|
||||
}
|
||||
/* count number of in-sync devices according to the superblock.
|
||||
* We must have this number to start the array without -s or -R
|
||||
*/
|
||||
|
|
133
Grow.c
133
Grow.c
|
@ -219,7 +219,7 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
|
|||
}
|
||||
|
||||
if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) {
|
||||
if (errno == ENOMEM)
|
||||
if (errno == ENOMEM)
|
||||
fprintf(stderr, Name ": Memory allocation failure.\n");
|
||||
else
|
||||
fprintf(stderr, Name ": bitmaps not supported by this kernel.\n");
|
||||
|
@ -605,12 +605,12 @@ int Grow_reshape(char *devname, int fd, int quiet,
|
|||
* from
|
||||
*/
|
||||
nstripe = ostripe = 0;
|
||||
while (nstripe+ochunk/512 >= ostripe) {
|
||||
while (nstripe >= ostripe) {
|
||||
nstripe += nchunk/512;
|
||||
last_block = nstripe * ndata;
|
||||
ostripe = last_block / odata;
|
||||
ostripe = last_block / odata / (ochunk/512) * (ochunk/512);
|
||||
}
|
||||
printf("Need to backup to stripe %llu sectors, %lluK\n", nstripe, last_block/2);
|
||||
printf("mdadm: Need to backup %lluK of critical section..\n", last_block/2);
|
||||
|
||||
sra = sysfs_read(fd, 0,
|
||||
GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE);
|
||||
|
@ -625,6 +625,11 @@ int Grow_reshape(char *devname, int fd, int quiet,
|
|||
devname);
|
||||
return 1;
|
||||
}
|
||||
if (sra->spares == 0) {
|
||||
fprintf(stderr, Name ": %s: Cannot grow - need a spare to backup critical section\n",
|
||||
devname);
|
||||
return 1;
|
||||
}
|
||||
|
||||
nrdisks = array.nr_disks + sra->spares;
|
||||
/* Now we need to open all these devices so we can read/write.
|
||||
|
@ -724,13 +729,13 @@ int Grow_reshape(char *devname, int fd, int quiet,
|
|||
goto abort_resume;
|
||||
}
|
||||
/* FIXME write superblocks */
|
||||
memcpy(bsb.magic, "md_backups_data-1", 16);
|
||||
memcpy(bsb.magic, "md_backup_data-1", 16);
|
||||
st->ss->uuid_from_super((int*)&bsb.set_uuid, super);
|
||||
bsb.mtime = time(0);
|
||||
bsb.mtime = __cpu_to_le64(time(0));
|
||||
bsb.arraystart = 0;
|
||||
bsb.length = last_block;
|
||||
bsb.length = __cpu_to_le64(last_block);
|
||||
for (i=odisks; i<d ; i++) {
|
||||
bsb.devstart = offsets[i];
|
||||
bsb.devstart = __cpu_to_le64(offsets[i]);
|
||||
bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
|
||||
lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0);
|
||||
write(fdlist[i], &bsb, sizeof(bsb));
|
||||
|
@ -769,6 +774,7 @@ int Grow_reshape(char *devname, int fd, int quiet,
|
|||
free(fdlist);
|
||||
free(offsets);
|
||||
|
||||
printf("mdadm: ... critical section passed.\n");
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
|
@ -785,3 +791,114 @@ int Grow_reshape(char *devname, int fd, int quiet,
|
|||
return 1;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* If any spare contains md_back_data-1 which is recent wrt mtime,
|
||||
* write that data into the array and update the super blocks with
|
||||
* the new reshape_progress
|
||||
*/
|
||||
int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt)
|
||||
{
|
||||
int i, j;
|
||||
int old_disks;
|
||||
int err = 0;
|
||||
unsigned long long *offsets;
|
||||
|
||||
if (info->delta_disks < 0)
|
||||
return 1; /* cannot handle a shrink */
|
||||
if (info->new_level != info->array.level ||
|
||||
info->new_layout != info->array.layout ||
|
||||
info->new_chunk != info->array.chunk_size)
|
||||
return 1; /* Can only handle change in disks */
|
||||
|
||||
old_disks = info->array.raid_disks - info->delta_disks;
|
||||
|
||||
for (i=old_disks; i<cnt; i++) {
|
||||
void *super = NULL;
|
||||
struct mdinfo dinfo;
|
||||
struct mddev_ident_s id;
|
||||
struct mdp_backup_super bsb;
|
||||
|
||||
/* This was a spare and may have some saved data on it.
|
||||
* Load the superblock, find and load the
|
||||
* backup_super_block.
|
||||
* If either fail, go on to next device.
|
||||
* If the backup contains no new info, just return
|
||||
* Else retore data and update all superblocks
|
||||
*/
|
||||
if (fdlist[i] < 0)
|
||||
continue;
|
||||
if (st->ss->load_super(st, fdlist[i], &super, NULL))
|
||||
continue;
|
||||
|
||||
st->ss->getinfo_super(&dinfo, &id, super);
|
||||
free(super); super = NULL;
|
||||
if (lseek64(fdlist[i],
|
||||
(dinfo.data_offset + dinfo.component_size - 8) <<9,
|
||||
0) < 0)
|
||||
continue; /* Cannot seek */
|
||||
if (read(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb))
|
||||
continue; /* Cannot read */
|
||||
if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0)
|
||||
continue;
|
||||
if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb)))
|
||||
continue; /* bad checksum */
|
||||
if (memcmp(bsb.set_uuid,info->uuid, 16) != 0)
|
||||
continue; /* Wrong uuid */
|
||||
|
||||
if (info->array.utime > __le64_to_cpu(bsb.mtime) + 3600 ||
|
||||
info->array.utime < __le64_to_cpu(bsb.mtime))
|
||||
continue; /* time stamp is too bad */
|
||||
|
||||
if (__le64_to_cpu(bsb.arraystart) != 0)
|
||||
continue; /* Can only handle backup from start of array */
|
||||
if (__le64_to_cpu(bsb.length) <
|
||||
info->reshape_progress)
|
||||
continue; /* No new data here */
|
||||
|
||||
if (lseek64(fdlist[i], __le64_to_cpu(bsb.devstart)*512, 0)< 0)
|
||||
continue; /* Cannot seek */
|
||||
|
||||
/* Now need the data offsets for all devices. */
|
||||
offsets = malloc(sizeof(*offsets)*info->array.raid_disks);
|
||||
for(j=0; j<info->array.raid_disks; j++) {
|
||||
if (fdlist[j] < 0)
|
||||
continue;
|
||||
if (st->ss->load_super(st, fdlist[j], &super, NULL))
|
||||
/* FIXME should be this be an error */
|
||||
continue;
|
||||
st->ss->getinfo_super(&dinfo, &id, super);
|
||||
free(super); super = NULL;
|
||||
offsets[j] = dinfo.data_offset;
|
||||
}
|
||||
printf(Name ": restoring critical section\n");
|
||||
|
||||
if (restore_stripes(fdlist, offsets,
|
||||
info->array.raid_disks,
|
||||
info->new_chunk,
|
||||
info->new_level,
|
||||
info->new_layout,
|
||||
fdlist[i], __le64_to_cpu(bsb.devstart)*512,
|
||||
0, __le64_to_cpu(bsb.length)*512)) {
|
||||
/* didn't succeed, so giveup */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Ok, so the data is restored. Let's update those superblocks. */
|
||||
|
||||
for (j=0; j<info->array.raid_disks; j++) {
|
||||
if (fdlist[j] < 0) continue;
|
||||
if (st->ss->load_super(st, fdlist[j], &super, NULL))
|
||||
continue;
|
||||
st->ss->getinfo_super(&dinfo, &id, super);
|
||||
dinfo.reshape_progress = __le64_to_cpu(bsb.length);
|
||||
st->ss->update_super(&dinfo, super, "_reshape_progress",NULL,0);
|
||||
st->ss->store_super(st, fdlist[j], super);
|
||||
free(super);
|
||||
}
|
||||
|
||||
/* And we are done! */
|
||||
return 0;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
|
10
mdadm.h
10
mdadm.h
|
@ -91,6 +91,11 @@ struct mdinfo {
|
|||
mdu_disk_info_t disk;
|
||||
__u64 events;
|
||||
int uuid[4];
|
||||
unsigned long long data_offset;
|
||||
unsigned long long component_size;
|
||||
int reshape_active;
|
||||
unsigned long long reshape_progress;
|
||||
int new_level, delta_disks, new_layout, new_chunk;
|
||||
};
|
||||
|
||||
#define Name "mdadm"
|
||||
|
@ -225,6 +230,10 @@ extern int save_stripes(int *source, unsigned long long *offsets,
|
|||
int raid_disks, int chunk_size, int level, int layout,
|
||||
int nwrites, int *dest,
|
||||
unsigned long long start, unsigned long long length);
|
||||
extern int restore_stripes(int *dest, unsigned long long *offsets,
|
||||
int raid_disks, int chunk_size, int level, int layout,
|
||||
int source, unsigned long long read_offset,
|
||||
unsigned long long start, unsigned long long length);
|
||||
|
||||
#ifndef Sendmail
|
||||
#define Sendmail "/usr/lib/sendmail -t"
|
||||
|
@ -302,6 +311,7 @@ extern int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int dela
|
|||
extern int Grow_reshape(char *devname, int fd, int quiet,
|
||||
long long size,
|
||||
int level, int layout, int chunksize, int raid_disks);
|
||||
extern int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt);
|
||||
|
||||
|
||||
extern int Assemble(struct supertype *st, char *mddev, int mdfd,
|
||||
|
|
|
@ -162,7 +162,7 @@ int save_stripes(int *source, unsigned long long *offsets,
|
|||
/* Restore data:
|
||||
* We are given:
|
||||
* A list of 'fds' of the active disks. Some may be '-1' for not-available.
|
||||
* A geometry: raid_disks, chunk_sisze, level, layout
|
||||
* A geometry: raid_disks, chunk_size, level, layout
|
||||
* An 'fd' to read from, and the byte offset (read_offset) within it where the saved data starts.
|
||||
* A start and length.
|
||||
* The length must be a multiple of the stripe size.
|
||||
|
@ -172,7 +172,7 @@ int save_stripes(int *source, unsigned long long *offsets,
|
|||
*/
|
||||
int restore_stripes(int *dest, unsigned long long *offsets,
|
||||
int raid_disks, int chunk_size, int level, int layout,
|
||||
int source,
|
||||
int source, unsigned long long read_offset,
|
||||
unsigned long long start, unsigned long long length)
|
||||
{
|
||||
char *stripe_buf = malloc(raid_disks * chunk_size);
|
||||
|
@ -199,8 +199,11 @@ int restore_stripes(int *dest, unsigned long long *offsets,
|
|||
int disk = geo_map(i, start/chunk_size/data_disks,
|
||||
raid_disks, level, layout);
|
||||
blocks[i] = stripes[disk];
|
||||
if (lseek64(source, read_offset, 0) != read_offset)
|
||||
return -1;
|
||||
if (read(source, stripes[disk], chunk_size) != chunk_size)
|
||||
return -1;
|
||||
read_offset += chunk_size;
|
||||
}
|
||||
/* We have the data, now do the parity */
|
||||
offset = (start/chunk_size/data_disks) * chunk_size;
|
||||
|
@ -311,7 +314,7 @@ main(int argc, char *argv[])
|
|||
} else {
|
||||
int rv = restore_stripes(fds, offsets,
|
||||
raid_disks, chunk_size, level, layout,
|
||||
storefd,
|
||||
storefd, 0ULL,
|
||||
start, length);
|
||||
if (rv != 0) {
|
||||
fprintf(stderr, "test_stripe: restore_stripes returned %d\n", rv);
|
||||
|
|
36
super0.c
36
super0.c
|
@ -80,7 +80,7 @@ void super0_swap_endian(struct mdp_superblock_s *sb)
|
|||
sb->cp_events_hi = sb->cp_events_lo;
|
||||
sb->cp_events_lo = t32;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef MDASSEMBLE
|
||||
|
||||
|
@ -182,7 +182,7 @@ static void examine_super0(void *sbv)
|
|||
case -1:
|
||||
printf(" Rounding : %dK\n", sb->chunk_size/1024);
|
||||
break;
|
||||
default: break;
|
||||
default: break;
|
||||
}
|
||||
printf("\n");
|
||||
printf(" Number Major Minor RaidDevice State\n");
|
||||
|
@ -279,6 +279,9 @@ static void getinfo_super0(struct mdinfo *info, mddev_ident_t ident, void *sbv)
|
|||
info->array.layout = sb->layout;
|
||||
info->array.md_minor = sb->md_minor;
|
||||
info->array.ctime = sb->ctime;
|
||||
info->array.utime = sb->utime;
|
||||
info->array.chunk_size = sb->chunk_size;
|
||||
info->component_size = sb->size*2;
|
||||
|
||||
info->disk.state = sb->this_disk.state;
|
||||
info->disk.major = sb->this_disk.major;
|
||||
|
@ -287,9 +290,20 @@ static void getinfo_super0(struct mdinfo *info, mddev_ident_t ident, void *sbv)
|
|||
info->disk.number = sb->this_disk.number;
|
||||
|
||||
info->events = md_event(sb);
|
||||
info->data_offset = 0;
|
||||
|
||||
uuid_from_super0(info->uuid, sbv);
|
||||
|
||||
if (sb->minor_version > 90 && (sb->reshape_position+1) != 0) {
|
||||
info->reshape_active = 1;
|
||||
info->reshape_progress = sb->reshape_position;
|
||||
info->new_level = sb->new_level;
|
||||
info->delta_disks = sb->delta_disks;
|
||||
info->new_layout = sb->new_layout;
|
||||
info->new_chunk = sb->new_chunk;
|
||||
} else
|
||||
info->reshape_active = 0;
|
||||
|
||||
ident->name[0] = 0;
|
||||
/* work_disks is calculated rather than read directly */
|
||||
for (i=0; i < MD_SB_DISKS; i++)
|
||||
|
@ -403,6 +417,8 @@ static int update_super0(struct mdinfo *info, void *sbv, char *update, char *dev
|
|||
sb->set_uuid2 = info->uuid[2];
|
||||
sb->set_uuid3 = info->uuid[3];
|
||||
}
|
||||
if (strcmp(update, "_reshape_progress")==0)
|
||||
sb->reshape_position = info->reshape_progress;
|
||||
|
||||
sb->sb_csum = calc_sb0_csum(sb);
|
||||
return rv;
|
||||
|
@ -481,7 +497,7 @@ static void add_to_super0(void *sbv, mdu_disk_info_t *dinfo)
|
|||
{
|
||||
mdp_super_t *sb = sbv;
|
||||
mdp_disk_t *dk = &sb->disks[dinfo->number];
|
||||
|
||||
|
||||
dk->number = dinfo->number;
|
||||
dk->major = dinfo->major;
|
||||
dk->minor = dinfo->minor;
|
||||
|
@ -508,7 +524,7 @@ static int store_super0(struct supertype *st, int fd, void *sbv)
|
|||
|
||||
if (dsize < MD_RESERVED_SECTORS*2*512)
|
||||
return 2;
|
||||
|
||||
|
||||
offset = MD_NEW_SIZE_SECTORS(dsize>>9);
|
||||
|
||||
offset *= 512;
|
||||
|
@ -622,7 +638,7 @@ static int load_super0(struct supertype *st, int fd, void **sbp, char *devname)
|
|||
devname, size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
offset = MD_NEW_SIZE_SECTORS(dsize>>9);
|
||||
|
||||
offset *= 512;
|
||||
|
@ -717,7 +733,7 @@ static int add_internal_bitmap0(struct supertype *st, void *sbv, int chunk, int
|
|||
mdp_super_t *sb = sbv;
|
||||
bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MD_SB_BYTES);
|
||||
|
||||
|
||||
|
||||
min_chunk = 4096; /* sub-page chunks don't work yet.. */
|
||||
bits = (size * 512)/ min_chunk +1;
|
||||
while (bits > max_bits) {
|
||||
|
@ -744,7 +760,7 @@ static int add_internal_bitmap0(struct supertype *st, void *sbv, int chunk, int
|
|||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void locate_bitmap0(struct supertype *st, int fd, void *sbv)
|
||||
{
|
||||
|
@ -763,7 +779,7 @@ void locate_bitmap0(struct supertype *st, int fd, void *sbv)
|
|||
|
||||
if (dsize < MD_RESERVED_SECTORS*2)
|
||||
return;
|
||||
|
||||
|
||||
offset = MD_NEW_SIZE_SECTORS(dsize>>9);
|
||||
|
||||
offset *= 512;
|
||||
|
@ -796,8 +812,8 @@ int write_bitmap0(struct supertype *st, int fd, void *sbv)
|
|||
}
|
||||
|
||||
if (dsize < MD_RESERVED_SECTORS*2)
|
||||
return -1;
|
||||
|
||||
return -1;
|
||||
|
||||
offset = MD_NEW_SIZE_SECTORS(dsize>>9);
|
||||
|
||||
offset *= 512;
|
||||
|
|
17
super1.c
17
super1.c
|
@ -368,6 +368,11 @@ static void getinfo_super1(struct mdinfo *info, mddev_ident_t ident, void *sbv)
|
|||
info->array.layout = __le32_to_cpu(sb->layout);
|
||||
info->array.md_minor = -1;
|
||||
info->array.ctime = __le64_to_cpu(sb->ctime);
|
||||
info->array.utime = __le64_to_cpu(sb->utime);
|
||||
info->array.chunk_size = __le32_to_cpu(sb->chunksize)/512;
|
||||
|
||||
info->data_offset = __le64_to_cpu(sb->data_offset);
|
||||
info->component_size = __le64_to_cpu(sb->size);
|
||||
|
||||
info->disk.major = 0;
|
||||
info->disk.minor = 0;
|
||||
|
@ -397,6 +402,16 @@ static void getinfo_super1(struct mdinfo *info, mddev_ident_t ident, void *sbv)
|
|||
strncpy(ident->name, sb->set_name, 32);
|
||||
ident->name[32] = 0;
|
||||
|
||||
if (sb->feature_map & __le32_to_cpu(MD_FEATURE_RESHAPE_ACTIVE)) {
|
||||
info->reshape_active = 1;
|
||||
info->reshape_progress = __le64_to_cpu(sb->reshape_position);
|
||||
info->new_level = __le32_to_cpu(sb->new_level);
|
||||
info->delta_disks = __le32_to_cpu(sb->delta_disks);
|
||||
info->new_layout = __le32_to_cpu(sb->new_layout);
|
||||
info->new_chunk = __le32_to_cpu(sb->new_chunk);
|
||||
} else
|
||||
info->reshape_active = 0;
|
||||
|
||||
for (i=0; i< __le32_to_cpu(sb->max_dev); i++) {
|
||||
role = __le16_to_cpu(sb->dev_roles[i]);
|
||||
if (/*role == 0xFFFF || */role < info->array.raid_disks)
|
||||
|
@ -453,6 +468,8 @@ static int update_super1(struct mdinfo *info, void *sbv, char *update, char *dev
|
|||
}
|
||||
if (strcmp(update, "uuid") == 0)
|
||||
memcpy(sb->set_uuid, info->uuid, 16);
|
||||
if (strcmp(update, "_reshape_progress")==0)
|
||||
sb->reshape_position = __cpu_to_le64(info->reshape_progress);
|
||||
|
||||
sb->sb_csum = calc_sb_1_csum(sb);
|
||||
return rv;
|
||||
|
|
4
sysfs.c
4
sysfs.c
|
@ -98,6 +98,8 @@ struct sysarray *sysfs_read(int fd, int devnum, unsigned long options)
|
|||
if (load_sys(fname, buf))
|
||||
goto abort;
|
||||
sra->component_size = strtoull(buf, NULL, 0);
|
||||
/* sysfs reports "K", but we want sectors */
|
||||
sra->component_size *= 2;
|
||||
}
|
||||
if (options & GET_CHUNK) {
|
||||
strcpy(base, "chunk_size");
|
||||
|
@ -192,6 +194,8 @@ unsigned long long get_component_size(int fd)
|
|||
* We cannot trust GET_ARRAY_INFO ioctl as its
|
||||
* size field is only 32bits.
|
||||
* So look in /sys/block/mdXXX/md/component_size
|
||||
*
|
||||
* WARNING: this returns in units of Kilobytes.
|
||||
*/
|
||||
struct stat stb;
|
||||
char fname[50];
|
||||
|
|
Loading…
Reference in New Issue