From 353632d927d6e46282ef4f51d4ad17940a30cdf0 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Mon, 20 Mar 2006 03:17:31 +0000 Subject: [PATCH] Support restarting of a reshape on --assemble Signed-off-by: Neil Brown --- Assemble.c | 33 ++++++++++++- Grow.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++++---- mdadm.h | 10 ++++ restripe.c | 9 ++-- super0.c | 36 +++++++++++---- super1.c | 17 +++++++ sysfs.c | 4 ++ 7 files changed, 220 insertions(+), 22 deletions(-) diff --git a/Assemble.c b/Assemble.c index cc906db..480c512 100644 --- a/Assemble.c +++ b/Assemble.c @@ -551,7 +551,7 @@ int Assemble(struct supertype *st, char *mddev, int mdfd, int fd; fd = dev_open(devices[chosen_drive].devname, O_RDWR|O_EXCL); if (fd < 0) { - fprintf(stderr, Name ": Could open %s for write - cannot Assemble array.\n", + fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n", devices[chosen_drive].devname); return 1; } @@ -564,6 +564,37 @@ int Assemble(struct supertype *st, char *mddev, int mdfd, close(fd); } + /* If we are in the middle of a reshape we may need to restore saved data + * that was moved aside due to the reshape overwriting live data. + * The code for doing this lives in Grow.c (Grow_restart). NOTE(review): fdlist below is allocated without a NULL check and is not freed in this hunk - confirm. + */ + if (info.reshape_active) { + int err = 0; + int *fdlist = malloc(sizeof(int)* bestcnt); + for (i=0; i= 0) { + fdlist[i] = dev_open(devices[j].devname, O_RDWR|O_EXCL); + if (fdlist[i] < 0) { + fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n", + devices[j].devname); + err = 1; + break; + } + } else + fdlist[i] = -1; + } + if (!err) + err = Grow_restart(st, &info, fdlist, bestcnt); + while (i>0) { + i--; + if (fdlist[i]>=0) close(fdlist[i]); + } + if (err) { + fprintf(stderr, Name ": Failed to restore critical section for reshape, sorry.\n"); + return err; + } + } /* count number of in-sync devices according to the superblock. 
* We must have this number to start the array without -s or -R */ diff --git a/Grow.c b/Grow.c index ece2bda..6d09dc6 100644 --- a/Grow.c +++ b/Grow.c @@ -219,7 +219,7 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int } if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) { - if (errno == ENOMEM) + if (errno == ENOMEM) fprintf(stderr, Name ": Memory allocation failure.\n"); else fprintf(stderr, Name ": bitmaps not supported by this kernel.\n"); @@ -605,12 +605,12 @@ int Grow_reshape(char *devname, int fd, int quiet, * from */ nstripe = ostripe = 0; - while (nstripe+ochunk/512 >= ostripe) { + while (nstripe >= ostripe) { nstripe += nchunk/512; last_block = nstripe * ndata; - ostripe = last_block / odata; + ostripe = last_block / odata / (ochunk/512) * (ochunk/512); } - printf("Need to backup to stripe %llu sectors, %lluK\n", nstripe, last_block/2); + printf("mdadm: Need to backup %lluK of critical section..\n", last_block/2); sra = sysfs_read(fd, 0, GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE); @@ -625,6 +625,11 @@ int Grow_reshape(char *devname, int fd, int quiet, devname); return 1; } + if (sra->spares == 0) { + fprintf(stderr, Name ": %s: Cannot grow - need a spare to backup critical section\n", + devname); + return 1; + } nrdisks = array.nr_disks + sra->spares; /* Now we need to open all these devices so we can read/write. 
@@ -724,13 +729,13 @@ int Grow_reshape(char *devname, int fd, int quiet, goto abort_resume; } /* FIXME write superblocks */ - memcpy(bsb.magic, "md_backups_data-1", 16); + memcpy(bsb.magic, "md_backup_data-1", 16); st->ss->uuid_from_super((int*)&bsb.set_uuid, super); - bsb.mtime = time(0); + bsb.mtime = __cpu_to_le64(time(0)); bsb.arraystart = 0; - bsb.length = last_block; + bsb.length = __cpu_to_le64(last_block); for (i=odisks; idelta_disks < 0) + return 1; /* cannot handle a shrink */ + if (info->new_level != info->array.level || + info->new_layout != info->array.layout || + info->new_chunk != info->array.chunk_size) + return 1; /* Can only handle change in disks */ + + old_disks = info->array.raid_disks - info->delta_disks; + + for (i=old_disks; iss->load_super(st, fdlist[i], &super, NULL)) + continue; + + st->ss->getinfo_super(&dinfo, &id, super); + free(super); super = NULL; + if (lseek64(fdlist[i], + (dinfo.data_offset + dinfo.component_size - 8) <<9, + 0) < 0) + continue; /* Cannot seek */ + if (read(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb)) + continue; /* Cannot read */ + if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0) + continue; + if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb))) + continue; /* bad checksum */ + if (memcmp(bsb.set_uuid,info->uuid, 16) != 0) + continue; /* Wrong uuid */ + + if (info->array.utime > __le64_to_cpu(bsb.mtime) + 3600 || + info->array.utime < __le64_to_cpu(bsb.mtime)) + continue; /* time stamp is too bad */ + + if (__le64_to_cpu(bsb.arraystart) != 0) + continue; /* Can only handle backup from start of array */ + if (__le64_to_cpu(bsb.length) < + info->reshape_progress) + continue; /* No new data here */ + + if (lseek64(fdlist[i], __le64_to_cpu(bsb.devstart)*512, 0)< 0) + continue; /* Cannot seek */ + + /* Now need the data offsets for all devices. 
*/ + offsets = malloc(sizeof(*offsets)*info->array.raid_disks); + for(j=0; jarray.raid_disks; j++) { + if (fdlist[j] < 0) + continue; + if (st->ss->load_super(st, fdlist[j], &super, NULL)) + /* FIXME should this be an error? NOTE(review): offsets[j] stays unset when a superblock fails to load */ + continue; + st->ss->getinfo_super(&dinfo, &id, super); + free(super); super = NULL; + offsets[j] = dinfo.data_offset; + } + printf(Name ": restoring critical section\n"); + + if (restore_stripes(fdlist, offsets, + info->array.raid_disks, + info->new_chunk, + info->new_level, + info->new_layout, + fdlist[i], __le64_to_cpu(bsb.devstart)*512, + 0, __le64_to_cpu(bsb.length)*512)) { + /* didn't succeed, so give up. NOTE(review): this returns 0 and offsets is not freed; the caller in Assemble.c treats only a non-zero return as failure - confirm that 0 is intended here */ + return 0; + } + + /* Ok, so the data is restored. Let's update those superblocks. */ + + for (j=0; jarray.raid_disks; j++) { + if (fdlist[j] < 0) continue; + if (st->ss->load_super(st, fdlist[j], &super, NULL)) + continue; + st->ss->getinfo_super(&dinfo, &id, super); + dinfo.reshape_progress = __le64_to_cpu(bsb.length); + st->ss->update_super(&dinfo, super, "_reshape_progress",NULL,0); + st->ss->store_super(st, fdlist[j], super); + free(super); + } + + /* And we are done! 
*/ + return 0; + } + return err; +} diff --git a/mdadm.h b/mdadm.h index 61d0469..00c280b 100644 --- a/mdadm.h +++ b/mdadm.h @@ -91,6 +91,11 @@ struct mdinfo { mdu_disk_info_t disk; __u64 events; int uuid[4]; + unsigned long long data_offset; + unsigned long long component_size; + int reshape_active; + unsigned long long reshape_progress; + int new_level, delta_disks, new_layout, new_chunk; }; #define Name "mdadm" @@ -225,6 +230,10 @@ extern int save_stripes(int *source, unsigned long long *offsets, int raid_disks, int chunk_size, int level, int layout, int nwrites, int *dest, unsigned long long start, unsigned long long length); +extern int restore_stripes(int *dest, unsigned long long *offsets, + int raid_disks, int chunk_size, int level, int layout, + int source, unsigned long long read_offset, + unsigned long long start, unsigned long long length); #ifndef Sendmail #define Sendmail "/usr/lib/sendmail -t" @@ -302,6 +311,7 @@ extern int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int dela extern int Grow_reshape(char *devname, int fd, int quiet, long long size, int level, int layout, int chunksize, int raid_disks); +extern int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt); extern int Assemble(struct supertype *st, char *mddev, int mdfd, diff --git a/restripe.c b/restripe.c index 94a0e3d..b7b3636 100644 --- a/restripe.c +++ b/restripe.c @@ -162,7 +162,7 @@ int save_stripes(int *source, unsigned long long *offsets, /* Restore data: * We are given: * A list of 'fds' of the active disks. Some may be '-1' for not-available. - * A geometry: raid_disks, chunk_sisze, level, layout + * A geometry: raid_disks, chunk_size, level, layout * An 'fd' to read from. It is already seeked to the right (Read) location. * A start and length. * The length must be a multiple of the stripe size. 
@@ -172,7 +172,7 @@ int save_stripes(int *source, unsigned long long *offsets, */ int restore_stripes(int *dest, unsigned long long *offsets, int raid_disks, int chunk_size, int level, int layout, - int source, + int source, unsigned long long read_offset, unsigned long long start, unsigned long long length) { char *stripe_buf = malloc(raid_disks * chunk_size); @@ -199,8 +199,11 @@ int restore_stripes(int *dest, unsigned long long *offsets, int disk = geo_map(i, start/chunk_size/data_disks, raid_disks, level, layout); blocks[i] = stripes[disk]; + if (lseek64(source, read_offset, 0) != read_offset) + return -1; if (read(source, stripes[disk], chunk_size) != chunk_size) return -1; + read_offset += chunk_size; } /* We have the data, now do the parity */ offset = (start/chunk_size/data_disks) * chunk_size; @@ -311,7 +314,7 @@ main(int argc, char *argv[]) } else { int rv = restore_stripes(fds, offsets, raid_disks, chunk_size, level, layout, - storefd, + storefd, 0ULL, start, length); if (rv != 0) { fprintf(stderr, "test_stripe: restore_stripes returned %d\n", rv); diff --git a/super0.c b/super0.c index 52be23d..99d09a4 100644 --- a/super0.c +++ b/super0.c @@ -80,7 +80,7 @@ void super0_swap_endian(struct mdp_superblock_s *sb) sb->cp_events_hi = sb->cp_events_lo; sb->cp_events_lo = t32; -} +} #ifndef MDASSEMBLE @@ -182,7 +182,7 @@ static void examine_super0(void *sbv) case -1: printf(" Rounding : %dK\n", sb->chunk_size/1024); break; - default: break; + default: break; } printf("\n"); printf(" Number Major Minor RaidDevice State\n"); @@ -279,6 +279,9 @@ static void getinfo_super0(struct mdinfo *info, mddev_ident_t ident, void *sbv) info->array.layout = sb->layout; info->array.md_minor = sb->md_minor; info->array.ctime = sb->ctime; + info->array.utime = sb->utime; + info->array.chunk_size = sb->chunk_size; + info->component_size = sb->size*2; info->disk.state = sb->this_disk.state; info->disk.major = sb->this_disk.major; @@ -287,9 +290,20 @@ static void 
getinfo_super0(struct mdinfo *info, mddev_ident_t ident, void *sbv) info->disk.number = sb->this_disk.number; info->events = md_event(sb); + info->data_offset = 0; uuid_from_super0(info->uuid, sbv); + if (sb->minor_version > 90 && (sb->reshape_position+1) != 0) { + info->reshape_active = 1; + info->reshape_progress = sb->reshape_position; + info->new_level = sb->new_level; + info->delta_disks = sb->delta_disks; + info->new_layout = sb->new_layout; + info->new_chunk = sb->new_chunk; + } else + info->reshape_active = 0; + ident->name[0] = 0; /* work_disks is calculated rather than read directly */ for (i=0; i < MD_SB_DISKS; i++) @@ -403,6 +417,8 @@ static int update_super0(struct mdinfo *info, void *sbv, char *update, char *dev sb->set_uuid2 = info->uuid[2]; sb->set_uuid3 = info->uuid[3]; } + if (strcmp(update, "_reshape_progress")==0) + sb->reshape_position = info->reshape_progress; sb->sb_csum = calc_sb0_csum(sb); return rv; @@ -481,7 +497,7 @@ static void add_to_super0(void *sbv, mdu_disk_info_t *dinfo) { mdp_super_t *sb = sbv; mdp_disk_t *dk = &sb->disks[dinfo->number]; - + dk->number = dinfo->number; dk->major = dinfo->major; dk->minor = dinfo->minor; @@ -508,7 +524,7 @@ static int store_super0(struct supertype *st, int fd, void *sbv) if (dsize < MD_RESERVED_SECTORS*2*512) return 2; - + offset = MD_NEW_SIZE_SECTORS(dsize>>9); offset *= 512; @@ -622,7 +638,7 @@ static int load_super0(struct supertype *st, int fd, void **sbp, char *devname) devname, size); return 1; } - + offset = MD_NEW_SIZE_SECTORS(dsize>>9); offset *= 512; @@ -717,7 +733,7 @@ static int add_internal_bitmap0(struct supertype *st, void *sbv, int chunk, int mdp_super_t *sb = sbv; bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MD_SB_BYTES); - + min_chunk = 4096; /* sub-page chunks don't work yet.. 
*/ bits = (size * 512)/ min_chunk +1; while (bits > max_bits) { @@ -744,7 +760,7 @@ static int add_internal_bitmap0(struct supertype *st, void *sbv, int chunk, int return 1; } - + void locate_bitmap0(struct supertype *st, int fd, void *sbv) { @@ -763,7 +779,7 @@ void locate_bitmap0(struct supertype *st, int fd, void *sbv) if (dsize < MD_RESERVED_SECTORS*2) return; - + offset = MD_NEW_SIZE_SECTORS(dsize>>9); offset *= 512; @@ -796,8 +812,8 @@ int write_bitmap0(struct supertype *st, int fd, void *sbv) } if (dsize < MD_RESERVED_SECTORS*2) - return -1; - + return -1; + offset = MD_NEW_SIZE_SECTORS(dsize>>9); offset *= 512; diff --git a/super1.c b/super1.c index dd852e5..4df6370 100644 --- a/super1.c +++ b/super1.c @@ -368,6 +368,11 @@ static void getinfo_super1(struct mdinfo *info, mddev_ident_t ident, void *sbv) info->array.layout = __le32_to_cpu(sb->layout); info->array.md_minor = -1; info->array.ctime = __le64_to_cpu(sb->ctime); + info->array.utime = __le64_to_cpu(sb->utime); + info->array.chunk_size = __le32_to_cpu(sb->chunksize)/512; + + info->data_offset = __le64_to_cpu(sb->data_offset); + info->component_size = __le64_to_cpu(sb->size); info->disk.major = 0; info->disk.minor = 0; @@ -397,6 +402,16 @@ static void getinfo_super1(struct mdinfo *info, mddev_ident_t ident, void *sbv) strncpy(ident->name, sb->set_name, 32); ident->name[32] = 0; + if (sb->feature_map & __le32_to_cpu(MD_FEATURE_RESHAPE_ACTIVE)) { + info->reshape_active = 1; + info->reshape_progress = __le64_to_cpu(sb->reshape_position); + info->new_level = __le32_to_cpu(sb->new_level); + info->delta_disks = __le32_to_cpu(sb->delta_disks); + info->new_layout = __le32_to_cpu(sb->new_layout); + info->new_chunk = __le32_to_cpu(sb->new_chunk); + } else + info->reshape_active = 0; + for (i=0; i< __le32_to_cpu(sb->max_dev); i++) { role = __le16_to_cpu(sb->dev_roles[i]); if (/*role == 0xFFFF || */role < info->array.raid_disks) @@ -453,6 +468,8 @@ static int update_super1(struct mdinfo *info, void *sbv, char 
*update, char *dev } if (strcmp(update, "uuid") == 0) memcpy(sb->set_uuid, info->uuid, 16); + if (strcmp(update, "_reshape_progress")==0) + sb->reshape_position = __cpu_to_le64(info->reshape_progress); sb->sb_csum = calc_sb_1_csum(sb); return rv; diff --git a/sysfs.c b/sysfs.c index 9894760..1774509 100644 --- a/sysfs.c +++ b/sysfs.c @@ -98,6 +98,8 @@ struct sysarray *sysfs_read(int fd, int devnum, unsigned long options) if (load_sys(fname, buf)) goto abort; sra->component_size = strtoull(buf, NULL, 0); + /* sysfs reports "K", but we want sectors */ + sra->component_size *= 2; } if (options & GET_CHUNK) { strcpy(base, "chunk_size"); @@ -192,6 +194,8 @@ unsigned long long get_component_size(int fd) * We cannot trust GET_ARRAY_INFO ioctl as it's * size field is only 32bits. * So look in /sys/block/mdXXX/md/component_size + * + * WARNING: this returns in units of Kilobytes. */ struct stat stb; char fname[50];