From 2eba849621011a5160b4597f82aa4ed0de7d4e64 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 1 Jul 2013 15:10:05 +1000 Subject: [PATCH] Manage: check alignment when stopping an array undergoing reshape. To be able to revert-reshape of raid4/5/6 which is changing the number of devices, the reshape must have been stopped on a multiple of the old and new stripe sizes. The kernel only enforces the new stripe size multiple. So we enforce the old-stripe-size multiple by careful use of "sync_max" and monitoring "reshape_position". Signed-off-by: NeilBrown --- Grow.c | 11 ------- Manage.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- lib.c | 11 +++++++ mdadm.h | 4 +++ sysfs.c | 43 ++++++++++++++++++++++++++++ 5 files changed, 144 insertions(+), 12 deletions(-) diff --git a/Grow.c b/Grow.c index 0e20b24..e27d29f 100644 --- a/Grow.c +++ b/Grow.c @@ -929,17 +929,6 @@ int reshape_open_backup_file(char *backup_file, return 1; } -unsigned long GCD(unsigned long a, unsigned long b) -{ - while (a != b) { - if (a < b) - b -= a; - if (b < a) - a -= b; - } - return a; -} - unsigned long compute_backup_blocks(int nchunk, int ochunk, unsigned int ndata, unsigned int odata) { diff --git a/Manage.c b/Manage.c index 230124e..ed3f61d 100644 --- a/Manage.c +++ b/Manage.c @@ -206,6 +206,8 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry) char container[32]; int err; int count; + char buf[32]; + unsigned long long rd1, rd2; if (will_retry && verbose == 0) verbose = -1; @@ -226,7 +228,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry) /* Get EXCL access first. If this fails, then attempting * to stop is probably a bad idea. 
*/ - mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_VERSION); + mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_COMPONENT|GET_VERSION); if (mdi && is_subarray(mdi->text_version)) { char *sl; strncpy(container, mdi->text_version+1, sizeof(container)); @@ -331,6 +333,89 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry) } } + /* If the array is undergoing a reshape which changes the number + * of devices, then it would be nice to stop it at a point where + * it has completed a full number of stripes in both old and + * new layouts as this will allow the reshape to be reverted. + * So if 'sync_action' is "reshape" and 'raid_disks' shows two + * different numbers, then + * - freeze reshape + * - set sync_max to next multiple of both data_disks and + * chunk sizes (or next but one) + * - unfreeze reshape + * - wait on 'sync_completed' for that point to be reached. + */ + if (mdi && (mdi->array.level >= 4 && mdi->array.level <= 6) && + sysfs_attribute_available(mdi, NULL, "sync_action") && + sysfs_attribute_available(mdi, NULL, "reshape_direction") && + sysfs_get_str(mdi, NULL, "sync_action", buf, 20) > 0 && + strcmp(buf, "reshape\n") == 0 && + sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2 && + sysfs_set_str(mdi, NULL, "sync_action", "frozen") == 0) { + /* Array is frozen */ + unsigned long long position, curr; + unsigned long long chunk1, chunk2; + unsigned long long rddiv, chunkdiv; + unsigned long long sectors; + int backwards = 0; + int delay; + int scfd; + + rd1 -= mdi->array.level == 6 ? 2 : 1; + rd2 -= mdi->array.level == 6 ? 
2 : 1; + sysfs_get_str(mdi, NULL, "reshape_direction", buf, sizeof(buf)); + if (strncmp(buf, "back", 4) == 0) + backwards = 1; + sysfs_get_ll(mdi, NULL, "reshape_position", &position); + sysfs_get_two(mdi, NULL, "chunk_size", &chunk1, &chunk2); + chunk1 /= 512; + chunk2 /= 512; + rddiv = GCD(rd1, rd2); + chunkdiv = GCD(chunk1, chunk2); + sectors = (chunk1/chunkdiv) * chunk2 * (rd1/rddiv) * rd2; + + if (backwards) { + /* Need to subtract 'reshape_position' from + * array size to get equivalent of sync_max. + * Size calculation based on raid5_size in kernel. + */ + unsigned long long size = mdi->component_size; + size &= ~(chunk1-1); + size &= ~(chunk2-1); + /* rd1 must be smaller */ + size *= rd1; + position = size - position; + position = (position/sectors + 2) * sectors; + sysfs_set_num(mdi, NULL, "sync_max", position/rd1); + position = size - position; + } else { + position = (position/sectors + 2) * sectors; + sysfs_set_num(mdi, NULL, "sync_max", position/rd1); + } + sysfs_set_str(mdi, NULL, "sync_action", "idle"); + + /* That should have set things going again. Now we + * wait a little while (5 seconds) for sync_completed + * to reach the target. + */ + delay = 500; + scfd = sysfs_open(mdi->sys_name, NULL, "sync_completed"); + while (scfd >= 0 && delay > 0) { + sysfs_fd_get_str(scfd, buf, sizeof(buf)); + if (strncmp(buf, "none", 4) == 0) + break; + sysfs_get_ll(mdi, NULL, "reshape_position", &curr); + if (!backwards && curr >= position) + break; + if (backwards && curr <= position) + break; + sysfs_wait(scfd, &delay); + } + if (scfd >= 0) + close(scfd); + + } + /* As we have an O_EXCL open, any use of the device * which blocks STOP_ARRAY is probably a transient use, * so it is reasonable to retry for a while - 5 seconds. 
diff --git a/lib.c b/lib.c index fa6f223..840c11f 100644 --- a/lib.c +++ b/lib.c @@ -396,3 +396,14 @@ int use_udev(void) } return use; } + +unsigned long GCD(unsigned long a, unsigned long b) +{ + /* Euclid by remainder: same result as repeated subtraction for positive inputs, but also terminates when an argument is 0 */ + while (b != 0) { + unsigned long rem = a % b; + a = b; + b = rem; + } + return a; +} diff --git a/mdadm.h b/mdadm.h index 13ea26a..13aa84a 100644 --- a/mdadm.h +++ b/mdadm.h @@ -538,6 +538,9 @@ extern int sysfs_get_fd(struct mdinfo *sra, struct mdinfo *dev, extern int sysfs_fd_get_ll(int fd, unsigned long long *val); extern int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev, char *name, unsigned long long *val); +extern int sysfs_fd_get_two(int fd, unsigned long long *v1, unsigned long long *v2); +extern int sysfs_get_two(struct mdinfo *sra, struct mdinfo *dev, + char *name, unsigned long long *v1, unsigned long long *v2); extern int sysfs_fd_get_str(int fd, char *val, int size); extern int sysfs_attribute_available(struct mdinfo *sra, struct mdinfo *dev, char *name); @@ -1277,6 +1280,7 @@ extern char *conf_word(FILE *file, int allow_key); extern void print_quoted(char *str); extern void print_escape(char *str); extern int use_udev(void); +extern unsigned long GCD(unsigned long a, unsigned long b); /* greatest common divisor; defined in lib.c */ extern int conf_name_is_free(char *name); extern int conf_verify_devnames(struct mddev_ident *array_list); extern int devname_matches(char *name, char *match); diff --git a/sysfs.c b/sysfs.c index b7d41a5..19d56aa 100644 --- a/sysfs.c +++ b/sysfs.c @@ -513,6 +513,49 @@ int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev, return n; } +int sysfs_fd_get_two(int fd, unsigned long long *v1, unsigned long long *v2) +{ + /* Parse the two numbers in this sysfs file, written either as + * NNN (NNN) + * or + * NNN / NNN + * Returns 2 if both parsed, 1 if only the first did (*v2 is then set to *v1), -1 if the first is malformed, -2 if the read fails or the contents do not fit in buf. */ + char buf[80]; + int n; + char *ep, *ep2; + + lseek(fd, 0, 0); + n = read(fd, buf, sizeof(buf)); + if (n <= 0 || n >= (int)sizeof(buf)) + return -2; + buf[n] = 0; /* in bounds: n < sizeof(buf) was checked above */ + *v1 = strtoull(buf, &ep, 0); + if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' ')) + return -1; + while (*ep == ' ' || *ep == '/' || 
*ep == '(') + ep++; + *v2 = strtoull(ep, &ep2, 0); + if (ep2 == ep || (*ep2 != 0 && *ep2 != '\n' && *ep2 != ' ' && *ep2 != ')')) { + *v2 = *v1; + return 1; + } + return 2; +} + +int sysfs_get_two(struct mdinfo *sra, struct mdinfo *dev, + char *name, unsigned long long *v1, unsigned long long *v2) +{ + int n; /* result of sysfs_fd_get_two(), handed back to the caller */ + int fd; + + fd = sysfs_get_fd(sra, dev, name); + if (fd < 0) + return -1; /* no usable fd for this attribute */ + n = sysfs_fd_get_two(fd, v1, v2); + close(fd); + return n; +} + int sysfs_fd_get_str(int fd, char *val, int size) { int n;