Manage: check alignment when stopping an array undergoing reshape.

To be able to revert-reshape of raid4/5/6 which is changing
the number of devices, the reshape must have been stopped on a multiple
of the old and new stripe sizes.

The kernel only enforces the new stripe size multiple.

So we enforce the old-stripe-size multiple by careful use of
"sync_max" and monitoring "reshape_position".

Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
NeilBrown 2013-07-01 15:10:05 +10:00
parent efc67e8e9f
commit 2eba849621
5 changed files with 144 additions and 12 deletions

11
Grow.c
View File

@ -929,17 +929,6 @@ int reshape_open_backup_file(char *backup_file,
return 1;
}
/*
 * GCD - greatest common divisor of two unsigned values.
 *
 * Uses the remainder form of Euclid's algorithm, so it terminates for
 * every input (including a zero argument) in a logarithmic number of
 * steps.  GCD(0, n) and GCD(n, 0) both yield n; GCD(0, 0) yields 0, so
 * callers that divide by the result must make sure at least one
 * argument is non-zero.
 */
unsigned long GCD(unsigned long a, unsigned long b)
{
	while (b != 0) {
		unsigned long rem = a % b;

		a = b;
		b = rem;
	}
	return a;
}
unsigned long compute_backup_blocks(int nchunk, int ochunk,
unsigned int ndata, unsigned int odata)
{

View File

@ -206,6 +206,8 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
char container[32];
int err;
int count;
char buf[32];
unsigned long long rd1, rd2;
if (will_retry && verbose == 0)
verbose = -1;
@ -226,7 +228,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
/* Get EXCL access first. If this fails, then attempting
* to stop is probably a bad idea.
*/
mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_VERSION);
mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_COMPONENT|GET_VERSION);
if (mdi && is_subarray(mdi->text_version)) {
char *sl;
strncpy(container, mdi->text_version+1, sizeof(container));
@ -331,6 +333,89 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
}
}
/* If the array is undergoing a reshape which changes the number
* of devices, then it would be nice to stop it at a point where
* it has completed a full number of stripes in both old and
* new layouts as this will allow the reshape to be reverted.
* So if 'sync_action' is "reshape" and 'raid_disks' shows two
* different numbers, then
* - freeze reshape
* - set sync_max to next multiple of both data_disks and
* chunk sizes (or next but one)
* - unfreeze reshape
* - wait on 'sync_completed' for that point to be reached.
*/
if (mdi && (mdi->array.level >= 4 && mdi->array.level <= 6) &&
sysfs_attribute_available(mdi, NULL, "sync_action") &&
sysfs_attribute_available(mdi, NULL, "reshape_direction") &&
sysfs_get_str(mdi, NULL, "sync_action", buf, 20) > 0 &&
strcmp(buf, "reshape\n") == 0 &&
sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2 &&
sysfs_set_str(mdi, NULL, "sync_action", "frozen") == 0) {
/* Array is frozen */
unsigned long long position, curr;
unsigned long long chunk1, chunk2;
unsigned long long rddiv, chunkdiv;
unsigned long long sectors;
int backwards = 0;
int delay;
int scfd;
rd1 -= mdi->array.level == 6 ? 2 : 1;
rd2 -= mdi->array.level == 6 ? 2 : 1;
sysfs_get_str(mdi, NULL, "reshape_direction", buf, sizeof(buf));
if (strncmp(buf, "back", 4) == 0)
backwards = 1;
sysfs_get_ll(mdi, NULL, "reshape_position", &position);
sysfs_get_two(mdi, NULL, "chunk_size", &chunk1, &chunk2);
chunk1 /= 512;
chunk2 /= 512;
rddiv = GCD(rd1, rd2);
chunkdiv = GCD(chunk1, chunk2);
sectors = (chunk1/chunkdiv) * chunk2 * (rd1/rddiv) * rd2;
if (backwards) {
/* Need to subtract 'reshape_position' from
* array size to get equivalent of sync_max.
* Size calculation based on raid5_size in kernel.
*/
unsigned long long size = mdi->component_size;
size &= ~(chunk1-1);
size &= ~(chunk2-1);
/* rd1 must be smaller */
size *= rd1;
position = size - position;
position = (position/sectors + 2) * sectors;
sysfs_set_num(mdi, NULL, "sync_max", position/rd1);
position = size - position;
} else {
position = (position/sectors + 2) * sectors;
sysfs_set_num(mdi, NULL, "sync_max", position/rd1);
}
sysfs_set_str(mdi, NULL, "sync_action", "idle");
/* That should have set things going again. Now we
* wait a little while (5 seconds) for sync_completed
* to reach the target.
*/
delay = 500;
scfd = sysfs_open(mdi->sys_name, NULL, "sync_completed");
while (scfd >= 0 && delay > 0) {
sysfs_fd_get_str(scfd, buf, sizeof(buf));
if (strncmp(buf, "none", 4) == 0)
break;
sysfs_get_ll(mdi, NULL, "reshape_position", &curr);
if (!backwards && curr >= position)
break;
if (backwards && curr <= position)
break;
sysfs_wait(scfd, &delay);
}
if (scfd >= 0)
close(scfd);
}
/* As we have an O_EXCL open, any use of the device
* which blocks STOP_ARRAY is probably a transient use,
* so it is reasonable to retry for a while - 5 seconds.

11
lib.c
View File

@ -396,3 +396,14 @@ int use_udev(void)
}
return use;
}
/*
 * GCD - greatest common divisor of two unsigned values.
 *
 * Uses the remainder form of Euclid's algorithm, so it terminates for
 * every input (including a zero argument) in a logarithmic number of
 * steps.  GCD(0, n) and GCD(n, 0) both yield n; GCD(0, 0) yields 0, so
 * callers that divide by the result must make sure at least one
 * argument is non-zero.
 */
unsigned long GCD(unsigned long a, unsigned long b)
{
	while (b != 0) {
		unsigned long rem = a % b;

		a = b;
		b = rem;
	}
	return a;
}

View File

@ -538,6 +538,9 @@ extern int sysfs_get_fd(struct mdinfo *sra, struct mdinfo *dev,
extern int sysfs_fd_get_ll(int fd, unsigned long long *val);
extern int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
char *name, unsigned long long *val);
extern int sysfs_fd_get_two(int fd, unsigned long long *v1, unsigned long long *v2);
extern int sysfs_get_two(struct mdinfo *sra, struct mdinfo *dev,
char *name, unsigned long long *v1, unsigned long long *v2);
extern int sysfs_fd_get_str(int fd, char *val, int size);
extern int sysfs_attribute_available(struct mdinfo *sra, struct mdinfo *dev,
char *name);
@ -1277,6 +1280,7 @@ extern char *conf_word(FILE *file, int allow_key);
extern void print_quoted(char *str);
extern void print_escape(char *str);
extern int use_udev(void);
extern unsigned long GCD(unsigned long a, unsigned long b);
extern int conf_name_is_free(char *name);
extern int conf_verify_devnames(struct mddev_ident *array_list);
extern int devname_matches(char *name, char *match);

43
sysfs.c
View File

@ -513,6 +513,49 @@ int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
return n;
}
/*
 * Read up to two numbers from an already-open sysfs attribute.
 *
 * The attribute text is expected to look like either
 *    NNN (NNN)
 * or
 *    NNN / NNN
 * or just a single NNN.
 *
 * Returns:
 *   2  both numbers were parsed into *v1 and *v2
 *   1  only the first number was parsed; *v2 is set equal to *v1
 *  -1  the text does not start with a number, or the first number is
 *      followed by an unexpected character (*v1 may have been written)
 *  -2  nothing could be read from the descriptor
 */
int sysfs_fd_get_two(int fd, unsigned long long *v1, unsigned long long *v2)
{
	char buf[80];
	int n;
	char *ep, *ep2;

	/* Attribute may have been read before: always start from the top. */
	lseek(fd, 0, SEEK_SET);
	/* Read one byte less than the buffer holds so that the
	 * terminating NUL below can never land outside 'buf'.
	 */
	n = read(fd, buf, sizeof(buf) - 1);
	if (n <= 0)
		return -2;
	buf[n] = 0;

	*v1 = strtoull(buf, &ep, 0);
	if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' '))
		return -1;

	/* Step over the separator between the two values. */
	while (*ep == ' ' || *ep == '/' || *ep == '(')
		ep++;

	*v2 = strtoull(ep, &ep2, 0);
	if (ep2 == ep ||
	    (*ep2 != 0 && *ep2 != '\n' && *ep2 != ' ' && *ep2 != ')')) {
		/* No usable second value: report the first one twice. */
		*v2 = *v1;
		return 1;
	}
	return 2;
}
/*
 * Convenience wrapper around sysfs_fd_get_two(): obtain a descriptor for
 * attribute 'name' via sysfs_get_fd(), parse it, and close the
 * descriptor again.
 *
 * Returns -1 when sysfs_get_fd() gives a negative descriptor, otherwise
 * whatever sysfs_fd_get_two() returned.
 */
int sysfs_get_two(struct mdinfo *sra, struct mdinfo *dev,
		  char *name, unsigned long long *v1, unsigned long long *v2)
{
	int result = -1;
	int attr_fd = sysfs_get_fd(sra, dev, name);

	if (attr_fd >= 0) {
		result = sysfs_fd_get_two(attr_fd, v1, v2);
		close(attr_fd);
	}
	return result;
}
int sysfs_fd_get_str(int fd, char *val, int size)
{
int n;