Initial reshape support

Needs work for other levels etc.

Signed-off-by: Neil Brown <neilb@suse.de>
This commit is contained in:
Neil Brown 2006-03-13 05:51:32 +00:00
parent 8a4440794a
commit e86c9dd6d8
8 changed files with 1098 additions and 33 deletions

408
Grow.c
View File

@ -306,7 +306,7 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
continue;
if (st->ss->load_super(st, fd2, &super, NULL)==0) {
if (st->ss->add_internal_bitmap(st, super,
chunk, delay, write_behind,
chunk, delay, write_behind,
bitmapsize, 0, major))
st->ss->write_bitmap(st, fd2, super);
else {
@ -378,4 +378,410 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
return 0;
}
/*
 * When reshaping an array we might need to backup some data.
 * This is written to all spares with a 'super_block' describing it.
 * The superblock goes 1K from the end of the used space on the
 * device.
 * It is written after the backup is complete.
 * It has the following structure.
 */
struct mdp_backup_super {
/* 16-byte identification string, stored without a terminating NUL */
char magic[16]; /* md_backup_data-1 */
/* uuid of the array being reshaped (filled by ->uuid_from_super) */
__u8 set_uuid[16];
/* time(0) at the moment the backup was made */
__u64 mtime;
/* start/sizes in 512byte sectors */
/* where on this device the saved data begins */
__u64 devstart;
/* first array sector covered by the backup (Grow_reshape stores 0) */
__u64 arraystart;
/* amount of array data saved (Grow_reshape stores last_block) */
__u64 length;
/* bsb_csum() over every byte of this structure that precedes sb_csum */
__u32 sb_csum; /* csum of preceding bytes. */
};
/*
 * Checksum for a backup superblock.
 *
 * Folds the first 'len' bytes of 'buf' into a 32-bit value
 * (csum = csum*8 + byte) and returns it converted with
 * __cpu_to_le32() so it can be stored directly in the on-disk
 * structure.
 *
 * The accumulator is unsigned so that the repeated left shift wraps
 * instead of overflowing a signed int.  Every byte now contributes:
 * the earlier code added buf[0] on each iteration, so the result
 * depended only on the first byte and on 'len'.
 */
int bsb_csum(char *buf, int len)
{
	int i;
	unsigned int csum = 0;

	for (i = 0; i < len; i++)
		csum = (csum << 3) + buf[i];
	return __cpu_to_le32(csum);
}
/*
 * Grow_reshape - change the size or shape of an active md array.
 *
 *   devname    name used in messages
 *   fd         open descriptor on the md device
 *   quiet      suppresses the confirmation message for 'faulty' arrays
 *   size       new component size, or negative for "no change"
 *   level      new level, or UnSet
 *   layout     new layout, or UnSet
 *   chunksize  new chunk size, or 0 for "no change"
 *   raid_disks new number of devices, or 0 for "no change"
 *
 * Returns 0 on success and 1 on any failure (a message is printed to
 * stderr).
 *
 * For raid4/5/6 shape changes the start of the array is copied to every
 * spare before the kernel is asked to reshape, and the backup is
 * invalidated once the reshape has moved past that region.
 */
int Grow_reshape(char *devname, int fd, int quiet,
		 long long size,
		 int level, int layout, int chunksize, int raid_disks)
{
	/* Make some changes in the shape of an array.
	 * The kernel must support the change.
	 * Different reshapes have subtly different meaning for different
	 * levels, so we need to check the current state of the array
	 * and go from there.
	 */
	struct mdu_array_info_s array;
	char *c;
	struct mdp_backup_super bsb;
	struct supertype *st;
	int nlevel, olevel;
	int nchunk, ochunk;
	int nlayout, olayout;
	int ndisks, odisks;
	int ndata, odata;
	unsigned long long nstripe, ostripe, last_block;
	int *fdlist = NULL;
	unsigned long long *offsets = NULL;
	int d, i, spares;
	int nrdisks;
	int err;
	void *super = NULL;
	struct sysarray *sra;
	struct sysdev *sd;

	if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
		fprintf(stderr, Name ": %s is not an active md array - aborting\n",
			devname);
		return 1;
	}
	c = map_num(pers, array.level);
	if (c == NULL) c = "-unknown-";
	switch(array.level) {
	default: /* raid0, linear, multipath cannot be reconfigured */
		fprintf(stderr, Name ": %s array %s cannot be reshaped.\n",
			c, devname);
		return 1;

	case LEVEL_FAULTY: /* only 'layout' change is permitted */
		if (size >= 0) {
			fprintf(stderr, Name ": %s: Cannot change size of a 'faulty' array\n",
				devname);
			return 1;
		}
		if (level != UnSet && level != LEVEL_FAULTY) {
			fprintf(stderr, Name ": %s: Cannot change RAID level of a 'faulty' array\n",
				devname);
			return 1;
		}
		if (chunksize || raid_disks) {
			fprintf(stderr, Name ": %s: Cannot change chunksize or disks of a 'faulty' array\n",
				devname);
			return 1;
		}
		if (layout == UnSet)
			return 0; /* nothing to do.... */

		array.layout = layout;
		if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
			fprintf(stderr, Name ": Cannot set layout for %s: %s\n",
				devname, strerror(errno));
			return 1;
		}
		if (!quiet)
			printf("layout for %s set to %d\n", devname, array.layout);
		return 0;

	case 1: /* raid_disks and size can each be changed. They are independent */
		if (level != UnSet && level != 1) {
			fprintf(stderr, Name ": %s: Cannot change RAID level of a RAID1 array.\n",
				devname);
			return 1;
		}
		if (chunksize || layout != UnSet) {
			fprintf(stderr, Name ": %s: Cannot change chunk size of layout for a RAID1 array.\n",
				devname);
			return 1;
		}

		/* Each can trigger a resync/recovery which will block the
		 * other from happening. Later we could block
		 * resync for the duration via 'sync_action'...
		 */
		/* 0 means "not requested" (see the other branches), so only
		 * a positive count may replace the current one.
		 */
		if (raid_disks > 0)
			array.raid_disks = raid_disks;
		if (size >= 0)
			array.size = size;
		if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
			fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
				devname, strerror(errno));
			return 1;
		}
		return 0;

	case 4:
	case 5:
	case 6:
		st = super_by_version(array.major_version,
				      array.minor_version);
		/* size can be changed independently.
		 * layout/chunksize/raid_disks/level can be changed
		 * though the kernel may not support it all.
		 * If 'suspend_lo' is not present in sysfs, then
		 * these cannot be changed.
		 */
		if (size >= 0) {
			/* Cannot change other details as well.. */
			if (layout != UnSet ||
			    chunksize != 0 ||
			    raid_disks != 0 ||
			    level != UnSet) {
				fprintf(stderr, Name ": %s: Cannot change shape as well as size of a %s array.\n",
					devname, c);
				return 1;
			}
			array.size = size;
			if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
				fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
					devname, strerror(errno));
				return 1;
			}
			return 0;
		}
		/* Ok, just change the shape. This can be awkward.
		 * There are three possibilities.
		 * 1/ The array will shrink. We don't support this
		 *    possibility. Maybe one day...
		 * 2/ The array will not change size. This is easy enough
		 *    to do, but not reliably. If the process is aborted
		 *    the array *will* be corrupted. So maybe we can allow
		 *    this but only if the user is really certain. e.g.
		 *    --really-risk-everything
		 * 3/ The array will grow. This can be reliably achieved.
		 *    However the kernel's restripe routines will cheerfully
		 *    overwrite some early data before it is safe. So we
		 *    need to make a backup of the early parts of the array
		 *    and be ready to restore it if rebuild aborts very early.
		 *
		 * We backup data by writing it to all spares (there must be
		 * at least 1, so even raid6->raid5 requires a spare to be
		 * present).
		 *
		 * So: we enumerate the devices in the array and
		 * make sure we can open all of them.
		 * Then we freeze the early part of the array and
		 * backup to the various spares.
		 * Then we request changes and start the reshape.
		 * Monitor progress until it has passed the danger zone.
		 * and finally invalidate the copied data and unfreeze the
		 * start of the array.
		 *
		 * Before we can do this we need to decide:
		 *  - will the array grow? Just calculate size
		 *  - how much needs to be saved: count stripes.
		 *  - where to save data... good question.
		 *
		 */
		nlevel = olevel = array.level;
		nchunk = ochunk = array.chunk_size;
		nlayout = olayout = array.layout;
		ndisks = odisks = array.raid_disks;

		if (level != UnSet) nlevel = level;
		if (chunksize) nchunk = chunksize;
		if (layout != UnSet) nlayout = layout;
		if (raid_disks) ndisks = raid_disks;

		odata = odisks-1;
		if (olevel == 6) odata--; /* number of data disks */
		ndata = ndisks-1;
		if (nlevel == 6) ndata--;

		if (ndata < odata) {
			fprintf(stderr, Name ": %s: Cannot reduce number of data disks (yet).\n",
				devname);
			return 1;
		}
		if (ndata == odata) {
			fprintf(stderr, Name ": %s: Cannot reshape array without increasing size (yet).\n",
				devname);
			return 1;
		}
		/* Well, it is growing... so how much do we need to backup.
		 * Need to backup a full number of new-stripes, such that the
		 * last one does not over-write any place that it would be read
		 * from
		 */
		nstripe = ostripe = 0;
		while (nstripe+ochunk/512 >= ostripe) {
			nstripe += nchunk/512;
			last_block = nstripe * ndata;
			ostripe = last_block / odata;
		}
		printf("Need to backup to stripe %llu sectors, %lluK\n", nstripe, last_block/2);

		sra = sysfs_read(fd, 0,
				 GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE);
		if (!sra) {
			fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n",
				devname);
			return 1;
		}

		/* NOTE(review): component_size is used exactly as read from
		 * sysfs here and when placing the backup below; confirm it is
		 * in the same unit as last_block (512-byte sectors).
		 */
		if (last_block >= sra->component_size/2) {
			fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n",
				devname);
			return 1;
		}

		nrdisks = array.nr_disks + sra->spares;
		/* The checks below index fdlist[] by every raid position and
		 * by the first spare slot, so make sure those slots exist
		 * even when the array is degraded or has no spare.
		 */
		if (nrdisks < array.raid_disks + 1)
			nrdisks = array.raid_disks + 1;

		/* Now we need to open all these devices so we can read/write.
		 */
		fdlist = malloc(nrdisks * sizeof(int));
		offsets = malloc(nrdisks * sizeof(offsets[0]));
		if (!fdlist || !offsets) {
			fprintf(stderr, Name ": malloc failed: grow aborted\n");
			free(fdlist);
			free(offsets);
			return 1;
		}
		for (d=0; d< nrdisks; d++)
			fdlist[d] = -1;

		/* In-sync devices go in the slot given by their role;
		 * spares are appended from index raid_disks upwards.
		 */
		d = array.raid_disks;
		for (sd = sra->devs; sd; sd=sd->next) {
			if (sd->state & (1<<MD_DISK_FAULTY))
				continue;
			if (sd->state & (1<<MD_DISK_SYNC)) {
				char *dn = map_dev(sd->major, sd->minor);
				if (sd->role < 0 || sd->role >= nrdisks) {
					fprintf(stderr, Name ": %s: component %s has unexpected role %d\n",
						devname, dn, sd->role);
					goto abort;
				}
				fdlist[sd->role] = open(dn, O_RDONLY);
				/* save_stripes() adds this to a byte offset
				 * before seeking, so convert from sectors.
				 */
				offsets[sd->role] = sd->offset * 512;
				if (fdlist[sd->role] < 0) {
					fprintf(stderr, Name ": %s: cannot open component %s\n",
						devname, dn);
					goto abort;
				}
			} else {
				/* spare */
				char *dn = map_dev(sd->major, sd->minor);
				if (d >= nrdisks) {
					fprintf(stderr, Name ": %s: too many spare devices found\n",
						devname);
					goto abort;
				}
				fdlist[d] = open(dn, O_RDWR);
				/* kept in sectors: shifted by 9 where used */
				offsets[d] = sd->offset;
				if (fdlist[d]<0) {
					fprintf(stderr, Name ": %s: cannot open component %s\n",
						devname, dn);
					goto abort;
				}
				d++;
			}
		}
		for (i=0 ; i<array.raid_disks; i++)
			if (fdlist[i] < 0) {
				fprintf(stderr, Name ": %s: failed to find device %d. Array might be degraded.\n"
					" --grow aborted\n", devname, i);
				goto abort;
			}
		if (fdlist[array.raid_disks] < 0) {
			fprintf(stderr, Name ": %s: failed to find a spare - --grow aborted\n",
				devname);
			goto abort;
		}

		/* Find a superblock */
		if (st == NULL) {
			fprintf(stderr, Name ": %s: unrecognised metadata version - --grow aborted\n",
				devname);
			goto abort;
		}
		if (st->ss->load_super(st, fdlist[0], &super, NULL)) {
			fprintf(stderr, Name ": %s: Cannot find a superblock\n",
				devname);
			goto abort;
		}

		spares = sra->spares;

		/* Decide offset for the backup and llseek the spares */
		for (i=array.raid_disks; i<d; i++) {
			offsets[i] += sra->component_size - last_block - 8;
			if (lseek64(fdlist[i], offsets[i]<<9, 0) != offsets[i]<<9) {
				fprintf(stderr, Name ": could not seek...\n");
				goto abort;
			}
		}
		array.level = nlevel;
		array.raid_disks = ndisks;
		array.chunk_size = nchunk;
		array.layout = nlayout;
		if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
			fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
				devname, strerror(errno));
			goto abort;
		}

		/* suspend the relevant region */
		sysfs_set_num(sra, NULL, "suspend_hi", 0); /* just in case */
		if (sysfs_set_num(sra, NULL, "suspend_lo", 0) < 0 ||
		    sysfs_set_num(sra, NULL, "suspend_hi", last_block) < 0) {
			fprintf(stderr, Name ": %s: failed to suspend device.\n",
				devname);
			goto abort_resume;
		}

		err = save_stripes(fdlist, offsets,
				   odisks, ochunk, olevel, olayout,
				   spares, fdlist+odisks,
				   0ULL, nstripe*512);

		/* abort if there was an error */
		if (err < 0) {
			fprintf(stderr, Name ": %s: failed to save critical region\n",
				devname);
			goto abort_resume;
		}

		/* Write a backup superblock after the saved data on each
		 * spare.  The structure is zeroed first so that padding does
		 * not carry stack garbage to disk, and the magic is the
		 * 16-byte string documented in struct mdp_backup_super.
		 */
		memset(&bsb, 0, sizeof(bsb));
		memcpy(bsb.magic, "md_backup_data-1", 16);
		st->ss->uuid_from_super((int*)&bsb.set_uuid, super);
		bsb.mtime = time(0);
		bsb.arraystart = 0;
		bsb.length = last_block;
		for (i=odisks; i<d ; i++) {
			bsb.devstart = offsets[i];
			bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
			/* Without a valid backup superblock the saved data
			 * cannot be found again, so do not start the reshape.
			 */
			if (lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0) < 0 ||
			    write(fdlist[i], &bsb, sizeof(bsb)) != (ssize_t)sizeof(bsb)) {
				fprintf(stderr, Name ": %s: failed to write backup superblock\n",
					devname);
				goto abort_resume;
			}
		}

		/* start the reshape happening */
		if (sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0) {
			fprintf(stderr, Name ": %s: failed to initiate reshape\n",
				devname);
			goto abort_resume;
		}

		/* wait for reshape to pass the critical region */
		while(1) {
			unsigned long long comp;
			if (sysfs_get_ll(sra, NULL, "sync_completed", &comp)<0)
				break;
			if (comp >= nstripe)
				break;
			sleep(1);
		}

		/* invalidate superblocks (best effort: the backup is no
		 * longer needed once the critical region has been passed)
		 */
		memset(&bsb, 0, sizeof(bsb));
		for (i=odisks; i<d ; i++) {
			lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0);
			write(fdlist[i], &bsb, sizeof(bsb));
		}

		/* unsuspend. */
		sysfs_set_num(sra, NULL, "suspend_lo", last_block);

		for (i=0; i<d; i++)
			if (fdlist[i] >= 0)
				close(fdlist[i]);
		free(fdlist);
		free(offsets);

		break;
	}
	return 0;

abort_resume:
	sysfs_set_num(sra, NULL, "suspend_lo", last_block);
abort:
	/* fdlist[] has nrdisks entries, all initialised to -1 before the
	 * first 'goto abort'; walk all of them so spare descriptors are
	 * closed as well.
	 */
	for (i=0; i<nrdisks; i++)
		if (fdlist[i] >= 0)
			close(fdlist[i]);
	free(fdlist);
	free(offsets);
	return 1;
}

View File

@ -59,10 +59,10 @@ MAN8DIR = $(MANDIR)/man8
OBJS = mdadm.o config.o mdstat.o ReadMe.o util.o Manage.o Assemble.o Build.o \
Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \
mdopen.o super0.o super1.o bitmap.o
mdopen.o super0.o super1.o bitmap.o restripe.o sysfs.o
SRCS = mdadm.c config.c mdstat.c ReadMe.c util.c Manage.c Assemble.c Build.c \
Create.c Detail.c Examine.c Grow.c Monitor.c dlink.c Kill.c Query.c \
mdopen.c super0.c super1.c bitmap.c
mdopen.c super0.c super1.c bitmap.c restripe.c sysfs.c
ASSEMBLE_SRCS := mdassemble.c Assemble.c config.c dlink.c util.c super0.c super1.c
ASSEMBLE_FLAGS:= -DMDASSEMBLE
@ -73,7 +73,7 @@ endif
all : mdadm mdadm.man md.man mdadm.conf.man
everything: all mdadm.static mdadm.uclibc swap_super mdassemble mdassemble.uclibc mdassemble.static mdassemble.man
everything: all mdadm.static mdadm.uclibc swap_super test_stripe mdassemble mdassemble.uclibc mdassemble.static mdassemble.man
# mdadm.tcc doesn't work..
mdadm : $(OBJS)
@ -92,6 +92,9 @@ mdadm.klibc : $(SRCS) mdadm.h
rm -f $(OBJS)
gcc -nostdinc -iwithprefix include -I$(KLIBC)/klibc/include -I$(KLIBC)/linux/include -I$(KLIBC)/klibc/arch/i386/include -I$(KLIBC)/klibc/include/bits32 $(CFLAGS) $(SRCS)
test_stripe : restripe.c mdadm.h
$(CC) $(CXFLAGS) $(LDFLAGS) -o test_stripe -DMAIN restripe.c
mdassemble : $(ASSEMBLE_SRCS) mdadm.h
rm -f $(OBJS)
$(DIET_GCC) $(ASSEMBLE_FLAGS) -o mdassemble $(ASSEMBLE_SRCS)

View File

@ -1155,7 +1155,8 @@ int main(int argc, char *argv[])
} else if (layout != UnSet)
rv = Manage_reconfig(devlist->devname, mdfd, layout);
else if (size >= 0 || raiddisks)
rv = Manage_resize(devlist->devname, mdfd, size, raiddisks);
rv = Grow_reshape(devlist->devname, mdfd, quiet,
size, level, layout, chunk, raiddisks);
else if (bitmap_file) {
if (delay == 0) delay = DEFAULT_BITMAP_DELAY;
rv = Grow_addbitmap(devlist->devname, mdfd, bitmap_file,

56
mdadm.h
View File

@ -178,6 +178,54 @@ extern struct mdstat_ent *mdstat_read(int hold, int start);
extern void free_mdstat(struct mdstat_ent *ms);
extern void mdstat_wait(int seconds);
/* Data structure for holding info read from sysfs */
struct sysdev {
char name[20];
int role;
int major, minor;
unsigned long long offset, size;
int state;
int errors;
struct sysdev *next;
};
struct sysarray {
char name[20];
struct sysdev *devs;
int chunk;
unsigned long long component_size;
int layout;
int level;
int spares;
};
/* various details can be requested */
#define GET_LEVEL 1
#define GET_LAYOUT 2
#define GET_COMPONENT 4
#define GET_CHUNK 8
#define GET_DEVS 1024 /* gets role, major, minor */
#define GET_OFFSET 2048
#define GET_SIZE 4096
#define GET_STATE 8192
#define GET_ERROR 16384
/* If fd >= 0, look up the array that fd is open on.
 * Otherwise use devnum: devnum >= 0 selects "md<devnum>",
 * devnum < 0 selects "md_d<-1-devnum>".
 */
extern struct sysarray *sysfs_read(int fd, int devnum, unsigned long options);
extern int sysfs_set_str(struct sysarray *sra, struct sysdev *dev,
char *name, char *val);
extern int sysfs_set_num(struct sysarray *sra, struct sysdev *dev,
char *name, unsigned long long val);
extern int sysfs_get_ll(struct sysarray *sra, struct sysdev *dev,
char *name, unsigned long long *val);
extern int save_stripes(int *source, unsigned long long *offsets,
int raid_disks, int chunk_size, int level, int layout,
int nwrites, int *dest,
unsigned long long start, unsigned long long length);
#ifndef Sendmail
#define Sendmail "/usr/lib/sendmail -t"
#endif
@ -251,6 +299,9 @@ extern int Manage_subdevs(char *devname, int fd,
mddev_dev_t devlist, int verbose);
extern int Grow_Add_device(char *devname, int fd, char *newdev);
extern int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind, int force);
extern int Grow_reshape(char *devname, int fd, int quiet,
long long size,
int level, int layout, int chunksize, int raid_disks);
extern int Assemble(struct supertype *st, char *mddev, int mdfd,
@ -367,3 +418,8 @@ extern int open_mddev(char *dev, int autof);
#define makedev(M,m) (((M)<<8) | (m))
#endif
/* for raid5 */
#define ALGORITHM_LEFT_ASYMMETRIC 0
#define ALGORITHM_RIGHT_ASYMMETRIC 1
#define ALGORITHM_LEFT_SYMMETRIC 2
#define ALGORITHM_RIGHT_SYMMETRIC 3

324
restripe.c Normal file
View File

@ -0,0 +1,324 @@
/*
* mdadm - manage Linux "md" devices aka RAID arrays.
*
* Copyright (C) 2006 Neil Brown <neilb@suse.de>
*
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Author: Neil Brown
* Email: <neilb@suse.de>
*/
#include "mdadm.h"
/* To restripe, we read from the old geometry into a buffer, and
 * write from the buffer to the new geometry.
 * When reading we don't worry about parity. When writing we do.
 *
 */
/*
 * Map a logical block of a stripe onto a device index.
 *
 * 'block' is the data block number within the stripe, or -1 to ask for
 * the parity device.  'stripe' is the stripe number, 'raid_disks' the
 * number of devices, and level/layout select the geometry.
 * Returns the device index, or -1 when the level/layout combination is
 * not one of the handled cases.
 */
static int geo_map(int block, unsigned long long stripe, int raid_disks, int level, int layout)
{
	const int geometry = level*100 + layout;
	const int parity_wanted = (block == -1);
	int pd;

	/* levels without rotating parity: data is in place, parity last */
	if (geometry == 0 || geometry == 400) {
		if (parity_wanted)
			return raid_disks-1; /* parity block */
		return block;
	}

	/* first decide which device holds parity on this stripe */
	switch(geometry) {
	case 500 + ALGORITHM_LEFT_ASYMMETRIC:
	case 500 + ALGORITHM_LEFT_SYMMETRIC:
	case 600 + ALGORITHM_LEFT_ASYMMETRIC:
	case 600 + ALGORITHM_LEFT_SYMMETRIC:
		pd = (raid_disks-1) - stripe % raid_disks;
		break;
	case 500 + ALGORITHM_RIGHT_ASYMMETRIC:
	case 500 + ALGORITHM_RIGHT_SYMMETRIC:
	case 600 + ALGORITHM_RIGHT_ASYMMETRIC:
	case 600 + ALGORITHM_RIGHT_SYMMETRIC:
		pd = stripe % raid_disks;
		break;
	default:
		return -1;
	}
	if (parity_wanted)
		return pd;

	/* then place the data block relative to the parity device */
	switch(geometry) {
	case 500 + ALGORITHM_LEFT_ASYMMETRIC:
	case 500 + ALGORITHM_RIGHT_ASYMMETRIC:
		return (block >= pd) ? block+1 : block;
	case 500 + ALGORITHM_LEFT_SYMMETRIC:
	case 500 + ALGORITHM_RIGHT_SYMMETRIC:
		return (pd + 1 + block) % raid_disks;
	case 600 + ALGORITHM_LEFT_ASYMMETRIC:
	case 600 + ALGORITHM_RIGHT_ASYMMETRIC:
		if (pd == raid_disks - 1)
			return block+1;
		return (block >= pd) ? block+2 : block;
	default: /* the two symmetric level-6 layouts */
		return (pd + 2 + block) % raid_disks;
	}
}
/*
 * Store in 'target' the byte-wise XOR of the first 'size' bytes of each
 * of the 'disks' buffers listed in 'sources'.  With no sources the
 * target is filled with zeros.
 */
static void xor_blocks(char *target, char **sources, int disks, int size)
{
	int pos;

	/* Amazingly inefficient... */
	for (pos = 0; pos < size; pos++) {
		char acc = 0;
		int remaining = disks;

		while (remaining-- > 0)
			acc ^= sources[remaining][pos];
		target[pos] = acc;
	}
}
/* Save data:
 * We are given:
 *  A list of 'fds' of the active disks. For now we require all to be present.
 *  A list of byte offsets, one per active disk, added to every seek.
 *  A geometry: raid_disks, chunk_size, level, layout
 *  A list of 'fds' for mirrored targets.  They are already seeked to
 *    right (Write) location
 *  A start and length (in bytes of array data)
 *
 * The data is read from the device that holds it and the same bytes
 * are written to each of the 'nwrites' targets.
 * Returns 0 on success, -1 on an I/O error, a short read/write, or a
 * geometry that cannot be mapped.
 */
int save_stripes(int *source, unsigned long long *offsets,
		 int raid_disks, int chunk_size, int level, int layout,
		 int nwrites, int *dest,
		 unsigned long long start, unsigned long long length)
{
	char buf[8192];
	int cpos;	/* where in chunk we are up to */
	int len;
	int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);
	int disk;

	/* both values are used as divisors below */
	if (chunk_size <= 0 || data_disks <= 0)
		return -1;
	cpos = start % chunk_size;

	while (length > 0) {
		unsigned long long offset;
		int i;

		/* never cross a chunk boundary, the buffer, or the end */
		len = chunk_size - cpos;
		if (len > (int)sizeof(buf)) len = sizeof(buf);
		if ((unsigned long long)len > length) len = length;

		/* len bytes to be moved from one device */
		offset = (start/chunk_size/data_disks)*chunk_size + cpos;
		disk = start/chunk_size % data_disks;
		disk = geo_map(disk, start/chunk_size/data_disks,
			       raid_disks, level, layout);
		/* geo_map() returns -1 for a level/layout it does not
		 * know; that must not be used as an index.
		 */
		if (disk < 0 || disk >= raid_disks)
			return -1;
		if (lseek64(source[disk], offsets[disk]+offset, 0) < 0)
			return -1;
		if (read(source[disk], buf, len) != len)
			return -1;
		for (i=0; i<nwrites; i++)
			if (write(dest[i], buf, len) != len)
				return -1;
		length -= len;
		start += len;
		cpos += len;
		while (cpos >= chunk_size) cpos -= chunk_size;
	}
	return 0;
}
/* Restore data:
 * We are given:
 *  A list of 'fds' of the active disks.  Some may be '-1' for not-available.
 *  A list of byte offsets, one per disk, added to every seek.
 *  A geometry: raid_disks, chunk_size, level, layout
 *  An 'fd' to read from.  It is already seeked to the right (Read) location.
 *  A start and length.
 * The length must be a multiple of the stripe size.
 *
 * We build a full stripe in memory and then write it out.
 * We assume that there are enough working devices.
 *
 * Returns 0 on success, -1 on an I/O error or unusable geometry,
 * -2 when memory cannot be allocated and -3 when 'length' is not a
 * whole number of stripes.
 */
int restore_stripes(int *dest, unsigned long long *offsets,
		    int raid_disks, int chunk_size, int level, int layout,
		    int source,
		    unsigned long long start, unsigned long long length)
{
	char *stripe_buf = NULL;
	char **stripes = NULL;
	char **blocks = NULL;
	int i;
	int rv = 0;
	int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);

	/* these are used as divisors and allocation sizes below */
	if (raid_disks <= 0 || chunk_size <= 0 || data_disks <= 0)
		return -1;

	stripe_buf = malloc((size_t)raid_disks * chunk_size);
	stripes = malloc(raid_disks * sizeof(char*));
	blocks = malloc(raid_disks * sizeof(char*));
	if (stripe_buf == NULL || stripes == NULL || blocks == NULL) {
		rv = -2;
		goto out;
	}
	/* stripes[i] is the chunk that will be written to device i */
	for (i=0; i<raid_disks; i++)
		stripes[i] = stripe_buf + i * chunk_size;

	while (length > 0) {
		int len = data_disks * chunk_size;
		unsigned long long offset;

		if (length < len) {
			rv = -3;
			goto out;
		}
		/* read the data chunks, in logical order, straight into the
		 * slot of the device each one belongs on
		 */
		for (i=0; i < data_disks; i++) {
			int disk = geo_map(i, start/chunk_size/data_disks,
					   raid_disks, level, layout);
			/* -1 means geo_map() does not know this geometry */
			if (disk < 0 || disk >= raid_disks) {
				rv = -1;
				goto out;
			}
			blocks[i] = stripes[disk];
			if (read(source, stripes[disk], chunk_size) != chunk_size) {
				rv = -1;
				goto out;
			}
		}
		/* We have the data, now do the parity */
		offset = (start/chunk_size/data_disks) * chunk_size;
		if (level >= 4) {
			int disk = geo_map(-1, start/chunk_size/data_disks,
					   raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks) {
				rv = -1;
				goto out;
			}
			xor_blocks(stripes[disk], blocks, data_disks, chunk_size);
			/* FIXME need to do raid6 Q as well */
		}
		for (i=0; i < raid_disks ; i++)
			if (dest[i] >= 0) {
				if (lseek64(dest[i], offsets[i]+offset, 0) < 0 ||
				    write(dest[i], stripes[i], chunk_size) != chunk_size) {
					rv = -1;
					goto out;
				}
			}
		length -= len;
		start += len;
	}
out:
	/* single exit so the work buffers are released on every path */
	free(blocks);
	free(stripes);
	free(stripe_buf);
	return rv;
}
#ifdef MAIN
/*
 * Parse 'str' as an unsigned decimal number.
 * The whole string must be consumed: on an empty string or trailing
 * characters, *err is set to 'str' and 0 is returned.  *err is left
 * untouched on success, so one variable can collect errors from
 * several calls.
 */
unsigned long long getnum(char *str, char **err)
{
	char *stop;
	unsigned long long value = strtoull(str, &stop, 10);

	if (stop != str && *stop == '\0')
		return value;

	*err = str;
	return 0;
}
/*
 * test_stripe: command line driver for save_stripes()/restore_stripes().
 *
 * Exit status: 0 on success, 1 for a usage error or a failed
 * save/restore, 2 for bad arguments, 3 when memory or a file is not
 * available.
 */
int main(int argc, char *argv[])
{
	/* save/restore file raid_disks chunk_size level layout start length devices...
	 */
	int save;
	int *fds;
	char *file;
	int storefd;
	unsigned long long *offsets;
	int raid_disks, chunk_size, level, layout;
	unsigned long long start, length;
	int i;
	char *err = NULL;

	if (argc < 10) {
		fprintf(stderr, "Usage: test_stripe save/restore file raid_disks"
			" chunk_size level layout start length devices...\n");
		exit(1);
	}
	if (strcmp(argv[1], "save")==0)
		save = 1;
	else if (strcmp(argv[1], "restore") == 0)
		save = 0;
	else {
		fprintf(stderr, "test_stripe: must give 'save' or 'restore'.\n");
		exit(2);
	}

	file = argv[2];
	/* getnum() only sets 'err' on failure, so a single check after
	 * all conversions catches any bad argument
	 */
	raid_disks = getnum(argv[3], &err);
	chunk_size = getnum(argv[4], &err);
	level = getnum(argv[5], &err);
	layout = getnum(argv[6], &err);
	start = getnum(argv[7], &err);
	length = getnum(argv[8], &err);
	if (err) {
		fprintf(stderr, "test_stripe: Bad number: %s\n", err);
		exit(2);
	}
	if (argc != raid_disks + 9) {
		fprintf(stderr, "test_stripe: wrong number of devices: want %d found %d\n",
			raid_disks, argc-9);
		exit(2);
	}
	/* the chunk size is used as a divisor by the stripe routines */
	if (chunk_size <= 0) {
		fprintf(stderr, "test_stripe: chunk_size must be positive\n");
		exit(2);
	}

	/* argc >= 10 together with the check above means raid_disks >= 1 */
	fds = malloc(raid_disks * sizeof(*fds));
	offsets = malloc(raid_disks * sizeof(*offsets));
	if (fds == NULL || offsets == NULL) {
		fprintf(stderr, "test_stripe: out of memory\n");
		exit(3);
	}
	/* the devices are used from their very start */
	memset(offsets, 0, raid_disks * sizeof(*offsets));

	storefd = open(file, O_RDWR);
	if (storefd < 0) {
		perror(file);
		fprintf(stderr, "test_stripe: could not open %s.\n", file);
		exit(3);
	}
	for (i=0; i<raid_disks; i++) {
		fds[i] = open(argv[9+i], O_RDWR);
		if (fds[i] < 0) {
			perror(argv[9+i]);
			fprintf(stderr,"test_stripe: cannot open %s.\n", argv[9+i]);
			exit(3);
		}
	}

	if (save) {
		int rv = save_stripes(fds, offsets,
				      raid_disks, chunk_size, level, layout,
				      1, &storefd,
				      start, length);
		if (rv != 0) {
			fprintf(stderr, "test_stripe: save_stripes returned %d\n", rv);
			exit(1);
		}
	} else {
		int rv = restore_stripes(fds, offsets,
					 raid_disks, chunk_size, level, layout,
					 storefd,
					 start, length);
		if (rv != 0) {
			fprintf(stderr, "test_stripe: restore_stripes returned %d\n", rv);
			exit(1);
		}
	}
	exit(0);
}
#endif /* MAIN */

265
sysfs.c Normal file
View File

@ -0,0 +1,265 @@
/*
* sysfs - extract md related information from sysfs. Part of:
* mdadm - manage Linux "md" devices aka RAID arrays.
*
* Copyright (C) 2006 Neil Brown <neilb@suse.de>
*
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Author: Neil Brown
* Email: <neilb@suse.de>
*/
#include "mdadm.h"
#include <dirent.h>
/*
 * Read the file 'path' into 'buf' as a NUL-terminated string with one
 * trailing newline removed.  'buf' must have room for 1024 bytes.
 * Returns 0 on success, or -1 when the file cannot be opened, is
 * empty, cannot be read, or holds 1024 bytes or more.
 */
int load_sys(char *path, char *buf)
{
	int got;
	int handle = open(path, O_RDONLY);

	if (handle < 0)
		return -1;
	got = read(handle, buf, 1024);
	close(handle);

	/* need at least one byte, and room left for the terminator */
	if (got < 1 || got > 1023)
		return -1;

	buf[got] = 0;
	if (buf[got-1] == '\n')
		buf[got-1] = 0;
	return 0;
}
/*
 * Collect information about an md array from /sys/block/<name>/md/.
 *
 * If fd >= 0 the array is the one fd is open on, otherwise it is
 * selected by devnum (>= 0: "md<devnum>", < 0: "md_d<-1-devnum>").
 * 'options' is a mask of GET_* flags choosing which attributes to
 * load; fields that were not requested are left as zero.
 *
 * Returns a newly allocated structure, or NULL if memory runs out or
 * any requested attribute cannot be read.
 */
struct sysarray *sysfs_read(int fd, int devnum, unsigned long options)
{
	/* Longest name built here is
	 *   /sys/block/<sra->name>/md/<directory entry>/block/dev
	 * A directory entry name can be up to 255 bytes, so leave
	 * plenty of room (and still check before copying).
	 */
	char fname[512];
	char buf[1024];
	char *base;
	char *dbase;
	struct sysarray *sra;
	struct sysdev *dev;
	DIR *dir = NULL;
	struct dirent *de;

	sra = malloc(sizeof(*sra));
	if (sra == NULL)
		return sra;
	/* start from a known state so nothing is returned uninitialised */
	memset(sra, 0, sizeof(*sra));

	if (fd >= 0) {
		struct stat stb;
		if (fstat(fd, &stb))
			goto abort;
		if (major(stb.st_rdev)==9)
			sprintf(sra->name, "md%d", minor(stb.st_rdev));
		else
			sprintf(sra->name, "md_d%d",
				minor(stb.st_rdev)/16);
	} else {
		if (devnum >= 0)
			sprintf(sra->name, "md%d", devnum);
		else
			sprintf(sra->name, "md_d%d",
				-1-devnum);
	}
	sprintf(fname, "/sys/block/%s/md/", sra->name);
	/* attribute names are appended at 'base' */
	base = fname + strlen(fname);

	sra->devs = NULL;
	if (options & GET_LEVEL) {
		strcpy(base, "level");
		if (load_sys(fname, buf))
			goto abort;
		sra->level = map_name(pers, buf);
	}
	if (options & GET_LAYOUT) {
		strcpy(base, "layout");
		if (load_sys(fname, buf))
			goto abort;
		sra->layout = strtoul(buf, NULL, 0);
	}
	if (options & GET_COMPONENT) {
		strcpy(base, "component_size");
		if (load_sys(fname, buf))
			goto abort;
		sra->component_size = strtoull(buf, NULL, 0);
	}
	if (options & GET_CHUNK) {
		strcpy(base, "chunk_size");
		if (load_sys(fname, buf))
			goto abort;
		sra->chunk = strtoul(buf, NULL, 0);
	}

	if (! (options & GET_DEVS))
		return sra;

	/* Get all the devices as well */
	*base = 0;
	dir = opendir(fname);
	if (!dir)
		goto abort;
	sra->spares = 0;

	while ((de = readdir(dir)) != NULL) {
		char *ep;
		if (de->d_ino == 0 ||
		    strncmp(de->d_name, "dev-", 4) != 0)
			continue;
		/* the entry name, a '/' and the longest attribute name
		 * ("block/dev") must all fit after 'base'
		 */
		if (strlen(de->d_name) + sizeof("/block/dev") >
		    (size_t)(fname + sizeof(fname) - base))
			goto abort;
		strcpy(base, de->d_name);
		dbase = base + strlen(base);
		*dbase++ = '/';

		dev = malloc(sizeof(*dev));
		if (!dev)
			goto abort;
		memset(dev, 0, sizeof(*dev));
		/* link it in at once so the abort path frees it */
		dev->next = sra->devs;
		sra->devs = dev;
		/* remember the directory name: sysfs_set_str() and
		 * sysfs_get_ll() build per-device paths from it
		 */
		snprintf(dev->name, sizeof(dev->name), "%s", de->d_name);

		/* Always get slot, major, minor */
		strcpy(dbase, "slot");
		if (load_sys(fname, buf))
			goto abort;
		dev->role = strtoul(buf, &ep, 10);
		if (*ep) dev->role = -1;

		strcpy(dbase, "block/dev");
		if (load_sys(fname, buf))
			goto abort;
		sscanf(buf, "%d:%d", &dev->major, &dev->minor);

		if (options & GET_OFFSET) {
			strcpy(dbase, "offset");
			if (load_sys(fname, buf))
				goto abort;
			dev->offset = strtoull(buf, NULL, 0);
		}
		if (options & GET_SIZE) {
			strcpy(dbase, "size");
			if (load_sys(fname, buf))
				goto abort;
			dev->size = strtoull(buf, NULL, 0);
		}
		if (options & GET_STATE) {
			dev->state = 0;
			strcpy(dbase, "state");
			if (load_sys(fname, buf))
				goto abort;
			if (strstr(buf, "in_sync"))
				dev->state |= (1<<MD_DISK_SYNC);
			if (strstr(buf, "faulty"))
				dev->state |= (1<<MD_DISK_FAULTY);
			/* neither in_sync nor faulty: count as a spare */
			if (dev->state == 0)
				sra->spares++;
		}
		if (options & GET_ERROR) {
			/* the attribute name belongs in the path, not in
			 * the result buffer
			 */
			strcpy(dbase, "errors");
			if (load_sys(fname, buf))
				goto abort;
			dev->errors = strtoul(buf, NULL, 0);
		}
	}
	closedir(dir);
	return sra;

abort:
	if (dir)
		closedir(dir);
	while (sra->devs) {
		dev = sra->devs;
		sra->devs = dev->next;
		free(dev);
	}
	free(sra);
	return NULL;
}
/*
 * Return the value of the "component_size" sysfs attribute of the md
 * array that 'fd' is open on, or 0 if it cannot be determined.
 */
unsigned long long get_component_size(int fd)
{
	/* Find out the component size of the array.
	 * We cannot trust GET_ARRAY_INFO ioctl as its
	 * size field is only 32bits.
	 * So look in /sys/block/mdXXX/md/component_size
	 */
	struct stat stb;
	char fname[50];
	int n;
	if (fstat(fd, &stb)) return 0;
	if (major(stb.st_rdev) == 9)
		sprintf(fname, "/sys/block/md%d/md/component_size",
			minor(stb.st_rdev));
	else
		sprintf(fname, "/sys/block/md_d%d/md/component_size",
			minor(stb.st_rdev)/16);
	fd = open(fname, O_RDONLY);
	if (fd < 0)
		return 0;
	/* the path buffer is reused to hold the file's contents */
	n = read(fd, fname, sizeof(fname));
	close(fd);
	/* a failed read must not be used as an index, and a full buffer
	 * leaves no room for the terminator
	 */
	if (n < 0 || n >= (int)sizeof(fname))
		return 0;
	fname[n] = 0;
	return strtoull(fname, NULL, 10);
}
/*
 * Write the string 'val' to a sysfs attribute of an array.
 * The file is /sys/block/<sra->name>/md/<dev->name>/<name>; with
 * dev == NULL the device component is empty and the attribute of the
 * array itself is addressed.
 * Returns 0 on success, -1 if the path does not fit, the file cannot
 * be opened, or the value is not written completely.
 */
int sysfs_set_str(struct sysarray *sra, struct sysdev *dev,
		  char *name, char *val)
{
	char fname[128];
	size_t len = strlen(val);
	int n;
	int fd;

	/* refuse a truncated path rather than open the wrong file */
	n = snprintf(fname, sizeof(fname), "/sys/block/%s/md/%s/%s",
		     sra->name, dev?dev->name:"", name);
	if (n < 0 || (size_t)n >= sizeof(fname))
		return -1;
	fd = open(fname, O_WRONLY);
	if (fd < 0)
		return -1;
	n = write(fd, val, len);
	close(fd);
	if (n < 0 || (size_t)n != len)
		return -1;
	return 0;
}
/*
 * Write 'val', formatted as an unsigned decimal number, to a sysfs
 * attribute.  Arguments and return value are those of sysfs_set_str().
 */
int sysfs_set_num(struct sysarray *sra, struct sysdev *dev,
		  char *name, unsigned long long val)
{
	/* far more room than the digits of any unsigned long long need */
	char digits[50];

	snprintf(digits, sizeof(digits), "%llu", val);
	return sysfs_set_str(sra, dev, name, digits);
}
/*
 * Read a number from a sysfs attribute of an array.
 * The file is /sys/block/<sra->name>/md/<dev->name>/<name>; with
 * dev == NULL the device component is empty.
 * On success the value is stored in *val and 0 is returned.
 * Returns -1 if the path does not fit, the file cannot be read, or
 * the contents do not start with a number followed by end of data, a
 * newline or a space.  *val may have been modified in the last case.
 */
int sysfs_get_ll(struct sysarray *sra, struct sysdev *dev,
		 char *name, unsigned long long *val)
{
	char fname[128];
	char buf[50];
	int n;
	int fd;
	char *ep;

	n = snprintf(fname, sizeof(fname), "/sys/block/%s/md/%s/%s",
		     sra->name, dev?dev->name:"", name);
	if (n < 0 || (size_t)n >= sizeof(fname))
		return -1;
	fd = open(fname, O_RDONLY);
	if (fd < 0)
		return -1;
	/* keep one byte back for the terminator written below */
	n = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (n <= 0)
		return -1;
	buf[n] = 0;
	*val = strtoull(buf, &ep, 0);
	if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' '))
		return -1;
	return 0;
}

38
tests/07testreshape5 Normal file
View File

@ -0,0 +1,38 @@
#
# test the reshape code by using test_stripe and the
# kernel md code to move data into and out of variously
# shaped md arrays.
#
# Every combination of chunk size (in K), number of devices and
# raid5 layout is checked in both directions.
# Presumably $dev1..$dev6, $md0, $mdadm and $dir are provided by the
# test harness that runs this script - TODO confirm.
set -x
# layout names for 'mdadm -p', indexed by the layout number that is
# passed to test_stripe
layouts=(la ra ls rs)
for chunk in 4 8 16 32 64 128
do
# the device list grows by one device for each pass of the next loop
devs="$dev1"
for disks in 2 3 4 5 6
do
eval devs=\"$devs \$dev$disks\"
for nlayout in 0 1 2 3
do
layout=${layouts[$nlayout]}
# amount of data in K: 'disks' stripes of (disks-1) data chunks each
size=$[chunk*(disks-1)*disks]
# test restore: make a raid5 from a file, then do a compare
dd if=/dev/urandom of=/tmp/RandFile bs=1024 count=$size
$dir/test_stripe restore /tmp/RandFile $disks $[chunk*1024] 5 $nlayout 0 $[size*1024] $devs
$mdadm -CR $md0 -amd -l5 -n$disks --assume-clean -c $chunk -p $layout $devs
cmp -s -n $[size*1024] $md0 /tmp/RandFile || { echo cmp failed ; exit 2; }
# FIXME check parity
# test save
# fill the array through md, save it with test_stripe, then compare
dd if=/dev/urandom of=$md0 bs=1024 count=$size
# create/truncate the file that test_stripe will save into
> /tmp/NewRand
$dir/test_stripe save /tmp/NewRand $disks $[chunk*1024] 5 $nlayout 0 $[size*1024] $devs
cmp -s -n $[size*1024] $md0 /tmp/NewRand || { echo cmp failed ; exit 2; }
$mdadm -S $md0
done
done
done
exit 0

28
util.c
View File

@ -676,34 +676,6 @@ struct supertype *guess_super(int fd)
return NULL;
}
/*
 * Return the value of the "component_size" sysfs attribute of the md
 * array that 'fd' is open on, or 0 if it cannot be determined.
 */
unsigned long long get_component_size(int fd)
{
	/* Find out the component size of the array.
	 * We cannot trust GET_ARRAY_INFO ioctl as its
	 * size field is only 32bits.
	 * So look in /sys/block/mdXXX/md/component_size
	 */
	struct stat stb;
	char fname[50];
	int n;
	if (fstat(fd, &stb)) return 0;
	if (major(stb.st_rdev) == 9)
		sprintf(fname, "/sys/block/md%d/md/component_size",
			minor(stb.st_rdev));
	else
		sprintf(fname, "/sys/block/md_d%d/md/component_size",
			minor(stb.st_rdev)/16);
	fd = open(fname, O_RDONLY);
	if (fd < 0)
		return 0;
	/* the path buffer is reused to hold the file's contents */
	n = read(fd, fname, sizeof(fname));
	close(fd);
	/* a failed read must not be used as an index, and a full buffer
	 * leaves no room for the terminator
	 */
	if (n < 0 || n >= (int)sizeof(fname))
		return 0;
	fname[n] = 0;
	return strtoull(fname, NULL, 10);
}
#ifdef __TINYC__
/* tinyc doesn't optimize this check in ioctl.h out ... */