mdadm-1.5.0

This commit is contained in:
Neil Brown 2004-01-22 02:10:29 +00:00
parent feb716e9c3
commit 98c6faba80
20 changed files with 467 additions and 141 deletions

29
ANNOUNCE-1.5.0 Normal file
View File

@ -0,0 +1,29 @@
Subject: ANNOUNCE: mdadm 1.5.0 - A tool for managing Soft RAID under Linux
I am pleased to announce the availability of
mdadm version 1.5.0
It is available at
http://www.cse.unsw.edu.au/~neilb/source/mdadm/
and
http://www.{countrycode}.kernel.org/pub/utils/raid/mdadm/
as a source tar-ball and (at the first site) as an SRPM, and as an RPM for i386.
mdadm is a tool for creating, managing and monitoring
device arrays using the "md" driver in Linux, also
known as Software RAID arrays.
Release 1.5.0 adds:
- new command "mdassemble" for use in initrd/initramfs.
- raid6 support (for 2.6.2 and later kernels)
- RebuildFinished event in monitor mode.
- include rebuild status in --detail output.
- fixes for assorted compilation problems
Development of mdadm is sponsored by CSE@UNSW:
The School of Computer Science and Engineering
at
The University of New South Wales
NeilBrown 22 Jan 2004

View File

@ -98,8 +98,8 @@ int Assemble(char *mddev, int mdfd,
mdp_super_t first_super, super;
struct {
char *devname;
int major, minor;
int oldmajor, oldminor;
unsigned int major, minor;
unsigned int oldmajor, oldminor;
long long events;
time_t utime;
int uptodate;
@ -107,16 +107,17 @@ int Assemble(char *mddev, int mdfd,
int raid_disk;
} *devices;
int *best = NULL; /* indexed by raid_disk */
int bestcnt = 0;
int devcnt = 0, okcnt, sparecnt;
int req_cnt;
int i;
unsigned int bestcnt = 0;
int devcnt = 0;
unsigned int okcnt, sparecnt;
unsigned int req_cnt;
unsigned int i;
int most_recent = 0;
int chosen_drive;
int change = 0;
int inargv = 0;
int start_partial_ok = force || devlist==NULL;
int num_devs;
unsigned int num_devs;
mddev_dev_t tmpdev;
vers = md_get_version(mdfd);
@ -224,21 +225,21 @@ int Assemble(char *mddev, int mdfd,
devname);
continue;
}
if (ident->super_minor >= 0 &&
if (ident->super_minor != UnSet &&
(!havesuper || ident->super_minor != super.md_minor)) {
if (inargv || verbose)
fprintf(stderr, Name ": %s has wrong super-minor.\n",
devname);
continue;
}
if (ident->level != -10 &&
(!havesuper|| ident->level != super.level)) {
if (ident->level != UnSet &&
(!havesuper|| ident->level != (int)super.level)) {
if (inargv || verbose)
fprintf(stderr, Name ": %s has wrong raid level.\n",
devname);
continue;
}
if (ident->raid_disks != -1 &&
if (ident->raid_disks != UnSet &&
(!havesuper || ident->raid_disks!= super.raid_disks)) {
if (inargv || verbose)
fprintf(stderr, Name ": %s requires wrong number of drives.\n",
@ -349,16 +350,16 @@ int Assemble(char *mddev, int mdfd,
> devices[most_recent].events)
most_recent = devcnt;
}
if (super.level == -4)
if ((int)super.level == -4)
/* with multipath, the raid_disk from the superblock is meaningless */
i = devcnt;
else
i = devices[devcnt].raid_disk;
if (i>=0 && i < 10000) {
if (i < 10000) {
if (i >= bestcnt) {
int newbestcnt = i+10;
unsigned int newbestcnt = i+10;
int *newbest = malloc(sizeof(int)*newbestcnt);
int c;
unsigned int c;
for (c=0; c < newbestcnt; c++)
if (c < bestcnt)
newbest[c] = best[c];
@ -392,7 +393,7 @@ int Assemble(char *mddev, int mdfd,
/* note: we ignore error flags in multipath arrays
* as they don't make sense
*/
if (first_super.level != -4)
if ((int)first_super.level != -4)
if (!(devices[j].state & (1<<MD_DISK_SYNC))) {
if (!(devices[j].state & (1<<MD_DISK_FAULTY)))
sparecnt++;
@ -494,7 +495,7 @@ int Assemble(char *mddev, int mdfd,
for (i=0; i<bestcnt; i++) {
int j = best[i];
int desired_state;
unsigned int desired_state;
if (i < super.raid_disks)
desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);

View File

@ -1,3 +1,19 @@
Changes Prior to this release
- new command "mdassemble" which is a stripped-down equivalent of
"mdadm -As", that can be compiled with dietlibc.
Thanks to Luca Berra <bluca@comedia.it>.
It can be used in an initramfs or initrd.
- Fix compiling error with BLKGETSIZE64 and some signed/unsigned
comparison warnings.
- Add Rebuild Status (% complete) to --detail output.
- Support "--monitor --test" which will generate a test alert
for each array once, to test notification paths.
- Generate RebuildFinished event when rebuild finishes.
- Support for raid6 as found in 2.6.2 - thanks to
H. Peter Anvin <hpa@zytor.com>
- Support partitioned md arrays with a different major number and
naming scheme (md_dX in /proc/mdstat, /dev/md/dXpY in /dev).
Changes Prior to 1.4.0 release
- Document fact that creating a raid5 array really creates a
degraded array with a spare.

View File

@ -71,7 +71,7 @@ int Create(char *mddev, int mdfd,
fprintf(stderr, Name ": Create requires md driver verison 0.90.0 or later\n");
return 1;
}
if (level == -10) {
if (level == UnSet) {
fprintf(stderr,
Name ": a RAID level is needed to create an array.\n");
return 1;
@ -81,9 +81,19 @@ int Create(char *mddev, int mdfd,
Name ": a number of --raid-devices must be given to create an array\n");
return 1;
}
if (raiddisks < 4 && level == 6) {
fprintf(stderr,
Name ": at least 4 raid-devices needed for level 6\n");
return 1;
}
if (raiddisks > 256 && level == 6) {
fprintf(stderr,
Name ": no more than 256 raid-devices supported for level 6\n");
return 1;
}
if (raiddisks < 2 && level >= 4) {
fprintf(stderr,
Name ": atleast 2 raid-devices needed for level 4 or 5\n");
Name ": at least 2 raid-devices needed for level 4 or 5\n");
return 1;
}
if (raiddisks+sparedisks > MD_SB_DISKS) {
@ -102,12 +112,13 @@ int Create(char *mddev, int mdfd,
}
/* now set some defaults */
if (layout == -1)
if (layout == UnSet)
switch(level) {
default: /* no layout */
layout = 0;
break;
case 5:
case 6:
layout = map_name(r5layout, "default");
if (verbose)
fprintf(stderr,
@ -118,6 +129,7 @@ int Create(char *mddev, int mdfd,
switch(level) {
case 4:
case 5:
case 6:
case 0:
case -1: /* linear */
if (chunk == 0) {
@ -229,12 +241,19 @@ int Create(char *mddev, int mdfd,
/* If this is raid5, we want to configure the last active slot
* as missing, so that a reconstruct happens (faster than re-parity)
* FIX: Can we do this for raid6 as well?
*/
if (force == 0 && level == 5 && first_missing >= raiddisks) {
insert_point = raiddisks-1;
sparedisks++;
array.active_disks--;
missing_disks++;
if (force == 0 && first_missing >= raiddisks) {
switch ( level ) {
case 5:
insert_point = raiddisks-1;
sparedisks++;
array.active_disks--;
missing_disks++;
break;
default:
break;
}
}
/* Ok, lets try some ioctls */
@ -249,8 +268,10 @@ int Create(char *mddev, int mdfd,
if (fstat(mdfd, &stb)==0)
array.md_minor = MINOR(stb.st_rdev);
array.not_persistent = 0;
if (level == 5 && (insert_point < raiddisks || first_missing < raiddisks))
array.state = 1; /* clean, but one drive will be missing */
/*** FIX: Need to do something about RAID-6 here ***/
if ( (level == 5 || level == 6) &&
(insert_point < raiddisks || first_missing < raiddisks) )
array.state = 1; /* clean, but one+ drive will be missing */
else
array.state = 0; /* not clean, but no errors */

View File

@ -142,6 +142,17 @@ int Detail(char *dev, int brief, int test)
}
printf("\n");
{
struct mdstat_ent *ms = mdstat_read();
struct mdstat_ent *e;
for (e=ms; e; e=e->next)
if (e->devnum == array.md_minor) {
if (e->percent >= 0)
printf(" Rebuild Status : %d%% complete\n\n", e->percent);
break;
}
free_mdstat(ms);
}
printf(" Number Major Minor RaidDevice State\n");
}
for (d= 0; d<MD_SB_DISKS; d++) {
@ -189,8 +200,8 @@ int Detail(char *dev, int brief, int test)
int fd = open(dv, O_RDONLY);
if (fd >=0 &&
load_super(fd, &super) ==0 &&
super.ctime == array.ctime &&
super.level == array.level)
(unsigned long)super.ctime == (unsigned long)array.ctime &&
(unsigned int)super.level == (unsigned int)array.level)
have_super = 1;
}
}

View File

@ -162,7 +162,7 @@ int Examine(mddev_dev_t devlist, int brief, int scan, int SparcAdjust)
if (calc_sb_csum(&super) == super.sb_csum)
printf(" Checksum : %x - correct\n", super.sb_csum);
else
printf(" Checksum : %x - expected %x\n", super.sb_csum, calc_sb_csum(&super));
printf(" Checksum : %x - expected %lx\n", super.sb_csum, calc_sb_csum(&super));
if (SparcAdjust) {
/* 2.2 sparc put the events in the wrong place
* So we copy the tail of the superblock

View File

@ -78,6 +78,16 @@ mdadm.klibc : $(SRCS) mdadm.h
rm -f $(OBJS)
gcc -nostdinc -iwithprefix include -I$(KLIBC)/klibc/include -I$(KLIBC)/linux/include -I$(KLIBC)/klibc/arch/i386/include -I$(KLIBC)/klibc/include/bits32 $(CFLAGS) $(SRCS)
mdassemble : mdassemble.c Assemble.c config.c dlink.c util.c mdadm.h
rm -f $(OBJS)
diet gcc -o mdassemble mdassemble.c Assemble.c config.c dlink.c util.c
# This doesn't work
mdassemble.klibc : mdassemble.c Assemble.c config.c dlink.c util.c mdadm.h
rm -f $(OBJS)
gcc -nostdinc -iwithprefix include -I$(KLIBC)/klibc/include -I$(KLIBC)/linux/include -I$(KLIBC)/klibc/arch/i386/include -I$(KLIBC)/klibc/include/bits32 $(CFLAGS) -o mdassemble mdassemble.c Assemble.c config.c dlink.c util.c
mdadm.man : mdadm.8
nroff -man mdadm.8 > mdadm.man

View File

@ -32,6 +32,7 @@
#include "md_u.h"
#include <sys/wait.h>
#include <sys/signal.h>
#include <values.h>
static void alert(char *event, char *dev, char *disc, char *mailaddr, char *cmd);
@ -46,7 +47,7 @@ static char *percentalerts[] = {
int Monitor(mddev_dev_t devlist,
char *mailaddr, char *alert_cmd,
int period, int daemonise, int scan, int oneshot,
char *config)
char *config, int test)
{
/*
* Every few seconds, scan every md device looking for changes
@ -150,7 +151,7 @@ int Monitor(mddev_dev_t devlist,
st->utime = 0;
st->next = statelist;
st->err = 0;
st->devnum = -1;
st->devnum = MAXINT;
st->percent = -2;
st->expected_spares = mdlist->spare_disks;
if (mdlist->spare_group)
@ -169,7 +170,7 @@ int Monitor(mddev_dev_t devlist,
st->utime = 0;
st->next = statelist;
st->err = 0;
st->devnum = -1;
st->devnum = MAXINT;
st->percent = -2;
st->expected_spares = -1;
st->spare_group = NULL;
@ -191,8 +192,10 @@ int Monitor(mddev_dev_t devlist,
struct mdstat_ent *mse;
char *dev = st->devname;
int fd;
int i;
unsigned int i;
if (test)
alert("TestMessage", dev, NULL, mailaddr, alert_cmd);
fd = open(dev, O_RDONLY);
if (fd < 0) {
if (!st->err)
@ -221,18 +224,20 @@ int Monitor(mddev_dev_t devlist,
close(fd);
continue;
}
if (st->devnum < 0) {
if (st->devnum == MAXINT) {
struct stat stb;
if (fstat(fd, &stb) == 0 &&
(S_IFMT&stb.st_mode)==S_IFBLK)
st->devnum = MINOR(stb.st_rdev);
(S_IFMT&stb.st_mode)==S_IFBLK) {
if (MINOR(stb.st_rdev) == 9)
st->devnum = MINOR(stb.st_rdev);
else
st->devnum = -1- (MINOR(stb.st_rdev)>>6);
}
}
for (mse = mdstat ; mse ; mse=mse->next)
if (mse->devnum == st->devnum) {
mse->devnum = -1; /* flag it as "used" */
break;
}
if (mse->devnum == st->devnum)
mse->devnum = MAXINT; /* flag it as "used" */
if (st->utime == array.utime &&
st->failed == array.failed_disks &&
@ -266,6 +271,11 @@ int Monitor(mddev_dev_t devlist,
alert(percentalerts[mse->percent/20],
dev, NULL, mailaddr, alert_cmd);
if (mse &&
mse->percent == -1 &&
st->percent >= 0)
alert("RebuildFinished", dev, NULL, mailaddr, alert_cmd);
if (mse)
st->percent = mse->percent;
@ -285,19 +295,19 @@ int Monitor(mddev_dev_t devlist,
}
change = newstate ^ st->devstate[i];
if (st->utime && change && !st->err) {
if (i < array.raid_disks &&
if (i < (unsigned)array.raid_disks &&
(((newstate&change)&(1<<MD_DISK_FAULTY)) ||
((st->devstate[i]&change)&(1<<MD_DISK_ACTIVE)) ||
((st->devstate[i]&change)&(1<<MD_DISK_SYNC)))
)
alert("Fail", dev, dv, mailaddr, alert_cmd);
else if (i>=array.raid_disks &&
else if (i >= (unsigned)array.raid_disks &&
(disc.major || disc.minor) &&
st->devid[i] == MKDEV(disc.major, disc.minor) &&
((newstate&change)&(1<<MD_DISK_FAULTY))
)
alert("FailSpare", dev, dv, mailaddr, alert_cmd);
else if (i < array.raid_disks &&
else if (i < (unsigned)array.raid_disks &&
(((st->devstate[i]&change)&(1<<MD_DISK_FAULTY)) ||
((newstate&change)&(1<<MD_DISK_ACTIVE)) ||
((newstate&change)&(1<<MD_DISK_SYNC)))
@ -320,21 +330,32 @@ int Monitor(mddev_dev_t devlist,
if (scan) {
struct mdstat_ent *mse;
for (mse=mdstat; mse; mse=mse->next)
if (mse->devnum >= 0 &&
if (mse->devnum != MAXINT &&
(strcmp(mse->level, "raid1")==0 ||
strcmp(mse->level, "raid5")==0 ||
strcmp(mse->level, "multipath")==0)
) {
struct state *st = malloc(sizeof *st);
mdu_array_info_t array;
int fd;
if (st == NULL)
continue;
st->devname = strdup(get_md_name(mse->devnum));
if ((fd = open(st->devname, O_RDONLY)) < 0 ||
ioctl(fd, GET_ARRAY_INFO, &array)< 0) {
/* no such array */
if (fd >=0) close(fd);
free(st->devname);
free(st);
continue;
}
st->utime = 0;
st->next = statelist;
st->err = 1;
st->devnum = mse->devnum;
st->percent = -2;
st->spare_group = NULL;
st->expected_spares = -1;
statelist = st;
alert("NewArray", st->devname, NULL, mailaddr, alert_cmd);
new_found = 1;
@ -395,6 +416,7 @@ int Monitor(mddev_dev_t devlist,
else
sleep(period);
}
test = 0;
}
return 0;
}
@ -422,6 +444,7 @@ static void alert(char *event, char *dev, char *disc, char *mailaddr, char *cmd)
}
if (mailaddr &&
(strncmp(event, "Fail", 4)==0 ||
strncmp(event, "Test", 4)==0 ||
strncmp(event, "Degrade", 7)==0)) {
FILE *mp = popen(Sendmail, "w");
if (mp) {

View File

@ -129,7 +129,7 @@ int Query(char *dev)
if (md_get_version(fd) >= 9000 &&
ioctl(fd, GET_ARRAY_INFO, &array)>= 0) {
if (ioctl(fd, GET_DISK_INFO, &disc) >= 0 &&
MKDEV(disc.major,disc.minor) == stb.st_rdev)
MKDEV((unsigned)disc.major,(unsigned)disc.minor) == stb.st_rdev)
activity = "active";
else
activity = "mismatch";

View File

@ -29,7 +29,7 @@
#include "mdadm.h"
char Version[] = Name " - v1.4.0 - 29 Oct 2003\n";
char Version[] = Name " - v1.5.0 - 22 Jan 2004\n";
/*
* File: ReadMe.c
*
@ -112,7 +112,7 @@ struct option long_options[] = {
/* For create or build: */
{"chunk", 1, 0, 'c'},
{"rounding", 1, 0, 'c'}, /* for linear, chunk is really a rounding number */
{"level", 1, 0, 'l'}, /* 0,1,4,5,linear */
{"level", 1, 0, 'l'}, /* 0,1,4,5,6,linear */
{"parity", 1, 0, 'p'}, /* {left,right}-{a,}symmetric */
{"layout", 1, 0, 'p'},
{"raid-disks",1, 0, 'n'},
@ -205,12 +205,12 @@ char OptionHelp[] =
" For create or build:\n"
" --chunk= -c : chunk size of kibibytes\n"
" --rounding= : rounding factor for linear array (==chunk size)\n"
" --level= -l : raid level: 0,1,4,5,linear,mp. 0 or linear for build\n"
" --parity= -p : raid5 parity algorithm: {left,right}-{,a}symmetric\n"
" --level= -l : raid level: 0,1,4,5,6,linear,mp. 0 or linear for build\n"
" --parity= -p : raid5/6 parity algorithm: {left,right}-{,a}symmetric\n"
" --layout= : same as --parity\n"
" --raid-devices= -n : number of active devices in array\n"
" --spare-devices= -x: number of spares (eXtras) devices in initial array\n"
" --size= -z : Size (in K) of each drive in RAID1/4/5 - optional\n"
" --size= -z : Size (in K) of each drive in RAID1/4/5/6 - optional\n"
" --force -f : Honour devices as listed on command line. Don't\n"
" : insert a missing drive for RAID5.\n"
"\n"
@ -270,12 +270,12 @@ char Help_create[] =
" Options that are valid with --create (-C) are:\n"
" --chunk= -c : chunk size of kibibytes\n"
" --rounding= : rounding factor for linear array (==chunk size)\n"
" --level= -l : raid level: 0,1,4,5,linear,multipath and synonyms\n"
" --parity= -p : raid5 parity algorithm: {left,right}-{,a}symmetric\n"
" --level= -l : raid level: 0,1,4,5,6,linear,multipath and synonyms\n"
" --parity= -p : raid5/6 parity algorithm: {left,right}-{,a}symmetric\n"
" --layout= : same as --parity\n"
" --raid-devices= -n : number of active devices in array\n"
" --spare-devices= -x: number of spares (eXtras) devices in initial array\n"
" --size= -z : Size (in K) of each drive in RAID1/4/5 - optional\n"
" --size= -z : Size (in K) of each drive in RAID1/4/5/6 - optional\n"
" --force -f : Honour devices as listed on command line. Don't\n"
" : insert a missing drive for RAID5.\n"
" --run -R : insist of running the array even if not all\n"
@ -410,6 +410,7 @@ char Help_monitor[] =
" --scan -s : find mail-address/program in config file\n"
" --daemonise -f : Fork and continue in child, parent exits\n"
" --oneshot -1 : Check for degraded arrays, then exit\n"
" --test -t : Generate a TestMessage event against each array at startup\n"
;
@ -480,6 +481,8 @@ mapping_t pers[] = {
{ "5", 5},
{ "multipath", -4},
{ "mp", -4},
{ "raid6", 6},
{ "6", 6},
{ NULL, 0}
};

View File

@ -211,12 +211,15 @@ void load_partitions(void)
}
while (fgets(buf, 1024, f)) {
int major, minor;
char *name;
char *name, *mp;
buf[1023] = '\0';
if (buf[0] != ' ')
continue;
if (sscanf(buf, " %d %d ", &major, &minor) != 2)
major = strtoul(buf, &mp, 10);
if (mp == buf || *mp != ' ')
continue;
minor = strtoul(mp, NULL, 10);
name = map_dev(major, minor);
if (name) {
struct conf_dev *cd;
@ -262,10 +265,10 @@ void arrayline(char *line)
mddev_ident_t mi;
mis.uuid_set = 0;
mis.super_minor = -1;
mis.level = -10;
mis.raid_disks = -1;
mis.spare_disks = -1;
mis.super_minor = UnSet;
mis.level = UnSet;
mis.raid_disks = UnSet;
mis.spare_disks = UnSet;
mis.devices = NULL;
mis.devname = NULL;
mis.spare_group = NULL;
@ -296,7 +299,7 @@ void arrayline(char *line)
if (w[12]==0 || endptr[0]!=0 || mis.super_minor < 0) {
fprintf(stderr, Name ": invalid super-minor number: %s\n",
w);
mis.super_minor = -1;
mis.super_minor = UnSet;
}
}
} else if (strncasecmp(w, "devices=", 8 ) == 0 ) {
@ -450,7 +453,7 @@ mddev_dev_t conf_get_devs(char *conffile)
struct conf_dev *cd;
int flags = 0;
static mddev_dev_t dlist = NULL;
int i;
unsigned int i;
while (dlist) {
mddev_dev_t t = dlist;

View File

@ -5,6 +5,9 @@
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#ifdef __dietlibc__
char *strncpy(char *dest, const char *src, size_t n) __THROW;
#endif
#include "dlink.h"

60
md.4
View File

@ -15,9 +15,12 @@ Array of Independent Devices.
.PP
.B md
supports RAID levels 1 (mirroring) 4 (striped array with parity
device) and 5 (striped array with distributed parity information).
If a single underlying device fails while using one of these levels,
the array will continue to function.
device), 5 (striped array with distributed parity information) and 6
(striped array with distributed dual redundancy information). If
some number of underlying devices fail while using one of these
levels, the array will continue to function; this number is one for
RAID levels 4 and 5, two for RAID level 6, and all but one (N-1) for
RAID level 1.
.PP
.B md
also supports a number of pseudo RAID (non-redundant) configurations
@ -140,6 +143,16 @@ parity blocks on different devices so there is less contention.
This also allows more parallelism when reading as read requests are
distributed over all the devices in the array instead of all but one.
.SS RAID6
RAID6 is similar to RAID5, but can handle the loss of any \fItwo\fP
devices without data loss. Accordingly, it requires N+2 drives to
store N drives worth of data.
The performance for RAID6 is slightly lower but comparable to RAID5 in
normal mode and single disk failure mode. It is very slow in dual
disk failure mode, however.
.SS MULTIPATH
MULTIPATH is not really a RAID at all as there is only one real device
@ -156,7 +169,7 @@ another interface.
.SS UNCLEAN SHUTDOWN
When changes are made to a RAID1, RAID4, or RAID5 array there is a
When changes are made to a RAID1, RAID4, RAID5 or RAID6 array there is a
possibility of inconsistency for short periods of time as each update
requires at least two blocks to be written to different devices, and
these writes probably won't happen at exactly the same time.
@ -166,33 +179,32 @@ consistent.
To handle this situation, the md driver marks an array as "dirty"
before writing any data to it, and marks it as "clean" when the array
is being disabled, e.g. at shutdown.
If the md driver finds an array to be dirty at startup, it proceeds to
correct any possibly inconsistency. For RAID1, this involves copying
the contents of the first drive onto all other drives.
For RAID4 or RAID5 this involves recalculating the parity for each
stripe and making sure that the parity block has the correct data.
This process, known as "resynchronising" or "resync" is performed in
the background. The array can still be used, though possibly with
reduced performance.
is being disabled, e.g. at shutdown. If the md driver finds an array
to be dirty at startup, it proceeds to correct any possible
inconsistency. For RAID1, this involves copying the contents of the
first drive onto all other drives. For RAID4, RAID5 and RAID6 this
involves recalculating the parity for each stripe and making sure that
the parity block has the correct data. This process, known as
"resynchronising" or "resync" is performed in the background. The
array can still be used, though possibly with reduced performance.
If a RAID4 or RAID5 array is degraded (missing one drive) when it is
restarted after an unclean shutdown, it cannot recalculate parity, and
so it is possible that data might be undetectably corrupted.
The 2.4 md driver
If a RAID4, RAID5 or RAID6 array is degraded (missing at least one
drive) when it is restarted after an unclean shutdown, it cannot
recalculate parity, and so it is possible that data might be
undetectably corrupted. The 2.4 md driver
.B does not
alert the operator to this condition. The 2.5 md driver will fail to
start an array in this condition without manual intervention.
.SS RECOVERY
If the md driver detects any error on a device in a RAID1, RAID4, or
RAID5 array, it immediately disables that device (marking it as faulty)
and continues operation on the remaining devices. If there is a spare
drive, the driver will start recreating on one of the spare drives the
data what was on that failed drive, either by copying a working drive
in a RAID1 configuration, or by doing calculations with the parity
block on RAID4 and RAID5.
If the md driver detects any error on a device in a RAID1, RAID4,
RAID5 or RAID6 array, it immediately disables that device (marking it
as faulty) and continues operation on the remaining devices. If there
is a spare drive, the driver will start recreating on one of the spare
drives the data that was on that failed drive, either by copying a
working drive in a RAID1 configuration, or by doing calculations with
the parity block on RAID4, RAID5 or RAID6.
While this recovery process is happening, the md driver will monitor
accesses to the array and will slow down the rate of recovery if other

47
mdadm.8
View File

@ -1,5 +1,5 @@
.\" -*- nroff -*-
.TH MDADM 8 "" v1.4.0
.TH MDADM 8 "" v1.5.0
.SH NAME
mdadm \- manage MD devices
.I aka
@ -29,6 +29,7 @@ md devices,
(mirroring),
.BR RAID4 ,
.BR RAID5 ,
.BR RAID6 ,
and
.BR MULTIPATH .
@ -109,9 +110,9 @@ superblocks, erasing old superblocks and stopping active arrays.
.TP
.B "Follow or Monitor"
Monitor one or more md devices and act on any state changes. This is
only meaningful for raid1, raid5 or multipath arrays as only these have
interesting state. raid0 or linear never have missing, spare, or
failed drives, so there is nothing to monitor.
only meaningful for raid1, 4, 5, 6 or multipath arrays as
only these have interesting state. raid0 or linear never have
missing, spare, or failed drives, so there is nothing to monitor.
.SH OPTIONS
@ -234,8 +235,8 @@ Specify rounding factor for linear array (==chunk size)
.BR -l ", " --level=
Set raid level. When used with
.IR --create ,
options are: linear, raid0, 0, stripe, raid1, 1, mirror, raid5, 4,
raid5, 5, multipath, mp. Obviously some of these are synonymous.
options are: linear, raid0, 0, stripe, raid1, 1, mirror, raid4, 4,
raid5, 5, raid6, 6, multipath, mp. Obviously some of these are synonymous.
When used with
.IR --build ,
@ -279,7 +280,7 @@ number of spare devices.
.TP
.BR -z ", " --size=
Amount (in Kibibytes) of space to use from each drive in RAID1/4/5.
Amount (in Kibibytes) of space to use from each drive in RAID1/4/5/6.
This must be a multiple of the chunk size, and must leave about 128Kb
of space at the end of the drive for the RAID superblock.
If this is not specified
@ -465,6 +466,14 @@ events. Running
.in -5
from a cron script will ensure regular notification of any degraded arrays.
.TP
.BR -t ", " --test
Generate a
.B TestMessage
alert for every array found at startup. This alert gets mailed and
passed to the alert program. This can be used for testing that alert
messages do get through successfully.
.SH ASSEMBLE MODE
.HP 12
@ -532,7 +541,7 @@ Normally the array will be started after it is assembled. However if
is not given and insufficient drives were listed to start a complete
(non-degraded) array, then the array is not started (to guard against
usage errors). To insist that the array be started in this case (as
may work for RAID1 or RAID5), give the
may work for RAID1, 4, 5 or 6), give the
.B --run
flag.
@ -590,7 +599,7 @@ in place of a device name. This will cause
.B mdadm
to leave the corresponding slot in the array empty.
For a RAID4 or RAID5 array at most one slot can be
"\fBmissing\fP".
"\fBmissing\fP"; for a RAID6 array at most two slots.
For a RAID1 array, only one real device needs to be given. All of the
others can be
"\fBmissing\fP".
@ -717,8 +726,8 @@ config file to be examined.
.TP
--stop
This devices should active md arrays which will be deactivated, if
they are not currently in use.
The devices should be active md arrays which will be deactivated, as
long as they are not currently in use.
.TP
--run
@ -822,6 +831,11 @@ Where
is 20, 40, 60, or 80, this indicates that rebuild has passed that
percentage of the total.
.TP
.B RebuildFinished
An md array that was rebuilding, isn't any more, either because it
finished normally or was aborted.
.TP
.B Fail
An active component device of an array has been marked as faulty.
@ -857,12 +871,19 @@ A spare drive has been moved from one array in a
.B spare-group
to another to allow a failed drive to be replaced.
.TP
.B TestMessage
An array was found at startup, and the
.B --test
flag was given.
.RE
Only
.B Fail
.B Fail ,
.B FailSpare ,
.B DegradedArray ,
and
.B FailSpare
.B TestMessage
cause Email to be sent. All events cause the program to be run.
The program is run with two or three arguments, they being the event
name, the array device and possibly a second device.

32
mdadm.c
View File

@ -58,8 +58,8 @@ int main(int argc, char *argv[])
int chunk = 0;
int size = 0;
int level = -10;
int layout = -1;
int level = UnSet;
int layout = UnSet;
int raiddisks = 0;
int sparedisks = 0;
struct mddev_ident_s ident;
@ -89,9 +89,9 @@ int main(int argc, char *argv[])
int mdfd = -1;
ident.uuid_set=0;
ident.level = -10;
ident.raid_disks = -1;
ident.super_minor= -1;
ident.level = UnSet;
ident.raid_disks = UnSet;
ident.super_minor= UnSet;
ident.devices=0;
while ((option_index = -1) ,
@ -259,13 +259,13 @@ int main(int argc, char *argv[])
case O(CREATE,'l'):
case O(BUILD,'l'): /* set raid level*/
if (level != -10) {
if (level != UnSet) {
fprintf(stderr, Name ": raid level may only be set once. "
"Second value is %s.\n", optarg);
exit(2);
}
level = map_name(pers, optarg);
if (level == -10) {
if (level == UnSet) {
fprintf(stderr, Name ": invalid raid level: %s\n",
optarg);
exit(2);
@ -294,13 +294,14 @@ int main(int argc, char *argv[])
fprintf(stderr, Name ": layout not meaningful for %s arrays.\n",
map_num(pers, level));
exit(2);
case -10:
case UnSet:
fprintf(stderr, Name ": raid level must be given before layout.\n");
exit(2);
case 5:
case 6:
layout = map_name(r5layout, optarg);
if (layout==-10) {
if (layout==UnSet) {
fprintf(stderr, Name ": layout %s not understood for raid5.\n",
optarg);
exit(2);
@ -337,7 +338,7 @@ int main(int argc, char *argv[])
sparedisks, optarg);
exit(2);
}
if (level > -10 && level <= 0 && level >= -1) {
if (level != UnSet && level <= 0 && level >= -1) {
fprintf(stderr, Name ": spare-devices setting is incompatible with raid level %d\n",
level);
exit(2);
@ -372,7 +373,7 @@ int main(int argc, char *argv[])
continue;
case O(ASSEMBLE,'m'): /* super-minor for array */
if (ident.super_minor != -1) {
if (ident.super_minor != UnSet) {
fprintf(stderr, Name ": super-minor cannot be set twice. "
"Second value: %s.\n", optarg);
exit(2);
@ -455,6 +456,9 @@ int main(int argc, char *argv[])
case O(MONITOR,'1'): /* oneshot */
oneshot = 1;
continue;
case O(MONITOR,'t'): /* test */
test = 1;
continue;
/* now the general management options. Some are applicable
* to other modes. None have arguments.
@ -563,7 +567,7 @@ int main(int argc, char *argv[])
mdfd = open_mddev(devlist->devname);
if (mdfd < 0)
exit(1);
if (ident.super_minor == -2) {
if ((int)ident.super_minor == -2) {
struct stat stb;
fstat(mdfd, &stb);
ident.super_minor = MINOR(stb.st_rdev);
@ -586,7 +590,7 @@ int main(int argc, char *argv[])
break;
case ASSEMBLE:
if (devs_found == 1 && ident.uuid_set == 0 &&
ident.super_minor == -1 && !scan ) {
ident.super_minor == UnSet && !scan ) {
/* Only a device has been given, so get details from config file */
mddev_ident_t array_ident = conf_get_ident(configfile, devlist->devname);
mdfd = open_mddev(devlist->devname);
@ -733,7 +737,7 @@ int main(int argc, char *argv[])
break;
}
rv= Monitor(devlist, mailaddr, program,
delay?delay:60, daemonise, scan, oneshot, configfile);
delay?delay:60, daemonise, scan, oneshot, configfile, test);
break;
}
exit(rv);

24
mdadm.h
View File

@ -29,7 +29,9 @@
#define __USE_LARGEFILE64
#include <unistd.h>
#ifndef __dietlibc__
extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
#endif
#include <sys/types.h>
#include <sys/stat.h>
@ -40,6 +42,12 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
#include <stdio.h>
#include <errno.h>
#include <string.h>
#ifdef __dietlibc__NONO
int strncmp(const char *s1, const char *s2, size_t n) __THROW __pure__;
char *strncpy(char *dest, const char *src, size_t n) __THROW;
#include <strings.h>
#endif
#include <linux/kdev_t.h>
/*#include <linux/fs.h> */
@ -49,7 +57,7 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
#define MD_MAJOR 9
#ifndef BLKGETSIZE64
#define BLKGETSIZE64 _IOR(0x12,114,sizeof(__u64)) /* return device size in bytes (u64 *arg) */
#define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */
#endif
@ -83,20 +91,21 @@ extern char Version[], Usage[], Help[], OptionHelp[],
* If multiple fields are present, the intersection of all matching
* devices is considered
*/
#define UnSet (0xfffe)
typedef struct mddev_ident_s {
char *devname;
int uuid_set;
__u32 uuid[4];
int super_minor; /* -1 if not set */
unsigned int super_minor;
char *devices; /* comma separated list of device
* names with wild cards
*/
int level; /* -10 if not set */
int raid_disks; /* -1 if not set */
int spare_disks; /* -1 if not set */
int level;
unsigned int raid_disks;
unsigned int spare_disks;
char *spare_group;
struct mddev_ident_s *next;
} *mddev_ident_t;
@ -170,7 +179,7 @@ extern int Examine(mddev_dev_t devlist, int brief, int scan, int SparcAdjust);
extern int Monitor(mddev_dev_t devlist,
char *mailaddr, char *alert_cmd,
int period, int daemonise, int scan, int oneshot,
char *config);
char *config, int test);
extern int Kill(char *dev, int force);
@ -186,13 +195,14 @@ extern mddev_dev_t conf_get_devs(char *conffile);
extern char *conf_get_mailaddr(char *conffile);
extern char *conf_get_program(char *conffile);
extern char *conf_line(FILE *file);
extern char *conf_word(FILE *file, int allow_key);
extern void free_line(char *line);
extern int match_oneof(char *devices, char *devname);
extern int load_super(int fd, mdp_super_t *super);
extern void uuid_from_super(int uuid[4], mdp_super_t *super);
extern int same_uuid(int a[4], int b[4]);
extern int compare_super(mdp_super_t *first, mdp_super_t *second);
extern int calc_sb_csum(mdp_super_t *super);
extern unsigned long calc_sb_csum(mdp_super_t *super);
extern int store_super(int fd, mdp_super_t *super);
extern int enough(int level, int raid_disks, int avail_disks);
extern int ask(char *mesg);

View File

@ -1,6 +1,6 @@
Summary: mdadm is used for controlling Linux md devices (aka RAID arrays)
Name: mdadm
Version: 1.4.0
Version: 1.5.0
Release: 1
Source: http://www.cse.unsw.edu.au/~neilb/source/mdadm/mdadm-%{version}.tgz
URL: http://www.cse.unsw.edu.au/~neilb/source/mdadm/

97
mdassemble.c Normal file
View File

@ -0,0 +1,97 @@
/*
* mdassemble - assemble Linux "md" devices aka RAID arrays.
*
* Copyright (C) 2001-2002 Neil Brown <neilb@cse.unsw.edu.au>
* Copyright (C) 2003 Luca Berra <bluca@vodka.it>
*
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Author: Neil Brown
* Email: <neilb@cse.unsw.edu.au>
* Paper: Neil Brown
* School of Computer Science and Engineering
* The University of New South Wales
* Sydney, 2052
* Australia
*/
#include "mdadm.h"
#include "md_p.h"
/* from readme.c */
/*
 * Personality table: maps each accepted spelling of a RAID level to
 * its numeric level.  Several aliases share one number (e.g. "raid1",
 * "1" and "mirror" all mean level 1).  The entry with a NULL name
 * ends the table.
 *
 * Level 6 is listed because this release handles raid6 elsewhere
 * (see the level 6 case in enough()); without these entries the name
 * "raid6" could not be translated to a level by this program.
 */
mapping_t pers[] = {
	{ "linear", -1},
	{ "raid0", 0},
	{ "0", 0},
	{ "stripe", 0},
	{ "raid1", 1},
	{ "1", 1},
	{ "mirror", 1},
	{ "raid4", 4},
	{ "4", 4},
	{ "raid5", 5},
	{ "5", 5},
	{ "raid6", 6},
	{ "6", 6},
	{ "multipath", -4},
	{ "mp", -4},
	{ NULL, 0}
};
/* from mdadm.c */
/*
 * Open 'dev' read/write and check that it answers as an md device.
 *
 * Returns the open file descriptor on success.  On failure a message
 * is written to stderr and a negative value is returned; if the open
 * succeeded but md_get_version() did not report a positive version,
 * the descriptor is closed before returning -1.
 */
int open_mddev(char *dev)
{
	int fd = open(dev, O_RDWR, 0);

	if (fd < 0) {
		fprintf(stderr, Name ": error opening %s: %s\n",
			dev, strerror(errno));
		return fd;
	}

	if (md_get_version(fd) > 0)
		return fd;

	/* Opened fine, but the md version query failed: not an md device. */
	fprintf(stderr, Name ": %s does not appear to be an md device\n",
		dev);
	close(fd);
	return -1;
}
/* Config file name handed to conf_get_ident() and Assemble(); left NULL
 * here (presumably selects the built-in default location -- confirm in
 * the config parser). */
char *configfile = NULL;
/* Exit-status accumulator: main() ORs in 1 for each failure and the
 * result of every Assemble() call. */
int rv;
/* File descriptor of the md device currently being handled; starts at -1. */
int mdfd = -1;
/* The remaining values are forwarded as-is to Assemble().  main() takes
 * no arguments, so they always keep these initial values. */
int runstop = 0;
int readonly = 0;
int verbose = 0;
int force = 0;
/*
 * Assemble every array named in the config file that is not already
 * running.
 *
 * For each identity returned by conf_get_ident() the md device is
 * opened; if GET_ARRAY_INFO succeeds the array is already assembled
 * and is skipped, otherwise Assemble() is called for it.
 *
 * Returns the accumulated status: 0 when every array was either
 * already running or assembled successfully, non-zero when the config
 * lists no arrays, a device could not be opened, or Assemble()
 * reported a failure.
 */
int main() {
	mddev_ident_t array_list = conf_get_ident(configfile, NULL);

	if (!array_list) {
		fprintf(stderr, Name ": No arrays found in config file\n");
		rv = 1;
		return rv;
	}

	for (; array_list; array_list = array_list->next) {
		mdu_array_info_t array;

		mdfd = open_mddev(array_list->devname);
		if (mdfd < 0) {
			/* open_mddev() has already printed the reason */
			rv |= 1;
			continue;
		}
		/* GET_ARRAY_INFO only succeeds on an array that is already
		 * assembled; in that case there is nothing to do. */
		if (ioctl(mdfd, GET_ARRAY_INFO, &array) < 0)
			rv |= Assemble(array_list->devname, mdfd,
				       array_list, configfile,
				       NULL,
				       readonly, runstop, NULL, verbose, force);
		/* Release the descriptor on every path so that a long
		 * config file does not leak one fd per array. */
		close(mdfd);
		mdfd = -1;
	}
	/* Propagate the accumulated status to the caller (e.g. an initrd
	 * script) instead of always exiting with 0. */
	return rv;
}

View File

@ -114,6 +114,8 @@ struct mdstat_ent *mdstat_read()
for (; (line = conf_line(f)) ; free_line(line)) {
struct mdstat_ent *ent;
char *w;
int devnum;
char *ep;
if (strcmp(line, "Personalities")==0)
continue;
@ -122,9 +124,16 @@ struct mdstat_ent *mdstat_read()
if (strcmp(line, "unused")==0)
continue;
/* Better be an md line.. */
if (strncmp(line, "md", 2)!= 0
|| atoi(line+2)<0) {
fprintf(stderr, Name ": bad /proc/mdstat line starts: %s\n", line);
if (strncmp(line, "md", 2)!= 0)
continue;
if (strncmp(line, "md_d", 4) == 0)
devnum = -1-strtoul(line+4, &ep, 10);
else if (strncmp(line, "md", 2) == 0)
devnum = strtoul(line+2, &ep, 10);
else
continue;
if (ep == NULL || *ep ) {
/* fprintf(stderr, Name ": bad /proc/mdstat line starts: %s\n", line); */
continue;
}
@ -141,7 +150,7 @@ struct mdstat_ent *mdstat_read()
ent->active = -1;
ent->dev = strdup(line);
ent->devnum = atoi(line+2);
ent->devnum = devnum;
for (w=dl_next(line); w!= line ; w=dl_next(w)) {
int l = strlen(w);

85
util.c
View File

@ -30,6 +30,7 @@
#include "mdadm.h"
#include "md_p.h"
#include <sys/utsname.h>
#include <ctype.h>
/*
* Parse a 128 bit uuid in 4 integers
@ -102,12 +103,18 @@ int md_get_version(int fd)
int get_linux_version()
{
struct utsname name;
char *cp;
int a,b,c;
if (uname(&name) <0)
return -1;
if (sscanf(name.release, "%d.%d.%d", &a,&b,&c)!= 3)
return -1;
cp = name.release;
a = strtoul(cp, &cp, 10);
if (*cp != '.') return -1;
b = strtoul(cp+1, &cp, 10);
if (*cp != '.') return -1;
c = strtoul(cp+1, NULL, 10);
return (a*1000000)+(b*1000)+c;
}
@ -124,6 +131,8 @@ int enough(int level, int raid_disks, int avail_disks)
case 4:
case 5:
return avail_disks >= raid_disks-1;
case 6:
return avail_disks >= raid_disks-2;
default:
return 0;
}
@ -363,7 +372,7 @@ int map_name(mapping_t *map, char *name)
return map->num;
map++;
}
return -10;
return UnSet;
}
/*
@ -392,7 +401,11 @@ char *map_dev(int major, int minor)
#include <ftw.h>
#ifndef __dietlibc__
int add_dev(const char *name, const struct stat *stb, int flag, struct FTW *s)
#else
int add_dev(const char *name, const struct stat *stb, int flag)
#endif
{
if ((stb->st_mode&S_IFMT)== S_IFBLK) {
char *n = strdup(name);
@ -412,7 +425,11 @@ char *map_dev(int major, int minor)
{
struct devmap *p;
if (!devlist_ready) {
#ifndef __dietlibc__
nftw("/dev", add_dev, 10, FTW_PHYS);
#else
ftw("/dev", add_dev, 10);
#endif
devlist_ready=1;
}
@ -425,7 +442,7 @@ char *map_dev(int major, int minor)
#endif
int calc_sb_csum(mdp_super_t *super)
unsigned long calc_sb_csum(mdp_super_t *super)
{
unsigned int oldcsum = super->sb_csum;
unsigned long long newcsum = 0;
@ -487,27 +504,63 @@ char *human_size_brief(long long bytes)
return buf;
}
static int mdp_major = -1;
/*
 * Find the major number registered for the "mdp" block driver and
 * store it in mdp_major.
 *
 * /proc/devices is read one word at a time with conf_word().  Once the
 * two consecutive words "Block" "devices:" have been seen, the most
 * recent word that started with a digit is taken as the major number
 * of the next "mdp" entry.  If the file cannot be opened, or no such
 * entry exists, mdp_major is left unchanged.
 */
void get_mdp_major(void)
{
	FILE *fl = fopen("/proc/devices", "r");
	char *w;
	int have_block = 0;
	int have_devices = 0;
	int last_num = -1;

	if (!fl)
		return;
	while ((w = conf_word(fl, 1))) {
		/* "devices:" only counts when it directly follows "Block" */
		if (have_block && strcmp(w, "devices:")==0)
			have_devices = 1;
		have_block = (strcmp(w, "Block")==0);
		/* <ctype.h> functions need an unsigned char value; passing a
		 * negative plain char is undefined behaviour. */
		if (isdigit((unsigned char)w[0]))
			last_num = atoi(w);
		if (have_devices && strcmp(w, "mdp")==0)
			mdp_major = last_num;
		free(w);
	}
	fclose(fl);
}
#define MD_MAJOR 9
char *get_md_name(int dev)
{
/* find /dev/md%d or /dev/md/%d or make a device /dev/.tmp.md%d */
/* if dev < 0, want /dev/md/d%d or find mdp in /proc/devices ... */
static char devname[50];
struct stat stb;
dev_t rdev = MKDEV(MD_MAJOR, dev);
dev_t rdev;
sprintf(devname, "/dev/md%d", dev);
if (stat(devname, &stb) == 0
&& (S_IFMT&stb.st_mode) == S_IFBLK
&& (stb.st_rdev == rdev))
return devname;
if (dev < 0) {
sprintf(devname, "/dev/md/%d", dev);
if (stat(devname, &stb) == 0
&& (S_IFMT&stb.st_mode) == S_IFBLK
&& (stb.st_rdev == rdev))
return devname;
if (mdp_major < 0) get_mdp_major();
if (mdp_major < 0) return NULL;
rdev = MKDEV(mdp_major, (-1-dev)<<6);
sprintf(devname, "/dev/md/d%d", -1-dev);
if (stat(devname, &stb) == 0
&& (S_IFMT&stb.st_mode) == S_IFBLK
&& (stb.st_rdev == rdev))
return devname;
} else {
rdev = MKDEV(MD_MAJOR, dev);
sprintf(devname, "/dev/md%d", dev);
if (stat(devname, &stb) == 0
&& (S_IFMT&stb.st_mode) == S_IFBLK
&& (stb.st_rdev == rdev))
return devname;
sprintf(devname, "/dev/md/%d", dev);
if (stat(devname, &stb) == 0
&& (S_IFMT&stb.st_mode) == S_IFBLK
&& (stb.st_rdev == rdev))
return devname;
}
sprintf(devname, "/dev/.tmp.md%d", dev);
if (mknod(devname, S_IFBLK | 0600, rdev) == -1)
return NULL;