diff --git a/Create.c b/Create.c index 692a74c..69192ab 100644 --- a/Create.c +++ b/Create.c @@ -139,7 +139,7 @@ int Create(struct supertype *st, char *mddev, int mdfd, int fd; memset(&inf, 0, sizeof(inf)); - fd = open(devlist->devname, O_RDONLY, 0); + fd = open(devlist->devname, O_RDONLY); if (fd >= 0 && ioctl(fd, GET_ARRAY_INFO, &inf) == 0 && inf.raid_disks == 0) { @@ -333,7 +333,7 @@ int Create(struct supertype *st, char *mddev, int mdfd, minsize = freesize; } if (runstop != 1 || verbose >= 0) { - int fd = open(dname, O_RDONLY, 0); + int fd = open(dname, O_RDONLY); if (fd <0 ) { fprintf(stderr, Name ": Cannot open %s: %s\n", dname, strerror(errno)); @@ -685,9 +685,9 @@ int Create(struct supertype *st, char *mddev, int mdfd, inf->disk.state |= (1<ss->external && st->subarray[0]) - fd = open(dv->devname, O_RDWR, 0); + fd = open(dv->devname, O_RDWR); else - fd = open(dv->devname, O_RDWR|O_EXCL,0); + fd = open(dv->devname, O_RDWR|O_EXCL); if (fd < 0) { fprintf(stderr, Name ": failed to open %s " diff --git a/Detail.c b/Detail.c index 2fb59a3..2b2111c 100644 --- a/Detail.c +++ b/Detail.c @@ -38,7 +38,7 @@ int Detail(char *dev, int brief, int export, int test, char *homehost) * GET_ARRAY_INFO and GET_DISK_INFO ioctl calls */ - int fd = open(dev, O_RDONLY, 0); + int fd = open(dev, O_RDONLY); int vers; mdu_array_info_t array; mdu_disk_info_t *disks; @@ -147,6 +147,7 @@ int Detail(char *dev, int brief, int export, int test, char *homehost) } if (brief) { + mdu_bitmap_file_t bmf; printf("ARRAY %s level=%s num-devices=%d", dev, c?c:"-unknown-", array.raid_disks ); @@ -155,6 +156,13 @@ int Detail(char *dev, int brief, int export, int test, char *homehost) else printf(" metadata=%02d.%02d", array.major_version, array.minor_version); + + /* Only try GET_BITMAP_FILE for 0.90.01 and later */ + if (vers >= 9001 && + ioctl(fd, GET_BITMAP_FILE, &bmf) == 0 && + bmf.pathname[0]) { + printf(" bitmap=%s", bmf.pathname); + } } else { mdu_bitmap_file_t bmf; unsigned long long 
larray_size; diff --git a/Grow.c b/Grow.c index fc007d6..3a31ea5 100644 --- a/Grow.c +++ b/Grow.c @@ -615,7 +615,8 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, last_block = nstripe * ndata; ostripe = last_block / odata / (ochunk/512) * (ochunk/512); } - printf("mdadm: Need to backup %lluK of critical section..\n", last_block/2); + fprintf(stderr, Name ": Need to backup %lluK of critical " + "section..\n", last_block/2); sra = sysfs_read(fd, 0, GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE| @@ -685,7 +686,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, } spares = sra->array.spare_disks; if (backup_file) { - fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL, 0600); + fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL, S_IRUSR | S_IWUSR); if (fdlist[d] < 0) { fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n", devname, backup_file, strerror(errno)); @@ -837,7 +838,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, if (backup_file) unlink(backup_file); - printf(Name ": ... critical section passed.\n"); + fprintf(stderr, Name ": ... 
critical section passed.\n"); break; } return 0; diff --git a/Makefile b/Makefile index dc06167..4a19fa9 100644 --- a/Makefile +++ b/Makefile @@ -213,3 +213,9 @@ testdist : everything clean TAGS : etags *.h *.c + +DISTRO_MAKEFILE := $(wildcard distropkg/Makefile) +ifdef DISTRO_MAKEFILE +include $(DISTRO_MAKEFILE) +endif + diff --git a/Manage.c b/Manage.c index 1a86a85..714a33b 100644 --- a/Manage.c +++ b/Manage.c @@ -158,10 +158,15 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet) sysfs_free(mdi); if (fd >= 0 && ioctl(fd, STOP_ARRAY, NULL)) { - if (quiet == 0) + if (quiet == 0) { fprintf(stderr, Name ": failed to stop array %s: %s\n", devname, strerror(errno)); + if (errno == EBUSY) + fprintf(stderr, "Perhaps a running " + "process, mounted filesystem " + "or active volume group?\n"); + } return 1; } diff --git a/Monitor.c b/Monitor.c index 54193f5..abc2dbd 100644 --- a/Monitor.c +++ b/Monitor.c @@ -156,7 +156,7 @@ int Monitor(mddev_dev_t devlist, return 1; } close(0); - open("/dev/null", 3); + open("/dev/null", O_RDWR); dup2(0,1); dup2(0,2); setsid(); diff --git a/Query.c b/Query.c index fcbafcf..dc69eb8 100644 --- a/Query.c +++ b/Query.c @@ -37,7 +37,7 @@ int Query(char *dev) * whether it is an md device and whether it has * a superblock */ - int fd = open(dev, O_RDONLY, 0); + int fd = open(dev, O_RDONLY); int vers; int ioctlerr; int superror, superrno; diff --git a/ReadMe.c b/ReadMe.c index 3fae193..12ed17f 100644 --- a/ReadMe.c +++ b/ReadMe.c @@ -518,9 +518,11 @@ char Help_grow[] = " --size= -z : Change the active size of devices in an array.\n" " : This is useful if all devices have been replaced\n" " : with larger devices.\n" -" --raid-disks= -n : Change the number of active devices in an array.\n" -" : array.\n" +" --raid-devices= -n : Change the number of active devices in an array.\n" " --bitmap= -b : Add or remove a write-intent bitmap.\n" +" --backup-file= file : A file on a differt device to store data for a\n" +" : short time while 
increasing raid-devices on a\n" +" : RAID4/5/6 array. Not needed when a spare is present.\n" ; char Help_incr[] = diff --git a/md.4 b/md.4 index f6d60fa..ea12eaf 100644 --- a/md.4 +++ b/md.4 @@ -240,7 +240,7 @@ across a later section of all drives, always ensuring that all copies of any given block are on different drives. The 'far' arrangement can give sequential read performance equal to -that of a RAID0 array, but at the cost of degraded write performance. +that of a RAID0 array, but at the cost of reduced write performance. When 'offset' replicas are chosen, the multiple copies of a given chunk are laid out on consecutive drives and at consecutive offsets. diff --git a/mdadm.8 b/mdadm.8 index f11298c..3c283ca 100644 --- a/mdadm.8 +++ b/mdadm.8 @@ -50,29 +50,29 @@ each device is a path to one common physical storage device. is also not true RAID, and it only involves one device. It provides a layer over a true device that can be used to inject faults. -.\".B mdadm +.\".I mdadm .\"is a program that can be used to create, manage, and monitor .\"MD devices. As .\"such it provides a similar set of functionality to the .\".B raidtools .\"packages. .\"The key differences between -.\".B mdadm +.\".I mdadm .\"and .\".B raidtools .\"are: .\".IP \(bu 4 -.\".B mdadm +.\".I mdadm .\"is a single program and not a collection of programs. .\".IP \(bu 4 -.\".B mdadm +.\".I mdadm .\"can perform (almost) all of its functions without having a .\"configuration file and does not use one by default. Also -.\".B mdadm +.\".I mdadm .\"helps with management of the configuration .\"file. .\".IP \(bu 4 -.\".B mdadm +.\".I mdadm .\"can provide information about your arrays (through Query, Detail, and Examine) .\"that .\".B raidtools @@ -93,7 +93,7 @@ mdadm has several major modes of operation: Assemble the components of a previously created array into an active array. Components can be explicitly given or can be searched for. 
-.B mdadm +.I mdadm checks that the components do form a bona fide array, and can, on request, fiddle superblock information so as to assemble a faulty array. @@ -241,7 +241,7 @@ and .TP .BR \-q ", " \-\-quiet Avoid printing purely informative messages. With this, -.B mdadm +.I mdadm will be silent unless there is something really important to report. .TP @@ -288,7 +288,7 @@ Scan config file or .B /proc/mdstat for missing information. In general, this option gives -.B mdadm +.I mdadm permission to get any missing information (like component devices, array devices, array identities, and alert destination) from the configuration file (see previous option); @@ -688,7 +688,7 @@ will cause to use the minor number of the md device that is being assembled. e.g. when assembling .BR /dev/md0 , -.M \-\-super\-minor=dev +.B \-\-super\-minor=dev will look for super blocks with a minor number of 0. .TP @@ -1057,14 +1057,14 @@ facility of 'daemon' and varying priorities. .TP .BR \-d ", " \-\-delay Give a delay in seconds. -.B mdadm +.I mdadm polls the md arrays and then waits this many seconds before polling again. The default is 60 seconds. .TP .BR \-f ", " \-\-daemonise Tell -.B mdadm +.I mdadm to run as a background daemon if it decides to monitor anything. This causes it to fork and run in the child, and to disconnect form the terminal. The process id of the child is written to stdout. @@ -1076,7 +1076,7 @@ is found in the config file. .TP .BR \-i ", " \-\-pid\-file When -.B mdadm +.I mdadm is running in daemon mode, write the pid of the daemon process to the specified file, instead of printing it on standard output. @@ -1325,7 +1325,7 @@ can override this caution. To create a "degraded" array in which some devices are missing, simply give the word "\fBmissing\fP" in place of a device name. This will cause -.B mdadm +.I mdadm to leave the corresponding slot in the array empty. 
For a RAID4 or RAID5 array at most one slot can be "\fBmissing\fP"; for a RAID6 array at most two slots. @@ -1334,7 +1334,7 @@ others can be "\fBmissing\fP". When creating a RAID5 array, -.B mdadm +.I mdadm will automatically create a degraded array with an extra spare drive. This is because building the spare into a degraded array is in general faster than resyncing the parity on a non-degraded, but not clean, array. This feature can @@ -1342,13 +1342,13 @@ be overridden with the .B \-\-force option. -When creating an array with version-1 metadata a name for the host is +When creating an array with version-1 metadata a name for the array is required. If this is not given with the .B \-\-name option, .I mdadm -will chose a name based on the last component of the name of the +will choose a name based on the last component of the name of the device being created. So if .B /dev/md3 is being created, then the name @@ -1360,6 +1360,14 @@ is being created, then the name .B home will be used. +When creating a partition based array, using +.I mdadm +with version-1.x metadata, the partition type should be set to +.B 0xDA +(non fs-data). This type selection allows for greater precision since +using any other [RAID auto-detect (0xFD) or a GNU/Linux partition (0x83)], +might create problems in the event of array recovery through a live cdrom. + A new array will normally get a randomly assigned 128bit UUID which is very likely to be unique. If you have a specific need, you can choose a UUID for the array by giving the @@ -1467,7 +1475,7 @@ There was an error while trying to get information about the device. .TP .B \-\-examine The device should be a component of an md array. -.B mdadm +.I mdadm will read the md superblock of the device and display the contents. If .B \-\-brief @@ -1525,22 +1533,22 @@ Usage: .PP This usage causes -.B mdadm +.I mdadm to periodically poll a number of md arrays and to report on any events noticed. 
-.B mdadm +.I mdadm will never exit once it decides that there are arrays to be checked, so it should normally be run in the background. As well as reporting events, -.B mdadm +.I mdadm may move a spare drive from one array to another if they are in the same .B spare-group and if the destination array has a failed drive but no spares. If any devices are listed on the command line, -.B mdadm +.I mdadm will only monitor those devices. Otherwise all arrays listed in the configuration file will be monitored. Further, if .B \-\-scan @@ -1562,11 +1570,11 @@ If .B \-\-scan is given, then a program or an E-mail address must be specified on the command line or in the config file. If neither are available, then -.B mdadm +.I mdadm will not monitor anything. Without .B \-\-scan, -.B mdadm +.I mdadm will continue monitoring as long as something was found to monitor. If no program or email is given, then each event is reported to .BR stdout . @@ -1689,7 +1697,7 @@ For the second device is the array that the spare was moved from. For -.B mdadm +.I mdadm to move spares from one array to another, the different arrays need to be labeled with the same .B spare-group @@ -1699,7 +1707,7 @@ name can be any string; it is only necessary that different spare groups use different names. When -.B mdadm +.I mdadm detects that an array in a spare group has fewer active devices than necessary for the complete array, and has no spare devices, it will look for another array in the same spare group that @@ -1721,7 +1729,7 @@ Currently the only support available is to change the "size" attribute for RAID1, RAID5 and RAID6. .IP \(bu 4 -increase the "raid-disks" attribute of RAID1, RAID5, and RAID6. +increase the "raid\-devices" attribute of RAID1, RAID5, and RAID6. .IP \(bu 4 add a write-intent bitmap to any array which supports these bitmaps, or remove a write-intent bitmap from such an array. @@ -1911,7 +1919,7 @@ that if any devices are missing the array will not be restarted. 
As an alternative, .B \-\-run may be passed to -.B mdadm +.I mdadm in which case the array will be run as soon as there are enough devices present for the data to be accessible. For a raid1, that means one device will start the array. For a clean raid5, the array @@ -2051,7 +2059,7 @@ If you're using the filesystem, .B /proc/mdstat lists all active md devices with information about them. -.B mdadm +.I mdadm uses this to find arrays when .B \-\-scan is given in Misc mode, and to monitor array reconstruction @@ -2103,13 +2111,13 @@ onwards) are either of Partition numbers should be indicated by added "pMM" to these, thus "/dev/md/d1p2". .SH NOTE -.B mdadm +.I mdadm was previously known as -.BR mdctl . +.IR mdctl . .P -.B mdadm +.I mdadm is completely separate from the -.B raidtools +.I raidtools package, and does not use the .I /etc/raidtab configuration file at all. diff --git a/mdadm.c b/mdadm.c index 8b4b5df..b7865ef 100644 --- a/mdadm.c +++ b/mdadm.c @@ -1155,7 +1155,7 @@ int main(int argc, char *argv[]) break; } if (raiddisks == 0) { - fprintf(stderr, Name ": no raid-disks specified.\n"); + fprintf(stderr, Name ": no raid-devices specified.\n"); rv = 1; break; } @@ -1179,7 +1179,7 @@ int main(int argc, char *argv[]) break; } if (raiddisks == 0) { - fprintf(stderr, Name ": no raid-disks specified.\n"); + fprintf(stderr, Name ": no raid-devices specified.\n"); rv = 1; break; } diff --git a/mdadm.conf.5 b/mdadm.conf.5 index aadfb23..40295be 100644 --- a/mdadm.conf.5 +++ b/mdadm.conf.5 @@ -11,7 +11,7 @@ mdadm.conf \- configuration for management of Software RAID with mdadm /etc/mdadm.conf .SH DESCRIPTION .PP -.B mdadm +.I mdadm is a tool for creating, managing, and monitoring RAID devices using the .B md driver in Linux. @@ -40,7 +40,7 @@ A line lists the devices (whole devices or partitions) that might contain a component of an MD array. 
When looking for the components of an array, -.B mdadm +.I mdadm will scan these devices (or any devices listed on the command line). The @@ -150,7 +150,7 @@ the same .B spare\-group name are considered to be part of the same group. The significance of a group of arrays is that -.B mdadm +.I mdadm will, when monitoring the arrays, move a spare drive from one array in a group to another array in that group if the first array had a failed or missing drive but no spare. @@ -158,7 +158,7 @@ or missing drive but no spare. .TP .B auto= This option declares to -.B mdadm +.I mdadm that it should try to create the device file of the array if it doesn't already exist, or exists but with the wrong device number. @@ -197,7 +197,7 @@ The .B mailaddr line gives an E-mail address that alerts should be sent to when -.M mdadm +.I mdadm is running in .B \-\-monitor mode (and was given the @@ -289,6 +289,24 @@ Give to suppress this symlink creation. .RE +.TP +.B HOMEHOST +The +.B homehost +line gives a default value for the +.B --homehost= +option to mdadm. There should be exactly one other word on the line. +It should be either exactly +.B <system> +or a host name. +If +.B <system> +is given, then the +.BR gethostname ( 2 ) +system call is used to get the host name. +When arrays are created, this host name will be stored in the +metadata. When arrays are assembled using auto-assembly, only arrays +with this host name stored in the metadata will be considered. 
.SH EXAMPLE DEVICE /dev/sd[bcdjkl]1 diff --git a/mdassemble.c b/mdassemble.c index 9926dde..b0d87b8 100644 --- a/mdassemble.c +++ b/mdassemble.c @@ -57,7 +57,7 @@ mapping_t pers[] = { /* from mdopen.c */ int open_mddev(char *dev, int autof/*unused */) { - int mdfd = open(dev, O_RDWR, 0); + int mdfd = open(dev, O_RDWR); if (mdfd < 0) fprintf(stderr, Name ": error opening %s: %s\n", dev, strerror(errno)); diff --git a/mdopen.c b/mdopen.c index 448a9eb..4fbcb48 100644 --- a/mdopen.c +++ b/mdopen.c @@ -173,7 +173,7 @@ int open_mddev(char *dev, int autof) must_remove = 1; if (stb.st_mode && !must_remove) { /* looks ok, see if it is available */ - mdfd = open(dev, O_RDWR, 0); + mdfd = open(dev, O_RDWR); if (mdfd < 0) { fprintf(stderr, Name ": error opening %s: %s\n", dev, strerror(errno)); @@ -254,7 +254,7 @@ int open_mddev(char *dev, int autof) make_parts(dev,parts, ci->symlinks); } } - mdfd = open(dev, O_RDWR, 0); + mdfd = open(dev, O_RDWR); if (mdfd < 0) fprintf(stderr, Name ": error opening %s: %s\n", dev, strerror(errno)); diff --git a/test b/test index f83e28f..bd8d279 100644 --- a/test +++ b/test @@ -67,6 +67,9 @@ done path0=$dev6 path1=$dev7 +echo 2000 > /proc/sys/dev/raid/speed_limit_max +echo 0 > /sys/module/md_mod/parameters/start_ro + if [ " $1" = " setup" ] then trap 0 ; exit 0 fi @@ -95,7 +98,7 @@ check() { grep -s "active $1 " /proc/mdstat > /dev/null || { echo >&2 "ERROR active $1 not found" ; cat /proc/mdstat ; exit 1;} ;; - resync | recovery ) + resync | recovery | reshape) sleep 0.5 grep -s $1 /proc/mdstat > /dev/null || { echo >&2 ERROR no $1 happening; cat /proc/mdstat; exit 1; } @@ -103,14 +106,14 @@ check() { nosync ) sleep 0.5 - if grep -s 're[synccovery]* =' > /dev/null /proc/mdstat ; then + if grep -s -E '(resync|recovery|reshape) =' > /dev/null /proc/mdstat ; then echo >&2 "ERROR resync or recovery is happening!"; cat /proc/mdstat ; exit 1; fi ;; wait ) sleep 0.1 - while grep 're[synccovery]* =' > /dev/null /proc/mdstat + while grep -E 
'(resync|recovery|reshape|check|repair) =' > /dev/null /proc/mdstat do sleep 2; done ;; diff --git a/tests/07autoassemble b/tests/07autoassemble index 4496476..bdbc5d3 100644 --- a/tests/07autoassemble +++ b/tests/07autoassemble @@ -21,3 +21,4 @@ mdadm -Ss mdadm -As -c /dev/null --homehost=testing -vvv testdev $md1 1 $mdsize0 64 testdev $md0 1 $[mdsize0+mdsize00] 64 +mdadm -Ss diff --git a/tests/07reshape5intr b/tests/07reshape5intr new file mode 100644 index 0000000..96c8e02 --- /dev/null +++ b/tests/07reshape5intr @@ -0,0 +1,33 @@ + +# +# test interrupting and restarting raid5 reshape. +set -x +devs="$dev1" +st=UU +for disks in 2 3 4 5 +do + eval devs=\"$devs \$dev$disks\" + st=U$st + for d in $devs + do dd if=/dev/urandom of=$d bs=1024 || true + done + + mdadm -CR $md0 -amd -l5 -n$disks --assume-clean $devs + mdadm $md0 --add $dev6 + echo 20 > /proc/sys/dev/raid/speed_limit_max + mdadm --grow $md0 -n $[disks+1] + check reshape + check state $st + mdadm --stop $md0 + mdadm --assemble $md0 $devs $dev6 + check reshape + echo 2000 > /proc/sys/dev/raid/speed_limit_max + check wait + echo check > /sys/block/md0/md/sync_action + check wait + mm=`cat /sys/block/md0/md/mismatch_cnt` + if [ $mm -gt 0 ] + then echo >&2 "ERROR mismatch_cnt non-zero : $mm" ; exit 1 + fi + mdadm -S $md0 +done diff --git a/tests/07testreshape5 b/tests/07testreshape5 index 8f56a72..44d5ddc 100644 --- a/tests/07testreshape5 +++ b/tests/07testreshape5 @@ -20,7 +20,7 @@ do # test restore: make a raid5 from a file, then do a compare dd if=/dev/urandom of=/tmp/RandFile bs=1024 count=$size $dir/test_stripe restore /tmp/RandFile $disks $[chunk*1024] 5 $nlayout 0 $[size*1024] $devs - $mdadm -CR $md0 -amd -l5 -n$disks --assume-clean -c $chunk -p $layout $devs + mdadm -CR $md0 -amd -l5 -n$disks --assume-clean -c $chunk -p $layout $devs cmp -s -n $[size*1024] $md0 /tmp/RandFile || { echo cmp failed ; exit 2; } # FIXME check parity @@ -30,7 +30,7 @@ do > /tmp/NewRand $dir/test_stripe save /tmp/NewRand 
$disks $[chunk*1024] 5 $nlayout 0 $[size*1024] $devs cmp -s -n $[size*1024] $md0 /tmp/NewRand || { echo cmp failed ; exit 2; } - $mdadm -S $md0 + mdadm -S $md0 done done done