diff --git a/Grow.c b/Grow.c index 0916c5d..3923a90 100644 --- a/Grow.c +++ b/Grow.c @@ -546,7 +546,13 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, return 1; } sra = sysfs_read(fd, 0, GET_LEVEL); - frozen = freeze_array(sra); + if (sra) + frozen = freeze_array(sra); + else { + fprintf(stderr, Name ": failed to read sysfs parameters for %s\n", + devname); + return 1; + } if (frozen < 0) { fprintf(stderr, Name ": %s is performing resync/recovery and cannot" " be reshaped\n", devname); @@ -948,6 +954,13 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE| GET_CACHE); + if (!sra) { + fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n", + devname); + rv = 1; + break; + } + if (ndata == odata) { /* Make 'blocks' bigger for better throughput, but * not so big that we reject it below. @@ -960,13 +973,6 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, fprintf(stderr, Name ": Need to backup %luK of critical " "section..\n", blocks/2); - if (!sra) { - fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n", - devname); - rv = 1; - break; - } - if (blocks >= sra->component_size/2) { fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n", devname); @@ -1970,6 +1976,12 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, int cache; int done = 0; + sra = sysfs_read(-1, devname2devnum(info->sys_name), + GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE| + GET_CACHE); + if (!sra) + return 1; + err = sysfs_set_str(info, NULL, "array_state", "readonly"); if (err) return err; @@ -1990,7 +2002,6 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, ochunk = info->array.chunk_size; nchunk = info->new_chunk; - a = (ochunk/512) * odata; b = (nchunk/512) * ndata; /* Find GCD */ @@ -2003,11 +2014,6 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, /* LCM == product / GCD */ blocks = (ochunk/512) * (nchunk/512) * odata * ndata / a; - sra = sysfs_read(-1, devname2devnum(info->sys_name), - GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE| - GET_CACHE); - - if (ndata == odata) while (blocks * 32 < sra->component_size && blocks < 16*1024*2) diff --git a/Incremental.c b/Incremental.c index 7ad648a..8062e2b 100644 --- a/Incremental.c +++ b/Incremental.c @@ -258,6 +258,15 @@ int Incremental(char *devname, int verbose, int runstop, autof = ci->autof; if (st->ss->container_content && st->loaded_container) { + if ((runstop > 0 && info.container_enough >= 0) || + info.container_enough > 0) + /* pass */; + else { + if (verbose) + fprintf(stderr, Name ": not enough devices to start the container\n"); + return 1; + } + /* This is a pre-built container array, so we do something * rather different. */ @@ -360,6 +369,8 @@ int Incremental(char *devname, int verbose, int runstop, strcpy(chosen_name, devnum2devname(mp->devnum)); sra = sysfs_read(mdfd, fd2devnum(mdfd), (GET_DEVS | GET_STATE)); + if (!sra) + return 2; if (sra->devs) { sprintf(dn, "%d:%d", sra->devs->disk.major, @@ -428,8 +439,6 @@ int Incremental(char *devname, int verbose, int runstop, chosen_name, info.array.working_disks); wait_for(chosen_name, mdfd); close(mdfd); - if (runstop < 0) - return 0; /* don't try to assemble */ rv = Incremental(chosen_name, verbose, runstop, NULL, homehost, require_homehost, autof); if (rv == 1) @@ -443,8 +452,7 @@ int Incremental(char *devname, int verbose, int runstop, active_disks = count_active(st, mdfd, &avail, &info); if (enough(info.array.level, info.array.raid_disks, info.array.layout, info.array.state & 1, - avail, active_disks) == 0 || - (runstop < 0 && active_disks < info.array.raid_disks)) { + avail, active_disks) == 0) { free(avail); if (verbose >= 0) fprintf(stderr, Name @@ -580,6 +588,9 @@ static int count_active(struct supertype *st, int mdfd, char **availp, struct mdinfo *sra = sysfs_read(mdfd, -1, GET_DEVS | GET_STATE); char *avail = NULL; + if (!sra) + return 0; + for (d = sra->devs ; d ; d = d->next) { char dn[30]; int dfd; diff --git a/Makefile b/Makefile index 2517f75..237f4fc 100644 --- a/Makefile +++ b/Makefile @@ -48,9 +48,15 @@ CWFLAGS += -Wp,-D_FORTIFY_SOURCE=2 -O endif ifdef DEBIAN -CPPFLAGS= -DDEBIAN +CPPFLAGS := -DDEBIAN else -CPPFLAGS= +CPPFLAGS := +endif +ifdef DEFAULT_OLD_METADATA + CPPFLAG += -DDEFAULT_OLD_METADATA + DEFAULT_METADATA=0.90 +else + DEFAULT_METADATA=1.2 endif SYSCONFDIR = /etc @@ -151,8 +157,9 @@ mdadm.O2 : $(SRCS) mdadm.h mdmon.O2 mdmon.O2 : $(MON_SRCS) mdadm.h mdmon.h $(CC) -o mdmon.O2 $(CFLAGS) -DHAVE_STDINT_H -O2 -D_FORTIFY_SOURCE=2 $(MON_SRCS) +# use '-z now' to guarantee no dynamic linker interactions with the monitor thread mdmon : $(MON_OBJS) - $(CC) $(LDFLAGS) -o mdmon $(MON_OBJS) $(LDLIBS) + $(CC) $(LDFLAGS) -z now -o mdmon $(MON_OBJS) $(LDLIBS) msg.o: msg.c msg.h test_stripe : restripe.c mdadm.h @@ -180,6 +187,9 @@ mdassemble.klibc : $(ASSEMBLE_SRCS) mdadm.h rm -f $(OBJS) $(KLIBC_GCC) $(ASSEMBLE_FLAGS) -o mdassemble $(ASSEMBLE_SRCS) +mdadm.8 : mdadm.8.in + sed -e 's/{DEFAULT_METADATA}/$(DEFAULT_METADATA)/g' mdadm.8.in > mdadm.8 + mdadm.man : mdadm.8 nroff -man mdadm.8 > mdadm.man diff --git a/Monitor.c b/Monitor.c index b0802f8..8e82797 100644 --- a/Monitor.c +++ b/Monitor.c @@ -391,6 +391,7 @@ int Monitor(mddev_dev_t devlist, ) alert("FailSpare", dev, dv, mailaddr, mailfrom, alert_cmd, dosyslog); else if (i < (unsigned)array.raid_disks && + ! (newstate & (1<devstate[i]&change)&(1<devnum, GET_DEVS|SKIP_GONE_DEVS); + mdi = sysfs_read(-1, mdstat->devnum, GET_DEVS); if (!mdi) { /* invalidate the current count so we can try again */ container->devcnt = -1; diff --git a/mapfile.c b/mapfile.c index 0f12559..ffe8e16 100644 --- a/mapfile.c +++ b/mapfile.c @@ -368,7 +368,7 @@ void RebuildMap(void) } for (md = mdstat ; md ; md = md->next) { - struct mdinfo *sra = sysfs_read(-1, md->devnum, GET_DEVS|SKIP_GONE_DEVS); + struct mdinfo *sra = sysfs_read(-1, md->devnum, GET_DEVS); struct mdinfo *sd; if (!sra) @@ -486,7 +486,8 @@ void RebuildMap(void) for (md = mdstat ; md ; md = md->next) { struct mdinfo *sra = sysfs_read(-1, md->devnum, GET_VERSION); - sysfs_uevent(sra, "change"); + if (sra) + sysfs_uevent(sra, "change"); sysfs_free(sra); } map_free(map); diff --git a/mdadm.8 b/mdadm.8.in similarity index 99% rename from mdadm.8 rename to mdadm.8.in index 7e252fa..e107e4f 100644 --- a/mdadm.8 +++ b/mdadm.8.in @@ -299,7 +299,7 @@ says to get a list of array devices from .TP .BR \-e ", " \-\-metadata= Declare the style of RAID metadata (superblock) to be used. The -default is 1.2 for +default is {DEFAULT_METADATA} for .BR \-\-create , and to guess for other operations. The default can be overridden by setting the @@ -311,16 +311,24 @@ keyword in Options are: .RS +.ie '{DEFAULT_METADATA}'0.90' +.IP "0, 0.90, default" +.el .IP "0, 0.90" +.. Use the original 0.90 format superblock. This format limits arrays to 28 component devices and limits component devices of levels 1 and greater to 2 terabytes. +.ie '{DEFAULT_METADATA}'0.90' +.IP "1, 1.0, 1.1, 1.2" +.el .IP "1, 1.0, 1.1, 1.2 default" +.. Use the new version-1 format superblock. This has few restrictions. The different sub-versions store the superblock at different locations on the device, either at the end (for 1.0), at the start (for 1.1) or -4K from the start (for 1.2). '1' is equivalent to '1.0', 'default' is -equivalent to '1.2'. +4K from the start (for 1.2). "1" is equivalent to "1.0". +'if '{DEFAULT_METADATA}'1.2' "default" is equivalent to "1.2". .IP ddf Use the "Industry Standard" DDF (Disk Data Format) format defined by SNIA. @@ -462,8 +470,9 @@ When used with .BR \-\-build , only linear, stripe, raid0, 0, raid1, multipath, mp, and faulty are valid. -Not yet supported with -.BR \-\-grow . +Can be used with +.B \-\-grow +to change the RAID level in some cases. See LEVEL CHANGES below. .TP .BR \-p ", " \-\-layout= @@ -1233,11 +1242,6 @@ uses to help track which arrays are currently being assembled. Run any array assembled as soon as a minimal number of devices are available, rather than waiting until all expected devices are present. -.TP -.B \-\-no\-degraded -This allows the hot-plug system to prevent arrays from running when it knows -that more disks may arrive later in the discovery process. - .TP .BR \-\-scan ", " \-s Only meaningful with diff --git a/mdadm.c b/mdadm.c index e7435fd..acbf4ac 100644 --- a/mdadm.c +++ b/mdadm.c @@ -691,7 +691,6 @@ int main(int argc, char *argv[]) " 'summaries', 'homehost', 'byteorder', 'devicesize'.\n"); exit(outf == stdout ? 0 : 2); - case O(INCREMENTAL,NoDegraded): case O(ASSEMBLE,NoDegraded): /* --no-degraded */ runstop = -1; /* --stop isn't allowed for --assemble, * so we overload slightly */ diff --git a/mdadm.h b/mdadm.h index 798713c..03aa329 100644 --- a/mdadm.h +++ b/mdadm.h @@ -205,7 +205,9 @@ struct mdinfo { int container_member; /* for assembling external-metatdata arrays * This is to be used internally by metadata * handler only */ - + int container_enough; /* flag external handlers can set to + * indicate that subarrays have not enough (-1), + * enough to start (0), or all expected disks (1) */ char sys_name[20]; struct mdinfo *devs; struct mdinfo *next; @@ -405,7 +407,6 @@ enum sysfs_read_flags { GET_SIZE = (1 << 12), GET_STATE = (1 << 13), GET_ERROR = (1 << 14), - SKIP_GONE_DEVS = (1 << 15), }; /* If fd >= 0, get the array it is open on, diff --git a/mdmon.c b/mdmon.c index beb39cf..0c37426 100644 --- a/mdmon.c +++ b/mdmon.c @@ -373,8 +373,7 @@ static int mdmon(char *devname, int devnum, int must_fork, int takeover) exit(3); } - mdi = sysfs_read(mdfd, container->devnum, - GET_VERSION|GET_LEVEL|GET_DEVS|SKIP_GONE_DEVS); + mdi = sysfs_read(mdfd, container->devnum, GET_VERSION|GET_LEVEL|GET_DEVS); if (!mdi) { fprintf(stderr, "mdmon: failed to load sysfs info for %s\n", diff --git a/super-ddf.c b/super-ddf.c index 736e07f..6dd3173 100644 --- a/super-ddf.c +++ b/super-ddf.c @@ -1365,6 +1365,7 @@ static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info) (ddf->anchor.guid+16)); info->array.utime = 0; info->array.chunk_size = 0; + info->container_enough = 0; info->disk.major = 0; @@ -2814,14 +2815,8 @@ static int load_super_ddf_all(struct supertype *st, int fd, int seq; char nm[20]; int dfd; - int devnum = fd2devnum(fd); - enum sysfs_read_flags flags; - flags = GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE; - if (mdmon_running(devnum)) - flags |= SKIP_GONE_DEVS; - - sra = sysfs_read(fd, 0, flags); + sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE); if (!sra) return 1; if (sra->array.major_version != -1 || diff --git a/super-intel.c b/super-intel.c index 159ae4a..daf811f 100644 --- a/super-intel.c +++ b/super-intel.c @@ -357,7 +357,6 @@ static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index) return &mpb->disk[index]; } -#ifndef MDASSEMBLE /* retrieve a disk from the parsed metadata */ static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index) { @@ -369,7 +368,6 @@ static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index) return NULL; } -#endif /* generate a checksum directly from the anchor when the anchor is known to be * up-to-date, currently only at load or write_super after coalescing @@ -1547,6 +1545,20 @@ static void fixup_container_spare_uuid(struct mdinfo *inf) } } + +static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed); +static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev); + +static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index) +{ + struct dl *d; + + for (d = super->missing; d; d = d->next) + if (d->index == index) + return &d->disk; + return NULL; +} + static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) { struct intel_super *super = st->sb; @@ -1581,6 +1593,53 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) info->name[0] = 0; info->recovery_start = MaxSector; + /* do we have the all the insync disks that we expect? */ + if (st->loaded_container) { + struct imsm_super *mpb = super->anchor; + int max_enough = -1, i; + + for (i = 0; i < mpb->num_raid_devs; i++) { + struct imsm_dev *dev = get_imsm_dev(super, i); + int failed, enough, j, missing = 0; + struct imsm_map *map; + __u8 state; + + failed = imsm_count_failed(super, dev); + state = imsm_check_degraded(super, dev, failed); + map = get_imsm_map(dev, dev->vol.migr_state); + + /* any newly missing disks? + * (catches single-degraded vs double-degraded) + */ + for (j = 0; j < map->num_members; j++) { + __u32 ord = get_imsm_ord_tbl_ent(dev, i); + __u32 idx = ord_to_idx(ord); + + if (!(ord & IMSM_ORD_REBUILD) && + get_imsm_missing(super, idx)) { + missing = 1; + break; + } + } + + if (state == IMSM_T_STATE_FAILED) + enough = -1; + else if (state == IMSM_T_STATE_DEGRADED && + (state != map->map_state || missing)) + enough = 0; + else /* we're normal, or already degraded */ + enough = 1; + + /* in the missing/failed disk case check to see + * if at least one array is runnable + */ + max_enough = max(max_enough, enough); + } + dprintf("%s: enough: %d\n", __func__, max_enough); + info->container_enough = max_enough; + } else + info->container_enough = -1; + if (super->disks) { __u32 reserved = imsm_reserved_sectors(super, super->disks); @@ -2707,14 +2766,9 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, int retry; int err = 0; int i; - enum sysfs_read_flags flags; - - flags = GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE; - if (mdmon_running(devnum)) - flags |= SKIP_GONE_DEVS; /* check if 'fd' an opened container */ - sra = sysfs_read(fd, 0, flags); + sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE); if (!sra) return 1; @@ -4231,6 +4285,15 @@ static void update_recovery_start(struct imsm_dev *dev, struct mdinfo *array) rebuild = d; } + if (!rebuild) { + /* (?) none of the disks are marked with + * IMSM_ORD_REBUILD, so assume they are missing and the + * disk_ord_tbl was not correctly updated + */ + dprintf("%s: failed to locate out-of-sync disk\n", __func__); + return; + } + units = __le32_to_cpu(dev->vol.curr_migr_unit); rebuild->recovery_start = units * blocks_per_migr_unit(dev); } @@ -4362,24 +4425,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st) } -#ifndef MDASSEMBLE -static int imsm_open_new(struct supertype *c, struct active_array *a, - char *inst) -{ - struct intel_super *super = c->sb; - struct imsm_super *mpb = super->anchor; - - if (atoi(inst) >= mpb->num_raid_devs) { - fprintf(stderr, "%s: subarry index %d, out of range\n", - __func__, atoi(inst)); - return -ENODEV; - } - - dprintf("imsm: open_new %s\n", inst); - a->info.container_member = atoi(inst); - return 0; -} - static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed) { struct imsm_map *map = get_imsm_map(dev, 0); @@ -4478,6 +4523,24 @@ static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev) return failed; } +#ifndef MDASSEMBLE +static int imsm_open_new(struct supertype *c, struct active_array *a, + char *inst) +{ + struct intel_super *super = c->sb; + struct imsm_super *mpb = super->anchor; + + if (atoi(inst) >= mpb->num_raid_devs) { + fprintf(stderr, "%s: subarry index %d, out of range\n", + __func__, atoi(inst)); + return -ENODEV; + } + + dprintf("imsm: open_new %s\n", inst); + a->info.container_member = atoi(inst); + return 0; +} + static int is_resyncing(struct imsm_dev *dev) { struct imsm_map *migr_map; diff --git a/super0.c b/super0.c index 5c6b7d7..83600cb 100644 --- a/super0.c +++ b/super0.c @@ -922,7 +922,11 @@ static struct supertype *match_metadata_desc0(char *arg) while (arg[0] == '0' && arg[1] == '0') arg++; if (strcmp(arg, "0") == 0 || - strcmp(arg, "0.90") == 0 +#ifdef DEFAULT_OLD_METADATA /* ifndef in super1.c */ + strcmp(arg, "default") == 0 || +#endif /* DEFAULT_OLD_METADATA */ + strcmp(arg, "0.90") == 0 || + strcmp(arg, "") == 0 /* no metadata - i.e. non_persistent */ ) return st; diff --git a/super1.c b/super1.c index f3be7ce..216690d 100644 --- a/super1.c +++ b/super1.c @@ -1375,14 +1375,15 @@ static struct supertype *match_metadata_desc1(char *arg) return st; } if (strcmp(arg, "1.1") == 0 || - strcmp(arg, "1.01") == 0 || - strcmp(arg, "") == 0 /* no metadata */ + strcmp(arg, "1.01") == 0 ) { st->minor_version = 1; return st; } if (strcmp(arg, "1.2") == 0 || +#ifndef DEFAULT_OLD_METADATA /* ifdef in super0.c */ strcmp(arg, "default") == 0 || +#endif /* DEFAULT_OLD_METADATA */ strcmp(arg, "1.02") == 0) { st->minor_version = 2; return st; diff --git a/sysfs.c b/sysfs.c index 72c7c5b..725c90c 100644 --- a/sysfs.c +++ b/sysfs.c @@ -273,22 +273,20 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options) strcpy(dbase, "block/dev"); if (load_sys(fname, buf)) { + /* assume this is a stale reference to a hot + * removed device + */ free(dev); - if (options & SKIP_GONE_DEVS) - continue; - else - goto abort; + continue; } sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor); /* special case check for block devices that can go 'offline' */ - if (options & SKIP_GONE_DEVS) { - strcpy(dbase, "block/device/state"); - if (load_sys(fname, buf) == 0 && - strncmp(buf, "offline", 7) == 0) { - free(dev); - continue; - } + strcpy(dbase, "block/device/state"); + if (load_sys(fname, buf) == 0 && + strncmp(buf, "offline", 7) == 0) { + free(dev); + continue; } /* finally add this disk to the array */ diff --git a/util.c b/util.c index 66bf2f9..d22b0d0 100644 --- a/util.c +++ b/util.c @@ -1669,7 +1669,11 @@ int mdmon_pid(int devnum) char pid[10]; int fd; int n; - sprintf(path, "%s/%s.pid", pid_dir, devnum2devname(devnum)); + char *devname = devnum2devname(devnum); + + sprintf(path, "%s/%s.pid", pid_dir, devname); + free(devname); + fd = open(path, O_RDONLY | O_NOATIME, 0); if (fd < 0)