From 10013317ce51c6b619f9782b55e9b6b9f536f0a8 Mon Sep 17 00:00:00 2001 From: Przemyslaw Hawrylewicz Czarnowski Date: Thu, 22 Apr 2010 23:10:32 +0100 Subject: [PATCH 01/11] fix: memory leak in mdmon_pid() devnum2devname() returns pointer to memory allocated with strdup. It must be released to prevent memory leak. Signed-off-by: Przemyslaw Czarnowski Signed-off-by: Dan Williams --- util.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/util.c b/util.c index 25f1e56..8315200 100644 --- a/util.c +++ b/util.c @@ -1532,7 +1532,11 @@ int mdmon_pid(int devnum) char pid[10]; int fd; int n; - sprintf(path, "%s/%s.pid", pid_dir, devnum2devname(devnum)); + char *devname = devnum2devname(devnum); + + sprintf(path, "%s/%s.pid", pid_dir, devname); + free(devname); + fd = open(path, O_RDONLY | O_NOATIME, 0); if (fd < 0) From c03ef02d92e4b2a7397f7247ea5a25d932a1a889 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 18 May 2010 12:29:28 +1000 Subject: [PATCH 02/11] Grow: move error message closer to error cause. A recent change move the sysfs_read call away from the check that it succeeded. This patch moves the check back next to the sysfs_read call. Signed-off-by: NeilBrown --- Grow.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Grow.c b/Grow.c index 0916c5d..28ed8d7 100644 --- a/Grow.c +++ b/Grow.c @@ -948,6 +948,13 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE| GET_CACHE); + if (!sra) { + fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n", + devname); + rv = 1; + break; + } + if (ndata == odata) { /* Make 'blocks' bigger for better throughput, but * not so big that we reject it below. @@ -960,13 +967,6 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, fprintf(stderr, Name ": Need to backup %luK of critical " "section..\n", blocks/2); - if (!sra) { - fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n", - devname); - rv = 1; - break; - } - if (blocks >= sra->component_size/2) { fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n", devname); From 4460f8f7c344a0e8c8d454edcaf392e85912c76e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 18 May 2010 12:31:29 +1000 Subject: [PATCH 03/11] Monitor: don't report the disappearance of a faulty device as SpareActive. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Normally Monitor doesn't see faulty devices in active slots - they get moved away too quickly. But if it does, it reports the "faulty device disappeared" event (when it finally does get moved away) as SpareActive due to insufficient checking. So add a better check. Reported-by: Pierre Vignéras --- Monitor.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Monitor.c b/Monitor.c index b0802f8..8e82797 100644 --- a/Monitor.c +++ b/Monitor.c @@ -391,6 +391,7 @@ int Monitor(mddev_dev_t devlist, ) alert("FailSpare", dev, dv, mailaddr, mailfrom, alert_cmd, dosyslog); else if (i < (unsigned)array.raid_disks && + ! (newstate & (1<devstate[i]&change)&(1< Date: Wed, 26 May 2010 13:22:36 -0700 Subject: [PATCH 04/11] Incremental: honor an 'enough' flag from external handlers This is needed for imsm where: 1/ we want to report raid_disks as zero to allow mdadm -As to incorporate all spares 2/ we can't determine stale disks by looking at the event counts. 3/ we can't see per-subarray expectations with the info returned from the container level ->getinfo_super() Signed-off-by: Dan Williams --- Incremental.c | 9 +++++ mdadm.h | 4 ++- super-ddf.c | 1 + super-intel.c | 99 ++++++++++++++++++++++++++++++++++++++++----------- 4 files changed, 92 insertions(+), 21 deletions(-) diff --git a/Incremental.c b/Incremental.c index 7ad648a..adef44e 100644 --- a/Incremental.c +++ b/Incremental.c @@ -258,6 +258,15 @@ int Incremental(char *devname, int verbose, int runstop, autof = ci->autof; if (st->ss->container_content && st->loaded_container) { + if ((runstop > 0 && info.container_enough >= 0) || + info.container_enough > 0) + /* pass */; + else { + if (verbose) + fprintf(stderr, Name ": not enough devices to start the container\n"); + return 1; + } + /* This is a pre-built container array, so we do something * rather different. */ diff --git a/mdadm.h b/mdadm.h index d9d17b0..a0797e8 100644 --- a/mdadm.h +++ b/mdadm.h @@ -205,7 +205,9 @@ struct mdinfo { int container_member; /* for assembling external-metatdata arrays * This is to be used internally by metadata * handler only */ - + int container_enough; /* flag external handlers can set to + * indicate that subarrays have not enough (-1), + * enough to start (0), or all expected disks (1) */ char sys_name[20]; struct mdinfo *devs; struct mdinfo *next; diff --git a/super-ddf.c b/super-ddf.c index 0e6f1e5..b01c68d 100644 --- a/super-ddf.c +++ b/super-ddf.c @@ -1357,6 +1357,7 @@ static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info) (ddf->anchor.guid+16)); info->array.utime = 0; info->array.chunk_size = 0; + info->container_enough = 0; info->disk.major = 0; diff --git a/super-intel.c b/super-intel.c index bdd7a96..88ffb52 100644 --- a/super-intel.c +++ b/super-intel.c @@ -344,7 +344,6 @@ static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index) return &mpb->disk[index]; } -#ifndef MDASSEMBLE /* retrieve a disk from the parsed metadata */ static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index) { @@ -356,7 +355,6 @@ static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index) return NULL; } -#endif /* generate a checksum directly from the anchor when the anchor is known to be * up-to-date, currently only at load or write_super after coalescing @@ -1528,6 +1526,20 @@ static void fixup_container_spare_uuid(struct mdinfo *inf) } } + +static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed); +static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev); + +static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index) +{ + struct dl *d; + + for (d = super->missing; d; d = d->next) + if (d->index == index) + return &d->disk; + return NULL; +} + static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) { struct intel_super *super = st->sb; @@ -1562,6 +1574,53 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) info->name[0] = 0; info->recovery_start = MaxSector; + /* do we have the all the insync disks that we expect? */ + if (st->loaded_container) { + struct imsm_super *mpb = super->anchor; + int max_enough = -1, i; + + for (i = 0; i < mpb->num_raid_devs; i++) { + struct imsm_dev *dev = get_imsm_dev(super, i); + int failed, enough, j, missing = 0; + struct imsm_map *map; + __u8 state; + + failed = imsm_count_failed(super, dev); + state = imsm_check_degraded(super, dev, failed); + map = get_imsm_map(dev, dev->vol.migr_state); + + /* any newly missing disks? + * (catches single-degraded vs double-degraded) + */ + for (j = 0; j < map->num_members; j++) { + __u32 ord = get_imsm_ord_tbl_ent(dev, i); + __u32 idx = ord_to_idx(ord); + + if (!(ord & IMSM_ORD_REBUILD) && + get_imsm_missing(super, idx)) { + missing = 1; + break; + } + } + + if (state == IMSM_T_STATE_FAILED) + enough = -1; + else if (state == IMSM_T_STATE_DEGRADED && + (state != map->map_state || missing)) + enough = 0; + else /* we're normal, or already degraded */ + enough = 1; + + /* in the missing/failed disk case check to see + * if at least one array is runnable + */ + max_enough = max(max_enough, enough); + } + dprintf("%s: enough: %d\n", __func__, max_enough); + info->container_enough = max_enough; + } else + info->container_enough = -1; + if (super->disks) { __u32 reserved = imsm_reserved_sectors(super, super->disks); @@ -4175,24 +4234,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st) } -#ifndef MDASSEMBLE -static int imsm_open_new(struct supertype *c, struct active_array *a, - char *inst) -{ - struct intel_super *super = c->sb; - struct imsm_super *mpb = super->anchor; - - if (atoi(inst) >= mpb->num_raid_devs) { - fprintf(stderr, "%s: subarry index %d, out of range\n", - __func__, atoi(inst)); - return -ENODEV; - } - - dprintf("imsm: open_new %s\n", inst); - a->info.container_member = atoi(inst); - return 0; -} - static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed) { struct imsm_map *map = get_imsm_map(dev, 0); @@ -4291,6 +4332,24 @@ static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev) return failed; } +#ifndef MDASSEMBLE +static int imsm_open_new(struct supertype *c, struct active_array *a, + char *inst) +{ + struct intel_super *super = c->sb; + struct imsm_super *mpb = super->anchor; + + if (atoi(inst) >= mpb->num_raid_devs) { + fprintf(stderr, "%s: subarry index %d, out of range\n", + __func__, atoi(inst)); + return -ENODEV; + } + + dprintf("imsm: open_new %s\n", inst); + a->info.container_member = atoi(inst); + return 0; +} + static int is_resyncing(struct imsm_dev *dev) { struct imsm_map *migr_map; From 3288b419b988b20a53a2b12eb8e5f9f536228db4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 26 May 2010 13:25:47 -0700 Subject: [PATCH 05/11] Revert "Incremental: honor --no-degraded to delay assembly" This reverts commit fdb482f99b9ad2ef8cd1724902fdfeedaa8796a1. Now that containers can report state for ->container_enough we can automatically determine when the array can be started, and no longer need the --no-degraded hammer. Conflicts: Incremental.c Signed-off-by: Dan Williams --- Incremental.c | 5 +---- mdadm.8 | 5 ----- mdadm.c | 1 - 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/Incremental.c b/Incremental.c index adef44e..d6dd0f4 100644 --- a/Incremental.c +++ b/Incremental.c @@ -437,8 +437,6 @@ int Incremental(char *devname, int verbose, int runstop, chosen_name, info.array.working_disks); wait_for(chosen_name, mdfd); close(mdfd); - if (runstop < 0) - return 0; /* don't try to assemble */ rv = Incremental(chosen_name, verbose, runstop, NULL, homehost, require_homehost, autof); if (rv == 1) @@ -452,8 +450,7 @@ int Incremental(char *devname, int verbose, int runstop, active_disks = count_active(st, mdfd, &avail, &info); if (enough(info.array.level, info.array.raid_disks, info.array.layout, info.array.state & 1, - avail, active_disks) == 0 || - (runstop < 0 && active_disks < info.array.raid_disks)) { + avail, active_disks) == 0) { free(avail); if (verbose >= 0) fprintf(stderr, Name diff --git a/mdadm.8 b/mdadm.8 index 4edfc41..90470d9 100644 --- a/mdadm.8 +++ b/mdadm.8 @@ -1218,11 +1218,6 @@ uses to help track which arrays are currently being assembled. Run any array assembled as soon as a minimal number of devices are available, rather than waiting until all expected devices are present. -.TP -.B \-\-no\-degraded -This allows the hot-plug system to prevent arrays from running when it knows -that more disks may arrive later in the discovery process. - .TP .BR \-\-scan ", " \-s Only meaningful with diff --git a/mdadm.c b/mdadm.c index d5e34c0..a401be2 100644 --- a/mdadm.c +++ b/mdadm.c @@ -671,7 +671,6 @@ int main(int argc, char *argv[]) " 'summaries', 'homehost', 'byteorder', 'devicesize'.\n"); exit(outf == stdout ? 0 : 2); - case O(INCREMENTAL,NoDegraded): case O(ASSEMBLE,NoDegraded): /* --no-degraded */ runstop = -1; /* --stop isn't allowed for --assemble, * so we overload slightly */ From 4363fd80bcc9f85ed824228dee5e6350a8d73e18 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 26 May 2010 13:33:43 -0700 Subject: [PATCH 06/11] imsm: robustify recovery-start detection update_recovery_start() assumed that the out-of-sync disk would always be marked as IMSM_ORD_REBUILD in the disk_ord_tbl, but the segmentation fault reported by Andy proves otherwise. This might also be explained by an interrupted rebuild and the disk has not yet been marked missing. https://bugzilla.redhat.com/show_bug.cgi?id=592030 Reported-by: Andy Lutomirski Signed-off-by: Dan Williams --- super-intel.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/super-intel.c b/super-intel.c index bdd7a96..dd9699d 100644 --- a/super-intel.c +++ b/super-intel.c @@ -4044,6 +4044,15 @@ static void update_recovery_start(struct imsm_dev *dev, struct mdinfo *array) rebuild = d; } + if (!rebuild) { + /* (?) none of the disks are marked with + * IMSM_ORD_REBUILD, so assume they are missing and the + * disk_ord_tbl was not correctly updated + */ + dprintf("%s: failed to locate out-of-sync disk\n", __func__); + return; + } + units = __le32_to_cpu(dev->vol.curr_migr_unit); rebuild->recovery_start = units * blocks_per_migr_unit(dev); } From 5082750467726d7cad6059ee8a41713da23426d3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 31 May 2010 12:08:02 +1000 Subject: [PATCH 07/11] Revert change to handling of -empty-string- metadata. If the metadata is an empty string, it means the array in question does not use metadata. This comes from sysfs_read finding "none" in "metadata_version", then super_by_fd noticing the vers == -1, and so just using the ->text_version (which is empty). In this case we want to use the super0 metadata handler routines because that is what we always used to do before commit 7d5c3964ccfaace123f7b75e15d38c2650e013d8 And that commit was wrong because "" doesn't mean "default" and so should not have been changed at the same time. Reported-by: martin f. krafft Signed-off-by: NeilBrown --- super0.c | 3 ++- super1.c | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/super0.c b/super0.c index 5c6b7d7..a0c7eb4 100644 --- a/super0.c +++ b/super0.c @@ -922,7 +922,8 @@ static struct supertype *match_metadata_desc0(char *arg) while (arg[0] == '0' && arg[1] == '0') arg++; if (strcmp(arg, "0") == 0 || - strcmp(arg, "0.90") == 0 + strcmp(arg, "0.90") == 0 || + strcmp(arg, "") == 0 /* no metadata - i.e. non_persistent */ ) return st; diff --git a/super1.c b/super1.c index f3be7ce..8fa0745 100644 --- a/super1.c +++ b/super1.c @@ -1375,8 +1375,7 @@ static struct supertype *match_metadata_desc1(char *arg) return st; } if (strcmp(arg, "1.1") == 0 || - strcmp(arg, "1.01") == 0 || - strcmp(arg, "") == 0 /* no metadata */ + strcmp(arg, "1.01") == 0 ) { st->minor_version = 1; return st; From 26f467a9548dcebe07d576deb6ceb02b947eaecc Mon Sep 17 00:00:00 2001 From: "martin f. krafft" Date: Fri, 28 May 2010 14:12:41 +0200 Subject: [PATCH 08/11] Compile-time switch to enable 0.9 metadata as default This commit introduces DEFAULT_OLD_METADATA as a preprocessor definition. If defined, it causes mdadm to assume metadata version 0.9 as default. If not defined, version 1.x (currently 1.2) is used as default. The man page mdadm.8 is also modified to reflect the chosen default. The selftests will not work if the old default is chosen. This patch was requested by Debian so they could distribute a current mdadm together with boot loaders that only understand 0.90 metadata for md-raid. Preferred usage is simply make DEFAULT_OLD_METADATA=yes Signed-off-by: martin f. krafft Signed-off-by: NeilBrown --- Makefile | 13 +++++++++++-- mdadm.8 => mdadm.8.in | 14 +++++++++++--- super0.c | 3 +++ super1.c | 2 ++ 4 files changed, 27 insertions(+), 5 deletions(-) rename mdadm.8 => mdadm.8.in (99%) diff --git a/Makefile b/Makefile index 2517f75..3af1665 100644 --- a/Makefile +++ b/Makefile @@ -48,9 +48,15 @@ CWFLAGS += -Wp,-D_FORTIFY_SOURCE=2 -O endif ifdef DEBIAN -CPPFLAGS= -DDEBIAN +CPPFLAGS := -DDEBIAN else -CPPFLAGS= +CPPFLAGS := +endif +ifdef DEFAULT_OLD_METADATA + CPPFLAG += -DDEFAULT_OLD_METADATA + DEFAULT_METADATA=0.90 +else + DEFAULT_METADATA=1.2 endif SYSCONFDIR = /etc @@ -180,6 +186,9 @@ mdassemble.klibc : $(ASSEMBLE_SRCS) mdadm.h rm -f $(OBJS) $(KLIBC_GCC) $(ASSEMBLE_FLAGS) -o mdassemble $(ASSEMBLE_SRCS) +mdadm.8 : mdadm.8.in + sed -e 's/{DEFAULT_METADATA}/$(DEFAULT_METADATA)/g' mdadm.8.in > mdadm.8 + mdadm.man : mdadm.8 nroff -man mdadm.8 > mdadm.man diff --git a/mdadm.8 b/mdadm.8.in similarity index 99% rename from mdadm.8 rename to mdadm.8.in index 90470d9..da1a0a9 100644 --- a/mdadm.8 +++ b/mdadm.8.in @@ -299,7 +299,7 @@ says to get a list of array devices from .TP .BR \-e ", " \-\-metadata= Declare the style of RAID metadata (superblock) to be used. The -default is 1.2 for +default is {DEFAULT_METADATA} for .BR \-\-create , and to guess for other operations. The default can be overridden by setting the @@ -311,16 +311,24 @@ keyword in Options are: .RS +.ie '{DEFAULT_METADATA}'0.90' +.IP "0, 0.90, default" +.el .IP "0, 0.90" +.. Use the original 0.90 format superblock. This format limits arrays to 28 component devices and limits component devices of levels 1 and greater to 2 terabytes. +.ie '{DEFAULT_METADATA}'0.90' +.IP "1, 1.0, 1.1, 1.2" +.el .IP "1, 1.0, 1.1, 1.2 default" +.. Use the new version-1 format superblock. This has few restrictions. The different sub-versions store the superblock at different locations on the device, either at the end (for 1.0), at the start (for 1.1) or -4K from the start (for 1.2). '1' is equivalent to '1.0', 'default' is -equivalent to '1.2'. +4K from the start (for 1.2). "1" is equivalent to "1.0". +'if '{DEFAULT_METADATA}'1.2' "default" is equivalent to "1.2". .IP ddf Use the "Industry Standard" DDF (Disk Data Format) format defined by SNIA. diff --git a/super0.c b/super0.c index a0c7eb4..83600cb 100644 --- a/super0.c +++ b/super0.c @@ -922,6 +922,9 @@ static struct supertype *match_metadata_desc0(char *arg) while (arg[0] == '0' && arg[1] == '0') arg++; if (strcmp(arg, "0") == 0 || +#ifdef DEFAULT_OLD_METADATA /* ifndef in super1.c */ + strcmp(arg, "default") == 0 || +#endif /* DEFAULT_OLD_METADATA */ strcmp(arg, "0.90") == 0 || strcmp(arg, "") == 0 /* no metadata - i.e. non_persistent */ ) diff --git a/super1.c b/super1.c index 8fa0745..216690d 100644 --- a/super1.c +++ b/super1.c @@ -1381,7 +1381,9 @@ static struct supertype *match_metadata_desc1(char *arg) return st; } if (strcmp(arg, "1.2") == 0 || +#ifndef DEFAULT_OLD_METADATA /* ifdef in super0.c */ strcmp(arg, "default") == 0 || +#endif /* DEFAULT_OLD_METADATA */ strcmp(arg, "1.02") == 0) { st->minor_version = 2; return st; From fd547b508c90a647cf60df678047f07046cf5c68 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 31 May 2010 12:51:51 +1000 Subject: [PATCH 09/11] Fix man page reference to --level changes with --grow. --level can be used with --grow now, so correct man page. Reported-by: "Leslie Rhorer" Signed-off-by: NeilBrown --- mdadm.8.in | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mdadm.8.in b/mdadm.8.in index da1a0a9..67fabf3 100644 --- a/mdadm.8.in +++ b/mdadm.8.in @@ -470,8 +470,9 @@ When used with .BR \-\-build , only linear, stripe, raid0, 0, raid1, multipath, mp, and faulty are valid. -Not yet supported with -.BR \-\-grow . +Can be used with +.B \-\-grow +to change the RAID level in some cases. See LEVEL CHANGES below. .TP .BR \-p ", " \-\-layout= From b526e52dc7cbdde98db9c9f8765be28ba6d71d78 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 16 Jun 2010 17:26:04 -0700 Subject: [PATCH 10/11] Always assume SKIP_GONE_DEVS behaviour and kill the flag ...i.e. GET_DEVS == (GET_DEVS|SKIP_GONE_DEVS) A null pointer dereference in Incremental.c can be triggered by replugging a disk while the old name is in use. When mdadm -I is called on the new disk we fail the call to sysfs_read(). I audited all the locations that use GET_DEVS and it appears they can tolerate missing a drive. So just make SKIP_GONE_DEVS the default behaviour. Also fix up remaining unchecked usages of the sysfs_read() return value. Reported-by: Dave Jiang Signed-off-by: Dan Williams --- Grow.c | 20 +++++++++++++------- Incremental.c | 5 +++++ managemon.c | 2 +- mapfile.c | 5 +++-- mdadm.h | 1 - mdmon.c | 3 +-- super-ddf.c | 8 +------- super-intel.c | 7 +------ sysfs.c | 20 +++++++++----------- 9 files changed, 34 insertions(+), 37 deletions(-) diff --git a/Grow.c b/Grow.c index 28ed8d7..3923a90 100644 --- a/Grow.c +++ b/Grow.c @@ -546,7 +546,13 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, return 1; } sra = sysfs_read(fd, 0, GET_LEVEL); - frozen = freeze_array(sra); + if (sra) + frozen = freeze_array(sra); + else { + fprintf(stderr, Name ": failed to read sysfs parameters for %s\n", + devname); + return 1; + } if (frozen < 0) { fprintf(stderr, Name ": %s is performing resync/recovery and cannot" " be reshaped\n", devname); @@ -1970,6 +1976,12 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, int cache; int done = 0; + sra = sysfs_read(-1, devname2devnum(info->sys_name), + GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE| + GET_CACHE); + if (!sra) + return 1; + err = sysfs_set_str(info, NULL, "array_state", "readonly"); if (err) return err; @@ -1990,7 +2002,6 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, ochunk = info->array.chunk_size; nchunk = info->new_chunk; - a = (ochunk/512) * odata; b = (nchunk/512) * ndata; /* Find GCD */ @@ -2003,11 +2014,6 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, /* LCM == product / GCD */ blocks = (ochunk/512) * (nchunk/512) * odata * ndata / a; - sra = sysfs_read(-1, devname2devnum(info->sys_name), - GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE| - GET_CACHE); - - if (ndata == odata) while (blocks * 32 < sra->component_size && blocks < 16*1024*2) diff --git a/Incremental.c b/Incremental.c index d6dd0f4..8062e2b 100644 --- a/Incremental.c +++ b/Incremental.c @@ -369,6 +369,8 @@ int Incremental(char *devname, int verbose, int runstop, strcpy(chosen_name, devnum2devname(mp->devnum)); sra = sysfs_read(mdfd, fd2devnum(mdfd), (GET_DEVS | GET_STATE)); + if (!sra) + return 2; if (sra->devs) { sprintf(dn, "%d:%d", sra->devs->disk.major, @@ -586,6 +588,9 @@ static int count_active(struct supertype *st, int mdfd, char **availp, struct mdinfo *sra = sysfs_read(mdfd, -1, GET_DEVS | GET_STATE); char *avail = NULL; + if (!sra) + return 0; + for (d = sra->devs ; d ; d = d->next) { char dn[30]; int dfd; diff --git a/managemon.c b/managemon.c index 454c39d..debca97 100644 --- a/managemon.c +++ b/managemon.c @@ -315,7 +315,7 @@ static void manage_container(struct mdstat_ent *mdstat, * To see what is removed and what is added. * These need to be remove from, or added to, the array */ - mdi = sysfs_read(-1, mdstat->devnum, GET_DEVS|SKIP_GONE_DEVS); + mdi = sysfs_read(-1, mdstat->devnum, GET_DEVS); if (!mdi) { /* invalidate the current count so we can try again */ container->devcnt = -1; diff --git a/mapfile.c b/mapfile.c index 0f12559..ffe8e16 100644 --- a/mapfile.c +++ b/mapfile.c @@ -368,7 +368,7 @@ void RebuildMap(void) } for (md = mdstat ; md ; md = md->next) { - struct mdinfo *sra = sysfs_read(-1, md->devnum, GET_DEVS|SKIP_GONE_DEVS); + struct mdinfo *sra = sysfs_read(-1, md->devnum, GET_DEVS); struct mdinfo *sd; if (!sra) @@ -486,7 +486,8 @@ void RebuildMap(void) for (md = mdstat ; md ; md = md->next) { struct mdinfo *sra = sysfs_read(-1, md->devnum, GET_VERSION); - sysfs_uevent(sra, "change"); + if (sra) + sysfs_uevent(sra, "change"); sysfs_free(sra); } map_free(map); diff --git a/mdadm.h b/mdadm.h index a0797e8..62bfc44 100644 --- a/mdadm.h +++ b/mdadm.h @@ -404,7 +404,6 @@ enum sysfs_read_flags { GET_SIZE = (1 << 12), GET_STATE = (1 << 13), GET_ERROR = (1 << 14), - SKIP_GONE_DEVS = (1 << 15), }; /* If fd >= 0, get the array it is open on, diff --git a/mdmon.c b/mdmon.c index 69c320e..2191814 100644 --- a/mdmon.c +++ b/mdmon.c @@ -394,8 +394,7 @@ static int mdmon(char *devname, int devnum, int must_fork, int takeover) exit(3); } - mdi = sysfs_read(mdfd, container->devnum, - GET_VERSION|GET_LEVEL|GET_DEVS|SKIP_GONE_DEVS); + mdi = sysfs_read(mdfd, container->devnum, GET_VERSION|GET_LEVEL|GET_DEVS); if (!mdi) { fprintf(stderr, "mdmon: failed to load sysfs info for %s\n", diff --git a/super-ddf.c b/super-ddf.c index b01c68d..6145e3c 100644 --- a/super-ddf.c +++ b/super-ddf.c @@ -2807,14 +2807,8 @@ static int load_super_ddf_all(struct supertype *st, int fd, int seq; char nm[20]; int dfd; - int devnum = fd2devnum(fd); - enum sysfs_read_flags flags; - flags = GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE; - if (mdmon_running(devnum)) - flags |= SKIP_GONE_DEVS; - - sra = sysfs_read(fd, 0, flags); + sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE); if (!sra) return 1; if (sra->array.major_version != -1 || diff --git a/super-intel.c b/super-intel.c index d6d8b09..e09ce5e 100644 --- a/super-intel.c +++ b/super-intel.c @@ -2747,14 +2747,9 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, int retry; int err = 0; int i; - enum sysfs_read_flags flags; - - flags = GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE; - if (mdmon_running(devnum)) - flags |= SKIP_GONE_DEVS; /* check if 'fd' an opened container */ - sra = sysfs_read(fd, 0, flags); + sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE); if (!sra) return 1; diff --git a/sysfs.c b/sysfs.c index ebf9d8a..17f2567 100644 --- a/sysfs.c +++ b/sysfs.c @@ -273,22 +273,20 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options) strcpy(dbase, "block/dev"); if (load_sys(fname, buf)) { + /* assume this is a stale reference to a hot + * removed device + */ free(dev); - if (options & SKIP_GONE_DEVS) - continue; - else - goto abort; + continue; } sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor); /* special case check for block devices that can go 'offline' */ - if (options & SKIP_GONE_DEVS) { - strcpy(dbase, "block/device/state"); - if (load_sys(fname, buf) == 0 && - strncmp(buf, "offline", 7) == 0) { - free(dev); - continue; - } + strcpy(dbase, "block/device/state"); + if (load_sys(fname, buf) == 0 && + strncmp(buf, "offline", 7) == 0) { + free(dev); + continue; } /* finally add this disk to the array */ From 23eb475a96b1b0cf7f8feaeb7b32355b80e8faa7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 1 Jul 2010 17:28:14 -0700 Subject: [PATCH 11/11] mdmon: prevent allocations due to late binding Current versions of glibc do not provide a useable interface to clone(2) as it inflicts hidden dependencies on setting up a glibc specific tls descriptor. The dynamic linker trips this dependency and causes mdmon to intermittently fail to load. Resolving all dynamic linking prior to starting the monitor thread appears to mitigate the issue but there is no guarantee that another tls dependency will bite us later. However, while the debate continues with the glibc maintainers it seems prudent to keep this change. It ensures that we do not get into a situation where the monitor thread needs to make a late allocation to resolve a symbol. Signed-off-by: Dan Williams --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3af1665..237f4fc 100644 --- a/Makefile +++ b/Makefile @@ -157,8 +157,9 @@ mdadm.O2 : $(SRCS) mdadm.h mdmon.O2 mdmon.O2 : $(MON_SRCS) mdadm.h mdmon.h $(CC) -o mdmon.O2 $(CFLAGS) -DHAVE_STDINT_H -O2 -D_FORTIFY_SOURCE=2 $(MON_SRCS) +# use '-z now' to guarantee no dynamic linker interactions with the monitor thread mdmon : $(MON_OBJS) - $(CC) $(LDFLAGS) -o mdmon $(MON_OBJS) $(LDLIBS) + $(CC) $(LDFLAGS) -z now -o mdmon $(MON_OBJS) $(LDLIBS) msg.o: msg.c msg.h test_stripe : restripe.c mdadm.h