Merge branch 'master' of git://github.com/djbw/mdadm

This commit is contained in:
NeilBrown 2009-12-30 13:42:37 +11:00
commit c1e3ab8c1e
12 changed files with 491 additions and 151 deletions

View File

@ -538,7 +538,7 @@ int Create(struct supertype *st, char *mddev,
assume_clean
) {
info.array.state = 1; /* clean, but one+ drive will be missing*/
info.resync_start = ~0ULL;
info.resync_start = MaxSector;
} else {
info.array.state = 0; /* not clean, but no errors */
info.resync_start = 0;

View File

@ -699,6 +699,7 @@ int Manage_subdevs(char *devname, int fd,
tst->ss->getinfo_super(tst, &new_mdi);
new_mdi.disk.major = disc.major;
new_mdi.disk.minor = disc.minor;
new_mdi.recovery_start = 0;
if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
fprintf(stderr, Name ": add new device to external metadata"
" failed for %s\n", dv->devname);

View File

@ -20,8 +20,6 @@
#include "mdadm.h"
#define min(a,b) (((a) < (b)) ? (a) : (b))
inline void sb_le_to_cpu(bitmap_super_t *sb)
{
sb->magic = __le32_to_cpu(sb->magic);

View File

@ -112,8 +112,10 @@ static void close_aa(struct active_array *aa)
{
struct mdinfo *d;
for (d = aa->info.devs; d; d = d->next)
for (d = aa->info.devs; d; d = d->next) {
close(d->recovery_fd);
close(d->state_fd);
}
close(aa->action_fd);
close(aa->info.state_fd);
@ -209,16 +211,22 @@ struct metadata_update *update_queue = NULL;
struct metadata_update *update_queue_handled = NULL;
struct metadata_update *update_queue_pending = NULL;
void check_update_queue(struct supertype *container)
static void free_updates(struct metadata_update **update)
{
while (update_queue_handled) {
struct metadata_update *this = update_queue_handled;
update_queue_handled = this->next;
while (*update) {
struct metadata_update *this = *update;
*update = this->next;
free(this->buf);
if (this->space)
free(this->space);
free(this->space);
free(this);
}
}
void check_update_queue(struct supertype *container)
{
free_updates(&update_queue_handled);
if (update_queue == NULL &&
update_queue_pending) {
update_queue = update_queue_pending;
@ -376,8 +384,9 @@ static void manage_member(struct mdstat_ent *mdstat,
if (a->check_degraded) {
struct metadata_update *updates = NULL;
struct mdinfo *newdev;
struct mdinfo *newdev = NULL;
struct active_array *newa;
struct mdinfo *d;
a->check_degraded = 0;
@ -385,34 +394,46 @@ static void manage_member(struct mdstat_ent *mdstat,
* to check.
*/
newdev = a->container->ss->activate_spare(a, &updates);
if (newdev) {
struct mdinfo *d;
/* Cool, we can add a device or several. */
newa = duplicate_aa(a);
/* suspend recovery - maybe not needed */
if (!newdev)
return;
/* Add device to array and set offset/size/slot.
* and open files for each newdev */
for (d = newdev; d ; d = d->next) {
struct mdinfo *newd;
if (sysfs_add_disk(&newa->info, d, 0) < 0)
continue;
newd = malloc(sizeof(*newd));
*newd = *d;
newd->next = newa->info.devs;
newa->info.devs = newd;
newa = duplicate_aa(a);
if (!newa)
goto out;
/* Cool, we can add a device or several. */
newd->state_fd = sysfs_open(a->devnum,
newd->sys_name,
"state");
newd->prev_state
= read_dev_state(newd->state_fd);
newd->curr_state = newd->prev_state;
/* Add device to array and set offset/size/slot.
* and open files for each newdev */
for (d = newdev; d ; d = d->next) {
struct mdinfo *newd;
newd = malloc(sizeof(*newd));
if (!newd)
continue;
if (sysfs_add_disk(&newa->info, d, 0) < 0) {
free(newd);
continue;
}
queue_metadata_update(updates);
replace_array(a->container, a, newa);
sysfs_set_str(&a->info, NULL, "sync_action", "recover");
*newd = *d;
newd->next = newa->info.devs;
newa->info.devs = newd;
newd->state_fd = sysfs_open(a->devnum, newd->sys_name,
"state");
newd->prev_state = read_dev_state(newd->state_fd);
newd->curr_state = newd->prev_state;
}
queue_metadata_update(updates);
updates = NULL;
replace_array(a->container, a, newa);
sysfs_set_str(&a->info, NULL, "sync_action", "recover");
out:
while (newdev) {
d = newdev->next;
free(newdev);
newdev = d;
}
free_updates(&updates);
}
}
@ -498,6 +519,9 @@ static void manage_new(struct mdstat_ent *mdstat,
newd->state_fd = sysfs_open(new->devnum,
newd->sys_name,
"state");
newd->recovery_fd = sysfs_open(new->devnum,
newd->sys_name,
"recovery_start");
newd->prev_state = read_dev_state(newd->state_fd);
newd->curr_state = newd->prev_state;
@ -522,7 +546,6 @@ static void manage_new(struct mdstat_ent *mdstat,
new->info.state_fd = sysfs_open(new->devnum, NULL, "array_state");
new->resync_start_fd = sysfs_open(new->devnum, NULL, "resync_start");
new->metadata_fd = sysfs_open(new->devnum, NULL, "metadata_version");
get_resync_start(new);
dprintf("%s: inst: %d action: %d state: %d\n", __func__, atoi(inst),
new->action_fd, new->info.state_fd);

27
mdadm.h
View File

@ -129,6 +129,22 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
#endif /* __KLIBC__ */
/*
 * min()/max() macros that also do strict type-checking.
 * Each argument is copied into a local of its own type, so it is
 * evaluated exactly once.  The otherwise "unnecessary" pointer
 * comparison exists only so the compiler diagnoses a call whose two
 * arguments have different types; its result is discarded.
 * Relies on the GNU C statement-expression and typeof extensions.
 */
#define min(x, y) ({ \
typeof(x) _min1 = (x); \
typeof(y) _min2 = (y); \
(void) (&_min1 == &_min2); \
_min1 < _min2 ? _min1 : _min2; })
#define max(x, y) ({ \
typeof(x) _max1 = (x); \
typeof(y) _max2 = (y); \
(void) (&_max1 == &_max2); \
_max1 > _max2 ? _max1 : _max2; })
/* general information that might be extracted from a superblock */
struct mdinfo {
@ -146,7 +162,11 @@ struct mdinfo {
*/
int reshape_active;
unsigned long long reshape_progress;
unsigned long long resync_start;
union {
unsigned long long resync_start; /* per-array resync position */
unsigned long long recovery_start; /* per-device rebuild position */
#define MaxSector (~0ULL) /* resync/recovery complete position */
};
unsigned long safe_mode_delay; /* ms delay to mark clean */
int new_level, delta_disks, new_layout, new_chunk;
int errors;
@ -168,6 +188,7 @@ struct mdinfo {
struct mdinfo *next;
/* Device info for mdmon: */
int recovery_fd;
int state_fd;
#define DS_FAULTY 1
#define DS_INSYNC 2
@ -380,8 +401,7 @@ extern int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev,
char *name, char *val, int size);
extern int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms);
extern int sysfs_set_array(struct mdinfo *info, int vers);
extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd,
int in_sync);
extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume);
extern int sysfs_disk_to_scsi_id(int fd, __u32 *id);
extern int sysfs_unique_holder(int devnum, long rdev);
extern int load_sys(char *path, char *buf);
@ -839,6 +859,7 @@ extern int assemble_container_content(struct supertype *st, int mdfd,
extern int add_disk(int mdfd, struct supertype *st,
struct mdinfo *sra, struct mdinfo *info);
extern int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info);
unsigned long long min_recovery_start(struct mdinfo *array);
extern char *human_size(long long bytes);
extern char *human_size_brief(long long bytes);

View File

@ -39,8 +39,6 @@ struct active_array {
int check_degraded; /* flag set by mon, read by manage */
int devnum;
unsigned long long resync_start;
};
/*
@ -73,7 +71,6 @@ extern int socket_hup_requested;
extern int sigterm;
int read_dev_state(int fd);
int get_resync_start(struct active_array *a);
int is_container_member(struct mdstat_ent *mdstat, char *container);
struct mdstat_ent *mdstat_read(int hold, int start);
@ -85,9 +82,9 @@ extern int monitor_loop_cnt;
/* helper routine to determine resync completion since MaxSector is a
* moving target
*/
static inline int is_resync_complete(struct active_array *a)
static inline int is_resync_complete(struct mdinfo *array)
{
if (a->resync_start >= a->info.component_size)
if (array->resync_start >= array->component_size)
return 1;
return 0;
}

View File

@ -66,23 +66,20 @@ static int read_attr(char *buf, int len, int fd)
return n;
}
int get_resync_start(struct active_array *a)
static unsigned long long read_resync_start(int fd)
{
char buf[30];
int n;
n = read_attr(buf, 30, a->resync_start_fd);
n = read_attr(buf, 30, fd);
if (n <= 0)
return n;
return 0;
if (strncmp(buf, "none", 4) == 0)
a->resync_start = ~0ULL;
return MaxSector;
else
a->resync_start = strtoull(buf, NULL, 10);
return 1;
return strtoull(buf, NULL, 10);
}
static enum array_state read_state(int fd)
{
char buf[20];
@ -208,22 +205,23 @@ static int read_and_act(struct active_array *a)
a->curr_state = read_state(a->info.state_fd);
a->curr_action = read_action(a->action_fd);
a->info.resync_start = read_resync_start(a->resync_start_fd);
for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
mdi->next_state = 0;
if (mdi->state_fd >= 0)
if (mdi->state_fd >= 0) {
mdi->recovery_start = read_resync_start(mdi->recovery_fd);
mdi->curr_state = read_dev_state(mdi->state_fd);
}
}
if (a->curr_state <= inactive &&
a->prev_state > inactive) {
/* array has been stopped */
get_resync_start(a);
a->container->ss->set_array_state(a, 1);
a->next_state = clear;
deactivate = 1;
}
if (a->curr_state == write_pending) {
get_resync_start(a);
a->container->ss->set_array_state(a, 0);
a->next_state = active;
dirty = 1;
@ -236,7 +234,6 @@ static int read_and_act(struct active_array *a)
dirty = 1;
}
if (a->curr_state == clean) {
get_resync_start(a);
a->container->ss->set_array_state(a, 1);
}
if (a->curr_state == active ||
@ -253,7 +250,6 @@ static int read_and_act(struct active_array *a)
/* explicit request for readonly array. Leave it alone */
;
} else {
get_resync_start(a);
if (a->container->ss->set_array_state(a, 2))
a->next_state = read_auto; /* array is clean */
else {
@ -271,7 +267,6 @@ static int read_and_act(struct active_array *a)
* until the array goes inactive or readonly though.
* Just check if we need to fiddle spares.
*/
get_resync_start(a);
a->container->ss->set_array_state(a, a->curr_state <= clean);
check_degraded = 1;
}

View File

@ -1433,7 +1433,7 @@ static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info)
(ddf->virt->entries[info->container_member].init_state
& DDF_initstate_mask)
== DDF_init_full)
info->resync_start = ~0ULL;
info->resync_start = MaxSector;
uuid_from_super_ddf(st, info->uuid);
@ -2921,7 +2921,7 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
this->resync_start = 0;
} else {
this->array.state = 1;
this->resync_start = ~0ULL;
this->resync_start = MaxSector;
}
memcpy(this->name, ddf->virt->entries[i].name, 16);
this->name[16]=0;
@ -2968,6 +2968,7 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
dev->disk.minor = d->minor;
dev->disk.raid_disk = i;
dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
dev->recovery_start = MaxSector;
dev->events = __be32_to_cpu(ddf->primary.seq);
dev->data_offset = __be64_to_cpu(vc->lba_offset[i]);
@ -3066,7 +3067,7 @@ static int ddf_set_array_state(struct active_array *a, int consistent)
if (consistent == 2) {
/* Should check if a recovery should be started FIXME */
consistent = 1;
if (!is_resync_complete(a))
if (!is_resync_complete(&a->info))
consistent = 0;
}
if (consistent)
@ -3078,9 +3079,9 @@ static int ddf_set_array_state(struct active_array *a, int consistent)
old = ddf->virt->entries[inst].init_state;
ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
if (is_resync_complete(a))
if (is_resync_complete(&a->info))
ddf->virt->entries[inst].init_state |= DDF_init_full;
else if (a->resync_start == 0)
else if (a->info.resync_start == 0)
ddf->virt->entries[inst].init_state |= DDF_init_not;
else
ddf->virt->entries[inst].init_state |= DDF_init_quick;
@ -3088,7 +3089,7 @@ static int ddf_set_array_state(struct active_array *a, int consistent)
ddf->updates_pending = 1;
dprintf("ddf mark %d %s %llu\n", inst, consistent?"clean":"dirty",
a->resync_start);
a->info.resync_start);
return consistent;
}
@ -3547,6 +3548,7 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
di->disk.major = dl->major;
di->disk.minor = dl->minor;
di->disk.state = 0;
di->recovery_start = 0;
di->data_offset = pos;
di->component_size = a->info.component_size;
di->container_member = dl->pdnum;

View File

@ -635,6 +635,8 @@ static int is_failed(struct imsm_disk *disk)
}
#ifndef MDASSEMBLE
static __u64 blocks_per_migr_unit(struct imsm_dev *dev);
static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx)
{
__u64 sz;
@ -669,27 +671,32 @@ static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx)
printf(" Chunk Size : %u KiB\n",
__le16_to_cpu(map->blocks_per_strip) / 2);
printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
printf(" Migrate State : %s", dev->vol.migr_state ? "migrating" : "idle\n");
printf(" Migrate State : ");
if (dev->vol.migr_state) {
if (migr_type(dev) == MIGR_INIT)
printf(": initializing\n");
printf("initialize\n");
else if (migr_type(dev) == MIGR_REBUILD)
printf(": rebuilding\n");
printf("rebuild\n");
else if (migr_type(dev) == MIGR_VERIFY)
printf(": check\n");
printf("check\n");
else if (migr_type(dev) == MIGR_GEN_MIGR)
printf(": general migration\n");
printf("general migration\n");
else if (migr_type(dev) == MIGR_STATE_CHANGE)
printf(": state change\n");
printf("state change\n");
else if (migr_type(dev) == MIGR_REPAIR)
printf(": repair\n");
printf("repair\n");
else
printf(": <unknown:%d>\n", migr_type(dev));
}
printf("<unknown:%d>\n", migr_type(dev));
} else
printf("idle\n");
printf(" Map State : %s", map_state_str[map->map_state]);
if (dev->vol.migr_state) {
struct imsm_map *map = get_imsm_map(dev, 1);
printf(" <-- %s", map_state_str[map->map_state]);
printf("\n Checkpoint : %u (%llu)",
__le32_to_cpu(dev->vol.curr_migr_unit),
blocks_per_migr_unit(dev));
}
printf("\n");
printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
@ -1215,6 +1222,179 @@ get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
}
#endif
/* migr_strip_size, in 512-byte blocks, when repairing or initializing
 * parity.
 *
 * raid5 and raid10 use the chunk size recorded in map 0; every other
 * level uses a fixed 128KiB.
 */
static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
{
	struct imsm_map *map = get_imsm_map(dev, 0);
	/* print_imsm_dev() converts blocks_per_strip with __le16_to_cpu(),
	 * so use the same 16-bit conversion here; a 32-bit swap of the
	 * promoted value gives the wrong chunk size on big-endian hosts.
	 */
	__u32 chunk = __le16_to_cpu(map->blocks_per_strip);

	switch (get_imsm_raid_level(map)) {
	case 5:
	case 10:
		return chunk;
	default:
		return 128*1024 >> 9;
	}
}
/* migr_strip_size, in 512-byte blocks, when rebuilding a degraded disk.
 *
 * No idea why this is different than migr_strip_blocks_resync(), but
 * it's good to be compatible.  The geometry is taken from map 1.
 */
static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
{
	struct imsm_map *map = get_imsm_map(dev, 1);
	/* print_imsm_dev() converts blocks_per_strip with __le16_to_cpu(),
	 * so use the same 16-bit conversion here; a 32-bit swap of the
	 * promoted value gives the wrong chunk size on big-endian hosts.
	 */
	__u32 chunk = __le16_to_cpu(map->blocks_per_strip);

	switch (get_imsm_raid_level(map)) {
	case 1:
	case 10:
		/* 128KiB when the members divide evenly into domains,
		 * otherwise one chunk
		 */
		if (map->num_members % map->num_domains == 0)
			return 128*1024 >> 9;
		else
			return chunk;
	case 5:
		/* never less than 64KiB */
		return max((__u32) 64*1024 >> 9, chunk);
	default:
		return 128*1024 >> 9;
	}
}
/* Number of stripes covered by one migration unit during a resync:
 * the ratio of the map 1 chunk size to the map 0 chunk size, but
 * never less than 1.
 */
static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
{
	struct imsm_map *lo = get_imsm_map(dev, 0);
	struct imsm_map *hi = get_imsm_map(dev, 1);
	/* print_imsm_dev() converts blocks_per_strip with __le16_to_cpu(),
	 * so use the same 16-bit conversion here; a 32-bit swap of the
	 * promoted value gives the wrong chunk size on big-endian hosts.
	 */
	__u32 lo_chunk = __le16_to_cpu(lo->blocks_per_strip);
	__u32 hi_chunk = __le16_to_cpu(hi->blocks_per_strip);

	/* a zero chunk size in the metadata must not turn into a
	 * division by zero; fall back to the documented minimum
	 */
	if (lo_chunk == 0)
		return 1;

	return max((__u32) 1, hi_chunk / lo_chunk);
}
static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
{
struct imsm_map *lo = get_imsm_map(dev, 0);
int level = get_imsm_raid_level(lo);
if (level == 1 || level == 10) {
struct imsm_map *hi = get_imsm_map(dev, 1);
return hi->num_domains;
} else
return num_stripes_per_unit_resync(dev);
}
/* Number of data members of the volume described by map 0.
 *
 * Named 'imsm_' because raid0, raid1 and raid10 counter-intuitively
 * have the same number of data disks (all members); raid5 has one
 * fewer.  Returns 0 for any other raid level.
 */
static __u8 imsm_num_data_members(struct imsm_dev *dev)
{
	struct imsm_map *map = get_imsm_map(dev, 0);
	int level = get_imsm_raid_level(map);

	if (level == 0 || level == 1 || level == 10)
		return map->num_members;

	if (level == 5)
		return map->num_members - 1;

	dprintf("%s: unsupported raid level\n", __func__);
	return 0;
}
/* Depth of one parity segment in blocks, based on map 0: the chunk
 * size scaled by num_domains for raid1/raid10, by num_members for
 * raid5, and unscaled for every other level.
 */
static __u32 parity_segment_depth(struct imsm_dev *dev)
{
	struct imsm_map *map = get_imsm_map(dev, 0);
	/* print_imsm_dev() converts blocks_per_strip with __le16_to_cpu(),
	 * so use the same 16-bit conversion here; a 32-bit swap of the
	 * promoted value gives the wrong chunk size on big-endian hosts.
	 */
	__u32 chunk = __le16_to_cpu(map->blocks_per_strip);

	switch (get_imsm_raid_level(map)) {
	case 1:
	case 10:
		return chunk * map->num_domains;
	case 5:
		return chunk * map->num_members;
	default:
		return chunk;
	}
}
/* Translate 'block' using the geometry of map 1.
 *
 * The block is split into a strip number and an offset within the
 * strip; the strip number is then reduced to a stripe number in a
 * level-specific way and recombined with the offset.  Returns 0 for
 * raid levels other than 1, 10 and 5.
 */
static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
{
	struct imsm_map *map = get_imsm_map(dev, 1);
	/* print_imsm_dev() converts blocks_per_strip with __le16_to_cpu(),
	 * so use the same 16-bit conversion here; a 32-bit swap of the
	 * promoted value gives the wrong chunk size on big-endian hosts.
	 */
	__u32 chunk = __le16_to_cpu(map->blocks_per_strip);
	__u32 strip = block / chunk;

	switch (get_imsm_raid_level(map)) {
	case 1:
	case 10: {
		__u32 vol_strip = (strip * map->num_domains) + 1;
		__u32 vol_stripe = vol_strip / map->num_members;

		return vol_stripe * chunk + block % chunk;
	}
	case 5: {
		__u32 stripe = strip / (map->num_members - 1);

		return stripe * chunk + block % chunk;
	}
	default:
		return 0;
	}
}
/* calculate the conversion factor between per member 'blocks'
 * (md/{resync,rebuild}_start) and imsm migration units, return
 * 0 for the 'not migrating' and 'unsupported migration' cases
 */
static __u64 blocks_per_migr_unit(struct imsm_dev *dev)
{
	if (!dev->vol.migr_state)
		return 0;

	switch (migr_type(dev)) {
	case MIGR_VERIFY:
	case MIGR_REPAIR:
	case MIGR_INIT: {
		struct imsm_map *map = get_imsm_map(dev, 0);
		__u32 stripes_per_unit;
		__u32 blocks_per_unit;
		__u32 parity_depth;
		__u32 migr_chunk;
		__u32 block_map;
		__u32 block_rel;
		__u32 segment;
		__u32 stripe;
		__u8 disks;

		/* yes, this is really the translation of migr_units to
		 * per-member blocks in the 'resync' case
		 */
		stripes_per_unit = num_stripes_per_unit_resync(dev);
		migr_chunk = migr_strip_blocks_resync(dev);
		disks = imsm_num_data_members(dev);
		blocks_per_unit = stripes_per_unit * migr_chunk * disks;
		/* print_imsm_dev() converts blocks_per_strip with
		 * __le16_to_cpu(), so use the same 16-bit conversion here
		 */
		stripe = __le16_to_cpu(map->blocks_per_strip) * disks;
		/* imsm_num_data_members() returns 0 for an unsupported
		 * raid level, which makes 'stripe' 0: report that as an
		 * unsupported migration rather than dividing by zero
		 */
		if (stripe == 0)
			return 0;
		segment = blocks_per_unit / stripe;
		block_rel = blocks_per_unit - segment * stripe;
		parity_depth = parity_segment_depth(dev);
		block_map = map_migr_block(dev, block_rel);
		return block_map + parity_depth * segment;
	}
	case MIGR_REBUILD: {
		__u32 stripes_per_unit;
		__u32 migr_chunk;

		/* a rebuild unit is simply strip size times stripe count */
		stripes_per_unit = num_stripes_per_unit_rebuild(dev);
		migr_chunk = migr_strip_blocks_rebuild(dev);
		return migr_chunk * stripes_per_unit;
	}
	case MIGR_GEN_MIGR:
	case MIGR_STATE_CHANGE:
	default:
		return 0;
	}
}
static int imsm_level_to_layout(int level)
{
switch (level) {
@ -1264,13 +1444,34 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info)
info->component_size = __le32_to_cpu(map->blocks_per_member);
memset(info->uuid, 0, sizeof(info->uuid));
if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty)
if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty) {
info->resync_start = 0;
else if (dev->vol.migr_state)
/* FIXME add curr_migr_unit to resync_start conversion */
info->resync_start = 0;
else
info->resync_start = ~0ULL;
} else if (dev->vol.migr_state) {
switch (migr_type(dev)) {
case MIGR_REPAIR:
case MIGR_INIT: {
__u64 blocks_per_unit = blocks_per_migr_unit(dev);
__u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);
info->resync_start = blocks_per_unit * units;
break;
}
case MIGR_VERIFY:
/* we could emulate the checkpointing of
* 'sync_action=check' migrations, but for now
* we just immediately complete them
*/
case MIGR_REBUILD:
/* this is handled by container_content_imsm() */
case MIGR_GEN_MIGR:
case MIGR_STATE_CHANGE:
/* FIXME handle other migrations */
default:
/* we are not dirty, so... */
info->resync_start = MaxSector;
}
} else
info->resync_start = MaxSector;
strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
info->name[MAX_RAID_SERIAL_LEN] = 0;
@ -1566,6 +1767,7 @@ static int compare_super_imsm(struct supertype *st, struct supertype *tst)
first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
first->anchor->orig_family_num = sec->anchor->orig_family_num;
first->anchor->family_num = sec->anchor->family_num;
memcpy(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH);
for (i = 0; i < sec->anchor->num_raid_devs; i++)
imsm_copy_dev(get_imsm_dev(first, i), get_imsm_dev(sec, i));
}
@ -2428,7 +2630,7 @@ imsm_thunderdome(struct intel_super **super_list, int len)
struct intel_disk *idisk;
idisk = disk_list_get(dl->serial, disk_list);
if (is_spare(&idisk->disk) &&
if (idisk && is_spare(&idisk->disk) &&
!is_failed(&idisk->disk) && !is_configured(&idisk->disk))
dl->index = -1;
else {
@ -3333,7 +3535,7 @@ static unsigned long long merge_extents(struct intel_super *super, int sum_exten
unsigned long reserve;
if (!e)
return ~0ULL; /* error */
return 0;
/* coalesce and sort all extents. also, check to see if we need to
* reserve space between member arrays
@ -3376,17 +3578,23 @@ static unsigned long long merge_extents(struct intel_super *super, int sum_exten
} while (e[i-1].size);
free(e);
if (maxsize == 0)
return 0;
/* FIXME assumes volume at offset 0 is the first volume in a
* container
*/
if (start_extent > 0)
reserve = IMSM_RESERVED_SECTORS; /* gap between raid regions */
else
reserve = 0;
if (maxsize < reserve)
return ~0ULL;
return 0;
super->create_offset = ~((__u32) 0);
if (start + reserve > super->create_offset)
return ~0ULL; /* start overflows create_offset */
return 0; /* start overflows create_offset */
super->create_offset = start + reserve;
return maxsize - reserve;
@ -3415,6 +3623,34 @@ static int is_raid_level_supported(const struct imsm_orom *orom, int level, int
}
#define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
/* Check the requested geometry against the platform's option-rom
 * capabilities and the layouts imsm supports.
 *
 * Returns 1 when the level/raiddisks combination, the chunk size and
 * the layout are all acceptable, 0 otherwise.  When 'verbose' is set
 * the reason for a rejection is printed via pr_vrb().
 */
static int
validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
			    int raiddisks, int chunk, int verbose)
{
	if (!is_raid_level_supported(super->orom, level, raiddisks)) {
		pr_vrb(": platform does not support raid%d with %d disk%s\n",
		       level, raiddisks, raiddisks > 1 ? "s" : "");
		return 0;
	}

	/* the chunk size is only checked when an orom is present and the
	 * level is not raid1
	 */
	if (level != 1 && super->orom &&
	    !imsm_orom_has_chunk(super->orom, chunk)) {
		pr_vrb(": platform does not support a chunk size of: %d\n", chunk);
		return 0;
	}

	if (layout == imsm_level_to_layout(level))
		return 1;

	/* layout mismatch: pick the most specific message */
	switch (level) {
	case 5:
		pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
		break;
	case 10:
		pr_vrb(": imsm raid 10 only supports the n2 layout\n");
		break;
	default:
		pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
		       layout, level);
		break;
	}

	return 0;
}
/* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
* FIX ME add ahci details
*/
@ -3437,26 +3673,8 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
if (!super)
return 0;
if (!is_raid_level_supported(super->orom, level, raiddisks)) {
pr_vrb(": platform does not support raid%d with %d disk%s\n",
level, raiddisks, raiddisks > 1 ? "s" : "");
if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, verbose))
return 0;
}
if (super->orom && level != 1 &&
!imsm_orom_has_chunk(super->orom, chunk)) {
pr_vrb(": platform does not support a chunk size of: %d\n", chunk);
return 0;
}
if (layout != imsm_level_to_layout(level)) {
if (level == 5)
pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
else if (level == 10)
pr_vrb(": imsm raid 10 only supports the n2 layout\n");
else
pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
layout, level);
return 0;
}
if (!dev) {
/* General test: make sure there is space for
@ -3464,7 +3682,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
* offset
*/
unsigned long long minsize = size;
unsigned long long start_offset = ~0ULL;
unsigned long long start_offset = MaxSector;
int dcnt = 0;
if (minsize == 0)
minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
@ -3480,7 +3698,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
esize = e[i].start - pos;
if (esize >= minsize)
found = 1;
if (found && start_offset == ~0ULL) {
if (found && start_offset == MaxSector) {
start_offset = pos;
break;
} else if (found && pos != start_offset) {
@ -3569,15 +3787,11 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
i += dl->extent_cnt;
maxsize = merge_extents(super, i);
if (maxsize < size) {
if (maxsize < size || maxsize == 0) {
if (verbose)
fprintf(stderr, Name ": not enough space after merge (%llu < %llu)\n",
maxsize, size);
return 0;
} else if (maxsize == ~0ULL) {
if (verbose)
fprintf(stderr, Name ": failed to merge %d extents\n", i);
return 0;
}
*freesize = maxsize;
@ -3634,7 +3848,8 @@ static int reserve_space(struct supertype *st, int raiddisks,
if (cnt < raiddisks ||
(super->orom && used && used != raiddisks) ||
maxsize < minsize) {
maxsize < minsize ||
maxsize == 0) {
fprintf(stderr, Name ": not enough devices with space to create array.\n");
return 0; /* No enough free spaces large enough */
}
@ -3686,6 +3901,10 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
* created. add_to_super and getinfo_super
* detect when autolayout is in progress.
*/
if (!validate_geometry_imsm_orom(st->sb, level, layout,
raiddisks, chunk,
verbose))
return 0;
return reserve_space(st, raiddisks, size, chunk, freesize);
}
return 1;
@ -3763,6 +3982,46 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
}
#endif /* MDASSEMBLE */
static int is_rebuilding(struct imsm_dev *dev)
{
struct imsm_map *migr_map;
if (!dev->vol.migr_state)
return 0;
if (migr_type(dev) != MIGR_REBUILD)
return 0;
migr_map = get_imsm_map(dev, 1);
if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
return 1;
else
return 0;
}
/* Convert the volume's rebuild checkpoint (curr_migr_unit) into a
 * per-device recovery_start for the single device being rebuilt.
 *
 * Does nothing unless the volume is rebuilding.  The rebuild target is
 * the one device in array->devs whose recovery_start is 0; if there is
 * more than one such device (dual rebuild) or none at all, the list is
 * left untouched.
 */
static void update_recovery_start(struct imsm_dev *dev, struct mdinfo *array)
{
	struct mdinfo *rebuild = NULL;
	struct mdinfo *d;
	__u32 units;

	if (!is_rebuilding(dev))
		return;

	/* Find the rebuild target, but punt on the dual rebuild case */
	for (d = array->devs; d; d = d->next)
		if (d->recovery_start == 0) {
			if (rebuild)
				return;
			rebuild = d;
		}

	/* no device is marked as needing recovery, so there is nothing
	 * to fix up; do not dereference a NULL target
	 */
	if (!rebuild)
		return;

	units = __le32_to_cpu(dev->vol.curr_migr_unit);
	rebuild->recovery_start = units * blocks_per_migr_unit(dev);
}
static struct mdinfo *container_content_imsm(struct supertype *st)
{
/* Given a container loaded by load_super_imsm_all,
@ -3810,6 +4069,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
super->current_vol = i;
getinfo_super_imsm_volume(st, this);
for (slot = 0 ; slot < map->num_members; slot++) {
unsigned long long recovery_start;
struct mdinfo *info_d;
struct dl *d;
int idx;
@ -3823,33 +4083,41 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
if (d->index == idx)
break;
recovery_start = MaxSector;
if (d == NULL)
skip = 1;
if (d && is_failed(&d->disk))
skip = 1;
if (ord & IMSM_ORD_REBUILD)
skip = 1;
recovery_start = 0;
/*
* if we skip some disks the array will be assembled degraded;
* reset resync start to avoid a dirty-degraded situation
* reset resync start to avoid a dirty-degraded
* situation when performing the initial sync
*
* FIXME handle dirty degraded
*/
if (skip && !dev->vol.dirty)
this->resync_start = ~0ULL;
if ((skip || recovery_start == 0) && !dev->vol.dirty)
this->resync_start = MaxSector;
if (skip)
continue;
info_d = malloc(sizeof(*info_d));
info_d = calloc(1, sizeof(*info_d));
if (!info_d) {
fprintf(stderr, Name ": failed to allocate disk"
" for volume %.16s\n", dev->volume);
info_d = this->devs;
while (info_d) {
struct mdinfo *d = info_d->next;
free(info_d);
info_d = d;
}
free(this);
this = rest;
break;
}
memset(info_d, 0, sizeof(*info_d));
info_d->next = this->devs;
this->devs = info_d;
@ -3857,8 +4125,10 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
info_d->disk.major = d->major;
info_d->disk.minor = d->minor;
info_d->disk.raid_disk = slot;
info_d->recovery_start = recovery_start;
this->array.working_disks++;
if (info_d->recovery_start == MaxSector)
this->array.working_disks++;
info_d->events = __le32_to_cpu(mpb->generation_num);
info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
@ -3866,6 +4136,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
if (d->devname)
strcpy(info_d->name, d->devname);
}
/* now that the disk list is up-to-date fixup recovery_start */
update_recovery_start(dev, this);
rest = this;
}
@ -4008,24 +4280,6 @@ static int is_resyncing(struct imsm_dev *dev)
return 0;
}
static int is_rebuilding(struct imsm_dev *dev)
{
struct imsm_map *migr_map;
if (!dev->vol.migr_state)
return 0;
if (migr_type(dev) != MIGR_REBUILD)
return 0;
migr_map = get_imsm_map(dev, 1);
if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
return 1;
else
return 0;
}
/* return true if we recorded new information */
static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
{
@ -4076,6 +4330,7 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
struct imsm_map *map = get_imsm_map(dev, 0);
int failed = imsm_count_failed(super, dev);
__u8 map_state = imsm_check_degraded(super, dev, failed);
__u32 blocks_per_unit;
/* before we activate this array handle any missing disks */
if (consistent == 2 && super->missing) {
@ -4087,14 +4342,14 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
mark_missing(dev, &dl->disk, dl->index);
super->updates_pending++;
}
if (consistent == 2 &&
(!is_resync_complete(a) ||
(!is_resync_complete(&a->info) ||
map_state != IMSM_T_STATE_NORMAL ||
dev->vol.migr_state))
consistent = 0;
if (is_resync_complete(a)) {
if (is_resync_complete(&a->info)) {
/* complete initialization / resync,
* recovery and interrupted recovery is completed in
* ->set_disk
@ -4106,7 +4361,7 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
}
} else if (!is_resyncing(dev) && !failed) {
/* mark the start of the init process if nothing is failed */
dprintf("imsm: mark resync start (%llu)\n", a->resync_start);
dprintf("imsm: mark resync start\n");
if (map->map_state == IMSM_T_STATE_UNINITIALIZED)
migrate(dev, IMSM_T_STATE_NORMAL, MIGR_INIT);
else
@ -4114,12 +4369,32 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
super->updates_pending++;
}
/* FIXME check if we can update curr_migr_unit from resync_start */
/* check if we can update curr_migr_unit from resync_start, recovery_start */
blocks_per_unit = blocks_per_migr_unit(dev);
if (blocks_per_unit && failed <= 1) {
__u32 units32;
__u64 units;
if (migr_type(dev) == MIGR_REBUILD)
units = min_recovery_start(&a->info) / blocks_per_unit;
else
units = a->info.resync_start / blocks_per_unit;
units32 = units;
/* check that we did not overflow 32-bits, and that
* curr_migr_unit needs updating
*/
if (units32 == units &&
__le32_to_cpu(dev->vol.curr_migr_unit) != units32) {
dprintf("imsm: mark checkpoint (%u)\n", units32);
dev->vol.curr_migr_unit = __cpu_to_le32(units32);
super->updates_pending++;
}
}
/* mark dirty / clean */
if (dev->vol.dirty != !consistent) {
dprintf("imsm: mark '%s' (%llu)\n",
consistent ? "clean" : "dirty", a->resync_start);
dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
if (consistent)
dev->vol.dirty = 0;
else
@ -4436,6 +4711,7 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
di->disk.major = dl->major;
di->disk.minor = dl->minor;
di->disk.state = 0;
di->recovery_start = 0;
di->data_offset = __le32_to_cpu(map->pba_of_lba0);
di->component_size = a->info.component_size;
di->container_member = inst;

View File

@ -659,9 +659,9 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
switch(__le32_to_cpu(sb->level)) {
case 5: case 4: case 6:
/* need to force clean */
if (sb->resync_offset != ~0ULL)
if (sb->resync_offset != MaxSector)
rv = 1;
sb->resync_offset = ~0ULL;
sb->resync_offset = MaxSector;
}
}
if (strcmp(update, "assemble")==0) {
@ -855,7 +855,7 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info,
sb->utime = sb->ctime;
sb->events = __cpu_to_le64(1);
if (info->state & (1<<MD_SB_CLEAN))
sb->resync_offset = ~0ULL;
sb->resync_offset = MaxSector;
else
sb->resync_offset = 0;
sb->max_dev = __cpu_to_le32((1024- sizeof(struct mdp_superblock_1))/

15
sysfs.c
View File

@ -572,7 +572,7 @@ int sysfs_set_array(struct mdinfo *info, int vers)
return rv;
}
int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int in_sync)
int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume)
{
char dv[100];
char nm[100];
@ -595,15 +595,24 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int in_sync)
strcpy(sd->sys_name, "dev-");
strcpy(sd->sys_name+4, dname);
/* test write to see if 'recovery_start' is available */
if (resume && sd->recovery_start < MaxSector &&
sysfs_set_num(sra, sd, "recovery_start", 0)) {
sysfs_set_str(sra, sd, "state", "remove");
return -1;
}
rv = sysfs_set_num(sra, sd, "offset", sd->data_offset);
rv |= sysfs_set_num(sra, sd, "size", (sd->component_size+1) / 2);
if (sra->array.level != LEVEL_CONTAINER) {
if (in_sync)
if (sd->recovery_start == MaxSector)
/* This can correctly fail if array isn't started,
* yet, so just ignore status for now.
*/
sysfs_set_str(sra, sd, "state", "in_sync");
sysfs_set_str(sra, sd, "state", "insync");
rv |= sysfs_set_num(sra, sd, "slot", sd->disk.raid_disk);
if (resume)
sysfs_set_num(sra, sd, "recovery_start", sd->recovery_start);
}
return rv;
}

24
util.c
View File

@ -1338,8 +1338,11 @@ int add_disk(int mdfd, struct supertype *st,
int rv;
#ifndef MDASSEMBLE
if (st->ss->external) {
rv = sysfs_add_disk(sra, info,
info->disk.state & (1<<MD_DISK_SYNC));
if (info->disk.state & (1<<MD_DISK_SYNC))
info->recovery_start = MaxSector;
else
info->recovery_start = 0;
rv = sysfs_add_disk(sra, info, 0);
if (! rv) {
struct mdinfo *sd2;
for (sd2 = sra->devs; sd2; sd2=sd2->next)
@ -1383,10 +1386,25 @@ int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info)
return rv;
}
/* Find the minimum recovery_start in an array, for metadata formats
 * that only record per-array recovery progress instead of per-device.
 *
 * Returns MaxSector when the array has no devices or when no device
 * has a smaller recovery_start.
 */
unsigned long long min_recovery_start(struct mdinfo *array)
{
	unsigned long long lowest = MaxSector;
	struct mdinfo *dev = array->devs;

	while (dev) {
		if (dev->recovery_start < lowest)
			lowest = dev->recovery_start;
		dev = dev->next;
	}

	return lowest;
}
char *devnum2devname(int num)
{
char name[100];
if (num > 0)
if (num >= 0)
sprintf(name, "md%d", num);
else
sprintf(name, "md_d%d", -1-num);