Create: add support for RAID0 layouts.
Since Linux 5.4 a layout is needed for RAID0 arrays with varying device sizes. This patch makes the layout of an array visible (via --examine) and sets the layout on newly created arrays. --layout=dangerous can be used to avoid setting a layout so that they array can be used on older kernels. Tested-by: dann frazier <dann.frazier@canonical.com> Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: Jes Sorensen <jsorensen@fb.com>
This commit is contained in:
parent
6da53c0e2a
commit
329dfc28de
11
Create.c
11
Create.c
|
@ -51,6 +51,9 @@ static int default_layout(struct supertype *st, int level, int verbose)
|
|||
default: /* no layout */
|
||||
layout = 0;
|
||||
break;
|
||||
case 0:
|
||||
layout = RAID0_ORIG_LAYOUT;
|
||||
break;
|
||||
case 10:
|
||||
layout = 0x102; /* near=2, far=1 */
|
||||
if (verbose > 0)
|
||||
|
@ -950,6 +953,11 @@ int Create(struct supertype *st, char *mddev,
|
|||
if (rv) {
|
||||
pr_err("ADD_NEW_DISK for %s failed: %s\n",
|
||||
dv->devname, strerror(errno));
|
||||
if (errno == EINVAL &&
|
||||
info.array.level == 0) {
|
||||
pr_err("Possibly your kernel doesn't support RAID0 layouts.\n");
|
||||
pr_err("Either upgrade, or use --layout=dangerous\n");
|
||||
}
|
||||
goto abort_locked;
|
||||
}
|
||||
break;
|
||||
|
@ -1046,6 +1054,9 @@ int Create(struct supertype *st, char *mddev,
|
|||
if (ioctl(mdfd, RUN_ARRAY, ¶m)) {
|
||||
pr_err("RUN_ARRAY failed: %s\n",
|
||||
strerror(errno));
|
||||
if (errno == 524 /* ENOTSUP */ &&
|
||||
info.array.level == 0)
|
||||
cont_err("Please use --layout=original or --layout=alternate\n");
|
||||
if (info.array.chunk_size & (info.array.chunk_size-1)) {
|
||||
cont_err("Problem may be that chunk size is not a power of 2\n");
|
||||
}
|
||||
|
|
5
Detail.c
5
Detail.c
|
@ -525,6 +525,11 @@ int Detail(char *dev, struct context *c)
|
|||
printf(" Layout : %s\n",
|
||||
str ? str : "-unknown-");
|
||||
}
|
||||
if (array.level == 0 && array.layout) {
|
||||
str = map_num(r0layout, array.layout);
|
||||
printf(" Layout : %s\n",
|
||||
str ? str : "-unknown-");
|
||||
}
|
||||
if (array.level == 6) {
|
||||
str = map_num(r6layout, array.layout);
|
||||
printf(" Layout : %s\n",
|
||||
|
|
12
maps.c
12
maps.c
|
@ -73,6 +73,18 @@ mapping_t r6layout[] = {
|
|||
{ NULL, UnSet }
|
||||
};
|
||||
|
||||
/* raid0 layout is only needed because of a bug in 3.14 which changed
|
||||
* the effective layout of raid0 arrays with varying device sizes.
|
||||
*/
|
||||
mapping_t r0layout[] = {
|
||||
{ "original", RAID0_ORIG_LAYOUT},
|
||||
{ "alternate", RAID0_ALT_MULTIZONE_LAYOUT},
|
||||
{ "1", 1}, /* aka ORIG */
|
||||
{ "2", 2}, /* aka ALT */
|
||||
{ "dangerous", 0},
|
||||
{ NULL, UnSet},
|
||||
};
|
||||
|
||||
mapping_t pers[] = {
|
||||
{ "linear", LEVEL_LINEAR},
|
||||
{ "raid0", 0},
|
||||
|
|
14
md.4
14
md.4
|
@ -193,6 +193,20 @@ smallest device has been exhausted, the RAID0 driver starts
|
|||
collecting chunks into smaller stripes that only span the drives which
|
||||
still have remaining space.
|
||||
|
||||
A bug was introduced in linux 3.14 which changed the layout of blocks in
|
||||
a RAID0 beyond the region that is striped over all devices. This bug
|
||||
does not affect an array with all devices the same size, but can affect
|
||||
other RAID0 arrays.
|
||||
|
||||
Linux 5.4 (and some stable kernels to which the change was backported)
|
||||
will not normally assemble such an array as it cannot know which layout
|
||||
to use. There is a module parameter "raid0.default_layout" which can be
|
||||
set to "1" to force the kernel to use the pre-3.14 layout or to "2" to
|
||||
force it to use the 3.14-and-later layout. when creating a new RAID0
|
||||
array,
|
||||
.I mdadm
|
||||
will record the chosen layout in the metadata in a way that allows newer
|
||||
kernels to assemble the array without needing a module parameter.
|
||||
|
||||
.SS RAID1
|
||||
|
||||
|
|
30
mdadm.8.in
30
mdadm.8.in
|
@ -593,6 +593,8 @@ to change the RAID level in some cases. See LEVEL CHANGES below.
|
|||
This option configures the fine details of data layout for RAID5, RAID6,
|
||||
and RAID10 arrays, and controls the failure modes for
|
||||
.IR faulty .
|
||||
It can also be used for working around a kernel bug with RAID0, but generally
|
||||
doesn't need to be used explicitly.
|
||||
|
||||
The layout of the RAID5 parity block can be one of
|
||||
.BR left\-asymmetric ,
|
||||
|
@ -652,7 +654,7 @@ option to set subsequent failure modes.
|
|||
"clear" or "none" will remove any pending or periodic failure modes,
|
||||
and "flush" will clear any persistent faults.
|
||||
|
||||
Finally, the layout options for RAID10 are one of 'n', 'o' or 'f' followed
|
||||
The layout options for RAID10 are one of 'n', 'o' or 'f' followed
|
||||
by a small number. The default is 'n2'. The supported options are:
|
||||
|
||||
.I 'n'
|
||||
|
@ -677,6 +679,32 @@ devices in the array. It does not need to divide evenly into that
|
|||
number (e.g. it is perfectly legal to have an 'n2' layout for an array
|
||||
with an odd number of devices).
|
||||
|
||||
A bug introduced in Linux 3.14 means that RAID0 arrays
|
||||
.B "with devices of differing sizes"
|
||||
started using a different layout. This could lead to
|
||||
data corruption. Since Linux 5.4 (and various stable releases that received
|
||||
backports), the kernel will not accept such an array unless
|
||||
a layout is explictly set. It can be set to
|
||||
.RB ' original '
|
||||
or
|
||||
.RB ' alternate '.
|
||||
When creating a new array,
|
||||
.I mdadm
|
||||
will select
|
||||
.RB ' original '
|
||||
by default, so the layout does not normally need to be set.
|
||||
An array created for either
|
||||
.RB ' original '
|
||||
or
|
||||
.RB ' alternate '
|
||||
will not be recognized by an (unpatched) kernel prior to 5.4. To create
|
||||
a RAID0 array with devices of differing sizes that can be used on an
|
||||
older kernel, you can set the layout to
|
||||
.RB ' dangerous '.
|
||||
This will use whichever layout the running kernel supports, so the data
|
||||
on the array may become corrupt when changing kernel from pre-3.14 to a
|
||||
later kernel.
|
||||
|
||||
When an array is converted between RAID5 and RAID6 an intermediate
|
||||
RAID6 layout is used in which the second parity block (Q) is always on
|
||||
the last device. To convert a RAID5 to RAID6 and leave it in this new
|
||||
|
|
8
mdadm.c
8
mdadm.c
|
@ -550,6 +550,14 @@ int main(int argc, char *argv[])
|
|||
pr_err("raid level must be given before layout.\n");
|
||||
exit(2);
|
||||
|
||||
case 0:
|
||||
s.layout = map_name(r0layout, optarg);
|
||||
if (s.layout == UnSet) {
|
||||
pr_err("layout %s not understood for raid0.\n",
|
||||
optarg);
|
||||
exit(2);
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
s.layout = map_name(r5layout, optarg);
|
||||
if (s.layout == UnSet) {
|
||||
|
|
8
mdadm.h
8
mdadm.h
|
@ -763,7 +763,8 @@ extern int restore_stripes(int *dest, unsigned long long *offsets,
|
|||
|
||||
extern char *map_num(mapping_t *map, int num);
|
||||
extern int map_name(mapping_t *map, char *name);
|
||||
extern mapping_t r5layout[], r6layout[], pers[], modes[], faultylayout[];
|
||||
extern mapping_t r0layout[], r5layout[], r6layout[],
|
||||
pers[], modes[], faultylayout[];
|
||||
extern mapping_t consistency_policies[], sysfs_array_states[];
|
||||
|
||||
extern char *map_dev_preferred(int major, int minor, int create,
|
||||
|
@ -1758,6 +1759,11 @@ char *xstrdup(const char *str);
|
|||
#define makedev(M,m) (((M)<<8) | (m))
|
||||
#endif
|
||||
|
||||
enum r0layout {
|
||||
RAID0_ORIG_LAYOUT = 1,
|
||||
RAID0_ALT_MULTIZONE_LAYOUT = 2,
|
||||
};
|
||||
|
||||
/* for raid4/5/6 */
|
||||
#define ALGORITHM_LEFT_ASYMMETRIC 0
|
||||
#define ALGORITHM_RIGHT_ASYMMETRIC 1
|
||||
|
|
6
super0.c
6
super0.c
|
@ -1291,6 +1291,12 @@ static int validate_geometry0(struct supertype *st, int level,
|
|||
if (*chunk == UnSet)
|
||||
*chunk = DEFAULT_CHUNK;
|
||||
|
||||
if (level == 0 && layout != UnSet) {
|
||||
if (verbose)
|
||||
pr_err("0.90 metadata does not support layouts for RAID0\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!subdev)
|
||||
return 1;
|
||||
|
||||
|
|
30
super1.c
30
super1.c
|
@ -43,7 +43,7 @@ struct mdp_superblock_1 {
|
|||
|
||||
__u64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/
|
||||
__u32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */
|
||||
__u32 layout; /* only for raid5 currently */
|
||||
__u32 layout; /* used for raid5, raid6, raid10, and raid0 */
|
||||
__u64 size; /* used size of component devices, in 512byte sectors */
|
||||
|
||||
__u32 chunksize; /* in 512byte sectors */
|
||||
|
@ -144,6 +144,7 @@ struct misc_dev_info {
|
|||
#define MD_FEATURE_JOURNAL 512 /* support write journal */
|
||||
#define MD_FEATURE_PPL 1024 /* support PPL */
|
||||
#define MD_FEATURE_MUTLIPLE_PPLS 2048 /* support for multiple PPLs */
|
||||
#define MD_FEATURE_RAID0_LAYOUT 4096 /* layout is meaningful in RAID0 */
|
||||
#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \
|
||||
|MD_FEATURE_RECOVERY_OFFSET \
|
||||
|MD_FEATURE_RESHAPE_ACTIVE \
|
||||
|
@ -155,6 +156,7 @@ struct misc_dev_info {
|
|||
|MD_FEATURE_JOURNAL \
|
||||
|MD_FEATURE_PPL \
|
||||
|MD_FEATURE_MULTIPLE_PPLS \
|
||||
|MD_FEATURE_RAID0_LAYOUT \
|
||||
)
|
||||
|
||||
static int role_from_sb(struct mdp_superblock_1 *sb)
|
||||
|
@ -498,6 +500,11 @@ static void examine_super1(struct supertype *st, char *homehost)
|
|||
printf(" Events : %llu\n",
|
||||
(unsigned long long)__le64_to_cpu(sb->events));
|
||||
printf("\n");
|
||||
if (__le32_to_cpu(sb->level) == 0 &&
|
||||
(sb->feature_map & __cpu_to_le32(MD_FEATURE_RAID0_LAYOUT))) {
|
||||
c = map_num(r0layout, __le32_to_cpu(sb->layout));
|
||||
printf(" Layout : %s\n", c?c:"-unknown-");
|
||||
}
|
||||
if (__le32_to_cpu(sb->level) == 5) {
|
||||
c = map_num(r5layout, __le32_to_cpu(sb->layout));
|
||||
printf(" Layout : %s\n", c?c:"-unknown-");
|
||||
|
@ -1646,6 +1653,7 @@ struct devinfo {
|
|||
int fd;
|
||||
char *devname;
|
||||
long long data_offset;
|
||||
unsigned long long dev_size;
|
||||
mdu_disk_info_t disk;
|
||||
struct devinfo *next;
|
||||
};
|
||||
|
@ -1687,6 +1695,7 @@ static int add_to_super1(struct supertype *st, mdu_disk_info_t *dk,
|
|||
di->devname = devname;
|
||||
di->disk = *dk;
|
||||
di->data_offset = data_offset;
|
||||
get_dev_size(fd, NULL, &di->dev_size);
|
||||
di->next = NULL;
|
||||
*dip = di;
|
||||
|
||||
|
@ -1888,10 +1897,25 @@ static int write_init_super1(struct supertype *st)
|
|||
unsigned long long sb_offset;
|
||||
unsigned long long data_offset;
|
||||
long bm_offset;
|
||||
int raid0_need_layout = 0;
|
||||
|
||||
for (di = st->info; di; di = di->next) {
|
||||
if (di->disk.state & (1 << MD_DISK_JOURNAL))
|
||||
sb->feature_map |= __cpu_to_le32(MD_FEATURE_JOURNAL);
|
||||
if (sb->level == 0 && sb->layout != 0) {
|
||||
struct devinfo *di2 = st->info;
|
||||
unsigned long long s1, s2;
|
||||
s1 = di->dev_size;
|
||||
if (di->data_offset != INVALID_SECTORS)
|
||||
s1 -= di->data_offset;
|
||||
s1 /= __le32_to_cpu(sb->chunksize);
|
||||
s2 = di2->dev_size;
|
||||
if (di2->data_offset != INVALID_SECTORS)
|
||||
s2 -= di2->data_offset;
|
||||
s2 /= __le32_to_cpu(sb->chunksize);
|
||||
if (s1 != s2)
|
||||
raid0_need_layout = 1;
|
||||
}
|
||||
}
|
||||
|
||||
for (di = st->info; di; di = di->next) {
|
||||
|
@ -2039,6 +2063,10 @@ static int write_init_super1(struct supertype *st)
|
|||
sb->bblog_offset = 0;
|
||||
}
|
||||
|
||||
/* RAID0 needs a layout if devices aren't all the same size */
|
||||
if (raid0_need_layout)
|
||||
sb->feature_map |= __cpu_to_le32(MD_FEATURE_RAID0_LAYOUT);
|
||||
|
||||
sb->sb_csum = calc_sb_1_csum(sb);
|
||||
rv = store_super1(st, di->fd);
|
||||
|
||||
|
|
Loading…
Reference in New Issue