Bad block log

This commit is contained in:
NeilBrown 2012-10-03 17:07:11 +10:00
parent 676ab3120b
commit bf95d0f38c
3 changed files with 73 additions and 5 deletions

28
md.4
View File

@ -551,6 +551,34 @@ intent log if one is present.
In 2.6.13, intent bitmaps are only supported with RAID1. Other levels
with redundancy are supported from 2.6.15.
.SS BAD BLOCK LOG
From Linux 3.5 each device in an
.I md
array can store a list of known bad blocks. This list is 4K in size
and usually positioned at the end of the space between the superblock
and the data.
When a block cannot be read and cannot be repaired by writing data
recovered from other devices, the address of the block is stored in
the bad block log. Similarly, if an attempt to write a block fails,
the address will be recorded as a bad block. If attempting to record
the bad block fails, the whole device will be marked faulty.
Attempting to read from a known bad block will cause a read error.
An attempt to write to a known bad block will be ignored if any write
errors have been reported by the device. If there have been no write
errors then the data will be written to the known bad block and, if
that succeeds, the address will be removed from the list.
This allows an array to fail more gracefully - a few blocks on
different devices can be faulty without taking the whole array out of
action.
The log is particularly useful when recovering to a spare. If a few blocks
cannot be read from the other devices, the bulk of the recovery can
complete and those few bad blocks will be recorded in the bad block log.
.SS WRITE-BEHIND
From Linux 2.6.14,

View File

@ -1871,6 +1871,11 @@ setting.
.\".B \-\-size
.\"is given, the apparent size of the smallest drive given is used.
If the metadata type supports it (currently only 1.x metadata), space
will be allocated to store a bad block list. This allows a modest
number of bad blocks to be recorded, so that the drive can remain in
service while only partially functional.
When creating an array within a
.B CONTAINER
.I mdadm

View File

@ -70,7 +70,12 @@ struct mdp_superblock_1 {
__u8 device_uuid[16]; /* user-space setable, ignored by kernel */
__u8 devflags; /* per-device flags. Only one defined...*/
#define WriteMostly1 1 /* mask for writemostly flag in above */
__u8 pad2[64-57]; /* set to 0 when writing */
/* bad block log. If there are any bad blocks the feature flag is set.
* if offset and size are non-zero, that space is reserved and available.
*/
__u8 bblog_shift; /* shift from sectors to block size for badblocklist */
__u16 bblog_size; /* number of sectors reserved for badblocklist */
__u32 bblog_offset; /* sector offset from superblock to bblog, signed */
/* array state information - 64 bytes */
__u64 utime; /* 40 bits second, 24 bits microseconds */
@ -106,8 +111,9 @@ struct misc_dev_info {
* must be honoured
*/
#define MD_FEATURE_RESHAPE_ACTIVE 4
#define MD_FEATURE_BAD_BLOCKS 8 /* badblock list is not empty */
#define MD_FEATURE_ALL (1|2|4)
#define MD_FEATURE_ALL (1|2|4|8)
#ifndef offsetof
#define offsetof(t,f) ((size_t)&(((t*)0)->f))
@ -319,7 +325,7 @@ static void examine_super1(struct supertype *st, char *homehost)
printf("Internal Bitmap : %ld sectors from superblock\n",
(long)(int32_t)__le32_to_cpu(sb->bitmap_offset));
}
if (sb->feature_map & __le32_to_cpu(MD_FEATURE_RESHAPE_ACTIVE)) {
if (sb->feature_map & __cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE)) {
printf(" Reshape pos'n : %llu%s\n", (unsigned long long)__le64_to_cpu(sb->reshape_position)/2,
human_size(__le64_to_cpu(sb->reshape_position)<<9));
if (__le32_to_cpu(sb->delta_disks)) {
@ -363,6 +369,17 @@ static void examine_super1(struct supertype *st, char *homehost)
atime = __le64_to_cpu(sb->utime) & 0xFFFFFFFFFFULL;
printf(" Update Time : %.24s\n", ctime(&atime));
if (sb->bblog_size && sb->bblog_offset) {
printf(" Bad Block Log : %d entries available at offset %ld sectors",
__le16_to_cpu(sb->bblog_size)*512/8,
(long)__le32_to_cpu(sb->bblog_offset));
if (sb->feature_map &
__cpu_to_le32(MD_FEATURE_BAD_BLOCKS))
printf(" - bad blocks present.");
printf("\n");
}
if (calc_sb_1_csum(sb) == sb->sb_csum)
printf(" Checksum : %x - correct\n", __le32_to_cpu(sb->sb_csum));
else
@ -1180,10 +1197,12 @@ static int write_init_super1(struct supertype *st)
* 2: 4K from start of device.
* Depending on the array size, we might leave extra space
* for a bitmap.
* Also leave 4K for bad-block log.
*/
array_size = __le64_to_cpu(sb->size);
/* work out how much space we left for a bitmap */
bm_space = choose_bm_space(array_size);
/* work out how much space we left for a bitmap,
* Add 8 sectors for bad block log */
bm_space = choose_bm_space(array_size) + 8;
/* We try to leave 0.1% at the start for reshape
* operations, but limit this to 128Meg (0.1% of 10Gig)
@ -1203,6 +1222,10 @@ static int write_init_super1(struct supertype *st)
if (sb_offset < array_size + bm_space)
bm_space = sb_offset - array_size;
sb->data_size = __cpu_to_le64(sb_offset - bm_space);
if (bm_space >= 8) {
sb->bblog_size = __cpu_to_le16(8);
sb->bblog_offset = __cpu_to_le32((unsigned)-8);
}
break;
case 1:
sb->super_offset = __cpu_to_le64(0);
@ -1221,6 +1244,10 @@ static int write_init_super1(struct supertype *st)
sb->data_offset = __cpu_to_le64(reserved);
sb->data_size = __cpu_to_le64(dsize - reserved);
if (reserved >= 16) {
sb->bblog_size = __cpu_to_le16(8);
sb->bblog_offset = __cpu_to_le32(reserved-8);
}
break;
case 2:
sb_offset = 4*2;
@ -1245,6 +1272,14 @@ static int write_init_super1(struct supertype *st)
sb->data_offset = __cpu_to_le64(reserved);
sb->data_size = __cpu_to_le64(dsize - reserved);
if (reserved >= 16+16) {
sb->bblog_size = __cpu_to_le16(8);
/* '8' sectors for the bblog, and another '8'
* because we want offset from superblock, not
* start of device.
*/
sb->bblog_offset = __cpu_to_le32(reserved-8-8);
}
break;
default:
pr_err("Failed to write invalid "