From 9d0e78406a8ae36d5da0fbbbb98069db4ede2476 Mon Sep 17 00:00:00 2001 From: Piergiorgio Sartor Date: Tue, 8 Feb 2011 11:44:23 +1100 Subject: [PATCH] User space RAID-6 access > test_stripe assumes that the data starts at the start of each device. > AS you are using 1.2 metadata (the default), data starts about 1M in to > the device (I think - you can check with --examine) > > You could fix test_stripe to put the right value in the 'offsets' array, > or you could create the array with 1.0 or 0.90 metadata. Hi Neil, thanks for the info, maybe this should be a second patch. In the meantime, please find attached a patch to restripe.c of mdadm 3.2 (latest, I hope). This should add the functionality to detect, in RAID-6, which of the disks potentially has problems, in case of parity errors. Some checks take place in order to avoid false positives, I hope these are correct and enough. I'm not 100% happy of the interface (too much redundancy), but for the time being it could be OK. Of course, any improvement is welcome. Please consider to include these changes to the next mdadm whatever release. bye, Signed-off-by: NeilBrown --- restripe.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 99 insertions(+), 6 deletions(-) diff --git a/restripe.c b/restripe.c index d33dbba..241e3a4 100644 --- a/restripe.c +++ b/restripe.c @@ -285,10 +285,13 @@ uint8_t raid6_gfmul[256][256]; uint8_t raid6_gfexp[256]; uint8_t raid6_gfinv[256]; uint8_t raid6_gfexi[256]; +uint8_t raid6_gflog[256]; +uint8_t raid6_gfilog[256]; void make_tables(void) { int i, j; uint8_t v; + uint32_t b, log; /* Compute multiplication table */ for (i = 0; i < 256; i++) @@ -312,6 +315,21 @@ void make_tables(void) for (i = 0; i < 256; i ++) raid6_gfexi[i] = raid6_gfinv[raid6_gfexp[i] ^ 1]; + /* Compute log and inverse log */ + /* Modified code from: + * http://web.eecs.utk.edu/~plank/plank/papers/CS-96-332.html + */ + b = 1; + raid6_gflog[0] = 0; + raid6_gfilog[255] = 0; + + for (log = 0; log < 255; log++) { + raid6_gflog[b] = (uint8_t) log; + raid6_gfilog[log] = (uint8_t) b; + b = b << 1; + if (b & 256) b = b ^ 0435; + } + tables_ready = 1; } @@ -387,6 +405,67 @@ void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs) } } +/* Try to find out if a specific disk has a problem */ +int raid6_check_disks(int data_disks, int start, int chunk_size, + int level, int layout, int diskP, int diskQ, + char *p, char *q, char **stripes) +{ + int i; + int data_id, diskD; + uint8_t Px, Qx; + int curr_broken_disk = -1; + int prev_broken_disk = -1; + int broken_status = 0; + + for(i = 0; i < chunk_size; i++) { + Px = (uint8_t)stripes[diskP][i] ^ (uint8_t)p[i]; + Qx = (uint8_t)stripes[diskQ][i] ^ (uint8_t)q[i]; + + if((Px != 0) && (Qx == 0)) + curr_broken_disk = diskP; + + + if((Px == 0) && (Qx != 0)) + curr_broken_disk = diskQ; + + + if((Px != 0) && (Qx != 0)) { + data_id = (raid6_gflog[Qx] - raid6_gflog[Px]) & 0xFF; + diskD = geo_map(data_id, start/chunk_size, + data_disks + 2, level, layout); + curr_broken_disk = diskD; + } + + if((Px == 0) && (Qx == 0)) + curr_broken_disk = curr_broken_disk; + + switch(broken_status) { + case 0: + if(curr_broken_disk != -1) { + prev_broken_disk = curr_broken_disk; + broken_status = 1; + } + break; + + case 1: + if(curr_broken_disk != prev_broken_disk) + broken_status = 2; + + if(curr_broken_disk >= data_disks + 2) + broken_status = 2; + + break; + + case 2: + default: + curr_broken_disk = prev_broken_disk = -2; + break; + } + } + + return curr_broken_disk; +} + /* Save data: * We are given: * A list of 'fds' of the active disks. Some may be absent. @@ -673,7 +752,12 @@ int test_stripes(int *source, unsigned long long *offsets, char *q = malloc(chunk_size); int i; + int diskP, diskQ; int data_disks = raid_disks - (level == 5 ? 1: 2); + + if (!tables_ready) + make_tables(); + for ( i = 0 ; i < raid_disks ; i++) stripes[i] = stripe_buf + i * chunk_size; @@ -693,18 +777,27 @@ int test_stripes(int *source, unsigned long long *offsets, switch(level) { case 6: qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size); - disk = geo_map(-1, start/chunk_size, raid_disks, + diskP = geo_map(-1, start/chunk_size, raid_disks, level, layout); - if (memcmp(p, stripes[disk], chunk_size) != 0) { - printf("P(%d) wrong at %llu\n", disk, + if (memcmp(p, stripes[diskP], chunk_size) != 0) { + printf("P(%d) wrong at %llu\n", diskP, start / chunk_size); } - disk = geo_map(-2, start/chunk_size, raid_disks, + diskQ = geo_map(-2, start/chunk_size, raid_disks, level, layout); - if (memcmp(q, stripes[disk], chunk_size) != 0) { - printf("Q(%d) wrong at %llu\n", disk, + if (memcmp(q, stripes[diskQ], chunk_size) != 0) { + printf("Q(%d) wrong at %llu\n", diskQ, start / chunk_size); } + disk = raid6_check_disks(data_disks, start, chunk_size, + level, layout, diskP, diskQ, + p, q, stripes); + if(disk >= 0) { + printf("Possible failed disk: %d\n", disk); + } + if(disk == -2) { + printf("Failure detected, but disk unknown\n"); + } break; } length -= chunk_size;