/*
 * raid6check - extended consistency check for RAID-6
 *
 * Copyright (C) 2011 Piergiorgio Sartor
 *
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Author: Piergiorgio Sartor
 * Based on "restripe.c" from "mdadm" codebase
 */

#include "mdadm.h"
#include <stdint.h>
#include <signal.h>
#include <sys/mman.h>

enum repair {
	NO_REPAIR = 0,
	MANUAL_REPAIR,
	AUTO_REPAIR
};

/* Helpers implemented elsewhere in the mdadm code base. */
int geo_map(int block, unsigned long long stripe, int raid_disks,
	    int level, int layout);
void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size);
void make_tables(void);
void ensure_zero_has_size(int chunk_size);
void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs);
void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
		       uint8_t **ptrs);
void xor_blocks(char *target, char **sources, int disks, int size);

/*
 * Collect per stripe consistency information.
 *
 * For every byte position i in [0, chunk_size) the computed parities
 * (p, q) are compared against the ones read from disk (chunkP, chunkQ)
 * and results[i] is set to:
 *   -255  P and Q both match
 *   -1    only P differs
 *   -2    only Q differs
 *   >= 0  both differ: (gflog[Qx] - gflog[Px]) mod 255, which the caller
 *         interprets as the index of the suspect data block
 */
void raid6_collect(int chunk_size, uint8_t *p, uint8_t *q,
		   char *chunkP, char *chunkQ, int *results)
{
	int i;
	int data_id;
	uint8_t Px, Qx;
	extern uint8_t raid6_gflog[];

	for (i = 0; i < chunk_size; i++) {
		Px = (uint8_t)chunkP[i] ^ (uint8_t)p[i];
		Qx = (uint8_t)chunkQ[i] ^ (uint8_t)q[i];

		if (Px == 0 && Qx == 0) {
			results[i] = -255;
		} else if (Qx == 0) {
			results[i] = -1;
		} else if (Px == 0) {
			results[i] = -2;
		} else {
			data_id = raid6_gflog[Qx] - raid6_gflog[Px];
			if (data_id < 0)
				data_id += 255;
			results[i] = data_id;
		}
	}
}

/*
 * Try to find out if a specific disk has problems.
 *
 * Scans the per-byte results produced by raid6_collect() and returns:
 *   -255    no byte reported a mismatch
 *   n       every mismatching byte reported the same value n
 *           (-1 for P, -2 for Q, otherwise a block index)
 *   -65535  mismatching bytes disagree, or a reported index is
 *           >= raid_disks
 *
 * NOTE(review): indices in [raid_disks - 2, raid_disks) are accepted here
 * although check_stripes() only has raid_disks - 2 data blocks; confirm
 * whether the bound should be the data-disk count.
 */
int raid6_stats(int *results, int raid_disks, int chunk_size)
{
	int i;
	int curr_broken_disk = -255;
	int prev_broken_disk = -255;
	int broken_status = 0;

	for (i = 0; i < chunk_size; i++) {
		if (results[i] != -255)
			curr_broken_disk = results[i];

		if (curr_broken_disk >= raid_disks)
			broken_status = 2;

		switch (broken_status) {
		case 0:
			/* nothing seen so far: remember the first suspect */
			if (curr_broken_disk != -255) {
				prev_broken_disk = curr_broken_disk;
				broken_status = 1;
			}
			break;

		case 1:
			/* one suspect so far: any other value is a conflict */
			if (curr_broken_disk != prev_broken_disk)
				broken_status = 2;
			break;

		case 2:
		default:
			/* conflict: the result stays "unknown" from here on */
			curr_broken_disk = prev_broken_disk = -65535;
			break;
		}
	}

	return curr_broken_disk;
}

/*
 * Lock process memory, ignore SIGTERM/SIGINT/SIGQUIT (saving the previous
 * handlers in sig[0..2]) and set the array's suspend_lo/suspend_hi to the
 * byte range of stripe 'start'.
 *
 * Returns 2 if mlockall() fails (nothing else has been changed in that
 * case), otherwise the OR of the two sysfs_set_num() results times 256,
 * i.e. 0 on success.
 */
int lock_stripe(struct mdinfo *info, unsigned long long start,
		int chunk_size, int data_disks, sighandler_t *sig)
{
	int rv;

	if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0)
		return 2;

	sig[0] = signal(SIGTERM, SIG_IGN);
	sig[1] = signal(SIGINT, SIG_IGN);
	sig[2] = signal(SIGQUIT, SIG_IGN);

	rv = sysfs_set_num(info, NULL, "suspend_lo",
			   start * chunk_size * data_disks);
	rv |= sysfs_set_num(info, NULL, "suspend_hi",
			    (start + 1) * chunk_size * data_disks);
	return rv * 256;
}

/*
 * Undo lock_stripe(): reset suspend_lo/suspend_hi, restore the signal
 * handlers saved in sig[0..2] and unlock process memory.
 *
 * Returns 3 if munlockall() fails, otherwise the OR of the
 * sysfs_set_num() results times 256, i.e. 0 on success.
 */
int unlock_all_stripes(struct mdinfo *info, sighandler_t *sig)
{
	int rv;

	/* the order of these three writes is kept exactly as in the original */
	rv = sysfs_set_num(info, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
	rv |= sysfs_set_num(info, NULL, "suspend_hi", 0);
	rv |= sysfs_set_num(info, NULL, "suspend_lo", 0);

	signal(SIGQUIT, sig[2]);
	signal(SIGINT, sig[1]);
	signal(SIGTERM, sig[0]);

	if (munlockall() != 0)
		return 3;
	return rv * 256;
}

/*
 * Check (and optionally repair) 'length' stripes starting at stripe 'start'.
 *
 * source[]/offsets[]/name[] are indexed by raid slot and hold the open
 * file descriptor, the data offset in bytes and the device name of each
 * component.  For every stripe all chunks are read while the stripe is
 * suspended, P and Q are recomputed from the data blocks and compared to
 * the stored ones, and the outcome is printed.
 *
 * repair == MANUAL_REPAIR: the chunks in slots failed_disk1/failed_disk2
 *                          are rebuilt from the others and written back.
 * repair == AUTO_REPAIR:   if a single suspect slot is identified, it is
 *                          rebuilt and written back.
 *
 * Returns 0 on success, non-zero on any lock, seek, read or write failure.
 */
int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets,
		  int raid_disks, int chunk_size, int level, int layout,
		  unsigned long long start, unsigned long long length,
		  char *name[], enum repair repair,
		  int failed_disk1, int failed_disk2)
{
	/* read the data and p and q blocks, and check we got them right */
	char *stripe_buf = xmalloc((size_t)raid_disks * chunk_size);
	char **stripes = xmalloc(raid_disks * sizeof(char*));
	char **blocks = xmalloc(raid_disks * sizeof(char*));
	int *block_index_for_slot = xmalloc(raid_disks * sizeof(int));
	uint8_t *p = xmalloc(chunk_size);
	uint8_t *q = xmalloc(chunk_size);
	int *results = xmalloc(chunk_size * sizeof(int));
	sighandler_t *sig = xmalloc(3 * sizeof(sighandler_t));

	int i;
	int diskP, diskQ;
	int data_disks = raid_disks - 2;
	int err = 0;

	extern int tables_ready;

	if (!tables_ready)
		make_tables();

	/* stripes[slot] points at that slot's chunk inside stripe_buf */
	for (i = 0; i < raid_disks; i++)
		stripes[i] = stripe_buf + i * chunk_size;

	while (length > 0) {
		int disk;

		printf("pos --> %llu\n", start);

		/* read the whole stripe while writes to it are suspended */
		err = lock_stripe(info, start, chunk_size, data_disks, sig);
		if (err != 0) {
			/* 2 means mlockall() failed: nothing to undo */
			if (err != 2)
				unlock_all_stripes(info, sig);
			goto exitCheck;
		}
		for (i = 0; i < raid_disks; i++) {
			off64_t seek_res = lseek64(source[i],
						   offsets[i] + start * chunk_size,
						   SEEK_SET);
			ssize_t read_res;

			if (seek_res < 0) {
				fprintf(stderr, "lseek to source %d failed\n", i);
				unlock_all_stripes(info, sig);
				err = -1;
				goto exitCheck;
			}
			read_res = read(source[i], stripes[i], chunk_size);
			if (read_res < chunk_size) {
				fprintf(stderr, "Failed to read complete chunk disk %d, aborting\n", i);
				unlock_all_stripes(info, sig);
				err = -1;
				goto exitCheck;
			}
		}
		err = unlock_all_stripes(info, sig);
		if (err != 0)
			goto exitCheck;

		/* blocks[] is ordered D0..Dn-1, P, Q; remember the reverse map */
		for (i = 0; i < data_disks; i++) {
			int slot = geo_map(i, start, raid_disks, level, layout);

			blocks[i] = stripes[slot];
			block_index_for_slot[slot] = i;
			printf("%d->%d\n", i, slot);
		}

		qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
		diskP = geo_map(-1, start, raid_disks, level, layout);
		diskQ = geo_map(-2, start, raid_disks, level, layout);
		blocks[data_disks] = stripes[diskP];
		block_index_for_slot[diskP] = data_disks;
		blocks[data_disks+1] = stripes[diskQ];
		block_index_for_slot[diskQ] = data_disks+1;

		if (memcmp(p, stripes[diskP], chunk_size) != 0) {
			printf("P(%d) wrong at %llu\n", diskP, start);
		}
		if (memcmp(q, stripes[diskQ], chunk_size) != 0) {
			printf("Q(%d) wrong at %llu\n", diskQ, start);
		}
		raid6_collect(chunk_size, p, q, stripes[diskP], stripes[diskQ], results);
		disk = raid6_stats(results, raid_disks, chunk_size);

		/* translate block index (or -1/-2 for P/Q) into a raid slot */
		if (disk >= -2) {
			disk = geo_map(disk, start, raid_disks, level, layout);
		}
		if (disk >= 0) {
			printf("Error detected at %llu: possible failed disk slot: %d --> %s\n",
				start, disk, name[disk]);
		}
		if (disk == -65535) {
			printf("Error detected at %llu: disk slot unknown\n", start);
		}

		if (repair == MANUAL_REPAIR) {
			ssize_t write_res1, write_res2;

			printf("Repairing stripe %llu\n", start);
			printf("Assuming slots %d (%s) and %d (%s) are incorrect\n",
			       failed_disk1, name[failed_disk1],
			       failed_disk2, name[failed_disk2]);

			if (failed_disk1 == diskQ || failed_disk2 == diskQ) {
				char *all_but_failed_blocks[data_disks];
				int failed_data_or_p;
				int failed_block_index;

				if (failed_disk1 == diskQ)
					failed_data_or_p = failed_disk2;
				else
					failed_data_or_p = failed_disk1;
				printf("Repairing D/P(%d) and Q\n", failed_data_or_p);
				failed_block_index = block_index_for_slot[failed_data_or_p];
				/*
				 * XOR of all data blocks, with the failed one
				 * replaced by P, yields the failed block.  If P
				 * itself failed no index matches and the XOR of
				 * all data blocks yields P.
				 */
				for (i = 0; i < data_disks; i++)
					if (failed_block_index == i)
						all_but_failed_blocks[i] = stripes[diskP];
					else
						all_but_failed_blocks[i] = blocks[i];
				xor_blocks(stripes[failed_data_or_p],
					   all_but_failed_blocks, data_disks, chunk_size);
				/* then regenerate Q from the now complete data */
				qsyndrome(p, (uint8_t*)stripes[diskQ],
					  (uint8_t**)blocks, data_disks, chunk_size);
			} else {
				ensure_zero_has_size(chunk_size);
				if (failed_disk1 == diskP || failed_disk2 == diskP) {
					int failed_data, failed_block_index;

					if (failed_disk1 == diskP)
						failed_data = failed_disk2;
					else
						failed_data = failed_disk1;
					failed_block_index = block_index_for_slot[failed_data];
					printf("Repairing D(%d) and P\n", failed_data);
					raid6_datap_recov(raid_disks, chunk_size,
							  failed_block_index,
							  (uint8_t**)blocks);
				} else {
					int failed_block_index1;
					int failed_block_index2;

					printf("Repairing D and D\n");
					failed_block_index1 = block_index_for_slot[failed_disk1];
					failed_block_index2 = block_index_for_slot[failed_disk2];
					/* pass the two indices in ascending order */
					if (failed_block_index1 > failed_block_index2) {
						int t = failed_block_index1;
						failed_block_index1 = failed_block_index2;
						failed_block_index2 = t;
					}
					raid6_2data_recov(raid_disks, chunk_size,
							  failed_block_index1,
							  failed_block_index2,
							  (uint8_t**)blocks);
				}
			}

			err = lock_stripe(info, start, chunk_size, data_disks, sig);
			if (err != 0) {
				if (err != 2)
					unlock_all_stripes(info, sig);
				goto exitCheck;
			}

			if (lseek64(source[failed_disk1],
				    offsets[failed_disk1] + start * chunk_size,
				    SEEK_SET) < 0) {
				fprintf(stderr, "lseek failed for failed_disk1\n");
				unlock_all_stripes(info, sig);
				err = -1;
				goto exitCheck;
			}
			write_res1 = write(source[failed_disk1],
					   stripes[failed_disk1], chunk_size);

			if (lseek64(source[failed_disk2],
				    offsets[failed_disk2] + start * chunk_size,
				    SEEK_SET) < 0) {
				fprintf(stderr, "lseek failed for failed_disk2\n");
				unlock_all_stripes(info, sig);
				err = -1;
				goto exitCheck;
			}
			write_res2 = write(source[failed_disk2],
					   stripes[failed_disk2], chunk_size);

			err = unlock_all_stripes(info, sig);
			if (err != 0)
				goto exitCheck;

			if (write_res1 != chunk_size || write_res2 != chunk_size) {
				fprintf(stderr, "Failed to write a complete chunk.\n");
				/* make sure the caller sees the failure */
				err = -1;
				goto exitCheck;
			}
		} else if (disk >= 0 && repair == AUTO_REPAIR) {
			ssize_t write_res;

			printf("Auto-repairing slot %d (%s)\n", disk, name[disk]);
			if (disk == diskQ) {
				qsyndrome(p, (uint8_t*)stripes[diskQ],
					  (uint8_t**)blocks, data_disks, chunk_size);
			} else {
				char *all_but_failed_blocks[data_disks];
				int failed_block_index = block_index_for_slot[disk];

				/* same XOR reconstruction as in manual repair */
				for (i = 0; i < data_disks; i++)
					if (failed_block_index == i)
						all_but_failed_blocks[i] = stripes[diskP];
					else
						all_but_failed_blocks[i] = blocks[i];
				xor_blocks(stripes[disk], all_but_failed_blocks,
					   data_disks, chunk_size);
			}

			err = lock_stripe(info, start, chunk_size, data_disks, sig);
			if (err != 0) {
				if (err != 2)
					unlock_all_stripes(info, sig);
				goto exitCheck;
			}

			/* never write if the seek failed: the offset would be wrong */
			if (lseek64(source[disk],
				    offsets[disk] + start * chunk_size,
				    SEEK_SET) < 0) {
				fprintf(stderr, "lseek failed for slot %d\n", disk);
				unlock_all_stripes(info, sig);
				err = -1;
				goto exitCheck;
			}
			write_res = write(source[disk], stripes[disk], chunk_size);

			err = unlock_all_stripes(info, sig);
			if (err != 0)
				goto exitCheck;

			if (write_res != chunk_size) {
				fprintf(stderr, "Failed to write a full chunk.\n");
				err = -1;
				goto exitCheck;
			}
		}

		length--;
		start++;
	}

exitCheck:

	free(stripe_buf);
	free(stripes);
	free(blocks);
	free(block_index_for_slot);
	free(p);
	free(q);
	free(results);
	free(sig);

	return err;
}

/*
 * Parse 'str' as an unsigned decimal number.
 *
 * Returns the value on success.  If 'str' is empty or has trailing
 * non-digit characters, *err is set to 'str' and 0 is returned; *err is
 * left untouched on success so several calls can share one error slot.
 */
unsigned long long getnum(char *str, char **err)
{
	char *e;
	unsigned long long rv = strtoull(str, &e, 10);

	if (e == str || *e) {
		*err = str;
		return 0;
	}
	return rv;
}

/*
 * Usage: raid6check md_device start_stripe length_stripes [autorepair]
 *    or: raid6check md_device repair stripe failed_slot_1 failed_slot_2
 *
 * Exit codes: 0 ok, 1 usage, 2 cannot open md device, 3 not RAID-6,
 * 4 bad argument, 6 component device problem, 7 check_stripes() failed,
 * 8 degraded array, 9 sysfs read failed.
 */
int main(int argc, char *argv[])
{
	/* md_device start length */
	int *fds = NULL;
	char **disk_name = NULL;
	unsigned long long *offsets = NULL;
	int raid_disks = 0;
	int active_disks;
	int chunk_size = 0;
	int layout = -1;
	int level = 6;
	enum repair repair = NO_REPAIR;
	int failed_disk1 = -1;
	int failed_disk2 = -1;
	unsigned long long slot1 = 0, slot2 = 0;
	unsigned long long start, length;
	unsigned long long total_stripes;
	int i;
	int rv;
	int mdfd = -1;
	struct mdinfo *info = NULL, *comp = NULL;
	char *err = NULL;
	int exit_err = 0;
	int close_flag = 0;

	char *prg = strrchr(argv[0], '/');

	if (prg == NULL)
		prg = argv[0];
	else
		prg++;

	if (argc < 4) {
		fprintf(stderr, "Usage: %s md_device start_stripe length_stripes [autorepair]\n", prg);
		fprintf(stderr, " or: %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
		exit_err = 1;
		goto exitHere;
	}

	mdfd = open(argv[1], O_RDONLY);
	if (mdfd < 0) {
		perror(argv[1]);
		fprintf(stderr, "%s: cannot open %s\n", prg, argv[1]);
		exit_err = 2;
		goto exitHere;
	}

	info = sysfs_read(mdfd, NULL,
			  GET_LEVEL|
			  GET_LAYOUT|
			  GET_DISKS|
			  GET_DEGRADED |
			  GET_COMPONENT|
			  GET_CHUNK|
			  GET_DEVS|
			  GET_OFFSET|
			  GET_SIZE);

	if (info == NULL) {
		fprintf(stderr, "%s: Error reading sysfs information of %s\n", prg, argv[1]);
		exit_err = 9;
		goto exitHere;
	}

	if (info->array.level != level) {
		fprintf(stderr, "%s: %s not a RAID-6\n", prg, argv[1]);
		exit_err = 3;
		goto exitHere;
	}

	if (info->array.failed_disks > 0) {
		fprintf(stderr, "%s: %s degraded array\n", prg, argv[1]);
		exit_err = 8;
		goto exitHere;
	}

	printf("layout: %d\n", info->array.layout);
	printf("disks: %d\n", info->array.raid_disks);
	printf("component size: %llu\n", info->component_size * 512);
	printf("total stripes: %llu\n", (info->component_size * 512) / info->array.chunk_size);
	printf("chunk size: %d\n", info->array.chunk_size);
	printf("\n");

	/* stop at the end of the list even if fewer members than expected */
	comp = info->devs;
	for (i = 0, active_disks = 0;
	     comp != NULL && active_disks < info->array.raid_disks;
	     i++, comp = comp->next) {
		printf("disk: %d - offset: %llu - size: %llu - name: %s - slot: %d\n",
			i, comp->data_offset * 512, comp->component_size * 512,
			map_dev(comp->disk.major, comp->disk.minor, 0),
			comp->disk.raid_disk);
		if (comp->disk.raid_disk >= 0)
			active_disks++;
	}
	printf("\n");

	close(mdfd);
	mdfd = -1;

	raid_disks = info->array.raid_disks;
	chunk_size = info->array.chunk_size;
	layout = info->array.layout;
	if (strcmp(argv[2], "repair") == 0) {
		if (argc < 6) {
			fprintf(stderr, "For repair mode, call %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
			exit_err = 1;
			goto exitHere;
		}
		repair = MANUAL_REPAIR;
		start = getnum(argv[3], &err);
		length = 1;
		slot1 = getnum(argv[4], &err);
		slot2 = getnum(argv[5], &err);
	} else {
		start = getnum(argv[2], &err);
		length = getnum(argv[3], &err);
		if (argc >= 5 && strcmp(argv[4], "autorepair") == 0)
			repair = AUTO_REPAIR;
	}

	/* report parse errors before interpreting any of the values */
	if (err) {
		fprintf(stderr, "%s: Bad number: %s\n", prg, err);
		exit_err = 4;
		goto exitHere;
	}

	if (repair == MANUAL_REPAIR) {
		/*
		 * Range-check in the wide type first: narrowing a huge value
		 * to int could yield a negative index that passes the check.
		 */
		if (slot1 >= (unsigned long long)raid_disks) {
			fprintf(stderr, "%s: failed_slot_1 index is higher than number of devices in raid\n", prg);
			exit_err = 4;
			goto exitHere;
		}
		if (slot2 >= (unsigned long long)raid_disks) {
			fprintf(stderr, "%s: failed_slot_2 index is higher than number of devices in raid\n", prg);
			exit_err = 4;
			goto exitHere;
		}
		if (slot1 == slot2) {
			fprintf(stderr, "%s: failed_slot_1 and failed_slot_2 are the same\n", prg);
			exit_err = 4;
			goto exitHere;
		}
		failed_disk1 = (int)slot1;
		failed_disk2 = (int)slot2;
	}

	total_stripes = (info->component_size * 512) / chunk_size;

	if (start > total_stripes) {
		start = total_stripes;
		fprintf(stderr, "%s: start beyond disks size\n", prg);
	}

	/*
	 * start <= total_stripes here, so the subtraction cannot wrap;
	 * comparing this way avoids the overflow of length + start.
	 */
	if (length == 0 || length > total_stripes - start)
		length = total_stripes - start;

	disk_name = xmalloc(raid_disks * sizeof(*disk_name));
	fds = xmalloc(raid_disks * sizeof(*fds));
	offsets = xcalloc(raid_disks, sizeof(*offsets));

	/* mark every slot as not opened so cleanup knows what to close */
	for (i = 0; i < raid_disks; i++)
		fds[i] = -1;
	close_flag = 1;

	comp = info->devs;
	for (active_disks = 0;
	     comp != NULL && active_disks < raid_disks;
	     comp = comp->next) {
		int disk_slot = comp->disk.raid_disk;

		/* skip devices without a slot and guard the array index */
		if (disk_slot < 0 || disk_slot >= raid_disks)
			continue;

		disk_name[disk_slot] = map_dev(comp->disk.major, comp->disk.minor, 0);
		offsets[disk_slot] = comp->data_offset * 512;
		fds[disk_slot] = open(disk_name[disk_slot], O_RDWR);
		if (fds[disk_slot] < 0) {
			perror(disk_name[disk_slot]);
			fprintf(stderr, "%s: cannot open %s\n", prg, disk_name[disk_slot]);
			exit_err = 6;
			goto exitHere;
		}
		active_disks++;
	}

	if (active_disks < raid_disks) {
		fprintf(stderr, "%s: only found %d of %d member devices\n",
			prg, active_disks, raid_disks);
		exit_err = 6;
		goto exitHere;
	}

	rv = check_stripes(info, fds, offsets,
			   raid_disks, chunk_size, level, layout,
			   start, length, disk_name, repair,
			   failed_disk1, failed_disk2);
	if (rv != 0) {
		fprintf(stderr, "%s: check_stripes returned %d\n", prg, rv);
		exit_err = 7;
		goto exitHere;
	}

exitHere:

	if (mdfd >= 0)
		close(mdfd);

	if (close_flag)
		for (i = 0; i < raid_disks; i++)
			if (fds[i] >= 0)
				close(fds[i]);

	free(disk_name);
	free(fds);
	free(offsets);

	exit(exit_err);
}