when failures happen they should be propagated to all member arrays
From: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
This commit is contained in:
parent
8d45d1969b
commit
0af73f61a2
46
monitor.c
46
monitor.c
|
@ -175,7 +175,9 @@ int read_dev_state(int fd)
|
||||||
* detected by rd-N/state reporting "faulty"
|
* detected by rd-N/state reporting "faulty"
|
||||||
* mark device as 'failed' in metadata, let the kernel release the
|
* mark device as 'failed' in metadata, let the kernel release the
|
||||||
* device by writing '-blocked' to rd/state, and finally write 'remove' to
|
* device by writing '-blocked' to rd/state, and finally write 'remove' to
|
||||||
* rd/state
|
* rd/state. Before a disk can be replaced it must be failed and removed
|
||||||
|
* from all container members; this will be preemptive for the other
|
||||||
|
* arrays... safe?
|
||||||
*
|
*
|
||||||
* sync completes
|
* sync completes
|
||||||
* sync_action was 'resync' and becomes 'idle' and resync_start becomes
|
* sync_action was 'resync' and becomes 'idle' and resync_start becomes
|
||||||
|
@ -346,19 +348,47 @@ static int read_and_act(struct active_array *a)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct mdinfo *
|
||||||
|
find_device(struct active_array *a, int major, int minor)
|
||||||
|
{
|
||||||
|
struct mdinfo *mdi;
|
||||||
|
|
||||||
|
for (mdi = a->info.devs ; mdi ; mdi = mdi->next)
|
||||||
|
if (mdi->disk.major == major && mdi->disk.minor == minor)
|
||||||
|
return mdi;
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void reconcile_failed(struct active_array *aa, struct mdinfo *failed)
|
||||||
|
{
|
||||||
|
struct active_array *a;
|
||||||
|
struct mdinfo *victim;
|
||||||
|
|
||||||
|
for (a = aa; a; a = a->next) {
|
||||||
|
if (!a->container)
|
||||||
|
continue;
|
||||||
|
victim = find_device(a, failed->disk.major, failed->disk.minor);
|
||||||
|
if (!victim)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (!(victim->curr_state & DS_FAULTY))
|
||||||
|
write_attr("faulty", victim->state_fd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int wait_and_act(struct active_array *aa, int pfd, int nowait)
|
static int wait_and_act(struct active_array *aa, int pfd, int nowait)
|
||||||
{
|
{
|
||||||
fd_set rfds;
|
fd_set rfds;
|
||||||
int maxfd = 0;
|
int maxfd = 0;
|
||||||
struct active_array *a;
|
struct active_array *a;
|
||||||
int rv;
|
int rv;
|
||||||
|
struct mdinfo *mdi;
|
||||||
|
|
||||||
FD_ZERO(&rfds);
|
FD_ZERO(&rfds);
|
||||||
|
|
||||||
add_fd(&rfds, &maxfd, pfd);
|
add_fd(&rfds, &maxfd, pfd);
|
||||||
for (a = aa ; a ; a = a->next) {
|
for (a = aa ; a ; a = a->next) {
|
||||||
struct mdinfo *mdi;
|
|
||||||
|
|
||||||
/* once an array has been deactivated only the manager
|
/* once an array has been deactivated only the manager
|
||||||
* thread can make us care about it again
|
* thread can make us care about it again
|
||||||
*/
|
*/
|
||||||
|
@ -398,6 +428,16 @@ static int wait_and_act(struct active_array *aa, int pfd, int nowait)
|
||||||
if (a->container)
|
if (a->container)
|
||||||
rv += read_and_act(a);
|
rv += read_and_act(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* propagate failures across container members */
|
||||||
|
for (a = aa; a ; a = a->next) {
|
||||||
|
if (!a->container)
|
||||||
|
continue;
|
||||||
|
for (mdi = a->info.devs ; mdi ; mdi = mdi->next)
|
||||||
|
if (mdi->curr_state & DS_FAULTY)
|
||||||
|
reconcile_failed(aa, mdi);
|
||||||
|
}
|
||||||
|
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue