FIX: mdmon crashes after changing RAID level from 1 to 0
Description of the bug: Sometimes mdmon crashes after changing the RAID level from 1 to 0 (takeover). Cause of the bug: The managemon marks an active_array for removal from monitoring by setting a->container to NULL (in the "manage_member" function). Sometimes (during stress tests) this happens exactly while the monitor is in the "read_and_act" function and the a->container pointer is in use, which causes the monitor to crash. Solution: The active array has to be marked for removal in some way other than setting to NULL a pointer that may still be in use. A new field, "to_remove", was added to the "active_array" structure. The managemon sets it to mark an array for removal (instead of the old assignment: a->container = NULL), and the monitor checks it to determine whether the array should be removed. The "to_remove" field should also be checked in some other places to avoid managing an array that is going to be removed. Signed-off-by: Lukasz Dorau <lukasz.dorau@intel.com> Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
parent
3960e579bf
commit
ba71445069
|
@ -461,7 +461,7 @@ static void manage_member(struct mdstat_ent *mdstat,
|
|||
if (mdstat->level) {
|
||||
int level = map_name(pers, mdstat->level);
|
||||
if (level == 0 || level == LEVEL_LINEAR) {
|
||||
a->container = NULL;
|
||||
a->to_remove = 1;
|
||||
wakeup_monitor();
|
||||
return;
|
||||
}
|
||||
|
@ -739,7 +739,7 @@ void manage(struct mdstat_ent *mdstat, struct supertype *container)
|
|||
/* Looks like a member of this container */
|
||||
for (a = container->arrays; a; a = a->next) {
|
||||
if (mdstat->devnum == a->devnum) {
|
||||
if (a->container)
|
||||
if (a->container && a->to_remove == 0)
|
||||
manage_member(mdstat, a);
|
||||
break;
|
||||
}
|
||||
|
|
1
mdmon.h
1
mdmon.h
|
@ -28,6 +28,7 @@ struct active_array {
|
|||
struct mdinfo info;
|
||||
struct supertype *container;
|
||||
struct active_array *next, *replaces;
|
||||
int to_remove;
|
||||
|
||||
int action_fd;
|
||||
int resync_start_fd;
|
||||
|
|
|
@ -479,7 +479,7 @@ static void reconcile_failed(struct active_array *aa, struct mdinfo *failed)
|
|||
struct mdinfo *victim;
|
||||
|
||||
for (a = aa; a; a = a->next) {
|
||||
if (!a->container)
|
||||
if (!a->container || a->to_remove)
|
||||
continue;
|
||||
victim = find_device(a, failed->disk.major, failed->disk.minor);
|
||||
if (!victim)
|
||||
|
@ -539,7 +539,7 @@ static int wait_and_act(struct supertype *container, int nowait)
|
|||
/* once an array has been deactivated we want to
|
||||
* ask the manager to discard it.
|
||||
*/
|
||||
if (!a->container) {
|
||||
if (!a->container || a->to_remove) {
|
||||
if (discard_this) {
|
||||
ap = &(*ap)->next;
|
||||
continue;
|
||||
|
@ -642,7 +642,7 @@ static int wait_and_act(struct supertype *container, int nowait)
|
|||
/* FIXME check if device->state_fd need to be cleared?*/
|
||||
signal_manager();
|
||||
}
|
||||
if (a->container) {
|
||||
if (a->container && !a->to_remove) {
|
||||
is_dirty = read_and_act(a);
|
||||
rv |= 1;
|
||||
dirty_arrays += is_dirty;
|
||||
|
@ -657,7 +657,7 @@ static int wait_and_act(struct supertype *container, int nowait)
|
|||
|
||||
/* propagate failures across container members */
|
||||
for (a = *aap; a ; a = a->next) {
|
||||
if (!a->container)
|
||||
if (!a->container || a->to_remove)
|
||||
continue;
|
||||
for (mdi = a->info.devs ; mdi ; mdi = mdi->next)
|
||||
if (mdi->curr_state & DS_FAULTY)
|
||||
|
|
Loading…
Reference in New Issue