Merge mdmon
This commit is contained in:
parent
f7dd881f90
commit
549e9569c6
13
Makefile
13
Makefile
|
@ -77,6 +77,11 @@ SRCS = mdadm.c config.c mdstat.c ReadMe.c util.c Manage.c Assemble.c Build.c \
|
||||||
mdopen.c super0.c super1.c super-ddf.c super-intel.c bitmap.c \
|
mdopen.c super0.c super1.c super-ddf.c super-intel.c bitmap.c \
|
||||||
restripe.c sysfs.c sha1.c mapfile.c crc32.c sg_io.c msg.c
|
restripe.c sysfs.c sha1.c mapfile.c crc32.c sg_io.c msg.c
|
||||||
|
|
||||||
|
MON_OBJS = mdmon.o monitor.o managemon.o util.o mdstat.o sysfs.o config.o \
|
||||||
|
Kill.o sg_io.o dlink.o ReadMe.o super0.o super1.o super-intel.o \
|
||||||
|
super-ddf.o sha1.o crc32.o
|
||||||
|
|
||||||
|
|
||||||
STATICSRC = pwgr.c
|
STATICSRC = pwgr.c
|
||||||
STATICOBJS = pwgr.o
|
STATICOBJS = pwgr.o
|
||||||
|
|
||||||
|
@ -88,7 +93,7 @@ ASSEMBLE_SRCS += mdopen.c mdstat.c
|
||||||
ASSEMBLE_FLAGS += -DMDASSEMBLE_AUTO
|
ASSEMBLE_FLAGS += -DMDASSEMBLE_AUTO
|
||||||
endif
|
endif
|
||||||
|
|
||||||
all : mdadm mdadm.man md.man mdadm.conf.man
|
all : mdadm mdmon mdadm.man md.man mdadm.conf.man
|
||||||
|
|
||||||
everything: all mdadm.static swap_super test_stripe \
|
everything: all mdadm.static swap_super test_stripe \
|
||||||
mdassemble mdassemble.static mdassemble.man \
|
mdassemble mdassemble.static mdassemble.man \
|
||||||
|
@ -118,6 +123,9 @@ mdadm.Os : $(SRCS) mdadm.h
|
||||||
mdadm.O2 : $(SRCS) mdadm.h
|
mdadm.O2 : $(SRCS) mdadm.h
|
||||||
gcc -o mdadm.O2 $(CFLAGS) -DHAVE_STDINT_H -O2 $(SRCS)
|
gcc -o mdadm.O2 $(CFLAGS) -DHAVE_STDINT_H -O2 $(SRCS)
|
||||||
|
|
||||||
|
mdmon : $(MON_OBJS)
|
||||||
|
$(CC) $(LDFLAGS) -o mdmon $(MON_OBJS) $(LDLIBS)
|
||||||
|
|
||||||
test_stripe : restripe.c mdadm.h
|
test_stripe : restripe.c mdadm.h
|
||||||
$(CC) $(CXFLAGS) $(LDFLAGS) -o test_stripe -DMAIN restripe.c
|
$(CC) $(CXFLAGS) $(LDFLAGS) -o test_stripe -DMAIN restripe.c
|
||||||
|
|
||||||
|
@ -182,7 +190,8 @@ test: mdadm test_stripe swap_super
|
||||||
@echo "Please run 'sh ./test' as root"
|
@echo "Please run 'sh ./test' as root"
|
||||||
|
|
||||||
clean :
|
clean :
|
||||||
rm -f mdadm $(OBJS) $(STATICOBJS) core *.man mdadm.tcc mdadm.uclibc mdadm.static *.orig *.porig *.rej *.alt \
|
rm -f mdadm mdmon $(OBJS) $(MON_OBJS) $(STATICOBJS) core *.man \
|
||||||
|
mdadm.tcc mdadm.uclibc mdadm.static *.orig *.porig *.rej *.alt \
|
||||||
mdadm.Os mdadm.O2 \
|
mdadm.Os mdadm.O2 \
|
||||||
mdassemble mdassemble.static mdassemble.uclibc mdassemble.klibc swap_super \
|
mdassemble mdassemble.static mdassemble.uclibc mdassemble.klibc swap_super \
|
||||||
init.cpio.gz mdadm.uclibc.static test_stripe
|
init.cpio.gz mdadm.uclibc.static test_stripe
|
||||||
|
|
|
@ -0,0 +1,309 @@
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The management thread for monitoring active md arrays.
|
||||||
|
* This thread does things which might block such as memory
|
||||||
|
* allocation.
|
||||||
|
* In particular:
|
||||||
|
*
|
||||||
|
* - Find out about new arrays in this container.
|
||||||
|
* Allocate the data structures and open the files.
|
||||||
|
*
|
||||||
|
* For this we watch /proc/mdstat and find new arrays with
|
||||||
|
* metadata type that confirms sharing. e.g. "md4"
|
||||||
|
* When we find a new array we slip it into the list of
|
||||||
|
* arrays and signal 'monitor' by writing to a pipe.
|
||||||
|
*
|
||||||
|
* - Respond to reshape requests by allocating new data structures
|
||||||
|
* and opening new files.
|
||||||
|
*
|
||||||
|
* These come as a change to raid_disks. We allocate a new
|
||||||
|
* version of the data structures and slip it into the list.
|
||||||
|
* 'monitor' will notice and release the old version.
|
||||||
|
* Changes to level, chunksize, layout.. do not need re-allocation.
|
||||||
|
* Reductions in raid_disks don't really either, but we handle
|
||||||
|
* them the same way for consistency.
|
||||||
|
*
|
||||||
|
* - When a device is added to the container, we add it to the metadata
|
||||||
|
* as a spare.
|
||||||
|
*
|
||||||
|
* - assist with activating spares by opening relevant sysfs file.
|
||||||
|
*
|
||||||
|
* - Pass on metadata updates from external programs such as
|
||||||
|
* mdadm creating a new array.
|
||||||
|
*
|
||||||
|
* This is most-messy.
|
||||||
|
* It might involve adding a new array or changing the status of
|
||||||
|
* a spare, or any reconfig that the kernel doesn't get involved in.
|
||||||
|
*
|
||||||
|
* The required updates are received via a named pipe. There will
|
||||||
|
* be one named pipe for each container. Each message contains a
|
||||||
|
* sync marker: 0x5a5aa5a5, a byte count, and the message. This is
|
||||||
|
* passed to the metadata handler which will interpret and process it.
|
||||||
|
* For 'DDF' messages are internal data blocks with the leading
|
||||||
|
* 'magic number' signifying what sort of data it is.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We select on /proc/mdstat and the named pipe.
|
||||||
|
* We create new arrays or updated version of arrays and slip
|
||||||
|
* them into the head of the list, then signal 'monitor' via a pipe write.
|
||||||
|
* 'monitor' will notice and place the old array on a return list.
|
||||||
|
* Metadata updates are placed on a queue just like they arrive
|
||||||
|
* from the named pipe.
|
||||||
|
*
|
||||||
|
* When new arrays are found based on correct metadata string, we
|
||||||
|
* need to identify them with an entry in the metadata. Maybe we require
|
||||||
|
* the metadata to be mdX/NN when NN is the index into an appropriate table.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* List of tasks:
|
||||||
|
* - Watch for spares to be added to the container, and write updated
|
||||||
|
* metadata to them.
|
||||||
|
* - Watch for new arrays using this container, confirm they match metadata
|
||||||
|
* and if so, start monitoring them
|
||||||
|
* - Watch for spares being added to monitored arrays. This shouldn't
|
||||||
|
* happen, as we should do all the adding. Just remove them.
|
||||||
|
* - Watch for change in raid-disks, chunk-size, etc. Update metadata and
|
||||||
|
* start a reshape.
|
||||||
|
*/
|
||||||
|
#ifndef _GNU_SOURCE
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#endif
|
||||||
|
#include "mdadm.h"
|
||||||
|
#include "mdmon.h"
|
||||||
|
#include <sys/socket.h>
|
||||||
|
|
||||||
|
|
||||||
|
static void free_aa(struct active_array *aa)
|
||||||
|
{
|
||||||
|
/* Note that this doesn't close fds, as they may be in used
|
||||||
|
* by a clone. Use close_aa for that.
|
||||||
|
*/
|
||||||
|
while (aa->info.devs) {
|
||||||
|
struct mdinfo *d = aa->info.devs;
|
||||||
|
aa->info.devs = d->next;
|
||||||
|
free(d);
|
||||||
|
}
|
||||||
|
free(aa);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void replace_array(struct supertype *container,
|
||||||
|
struct active_array *old,
|
||||||
|
struct active_array *new)
|
||||||
|
{
|
||||||
|
/* To replace an array, we add it to the top of the list
|
||||||
|
* marked with ->replaces to point to the original.
|
||||||
|
* 'monitor' will take the original out of the list
|
||||||
|
* and put it on 'discard_this'. We take it from there
|
||||||
|
* and discard it.
|
||||||
|
*/
|
||||||
|
|
||||||
|
while (pending_discard) {
|
||||||
|
while (discard_this == NULL)
|
||||||
|
sleep(1);
|
||||||
|
if (discard_this != pending_discard)
|
||||||
|
abort();
|
||||||
|
discard_this->next = NULL;
|
||||||
|
free_aa(discard_this);
|
||||||
|
discard_this = NULL;
|
||||||
|
pending_discard = NULL;
|
||||||
|
}
|
||||||
|
pending_discard = old;
|
||||||
|
new->replaces = old;
|
||||||
|
new->next = container->arrays;
|
||||||
|
container->arrays = new;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void manage_container(struct mdstat_ent *mdstat,
|
||||||
|
struct supertype *container)
|
||||||
|
{
|
||||||
|
/* The only thing of interest here is if a new device
|
||||||
|
* has been added to the container. We add it to the
|
||||||
|
* array ignoring any metadata on it.
|
||||||
|
* FIXME should we look for compatible metadata and take hints
|
||||||
|
* about spare assignment.... probably not.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
if (mdstat->devcnt != container->devcnt) {
|
||||||
|
/* read /sys/block/NAME/md/dev-??/block/dev to find out
|
||||||
|
* what is there, and compare with container->info.devs
|
||||||
|
* To see what is removed and what is added.
|
||||||
|
* These need to be remove from, or added to, the array
|
||||||
|
*/
|
||||||
|
// FIXME
|
||||||
|
container->devcnt = mdstat->devcnt;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void manage_member(struct mdstat_ent *mdstat,
|
||||||
|
struct active_array *a)
|
||||||
|
{
|
||||||
|
/* Compare mdstat info with known state of member array.
|
||||||
|
* We do not need to look for device state changes here, that
|
||||||
|
* is dealt with by the monitor.
|
||||||
|
*
|
||||||
|
* We just look for changes which suggest that a reshape is
|
||||||
|
* being requested.
|
||||||
|
* Unfortunately decreases in raid_disks don't show up in
|
||||||
|
* mdstat until the reshape completes FIXME.
|
||||||
|
*/
|
||||||
|
// FIXME
|
||||||
|
a->info.array.raid_disks = mdstat->raid_disks;
|
||||||
|
a->info.array.chunk_size = mdstat->chunk_size;
|
||||||
|
// MORE
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static void write_wakeup(struct supertype *c)
|
||||||
|
{
|
||||||
|
write(c->pipe[1], "PING", 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void manage_new(struct mdstat_ent *mdstat,
|
||||||
|
struct supertype *container)
|
||||||
|
{
|
||||||
|
/* A new array has appeared in this container.
|
||||||
|
* Hopefully it is already recorded in the metadata.
|
||||||
|
* Check, then create the new array to report it to
|
||||||
|
* the monitor.
|
||||||
|
*/
|
||||||
|
|
||||||
|
struct active_array *new;
|
||||||
|
struct mdinfo *mdi, *di;
|
||||||
|
char *n;
|
||||||
|
int inst;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
new = malloc(sizeof(*new));
|
||||||
|
|
||||||
|
new->devnum = mdstat->devnum;
|
||||||
|
|
||||||
|
new->prev_state = new->curr_state = new->next_state = inactive;
|
||||||
|
new->prev_action= new->curr_action= new->next_action= idle;
|
||||||
|
|
||||||
|
new->container = container;
|
||||||
|
|
||||||
|
n = &mdstat->metadata_version[10+strlen(container->devname)+1];
|
||||||
|
inst = atoi(n);
|
||||||
|
if (inst < 0)
|
||||||
|
abort();//FIXME
|
||||||
|
|
||||||
|
mdi = sysfs_read(-1, new->devnum,
|
||||||
|
GET_LEVEL|GET_CHUNK|GET_DISKS|
|
||||||
|
GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE);
|
||||||
|
if (!mdi) {
|
||||||
|
/* Eeek. Cannot monitor this array.
|
||||||
|
* Mark it to be ignored by setting container to NULL
|
||||||
|
*/
|
||||||
|
new->container = NULL;
|
||||||
|
replace_array(container, NULL, new);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
new->info.array = mdi->array;
|
||||||
|
|
||||||
|
for (i = 0; i < new->info.array.raid_disks; i++) {
|
||||||
|
struct mdinfo *newd = malloc(sizeof(*newd));
|
||||||
|
|
||||||
|
for (di = mdi->devs; di; di = di->next)
|
||||||
|
if (i == di->disk.raid_disk)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (di) {
|
||||||
|
memcpy(newd, di, sizeof(*newd));
|
||||||
|
|
||||||
|
sprintf(newd->sys_name, "rd%d", i);
|
||||||
|
|
||||||
|
newd->state_fd = sysfs_open(new->devnum,
|
||||||
|
newd->sys_name,
|
||||||
|
"state");
|
||||||
|
|
||||||
|
newd->prev_state = read_dev_state(newd->state_fd);
|
||||||
|
newd->curr_state = newd->curr_state;
|
||||||
|
} else {
|
||||||
|
newd->state_fd = -1;
|
||||||
|
}
|
||||||
|
newd->next = new->info.devs;
|
||||||
|
new->info.devs = newd;
|
||||||
|
}
|
||||||
|
new->action_fd = sysfs_open(new->devnum, NULL, "sync_action");
|
||||||
|
new->info.state_fd = sysfs_open(new->devnum, NULL, "array_state");
|
||||||
|
new->sync_pos_fd = sysfs_open(new->devnum, NULL, "sync_completed");
|
||||||
|
new->sync_pos = 0;
|
||||||
|
|
||||||
|
// finds and compares.
|
||||||
|
if (container->ss->open_new(container, new, inst) < 0) {
|
||||||
|
// FIXME close all those files
|
||||||
|
new->container = NULL;
|
||||||
|
replace_array(container, NULL, new);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
replace_array(container, NULL, new);
|
||||||
|
write_wakeup(container);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void manage(struct mdstat_ent *mdstat, struct active_array *aa,
|
||||||
|
struct supertype *container)
|
||||||
|
{
|
||||||
|
/* We have just read mdstat and need to compare it with
|
||||||
|
* the known active arrays.
|
||||||
|
* Arrays with the wrong metadata are ignored.
|
||||||
|
*/
|
||||||
|
|
||||||
|
for ( ; mdstat ; mdstat = mdstat->next) {
|
||||||
|
struct active_array *a;
|
||||||
|
if (mdstat->devnum == container->devnum) {
|
||||||
|
manage_container(mdstat, container);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (mdstat->metadata_version == NULL ||
|
||||||
|
strncmp(mdstat->metadata_version, "external:/", 10) != 0 ||
|
||||||
|
strncmp(mdstat->metadata_version+10, container->devname,
|
||||||
|
strlen(container->devname)) != 0 ||
|
||||||
|
mdstat->metadata_version[10+strlen(container->devname)]
|
||||||
|
!= '/')
|
||||||
|
/* Not for this array */
|
||||||
|
continue;
|
||||||
|
/* Looks like a member of this container */
|
||||||
|
for (a = aa; a; a = a->next) {
|
||||||
|
if (mdstat->devnum == a->devnum) {
|
||||||
|
if (a->container)
|
||||||
|
manage_member(mdstat, a);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (a == NULL)
|
||||||
|
manage_new(mdstat, container);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Accept one pending connection on listening socket 'pfd' and close
 * it again straight away.  Nothing happens when accept() fails.
 */
void read_sock(int pfd)
{
	int conn;

	// FIXME set non-blocking
	conn = accept(pfd, NULL, NULL);
	if (conn >= 0) {
		// FIXME do something useful
		close(conn);
	}
}
|
||||||
|
void do_manager(struct supertype *container)
|
||||||
|
{
|
||||||
|
struct mdstat_ent *mdstat;
|
||||||
|
|
||||||
|
do {
|
||||||
|
mdstat = mdstat_read(1, 0);
|
||||||
|
|
||||||
|
manage(mdstat, array_list, container);
|
||||||
|
|
||||||
|
read_sock(container->sock);
|
||||||
|
|
||||||
|
mdstat_wait_fd(container->sock);
|
||||||
|
} while(1);
|
||||||
|
}
|
35
mdadm.h
35
mdadm.h
|
@ -159,6 +159,11 @@ struct mdinfo {
|
||||||
char sys_name[20];
|
char sys_name[20];
|
||||||
struct mdinfo *devs;
|
struct mdinfo *devs;
|
||||||
struct mdinfo *next;
|
struct mdinfo *next;
|
||||||
|
|
||||||
|
/* Device info for mdmon: */
|
||||||
|
int state_fd;
|
||||||
|
int prev_state, curr_state, next_state;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct createinfo {
|
struct createinfo {
|
||||||
|
@ -271,12 +276,17 @@ struct mdstat_ent {
|
||||||
char *pattern; /* U or up, _ for down */
|
char *pattern; /* U or up, _ for down */
|
||||||
int percent; /* -1 if no resync */
|
int percent; /* -1 if no resync */
|
||||||
int resync; /* 1 if resync, 0 if recovery */
|
int resync; /* 1 if resync, 0 if recovery */
|
||||||
|
int devcnt;
|
||||||
|
int raid_disks;
|
||||||
|
int chunk_size;
|
||||||
|
char * metadata_version;
|
||||||
struct mdstat_ent *next;
|
struct mdstat_ent *next;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern struct mdstat_ent *mdstat_read(int hold, int start);
|
extern struct mdstat_ent *mdstat_read(int hold, int start);
|
||||||
extern void free_mdstat(struct mdstat_ent *ms);
|
extern void free_mdstat(struct mdstat_ent *ms);
|
||||||
extern void mdstat_wait(int seconds);
|
extern void mdstat_wait(int seconds);
|
||||||
|
extern void mdstat_wait_fd(int fd);
|
||||||
extern int mddev_busy(int devnum);
|
extern int mddev_busy(int devnum);
|
||||||
|
|
||||||
struct map_ent {
|
struct map_ent {
|
||||||
|
@ -304,6 +314,7 @@ extern void map_add(struct map_ent **melp,
|
||||||
#define GET_CACHE 16
|
#define GET_CACHE 16
|
||||||
#define GET_MISMATCH 32
|
#define GET_MISMATCH 32
|
||||||
#define GET_VERSION 64
|
#define GET_VERSION 64
|
||||||
|
#define GET_DISKS 128
|
||||||
|
|
||||||
#define GET_DEVS 1024 /* gets role, major, minor */
|
#define GET_DEVS 1024 /* gets role, major, minor */
|
||||||
#define GET_OFFSET 2048
|
#define GET_OFFSET 2048
|
||||||
|
@ -314,6 +325,7 @@ extern void map_add(struct map_ent **melp,
|
||||||
/* If fd >= 0, get the array it is open on,
|
/* If fd >= 0, get the array it is open on,
|
||||||
* else use devnum. >=0 -> major9. <0.....
|
* else use devnum. >=0 -> major9. <0.....
|
||||||
*/
|
*/
|
||||||
|
extern int sysfs_open(int devnum, char *devname, char *attr);
|
||||||
extern void sysfs_free(struct mdinfo *sra);
|
extern void sysfs_free(struct mdinfo *sra);
|
||||||
extern struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options);
|
extern struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options);
|
||||||
extern int sysfs_set_str(struct mdinfo *sra, struct mdinfo *dev,
|
extern int sysfs_set_str(struct mdinfo *sra, struct mdinfo *dev,
|
||||||
|
@ -350,6 +362,7 @@ extern mapping_t r5layout[], pers[], modes[], faultylayout[];
|
||||||
|
|
||||||
extern char *map_dev(int major, int minor, int create);
|
extern char *map_dev(int major, int minor, int create);
|
||||||
|
|
||||||
|
struct active_array;
|
||||||
|
|
||||||
extern struct superswitch {
|
extern struct superswitch {
|
||||||
void (*examine_super)(struct supertype *st, char *homehost);
|
void (*examine_super)(struct supertype *st, char *homehost);
|
||||||
|
@ -390,6 +403,14 @@ extern struct superswitch {
|
||||||
|
|
||||||
struct mdinfo *(*container_content)(struct supertype *st);
|
struct mdinfo *(*container_content)(struct supertype *st);
|
||||||
|
|
||||||
|
/* for mdmon */
|
||||||
|
int (*open_new)(struct supertype *c, struct active_array *a, int inst);
|
||||||
|
void (*mark_clean)(struct active_array *a, unsigned long long sync_pos);
|
||||||
|
void (*mark_dirty)(struct active_array *a);
|
||||||
|
void (*set_disk)(struct active_array *a, int n);
|
||||||
|
void (*sync_metadata)(struct active_array *a);
|
||||||
|
|
||||||
|
|
||||||
int major;
|
int major;
|
||||||
char *text_version;
|
char *text_version;
|
||||||
int swapuuid; /* true if uuid is bigending rather than hostendian */
|
int swapuuid; /* true if uuid is bigending rather than hostendian */
|
||||||
|
@ -406,6 +427,20 @@ struct supertype {
|
||||||
int container_member; /* numerical position in container */
|
int container_member; /* numerical position in container */
|
||||||
void *sb;
|
void *sb;
|
||||||
void *info;
|
void *info;
|
||||||
|
|
||||||
|
/* extra stuff used by mdmon */
|
||||||
|
struct active_array *arrays;
|
||||||
|
int devfd;
|
||||||
|
int sock; /* listen to external programs */
|
||||||
|
int pipe[2]; /* communicate between threads */
|
||||||
|
int devnum;
|
||||||
|
char *devname; /* e.g. md0. This appears in metadata_version:
|
||||||
|
* external:/md0/12
|
||||||
|
*/
|
||||||
|
int devcnt;
|
||||||
|
|
||||||
|
struct mdinfo *devs;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
extern struct supertype supertype_container_member;
|
extern struct supertype supertype_container_member;
|
||||||
|
|
|
@ -0,0 +1,222 @@
|
||||||
|
|
||||||
|
/*
|
||||||
|
* md array manager.
|
||||||
|
* When md arrays have user-space managed metadata, this is the program
|
||||||
|
* that does the managing.
|
||||||
|
*
|
||||||
|
* Given one argument: the name of the array (e.g. /dev/md0) that is
|
||||||
|
* the container.
|
||||||
|
* We fork off a helper that runs high priority and mlocked. It responds to
|
||||||
|
* device failures and other events that might stop writeout, or that are
|
||||||
|
* trivial to deal with.
|
||||||
|
* The main thread then watches for new arrays being created in the container
|
||||||
|
* and starts monitoring them too ... along with a few other tasks.
|
||||||
|
*
|
||||||
|
* The main thread communicates with the priority thread by writing over
|
||||||
|
* a pipe.
|
||||||
|
* Separate programs can communicate with the main thread via Unix-domain
|
||||||
|
* socket.
|
||||||
|
* The two threads share address space and open file table.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _GNU_SOURCE
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#include <sys/un.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
|
||||||
|
#include <sched.h>
|
||||||
|
|
||||||
|
#include "mdadm.h"
|
||||||
|
#include "mdmon.h"
|
||||||
|
|
||||||
|
struct active_array *array_list;
|
||||||
|
struct active_array *discard_this;
|
||||||
|
struct active_array *pending_discard;
|
||||||
|
|
||||||
|
/*
 * Entry point handed to clone(): 'v' is the struct supertype of the
 * container.  Runs do_monitor() on it and returns 0 afterwards.
 */
int run_child(void *v)
{
	do_monitor((struct supertype *)v);
	return 0;
}
|
||||||
|
|
||||||
|
int clone_monitor(struct supertype *container)
|
||||||
|
{
|
||||||
|
int pfd[2];
|
||||||
|
static char stack[4096];
|
||||||
|
int rv;
|
||||||
|
|
||||||
|
pipe(container->pipe);
|
||||||
|
|
||||||
|
rv = clone(run_child, stack+4096-64,
|
||||||
|
CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD,
|
||||||
|
container);
|
||||||
|
|
||||||
|
if (rv < 0)
|
||||||
|
return rv;
|
||||||
|
return pfd[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct superswitch *find_metadata_methods(char *vers)
|
||||||
|
{
|
||||||
|
if (strcmp(vers, "ddf") == 0)
|
||||||
|
return &super_ddf;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Create /var/run/mdadm/<devname>.pid holding our pid followed by a
 * newline.
 *
 * The file is opened with O_EXCL, so an already existing pid file
 * makes this fail.
 *
 * Returns 0 on success, -1 when the name does not fit the path
 * buffer, the file cannot be created, or the pid cannot be written
 * (in which case the empty file is removed again).
 */
static int make_pidfile(char *devname)
{
	char path[100];
	char pid[24];	/* room for any int plus newline and NUL */
	int fd;
	int n;

	n = snprintf(path, sizeof(path), "/var/run/mdadm/%s.pid", devname);
	if (n < 0 || (size_t)n >= sizeof(path))
		return -1;

	fd = open(path, O_RDWR|O_CREAT|O_EXCL, 0600);
	if (fd < 0)
		return -1;

	n = snprintf(pid, sizeof(pid), "%d\n", (int)getpid());
	if (write(fd, pid, n) != n) {
		/* a pid file without a pid would only block later starts */
		close(fd);
		unlink(path);
		return -1;
	}
	close(fd);
	return 0;
}
|
||||||
|
|
||||||
|
/*
 * Create the control socket /var/run/mdadm/<devname>.sock: a
 * non-blocking, listening PF_LOCAL stream socket.  Any stale file of
 * that name is removed first.
 *
 * Returns the listening descriptor, or -1 when the path does not fit
 * into sun_path or the socket cannot be created, bound or put into
 * listening state.
 */
static int make_control_sock(char *devname)
{
	struct sockaddr_un addr;
	int sfd;
	long fl;
	int n;

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = PF_LOCAL;
	/* build the path in place so it can never outgrow sun_path */
	n = snprintf(addr.sun_path, sizeof(addr.sun_path),
		     "/var/run/mdadm/%s.sock", devname);
	if (n < 0 || (size_t)n >= sizeof(addr.sun_path))
		return -1;

	unlink(addr.sun_path);
	sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
	if (sfd < 0)
		return -1;

	if (bind(sfd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    listen(sfd, 10) < 0) {
		close(sfd);
		return -1;
	}

	fl = fcntl(sfd, F_GETFL, 0);
	if (fl >= 0) {
		fl |= O_NONBLOCK;
		fcntl(sfd, F_SETFL, fl);
	}
	return sfd;
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int mdfd;
|
||||||
|
int pipefd;
|
||||||
|
struct mdinfo *mdi, *di;
|
||||||
|
struct supertype *container;
|
||||||
|
if (argc != 2) {
|
||||||
|
fprintf(stderr, "Usage: md-manage /device/name/for/container\n");
|
||||||
|
exit(2);
|
||||||
|
}
|
||||||
|
mdfd = open(argv[1], O_RDWR);
|
||||||
|
if (mdfd < 0) {
|
||||||
|
fprintf(stderr, "md-manage: %s: %s\n", argv[1],
|
||||||
|
strerror(errno));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (md_get_version(mdfd) < 0) {
|
||||||
|
fprintf(stderr, "md-manage: %s: Not an md device\n",
|
||||||
|
argv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* hopefully it is a container - we'll check later */
|
||||||
|
|
||||||
|
container = malloc(sizeof(*container));
|
||||||
|
container->devfd = mdfd;
|
||||||
|
container->devnum = fd2devnum(mdfd);
|
||||||
|
container->devname = devnum2devname(container->devnum);
|
||||||
|
|
||||||
|
/* If this fails, we hope it already exists */
|
||||||
|
mkdir("/var/run/mdadm", 0600);
|
||||||
|
/* pid file lives in /var/run/mdadm/mdXX.pid */
|
||||||
|
if (make_pidfile(container->devname) < 0) {
|
||||||
|
fprintf(stderr, "md-manage: %s already managed\n",
|
||||||
|
container->devname);
|
||||||
|
exit(3);
|
||||||
|
}
|
||||||
|
|
||||||
|
container->sock = make_control_sock(container->devname);
|
||||||
|
if (container->sock < 0) {
|
||||||
|
fprintf(stderr, "mdmon: Cannot create socket in /var/run/mdadm\n");
|
||||||
|
exit(3);
|
||||||
|
}
|
||||||
|
container->arrays = NULL;
|
||||||
|
|
||||||
|
mdi = sysfs_read(mdfd, container->devnum,
|
||||||
|
GET_VERSION|GET_LEVEL|GET_DEVS);
|
||||||
|
|
||||||
|
if (!mdi) {
|
||||||
|
fprintf(stderr, "mdmon: failed to load sysfs info for %s\n",
|
||||||
|
container->devname);
|
||||||
|
exit(3);
|
||||||
|
}
|
||||||
|
if (mdi->array.level != UnSet) {
|
||||||
|
fprintf(stderr, "mdmon: %s is not a container - cannot monitor\n",
|
||||||
|
argv[1]);
|
||||||
|
exit(3);
|
||||||
|
}
|
||||||
|
if (mdi->array.major_version != -1 ||
|
||||||
|
mdi->array.minor_version != -2) {
|
||||||
|
fprintf(stderr, "mdmon: %s does not use external metadata - cannot monitor\n",
|
||||||
|
argv[1]);
|
||||||
|
exit(3);
|
||||||
|
}
|
||||||
|
|
||||||
|
container->ss = find_metadata_methods(mdi->text_version);
|
||||||
|
if (container->ss == NULL) {
|
||||||
|
fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n",
|
||||||
|
argv[1], mdi->text_version);
|
||||||
|
exit(3);
|
||||||
|
}
|
||||||
|
|
||||||
|
container->devs = NULL;
|
||||||
|
for (di = mdi->devs; di; di = di->next) {
|
||||||
|
struct mdinfo *cd = malloc(sizeof(*cd));
|
||||||
|
cd = di;
|
||||||
|
cd->next = container->devs;
|
||||||
|
container->devs = cd;
|
||||||
|
}
|
||||||
|
sysfs_free(mdi);
|
||||||
|
|
||||||
|
|
||||||
|
if (container->ss->load_super(container, mdfd, argv[1])) {
|
||||||
|
fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
|
||||||
|
argv[1]);
|
||||||
|
exit(3);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
mlockall(MCL_FUTURE);
|
||||||
|
|
||||||
|
pipefd = clone_monitor(container);
|
||||||
|
if (pipefd < 0) {
|
||||||
|
fprintf(stderr, "md-manage: failed to start monitor process: %s\n",
|
||||||
|
strerror(errno));
|
||||||
|
exit(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
do_manager(container);
|
||||||
|
|
||||||
|
exit(0);
|
||||||
|
}
|
|
@ -0,0 +1,41 @@
|
||||||
|
|
||||||
|
/*
 * Array states.  The numeric value of each enumerator is the index of
 * its name in the array_states[] string table, so the order here must
 * not change.  bad_word sits at the index of that table's NULL
 * terminator and therefore stands for "no known state matched".
 */
enum array_state {
	clear,		/* "clear" */
	inactive,	/* "inactive" */
	suspended,	/* "suspended" */
	readonly,	/* "readonly" */
	read_auto,	/* "read-auto" */
	clean,		/* "clean" */
	active,		/* "active" */
	write_pending,	/* "write-pending" */
	active_idle,	/* "active-idle" */
	bad_word
};
|
||||||
|
|
||||||
|
/*
 * Sync actions.  Values index the sync_actions[] string table in the
 * same order; bad_action lines up with that table's NULL terminator.
 */
enum sync_action {
	idle,
	reshape,
	resync,
	recover,
	check,
	repair,
	bad_action
};
|
||||||
|
|
||||||
|
|
||||||
|
struct active_array {
|
||||||
|
struct mdinfo info;
|
||||||
|
struct supertype *container;
|
||||||
|
struct active_array *next, *replaces;
|
||||||
|
|
||||||
|
int action_fd;
|
||||||
|
int sync_pos_fd;
|
||||||
|
|
||||||
|
enum array_state prev_state, curr_state, next_state;
|
||||||
|
enum sync_action prev_action, curr_action, next_action;
|
||||||
|
|
||||||
|
int devnum;
|
||||||
|
|
||||||
|
unsigned long long sync_pos;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define MD_MAJOR 9
|
||||||
|
|
||||||
|
extern struct active_array *container;
|
||||||
|
extern struct active_array *array_list;
|
||||||
|
extern struct active_array *discard_this;
|
||||||
|
extern struct active_array *pending_discard;
|
||||||
|
|
||||||
|
|
||||||
|
void do_monitor(struct supertype *container);
|
||||||
|
void do_manager(struct supertype *container);
|
||||||
|
|
||||||
|
int read_dev_state(int fd);
|
||||||
|
|
||||||
|
struct mdstat_ent *mdstat_read(int hold, int start);
|
||||||
|
|
||||||
|
extern struct superswitch super_ddf, super_ddf_bvd, super_ddf_svd;
|
60
mdstat.c
60
mdstat.c
|
@ -86,6 +86,7 @@
|
||||||
#include "mdadm.h"
|
#include "mdadm.h"
|
||||||
#include "dlink.h"
|
#include "dlink.h"
|
||||||
#include <sys/select.h>
|
#include <sys/select.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
|
||||||
void free_mdstat(struct mdstat_ent *ms)
|
void free_mdstat(struct mdstat_ent *ms)
|
||||||
{
|
{
|
||||||
|
@ -158,6 +159,10 @@ struct mdstat_ent *mdstat_read(int hold, int start)
|
||||||
ent->percent = -1;
|
ent->percent = -1;
|
||||||
ent->active = -1;
|
ent->active = -1;
|
||||||
ent->resync = 0;
|
ent->resync = 0;
|
||||||
|
ent->metadata_version = NULL;
|
||||||
|
ent->raid_disks = 0;
|
||||||
|
ent->chunk_size = 0;
|
||||||
|
ent->devcnt = 0;
|
||||||
|
|
||||||
ent->dev = strdup(line);
|
ent->dev = strdup(line);
|
||||||
ent->devnum = devnum;
|
ent->devnum = devnum;
|
||||||
|
@ -176,22 +181,32 @@ struct mdstat_ent *mdstat_read(int hold, int start)
|
||||||
in_devs = 1;
|
in_devs = 1;
|
||||||
} else if (in_devs && strcmp(w, "blocks")==0)
|
} else if (in_devs && strcmp(w, "blocks")==0)
|
||||||
in_devs = 0;
|
in_devs = 0;
|
||||||
else if (in_devs && strncmp(w, "md", 2)==0) {
|
else if (in_devs) {
|
||||||
/* This has an md device as a component.
|
ent->devcnt++;
|
||||||
* If that device is already in the list,
|
if (strncmp(w, "md", 2)==0) {
|
||||||
* make sure we insert before there.
|
/* This has an md device as a component.
|
||||||
*/
|
* If that device is already in the
|
||||||
struct mdstat_ent **ih;
|
* list, make sure we insert before
|
||||||
int dn2;
|
* there.
|
||||||
if (strncmp(w, "md_d", 4)==0)
|
*/
|
||||||
dn2 = -1-strtoul(w+4, &ep, 10);
|
struct mdstat_ent **ih;
|
||||||
else
|
int dn2;
|
||||||
dn2 = strtoul(w+2, &ep, 10);
|
if (strncmp(w, "md_d", 4)==0)
|
||||||
ih = &all;
|
dn2 = -1-strtoul(w+4, &ep, 10);
|
||||||
while (ih != insert_here && *ih &&
|
else
|
||||||
(*ih)->devnum != dn2)
|
dn2 = strtoul(w+2, &ep, 10);
|
||||||
ih = & (*ih)->next;
|
ih = &all;
|
||||||
insert_here = ih;
|
while (ih != insert_here && *ih &&
|
||||||
|
(*ih)->devnum != dn2)
|
||||||
|
ih = & (*ih)->next;
|
||||||
|
insert_here = ih;
|
||||||
|
}
|
||||||
|
} else if (strcmp(w, "super") == 0 &&
|
||||||
|
dl_next(w) != line) {
|
||||||
|
w = dl_next(w);
|
||||||
|
ent->metadata_version = strdup(w);
|
||||||
|
} else if (w[0] == '[' && isdigit(w[1])) {
|
||||||
|
ent->raid_disks = atoi(w+1);
|
||||||
} else if (!ent->pattern &&
|
} else if (!ent->pattern &&
|
||||||
w[0] == '[' &&
|
w[0] == '[' &&
|
||||||
(w[1] == 'U' || w[1] == '_')) {
|
(w[1] == 'U' || w[1] == '_')) {
|
||||||
|
@ -256,6 +271,19 @@ void mdstat_wait(int seconds)
|
||||||
select(mdstat_fd >2 ? mdstat_fd+1:3, NULL, NULL, &fds, &tm);
|
select(mdstat_fd >2 ? mdstat_fd+1:3, NULL, NULL, &fds, &tm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void mdstat_wait_fd(int fd)
|
||||||
|
{
|
||||||
|
fd_set fds, rfds;
|
||||||
|
|
||||||
|
FD_ZERO(&fds);
|
||||||
|
FD_ZERO(&rfds);
|
||||||
|
if (mdstat_fd >= 0)
|
||||||
|
FD_SET(mdstat_fd, &fds);
|
||||||
|
FD_SET(fd, &rfds);
|
||||||
|
|
||||||
|
select(mdstat_fd >2 ? mdstat_fd+1:3, &rfds, NULL, &fds, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
int mddev_busy(int devnum)
|
int mddev_busy(int devnum)
|
||||||
{
|
{
|
||||||
struct mdstat_ent *mdstat = mdstat_read(0, 0);
|
struct mdstat_ent *mdstat = mdstat_read(0, 0);
|
||||||
|
|
|
@ -0,0 +1,372 @@
|
||||||
|
|
||||||
|
#include "mdadm.h"
|
||||||
|
#include "mdmon.h"
|
||||||
|
|
||||||
|
#include <sys/select.h>
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Names of the array states, indexed by enum array_state.
 * The NULL terminator ends the search in match_word().
 */
static char *array_states[] = {
	"clear",
	"inactive",
	"suspended",
	"readonly",
	"read-auto",
	"clean",
	"active",
	"write-pending",
	"active-idle",
	NULL
};
|
||||||
|
/*
 * Names of the sync actions, indexed by enum sync_action and
 * terminated by NULL.
 */
static char *sync_actions[] = {
	"idle",
	"reshape",
	"resync",
	"recover",
	"check",
	"repair",
	NULL
};
|
||||||
|
|
||||||
|
/*
 * Write the string 'attr' (without its terminating NUL) to 'fd'.
 * Returns whatever write() returned.
 */
static int write_attr(char *attr, int fd)
{
	size_t count = strlen(attr);

	return write(fd, attr, count);
}
|
||||||
|
|
||||||
|
/*
 * Add 'fd' to the set 'fds' and raise *maxfd to it when it is larger.
 * Negative descriptors are silently skipped.
 */
static void add_fd(fd_set *fds, int *maxfd, int fd)
{
	if (fd >= 0) {
		if (*maxfd < fd)
			*maxfd = fd;
		FD_SET(fd, fds);
	}
}
|
||||||
|
|
||||||
|
/*
 * Read an attribute from the start of 'fd' into 'buf' (capacity 'len').
 *
 * The result is always NUL-terminated and one trailing newline is
 * stripped.  Returns the number of bytes read (the stripped newline
 * included), or 0 with an empty string when 'fd' is negative or
 * nothing could be read.
 */
static int read_attr(char *buf, int len, int fd)
{
	int got = 0;

	if (fd >= 0) {
		/* rewind so that every call sees the current value */
		lseek(fd, 0, SEEK_SET);
		got = read(fd, buf, len - 1);
	}
	if (got <= 0) {
		buf[0] = 0;
		return 0;
	}

	buf[got] = 0;
	if (buf[got - 1] == '\n')
		buf[got - 1] = 0;
	return got;
}
|
||||||
|
|
||||||
|
static int get_sync_pos(struct active_array *a)
|
||||||
|
{
|
||||||
|
char buf[30];
|
||||||
|
int n;
|
||||||
|
|
||||||
|
n = read_attr(buf, 30, a->sync_pos_fd);
|
||||||
|
if (n <= 0)
|
||||||
|
return n;
|
||||||
|
|
||||||
|
if (strncmp(buf, "max", 3) == 0) {
|
||||||
|
a->sync_pos = ~(unsigned long long)0;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
a->sync_pos = strtoull(buf, NULL, 10);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Return 1 if 'attr' (text read from a sysfs file) matches 'str', else 0.
 *
 * 'attr' matches when it equals 'str' exactly, or when it begins with
 * 'str' and the next character is a comma or a newline.
 */
static int attr_match(const char *attr, const char *str)
{
	/* Walk the common prefix of the two strings. */
	for (; *attr && *attr == *str; attr++, str++)
		;

	/* All of 'str' must have been consumed. */
	if (*str != '\0')
		return 0;

	/* 'attr' may only continue with a separator. */
	return *attr == '\0' || *attr == ',' || *attr == '\n';
}
|
||||||
|
|
||||||
|
/*
 * Return the index of the first entry in the NULL-terminated 'list' that
 * attr_match() accepts for 'word'.  When nothing matches, the index of the
 * terminating NULL entry is returned.
 */
static int match_word(const char *word, char **list)
{
	int n = 0;

	while (list[n] && !attr_match(word, list[n]))
		n++;
	return n;
}
|
||||||
|
|
||||||
|
static enum array_state read_state(int fd)
|
||||||
|
{
|
||||||
|
char buf[20];
|
||||||
|
int n = read_attr(buf, 20, fd);
|
||||||
|
|
||||||
|
if (n <= 0)
|
||||||
|
return bad_word;
|
||||||
|
return (enum array_state) match_word(buf, array_states);
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum sync_action read_action( int fd)
|
||||||
|
{
|
||||||
|
char buf[20];
|
||||||
|
int n = read_attr(buf, 20, fd);
|
||||||
|
|
||||||
|
if (n <= 0)
|
||||||
|
return bad_action;
|
||||||
|
return (enum sync_action) match_word(buf, sync_actions);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Per-device state bits.  read_dev_state() sets the first four from the
 * words found in a device's state attribute; DS_REMOVE is used by
 * read_and_act() as a requested next state (it writes "remove").
 */
#define DS_FAULTY	(1 << 0)	/* "faulty" */
#define DS_INSYNC	(1 << 1)	/* "in_sync" */
#define DS_WRITE_MOSTLY	(1 << 2)	/* "write_mostly" */
#define DS_SPARE	(1 << 3)	/* "spare" */
#define DS_REMOVE	(1 << 10)
|
||||||
|
|
||||||
|
int read_dev_state(int fd)
|
||||||
|
{
|
||||||
|
char buf[60];
|
||||||
|
int n = read_attr(buf, 60, fd);
|
||||||
|
char *cp;
|
||||||
|
int rv = 0;
|
||||||
|
|
||||||
|
if (n <= 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
cp = buf;
|
||||||
|
while (cp) {
|
||||||
|
if (attr_match("faulty", cp))
|
||||||
|
rv |= DS_FAULTY;
|
||||||
|
if (attr_match("in_sync", cp))
|
||||||
|
rv |= DS_INSYNC;
|
||||||
|
if (attr_match("write_mostly", cp))
|
||||||
|
rv |= DS_WRITE_MOSTLY;
|
||||||
|
if (attr_match("spare", cp))
|
||||||
|
rv |= DS_SPARE;
|
||||||
|
cp = strchr(cp, ',');
|
||||||
|
if (cp)
|
||||||
|
cp++;
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Monitor a set of active md arrays - all of which share the
|
||||||
|
* same metadata - and respond to events that require
|
||||||
|
* metadata update.
|
||||||
|
*
|
||||||
|
* New arrays are detected by another thread which allocates
|
||||||
|
* required memory and attaches the data structure to our list.
|
||||||
|
*
|
||||||
|
* Events:
|
||||||
|
* Array stops.
|
||||||
|
* This is detected by array_state going to 'clear' or 'inactive'.
|
||||||
|
* while we thought it was active.
|
||||||
|
* Response is to mark metadata as clean and 'clear' the array(??)
|
||||||
|
* write-pending
|
||||||
|
* array_state is 'write-pending'
|
||||||
|
* We mark metadata as 'dirty' then set array to 'active'.
|
||||||
|
* active_idle
|
||||||
|
* Either ignore, or mark clean, then mark metadata as clean.
|
||||||
|
*
|
||||||
|
* device fails
|
||||||
|
* detected by rd-N/state reporting "faulty"
|
||||||
|
* mark device as 'failed' in metadata, then remove device
|
||||||
|
* by writing 'remove' to rd/state.
|
||||||
|
*
|
||||||
|
* sync completes
|
||||||
|
* sync_action was 'resync' and becomes 'idle' and resync_start becomes
|
||||||
|
* MaxSector
|
||||||
|
* Notify metadata that sync is complete.
|
||||||
|
* "Deal with Degraded"
|
||||||
|
*
|
||||||
|
* recovery completes
|
||||||
|
* sync_action changes from 'recover' to 'idle'
|
||||||
|
* Check each device state and mark metadata if 'faulty' or 'in_sync'.
|
||||||
|
* "Deal with Degraded"
|
||||||
|
*
|
||||||
|
* deal with degraded array
|
||||||
|
* We only do this when first noticing the array is degraded.
|
||||||
|
* This can be when we first see the array, when sync completes or
|
||||||
|
* when recovery completes.
|
||||||
|
*
|
||||||
|
* Check if number of failed devices suggests recovery is needed, and
|
||||||
|
* skip if not.
|
||||||
|
* Ask metadata for a spare device
|
||||||
|
* Add device as not in_sync and give a role
|
||||||
|
* Update metadata.
|
||||||
|
* Start recovery.
|
||||||
|
*
|
||||||
|
* deal with resync
|
||||||
|
* This only happens on finding a new array....
|
||||||
|
* Maybe this is done by mdadm before passing the array to us?
|
||||||
|
*
|
||||||
|
* If array is 'clean' but metadata is 'dirty', start a resync
|
||||||
|
* and mark array as 'dirty'.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* We wait for a change (poll/select) on array_state, sync_action, and
|
||||||
|
* each rd-X/state file.
|
||||||
|
* When we get any change, we check everything. So read each state file,
|
||||||
|
* then decide what to do.
|
||||||
|
*
|
||||||
|
* The core action is to write new metadata to all devices in the array.
|
||||||
|
* This is done at most once on any wakeup.
|
||||||
|
* After that we might:
|
||||||
|
* - update the array_state
|
||||||
|
* - set the role of some devices.
|
||||||
|
* - request a sync_action
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int read_and_act(struct active_array *a)
|
||||||
|
{
|
||||||
|
int check_degraded;
|
||||||
|
struct mdinfo *mdi;
|
||||||
|
|
||||||
|
a->next_state = bad_word;
|
||||||
|
a->next_action = bad_action;
|
||||||
|
|
||||||
|
a->curr_state = read_state(a->info.state_fd);
|
||||||
|
a->curr_action = read_action(a->action_fd);
|
||||||
|
for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
|
||||||
|
mdi->next_state = 0;
|
||||||
|
mdi->curr_state = read_dev_state(mdi->state_fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (a->curr_state <= inactive &&
|
||||||
|
a->prev_state > inactive) {
|
||||||
|
/* array has been stopped */
|
||||||
|
get_sync_pos(a);
|
||||||
|
a->container->ss->mark_clean(a, a->sync_pos);
|
||||||
|
a->next_state = clear;
|
||||||
|
}
|
||||||
|
if (a->curr_state == write_pending) {
|
||||||
|
a->container->ss->mark_dirty(a);
|
||||||
|
a->next_state = active;
|
||||||
|
}
|
||||||
|
if (a->curr_state == active_idle) {
|
||||||
|
/* Set array to 'clean' FIRST, then
|
||||||
|
* a->ss->mark_clean(a);
|
||||||
|
* just ignore for now.
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
if (a->curr_state == readonly) {
|
||||||
|
/* Well, I'm ready to handle things, so
|
||||||
|
* read-auto is OK. FIXME what if we really want
|
||||||
|
* readonly ???
|
||||||
|
*/
|
||||||
|
a->next_state = read_auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (a->curr_action == idle &&
|
||||||
|
a->prev_action == resync) {
|
||||||
|
/* check resync_start to see if it is 'max'.
|
||||||
|
* Do I open here, or have it open the whole time?
|
||||||
|
*/
|
||||||
|
get_sync_pos(a);
|
||||||
|
check_degraded = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (a->curr_action == idle &&
|
||||||
|
a->prev_action == recover) {
|
||||||
|
for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
|
||||||
|
a->container->ss->set_disk(a, mdi->disk.raid_disk);
|
||||||
|
if (! (mdi->curr_state & DS_INSYNC))
|
||||||
|
check_degraded = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
|
||||||
|
if (mdi->curr_state & DS_FAULTY) {
|
||||||
|
a->container->ss->set_disk(a, mdi->disk.raid_disk);
|
||||||
|
check_degraded = 1;
|
||||||
|
mdi->next_state = DS_REMOVE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (check_degraded) {
|
||||||
|
// FIXME;
|
||||||
|
}
|
||||||
|
|
||||||
|
a->container->ss->sync_metadata(a);
|
||||||
|
|
||||||
|
/* Effect state changes in the array */
|
||||||
|
if (a->next_state != bad_word)
|
||||||
|
write_attr(array_states[a->next_state], a->info.state_fd);
|
||||||
|
if (a->next_action != bad_action)
|
||||||
|
write_attr(sync_actions[a->next_action], a->action_fd);
|
||||||
|
for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
|
||||||
|
if (mdi->next_state == DS_REMOVE)
|
||||||
|
write_attr("remove", mdi->state_fd);
|
||||||
|
if (mdi->next_state & DS_INSYNC)
|
||||||
|
write_attr("+in_sync", mdi->state_fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* move curr_ to prev_ */
|
||||||
|
a->prev_state = a->curr_state;
|
||||||
|
|
||||||
|
a->prev_action = a->curr_action;
|
||||||
|
|
||||||
|
for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
|
||||||
|
mdi->prev_state = mdi->curr_state;
|
||||||
|
mdi->next_state = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int wait_and_act(struct active_array *aa, int pfd, int nowait)
|
||||||
|
{
|
||||||
|
fd_set rfds;
|
||||||
|
int maxfd = 0;
|
||||||
|
struct active_array *a;
|
||||||
|
int rv;
|
||||||
|
|
||||||
|
FD_ZERO(&rfds);
|
||||||
|
|
||||||
|
add_fd(&rfds, &maxfd, pfd);
|
||||||
|
for (a = aa ; a ; a = a->next) {
|
||||||
|
struct mdinfo *mdi;
|
||||||
|
|
||||||
|
add_fd(&rfds, &maxfd, a->info.state_fd);
|
||||||
|
add_fd(&rfds, &maxfd, a->action_fd);
|
||||||
|
for (mdi = a->info.devs ; mdi ; mdi = mdi->next)
|
||||||
|
add_fd(&rfds, &maxfd, mdi->state_fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!nowait) {
|
||||||
|
rv = select(maxfd+1, &rfds, NULL, NULL, NULL);
|
||||||
|
|
||||||
|
if (rv <= 0)
|
||||||
|
return rv;
|
||||||
|
|
||||||
|
if (FD_ISSET(pfd, &rfds)) {
|
||||||
|
char buf[4];
|
||||||
|
read(pfd, buf, 4);
|
||||||
|
; // FIXME read from the pipe
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (a = aa; a ; a = a->next) {
|
||||||
|
if (a->replaces) {
|
||||||
|
struct active_array **ap;
|
||||||
|
for (ap = &a->next; *ap && *ap != a->replaces;
|
||||||
|
ap = & (*ap)->next)
|
||||||
|
;
|
||||||
|
if (*ap)
|
||||||
|
*ap = (*ap)->next;
|
||||||
|
discard_this = a->replaces;
|
||||||
|
a->replaces = NULL;
|
||||||
|
}
|
||||||
|
rv += read_and_act(a);
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
void do_monitor(struct supertype *container)
|
||||||
|
{
|
||||||
|
int rv;
|
||||||
|
int first = 1;
|
||||||
|
do {
|
||||||
|
rv = wait_and_act(container->arrays, container->pipe[0], first);
|
||||||
|
first = 0;
|
||||||
|
} while (rv >= 0);
|
||||||
|
}
|
39
super-ddf.c
39
super-ddf.c
|
@ -27,6 +27,7 @@
|
||||||
|
|
||||||
#define HAVE_STDINT_H 1
|
#define HAVE_STDINT_H 1
|
||||||
#include "mdadm.h"
|
#include "mdadm.h"
|
||||||
|
#include "mdmon.h"
|
||||||
#include "sha1.h"
|
#include "sha1.h"
|
||||||
#include <values.h>
|
#include <values.h>
|
||||||
|
|
||||||
|
@ -416,7 +417,7 @@ struct ddf_super {
|
||||||
#define offsetof(t,f) ((size_t)&(((t*)0)->f))
|
#define offsetof(t,f) ((size_t)&(((t*)0)->f))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern struct superswitch super_ddf_container, super_ddf_bvd;
|
extern struct superswitch super_ddf_container, super_ddf_bvd, super_ddf;
|
||||||
|
|
||||||
static int calc_crc(void *buf, int len)
|
static int calc_crc(void *buf, int len)
|
||||||
{
|
{
|
||||||
|
@ -2442,6 +2443,32 @@ static int compare_super_ddf(struct supertype *st, struct supertype *tst)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * mdmon 'open_new' handler for DDF.  Currently a stub: it only reports
 * the instance number on stderr and returns 0.
 */
static int ddf_open_new(struct supertype *c, struct active_array *a, int inst)
{
	int rv = 0;

	fprintf(stderr, "ddf: open_new %d\n", inst);
	return rv;
}
|
||||||
|
|
||||||
|
/*
 * mdmon 'mark_clean' handler for DDF.  Currently a stub: it only reports
 * the given sync position on stderr.
 */
static void ddf_mark_clean(struct active_array *a, unsigned long long sync_pos)
{
	FILE *out = stderr;

	fprintf(out, "ddf: mark clean %llu\n", sync_pos);
}
|
||||||
|
|
||||||
|
/*
 * mdmon 'mark_dirty' handler for DDF.  Currently a stub: it only prints
 * a trace line on stderr.
 */
static void ddf_mark_dirty(struct active_array *a)
{
	fputs("ddf: mark dirty\n", stderr);
}
|
||||||
|
|
||||||
|
/*
 * mdmon 'set_disk' handler for DDF.  Currently a stub: it only reports
 * the disk number 'n' on stderr.
 */
static void ddf_set_disk(struct active_array *a, int n)
{
	FILE *out = stderr;

	fprintf(out, "ddf: set_disk %d\n", n);
}
|
||||||
|
|
||||||
|
/*
 * mdmon 'sync_metadata' handler for DDF.  Currently a stub: it only
 * prints a trace line on stderr.
 */
static void ddf_sync_metadata(struct active_array *a)
{
	fputs("ddf: sync_metadata\n", stderr);
}
|
||||||
|
|
||||||
struct superswitch super_ddf = {
|
struct superswitch super_ddf = {
|
||||||
#ifndef MDASSEMBLE
|
#ifndef MDASSEMBLE
|
||||||
.examine_super = examine_super_ddf,
|
.examine_super = examine_super_ddf,
|
||||||
|
@ -2471,6 +2498,16 @@ struct superswitch super_ddf = {
|
||||||
.swapuuid = 0,
|
.swapuuid = 0,
|
||||||
.external = 1,
|
.external = 1,
|
||||||
.text_version = "ddf",
|
.text_version = "ddf",
|
||||||
|
|
||||||
|
/* for mdmon */
|
||||||
|
.open_new = ddf_open_new,
|
||||||
|
.load_super = load_super_ddf,
|
||||||
|
.mark_clean = ddf_mark_clean,
|
||||||
|
.mark_dirty = ddf_mark_dirty,
|
||||||
|
.set_disk = ddf_set_disk,
|
||||||
|
.sync_metadata = ddf_sync_metadata,
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Super_ddf_container is set by validate_geometry_ddf when given a
|
/* Super_ddf_container is set by validate_geometry_ddf when given a
|
||||||
|
|
29
sysfs.c
29
sysfs.c
|
@ -56,6 +56,29 @@ void sysfs_free(struct mdinfo *sra)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Open an md sysfs attribute file.
 *
 * devnum:  array number.  Values >= 0 select "md<N>"; negative values
 *          select "md_d<-1-N>".
 * devname: optional sub-directory under .../md/ (may be NULL).
 * attr:    name of the attribute file to open.
 *
 * The file is opened read-write, falling back to read-only if that is
 * refused with EACCES.  Returns the descriptor, or -1 on failure (with
 * errno set to ENAMETOOLONG if the path does not fit the name buffer).
 */
int sysfs_open(int devnum, char *devname, char *attr)
{
	char fname[50];
	int len;
	int fd;

	if (devnum >= 0)
		len = snprintf(fname, sizeof(fname),
			       "/sys/block/md%d/md/%s%s%s", devnum,
			       devname ? devname : "",
			       devname ? "/" : "", attr);
	else
		len = snprintf(fname, sizeof(fname),
			       "/sys/block/md_d%d/md/%s%s%s", -1-devnum,
			       devname ? devname : "",
			       devname ? "/" : "", attr);

	/* Refuse a truncated path instead of overrunning fname. */
	if (len < 0 || (size_t)len >= sizeof(fname)) {
		errno = ENAMETOOLONG;
		return -1;
	}

	fd = open(fname, O_RDWR);
	/* errno values are positive: compare with EACCES, not -EACCES. */
	if (fd < 0 && errno == EACCES)
		fd = open(fname, O_RDONLY);
	return fd;
}
|
||||||
|
|
||||||
struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
|
struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
|
||||||
{
|
{
|
||||||
/* Longest possible name in sysfs, mounted at /sys, is
|
/* Longest possible name in sysfs, mounted at /sys, is
|
||||||
|
@ -128,6 +151,12 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
|
||||||
goto abort;
|
goto abort;
|
||||||
sra->array.layout = strtoul(buf, NULL, 0);
|
sra->array.layout = strtoul(buf, NULL, 0);
|
||||||
}
|
}
|
||||||
|
if (options & GET_DISKS) {
|
||||||
|
strcpy(base, "raid_disks");
|
||||||
|
if (load_sys(fname, buf))
|
||||||
|
goto abort;
|
||||||
|
sra->array.raid_disks = strtoul(buf, NULL, 0);
|
||||||
|
}
|
||||||
if (options & GET_COMPONENT) {
|
if (options & GET_COMPONENT) {
|
||||||
strcpy(base, "component_size");
|
strcpy(base, "component_size");
|
||||||
if (load_sys(fname, buf))
|
if (load_sys(fname, buf))
|
||||||
|
|
Loading…
Reference in New Issue