Merge mdmon

This commit is contained in:
Neil Brown 2008-05-15 16:48:37 +10:00
parent f7dd881f90
commit 549e9569c6
9 changed files with 1101 additions and 19 deletions

View File

@ -77,6 +77,11 @@ SRCS = mdadm.c config.c mdstat.c ReadMe.c util.c Manage.c Assemble.c Build.c \
mdopen.c super0.c super1.c super-ddf.c super-intel.c bitmap.c \
restripe.c sysfs.c sha1.c mapfile.c crc32.c sg_io.c msg.c
MON_OBJS = mdmon.o monitor.o managemon.o util.o mdstat.o sysfs.o config.o \
Kill.o sg_io.o dlink.o ReadMe.o super0.o super1.o super-intel.o \
super-ddf.o sha1.o crc32.o
STATICSRC = pwgr.c
STATICOBJS = pwgr.o
@ -88,7 +93,7 @@ ASSEMBLE_SRCS += mdopen.c mdstat.c
ASSEMBLE_FLAGS += -DMDASSEMBLE_AUTO
endif
all : mdadm mdadm.man md.man mdadm.conf.man
all : mdadm mdmon mdadm.man md.man mdadm.conf.man
everything: all mdadm.static swap_super test_stripe \
mdassemble mdassemble.static mdassemble.man \
@ -118,6 +123,9 @@ mdadm.Os : $(SRCS) mdadm.h
mdadm.O2 : $(SRCS) mdadm.h
gcc -o mdadm.O2 $(CFLAGS) -DHAVE_STDINT_H -O2 $(SRCS)
mdmon : $(MON_OBJS)
$(CC) $(LDFLAGS) -o mdmon $(MON_OBJS) $(LDLIBS)
test_stripe : restripe.c mdadm.h
$(CC) $(CXFLAGS) $(LDFLAGS) -o test_stripe -DMAIN restripe.c
@ -182,7 +190,8 @@ test: mdadm test_stripe swap_super
@echo "Please run 'sh ./test' as root"
clean :
rm -f mdadm $(OBJS) $(STATICOBJS) core *.man mdadm.tcc mdadm.uclibc mdadm.static *.orig *.porig *.rej *.alt \
rm -f mdadm mdmon $(OBJS) $(MON_OBJS) $(STATICOBJS) core *.man \
mdadm.tcc mdadm.uclibc mdadm.static *.orig *.porig *.rej *.alt \
mdadm.Os mdadm.O2 \
mdassemble mdassemble.static mdassemble.uclibc mdassemble.klibc swap_super \
init.cpio.gz mdadm.uclibc.static test_stripe

309
managemon.c Normal file
View File

@ -0,0 +1,309 @@
/*
* The management thread for monitoring active md arrays.
* This thread does things which might block such as memory
* allocation.
* In particular:
*
* - Find out about new arrays in this container.
* Allocate the data structures and open the files.
*
* For this we watch /proc/mdstat and find new arrays with
* metadata type that confirms sharing. e.g. "md4"
* When we find a new array we slip it into the list of
* arrays and signal 'monitor' by writing to a pipe.
*
* - Respond to reshape requests by allocating new data structures
* and opening new files.
*
* These come as a change to raid_disks. We allocate a new
* version of the data structures and slip it into the list.
* 'monitor' will notice and release the old version.
* Changes to level, chunksize, layout.. do not need re-allocation.
* Reductions in raid_disks don't really either, but we handle
* them the same way for consistency.
*
* - When a device is added to the container, we add it to the metadata
* as a spare.
*
* - assist with activating spares by opening relevant sysfs file.
*
* - Pass on metadata updates from external programs such as
* mdadm creating a new array.
*
* This is most-messy.
* It might involve adding a new array or changing the status of
* a spare, or any reconfig that the kernel doesn't get involved in.
*
* The required updates are received via a named pipe. There will
* be one named pipe for each container. Each message contains a
* sync marker: 0x5a5aa5a5, A byte count, and the message. This is
* passed to the metadata handler which will interpret and process it.
* For 'DDF' messages are internal data blocks with the leading
* 'magic number' signifying what sort of data it is.
*
*/
/*
* We select on /proc/mdstat and the named pipe.
* We create new arrays or updated version of arrays and slip
* them into the head of the list, then signal 'monitor' via a pipe write.
* 'monitor' will notice and place the old array on a return list.
* Metadata updates are placed on a queue just like they arrive
* from the named pipe.
*
* When new arrays are found based on correct metadata string, we
* need to identify them with an entry in the metadata. Maybe we require
* the metadata to be mdX/NN where NN is the index into an appropriate table.
*
*/
/*
* List of tasks:
* - Watch for spares to be added to the container, and write updated
* metadata to them.
* - Watch for new arrays using this container, confirm they match metadata
* and if so, start monitoring them
* - Watch for spares being added to monitored arrays. This shouldn't
* happen, as we should do all the adding. Just remove them.
* - Watch for change in raid-disks, chunk-size, etc. Update metadata and
* start a reshape.
*/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include "mdadm.h"
#include "mdmon.h"
#include <sys/socket.h>
/* Release an active_array together with its list of per-device
 * mdinfo records.
 *
 * Note that this doesn't close fds, as they may be in use
 * by a clone.  Use close_aa for that.
 */
static void free_aa(struct active_array *aa)
{
	struct mdinfo *dev = aa->info.devs;

	while (dev != NULL) {
		struct mdinfo *following = dev->next;

		free(dev);
		dev = following;
	}
	free(aa);
}
/* Publish 'new' at the head of container->arrays, recording 'old' as the
 * array it supersedes.  'old' may be NULL (every caller visible in this
 * file passes NULL), in which case 'new' is simply added.
 *
 * Only one replacement is tracked at a time: it is remembered in
 * pending_discard until the monitor hands it back through discard_this.
 */
static void replace_array(struct supertype *container,
struct active_array *old,
struct active_array *new)
{
/* To replace an array, we add it to the top of the list
 * marked with ->replaces to point to the original.
 * 'monitor' will take the original out of the list
 * and put it on 'discard_this'. We take it from there
 * and discard it.
 */
/* A previous replacement is still outstanding: reclaim it before
 * starting another one.  The body clears pending_discard, so this
 * loop runs at most once.
 */
while (pending_discard) {
/* Poll once a second until the monitor has set discard_this.
 * NOTE(review): these globals are plain pointers shared between
 * threads with no visible synchronisation - confirm this is safe.
 */
while (discard_this == NULL)
sleep(1);
/* Anything other than the array we asked to have discarded means
 * the bookkeeping is inconsistent.
 */
if (discard_this != pending_discard)
abort();
/* Detach from the list before freeing. */
discard_this->next = NULL;
free_aa(discard_this);
discard_this = NULL;
pending_discard = NULL;
}
pending_discard = old;
/* Fill in 'new' completely before linking it into the list. */
new->replaces = old;
new->next = container->arrays;
container->arrays = new;
}
/* Handle the mdstat entry that describes the container itself. */
static void manage_container(struct mdstat_ent *mdstat,
			     struct supertype *container)
{
	/* The only thing of interest here is if a new device
	 * has been added to the container.  We add it to the
	 * array ignoring any metadata on it.
	 * FIXME should we look for compatible metadata and take hints
	 * about spare assignment.... probably not.
	 */
	if (mdstat->devcnt == container->devcnt)
		return;

	/* The device count changed.
	 * read /sys/block/NAME/md/dev-??/block/dev to find out
	 * what is there, and compare with container->info.devs
	 * to see what is removed and what is added.
	 * These need to be removed from, or added to, the array.
	 */
	// FIXME
	container->devcnt = mdstat->devcnt;
}
static void manage_member(struct mdstat_ent *mdstat,
struct active_array *a)
{
/* Compare mdstat info with known state of member array.
* We do not need to look for device state changes here, that
* is dealt with by the monitor.
*
* We just look for changes which suggest that a reshape is
* being requested.
* Unfortunately decreases in raid_disks don't show up in
* mdstat until the reshape completes FIXME.
*/
// FIXME
a->info.array.raid_disks = mdstat->raid_disks;
a->info.array.chunk_size = mdstat->chunk_size;
// MORE
}
/* Poke the monitor by writing a 4-byte token to the write end of the
 * container's pipe.  The result of the write is not checked.
 */
static void write_wakeup(struct supertype *c)
{
	static const char token[] = "PING";

	write(c->pipe[1], token, sizeof(token) - 1);
}
/* A new array has appeared in this container.
 * Hopefully it is already recorded in the metadata.
 * Check, then create the new array to report it to
 * the monitor.
 *
 * mdstat:    the /proc/mdstat entry of the new member; its
 *            metadata_version is expected to look like
 *            "external:/<container>/<inst>".
 * container: the container the array belongs to.
 *
 * On success the array is put at the head of container->arrays and the
 * monitor is woken.  If the array cannot be set up it is still put on
 * the list, but with ->container == NULL and every fd closed, so that
 * it is recognised as "known but ignored".  If even the active_array
 * cannot be allocated nothing is recorded.
 */
static void manage_new(struct mdstat_ent *mdstat,
		       struct supertype *container)
{
	struct active_array *new;
	struct mdinfo *mdi = NULL, *di;
	char *n;
	int inst;
	int i;

	new = malloc(sizeof(*new));
	if (!new)
		return;
	/* Start from a fully defined state: info.devs is used as a list
	 * head below and must begin as NULL, and fds that are never
	 * opened must read as "not open".
	 */
	memset(new, 0, sizeof(*new));
	new->action_fd = -1;
	new->sync_pos_fd = -1;
	new->info.state_fd = -1;

	new->devnum = mdstat->devnum;
	new->prev_state = new->curr_state = new->next_state = inactive;
	new->prev_action= new->curr_action= new->next_action= idle;

	new->container = container;

	/* Skip "external:/" (10 chars), the container name and the '/'
	 * that follows it, leaving the instance number.
	 */
	n = &mdstat->metadata_version[10+strlen(container->devname)+1];
	inst = atoi(n);
	if (inst < 0)
		abort();//FIXME

	mdi = sysfs_read(-1, new->devnum,
			 GET_LEVEL|GET_CHUNK|GET_DISKS|
			 GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE);
	if (!mdi)
		/* Eeek. Cannot monitor this array. */
		goto ignore;

	new->info.array = mdi->array;

	/* Build one mdinfo per raid slot, whether or not a device
	 * currently occupies it.
	 */
	for (i = 0; i < new->info.array.raid_disks; i++) {
		struct mdinfo *newd = malloc(sizeof(*newd));

		if (!newd)
			goto ignore;

		for (di = mdi->devs; di; di = di->next)
			if (i == di->disk.raid_disk)
				break;

		if (di) {
			memcpy(newd, di, sizeof(*newd));

			sprintf(newd->sys_name, "rd%d", i);

			newd->state_fd = sysfs_open(new->devnum,
						    newd->sys_name,
						    "state");

			newd->prev_state = read_dev_state(newd->state_fd);
			/* Nothing has changed yet, so current == previous. */
			newd->curr_state = newd->prev_state;
			newd->next_state = 0;
		} else {
			/* Empty slot: give it defined contents and the
			 * right slot number, but nothing to watch.
			 */
			memset(newd, 0, sizeof(*newd));
			newd->disk.raid_disk = i;
			newd->state_fd = -1;
		}
		newd->next = new->info.devs;
		new->info.devs = newd;
	}
	new->action_fd = sysfs_open(new->devnum, NULL, "sync_action");
	new->info.state_fd = sysfs_open(new->devnum, NULL, "array_state");
	new->sync_pos_fd = sysfs_open(new->devnum, NULL, "sync_completed");
	new->sync_pos = 0;

	// finds and compares.
	if (container->ss->open_new(container, new, inst) < 0)
		goto ignore;

	/* Everything needed has been copied out of mdi. */
	sysfs_free(mdi);

	replace_array(container, NULL, new);
	write_wakeup(container);
	return;

ignore:
	/* 'new' has not been published yet, so nobody else can be using
	 * these fds: close them and mark them unused.
	 */
	for (di = new->info.devs; di; di = di->next) {
		if (di->state_fd >= 0)
			close(di->state_fd);
		di->state_fd = -1;
	}
	if (new->action_fd >= 0)
		close(new->action_fd);
	if (new->info.state_fd >= 0)
		close(new->info.state_fd);
	if (new->sync_pos_fd >= 0)
		close(new->sync_pos_fd);
	new->action_fd = -1;
	new->info.state_fd = -1;
	new->sync_pos_fd = -1;

	if (mdi)
		sysfs_free(mdi);

	/* Mark it to be ignored by setting container to NULL */
	new->container = NULL;
	replace_array(container, NULL, new);
}
/* We have just read mdstat and need to compare it with
 * the known active arrays.
 *
 * mdstat:    list of entries as read from /proc/mdstat.
 * aa:        list of arrays that are already known.
 * container: the container being managed.
 *
 * Arrays with the wrong metadata are ignored.
 */
void manage(struct mdstat_ent *mdstat, struct active_array *aa,
	    struct supertype *container)
{
	struct mdstat_ent *ent;

	for (ent = mdstat; ent; ent = ent->next) {
		struct active_array *known;
		char *vers = ent->metadata_version;
		size_t namelen;

		if (ent->devnum == container->devnum) {
			manage_container(ent, container);
			continue;
		}

		/* A member must be tagged "external:/<container>/..." */
		if (vers == NULL)
			continue;
		if (strncmp(vers, "external:/", 10) != 0)
			continue;
		namelen = strlen(container->devname);
		if (strncmp(vers+10, container->devname, namelen) != 0)
			continue;
		if (vers[10+namelen] != '/')
			/* Not for this array */
			continue;

		/* Looks like a member of this container */
		known = aa;
		while (known && known->devnum != ent->devnum)
			known = known->next;

		if (known == NULL)
			manage_new(ent, container);
		else if (known->container)
			/* arrays with no container are being ignored */
			manage_member(ent, known);
	}
}
/* Accept one pending connection on the listening socket 'pfd' and
 * immediately close it again.  Returns quietly if accept() fails.
 */
void read_sock(int pfd)
{
	// FIXME set non-blocking
	int conn = accept(pfd, NULL, NULL);

	if (conn >= 0) {
		// FIXME do something useful
		close(conn);
	}
}
/* Main loop of the manager thread; never returns.
 *
 * Each pass reads /proc/mdstat, reconciles it with the arrays already
 * attached to 'container', services the control socket, then sleeps
 * until mdstat or the socket has something new.
 */
void do_manager(struct supertype *container)
{
	for (;;) {
		struct mdstat_ent *mdstat = mdstat_read(1, 0);

		/* Known arrays live on container->arrays (that is where
		 * replace_array() links them), so that is the list to
		 * compare against; otherwise every member would look new
		 * on every pass.
		 */
		manage(mdstat, container->arrays, container);

		/* The list is rebuilt on each pass; don't leak the old one. */
		if (mdstat)
			free_mdstat(mdstat);

		read_sock(container->sock);

		mdstat_wait_fd(container->sock);
	}
}

35
mdadm.h
View File

@ -159,6 +159,11 @@ struct mdinfo {
char sys_name[20];
struct mdinfo *devs;
struct mdinfo *next;
/* Device info for mdmon: */
int state_fd;
int prev_state, curr_state, next_state;
};
struct createinfo {
@ -271,12 +276,17 @@ struct mdstat_ent {
char *pattern; /* U or up, _ for down */
int percent; /* -1 if no resync */
int resync; /* 1 if resync, 0 if recovery */
int devcnt;
int raid_disks;
int chunk_size;
char * metadata_version;
struct mdstat_ent *next;
};
extern struct mdstat_ent *mdstat_read(int hold, int start);
extern void free_mdstat(struct mdstat_ent *ms);
extern void mdstat_wait(int seconds);
extern void mdstat_wait_fd(int fd);
extern int mddev_busy(int devnum);
struct map_ent {
@ -304,6 +314,7 @@ extern void map_add(struct map_ent **melp,
#define GET_CACHE 16
#define GET_MISMATCH 32
#define GET_VERSION 64
#define GET_DISKS 128
#define GET_DEVS 1024 /* gets role, major, minor */
#define GET_OFFSET 2048
@ -314,6 +325,7 @@ extern void map_add(struct map_ent **melp,
/* If fd >= 0, get the array it is open on,
* else use devnum. >=0 -> major9. <0.....
*/
extern int sysfs_open(int devnum, char *devname, char *attr);
extern void sysfs_free(struct mdinfo *sra);
extern struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options);
extern int sysfs_set_str(struct mdinfo *sra, struct mdinfo *dev,
@ -350,6 +362,7 @@ extern mapping_t r5layout[], pers[], modes[], faultylayout[];
extern char *map_dev(int major, int minor, int create);
struct active_array;
extern struct superswitch {
void (*examine_super)(struct supertype *st, char *homehost);
@ -390,6 +403,14 @@ extern struct superswitch {
struct mdinfo *(*container_content)(struct supertype *st);
/* for mdmon */
int (*open_new)(struct supertype *c, struct active_array *a, int inst);
void (*mark_clean)(struct active_array *a, unsigned long long sync_pos);
void (*mark_dirty)(struct active_array *a);
void (*set_disk)(struct active_array *a, int n);
void (*sync_metadata)(struct active_array *a);
int major;
char *text_version;
int swapuuid; /* true if uuid is big-endian rather than host-endian */
@ -406,6 +427,20 @@ struct supertype {
int container_member; /* numerical position in container */
void *sb;
void *info;
/* extra stuff used by mdmon */
struct active_array *arrays;
int devfd;
int sock; /* listen to external programs */
int pipe[2]; /* communicate between threads */
int devnum;
char *devname; /* e.g. md0. This appears in metadata_version:
* external:/md0/12
*/
int devcnt;
struct mdinfo *devs;
};
extern struct supertype supertype_container_member;

222
mdmon.c Normal file
View File

@ -0,0 +1,222 @@
/*
* md array manager.
* When md arrays have user-space managed metadata, this is the program
* that does the managing.
*
* Given one argument: the name of the array (e.g. /dev/md0) that is
* the container.
* We fork off a helper that runs high priority and mlocked. It responds to
* device failures and other events that might stop writeout, or that are
* trivial to deal with.
* The main thread then watches for new arrays being created in the container
* and starts monitoring them too ... along with a few other tasks.
*
* The main thread communicates with the priority thread by writing over
* a pipe.
* Separate programs can communicate with the main thread via Unix-domain
* socket.
* The two threads share address space and open file table.
*
*/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <unistd.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/mman.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>
#include <sched.h>
#include "mdadm.h"
#include "mdmon.h"
/* List of arrays handed to manage() by do_manager().
 * NOTE(review): nothing in the visible sources ever assigns this.
 */
struct active_array *array_list;
/* Set by the monitor to the array it has unlinked and wants freed;
 * consumed and reset to NULL by replace_array().
 */
struct active_array *discard_this;
/* The array replace_array() is currently waiting to get back. */
struct active_array *pending_discard;
/* Entry point for the cloned monitor: 'v' is the struct supertype of
 * the container.  Returns 0 if do_monitor() ever returns.
 */
int run_child(void *v)
{
	do_monitor((struct supertype *)v);
	return 0;
}
/* Create the manager->monitor pipe and start the monitor as a clone
 * sharing our address space and file table.
 *
 * Returns the write end of the pipe (also in container->pipe[1]) on
 * success, or a negative value with errno set on failure.
 */
int clone_monitor(struct supertype *container)
{
	/* NOTE(review): 4096 bytes is a small stack for the monitor
	 * (which calls fprintf() via the metadata handlers) - confirm
	 * it is sufficient.
	 */
	static char stack[4096];
	int rv;

	if (pipe(container->pipe) < 0)
		return -1;

	rv = clone(run_child, stack+4096-64,
		   CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD,
		   container);

	if (rv < 0) {
		/* Preserve errno for the caller's error message. */
		int err = errno;

		close(container->pipe[0]);
		close(container->pipe[1]);
		errno = err;
		return rv;
	}

	/* The pipe was created in container->pipe; return its write end
	 * rather than an unrelated, uninitialised local.
	 */
	return container->pipe[1];
}
/* Map a metadata text version onto its superswitch.
 * Only "ddf" is recognised; anything else yields NULL.
 */
static struct superswitch *find_metadata_methods(char *vers)
{
	struct superswitch *methods = NULL;

	if (!strcmp(vers, "ddf"))
		methods = &super_ddf;
	return methods;
}
/* Create /var/run/mdadm/<devname>.pid containing our pid.
 *
 * The file is created with O_EXCL, so an existing pid file makes this
 * fail.
 *
 * Returns 0 on success, -1 if the name does not fit, the file cannot be
 * created, or the pid cannot be written in full (in which case the
 * partially written file is removed again).
 */
static int make_pidfile(char *devname)
{
	char path[100];
	char pid[16];
	int fd;
	int len;

	len = snprintf(path, sizeof(path), "/var/run/mdadm/%s.pid", devname);
	if (len < 0 || (size_t)len >= sizeof(path))
		return -1;

	fd = open(path, O_RDWR|O_CREAT|O_EXCL, 0600);
	if (fd < 0)
		return -1;

	len = snprintf(pid, sizeof(pid), "%d\n", (int)getpid());
	if (len < 0 || (size_t)len >= sizeof(pid) ||
	    write(fd, pid, len) != len) {
		/* Don't leave an empty or truncated pid file behind. */
		close(fd);
		unlink(path);
		return -1;
	}
	close(fd);
	return 0;
}
/* Create the non-blocking, listening Unix-domain control socket
 * /var/run/mdadm/<devname>.sock, removing any existing file of that
 * name first.
 *
 * Returns the listening fd, or -1 on any failure.
 */
static int make_control_sock(char *devname)
{
	char path[100];
	int sfd;
	long fl;
	int len;
	struct sockaddr_un addr;

	len = snprintf(path, sizeof(path), "/var/run/mdadm/%s.sock", devname);
	if (len < 0 || (size_t)len >= sizeof(path) ||
	    (size_t)len >= sizeof(addr.sun_path))
		return -1;

	unlink(path);
	sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
	if (sfd < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = PF_LOCAL;
	strcpy(addr.sun_path, path);
	if (bind(sfd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    listen(sfd, 10) < 0) {
		close(sfd);
		return -1;
	}

	/* The manager calls accept() on every pass, so the socket must
	 * never block.
	 */
	fl = fcntl(sfd, F_GETFL, 0);
	if (fl < 0 || fcntl(sfd, F_SETFL, fl | O_NONBLOCK) < 0) {
		close(sfd);
		return -1;
	}
	return sfd;
}
/* mdmon entry point.
 *
 * Takes exactly one argument: the device name of the container.
 * Validates that it is an md container with external metadata that we
 * have handlers for, creates the pid file and control socket, loads the
 * metadata, starts the monitor clone and then runs the manager loop.
 *
 * Exit status: 2 for usage errors or failure to start the monitor,
 * 1 if the device cannot be opened or is not an md device, 3 for any
 * other setup failure.
 */
int main(int argc, char *argv[])
{
	int mdfd;
	int pipefd;
	struct mdinfo *mdi, *di;
	struct supertype *container;

	if (argc != 2) {
		fprintf(stderr, "Usage: md-manage /device/name/for/container\n");
		exit(2);
	}
	mdfd = open(argv[1], O_RDWR);
	if (mdfd < 0) {
		fprintf(stderr, "md-manage: %s: %s\n", argv[1],
			strerror(errno));
		exit(1);
	}
	if (md_get_version(mdfd) < 0) {
		fprintf(stderr, "md-manage: %s: Not an md device\n",
			argv[1]);
		exit(1);
	}

	/* hopefully it is a container - we'll check later */

	container = malloc(sizeof(*container));
	if (!container) {
		fprintf(stderr, "mdmon: out of memory\n");
		exit(3);
	}
	/* Zero everything so that fields we do not set explicitly (and
	 * which the metadata handler may look at) have defined values.
	 */
	memset(container, 0, sizeof(*container));
	container->devfd = mdfd;
	container->devnum = fd2devnum(mdfd);
	container->devname = devnum2devname(container->devnum);
	if (!container->devname) {
		fprintf(stderr, "mdmon: cannot determine device name for %s\n",
			argv[1]);
		exit(3);
	}

	/* If this fails, we hope it already exists */
	mkdir("/var/run/mdadm", 0600);
	/* pid file lives in /var/run/mdadm/mdXX.pid */
	if (make_pidfile(container->devname) < 0) {
		fprintf(stderr, "md-manage: %s already managed\n",
			container->devname);
		exit(3);
	}

	container->sock = make_control_sock(container->devname);
	if (container->sock < 0) {
		fprintf(stderr, "mdmon: Cannot create socket in /var/run/mdadm\n");
		exit(3);
	}
	container->arrays = NULL;

	mdi = sysfs_read(mdfd, container->devnum,
			 GET_VERSION|GET_LEVEL|GET_DEVS);

	if (!mdi) {
		fprintf(stderr, "mdmon: failed to load sysfs info for %s\n",
			container->devname);
		exit(3);
	}
	if (mdi->array.level != UnSet) {
		fprintf(stderr, "mdmon: %s is not a container - cannot monitor\n",
			argv[1]);
		exit(3);
	}
	if (mdi->array.major_version != -1 ||
	    mdi->array.minor_version != -2) {
		fprintf(stderr, "mdmon: %s does not use external metadata - cannot monitor\n",
			argv[1]);
		exit(3);
	}

	container->ss = find_metadata_methods(mdi->text_version);
	if (container->ss == NULL) {
		fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n",
			argv[1], mdi->text_version);
		exit(3);
	}

	/* Take a private copy of every device record: the originals
	 * belong to 'mdi' and are released by sysfs_free() below.
	 */
	container->devs = NULL;
	for (di = mdi->devs; di; di = di->next) {
		struct mdinfo *cd = malloc(sizeof(*cd));

		if (!cd) {
			fprintf(stderr, "mdmon: out of memory\n");
			exit(3);
		}
		/* Copy the contents, not the pointer, so that re-linking
		 * the copy does not disturb the list being walked.
		 */
		*cd = *di;
		cd->next = container->devs;
		container->devs = cd;
	}
	sysfs_free(mdi);

	if (container->ss->load_super(container, mdfd, argv[1])) {
		fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
			argv[1]);
		exit(3);
	}

	mlockall(MCL_FUTURE);

	pipefd = clone_monitor(container);
	if (pipefd < 0) {
		fprintf(stderr, "md-manage: failed to start monitor process: %s\n",
			strerror(errno));
		exit(2);
	}

	do_manager(container);

	exit(0);
}

41
mdmon.h Normal file
View File

@ -0,0 +1,41 @@
/* Array states, in the same order as the array_states[] string table in
 * monitor.c: read_state() casts an index into that table to this enum.
 * bad_word corresponds to the table's NULL terminator, i.e. "no match".
 */
enum array_state { clear, inactive, suspended, readonly, read_auto,
clean, active, write_pending, active_idle, bad_word};
/* Sync actions, in the same order as the sync_actions[] string table in
 * monitor.c; bad_action corresponds to that table's NULL terminator.
 */
enum sync_action { idle, reshape, resync, recover, check, repair, bad_action };
/* One member array of a container, as tracked by mdmon. */
struct active_array {
/* Array details; info.devs is the per-slot device list and
 * info.state_fd is opened on the "array_state" attribute.
 */
struct mdinfo info;
/* Owning container; set to NULL to mark an array that is to be ignored. */
struct supertype *container;
/* next: list linkage.  replaces: the older entry this one supersedes,
 * which the monitor unlinks and hands back via discard_this.
 */
struct active_array *next, *replaces;
/* fd opened on the "sync_action" attribute */
int action_fd;
/* fd opened on the "sync_completed" attribute */
int sync_pos_fd;
/* prev_/curr_: state seen on the previous/current pass;
 * next_: state the monitor wants to write (bad_word/bad_action = none).
 */
enum array_state prev_state, curr_state, next_state;
enum sync_action prev_action, curr_action, next_action;
/* md device number, as reported in mdstat */
int devnum;
/* last value read through sync_pos_fd; all-ones when it reads "max" */
unsigned long long sync_pos;
};
#define MD_MAJOR 9
extern struct active_array *container;
extern struct active_array *array_list;
extern struct active_array *discard_this;
extern struct active_array *pending_discard;
void do_monitor(struct supertype *container);
void do_manager(struct supertype *container);
int read_dev_state(int fd);
struct mdstat_ent *mdstat_read(int hold, int start);
extern struct superswitch super_ddf, super_ddf_bvd, super_ddf_svd;

View File

@ -86,6 +86,7 @@
#include "mdadm.h"
#include "dlink.h"
#include <sys/select.h>
#include <ctype.h>
void free_mdstat(struct mdstat_ent *ms)
{
@ -158,6 +159,10 @@ struct mdstat_ent *mdstat_read(int hold, int start)
ent->percent = -1;
ent->active = -1;
ent->resync = 0;
ent->metadata_version = NULL;
ent->raid_disks = 0;
ent->chunk_size = 0;
ent->devcnt = 0;
ent->dev = strdup(line);
ent->devnum = devnum;
@ -176,22 +181,32 @@ struct mdstat_ent *mdstat_read(int hold, int start)
in_devs = 1;
} else if (in_devs && strcmp(w, "blocks")==0)
in_devs = 0;
else if (in_devs && strncmp(w, "md", 2)==0) {
/* This has an md device as a component.
* If that device is already in the list,
* make sure we insert before there.
*/
struct mdstat_ent **ih;
int dn2;
if (strncmp(w, "md_d", 4)==0)
dn2 = -1-strtoul(w+4, &ep, 10);
else
dn2 = strtoul(w+2, &ep, 10);
ih = &all;
while (ih != insert_here && *ih &&
(*ih)->devnum != dn2)
ih = & (*ih)->next;
insert_here = ih;
else if (in_devs) {
ent->devcnt++;
if (strncmp(w, "md", 2)==0) {
/* This has an md device as a component.
* If that device is already in the
* list, make sure we insert before
* there.
*/
struct mdstat_ent **ih;
int dn2;
if (strncmp(w, "md_d", 4)==0)
dn2 = -1-strtoul(w+4, &ep, 10);
else
dn2 = strtoul(w+2, &ep, 10);
ih = &all;
while (ih != insert_here && *ih &&
(*ih)->devnum != dn2)
ih = & (*ih)->next;
insert_here = ih;
}
} else if (strcmp(w, "super") == 0 &&
dl_next(w) != line) {
w = dl_next(w);
ent->metadata_version = strdup(w);
} else if (w[0] == '[' && isdigit(w[1])) {
ent->raid_disks = atoi(w+1);
} else if (!ent->pattern &&
w[0] == '[' &&
(w[1] == 'U' || w[1] == '_')) {
@ -256,6 +271,19 @@ void mdstat_wait(int seconds)
select(mdstat_fd >2 ? mdstat_fd+1:3, NULL, NULL, &fds, &tm);
}
void mdstat_wait_fd(int fd)
{
fd_set fds, rfds;
FD_ZERO(&fds);
FD_ZERO(&rfds);
if (mdstat_fd >= 0)
FD_SET(mdstat_fd, &fds);
FD_SET(fd, &rfds);
select(mdstat_fd >2 ? mdstat_fd+1:3, &rfds, NULL, &fds, NULL);
}
int mddev_busy(int devnum)
{
struct mdstat_ent *mdstat = mdstat_read(0, 0);

372
monitor.c Normal file
View File

@ -0,0 +1,372 @@
#include "mdadm.h"
#include "mdmon.h"
#include <sys/select.h>
/* Text of each array state, indexed by enum array_state.  The
 * terminating NULL sits at index bad_word, so a failed lookup by
 * match_word() yields bad_word.
 */
static char *array_states[] = {
"clear", "inactive", "suspended", "readonly", "read-auto",
"clean", "active", "write-pending", "active-idle", NULL };
/* Text of each sync action, indexed by enum sync_action; the
 * terminating NULL sits at index bad_action.
 */
static char *sync_actions[] = {
"idle", "reshape", "resync", "recover", "check", "repair", NULL
};
/* Write the string 'attr' (without its terminating NUL) to 'fd'.
 * Returns whatever write() returns.
 */
static int write_attr(char *attr, int fd)
{
	size_t nbytes = strlen(attr);

	return write(fd, attr, nbytes);
}
/* Add 'fd' to the set 'fds' and raise *maxfd to it if it is larger.
 * Negative descriptors are silently skipped.
 */
static void add_fd(fd_set *fds, int *maxfd, int fd)
{
	if (fd >= 0) {
		*maxfd = (fd > *maxfd) ? fd : *maxfd;
		FD_SET(fd, fds);
	}
}
/* Read an attribute from 'fd' into 'buf' (capacity 'len'), starting
 * from offset 0 of the file.
 *
 * The result is NUL-terminated and a single trailing newline is
 * removed.  Returns the number of bytes read, or 0 - with buf set to
 * the empty string - if fd is negative or nothing could be read.
 */
static int read_attr(char *buf, int len, int fd)
{
	int got = 0;

	if (fd >= 0) {
		lseek(fd, 0, SEEK_SET);
		got = read(fd, buf, len - 1);
	}
	if (got <= 0) {
		buf[0] = 0;
		return 0;
	}

	buf[got] = 0;
	if (buf[got - 1] == '\n')
		buf[got - 1] = 0;
	return got;
}
/* Refresh a->sync_pos from a->sync_pos_fd.
 *
 * A value starting with "max" is stored as all-ones; anything else is
 * parsed as a decimal number.  Returns 1 if a value was stored,
 * otherwise the (non-positive) result of read_attr().
 */
static int get_sync_pos(struct active_array *a)
{
	char buf[30];
	int n = read_attr(buf, 30, a->sync_pos_fd);

	if (n <= 0)
		return n;

	a->sync_pos = (strncmp(buf, "max", 3) == 0)
		? ~(unsigned long long)0
		: strtoull(buf, NULL, 10);
	return 1;
}
/* See if attr, read from a sysfs file, matches str.
 * All of str must be matched, and attr must then either end or
 * continue with a comma or a newline.
 * Returns 1 on a match, 0 otherwise.
 */
static int attr_match(const char *attr, const char *str)
{
	size_t i = 0;

	while (attr[i] && str[i] && attr[i] == str[i])
		i++;

	if (str[i] != '\0')
		return 0;
	return attr[i] == '\0' || attr[i] == ',' || attr[i] == '\n';
}
/* Return the index of the first entry in the NULL-terminated 'list'
 * that 'word' matches (per attr_match), or the index of the
 * terminating NULL if nothing matches.
 */
static int match_word(const char *word, char **list)
{
	int idx = 0;

	while (list[idx] && !attr_match(word, list[idx]))
		idx++;
	return idx;
}
/* Read the array state from 'fd' and translate it through
 * array_states[].  Returns bad_word if nothing could be read.
 */
static enum array_state read_state(int fd)
{
	char buf[20];

	if (read_attr(buf, sizeof(buf), fd) <= 0)
		return bad_word;
	return (enum array_state) match_word(buf, array_states);
}
/* Read the sync action from 'fd' and translate it through
 * sync_actions[].  Returns bad_action if nothing could be read.
 */
static enum sync_action read_action(int fd)
{
	char buf[20];

	if (read_attr(buf, sizeof(buf), fd) <= 0)
		return bad_action;
	return (enum sync_action) match_word(buf, sync_actions);
}
/* Bits of the mask returned by read_dev_state(), one per word that may
 * appear in a device's "state" attribute.
 */
#define DS_FAULTY 1
#define DS_INSYNC 2
#define DS_WRITE_MOSTLY 4
#define DS_SPARE 8
/* Never read from sysfs: stored in mdinfo.next_state by read_and_act()
 * to request that "remove" be written to the device.
 */
#define DS_REMOVE 1024
/* Read a device "state" attribute from 'fd' and return it as a mask of
 * DS_FAULTY / DS_INSYNC / DS_WRITE_MOSTLY / DS_SPARE.
 *
 * The attribute is treated as a comma-separated list of words.
 * Returns 0 if nothing could be read (including fd < 0).
 */
int read_dev_state(int fd)
{
	char buf[60];
	int n = read_attr(buf, 60, fd);
	char *cp;
	int rv = 0;

	if (n <= 0)
		return 0;

	cp = buf;
	while (cp) {
		/* attr_match() takes the sysfs text first and the word
		 * to look for second: only the first argument may carry
		 * a trailing ",..." remainder.
		 */
		if (attr_match(cp, "faulty"))
			rv |= DS_FAULTY;
		if (attr_match(cp, "in_sync"))
			rv |= DS_INSYNC;
		if (attr_match(cp, "write_mostly"))
			rv |= DS_WRITE_MOSTLY;
		if (attr_match(cp, "spare"))
			rv |= DS_SPARE;
		/* advance to the word after the next comma, if any */
		cp = strchr(cp, ',');
		if (cp)
			cp++;
	}
	return rv;
}
/* Monitor a set of active md arrays - all of which share the
* same metadata - and respond to events that require
* metadata update.
*
* New arrays are detected by another thread which allocates
* required memory and attaches the data structure to our list.
*
* Events:
* Array stops.
* This is detected by array_state going to 'clear' or 'inactive'.
* while we thought it was active.
* Response is to mark metadata as clean and 'clear' the array(??)
* write-pending
* array_state is 'write-pending'
* We mark metadata as 'dirty' then set array to 'active'.
* active_idle
* Either ignore, or mark clean, then mark metadata as clean.
*
* device fails
* detected by rd-N/state reporting "faulty"
* mark device as 'failed' in metadata, then remove the device
* by writing 'remove' to rd/state.
*
* sync completes
* sync_action was 'resync' and becomes 'idle' and resync_start becomes
* MaxSector
* Notify metadata that sync is complete.
* "Deal with Degraded"
*
* recovery completes
* sync_action changes from 'recover' to 'idle'
* Check each device state and mark metadata if 'faulty' or 'in_sync'.
* "Deal with Degraded"
*
* deal with degraded array
* We only do this when first noticing the array is degraded.
* This can be when we first see the array, when sync completes or
* when recovery completes.
*
* Check if number of failed devices suggests recovery is needed, and
* skip if not.
* Ask metadata for a spare device
* Add device as not in_sync and give a role
* Update metadata.
* Start recovery.
*
* deal with resync
* This only happens on finding a new array....
* Maybe this is done by mdadm before passing the array to us?
*
* If array is 'clean' but metadata is 'dirty', start a resync
* and mark array as 'dirty'.
*
*
*
*
* We wait for a change (poll/select) on array_state, sync_action, and
* each rd-X/state file.
* When we get any change, we check everything. So read each state file,
* then decide what to do.
*
* The core action is to write new metadata to all devices in the array.
* This is done at most once on any wakeup.
* After that we might:
* - update the array_state
* - set the role of some devices.
* - request a sync_action
*
*/
static int read_and_act(struct active_array *a)
{
int check_degraded;
struct mdinfo *mdi;
a->next_state = bad_word;
a->next_action = bad_action;
a->curr_state = read_state(a->info.state_fd);
a->curr_action = read_action(a->action_fd);
for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
mdi->next_state = 0;
mdi->curr_state = read_dev_state(mdi->state_fd);
}
if (a->curr_state <= inactive &&
a->prev_state > inactive) {
/* array has been stopped */
get_sync_pos(a);
a->container->ss->mark_clean(a, a->sync_pos);
a->next_state = clear;
}
if (a->curr_state == write_pending) {
a->container->ss->mark_dirty(a);
a->next_state = active;
}
if (a->curr_state == active_idle) {
/* Set array to 'clean' FIRST, then
* a->ss->mark_clean(a);
* just ignore for now.
*/
}
if (a->curr_state == readonly) {
/* Well, I'm ready to handle things, so
* read-auto is OK. FIXME what if we really want
* readonly ???
*/
a->next_state = read_auto;
}
if (a->curr_action == idle &&
a->prev_action == resync) {
/* check resync_start to see if it is 'max'.
* Do I open here, or have it open the whole time?
*/
get_sync_pos(a);
check_degraded = 1;
}
if (a->curr_action == idle &&
a->prev_action == recover) {
for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
a->container->ss->set_disk(a, mdi->disk.raid_disk);
if (! (mdi->curr_state & DS_INSYNC))
check_degraded = 1;
}
}
for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
if (mdi->curr_state & DS_FAULTY) {
a->container->ss->set_disk(a, mdi->disk.raid_disk);
check_degraded = 1;
mdi->next_state = DS_REMOVE;
}
}
if (check_degraded) {
// FIXME;
}
a->container->ss->sync_metadata(a);
/* Effect state changes in the array */
if (a->next_state != bad_word)
write_attr(array_states[a->next_state], a->info.state_fd);
if (a->next_action != bad_action)
write_attr(sync_actions[a->next_action], a->action_fd);
for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
if (mdi->next_state == DS_REMOVE)
write_attr("remove", mdi->state_fd);
if (mdi->next_state & DS_INSYNC)
write_attr("+in_sync", mdi->state_fd);
}
/* move curr_ to prev_ */
a->prev_state = a->curr_state;
a->prev_action = a->curr_action;
for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
mdi->prev_state = mdi->curr_state;
mdi->next_state = 0;
}
return 1;
}
/* Wait for activity on the wake-up pipe 'pfd' or on any state file of
 * the arrays in 'aa', then process every array.
 *
 * If 'nowait' is set the wait is skipped and the arrays are processed
 * immediately.
 *
 * Returns select()'s result unchanged if it is <= 0; otherwise the
 * number of ready descriptors (0 when nowait) plus the sum of the
 * read_and_act() results.
 */
static int wait_and_act(struct active_array *aa, int pfd, int nowait)
{
	fd_set rfds;
	int maxfd = 0;
	struct active_array *a;
	/* Accumulated into below even when select() is skipped, so it
	 * needs a defined starting value.
	 */
	int rv = 0;

	FD_ZERO(&rfds);

	add_fd(&rfds, &maxfd, pfd);
	for (a = aa ; a ; a = a->next) {
		struct mdinfo *mdi;

		add_fd(&rfds, &maxfd, a->info.state_fd);
		add_fd(&rfds, &maxfd, a->action_fd);
		for (mdi = a->info.devs ; mdi ; mdi = mdi->next)
			add_fd(&rfds, &maxfd, mdi->state_fd);
	}

	if (!nowait) {
		rv = select(maxfd+1, &rfds, NULL, NULL, NULL);

		if (rv <= 0)
			return rv;

		if (FD_ISSET(pfd, &rfds)) {
			char buf[4];
			read(pfd, buf, 4);
			; // FIXME read from the pipe
		}
	}

	for (a = aa; a ; a = a->next) {
		if (a->replaces) {
			/* Unlink the array this one supersedes (it sits
			 * somewhere after 'a') and hand it back to the
			 * manager through discard_this.
			 */
			struct active_array **ap;
			for (ap = &a->next; *ap && *ap != a->replaces;
			     ap = & (*ap)->next)
				;
			if (*ap)
				*ap = (*ap)->next;
			discard_this = a->replaces;
			a->replaces = NULL;
		}
		rv += read_and_act(a);
	}
	return rv;
}
/* Monitor loop: process the container's arrays once straight away,
 * then keep waiting for events and processing them until
 * wait_and_act() reports a negative result.
 */
void do_monitor(struct supertype *container)
{
	int nowait = 1;

	for (;;) {
		int rv = wait_and_act(container->arrays,
				      container->pipe[0], nowait);

		nowait = 0;
		if (rv < 0)
			break;
	}
}

View File

@ -27,6 +27,7 @@
#define HAVE_STDINT_H 1
#include "mdadm.h"
#include "mdmon.h"
#include "sha1.h"
#include <values.h>
@ -416,7 +417,7 @@ struct ddf_super {
#define offsetof(t,f) ((size_t)&(((t*)0)->f))
#endif
extern struct superswitch super_ddf_container, super_ddf_bvd;
extern struct superswitch super_ddf_container, super_ddf_bvd, super_ddf;
static int calc_crc(void *buf, int len)
{
@ -2442,6 +2443,32 @@ static int compare_super_ddf(struct supertype *st, struct supertype *tst)
return 0;
}
/* mdmon hook called for a newly found member array.
 * For now it only logs the instance number and reports success.
 */
static int ddf_open_new(struct supertype *c, struct active_array *a, int inst)
{
	(void)c;
	(void)a;

	fprintf(stderr, "ddf: open_new %d\n", inst);
	return 0;
}
/* mdmon hook: the array is clean up to sync_pos.
 * For now it only logs the position.
 */
static void ddf_mark_clean(struct active_array *a, unsigned long long sync_pos)
{
	(void)a;

	fprintf(stderr, "ddf: mark clean %llu\n", sync_pos);
}
/* mdmon hook: the array is about to be written to.
 * For now it only logs the call.
 */
static void ddf_mark_dirty(struct active_array *a)
{
	(void)a;

	fputs("ddf: mark dirty\n", stderr);
}
/* mdmon hook: the state of the device in slot n has changed.
 * For now it only logs the slot number.
 */
static void ddf_set_disk(struct active_array *a, int n)
{
	(void)a;

	fprintf(stderr, "ddf: set_disk %d\n", n);
}
/* mdmon hook: write the metadata out to the devices.
 * For now it only logs the call.
 */
static void ddf_sync_metadata(struct active_array *a)
{
	(void)a;

	fputs("ddf: sync_metadata\n", stderr);
}
struct superswitch super_ddf = {
#ifndef MDASSEMBLE
.examine_super = examine_super_ddf,
@ -2471,6 +2498,16 @@ struct superswitch super_ddf = {
.swapuuid = 0,
.external = 1,
.text_version = "ddf",
/* for mdmon */
.open_new = ddf_open_new,
.load_super = load_super_ddf,
.mark_clean = ddf_mark_clean,
.mark_dirty = ddf_mark_dirty,
.set_disk = ddf_set_disk,
.sync_metadata = ddf_sync_metadata,
};
/* Super_ddf_container is set by validate_geometry_ddf when given a

29
sysfs.c
View File

@ -56,6 +56,29 @@ void sysfs_free(struct mdinfo *sra)
}
}
/* Open a sysfs attribute of an md array.
 *
 * devnum:  array number; >= 0 selects "mdN", negative selects
 *          "md_dN" with N = -1-devnum.
 * devname: optional sub-directory of the array's md/ directory
 *          (e.g. "rd0"), or NULL for an array-level attribute.
 * attr:    attribute file name.
 *
 * The file is opened read-write, falling back to read-only if that is
 * refused with EACCES.  Returns the fd, or -1 with errno set
 * (ENAMETOOLONG if the path does not fit the internal buffer).
 */
int sysfs_open(int devnum, char *devname, char *attr)
{
	char fname[128];
	char sys_name[16];
	int len;
	int fd;

	if (devnum >= 0)
		snprintf(sys_name, sizeof(sys_name), "md%d", devnum);
	else
		snprintf(sys_name, sizeof(sys_name), "md_d%d",
			 -1-devnum);

	if (devname)
		len = snprintf(fname, sizeof(fname), "/sys/block/%s/md/%s/%s",
			       sys_name, devname, attr);
	else
		len = snprintf(fname, sizeof(fname), "/sys/block/%s/md/%s",
			       sys_name, attr);
	if (len < 0 || (size_t)len >= sizeof(fname)) {
		errno = ENAMETOOLONG;
		return -1;
	}

	fd = open(fname, O_RDWR);
	/* errno values are positive */
	if (fd < 0 && errno == EACCES)
		fd = open(fname, O_RDONLY);
	return fd;
}
struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
{
/* Longest possible name in sysfs, mounted at /sys, is
@ -128,6 +151,12 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
goto abort;
sra->array.layout = strtoul(buf, NULL, 0);
}
if (options & GET_DISKS) {
strcpy(base, "raid_disks");
if (load_sys(fname, buf))
goto abort;
sra->array.raid_disks = strtoul(buf, NULL, 0);
}
if (options & GET_COMPONENT) {
strcpy(base, "component_size");
if (load_sys(fname, buf))