Initial bitmap support

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
This commit is contained in:
Neil Brown 2005-06-07 23:16:35 +00:00
parent a3fd117c7a
commit c82f047cfc
10 changed files with 765 additions and 13 deletions

View File

@ -531,6 +531,13 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
mddev, strerror(errno)); mddev, strerror(errno));
return 1; return 1;
} }
if (ident->bitmap_fd) {
if (ioctl(mdfd, SET_BITMAP_FILE, ident->bitmap_fd) != 0) {
fprintf(stderr, Name ": SET_BITMAP_FILE failed.\n");
return 1;
}
}
/* First, add the raid disks, but add the chosen one last */ /* First, add the raid disks, but add the chosen one last */
for (i=0; i<= bestcnt; i++) { for (i=0; i<= bestcnt; i++) {
int j; int j;

34
Build.c
View File

@ -35,7 +35,8 @@
int Build(char *mddev, int mdfd, int chunk, int level, int layout, int Build(char *mddev, int mdfd, int chunk, int level, int layout,
int raiddisks, int raiddisks,
mddev_dev_t devlist, int assume_clean) mddev_dev_t devlist, int assume_clean,
char *bitmap_file, int bitmap_chunk, int delay)
{ {
/* Build a linear or raid0 arrays without superblocks /* Build a linear or raid0 arrays without superblocks
* We cannot really do any checks, we just do it. * We cannot really do any checks, we just do it.
@ -56,6 +57,7 @@ int Build(char *mddev, int mdfd, int chunk, int level, int layout,
struct stat stb; struct stat stb;
int subdevs = 0; int subdevs = 0;
mddev_dev_t dv; mddev_dev_t dv;
int bitmap_fd;
/* scan all devices, make sure they really are block devices */ /* scan all devices, make sure they really are block devices */
for (dv = devlist; dv; dv=dv->next) { for (dv = devlist; dv; dv=dv->next) {
@ -135,6 +137,9 @@ int Build(char *mddev, int mdfd, int chunk, int level, int layout,
mddev, strerror(errno)); mddev, strerror(errno));
return 1; return 1;
} }
} else if (bitmap_file) {
fprintf(stderr, Name ": bitmaps not supported with this kernel\n");
return 1;
} }
/* now add the devices */ /* now add the devices */
for ((i=0), (dv = devlist) ; dv ; i++, dv=dv->next) { for ((i=0), (dv = devlist) ; dv ; i++, dv=dv->next) {
@ -171,6 +176,33 @@ int Build(char *mddev, int mdfd, int chunk, int level, int layout,
/* now to start it */ /* now to start it */
if (vers >= 9000) { if (vers >= 9000) {
mdu_param_t param; /* not used by syscall */ mdu_param_t param; /* not used by syscall */
if (bitmap_file) {
bitmap_fd = open(bitmap_file, O_RDWR);
if (bitmap_fd < 0) {
if (bitmap_chunk == UnSet) {
fprintf(stderr, Name ": %s cannot be openned.",
bitmap_file);
return 1;
}
if (CreateBitmap(bitmap_file, 1, NULL, bitmap_chunk,
delay, 0/* FIXME size */)) {
return 1;
}
bitmap_fd = open(bitmap_file, O_RDWR);
if (bitmap_fd < 0) {
fprintf(stderr, Name ": %s cannot be openned.",
bitmap_file);
return 1;
}
}
if (bitmap_fd >= 0) {
if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) {
fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n",
mddev, strerror(errno));
return 1;
}
}
}
if (ioctl(mdfd, RUN_ARRAY, &param)) { if (ioctl(mdfd, RUN_ARRAY, &param)) {
fprintf(stderr, Name ": RUN_ARRAY failed: %s\n", fprintf(stderr, Name ": RUN_ARRAY failed: %s\n",
strerror(errno)); strerror(errno));

View File

@ -34,7 +34,8 @@
int Create(struct supertype *st, char *mddev, int mdfd, int Create(struct supertype *st, char *mddev, int mdfd,
int chunk, int level, int layout, unsigned long size, int raiddisks, int sparedisks, int chunk, int level, int layout, unsigned long size, int raiddisks, int sparedisks,
int subdevs, mddev_dev_t devlist, int subdevs, mddev_dev_t devlist,
int runstop, int verbose, int force) int runstop, int verbose, int force,
char *bitmap_file, int bitmap_chunk, int delay)
{ {
/* /*
* Create a new raid array. * Create a new raid array.
@ -66,6 +67,7 @@ int Create(struct supertype *st, char *mddev, int mdfd,
int pass; int pass;
int vers; int vers;
int rv; int rv;
int bitmap_fd;
mdu_array_info_t array; mdu_array_info_t array;
@ -358,6 +360,26 @@ int Create(struct supertype *st, char *mddev, int mdfd,
return 1; return 1;
} }
if (bitmap_file) {
int uuid[4];
st->ss->uuid_from_super(uuid, super);
if (CreateBitmap(bitmap_file, force, (char*)uuid, bitmap_chunk, delay,
array.size*2ULL /* FIXME wrong for raid10 */)) {
return 1;
}
bitmap_fd = open(bitmap_file, O_RDWR);
if (bitmap_fd < 0) {
fprintf(stderr, Name ": weird: %s cannot be openned\n",
bitmap_file);
return 1;
}
if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) {
fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n",
mddev, strerror(errno));
return 1;
}
}
for (pass=1; pass <=2 ; pass++) { for (pass=1; pass <=2 ; pass++) {

View File

@ -58,9 +58,11 @@ MAN5DIR = $(MANDIR)/man5
MAN8DIR = $(MANDIR)/man8 MAN8DIR = $(MANDIR)/man8
OBJS = mdadm.o config.o mdstat.o ReadMe.o util.o Manage.o Assemble.o Build.o \ OBJS = mdadm.o config.o mdstat.o ReadMe.o util.o Manage.o Assemble.o Build.o \
Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o mdopen.o super0.o super1.o Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \
mdopen.o super0.o super1.o bitmap.o
SRCS = mdadm.c config.c mdstat.c ReadMe.c util.c Manage.c Assemble.c Build.c \ SRCS = mdadm.c config.c mdstat.c ReadMe.c util.c Manage.c Assemble.c Build.c \
Create.c Detail.c Examine.c Grow.c Monitor.c dlink.c Kill.c Query.c mdopen.c super0.c super1.c Create.c Detail.c Examine.c Grow.c Monitor.c dlink.c Kill.c Query.c \
mdopen.c super0.c super1.c bitmap.c
ASSEMBLE_SRCS := mdassemble.c Assemble.c config.c dlink.c util.c super0.c super1.c ASSEMBLE_SRCS := mdassemble.c Assemble.c config.c dlink.c util.c super0.c super1.c
ifdef MDASSEMBLE_AUTO ifdef MDASSEMBLE_AUTO
@ -114,7 +116,7 @@ md.man : md.4
mdadm.conf.man : mdadm.conf.5 mdadm.conf.man : mdadm.conf.5
nroff -man mdadm.conf.5 > mdadm.conf.man nroff -man mdadm.conf.5 > mdadm.conf.man
$(OBJS) : mdadm.h $(OBJS) : mdadm.h bitmap.h
install : mdadm mdadm.8 md.4 mdadm.conf.5 install : mdadm mdadm.8 md.4 mdadm.conf.5
$(INSTALL) -D $(STRIP) -m 755 mdadm $(DESTDIR)$(BINDIR)/mdadm $(INSTALL) -D $(STRIP) -m 755 mdadm $(DESTDIR)$(BINDIR)/mdadm

View File

@ -91,7 +91,7 @@ char Version[] = Name " - v1.11.0 - 11 April 2005\n";
* At the time if writing, there is only minimal support. * At the time if writing, there is only minimal support.
*/ */
char short_options[]="-ABCDEFGQhVvbc:i:l:p:m:n:x:u:c:d:z:U:sa::rfRSow1te:"; char short_options[]="-ABCDEFGQhVXvb::c:i:l:p:m:n:x:u:c:d:z:U:sa::rfRSow1te:";
struct option long_options[] = { struct option long_options[] = {
{"manage", 0, 0, '@'}, {"manage", 0, 0, '@'},
{"misc", 0, 0, '#'}, {"misc", 0, 0, '#'},
@ -104,6 +104,7 @@ struct option long_options[] = {
{"grow", 0, 0, 'G'}, {"grow", 0, 0, 'G'},
{"zero-superblock", 0, 0, 'K'}, /* deliberately no a short_option */ {"zero-superblock", 0, 0, 'K'}, /* deliberately no a short_option */
{"query", 0, 0, 'Q'}, {"query", 0, 0, 'Q'},
{"examine-bitmap", 0, 0, 'X'},
/* synonyms */ /* synonyms */
{"monitor", 0, 0, 'F'}, {"monitor", 0, 0, 'F'},
@ -125,9 +126,11 @@ struct option long_options[] = {
{"spare-disks",1,0, 'x'}, {"spare-disks",1,0, 'x'},
{"spare-devices",1,0, 'x'}, {"spare-devices",1,0, 'x'},
{"size", 1, 0, 'z'}, {"size", 1, 0, 'z'},
{"auto", 2, 0, 'a'}, /* also for --assemble */ {"auto", 1, 0, 'a'}, /* also for --assemble */
{"assume-clean",0,0, 3 }, {"assume-clean",0,0, 3 },
{"metadata", 1, 0, 'e'}, /* superblock format */ {"metadata", 1, 0, 'e'}, /* superblock format */
{"bitmap", 1, 0, 'b'},
{"bitmap-chunk", 1, 0, 4},
/* For assemble */ /* For assemble */
{"uuid", 1, 0, 'u'}, {"uuid", 1, 0, 'u'},
@ -188,6 +191,7 @@ char Help[] =
char OptionHelp[] = char OptionHelp[] =
"Any parameter that does not start with '-' is treated as a device name\n" "Any parameter that does not start with '-' is treated as a device name\n"
"or, for --examine-bitmap, a file name.\n"
"The first such name is often the name of an md device. Subsequent\n" "The first such name is often the name of an md device. Subsequent\n"
"names are often names of component devices." "names are often names of component devices."
"\n" "\n"
@ -205,6 +209,7 @@ char OptionHelp[] =
" --create -C : Create a new array\n" " --create -C : Create a new array\n"
" --detail -D : Display details of an array\n" " --detail -D : Display details of an array\n"
" --examine -E : Examine superblock on an array component\n" " --examine -E : Examine superblock on an array component\n"
" --examine-bitmap -X: Display the detail of a bitmap file\n"
" --monitor -F : monitor (follow) some arrays\n" " --monitor -F : monitor (follow) some arrays\n"
" --query -Q : Display general information about how a\n" " --query -Q : Display general information about how a\n"
" device relates to the md driver\n" " device relates to the md driver\n"
@ -212,6 +217,7 @@ char OptionHelp[] =
/* /*
"\n" "\n"
" For create or build:\n" " For create or build:\n"
" --bitmap= -b : File to store bitmap in - may pre-exist for --build\n"
" --chunk= -c : chunk size of kibibytes\n" " --chunk= -c : chunk size of kibibytes\n"
" --rounding= : rounding factor for linear array (==chunk size)\n" " --rounding= : rounding factor for linear array (==chunk size)\n"
" --level= -l : raid level: 0,1,4,5,6,linear,mp. 0 or linear for build\n" " --level= -l : raid level: 0,1,4,5,6,linear,mp. 0 or linear for build\n"
@ -224,8 +230,11 @@ char OptionHelp[] =
" : insert a missing drive for RAID5.\n" " : insert a missing drive for RAID5.\n"
" --auto(=p) -a : Automatically allocate new (partitioned) md array if needed.\n" " --auto(=p) -a : Automatically allocate new (partitioned) md array if needed.\n"
" --assume-clean : Assume the array is already in-sync. This is dangerous.\n" " --assume-clean : Assume the array is already in-sync. This is dangerous.\n"
" --bitmap-chunk= : chunksize of bitmap in bitmap file (Kilobytes)\n"
" --delay= -d : seconds between bitmap updates\n"
"\n" "\n"
" For assemble:\n" " For assemble:\n"
" --bitmap= -b : File to find bitmap information in\n"
" --uuid= -u : uuid of array to assemble. Devices which don't\n" " --uuid= -u : uuid of array to assemble. Devices which don't\n"
" have this uuid are excluded\n" " have this uuid are excluded\n"
" --super-minor= -m : minor number to look for in super-block when\n" " --super-minor= -m : minor number to look for in super-block when\n"
@ -280,6 +289,7 @@ char Help_create[] =
" other levels.\n" " other levels.\n"
"\n" "\n"
" Options that are valid with --create (-C) are:\n" " Options that are valid with --create (-C) are:\n"
" --bitmap= : Create a bitmap for the array with the given filename\n"
" --chunk= -c : chunk size of kibibytes\n" " --chunk= -c : chunk size of kibibytes\n"
" --rounding= : rounding factor for linear array (==chunk size)\n" " --rounding= : rounding factor for linear array (==chunk size)\n"
" --level= -l : raid level: 0,1,4,5,6,linear,multipath and synonyms\n" " --level= -l : raid level: 0,1,4,5,6,linear,multipath and synonyms\n"
@ -293,6 +303,8 @@ char Help_create[] =
" --run -R : insist of running the array even if not all\n" " --run -R : insist of running the array even if not all\n"
" : devices are present or some look odd.\n" " : devices are present or some look odd.\n"
" --readonly -o : start the array readonly - not supported yet.\n" " --readonly -o : start the array readonly - not supported yet.\n"
" --bitmap-chunk= : bitmap chunksize in Kilobytes.\n"
" --delay= -d : bitmap update delay in seconds.\n"
"\n" "\n"
; ;
@ -308,10 +320,13 @@ char Help_build[] =
" The level may only be 0, raid0, or linear.\n" " The level may only be 0, raid0, or linear.\n"
" All devices must be listed and the array will be started once complete.\n" " All devices must be listed and the array will be started once complete.\n"
" Options that are valid with --build (-B) are:\n" " Options that are valid with --build (-B) are:\n"
" --bitmap= : file to store/find bitmap information in.\n"
" --chunk= -c : chunk size of kibibytes\n" " --chunk= -c : chunk size of kibibytes\n"
" --rounding= : rounding factor for linear array (==chunk size)\n" " --rounding= : rounding factor for linear array (==chunk size)\n"
" --level= -l : 0, raid0, or linear\n" " --level= -l : 0, raid0, or linear\n"
" --raid-devices= -n : number of active devices in array\n" " --raid-devices= -n : number of active devices in array\n"
" --bitmap-chunk= : bitmap chunksize in Kilobytes.\n"
" --delay= -d : bitmap update delay in seconds.\n"
; ;
char Help_assemble[] = char Help_assemble[] =
@ -347,6 +362,7 @@ char Help_assemble[] =
" and components are determined from the config file.\n" " and components are determined from the config file.\n"
"\n" "\n"
"Options that are valid with --assemble (-A) are:\n" "Options that are valid with --assemble (-A) are:\n"
" --bitmap= : bitmap file to use wit the array\n"
" --uuid= -u : uuid of array to assemble. Devices which don't\n" " --uuid= -u : uuid of array to assemble. Devices which don't\n"
" have this uuid are excluded\n" " have this uuid are excluded\n"
" --super-minor= -m : minor number to look for in super-block when\n" " --super-minor= -m : minor number to look for in super-block when\n"
@ -393,6 +409,7 @@ char Help_misc[] =
" device relates to the md driver\n" " device relates to the md driver\n"
" --detail -D : Display details of an array\n" " --detail -D : Display details of an array\n"
" --examine -E : Examine superblock on an array component\n" " --examine -E : Examine superblock on an array component\n"
" --examine-bitmap -X: Display contents of a bitmap file\n"
" --zero-superblock : erase the MD superblock from a device.\n" " --zero-superblock : erase the MD superblock from a device.\n"
" --run -R : start a partially built array\n" " --run -R : start a partially built array\n"
" --stop -S : deactivate array, releasing all resources\n" " --stop -S : deactivate array, releasing all resources\n"

315
bitmap.c Normal file
View File

@ -0,0 +1,315 @@
/*
* mdadm - manage Linux "md" devices aka RAID arrays.
*
* Copyright (C) 2004 Paul Clements, SteelEye Technology, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <sys/types.h>
#include <sys/stat.h>
#include "mdadm.h"
#include <asm/byteorder.h>
/* Smaller of a and b.  This is a function-like macro, so each argument may
 * be evaluated twice: do not pass expressions with side effects. */
#define min(a,b) (((a) < (b)) ? (a) : (b))
/*
 * Convert, in place, every multi-byte field of a bitmap superblock from the
 * little-endian on-disk representation to host byte order.  The uuid is a
 * plain byte array and is left untouched.
 *
 * Deliberately not declared 'inline': under ISO C99/C11 a plain 'inline'
 * definition with external linkage does not emit an out-of-line definition,
 * so any call the compiler decides not to inline fails at link time.
 * Dropping the keyword keeps the external linkage and works with every
 * language standard.
 */
void sb_le_to_cpu(bitmap_super_t *sb)
{
	sb->magic = __le32_to_cpu(sb->magic);
	sb->version = __le32_to_cpu(sb->version);
	/* uuid gets no translation */
	sb->events = __le64_to_cpu(sb->events);
	sb->events_cleared = __le64_to_cpu(sb->events_cleared);
	sb->state = __le32_to_cpu(sb->state);
	sb->chunksize = __le32_to_cpu(sb->chunksize);
	sb->daemon_sleep = __le32_to_cpu(sb->daemon_sleep);
	sb->sync_size = __le64_to_cpu(sb->sync_size);
}
/*
 * Convert a bitmap superblock, in place, from host byte order to the
 * little-endian on-disk representation.
 *
 * A byte swap is its own inverse, so this simply reuses sb_le_to_cpu().
 *
 * Deliberately not declared 'inline': under ISO C99/C11 a plain 'inline'
 * definition with external linkage does not emit an out-of-line definition,
 * so a call that is not inlined would fail to link.
 */
void sb_cpu_to_le(bitmap_super_t *sb)
{
	sb_le_to_cpu(sb); /* these are really the same thing */
}
/*
 * Names for the numeric state stored in a bitmap superblock, looked up
 * through map_num() by bitmap_state().
 * NOTE(review): 2 equals BITMAP_STALE in bitmap.h; BITMAP_ACTIVE (1) has no
 * entry here.  The { NULL, -1 } entry is presumably the end-of-table marker
 * that map_num() expects -- confirm against its definition.
 */
mapping_t bitmap_states[] = {
{ "OK", 0 },
{ "Out of date", 2 },
{ NULL, -1 }
};
/*
 * Return the printable name for a bitmap state number.
 *
 * The name is looked up in bitmap_states; when map_num() yields no name
 * the fixed string "Unknown" is returned instead, so the result is never
 * NULL.
 */
const char *bitmap_state(int state_num)
{
	char *name = map_num(bitmap_states, state_num);

	if (name == NULL)
		return "Unknown";
	return name;
}
/*
 * Format a byte count as a short human-readable string such as "4 KB".
 *
 * The value is repeatedly divided by 1024 (truncating, never rounding up)
 * until it is below 1024 or the largest known suffix has been reached.
 * Values too large for the suffix table are expressed in the largest unit
 * ("1024 TB") instead of indexing past the end of the table.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call, so the result must be used or copied before calling again.
 */
const char *human_chunksize(unsigned long bytes)
{
	static char buf[16];
	static const char *const suffixes[] = { "B", "KB", "MB", "GB", "TB" };
	const int last = (int)(sizeof(suffixes) / sizeof(suffixes[0])) - 1;
	int i = 0;

	/* stop at the last suffix: a 64-bit value can be shifted six times */
	while (i < last && (bytes >> 10)) {
		bytes >>= 10;
		i++;
	}

	snprintf(buf, sizeof(buf), "%lu %s", bytes, suffixes[i]);

	return buf;
}
/*
 * Result of reading a bitmap file: the superblock converted to host byte
 * order plus the bit counts gathered by bitmap_fd_read().
 */
typedef struct bitmap_info_s {
bitmap_super_t sb; /* superblock, already in CPU byte order */
unsigned long long total_bits; /* bits actually examined (0 if not scanned) */
unsigned long long dirty_bits; /* how many of those bits were set */
} bitmap_info_t;
/*
 * Count the dirty (set) bits among the num_bits low-order bits of byte.
 *
 * num_bits is expected to be 1..8; any other value yields 0, exactly as
 * the fall-through switch this replaces did.
 *
 * Deliberately not declared 'inline': under ISO C99/C11 a plain 'inline'
 * definition with external linkage does not emit an out-of-line definition,
 * so a call that is not inlined (e.g. at -O0) fails at link time.
 */
int count_dirty_bits_byte(char byte, int num_bits)
{
	int num = 0;
	int bit;

	if (num_bits < 1 || num_bits > 8)
		return 0;

	/* go through unsigned char so a negative 'char' cannot confuse the mask */
	for (bit = 0; bit < num_bits; bit++)
		if ((unsigned char)byte & (1u << bit))
			num++;

	return num;
}
/*
 * Count the set bits in the first num_bits bits of buf.
 *
 * Whole bytes are counted in full; if num_bits is not a multiple of 8 the
 * remaining bits are taken from the low-order end of the following byte.
 */
int count_dirty_bits(char *buf, int num_bits)
{
	int whole_bytes = num_bits / 8;
	int tail_bits = num_bits % 8;
	int total = 0;
	int idx = 0;

	while (idx < whole_bytes) {
		total += count_dirty_bits_byte(buf[idx], 8);
		idx++;
	}

	/* trailing partial byte, if the bit count is not byte aligned */
	if (tail_bits != 0)
		total += count_dirty_bits_byte(buf[idx], tail_bits);

	return total;
}
/*
 * Number of bits (one per chunk) needed to cover an array.
 *
 * array_size is multiplied by 512 to obtain bytes, i.e. it is a count of
 * 512-byte sectors; chunksize is in bytes.  The result is rounded up so a
 * partial final chunk still gets a bit.  chunksize must not be zero.
 */
unsigned long long bitmap_bits(unsigned long long array_size,
			       unsigned long chunksize)
{
	unsigned long long total_bytes = array_size * 512;
	unsigned long long rounded_up = total_bytes + chunksize - 1;

	return rounded_up / chunksize;
}
/*
 * Read a bitmap superblock from fd and, unless 'brief' is set, scan the
 * bitmap that follows it, counting total and dirty bits.
 *
 * fd must be positioned at the start of the bitmap file.
 *
 * Returns a malloc()ed bitmap_info_t that the caller must free(), or NULL
 * if memory could not be allocated or the superblock could not be read in
 * full.  The superblock in the result is in CPU byte order.  total_bits and
 * dirty_bits are left at 0 when the bitmap is not scanned (brief mode, or a
 * superblock whose sync_size or chunksize is 0).
 */
bitmap_info_t *bitmap_fd_read(int fd, int brief)
{
	unsigned long long total_bits = 0, read_bits = 0, dirty_bits = 0;
	bitmap_info_t *info;
	char buf[512];
	int n;

	info = malloc(sizeof(*info));
	if (info == NULL) {
		/* sizeof yields size_t; cast so the format matches everywhere */
		fprintf(stderr, Name ": failed to allocate %lu bytes\n",
			(unsigned long)sizeof(*info));
		return NULL;
	}

	if (read(fd, &info->sb, sizeof(info->sb)) != sizeof(info->sb)) {
		fprintf(stderr, Name ": failed to read superblock of bitmap "
			"file: %s\n", strerror(errno));
		free(info);
		return NULL;
	}

	sb_le_to_cpu(&info->sb); /* convert superblock to CPU byte ordering */

	/* a zero chunksize (corrupt file) would divide by zero in
	 * bitmap_bits(), so treat it like an empty bitmap */
	if (brief || info->sb.sync_size == 0 || info->sb.chunksize == 0)
		goto out;

	/* read the rest of the file counting total bits and dirty bits --
	 * we stop when either:
	 * 1) we hit EOF, in which case the file is truncated: we warn and
	 *    report only the bits that were actually present
	 * 2) we've read the full bitmap, in which case we ignore any trailing
	 *    data in the file
	 */
	total_bits = bitmap_bits(info->sb.sync_size, info->sb.chunksize);
	/* fill the whole buffer per read(); sizeof(*buf) would be one byte */
	while ((n = read(fd, buf, sizeof(buf))) > 0) {
		unsigned long long remaining = total_bits - read_bits;

		if (remaining > sizeof(buf) * 8) /* we want the full buffer */
			remaining = sizeof(buf) * 8;
		if (remaining > (unsigned long long)n * 8) /* the file is truncated */
			remaining = (unsigned long long)n * 8;

		dirty_bits += count_dirty_bits(buf, remaining);
		read_bits += remaining;

		if (read_bits >= total_bits) /* we've got what we want */
			break;
	}

	if (read_bits < total_bits) { /* file truncated... */
		fprintf(stderr, Name ": WARNING: bitmap file is not large "
			"enough for array size %llu!\n\n",
			(unsigned long long)info->sb.sync_size);
		total_bits = read_bits;
	}
out:
	info->total_bits = total_bits;
	info->dirty_bits = dirty_bits;
	return info;
}
/*
 * Open the named bitmap file read-only and parse it with bitmap_fd_read().
 *
 * Returns whatever bitmap_fd_read() returns, or NULL (after printing a
 * message to stderr) when the file cannot be opened.  The file descriptor
 * is always closed before returning.
 */
bitmap_info_t *bitmap_file_read(char *filename, int brief)
{
	bitmap_info_t *result = NULL;
	int handle = open(filename, O_RDONLY);

	if (handle >= 0) {
		result = bitmap_fd_read(handle, brief);
		close(handle);
	} else {
		fprintf(stderr, Name ": failed to open bitmap file %s: %s\n",
			filename, strerror(errno));
	}

	return result;
}
int ExamineBitmap(char *filename, int brief)
{
/*
* Read the bitmap file and display its contents
*/
bitmap_super_t *sb;
bitmap_info_t *info;
int rv = 1;
info = bitmap_file_read(filename, brief);
if (!info)
return rv;
sb = &info->sb;
printf(" Filename : %s\n", filename);
printf(" Magic : %08x\n", sb->magic);
if (sb->magic != BITMAP_MAGIC) {
fprintf(stderr, Name ": invalid bitmap magic 0x%x, the bitmap file appears to be corrupted\n", sb->magic);
}
printf(" Version : %d\n", sb->version);
if (sb->version != BITMAP_MAJOR) {
fprintf(stderr, Name ": unknown bitmap version %d, either the bitmap file is corrupted or you need to upgrade your tools\n", sb->version);
goto free_info;
}
rv = 0;
printf(" UUID : %08x.%08x.%08x.%08x\n",
*(__u32 *)(sb->uuid+0),
*(__u32 *)(sb->uuid+4),
*(__u32 *)(sb->uuid+8),
*(__u32 *)(sb->uuid+12));
printf(" Events : %llu\n", sb->events);
printf(" Events Cleared : %llu\n", sb->events_cleared);
printf(" State : %s\n", bitmap_state(sb->state));
printf(" Chunksize : %s\n", human_chunksize(sb->chunksize));
printf(" Daemon : %ds flush period\n", sb->daemon_sleep);
printf(" Sync Size : %llu%s\n", sb->sync_size,
human_size(sb->sync_size * 1024));
if (brief)
goto free_info;
printf(" Bitmap : %llu bits (chunks), %llu dirty (%2.1f%%)\n",
info->total_bits, info->dirty_bits,
100.0 * info->dirty_bits / (info->total_bits + 1));
free_info:
free(info);
return rv;
}
/*
 * Create a bitmap file with a superblock and (optionally) a full bitmap.
 *
 * filename     - file to create; an existing file is only overwritten
 *                when 'force' is non-zero
 * uuid         - 16 bytes copied into the superblock, or NULL to leave the
 *                uuid zeroed
 * chunksize    - stored in the superblock and used to size the bitmap;
 *                must not be zero
 * daemon_sleep - stored in the superblock
 * array_size   - stored as sync_size; when it is 0 only the superblock is
 *                written
 *
 * Every bit of the bitmap is written as 1.  Returns 0 on success and 1 on
 * failure; once the file has been opened, a failure removes it again.
 */
int CreateBitmap(char *filename, int force, char uuid[16],
		 unsigned long chunksize, unsigned long daemon_sleep,
		 unsigned long long array_size)
{
	FILE *fp;
	int rv = 1;
	char block[512];
	bitmap_super_t sb;
	long long bytes, filesize;

	/* bitmap_bits() divides by chunksize */
	if (chunksize == 0) {
		fprintf(stderr, Name ": bitmap chunksize must not be zero\n");
		return rv;
	}

	if (!force && access(filename, F_OK) == 0) {
		fprintf(stderr, Name ": bitmap file %s already exists, use --force to overwrite\n", filename);
		return rv;
	}

	fp = fopen(filename, "w");
	if (fp == NULL) {
		fprintf(stderr, Name ": failed to open bitmap file %s: %s\n",
			filename, strerror(errno));
		return rv;
	}

	memset(&sb, 0, sizeof(sb));
	sb.magic = BITMAP_MAGIC;
	sb.version = BITMAP_MAJOR;
	if (uuid != NULL)
		memcpy(sb.uuid, uuid, 16);
	sb.chunksize = chunksize;
	sb.daemon_sleep = daemon_sleep;
	sb.sync_size = array_size;

	sb_cpu_to_le(&sb); /* convert to on-disk byte ordering */

	if (fwrite(&sb, sizeof(sb), 1, fp) != 1) {
		fprintf(stderr, Name ": failed to write superblock to bitmap file %s: %s\n", filename, strerror(errno));
		goto out;
	}

	/* calculate the size of the bitmap and write it to disk */
	bytes = (bitmap_bits(array_size, chunksize) + 7) / 8;
	if (!bytes) {
		rv = 0;
		goto out;
	}

	filesize = bytes + sizeof(sb);

	/* the bitmap is written in whole 512-byte blocks of all-ones */
	memset(block, 0xff, sizeof(block));

	while (bytes > 0) {
		if (fwrite(block, sizeof(block), 1, fp) != 1) {
			fprintf(stderr, Name ": failed to write bitmap file %s: %s\n", filename, strerror(errno));
			goto out;
		}
		bytes -= (long long)sizeof(block);
	}

	/* push the buffered data to the descriptor before truncating it;
	 * otherwise the flush done by fclose() can re-extend the file */
	if (fflush(fp) != 0) {
		fprintf(stderr, Name ": failed to write bitmap file %s: %s\n", filename, strerror(errno));
		goto out;
	}

	rv = 0;

	/* make the file be the right size (well, to the nearest byte);
	 * this is best effort -- trailing data in the file is harmless */
	if (ftruncate(fileno(fp), filesize) != 0)
		fprintf(stderr, Name ": warning: could not trim bitmap file %s: %s\n",
			filename, strerror(errno));

out:
	/* fclose() flushes too, so a failure here means data was lost */
	if (fclose(fp) != 0 && rv == 0) {
		fprintf(stderr, Name ": failed to write bitmap file %s: %s\n", filename, strerror(errno));
		rv = 1;
	}
	if (rv)
		unlink(filename); /* possibly corrupted, better get rid of it */
	return rv;
}

273
bitmap.h Normal file
View File

@ -0,0 +1,273 @@
/*
* bitmap.h: Copyright (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
*
* additions: Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.
*/
#ifndef BITMAP_H
#define BITMAP_H 1
/* Bitmap format version.  BITMAP_MAJOR is the value kept in the 'version'
 * field of the on-disk superblock (see bitmap_super_t below). */
#define BITMAP_MAJOR 3
#define BITMAP_MINOR 38
/*
* in-memory bitmap:
*
* Use 16 bit block counters to track pending writes to each "chunk".
* The 2 high order bits are special-purpose, the first is a flag indicating
* whether a resync is needed. The second is a flag indicating whether a
* resync is active.
* This means that the counter is actually 14 bits:
*
* +--------+--------+------------------------------------------------+
* | resync | resync | counter |
* | needed | active | |
* | (0-1) | (0-1) | (0-16383) |
* +--------+--------+------------------------------------------------+
*
* The "resync needed" bit is set when:
* a '1' bit is read from storage at startup.
* a write request fails on some drives
* a resync is aborted on a chunk with 'resync active' set
* It is cleared (and resync-active set) when a resync starts across all drives
* of the chunk.
*
*
* The "resync active" bit is set when:
* a resync is started on all drives, and resync_needed is set.
* resync_needed will be cleared (as long as resync_active wasn't already set).
* It is cleared when a resync completes.
*
* The counter counts pending write requests, plus the on-disk bit.
* When the counter is '1' and the resync bits are clear, the on-disk
* bit can be cleared aswell, thus setting the counter to 0.
* When we set a bit, or in the counter (to start a write), if the fields is
* 0, we first set the disk bit and set the counter to 1.
*
* Also, we'll hijack the "map" pointer itself and use it as two 16 bit block
* counters as a fallback when "page" memory cannot be allocated:
*
* Normal case (page memory allocated):
*
* page pointer (32-bit)
*
* [ ] ------+
* |
* +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters)
* c1 c2 c2048
*
* Hijacked case (page memory allocation failed):
*
* hijacked page pointer (32-bit)
*
* [ ][ ] (no page memory allocated)
* counter #1 (16-bit) counter #2 (16-bit)
*
*/
#ifdef __KERNEL__
/*
 * Kernel-only helpers for the in-memory counters described above: a
 * counter is 16 bits wide, the top bit is the "resync needed" flag, the
 * next bit the "resync active" flag and the remaining 14 bits the count.
 */
#define PAGE_BITS (PAGE_SIZE << 3)
#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3)
typedef __u16 bitmap_counter_t;
#define COUNTER_BITS 16
#define COUNTER_BIT_SHIFT 4
#define COUNTER_BYTE_RATIO (COUNTER_BITS / 8)
#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3)
#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1)))
#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2)))
#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1)
/* NOTE(review): the argument x is not parenthesized in the three macros
 * below, so the cast binds only to the first operand of a compound
 * expression -- pass a simple value. */
#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK)
#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK)
#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX)
/* how many counters per page? */
#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS)
/* same, except a shift value for more efficient bitops */
#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT)
/* same, except a mask value for more efficient bitops */
#define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1)
#define BITMAP_BLOCK_SIZE 512
#define BITMAP_BLOCK_SHIFT 9
/* how many blocks per chunk? (this is variable) */
#define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->chunksize >> BITMAP_BLOCK_SHIFT)
#define CHUNK_BLOCK_SHIFT(bitmap) ((bitmap)->chunkshift - BITMAP_BLOCK_SHIFT)
#define CHUNK_BLOCK_MASK(bitmap) (CHUNK_BLOCK_RATIO(bitmap) - 1)
/* when hijacked, the counters and bits represent even larger "chunks" */
/* there will be 1024 chunks represented by each counter in the page pointers */
#define PAGEPTR_BLOCK_RATIO(bitmap) \
(CHUNK_BLOCK_RATIO(bitmap) << PAGE_COUNTER_SHIFT >> 1)
#define PAGEPTR_BLOCK_SHIFT(bitmap) \
(CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)
/*
* on-disk bitmap:
*
* Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
* file a page at a time. There's a superblock at the start of the file.
*/
/* map chunks (bits) to file pages - offset by the size of the superblock */
#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3))
#endif
/*
* bitmap structures:
*/
/* Magic number at the start of a bitmap file.  Stored little-endian, its
 * bytes are 0x62 0x69 0x74 0x6d, i.e. the ASCII text "bitm". */
#define BITMAP_MAGIC 0x6d746962
/* use these for bitmap->flags and bitmap->sb->state bit-fields */
/* (the values are distinct single bits, so they can be OR-ed together) */
enum bitmap_state {
BITMAP_ACTIVE = 0x001, /* the bitmap is in use */
BITMAP_STALE = 0x002 /* the bitmap file is out of date or had -EIO */
};
/* the superblock at the front of the bitmap file -- little endian */
/*
 * The number in each field comment is the field's byte offset.  The named
 * fields occupy the first 60 bytes and 'pad' extends the structure to
 * 4096 bytes.  The (1), (2), (3) markers refer to the notes that follow
 * the structure.
 */
typedef struct bitmap_super_s {
__u32 magic; /* 0 BITMAP_MAGIC */
__u32 version; /* 4 the bitmap major for now, could change... */
__u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */
__u64 events; /* 24 event counter for the bitmap (1)*/
__u64 events_cleared;/*32 event counter when last bit cleared (2) */
__u64 sync_size; /* 40 the size of the md device's sync range(3) */
__u32 state; /* 48 bitmap state information */
__u32 chunksize; /* 52 the bitmap chunk size in bytes */
__u32 daemon_sleep; /* 56 seconds between disk flushes */
__u8 pad[4096 - 60]; /* set to zero */
} bitmap_super_t;
/* notes:
* (1) This event counter is updated before the eventcounter in the md superblock
* When a bitmap is loaded, it is only accepted if this event counter is equal
* to, or one greater than, the event counter in the superblock.
* (2) This event counter is updated when the other one is *if*and*only*if* the
* array is not degraded. As bits are not cleared when the array is degraded,
* this represents the last time that any bits were cleared.
* If a device is being added that has an event count with this value or
* higher, it is accepted as conforming to the bitmap.
* (3)This is the number of sectors represented by the bitmap, and is the range that
* resync happens across. For raid1 and raid5/6 it is the size of individual
* devices. For raid10 it is the size of the array.
*/
#ifdef __KERNEL__
/* the in-memory bitmap is represented by bitmap_pages */
/* (kernel-only: this definition is compiled only when __KERNEL__ is defined) */
struct bitmap_page {
/*
* map points to the actual memory page
*/
char *map;
/*
* in emergencies (when map cannot be alloced), hijack the map
* pointer and use it as two counters itself
*/
unsigned int hijacked;
/*
* count of dirty bits on the page
*/
int count;
};
/* keep track of bitmap file pages that have pending writes on them */
/* (kernel-only) one list node per such page */
struct page_list {
struct list_head list; /* list linkage */
struct page *page; /* the page this entry refers to */
};
/* the main bitmap structure - one per mddev */
/*
 * Kernel-only.  Holds the in-memory counters (bp), the geometry of the
 * bitmap (chunksize / chunkshift / chunks), the backing file with its
 * cached pages, and the state used by the daemons that update the file.
 */
struct bitmap {
struct bitmap_page *bp;
unsigned long pages; /* total number of pages in the bitmap */
unsigned long missing_pages; /* number of pages not yet allocated */
mddev_t *mddev; /* the md device that the bitmap is for */
int counter_bits; /* how many bits per block counter */
/* bitmap chunksize -- how much data does each bit represent? */
unsigned long chunksize;
unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */
unsigned long chunks; /* total number of data chunks for the array */
/* We hold a count on the chunk currently being synced, and drop
* it when the last block is started. If the resync is aborted
* midway, we need to be able to drop that count, so we remember
* the counted chunk..
*/
unsigned long syncchunk;
/* NOTE(review): presumably mirrors events_cleared in the on-disk
 * superblock -- confirm against the kernel source */
__u64 events_cleared;
/* bitmap spinlock */
spinlock_t lock;
struct file *file; /* backing disk file */
struct page *sb_page; /* cached copy of the bitmap file superblock */
struct page **filemap; /* list of cache pages for the file */
unsigned long *filemap_attr; /* attributes associated w/ filemap pages */
unsigned long file_pages; /* number of pages in the file */
unsigned long flags; /* enum bitmap_state bits */
/*
* the bitmap daemon - periodically wakes up and sweeps the bitmap
* file, cleaning up bits and flushing out pages to disk as necessary
*/
mdk_thread_t *daemon;
unsigned long daemon_sleep; /* how many seconds between updates? */
/*
* bitmap write daemon - this daemon performs writes to the bitmap file
* this thread is only needed because of a limitation in ext3 (jbd)
* that does not allow a task to have two journal transactions ongoing
* simultaneously (even if the transactions are for two different
* filesystems) -- in the case of bitmap, that would be the filesystem
* that the bitmap file resides on and the filesystem that is mounted
* on the md device -- see current->journal_info in jbd/transaction.c
*/
mdk_thread_t *write_daemon;
mdk_thread_t *writeback_daemon;
spinlock_t write_lock;
struct semaphore write_ready;
struct semaphore write_done;
unsigned long writes_pending;
wait_queue_head_t write_wait;
struct list_head write_pages;
struct list_head complete_pages;
mempool_t *write_pool;
};
/* the bitmap API */
/* (kernel-only declarations: the implementations are not part of this
 * header, and the user-space tool does not see them because __KERNEL__
 * is not defined there) */
/* these are used only by md/bitmap */
int bitmap_create(mddev_t *mddev);
void bitmap_destroy(mddev_t *mddev);
int bitmap_active(struct bitmap *bitmap);
char *file_path(struct file *file, char *buf, int count);
void bitmap_print_sb(struct bitmap *bitmap);
int bitmap_update_sb(struct bitmap *bitmap);
int bitmap_setallbits(struct bitmap *bitmap);
/* these are exported */
void bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors);
void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
int success);
int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks);
void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted);
void bitmap_close_sync(struct bitmap *bitmap);
int bitmap_unplug(struct bitmap *bitmap);
#endif
#endif

7
md_u.h
View File

@ -23,6 +23,7 @@
#define GET_DISK_INFO _IOR (MD_MAJOR, 0x12, mdu_disk_info_t) #define GET_DISK_INFO _IOR (MD_MAJOR, 0x12, mdu_disk_info_t)
#define PRINT_RAID_DEBUG _IO (MD_MAJOR, 0x13) #define PRINT_RAID_DEBUG _IO (MD_MAJOR, 0x13)
#define RAID_AUTORUN _IO (MD_MAJOR, 0x14) #define RAID_AUTORUN _IO (MD_MAJOR, 0x14)
#define GET_BITMAP_FILE _IOR (MD_MAJOR, 0x15, mdu_bitmap_file_t)
/* configuration */ /* configuration */
#define CLEAR_ARRAY _IO (MD_MAJOR, 0x20) #define CLEAR_ARRAY _IO (MD_MAJOR, 0x20)
@ -35,6 +36,7 @@
#define PROTECT_ARRAY _IO (MD_MAJOR, 0x27) #define PROTECT_ARRAY _IO (MD_MAJOR, 0x27)
#define HOT_ADD_DISK _IO (MD_MAJOR, 0x28) #define HOT_ADD_DISK _IO (MD_MAJOR, 0x28)
#define SET_DISK_FAULTY _IO (MD_MAJOR, 0x29) #define SET_DISK_FAULTY _IO (MD_MAJOR, 0x29)
#define SET_BITMAP_FILE _IOW (MD_MAJOR, 0x2b, int)
/* usage */ /* usage */
#define RUN_ARRAY _IOW (MD_MAJOR, 0x30, mdu_param_t) #define RUN_ARRAY _IOW (MD_MAJOR, 0x30, mdu_param_t)
@ -105,6 +107,11 @@ typedef struct mdu_start_info_s {
} mdu_start_info_t; } mdu_start_info_t;
/*
 * Argument type of the GET_BITMAP_FILE ioctl (declared above with _IOR,
 * i.e. data is read back from the kernel into this structure).
 */
typedef struct mdu_bitmap_file_s
{
char pathname[4096];	/* fixed-size buffer; presumably the NUL-terminated path of the bitmap file -- TODO confirm */
} mdu_bitmap_file_t;
typedef struct mdu_param_s typedef struct mdu_param_s
{ {
int personality; /* 1,2,3,4 */ int personality; /* 1,2,3,4 */

72
mdadm.c
View File

@ -25,6 +25,9 @@
* The University of New South Wales * The University of New South Wales
* Sydney, 2052 * Sydney, 2052
* Australia * Australia
*
* Additions for bitmap and async RAID options, Copyright (C) 2003-2004,
* Paul Clements, SteelEye Technology, Inc.
*/ */
#include "mdadm.h" #include "mdadm.h"
@ -56,6 +59,9 @@ int main(int argc, char *argv[])
char devmode = 0; char devmode = 0;
int runstop = 0; int runstop = 0;
int readonly = 0; int readonly = 0;
int bitmap_fd = -1;
char *bitmap_file = NULL;
int bitmap_chunk = UnSet;
int SparcAdjust = 0; int SparcAdjust = 0;
mddev_dev_t devlist = NULL; mddev_dev_t devlist = NULL;
mddev_dev_t *devlistend = & devlist; mddev_dev_t *devlistend = & devlist;
@ -95,6 +101,7 @@ int main(int argc, char *argv[])
ident.spare_group = NULL; ident.spare_group = NULL;
ident.autof = 0; ident.autof = 0;
ident.st = NULL; ident.st = NULL;
ident.bitmap_fd = -1;
while ((option_index = -1) , while ((option_index = -1) ,
(opt=getopt_long(argc, argv, (opt=getopt_long(argc, argv,
@ -128,7 +135,10 @@ int main(int argc, char *argv[])
case 'v': verbose = 1; case 'v': verbose = 1;
continue; continue;
case 'b': brief = 1; case 'b':
if (mode == ASSEMBLE || mode == BUILD || mode == CREATE)
break; /* b means bitmap */
brief = 1;
continue; continue;
case ':': case ':':
@ -159,6 +169,7 @@ int main(int argc, char *argv[])
case '#': case '#':
case 'D': case 'D':
case 'E': case 'E':
case 'X':
case 'Q': newmode = MISC; break; case 'Q': newmode = MISC; break;
case 'R': case 'R':
case 'S': case 'S':
@ -574,6 +585,8 @@ int main(int argc, char *argv[])
continue; continue;
case O(MONITOR,'d'): /* delay in seconds */ case O(MONITOR,'d'): /* delay in seconds */
case O(BUILD,'d'): /* delay for bitmap updates */
case O(CREATE,'d'):
if (delay) if (delay)
fprintf(stderr, Name ": only specify delay once. %s ignored.\n", fprintf(stderr, Name ": only specify delay once. %s ignored.\n",
optarg); optarg);
@ -655,6 +668,7 @@ int main(int argc, char *argv[])
case O(MISC,'K'): case O(MISC,'K'):
case O(MISC,'R'): case O(MISC,'R'):
case O(MISC,'S'): case O(MISC,'S'):
case O(MISC,'X'):
case O(MISC,'o'): case O(MISC,'o'):
case O(MISC,'w'): case O(MISC,'w'):
if (devmode && devmode != opt && if (devmode && devmode != opt &&
@ -676,6 +690,36 @@ int main(int argc, char *argv[])
} }
SparcAdjust = 1; SparcAdjust = 1;
continue; continue;
case O(ASSEMBLE,'b'): /* here we simply set the bitmap file */
if (!optarg) {
fprintf(stderr, Name ": bitmap file needed with -b in --assemble mode\n");
exit(2);
}
bitmap_fd = open(optarg, O_RDWR);
if (!*optarg || bitmap_fd < 0) {
fprintf(stderr, Name ": cannot open bitmap file %s: %s\n", optarg, strerror(errno));
exit(2);
}
ident.bitmap_fd = bitmap_fd; /* for Assemble */
continue;
case O(BUILD,'b'):
case O(CREATE,'b'): /* here we create the bitmap */
bitmap_file = optarg;
continue;
case O(BUILD,4):
case O(CREATE,4): /* bitmap chunksize */
bitmap_chunk = strtol(optarg, &c, 10);
if (!optarg[0] || *c || bitmap_chunk < 0 ||
bitmap_chunk & (bitmap_chunk - 1)) {
fprintf(stderr, Name ": invalid bitmap chunksize: %s\n",
optarg);
exit(2);
}
/* convert K to B, chunk of 0K means 512B */
bitmap_chunk = bitmap_chunk ? bitmap_chunk * 1024 : 512;
continue;
} }
/* We have now processed all the valid options. Anything else is /* We have now processed all the valid options. Anything else is
* an error * an error
@ -726,6 +770,7 @@ int main(int argc, char *argv[])
} }
} }
rv = 0; rv = 0;
switch(mode) { switch(mode) {
case MANAGE: case MANAGE:
@ -813,9 +858,27 @@ int main(int argc, char *argv[])
} }
break; break;
case BUILD: case BUILD:
rv = Build(devlist->devname, mdfd, chunk, level, layout, raiddisks, devlist->next, assume_clean); if (bitmap_chunk == UnSet) bitmap_chunk = DEFAULT_BITMAP_CHUNK;
if (delay == 0) delay = DEFAULT_BITMAP_DELAY;
if (bitmap_file) {
bitmap_fd = open(bitmap_file, O_RDWR,0);
if (bitmap_fd < 0 && errno != ENOENT) {
perror(Name ": cannot create bitmap file");
rv |= 1;
break;
}
if (bitmap_fd < 0) {
bitmap_fd = CreateBitmap(bitmap_file, force, NULL,
bitmap_chunk, delay, size);
}
}
rv = Build(devlist->devname, mdfd, chunk, level, layout,
raiddisks, devlist->next, assume_clean,
bitmap_file, bitmap_chunk, delay);
break; break;
case CREATE: case CREATE:
if (bitmap_chunk == UnSet) bitmap_chunk = DEFAULT_BITMAP_CHUNK;
if (delay == 0) delay = DEFAULT_BITMAP_DELAY;
if (ss == NULL) { if (ss == NULL) {
for(i=0; !ss && superlist[i]; i++) for(i=0; !ss && superlist[i]; i++)
ss = superlist[i]->match_metadata_desc("default"); ss = superlist[i]->match_metadata_desc("default");
@ -827,7 +890,8 @@ int main(int argc, char *argv[])
rv = Create(ss, devlist->devname, mdfd, chunk, level, layout, size<0 ? 0 : size, rv = Create(ss, devlist->devname, mdfd, chunk, level, layout, size<0 ? 0 : size,
raiddisks, sparedisks, raiddisks, sparedisks,
devs_found-1, devlist->next, runstop, verbose, force); devs_found-1, devlist->next, runstop, verbose, force,
bitmap_file, bitmap_chunk, delay);
break; break;
case MISC: case MISC:
@ -891,6 +955,8 @@ int main(int argc, char *argv[])
rv |= Kill(dv->devname, force); continue; rv |= Kill(dv->devname, force); continue;
case 'Q': case 'Q':
rv |= Query(dv->devname); continue; rv |= Query(dv->devname); continue;
case 'X':
rv |= ExamineBitmap(dv->devname, brief); continue;
} }
mdfd = open_mddev(dv->devname, 0); mdfd = open_mddev(dv->devname, 0);
if (mdfd>=0) { if (mdfd>=0) {

15
mdadm.h
View File

@ -61,9 +61,12 @@ char *strncpy(char *dest, const char *src, size_t n) __THROW;
#define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */ #define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */
#endif #endif
/*
 * Fallbacks applied by main() in BUILD and CREATE mode when no bitmap
 * chunk size (bitmap_chunk == UnSet) or delay (delay == 0) was given.
 * A user-supplied chunk size is converted from K to bytes before use, so
 * the chunk default is presumably in bytes; the delay is presumably in
 * seconds -- TODO confirm both units.
 */
#define DEFAULT_BITMAP_CHUNK 4096
#define DEFAULT_BITMAP_DELAY 5
#include "md_u.h" #include "md_u.h"
#include "md_p.h" #include "md_p.h"
#include "bitmap.h"
/* general information that might be extracted from a superblock */ /* general information that might be extracted from a superblock */
struct mdinfo { struct mdinfo {
@ -119,6 +122,7 @@ typedef struct mddev_ident_s {
struct supertype *st; struct supertype *st;
int autof; /* 1 for normal, 2 for partitioned */ int autof; /* 1 for normal, 2 for partitioned */
char *spare_group; char *spare_group;
int bitmap_fd;
struct mddev_ident_s *next; struct mddev_ident_s *next;
} *mddev_ident_t; } *mddev_ident_t;
@ -212,13 +216,15 @@ extern int Assemble(struct supertype *st, char *mddev, int mdfd,
extern int Build(char *mddev, int mdfd, int chunk, int level, int layout, extern int Build(char *mddev, int mdfd, int chunk, int level, int layout,
int raiddisks, int raiddisks,
mddev_dev_t devlist, int assume_clean); mddev_dev_t devlist, int assume_clean,
char *bitmap_file, int bitmap_chunk, int delay);
extern int Create(struct supertype *st, char *mddev, int mdfd, extern int Create(struct supertype *st, char *mddev, int mdfd,
int chunk, int level, int layout, unsigned long size, int raiddisks, int sparedisks, int chunk, int level, int layout, unsigned long size, int raiddisks, int sparedisks,
int subdevs, mddev_dev_t devlist, int subdevs, mddev_dev_t devlist,
int runstop, int verbose, int force); int runstop, int verbose, int force,
char *bitmap_file, int bitmap_chunk, int delay);
extern int Detail(char *dev, int brief, int test); extern int Detail(char *dev, int brief, int test);
extern int Query(char *dev); extern int Query(char *dev);
@ -231,6 +237,11 @@ extern int Monitor(mddev_dev_t devlist,
extern int Kill(char *dev, int force); extern int Kill(char *dev, int force);
/*
 * CreateBitmap: main() calls this in BUILD mode when opening the bitmap
 * file fails with ENOENT, passing NULL for `uuid`, the byte-converted
 * chunk size, the delay and the array size, and stores the return value
 * in bitmap_fd.
 * NOTE(review): that usage suggests the return is a file descriptor with
 * a negative value on failure -- confirm against the implementation.
 */
extern int CreateBitmap(char *filename, int force, char uuid[16],
unsigned long chunksize, unsigned long daemon_sleep,
unsigned long long array_size);
/*
 * ExamineBitmap: invoked for the MISC-mode 'X' option on each listed
 * device name; main() ORs the return value into its exit status, so
 * non-zero is treated as failure.
 */
extern int ExamineBitmap(char *filename, int brief);
extern int md_get_version(int fd); extern int md_get_version(int fd);
extern int get_linux_version(void); extern int get_linux_version(void);
extern int parse_uuid(char *str, int uuid[4]); extern int parse_uuid(char *str, int uuid[4]);