Initial bitmap support

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
This commit is contained in:
Neil Brown 2005-06-07 23:16:35 +00:00
parent a3fd117c7a
commit c82f047cfc
10 changed files with 765 additions and 13 deletions

View File

@ -531,6 +531,13 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
mddev, strerror(errno));
return 1;
}
if (ident->bitmap_fd) {
if (ioctl(mdfd, SET_BITMAP_FILE, ident->bitmap_fd) != 0) {
fprintf(stderr, Name ": SET_BITMAP_FILE failed.\n");
return 1;
}
}
/* First, add the raid disks, but add the chosen one last */
for (i=0; i<= bestcnt; i++) {
int j;

34
Build.c
View File

@ -35,7 +35,8 @@
int Build(char *mddev, int mdfd, int chunk, int level, int layout,
int raiddisks,
mddev_dev_t devlist, int assume_clean)
mddev_dev_t devlist, int assume_clean,
char *bitmap_file, int bitmap_chunk, int delay)
{
/* Build a linear or raid0 arrays without superblocks
* We cannot really do any checks, we just do it.
@ -56,6 +57,7 @@ int Build(char *mddev, int mdfd, int chunk, int level, int layout,
struct stat stb;
int subdevs = 0;
mddev_dev_t dv;
int bitmap_fd;
/* scan all devices, make sure they really are block devices */
for (dv = devlist; dv; dv=dv->next) {
@ -135,6 +137,9 @@ int Build(char *mddev, int mdfd, int chunk, int level, int layout,
mddev, strerror(errno));
return 1;
}
} else if (bitmap_file) {
fprintf(stderr, Name ": bitmaps not supported with this kernel\n");
return 1;
}
/* now add the devices */
for ((i=0), (dv = devlist) ; dv ; i++, dv=dv->next) {
@ -171,6 +176,33 @@ int Build(char *mddev, int mdfd, int chunk, int level, int layout,
/* now to start it */
if (vers >= 9000) {
mdu_param_t param; /* not used by syscall */
if (bitmap_file) {
bitmap_fd = open(bitmap_file, O_RDWR);
if (bitmap_fd < 0) {
if (bitmap_chunk == UnSet) {
fprintf(stderr, Name ": %s cannot be openned.",
bitmap_file);
return 1;
}
if (CreateBitmap(bitmap_file, 1, NULL, bitmap_chunk,
delay, 0/* FIXME size */)) {
return 1;
}
bitmap_fd = open(bitmap_file, O_RDWR);
if (bitmap_fd < 0) {
fprintf(stderr, Name ": %s cannot be openned.",
bitmap_file);
return 1;
}
}
if (bitmap_fd >= 0) {
if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) {
fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n",
mddev, strerror(errno));
return 1;
}
}
}
if (ioctl(mdfd, RUN_ARRAY, &param)) {
fprintf(stderr, Name ": RUN_ARRAY failed: %s\n",
strerror(errno));

View File

@ -34,7 +34,8 @@
int Create(struct supertype *st, char *mddev, int mdfd,
int chunk, int level, int layout, unsigned long size, int raiddisks, int sparedisks,
int subdevs, mddev_dev_t devlist,
int runstop, int verbose, int force)
int runstop, int verbose, int force,
char *bitmap_file, int bitmap_chunk, int delay)
{
/*
* Create a new raid array.
@ -66,6 +67,7 @@ int Create(struct supertype *st, char *mddev, int mdfd,
int pass;
int vers;
int rv;
int bitmap_fd;
mdu_array_info_t array;
@ -358,6 +360,26 @@ int Create(struct supertype *st, char *mddev, int mdfd,
return 1;
}
if (bitmap_file) {
int uuid[4];
st->ss->uuid_from_super(uuid, super);
if (CreateBitmap(bitmap_file, force, (char*)uuid, bitmap_chunk, delay,
array.size*2ULL /* FIXME wrong for raid10 */)) {
return 1;
}
bitmap_fd = open(bitmap_file, O_RDWR);
if (bitmap_fd < 0) {
fprintf(stderr, Name ": weird: %s cannot be openned\n",
bitmap_file);
return 1;
}
if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) {
fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n",
mddev, strerror(errno));
return 1;
}
}
for (pass=1; pass <=2 ; pass++) {

View File

@ -58,9 +58,11 @@ MAN5DIR = $(MANDIR)/man5
MAN8DIR = $(MANDIR)/man8
OBJS = mdadm.o config.o mdstat.o ReadMe.o util.o Manage.o Assemble.o Build.o \
Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o mdopen.o super0.o super1.o
Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \
mdopen.o super0.o super1.o bitmap.o
SRCS = mdadm.c config.c mdstat.c ReadMe.c util.c Manage.c Assemble.c Build.c \
Create.c Detail.c Examine.c Grow.c Monitor.c dlink.c Kill.c Query.c mdopen.c super0.c super1.c
Create.c Detail.c Examine.c Grow.c Monitor.c dlink.c Kill.c Query.c \
mdopen.c super0.c super1.c bitmap.c
ASSEMBLE_SRCS := mdassemble.c Assemble.c config.c dlink.c util.c super0.c super1.c
ifdef MDASSEMBLE_AUTO
@ -114,7 +116,7 @@ md.man : md.4
mdadm.conf.man : mdadm.conf.5
nroff -man mdadm.conf.5 > mdadm.conf.man
$(OBJS) : mdadm.h
$(OBJS) : mdadm.h bitmap.h
install : mdadm mdadm.8 md.4 mdadm.conf.5
$(INSTALL) -D $(STRIP) -m 755 mdadm $(DESTDIR)$(BINDIR)/mdadm

View File

@ -91,7 +91,7 @@ char Version[] = Name " - v1.11.0 - 11 April 2005\n";
* At the time if writing, there is only minimal support.
*/
char short_options[]="-ABCDEFGQhVvbc:i:l:p:m:n:x:u:c:d:z:U:sa::rfRSow1te:";
char short_options[]="-ABCDEFGQhVXvb::c:i:l:p:m:n:x:u:c:d:z:U:sa::rfRSow1te:";
struct option long_options[] = {
{"manage", 0, 0, '@'},
{"misc", 0, 0, '#'},
@ -104,6 +104,7 @@ struct option long_options[] = {
{"grow", 0, 0, 'G'},
{"zero-superblock", 0, 0, 'K'}, /* deliberately no a short_option */
{"query", 0, 0, 'Q'},
{"examine-bitmap", 0, 0, 'X'},
/* synonyms */
{"monitor", 0, 0, 'F'},
@ -125,9 +126,11 @@ struct option long_options[] = {
{"spare-disks",1,0, 'x'},
{"spare-devices",1,0, 'x'},
{"size", 1, 0, 'z'},
{"auto", 2, 0, 'a'}, /* also for --assemble */
{"auto", 1, 0, 'a'}, /* also for --assemble */
{"assume-clean",0,0, 3 },
{"metadata", 1, 0, 'e'}, /* superblock format */
{"bitmap", 1, 0, 'b'},
{"bitmap-chunk", 1, 0, 4},
/* For assemble */
{"uuid", 1, 0, 'u'},
@ -188,6 +191,7 @@ char Help[] =
char OptionHelp[] =
"Any parameter that does not start with '-' is treated as a device name\n"
"or, for --examine-bitmap, a file name.\n"
"The first such name is often the name of an md device. Subsequent\n"
"names are often names of component devices."
"\n"
@ -205,6 +209,7 @@ char OptionHelp[] =
" --create -C : Create a new array\n"
" --detail -D : Display details of an array\n"
" --examine -E : Examine superblock on an array component\n"
" --examine-bitmap -X: Display the detail of a bitmap file\n"
" --monitor -F : monitor (follow) some arrays\n"
" --query -Q : Display general information about how a\n"
" device relates to the md driver\n"
@ -212,6 +217,7 @@ char OptionHelp[] =
/*
"\n"
" For create or build:\n"
" --bitmap= -b : File to store bitmap in - may pre-exist for --build\n"
" --chunk= -c : chunk size of kibibytes\n"
" --rounding= : rounding factor for linear array (==chunk size)\n"
" --level= -l : raid level: 0,1,4,5,6,linear,mp. 0 or linear for build\n"
@ -224,8 +230,11 @@ char OptionHelp[] =
" : insert a missing drive for RAID5.\n"
" --auto(=p) -a : Automatically allocate new (partitioned) md array if needed.\n"
" --assume-clean : Assume the array is already in-sync. This is dangerous.\n"
" --bitmap-chunk= : chunksize of bitmap in bitmap file (Kilobytes)\n"
" --delay= -d : seconds between bitmap updates\n"
"\n"
" For assemble:\n"
" --bitmap= -b : File to find bitmap information in\n"
" --uuid= -u : uuid of array to assemble. Devices which don't\n"
" have this uuid are excluded\n"
" --super-minor= -m : minor number to look for in super-block when\n"
@ -280,6 +289,7 @@ char Help_create[] =
" other levels.\n"
"\n"
" Options that are valid with --create (-C) are:\n"
" --bitmap= : Create a bitmap for the array with the given filename\n"
" --chunk= -c : chunk size of kibibytes\n"
" --rounding= : rounding factor for linear array (==chunk size)\n"
" --level= -l : raid level: 0,1,4,5,6,linear,multipath and synonyms\n"
@ -293,6 +303,8 @@ char Help_create[] =
" --run -R : insist of running the array even if not all\n"
" : devices are present or some look odd.\n"
" --readonly -o : start the array readonly - not supported yet.\n"
" --bitmap-chunk= : bitmap chunksize in Kilobytes.\n"
" --delay= -d : bitmap update delay in seconds.\n"
"\n"
;
@ -308,10 +320,13 @@ char Help_build[] =
" The level may only be 0, raid0, or linear.\n"
" All devices must be listed and the array will be started once complete.\n"
" Options that are valid with --build (-B) are:\n"
" --bitmap= : file to store/find bitmap information in.\n"
" --chunk= -c : chunk size of kibibytes\n"
" --rounding= : rounding factor for linear array (==chunk size)\n"
" --level= -l : 0, raid0, or linear\n"
" --raid-devices= -n : number of active devices in array\n"
" --raid-devices= -n : number of active devices in array\n"
" --bitmap-chunk= : bitmap chunksize in Kilobytes.\n"
" --delay= -d : bitmap update delay in seconds.\n"
;
char Help_assemble[] =
@ -347,6 +362,7 @@ char Help_assemble[] =
" and components are determined from the config file.\n"
"\n"
"Options that are valid with --assemble (-A) are:\n"
" --bitmap= : bitmap file to use wit the array\n"
" --uuid= -u : uuid of array to assemble. Devices which don't\n"
" have this uuid are excluded\n"
" --super-minor= -m : minor number to look for in super-block when\n"
@ -393,6 +409,7 @@ char Help_misc[] =
" device relates to the md driver\n"
" --detail -D : Display details of an array\n"
" --examine -E : Examine superblock on an array component\n"
" --examine-bitmap -X: Display contents of a bitmap file\n"
" --zero-superblock : erase the MD superblock from a device.\n"
" --run -R : start a partially built array\n"
" --stop -S : deactivate array, releasing all resources\n"

315
bitmap.c Normal file
View File

@ -0,0 +1,315 @@
/*
* mdadm - manage Linux "md" devices aka RAID arrays.
*
* Copyright (C) 2004 Paul Clements, SteelEye Technology, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <sys/types.h>
#include <sys/stat.h>
#include "mdadm.h"
#include <asm/byteorder.h>
#define min(a,b) (((a) < (b)) ? (a) : (b))
/*
 * Convert the multi-byte fields of a bitmap superblock, in place, from
 * the little-endian on-disk representation to CPU byte order.
 * The uuid is a byte array and is left untouched.
 *
 * Deliberately not declared 'inline': under C99 and later a non-static
 * 'inline' function is only an inline definition, no out-of-line copy
 * is guaranteed, and any call the compiler chooses not to inline would
 * fail to link.
 */
void sb_le_to_cpu(bitmap_super_t *sb)
{
	sb->magic = __le32_to_cpu(sb->magic);
	sb->version = __le32_to_cpu(sb->version);
	/* uuid gets no translation */
	sb->events = __le64_to_cpu(sb->events);
	sb->events_cleared = __le64_to_cpu(sb->events_cleared);
	sb->state = __le32_to_cpu(sb->state);
	sb->chunksize = __le32_to_cpu(sb->chunksize);
	sb->daemon_sleep = __le32_to_cpu(sb->daemon_sleep);
	sb->sync_size = __le64_to_cpu(sb->sync_size);
}
/*
 * Convert a bitmap superblock, in place, from CPU byte order to the
 * little-endian on-disk representation.
 *
 * Deliberately not declared 'inline': under C99 and later a non-static
 * 'inline' function is only an inline definition and an un-inlined
 * call to it would fail to link.
 */
void sb_cpu_to_le(bitmap_super_t *sb)
{
	sb_le_to_cpu(sb); /* these are really the same thing */
}
/*
 * Display names for the 'state' value of a bitmap superblock; searched
 * with map_num() by bitmap_state().  Only the values 0 and 2 have a
 * name here.
 */
mapping_t bitmap_states[] = {
{ "OK", 0 },
{ "Out of date", 2 }, /* 2 is the value of BITMAP_STALE in bitmap.h */
{ NULL, -1 } /* NOTE(review): presumably the end-of-table marker map_num() expects -- confirm */
};
/*
 * Translate a superblock state number into its display name using the
 * bitmap_states table; numbers without an entry yield "Unknown".
 */
const char *bitmap_state(int state_num)
{
	const char *name = map_num(bitmap_states, state_num);

	if (name == NULL)
		return "Unknown";
	return name;
}
/*
 * Format a byte count as a short string such as "512 B" or "4 KB".
 *
 * The value is divided by 1024 (rounding down) until it drops below
 * 1024 or the largest known suffix ("TB") is reached, so very large
 * values are reported as a big number of TB instead of indexing past
 * the end of the suffix table.
 *
 * Returns a pointer to a static buffer that the next call overwrites.
 */
const char *human_chunksize(unsigned long bytes)
{
	static char buf[32];
	static const char *const suffixes[] = { "B", "KB", "MB", "GB", "TB" };
	const size_t last = sizeof(suffixes) / sizeof(suffixes[0]) - 1;
	size_t i = 0;

	/* never step beyond the last suffix, whatever the width of long */
	while ((bytes >> 10) && i < last) {
		bytes >>= 10;
		i++;
	}

	snprintf(buf, sizeof(buf), "%lu %s", bytes, suffixes[i]);

	return buf;
}
/*
 * A bitmap file's superblock together with the bit counts that
 * bitmap_fd_read() derives from the remainder of the file.
 */
typedef struct bitmap_info_s {
bitmap_super_t sb; /* superblock, converted to CPU byte order by bitmap_fd_read() */
unsigned long long total_bits; /* bitmap bits accounted for; left at 0 when the bitmap body is not read */
unsigned long long dirty_bits; /* how many of the bits that were read are set */
} bitmap_info_t;
/*
 * Count the dirty (set) bits among the num_bits low-order bits of byte.
 *
 * num_bits must be in 1..8 to count anything; any other value returns 0.
 *
 * Deliberately not declared 'inline': under C99 and later a non-static
 * 'inline' function is only an inline definition and an un-inlined
 * call to it would fail to link.
 */
int count_dirty_bits_byte(char byte, int num_bits)
{
	int num = 0;
	int bit;

	if (num_bits < 1 || num_bits > 8)
		return 0;

	for (bit = 0; bit < num_bits; bit++)
		if (byte & (1 << bit))
			num++;

	return num;
}
/*
 * Count the set bits among the first num_bits bits of buf: every whole
 * byte is counted in full, then the low-order bits of one trailing
 * byte when num_bits is not a multiple of 8.
 */
int count_dirty_bits(char *buf, int num_bits)
{
	int whole_bytes = num_bits / 8;
	int tail_bits = num_bits % 8;
	int dirty = 0;
	int idx = 0;

	while (idx < whole_bytes) {
		dirty += count_dirty_bits_byte(buf[idx], 8);
		idx++;
	}

	/* not an even byte boundary: count part of the next byte */
	if (tail_bits != 0)
		dirty += count_dirty_bits_byte(buf[idx], tail_bits);

	return dirty;
}
/*
 * Calculate the size of the bitmap, in bits, given the array size and
 * the bitmap chunksize.
 *
 * array_size is multiplied by 512 (i.e. it is a count of 512-byte
 * sectors) and chunksize is in bytes; the result is rounded up so that
 * a partial final chunk still gets a bit.
 *
 * A chunksize of 0 (e.g. read from a damaged superblock) yields 0
 * rather than dividing by zero.
 */
unsigned long long bitmap_bits(unsigned long long array_size,
			       unsigned long chunksize)
{
	if (chunksize == 0)
		return 0;

	return (array_size * 512 + chunksize - 1) / chunksize;
}
/*
 * Read a bitmap file from an open descriptor.
 *
 * The superblock is read from the current file position and converted
 * to CPU byte order.  Unless 'brief' is set, or the superblock records
 * a zero sync_size or chunksize, the bitmap that follows is scanned to
 * count the total and dirty bits.
 *
 * Returns a malloc()ed bitmap_info_t that the caller must free(), or
 * NULL (after printing a message) on allocation or read failure.
 */
bitmap_info_t *bitmap_fd_read(int fd, int brief)
{
	unsigned long long total_bits = 0, read_bits = 0, dirty_bits = 0;
	bitmap_info_t *info;
	char buf[512];
	int n;

	info = malloc(sizeof(*info));
	if (info == NULL) {
		/* cast so the argument really matches the %d conversion */
		fprintf(stderr, Name ": failed to allocate %d bytes\n",
			(int)sizeof(*info));
		return NULL;
	}

	if (read(fd, &info->sb, sizeof(info->sb)) != sizeof(info->sb)) {
		fprintf(stderr, Name ": failed to read superblock of bitmap "
			"file: %s\n", strerror(errno));
		free(info);
		return NULL;
	}

	sb_le_to_cpu(&info->sb); /* convert superblock to CPU byte ordering */

	/* a zero chunksize cannot describe any bits and must not be divided by */
	if (brief || info->sb.sync_size == 0 || info->sb.chunksize == 0)
		goto out;

	/* read the rest of the file counting total bits and dirty bits --
	 * we stop when either:
	 * 1) we hit EOF, in which case we assume the rest of the bits (if any)
	 *    are dirty
	 * 2) we've read the full bitmap, in which case we ignore any trailing
	 *    data in the file
	 */
	total_bits = bitmap_bits(info->sb.sync_size, info->sb.chunksize);

	/* sizeof(buf), not sizeof(*buf): fill the whole buffer per read() */
	while ((n = read(fd, buf, sizeof(buf))) > 0) {
		unsigned long long remaining = total_bits - read_bits;

		if (remaining > sizeof(buf) * 8) /* we want the full buffer */
			remaining = sizeof(buf) * 8;
		if (remaining > (unsigned long long)n * 8) /* the file is truncated */
			remaining = (unsigned long long)n * 8;
		dirty_bits += count_dirty_bits(buf, remaining);

		read_bits += remaining;
		if (read_bits >= total_bits) /* we've got what we want */
			break;
	}

	if (read_bits < total_bits) { /* file truncated... */
		fprintf(stderr, Name ": WARNING: bitmap file is not large "
			"enough for array size %llu!\n\n",
			(unsigned long long)info->sb.sync_size);
		total_bits = read_bits;
	}
out:
	info->total_bits = total_bits;
	info->dirty_bits = dirty_bits;
	return info;
}
/*
 * Open the named bitmap file read-only, load it with bitmap_fd_read()
 * and close it again.
 *
 * Returns whatever bitmap_fd_read() returns, or NULL (after printing a
 * message) when the file cannot be opened.
 */
bitmap_info_t *bitmap_file_read(char *filename, int brief)
{
	bitmap_info_t *result = NULL;
	int bitmap_fd = open(filename, O_RDONLY);

	if (bitmap_fd >= 0) {
		result = bitmap_fd_read(bitmap_fd, brief);
		close(bitmap_fd);
	} else {
		fprintf(stderr, Name ": failed to open bitmap file %s: %s\n",
			filename, strerror(errno));
	}

	return result;
}
int ExamineBitmap(char *filename, int brief)
{
/*
* Read the bitmap file and display its contents
*/
bitmap_super_t *sb;
bitmap_info_t *info;
int rv = 1;
info = bitmap_file_read(filename, brief);
if (!info)
return rv;
sb = &info->sb;
printf(" Filename : %s\n", filename);
printf(" Magic : %08x\n", sb->magic);
if (sb->magic != BITMAP_MAGIC) {
fprintf(stderr, Name ": invalid bitmap magic 0x%x, the bitmap file appears to be corrupted\n", sb->magic);
}
printf(" Version : %d\n", sb->version);
if (sb->version != BITMAP_MAJOR) {
fprintf(stderr, Name ": unknown bitmap version %d, either the bitmap file is corrupted or you need to upgrade your tools\n", sb->version);
goto free_info;
}
rv = 0;
printf(" UUID : %08x.%08x.%08x.%08x\n",
*(__u32 *)(sb->uuid+0),
*(__u32 *)(sb->uuid+4),
*(__u32 *)(sb->uuid+8),
*(__u32 *)(sb->uuid+12));
printf(" Events : %llu\n", sb->events);
printf(" Events Cleared : %llu\n", sb->events_cleared);
printf(" State : %s\n", bitmap_state(sb->state));
printf(" Chunksize : %s\n", human_chunksize(sb->chunksize));
printf(" Daemon : %ds flush period\n", sb->daemon_sleep);
printf(" Sync Size : %llu%s\n", sb->sync_size,
human_size(sb->sync_size * 1024));
if (brief)
goto free_info;
printf(" Bitmap : %llu bits (chunks), %llu dirty (%2.1f%%)\n",
info->total_bits, info->dirty_bits,
100.0 * info->dirty_bits / (info->total_bits + 1));
free_info:
free(info);
return rv;
}
/*
 * Create a bitmap file with a superblock and (optionally) a full bitmap.
 *
 * filename     - file to create; an existing file is only overwritten
 *                when 'force' is non-zero
 * uuid         - 16 bytes copied into the superblock, or NULL to leave
 *                the superblock uuid zeroed
 * chunksize    - bitmap chunk size in bytes; must not be zero
 * daemon_sleep - value stored in the superblock's daemon_sleep field
 * array_size   - size covered by the bitmap, in 512-byte sectors; when
 *                the resulting bitmap has no bits, only the superblock
 *                is written
 *
 * Every bitmap bit is written as 1.  Returns 0 on success and 1 on
 * failure; once the file has been opened, a failure also unlinks it.
 */
int CreateBitmap(char *filename, int force, char uuid[16],
		 unsigned long chunksize, unsigned long daemon_sleep,
		 unsigned long long array_size)
{
	FILE *fp;
	int rv = 1;
	char block[512];
	bitmap_super_t sb;
	long long bytes, filesize;

	/* refuse before touching the file: a zero chunksize would lead to
	 * a division by zero when the bitmap size is calculated */
	if (chunksize == 0) {
		fprintf(stderr, Name ": bitmap chunksize must not be zero\n");
		return rv;
	}

	if (!force && access(filename, F_OK) == 0) {
		fprintf(stderr, Name ": bitmap file %s already exists, use --force to overwrite\n", filename);
		return rv;
	}

	fp = fopen(filename, "w");
	if (fp == NULL) {
		fprintf(stderr, Name ": failed to open bitmap file %s: %s\n",
			filename, strerror(errno));
		return rv;
	}

	memset(&sb, 0, sizeof(sb));
	sb.magic = BITMAP_MAGIC;
	sb.version = BITMAP_MAJOR;
	if (uuid != NULL)
		memcpy(sb.uuid, uuid, 16);
	sb.chunksize = chunksize;
	sb.daemon_sleep = daemon_sleep;
	sb.sync_size = array_size;

	sb_cpu_to_le(&sb); /* convert to on-disk byte ordering */

	if (fwrite(&sb, sizeof(sb), 1, fp) != 1) {
		fprintf(stderr, Name ": failed to write superblock to bitmap file %s: %s\n", filename, strerror(errno));
		goto out;
	}

	/* calculate the size of the bitmap and write it to disk */
	bytes = (bitmap_bits(array_size, chunksize) + 7) / 8;
	if (!bytes) {
		rv = 0;
		goto out;
	}

	filesize = bytes + sizeof(sb);

	memset(block, 0xff, sizeof(block));

	while (bytes > 0) {
		if (fwrite(block, sizeof(block), 1, fp) != 1) {
			fprintf(stderr, Name ": failed to write bitmap file %s: %s\n", filename, strerror(errno));
			goto out;
		}
		bytes -= (long long)sizeof(block);
	}

	/* The stdio buffer must reach the file before it is truncated,
	 * otherwise fclose() would write the buffered tail afterwards and
	 * grow the file again. */
	if (fflush(fp) != 0) {
		fprintf(stderr, Name ": failed to write bitmap file %s: %s\n", filename, strerror(errno));
		goto out;
	}

	/* make the file be the right size (well, to the nearest byte);
	 * the bitmap itself is complete, so a failure here only leaves
	 * surplus bytes at the end and is not treated as fatal */
	if (ftruncate(fileno(fp), filesize) != 0)
		fprintf(stderr, Name ": warning: could not trim bitmap file %s: %s\n",
			filename, strerror(errno));

	rv = 0;
out:
	/* a failing close can mean buffered data never reached the file */
	if (fclose(fp) != 0 && rv == 0) {
		fprintf(stderr, Name ": failed to write bitmap file %s: %s\n", filename, strerror(errno));
		rv = 1;
	}
	if (rv)
		unlink(filename); /* possibly corrupted, better get rid of it */
	return rv;
}

273
bitmap.h Normal file
View File

@ -0,0 +1,273 @@
/*
* bitmap.h: Copyright (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
*
* additions: Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.
*/
#ifndef BITMAP_H
#define BITMAP_H 1
#define BITMAP_MAJOR 3
#define BITMAP_MINOR 38
/*
* in-memory bitmap:
*
* Use 16 bit block counters to track pending writes to each "chunk".
* The 2 high order bits are special-purpose, the first is a flag indicating
* whether a resync is needed. The second is a flag indicating whether a
* resync is active.
* This means that the counter is actually 14 bits:
*
* +--------+--------+------------------------------------------------+
* | resync | resync | counter |
* | needed | active | |
* | (0-1) | (0-1) | (0-16383) |
* +--------+--------+------------------------------------------------+
*
* The "resync needed" bit is set when:
* a '1' bit is read from storage at startup.
* a write request fails on some drives
* a resync is aborted on a chunk with 'resync active' set
* It is cleared (and resync-active set) when a resync starts across all drives
* of the chunk.
*
*
* The "resync active" bit is set when:
* a resync is started on all drives, and resync_needed is set.
* resync_needed will be cleared (as long as resync_active wasn't already set).
* It is cleared when a resync completes.
*
* The counter counts pending write requests, plus the on-disk bit.
* When the counter is '1' and the resync bits are clear, the on-disk
* bit can be cleared as well, thus setting the counter to 0.
* When we set a bit, or in the counter (to start a write), if the field is
* 0, we first set the disk bit and set the counter to 1.
*
* Also, we'll hijack the "map" pointer itself and use it as two 16 bit block
* counters as a fallback when "page" memory cannot be allocated:
*
* Normal case (page memory allocated):
*
* page pointer (32-bit)
*
* [ ] ------+
* |
* +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters)
* c1 c2 c2048
*
* Hijacked case (page memory allocation failed):
*
* hijacked page pointer (32-bit)
*
* [ ][ ] (no page memory allocated)
* counter #1 (16-bit) counter #2 (16-bit)
*
*/
#ifdef __KERNEL__
#define PAGE_BITS (PAGE_SIZE << 3)
#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3)
typedef __u16 bitmap_counter_t;
#define COUNTER_BITS 16
#define COUNTER_BIT_SHIFT 4
#define COUNTER_BYTE_RATIO (COUNTER_BITS / 8)
#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3)
#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1)))
#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2)))
#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1)
#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK)
#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK)
#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX)
/* how many counters per page? */
#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS)
/* same, except a shift value for more efficient bitops */
#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT)
/* same, except a mask value for more efficient bitops */
#define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1)
#define BITMAP_BLOCK_SIZE 512
#define BITMAP_BLOCK_SHIFT 9
/* how many blocks per chunk? (this is variable) */
#define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->chunksize >> BITMAP_BLOCK_SHIFT)
#define CHUNK_BLOCK_SHIFT(bitmap) ((bitmap)->chunkshift - BITMAP_BLOCK_SHIFT)
#define CHUNK_BLOCK_MASK(bitmap) (CHUNK_BLOCK_RATIO(bitmap) - 1)
/* when hijacked, the counters and bits represent even larger "chunks" */
/* there will be 1024 chunks represented by each counter in the page pointers */
#define PAGEPTR_BLOCK_RATIO(bitmap) \
(CHUNK_BLOCK_RATIO(bitmap) << PAGE_COUNTER_SHIFT >> 1)
#define PAGEPTR_BLOCK_SHIFT(bitmap) \
(CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)
/*
* on-disk bitmap:
*
* Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
* file a page at a time. There's a superblock at the start of the file.
*/
/* map chunks (bits) to file pages - offset by the size of the superblock */
#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3))
#endif
/*
* bitmap structures:
*/
#define BITMAP_MAGIC 0x6d746962
/* use these for bitmap->flags and bitmap->sb->state bit-fields */
/* each enumerator is a distinct single bit (0x001, 0x002) */
enum bitmap_state {
BITMAP_ACTIVE = 0x001, /* the bitmap is in use */
BITMAP_STALE = 0x002 /* the bitmap file is out of date or had -EIO */
};
/* the superblock at the front of the bitmap file -- little endian */
/*
 * The number that starts each field comment is the field's byte offset
 * within the superblock; the parenthesised numbers refer to the notes
 * that follow this structure.  The named fields occupy 60 bytes and
 * 'pad' supplies the remaining 4096 - 60.
 */
typedef struct bitmap_super_s {
__u32 magic; /* 0 BITMAP_MAGIC */
__u32 version; /* 4 the bitmap major for now, could change... */
__u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */
__u64 events; /* 24 event counter for the bitmap (1)*/
__u64 events_cleared;/*32 event counter when last bit cleared (2) */
__u64 sync_size; /* 40 the size of the md device's sync range(3) */
__u32 state; /* 48 bitmap state information */
__u32 chunksize; /* 52 the bitmap chunk size in bytes */
__u32 daemon_sleep; /* 56 seconds between disk flushes */
__u8 pad[4096 - 60]; /* set to zero */
} bitmap_super_t;
/* notes:
* (1) This event counter is updated before the eventcounter in the md superblock
* When a bitmap is loaded, it is only accepted if this event counter is equal
* to, or one greater than, the event counter in the superblock.
* (2) This event counter is updated when the other one is *if*and*only*if* the
* array is not degraded. As bits are not cleared when the array is degraded,
* this represents the last time that any bits were cleared.
* If a device is being added that has an event count with this value or
* higher, it is accepted as conforming to the bitmap.
* (3) This is the number of sectors represented by the bitmap, and is the range that
* resync happens across. For raid1 and raid5/6 it is the size of individual
* devices. For raid10 it is the size of the array.
*/
#ifdef __KERNEL__
/* the in-memory bitmap is represented by bitmap_pages */
/* (this definition is only compiled when __KERNEL__ is defined) */
struct bitmap_page {
/*
* map points to the actual memory page
*/
char *map;
/*
* in emergencies (when map cannot be alloced), hijack the map
* pointer and use it as two counters itself
*/
unsigned int hijacked; /* NOTE(review): presumably non-zero while map is hijacked -- confirm */
/*
* count of dirty bits on the page
*/
int count;
};
/* keep track of bitmap file pages that have pending writes on them */
/* (this definition is only compiled when __KERNEL__ is defined) */
struct page_list {
struct list_head list; /* NOTE(review): presumably links the entry into one of struct bitmap's page lists -- confirm */
struct page *page; /* the bitmap file page this entry refers to */
};
/* the main bitmap structure - one per mddev */
/* (this definition is only compiled when __KERNEL__ is defined) */
struct bitmap {
struct bitmap_page *bp; /* NOTE(review): presumably the array of in-memory bitmap pages -- confirm */
unsigned long pages; /* total number of pages in the bitmap */
unsigned long missing_pages; /* number of pages not yet allocated */
mddev_t *mddev; /* the md device that the bitmap is for */
int counter_bits; /* how many bits per block counter */
/* bitmap chunksize -- how much data does each bit represent? */
unsigned long chunksize;
unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */
unsigned long chunks; /* total number of data chunks for the array */
/* We hold a count on the chunk currently being synced, and drop
* it when the last block is started. If the resync is aborted
* midway, we need to be able to drop that count, so we remember
* the counted chunk..
*/
unsigned long syncchunk;
__u64 events_cleared; /* NOTE(review): presumably mirrors the superblock's events_cleared field -- confirm */
/* bitmap spinlock */
spinlock_t lock;
struct file *file; /* backing disk file */
struct page *sb_page; /* cached copy of the bitmap file superblock */
struct page **filemap; /* list of cache pages for the file */
unsigned long *filemap_attr; /* attributes associated w/ filemap pages */
unsigned long file_pages; /* number of pages in the file */
unsigned long flags; /* takes enum bitmap_state values, per the comment on that enum */
/*
* the bitmap daemon - periodically wakes up and sweeps the bitmap
* file, cleaning up bits and flushing out pages to disk as necessary
*/
mdk_thread_t *daemon;
unsigned long daemon_sleep; /* how many seconds between updates? */
/*
* bitmap write daemon - this daemon performs writes to the bitmap file
* this thread is only needed because of a limitation in ext3 (jbd)
* that does not allow a task to have two journal transactions ongoing
* simultaneously (even if the transactions are for two different
* filesystems) -- in the case of bitmap, that would be the filesystem
* that the bitmap file resides on and the filesystem that is mounted
* on the md device -- see current->journal_info in jbd/transaction.c
*/
mdk_thread_t *write_daemon;
mdk_thread_t *writeback_daemon;
/* NOTE(review): the fields below appear to be the state shared with the write daemons -- confirm against the kernel source */
spinlock_t write_lock;
struct semaphore write_ready;
struct semaphore write_done;
unsigned long writes_pending;
wait_queue_head_t write_wait;
struct list_head write_pages;
struct list_head complete_pages;
mempool_t *write_pool;
};
/* the bitmap API */
/* these are used only by md/bitmap */
int bitmap_create(mddev_t *mddev);
void bitmap_destroy(mddev_t *mddev);
int bitmap_active(struct bitmap *bitmap);
char *file_path(struct file *file, char *buf, int count);
void bitmap_print_sb(struct bitmap *bitmap);
int bitmap_update_sb(struct bitmap *bitmap);
int bitmap_setallbits(struct bitmap *bitmap);
/* these are exported */
void bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors);
void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
int success);
int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks);
void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted);
void bitmap_close_sync(struct bitmap *bitmap);
int bitmap_unplug(struct bitmap *bitmap);
#endif
#endif

7
md_u.h
View File

@ -23,6 +23,7 @@
#define GET_DISK_INFO _IOR (MD_MAJOR, 0x12, mdu_disk_info_t)
#define PRINT_RAID_DEBUG _IO (MD_MAJOR, 0x13)
#define RAID_AUTORUN _IO (MD_MAJOR, 0x14)
#define GET_BITMAP_FILE _IOR (MD_MAJOR, 0x15, mdu_bitmap_file_t)
/* configuration */
#define CLEAR_ARRAY _IO (MD_MAJOR, 0x20)
@ -35,6 +36,7 @@
#define PROTECT_ARRAY _IO (MD_MAJOR, 0x27)
#define HOT_ADD_DISK _IO (MD_MAJOR, 0x28)
#define SET_DISK_FAULTY _IO (MD_MAJOR, 0x29)
#define SET_BITMAP_FILE _IOW (MD_MAJOR, 0x2b, int)
/* usage */
#define RUN_ARRAY _IOW (MD_MAJOR, 0x30, mdu_param_t)
@ -105,6 +107,11 @@ typedef struct mdu_start_info_s {
} mdu_start_info_t;
/* argument type named in the GET_BITMAP_FILE ioctl definition */
typedef struct mdu_bitmap_file_s
{
char pathname[4096]; /* NOTE(review): presumably receives the path of the array's bitmap file -- confirm */
} mdu_bitmap_file_t;
typedef struct mdu_param_s
{
int personality; /* 1,2,3,4 */

72
mdadm.c
View File

@ -25,6 +25,9 @@
* The University of New South Wales
* Sydney, 2052
* Australia
*
* Additions for bitmap and async RAID options, Copyright (C) 2003-2004,
* Paul Clements, SteelEye Technology, Inc.
*/
#include "mdadm.h"
@ -56,6 +59,9 @@ int main(int argc, char *argv[])
char devmode = 0;
int runstop = 0;
int readonly = 0;
int bitmap_fd = -1;
char *bitmap_file = NULL;
int bitmap_chunk = UnSet;
int SparcAdjust = 0;
mddev_dev_t devlist = NULL;
mddev_dev_t *devlistend = & devlist;
@ -95,6 +101,7 @@ int main(int argc, char *argv[])
ident.spare_group = NULL;
ident.autof = 0;
ident.st = NULL;
ident.bitmap_fd = -1;
while ((option_index = -1) ,
(opt=getopt_long(argc, argv,
@ -128,7 +135,10 @@ int main(int argc, char *argv[])
case 'v': verbose = 1;
continue;
case 'b': brief = 1;
case 'b':
if (mode == ASSEMBLE || mode == BUILD || mode == CREATE)
break; /* b means bitmap */
brief = 1;
continue;
case ':':
@ -159,6 +169,7 @@ int main(int argc, char *argv[])
case '#':
case 'D':
case 'E':
case 'X':
case 'Q': newmode = MISC; break;
case 'R':
case 'S':
@ -574,6 +585,8 @@ int main(int argc, char *argv[])
continue;
case O(MONITOR,'d'): /* delay in seconds */
case O(BUILD,'d'): /* delay for bitmap updates */
case O(CREATE,'d'):
if (delay)
fprintf(stderr, Name ": only specify delay once. %s ignored.\n",
optarg);
@ -655,6 +668,7 @@ int main(int argc, char *argv[])
case O(MISC,'K'):
case O(MISC,'R'):
case O(MISC,'S'):
case O(MISC,'X'):
case O(MISC,'o'):
case O(MISC,'w'):
if (devmode && devmode != opt &&
@ -676,6 +690,36 @@ int main(int argc, char *argv[])
}
SparcAdjust = 1;
continue;
case O(ASSEMBLE,'b'): /* here we simply set the bitmap file */
if (!optarg) {
fprintf(stderr, Name ": bitmap file needed with -b in --assemble mode\n");
exit(2);
}
bitmap_fd = open(optarg, O_RDWR);
if (!*optarg || bitmap_fd < 0) {
fprintf(stderr, Name ": cannot open bitmap file %s: %s\n", optarg, strerror(errno));
exit(2);
}
ident.bitmap_fd = bitmap_fd; /* for Assemble */
continue;
case O(BUILD,'b'):
case O(CREATE,'b'): /* here we create the bitmap */
bitmap_file = optarg;
continue;
case O(BUILD,4):
case O(CREATE,4): /* bitmap chunksize */
bitmap_chunk = strtol(optarg, &c, 10);
if (!optarg[0] || *c || bitmap_chunk < 0 ||
bitmap_chunk & (bitmap_chunk - 1)) {
fprintf(stderr, Name ": invalid bitmap chunksize: %s\n",
optarg);
exit(2);
}
/* convert K to B, chunk of 0K means 512B */
bitmap_chunk = bitmap_chunk ? bitmap_chunk * 1024 : 512;
continue;
}
/* We have now processed all the valid options. Anything else is
* an error
@ -726,6 +770,7 @@ int main(int argc, char *argv[])
}
}
rv = 0;
switch(mode) {
case MANAGE:
@ -813,9 +858,27 @@ int main(int argc, char *argv[])
}
break;
case BUILD:
rv = Build(devlist->devname, mdfd, chunk, level, layout, raiddisks, devlist->next, assume_clean);
if (bitmap_chunk == UnSet) bitmap_chunk = DEFAULT_BITMAP_CHUNK;
if (delay == 0) delay = DEFAULT_BITMAP_DELAY;
if (bitmap_file) {
bitmap_fd = open(bitmap_file, O_RDWR,0);
if (bitmap_fd < 0 && errno != ENOENT) {
perror(Name ": cannot create bitmap file");
rv |= 1;
break;
}
if (bitmap_fd < 0) {
bitmap_fd = CreateBitmap(bitmap_file, force, NULL,
bitmap_chunk, delay, size);
}
}
rv = Build(devlist->devname, mdfd, chunk, level, layout,
raiddisks, devlist->next, assume_clean,
bitmap_file, bitmap_chunk, delay);
break;
case CREATE:
if (bitmap_chunk == UnSet) bitmap_chunk = DEFAULT_BITMAP_CHUNK;
if (delay == 0) delay = DEFAULT_BITMAP_DELAY;
if (ss == NULL) {
for(i=0; !ss && superlist[i]; i++)
ss = superlist[i]->match_metadata_desc("default");
@ -827,7 +890,8 @@ int main(int argc, char *argv[])
rv = Create(ss, devlist->devname, mdfd, chunk, level, layout, size<0 ? 0 : size,
raiddisks, sparedisks,
devs_found-1, devlist->next, runstop, verbose, force);
devs_found-1, devlist->next, runstop, verbose, force,
bitmap_file, bitmap_chunk, delay);
break;
case MISC:
@ -891,6 +955,8 @@ int main(int argc, char *argv[])
rv |= Kill(dv->devname, force); continue;
case 'Q':
rv |= Query(dv->devname); continue;
case 'X':
rv |= ExamineBitmap(dv->devname, brief); continue;
}
mdfd = open_mddev(dv->devname, 0);
if (mdfd>=0) {

15
mdadm.h
View File

@ -61,9 +61,12 @@ char *strncpy(char *dest, const char *src, size_t n) __THROW;
#define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */
#endif
#define DEFAULT_BITMAP_CHUNK 4096
#define DEFAULT_BITMAP_DELAY 5
#include "md_u.h"
#include "md_p.h"
#include "bitmap.h"
/* general information that might be extracted from a superblock */
struct mdinfo {
@ -119,6 +122,7 @@ typedef struct mddev_ident_s {
struct supertype *st;
int autof; /* 1 for normal, 2 for partitioned */
char *spare_group;
int bitmap_fd;
struct mddev_ident_s *next;
} *mddev_ident_t;
@ -212,13 +216,15 @@ extern int Assemble(struct supertype *st, char *mddev, int mdfd,
extern int Build(char *mddev, int mdfd, int chunk, int level, int layout,
int raiddisks,
mddev_dev_t devlist, int assume_clean);
mddev_dev_t devlist, int assume_clean,
char *bitmap_file, int bitmap_chunk, int delay);
extern int Create(struct supertype *st, char *mddev, int mdfd,
int chunk, int level, int layout, unsigned long size, int raiddisks, int sparedisks,
int subdevs, mddev_dev_t devlist,
int runstop, int verbose, int force);
int runstop, int verbose, int force,
char *bitmap_file, int bitmap_chunk, int delay);
extern int Detail(char *dev, int brief, int test);
extern int Query(char *dev);
@ -231,6 +237,11 @@ extern int Monitor(mddev_dev_t devlist,
extern int Kill(char *dev, int force);
extern int CreateBitmap(char *filename, int force, char uuid[16],
unsigned long chunksize, unsigned long daemon_sleep,
unsigned long long array_size);
extern int ExamineBitmap(char *filename, int brief);
extern int md_get_version(int fd);
extern int get_linux_version(void);
extern int parse_uuid(char *str, int uuid[4]);