Use O_DIRECT for all IO to devices.

Using buffered IO risks non-atomic updates to parts of the
device that we don't actually want to write to.  This isn't safe in
general.
So switch to O_DIRECT for all that IO and make sure we have
properly aligned buffers.
This commit is contained in:
Neil Brown 2008-07-12 20:28:33 +10:00
parent 908ef18519
commit 6416d5275d
10 changed files with 136 additions and 106 deletions

2
Grow.c
View File

@ -69,7 +69,7 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
return 1;
}
nfd = open(newdev, O_RDWR|O_EXCL);
nfd = open(newdev, O_RDWR|O_EXCL|O_DIRECT);
if (nfd < 0) {
fprintf(stderr, Name ": cannot open %s\n", newdev);
return 1;

2
Kill.c
View File

@ -44,7 +44,7 @@ int Kill(char *dev, int force, int quiet, int noexcl)
int fd, rv = 0;
struct supertype *st;
fd = open(dev, noexcl ? O_RDWR : (O_RDWR|O_EXCL));
fd = open(dev, O_DIRECT | (noexcl ? O_RDWR : (O_RDWR|O_EXCL)));
if (fd < 0) {
if (!quiet)
fprintf(stderr, Name ": Couldn't open %s for write - not zeroing\n",

View File

@ -315,7 +315,7 @@ int Manage_subdevs(char *devname, int fd,
return 1;
}
/* Make sure it isn't in use (in 2.6 or later) */
tfd = open(dv->devname, O_RDONLY|O_EXCL);
tfd = open(dv->devname, O_RDONLY|O_EXCL|O_DIRECT);
if (tfd < 0) {
fprintf(stderr, Name ": Cannot open %s: %s\n",
dv->devname, strerror(errno));
@ -458,7 +458,7 @@ int Manage_subdevs(char *devname, int fd,
int dfd;
if (dv->writemostly)
disc.state |= 1 << MD_DISK_WRITEMOSTLY;
dfd = open(dv->devname, O_RDWR | O_EXCL);
dfd = open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
tst->ss->add_to_super(tst, &disc, dfd,
dv->devname);
/* write_init_super will close 'dfd' */

View File

@ -122,11 +122,10 @@ bitmap_info_t *bitmap_fd_read(int fd, int brief)
*/
unsigned long long total_bits = 0, read_bits = 0, dirty_bits = 0;
bitmap_info_t *info;
char *buf, *unaligned;
void *buf;
int n, skip;
unaligned = malloc(8192*2);
buf = (char*) ((unsigned long)unaligned | 8191)+1;
posix_memalign(&buf, 512, 8192);
n = read(fd, buf, 8192);
info = malloc(sizeof(*info));
@ -145,7 +144,6 @@ bitmap_info_t *bitmap_fd_read(int fd, int brief)
fprintf(stderr, Name ": failed to read superblock of bitmap "
"file: %s\n", strerror(errno));
free(info);
free(unaligned);
return NULL;
}
memcpy(&info->sb, buf, sizeof(info->sb));

View File

@ -791,6 +791,11 @@ static inline int dev2minor(int d)
return (-1-d) << MdpMinorShift;
}
/* Round 'a' up to a multiple of 'base'.
 * Intended for non-negative 'a' and positive 'base'; a value that is
 * already a multiple of 'base' is returned unchanged.
 */
static inline int ROUND_UP(int a, int base)
{
	int chunks = (a + base - 1) / base;

	return chunks * base;
}
#define LEVEL_MULTIPATH (-4)
#define LEVEL_LINEAR (-1)
#define LEVEL_FAULTY (-5)

View File

@ -31,11 +31,6 @@
#include "sha1.h"
#include <values.h>
/* Return the smallest multiple of 'base' that is >= 'a'
 * (for non-negative 'a' and positive 'base').
 */
static inline int ROUND_UP(int a, int base)
{
	int padded = a + base - 1;

	return (padded / base) * base;
}
/* a non-official T10 name for creation GUIDs */
static char T10[] = "Linux-MD";
@ -395,8 +390,9 @@ struct bad_block_log {
* built in Create or Assemble to describe the whole array.
*/
struct ddf_super {
struct ddf_header anchor, primary, secondary, *active;
struct ddf_header anchor, primary, secondary;
struct ddf_controller_data controller;
struct ddf_header *active;
struct phys_disk *phys;
struct virtual_disk *virt;
int pdsize, vdsize;
@ -404,22 +400,32 @@ struct ddf_super {
int currentdev;
int updates_pending;
struct vcl {
struct vcl *next;
__u64 *lba_offset; /* location in 'conf' of
* the lba table */
int vcnum; /* index into ->virt */
__u64 *block_sizes; /* NULL if all the same */
union {
char space[512];
struct {
struct vcl *next;
__u64 *lba_offset; /* location in 'conf' of
* the lba table */
int vcnum; /* index into ->virt */
__u64 *block_sizes; /* NULL if all the same */
};
};
struct vd_config conf;
} *conflist, *currentconf;
struct dl {
struct dl *next;
union {
char space[512];
struct {
struct dl *next;
int major, minor;
char *devname;
int fd;
unsigned long long size; /* sectors */
int pdnum; /* index in ->phys */
struct spare_assign *spare;
};
};
struct disk_data disk;
int major, minor;
char *devname;
int fd;
unsigned long long size; /* sectors */
int pdnum; /* index in ->phys */
struct spare_assign *spare;
struct vcl *vlist[0]; /* max_part in size */
} *dlist;
};
@ -497,8 +503,10 @@ static void *load_section(int fd, struct ddf_super *super, void *buf,
/* All pre-allocated sections are a single block */
if (len != 1)
return NULL;
} else
buf = malloc(len<<9);
} else {
posix_memalign(&buf, 512, len<<9);
}
if (!buf)
return NULL;
@ -633,8 +641,9 @@ static int load_ddf_local(int fd, struct ddf_super *super,
unsigned long long dsize;
/* First the local disk info */
dl = malloc(sizeof(*dl) +
(super->max_part) * sizeof(dl->vlist[0]));
posix_memalign((void**)&dl, 512,
sizeof(*dl) +
(super->max_part) * sizeof(dl->vlist[0]));
load_section(fd, super, &dl->disk,
super->active->data_section_offset,
@ -683,7 +692,8 @@ static int load_ddf_local(int fd, struct ddf_super *super,
if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
if (dl->spare)
continue;
dl->spare = malloc(super->conf_rec_len*512);
posix_memalign((void**)&dl->spare, 512,
super->conf_rec_len*512);
memcpy(dl->spare, vd, super->conf_rec_len*512);
continue;
}
@ -701,8 +711,9 @@ static int load_ddf_local(int fd, struct ddf_super *super,
__be32_to_cpu(vcl->conf.seqnum))
continue;
} else {
vcl = malloc(super->conf_rec_len*512 +
offsetof(struct vcl, conf));
posix_memalign((void**)&vcl, 512,
(super->conf_rec_len*512 +
offsetof(struct vcl, conf)));
vcl->next = super->conflist;
vcl->block_sizes = NULL; /* FIXME not for CONCAT */
super->conflist = vcl;
@ -766,8 +777,7 @@ static int load_super_ddf(struct supertype *st, int fd,
}
}
super = malloc(sizeof(*super));
if (!super) {
if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
fprintf(stderr, Name ": malloc of %zu failed.\n",
sizeof(*super));
return 1;
@ -1443,7 +1453,7 @@ static int init_super_ddf(struct supertype *st,
return init_super_ddf_bvd(st, info, size, name, homehost,
uuid);
ddf = malloc(sizeof(*ddf));
posix_memalign((void**)&ddf, 512, sizeof(*ddf));
memset(ddf, 0, sizeof(*ddf));
ddf->dlist = NULL; /* no physical disks yet */
ddf->conflist = NULL; /* No virtual disks yet */
@ -1570,7 +1580,8 @@ static int init_super_ddf(struct supertype *st,
memset(ddf->controller.pad, 0xff, 8);
memset(ddf->controller.vendor_data, 0xff, 448);
pd = ddf->phys = malloc(pdsize);
posix_memalign((void**)&pd, 512, pdsize);
ddf->phys = pd;
ddf->pdsize = pdsize;
memset(pd, 0xff, pdsize);
@ -1580,7 +1591,8 @@ static int init_super_ddf(struct supertype *st,
pd->max_pdes = __cpu_to_be16(max_phys_disks);
memset(pd->pad, 0xff, 52);
vd = ddf->virt = malloc(vdsize);
posix_memalign((void**)&vd, 512, vdsize);
ddf->virt = vd;
ddf->vdsize = vdsize;
memset(vd, 0, vdsize);
vd->magic = DDF_VIRT_RECORDS_MAGIC;
@ -1805,7 +1817,8 @@ static int init_super_ddf_bvd(struct supertype *st,
__cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);
/* Now create a new vd_config */
vcl = malloc(offsetof(struct vcl, conf) + ddf->conf_rec_len * 512);
posix_memalign((void**)&vcl, 512,
(offsetof(struct vcl, conf) + ddf->conf_rec_len * 512));
vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[ddf->mppe];
vcl->vcnum = venum;
sprintf(st->subarray, "%d", venum);
@ -1974,7 +1987,8 @@ static void add_to_super_ddf(struct supertype *st,
* a phys_disk entry and a more detailed disk_data entry.
*/
fstat(fd, &stb);
dd = malloc(sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part);
posix_memalign((void**)&dd, 512,
sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part);
dd->major = major(stb.st_rdev);
dd->minor = minor(stb.st_rdev);
dd->devname = devname;
@ -2037,7 +2051,7 @@ static void add_to_super_ddf(struct supertype *st,
#ifndef MDASSEMBLE
static unsigned char null_conf[4096];
static unsigned char null_conf[4096+512];
static int __write_init_super_ddf(struct supertype *st, int do_close)
{
@ -2109,14 +2123,15 @@ static int __write_init_super_ddf(struct supertype *st, int do_close)
c->conf.crc = calc_crc(&c->conf, conf_size);
write(fd, &c->conf, conf_size);
} else {
char *null_aligned = (char*)((((unsigned long)null_conf)+511)&~511UL);
if (null_conf[0] != 0xff)
memset(null_conf, 0xff, sizeof(null_conf));
int togo = conf_size;
while (togo > sizeof(null_conf)) {
write(fd, null_conf, sizeof(null_conf));
togo -= sizeof(null_conf);
while (togo > sizeof(null_conf)-512) {
write(fd, null_aligned, sizeof(null_conf)-512);
togo -= sizeof(null_conf)-512;
}
write(fd, null_conf, togo);
write(fd, null_aligned, togo);
}
}
d->disk.crc = calc_crc(&d->disk, 512);
@ -2425,8 +2440,7 @@ static int load_super_ddf_all(struct supertype *st, int fd,
strcmp(sra->text_version, "ddf") != 0)
return 1;
super = malloc(sizeof(*super));
if (!super)
if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
return 1;
memset(super, 0, sizeof(*super));
@ -2584,14 +2598,17 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
static int store_zero_ddf(struct supertype *st, int fd)
{
unsigned long long dsize;
char buf[512];
memset(buf, 0, 512);
void *buf;
if (!get_dev_size(fd, NULL, &dsize))
return 1;
posix_memalign(&buf, 512, 512);
memset(buf, 0, 512);
lseek64(fd, dsize-512, 0);
write(fd, buf, 512);
free(buf);
return 0;
}
@ -2948,8 +2965,9 @@ static void ddf_prepare_update(struct supertype *st,
struct ddf_super *ddf = st->sb;
__u32 *magic = (__u32*)update->buf;
if (*magic == DDF_VD_CONF_MAGIC)
update->space = malloc(offsetof(struct vcl, conf)
+ ddf->conf_rec_len * 512);
posix_memalign(&update->space, 512,
offsetof(struct vcl, conf)
+ ddf->conf_rec_len * 512);
}
/*
@ -3131,7 +3149,7 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
*/
mu = malloc(sizeof(*mu));
mu->buf = malloc(ddf->conf_rec_len * 512);
mu->space = malloc(sizeof(struct vcl));
posix_memalign(&mu->space, 512, sizeof(struct vcl));
mu->len = ddf->conf_rec_len;
mu->next = *updates;
vc = find_vdcr(ddf, a->info.container_member);

View File

@ -762,7 +762,7 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
size_t len, mpb_size;
unsigned long long sectors;
struct stat;
struct imsm_super anchor;
struct imsm_super *anchor;
__u32 check_sum;
memset(super, 0, sizeof(*super));
@ -776,44 +776,40 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
return 1;
}
len = sizeof(anchor);
if (read(fd, &anchor, len) != len) {
len = 512;
posix_memalign((void**)&anchor, 512, len);
if (read(fd, anchor, len) != len) {
if (devname)
fprintf(stderr,
Name ": Cannot read anchor block on %s: %s\n",
devname, strerror(errno));
free(anchor);
return 1;
}
if (strncmp((char *) anchor.sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
if (devname)
fprintf(stderr,
Name ": no IMSM anchor on %s\n", devname);
free(anchor);
return 2;
}
mpb_size = __le32_to_cpu(anchor.mpb_size);
super->mpb = malloc(mpb_size < 512 ? 512 : mpb_size);
mpb_size = __le32_to_cpu(anchor->mpb_size);
mpb_size = ROUND_UP(mpb_size, 512);
posix_memalign((void**)&super->mpb, 512, mpb_size);
if (!super->mpb) {
if (devname)
fprintf(stderr,
Name ": unable to allocate %zu byte mpb buffer\n",
mpb_size);
free(anchor);
return 2;
}
memcpy(super->buf, &anchor, sizeof(anchor));
memcpy(super->buf, anchor, len);
/* read the rest of the first block */
len = 512 - sizeof(anchor);
if (read(fd, super->buf + sizeof(anchor), len) != len) {
if (devname)
fprintf(stderr,
Name ": Cannot read anchor remainder on %s: %s\n",
devname, strerror(errno));
return 2;
}
sectors = mpb_sectors(&anchor) - 1;
sectors = mpb_sectors(anchor) - 1;
free(anchor);
if (!sectors)
return load_imsm_disk(fd, super, devname, 0);
@ -1067,7 +1063,7 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
if (!super)
return 0;
mpb_size = disks_to_mpb_size(info->nr_disks);
mpb = malloc(mpb_size);
posix_memalign((void**)&mpb, 512, mpb_size);
if (!mpb) {
free(super);
return 0;
@ -1281,7 +1277,7 @@ static int write_init_super_imsm(struct supertype *st)
static int store_zero_imsm(struct supertype *st, int fd)
{
unsigned long long dsize;
char buf[512];
void *buf;
get_dev_size(fd, NULL, &dsize);
@ -1289,6 +1285,7 @@ static int store_zero_imsm(struct supertype *st, int fd)
if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
return 1;
posix_memalign(&buf, 512, 512);
memset(buf, 0, sizeof(buf));
if (write(fd, buf, sizeof(buf)) != sizeof(buf))
return 1;

View File

@ -554,8 +554,10 @@ static int init_super0(struct supertype *st, mdu_array_info_t *info,
unsigned long long size, char *ignored_name, char *homehost,
int *uuid)
{
mdp_super_t *sb = malloc(MD_SB_BYTES + sizeof(bitmap_super_t));
mdp_super_t *sb;
int spares;
posix_memalign((void**)&sb, 512, MD_SB_BYTES + sizeof(bitmap_super_t));
memset(sb, 0, MD_SB_BYTES + sizeof(bitmap_super_t));
st->sb = sb;
@ -684,7 +686,8 @@ static int store_super0(struct supertype *st, int fd)
if (super->state & (1<<MD_SB_BITMAP_PRESENT)) {
struct bitmap_super_s * bm = (struct bitmap_super_s*)(super+1);
if (__le32_to_cpu(bm->magic) == BITMAP_MAGIC)
if (write(fd, bm, sizeof(*bm)) != sizeof(*bm))
if (write(fd, bm, ROUND_UP(sizeof(*bm),512)) !=
ROUND_UP(sizeof(*bm),512))
return 5;
}
@ -744,7 +747,8 @@ static int compare_super0(struct supertype *st, struct supertype *tst)
if (second->md_magic != MD_SB_MAGIC)
return 1;
if (!first) {
first = malloc(MD_SB_BYTES + sizeof(struct bitmap_super_s));
posix_memalign((void**)&first, 512,
MD_SB_BYTES + sizeof(struct bitmap_super_s));
memcpy(first, second, MD_SB_BYTES + sizeof(struct bitmap_super_s));
st->sb = first;
return 0;
@ -813,7 +817,7 @@ static int load_super0(struct supertype *st, int fd, char *devname)
return 1;
}
super = malloc(MD_SB_BYTES + sizeof(bitmap_super_t));
posix_memalign((void**)&super, 512, MD_SB_BYTES + sizeof(bitmap_super_t)+512);
if (read(fd, super, sizeof(*super)) != MD_SB_BYTES) {
if (devname)
@ -857,8 +861,8 @@ static int load_super0(struct supertype *st, int fd, char *devname)
* valid. If it doesn't clear the bit. An --assemble --force
* should get that written out.
*/
if (read(fd, super+1, sizeof(struct bitmap_super_s))
!= sizeof(struct bitmap_super_s))
if (read(fd, super+1, ROUND_UP(sizeof(struct bitmap_super_s),512))
!= ROUND_UP(sizeof(struct bitmap_super_s),512))
goto no_bitmap;
uuid_from_super0(st, uuid);
@ -986,7 +990,8 @@ static int write_bitmap0(struct supertype *st, int fd)
int rv = 0;
int towrite, n;
char buf[4096];
char abuf[4096+512];
char *buf = (char*)(((long)(abuf+512))&~511UL);
if (!get_dev_size(fd, NULL, &dsize))
return 1;
@ -1002,21 +1007,19 @@ static int write_bitmap0(struct supertype *st, int fd)
if (lseek64(fd, offset + 4096, 0)< 0LL)
return 3;
if (write(fd, ((char*)sb)+MD_SB_BYTES, sizeof(bitmap_super_t)) !=
sizeof(bitmap_super_t))
return -2;
towrite = 64*1024 - MD_SB_BYTES - sizeof(bitmap_super_t);
memset(buf, 0xff, sizeof(buf));
memset(buf, 0xff, 4096);
memcpy(buf, ((char*)sb)+MD_SB_BYTES, sizeof(bitmap_super_t));
towrite = 64*1024;
while (towrite > 0) {
n = towrite;
if (n > sizeof(buf))
n = sizeof(buf);
if (n > 4096)
n = 4096;
n = write(fd, buf, n);
if (n > 0)
towrite -= n;
else
break;
memset(buf, 0xff, 4096);
}
fsync(fd);
if (towrite)

View File

@ -671,7 +671,7 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
__le64_to_cpu(sb->data_offset)) {
/* set data_size to device size less data_offset */
struct misc_dev_info *misc = (struct misc_dev_info*)
(st->sb + 1024 + sizeof(struct bitmap_super_s));
(st->sb + 1024 + 512);
printf("Size was %llu\n", (unsigned long long)
__le64_to_cpu(sb->data_size));
sb->data_size = __cpu_to_le64(
@ -689,11 +689,13 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
static int init_super1(struct supertype *st, mdu_array_info_t *info,
unsigned long long size, char *name, char *homehost, int *uuid)
{
struct mdp_superblock_1 *sb = malloc(1024 + sizeof(bitmap_super_t) +
sizeof(struct misc_dev_info));
struct mdp_superblock_1 *sb;
int spares;
int rfd;
char defname[10];
posix_memalign((void**)&sb, 512, (1024 + 512 +
sizeof(struct misc_dev_info)));
memset(sb, 0, 1024);
st->sb = sb;
@ -857,6 +859,7 @@ static int store_super1(struct supertype *st, int fd)
return 3;
sbsize = sizeof(*sb) + 2 * __le32_to_cpu(sb->max_dev);
sbsize = (sbsize+511)&(~511UL);
if (write(fd, sb, sbsize) != sbsize)
return 4;
@ -866,7 +869,8 @@ static int store_super1(struct supertype *st, int fd)
(((char*)sb)+1024);
if (__le32_to_cpu(bm->magic) == BITMAP_MAGIC) {
locate_bitmap1(st, fd);
if (write(fd, bm, sizeof(*bm)) != sizeof(*bm))
if (write(fd, bm, ROUND_UP(sizeof(*bm),512)) !=
ROUND_UP(sizeof(*bm),512))
return 5;
}
}
@ -1035,9 +1039,10 @@ static int compare_super1(struct supertype *st, struct supertype *tst)
return 1;
if (!first) {
first = malloc(1024+sizeof(bitmap_super_t) +
posix_memalign((void**)&first, 512,
1024 + 512 +
sizeof(struct misc_dev_info));
memcpy(first, second, 1024+sizeof(bitmap_super_t) +
memcpy(first, second, 1024 + 512 +
sizeof(struct misc_dev_info));
st->sb = first;
return 0;
@ -1150,7 +1155,8 @@ static int load_super1(struct supertype *st, int fd, char *devname)
return 1;
}
super = malloc(1024 + sizeof(bitmap_super_t) +
posix_memalign((void**)&super, 512,
1024 + 512 +
sizeof(struct misc_dev_info));
if (read(fd, super, 1024) != 1024) {
@ -1187,7 +1193,7 @@ static int load_super1(struct supertype *st, int fd, char *devname)
bsb = (struct bitmap_super_s *)(((char*)super)+1024);
misc = (struct misc_dev_info*) (bsb+1);
misc = (struct misc_dev_info*) (((char*)super)+1024+512);
misc->device_size = dsize;
/* Now check on the bitmap superblock */
@ -1198,8 +1204,8 @@ static int load_super1(struct supertype *st, int fd, char *devname)
* should get that written out.
*/
locate_bitmap1(st, fd);
if (read(fd, ((char*)super)+1024, sizeof(struct bitmap_super_s))
!= sizeof(struct bitmap_super_s))
if (read(fd, ((char*)super)+1024, 512)
!= 512)
goto no_bitmap;
uuid_from_super1(st, uuid);
@ -1419,25 +1425,28 @@ static int write_bitmap1(struct supertype *st, int fd)
int rv = 0;
int towrite, n;
char buf[4096];
char abuf[4096+512];
char *buf = (char*)(((long)(abuf+512))&~511UL);
locate_bitmap1(st, fd);
if (write(fd, ((char*)sb)+1024, sizeof(bitmap_super_t)) !=
sizeof(bitmap_super_t))
return -2;
memset(buf, 0xff, 4096);
memcpy(buf, ((char*)sb)+1024, sizeof(bitmap_super_t));
towrite = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9);
towrite = (towrite+7) >> 3; /* bits to bytes */
memset(buf, 0xff, sizeof(buf));
towrite += sizeof(bitmap_super_t);
towrite = ROUND_UP(towrite, 512);
while (towrite > 0) {
n = towrite;
if (n > sizeof(buf))
n = sizeof(buf);
if (n > 4096)
n = 4096;
n = write(fd, buf, n);
if (n > 0)
towrite -= n;
else
break;
memset(buf, 0xff, 4096);
}
fsync(fd);
if (towrite)

4
util.c
View File

@ -761,11 +761,11 @@ int dev_open(char *dev, int flags)
snprintf(devname, sizeof(devname), "/dev/.tmp.md.%d:%d:%d",
(int)getpid(), major, minor);
if (mknod(devname, S_IFBLK|0600, makedev(major, minor))==0) {
fd = open(devname, flags);
fd = open(devname, flags|O_DIRECT);
unlink(devname);
}
} else
fd = open(dev, flags);
fd = open(dev, flags|O_DIRECT);
return fd;
}