[VERSION,3] Disk image exclusive and shared locks.

Message ID	20091215164238.GA24410@amd.home.annexia.org
State	New
Headers	show Return-Path: <qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org> Date: Tue, 15 Dec 2009 16:42:38 +0000 From: "Richard W.M. Jones" <rjones@redhat.com> To: qemu-devel@nongnu.org Message-ID: <20091215164238.GA24410@amd.home.annexia.org> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="bg08WKrSYDhXBjb5" Content-Disposition: inline User-Agent: Mutt/1.5.18 (2008-05-17) Subject: [Qemu-devel] [PATCH VERSION 3] Disk image exclusive and shared locks. Precedence: list Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org

diff --git a/block.c b/block.c index 3f3496e..0e69ba8 100644 --- a/block.c +++ b/block.c @@ -449,7 +449,7 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, try_rw = !bs->read_only || bs->is_temporary; if (!(flags & BDRV_O_FILE)) open_flags = (try_rw ? BDRV_O_RDWR : 0) | - (flags & (BDRV_O_CACHE_MASK|BDRV_O_NATIVE_AIO)); + (flags & (BDRV_O_CACHE_MASK|BDRV_O_NATIVE_AIO|BDRV_O_LOCK_MASK)); else open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT); if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) @@ -480,14 +480,19 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, if (bs->backing_file[0] != '\0') { /* if there is a backing file, use it */ BlockDriver *back_drv = NULL; + int back_drv_open_flags = open_flags; bs->backing_hd = bdrv_new(""); /* pass on read_only property to the backing_hd */ bs->backing_hd->read_only = bs->read_only; + /* if front disk is locked, lock backing disk shared */ + back_drv_open_flags &= ~BDRV_O_LOCK_MASK; + if (open_flags & BDRV_O_LOCK_MASK) + back_drv_open_flags |= BDRV_O_LOCK_SHARED; path_combine(backing_filename, sizeof(backing_filename), filename, bs->backing_file); if (bs->backing_format[0] != '\0') back_drv = bdrv_find_format(bs->backing_format); - ret = bdrv_open2(bs->backing_hd, backing_filename, open_flags, + ret = bdrv_open2(bs->backing_hd, backing_filename, back_drv_open_flags, back_drv); if (ret < 0) { bdrv_close(bs); @@ -1388,6 +1393,16 @@ int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) return drv->bdrv_get_info(bs, bdi); } +int bdrv_acquire_lock(BlockDriverState *bs, int lock_flags) +{ + BlockDriver *drv = bs->drv; + if (!drv) + return -ENOMEDIUM; + if (!drv->bdrv_acquire_lock) + return -ENOTSUP; + return drv->bdrv_acquire_lock(bs, lock_flags); +} + int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size) { diff --git a/block.h b/block.h index fa51ddf..bd15bbe 100644 --- a/block.h +++ b/block.h @@ -39,8 +39,11 @@ typedef struct QEMUSnapshotInfo { #define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ #define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */ #define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */ +#define BDRV_O_LOCK_SHARED 0x0100 /* fail unless we can lock shared */ +#define BDRV_O_LOCK_EXCLUSIVE 0x0200 /* fail unless we can lock exclusive */ #define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB) +#define BDRV_O_LOCK_MASK (BDRV_O_LOCK_SHARED | BDRV_O_LOCK_EXCLUSIVE) #define BDRV_SECTOR_BITS 9 #define BDRV_SECTOR_SIZE (1 << BDRV_SECTOR_BITS) @@ -170,6 +173,7 @@ const char *bdrv_get_device_name(BlockDriverState *bs); int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); +int bdrv_acquire_lock(BlockDriverState *bs, int lock_flags); const char *bdrv_get_encrypted_filename(BlockDriverState *bs); void bdrv_get_backing_filename(BlockDriverState *bs, diff --git a/block/raw-posix.c b/block/raw-posix.c index 5a6a22b..4c3326c 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -128,6 +128,24 @@ static int64_t raw_getlength(BlockDriverState *bs); static int cdrom_reopen(BlockDriverState *bs); #endif +static int raw_acquire_lock(BlockDriverState *bs, int lock_flags) +{ + BDRVRawState *s = bs->opaque; + struct flock lk; + + if (lock_flags & BDRV_O_LOCK_SHARED) + lk.l_type = F_RDLCK; + else /* bdrv_flags & BDRV_O_LOCK_EXCLUSIVE */ + lk.l_type = F_WRLCK; + lk.l_whence = SEEK_SET; + lk.l_start = 0; + lk.l_len = 0; /* means lock the whole file */ + + if (fcntl (s->fd, F_SETLK, &lk) == -1) + return -errno; + return 0; +} + static int raw_open_common(BlockDriverState *bs, const char *filename, int bdrv_flags, int open_flags) { @@ -163,6 +181,11 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, s->fd = fd; s->aligned_buf = NULL; + if (bdrv_flags & BDRV_O_LOCK_MASK) { + if (raw_acquire_lock(bs, bdrv_flags & BDRV_O_LOCK_MASK) < 0) + goto out_close; + } + if ((bdrv_flags & BDRV_O_NOCACHE)) { s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE); if (s->aligned_buf == NULL) { @@ -768,6 +791,8 @@ static BlockDriver bdrv_raw = { .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, + .bdrv_acquire_lock = raw_acquire_lock, + .create_options = raw_create_options, }; diff --git a/block/raw-win32.c b/block/raw-win32.c index 72acad5..6f89f03 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -73,6 +73,27 @@ static int set_sparse(int fd) NULL, 0, NULL, 0, &returned, NULL); } +static int raw_acquire_lock(BlockDriverState *bs, int lock_flags) +{ + BDRVRawState *s = bs->opaque; + DWORD flags; + OVERLAPPED ov; + + flags = LOCKFILE_FAIL_IMMEDIATELY; + if (lock_flags & BDRV_O_LOCK_EXCLUSIVE) + flags |= LOCKFILE_EXCLUSIVE_LOCK; + + memset(&ov, 0, sizeof(ov)); + ov.Offset = 0; + ov.OffsetHigh = 0; + + if (!LockFileEx(s->hfile, flags, 0, 1, 0, &ov)) + /* For compatibility with the POSIX lock failure ... */ + return -EAGAIN; + + return 0; +} + static int raw_open(BlockDriverState *bs, const char *filename, int flags) { BDRVRawState *s = bs->opaque; @@ -106,6 +127,15 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) return -EACCES; return -1; } + + if (flags & BDRV_O_LOCK_MASK) { + int err; + + err = raw_acquire_lock(s, flags & BDRV_O_LOCK_MASK); + if (err < 0) + return err; + } + return 0; } @@ -253,6 +283,7 @@ static BlockDriver bdrv_raw = { .bdrv_write = raw_write, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, + .bdrv_acquire_lock = raw_acquire_lock, .create_options = raw_create_options, }; diff --git a/block_int.h b/block_int.h index 9a3b2e0..729d540 100644 --- a/block_int.h +++ b/block_int.h @@ -92,6 +92,7 @@ struct BlockDriver { int (*bdrv_snapshot_list)(BlockDriverState *bs, QEMUSnapshotInfo **psn_info); int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi); + int (*bdrv_acquire_lock)(BlockDriverState *bs, int lock_flags); int (*bdrv_save_vmstate)(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size); diff --git a/monitor.c b/monitor.c index d97d529..d00d5d4 100644 --- a/monitor.c +++ b/monitor.c @@ -890,6 +890,38 @@ static void do_block_set_passwd(Monitor *mon, const QDict *qdict, } } +static void do_block_lock(Monitor *mon, const QDict *qdict, + QObject **ret_data) +{ + BlockDriverState *bs; + const char *p; + int lock_flags; + + bs = bdrv_find(qdict_get_str(qdict, "device")); + if (!bs) { + qemu_error_new(QERR_DEVICE_NOT_FOUND, qdict_get_str(qdict, "device")); + return; + } + + p = qdict_get_str(qdict, "mode"); + if (!p) { + qemu_error_new(QERR_MISSING_PARAMETER, "mode"); + return; + } + if (!strcmp(p, "exclusive")) + lock_flags = BDRV_O_LOCK_EXCLUSIVE; + else if (!strcmp (p, "shared")) + lock_flags = BDRV_O_LOCK_SHARED; + else { + qemu_error_new(QERR_INVALID_PARAMETER, p); + return; + } + + if (bdrv_acquire_lock(bs, lock_flags) < 0) { + qemu_error_new(QERR_LOCK_ACQUIRE_FAILED); + } +} + static void do_change_block(Monitor *mon, const char *device, const char *filename, const char *fmt) { diff --git a/qemu-config.c b/qemu-config.c index c3203c8..df0d3fb 100644 --- a/qemu-config.c +++ b/qemu-config.c @@ -78,6 +78,10 @@ QemuOptsList qemu_drive_opts = { },{ .name = "readonly", .type = QEMU_OPT_BOOL, + },{ + .name = "lock", + .type = QEMU_OPT_STRING, + .help = "lock disk image (exclusive, shared, none)", }, { /* end if list */ } }, diff --git a/qemu-monitor.hx b/qemu-monitor.hx index c788c73..9474905 100644 --- a/qemu-monitor.hx +++ b/qemu-monitor.hx @@ -1063,6 +1063,25 @@ STEXI Set the encrypted device @var{device} password to @var{password} ETEXI + { + .name = "lock", + .args_type = "device:B,mode:s", + .params = "device [exclusive|shared]", + .help = "acquire a lock on an existing device", + .user_print = monitor_user_noop, + .mhandler.cmd_new = do_block_lock, + }, + +STEXI +@item lock @var{device} @var{mode} +Acquire a lock on @var{device}. The @var{mode} string should be +@var{exclusive} to acquire an exclusive lock, or @var{shared} to +acquire a shared lock. + +This does not attempt to lock the backing disk for formats like +qcow2 that can have backing storage. +ETEXI + STEXI @end table ETEXI diff --git a/qemu-options.hx b/qemu-options.hx index b8cc375..efc5f19 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -104,6 +104,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive, " [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n" " [,cache=writethrough|writeback|none][,format=f][,serial=s]\n" " [,addr=A][,id=name][,aio=threads|native]\n" + " [,lock=exclusive|shared|none]\n" " use 'file' as a drive image\n") DEF("set", HAS_ARG, QEMU_OPTION_set, "-set group.id.arg=value\n" @@ -149,6 +150,12 @@ an untrusted format header. This option specifies the serial number to assign to the device. @item addr=@var{addr} Specify the controller's PCI address (if=virtio only). +@item lock=@var{mode} +Acquire a lock on the disk image (@var{file}). +Available modes are: exclusive, shared, none. +The default is "none", meaning we don't try to acquire a lock. To +avoid multiple virtual machines trying to write to a disk at the +same time (which can cause disk corruption), use lock=exclusive. @end table By default, writethrough caching is used for all block device. This means that diff --git a/qerror.c b/qerror.c index 5f8fc5d..8b74a58 100644 --- a/qerror.c +++ b/qerror.c @@ -120,6 +120,10 @@ static const QErrorStringTable qerror_table[] = { .error_fmt = QERR_VNC_SERVER_FAILED, .desc = "Could not start VNC server on %(target)", }, + { + .error_fmt = QERR_LOCK_ACQUIRE_FAILED, + .desc = "Could not lock device with requested mode", + }, {} }; diff --git a/qerror.h b/qerror.h index 9e220d6..540bf8d 100644 --- a/qerror.h +++ b/qerror.h @@ -100,4 +100,7 @@ QError *qobject_to_qerror(const QObject *obj); #define QERR_VNC_SERVER_FAILED \ "{ 'class': 'VNCServerFailed', 'data': { 'target': %s } }" +#define QERR_LOCK_ACQUIRE_FAILED \ + "{ 'class': 'LockAcquireFailed', 'data': {} }" + #endif /* QERROR_H */ diff --git a/vl.c b/vl.c index c0d98f5..b114518 100644 --- a/vl.c +++ b/vl.c @@ -2110,6 +2110,7 @@ DriveInfo *drive_init(QemuOpts *opts, void *opaque, const char *devaddr; DriveInfo *dinfo; int snapshot = 0; + int lock_flags = 0; *fatal_error = 1; @@ -2300,6 +2301,19 @@ DriveInfo *drive_init(QemuOpts *opts, void *opaque, } } + if ((buf = qemu_opt_get(opts, "lock")) != NULL) { + if (!strcmp(buf, "none")) + /* nothing */; + else if (!strcmp(buf, "shared")) + lock_flags |= BDRV_O_LOCK_SHARED; + else if (!strcmp(buf, "exclusive")) + lock_flags |= BDRV_O_LOCK_EXCLUSIVE; + else { + fprintf(stderr, "qemu: invalid lock option\n"); + return NULL; + } + } + /* compute bus and unit according index */ if (index != -1) { @@ -2444,6 +2458,8 @@ DriveInfo *drive_init(QemuOpts *opts, void *opaque, (void)bdrv_set_read_only(dinfo->bdrv, 1); } + bdrv_flags |= lock_flags; + if (bdrv_open2(dinfo->bdrv, file, bdrv_flags, drv) < 0) { fprintf(stderr, "qemu: could not open disk image %s: %s\n", file, strerror(errno));

[VERSION,3] Disk image exclusive and shared locks.

Commit Message

Comments

Patch