diff options
54 files changed, 1875 insertions, 390 deletions
diff --git a/async.c b/async.c index 46d9e639d7..77d080d6f5 100644 --- a/async.c +++ b/async.c @@ -280,6 +280,12 @@ static void aio_timerlist_notify(void *opaque) aio_notify(opaque); } +static void aio_rfifolock_cb(void *opaque) +{ + /* Kick owner thread in case they are blocked in aio_poll() */ + aio_notify(opaque); +} + AioContext *aio_context_new(Error **errp) { int ret; @@ -297,7 +303,7 @@ AioContext *aio_context_new(Error **errp) event_notifier_test_and_clear); ctx->thread_pool = NULL; qemu_mutex_init(&ctx->bh_lock); - rfifolock_init(&ctx->lock, NULL, NULL); + rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx); timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx); return ctx; diff --git a/block.c b/block.c index 2b9ceae02f..dd4f58d6cb 100644 --- a/block.c +++ b/block.c @@ -36,6 +36,7 @@ #include "qmp-commands.h" #include "qemu/timer.h" #include "qapi-event.h" +#include "block/throttle-groups.h" #ifdef CONFIG_BSD #include <sys/types.h> @@ -79,6 +80,12 @@ static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = static QLIST_HEAD(, BlockDriver) bdrv_drivers = QLIST_HEAD_INITIALIZER(bdrv_drivers); +static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, + const char *reference, QDict *options, int flags, + BlockDriverState *parent, + const BdrvChildRole *child_role, + BlockDriver *drv, Error **errp); + static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); /* If non-zero, use only whitelisted block drivers */ static int use_bdrv_whitelist; @@ -682,8 +689,8 @@ static int bdrv_temp_snapshot_flags(int flags) } /* - * Returns the flags that bs->file should get, based on the given flags for - * the parent BDS + * Returns the flags that bs->file should get if a protocol driver is expected, + * based on the given flags for the parent BDS */ static int bdrv_inherited_flags(int flags) { @@ -700,6 +707,25 @@ static int bdrv_inherited_flags(int flags) return flags; } +const BdrvChildRole child_file = { + .inherit_flags = bdrv_inherited_flags, +}; + +/* + * Returns the flags that bs->file should get if the use of formats (and not + * only protocols) is permitted for it, based on the given flags for the parent + * BDS + */ +static int bdrv_inherited_fmt_flags(int parent_flags) +{ + int flags = child_file.inherit_flags(parent_flags); + return flags & ~BDRV_O_PROTOCOL; +} + +const BdrvChildRole child_format = { + .inherit_flags = bdrv_inherited_fmt_flags, +}; + /* * Returns the flags that bs->backing_hd should get, based on the given flags * for the parent BDS @@ -715,6 +741,10 @@ static int bdrv_backing_flags(int flags) return flags; } +static const BdrvChildRole child_backing = { + .inherit_flags = bdrv_backing_flags, +}; + static int bdrv_open_flags(BlockDriverState *bs, int flags) { int open_flags = flags | BDRV_O_CACHE_WB; @@ -767,6 +797,19 @@ static void bdrv_assign_node_name(BlockDriverState *bs, QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); } +static QemuOptsList bdrv_runtime_opts = { + .name = "bdrv_common", + .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), + .desc = { + { + .name = "node-name", + .type = QEMU_OPT_STRING, + .help = "Node name of the block device node", + }, + { /* end of list */ } + }, +}; + /* * Common part for opening disk images and files * @@ -778,6 +821,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, int ret, open_flags; const char *filename; const char *node_name = NULL; + QemuOpts *opts; Error *local_err = NULL; assert(drv != NULL); @@ -798,23 +842,22 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name); - node_name = qdict_get_try_str(options, "node-name"); - bdrv_assign_node_name(bs, node_name, &local_err); + opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); if (local_err) { error_propagate(errp, local_err); - return -EINVAL; + ret = -EINVAL; + goto fail_opts; } - qdict_del(options, "node-name"); - /* bdrv_open() with directly using a protocol as drv. This layer is already - * opened, so assign it to bs (while file becomes a closed BlockDriverState) - * and return immediately. */ - if (file != NULL && drv->bdrv_file_open) { - bdrv_swap(file, bs); - return 0; + node_name = qemu_opt_get(opts, "node-name"); + bdrv_assign_node_name(bs, node_name, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail_opts; } - bs->open_flags = flags; bs->guest_block_size = 512; bs->request_alignment = 512; bs->zero_beyond_eof = true; @@ -827,7 +870,8 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, ? "Driver '%s' can only be used for read-only devices" : "Driver '%s' is not whitelisted", drv->format_name); - return -ENOTSUP; + ret = -ENOTSUP; + goto fail_opts; } assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */ @@ -836,7 +880,8 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, bdrv_enable_copy_on_read(bs); } else { error_setg(errp, "Can't use copy-on-read on read-only device"); - return -EINVAL; + ret = -EINVAL; + goto fail_opts; } } @@ -902,6 +947,8 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, assert(bdrv_opt_mem_align(bs) != 0); assert(bdrv_min_mem_align(bs) != 0); assert((bs->request_alignment != 0) || bs->sg); + + qemu_opts_del(opts); return 0; free_and_fail: @@ -909,6 +956,8 @@ free_and_fail: g_free(bs->opaque); bs->opaque = NULL; bs->drv = NULL; +fail_opts: + qemu_opts_del(opts); return ret; } @@ -942,14 +991,17 @@ static QDict *parse_json_filename(const char *filename, Error **errp) /* * Fills in default options for opening images and converts the legacy * filename/flags pair to option QDict entries. + * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a + * block driver has been specified explicitly. */ -static int bdrv_fill_options(QDict **options, const char **pfilename, int flags, - BlockDriver *drv, Error **errp) +static int bdrv_fill_options(QDict **options, const char **pfilename, + int *flags, BlockDriver *drv, Error **errp) { const char *filename = *pfilename; const char *drvname; - bool protocol = flags & BDRV_O_PROTOCOL; + bool protocol = *flags & BDRV_O_PROTOCOL; bool parse_filename = false; + BlockDriver *tmp_drv; Error *local_err = NULL; /* Parse json: pseudo-protocol */ @@ -967,6 +1019,24 @@ static int bdrv_fill_options(QDict **options, const char **pfilename, int flags, *pfilename = filename = NULL; } + drvname = qdict_get_try_str(*options, "driver"); + + /* If the user has explicitly specified the driver, this choice should + * override the BDRV_O_PROTOCOL flag */ + tmp_drv = drv; + if (!tmp_drv && drvname) { + tmp_drv = bdrv_find_format(drvname); + } + if (tmp_drv) { + protocol = tmp_drv->bdrv_file_open; + } + + if (protocol) { + *flags |= BDRV_O_PROTOCOL; + } else { + *flags &= ~BDRV_O_PROTOCOL; + } + /* Fetch the file name from the options QDict if necessary */ if (protocol && filename) { if (!qdict_haskey(*options, "filename")) { @@ -981,7 +1051,6 @@ static int bdrv_fill_options(QDict **options, const char **pfilename, int flags, /* Find the right block driver */ filename = qdict_get_try_str(*options, "filename"); - drvname = qdict_get_try_str(*options, "driver"); if (drv) { if (drvname) { @@ -1118,9 +1187,10 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) } assert(bs->backing_hd == NULL); - ret = bdrv_open(&backing_hd, - *backing_filename ? backing_filename : NULL, NULL, options, - bdrv_backing_flags(bs->open_flags), NULL, &local_err); + ret = bdrv_open_inherit(&backing_hd, + *backing_filename ? backing_filename : NULL, + NULL, options, 0, bs, &child_backing, + NULL, &local_err); if (ret < 0) { bdrv_unref(backing_hd); backing_hd = NULL; @@ -1154,7 +1224,8 @@ free_exit: * To conform with the behavior of bdrv_open(), *pbs has to be NULL. */ int bdrv_open_image(BlockDriverState **pbs, const char *filename, - QDict *options, const char *bdref_key, int flags, + QDict *options, const char *bdref_key, + BlockDriverState* parent, const BdrvChildRole *child_role, bool allow_none, Error **errp) { QDict *image_options; @@ -1182,7 +1253,8 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename, goto done; } - ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp); + ret = bdrv_open_inherit(pbs, filename, reference, image_options, 0, + parent, child_role, NULL, errp); done: qdict_del(options, bdref_key); @@ -1254,6 +1326,19 @@ out: return ret; } +static void bdrv_attach_child(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + const BdrvChildRole *child_role) +{ + BdrvChild *child = g_new(BdrvChild, 1); + *child = (BdrvChild) { + .bs = child_bs, + .role = child_role, + }; + + QLIST_INSERT_HEAD(&parent_bs->children, child, next); +} + /* * Opens a disk image (raw, qcow2, vmdk, ...) * @@ -1269,9 +1354,11 @@ out: * should be opened. If specified, neither options nor a filename may be given, * nor can an existing BDS be reused (that is, *pbs has to be NULL). */ -int bdrv_open(BlockDriverState **pbs, const char *filename, - const char *reference, QDict *options, int flags, - BlockDriver *drv, Error **errp) +static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, + const char *reference, QDict *options, int flags, + BlockDriverState *parent, + const BdrvChildRole *child_role, + BlockDriver *drv, Error **errp) { int ret; BlockDriverState *file = NULL, *bs; @@ -1280,6 +1367,8 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, int snapshot_flags = 0; assert(pbs); + assert(!child_role || !flags); + assert(!child_role == !parent); if (reference) { bool options_non_empty = options ? qdict_size(options) : false; @@ -1302,6 +1391,9 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, return -ENODEV; } bdrv_ref(bs); + if (child_role) { + bdrv_attach_child(parent, bs, child_role); + } *pbs = bs; return 0; } @@ -1317,7 +1409,12 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, options = qdict_new(); } - ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err); + if (child_role) { + bs->inherits_from = parent; + flags = child_role->inherit_flags(parent->open_flags); + } + + ret = bdrv_fill_options(&options, &filename, &flags, drv, &local_err); if (local_err) { goto fail; } @@ -1336,12 +1433,8 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, } assert(drvname || !(flags & BDRV_O_PROTOCOL)); - if (drv && !drv->bdrv_file_open) { - /* If the user explicitly wants a format driver here, we'll need to add - * another layer for the protocol in bs->file */ - flags &= ~BDRV_O_PROTOCOL; - } + bs->open_flags = flags; bs->options = options; options = qdict_clone_shallow(options); @@ -1356,9 +1449,9 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, } assert(file == NULL); + bs->open_flags = flags; ret = bdrv_open_image(&file, filename, options, "file", - bdrv_inherited_flags(flags), - true, &local_err); + bs, &child_file, true, &local_err); if (ret < 0) { goto fail; } @@ -1377,6 +1470,12 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, goto fail; } + /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */ + assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open); + /* file must be NULL if a protocol BDS is about to be created + * (the inverse results in an error message from bdrv_open_common()) */ + assert(!(flags & BDRV_O_PROTOCOL) || !file); + /* Open the image */ ret = bdrv_open_common(bs, file, options, flags, drv, &local_err); if (ret < 0) { @@ -1439,6 +1538,10 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, goto close_and_fail; } + if (child_role) { + bdrv_attach_child(parent, bs, child_role); + } + QDECREF(options); *pbs = bs; return 0; @@ -1475,6 +1578,14 @@ close_and_fail: return ret; } +int bdrv_open(BlockDriverState **pbs, const char *filename, + const char *reference, QDict *options, int flags, + BlockDriver *drv, Error **errp) +{ + return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL, + NULL, drv, errp); +} + typedef struct BlockReopenQueueEntry { bool prepared; BDRVReopenState state; @@ -1505,6 +1616,8 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, assert(bs != NULL); BlockReopenQueueEntry *bs_entry; + BdrvChild *child; + if (bs_queue == NULL) { bs_queue = g_new0(BlockReopenQueue, 1); QSIMPLEQ_INIT(bs_queue); @@ -1513,8 +1626,15 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, /* bdrv_open() masks this flag out */ flags &= ~BDRV_O_PROTOCOL; - if (bs->file) { - bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags)); + QLIST_FOREACH(child, &bs->children, next) { + int child_flags; + + if (child->bs->inherits_from != bs) { + continue; + } + + child_flags = child->role->inherit_flags(flags); + bdrv_reopen_queue(bs_queue, child->bs, child_flags); } bs_entry = g_new0(BlockReopenQueueEntry, 1); @@ -1725,6 +1845,16 @@ void bdrv_close(BlockDriverState *bs) notifier_list_notify(&bs->close_notifiers, bs); if (bs->drv) { + BdrvChild *child, *next; + + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { + if (child->bs->inherits_from == bs) { + child->bs->inherits_from = NULL; + } + QLIST_REMOVE(child, next); + g_free(child); + } + if (bs->backing_hd) { BlockDriverState *backing_hd = bs->backing_hd; bdrv_set_backing_hd(bs, NULL); @@ -1822,12 +1952,18 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->enable_write_cache = bs_src->enable_write_cache; /* i/o throttled req */ - memcpy(&bs_dest->throttle_state, - &bs_src->throttle_state, - sizeof(ThrottleState)); + bs_dest->throttle_state = bs_src->throttle_state, + bs_dest->io_limits_enabled = bs_src->io_limits_enabled; + bs_dest->pending_reqs[0] = bs_src->pending_reqs[0]; + bs_dest->pending_reqs[1] = bs_src->pending_reqs[1]; bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0]; bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1]; - bs_dest->io_limits_enabled = bs_src->io_limits_enabled; + memcpy(&bs_dest->round_robin, + &bs_src->round_robin, + sizeof(bs_dest->round_robin)); + memcpy(&bs_dest->throttle_timers, + &bs_src->throttle_timers, + sizeof(ThrottleTimers)); /* r/w error */ bs_dest->on_read_error = bs_src->on_read_error; @@ -1869,6 +2005,10 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) { BlockDriverState tmp; + BdrvChild *child; + + bdrv_drain(bs_new); + bdrv_drain(bs_old); /* The code needs to swap the node_name but simply swapping node_list won't * work so first remove the nodes from the graph list, do the swap then @@ -1881,12 +2021,21 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list); } + /* If the BlockDriverState is part of a throttling group acquire + * its lock since we're going to mess with the protected fields. + * Otherwise there's no need to worry since no one else can touch + * them. */ + if (bs_old->throttle_state) { + throttle_group_lock(bs_old); + } + /* bs_new must be unattached and shouldn't have anything fancy enabled */ assert(!bs_new->blk); assert(QLIST_EMPTY(&bs_new->dirty_bitmaps)); assert(bs_new->job == NULL); assert(bs_new->io_limits_enabled == false); - assert(!throttle_have_timer(&bs_new->throttle_state)); + assert(bs_new->throttle_state == NULL); + assert(!throttle_timers_are_initialized(&bs_new->throttle_timers)); tmp = *bs_new; *bs_new = *bs_old; @@ -1903,7 +2052,13 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) /* Check a few fields that should remain attached to the device */ assert(bs_new->job == NULL); assert(bs_new->io_limits_enabled == false); - assert(!throttle_have_timer(&bs_new->throttle_state)); + assert(bs_new->throttle_state == NULL); + assert(!throttle_timers_are_initialized(&bs_new->throttle_timers)); + + /* Release the ThrottleGroup lock */ + if (bs_old->throttle_state) { + throttle_group_unlock(bs_old); + } /* insert the nodes back into the graph node list if needed */ if (bs_new->node_name[0] != '\0') { @@ -1913,6 +2068,30 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list); } + /* + * Update lh_first.le_prev for non-empty lists. + * + * The head of the op blocker list doesn't change because it is moved back + * in bdrv_move_feature_fields(). + */ + assert(QLIST_EMPTY(&bs_old->tracked_requests)); + assert(QLIST_EMPTY(&bs_new->tracked_requests)); + + QLIST_FIX_HEAD_PTR(&bs_new->children, next); + QLIST_FIX_HEAD_PTR(&bs_old->children, next); + + /* Update references in bs->opaque and children */ + QLIST_FOREACH(child, &bs_old->children, next) { + if (child->bs->inherits_from == bs_new) { + child->bs->inherits_from = bs_old; + } + } + QLIST_FOREACH(child, &bs_new->children, next) { + if (child->bs->inherits_from == bs_old) { + child->bs->inherits_from = bs_new; + } + } + bdrv_rebind(bs_new); bdrv_rebind(bs_old); } @@ -1935,6 +2114,7 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) /* The contents of 'tmp' will become bs_top, as we are * swapping bs_new and bs_top contents. */ bdrv_set_backing_hd(bs_top, bs_new); + bdrv_attach_child(bs_top, bs_new, &child_backing); } static void bdrv_delete(BlockDriverState *bs) @@ -3220,10 +3400,9 @@ static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) uint64_t size = bdrv_nb_sectors(bs); QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { - if (bdrv_dirty_bitmap_frozen(bitmap)) { - continue; - } + assert(!bdrv_dirty_bitmap_frozen(bitmap)); hbitmap_truncate(bitmap->bitmap, size); + bitmap->size = size; } } @@ -3691,7 +3870,7 @@ void bdrv_detach_aio_context(BlockDriverState *bs) } if (bs->io_limits_enabled) { - throttle_detach_aio_context(&bs->throttle_state); + throttle_timers_detach_aio_context(&bs->throttle_timers); } if (bs->drv->bdrv_detach_aio_context) { bs->drv->bdrv_detach_aio_context(bs); @@ -3727,7 +3906,7 @@ void bdrv_attach_aio_context(BlockDriverState *bs, bs->drv->bdrv_attach_aio_context(bs, new_context); } if (bs->io_limits_enabled) { - throttle_attach_aio_context(&bs->throttle_state, new_context); + throttle_timers_attach_aio_context(&bs->throttle_timers, new_context); } QLIST_FOREACH(ban, &bs->aio_notifiers, list) { diff --git a/block/Makefile.objs b/block/Makefile.objs index 0d8c2a4ab6..c34fd7cdc2 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -10,6 +10,7 @@ block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o block-obj-$(CONFIG_POSIX) += raw-posix.o block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o block-obj-y += null.o mirror.o io.o +block-obj-y += throttle-groups.o block-obj-y += nbd.o nbd-client.o sheepdog.o block-obj-$(CONFIG_LIBISCSI) += iscsi.o diff --git a/block/blkdebug.c b/block/blkdebug.c index 1e92607ef3..bc247f46f5 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -429,7 +429,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, /* Open the backing file */ assert(bs->file == NULL); ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-image"), options, "image", - flags | BDRV_O_PROTOCOL, false, &local_err); + bs, &child_file, false, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto out; diff --git a/block/blkverify.c b/block/blkverify.c index 438dff8bcb..d277e63220 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -125,7 +125,7 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags, /* Open the raw file */ assert(bs->file == NULL); ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-raw"), options, - "raw", flags | BDRV_O_PROTOCOL, false, &local_err); + "raw", bs, &child_file, false, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto fail; @@ -134,7 +134,7 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags, /* Open the test file */ assert(s->test_file == NULL); ret = bdrv_open_image(&s->test_file, qemu_opt_get(opts, "x-image"), options, - "test", flags, false, &local_err); + "test", bs, &child_format, false, &local_err); if (ret < 0) { error_propagate(errp, local_err); s->test_file = NULL; diff --git a/block/io.c b/block/io.c index e394d92626..bb4f78784e 100644 --- a/block/io.c +++ b/block/io.c @@ -23,9 +23,9 @@ */ #include "trace.h" -#include "sysemu/qtest.h" #include "block/blockjob.h" #include "block/block_int.h" +#include "block/throttle-groups.h" #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ @@ -65,7 +65,7 @@ void bdrv_set_io_limits(BlockDriverState *bs, { int i; - throttle_config(&bs->throttle_state, cfg); + throttle_group_config(bs, cfg); for (i = 0; i < 2; i++) { qemu_co_enter_next(&bs->throttled_reqs[i]); @@ -95,72 +95,33 @@ static bool bdrv_start_throttled_reqs(BlockDriverState *bs) void bdrv_io_limits_disable(BlockDriverState *bs) { bs->io_limits_enabled = false; - bdrv_start_throttled_reqs(bs); - - throttle_destroy(&bs->throttle_state); -} - -static void bdrv_throttle_read_timer_cb(void *opaque) -{ - BlockDriverState *bs = opaque; - qemu_co_enter_next(&bs->throttled_reqs[0]); -} - -static void bdrv_throttle_write_timer_cb(void *opaque) -{ - BlockDriverState *bs = opaque; - qemu_co_enter_next(&bs->throttled_reqs[1]); + throttle_group_unregister_bs(bs); } /* should be called before bdrv_set_io_limits if a limit is set */ -void bdrv_io_limits_enable(BlockDriverState *bs) +void bdrv_io_limits_enable(BlockDriverState *bs, const char *group) { - int clock_type = QEMU_CLOCK_REALTIME; - - if (qtest_enabled()) { - /* For testing block IO throttling only */ - clock_type = QEMU_CLOCK_VIRTUAL; - } assert(!bs->io_limits_enabled); - throttle_init(&bs->throttle_state, - bdrv_get_aio_context(bs), - clock_type, - bdrv_throttle_read_timer_cb, - bdrv_throttle_write_timer_cb, - bs); + throttle_group_register_bs(bs, group); bs->io_limits_enabled = true; } -/* This function makes an IO wait if needed - * - * @nb_sectors: the number of sectors of the IO - * @is_write: is the IO a write - */ -static void bdrv_io_limits_intercept(BlockDriverState *bs, - unsigned int bytes, - bool is_write) +void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group) { - /* does this io must wait */ - bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write); - - /* if must wait or any request of this type throttled queue the IO */ - if (must_wait || - !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) { - qemu_co_queue_wait(&bs->throttled_reqs[is_write]); + /* this bs is not part of any group */ + if (!bs->throttle_state) { + return; } - /* the IO will be executed, do the accounting */ - throttle_account(&bs->throttle_state, is_write, bytes); - - - /* if the next request must wait -> do nothing */ - if (throttle_schedule_timer(&bs->throttle_state, is_write)) { + /* this bs is a part of the same group than the one we want */ + if (!g_strcmp0(throttle_group_get_name(bs), group)) { return; } - /* else queue next request for execution */ - qemu_co_queue_next(&bs->throttled_reqs[is_write]); + /* need to change the group this bs belong to */ + bdrv_io_limits_disable(bs); + bdrv_io_limits_enable(bs, group); } void bdrv_setup_io_funcs(BlockDriver *bdrv) @@ -967,7 +928,7 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, /* throttling disk I/O */ if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, bytes, false); + throttle_group_co_io_limits_intercept(bs, bytes, false); } /* Align read if necessary by padding qiov */ @@ -1297,7 +1258,7 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, /* throttling disk I/O */ if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, bytes, true); + throttle_group_co_io_limits_intercept(bs, bytes, true); } /* diff --git a/block/qapi.c b/block/qapi.c index 18d2b95f54..a738148bce 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -24,6 +24,7 @@ #include "block/qapi.h" #include "block/block_int.h" +#include "block/throttle-groups.h" #include "block/write-threshold.h" #include "qmp-commands.h" #include "qapi-visit.h" @@ -65,7 +66,9 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp) if (bs->io_limits_enabled) { ThrottleConfig cfg; - throttle_get_config(&bs->throttle_state, &cfg); + + throttle_group_get_config(bs, &cfg); + info->bps = cfg.buckets[THROTTLE_BPS_TOTAL].avg; info->bps_rd = cfg.buckets[THROTTLE_BPS_READ].avg; info->bps_wr = cfg.buckets[THROTTLE_BPS_WRITE].avg; @@ -90,6 +93,9 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp) info->has_iops_size = cfg.op_size; info->iops_size = cfg.op_size; + + info->has_group = true; + info->group = g_strdup(throttle_group_get_name(bs)); } info->write_threshold = bdrv_write_threshold_get(bs); diff --git a/block/qcow2.c b/block/qcow2.c index f7b4cc6a32..c4f6938a36 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -483,9 +483,11 @@ static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = { [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2, }; -static void read_cache_sizes(QemuOpts *opts, uint64_t *l2_cache_size, +static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, + uint64_t *l2_cache_size, uint64_t *refcount_cache_size, Error **errp) { + BDRVQcowState *s = bs->opaque; uint64_t combined_cache_size; bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set; @@ -525,7 +527,9 @@ static void read_cache_sizes(QemuOpts *opts, uint64_t *l2_cache_size, } } else { if (!l2_cache_size_set && !refcount_cache_size_set) { - *l2_cache_size = DEFAULT_L2_CACHE_BYTE_SIZE; + *l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE, + (uint64_t)DEFAULT_L2_CACHE_CLUSTERS + * s->cluster_size); *refcount_cache_size = *l2_cache_size / DEFAULT_L2_REFCOUNT_SIZE_RATIO; } else if (!l2_cache_size_set) { @@ -803,7 +807,8 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - read_cache_sizes(opts, &l2_cache_size, &refcount_cache_size, &local_err); + read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size, + &local_err); if (local_err) { error_propagate(errp, local_err); ret = -EINVAL; diff --git a/block/qcow2.h b/block/qcow2.h index 0076512af4..5936d299a3 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -62,11 +62,14 @@ #define MIN_CLUSTER_BITS 9 #define MAX_CLUSTER_BITS 21 -#define MIN_L2_CACHE_SIZE 1 /* cluster */ +/* Must be at least 2 to cover COW */ +#define MIN_L2_CACHE_SIZE 2 /* clusters */ /* Must be at least 4 to cover all cases of refcount table growth */ #define MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */ +/* Whichever is more */ +#define DEFAULT_L2_CACHE_CLUSTERS 8 /* clusters */ #define DEFAULT_L2_CACHE_BYTE_SIZE 1048576 /* bytes */ /* The refblock cache needs only a fourth of the L2 cache size to cover as many diff --git a/block/quorum.c b/block/quorum.c index f91ef75a84..77e55b2775 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -866,25 +866,18 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, Error *local_err = NULL; QemuOpts *opts = NULL; bool *opened; - QDict *sub = NULL; - QList *list = NULL; - const QListEntry *lentry; int i; int ret = 0; qdict_flatten(options); - qdict_extract_subqdict(options, &sub, "children."); - qdict_array_split(sub, &list); - if (qdict_size(sub)) { - error_setg(&local_err, "Invalid option children.%s", - qdict_first(sub)->key); + /* count how many different children are present */ + s->num_children = qdict_array_entries(options, "children."); + if (s->num_children < 0) { + error_setg(&local_err, "Option children is not a valid array"); ret = -EINVAL; goto exit; } - - /* count how many different children are present */ - s->num_children = qlist_size(list); if (s->num_children < 2) { error_setg(&local_err, "Number of provided children must be greater than 1"); @@ -937,37 +930,17 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, s->bs = g_new0(BlockDriverState *, s->num_children); opened = g_new0(bool, s->num_children); - for (i = 0, lentry = qlist_first(list); lentry; - lentry = qlist_next(lentry), i++) { - QDict *d; - QString *string; - - switch (qobject_type(lentry->value)) - { - /* List of options */ - case QTYPE_QDICT: - d = qobject_to_qdict(lentry->value); - QINCREF(d); - ret = bdrv_open(&s->bs[i], NULL, NULL, d, flags, NULL, - &local_err); - break; - - /* QMP reference */ - case QTYPE_QSTRING: - string = qobject_to_qstring(lentry->value); - ret = bdrv_open(&s->bs[i], NULL, qstring_get_str(string), NULL, - flags, NULL, &local_err); - break; - - default: - error_setg(&local_err, "Specification of child block device %i " - "is invalid", i); - ret = -EINVAL; - } + for (i = 0; i < s->num_children; i++) { + char indexstr[32]; + ret = snprintf(indexstr, 32, "children.%d", i); + assert(ret < 32); + ret = bdrv_open_image(&s->bs[i], NULL, options, indexstr, bs, + &child_format, false, &local_err); if (ret < 0) { goto close_exit; } + opened[i] = true; } @@ -990,8 +963,6 @@ exit: if (local_err) { error_propagate(errp, local_err); } - QDECREF(list); - QDECREF(sub); return ret; } diff --git a/block/raw-posix.c b/block/raw-posix.c index 2990e954ae..44ade8cf4e 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1848,8 +1848,9 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, *pnum = nb_sectors; ret = BDRV_BLOCK_DATA; } else if (data == start) { - /* On a data extent, compute sectors to the end of the extent. */ - *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE); + /* On a data extent, compute sectors to the end of the extent, + * possibly including a partial sector at EOF. */ + *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE)); ret = BDRV_BLOCK_DATA; } else { /* On a hole, compute sectors to the beginning of the next extent. */ diff --git a/block/throttle-groups.c b/block/throttle-groups.c new file mode 100644 index 0000000000..efc462fbc5 --- /dev/null +++ b/block/throttle-groups.c @@ -0,0 +1,496 @@ +/* + * QEMU block throttling group infrastructure + * + * Copyright (C) Nodalink, EURL. 2014 + * Copyright (C) Igalia, S.L. 2015 + * + * Authors: + * Benoît Canet <benoit.canet@nodalink.com> + * Alberto Garcia <berto@igalia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "block/throttle-groups.h" +#include "qemu/queue.h" +#include "qemu/thread.h" +#include "sysemu/qtest.h" + +/* The ThrottleGroup structure (with its ThrottleState) is shared + * among different BlockDriverState and it's independent from + * AioContext, so in order to use it from different threads it needs + * its own locking. + * + * This locking is however handled internally in this file, so it's + * mostly transparent to outside users (but see the documentation in + * throttle_groups_lock()). + * + * The whole ThrottleGroup structure is private and invisible to + * outside users, that only use it through its ThrottleState. + * + * In addition to the ThrottleGroup structure, BlockDriverState has + * fields that need to be accessed by other members of the group and + * therefore also need to be protected by this lock. Once a BDS is + * registered in a group those fields can be accessed by other threads + * any time. + * + * Again, all this is handled internally and is mostly transparent to + * the outside. The 'throttle_timers' field however has an additional + * constraint because it may be temporarily invalid (see for example + * bdrv_set_aio_context()). Therefore in this file a thread will + * access some other BDS's timers only after verifying that that BDS + * has throttled requests in the queue. + */ +typedef struct ThrottleGroup { + char *name; /* This is constant during the lifetime of the group */ + + QemuMutex lock; /* This lock protects the following four fields */ + ThrottleState ts; + QLIST_HEAD(, BlockDriverState) head; + BlockDriverState *tokens[2]; + bool any_timer_armed[2]; + + /* These two are protected by the global throttle_groups_lock */ + unsigned refcount; + QTAILQ_ENTRY(ThrottleGroup) list; +} ThrottleGroup; + +static QemuMutex throttle_groups_lock; +static QTAILQ_HEAD(, ThrottleGroup) throttle_groups = + QTAILQ_HEAD_INITIALIZER(throttle_groups); + +/* Increments the reference count of a ThrottleGroup given its name. + * + * If no ThrottleGroup is found with the given name a new one is + * created. + * + * @name: the name of the ThrottleGroup + * @ret: the ThrottleGroup + */ +static ThrottleGroup *throttle_group_incref(const char *name) +{ + ThrottleGroup *tg = NULL; + ThrottleGroup *iter; + + qemu_mutex_lock(&throttle_groups_lock); + + /* Look for an existing group with that name */ + QTAILQ_FOREACH(iter, &throttle_groups, list) { + if (!strcmp(name, iter->name)) { + tg = iter; + break; + } + } + + /* Create a new one if not found */ + if (!tg) { + tg = g_new0(ThrottleGroup, 1); + tg->name = g_strdup(name); + qemu_mutex_init(&tg->lock); + throttle_init(&tg->ts); + QLIST_INIT(&tg->head); + + QTAILQ_INSERT_TAIL(&throttle_groups, tg, list); + } + + tg->refcount++; + + qemu_mutex_unlock(&throttle_groups_lock); + + return tg; +} + +/* Decrease the reference count of a ThrottleGroup. + * + * When the reference count reaches zero the ThrottleGroup is + * destroyed. + * + * @tg: The ThrottleGroup to unref + */ +static void throttle_group_unref(ThrottleGroup *tg) +{ + qemu_mutex_lock(&throttle_groups_lock); + if (--tg->refcount == 0) { + QTAILQ_REMOVE(&throttle_groups, tg, list); + qemu_mutex_destroy(&tg->lock); + g_free(tg->name); + g_free(tg); + } + qemu_mutex_unlock(&throttle_groups_lock); +} + +/* Get the name from a BlockDriverState's ThrottleGroup. The name (and + * the pointer) is guaranteed to remain constant during the lifetime + * of the group. + * + * @bs: a BlockDriverState that is member of a throttling group + * @ret: the name of the group. + */ +const char *throttle_group_get_name(BlockDriverState *bs) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + return tg->name; +} + +/* Return the next BlockDriverState in the round-robin sequence, + * simulating a circular list. + * + * This assumes that tg->lock is held. + * + * @bs: the current BlockDriverState + * @ret: the next BlockDriverState in the sequence + */ +static BlockDriverState *throttle_group_next_bs(BlockDriverState *bs) +{ + ThrottleState *ts = bs->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + BlockDriverState *next = QLIST_NEXT(bs, round_robin); + + if (!next) { + return QLIST_FIRST(&tg->head); + } + + return next; +} + +/* Return the next BlockDriverState in the round-robin sequence with + * pending I/O requests. + * + * This assumes that tg->lock is held. + * + * @bs: the current BlockDriverState + * @is_write: the type of operation (read/write) + * @ret: the next BlockDriverState with pending requests, or bs + * if there is none. + */ +static BlockDriverState *next_throttle_token(BlockDriverState *bs, + bool is_write) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + BlockDriverState *token, *start; + + start = token = tg->tokens[is_write]; + + /* get next bs round in round robin style */ + token = throttle_group_next_bs(token); + while (token != start && !token->pending_reqs[is_write]) { + token = throttle_group_next_bs(token); + } + + /* If no IO are queued for scheduling on the next round robin token + * then decide the token is the current bs because chances are + * the current bs get the current request queued. + */ + if (token == start && !token->pending_reqs[is_write]) { + token = bs; + } + + return token; +} + +/* Check if the next I/O request for a BlockDriverState needs to be + * throttled or not. If there's no timer set in this group, set one + * and update the token accordingly. + * + * This assumes that tg->lock is held. + * + * @bs: the current BlockDriverState + * @is_write: the type of operation (read/write) + * @ret: whether the I/O request needs to be throttled or not + */ +static bool throttle_group_schedule_timer(BlockDriverState *bs, + bool is_write) +{ + ThrottleState *ts = bs->throttle_state; + ThrottleTimers *tt = &bs->throttle_timers; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + bool must_wait; + + /* Check if any of the timers in this group is already armed */ + if (tg->any_timer_armed[is_write]) { + return true; + } + + must_wait = throttle_schedule_timer(ts, tt, is_write); + + /* If a timer just got armed, set bs as the current token */ + if (must_wait) { + tg->tokens[is_write] = bs; + tg->any_timer_armed[is_write] = true; + } + + return must_wait; +} + +/* Look for the next pending I/O request and schedule it. + * + * This assumes that tg->lock is held. + * + * @bs: the current BlockDriverState + * @is_write: the type of operation (read/write) + */ +static void schedule_next_request(BlockDriverState *bs, bool is_write) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + bool must_wait; + BlockDriverState *token; + + /* Check if there's any pending request to schedule next */ + token = next_throttle_token(bs, is_write); + if (!token->pending_reqs[is_write]) { + return; + } + + /* Set a timer for the request if it needs to be throttled */ + must_wait = throttle_group_schedule_timer(token, is_write); + + /* If it doesn't have to wait, queue it for immediate execution */ + if (!must_wait) { + /* Give preference to requests from the current bs */ + if (qemu_in_coroutine() && + qemu_co_queue_next(&bs->throttled_reqs[is_write])) { + token = bs; + } else { + ThrottleTimers *tt = &token->throttle_timers; + int64_t now = qemu_clock_get_ns(tt->clock_type); + timer_mod(tt->timers[is_write], now + 1); + tg->any_timer_armed[is_write] = true; + } + tg->tokens[is_write] = token; + } +} + +/* Check if an I/O request needs to be throttled, wait and set a timer + * if necessary, and schedule the next request using a round robin + * algorithm. + * + * @bs: the current BlockDriverState + * @bytes: the number of bytes for this I/O + * @is_write: the type of operation (read/write) + */ +void coroutine_fn throttle_group_co_io_limits_intercept(BlockDriverState *bs, + unsigned int bytes, + bool is_write) +{ + bool must_wait; + BlockDriverState *token; + + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); + + /* First we check if this I/O has to be throttled. */ + token = next_throttle_token(bs, is_write); + must_wait = throttle_group_schedule_timer(token, is_write); + + /* Wait if there's a timer set or queued requests of this type */ + if (must_wait || bs->pending_reqs[is_write]) { + bs->pending_reqs[is_write]++; + qemu_mutex_unlock(&tg->lock); + qemu_co_queue_wait(&bs->throttled_reqs[is_write]); + qemu_mutex_lock(&tg->lock); + bs->pending_reqs[is_write]--; + } + + /* The I/O will be executed, so do the accounting */ + throttle_account(bs->throttle_state, is_write, bytes); + + /* Schedule the next request */ + schedule_next_request(bs, is_write); + + qemu_mutex_unlock(&tg->lock); +} + +/* Update the throttle configuration for a particular group. Similar + * to throttle_config(), but guarantees atomicity within the + * throttling group. + * + * @bs: a BlockDriverState that is member of the group + * @cfg: the configuration to set + */ +void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg) +{ + ThrottleTimers *tt = &bs->throttle_timers; + ThrottleState *ts = bs->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); + throttle_config(ts, tt, cfg); + /* throttle_config() cancels the timers */ + tg->any_timer_armed[0] = tg->any_timer_armed[1] = false; + qemu_mutex_unlock(&tg->lock); +} + +/* Get the throttle configuration from a particular group. Similar to + * throttle_get_config(), but guarantees atomicity within the + * throttling group. + * + * @bs: a BlockDriverState that is member of the group + * @cfg: the configuration will be written here + */ +void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg) +{ + ThrottleState *ts = bs->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); + throttle_get_config(ts, cfg); + qemu_mutex_unlock(&tg->lock); +} + +/* ThrottleTimers callback. This wakes up a request that was waiting + * because it had been throttled. + * + * @bs: the BlockDriverState whose request had been throttled + * @is_write: the type of operation (read/write) + */ +static void timer_cb(BlockDriverState *bs, bool is_write) +{ + ThrottleState *ts = bs->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + bool empty_queue; + + /* The timer has just been fired, so we can update the flag */ + qemu_mutex_lock(&tg->lock); + tg->any_timer_armed[is_write] = false; + qemu_mutex_unlock(&tg->lock); + + /* Run the request that was waiting for this timer */ + empty_queue = !qemu_co_enter_next(&bs->throttled_reqs[is_write]); + + /* If the request queue was empty then we have to take care of + * scheduling the next one */ + if (empty_queue) { + qemu_mutex_lock(&tg->lock); + schedule_next_request(bs, is_write); + qemu_mutex_unlock(&tg->lock); + } +} + +static void read_timer_cb(void *opaque) +{ + timer_cb(opaque, false); +} + +static void write_timer_cb(void *opaque) +{ + timer_cb(opaque, true); +} + +/* Register a BlockDriverState in the throttling group, also + * initializing its timers and updating its throttle_state pointer to + * point to it. If a throttling group with that name does not exist + * yet, it will be created. + * + * @bs: the BlockDriverState to insert + * @groupname: the name of the group + */ +void throttle_group_register_bs(BlockDriverState *bs, const char *groupname) +{ + int i; + ThrottleGroup *tg = throttle_group_incref(groupname); + int clock_type = QEMU_CLOCK_REALTIME; + + if (qtest_enabled()) { + /* For testing block IO throttling only */ + clock_type = QEMU_CLOCK_VIRTUAL; + } + + bs->throttle_state = &tg->ts; + + qemu_mutex_lock(&tg->lock); + /* If the ThrottleGroup is new set this BlockDriverState as the token */ + for (i = 0; i < 2; i++) { + if (!tg->tokens[i]) { + tg->tokens[i] = bs; + } + } + + QLIST_INSERT_HEAD(&tg->head, bs, round_robin); + + throttle_timers_init(&bs->throttle_timers, + bdrv_get_aio_context(bs), + clock_type, + read_timer_cb, + write_timer_cb, + bs); + + qemu_mutex_unlock(&tg->lock); +} + +/* Unregister a BlockDriverState from its group, removing it from the + * list, destroying the timers and setting the throttle_state pointer + * to NULL. + * + * The group will be destroyed if it's empty after this operation. + * + * @bs: the BlockDriverState to remove + */ +void throttle_group_unregister_bs(BlockDriverState *bs) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + int i; + + qemu_mutex_lock(&tg->lock); + for (i = 0; i < 2; i++) { + if (tg->tokens[i] == bs) { + BlockDriverState *token = throttle_group_next_bs(bs); + /* Take care of the case where this is the last bs in the group */ + if (token == bs) { + token = NULL; + } + tg->tokens[i] = token; + } + } + + /* remove the current bs from the list */ + QLIST_REMOVE(bs, round_robin); + throttle_timers_destroy(&bs->throttle_timers); + qemu_mutex_unlock(&tg->lock); + + throttle_group_unref(tg); + bs->throttle_state = NULL; +} + +/* Acquire the lock of this throttling group. + * + * You won't normally need to use this. None of the functions from the + * ThrottleGroup API require you to acquire the lock since all of them + * deal with it internally. + * + * This should only be used in exceptional cases when you want to + * access the protected fields of a BlockDriverState directly + * (e.g. bdrv_swap()). + * + * @bs: a BlockDriverState that is member of the group + */ +void throttle_group_lock(BlockDriverState *bs) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); +} + +/* Release the lock of this throttling group. + * + * See the comments in throttle_group_lock(). + */ +void throttle_group_unlock(BlockDriverState *bs) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + qemu_mutex_unlock(&tg->lock); +} + +static void throttle_groups_init(void) +{ + qemu_mutex_init(&throttle_groups_lock); +} + +block_init(throttle_groups_init); diff --git a/block/vmdk.c b/block/vmdk.c index b66745dfdd..be9263a34e 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -321,37 +321,13 @@ static int vmdk_is_cid_valid(BlockDriverState *bs) return 1; } -/* Queue extents, if any, for reopen() */ +/* We have nothing to do for VMDK reopen, stubs just return success */ static int vmdk_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, Error **errp) { - BDRVVmdkState *s; - int ret = -1; - int i; - VmdkExtent *e; - assert(state != NULL); assert(state->bs != NULL); - - if (queue == NULL) { - error_setg(errp, "No reopen queue for VMDK extents"); - goto exit; - } - - s = state->bs->opaque; - - assert(s != NULL); - - for (i = 0; i < s->num_extents; i++) { - e = &s->extents[i]; - if (e->file != state->bs->file) { - bdrv_reopen_queue(queue, e->file, state->flags); - } - } - ret = 0; - -exit: - return ret; + return 0; } static int vmdk_parent_open(BlockDriverState *bs) @@ -543,7 +519,7 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs, } static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, - Error **errp); + QDict *options, Error **errp); static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset, Error **errp) @@ -582,7 +558,7 @@ static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset, static int vmdk_open_vmdk4(BlockDriverState *bs, BlockDriverState *file, - int flags, Error **errp) + int flags, QDict *options, Error **errp) { int ret; uint32_t magic; @@ -606,7 +582,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, if (!buf) { return -EINVAL; } - ret = vmdk_open_desc_file(bs, flags, buf, errp); + ret = vmdk_open_desc_file(bs, flags, buf, options, errp); g_free(buf); return ret; } @@ -763,7 +739,7 @@ static int vmdk_parse_description(const char *desc, const char *opt_name, /* Open an extent file and append to bs array */ static int vmdk_open_sparse(BlockDriverState *bs, BlockDriverState *file, int flags, - char *buf, Error **errp) + char *buf, QDict *options, Error **errp) { uint32_t magic; @@ -773,7 +749,7 @@ static int vmdk_open_sparse(BlockDriverState *bs, return vmdk_open_vmfs_sparse(bs, file, flags, errp); break; case VMDK4_MAGIC: - return vmdk_open_vmdk4(bs, file, flags, errp); + return vmdk_open_vmdk4(bs, file, flags, options, errp); break; default: error_setg(errp, "Image not in VMDK format"); @@ -783,7 +759,8 @@ static int vmdk_open_sparse(BlockDriverState *bs, } static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, - const char *desc_file_path, Error **errp) + const char *desc_file_path, QDict *options, + Error **errp) { int ret; int matches; @@ -797,6 +774,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, BlockDriverState *extent_file; BDRVVmdkState *s = bs->opaque; VmdkExtent *extent; + char extent_opt_prefix[32]; while (*p) { /* parse extent line in one of below formats: @@ -846,8 +824,12 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, extent_path = g_malloc0(PATH_MAX); path_combine(extent_path, PATH_MAX, desc_file_path, fname); extent_file = NULL; - ret = bdrv_open(&extent_file, extent_path, NULL, NULL, - bs->open_flags | BDRV_O_PROTOCOL, NULL, errp); + + ret = snprintf(extent_opt_prefix, 32, "extents.%d", s->num_extents); + assert(ret < 32); + + ret = bdrv_open_image(&extent_file, extent_path, options, + extent_opt_prefix, bs, &child_file, false, errp); g_free(extent_path); if (ret) { return ret; @@ -870,7 +852,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, if (!buf) { ret = -EINVAL; } else { - ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf, errp); + ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf, + options, errp); } g_free(buf); if (ret) { @@ -898,7 +881,7 @@ next_line: } static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, - Error **errp) + QDict *options, Error **errp) { int ret; char ct[128]; @@ -920,7 +903,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, } s->create_type = g_strdup(ct); s->desc_offset = 0; - ret = vmdk_parse_extents(buf, bs, bs->file->exact_filename, errp); + ret = vmdk_parse_extents(buf, bs, bs->file->exact_filename, options, errp); exit: return ret; } @@ -942,11 +925,11 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, switch (magic) { case VMDK3_MAGIC: case VMDK4_MAGIC: - ret = vmdk_open_sparse(bs, bs->file, flags, buf, errp); + ret = vmdk_open_sparse(bs, bs->file, flags, buf, options, errp); s->desc_offset = 0x200; break; default: - ret = vmdk_open_desc_file(bs, flags, buf, errp); + ret = vmdk_open_desc_file(bs, flags, buf, options, errp); break; } if (ret) { @@ -1248,6 +1231,17 @@ static VmdkExtent *find_extent(BDRVVmdkState *s, return NULL; } +static inline uint64_t vmdk_find_index_in_cluster(VmdkExtent *extent, + int64_t sector_num) +{ + uint64_t index_in_cluster, extent_begin_sector, extent_relative_sector_num; + + extent_begin_sector = extent->end_sector - extent->sectors; + extent_relative_sector_num = sector_num - extent_begin_sector; + index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; + return index_in_cluster; +} + static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { @@ -1285,7 +1279,7 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, break; } - index_in_cluster = sector_num % extent->cluster_sectors; + index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num); n = extent->cluster_sectors - index_in_cluster; if (n > nb_sectors) { n = nb_sectors; @@ -1413,7 +1407,6 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, BDRVVmdkState *s = bs->opaque; int ret; uint64_t n, index_in_cluster; - uint64_t extent_begin_sector, extent_relative_sector_num; VmdkExtent *extent = NULL; uint64_t cluster_offset; @@ -1425,9 +1418,7 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, ret = get_cluster_offset(bs, extent, NULL, sector_num << 9, false, &cluster_offset, 0, 0); - extent_begin_sector = extent->end_sector - extent->sectors; - extent_relative_sector_num = sector_num - extent_begin_sector; - index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; + index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num); n = extent->cluster_sectors - index_in_cluster; if (n > nb_sectors) { n = nb_sectors; @@ -1489,7 +1480,6 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, VmdkExtent *extent = NULL; int ret; int64_t index_in_cluster, n; - uint64_t extent_begin_sector, extent_relative_sector_num; uint64_t cluster_offset; VmdkMetaData m_data; @@ -1505,9 +1495,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, if (!extent) { return -EIO; } - extent_begin_sector = extent->end_sector - extent->sectors; - extent_relative_sector_num = sector_num - extent_begin_sector; - index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; + index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num); n = extent->cluster_sectors - index_in_cluster; if (n > nb_sectors) { n = nb_sectors; diff --git a/blockdev.c b/blockdev.c index de94a8bcb3..3aa1ae6698 100644 --- a/blockdev.c +++ b/blockdev.c @@ -34,6 +34,7 @@ #include "sysemu/blockdev.h" #include "hw/block/block.h" #include "block/blockjob.h" +#include "block/throttle-groups.h" #include "monitor/monitor.h" #include "qemu/option.h" #include "qemu/config-file.h" @@ -357,6 +358,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, const char *id; bool has_driver_specific_opts; BlockdevDetectZeroesOptions detect_zeroes; + const char *throttling_group; /* Check common options by copying from bs_opts to opts, all other options * stay in bs_opts for processing by bdrv_open(). */ @@ -391,13 +393,13 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, } } - if (qemu_opt_get_bool(opts, "cache.writeback", true)) { + if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, true)) { bdrv_flags |= BDRV_O_CACHE_WB; } - if (qemu_opt_get_bool(opts, "cache.direct", false)) { + if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) { bdrv_flags |= BDRV_O_NOCACHE; } - if (qemu_opt_get_bool(opts, "cache.no-flush", false)) { + if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) { bdrv_flags |= BDRV_O_NO_FLUSH; } @@ -459,6 +461,8 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, cfg.op_size = qemu_opt_get_number(opts, "throttling.iops-size", 0); + throttling_group = qemu_opt_get(opts, "throttling.group"); + if (!check_throttle_config(&cfg, &error)) { error_propagate(errp, error); goto early_err; @@ -547,7 +551,10 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, /* disk I/O throttling */ if (throttle_enabled(&cfg)) { - bdrv_io_limits_enable(bs); + if (!throttling_group) { + throttling_group = blk_name(blk); + } + bdrv_io_limits_enable(bs, throttling_group); bdrv_set_io_limits(bs, &cfg); } @@ -711,6 +718,8 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type) { "iops_size", "throttling.iops-size" }, + { "group", "throttling.group" }, + { "readonly", "read-only" }, }; @@ -733,16 +742,16 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type) } /* Specific options take precedence */ - if (!qemu_opt_get(all_opts, "cache.writeback")) { - qemu_opt_set_bool(all_opts, "cache.writeback", + if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_WB)) { + qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_WB, !!(flags & BDRV_O_CACHE_WB), &error_abort); } - if (!qemu_opt_get(all_opts, "cache.direct")) { - qemu_opt_set_bool(all_opts, "cache.direct", + if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_DIRECT)) { + qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_DIRECT, !!(flags & BDRV_O_NOCACHE), &error_abort); } - if (!qemu_opt_get(all_opts, "cache.no-flush")) { - qemu_opt_set_bool(all_opts, "cache.no-flush", + if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_NO_FLUSH)) { + qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_NO_FLUSH, !!(flags & BDRV_O_NO_FLUSH), &error_abort); } qemu_opt_unset(all_opts, "cache"); @@ -1951,7 +1960,9 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd, bool has_iops_wr_max, int64_t iops_wr_max, bool has_iops_size, - int64_t iops_size, Error **errp) + int64_t iops_size, + bool has_group, + const char *group, Error **errp) { ThrottleConfig cfg; BlockDriverState *bs; @@ -2004,14 +2015,19 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd, aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - if (!bs->io_limits_enabled && throttle_enabled(&cfg)) { - bdrv_io_limits_enable(bs); - } else if (bs->io_limits_enabled && !throttle_enabled(&cfg)) { - bdrv_io_limits_disable(bs); - } - - if (bs->io_limits_enabled) { + if (throttle_enabled(&cfg)) { + /* Enable I/O limits if they're not enabled yet, otherwise + * just update the throttling group. */ + if (!bs->io_limits_enabled) { + bdrv_io_limits_enable(bs, has_group ? group : device); + } else if (has_group) { + bdrv_io_limits_update_group(bs, group); + } + /* Set the new throttling configuration */ bdrv_set_io_limits(bs, &cfg); + } else if (bs->io_limits_enabled) { + /* If all throttling settings are set to 0, disable I/O limits */ + bdrv_io_limits_disable(bs); } aio_context_release(aio_context); @@ -3105,15 +3121,15 @@ QemuOptsList qemu_common_drive_opts = { .type = QEMU_OPT_STRING, .help = "discard operation (ignore/off, unmap/on)", },{ - .name = "cache.writeback", + .name = BDRV_OPT_CACHE_WB, .type = QEMU_OPT_BOOL, .help = "enables writeback mode for any caches", },{ - .name = "cache.direct", + .name = BDRV_OPT_CACHE_DIRECT, .type = QEMU_OPT_BOOL, .help = "enables use of O_DIRECT (bypass the host page cache)", },{ - .name = "cache.no-flush", + .name = BDRV_OPT_CACHE_NO_FLUSH, .type = QEMU_OPT_BOOL, .help = "ignore any flush requests for the device", },{ @@ -3189,6 +3205,10 @@ QemuOptsList qemu_common_drive_opts = { .type = QEMU_OPT_NUMBER, .help = "when limiting by iops max size of an I/O in bytes", },{ + .name = "throttling.group", + .type = QEMU_OPT_STRING, + .help = "name of the block throttling group", + },{ .name = "copy-on-read", .type = QEMU_OPT_BOOL, .help = "copy read data from backing file into image file", diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak index f9e13f177e..36e15de336 100644 --- a/default-configs/s390x-softmmu.mak +++ b/default-configs/s390x-softmmu.mak @@ -4,3 +4,4 @@ CONFIG_VIRTIO=y CONFIG_SCLPCONSOLE=y CONFIG_S390_FLIC=y CONFIG_S390_FLIC_KVM=$(CONFIG_KVM) +CONFIG_WDT_DIAG288=y diff --git a/hmp.c b/hmp.c index 1e7cac02ac..23abc7d9f1 100644 --- a/hmp.c +++ b/hmp.c @@ -400,7 +400,8 @@ static void print_block_info(Monitor *mon, BlockInfo *info, " iops_max=%" PRId64 " iops_rd_max=%" PRId64 " iops_wr_max=%" PRId64 - " iops_size=%" PRId64 "\n", + " iops_size=%" PRId64 + " group=%s\n", inserted->bps, inserted->bps_rd, inserted->bps_wr, @@ -413,7 +414,8 @@ static void print_block_info(Monitor *mon, BlockInfo *info, inserted->iops_max, inserted->iops_rd_max, inserted->iops_wr_max, - inserted->iops_size); + inserted->iops_size, + inserted->group); } if (verbose) { @@ -1357,7 +1359,9 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict) false, 0, false, /* No default I/O size */ - 0, &err); + 0, + false, + NULL, &err); hmp_handle_error(mon, &err); } diff --git a/hw/core/nmi.c b/hw/core/nmi.c index 3dff020659..5260d6c1ec 100644 --- a/hw/core/nmi.c +++ b/hw/core/nmi.c @@ -21,6 +21,7 @@ #include "hw/nmi.h" #include "qapi/qmp/qerror.h" +#include "monitor/monitor.h" struct do_nmi_s { int cpu_index; @@ -70,6 +71,25 @@ void nmi_monitor_handle(int cpu_index, Error **errp) } } +void inject_nmi(void) +{ +#if defined(TARGET_I386) + CPUState *cs; + + CPU_FOREACH(cs) { + X86CPU *cpu = X86_CPU(cs); + + if (!cpu->apic_state) { + cpu_interrupt(cs, CPU_INTERRUPT_NMI); + } else { + apic_deliver_nmi(cpu->apic_state); + } + } +#else + nmi_monitor_handle(0, NULL); +#endif +} + static const TypeInfo nmi_info = { .name = TYPE_NMI, .parent = TYPE_INTERFACE, diff --git a/hw/watchdog/Makefile.objs b/hw/watchdog/Makefile.objs index 4b0374a555..72e3ffd93c 100644 --- a/hw/watchdog/Makefile.objs +++ b/hw/watchdog/Makefile.objs @@ -1,3 +1,4 @@ common-obj-y += watchdog.o common-obj-$(CONFIG_WDT_IB6300ESB) += wdt_i6300esb.o common-obj-$(CONFIG_WDT_IB700) += wdt_ib700.o +common-obj-$(CONFIG_WDT_DIAG288) += wdt_diag288.o diff --git a/hw/watchdog/watchdog.c b/hw/watchdog/watchdog.c index 54440c91c5..8d4b0eeeb0 100644 --- a/hw/watchdog/watchdog.c +++ b/hw/watchdog/watchdog.c @@ -27,6 +27,7 @@ #include "sysemu/sysemu.h" #include "sysemu/watchdog.h" #include "qapi-event.h" +#include "hw/nmi.h" /* Possible values for action parameter. */ #define WDT_RESET 1 /* Hard reset. */ @@ -35,6 +36,7 @@ #define WDT_PAUSE 4 /* Pause. */ #define WDT_DEBUG 5 /* Prints a message and continues running. */ #define WDT_NONE 6 /* Do nothing. */ +#define WDT_NMI 7 /* Inject nmi into the guest */ static int watchdog_action = WDT_RESET; static QLIST_HEAD(watchdog_list, WatchdogTimerModel) watchdog_list; @@ -95,6 +97,8 @@ int select_watchdog_action(const char *p) watchdog_action = WDT_DEBUG; else if (strcasecmp(p, "none") == 0) watchdog_action = WDT_NONE; + else if (strcasecmp(p, "inject-nmi") == 0) + watchdog_action = WDT_NMI; else return -1; @@ -138,5 +142,11 @@ void watchdog_perform_action(void) case WDT_NONE: qapi_event_send_watchdog(WATCHDOG_EXPIRATION_ACTION_NONE, &error_abort); break; + + case WDT_NMI: + qapi_event_send_watchdog(WATCHDOG_EXPIRATION_ACTION_INJECT_NMI, + &error_abort); + inject_nmi(); + break; } } diff --git a/hw/watchdog/wdt_diag288.c b/hw/watchdog/wdt_diag288.c new file mode 100644 index 0000000000..1185e0681c --- /dev/null +++ b/hw/watchdog/wdt_diag288.c @@ -0,0 +1,122 @@ +/* + * watchdog device diag288 support + * + * Copyright IBM, Corp. 2015 + * + * Authors: + * Xu Wang <gesaint@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at your + * option) any later version. See the COPYING file in the top-level directory. + * + */ + +#include "sysemu/watchdog.h" +#include "hw/sysbus.h" +#include "qemu/timer.h" +#include "hw/watchdog/wdt_diag288.h" + +static WatchdogTimerModel model = { + .wdt_name = TYPE_WDT_DIAG288, + .wdt_description = "diag288 device for s390x platform", +}; + +static const VMStateDescription vmstate_diag288 = { + .name = "vmstate_diag288", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_TIMER_PTR(timer, DIAG288State), + VMSTATE_BOOL(enabled, DIAG288State), + VMSTATE_END_OF_LIST() + } +}; + +static void wdt_diag288_reset(DeviceState *dev) +{ + DIAG288State *diag288 = DIAG288(dev); + + diag288->enabled = false; + timer_del(diag288->timer); +} + +static void diag288_timer_expired(void *dev) +{ + qemu_log_mask(CPU_LOG_RESET, "Watchdog timer expired.\n"); + watchdog_perform_action(); + wdt_diag288_reset(dev); +} + +static int wdt_diag288_handle_timer(DIAG288State *diag288, + uint64_t func, uint64_t timeout) +{ + switch (func) { + case WDT_DIAG288_INIT: + diag288->enabled = true; + /* fall through */ + case WDT_DIAG288_CHANGE: + if (!diag288->enabled) { + return -1; + } + timer_mod(diag288->timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + + timeout * get_ticks_per_sec()); + break; + case WDT_DIAG288_CANCEL: + if (!diag288->enabled) { + return -1; + } + diag288->enabled = false; + timer_del(diag288->timer); + break; + default: + return -1; + } + + return 0; +} + +static void wdt_diag288_realize(DeviceState *dev, Error **errp) +{ + DIAG288State *diag288 = DIAG288(dev); + + diag288->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, diag288_timer_expired, + dev); +} + +static void wdt_diag288_unrealize(DeviceState *dev, Error **errp) +{ + DIAG288State *diag288 = DIAG288(dev); + + timer_del(diag288->timer); + timer_free(diag288->timer); +} + +static void wdt_diag288_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + DIAG288Class *diag288 = DIAG288_CLASS(klass); + + dc->realize = wdt_diag288_realize; + dc->unrealize = wdt_diag288_unrealize; + dc->reset = wdt_diag288_reset; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->vmsd = &vmstate_diag288; + diag288->handle_timer = wdt_diag288_handle_timer; +} + +static const TypeInfo wdt_diag288_info = { + .class_init = wdt_diag288_class_init, + .parent = TYPE_DEVICE, + .name = TYPE_WDT_DIAG288, + .instance_size = sizeof(DIAG288State), + .class_size = sizeof(DIAG288Class), +}; + +static void wdt_diag288_register_types(void) +{ + watchdog_add_model(&model); + type_register_static(&wdt_diag288_info); +} + +type_init(wdt_diag288_register_types) diff --git a/include/block/block.h b/include/block/block.h index f7680b6e68..07bb724f7d 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -12,6 +12,7 @@ /* block.c */ typedef struct BlockDriver BlockDriver; typedef struct BlockJob BlockJob; +typedef struct BdrvChildRole BdrvChildRole; typedef struct BlockDriverInfo { /* in bytes, 0 if irrelevant */ @@ -90,6 +91,14 @@ typedef struct HDGeometry { #define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH) + +/* Option names of options parsed by the block layer */ + +#define BDRV_OPT_CACHE_WB "cache.writeback" +#define BDRV_OPT_CACHE_DIRECT "cache.direct" +#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush" + + #define BDRV_SECTOR_BITS 9 #define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS) #define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1) @@ -173,8 +182,9 @@ void bdrv_stats_print(Monitor *mon, const QObject *data); void bdrv_info_stats(Monitor *mon, QObject **ret_data); /* disk I/O throttling */ -void bdrv_io_limits_enable(BlockDriverState *bs); +void bdrv_io_limits_enable(BlockDriverState *bs, const char *group); void bdrv_io_limits_disable(BlockDriverState *bs); +void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group); void bdrv_init(void); void bdrv_init_with_whitelist(void); @@ -195,7 +205,8 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top); int bdrv_parse_cache_flags(const char *mode, int *flags); int bdrv_parse_discard_flags(const char *mode, int *flags); int bdrv_open_image(BlockDriverState **pbs, const char *filename, - QDict *options, const char *bdref_key, int flags, + QDict *options, const char *bdref_key, + BlockDriverState* parent, const BdrvChildRole *child_role, bool allow_none, Error **errp); void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp); diff --git a/include/block/block_int.h b/include/block/block_int.h index f004378d58..888ec09e96 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -330,6 +330,19 @@ typedef struct BdrvAioNotifier { QLIST_ENTRY(BdrvAioNotifier) list; } BdrvAioNotifier; +struct BdrvChildRole { + int (*inherit_flags)(int parent_flags); +}; + +extern const BdrvChildRole child_file; +extern const BdrvChildRole child_format; + +typedef struct BdrvChild { + BlockDriverState *bs; + const BdrvChildRole *role; + QLIST_ENTRY(BdrvChild) next; +} BdrvChild; + /* * Note: the function bdrv_append() copies and swaps contents of * BlockDriverStates, so if you add new fields to this struct, please @@ -379,9 +392,14 @@ struct BlockDriverState { unsigned int serialising_in_flight; /* I/O throttling */ - ThrottleState throttle_state; CoQueue throttled_reqs[2]; bool io_limits_enabled; + /* The following fields are protected by the ThrottleGroup lock. + * See the ThrottleGroup documentation for details. */ + ThrottleState *throttle_state; + ThrottleTimers throttle_timers; + unsigned pending_reqs[2]; + QLIST_ENTRY(BlockDriverState) round_robin; /* I/O stats (display with "info blockstats"). */ BlockAcctStats stats; @@ -424,6 +442,12 @@ struct BlockDriverState { /* long-running background operation */ BlockJob *job; + /* The node that this node inherited default options from (and a reopen on + * which can affect this node by changing these defaults). This is always a + * parent node of this node. */ + BlockDriverState *inherits_from; + QLIST_HEAD(, BdrvChild) children; + QDict *options; BlockdevDetectZeroesOptions detect_zeroes; diff --git a/include/block/throttle-groups.h b/include/block/throttle-groups.h new file mode 100644 index 0000000000..fab113f6d1 --- /dev/null +++ b/include/block/throttle-groups.h @@ -0,0 +1,46 @@ +/* + * QEMU block throttling group infrastructure + * + * Copyright (C) Nodalink, EURL. 2014 + * Copyright (C) Igalia, S.L. 2015 + * + * Authors: + * Benoît Canet <benoit.canet@nodalink.com> + * Alberto Garcia <berto@igalia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef THROTTLE_GROUPS_H +#define THROTTLE_GROUPS_H + +#include "qemu/throttle.h" +#include "block/block_int.h" + +const char *throttle_group_get_name(BlockDriverState *bs); + +void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg); +void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg); + +void throttle_group_register_bs(BlockDriverState *bs, const char *groupname); +void throttle_group_unregister_bs(BlockDriverState *bs); + +void coroutine_fn throttle_group_co_io_limits_intercept(BlockDriverState *bs, + unsigned int bytes, + bool is_write); + +void throttle_group_lock(BlockDriverState *bs); +void throttle_group_unlock(BlockDriverState *bs); + +#endif diff --git a/include/hw/nmi.h b/include/hw/nmi.h index b541772e1d..f4cec6257d 100644 --- a/include/hw/nmi.h +++ b/include/hw/nmi.h @@ -45,5 +45,6 @@ typedef struct NMIClass { } NMIClass; void nmi_monitor_handle(int cpu_index, Error **errp); +void inject_nmi(void); #endif /* NMI_H */ diff --git a/include/hw/watchdog/wdt_diag288.h b/include/hw/watchdog/wdt_diag288.h new file mode 100644 index 0000000000..7f3fd450dc --- /dev/null +++ b/include/hw/watchdog/wdt_diag288.h @@ -0,0 +1,36 @@ +#ifndef WDT_DIAG288_H +#define WDT_DIAG288_H + +#include "hw/qdev.h" + +#define TYPE_WDT_DIAG288 "diag288" +#define DIAG288(obj) \ + OBJECT_CHECK(DIAG288State, (obj), TYPE_WDT_DIAG288) +#define DIAG288_CLASS(klass) \ + OBJECT_CLASS_CHECK(DIAG288Class, (klass), TYPE_WDT_DIAG288) +#define DIAG288_GET_CLASS(obj) \ + OBJECT_GET_CLASS(DIAG288Class, (obj), TYPE_WDT_DIAG288) + +#define WDT_DIAG288_INIT 0 +#define WDT_DIAG288_CHANGE 1 +#define WDT_DIAG288_CANCEL 2 + +typedef struct DIAG288State { + /*< private >*/ + DeviceState parent_obj; + QEMUTimer *timer; + bool enabled; + + /*< public >*/ +} DIAG288State; + +typedef struct DIAG288Class { + /*< private >*/ + DeviceClass parent_class; + + /*< public >*/ + int (*handle_timer)(DIAG288State *dev, + uint64_t func, uint64_t timeout); +} DIAG288Class; + +#endif /* WDT_DIAG288_H */ diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h index d68f4eb4d5..9fbf68ee0c 100644 --- a/include/qapi/qmp/qdict.h +++ b/include/qapi/qmp/qdict.h @@ -65,11 +65,15 @@ int64_t qdict_get_try_int(const QDict *qdict, const char *key, int qdict_get_try_bool(const QDict *qdict, const char *key, int def_value); const char *qdict_get_try_str(const QDict *qdict, const char *key); +void qdict_copy_default(QDict *dst, QDict *src, const char *key); +void qdict_set_default_str(QDict *dst, const char *key, const char *val); + QDict *qdict_clone_shallow(const QDict *src); void qdict_flatten(QDict *qdict); void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start); void qdict_array_split(QDict *src, QList **dst); +int qdict_array_entries(QDict *src, const char *subqdict); void qdict_join(QDict *dest, QDict *src, bool overwrite); diff --git a/include/qemu/queue.h b/include/qemu/queue.h index f781aa20a8..a8d3cb8e63 100644 --- a/include/qemu/queue.h +++ b/include/qemu/queue.h @@ -117,6 +117,12 @@ struct { \ } \ } while (/*CONSTCOND*/0) +#define QLIST_FIX_HEAD_PTR(head, field) do { \ + if ((head)->lh_first != NULL) { \ + (head)->lh_first->field.le_prev = &(head)->lh_first; \ + } \ +} while (/*CONSTCOND*/0) + #define QLIST_INSERT_AFTER(listelm, elm, field) do { \ if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \ (listelm)->field.le_next->field.le_prev = \ diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h index b890613a9c..5af76f0ff4 100644 --- a/include/qemu/throttle.h +++ b/include/qemu/throttle.h @@ -1,10 +1,12 @@ /* * QEMU throttling infrastructure * - * Copyright (C) Nodalink, SARL. 2013 + * Copyright (C) Nodalink, EURL. 2013-2014 + * Copyright (C) Igalia, S.L. 2015 * - * Author: - * Benoît Canet <benoit.canet@irqsave.net> + * Authors: + * Benoît Canet <benoit.canet@nodalink.com> + * Alberto Garcia <berto@igalia.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -65,14 +67,17 @@ typedef struct ThrottleConfig { typedef struct ThrottleState { ThrottleConfig cfg; /* configuration */ int64_t previous_leak; /* timestamp of the last leak done */ - QEMUTimer * timers[2]; /* timers used to do the throttling */ +} ThrottleState; + +typedef struct ThrottleTimers { + QEMUTimer *timers[2]; /* timers used to do the throttling */ QEMUClockType clock_type; /* the clock used */ /* Callbacks */ QEMUTimerCB *read_timer_cb; QEMUTimerCB *write_timer_cb; void *timer_opaque; -} ThrottleState; +} ThrottleTimers; /* operations on single leaky buckets */ void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta); @@ -86,20 +91,23 @@ bool throttle_compute_timer(ThrottleState *ts, int64_t *next_timestamp); /* init/destroy cycle */ -void throttle_init(ThrottleState *ts, - AioContext *aio_context, - QEMUClockType clock_type, - void (read_timer)(void *), - void (write_timer)(void *), - void *timer_opaque); +void throttle_init(ThrottleState *ts); + +void throttle_timers_init(ThrottleTimers *tt, + AioContext *aio_context, + QEMUClockType clock_type, + QEMUTimerCB *read_timer_cb, + QEMUTimerCB *write_timer_cb, + void *timer_opaque); -void throttle_destroy(ThrottleState *ts); +void throttle_timers_destroy(ThrottleTimers *tt); -void throttle_detach_aio_context(ThrottleState *ts); +void throttle_timers_detach_aio_context(ThrottleTimers *tt); -void throttle_attach_aio_context(ThrottleState *ts, AioContext *new_context); +void throttle_timers_attach_aio_context(ThrottleTimers *tt, + AioContext *new_context); -bool throttle_have_timer(ThrottleState *ts); +bool throttle_timers_are_initialized(ThrottleTimers *tt); /* configuration */ bool throttle_enabled(ThrottleConfig *cfg); @@ -108,12 +116,16 @@ bool throttle_conflicting(ThrottleConfig *cfg); bool throttle_is_valid(ThrottleConfig *cfg); -void throttle_config(ThrottleState *ts, ThrottleConfig *cfg); +void throttle_config(ThrottleState *ts, + ThrottleTimers *tt, + ThrottleConfig *cfg); void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg); /* usage */ -bool throttle_schedule_timer(ThrottleState *ts, bool is_write); +bool throttle_schedule_timer(ThrottleState *ts, + ThrottleTimers *tt, + bool is_write); void throttle_account(ThrottleState *ts, bool is_write, uint64_t size); diff --git a/iothread.c b/iothread.c index 0416fc4268..878a594ef4 100644 --- a/iothread.c +++ b/iothread.c @@ -31,14 +31,21 @@ typedef ObjectClass IOThreadClass; static void *iothread_run(void *opaque) { IOThread *iothread = opaque; + bool blocking; qemu_mutex_lock(&iothread->init_done_lock); iothread->thread_id = qemu_get_thread_id(); qemu_cond_signal(&iothread->init_done_cond); qemu_mutex_unlock(&iothread->init_done_lock); - while (!atomic_read(&iothread->stopping)) { - aio_poll(iothread->ctx, true); + while (!iothread->stopping) { + aio_context_acquire(iothread->ctx); + blocking = true; + while (!iothread->stopping && aio_poll(iothread->ctx, blocking)) { + /* Progress was made, keep going */ + blocking = false; + } + aio_context_release(iothread->ctx); } return NULL; } diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile index 009bb8de1c..746603a315 100644 --- a/pc-bios/s390-ccw/Makefile +++ b/pc-bios/s390-ccw/Makefile @@ -10,7 +10,7 @@ $(call set-vpath, $(SRC_PATH)/pc-bios/s390-ccw) .PHONY : all clean build-all OBJECTS = start.o main.o bootmap.o sclp-ascii.o virtio.o -CFLAGS += -fPIE -fno-stack-protector -ffreestanding +CFLAGS += -fPIE -fno-stack-protector -ffreestanding -fno-delete-null-pointer-checks LDFLAGS += -Wl,-pie -nostdlib build-all: s390-ccw.img diff --git a/qapi-schema.json b/qapi-schema.json index bcc604b813..106008cdeb 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -3746,10 +3746,14 @@ # # @none: nothing is done # +# @inject-nmi: a non-maskable interrupt is injected into the first VCPU (all +# VCPUS on x86) (since 2.4) +# # Since: 2.1 ## { 'enum': 'WatchdogExpirationAction', - 'data': [ 'reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none' ] } + 'data': [ 'reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none', + 'inject-nmi' ] } ## # @IoOperationType diff --git a/qapi/block-core.json b/qapi/block-core.json index 8411d4f83a..afa9d3d1f3 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -259,6 +259,8 @@ # # @iops_size: #optional an I/O size in bytes (Since 1.7) # +# @group: #optional throttle group name (Since 2.4) +# # @cache: the cache mode used for the block device (since: 2.3) # # @write_threshold: configured write threshold for the device. @@ -278,7 +280,7 @@ '*bps_max': 'int', '*bps_rd_max': 'int', '*bps_wr_max': 'int', '*iops_max': 'int', '*iops_rd_max': 'int', '*iops_wr_max': 'int', - '*iops_size': 'int', 'cache': 'BlockdevCacheInfo', + '*iops_size': 'int', '*group': 'str', 'cache': 'BlockdevCacheInfo', 'write_threshold': 'int' } } ## @@ -1062,6 +1064,27 @@ # # Change I/O throttle limits for a block drive. # +# Since QEMU 2.4, each device with I/O limits is member of a throttle +# group. +# +# If two or more devices are members of the same group, the limits +# will apply to the combined I/O of the whole group in a round-robin +# fashion. Therefore, setting new I/O limits to a device will affect +# the whole group. +# +# The name of the group can be specified using the 'group' parameter. +# If the parameter is unset, it is assumed to be the current group of +# that device. If it's not in any group yet, the name of the device +# will be used as the name for its group. +# +# The 'group' parameter can also be used to move a device to a +# different group. In this case the limits specified in the parameters +# will be applied to the new group only. +# +# I/O limits can be disabled by setting all of them to 0. In this case +# the device will be removed from its group and the rest of its +# members will no be affected. The 'group' parameter is ignored. +# # @device: The name of the device # # @bps: total throughput limit in bytes per second @@ -1090,6 +1113,8 @@ # # @iops_size: #optional an I/O size in bytes (Since 1.7) # +# @group: #optional throttle group name (Since 2.4) +# # Returns: Nothing on success # If @device is not a valid block device, DeviceNotFound # @@ -1101,7 +1126,7 @@ '*bps_max': 'int', '*bps_rd_max': 'int', '*bps_wr_max': 'int', '*iops_max': 'int', '*iops_rd_max': 'int', '*iops_wr_max': 'int', - '*iops_size': 'int' } } + '*iops_size': 'int', '*group': 'str' } } ## # @block-stream: diff --git a/qemu-options.hx b/qemu-options.hx index 1d281f6818..bf1683e5b4 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -468,6 +468,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive, " [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n" " [[,iops_max=im]|[[,iops_rd_max=irm][,iops_wr_max=iwm]]]\n" " [[,iops_size=is]]\n" + " [[,group=g]]\n" " use 'file' as a drive image\n", QEMU_ARCH_ALL) STEXI @item -drive @var{option}[,@var{option}[,@var{option}[,...]]] @@ -3152,7 +3153,7 @@ when the shift value is high (how high depends on the host machine). ETEXI DEF("watchdog", HAS_ARG, QEMU_OPTION_watchdog, \ - "-watchdog i6300esb|ib700\n" \ + "-watchdog model\n" \ " enable virtual hardware watchdog [default=none]\n", QEMU_ARCH_ALL) STEXI @@ -3160,16 +3161,24 @@ STEXI @findex -watchdog Create a virtual hardware watchdog device. Once enabled (by a guest action), the watchdog must be periodically polled by an agent inside -the guest or else the guest will be restarted. +the guest or else the guest will be restarted. Choose a model for +which your guest has drivers. -The @var{model} is the model of hardware watchdog to emulate. Choices -for model are: @code{ib700} (iBASE 700) which is a very simple ISA -watchdog with a single timer, or @code{i6300esb} (Intel 6300ESB I/O -controller hub) which is a much more featureful PCI-based dual-timer -watchdog. Choose a model for which your guest has drivers. - -Use @code{-watchdog help} to list available hardware models. Only one +The @var{model} is the model of hardware watchdog to emulate. Use +@code{-watchdog help} to list available hardware models. Only one watchdog can be enabled for a guest. + +The following models may be available: +@table @option +@item ib700 +iBASE 700 is a very simple ISA watchdog with a single timer. +@item i6300esb +Intel 6300ESB I/O controller hub is a much more featureful PCI-based +dual-timer watchdog. +@item diag288 +A virtual watchdog for s390x backed by the diagnose 288 hypercall +(currently KVM only). +@end table ETEXI DEF("watchdog-action", HAS_ARG, QEMU_OPTION_watchdog_action, \ diff --git a/qmp-commands.hx b/qmp-commands.hx index c97d0d7667..1db6524e97 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1853,7 +1853,7 @@ EQMP { .name = "block_set_io_throttle", - .args_type = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l,bps_max:l?,bps_rd_max:l?,bps_wr_max:l?,iops_max:l?,iops_rd_max:l?,iops_wr_max:l?,iops_size:l?", + .args_type = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l,bps_max:l?,bps_rd_max:l?,bps_wr_max:l?,iops_max:l?,iops_rd_max:l?,iops_wr_max:l?,iops_size:l?,group:s?", .mhandler.cmd_new = qmp_marshal_input_block_set_io_throttle, }, @@ -1879,6 +1879,7 @@ Arguments: - "iops_rd_max": read I/O operations max (json-int) - "iops_wr_max": write I/O operations max (json-int) - "iops_size": I/O size in bytes when limiting (json-int) +- "group": throttle group name (json-string) Example: diff --git a/qobject/qdict.c b/qobject/qdict.c index ea239f082e..190791becf 100644 --- a/qobject/qdict.c +++ b/qobject/qdict.c @@ -477,6 +477,39 @@ static void qdict_destroy_obj(QObject *obj) g_free(qdict); } +/** + * qdict_copy_default(): If no entry mapped by 'key' exists in 'dst' yet, the + * value of 'key' in 'src' is copied there (and the refcount increased + * accordingly). + */ +void qdict_copy_default(QDict *dst, QDict *src, const char *key) +{ + QObject *val; + + if (qdict_haskey(dst, key)) { + return; + } + + val = qdict_get(src, key); + if (val) { + qobject_incref(val); + qdict_put_obj(dst, key, val); + } +} + +/** + * qdict_set_default_str(): If no entry mapped by 'key' exists in 'dst' yet, a + * new QString initialised by 'val' is put there. + */ +void qdict_set_default_str(QDict *dst, const char *key, const char *val) +{ + if (qdict_haskey(dst, key)) { + return; + } + + qdict_put(dst, key, qstring_from_str(val)); +} + static void qdict_flatten_qdict(QDict *qdict, QDict *target, const char *prefix); @@ -597,17 +630,21 @@ void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start) } } -static bool qdict_has_prefixed_entries(const QDict *src, const char *start) +static int qdict_count_prefixed_entries(const QDict *src, const char *start) { const QDictEntry *entry; + int count = 0; for (entry = qdict_first(src); entry; entry = qdict_next(src, entry)) { if (strstart(entry->key, start, NULL)) { - return true; + if (count == INT_MAX) { + return -ERANGE; + } + count++; } } - return false; + return count; } /** @@ -646,7 +683,8 @@ void qdict_array_split(QDict *src, QList **dst) snprintf_ret = snprintf(prefix, 32, "%u.", i); assert(snprintf_ret < 32); - is_subqdict = qdict_has_prefixed_entries(src, prefix); + /* Overflow is the same as positive non-zero results */ + is_subqdict = qdict_count_prefixed_entries(src, prefix); // There may be either a single subordinate object (named "%u") or // multiple objects (each with a key prefixed "%u."), but not both. @@ -667,6 +705,71 @@ void qdict_array_split(QDict *src, QList **dst) } /** + * qdict_array_entries(): Returns the number of direct array entries if the + * sub-QDict of src specified by the prefix in subqdict (or src itself for + * prefix == "") is valid as an array, i.e. the length of the created list if + * the sub-QDict would become empty after calling qdict_array_split() on it. If + * the array is not valid, -EINVAL is returned. + */ +int qdict_array_entries(QDict *src, const char *subqdict) +{ + const QDictEntry *entry; + unsigned i; + unsigned entries = 0; + size_t subqdict_len = strlen(subqdict); + + assert(!subqdict_len || subqdict[subqdict_len - 1] == '.'); + + /* qdict_array_split() loops until UINT_MAX, but as we want to return + * negative errors, we only have a signed return value here. Any additional + * entries will lead to -EINVAL. */ + for (i = 0; i < INT_MAX; i++) { + QObject *subqobj; + int subqdict_entries; + size_t slen = 32 + subqdict_len; + char indexstr[slen], prefix[slen]; + size_t snprintf_ret; + + snprintf_ret = snprintf(indexstr, slen, "%s%u", subqdict, i); + assert(snprintf_ret < slen); + + subqobj = qdict_get(src, indexstr); + + snprintf_ret = snprintf(prefix, slen, "%s%u.", subqdict, i); + assert(snprintf_ret < slen); + + subqdict_entries = qdict_count_prefixed_entries(src, prefix); + if (subqdict_entries < 0) { + return subqdict_entries; + } + + /* There may be either a single subordinate object (named "%u") or + * multiple objects (each with a key prefixed "%u."), but not both. */ + if (subqobj && subqdict_entries) { + return -EINVAL; + } else if (!subqobj && !subqdict_entries) { + break; + } + + entries += subqdict_entries ? subqdict_entries : 1; + } + + /* Consider everything handled that isn't part of the given sub-QDict */ + for (entry = qdict_first(src); entry; entry = qdict_next(src, entry)) { + if (!strstart(qdict_entry_key(entry), subqdict, NULL)) { + entries++; + } + } + + /* Anything left in the sub-QDict that wasn't handled? */ + if (qdict_size(src) != entries) { + return -EINVAL; + } + + return i; +} + +/** * qdict_join(): Absorb the src QDict into the dest QDict, that is, move all * elements from src to dest. * diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h index 584e74b89a..d63eb51186 100644 --- a/target-s390x/cpu.h +++ b/target-s390x/cpu.h @@ -1100,6 +1100,7 @@ uint32_t set_cc_nz_f128(float128 v); /* misc_helper.c */ #ifndef CONFIG_USER_ONLY +int handle_diag_288(CPUS390XState *env, uint64_t r1, uint64_t r3); void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3); #endif void program_interrupt(CPUS390XState *env, uint32_t code, int ilen); diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index f6f61b9619..b02ff8d61d 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -98,6 +98,7 @@ #define PRIV_E3_MPCIFC 0xd0 #define PRIV_E3_STPCIFC 0xd4 +#define DIAG_TIMEREVENT 0x288 #define DIAG_IPL 0x308 #define DIAG_KVM_HYPERCALL 0x500 #define DIAG_KVM_BREAKPOINT 0x501 @@ -1267,6 +1268,20 @@ static int handle_hypercall(S390CPU *cpu, struct kvm_run *run) return ret; } +static void kvm_handle_diag_288(S390CPU *cpu, struct kvm_run *run) +{ + uint64_t r1, r3; + int rc; + + cpu_synchronize_state(CPU(cpu)); + r1 = (run->s390_sieic.ipa & 0x00f0) >> 4; + r3 = run->s390_sieic.ipa & 0x000f; + rc = handle_diag_288(&cpu->env, r1, r3); + if (rc) { + enter_pgmcheck(cpu, PGM_SPECIFICATION); + } +} + static void kvm_handle_diag_308(S390CPU *cpu, struct kvm_run *run) { uint64_t r1, r3; @@ -1306,6 +1321,9 @@ static int handle_diag(S390CPU *cpu, struct kvm_run *run, uint32_t ipb) */ func_code = decode_basedisp_rs(&cpu->env, ipb, NULL) & DIAG_KVM_CODE_MASK; switch (func_code) { + case DIAG_TIMEREVENT: + kvm_handle_diag_288(cpu, run); + break; case DIAG_IPL: kvm_handle_diag_308(cpu, run); break; diff --git a/target-s390x/misc_helper.c b/target-s390x/misc_helper.c index b375ab724b..6711504221 100644 --- a/target-s390x/misc_helper.c +++ b/target-s390x/misc_helper.c @@ -30,6 +30,7 @@ #include <linux/kvm.h> #endif #include "exec/cpu_ldst.h" +#include "hw/watchdog/wdt_diag288.h" #if !defined(CONFIG_USER_ONLY) #include "sysemu/cpus.h" @@ -153,6 +154,34 @@ static int load_normal_reset(S390CPU *cpu) return 0; } +int handle_diag_288(CPUS390XState *env, uint64_t r1, uint64_t r3) +{ + uint64_t func = env->regs[r1]; + uint64_t timeout = env->regs[r1 + 1]; + uint64_t action = env->regs[r3]; + Object *obj; + DIAG288State *diag288; + DIAG288Class *diag288_class; + + if (r1 % 2 || action != 0) { + return -1; + } + + /* Timeout must be more than 15 seconds except for timer deletion */ + if (func != WDT_DIAG288_CANCEL && timeout < 15) { + return -1; + } + + obj = object_resolve_path_type("", TYPE_WDT_DIAG288, NULL); + if (!obj) { + return -1; + } + + diag288 = DIAG288(obj); + diag288_class = DIAG288_GET_CLASS(diag288); + return diag288_class->handle_timer(diag288, func, timeout); +} + #define DIAG_308_RC_OK 0x0001 #define DIAG_308_RC_NO_CONF 0x0102 #define DIAG_308_RC_INVALID 0x0402 diff --git a/tests/check-qdict.c b/tests/check-qdict.c index a9296f0833..a136f2addf 100644 --- a/tests/check-qdict.c +++ b/tests/check-qdict.c @@ -152,6 +152,28 @@ static void qdict_get_try_str_test(void) QDECREF(tests_dict); } +static void qdict_defaults_test(void) +{ + QDict *dict, *copy; + + dict = qdict_new(); + copy = qdict_new(); + + qdict_set_default_str(dict, "foo", "abc"); + qdict_set_default_str(dict, "foo", "def"); + g_assert_cmpstr(qdict_get_str(dict, "foo"), ==, "abc"); + qdict_set_default_str(dict, "bar", "ghi"); + + qdict_copy_default(copy, dict, "foo"); + g_assert_cmpstr(qdict_get_str(copy, "foo"), ==, "abc"); + qdict_set_default_str(copy, "bar", "xyz"); + qdict_copy_default(copy, dict, "bar"); + g_assert_cmpstr(qdict_get_str(copy, "bar"), ==, "xyz"); + + QDECREF(copy); + QDECREF(dict); +} + static void qdict_haskey_not_test(void) { QDict *tests_dict = qdict_new(); @@ -444,6 +466,49 @@ static void qdict_array_split_test(void) QDECREF(test_dict); } +static void qdict_array_entries_test(void) +{ + QDict *dict = qdict_new(); + + g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, 0); + + qdict_put(dict, "bar", qint_from_int(0)); + qdict_put(dict, "baz.0", qint_from_int(0)); + g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, 0); + + qdict_put(dict, "foo.1", qint_from_int(0)); + g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, -EINVAL); + qdict_put(dict, "foo.0", qint_from_int(0)); + g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, 2); + qdict_put(dict, "foo.bar", qint_from_int(0)); + g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, -EINVAL); + qdict_del(dict, "foo.bar"); + + qdict_put(dict, "foo.2.a", qint_from_int(0)); + qdict_put(dict, "foo.2.b", qint_from_int(0)); + qdict_put(dict, "foo.2.c", qint_from_int(0)); + g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, 3); + g_assert_cmpint(qdict_array_entries(dict, ""), ==, -EINVAL); + + QDECREF(dict); + + dict = qdict_new(); + qdict_put(dict, "1", qint_from_int(0)); + g_assert_cmpint(qdict_array_entries(dict, ""), ==, -EINVAL); + qdict_put(dict, "0", qint_from_int(0)); + g_assert_cmpint(qdict_array_entries(dict, ""), ==, 2); + qdict_put(dict, "bar", qint_from_int(0)); + g_assert_cmpint(qdict_array_entries(dict, ""), ==, -EINVAL); + qdict_del(dict, "bar"); + + qdict_put(dict, "2.a", qint_from_int(0)); + qdict_put(dict, "2.b", qint_from_int(0)); + qdict_put(dict, "2.c", qint_from_int(0)); + g_assert_cmpint(qdict_array_entries(dict, ""), ==, 3); + + QDECREF(dict); +} + static void qdict_join_test(void) { QDict *dict1, *dict2; @@ -663,6 +728,7 @@ int main(int argc, char **argv) g_test_add_func("/public/get_try_int", qdict_get_try_int_test); g_test_add_func("/public/get_str", qdict_get_str_test); g_test_add_func("/public/get_try_str", qdict_get_try_str_test); + g_test_add_func("/public/defaults", qdict_defaults_test); g_test_add_func("/public/haskey_not", qdict_haskey_not_test); g_test_add_func("/public/haskey", qdict_haskey_test); g_test_add_func("/public/del", qdict_del_test); @@ -670,6 +736,7 @@ int main(int argc, char **argv) g_test_add_func("/public/iterapi", qdict_iterapi_test); g_test_add_func("/public/flatten", qdict_flatten_test); g_test_add_func("/public/array_split", qdict_array_split_test); + g_test_add_func("/public/array_entries", qdict_array_entries_test); g_test_add_func("/public/join", qdict_join_test); g_test_add_func("/errors/put_exists", qdict_put_exists_test); diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index 0360f37e5a..4a8055b673 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -194,7 +194,6 @@ echo === Specifying the protocol layer === echo run_qemu -drive file="$TEST_IMG",file.driver=file -run_qemu -drive file="$TEST_IMG",file.driver=qcow2 echo echo === Leaving out required options === diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index 2890eac084..652dd63bf8 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -253,9 +253,6 @@ Testing: -drive file=TEST_DIR/t.qcow2,file.driver=file QEMU X.Y.Z monitor - type 'help' for more information (qemu) q[K[Dqu[K[D[Dqui[K[D[D[Dquit[K -Testing: -drive file=TEST_DIR/t.qcow2,file.driver=qcow2 -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.driver=qcow2: Block format 'qcow2' used by device '' doesn't support the option 'filename' - === Leaving out required options === diff --git a/tests/qemu-iotests/093 b/tests/qemu-iotests/093 index b9096a55d4..c0e9e2b0b5 100755 --- a/tests/qemu-iotests/093 +++ b/tests/qemu-iotests/093 @@ -3,6 +3,7 @@ # Tests for IO throttling # # Copyright (C) 2015 Red Hat, Inc. +# Copyright (C) 2015 Igalia, S.L. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -22,6 +23,7 @@ import iotests class ThrottleTestCase(iotests.QMPTestCase): test_img = "null-aio://" + max_drives = 3 def blockstats(self, device): result = self.vm.qmp("query-blockstats") @@ -32,26 +34,31 @@ class ThrottleTestCase(iotests.QMPTestCase): raise Exception("Device not found for blockstats: %s" % device) def setUp(self): - self.vm = iotests.VM().add_drive(self.test_img) + self.vm = iotests.VM() + for i in range(0, self.max_drives): + self.vm.add_drive(self.test_img) self.vm.launch() def tearDown(self): self.vm.shutdown() - def do_test_throttle(self, seconds, params): + def do_test_throttle(self, ndrives, seconds, params): def check_limit(limit, num): # IO throttling algorithm is discrete, allow 10% error so the test # is more robust return limit == 0 or \ - (num < seconds * limit * 1.1 - and num > seconds * limit * 0.9) + (num < seconds * limit * 1.1 / ndrives + and num > seconds * limit * 0.9 / ndrives) nsec_per_sec = 1000000000 - params['device'] = 'drive0' + params['group'] = 'test' - result = self.vm.qmp("block_set_io_throttle", conv_keys=False, **params) - self.assert_qmp(result, 'return', {}) + # Set the I/O throttling parameters to all drives + for i in range(0, ndrives): + params['device'] = 'drive%d' % i + result = self.vm.qmp("block_set_io_throttle", conv_keys=False, **params) + self.assert_qmp(result, 'return', {}) # Set vm clock to a known value ns = seconds * nsec_per_sec @@ -66,32 +73,60 @@ class ThrottleTestCase(iotests.QMPTestCase): params['iops'] / 2, params['iops_rd']) rd_nr *= seconds * 2 + rd_nr /= ndrives wr_nr = max(params['bps'] / rq_size / 2, params['bps_wr'] / rq_size, params['iops'] / 2, params['iops_wr']) wr_nr *= seconds * 2 + wr_nr /= ndrives + + # Send I/O requests to all drives for i in range(rd_nr): - self.vm.hmp_qemu_io("drive0", "aio_read %d %d" % (i * rq_size, rq_size)) - for i in range(wr_nr): - self.vm.hmp_qemu_io("drive0", "aio_write %d %d" % (i * rq_size, rq_size)) + for drive in range(0, ndrives): + self.vm.hmp_qemu_io("drive%d" % drive, "aio_read %d %d" % + (i * rq_size, rq_size)) - start_rd_bytes, start_rd_iops, start_wr_bytes, start_wr_iops = self.blockstats('drive0') + for i in range(wr_nr): + for drive in range(0, ndrives): + self.vm.hmp_qemu_io("drive%d" % drive, "aio_write %d %d" % + (i * rq_size, rq_size)) + + # We'll store the I/O stats for each drive in these arrays + start_rd_bytes = [0] * ndrives + start_rd_iops = [0] * ndrives + start_wr_bytes = [0] * ndrives + start_wr_iops = [0] * ndrives + end_rd_bytes = [0] * ndrives + end_rd_iops = [0] * ndrives + end_wr_bytes = [0] * ndrives + end_wr_iops = [0] * ndrives + + # Read the stats before advancing the clock + for i in range(0, ndrives): + start_rd_bytes[i], start_rd_iops[i], start_wr_bytes[i], \ + start_wr_iops[i] = self.blockstats('drive%d' % i) self.vm.qtest("clock_step %d" % ns) - end_rd_bytes, end_rd_iops, end_wr_bytes, end_wr_iops = self.blockstats('drive0') - - rd_bytes = end_rd_bytes - start_rd_bytes - rd_iops = end_rd_iops - start_rd_iops - wr_bytes = end_wr_bytes - start_wr_bytes - wr_iops = end_wr_iops - start_wr_iops - self.assertTrue(check_limit(params['bps'], rd_bytes + wr_bytes)) - self.assertTrue(check_limit(params['bps_rd'], rd_bytes)) - self.assertTrue(check_limit(params['bps_wr'], wr_bytes)) - self.assertTrue(check_limit(params['iops'], rd_iops + wr_iops)) - self.assertTrue(check_limit(params['iops_rd'], rd_iops)) - self.assertTrue(check_limit(params['iops_wr'], wr_iops)) + # Read the stats after advancing the clock + for i in range(0, ndrives): + end_rd_bytes[i], end_rd_iops[i], end_wr_bytes[i], \ + end_wr_iops[i] = self.blockstats('drive%d' % i) + + # Check that the I/O is within the limits and evenly distributed + for i in range(0, ndrives): + rd_bytes = end_rd_bytes[i] - start_rd_bytes[i] + rd_iops = end_rd_iops[i] - start_rd_iops[i] + wr_bytes = end_wr_bytes[i] - start_wr_bytes[i] + wr_iops = end_wr_iops[i] - start_wr_iops[i] + + self.assertTrue(check_limit(params['bps'], rd_bytes + wr_bytes)) + self.assertTrue(check_limit(params['bps_rd'], rd_bytes)) + self.assertTrue(check_limit(params['bps_wr'], wr_bytes)) + self.assertTrue(check_limit(params['iops'], rd_iops + wr_iops)) + self.assertTrue(check_limit(params['iops_rd'], rd_iops)) + self.assertTrue(check_limit(params['iops_wr'], wr_iops)) def test_all(self): params = {"bps": 4096, @@ -101,11 +136,13 @@ class ThrottleTestCase(iotests.QMPTestCase): "iops_rd": 10, "iops_wr": 10, } - # Pick each out of all possible params and test - for tk in params: - limits = dict([(k, 0) for k in params]) - limits[tk] = params[tk] - self.do_test_throttle(5, limits) + # Repeat the test with different numbers of drives + for ndrives in range(1, self.max_drives + 1): + # Pick each out of all possible params and test + for tk in params: + limits = dict([(k, 0) for k in params]) + limits[tk] = params[tk] * ndrives + self.do_test_throttle(ndrives, 5, limits) class ThrottleTestCoroutine(ThrottleTestCase): test_img = "null-co://" diff --git a/tests/qemu-iotests/103 b/tests/qemu-iotests/103 index ccab551f63..fa9a3c1fc9 100755 --- a/tests/qemu-iotests/103 +++ b/tests/qemu-iotests/103 @@ -93,6 +93,16 @@ $QEMU_IO -c "open -o l2-cache-size=1M,refcount-cache-size=0.25M $TEST_IMG" \ -c 'read -P 42 0 64k' \ | _filter_qemu_io +echo +echo '=== Testing minimal L2 cache and COW ===' +echo + +$QEMU_IMG snapshot -c foo "$TEST_IMG" +# This requires a COW operation, which accesses two L2 tables simultaneously +# (COW source and destination), so there must be enough space in the cache to +# place both tables there (and qemu should not crash) +$QEMU_IO -c "open -o cache-size=0 $TEST_IMG" -c 'write 0 64k' | _filter_qemu_io + # success, all done echo '*** done' rm -f $seq.full diff --git a/tests/qemu-iotests/103.out b/tests/qemu-iotests/103.out index ee705b05f0..d05f49fdba 100644 --- a/tests/qemu-iotests/103.out +++ b/tests/qemu-iotests/103.out @@ -26,4 +26,9 @@ read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Testing minimal L2 cache and COW === + +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) *** done diff --git a/tests/qemu-iotests/119 b/tests/qemu-iotests/119 new file mode 100755 index 0000000000..9a11f1b921 --- /dev/null +++ b/tests/qemu-iotests/119 @@ -0,0 +1,60 @@ +#!/bin/bash +# +# NBD test case for overriding BDRV_O_PROTOCOL by explicitly specifying +# a driver +# +# Copyright (C) 2015 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt raw +_supported_proto nbd +_supported_os Linux + +_make_test_img 64M +# This should not crash +echo "{'execute': 'qmp_capabilities'} + {'execute': 'human-monitor-command', + 'arguments': {'command-line': 'qemu-io drv \"read -P 0 0 64k\"'}} + {'execute': 'quit'}" \ + | $QEMU -drive id=drv,if=none,file="$TEST_IMG",driver=nbd \ + -qmp stdio -nodefaults \ + | _filter_qmp | _filter_qemu_io + +# success, all done +echo +echo '*** done' +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/119.out b/tests/qemu-iotests/119.out new file mode 100644 index 0000000000..58e7114e8b --- /dev/null +++ b/tests/qemu-iotests/119.out @@ -0,0 +1,11 @@ +QA output created by 119 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +QMP_VERSION +{"return": {}} +read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{"return": ""} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} + +*** done diff --git a/tests/qemu-iotests/120 b/tests/qemu-iotests/120 new file mode 100755 index 0000000000..9f13078764 --- /dev/null +++ b/tests/qemu-iotests/120 @@ -0,0 +1,65 @@ +#!/bin/bash +# +# Non-NBD test cases for overriding BDRV_O_PROTOCOL by explicitly +# specifying a driver +# +# Copyright (C) 2015 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt generic +_supported_proto file +_supported_os Linux + +_make_test_img 64M + +echo "{'execute': 'qmp_capabilities'} + {'execute': 'human-monitor-command', + 'arguments': {'command-line': 'qemu-io drv \"write -P 42 0 64k\"'}} + {'execute': 'quit'}" \ + | $QEMU -qmp stdio -nodefaults \ + -drive id=drv,if=none,file="$TEST_IMG",driver=raw,file.driver=$IMGFMT \ + | _filter_qmp | _filter_qemu_io +$QEMU_IO -c 'read -P 42 0 64k' "$TEST_IMG" | _filter_qemu_io + +$QEMU_IO_PROG -c 'read -P 42 0 64k' \ + "json:{'driver': 'raw', 'file': {'driver': '$IMGFMT', 'file': {'filename': '$TEST_IMG'}}}" \ + | _filter_qemu_io + +# success, all done +echo +echo '*** done' +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/120.out b/tests/qemu-iotests/120.out new file mode 100644 index 0000000000..9131b1bce9 --- /dev/null +++ b/tests/qemu-iotests/120.out @@ -0,0 +1,15 @@ +QA output created by 120 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +QMP_VERSION +{"return": {}} +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{"return": ""} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +*** done diff --git a/tests/qemu-iotests/124 b/tests/qemu-iotests/124 index 3ee78cd1f1..8abce2f869 100644 --- a/tests/qemu-iotests/124 +++ b/tests/qemu-iotests/124 @@ -125,7 +125,7 @@ class TestIncrementalBackup(iotests.QMPTestCase): event = self.vm.event_wait(name="BLOCK_JOB_COMPLETED", match={'data': {'device': kwargs['device']}}) - self.assertIsNotNone(event) + self.assertNotEqual(event, None) try: failure = self.dictpath(event, 'data/error') diff --git a/tests/qemu-iotests/128 b/tests/qemu-iotests/128 index 249a865581..e2a0f2f890 100755 --- a/tests/qemu-iotests/128 +++ b/tests/qemu-iotests/128 @@ -29,6 +29,7 @@ tmp=/tmp/$$ status=1 # failure is the default! devname="eiodev$$" +sudo="" _setup_eiodev() { @@ -37,6 +38,7 @@ _setup_eiodev() echo "0 $((1024 * 1024 * 1024 / 512)) error" | \ $cmd dmsetup create "$devname" 2>/dev/null if [ "$?" -eq 0 ]; then + sudo="$cmd" return fi done @@ -74,7 +76,7 @@ TEST_IMG="/dev/mapper/$devname" echo echo "== reading from error device ==" # Opening image should succeed but the read operation should fail -$QEMU_IO --format "$IMGFMT" --nocache -c "read 0 65536" "$TEST_IMG" | _filter_qemu_io +$sudo $QEMU_IO --format "$IMGFMT" --nocache -c "read 0 65536" "$TEST_IMG" | _filter_qemu_io # success, all done echo "*** done" diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 0b817ca32d..4597fc11c0 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -121,6 +121,8 @@ 114 rw auto quick 115 rw auto 116 rw auto quick +119 rw auto quick +120 rw auto quick 121 rw auto 122 rw auto 123 rw auto quick diff --git a/tests/test-aio.c b/tests/test-aio.c index 4b0cb45d31..a7cb5c9915 100644 --- a/tests/test-aio.c +++ b/tests/test-aio.c @@ -107,7 +107,6 @@ static void test_notify(void) typedef struct { QemuMutex start_lock; - EventNotifier notifier; bool thread_acquired; } AcquireTestData; @@ -119,8 +118,6 @@ static void *test_acquire_thread(void *opaque) qemu_mutex_lock(&data->start_lock); qemu_mutex_unlock(&data->start_lock); - g_usleep(500000); - event_notifier_set(&data->notifier); aio_context_acquire(ctx); aio_context_release(ctx); @@ -129,19 +126,20 @@ static void *test_acquire_thread(void *opaque) return NULL; } -static void dummy_notifier_read(EventNotifier *n) +static void dummy_notifier_read(EventNotifier *unused) { - event_notifier_test_and_clear(n); + g_assert(false); /* should never be invoked */ } static void test_acquire(void) { QemuThread thread; + EventNotifier notifier; AcquireTestData data; /* Dummy event notifier ensures aio_poll() will block */ - event_notifier_init(&data.notifier, false); - aio_set_event_notifier(ctx, &data.notifier, dummy_notifier_read); + event_notifier_init(¬ifier, false); + aio_set_event_notifier(ctx, ¬ifier, dummy_notifier_read); g_assert(!aio_poll(ctx, false)); /* consume aio_notify() */ qemu_mutex_init(&data.start_lock); @@ -155,13 +153,12 @@ static void test_acquire(void) /* Block in aio_poll(), let other thread kick us and acquire context */ aio_context_acquire(ctx); qemu_mutex_unlock(&data.start_lock); /* let the thread run */ - g_assert(aio_poll(ctx, true)); - g_assert(!data.thread_acquired); + g_assert(!aio_poll(ctx, true)); aio_context_release(ctx); qemu_thread_join(&thread); - aio_set_event_notifier(ctx, &data.notifier, NULL); - event_notifier_cleanup(&data.notifier); + aio_set_event_notifier(ctx, ¬ifier, NULL); + event_notifier_cleanup(¬ifier); g_assert(data.thread_acquired); } diff --git a/tests/test-throttle.c b/tests/test-throttle.c index d8ba415e43..016844546a 100644 --- a/tests/test-throttle.c +++ b/tests/test-throttle.c @@ -1,10 +1,12 @@ /* * Throttle infrastructure tests * - * Copyright Nodalink, SARL. 2013 + * Copyright Nodalink, EURL. 2013-2014 + * Copyright Igalia, S.L. 2015 * * Authors: - * Benoît Canet <benoit.canet@irqsave.net> + * Benoît Canet <benoit.canet@nodalink.com> + * Alberto Garcia <berto@igalia.com> * * This work is licensed under the terms of the GNU LGPL, version 2 or later. * See the COPYING.LIB file in the top-level directory. @@ -15,11 +17,13 @@ #include "block/aio.h" #include "qemu/throttle.h" #include "qemu/error-report.h" +#include "block/throttle-groups.h" static AioContext *ctx; static LeakyBucket bkt; static ThrottleConfig cfg; static ThrottleState ts; +static ThrottleTimers tt; /* useful function */ static bool double_cmp(double x, double y) @@ -103,17 +107,19 @@ static void test_init(void) { int i; - /* fill the structure with crap */ + /* fill the structures with crap */ memset(&ts, 1, sizeof(ts)); + memset(&tt, 1, sizeof(tt)); - /* init the structure */ - throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL, - read_timer_cb, write_timer_cb, &ts); + /* init structures */ + throttle_init(&ts); + throttle_timers_init(&tt, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, write_timer_cb, &ts); /* check initialized fields */ - g_assert(ts.clock_type == QEMU_CLOCK_VIRTUAL); - g_assert(ts.timers[0]); - g_assert(ts.timers[1]); + g_assert(tt.clock_type == QEMU_CLOCK_VIRTUAL); + g_assert(tt.timers[0]); + g_assert(tt.timers[1]); /* check other fields where cleared */ g_assert(!ts.previous_leak); @@ -124,17 +130,18 @@ static void test_init(void) g_assert(!ts.cfg.buckets[i].level); } - throttle_destroy(&ts); + throttle_timers_destroy(&tt); } static void test_destroy(void) { int i; - throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL, - read_timer_cb, write_timer_cb, &ts); - throttle_destroy(&ts); + throttle_init(&ts); + throttle_timers_init(&tt, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, write_timer_cb, &ts); + throttle_timers_destroy(&tt); for (i = 0; i < 2; i++) { - g_assert(!ts.timers[i]); + g_assert(!tt.timers[i]); } } @@ -170,11 +177,12 @@ static void test_config_functions(void) orig_cfg.op_size = 1; - throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL, - read_timer_cb, write_timer_cb, &ts); + throttle_init(&ts); + throttle_timers_init(&tt, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, write_timer_cb, &ts); /* structure reset by throttle_init previous_leak should be null */ g_assert(!ts.previous_leak); - throttle_config(&ts, &orig_cfg); + throttle_config(&ts, &tt, &orig_cfg); /* has previous leak been initialized by throttle_config ? */ g_assert(ts.previous_leak); @@ -182,7 +190,7 @@ static void test_config_functions(void) /* get back the fixed configuration */ throttle_get_config(&ts, &final_cfg); - throttle_destroy(&ts); + throttle_timers_destroy(&tt); g_assert(final_cfg.buckets[THROTTLE_BPS_TOTAL].avg == 153); g_assert(final_cfg.buckets[THROTTLE_BPS_READ].avg == 56); @@ -323,43 +331,47 @@ static void test_is_valid(void) static void test_have_timer(void) { - /* zero the structure */ + /* zero structures */ memset(&ts, 0, sizeof(ts)); + memset(&tt, 0, sizeof(tt)); /* no timer set should return false */ - g_assert(!throttle_have_timer(&ts)); + g_assert(!throttle_timers_are_initialized(&tt)); - /* init the structure */ - throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL, - read_timer_cb, write_timer_cb, &ts); + /* init structures */ + throttle_init(&ts); + throttle_timers_init(&tt, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, write_timer_cb, &ts); /* timer set by init should return true */ - g_assert(throttle_have_timer(&ts)); + g_assert(throttle_timers_are_initialized(&tt)); - throttle_destroy(&ts); + throttle_timers_destroy(&tt); } static void test_detach_attach(void) { - /* zero the structure */ + /* zero structures */ memset(&ts, 0, sizeof(ts)); + memset(&tt, 0, sizeof(tt)); /* init the structure */ - throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL, - read_timer_cb, write_timer_cb, &ts); + throttle_init(&ts); + throttle_timers_init(&tt, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, write_timer_cb, &ts); /* timer set by init should return true */ - g_assert(throttle_have_timer(&ts)); + g_assert(throttle_timers_are_initialized(&tt)); /* timer should no longer exist after detaching */ - throttle_detach_aio_context(&ts); - g_assert(!throttle_have_timer(&ts)); + throttle_timers_detach_aio_context(&tt); + g_assert(!throttle_timers_are_initialized(&tt)); /* timer should exist again after attaching */ - throttle_attach_aio_context(&ts, ctx); - g_assert(throttle_have_timer(&ts)); + throttle_timers_attach_aio_context(&tt, ctx); + g_assert(throttle_timers_are_initialized(&tt)); - throttle_destroy(&ts); + throttle_timers_destroy(&tt); } static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */ @@ -387,9 +399,10 @@ static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */ cfg.op_size = op_size; - throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL, - read_timer_cb, write_timer_cb, &ts); - throttle_config(&ts, &cfg); + throttle_init(&ts); + throttle_timers_init(&tt, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, write_timer_cb, &ts); + throttle_config(&ts, &tt, &cfg); /* account a read */ throttle_account(&ts, false, size); @@ -414,7 +427,7 @@ static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */ return false; } - throttle_destroy(&ts); + throttle_timers_destroy(&tt); return true; } @@ -490,23 +503,80 @@ static void test_accounting(void) (64.0 / 13))); } +static void test_groups(void) +{ + ThrottleConfig cfg1, cfg2; + BlockDriverState *bdrv1, *bdrv2, *bdrv3; + + bdrv1 = bdrv_new(); + bdrv2 = bdrv_new(); + bdrv3 = bdrv_new(); + + g_assert(bdrv1->throttle_state == NULL); + g_assert(bdrv2->throttle_state == NULL); + g_assert(bdrv3->throttle_state == NULL); + + throttle_group_register_bs(bdrv1, "bar"); + throttle_group_register_bs(bdrv2, "foo"); + throttle_group_register_bs(bdrv3, "bar"); + + g_assert(bdrv1->throttle_state != NULL); + g_assert(bdrv2->throttle_state != NULL); + g_assert(bdrv3->throttle_state != NULL); + + g_assert(!strcmp(throttle_group_get_name(bdrv1), "bar")); + g_assert(!strcmp(throttle_group_get_name(bdrv2), "foo")); + g_assert(bdrv1->throttle_state == bdrv3->throttle_state); + + /* Setting the config of a group member affects the whole group */ + memset(&cfg1, 0, sizeof(cfg1)); + cfg1.buckets[THROTTLE_BPS_READ].avg = 500000; + cfg1.buckets[THROTTLE_BPS_WRITE].avg = 285000; + cfg1.buckets[THROTTLE_OPS_READ].avg = 20000; + cfg1.buckets[THROTTLE_OPS_WRITE].avg = 12000; + throttle_group_config(bdrv1, &cfg1); + + throttle_group_get_config(bdrv1, &cfg1); + throttle_group_get_config(bdrv3, &cfg2); + g_assert(!memcmp(&cfg1, &cfg2, sizeof(cfg1))); + + cfg2.buckets[THROTTLE_BPS_READ].avg = 4547; + cfg2.buckets[THROTTLE_BPS_WRITE].avg = 1349; + cfg2.buckets[THROTTLE_OPS_READ].avg = 123; + cfg2.buckets[THROTTLE_OPS_WRITE].avg = 86; + throttle_group_config(bdrv3, &cfg1); + + throttle_group_get_config(bdrv1, &cfg1); + throttle_group_get_config(bdrv3, &cfg2); + g_assert(!memcmp(&cfg1, &cfg2, sizeof(cfg1))); + + throttle_group_unregister_bs(bdrv1); + throttle_group_unregister_bs(bdrv2); + throttle_group_unregister_bs(bdrv3); + + g_assert(bdrv1->throttle_state == NULL); + g_assert(bdrv2->throttle_state == NULL); + g_assert(bdrv3->throttle_state == NULL); +} + int main(int argc, char **argv) { - GSource *src; Error *local_error = NULL; - init_clocks(); + qemu_init_main_loop(&local_error); + ctx = qemu_get_aio_context(); - ctx = aio_context_new(&local_error); if (!ctx) { error_report("Failed to create AIO Context: '%s'", - error_get_pretty(local_error)); - error_free(local_error); + local_error ? error_get_pretty(local_error) : + "Failed to initialize the QEMU main loop"); + if (local_error) { + error_free(local_error); + } exit(1); } - src = aio_get_g_source(ctx); - g_source_attach(src, NULL); - g_source_unref(src); + + bdrv_init(); do {} while (g_main_context_iteration(NULL, false)); @@ -523,6 +593,7 @@ int main(int argc, char **argv) g_test_add_func("/throttle/config/is_valid", test_is_valid); g_test_add_func("/throttle/config_functions", test_config_functions); g_test_add_func("/throttle/accounting", test_accounting); + g_test_add_func("/throttle/groups", test_groups); return g_test_run(); } diff --git a/util/throttle.c b/util/throttle.c index f976ac7de5..706c13111e 100644 --- a/util/throttle.c +++ b/util/throttle.c @@ -1,10 +1,12 @@ /* * QEMU throttling infrastructure * - * Copyright (C) Nodalink, SARL. 2013 + * Copyright (C) Nodalink, EURL. 2013-2014 + * Copyright (C) Igalia, S.L. 2015 * - * Author: - * Benoît Canet <benoit.canet@irqsave.net> + * Authors: + * Benoît Canet <benoit.canet@nodalink.com> + * Alberto Garcia <berto@igalia.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -159,29 +161,36 @@ bool throttle_compute_timer(ThrottleState *ts, } /* Add timers to event loop */ -void throttle_attach_aio_context(ThrottleState *ts, AioContext *new_context) +void throttle_timers_attach_aio_context(ThrottleTimers *tt, + AioContext *new_context) { - ts->timers[0] = aio_timer_new(new_context, ts->clock_type, SCALE_NS, - ts->read_timer_cb, ts->timer_opaque); - ts->timers[1] = aio_timer_new(new_context, ts->clock_type, SCALE_NS, - ts->write_timer_cb, ts->timer_opaque); + tt->timers[0] = aio_timer_new(new_context, tt->clock_type, SCALE_NS, + tt->read_timer_cb, tt->timer_opaque); + tt->timers[1] = aio_timer_new(new_context, tt->clock_type, SCALE_NS, + tt->write_timer_cb, tt->timer_opaque); } /* To be called first on the ThrottleState */ -void throttle_init(ThrottleState *ts, - AioContext *aio_context, - QEMUClockType clock_type, - QEMUTimerCB *read_timer_cb, - QEMUTimerCB *write_timer_cb, - void *timer_opaque) +void throttle_init(ThrottleState *ts) { memset(ts, 0, sizeof(ThrottleState)); +} + +/* To be called first on the ThrottleTimers */ +void throttle_timers_init(ThrottleTimers *tt, + AioContext *aio_context, + QEMUClockType clock_type, + QEMUTimerCB *read_timer_cb, + QEMUTimerCB *write_timer_cb, + void *timer_opaque) +{ + memset(tt, 0, sizeof(ThrottleTimers)); - ts->clock_type = clock_type; - ts->read_timer_cb = read_timer_cb; - ts->write_timer_cb = write_timer_cb; - ts->timer_opaque = timer_opaque; - throttle_attach_aio_context(ts, aio_context); + tt->clock_type = clock_type; + tt->read_timer_cb = read_timer_cb; + tt->write_timer_cb = write_timer_cb; + tt->timer_opaque = timer_opaque; + throttle_timers_attach_aio_context(tt, aio_context); } /* destroy a timer */ @@ -195,25 +204,25 @@ static void throttle_timer_destroy(QEMUTimer **timer) } /* Remove timers from event loop */ -void throttle_detach_aio_context(ThrottleState *ts) +void throttle_timers_detach_aio_context(ThrottleTimers *tt) { int i; for (i = 0; i < 2; i++) { - throttle_timer_destroy(&ts->timers[i]); + throttle_timer_destroy(&tt->timers[i]); } } -/* To be called last on the ThrottleState */ -void throttle_destroy(ThrottleState *ts) +/* To be called last on the ThrottleTimers */ +void throttle_timers_destroy(ThrottleTimers *tt) { - throttle_detach_aio_context(ts); + throttle_timers_detach_aio_context(tt); } /* is any throttling timer configured */ -bool throttle_have_timer(ThrottleState *ts) +bool throttle_timers_are_initialized(ThrottleTimers *tt) { - if (ts->timers[0]) { + if (tt->timers[0]) { return true; } @@ -324,9 +333,12 @@ static void throttle_cancel_timer(QEMUTimer *timer) /* Used to configure the throttle * * @ts: the throttle state we are working on + * @tt: the throttle timers we use in this aio context * @cfg: the config to set */ -void throttle_config(ThrottleState *ts, ThrottleConfig *cfg) +void throttle_config(ThrottleState *ts, + ThrottleTimers *tt, + ThrottleConfig *cfg) { int i; @@ -336,10 +348,10 @@ void throttle_config(ThrottleState *ts, ThrottleConfig *cfg) throttle_fix_bucket(&ts->cfg.buckets[i]); } - ts->previous_leak = qemu_clock_get_ns(ts->clock_type); + ts->previous_leak = qemu_clock_get_ns(tt->clock_type); for (i = 0; i < 2; i++) { - throttle_cancel_timer(ts->timers[i]); + throttle_cancel_timer(tt->timers[i]); } } @@ -358,12 +370,15 @@ void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg) * * NOTE: this function is not unit tested due to it's usage of timer_mod * + * @tt: the timers structure * @is_write: the type of operation (read/write) * @ret: true if the timer has been scheduled else false */ -bool throttle_schedule_timer(ThrottleState *ts, bool is_write) +bool throttle_schedule_timer(ThrottleState *ts, + ThrottleTimers *tt, + bool is_write) { - int64_t now = qemu_clock_get_ns(ts->clock_type); + int64_t now = qemu_clock_get_ns(tt->clock_type); int64_t next_timestamp; bool must_wait; @@ -378,12 +393,12 @@ bool throttle_schedule_timer(ThrottleState *ts, bool is_write) } /* request throttled and timer pending -> do nothing */ - if (timer_pending(ts->timers[is_write])) { + if (timer_pending(tt->timers[is_write])) { return true; } /* request throttled and timer not pending -> arm timer */ - timer_mod(ts->timers[is_write], next_timestamp); + timer_mod(tt->timers[is_write], next_timestamp); return true; } |