diff options
Diffstat (limited to 'block')
| -rw-r--r-- | block/Makefile.objs | 1 | ||||
| -rw-r--r-- | block/blkdebug.c | 2 | ||||
| -rw-r--r-- | block/blkverify.c | 4 | ||||
| -rw-r--r-- | block/io.c | 71 | ||||
| -rw-r--r-- | block/qapi.c | 8 | ||||
| -rw-r--r-- | block/qcow2.c | 11 | ||||
| -rw-r--r-- | block/qcow2.h | 5 | ||||
| -rw-r--r-- | block/quorum.c | 51 | ||||
| -rw-r--r-- | block/raw-posix.c | 5 | ||||
| -rw-r--r-- | block/throttle-groups.c | 496 | ||||
| -rw-r--r-- | block/vmdk.c | 84 |
11 files changed, 585 insertions, 153 deletions
diff --git a/block/Makefile.objs b/block/Makefile.objs index 0d8c2a4ab6..c34fd7cdc2 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -10,6 +10,7 @@ block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o block-obj-$(CONFIG_POSIX) += raw-posix.o block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o block-obj-y += null.o mirror.o io.o +block-obj-y += throttle-groups.o block-obj-y += nbd.o nbd-client.o sheepdog.o block-obj-$(CONFIG_LIBISCSI) += iscsi.o diff --git a/block/blkdebug.c b/block/blkdebug.c index 1e92607ef3..bc247f46f5 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -429,7 +429,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, /* Open the backing file */ assert(bs->file == NULL); ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-image"), options, "image", - flags | BDRV_O_PROTOCOL, false, &local_err); + bs, &child_file, false, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto out; diff --git a/block/blkverify.c b/block/blkverify.c index 438dff8bcb..d277e63220 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -125,7 +125,7 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags, /* Open the raw file */ assert(bs->file == NULL); ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-raw"), options, - "raw", flags | BDRV_O_PROTOCOL, false, &local_err); + "raw", bs, &child_file, false, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto fail; @@ -134,7 +134,7 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags, /* Open the test file */ assert(s->test_file == NULL); ret = bdrv_open_image(&s->test_file, qemu_opt_get(opts, "x-image"), options, - "test", flags, false, &local_err); + "test", bs, &child_format, false, &local_err); if (ret < 0) { error_propagate(errp, local_err); s->test_file = NULL; diff --git a/block/io.c b/block/io.c index e394d92626..bb4f78784e 100644 --- a/block/io.c +++ b/block/io.c @@ -23,9 +23,9 @@ */ #include "trace.h" -#include "sysemu/qtest.h" #include "block/blockjob.h" #include "block/block_int.h" +#include "block/throttle-groups.h" #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ @@ -65,7 +65,7 @@ void bdrv_set_io_limits(BlockDriverState *bs, { int i; - throttle_config(&bs->throttle_state, cfg); + throttle_group_config(bs, cfg); for (i = 0; i < 2; i++) { qemu_co_enter_next(&bs->throttled_reqs[i]); @@ -95,72 +95,33 @@ static bool bdrv_start_throttled_reqs(BlockDriverState *bs) void bdrv_io_limits_disable(BlockDriverState *bs) { bs->io_limits_enabled = false; - bdrv_start_throttled_reqs(bs); - - throttle_destroy(&bs->throttle_state); -} - -static void bdrv_throttle_read_timer_cb(void *opaque) -{ - BlockDriverState *bs = opaque; - qemu_co_enter_next(&bs->throttled_reqs[0]); -} - -static void bdrv_throttle_write_timer_cb(void *opaque) -{ - BlockDriverState *bs = opaque; - qemu_co_enter_next(&bs->throttled_reqs[1]); + throttle_group_unregister_bs(bs); } /* should be called before bdrv_set_io_limits if a limit is set */ -void bdrv_io_limits_enable(BlockDriverState *bs) +void bdrv_io_limits_enable(BlockDriverState *bs, const char *group) { - int clock_type = QEMU_CLOCK_REALTIME; - - if (qtest_enabled()) { - /* For testing block IO throttling only */ - clock_type = QEMU_CLOCK_VIRTUAL; - } assert(!bs->io_limits_enabled); - throttle_init(&bs->throttle_state, - bdrv_get_aio_context(bs), - clock_type, - bdrv_throttle_read_timer_cb, - bdrv_throttle_write_timer_cb, - bs); + throttle_group_register_bs(bs, group); bs->io_limits_enabled = true; } -/* This function makes an IO wait if needed - * - * @nb_sectors: the number of sectors of the IO - * @is_write: is the IO a write - */ -static void bdrv_io_limits_intercept(BlockDriverState *bs, - unsigned int bytes, - bool is_write) +void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group) { - /* does this io must wait */ - bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write); - - /* if must wait or any request of this type throttled queue the IO */ - if (must_wait || - !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) { - qemu_co_queue_wait(&bs->throttled_reqs[is_write]); + /* this bs is not part of any group */ + if (!bs->throttle_state) { + return; } - /* the IO will be executed, do the accounting */ - throttle_account(&bs->throttle_state, is_write, bytes); - - - /* if the next request must wait -> do nothing */ - if (throttle_schedule_timer(&bs->throttle_state, is_write)) { + /* this bs is a part of the same group than the one we want */ + if (!g_strcmp0(throttle_group_get_name(bs), group)) { return; } - /* else queue next request for execution */ - qemu_co_queue_next(&bs->throttled_reqs[is_write]); + /* need to change the group this bs belong to */ + bdrv_io_limits_disable(bs); + bdrv_io_limits_enable(bs, group); } void bdrv_setup_io_funcs(BlockDriver *bdrv) @@ -967,7 +928,7 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, /* throttling disk I/O */ if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, bytes, false); + throttle_group_co_io_limits_intercept(bs, bytes, false); } /* Align read if necessary by padding qiov */ @@ -1297,7 +1258,7 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, /* throttling disk I/O */ if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, bytes, true); + throttle_group_co_io_limits_intercept(bs, bytes, true); } /* diff --git a/block/qapi.c b/block/qapi.c index 18d2b95f54..a738148bce 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -24,6 +24,7 @@ #include "block/qapi.h" #include "block/block_int.h" +#include "block/throttle-groups.h" #include "block/write-threshold.h" #include "qmp-commands.h" #include "qapi-visit.h" @@ -65,7 +66,9 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp) if (bs->io_limits_enabled) { ThrottleConfig cfg; - throttle_get_config(&bs->throttle_state, &cfg); + + throttle_group_get_config(bs, &cfg); + info->bps = cfg.buckets[THROTTLE_BPS_TOTAL].avg; info->bps_rd = cfg.buckets[THROTTLE_BPS_READ].avg; info->bps_wr = cfg.buckets[THROTTLE_BPS_WRITE].avg; @@ -90,6 +93,9 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp) info->has_iops_size = cfg.op_size; info->iops_size = cfg.op_size; + + info->has_group = true; + info->group = g_strdup(throttle_group_get_name(bs)); } info->write_threshold = bdrv_write_threshold_get(bs); diff --git a/block/qcow2.c b/block/qcow2.c index f7b4cc6a32..c4f6938a36 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -483,9 +483,11 @@ static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = { [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2, }; -static void read_cache_sizes(QemuOpts *opts, uint64_t *l2_cache_size, +static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, + uint64_t *l2_cache_size, uint64_t *refcount_cache_size, Error **errp) { + BDRVQcowState *s = bs->opaque; uint64_t combined_cache_size; bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set; @@ -525,7 +527,9 @@ static void read_cache_sizes(QemuOpts *opts, uint64_t *l2_cache_size, } } else { if (!l2_cache_size_set && !refcount_cache_size_set) { - *l2_cache_size = DEFAULT_L2_CACHE_BYTE_SIZE; + *l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE, + (uint64_t)DEFAULT_L2_CACHE_CLUSTERS + * s->cluster_size); *refcount_cache_size = *l2_cache_size / DEFAULT_L2_REFCOUNT_SIZE_RATIO; } else if (!l2_cache_size_set) { @@ -803,7 +807,8 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - read_cache_sizes(opts, &l2_cache_size, &refcount_cache_size, &local_err); + read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size, + &local_err); if (local_err) { error_propagate(errp, local_err); ret = -EINVAL; diff --git a/block/qcow2.h b/block/qcow2.h index 0076512af4..5936d299a3 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -62,11 +62,14 @@ #define MIN_CLUSTER_BITS 9 #define MAX_CLUSTER_BITS 21 -#define MIN_L2_CACHE_SIZE 1 /* cluster */ +/* Must be at least 2 to cover COW */ +#define MIN_L2_CACHE_SIZE 2 /* clusters */ /* Must be at least 4 to cover all cases of refcount table growth */ #define MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */ +/* Whichever is more */ +#define DEFAULT_L2_CACHE_CLUSTERS 8 /* clusters */ #define DEFAULT_L2_CACHE_BYTE_SIZE 1048576 /* bytes */ /* The refblock cache needs only a fourth of the L2 cache size to cover as many diff --git a/block/quorum.c b/block/quorum.c index f91ef75a84..77e55b2775 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -866,25 +866,18 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, Error *local_err = NULL; QemuOpts *opts = NULL; bool *opened; - QDict *sub = NULL; - QList *list = NULL; - const QListEntry *lentry; int i; int ret = 0; qdict_flatten(options); - qdict_extract_subqdict(options, &sub, "children."); - qdict_array_split(sub, &list); - if (qdict_size(sub)) { - error_setg(&local_err, "Invalid option children.%s", - qdict_first(sub)->key); + /* count how many different children are present */ + s->num_children = qdict_array_entries(options, "children."); + if (s->num_children < 0) { + error_setg(&local_err, "Option children is not a valid array"); ret = -EINVAL; goto exit; } - - /* count how many different children are present */ - s->num_children = qlist_size(list); if (s->num_children < 2) { error_setg(&local_err, "Number of provided children must be greater than 1"); @@ -937,37 +930,17 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, s->bs = g_new0(BlockDriverState *, s->num_children); opened = g_new0(bool, s->num_children); - for (i = 0, lentry = qlist_first(list); lentry; - lentry = qlist_next(lentry), i++) { - QDict *d; - QString *string; - - switch (qobject_type(lentry->value)) - { - /* List of options */ - case QTYPE_QDICT: - d = qobject_to_qdict(lentry->value); - QINCREF(d); - ret = bdrv_open(&s->bs[i], NULL, NULL, d, flags, NULL, - &local_err); - break; - - /* QMP reference */ - case QTYPE_QSTRING: - string = qobject_to_qstring(lentry->value); - ret = bdrv_open(&s->bs[i], NULL, qstring_get_str(string), NULL, - flags, NULL, &local_err); - break; - - default: - error_setg(&local_err, "Specification of child block device %i " - "is invalid", i); - ret = -EINVAL; - } + for (i = 0; i < s->num_children; i++) { + char indexstr[32]; + ret = snprintf(indexstr, 32, "children.%d", i); + assert(ret < 32); + ret = bdrv_open_image(&s->bs[i], NULL, options, indexstr, bs, + &child_format, false, &local_err); if (ret < 0) { goto close_exit; } + opened[i] = true; } @@ -990,8 +963,6 @@ exit: if (local_err) { error_propagate(errp, local_err); } - QDECREF(list); - QDECREF(sub); return ret; } diff --git a/block/raw-posix.c b/block/raw-posix.c index 2990e954ae..44ade8cf4e 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1848,8 +1848,9 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, *pnum = nb_sectors; ret = BDRV_BLOCK_DATA; } else if (data == start) { - /* On a data extent, compute sectors to the end of the extent. */ - *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE); + /* On a data extent, compute sectors to the end of the extent, + * possibly including a partial sector at EOF. */ + *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE)); ret = BDRV_BLOCK_DATA; } else { /* On a hole, compute sectors to the beginning of the next extent. */ diff --git a/block/throttle-groups.c b/block/throttle-groups.c new file mode 100644 index 0000000000..efc462fbc5 --- /dev/null +++ b/block/throttle-groups.c @@ -0,0 +1,496 @@ +/* + * QEMU block throttling group infrastructure + * + * Copyright (C) Nodalink, EURL. 2014 + * Copyright (C) Igalia, S.L. 2015 + * + * Authors: + * BenoƮt Canet <benoit.canet@nodalink.com> + * Alberto Garcia <berto@igalia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "block/throttle-groups.h" +#include "qemu/queue.h" +#include "qemu/thread.h" +#include "sysemu/qtest.h" + +/* The ThrottleGroup structure (with its ThrottleState) is shared + * among different BlockDriverState and it's independent from + * AioContext, so in order to use it from different threads it needs + * its own locking. + * + * This locking is however handled internally in this file, so it's + * mostly transparent to outside users (but see the documentation in + * throttle_groups_lock()). + * + * The whole ThrottleGroup structure is private and invisible to + * outside users, that only use it through its ThrottleState. + * + * In addition to the ThrottleGroup structure, BlockDriverState has + * fields that need to be accessed by other members of the group and + * therefore also need to be protected by this lock. Once a BDS is + * registered in a group those fields can be accessed by other threads + * any time. + * + * Again, all this is handled internally and is mostly transparent to + * the outside. The 'throttle_timers' field however has an additional + * constraint because it may be temporarily invalid (see for example + * bdrv_set_aio_context()). Therefore in this file a thread will + * access some other BDS's timers only after verifying that that BDS + * has throttled requests in the queue. + */ +typedef struct ThrottleGroup { + char *name; /* This is constant during the lifetime of the group */ + + QemuMutex lock; /* This lock protects the following four fields */ + ThrottleState ts; + QLIST_HEAD(, BlockDriverState) head; + BlockDriverState *tokens[2]; + bool any_timer_armed[2]; + + /* These two are protected by the global throttle_groups_lock */ + unsigned refcount; + QTAILQ_ENTRY(ThrottleGroup) list; +} ThrottleGroup; + +static QemuMutex throttle_groups_lock; +static QTAILQ_HEAD(, ThrottleGroup) throttle_groups = + QTAILQ_HEAD_INITIALIZER(throttle_groups); + +/* Increments the reference count of a ThrottleGroup given its name. + * + * If no ThrottleGroup is found with the given name a new one is + * created. + * + * @name: the name of the ThrottleGroup + * @ret: the ThrottleGroup + */ +static ThrottleGroup *throttle_group_incref(const char *name) +{ + ThrottleGroup *tg = NULL; + ThrottleGroup *iter; + + qemu_mutex_lock(&throttle_groups_lock); + + /* Look for an existing group with that name */ + QTAILQ_FOREACH(iter, &throttle_groups, list) { + if (!strcmp(name, iter->name)) { + tg = iter; + break; + } + } + + /* Create a new one if not found */ + if (!tg) { + tg = g_new0(ThrottleGroup, 1); + tg->name = g_strdup(name); + qemu_mutex_init(&tg->lock); + throttle_init(&tg->ts); + QLIST_INIT(&tg->head); + + QTAILQ_INSERT_TAIL(&throttle_groups, tg, list); + } + + tg->refcount++; + + qemu_mutex_unlock(&throttle_groups_lock); + + return tg; +} + +/* Decrease the reference count of a ThrottleGroup. + * + * When the reference count reaches zero the ThrottleGroup is + * destroyed. + * + * @tg: The ThrottleGroup to unref + */ +static void throttle_group_unref(ThrottleGroup *tg) +{ + qemu_mutex_lock(&throttle_groups_lock); + if (--tg->refcount == 0) { + QTAILQ_REMOVE(&throttle_groups, tg, list); + qemu_mutex_destroy(&tg->lock); + g_free(tg->name); + g_free(tg); + } + qemu_mutex_unlock(&throttle_groups_lock); +} + +/* Get the name from a BlockDriverState's ThrottleGroup. The name (and + * the pointer) is guaranteed to remain constant during the lifetime + * of the group. + * + * @bs: a BlockDriverState that is member of a throttling group + * @ret: the name of the group. + */ +const char *throttle_group_get_name(BlockDriverState *bs) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + return tg->name; +} + +/* Return the next BlockDriverState in the round-robin sequence, + * simulating a circular list. + * + * This assumes that tg->lock is held. + * + * @bs: the current BlockDriverState + * @ret: the next BlockDriverState in the sequence + */ +static BlockDriverState *throttle_group_next_bs(BlockDriverState *bs) +{ + ThrottleState *ts = bs->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + BlockDriverState *next = QLIST_NEXT(bs, round_robin); + + if (!next) { + return QLIST_FIRST(&tg->head); + } + + return next; +} + +/* Return the next BlockDriverState in the round-robin sequence with + * pending I/O requests. + * + * This assumes that tg->lock is held. + * + * @bs: the current BlockDriverState + * @is_write: the type of operation (read/write) + * @ret: the next BlockDriverState with pending requests, or bs + * if there is none. + */ +static BlockDriverState *next_throttle_token(BlockDriverState *bs, + bool is_write) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + BlockDriverState *token, *start; + + start = token = tg->tokens[is_write]; + + /* get next bs round in round robin style */ + token = throttle_group_next_bs(token); + while (token != start && !token->pending_reqs[is_write]) { + token = throttle_group_next_bs(token); + } + + /* If no IO are queued for scheduling on the next round robin token + * then decide the token is the current bs because chances are + * the current bs get the current request queued. + */ + if (token == start && !token->pending_reqs[is_write]) { + token = bs; + } + + return token; +} + +/* Check if the next I/O request for a BlockDriverState needs to be + * throttled or not. If there's no timer set in this group, set one + * and update the token accordingly. + * + * This assumes that tg->lock is held. + * + * @bs: the current BlockDriverState + * @is_write: the type of operation (read/write) + * @ret: whether the I/O request needs to be throttled or not + */ +static bool throttle_group_schedule_timer(BlockDriverState *bs, + bool is_write) +{ + ThrottleState *ts = bs->throttle_state; + ThrottleTimers *tt = &bs->throttle_timers; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + bool must_wait; + + /* Check if any of the timers in this group is already armed */ + if (tg->any_timer_armed[is_write]) { + return true; + } + + must_wait = throttle_schedule_timer(ts, tt, is_write); + + /* If a timer just got armed, set bs as the current token */ + if (must_wait) { + tg->tokens[is_write] = bs; + tg->any_timer_armed[is_write] = true; + } + + return must_wait; +} + +/* Look for the next pending I/O request and schedule it. + * + * This assumes that tg->lock is held. + * + * @bs: the current BlockDriverState + * @is_write: the type of operation (read/write) + */ +static void schedule_next_request(BlockDriverState *bs, bool is_write) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + bool must_wait; + BlockDriverState *token; + + /* Check if there's any pending request to schedule next */ + token = next_throttle_token(bs, is_write); + if (!token->pending_reqs[is_write]) { + return; + } + + /* Set a timer for the request if it needs to be throttled */ + must_wait = throttle_group_schedule_timer(token, is_write); + + /* If it doesn't have to wait, queue it for immediate execution */ + if (!must_wait) { + /* Give preference to requests from the current bs */ + if (qemu_in_coroutine() && + qemu_co_queue_next(&bs->throttled_reqs[is_write])) { + token = bs; + } else { + ThrottleTimers *tt = &token->throttle_timers; + int64_t now = qemu_clock_get_ns(tt->clock_type); + timer_mod(tt->timers[is_write], now + 1); + tg->any_timer_armed[is_write] = true; + } + tg->tokens[is_write] = token; + } +} + +/* Check if an I/O request needs to be throttled, wait and set a timer + * if necessary, and schedule the next request using a round robin + * algorithm. + * + * @bs: the current BlockDriverState + * @bytes: the number of bytes for this I/O + * @is_write: the type of operation (read/write) + */ +void coroutine_fn throttle_group_co_io_limits_intercept(BlockDriverState *bs, + unsigned int bytes, + bool is_write) +{ + bool must_wait; + BlockDriverState *token; + + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); + + /* First we check if this I/O has to be throttled. */ + token = next_throttle_token(bs, is_write); + must_wait = throttle_group_schedule_timer(token, is_write); + + /* Wait if there's a timer set or queued requests of this type */ + if (must_wait || bs->pending_reqs[is_write]) { + bs->pending_reqs[is_write]++; + qemu_mutex_unlock(&tg->lock); + qemu_co_queue_wait(&bs->throttled_reqs[is_write]); + qemu_mutex_lock(&tg->lock); + bs->pending_reqs[is_write]--; + } + + /* The I/O will be executed, so do the accounting */ + throttle_account(bs->throttle_state, is_write, bytes); + + /* Schedule the next request */ + schedule_next_request(bs, is_write); + + qemu_mutex_unlock(&tg->lock); +} + +/* Update the throttle configuration for a particular group. Similar + * to throttle_config(), but guarantees atomicity within the + * throttling group. + * + * @bs: a BlockDriverState that is member of the group + * @cfg: the configuration to set + */ +void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg) +{ + ThrottleTimers *tt = &bs->throttle_timers; + ThrottleState *ts = bs->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); + throttle_config(ts, tt, cfg); + /* throttle_config() cancels the timers */ + tg->any_timer_armed[0] = tg->any_timer_armed[1] = false; + qemu_mutex_unlock(&tg->lock); +} + +/* Get the throttle configuration from a particular group. Similar to + * throttle_get_config(), but guarantees atomicity within the + * throttling group. + * + * @bs: a BlockDriverState that is member of the group + * @cfg: the configuration will be written here + */ +void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg) +{ + ThrottleState *ts = bs->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); + throttle_get_config(ts, cfg); + qemu_mutex_unlock(&tg->lock); +} + +/* ThrottleTimers callback. This wakes up a request that was waiting + * because it had been throttled. + * + * @bs: the BlockDriverState whose request had been throttled + * @is_write: the type of operation (read/write) + */ +static void timer_cb(BlockDriverState *bs, bool is_write) +{ + ThrottleState *ts = bs->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + bool empty_queue; + + /* The timer has just been fired, so we can update the flag */ + qemu_mutex_lock(&tg->lock); + tg->any_timer_armed[is_write] = false; + qemu_mutex_unlock(&tg->lock); + + /* Run the request that was waiting for this timer */ + empty_queue = !qemu_co_enter_next(&bs->throttled_reqs[is_write]); + + /* If the request queue was empty then we have to take care of + * scheduling the next one */ + if (empty_queue) { + qemu_mutex_lock(&tg->lock); + schedule_next_request(bs, is_write); + qemu_mutex_unlock(&tg->lock); + } +} + +static void read_timer_cb(void *opaque) +{ + timer_cb(opaque, false); +} + +static void write_timer_cb(void *opaque) +{ + timer_cb(opaque, true); +} + +/* Register a BlockDriverState in the throttling group, also + * initializing its timers and updating its throttle_state pointer to + * point to it. If a throttling group with that name does not exist + * yet, it will be created. + * + * @bs: the BlockDriverState to insert + * @groupname: the name of the group + */ +void throttle_group_register_bs(BlockDriverState *bs, const char *groupname) +{ + int i; + ThrottleGroup *tg = throttle_group_incref(groupname); + int clock_type = QEMU_CLOCK_REALTIME; + + if (qtest_enabled()) { + /* For testing block IO throttling only */ + clock_type = QEMU_CLOCK_VIRTUAL; + } + + bs->throttle_state = &tg->ts; + + qemu_mutex_lock(&tg->lock); + /* If the ThrottleGroup is new set this BlockDriverState as the token */ + for (i = 0; i < 2; i++) { + if (!tg->tokens[i]) { + tg->tokens[i] = bs; + } + } + + QLIST_INSERT_HEAD(&tg->head, bs, round_robin); + + throttle_timers_init(&bs->throttle_timers, + bdrv_get_aio_context(bs), + clock_type, + read_timer_cb, + write_timer_cb, + bs); + + qemu_mutex_unlock(&tg->lock); +} + +/* Unregister a BlockDriverState from its group, removing it from the + * list, destroying the timers and setting the throttle_state pointer + * to NULL. + * + * The group will be destroyed if it's empty after this operation. + * + * @bs: the BlockDriverState to remove + */ +void throttle_group_unregister_bs(BlockDriverState *bs) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + int i; + + qemu_mutex_lock(&tg->lock); + for (i = 0; i < 2; i++) { + if (tg->tokens[i] == bs) { + BlockDriverState *token = throttle_group_next_bs(bs); + /* Take care of the case where this is the last bs in the group */ + if (token == bs) { + token = NULL; + } + tg->tokens[i] = token; + } + } + + /* remove the current bs from the list */ + QLIST_REMOVE(bs, round_robin); + throttle_timers_destroy(&bs->throttle_timers); + qemu_mutex_unlock(&tg->lock); + + throttle_group_unref(tg); + bs->throttle_state = NULL; +} + +/* Acquire the lock of this throttling group. + * + * You won't normally need to use this. None of the functions from the + * ThrottleGroup API require you to acquire the lock since all of them + * deal with it internally. + * + * This should only be used in exceptional cases when you want to + * access the protected fields of a BlockDriverState directly + * (e.g. bdrv_swap()). + * + * @bs: a BlockDriverState that is member of the group + */ +void throttle_group_lock(BlockDriverState *bs) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); +} + +/* Release the lock of this throttling group. + * + * See the comments in throttle_group_lock(). + */ +void throttle_group_unlock(BlockDriverState *bs) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + qemu_mutex_unlock(&tg->lock); +} + +static void throttle_groups_init(void) +{ + qemu_mutex_init(&throttle_groups_lock); +} + +block_init(throttle_groups_init); diff --git a/block/vmdk.c b/block/vmdk.c index b66745dfdd..be9263a34e 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -321,37 +321,13 @@ static int vmdk_is_cid_valid(BlockDriverState *bs) return 1; } -/* Queue extents, if any, for reopen() */ +/* We have nothing to do for VMDK reopen, stubs just return success */ static int vmdk_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, Error **errp) { - BDRVVmdkState *s; - int ret = -1; - int i; - VmdkExtent *e; - assert(state != NULL); assert(state->bs != NULL); - - if (queue == NULL) { - error_setg(errp, "No reopen queue for VMDK extents"); - goto exit; - } - - s = state->bs->opaque; - - assert(s != NULL); - - for (i = 0; i < s->num_extents; i++) { - e = &s->extents[i]; - if (e->file != state->bs->file) { - bdrv_reopen_queue(queue, e->file, state->flags); - } - } - ret = 0; - -exit: - return ret; + return 0; } static int vmdk_parent_open(BlockDriverState *bs) @@ -543,7 +519,7 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs, } static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, - Error **errp); + QDict *options, Error **errp); static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset, Error **errp) @@ -582,7 +558,7 @@ static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset, static int vmdk_open_vmdk4(BlockDriverState *bs, BlockDriverState *file, - int flags, Error **errp) + int flags, QDict *options, Error **errp) { int ret; uint32_t magic; @@ -606,7 +582,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, if (!buf) { return -EINVAL; } - ret = vmdk_open_desc_file(bs, flags, buf, errp); + ret = vmdk_open_desc_file(bs, flags, buf, options, errp); g_free(buf); return ret; } @@ -763,7 +739,7 @@ static int vmdk_parse_description(const char *desc, const char *opt_name, /* Open an extent file and append to bs array */ static int vmdk_open_sparse(BlockDriverState *bs, BlockDriverState *file, int flags, - char *buf, Error **errp) + char *buf, QDict *options, Error **errp) { uint32_t magic; @@ -773,7 +749,7 @@ static int vmdk_open_sparse(BlockDriverState *bs, return vmdk_open_vmfs_sparse(bs, file, flags, errp); break; case VMDK4_MAGIC: - return vmdk_open_vmdk4(bs, file, flags, errp); + return vmdk_open_vmdk4(bs, file, flags, options, errp); break; default: error_setg(errp, "Image not in VMDK format"); @@ -783,7 +759,8 @@ static int vmdk_open_sparse(BlockDriverState *bs, } static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, - const char *desc_file_path, Error **errp) + const char *desc_file_path, QDict *options, + Error **errp) { int ret; int matches; @@ -797,6 +774,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, BlockDriverState *extent_file; BDRVVmdkState *s = bs->opaque; VmdkExtent *extent; + char extent_opt_prefix[32]; while (*p) { /* parse extent line in one of below formats: @@ -846,8 +824,12 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, extent_path = g_malloc0(PATH_MAX); path_combine(extent_path, PATH_MAX, desc_file_path, fname); extent_file = NULL; - ret = bdrv_open(&extent_file, extent_path, NULL, NULL, - bs->open_flags | BDRV_O_PROTOCOL, NULL, errp); + + ret = snprintf(extent_opt_prefix, 32, "extents.%d", s->num_extents); + assert(ret < 32); + + ret = bdrv_open_image(&extent_file, extent_path, options, + extent_opt_prefix, bs, &child_file, false, errp); g_free(extent_path); if (ret) { return ret; @@ -870,7 +852,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, if (!buf) { ret = -EINVAL; } else { - ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf, errp); + ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf, + options, errp); } g_free(buf); if (ret) { @@ -898,7 +881,7 @@ next_line: } static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, - Error **errp) + QDict *options, Error **errp) { int ret; char ct[128]; @@ -920,7 +903,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, } s->create_type = g_strdup(ct); s->desc_offset = 0; - ret = vmdk_parse_extents(buf, bs, bs->file->exact_filename, errp); + ret = vmdk_parse_extents(buf, bs, bs->file->exact_filename, options, errp); exit: return ret; } @@ -942,11 +925,11 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, switch (magic) { case VMDK3_MAGIC: case VMDK4_MAGIC: - ret = vmdk_open_sparse(bs, bs->file, flags, buf, errp); + ret = vmdk_open_sparse(bs, bs->file, flags, buf, options, errp); s->desc_offset = 0x200; break; default: - ret = vmdk_open_desc_file(bs, flags, buf, errp); + ret = vmdk_open_desc_file(bs, flags, buf, options, errp); break; } if (ret) { @@ -1248,6 +1231,17 @@ static VmdkExtent *find_extent(BDRVVmdkState *s, return NULL; } +static inline uint64_t vmdk_find_index_in_cluster(VmdkExtent *extent, + int64_t sector_num) +{ + uint64_t index_in_cluster, extent_begin_sector, extent_relative_sector_num; + + extent_begin_sector = extent->end_sector - extent->sectors; + extent_relative_sector_num = sector_num - extent_begin_sector; + index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; + return index_in_cluster; +} + static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { @@ -1285,7 +1279,7 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, break; } - index_in_cluster = sector_num % extent->cluster_sectors; + index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num); n = extent->cluster_sectors - index_in_cluster; if (n > nb_sectors) { n = nb_sectors; @@ -1413,7 +1407,6 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, BDRVVmdkState *s = bs->opaque; int ret; uint64_t n, index_in_cluster; - uint64_t extent_begin_sector, extent_relative_sector_num; VmdkExtent *extent = NULL; uint64_t cluster_offset; @@ -1425,9 +1418,7 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, ret = get_cluster_offset(bs, extent, NULL, sector_num << 9, false, &cluster_offset, 0, 0); - extent_begin_sector = extent->end_sector - extent->sectors; - extent_relative_sector_num = sector_num - extent_begin_sector; - index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; + index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num); n = extent->cluster_sectors - index_in_cluster; if (n > nb_sectors) { n = nb_sectors; @@ -1489,7 +1480,6 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, VmdkExtent *extent = NULL; int ret; int64_t index_in_cluster, n; - uint64_t extent_begin_sector, extent_relative_sector_num; uint64_t cluster_offset; VmdkMetaData m_data; @@ -1505,9 +1495,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, if (!extent) { return -EIO; } - extent_begin_sector = extent->end_sector - extent->sectors; - extent_relative_sector_num = sector_num - extent_begin_sector; - index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; + index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num); n = extent->cluster_sectors - index_in_cluster; if (n > nb_sectors) { n = nb_sectors; |