diff options
47 files changed, 943 insertions, 453 deletions
diff --git a/.travis.yml b/.travis.yml index caf0a1f8fa..92b00927d4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -232,7 +232,7 @@ matrix: # Acceptance (Functional) tests - env: - - CONFIG="--python=/usr/bin/python3 --target-list=x86_64-softmmu,mips-softmmu,mips64el-softmmu,aarch64-softmmu,arm-softmmu,s390x-softmmu,alpha-softmmu" + - CONFIG="--python=/usr/bin/python3 --target-list=x86_64-softmmu,mips-softmmu,mips64el-softmmu,aarch64-softmmu,arm-softmmu,s390x-softmmu,alpha-softmmu,ppc64-softmmu" - TEST_CMD="make check-acceptance" after_failure: - cat tests/results/latest/job.log diff --git a/audio/audio.c b/audio/audio.c index 7d715332c9..e99fcd0694 100644 --- a/audio/audio.c +++ b/audio/audio.c @@ -1412,8 +1412,9 @@ static AudioState *audio_init(Audiodev *dev, const char *name) drvname = AudiodevDriver_str(dev->driver); } else if (!QTAILQ_EMPTY(&audio_states)) { if (!legacy_config) { - dolog("You must specify an audiodev= for the device %s\n", name); - exit(1); + dolog("Device %s: audiodev default parameter is deprecated, please " + "specify audiodev=%s\n", name, + QTAILQ_FIRST(&audio_states)->dev->id); } return QTAILQ_FIRST(&audio_states); } else { @@ -1548,8 +1549,7 @@ CaptureVoiceOut *AUD_add_capture( if (!s) { if (!legacy_config) { - dolog("You must specify audiodev when trying to capture\n"); - return NULL; + dolog("Capturing without setting an audiodev is deprecated\n"); } s = audio_init(NULL, NULL); } @@ -1685,7 +1685,7 @@ void audio_create_pdos(Audiodev *dev) } \ if (!dev->u.driver.has_out) { \ dev->u.driver.out = g_malloc0( \ - sizeof(AudiodevAlsaPerDirectionOptions)); \ + sizeof(Audiodev##pdo_name##PerDirectionOptions)); \ dev->u.driver.has_out = true; \ } \ break diff --git a/block/backup.c b/block/backup.c index 2baf7bed65..03637aeb11 100644 --- a/block/backup.c +++ b/block/backup.c @@ -674,7 +674,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, return NULL; } - if (compress && target->drv->bdrv_co_pwritev_compressed == NULL) { + if (compress && !block_driver_can_compress(target->drv)) { error_setg(errp, "Compression is not supported for this drive %s", bdrv_get_device_name(target)); return NULL; diff --git a/block/io.c b/block/io.c index 56bbf195bb..0fa10831ed 100644 --- a/block/io.c +++ b/block/io.c @@ -146,7 +146,8 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) /* Default alignment based on whether driver has byte interface */ bs->bl.request_alignment = (drv->bdrv_co_preadv || - drv->bdrv_aio_preadv) ? 1 : 512; + drv->bdrv_aio_preadv || + drv->bdrv_co_preadv_part) ? 1 : 512; /* Take some limits from the children as a default */ if (bs->file) { @@ -1044,11 +1045,14 @@ static void bdrv_co_io_em_complete(void *opaque, int ret) static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) + QEMUIOVector *qiov, + size_t qiov_offset, int flags) { BlockDriver *drv = bs->drv; int64_t sector_num; unsigned int nb_sectors; + QEMUIOVector local_qiov; + int ret; assert(!(flags & ~BDRV_REQ_MASK)); assert(!(flags & BDRV_REQ_NO_FALLBACK)); @@ -1057,8 +1061,19 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, return -ENOMEDIUM; } + if (drv->bdrv_co_preadv_part) { + return drv->bdrv_co_preadv_part(bs, offset, bytes, qiov, qiov_offset, + flags); + } + + if (qiov_offset > 0 || bytes != qiov->size) { + qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes); + qiov = &local_qiov; + } + if (drv->bdrv_co_preadv) { - return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags); + ret = drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags); + goto out; } if (drv->bdrv_aio_preadv) { @@ -1070,10 +1085,12 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags, bdrv_co_io_em_complete, &co); if (acb == NULL) { - return -EIO; + ret = -EIO; + goto out; } else { qemu_coroutine_yield(); - return co.ret; + ret = co.ret; + goto out; } } @@ -1085,16 +1102,25 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, assert(bytes <= BDRV_REQUEST_MAX_BYTES); assert(drv->bdrv_co_readv); - return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); + ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); + +out: + if (qiov == &local_qiov) { + qemu_iovec_destroy(&local_qiov); + } + + return ret; } static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) + QEMUIOVector *qiov, + size_t qiov_offset, int flags) { BlockDriver *drv = bs->drv; int64_t sector_num; unsigned int nb_sectors; + QEMUIOVector local_qiov; int ret; assert(!(flags & ~BDRV_REQ_MASK)); @@ -1104,6 +1130,18 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, return -ENOMEDIUM; } + if (drv->bdrv_co_pwritev_part) { + ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, + flags & bs->supported_write_flags); + flags &= ~bs->supported_write_flags; + goto emulate_flags; + } + + if (qiov_offset > 0 || bytes != qiov->size) { + qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes); + qiov = &local_qiov; + } + if (drv->bdrv_co_pwritev) { ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, flags & bs->supported_write_flags); @@ -1147,29 +1185,49 @@ emulate_flags: ret = bdrv_co_flush(bs); } + if (qiov == &local_qiov) { + qemu_iovec_destroy(&local_qiov); + } + return ret; } static int coroutine_fn bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov) + uint64_t bytes, QEMUIOVector *qiov, + size_t qiov_offset) { BlockDriver *drv = bs->drv; + QEMUIOVector local_qiov; + int ret; if (!drv) { return -ENOMEDIUM; } - if (!drv->bdrv_co_pwritev_compressed) { + if (!block_driver_can_compress(drv)) { return -ENOTSUP; } - return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov); + if (drv->bdrv_co_pwritev_compressed_part) { + return drv->bdrv_co_pwritev_compressed_part(bs, offset, bytes, + qiov, qiov_offset); + } + + if (qiov_offset == 0) { + return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov); + } + + qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes); + ret = drv->bdrv_co_pwritev_compressed(bs, offset, bytes, &local_qiov); + qemu_iovec_destroy(&local_qiov); + + return ret; } static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, - int flags) + size_t qiov_offset, int flags) { BlockDriverState *bs = child->bs; @@ -1178,10 +1236,9 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, * modifying the image file. This is critical for zero-copy guest I/O * where anything might happen inside guest memory. */ - void *bounce_buffer; + void *bounce_buffer = NULL; BlockDriver *drv = bs->drv; - QEMUIOVector local_qiov; int64_t cluster_offset; int64_t cluster_bytes; size_t skip_bytes; @@ -1214,14 +1271,6 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, trace_bdrv_co_do_copy_on_readv(bs, offset, bytes, cluster_offset, cluster_bytes); - bounce_buffer = qemu_try_blockalign(bs, - MIN(MIN(max_transfer, cluster_bytes), - MAX_BOUNCE_BUFFER)); - if (bounce_buffer == NULL) { - ret = -ENOMEM; - goto err; - } - while (cluster_bytes) { int64_t pnum; @@ -1244,12 +1293,25 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, assert(skip_bytes < pnum); if (ret <= 0) { + QEMUIOVector local_qiov; + /* Must copy-on-read; use the bounce buffer */ pnum = MIN(pnum, MAX_BOUNCE_BUFFER); + if (!bounce_buffer) { + int64_t max_we_need = MAX(pnum, cluster_bytes - pnum); + int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER); + int64_t bounce_buffer_len = MIN(max_we_need, max_allowed); + + bounce_buffer = qemu_try_blockalign(bs, bounce_buffer_len); + if (!bounce_buffer) { + ret = -ENOMEM; + goto err; + } + } qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum); ret = bdrv_driver_preadv(bs, cluster_offset, pnum, - &local_qiov, 0); + &local_qiov, 0, 0); if (ret < 0) { goto err; } @@ -1267,7 +1329,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, * necessary to flush even in cache=writethrough mode. */ ret = bdrv_driver_pwritev(bs, cluster_offset, pnum, - &local_qiov, + &local_qiov, 0, BDRV_REQ_WRITE_UNCHANGED); } @@ -1281,16 +1343,15 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, } if (!(flags & BDRV_REQ_PREFETCH)) { - qemu_iovec_from_buf(qiov, progress, bounce_buffer + skip_bytes, + qemu_iovec_from_buf(qiov, qiov_offset + progress, + bounce_buffer + skip_bytes, pnum - skip_bytes); } } else if (!(flags & BDRV_REQ_PREFETCH)) { /* Read directly into the destination */ - qemu_iovec_init(&local_qiov, qiov->niov); - qemu_iovec_concat(&local_qiov, qiov, progress, pnum - skip_bytes); - ret = bdrv_driver_preadv(bs, offset + progress, local_qiov.size, - &local_qiov, 0); - qemu_iovec_destroy(&local_qiov); + ret = bdrv_driver_preadv(bs, offset + progress, + MIN(pnum - skip_bytes, bytes - progress), + qiov, qiov_offset + progress, 0); if (ret < 0) { goto err; } @@ -1315,7 +1376,7 @@ err: */ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, - int64_t align, QEMUIOVector *qiov, int flags) + int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags) { BlockDriverState *bs = child->bs; int64_t total_bytes, max_bytes; @@ -1326,7 +1387,6 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, assert(is_power_of_2(align)); assert((offset & (align - 1)) == 0); assert((bytes & (align - 1)) == 0); - assert(!qiov || bytes == qiov->size); assert((bs->open_flags & BDRV_O_NO_IO) == 0); max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX), align); @@ -1364,7 +1424,8 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, } if (!ret || pnum != bytes) { - ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov, flags); + ret = bdrv_co_do_copy_on_readv(child, offset, bytes, + qiov, qiov_offset, flags); goto out; } else if (flags & BDRV_REQ_PREFETCH) { goto out; @@ -1380,7 +1441,7 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align); if (bytes <= max_bytes && bytes <= max_transfer) { - ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0); + ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0); goto out; } @@ -1388,17 +1449,12 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, int num; if (max_bytes) { - QEMUIOVector local_qiov; - num = MIN(bytes_remaining, MIN(max_bytes, max_transfer)); assert(num); - qemu_iovec_init(&local_qiov, qiov->niov); - qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num); ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining, - num, &local_qiov, 0); + num, qiov, bytes - bytes_remaining, 0); max_bytes -= num; - qemu_iovec_destroy(&local_qiov); } else { num = bytes_remaining; ret = qemu_iovec_memset(qiov, bytes - bytes_remaining, 0, @@ -1415,28 +1471,187 @@ out: } /* - * Handle a read request in coroutine context + * Request padding + * + * |<---- align ----->| |<----- align ---->| + * |<- head ->|<------------- bytes ------------->|<-- tail -->| + * | | | | | | + * -*----------$-------*-------- ... --------*-----$------------*--- + * | | | | | | + * | offset | | end | + * ALIGN_DOWN(offset) ALIGN_UP(offset) ALIGN_DOWN(end) ALIGN_UP(end) + * [buf ... ) [tail_buf ) + * + * @buf is an aligned allocation needed to store @head and @tail paddings. @head + * is placed at the beginning of @buf and @tail at the @end. + * + * @tail_buf is a pointer to sub-buffer, corresponding to align-sized chunk + * around tail, if tail exists. + * + * @merge_reads is true for small requests, + * if @buf_len == @head + bytes + @tail. In this case it is possible that both + * head and tail exist but @buf_len == align and @tail_buf == @buf. + */ +typedef struct BdrvRequestPadding { + uint8_t *buf; + size_t buf_len; + uint8_t *tail_buf; + size_t head; + size_t tail; + bool merge_reads; + QEMUIOVector local_qiov; +} BdrvRequestPadding; + +static bool bdrv_init_padding(BlockDriverState *bs, + int64_t offset, int64_t bytes, + BdrvRequestPadding *pad) +{ + uint64_t align = bs->bl.request_alignment; + size_t sum; + + memset(pad, 0, sizeof(*pad)); + + pad->head = offset & (align - 1); + pad->tail = ((offset + bytes) & (align - 1)); + if (pad->tail) { + pad->tail = align - pad->tail; + } + + if ((!pad->head && !pad->tail) || !bytes) { + return false; + } + + sum = pad->head + bytes + pad->tail; + pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align; + pad->buf = qemu_blockalign(bs, pad->buf_len); + pad->merge_reads = sum == pad->buf_len; + if (pad->tail) { + pad->tail_buf = pad->buf + pad->buf_len - align; + } + + return true; +} + +static int bdrv_padding_rmw_read(BdrvChild *child, + BdrvTrackedRequest *req, + BdrvRequestPadding *pad, + bool zero_middle) +{ + QEMUIOVector local_qiov; + BlockDriverState *bs = child->bs; + uint64_t align = bs->bl.request_alignment; + int ret; + + assert(req->serialising && pad->buf); + + if (pad->head || pad->merge_reads) { + uint64_t bytes = pad->merge_reads ? pad->buf_len : align; + + qemu_iovec_init_buf(&local_qiov, pad->buf, bytes); + + if (pad->head) { + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); + } + if (pad->merge_reads && pad->tail) { + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); + } + ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes, + align, &local_qiov, 0, 0); + if (ret < 0) { + return ret; + } + if (pad->head) { + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); + } + if (pad->merge_reads && pad->tail) { + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); + } + + if (pad->merge_reads) { + goto zero_mem; + } + } + + if (pad->tail) { + qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align); + + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); + ret = bdrv_aligned_preadv( + child, req, + req->overlap_offset + req->overlap_bytes - align, + align, align, &local_qiov, 0, 0); + if (ret < 0) { + return ret; + } + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); + } + +zero_mem: + if (zero_middle) { + memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail); + } + + return 0; +} + +static void bdrv_padding_destroy(BdrvRequestPadding *pad) +{ + if (pad->buf) { + qemu_vfree(pad->buf); + qemu_iovec_destroy(&pad->local_qiov); + } +} + +/* + * bdrv_pad_request + * + * Exchange request parameters with padded request if needed. Don't include RMW + * read of padding, bdrv_padding_rmw_read() should be called separately if + * needed. + * + * All parameters except @bs are in-out: they represent original request at + * function call and padded (if padding needed) at function finish. + * + * Function always succeeds. */ +static bool bdrv_pad_request(BlockDriverState *bs, + QEMUIOVector **qiov, size_t *qiov_offset, + int64_t *offset, unsigned int *bytes, + BdrvRequestPadding *pad) +{ + if (!bdrv_init_padding(bs, *offset, *bytes, pad)) { + return false; + } + + qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head, + *qiov, *qiov_offset, *bytes, + pad->buf + pad->buf_len - pad->tail, pad->tail); + *bytes += pad->head + pad->tail; + *offset -= pad->head; + *qiov = &pad->local_qiov; + *qiov_offset = 0; + + return true; +} + int coroutine_fn bdrv_co_preadv(BdrvChild *child, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { + return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags); +} + +int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, + int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) +{ BlockDriverState *bs = child->bs; - BlockDriver *drv = bs->drv; BdrvTrackedRequest req; - - uint64_t align = bs->bl.request_alignment; - uint8_t *head_buf = NULL; - uint8_t *tail_buf = NULL; - QEMUIOVector local_qiov; - bool use_local_qiov = false; + BdrvRequestPadding pad; int ret; - trace_bdrv_co_preadv(child->bs, offset, bytes, flags); - - if (!drv) { - return -ENOMEDIUM; - } + trace_bdrv_co_preadv(bs, offset, bytes, flags); ret = bdrv_check_byte_request(bs, offset, bytes); if (ret < 0) { @@ -1450,43 +1665,16 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, flags |= BDRV_REQ_COPY_ON_READ; } - /* Align read if necessary by padding qiov */ - if (offset & (align - 1)) { - head_buf = qemu_blockalign(bs, align); - qemu_iovec_init(&local_qiov, qiov->niov + 2); - qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); - qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); - use_local_qiov = true; - - bytes += offset & (align - 1); - offset = offset & ~(align - 1); - } - - if ((offset + bytes) & (align - 1)) { - if (!use_local_qiov) { - qemu_iovec_init(&local_qiov, qiov->niov + 1); - qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); - use_local_qiov = true; - } - tail_buf = qemu_blockalign(bs, align); - qemu_iovec_add(&local_qiov, tail_buf, - align - ((offset + bytes) & (align - 1))); - - bytes = ROUND_UP(bytes, align); - } + bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad); tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); - ret = bdrv_aligned_preadv(child, &req, offset, bytes, align, - use_local_qiov ? &local_qiov : qiov, - flags); + ret = bdrv_aligned_preadv(child, &req, offset, bytes, + bs->bl.request_alignment, + qiov, qiov_offset, flags); tracked_request_end(&req); bdrv_dec_in_flight(bs); - if (use_local_qiov) { - qemu_iovec_destroy(&local_qiov); - qemu_vfree(head_buf); - qemu_vfree(tail_buf); - } + bdrv_padding_destroy(&pad); return ret; } @@ -1579,7 +1767,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, } qemu_iovec_init_buf(&qiov, buf, num); - ret = bdrv_driver_pwritev(bs, offset, num, &qiov, write_flags); + ret = bdrv_driver_pwritev(bs, offset, num, &qiov, 0, write_flags); /* Keep bounce buffer around if it is big enough for all * all future requests. @@ -1694,7 +1882,7 @@ bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, uint64_t bytes, */ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, - int64_t align, QEMUIOVector *qiov, int flags) + int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags) { BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; @@ -1714,7 +1902,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, assert(is_power_of_2(align)); assert((offset & (align - 1)) == 0); assert((bytes & (align - 1)) == 0); - assert(!qiov || bytes == qiov->size); + assert(!qiov || qiov_offset + bytes <= qiov->size); max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX), align); @@ -1722,7 +1910,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF && !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes && - qemu_iovec_is_zero(qiov)) { + qemu_iovec_is_zero(qiov, qiov_offset, bytes)) { flags |= BDRV_REQ_ZERO_WRITE; if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) { flags |= BDRV_REQ_MAY_UNMAP; @@ -1735,15 +1923,15 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO); ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags); } else if (flags & BDRV_REQ_WRITE_COMPRESSED) { - ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, qiov); + ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, + qiov, qiov_offset); } else if (bytes <= max_transfer) { bdrv_debug_event(bs, BLKDBG_PWRITEV); - ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags); + ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, qiov_offset, flags); } else { bdrv_debug_event(bs, BLKDBG_PWRITEV); while (bytes_remaining) { int num = MIN(bytes_remaining, max_transfer); - QEMUIOVector local_qiov; int local_flags = flags; assert(num); @@ -1753,12 +1941,10 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, * need to flush on the last iteration */ local_flags &= ~BDRV_REQ_FUA; } - qemu_iovec_init(&local_qiov, qiov->niov); - qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num); ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining, - num, &local_qiov, local_flags); - qemu_iovec_destroy(&local_qiov); + num, qiov, bytes - bytes_remaining, + local_flags); if (ret < 0) { break; } @@ -1782,44 +1968,34 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, BdrvTrackedRequest *req) { BlockDriverState *bs = child->bs; - uint8_t *buf = NULL; QEMUIOVector local_qiov; uint64_t align = bs->bl.request_alignment; - unsigned int head_padding_bytes, tail_padding_bytes; int ret = 0; + bool padding; + BdrvRequestPadding pad; - head_padding_bytes = offset & (align - 1); - tail_padding_bytes = (align - (offset + bytes)) & (align - 1); - - - assert(flags & BDRV_REQ_ZERO_WRITE); - if (head_padding_bytes || tail_padding_bytes) { - buf = qemu_blockalign(bs, align); - qemu_iovec_init_buf(&local_qiov, buf, align); - } - if (head_padding_bytes) { - uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes); - - /* RMW the unaligned part before head. */ + padding = bdrv_init_padding(bs, offset, bytes, &pad); + if (padding) { mark_request_serialising(req, align); wait_serialising_requests(req); - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); - ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align, - align, &local_qiov, 0); - if (ret < 0) { - goto fail; - } - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); - memset(buf + head_padding_bytes, 0, zero_bytes); - ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align, - align, &local_qiov, - flags & ~BDRV_REQ_ZERO_WRITE); - if (ret < 0) { - goto fail; + bdrv_padding_rmw_read(child, req, &pad, true); + + if (pad.head || pad.merge_reads) { + int64_t aligned_offset = offset & ~(align - 1); + int64_t write_bytes = pad.merge_reads ? pad.buf_len : align; + + qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes); + ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes, + align, &local_qiov, 0, + flags & ~BDRV_REQ_ZERO_WRITE); + if (ret < 0 || pad.merge_reads) { + /* Error or all work is done */ + goto out; + } + offset += write_bytes - pad.head; + bytes -= write_bytes - pad.head; } - offset += zero_bytes; - bytes -= zero_bytes; } assert(!bytes || (offset & (align - 1)) == 0); @@ -1827,9 +2003,9 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, /* Write the aligned part in the middle. */ uint64_t aligned_bytes = bytes & ~(align - 1); ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align, - NULL, flags); + NULL, 0, flags); if (ret < 0) { - goto fail; + goto out; } bytes -= aligned_bytes; offset += aligned_bytes; @@ -1837,26 +2013,18 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, assert(!bytes || (offset & (align - 1)) == 0); if (bytes) { - assert(align == tail_padding_bytes + bytes); - /* RMW the unaligned part after tail. */ - mark_request_serialising(req, align); - wait_serialising_requests(req); - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); - ret = bdrv_aligned_preadv(child, req, offset, align, - align, &local_qiov, 0); - if (ret < 0) { - goto fail; - } - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); + assert(align == pad.tail + bytes); - memset(buf, 0, bytes); + qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align); ret = bdrv_aligned_pwritev(child, req, offset, align, align, - &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE); + &local_qiov, 0, + flags & ~BDRV_REQ_ZERO_WRITE); } -fail: - qemu_vfree(buf); - return ret; +out: + bdrv_padding_destroy(&pad); + + return ret; } /* @@ -1866,13 +2034,17 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { + return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags); +} + +int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) +{ BlockDriverState *bs = child->bs; BdrvTrackedRequest req; uint64_t align = bs->bl.request_alignment; - uint8_t *head_buf = NULL; - uint8_t *tail_buf = NULL; - QEMUIOVector local_qiov; - bool use_local_qiov = false; + BdrvRequestPadding pad; int ret; trace_bdrv_co_pwritev(child->bs, offset, bytes, flags); @@ -1899,86 +2071,21 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, goto out; } - if (offset & (align - 1)) { - QEMUIOVector head_qiov; - + if (bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad)) { mark_request_serialising(&req, align); wait_serialising_requests(&req); - - head_buf = qemu_blockalign(bs, align); - qemu_iovec_init_buf(&head_qiov, head_buf, align); - - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); - ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align, - align, &head_qiov, 0); - if (ret < 0) { - goto fail; - } - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); - - qemu_iovec_init(&local_qiov, qiov->niov + 2); - qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); - qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); - use_local_qiov = true; - - bytes += offset & (align - 1); - offset = offset & ~(align - 1); - - /* We have read the tail already if the request is smaller - * than one aligned block. - */ - if (bytes < align) { - qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes); - bytes = align; - } - } - - if ((offset + bytes) & (align - 1)) { - QEMUIOVector tail_qiov; - size_t tail_bytes; - bool waited; - - mark_request_serialising(&req, align); - waited = wait_serialising_requests(&req); - assert(!waited || !use_local_qiov); - - tail_buf = qemu_blockalign(bs, align); - qemu_iovec_init_buf(&tail_qiov, tail_buf, align); - - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); - ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1), - align, align, &tail_qiov, 0); - if (ret < 0) { - goto fail; - } - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); - - if (!use_local_qiov) { - qemu_iovec_init(&local_qiov, qiov->niov + 1); - qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); - use_local_qiov = true; - } - - tail_bytes = (offset + bytes) & (align - 1); - qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes); - - bytes = ROUND_UP(bytes, align); + bdrv_padding_rmw_read(child, &req, &pad, false); } ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, - use_local_qiov ? &local_qiov : qiov, - flags); + qiov, qiov_offset, flags); -fail: + bdrv_padding_destroy(&pad); - if (use_local_qiov) { - qemu_iovec_destroy(&local_qiov); - } - qemu_vfree(head_buf); - qemu_vfree(tail_buf); out: tracked_request_end(&req); bdrv_dec_in_flight(bs); + return ret; } diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index cc5609e27a..f09cc992af 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -452,8 +452,9 @@ static int coroutine_fn do_perform_cow_read(BlockDriverState *bs, * interface. This avoids double I/O throttling and request tracking, * which can lead to deadlock when block layer copy-on-read is enabled. */ - ret = bs->drv->bdrv_co_preadv(bs, src_cluster_offset + offset_in_cluster, - qiov->size, qiov, 0); + ret = bs->drv->bdrv_co_preadv_part(bs, + src_cluster_offset + offset_in_cluster, + qiov->size, qiov, 0, 0); if (ret < 0) { return ret; } @@ -828,7 +829,6 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) assert(start->nb_bytes <= UINT_MAX - end->nb_bytes); assert(start->nb_bytes + end->nb_bytes <= UINT_MAX - data_bytes); assert(start->offset + start->nb_bytes <= end->offset); - assert(!m->data_qiov || m->data_qiov->size == data_bytes); if ((start->nb_bytes == 0 && end->nb_bytes == 0) || m->skip_cow) { return 0; @@ -860,7 +860,11 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) /* The part of the buffer where the end region is located */ end_buffer = start_buffer + buffer_size - end->nb_bytes; - qemu_iovec_init(&qiov, 2 + (m->data_qiov ? m->data_qiov->niov : 0)); + qemu_iovec_init(&qiov, 2 + (m->data_qiov ? + qemu_iovec_subvec_niov(m->data_qiov, + m->data_qiov_offset, + data_bytes) + : 0)); qemu_co_mutex_unlock(&s->lock); /* First we read the existing data from both COW regions. We @@ -903,7 +907,7 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) if (start->nb_bytes) { qemu_iovec_add(&qiov, start_buffer, start->nb_bytes); } - qemu_iovec_concat(&qiov, m->data_qiov, 0, data_bytes); + qemu_iovec_concat(&qiov, m->data_qiov, m->data_qiov_offset, data_bytes); if (end->nb_bytes) { qemu_iovec_add(&qiov, end_buffer, end->nb_bytes); } diff --git a/block/qcow2.c b/block/qcow2.c index 7c5a4859f7..0882ff6e92 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -76,7 +76,8 @@ qcow2_co_preadv_compressed(BlockDriverState *bs, uint64_t file_cluster_offset, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov); + QEMUIOVector *qiov, + size_t qiov_offset); static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) { @@ -1967,21 +1968,18 @@ out: return ret; } -static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, - int flags) +static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, + size_t qiov_offset, int flags) { BDRVQcow2State *s = bs->opaque; int offset_in_cluster; int ret; unsigned int cur_bytes; /* number of bytes in current iteration */ uint64_t cluster_offset = 0; - uint64_t bytes_done = 0; - QEMUIOVector hd_qiov; uint8_t *cluster_data = NULL; - qemu_iovec_init(&hd_qiov, qiov->niov); - while (bytes != 0) { /* prepare next request */ @@ -2000,34 +1998,31 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, offset_in_cluster = offset_into_cluster(s, offset); - qemu_iovec_reset(&hd_qiov); - qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes); - switch (ret) { case QCOW2_CLUSTER_UNALLOCATED: if (bs->backing) { BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); - ret = bdrv_co_preadv(bs->backing, offset, cur_bytes, - &hd_qiov, 0); + ret = bdrv_co_preadv_part(bs->backing, offset, cur_bytes, + qiov, qiov_offset, 0); if (ret < 0) { goto fail; } } else { /* Note: in this case, no need to wait */ - qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes); + qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes); } break; case QCOW2_CLUSTER_ZERO_PLAIN: case QCOW2_CLUSTER_ZERO_ALLOC: - qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes); + qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes); break; case QCOW2_CLUSTER_COMPRESSED: ret = qcow2_co_preadv_compressed(bs, cluster_offset, offset, cur_bytes, - &hd_qiov); + qiov, qiov_offset); if (ret < 0) { goto fail; } @@ -2059,19 +2054,15 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, } assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); - qemu_iovec_reset(&hd_qiov); - qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes); - } - BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); - ret = bdrv_co_preadv(s->data_file, - cluster_offset + offset_in_cluster, - cur_bytes, &hd_qiov, 0); - if (ret < 0) { - goto fail; - } - if (bs->encrypted) { - assert(s->crypto); + BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); + ret = bdrv_co_pread(s->data_file, + cluster_offset + offset_in_cluster, + cur_bytes, cluster_data, 0); + if (ret < 0) { + goto fail; + } + assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0); if (qcow2_co_decrypt(bs, cluster_offset, offset, @@ -2079,7 +2070,15 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, ret = -EIO; goto fail; } - qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes); + qemu_iovec_from_buf(qiov, qiov_offset, cluster_data, cur_bytes); + } else { + BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); + ret = bdrv_co_preadv_part(s->data_file, + cluster_offset + offset_in_cluster, + cur_bytes, qiov, qiov_offset, 0); + if (ret < 0) { + goto fail; + } } break; @@ -2091,12 +2090,11 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, bytes -= cur_bytes; offset += cur_bytes; - bytes_done += cur_bytes; + qiov_offset += cur_bytes; } ret = 0; fail: - qemu_iovec_destroy(&hd_qiov); qemu_vfree(cluster_data); return ret; @@ -2105,7 +2103,8 @@ fail: /* Check if it's possible to merge a write request with the writing of * the data from the COW regions */ static bool merge_cow(uint64_t offset, unsigned bytes, - QEMUIOVector *hd_qiov, QCowL2Meta *l2meta) + QEMUIOVector *qiov, size_t qiov_offset, + QCowL2Meta *l2meta) { QCowL2Meta *m; @@ -2134,11 +2133,12 @@ static bool merge_cow(uint64_t offset, unsigned bytes, /* Make sure that adding both COW regions to the QEMUIOVector * does not exceed IOV_MAX */ - if (hd_qiov->niov > IOV_MAX - 2) { + if (qemu_iovec_subvec_niov(qiov, qiov_offset, bytes) > IOV_MAX - 2) { continue; } - m->data_qiov = hd_qiov; + m->data_qiov = qiov; + m->data_qiov_offset = qiov_offset; return true; } @@ -2220,24 +2220,22 @@ static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta) return 0; } -static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, - int flags) +static coroutine_fn int qcow2_co_pwritev_part( + BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, int flags) { BDRVQcow2State *s = bs->opaque; int offset_in_cluster; int ret; unsigned int cur_bytes; /* number of sectors in current iteration */ uint64_t cluster_offset; - QEMUIOVector hd_qiov; + QEMUIOVector encrypted_qiov; uint64_t bytes_done = 0; uint8_t *cluster_data = NULL; QCowL2Meta *l2meta = NULL; trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes); - qemu_iovec_init(&hd_qiov, qiov->niov); - qemu_co_mutex_lock(&s->lock); while (bytes != 0) { @@ -2270,9 +2268,6 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, qemu_co_mutex_unlock(&s->lock); - qemu_iovec_reset(&hd_qiov); - qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes); - if (bs->encrypted) { assert(s->crypto); if (!cluster_data) { @@ -2285,9 +2280,9 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, } } - assert(hd_qiov.size <= - QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); - qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size); + assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); + qemu_iovec_to_buf(qiov, qiov_offset + bytes_done, + cluster_data, cur_bytes); if (qcow2_co_encrypt(bs, cluster_offset, offset, cluster_data, cur_bytes) < 0) { @@ -2295,8 +2290,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, goto out_unlocked; } - qemu_iovec_reset(&hd_qiov); - qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes); + qemu_iovec_init_buf(&encrypted_qiov, cluster_data, cur_bytes); } /* Try to efficiently initialize the physical space with zeroes */ @@ -2309,13 +2303,17 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, * writing of the guest data together with that of the COW regions. * If it's not possible (or not necessary) then write the * guest data now. */ - if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) { + if (!merge_cow(offset, cur_bytes, + bs->encrypted ? &encrypted_qiov : qiov, + bs->encrypted ? 0 : qiov_offset + bytes_done, l2meta)) + { BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); trace_qcow2_writev_data(qemu_coroutine_self(), cluster_offset + offset_in_cluster); - ret = bdrv_co_pwritev(s->data_file, - cluster_offset + offset_in_cluster, - cur_bytes, &hd_qiov, 0); + ret = bdrv_co_pwritev_part( + s->data_file, cluster_offset + offset_in_cluster, cur_bytes, + bs->encrypted ? &encrypted_qiov : qiov, + bs->encrypted ? 0 : qiov_offset + bytes_done, 0); if (ret < 0) { goto out_unlocked; } @@ -2344,7 +2342,6 @@ out_locked: qemu_co_mutex_unlock(&s->lock); - qemu_iovec_destroy(&hd_qiov); qemu_vfree(cluster_data); trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); @@ -4009,8 +4006,9 @@ fail: /* XXX: put compressed sectors first, then all the cluster aligned tables to avoid losing bytes in alignment */ static coroutine_fn int -qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov) +qcow2_co_pwritev_compressed_part(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset) { BDRVQcow2State *s = bs->opaque; int ret; @@ -4047,7 +4045,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, /* Zero-pad last write if image size is not cluster aligned */ memset(buf + bytes, 0, s->cluster_size - bytes); } - qemu_iovec_to_buf(qiov, 0, buf, bytes); + qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes); out_buf = g_malloc(s->cluster_size); @@ -4055,7 +4053,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, buf, s->cluster_size); if (out_len == -ENOMEM) { /* could not compress: write normal cluster */ - ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0); + ret = qcow2_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, 0); if (ret < 0) { goto fail; } @@ -4097,7 +4095,8 @@ qcow2_co_preadv_compressed(BlockDriverState *bs, uint64_t file_cluster_offset, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov) + QEMUIOVector *qiov, + size_t qiov_offset) { BDRVQcow2State *s = bs->opaque; int ret = 0, csize, nb_csectors; @@ -4128,7 +4127,7 @@ qcow2_co_preadv_compressed(BlockDriverState *bs, goto fail; } - qemu_iovec_from_buf(qiov, 0, out_buf + offset_in_cluster, bytes); + qemu_iovec_from_buf(qiov, qiov_offset, out_buf + offset_in_cluster, bytes); fail: qemu_vfree(out_buf); @@ -4665,8 +4664,8 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, BDRVQcow2State *s = bs->opaque; BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); - return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos, - qiov->size, qiov, 0); + return bs->drv->bdrv_co_pwritev_part(bs, qcow2_vm_state_offset(s) + pos, + qiov->size, qiov, 0, 0); } static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, @@ -4675,8 +4674,8 @@ static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, BDRVQcow2State *s = bs->opaque; BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); - return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos, - qiov->size, qiov, 0); + return bs->drv->bdrv_co_preadv_part(bs, qcow2_vm_state_offset(s) + pos, + qiov->size, qiov, 0, 0); } /* @@ -5218,8 +5217,8 @@ BlockDriver bdrv_qcow2 = { .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1, .bdrv_co_block_status = qcow2_co_block_status, - .bdrv_co_preadv = qcow2_co_preadv, - .bdrv_co_pwritev = qcow2_co_pwritev, + .bdrv_co_preadv_part = qcow2_co_preadv_part, + .bdrv_co_pwritev_part = qcow2_co_pwritev_part, .bdrv_co_flush_to_os = qcow2_co_flush_to_os, .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes, @@ -5227,7 +5226,7 @@ BlockDriver bdrv_qcow2 = { .bdrv_co_copy_range_from = qcow2_co_copy_range_from, .bdrv_co_copy_range_to = qcow2_co_copy_range_to, .bdrv_co_truncate = qcow2_co_truncate, - .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed, + .bdrv_co_pwritev_compressed_part = qcow2_co_pwritev_compressed_part, .bdrv_make_empty = qcow2_make_empty, .bdrv_snapshot_create = qcow2_snapshot_create, diff --git a/block/qcow2.h b/block/qcow2.h index fc1b0d3c1e..998bcdaef1 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -420,6 +420,7 @@ typedef struct QCowL2Meta * from @cow_start and @cow_end into one single write operation. */ QEMUIOVector *data_qiov; + size_t data_qiov_offset; /** Pointer to next L2Meta of the same write request */ struct QCowL2Meta *next; diff --git a/configure b/configure index e44e454c43..95134c0180 100755 --- a/configure +++ b/configure @@ -1864,7 +1864,7 @@ if ! $python -c 'import sys; sys.exit(sys.version_info < (2,7))'; then fi # Preserve python version since some functionality is dependent on it -python_version=$($python -V 2>&1 | sed -e 's/Python\ //') +python_version=$($python -c 'import sys; print("%d.%d.%d" % (sys.version_info[0], sys.version_info[1], sys.version_info[2]))' 2>/dev/null) # Suppress writing compiled files python="$python -B" @@ -6511,6 +6511,7 @@ if ! $python -c 'import sys; sys.exit(sys.version_info < (3,0))'; then echo echo "warning: Python 2 support is deprecated" >&2 echo "warning: Python 3 will be required for building future versions of QEMU" >&2 + python2="y" fi config_host_mak="config-host.mak" @@ -7333,7 +7334,7 @@ echo "INSTALL_DATA=$install -c -m 0644" >> $config_host_mak echo "INSTALL_PROG=$install -c -m 0755" >> $config_host_mak echo "INSTALL_LIB=$install -c -m 0644" >> $config_host_mak echo "PYTHON=$python" >> $config_host_mak -echo "PYTHON_VERSION=$python_version" >> $config_host_mak +echo "PYTHON2=$python2" >> $config_host_mak echo "CC=$cc" >> $config_host_mak if $iasl -h > /dev/null 2>&1; then echo "IASL=$iasl" >> $config_host_mak diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index d95086fbbd..3f08db7b9e 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -434,9 +434,14 @@ static void pnv_dt_isa(PnvMachineState *pnv, void *fdt) .fdt = fdt, .offset = isa_offset, }; + uint32_t phandle; _FDT((fdt_setprop(fdt, isa_offset, "primary", NULL, 0))); + phandle = qemu_fdt_alloc_phandle(fdt); + assert(phandle > 0); + _FDT((fdt_setprop_cell(fdt, isa_offset, "phandle", phandle))); + /* ISA devices are not necessarily parented to the ISA bus so we * can not use object_child_foreach() */ qbus_walk_children(BUS(pnv->isa_bus), pnv_dt_isa_device, NULL, NULL, NULL, @@ -600,9 +605,20 @@ static void pnv_chip_power9_pic_print_info(PnvChip *chip, Monitor *mon) pnv_psi_pic_print_info(&chip9->psi, mon); } +static bool pnv_match_cpu(const char *default_type, const char *cpu_type) +{ + PowerPCCPUClass *ppc_default = + POWERPC_CPU_CLASS(object_class_by_name(default_type)); + PowerPCCPUClass *ppc = + POWERPC_CPU_CLASS(object_class_by_name(cpu_type)); + + return ppc_default->pvr_match(ppc_default, ppc->pvr); +} + static void pnv_init(MachineState *machine) { PnvMachineState *pnv = PNV_MACHINE(machine); + MachineClass *mc = MACHINE_GET_CLASS(machine); MemoryRegion *ram; char *fw_filename; long fw_size; @@ -662,13 +678,23 @@ static void pnv_init(MachineState *machine) } } + /* + * Check compatibility of the specified CPU with the machine + * default. + */ + if (!pnv_match_cpu(mc->default_cpu_type, machine->cpu_type)) { + error_report("invalid CPU model '%s' for %s machine", + machine->cpu_type, mc->name); + exit(1); + } + /* Create the processor chips */ i = strlen(machine->cpu_type) - strlen(POWERPC_CPU_TYPE_SUFFIX); chip_typename = g_strdup_printf(PNV_CHIP_TYPE_NAME("%.*s"), i, machine->cpu_type); if (!object_class_by_name(chip_typename)) { - error_report("invalid CPU model '%.*s' for %s machine", - i, machine->cpu_type, MACHINE_GET_CLASS(machine)->name); + error_report("invalid chip model '%.*s' for %s machine", + i, machine->cpu_type, mc->name); exit(1); } @@ -1346,25 +1372,47 @@ static void pnv_machine_class_props_init(ObjectClass *oc) NULL); } -static void pnv_machine_class_init(ObjectClass *oc, void *data) +static void pnv_machine_power8_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); XICSFabricClass *xic = XICS_FABRIC_CLASS(oc); + + mc->desc = "IBM PowerNV (Non-Virtualized) POWER8"; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); + + xic->icp_get = pnv_icp_get; + xic->ics_get = pnv_ics_get; + xic->ics_resend = pnv_ics_resend; +} + +static void pnv_machine_power9_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + mc->desc = "IBM PowerNV (Non-Virtualized) POWER9"; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power9_v2.0"); + + mc->alias = "powernv"; +} + +static void pnv_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc); mc->desc = "IBM PowerNV (Non-Virtualized)"; mc->init = pnv_init; mc->reset = pnv_reset; mc->max_cpus = MAX_CPUS; - mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); mc->block_default_type = IF_IDE; /* Pnv provides a AHCI device for * storage */ mc->no_parallel = 1; mc->default_boot_order = NULL; - mc->default_ram_size = 1 * GiB; - xic->icp_get = pnv_icp_get; - xic->ics_get = pnv_ics_get; - xic->ics_resend = pnv_ics_resend; + /* + * RAM defaults to less than 2048 for 32-bit hosts, and large + * enough to fit the maximum initrd size at it's load address + */ + mc->default_ram_size = INITRD_LOAD_ADDR + INITRD_MAX_SIZE; ispc->print_info = pnv_pic_print_info; pnv_machine_class_props_init(oc); @@ -1384,10 +1432,27 @@ static void pnv_machine_class_init(ObjectClass *oc, void *data) .parent = TYPE_PNV9_CHIP, \ } +#define DEFINE_PNV_MACHINE_TYPE(cpu, class_initfn) \ + { \ + .name = MACHINE_TYPE_NAME(cpu), \ + .parent = TYPE_PNV_MACHINE, \ + .instance_size = sizeof(PnvMachineState), \ + .instance_init = pnv_machine_instance_init, \ + .class_init = class_initfn, \ + .interfaces = (InterfaceInfo[]) { \ + { TYPE_XICS_FABRIC }, \ + { TYPE_INTERRUPT_STATS_PROVIDER }, \ + { }, \ + }, \ + } + static const TypeInfo types[] = { + DEFINE_PNV_MACHINE_TYPE("powernv8", pnv_machine_power8_class_init), + DEFINE_PNV_MACHINE_TYPE("powernv9", pnv_machine_power9_class_init), { .name = TYPE_PNV_MACHINE, .parent = TYPE_MACHINE, + .abstract = true, .instance_size = sizeof(PnvMachineState), .instance_init = pnv_machine_instance_init, .class_init = pnv_machine_class_init, diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c index 0e31c5786b..67aab98fef 100644 --- a/hw/ppc/pnv_xscom.c +++ b/hw/ppc/pnv_xscom.c @@ -106,6 +106,16 @@ static uint64_t xscom_read_default(PnvChip *chip, uint32_t pcba) case 0x201302a: /* CAPP stuff */ case 0x2013801: /* CAPP stuff */ case 0x2013802: /* CAPP stuff */ + + /* P9 CAPP regs */ + case 0x2010841: + case 0x2010842: + case 0x201082a: + case 0x2010828: + case 0x4010841: + case 0x4010842: + case 0x401082a: + case 0x4010828: return 0; default: return -1; @@ -138,6 +148,16 @@ static bool xscom_write_default(PnvChip *chip, uint32_t pcba, uint64_t val) case 0x2013801: /* CAPP stuff */ case 0x2013802: /* CAPP stuff */ + /* P9 CAPP regs */ + case 0x2010841: + case 0x2010842: + case 0x201082a: + case 0x2010828: + case 0x4010841: + case 0x4010842: + case 0x401082a: + case 0x4010828: + /* P8 PRD registers */ case PRD_P8_IPOLL_REG_MASK: case PRD_P8_IPOLL_REG_STATUS: diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index baedadf20b..ea56499b4b 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1168,6 +1168,7 @@ static void spapr_dt_ov5_platform_support(SpaprMachineState *spapr, void *fdt, static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt) { MachineState *machine = MACHINE(spapr); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); int chosen; const char *boot_device = machine->boot_order; char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus); @@ -1225,6 +1226,11 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt) _FDT(fdt_setprop_string(fdt, chosen, "stdout-path", stdout_path)); } + /* We can deal with BAR reallocation just fine, advertise it to the guest */ + if (smc->linux_pci_probe) { + _FDT(fdt_setprop_cell(fdt, chosen, "linux,pci-probe-only", 0)); + } + spapr_dt_ov5_platform_support(spapr, fdt, chosen); g_free(stdout_path); @@ -1752,7 +1758,7 @@ static void spapr_machine_reset(MachineState *machine) spapr_ovec_cleanup(spapr->ov5_cas); spapr->ov5_cas = spapr_ovec_new(); - ppc_set_compat(first_ppc_cpu, spapr->max_compat_pvr, &error_fatal); + ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal); } /* @@ -3829,6 +3835,7 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, CPUArchId *core_slot; int index; bool hotplugged = spapr_drc_hotplugged(dev); + int i; core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index); if (!core_slot) { @@ -3862,13 +3869,26 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, core_slot->cpu = OBJECT(dev); if (smc->pre_2_10_has_unused_icps) { - int i; - for (i = 0; i < cc->nr_threads; i++) { cs = CPU(core->threads[i]); pre_2_10_vmstate_unregister_dummy_icp(cs->cpu_index); } } + + /* + * Set compatibility mode to match the boot CPU, which was either set + * by the machine reset code or by CAS. + */ + if (hotplugged) { + for (i = 0; i < cc->nr_threads; i++) { + ppc_set_compat(core->threads[i], POWERPC_CPU(first_cpu)->compat_pvr, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + } } static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, @@ -4470,6 +4490,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) spapr_caps_add_properties(smc, &error_abort); smc->irq = &spapr_irq_dual; smc->dr_phb_enabled = true; + smc->linux_pci_probe = true; } static const TypeInfo spapr_machine_info = { @@ -4529,12 +4550,14 @@ DEFINE_SPAPR_MACHINE(4_2, "4.2", true); */ static void spapr_machine_4_1_class_options(MachineClass *mc) { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); static GlobalProperty compat[] = { /* Only allow 4kiB and 64kiB IOMMU pagesizes */ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pgsz", "0x11000" }, }; spapr_machine_4_2_class_options(mc); + smc->linux_pci_probe = false; compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len); compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index bf47fbdf6f..1d93de8161 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -41,11 +41,6 @@ static void spapr_cpu_reset(void *opaque) * using an RTAS call */ cs->halted = 1; - /* Set compatibility mode to match the boot CPU, which was either set - * by the machine reset code or by CAS. This should never fail. - */ - ppc_set_compat(cpu, POWERPC_CPU(first_cpu)->compat_pvr, &error_abort); - env->spr[SPR_HIOR] = 0; lpcr = env->spr[SPR_LPCR]; diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index e20a946b99..23e4bdb829 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -1811,7 +1811,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, spapr_ovec_cleanup(ov5_updates); if (spapr->cas_reboot) { - qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + qemu_system_reset_request(SHUTDOWN_CAUSE_SUBSYSTEM_RESET); } return H_SUCCESS; diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index deb0b0c80c..a777fb3e7f 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -280,7 +280,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr, unsigned int irq, max_irqs = 0; SpaprPhbState *phb = NULL; PCIDevice *pdev = NULL; - spapr_pci_msi *msi; + SpaprPciMsi *msi; int *config_addr_key; Error *err = NULL; int i; @@ -328,7 +328,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr, return; } - msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr); + msi = (SpaprPciMsi *) g_hash_table_lookup(phb->msi, &config_addr); /* Releasing MSIs */ if (!req_num) { @@ -415,7 +415,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr, irq, req_num); /* Add MSI device to cache */ - msi = g_new(spapr_pci_msi, 1); + msi = g_new(SpaprPciMsi, 1); msi->first_irq = irq; msi->num = req_num; config_addr_key = g_new(int, 1); @@ -446,7 +446,7 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3); SpaprPhbState *phb = NULL; PCIDevice *pdev = NULL; - spapr_pci_msi *msi; + SpaprPciMsi *msi; /* Find SpaprPhbState */ phb = spapr_pci_find_phb(spapr, buid); @@ -459,7 +459,7 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, } /* Find device descriptor and start IRQ */ - msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr); + msi = (SpaprPciMsi *) g_hash_table_lookup(phb->msi, &config_addr); if (!msi || !msi->first_irq || !msi->num || (ioa_intr_num >= msi->num)) { trace_spapr_pci_msi("Failed to return vector", config_addr); rtas_st(rets, 0, RTAS_OUT_HW_ERROR); @@ -1700,11 +1700,13 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler, state = func_drck->dr_entity_sense(func_drc); if (state == SPAPR_DR_ENTITY_SENSE_PRESENT && !spapr_drc_unplug_requested(func_drc)) { - error_setg(errp, - "PCI: slot %d, function %d still present. " - "Must unplug all non-0 functions first.", - slotnr, i); - return; + /* + * Attempting to remove function 0 of a multifunction + * device will will cascade into removing all child + * functions, even if their unplug weren't requested + * beforehand. + */ + spapr_drc_detach(func_drc); } } } @@ -1804,7 +1806,7 @@ static void spapr_phb_destroy_msi(gpointer opaque) { SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); - spapr_pci_msi *msi = opaque; + SpaprPciMsi *msi = opaque; if (!smc->legacy_irq_allocation) { spapr_irq_msi_free(spapr, msi->first_irq, msi->num); @@ -2118,7 +2120,7 @@ static const VMStateDescription vmstate_spapr_pci_lsi = { .version_id = 1, .minimum_version_id = 1, .fields = (VMStateField[]) { - VMSTATE_UINT32_EQUAL(irq, struct spapr_pci_lsi, NULL), + VMSTATE_UINT32_EQUAL(irq, SpaprPciLsi, NULL), VMSTATE_END_OF_LIST() }, @@ -2129,9 +2131,9 @@ static const VMStateDescription vmstate_spapr_pci_msi = { .version_id = 1, .minimum_version_id = 1, .fields = (VMStateField []) { - VMSTATE_UINT32(key, spapr_pci_msi_mig), - VMSTATE_UINT32(value.first_irq, spapr_pci_msi_mig), - VMSTATE_UINT32(value.num, spapr_pci_msi_mig), + VMSTATE_UINT32(key, SpaprPciMsiMig), + VMSTATE_UINT32(value.first_irq, SpaprPciMsiMig), + VMSTATE_UINT32(value.num, SpaprPciMsiMig), VMSTATE_END_OF_LIST() }, }; @@ -2163,12 +2165,12 @@ static int spapr_pci_pre_save(void *opaque) if (!sphb->msi_devs_num) { return 0; } - sphb->msi_devs = g_new(spapr_pci_msi_mig, sphb->msi_devs_num); + sphb->msi_devs = g_new(SpaprPciMsiMig, sphb->msi_devs_num); g_hash_table_iter_init(&iter, sphb->msi); for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) { sphb->msi_devs[i].key = *(uint32_t *) key; - sphb->msi_devs[i].value = *(spapr_pci_msi *) value; + sphb->msi_devs[i].value = *(SpaprPciMsi *) value; } return 0; @@ -2215,10 +2217,10 @@ static const VMStateDescription vmstate_spapr_pci = { VMSTATE_UINT64_TEST(mig_io_win_addr, SpaprPhbState, pre_2_8_migration), VMSTATE_UINT64_TEST(mig_io_win_size, SpaprPhbState, pre_2_8_migration), VMSTATE_STRUCT_ARRAY(lsi_table, SpaprPhbState, PCI_NUM_PINS, 0, - vmstate_spapr_pci_lsi, struct spapr_pci_lsi), + vmstate_spapr_pci_lsi, SpaprPciLsi), VMSTATE_INT32(msi_devs_num, SpaprPhbState), VMSTATE_STRUCT_VARRAY_ALLOC(msi_devs, SpaprPhbState, msi_devs_num, 0, - vmstate_spapr_pci_msi, spapr_pci_msi_mig), + vmstate_spapr_pci_msi, SpaprPciMsiMig), VMSTATE_END_OF_LIST() }, }; diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c index eda8c752aa..4aa89ede23 100644 --- a/hw/ppc/spapr_pci_nvlink2.c +++ b/hw/ppc/spapr_pci_nvlink2.c @@ -39,11 +39,7 @@ #define SPAPR_GPU_NUMA_ID (cpu_to_be32(1)) -struct spapr_phb_pci_nvgpu_config { - uint64_t nv2_ram_current; - uint64_t nv2_atsd_current; - int num; /* number of non empty (i.e. tgt!=0) entries in slots[] */ - struct spapr_phb_pci_nvgpu_slot { +typedef struct SpaprPhbPciNvGpuSlot { uint64_t tgt; uint64_t gpa; unsigned numa_id; @@ -54,12 +50,18 @@ struct spapr_phb_pci_nvgpu_config { PCIDevice *npdev; uint32_t link_speed; } links[NVGPU_MAX_LINKS]; - } slots[NVGPU_MAX_NUM]; +} SpaprPhbPciNvGpuSlot; + +struct SpaprPhbPciNvGpuConfig { + uint64_t nv2_ram_current; + uint64_t nv2_atsd_current; + int num; /* number of non empty (i.e. tgt!=0) entries in slots[] */ + SpaprPhbPciNvGpuSlot slots[NVGPU_MAX_NUM]; Error *errp; }; -static struct spapr_phb_pci_nvgpu_slot * -spapr_nvgpu_get_slot(struct spapr_phb_pci_nvgpu_config *nvgpus, uint64_t tgt) +static SpaprPhbPciNvGpuSlot * +spapr_nvgpu_get_slot(SpaprPhbPciNvGpuConfig *nvgpus, uint64_t tgt) { int i; @@ -81,13 +83,13 @@ spapr_nvgpu_get_slot(struct spapr_phb_pci_nvgpu_config *nvgpus, uint64_t tgt) return &nvgpus->slots[i]; } -static void spapr_pci_collect_nvgpu(struct spapr_phb_pci_nvgpu_config *nvgpus, +static void spapr_pci_collect_nvgpu(SpaprPhbPciNvGpuConfig *nvgpus, PCIDevice *pdev, uint64_t tgt, MemoryRegion *mr, Error **errp) { MachineState *machine = MACHINE(qdev_get_machine()); SpaprMachineState *spapr = SPAPR_MACHINE(machine); - struct spapr_phb_pci_nvgpu_slot *nvslot = spapr_nvgpu_get_slot(nvgpus, tgt); + SpaprPhbPciNvGpuSlot *nvslot = spapr_nvgpu_get_slot(nvgpus, tgt); if (!nvslot) { error_setg(errp, "Found too many GPUs per vPHB"); @@ -102,11 +104,11 @@ static void spapr_pci_collect_nvgpu(struct spapr_phb_pci_nvgpu_config *nvgpus, ++spapr->gpu_numa_id; } -static void spapr_pci_collect_nvnpu(struct spapr_phb_pci_nvgpu_config *nvgpus, +static void spapr_pci_collect_nvnpu(SpaprPhbPciNvGpuConfig *nvgpus, PCIDevice *pdev, uint64_t tgt, MemoryRegion *mr, Error **errp) { - struct spapr_phb_pci_nvgpu_slot *nvslot = spapr_nvgpu_get_slot(nvgpus, tgt); + SpaprPhbPciNvGpuSlot *nvslot = spapr_nvgpu_get_slot(nvgpus, tgt); int j; if (!nvslot) { @@ -138,7 +140,7 @@ static void spapr_phb_pci_collect_nvgpu(PCIBus *bus, PCIDevice *pdev, if (tgt) { Error *local_err = NULL; - struct spapr_phb_pci_nvgpu_config *nvgpus = opaque; + SpaprPhbPciNvGpuConfig *nvgpus = opaque; Object *mr_gpu = object_property_get_link(po, "nvlink2-mr[0]", NULL); Object *mr_npu = object_property_get_link(po, "nvlink2-atsd-mr[0]", NULL); @@ -177,7 +179,7 @@ void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp) return; } - sphb->nvgpus = g_new0(struct spapr_phb_pci_nvgpu_config, 1); + sphb->nvgpus = g_new0(SpaprPhbPciNvGpuConfig, 1); sphb->nvgpus->nv2_ram_current = sphb->nv2_gpa_win_addr; sphb->nvgpus->nv2_atsd_current = sphb->nv2_atsd_win_addr; @@ -194,7 +196,7 @@ void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp) /* Add found GPU RAM and ATSD MRs if found */ for (i = 0, valid_gpu_num = 0; i < sphb->nvgpus->num; ++i) { Object *nvmrobj; - struct spapr_phb_pci_nvgpu_slot *nvslot = &sphb->nvgpus->slots[i]; + SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i]; if (!nvslot->gpdev) { continue; @@ -242,7 +244,7 @@ void spapr_phb_nvgpu_free(SpaprPhbState *sphb) } for (i = 0; i < sphb->nvgpus->num; ++i) { - struct spapr_phb_pci_nvgpu_slot *nvslot = &sphb->nvgpus->slots[i]; + SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i]; Object *nv_mrobj = object_property_get_link(OBJECT(nvslot->gpdev), "nvlink2-mr[0]", NULL); @@ -276,7 +278,7 @@ void spapr_phb_nvgpu_populate_dt(SpaprPhbState *sphb, void *fdt, int bus_off, } for (i = 0; (i < sphb->nvgpus->num) && (atsdnum < ARRAY_SIZE(atsd)); ++i) { - struct spapr_phb_pci_nvgpu_slot *nvslot = &sphb->nvgpus->slots[i]; + SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i]; if (!nvslot->gpdev) { continue; @@ -354,7 +356,7 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt) /* Add memory nodes for GPU RAM and mark them unusable */ for (i = 0; i < sphb->nvgpus->num; ++i) { - struct spapr_phb_pci_nvgpu_slot *nvslot = &sphb->nvgpus->slots[i]; + SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i]; Object *nv_mrobj = object_property_get_link(OBJECT(nvslot->gpdev), "nvlink2-mr[0]", NULL); uint32_t associativity[] = { @@ -398,7 +400,7 @@ void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset, } for (i = 0; i < sphb->nvgpus->num; ++i) { - struct spapr_phb_pci_nvgpu_slot *nvslot = &sphb->nvgpus->slots[i]; + SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i]; /* Skip "slot" without attached GPU */ if (!nvslot->gpdev) { diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 526b489297..bee3835214 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -266,6 +266,7 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, target_ulong args, uint32_t nret, target_ulong rets) { + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); MachineState *ms = MACHINE(qdev_get_machine()); unsigned int max_cpus = ms->smp.max_cpus; target_ulong parameter = rtas_ld(args, 0); @@ -283,6 +284,20 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, current_machine->ram_size / MiB, ms->smp.cpus, max_cpus); + if (pcc->n_host_threads > 0) { + char *hostthr_val, *old = param_val; + + /* + * Add HostThrs property. This property is not present in PAPR but + * is expected by some guests to communicate the number of physical + * host threads per core on the system so that they can scale + * information which varies based on the thread configuration. + */ + hostthr_val = g_strdup_printf(",HostThrs=%d", pcc->n_host_threads); + param_val = g_strconcat(param_val, hostthr_val, NULL); + g_free(hostthr_val); + g_free(old); + } ret = sysparm_st(buffer, length, param_val, strlen(param_val) + 1); g_free(param_val); break; diff --git a/hw/usb/dev-mtp.c b/hw/usb/dev-mtp.c index 9846e4b513..7c07295519 100644 --- a/hw/usb/dev-mtp.c +++ b/hw/usb/dev-mtp.c @@ -2038,26 +2038,36 @@ static void usb_mtp_realize(USBDevice *dev, Error **errp) { MTPState *s = USB_MTP(dev); - usb_desc_create_serial(dev); - usb_desc_init(dev); - QTAILQ_INIT(&s->objects); - if (s->desc == NULL) { - if (s->root == NULL) { - error_setg(errp, "usb-mtp: rootdir property must be configured"); - return; - } - s->desc = strrchr(s->root, '/'); - if (s->desc && s->desc[0]) { - s->desc = g_strdup(s->desc + 1); - } else { - s->desc = g_strdup("none"); - } + if ((s->root == NULL) || !g_path_is_absolute(s->root)) { + error_setg(errp, "usb-mtp: rootdir must be configured and be an absolute path"); + return; } + + if (access(s->root, R_OK) != 0) { + error_setg(errp, "usb-mtp: rootdir does not exist/not readable"); + return; + } else if (!s->readonly && access(s->root, W_OK) != 0) { + error_setg(errp, "usb-mtp: rootdir does not have write permissions"); + return; + } + /* Mark store as RW */ if (!s->readonly) { s->flags |= (1 << MTP_FLAG_WRITABLE); } + if (s->desc == NULL) { + /* + * This does not check if path exists + * but we have the checks above + */ + s->desc = g_path_get_basename(s->root); + } + + usb_desc_create_serial(dev); + usb_desc_init(dev); + QTAILQ_INIT(&s->objects); + } static const VMStateDescription vmstate_usb_mtp = { diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index f578264948..80988bb305 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -1914,6 +1914,7 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) } usb_handle_packet(xfer->packet.ep->dev, &xfer->packet); if (xfer->packet.status == USB_RET_NAK) { + xhci_xfer_unmap(xfer); return; } xhci_try_complete_packet(xfer); @@ -2161,6 +2162,7 @@ static TRBCCode xhci_address_slot(XHCIState *xhci, unsigned int slotid, DeviceOutRequest | USB_REQ_SET_ADDRESS, slotid, 0, 0, NULL); assert(p.status != USB_RET_ASYNC); + usb_packet_cleanup(&p); } res = xhci_enable_ep(xhci, slotid, 1, octx+32, ep0_ctx); diff --git a/include/block/block_int.h b/include/block/block_int.h index ceec8c2f56..0422acdf1c 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -210,6 +210,9 @@ struct BlockDriver { */ int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); + int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, int flags); int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags); /** @@ -229,6 +232,9 @@ struct BlockDriver { */ int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); + int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, int flags); /* * Efficiently zero a region of the disk image. Typically an image format @@ -339,6 +345,9 @@ struct BlockDriver { int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov); + int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, + size_t qiov_offset); int (*bdrv_snapshot_create)(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); @@ -570,6 +579,12 @@ struct BlockDriver { const char *const *strong_runtime_opts; }; +static inline bool block_driver_can_compress(BlockDriver *drv) +{ + return drv->bdrv_co_pwritev_compressed || + drv->bdrv_co_pwritev_compressed_part; +} + typedef struct BlockLimits { /* Alignment requirement, in bytes, for offset/length of I/O * requests. Must be a power of 2 less than INT_MAX; defaults to @@ -944,9 +959,15 @@ extern BlockDriver bdrv_qcow2; int coroutine_fn bdrv_co_preadv(BdrvChild *child, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); +int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, + int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); int coroutine_fn bdrv_co_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); +int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, + int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); static inline int coroutine_fn bdrv_co_pread(BdrvChild *child, int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags) diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 1b61162f91..abd87605b2 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -34,15 +34,21 @@ typedef struct SpaprPhbState SpaprPhbState; -typedef struct spapr_pci_msi { +typedef struct SpaprPciMsi { uint32_t first_irq; uint32_t num; -} spapr_pci_msi; +} SpaprPciMsi; -typedef struct spapr_pci_msi_mig { +typedef struct SpaprPciMsiMig { uint32_t key; - spapr_pci_msi value; -} spapr_pci_msi_mig; + SpaprPciMsi value; +} SpaprPciMsiMig; + +typedef struct SpaprPciLsi { + uint32_t irq; +} SpaprPciLsi; + +typedef struct SpaprPhbPciNvGpuConfig SpaprPhbPciNvGpuConfig; struct SpaprPhbState { PCIHostState parent_obj; @@ -63,14 +69,12 @@ struct SpaprPhbState { AddressSpace iommu_as; MemoryRegion iommu_root; - struct spapr_pci_lsi { - uint32_t irq; - } lsi_table[PCI_NUM_PINS]; + SpaprPciLsi lsi_table[PCI_NUM_PINS]; GHashTable *msi; /* Temporary cache for migration purposes */ int32_t msi_devs_num; - spapr_pci_msi_mig *msi_devs; + SpaprPciMsiMig *msi_devs; QLIST_ENTRY(SpaprPhbState) list; @@ -89,7 +93,7 @@ struct SpaprPhbState { hwaddr mig_io_win_addr, mig_io_win_size; hwaddr nv2_gpa_win_addr; hwaddr nv2_atsd_win_addr; - struct spapr_phb_pci_nvgpu_config *nvgpus; + SpaprPhbPciNvGpuConfig *nvgpus; }; #define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index fa7c380edb..03111fd55b 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -121,6 +121,7 @@ struct SpaprMachineClass { bool legacy_irq_allocation; bool broken_host_serial_model; /* present real host info to the guest */ bool pre_4_1_migration; /* don't migrate hpt-max-page-size */ + bool linux_pci_probe; void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, diff --git a/include/qemu/iov.h b/include/qemu/iov.h index 48b45987b7..bffc151282 100644 --- a/include/qemu/iov.h +++ b/include/qemu/iov.h @@ -199,13 +199,21 @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov) void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); +void qemu_iovec_init_extended( + QEMUIOVector *qiov, + void *head_buf, size_t head_len, + QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, + void *tail_buf, size_t tail_len); +void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, + size_t offset, size_t len); +int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len); void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t soffset, size_t sbytes); size_t qemu_iovec_concat_iov(QEMUIOVector *dst, struct iovec *src_iov, unsigned int src_cnt, size_t soffset, size_t sbytes); -bool qemu_iovec_is_zero(QEMUIOVector *qiov); +bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t qiov_offeset, size_t bytes); void qemu_iovec_destroy(QEMUIOVector *qiov); void qemu_iovec_reset(QEMUIOVector *qiov); size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, diff --git a/pc-bios/README b/pc-bios/README index d59cd25461..ad78f6dc49 100644 --- a/pc-bios/README +++ b/pc-bios/README @@ -17,7 +17,7 @@ - SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware implementation for certain IBM POWER hardware. The sources are at https://github.com/aik/SLOF, and the image currently in qemu is - built from git tag qemu-slof-20190719. + built from git tag qemu-slof-20190827. - sgabios (the Serial Graphics Adapter option ROM) provides a means for legacy x86 software to communicate with an attached serial console as diff --git a/pc-bios/skiboot.lid b/pc-bios/skiboot.lid index 6d5966c3ae..504b95e8b6 100644 --- a/pc-bios/skiboot.lid +++ b/pc-bios/skiboot.lid Binary files differdiff --git a/pc-bios/slof.bin b/pc-bios/slof.bin index fb0837508b..a3a3e49332 100644 --- a/pc-bios/slof.bin +++ b/pc-bios/slof.bin Binary files differdiff --git a/qemu-deprecated.texi b/qemu-deprecated.texi index 00a4b6f350..9d74a1cfc0 100644 --- a/qemu-deprecated.texi +++ b/qemu-deprecated.texi @@ -72,6 +72,13 @@ backend settings instead of environment variables. To ease migration to the new format, the ``-audiodev-help'' option can be used to convert the current values of the environment variables to ``-audiodev'' options. +@subsection Creating sound card devices and vnc without audiodev= property (since 4.2) + +When not using the deprecated legacy audio config, each sound card +should specify an @code{audiodev=} property. Additionally, when using +vnc, you should specify an @code{audiodev=} propery if you plan to +transmit audio through the VNC protocol. + @subsection -mon ...,control=readline,pretty=on|off (since 4.1) The @code{pretty=on|off} switch has no effect for HMP monitors, but is diff --git a/qemu-img.c b/qemu-img.c index 7daa05e51a..4ee436fc94 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -2388,7 +2388,7 @@ static int img_convert(int argc, char **argv) const char *preallocation = qemu_opt_get(opts, BLOCK_OPT_PREALLOC); - if (drv && !drv->bdrv_co_pwritev_compressed) { + if (drv && !block_driver_can_compress(drv)) { error_report("Compression not supported for this file format"); ret = -1; goto out; @@ -2459,7 +2459,7 @@ static int img_convert(int argc, char **argv) } out_bs = blk_bs(s.target); - if (s.compressed && !out_bs->drv->bdrv_co_pwritev_compressed) { + if (s.compressed && !block_driver_can_compress(out_bs->drv)) { error_report("Compression not supported for this file format"); ret = -1; goto out; diff --git a/roms/SLOF b/roms/SLOF -Subproject 7bfe584e321946771692711ff83ad2b5850daca +Subproject ea221600a116883137ef90b2b7ab7d2472bc4f1 diff --git a/roms/skiboot b/roms/skiboot -Subproject 261ca8e779e5138869a45f174caa49be6a27450 +Subproject 3a6fdede6ce117facec0108afe716cf5d0472c3 diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h index 7ffdb0a706..e499575dc8 100644 --- a/target/ppc/cpu-qom.h +++ b/target/ppc/cpu-qom.h @@ -191,6 +191,7 @@ typedef struct PowerPCCPUClass { const PPCHash64Options *hash64_opts; struct ppc_radix_page_info *radix_page_info; uint32_t lrg_decr_bits; + int n_host_threads; void (*init_proc)(CPUPPCState *env); int (*check_pow)(CPUPPCState *env); int (*handle_mmu_fault)(PowerPCCPU *cpu, vaddr eaddr, int rwx, int mmu_idx); diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 07bc9051b0..4b1a2e6178 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -630,19 +630,15 @@ static void do_float_check_status(CPUPPCState *env, uintptr_t raddr) { CPUState *cs = env_cpu(env); int status = get_float_exception_flags(&env->fp_status); - bool inexact_happened = false; if (status & float_flag_overflow) { float_overflow_excp(env); } else if (status & float_flag_underflow) { float_underflow_excp(env); - } else if (status & float_flag_inexact) { - float_inexact_excp(env); - inexact_happened = true; } - - /* if the inexact flag was not set */ - if (inexact_happened == false) { + if (status & float_flag_inexact) { + float_inexact_excp(env); + } else { env->fpscr &= ~(1 << FPSCR_FI); /* clear the FPSCR[FI] bit */ } @@ -2887,12 +2883,40 @@ void helper_xscvqpdp(CPUPPCState *env, uint32_t opcode, uint64_t helper_xscvdpspn(CPUPPCState *env, uint64_t xb) { - uint64_t result; + uint64_t result, sign, exp, frac; float_status tstat = env->fp_status; set_float_exception_flags(0, &tstat); - result = (uint64_t)float64_to_float32(xb, &tstat); + sign = extract64(xb, 63, 1); + exp = extract64(xb, 52, 11); + frac = extract64(xb, 0, 52) | 0x10000000000000ULL; + + if (unlikely(exp == 0 && extract64(frac, 0, 52) != 0)) { + /* DP denormal operand. */ + /* Exponent override to DP min exp. */ + exp = 1; + /* Implicit bit override to 0. */ + frac = deposit64(frac, 53, 1, 0); + } + + if (unlikely(exp < 897 && frac != 0)) { + /* SP tiny operand. */ + if (897 - exp > 63) { + frac = 0; + } else { + /* Denormalize until exp = SP min exp. */ + frac >>= (897 - exp); + } + /* Exponent override to SP min exp - 1. */ + exp = 896; + } + + result = sign << 31; + result |= extract64(exp, 10, 1) << 30; + result |= extract64(exp, 0, 7) << 23; + result |= extract64(frac, 29, 23); + /* hardware replicates result to both words of the doubleword result. */ return (result << 32) | result; } diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c index 0d71c10428..2472a5217a 100644 --- a/target/ppc/translate/vmx-impl.inc.c +++ b/target/ppc/translate/vmx-impl.inc.c @@ -350,6 +350,28 @@ static void glue(gen_, name0##_##name1)(DisasContext *ctx) \ } \ } +/* + * We use this macro if one instruction is realized with direct + * translation, and second one with helper. + */ +#define GEN_VXFORM_TRANS_DUAL(name0, flg0, flg2_0, name1, flg1, flg2_1)\ +static void glue(gen_, name0##_##name1)(DisasContext *ctx) \ +{ \ + if ((Rc(ctx->opcode) == 0) && \ + ((ctx->insns_flags & flg0) || (ctx->insns_flags2 & flg2_0))) { \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + trans_##name0(ctx); \ + } else if ((Rc(ctx->opcode) == 1) && \ + ((ctx->insns_flags & flg1) || (ctx->insns_flags2 & flg2_1))) { \ + gen_##name1(ctx); \ + } else { \ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ + } \ +} + /* Adds support to provide invalid mask */ #define GEN_VXFORM_DUAL_EXT(name0, flg0, flg2_0, inval0, \ name1, flg1, flg2_1, inval1) \ @@ -431,20 +453,13 @@ GEN_VXFORM(vmrglb, 6, 4); GEN_VXFORM(vmrglh, 6, 5); GEN_VXFORM(vmrglw, 6, 6); -static void gen_vmrgew(DisasContext *ctx) +static void trans_vmrgew(DisasContext *ctx) { - TCGv_i64 tmp; - TCGv_i64 avr; - int VT, VA, VB; - if (unlikely(!ctx->altivec_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VPU); - return; - } - VT = rD(ctx->opcode); - VA = rA(ctx->opcode); - VB = rB(ctx->opcode); - tmp = tcg_temp_new_i64(); - avr = tcg_temp_new_i64(); + int VT = rD(ctx->opcode); + int VA = rA(ctx->opcode); + int VB = rB(ctx->opcode); + TCGv_i64 tmp = tcg_temp_new_i64(); + TCGv_i64 avr = tcg_temp_new_i64(); get_avr64(avr, VB, true); tcg_gen_shri_i64(tmp, avr, 32); @@ -462,21 +477,14 @@ static void gen_vmrgew(DisasContext *ctx) tcg_temp_free_i64(avr); } -static void gen_vmrgow(DisasContext *ctx) +static void trans_vmrgow(DisasContext *ctx) { - TCGv_i64 t0, t1; - TCGv_i64 avr; - int VT, VA, VB; - if (unlikely(!ctx->altivec_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VPU); - return; - } - VT = rD(ctx->opcode); - VA = rA(ctx->opcode); - VB = rB(ctx->opcode); - t0 = tcg_temp_new_i64(); - t1 = tcg_temp_new_i64(); - avr = tcg_temp_new_i64(); + int VT = rD(ctx->opcode); + int VA = rA(ctx->opcode); + int VB = rB(ctx->opcode); + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 avr = tcg_temp_new_i64(); get_avr64(t0, VB, true); get_avr64(t1, VA, true); @@ -936,14 +944,14 @@ GEN_VXFORM_ENV(vminfp, 5, 17); GEN_VXFORM_HETRO(vextublx, 6, 24) GEN_VXFORM_HETRO(vextuhlx, 6, 25) GEN_VXFORM_HETRO(vextuwlx, 6, 26) -GEN_VXFORM_DUAL(vmrgow, PPC_NONE, PPC2_ALTIVEC_207, +GEN_VXFORM_TRANS_DUAL(vmrgow, PPC_NONE, PPC2_ALTIVEC_207, vextuwlx, PPC_NONE, PPC2_ISA300) GEN_VXFORM_HETRO(vextubrx, 6, 28) GEN_VXFORM_HETRO(vextuhrx, 6, 29) GEN_VXFORM_HETRO(vextuwrx, 6, 30) GEN_VXFORM_TRANS(lvsl, 6, 31) GEN_VXFORM_TRANS(lvsr, 6, 32) -GEN_VXFORM_DUAL(vmrgew, PPC_NONE, PPC2_ALTIVEC_207, \ +GEN_VXFORM_TRANS_DUAL(vmrgew, PPC_NONE, PPC2_ALTIVEC_207, vextuwrx, PPC_NONE, PPC2_ISA300) #define GEN_VXRFORM1(opname, name, str, opc2, opc3) \ diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c index 3922686ad6..8287e272f5 100644 --- a/target/ppc/translate/vsx-impl.inc.c +++ b/target/ppc/translate/vsx-impl.inc.c @@ -1308,7 +1308,7 @@ static void gen_##name(DisasContext *ctx) \ } \ xt = gen_vsr_ptr(xT(ctx->opcode)); \ xa = gen_vsr_ptr(xA(ctx->opcode)); \ - if (ctx->opcode & PPC_BIT(25)) { \ + if (ctx->opcode & PPC_BIT32(25)) { \ /* \ * AxT + B \ */ \ diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c index 4a21ed7289..0fb11c7ac6 100644 --- a/target/ppc/translate_init.inc.c +++ b/target/ppc/translate_init.inc.c @@ -8770,6 +8770,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault; pcc->hash64_opts = &ppc_hash64_opts_POWER7; pcc->lrg_decr_bits = 32; + pcc->n_host_threads = 8; #endif pcc->excp_model = POWERPC_EXCP_POWER8; pcc->bus_model = PPC_FLAGS_INPUT_POWER7; @@ -8981,6 +8982,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data) pcc->hash64_opts = &ppc_hash64_opts_POWER7; pcc->radix_page_info = &POWER9_radix_page_info; pcc->lrg_decr_bits = 56; + pcc->n_host_threads = 4; #endif pcc->excp_model = POWERPC_EXCP_POWER9; pcc->bus_model = PPC_FLAGS_INPUT_POWER9; @@ -10461,6 +10463,10 @@ static void ppc_cpu_reset(CPUState *s) s->exception_index = POWERPC_EXCP_NONE; env->error_code = 0; + /* tininess for underflow is detected before rounding */ + set_float_detect_tininess(float_tininess_before_rounding, + &env->fp_status); + for (i = 0; i < ARRAY_SIZE(env->spr_cb); i++) { ppc_spr_t *spr = &env->spr_cb[i]; diff --git a/tests/Makefile.include b/tests/Makefile.include index 49684fd4f4..f5ac09549c 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -1135,7 +1135,7 @@ TESTS_RESULTS_DIR=$(BUILD_DIR)/tests/results AVOCADO_SHOW=app AVOCADO_TAGS=$(patsubst %-softmmu,-t arch:%, $(filter %-softmmu,$(TARGET_DIRS))) -ifneq ($(findstring v2,"v$(PYTHON_VERSION)"),v2) +ifneq ($(PYTHON2),y) $(TESTS_VENV_DIR): $(TESTS_VENV_REQ) $(call quiet-command, \ $(PYTHON) -m venv --system-site-packages $@, \ diff --git a/tests/acceptance/avocado_qemu/__init__.py b/tests/acceptance/avocado_qemu/__init__.py index aee5d820ed..bd41e0443c 100644 --- a/tests/acceptance/avocado_qemu/__init__.py +++ b/tests/acceptance/avocado_qemu/__init__.py @@ -39,6 +39,9 @@ def pick_default_qemu_bin(arch=None): """ if arch is None: arch = os.uname()[4] + # qemu binary path does not match arch for powerpc, handle it + if 'ppc64le' in arch: + arch = 'ppc64' qemu_bin_relative_path = os.path.join("%s-softmmu" % arch, "qemu-system-%s" % arch) if is_readable_executable_file(qemu_bin_relative_path): diff --git a/tests/acceptance/boot_linux_console.py b/tests/acceptance/boot_linux_console.py index 32159503e9..2504ef0150 100644 --- a/tests/acceptance/boot_linux_console.py +++ b/tests/acceptance/boot_linux_console.py @@ -354,3 +354,22 @@ class BootLinuxConsole(Test): self.vm.launch() console_pattern = 'Kernel command line: %s' % kernel_command_line self.wait_for_console_pattern(console_pattern) + + def test_ppc64_pseries(self): + """ + :avocado: tags=arch:ppc64 + :avocado: tags=machine:pseries + """ + kernel_url = ('https://download.fedoraproject.org/pub/fedora-secondary/' + 'releases/29/Everything/ppc64le/os/ppc/ppc64/vmlinuz') + kernel_hash = '3fe04abfc852b66653b8c3c897a59a689270bc77' + kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) + + self.vm.set_machine('pseries') + self.vm.set_console() + kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=hvc0' + self.vm.add_args('-kernel', kernel_path, + '-append', kernel_command_line) + self.vm.launch() + console_pattern = 'Kernel command line: %s' % kernel_command_line + self.wait_for_console_pattern(console_pattern) diff --git a/tests/acceptance/linux_ssh_mips_malta.py b/tests/acceptance/linux_ssh_mips_malta.py index aafb0c39f6..134f10cac3 100644 --- a/tests/acceptance/linux_ssh_mips_malta.py +++ b/tests/acceptance/linux_ssh_mips_malta.py @@ -162,7 +162,7 @@ class LinuxSSH(Test): self.assertIn(True, ["0dfbe8aa4c20b52e1b8bf3cb6cbdf193" in line for line in stdout]) - def do_test_mips_malta(self, endianess, kernel_path, uname_m): + def check_mips_malta(self, endianess, kernel_path, uname_m): self.boot_debian_wheezy_image_and_ssh_login(endianess, kernel_path) stdout, stderr = self.ssh_command('uname -a') @@ -184,7 +184,7 @@ class LinuxSSH(Test): kernel_hash = '592e384a4edc16dade52a6cd5c785c637bcbc9ad' kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) - self.do_test_mips_malta('be', kernel_path, 'mips') + self.check_mips_malta('be', kernel_path, 'mips') @skipIf(os.getenv('CONTINUOUS_INTEGRATION'), 'Running on Travis-CI') def test_mips_malta32el_kernel3_2_0(self): @@ -199,7 +199,7 @@ class LinuxSSH(Test): kernel_hash = 'a66bea5a8adaa2cb3d36a1d4e0ccdb01be8f6c2a' kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) - self.do_test_mips_malta('le', kernel_path, 'mips') + self.check_mips_malta('le', kernel_path, 'mips') @skipIf(os.getenv('CONTINUOUS_INTEGRATION'), 'Running on Travis-CI') def test_mips_malta64eb_kernel3_2_0(self): @@ -213,7 +213,7 @@ class LinuxSSH(Test): 'vmlinux-3.2.0-4-5kc-malta') kernel_hash = 'db6eea7de35d36c77d8c165b6bcb222e16eb91db' kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) - self.do_test_mips_malta('be', kernel_path, 'mips64') + self.check_mips_malta('be', kernel_path, 'mips64') @skipIf(os.getenv('CONTINUOUS_INTEGRATION'), 'Running on Travis-CI') def test_mips_malta64el_kernel3_2_0(self): @@ -227,4 +227,4 @@ class LinuxSSH(Test): 'vmlinux-3.2.0-4-5kc-malta') kernel_hash = '6a7f77245acf231415a0e8b725d91ed2f3487794' kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) - self.do_test_mips_malta('le', kernel_path, 'mips64') + self.check_mips_malta('le', kernel_path, 'mips64') diff --git a/tests/acceptance/migration.py b/tests/acceptance/migration.py index 6115cf6c24..a44c1ae58f 100644 --- a/tests/acceptance/migration.py +++ b/tests/acceptance/migration.py @@ -17,9 +17,6 @@ from avocado.utils import wait class Migration(Test): - """ - :avocado: enable - """ timeout = 10 diff --git a/tests/acceptance/vnc.py b/tests/acceptance/vnc.py index 064ceabcc1..3f40bc2be1 100644 --- a/tests/acceptance/vnc.py +++ b/tests/acceptance/vnc.py @@ -34,7 +34,7 @@ class Vnc(Test): self.assertEqual(set_password_response['error']['desc'], 'Could not set password') - def test_vnc_change_password_requires_a_password(self): + def test_change_password_requires_a_password(self): self.vm.add_args('-nodefaults', '-S', '-vnc', ':0') self.vm.launch() self.assertTrue(self.vm.qmp('query-vnc')['return']['enabled']) @@ -48,7 +48,7 @@ class Vnc(Test): self.assertEqual(set_password_response['error']['desc'], 'Could not set password') - def test_vnc_change_password(self): + def test_change_password(self): self.vm.add_args('-nodefaults', '-S', '-vnc', ':0,password') self.vm.launch() self.assertTrue(self.vm.qmp('query-vnc')['return']['enabled']) diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c index 24852d4c7d..a54d007298 100644 --- a/tests/boot-serial-test.c +++ b/tests/boot-serial-test.c @@ -103,7 +103,8 @@ static testdef_t tests[] = { { "ppc64", "pseries", "-machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken", "Open Firmware" }, - { "ppc64", "powernv", "-cpu POWER8", "OPAL" }, + { "ppc64", "powernv8", "", "OPAL" }, + { "ppc64", "powernv9", "", "OPAL" }, { "ppc64", "sam460ex", "-device e1000", "8086 100e" }, { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, { "i386", "pc", "-device sga", "SGABIOS" }, diff --git a/tests/modules-test.c b/tests/modules-test.c index f9de3afdb7..d1a6ace218 100644 --- a/tests/modules-test.c +++ b/tests/modules-test.c @@ -1,12 +1,14 @@ #include "qemu/osdep.h" #include "libqtest.h" +const char common_args[] = "-nodefaults -machine none"; + static void test_modules_load(const void *data) { QTestState *qts; const char **args = (const char **)data; - qts = qtest_init(NULL); + qts = qtest_init(common_args); qtest_module_load(qts, args[0], args[1]); qtest_quit(qts); } @@ -53,6 +55,9 @@ int main(int argc, char *argv[]) #ifdef CONFIG_SDL "ui-", "sdl", #endif +#if defined(CONFIG_SPICE) && defined(CONFIG_GIO) + "ui-", "spice-app", +#endif }; int i; diff --git a/tests/pnv-xscom-test.c b/tests/pnv-xscom-test.c index 63d464048d..9fddc7d5f9 100644 --- a/tests/pnv-xscom-test.c +++ b/tests/pnv-xscom-test.c @@ -77,9 +77,15 @@ static void test_xscom_cfam_id(QTestState *qts, const PnvChip *chip) static void test_cfam_id(const void *data) { const PnvChip *chip = data; + const char *machine = "powernv8"; QTestState *qts; - qts = qtest_initf("-M powernv,accel=tcg -cpu %s", chip->cpu_model); + if (chip->chip_type == PNV_CHIP_POWER9) { + machine = "powernv9"; + } + + qts = qtest_initf("-M %s,accel=tcg -cpu %s", + machine, chip->cpu_model); test_xscom_cfam_id(qts, chip); qtest_quit(qts); } @@ -113,8 +119,14 @@ static void test_core(const void *data) { const PnvChip *chip = data; QTestState *qts; + const char *machine = "powernv8"; + + if (chip->chip_type == PNV_CHIP_POWER9) { + machine = "powernv9"; + } - qts = qtest_initf("-M powernv,accel=tcg -cpu %s", chip->cpu_model); + qts = qtest_initf("-M %s,accel=tcg -cpu %s", + machine, chip->cpu_model); test_xscom_core(qts, chip); qtest_quit(qts); } diff --git a/tests/requirements.txt b/tests/requirements.txt index 3ae0e29ad7..bd1f7590ed 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -2,4 +2,4 @@ # in the tests/venv Python virtual environment. For more info, # refer to: https://pip.pypa.io/en/stable/user_guide/#id1 avocado-framework==68.0 -paramiko +paramiko==2.4.2 diff --git a/trace/control.c b/trace/control.c index 43fb7868db..d9cafc161b 100644 --- a/trace/control.c +++ b/trace/control.c @@ -165,6 +165,12 @@ void trace_list_events(void) while ((ev = trace_event_iter_next(&iter)) != NULL) { fprintf(stderr, "%s\n", trace_event_get_name(ev)); } +#ifdef CONFIG_TRACE_DTRACE + fprintf(stderr, "This list of names of trace points may be incomplete " + "when using the DTrace/SystemTap backends.\n" + "Run 'qemu-trace-stap list %s' to print the full list.\n", + error_get_progname()); +#endif } static void do_trace_enable_events(const char *line_buf) diff --git a/ui/Makefile.objs b/ui/Makefile.objs index cc2bf5b180..ba39080edb 100644 --- a/ui/Makefile.objs +++ b/ui/Makefile.objs @@ -49,7 +49,9 @@ curses.mo-objs := curses.o curses.mo-cflags := $(CURSES_CFLAGS) $(ICONV_CFLAGS) curses.mo-libs := $(CURSES_LIBS) $(ICONV_LIBS) -common-obj-$(call land,$(CONFIG_SPICE),$(CONFIG_GIO)) += spice-app.mo +ifeq ($(CONFIG_GIO)$(CONFIG_SPICE),yy) +common-obj-$(if $(CONFIG_MODULES),m,y) += spice-app.mo +endif spice-app.mo-objs := spice-app.o spice-app.mo-cflags := $(GIO_CFLAGS) spice-app.mo-libs := $(GIO_LIBS) diff --git a/util/iov.c b/util/iov.c index 74e6ca8ed7..5059e10431 100644 --- a/util/iov.c +++ b/util/iov.c @@ -354,34 +354,153 @@ void qemu_iovec_concat(QEMUIOVector *dst, } /* - * Check if the contents of the iovecs are all zero + * qiov_find_iov + * + * Return pointer to iovec structure, where byte at @offset in original vector + * @iov exactly is. + * Set @remaining_offset to be offset inside that iovec to the same byte. */ -bool qemu_iovec_is_zero(QEMUIOVector *qiov) +static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset, + size_t *remaining_offset) { - int i; - for (i = 0; i < qiov->niov; i++) { - size_t offs = QEMU_ALIGN_DOWN(qiov->iov[i].iov_len, 4 * sizeof(long)); - uint8_t *ptr = qiov->iov[i].iov_base; - if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) { + while (offset > 0 && offset >= iov->iov_len) { + offset -= iov->iov_len; + iov++; + } + *remaining_offset = offset; + + return iov; +} + +/* + * qiov_slice + * + * Find subarray of iovec's, containing requested range. @head would + * be offset in first iov (returned by the function), @tail would be + * count of extra bytes in last iovec (returned iov + @niov - 1). + */ +static struct iovec *qiov_slice(QEMUIOVector *qiov, + size_t offset, size_t len, + size_t *head, size_t *tail, int *niov) +{ + struct iovec *iov, *end_iov; + + assert(offset + len <= qiov->size); + + iov = iov_skip_offset(qiov->iov, offset, head); + end_iov = iov_skip_offset(iov, *head + len, tail); + + if (*tail > 0) { + assert(*tail < end_iov->iov_len); + *tail = end_iov->iov_len - *tail; + end_iov++; + } + + *niov = end_iov - iov; + + return iov; +} + +int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len) +{ + size_t head, tail; + int niov; + + qiov_slice(qiov, offset, len, &head, &tail, &niov); + + return niov; +} + +/* + * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov, + * and @tail_buf buffer into new qiov. + */ +void qemu_iovec_init_extended( + QEMUIOVector *qiov, + void *head_buf, size_t head_len, + QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, + void *tail_buf, size_t tail_len) +{ + size_t mid_head, mid_tail; + int total_niov, mid_niov = 0; + struct iovec *p, *mid_iov; + + if (mid_len) { + mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len, + &mid_head, &mid_tail, &mid_niov); + } + + total_niov = !!head_len + mid_niov + !!tail_len; + if (total_niov == 1) { + qemu_iovec_init_buf(qiov, NULL, 0); + p = &qiov->local_iov; + } else { + qiov->niov = qiov->nalloc = total_niov; + qiov->size = head_len + mid_len + tail_len; + p = qiov->iov = g_new(struct iovec, qiov->niov); + } + + if (head_len) { + p->iov_base = head_buf; + p->iov_len = head_len; + p++; + } + + if (mid_len) { + memcpy(p, mid_iov, mid_niov * sizeof(*p)); + p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head; + p[0].iov_len -= mid_head; + p[mid_niov - 1].iov_len -= mid_tail; + p += mid_niov; + } + + if (tail_len) { + p->iov_base = tail_buf; + p->iov_len = tail_len; + } +} + +/* + * Check if the contents of subrange of qiov data is all zeroes. + */ +bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes) +{ + struct iovec *iov; + size_t current_offset; + + assert(offset + bytes <= qiov->size); + + iov = iov_skip_offset(qiov->iov, offset, ¤t_offset); + + while (bytes) { + uint8_t *base = (uint8_t *)iov->iov_base + current_offset; + size_t len = MIN(iov->iov_len - current_offset, bytes); + + if (!buffer_is_zero(base, len)) { return false; } - for (; offs < qiov->iov[i].iov_len; offs++) { - if (ptr[offs]) { - return false; - } - } + + current_offset = 0; + bytes -= len; + iov++; } + return true; } +void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, + size_t offset, size_t len) +{ + qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0); +} + void qemu_iovec_destroy(QEMUIOVector *qiov) { - assert(qiov->nalloc != -1); + if (qiov->nalloc != -1) { + g_free(qiov->iov); + } - qemu_iovec_reset(qiov); - g_free(qiov->iov); - qiov->nalloc = 0; - qiov->iov = NULL; + memset(qiov, 0, sizeof(*qiov)); } void qemu_iovec_reset(QEMUIOVector *qiov) |