From 993935f315b75436f7664debdd1bf68622aabc2b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 10 May 2016 10:10:49 +0200 Subject: scsi-disk: introduce a common base class This will be the place to add DMAIOFuncs in the next patch. There are also a couple DeviceClass members that can be moved to the abstract class's initialization function. Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-disk.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'hw/scsi/scsi-disk.c') diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 8865da53e8..2d9dcdeaab 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -53,6 +53,8 @@ do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0) #define DEFAULT_MAX_UNMAP_SIZE (1 << 30) /* 1 GB */ #define DEFAULT_MAX_IO_SIZE INT_MAX /* 2 GB - 1 block */ +#define TYPE_SCSI_DISK_BASE "scsi-disk-base" + typedef struct SCSIDiskState SCSIDiskState; typedef struct SCSIDiskReq { @@ -2656,6 +2658,21 @@ static int scsi_block_parse_cdb(SCSIDevice *d, SCSICommand *cmd, #endif +static void scsi_disk_base_class_initfn(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->fw_name = "disk"; + dc->reset = scsi_disk_reset; +} + +static const TypeInfo scsi_disk_base_info = { + .name = TYPE_SCSI_DISK_BASE, + .parent = TYPE_SCSI_DEVICE, + .class_init = scsi_disk_base_class_initfn, + .instance_size = sizeof(SCSIDiskState), +}; + #define DEFINE_SCSI_DISK_PROPERTIES() \ DEFINE_BLOCK_PROPERTIES(SCSIDiskState, qdev.conf), \ DEFINE_PROP_STRING("ver", SCSIDiskState, version), \ @@ -2703,17 +2720,14 @@ static void scsi_hd_class_initfn(ObjectClass *klass, void *data) sc->realize = scsi_hd_realize; sc->alloc_req = scsi_new_request; sc->unit_attention_reported = scsi_disk_unit_attention_reported; - dc->fw_name = "disk"; dc->desc = "virtual SCSI disk"; - dc->reset = scsi_disk_reset; dc->props = scsi_hd_properties; dc->vmsd = &vmstate_scsi_disk_state; } static const 
TypeInfo scsi_hd_info = { .name = "scsi-hd", - .parent = TYPE_SCSI_DEVICE, - .instance_size = sizeof(SCSIDiskState), + .parent = TYPE_SCSI_DISK_BASE, .class_init = scsi_hd_class_initfn, }; @@ -2735,17 +2749,14 @@ static void scsi_cd_class_initfn(ObjectClass *klass, void *data) sc->realize = scsi_cd_realize; sc->alloc_req = scsi_new_request; sc->unit_attention_reported = scsi_disk_unit_attention_reported; - dc->fw_name = "disk"; dc->desc = "virtual SCSI CD-ROM"; - dc->reset = scsi_disk_reset; dc->props = scsi_cd_properties; dc->vmsd = &vmstate_scsi_disk_state; } static const TypeInfo scsi_cd_info = { .name = "scsi-cd", - .parent = TYPE_SCSI_DEVICE, - .instance_size = sizeof(SCSIDiskState), + .parent = TYPE_SCSI_DISK_BASE, .class_init = scsi_cd_class_initfn, }; @@ -2763,17 +2774,14 @@ static void scsi_block_class_initfn(ObjectClass *klass, void *data) sc->realize = scsi_block_realize; sc->alloc_req = scsi_block_new_request; sc->parse_cdb = scsi_block_parse_cdb; - dc->fw_name = "disk"; dc->desc = "SCSI block device passthrough"; - dc->reset = scsi_disk_reset; dc->props = scsi_block_properties; dc->vmsd = &vmstate_scsi_disk_state; } static const TypeInfo scsi_block_info = { .name = "scsi-block", - .parent = TYPE_SCSI_DEVICE, - .instance_size = sizeof(SCSIDiskState), + .parent = TYPE_SCSI_DISK_BASE, .class_init = scsi_block_class_initfn, }; #endif @@ -2811,13 +2819,13 @@ static void scsi_disk_class_initfn(ObjectClass *klass, void *data) static const TypeInfo scsi_disk_info = { .name = "scsi-disk", - .parent = TYPE_SCSI_DEVICE, - .instance_size = sizeof(SCSIDiskState), + .parent = TYPE_SCSI_DISK_BASE, .class_init = scsi_disk_class_initfn, }; static void scsi_disk_register_types(void) { + type_register_static(&scsi_disk_base_info); type_register_static(&scsi_hd_info); type_register_static(&scsi_cd_info); #ifdef __linux__ -- cgit 1.4.1 From fcaafb1001b9c42817714dd3b2aadcfdb997b53d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 10 May 2016 10:13:00 +0200 Subject: 
scsi-disk: introduce dma_readv and dma_writev These are replacements for blk_aio_preadv and blk_aio_pwritev that allow customization of the data path. They reuse the DMA helpers' DMAIOFunc callback type, so that the same function can be used in either the QEMUSGList or the bounce-buffered case. This customization will be needed in the next patch to do zero-copy SG_IO on scsi-block. Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-disk.c | 67 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 15 deletions(-) (limited to 'hw/scsi/scsi-disk.c') diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 2d9dcdeaab..6506257a00 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -55,7 +55,18 @@ do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0) #define TYPE_SCSI_DISK_BASE "scsi-disk-base" -typedef struct SCSIDiskState SCSIDiskState; +#define SCSI_DISK_BASE(obj) \ + OBJECT_CHECK(SCSIDiskState, (obj), TYPE_SCSI_DISK_BASE) +#define SCSI_DISK_BASE_CLASS(klass) \ + OBJECT_CLASS_CHECK(SCSIDiskClass, (klass), TYPE_SCSI_DISK_BASE) +#define SCSI_DISK_BASE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(SCSIDiskClass, (obj), TYPE_SCSI_DISK_BASE) + +typedef struct SCSIDiskClass { + SCSIDeviceClass parent_class; + DMAIOFunc *dma_readv; + DMAIOFunc *dma_writev; +} SCSIDiskClass; typedef struct SCSIDiskReq { SCSIRequest req; @@ -73,7 +84,7 @@ typedef struct SCSIDiskReq { #define SCSI_DISK_F_DPOFUA 1 #define SCSI_DISK_F_NO_REMOVABLE_DEVOPS 2 -struct SCSIDiskState +typedef struct SCSIDiskState { SCSIDevice qdev; uint32_t features; @@ -90,7 +101,7 @@ struct SCSIDiskState char *product; bool tray_open; bool tray_locked; -}; +} SCSIDiskState; static int scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed); @@ -317,6 +328,7 @@ done: static void scsi_do_read(SCSIDiskReq *r, int ret) { SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); assert (r->req.aiocb 
== NULL); @@ -337,16 +349,16 @@ static void scsi_do_read(SCSIDiskReq *r, int ret) if (r->req.sg) { dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ); r->req.resid -= r->req.sg->size; - r->req.aiocb = dma_blk_read(s->qdev.conf.blk, r->req.sg, - r->sector << BDRV_SECTOR_BITS, - scsi_dma_complete, r); + r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk), + r->req.sg, r->sector << BDRV_SECTOR_BITS, + sdc->dma_readv, r, scsi_dma_complete, r, + DMA_DIRECTION_FROM_DEVICE); } else { scsi_init_iovec(r, SCSI_DMA_BUF_SIZE); block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, r->qiov.size, BLOCK_ACCT_READ); - r->req.aiocb = blk_aio_preadv(s->qdev.conf.blk, - r->sector << BDRV_SECTOR_BITS, &r->qiov, - 0, scsi_read_complete, r); + r->req.aiocb = sdc->dma_readv(r->sector, &r->qiov, + scsi_read_complete, r, r); } done: @@ -506,6 +518,7 @@ static void scsi_write_data(SCSIRequest *req) { SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); /* No data transfer may already be in progress */ assert(r->req.aiocb == NULL); @@ -542,15 +555,15 @@ static void scsi_write_data(SCSIRequest *req) if (r->req.sg) { dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE); r->req.resid -= r->req.sg->size; - r->req.aiocb = dma_blk_write(s->qdev.conf.blk, r->req.sg, - r->sector << BDRV_SECTOR_BITS, - scsi_dma_complete, r); + r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk), + r->req.sg, r->sector << BDRV_SECTOR_BITS, + sdc->dma_writev, r, scsi_dma_complete, r, + DMA_DIRECTION_TO_DEVICE); } else { block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, r->qiov.size, BLOCK_ACCT_WRITE); - r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk, - r->sector << BDRV_SECTOR_BITS, &r->qiov, - 0, scsi_write_complete, r); + r->req.aiocb = sdc->dma_writev(r->sector << BDRV_SECTOR_BITS, &r->qiov, + 
scsi_write_complete, r, r); } } @@ -2658,12 +2671,35 @@ static int scsi_block_parse_cdb(SCSIDevice *d, SCSICommand *cmd, #endif +static +BlockAIOCB *scsi_dma_readv(int64_t offset, QEMUIOVector *iov, + BlockCompletionFunc *cb, void *cb_opaque, + void *opaque) +{ + SCSIDiskReq *r = opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + return blk_aio_preadv(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque); +} + +static +BlockAIOCB *scsi_dma_writev(int64_t offset, QEMUIOVector *iov, + BlockCompletionFunc *cb, void *cb_opaque, + void *opaque) +{ + SCSIDiskReq *r = opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque); +} + static void scsi_disk_base_class_initfn(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass); dc->fw_name = "disk"; dc->reset = scsi_disk_reset; + sdc->dma_readv = scsi_dma_readv; + sdc->dma_writev = scsi_dma_writev; } static const TypeInfo scsi_disk_base_info = { @@ -2671,6 +2707,7 @@ static const TypeInfo scsi_disk_base_info = { .parent = TYPE_SCSI_DEVICE, .class_init = scsi_disk_base_class_initfn, .instance_size = sizeof(SCSIDiskState), + .class_size = sizeof(SCSIDiskClass), }; #define DEFINE_SCSI_DISK_PROPERTIES() \ -- cgit 1.4.1 From 94f8ba1125ee7dfc7aa46166c8e3312e5ad949df Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 13 May 2016 13:14:52 +0200 Subject: scsi-disk: add need_fua_emulation to SCSIDiskClass scsi-block will be able to do FUA just by passing the request through to the LUN (which is also more efficient); there is no need to emulate it like we do for scsi-disk. Add a new method to distinguish this. 
Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-disk.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'hw/scsi/scsi-disk.c') diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 6506257a00..28ab277d8d 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -66,6 +66,7 @@ typedef struct SCSIDiskClass { SCSIDeviceClass parent_class; DMAIOFunc *dma_readv; DMAIOFunc *dma_writev; + bool (*need_fua_emulation)(SCSICommand *cmd); } SCSIDiskClass; typedef struct SCSIDiskReq { @@ -75,6 +76,7 @@ typedef struct SCSIDiskReq { uint32_t sector_count; uint32_t buflen; bool started; + bool need_fua_emulation; struct iovec iov; QEMUIOVector qiov; BlockAcctCookie acct; @@ -236,7 +238,7 @@ static void scsi_write_do_fua(SCSIDiskReq *r) goto done; } - if (scsi_is_cmd_fua(&r->req.cmd)) { + if (r->need_fua_emulation) { block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0, BLOCK_ACCT_FLUSH); r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r); @@ -413,7 +415,7 @@ static void scsi_read_data(SCSIRequest *req) first = !r->started; r->started = true; - if (first && scsi_is_cmd_fua(&r->req.cmd)) { + if (first && r->need_fua_emulation) { block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0, BLOCK_ACCT_FLUSH); r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read_cb, r); @@ -2153,6 +2155,7 @@ static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf) { SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); + SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); uint32_t len; uint8_t command; @@ -2211,6 +2214,7 @@ static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf) scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE)); return 0; } + r->need_fua_emulation = sdc->need_fua_emulation(&r->req.cmd); if (r->sector_count == 0) { scsi_req_complete(&r->req, GOOD); } @@ -2700,6 +2704,7 @@ static void 
scsi_disk_base_class_initfn(ObjectClass *klass, void *data) dc->reset = scsi_disk_reset; sdc->dma_readv = scsi_dma_readv; sdc->dma_writev = scsi_dma_writev; + sdc->need_fua_emulation = scsi_is_cmd_fua; } static const TypeInfo scsi_disk_base_info = { -- cgit 1.4.1 From 5b956f415a356449a4171d5e0c7d9a25bbc84b5a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 13 May 2016 13:08:13 +0200 Subject: scsi-disk: introduce scsi_disk_req_check_error Commonize all the checks for canceled requests and errors. The next patch will add another case to check for, in order to handle passthrough commands. There is no semantic change here; the only nontrivial modification is in scsi_write_do_fua, where cancellation has been checked earlier by both callers. Thus, the check is replaced with an assertion. Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-disk.c | 89 +++++++++++++---------------------------------------- 1 file changed, 22 insertions(+), 67 deletions(-) (limited to 'hw/scsi/scsi-disk.c') diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 28ab277d8d..4684888daf 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -176,6 +176,20 @@ static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req) qemu_iovec_init_external(&r->qiov, &r->iov, 1); } +static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) +{ + if (r->req.io_canceled) { + scsi_req_cancel_complete(&r->req); + return true; + } + + if (ret < 0) { + return scsi_handle_rw_error(r, -ret, acct_failed); + } + + return false; +} + static void scsi_aio_complete(void *opaque, int ret) { SCSIDiskReq *r = (SCSIDiskReq *)opaque; @@ -183,17 +197,10 @@ static void scsi_aio_complete(void *opaque, int ret) assert(r->req.aiocb != NULL); r->req.aiocb = NULL; - if (r->req.io_canceled) { - scsi_req_cancel_complete(&r->req); + if (scsi_disk_req_check_error(r, ret, true)) { goto done; } - if (ret < 0) { - if (scsi_handle_rw_error(r, -ret, true)) { - goto done; - } - } - 
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); scsi_req_complete(&r->req, GOOD); @@ -232,11 +239,7 @@ static void scsi_write_do_fua(SCSIDiskReq *r) SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); assert(r->req.aiocb == NULL); - - if (r->req.io_canceled) { - scsi_req_cancel_complete(&r->req); - goto done; - } + assert(!r->req.io_canceled); if (r->need_fua_emulation) { block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0, @@ -246,26 +249,16 @@ static void scsi_write_do_fua(SCSIDiskReq *r) } scsi_req_complete(&r->req, GOOD); - -done: scsi_req_unref(&r->req); } static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret) { assert(r->req.aiocb == NULL); - - if (r->req.io_canceled) { - scsi_req_cancel_complete(&r->req); + if (scsi_disk_req_check_error(r, ret, false)) { goto done; } - if (ret < 0) { - if (scsi_handle_rw_error(r, -ret, false)) { - goto done; - } - } - r->sector += r->sector_count; r->sector_count = 0; if (r->req.cmd.mode == SCSI_XFER_TO_DEV) { @@ -303,17 +296,10 @@ static void scsi_read_complete(void * opaque, int ret) assert(r->req.aiocb != NULL); r->req.aiocb = NULL; - if (r->req.io_canceled) { - scsi_req_cancel_complete(&r->req); + if (scsi_disk_req_check_error(r, ret, true)) { goto done; } - if (ret < 0) { - if (scsi_handle_rw_error(r, -ret, true)) { - goto done; - } - } - block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); DPRINTF("Data ready tag=0x%x len=%zd\n", r->req.tag, r->qiov.size); @@ -333,18 +319,10 @@ static void scsi_do_read(SCSIDiskReq *r, int ret) SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); assert (r->req.aiocb == NULL); - - if (r->req.io_canceled) { - scsi_req_cancel_complete(&r->req); + if (scsi_disk_req_check_error(r, ret, false)) { goto done; } - if (ret < 0) { - if (scsi_handle_rw_error(r, -ret, false)) { - goto done; - } - } - /* The request is used as the AIO opaque value, so add a ref. 
*/ scsi_req_ref(&r->req); @@ -472,18 +450,10 @@ static void scsi_write_complete_noio(SCSIDiskReq *r, int ret) uint32_t n; assert (r->req.aiocb == NULL); - - if (r->req.io_canceled) { - scsi_req_cancel_complete(&r->req); + if (scsi_disk_req_check_error(r, ret, false)) { goto done; } - if (ret < 0) { - if (scsi_handle_rw_error(r, -ret, false)) { - goto done; - } - } - n = r->qiov.size / 512; r->sector += n; r->sector_count -= n; @@ -1617,18 +1587,10 @@ static void scsi_unmap_complete_noio(UnmapCBData *data, int ret) uint32_t nb_sectors; assert(r->req.aiocb == NULL); - - if (r->req.io_canceled) { - scsi_req_cancel_complete(&r->req); + if (scsi_disk_req_check_error(r, ret, false)) { goto done; } - if (ret < 0) { - if (scsi_handle_rw_error(r, -ret, false)) { - goto done; - } - } - if (data->count > 0) { sector_num = ldq_be_p(&data->inbuf[0]); nb_sectors = ldl_be_p(&data->inbuf[8]) & 0xffffffffULL; @@ -1728,17 +1690,10 @@ static void scsi_write_same_complete(void *opaque, int ret) assert(r->req.aiocb != NULL); r->req.aiocb = NULL; - if (r->req.io_canceled) { - scsi_req_cancel_complete(&r->req); + if (scsi_disk_req_check_error(r, ret, true)) { goto done; } - if (ret < 0) { - if (scsi_handle_rw_error(r, -ret, true)) { - goto done; - } - } - block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); data->nb_sectors -= data->iov.iov_len / 512; -- cgit 1.4.1 From 8fdc7839e40f43a426bc7e858cf1dbfe315a3804 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 10 May 2016 10:50:44 +0200 Subject: scsi-block: always use SG_IO Using pread/pwrite or io_submit has the advantage of eliminating the bounce buffer, but drops the SCSI status. This keeps the guest from seeing unit attention codes, as well as statuses such as RESERVATION CONFLICT. Because we know scsi-block operates on an SBC device we can still use the DMA helpers with SG_IO; just remember to patch the CDBs if the transfer is split into multiple segments. 
Unfortunately, this means that scsi-block will always use the thread pool instead of respecting aio=native. Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-disk.c | 214 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 196 insertions(+), 18 deletions(-) (limited to 'hw/scsi/scsi-disk.c') diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 4684888daf..ace65e0720 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -80,6 +80,7 @@ typedef struct SCSIDiskReq { struct iovec iov; QEMUIOVector qiov; BlockAcctCookie acct; + unsigned char *status; } SCSIDiskReq; #define SCSI_DISK_F_REMOVABLE 0 @@ -187,6 +188,15 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) return scsi_handle_rw_error(r, -ret, acct_failed); } + if (r->status && *r->status) { + if (acct_failed) { + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + scsi_req_complete(&r->req, *r->status); + return true; + } + return false; } @@ -2552,16 +2562,145 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) scsi_generic_read_device_identification(&s->qdev); } +typedef struct SCSIBlockReq { + SCSIDiskReq req; + sg_io_hdr_t io_header; + + /* Selected bytes of the original CDB, copied into our own CDB. */ + uint8_t cmd, cdb1, group_number; + + /* CDB passed to SG_IO. */ + uint8_t cdb[16]; +} SCSIBlockReq; + +static BlockAIOCB *scsi_block_do_sgio(SCSIBlockReq *req, + int64_t offset, QEMUIOVector *iov, + int direction, + BlockCompletionFunc *cb, void *opaque) +{ + sg_io_hdr_t *io_header = &req->io_header; + SCSIDiskReq *r = &req->req; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + int nb_logical_blocks; + uint64_t lba; + BlockAIOCB *aiocb; + + /* This is not supported yet. It can only happen if the guest does + * reads and writes that are not aligned to one logical sector + * _and_ cover multiple MemoryRegions. 
+ */ + assert(offset % s->qdev.blocksize == 0); + assert(iov->size % s->qdev.blocksize == 0); + + io_header->interface_id = 'S'; + + /* The data transfer comes from the QEMUIOVector. */ + io_header->dxfer_direction = direction; + io_header->dxfer_len = iov->size; + io_header->dxferp = (void *)iov->iov; + io_header->iovec_count = iov->niov; + assert(io_header->iovec_count == iov->niov); /* no overflow! */ + + /* Build a new CDB with the LBA and length patched in, in case + * DMA helpers split the transfer in multiple segments. Do not + * build a CDB smaller than what the guest wanted, and only build + * a larger one if strictly necessary. + */ + io_header->cmdp = req->cdb; + lba = offset / s->qdev.blocksize; + nb_logical_blocks = io_header->dxfer_len / s->qdev.blocksize; + + if ((req->cmd >> 5) == 0 && lba <= 0x1ffff) { + /* 6-byte CDB */ + stl_be_p(&req->cdb[0], lba | (req->cmd << 24)); + req->cdb[4] = nb_logical_blocks; + req->cdb[5] = 0; + io_header->cmd_len = 6; + } else if ((req->cmd >> 5) <= 1 && lba <= 0xffffffffULL) { + /* 10-byte CDB */ + req->cdb[0] = (req->cmd & 0x1f) | 0x20; + req->cdb[1] = req->cdb1; + stl_be_p(&req->cdb[2], lba); + req->cdb[6] = req->group_number; + stw_be_p(&req->cdb[7], nb_logical_blocks); + req->cdb[9] = 0; + io_header->cmd_len = 10; + } else if ((req->cmd >> 5) != 4 && lba <= 0xffffffffULL) { + /* 12-byte CDB */ + req->cdb[0] = (req->cmd & 0x1f) | 0xA0; + req->cdb[1] = req->cdb1; + stl_be_p(&req->cdb[2], lba); + stl_be_p(&req->cdb[6], nb_logical_blocks); + req->cdb[10] = req->group_number; + req->cdb[11] = 0; + io_header->cmd_len = 12; + } else { + /* 16-byte CDB */ + req->cdb[0] = (req->cmd & 0x1f) | 0x80; + req->cdb[1] = req->cdb1; + stq_be_p(&req->cdb[2], lba); + stl_be_p(&req->cdb[10], nb_logical_blocks); + req->cdb[14] = req->group_number; + req->cdb[15] = 0; + io_header->cmd_len = 16; + } + + /* The rest is as in scsi-generic.c. 
*/ + io_header->mx_sb_len = sizeof(r->req.sense); + io_header->sbp = r->req.sense; + io_header->timeout = UINT_MAX; + io_header->usr_ptr = r; + io_header->flags |= SG_FLAG_DIRECT_IO; + + aiocb = blk_aio_ioctl(s->qdev.conf.blk, SG_IO, io_header, cb, opaque); + assert(aiocb != NULL); + return aiocb; +} + +static bool scsi_block_no_fua(SCSICommand *cmd) +{ + return false; +} + +static BlockAIOCB *scsi_block_dma_readv(int64_t offset, + QEMUIOVector *iov, + BlockCompletionFunc *cb, void *cb_opaque, + void *opaque) +{ + SCSIBlockReq *r = opaque; + return scsi_block_do_sgio(r, offset, iov, + SG_DXFER_FROM_DEV, cb, cb_opaque); +} + +static BlockAIOCB *scsi_block_dma_writev(int64_t offset, + QEMUIOVector *iov, + BlockCompletionFunc *cb, void *cb_opaque, + void *opaque) +{ + SCSIBlockReq *r = opaque; + return scsi_block_do_sgio(r, offset, iov, + SG_DXFER_TO_DEV, cb, cb_opaque); +} + static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf) { switch (buf[0]) { + case VERIFY_10: + case VERIFY_12: + case VERIFY_16: + /* Check if BYTCHK == 0x01 (data-out buffer contains data + * for the number of logical blocks specified in the length + * field). For other modes, do not use scatter/gather operation. + */ + if ((buf[1] & 6) != 2) { + return false; + } + break; + case READ_6: case READ_10: case READ_12: case READ_16: - case VERIFY_10: - case VERIFY_12: - case VERIFY_16: case WRITE_6: case WRITE_10: case WRITE_12: @@ -2569,21 +2708,8 @@ static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf) case WRITE_VERIFY_10: case WRITE_VERIFY_12: case WRITE_VERIFY_16: - /* If we are not using O_DIRECT, we might read stale data from the - * host cache if writes were made using other commands than these - * ones (such as WRITE SAME or EXTENDED COPY, etc.). So, without - * O_DIRECT everything must go through SG_IO. 
- */ - if (!(blk_get_flags(s->qdev.conf.blk) & BDRV_O_NOCACHE)) { - break; - } - - /* MMC writing cannot be done via pread/pwrite, because it sometimes + /* MMC writing cannot be done via DMA helpers, because it sometimes * involves writing beyond the maximum LBA or to negative LBA (lead-in). - * And once you do these writes, reading from the block device is - * unreliable, too. It is even possible that reads deliver random data - * from the host page cache (this is probably a Linux bug). - * * We might use scsi_disk_dma_reqops as long as no writing commands are * seen, but performance usually isn't paramount on optical media. So, * just make scsi-block operate the same as scsi-generic for them. @@ -2601,6 +2727,54 @@ static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf) } +static int32_t scsi_block_dma_command(SCSIRequest *req, uint8_t *buf) +{ + SCSIBlockReq *r = (SCSIBlockReq *)req; + r->cmd = req->cmd.buf[0]; + switch (r->cmd >> 5) { + case 0: + /* 6-byte CDB. */ + r->cdb1 = r->group_number = 0; + break; + case 1: + /* 10-byte CDB. */ + r->cdb1 = req->cmd.buf[1]; + r->group_number = req->cmd.buf[6]; + case 4: + /* 12-byte CDB. */ + r->cdb1 = req->cmd.buf[1]; + r->group_number = req->cmd.buf[10]; + break; + case 5: + /* 16-byte CDB. */ + r->cdb1 = req->cmd.buf[1]; + r->group_number = req->cmd.buf[14]; + break; + default: + abort(); + } + + if (r->cdb1 & 0xe0) { + /* Protection information is not supported. 
*/ + scsi_check_condition(&r->req, SENSE_CODE(INVALID_FIELD)); + return 0; + } + + r->req.status = &r->io_header.status; + return scsi_disk_dma_command(req, buf); +} + +static const SCSIReqOps scsi_block_dma_reqops = { + .size = sizeof(SCSIBlockReq), + .free_req = scsi_free_request, + .send_command = scsi_block_dma_command, + .read_data = scsi_read_data, + .write_data = scsi_write_data, + .get_buf = scsi_get_buf, + .load_request = scsi_disk_load_request, + .save_request = scsi_disk_save_request, +}; + static SCSIRequest *scsi_block_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun, uint8_t *buf, void *hba_private) @@ -2611,7 +2785,7 @@ static SCSIRequest *scsi_block_new_request(SCSIDevice *d, uint32_t tag, return scsi_req_alloc(&scsi_generic_req_ops, &s->qdev, tag, lun, hba_private); } else { - return scsi_req_alloc(&scsi_disk_dma_reqops, &s->qdev, tag, lun, + return scsi_req_alloc(&scsi_block_dma_reqops, &s->qdev, tag, lun, hba_private); } } @@ -2767,10 +2941,14 @@ static void scsi_block_class_initfn(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass); + SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass); sc->realize = scsi_block_realize; sc->alloc_req = scsi_block_new_request; sc->parse_cdb = scsi_block_parse_cdb; + sdc->dma_readv = scsi_block_dma_readv; + sdc->dma_writev = scsi_block_dma_writev; + sdc->need_fua_emulation = scsi_block_no_fua; dc->desc = "SCSI block device passthrough"; dc->props = scsi_block_properties; dc->vmsd = &vmstate_scsi_disk_state; -- cgit 1.4.1