diff options
Diffstat (limited to 'block/sheepdog.c')
| -rw-r--r-- | block/sheepdog.c | 167 |
1 files changed, 109 insertions, 58 deletions
diff --git a/block/sheepdog.c b/block/sheepdog.c index 4ecbf5f498..1fa19399f0 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -200,6 +200,8 @@ typedef struct SheepdogInode { uint32_t data_vdi_id[MAX_DATA_OBJS]; } SheepdogInode; +#define SD_INODE_HEADER_SIZE offsetof(SheepdogInode, data_vdi_id) + /* * 64 bit FNV-1a non-zero initial basis */ @@ -282,6 +284,7 @@ typedef struct AIOReq { unsigned int data_len; uint8_t flags; uint32_t id; + bool create; QLIST_ENTRY(AIOReq) aio_siblings; } AIOReq; @@ -314,6 +317,7 @@ struct SheepdogAIOCB { typedef struct BDRVSheepdogState { BlockDriverState *bs; + AioContext *aio_context; SheepdogInode inode; @@ -404,7 +408,7 @@ static const char * sd_strerror(int err) static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb, uint64_t oid, unsigned int data_len, - uint64_t offset, uint8_t flags, + uint64_t offset, uint8_t flags, bool create, uint64_t base_oid, unsigned int iov_offset) { AIOReq *aio_req; @@ -418,6 +422,7 @@ static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb, aio_req->data_len = data_len; aio_req->flags = flags; aio_req->id = s->aioreq_seq_num++; + aio_req->create = create; acb->nr_pending++; return aio_req; @@ -496,7 +501,7 @@ static void sd_aio_cancel(BlockDriverAIOCB *blockacb) sd_finish_aiocb(acb); return; } - qemu_aio_wait(); + aio_poll(s->aio_context, true); } } @@ -578,6 +583,7 @@ static void restart_co_req(void *opaque) typedef struct SheepdogReqCo { int sockfd; + AioContext *aio_context; SheepdogReq *hdr; void *data; unsigned int *wlen; @@ -598,14 +604,14 @@ static coroutine_fn void do_co_req(void *opaque) unsigned int *rlen = srco->rlen; co = qemu_coroutine_self(); - qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, co); + aio_set_fd_handler(srco->aio_context, sockfd, NULL, restart_co_req, co); ret = send_co_req(sockfd, hdr, data, wlen); if (ret < 0) { goto out; } - qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, co); + aio_set_fd_handler(srco->aio_context, sockfd, restart_co_req, NULL, co); ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr)); if (ret != sizeof(*hdr)) { @@ -630,18 +636,19 @@ static coroutine_fn void do_co_req(void *opaque) out: /* there is at most one request for this sockfd, so it is safe to * set each handler to NULL. */ - qemu_aio_set_fd_handler(sockfd, NULL, NULL, NULL); + aio_set_fd_handler(srco->aio_context, sockfd, NULL, NULL, NULL); srco->ret = ret; srco->finished = true; } -static int do_req(int sockfd, SheepdogReq *hdr, void *data, - unsigned int *wlen, unsigned int *rlen) +static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr, + void *data, unsigned int *wlen, unsigned int *rlen) { Coroutine *co; SheepdogReqCo srco = { .sockfd = sockfd, + .aio_context = aio_context, .hdr = hdr, .data = data, .wlen = wlen, @@ -656,7 +663,7 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data, co = qemu_coroutine_create(do_co_req); qemu_coroutine_enter(co, &srco); while (!srco.finished) { - qemu_aio_wait(); + aio_poll(aio_context, true); } } @@ -664,8 +671,8 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data, } static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, - struct iovec *iov, int niov, bool create, - enum AIOCBState aiocb_type); + struct iovec *iov, int niov, + enum AIOCBState aiocb_type); static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req); static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag); static int get_sheep_fd(BDRVSheepdogState *s, Error **errp); @@ -698,7 +705,7 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid) /* move aio_req from pending list to inflight one */ QLIST_REMOVE(aio_req, aio_siblings); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, false, + add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, acb->aiocb_type); } } @@ -709,7 +716,7 @@ static coroutine_fn void reconnect_to_sdog(void *opaque) BDRVSheepdogState *s = opaque; AIOReq *aio_req, *next; - qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL); + aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL); close(s->fd); s->fd = -1; @@ -797,7 +804,7 @@ static void coroutine_fn aio_read_response(void *opaque) } idx = data_oid_to_idx(aio_req->oid); - if (s->inode.data_vdi_id[idx] != s->inode.vdi_id) { + if (aio_req->create) { /* * If the object is newly created one, we need to update * the vdi object (metadata object). min_dirty_data_idx @@ -922,7 +929,7 @@ static int get_sheep_fd(BDRVSheepdogState *s, Error **errp) return fd; } - qemu_aio_set_fd_handler(fd, co_read_response, NULL, s); + aio_set_fd_handler(s->aio_context, fd, co_read_response, NULL, s); return fd; } @@ -1092,7 +1099,7 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename, hdr.snapid = snapid; hdr.flags = SD_FLAG_CMD_WRITE; - ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen); + ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen); if (ret) { error_setg_errno(errp, -ret, "cannot get vdi info"); goto out; @@ -1117,8 +1124,8 @@ out: } static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, - struct iovec *iov, int niov, bool create, - enum AIOCBState aiocb_type) + struct iovec *iov, int niov, + enum AIOCBState aiocb_type) { int nr_copies = s->inode.nr_copies; SheepdogObjReq hdr; @@ -1129,6 +1136,7 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, uint64_t offset = aio_req->offset; uint8_t flags = aio_req->flags; uint64_t old_oid = aio_req->base_oid; + bool create = aio_req->create; if (!nr_copies) { error_report("bug"); @@ -1173,7 +1181,8 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, qemu_co_mutex_lock(&s->lock); s->co_send = qemu_coroutine_self(); - qemu_aio_set_fd_handler(s->fd, co_read_response, co_write_request, s); + aio_set_fd_handler(s->aio_context, s->fd, + co_read_response, co_write_request, s); socket_set_cork(s->fd, 1); /* send a header */ @@ -1191,12 +1200,13 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, } out: socket_set_cork(s->fd, 0); - qemu_aio_set_fd_handler(s->fd, co_read_response, NULL, s); + aio_set_fd_handler(s->aio_context, s->fd, co_read_response, NULL, s); s->co_send = NULL; qemu_co_mutex_unlock(&s->lock); } -static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies, +static int read_write_object(int fd, AioContext *aio_context, char *buf, + uint64_t oid, uint8_t copies, unsigned int datalen, uint64_t offset, bool write, bool create, uint32_t cache_flags) { @@ -1229,7 +1239,7 @@ static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies, hdr.offset = offset; hdr.copies = copies; - ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen); + ret = do_req(fd, aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen); if (ret) { error_report("failed to send a request to the sheep"); return ret; @@ -1244,19 +1254,23 @@ static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies, } } -static int read_object(int fd, char *buf, uint64_t oid, uint8_t copies, +static int read_object(int fd, AioContext *aio_context, char *buf, + uint64_t oid, uint8_t copies, unsigned int datalen, uint64_t offset, uint32_t cache_flags) { - return read_write_object(fd, buf, oid, copies, datalen, offset, false, + return read_write_object(fd, aio_context, buf, oid, copies, + datalen, offset, false, false, cache_flags); } -static int write_object(int fd, char *buf, uint64_t oid, uint8_t copies, +static int write_object(int fd, AioContext *aio_context, char *buf, + uint64_t oid, uint8_t copies, unsigned int datalen, uint64_t offset, bool create, uint32_t cache_flags) { - return read_write_object(fd, buf, oid, copies, datalen, offset, true, + return read_write_object(fd, aio_context, buf, oid, copies, + datalen, offset, true, create, cache_flags); } @@ -1275,7 +1289,7 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag) return -EIO; } - inode = g_malloc(sizeof(s->inode)); + inode = g_malloc(SD_INODE_HEADER_SIZE); ret = find_vdi_name(s, s->name, snapid, tag, &vid, false, &local_err); if (ret) { @@ -1284,14 +1298,15 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag) goto out; } - ret = read_object(fd, (char *)inode, vid_to_vdi_oid(vid), - s->inode.nr_copies, sizeof(*inode), 0, s->cache_flags); + ret = read_object(fd, s->aio_context, (char *)inode, vid_to_vdi_oid(vid), + s->inode.nr_copies, SD_INODE_HEADER_SIZE, 0, + s->cache_flags); if (ret < 0) { goto out; } if (inode->vdi_id != s->inode.vdi_id) { - memcpy(&s->inode, inode, sizeof(s->inode)); + memcpy(&s->inode, inode, SD_INODE_HEADER_SIZE); } out: @@ -1315,6 +1330,7 @@ static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req) DPRINTF("simultaneous create to %" PRIx64 "\n", aio_req->oid); aio_req->flags = 0; aio_req->base_oid = 0; + aio_req->create = false; QLIST_REMOVE(aio_req, aio_siblings); QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings); return true; @@ -1327,7 +1343,8 @@ static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req) static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) { SheepdogAIOCB *acb = aio_req->aiocb; - bool create = false; + + aio_req->create = false; /* check whether this request becomes a CoW one */ if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) { @@ -1345,20 +1362,36 @@ static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx); aio_req->flags |= SD_FLAG_CMD_COW; } - create = true; + aio_req->create = true; } out: if (is_data_obj(aio_req->oid)) { - add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create, + add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, acb->aiocb_type); } else { struct iovec iov; iov.iov_base = &s->inode; iov.iov_len = sizeof(s->inode); - add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA); + add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA); } } +static void sd_detach_aio_context(BlockDriverState *bs) +{ + BDRVSheepdogState *s = bs->opaque; + + aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL); +} + +static void sd_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BDRVSheepdogState *s = bs->opaque; + + s->aio_context = new_context; + aio_set_fd_handler(new_context, s->fd, co_read_response, NULL, s); +} + /* TODO Convert to fine grained options */ static QemuOptsList runtime_opts = { .name = "sheepdog", @@ -1387,6 +1420,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, const char *filename; s->bs = bs; + s->aio_context = bdrv_get_aio_context(bs); opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); @@ -1448,8 +1482,8 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, } buf = g_malloc(SD_INODE_SIZE); - ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0, - s->cache_flags); + ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid), + 0, SD_INODE_SIZE, 0, s->cache_flags); closesocket(fd); @@ -1469,7 +1503,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, g_free(buf); return 0; out: - qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL); + aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL); if (s->fd >= 0) { closesocket(s->fd); } @@ -1512,7 +1546,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, hdr.copy_policy = s->inode.copy_policy; hdr.copies = s->inode.nr_copies; - ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen); + ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen); closesocket(fd); @@ -1766,7 +1800,8 @@ static void sd_close(BlockDriverState *bs) hdr.data_length = wlen; hdr.flags = SD_FLAG_CMD_WRITE; - ret = do_req(fd, (SheepdogReq *)&hdr, s->name, &wlen, &rlen); + ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, + s->name, &wlen, &rlen); closesocket(fd); @@ -1775,7 +1810,7 @@ static void sd_close(BlockDriverState *bs) error_report("%s, %s", sd_strerror(rsp->result), s->name); } - qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL); + aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL); closesocket(s->fd); g_free(s->host_spec); } @@ -1812,8 +1847,9 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset) /* we don't need to update entire object */ datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); s->inode.vdi_size = offset; - ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), - s->inode.nr_copies, datalen, 0, false, s->cache_flags); + ret = write_object(fd, s->aio_context, (char *)&s->inode, + vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies, + datalen, 0, false, s->cache_flags); close(fd); if (ret < 0) { @@ -1849,9 +1885,9 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb) iov.iov_base = &s->inode; iov.iov_len = sizeof(s->inode); aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), - data_len, offset, 0, 0, offset); + data_len, offset, 0, false, 0, offset); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA); + add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA); acb->aio_done_func = sd_finish_aiocb; acb->aiocb_type = AIOCB_WRITE_UDATA; @@ -1882,7 +1918,8 @@ static bool sd_delete(BDRVSheepdogState *s) return false; } - ret = do_req(fd, (SheepdogReq *)&hdr, s->name, &wlen, &rlen); + ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, + s->name, &wlen, &rlen); closesocket(fd); if (ret) { return false; @@ -1939,8 +1976,8 @@ static int sd_create_branch(BDRVSheepdogState *s) goto out; } - ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies, - SD_INODE_SIZE, 0, s->cache_flags); + ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid), + s->inode.nr_copies, SD_INODE_SIZE, 0, s->cache_flags); closesocket(fd); @@ -2049,7 +2086,8 @@ static int coroutine_fn sd_co_rw_vector(void *p) DPRINTF("new oid %" PRIx64 "\n", oid); } - aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done); + aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create, + old_oid, done); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); if (create) { @@ -2058,7 +2096,7 @@ static int coroutine_fn sd_co_rw_vector(void *p) } } - add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create, + add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, acb->aiocb_type); done: offset = 0; @@ -2138,9 +2176,9 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) acb->aio_done_func = sd_finish_aiocb; aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), - 0, 0, 0, 0, 0); + 0, 0, 0, false, 0, 0); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - add_aio_request(s, aio_req, NULL, 0, false, acb->aiocb_type); + add_aio_request(s, aio_req, NULL, 0, acb->aiocb_type); qemu_coroutine_yield(); return acb->ret; @@ -2187,8 +2225,9 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) goto cleanup; } - ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), - s->inode.nr_copies, datalen, 0, false, s->cache_flags); + ret = write_object(fd, s->aio_context, (char *)&s->inode, + vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies, + datalen, 0, false, s->cache_flags); if (ret < 0) { error_report("failed to write snapshot's inode."); goto cleanup; @@ -2203,8 +2242,9 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) goto cleanup; } - ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid), - s->inode.nr_copies, datalen, 0, s->cache_flags); + ret = read_object(fd, s->aio_context, (char *)inode, + vid_to_vdi_oid(new_vid), s->inode.nr_copies, datalen, 0, + s->cache_flags); if (ret < 0) { error_report("failed to read new inode info. %s", strerror(errno)); @@ -2311,7 +2351,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) req.opcode = SD_OP_READ_VDIS; req.data_length = max; - ret = do_req(fd, (SheepdogReq *)&req, vdi_inuse, &wlen, &rlen); + ret = do_req(fd, s->aio_context, (SheepdogReq *)&req, + vdi_inuse, &wlen, &rlen); closesocket(fd); if (ret) { @@ -2338,7 +2379,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) } /* we don't need to read entire object */ - ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid), + ret = read_object(fd, s->aio_context, (char *)&inode, + vid_to_vdi_oid(vid), 0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0, s->cache_flags); @@ -2403,11 +2445,11 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, create = (offset == 0); if (load) { - ret = read_object(fd, (char *)data, vmstate_oid, + ret = read_object(fd, s->aio_context, (char *)data, vmstate_oid, s->inode.nr_copies, data_len, offset, s->cache_flags); } else { - ret = write_object(fd, (char *)data, vmstate_oid, + ret = write_object(fd, s->aio_context, (char *)data, vmstate_oid, s->inode.nr_copies, data_len, offset, create, s->cache_flags); } @@ -2580,6 +2622,9 @@ static BlockDriver bdrv_sheepdog = { .bdrv_save_vmstate = sd_save_vmstate, .bdrv_load_vmstate = sd_load_vmstate, + .bdrv_detach_aio_context = sd_detach_aio_context, + .bdrv_attach_aio_context = sd_attach_aio_context, + .create_options = sd_create_options, }; @@ -2610,6 +2655,9 @@ static BlockDriver bdrv_sheepdog_tcp = { .bdrv_save_vmstate = sd_save_vmstate, .bdrv_load_vmstate = sd_load_vmstate, + .bdrv_detach_aio_context = sd_detach_aio_context, + .bdrv_attach_aio_context = sd_attach_aio_context, + .create_options = sd_create_options, }; @@ -2640,6 +2688,9 @@ static BlockDriver bdrv_sheepdog_unix = { .bdrv_save_vmstate = sd_save_vmstate, .bdrv_load_vmstate = sd_load_vmstate, + .bdrv_detach_aio_context = sd_detach_aio_context, + .bdrv_attach_aio_context = sd_attach_aio_context, + .create_options = sd_create_options, }; |