From 86f425db3b1c4b6c4a2927eaec35627f9ab2e703 Mon Sep 17 00:00:00 2001 From: Alex Bligh Date: Fri, 5 Apr 2013 15:45:10 +0000 Subject: Xen PV backend: Move call to bdrv_new from blk_init to blk_connect This commit delays the point at which bdrv_new (and hence blk_open on the underlying device) is called from blk_init to blk_connect. This ensures that in an inbound live migrate, the block device is not opened until it has been closed at the other end. This is in preparation for supporting devices with open/close consistency without using O_DIRECT. This commit does NOT itself change O_DIRECT semantics. Signed-off-by: Alex Bligh Signed-off-by: Stefano Stabellini --- hw/xen_disk.c | 72 +++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 32 deletions(-) (limited to 'hw/xen_disk.c') diff --git a/hw/xen_disk.c b/hw/xen_disk.c index 83329e2e69..24e8b2491a 100644 --- a/hw/xen_disk.c +++ b/hw/xen_disk.c @@ -700,7 +700,7 @@ static void blk_alloc(struct XenDevice *xendev) static int blk_init(struct XenDevice *xendev) { struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); - int index, qflags, info = 0; + int info = 0; /* read xenstore entries */ if (blkdev->params == NULL) { @@ -743,10 +743,7 @@ static int blk_init(struct XenDevice *xendev) } /* read-only ? 
*/ - qflags = BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NATIVE_AIO; - if (strcmp(blkdev->mode, "w") == 0) { - qflags |= BDRV_O_RDWR; - } else { + if (strcmp(blkdev->mode, "w")) { info |= VDISK_READONLY; } @@ -755,6 +752,41 @@ static int blk_init(struct XenDevice *xendev) info |= VDISK_CDROM; } + blkdev->file_blk = BLOCK_SIZE; + + /* fill info + * blk_connect supplies sector-size and sectors + */ + xenstore_write_be_int(&blkdev->xendev, "feature-flush-cache", 1); + xenstore_write_be_int(&blkdev->xendev, "feature-persistent", 1); + xenstore_write_be_int(&blkdev->xendev, "info", info); + return 0; + +out_error: + g_free(blkdev->params); + blkdev->params = NULL; + g_free(blkdev->mode); + blkdev->mode = NULL; + g_free(blkdev->type); + blkdev->type = NULL; + g_free(blkdev->dev); + blkdev->dev = NULL; + g_free(blkdev->devtype); + blkdev->devtype = NULL; + return -1; +} + +static int blk_connect(struct XenDevice *xendev) +{ + struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); + int pers, index, qflags; + + /* read-only ? 
*/ + qflags = BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NATIVE_AIO; + if (strcmp(blkdev->mode, "w") == 0) { + qflags |= BDRV_O_RDWR; + } + /* init qemu block driver */ index = (blkdev->xendev.dev - 202 * 256) / 16; blkdev->dinfo = drive_get(IF_XEN, 0, index); @@ -770,7 +802,7 @@ static int blk_init(struct XenDevice *xendev) } } if (!blkdev->bs) { - goto out_error; + return -1; } } else { /* setup via qemu cmdline -> already setup for us */ @@ -778,7 +810,6 @@ static int blk_init(struct XenDevice *xendev) blkdev->bs = blkdev->dinfo->bdrv; } bdrv_attach_dev_nofail(blkdev->bs, blkdev); - blkdev->file_blk = BLOCK_SIZE; blkdev->file_size = bdrv_getlength(blkdev->bs); if (blkdev->file_size < 0) { xen_be_printf(&blkdev->xendev, 1, "bdrv_getlength: %d (%s) | drv %s\n", @@ -792,33 +823,10 @@ static int blk_init(struct XenDevice *xendev) blkdev->type, blkdev->fileproto, blkdev->filename, blkdev->file_size, blkdev->file_size >> 20); - /* fill info */ - xenstore_write_be_int(&blkdev->xendev, "feature-flush-cache", 1); - xenstore_write_be_int(&blkdev->xendev, "feature-persistent", 1); - xenstore_write_be_int(&blkdev->xendev, "info", info); - xenstore_write_be_int(&blkdev->xendev, "sector-size", blkdev->file_blk); + /* Fill in number of sector size and number of sectors */ + xenstore_write_be_int(&blkdev->xendev, "sector-size", blkdev->file_blk); xenstore_write_be_int(&blkdev->xendev, "sectors", blkdev->file_size / blkdev->file_blk); - return 0; - -out_error: - g_free(blkdev->params); - blkdev->params = NULL; - g_free(blkdev->mode); - blkdev->mode = NULL; - g_free(blkdev->type); - blkdev->type = NULL; - g_free(blkdev->dev); - blkdev->dev = NULL; - g_free(blkdev->devtype); - blkdev->devtype = NULL; - return -1; -} - -static int blk_connect(struct XenDevice *xendev) -{ - struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); - int pers; if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1) { return -1; -- cgit 1.4.1 From 
c1a88ad1f4ac994cd70695bf08141d161e21533e Mon Sep 17 00:00:00 2001 From: Alex Bligh Date: Fri, 5 Apr 2013 15:45:15 +0000 Subject: Xen PV backend: Disable use of O_DIRECT by default as it results in crashes. Due to what is almost certainly a kernel bug, writes with O_DIRECT may continue to reference the page after the write has been marked as completed, particularly in the case of TCP retransmit. In other scenarios, this "merely" risks data corruption on the write, but with Xen, pages from domU are only transiently mapped into dom0's memory, resulting in kernel panics when they are subsequently accessed. This brings PV devices in line with emulated devices. Removing O_DIRECT is safe as barrier operations are now correctly passed through. See: http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html for more details. Signed-off-by: Alex Bligh Signed-off-by: Stefano Stabellini --- hw/xen_disk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'hw/xen_disk.c') diff --git a/hw/xen_disk.c b/hw/xen_disk.c index 24e8b2491a..1a30f0a9ed 100644 --- a/hw/xen_disk.c +++ b/hw/xen_disk.c @@ -782,7 +782,7 @@ static int blk_connect(struct XenDevice *xendev) int pers, index, qflags; /* read-only ? */ - qflags = BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NATIVE_AIO; + qflags = BDRV_O_CACHE_WB | BDRV_O_NATIVE_AIO; if (strcmp(blkdev->mode, "w") == 0) { qflags |= BDRV_O_RDWR; } -- cgit 1.4.1 From 9246ce881128df2a69178779c1ef33c83df3c70d Mon Sep 17 00:00:00 2001 From: Felipe Franciosi Date: Fri, 5 Apr 2013 15:47:59 +0000 Subject: Allow xen guests to plug disks of 1 TiB or more The current xen backend driver implementation uses int64_t variables to store the size of the corresponding backend disk/file. It also uses an int64_t variable to store the block size of that image. When writing the number of sectors (file_size/block_size) to xenstore, however, it passes these values as 32-bit signed integers. This will cause an overflow for any disk of 1 TiB or more.
This patch changes the xen backend driver to use a 64-bit integer xenstore write function. Signed-off-by: Felipe Franciosi Signed-off-by: Stefano Stabellini --- hw/xen_disk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'hw/xen_disk.c') diff --git a/hw/xen_disk.c b/hw/xen_disk.c index 1a30f0a9ed..47a51cf014 100644 --- a/hw/xen_disk.c +++ b/hw/xen_disk.c @@ -825,8 +825,8 @@ static int blk_connect(struct XenDevice *xendev) /* Fill in number of sector size and number of sectors */ xenstore_write_be_int(&blkdev->xendev, "sector-size", blkdev->file_blk); - xenstore_write_be_int(&blkdev->xendev, "sectors", - blkdev->file_size / blkdev->file_blk); + xenstore_write_be_int64(&blkdev->xendev, "sectors", + blkdev->file_size / blkdev->file_blk); if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1) { return -1; -- cgit 1.4.1