summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2024-01-19 11:39:27 +0000
committerPeter Maydell <peter.maydell@linaro.org>2024-01-19 11:39:27 +0000
commite566fb85939bec58a75f22ff9d80d0bac8717806 (patch)
tree9581dd6247f94588dd16a1f026305382d26cd09c
parent88cf5fec91e50cd34bc002b633b4116228db0bc8 (diff)
parent379652e967b32ac905056bf723b54298b2f79a51 (diff)
downloadfocaccia-qemu-e566fb85939bec58a75f22ff9d80d0bac8717806.tar.gz
focaccia-qemu-e566fb85939bec58a75f22ff9d80d0bac8717806.zip
Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging
* vga: implement odd/even and byte/word/doubleword modes more accurately
* vga: implement horizontal pel panning
* KVM: add class property to configure KVM device node to use
* fix various bugs in x86 TCG PC-relative translation
* properly align huge pages on LoongArch
* cleanup patches

# -----BEGIN PGP SIGNATURE-----
#
# iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmWo8wkUHHBib256aW5p
# QHJlZGhhdC5jb20ACgkQv/vSX3jHroMhHAf+KiYvN/gxrYnpSS7IfbFqx6MtQrwW
# Hj9QsfgRF2sThRu6BveLMG7REwEtwFh7lMhUbiyR/mzuYN9cMmHDw4OE62WRrL86
# 3PAtzAaJ/Lzd8Qp4K4un+ZrFZvq8a83HvGRRTaF9wO6/9EwpqmqMTbrNlSSA08Gy
# mnfXGEt0oYitJ3JUH0MI8y6EOh1mkIhEfGPeyJaGDZVW/m4ob+QIauNOOozYN7r6
# QK+2OU0HeJC5CIzi2o5kq5U0AydVO1iAp7OBxtclYaaSvRyhlpEmbdTIKCzzPlUx
# vZthxbU2PgzUbME8fraUcd7GzT64++QOLDxNyZNEL8PCCcd0lRQ0EEukgA==
# =Uz0X
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 18 Jan 2024 09:44:41 GMT
# gpg:                using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg:                issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* tag 'for-upstream' of https://gitlab.com/bonzini/qemu:
  tests/tcg: Don't #include <inttypes.h> in aarch64/system/vtimer.c
  qemu/osdep: Add huge page aligned support on LoongArch platform
  remove unnecessary casts from uintptr_t
  target/i386: pcrel: store low bits of physical address in data[0]
  target/i386: fix incorrect EIP in PC-relative translation blocks
  target/i386: Do not re-compute new pc with CF_PCREL
  io_uring: move LuringState typedef to block/aio.h
  Add class property to configure KVM device node to use
  vga: sort-of implement word and double-word access modes
  vga: use latches in odd/even mode too
  vga: reindent memory access code
  vga: optimize horizontal pel panning in 256-color modes
  vga: implement horizontal pel panning in graphics modes
  vga: mask addresses in non-VESA modes to 256k
  vga: introduce VGADisplayParams
  vga: use common endian swap macros

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--accel/kvm/kvm-all.c25
-rw-r--r--block/io_uring.c6
-rw-r--r--hw/display/cirrus_vga.c28
-rw-r--r--hw/display/vga-helpers.h121
-rw-r--r--hw/display/vga.c458
-rw-r--r--hw/display/vga_int.h18
-rw-r--r--hw/display/vga_regs.h4
-rw-r--r--hw/vfio/common.c4
-rw-r--r--include/block/aio.h8
-rw-r--r--include/block/raw-aio.h1
-rw-r--r--include/qemu/osdep.h8
-rw-r--r--include/sysemu/kvm_int.h1
-rw-r--r--qemu-options.hx8
-rw-r--r--target/i386/sev.c8
-rw-r--r--target/i386/tcg/tcg-cpu.c20
-rw-r--r--target/i386/tcg/translate.c11
-rw-r--r--tests/tcg/aarch64/system/vtimer.c2
-rw-r--r--util/fdmon-io_uring.c2
18 files changed, 433 insertions, 300 deletions
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index bbc60146d1..49e755ec4a 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2349,7 +2349,7 @@ static int kvm_init(MachineState *ms)
     QTAILQ_INIT(&s->kvm_sw_breakpoints);
 #endif
     QLIST_INIT(&s->kvm_parked_vcpus);
-    s->fd = qemu_open_old("/dev/kvm", O_RDWR);
+    s->fd = qemu_open_old(s->device ?: "/dev/kvm", O_RDWR);
     if (s->fd == -1) {
         fprintf(stderr, "Could not access KVM kernel module: %m\n");
         ret = -errno;
@@ -3585,6 +3585,24 @@ static void kvm_set_dirty_ring_size(Object *obj, Visitor *v,
     s->kvm_dirty_ring_size = value;
 }
 
+static char *kvm_get_device(Object *obj,
+                            Error **errp G_GNUC_UNUSED)
+{
+    KVMState *s = KVM_STATE(obj);
+
+    return g_strdup(s->device);
+}
+
+static void kvm_set_device(Object *obj,
+                           const char *value,
+                           Error **errp G_GNUC_UNUSED)
+{
+    KVMState *s = KVM_STATE(obj);
+
+    g_free(s->device);
+    s->device = g_strdup(value);
+}
+
 static void kvm_accel_instance_init(Object *obj)
 {
     KVMState *s = KVM_STATE(obj);
@@ -3603,6 +3621,7 @@ static void kvm_accel_instance_init(Object *obj)
     s->xen_version = 0;
     s->xen_gnttab_max_frames = 64;
     s->xen_evtchn_max_pirq = 256;
+    s->device = NULL;
 }
 
 /**
@@ -3643,6 +3662,10 @@ static void kvm_accel_class_init(ObjectClass *oc, void *data)
     object_class_property_set_description(oc, "dirty-ring-size",
         "Size of KVM dirty page ring buffer (default: 0, i.e. use bitmap)");
 
+    object_class_property_add_str(oc, "device", kvm_get_device, kvm_set_device);
+    object_class_property_set_description(oc, "device",
+        "Path to the device node to use (default: /dev/kvm)");
+
     kvm_arch_accel_class_init(oc);
 }
 
diff --git a/block/io_uring.c b/block/io_uring.c
index 7cdd00e9f1..d77ae55745 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -49,7 +49,7 @@ typedef struct LuringQueue {
     QSIMPLEQ_HEAD(, LuringAIOCB) submit_queue;
 } LuringQueue;
 
-typedef struct LuringState {
+struct LuringState {
     AioContext *aio_context;
 
     struct io_uring ring;
@@ -58,7 +58,7 @@ typedef struct LuringState {
     LuringQueue io_q;
 
     QEMUBH *completion_bh;
-} LuringState;
+};
 
 /**
  * luring_resubmit:
@@ -102,7 +102,7 @@ static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb,
 
     /* Update sqe */
     luringcb->sqeq.off += nread;
-    luringcb->sqeq.addr = (__u64)(uintptr_t)luringcb->resubmit_qiov.iov;
+    luringcb->sqeq.addr = (uintptr_t)luringcb->resubmit_qiov.iov;
     luringcb->sqeq.len = luringcb->resubmit_qiov.niov;
 
     luring_resubmit(s, luringcb);
diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c
index 5dd5136a0c..150883a971 100644
--- a/hw/display/cirrus_vga.c
+++ b/hw/display/cirrus_vga.c
@@ -43,6 +43,7 @@
 #include "hw/qdev-properties.h"
 #include "migration/vmstate.h"
 #include "ui/pixel_ops.h"
+#include "vga_regs.h"
 #include "cirrus_vga_internal.h"
 #include "qom/object.h"
 #include "ui/console.h"
@@ -798,9 +799,9 @@ static int cirrus_bitblt_videotovideo_copy(CirrusVGAState * s)
     if (blit_is_unsafe(s, false))
         return 0;
 
-    return cirrus_do_copy(s, s->cirrus_blt_dstaddr - s->vga.start_addr,
-            s->cirrus_blt_srcaddr - s->vga.start_addr,
-            s->cirrus_blt_width, s->cirrus_blt_height);
+    return cirrus_do_copy(s, s->cirrus_blt_dstaddr - s->vga.params.start_addr,
+                          s->cirrus_blt_srcaddr - s->vga.params.start_addr,
+                          s->cirrus_blt_width, s->cirrus_blt_height);
 }
 
 /***************************************
@@ -1101,30 +1102,29 @@ static void cirrus_write_bitblt(CirrusVGAState * s, unsigned reg_value)
  *
  ***************************************/
 
-static void cirrus_get_offsets(VGACommonState *s1,
-                               uint32_t *pline_offset,
-                               uint32_t *pstart_addr,
-                               uint32_t *pline_compare)
+static void cirrus_get_params(VGACommonState *s1,
+                              VGADisplayParams *params)
 {
     CirrusVGAState * s = container_of(s1, CirrusVGAState, vga);
-    uint32_t start_addr, line_offset, line_compare;
+    uint32_t line_offset;
 
     line_offset = s->vga.cr[0x13]
         | ((s->vga.cr[0x1b] & 0x10) << 4);
     line_offset <<= 3;
-    *pline_offset = line_offset;
+    params->line_offset = line_offset;
 
-    start_addr = (s->vga.cr[0x0c] << 8)
+    params->start_addr = (s->vga.cr[0x0c] << 8)
         | s->vga.cr[0x0d]
         | ((s->vga.cr[0x1b] & 0x01) << 16)
         | ((s->vga.cr[0x1b] & 0x0c) << 15)
         | ((s->vga.cr[0x1d] & 0x80) << 12);
-    *pstart_addr = start_addr;
 
-    line_compare = s->vga.cr[0x18] |
+    params->line_compare = s->vga.cr[0x18] |
         ((s->vga.cr[0x07] & 0x10) << 4) |
         ((s->vga.cr[0x09] & 0x40) << 3);
-    *pline_compare = line_compare;
+
+    params->hpel = s->vga.ar[VGA_ATC_PEL];
+    params->hpel_split = s->vga.ar[VGA_ATC_MODE] & 0x20;
 }
 
 static uint32_t cirrus_get_bpp16_depth(CirrusVGAState * s)
@@ -2925,7 +2925,7 @@ void cirrus_init_common(CirrusVGAState *s, Object *owner,
     s->linear_mmio_mask = s->real_vram_size - 256;
 
     s->vga.get_bpp = cirrus_get_bpp;
-    s->vga.get_offsets = cirrus_get_offsets;
+    s->vga.get_params = cirrus_get_params;
     s->vga.get_resolution = cirrus_get_resolution;
     s->vga.cursor_invalidate = cirrus_cursor_invalidate;
     s->vga.cursor_draw_line = cirrus_cursor_draw_line;
diff --git a/hw/display/vga-helpers.h b/hw/display/vga-helpers.h
index 10e9cfd40a..2029b61791 100644
--- a/hw/display/vga-helpers.h
+++ b/hw/display/vga-helpers.h
@@ -98,17 +98,22 @@ static void vga_draw_glyph9(uint8_t *d, int linesize,
 /*
  * 4 color mode
  */
-static void vga_draw_line2(VGACommonState *vga, uint8_t *d,
-                           uint32_t addr, int width)
+static void *vga_draw_line2(VGACommonState *vga, uint8_t *d,
+                            uint32_t addr, int width, int hpel)
 {
     uint32_t plane_mask, *palette, data, v;
     int x;
 
     palette = vga->last_palette;
     plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
+    hpel &= 7;
+    if (hpel) {
+        width += 8;
+        d = vga->panning_buf;
+    }
     width >>= 3;
     for(x = 0; x < width; x++) {
-        data = vga_read_dword_le(vga, addr);
+        data = vga_read_dword_le(vga, addr & (VGA_VRAM_SIZE - 1));
         data &= plane_mask;
         v = expand2[GET_PLANE(data, 0)];
         v |= expand2[GET_PLANE(data, 2)] << 2;
@@ -126,6 +131,7 @@ static void vga_draw_line2(VGACommonState *vga, uint8_t *d,
         d += 32;
         addr += 4;
     }
+    return hpel ? vga->panning_buf + 4 * hpel : NULL;
 }
 
 #define PUT_PIXEL2(d, n, v) \
@@ -134,17 +140,22 @@ static void vga_draw_line2(VGACommonState *vga, uint8_t *d,
 /*
  * 4 color mode, dup2 horizontal
  */
-static void vga_draw_line2d2(VGACommonState *vga, uint8_t *d,
-                             uint32_t addr, int width)
+static void *vga_draw_line2d2(VGACommonState *vga, uint8_t *d,
+                              uint32_t addr, int width, int hpel)
 {
     uint32_t plane_mask, *palette, data, v;
     int x;
 
     palette = vga->last_palette;
     plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
+    hpel &= 7;
+    if (hpel) {
+        width += 8;
+        d = vga->panning_buf;
+    }
     width >>= 3;
     for(x = 0; x < width; x++) {
-        data = vga_read_dword_le(vga, addr);
+        data = vga_read_dword_le(vga, addr & (VGA_VRAM_SIZE - 1));
         data &= plane_mask;
         v = expand2[GET_PLANE(data, 0)];
         v |= expand2[GET_PLANE(data, 2)] << 2;
@@ -162,22 +173,28 @@ static void vga_draw_line2d2(VGACommonState *vga, uint8_t *d,
         d += 64;
         addr += 4;
     }
+    return hpel ? vga->panning_buf + 8 * hpel : NULL;
 }
 
 /*
  * 16 color mode
  */
-static void vga_draw_line4(VGACommonState *vga, uint8_t *d,
-                           uint32_t addr, int width)
+static void *vga_draw_line4(VGACommonState *vga, uint8_t *d,
+                            uint32_t addr, int width, int hpel)
 {
     uint32_t plane_mask, data, v, *palette;
     int x;
 
     palette = vga->last_palette;
     plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
+    hpel &= 7;
+    if (hpel) {
+        width += 8;
+        d = vga->panning_buf;
+    }
     width >>= 3;
     for(x = 0; x < width; x++) {
-        data = vga_read_dword_le(vga, addr);
+        data = vga_read_dword_le(vga, addr & (VGA_VRAM_SIZE - 1));
         data &= plane_mask;
         v = expand4[GET_PLANE(data, 0)];
         v |= expand4[GET_PLANE(data, 1)] << 1;
@@ -194,22 +211,28 @@ static void vga_draw_line4(VGACommonState *vga, uint8_t *d,
         d += 32;
         addr += 4;
     }
+    return hpel ? vga->panning_buf + 4 * hpel : NULL;
 }
 
 /*
  * 16 color mode, dup2 horizontal
  */
-static void vga_draw_line4d2(VGACommonState *vga, uint8_t *d,
-                             uint32_t addr, int width)
+static void *vga_draw_line4d2(VGACommonState *vga, uint8_t *d,
+                              uint32_t addr, int width, int hpel)
 {
     uint32_t plane_mask, data, v, *palette;
     int x;
 
     palette = vga->last_palette;
     plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
+    hpel &= 7;
+    if (hpel) {
+        width += 8;
+        d = vga->panning_buf;
+    }
     width >>= 3;
     for(x = 0; x < width; x++) {
-        data = vga_read_dword_le(vga, addr);
+        data = vga_read_dword_le(vga, addr & (VGA_VRAM_SIZE - 1));
         data &= plane_mask;
         v = expand4[GET_PLANE(data, 0)];
         v |= expand4[GET_PLANE(data, 1)] << 1;
@@ -226,6 +249,7 @@ static void vga_draw_line4d2(VGACommonState *vga, uint8_t *d,
         d += 64;
         addr += 4;
     }
+    return hpel ? vga->panning_buf + 8 * hpel : NULL;
 }
 
 /*
@@ -233,15 +257,33 @@ static void vga_draw_line4d2(VGACommonState *vga, uint8_t *d,
  *
  * XXX: add plane_mask support (never used in standard VGA modes)
  */
-static void vga_draw_line8d2(VGACommonState *vga, uint8_t *d,
-                             uint32_t addr, int width)
+static void *vga_draw_line8d2(VGACommonState *vga, uint8_t *d,
+                              uint32_t addr, int width, int hpel)
 {
     uint32_t *palette;
     int x;
 
     palette = vga->last_palette;
+    hpel = (hpel >> 1) & 3;
+
+    /* For 256 color modes, we can adjust the source address and write directly
+     * to the destination, even if horizontal pel panning is active.  However,
+     * the loop below assumes that the address does not wrap in the middle of a
+     * plane.  If that happens...
+     */
+    if (addr + (width >> 3) * 4 < VGA_VRAM_SIZE) {
+        addr += hpel * 4;
+        hpel = 0;
+    }
+
+    /* ... use the panning buffer as in planar modes.  */
+    if (hpel) {
+        width += 8;
+        d = vga->panning_buf;
+    }
     width >>= 3;
     for(x = 0; x < width; x++) {
+        addr &= VGA_VRAM_SIZE - 1;
         PUT_PIXEL2(d, 0, palette[vga_read_byte(vga, addr + 0)]);
         PUT_PIXEL2(d, 1, palette[vga_read_byte(vga, addr + 1)]);
         PUT_PIXEL2(d, 2, palette[vga_read_byte(vga, addr + 2)]);
@@ -249,6 +291,7 @@ static void vga_draw_line8d2(VGACommonState *vga, uint8_t *d,
         d += 32;
         addr += 4;
     }
+    return hpel ? vga->panning_buf + 8 * hpel : NULL;
 }
 
 /*
@@ -256,13 +299,18 @@ static void vga_draw_line8d2(VGACommonState *vga, uint8_t *d,
  *
  * XXX: add plane_mask support (never used in standard VGA modes)
  */
-static void vga_draw_line8(VGACommonState *vga, uint8_t *d,
-                           uint32_t addr, int width)
+static void *vga_draw_line8(VGACommonState *vga, uint8_t *d,
+                            uint32_t addr, int width, int hpel)
 {
     uint32_t *palette;
     int x;
 
     palette = vga->last_palette;
+    hpel = (hpel >> 1) & 3;
+    if (hpel) {
+        width += 8;
+        d = vga->panning_buf;
+    }
     width >>= 3;
     for(x = 0; x < width; x++) {
         ((uint32_t *)d)[0] = palette[vga_read_byte(vga, addr + 0)];
@@ -276,13 +324,14 @@ static void vga_draw_line8(VGACommonState *vga, uint8_t *d,
         d += 32;
         addr += 8;
     }
+    return hpel ? vga->panning_buf + 4 * hpel : NULL;
 }
 
 /*
  * 15 bit color
  */
-static void vga_draw_line15_le(VGACommonState *vga, uint8_t *d,
-                               uint32_t addr, int width)
+static void *vga_draw_line15_le(VGACommonState *vga, uint8_t *d,
+                                uint32_t addr, int width, int hpel)
 {
     int w;
     uint32_t v, r, g, b;
@@ -297,10 +346,11 @@ static void vga_draw_line15_le(VGACommonState *vga, uint8_t *d,
         addr += 2;
         d += 4;
     } while (--w != 0);
+    return NULL;
 }
 
-static void vga_draw_line15_be(VGACommonState *vga, uint8_t *d,
-                               uint32_t addr, int width)
+static void *vga_draw_line15_be(VGACommonState *vga, uint8_t *d,
+                                uint32_t addr, int width, int hpel)
 {
     int w;
     uint32_t v, r, g, b;
@@ -315,13 +365,14 @@ static void vga_draw_line15_be(VGACommonState *vga, uint8_t *d,
         addr += 2;
         d += 4;
     } while (--w != 0);
+    return NULL;
 }
 
 /*
  * 16 bit color
  */
-static void vga_draw_line16_le(VGACommonState *vga, uint8_t *d,
-                               uint32_t addr, int width)
+static void *vga_draw_line16_le(VGACommonState *vga, uint8_t *d,
+                                uint32_t addr, int width, int hpel)
 {
     int w;
     uint32_t v, r, g, b;
@@ -336,10 +387,11 @@ static void vga_draw_line16_le(VGACommonState *vga, uint8_t *d,
         addr += 2;
         d += 4;
     } while (--w != 0);
+    return NULL;
 }
 
-static void vga_draw_line16_be(VGACommonState *vga, uint8_t *d,
-                               uint32_t addr, int width)
+static void *vga_draw_line16_be(VGACommonState *vga, uint8_t *d,
+                                uint32_t addr, int width, int hpel)
 {
     int w;
     uint32_t v, r, g, b;
@@ -354,13 +406,14 @@ static void vga_draw_line16_be(VGACommonState *vga, uint8_t *d,
         addr += 2;
         d += 4;
     } while (--w != 0);
+    return NULL;
 }
 
 /*
  * 24 bit color
  */
-static void vga_draw_line24_le(VGACommonState *vga, uint8_t *d,
-                               uint32_t addr, int width)
+static void *vga_draw_line24_le(VGACommonState *vga, uint8_t *d,
+                                uint32_t addr, int width, int hpel)
 {
     int w;
     uint32_t r, g, b;
@@ -374,10 +427,11 @@ static void vga_draw_line24_le(VGACommonState *vga, uint8_t *d,
         addr += 3;
         d += 4;
     } while (--w != 0);
+    return NULL;
 }
 
-static void vga_draw_line24_be(VGACommonState *vga, uint8_t *d,
-                               uint32_t addr, int width)
+static void *vga_draw_line24_be(VGACommonState *vga, uint8_t *d,
+                                uint32_t addr, int width, int hpel)
 {
     int w;
     uint32_t r, g, b;
@@ -391,13 +445,14 @@ static void vga_draw_line24_be(VGACommonState *vga, uint8_t *d,
         addr += 3;
         d += 4;
     } while (--w != 0);
+    return NULL;
 }
 
 /*
  * 32 bit color
  */
-static void vga_draw_line32_le(VGACommonState *vga, uint8_t *d,
-                               uint32_t addr, int width)
+static void *vga_draw_line32_le(VGACommonState *vga, uint8_t *d,
+                                uint32_t addr, int width, int hpel)
 {
     int w;
     uint32_t r, g, b;
@@ -411,10 +466,11 @@ static void vga_draw_line32_le(VGACommonState *vga, uint8_t *d,
         addr += 4;
         d += 4;
     } while (--w != 0);
+    return NULL;
 }
 
-static void vga_draw_line32_be(VGACommonState *vga, uint8_t *d,
-                               uint32_t addr, int width)
+static void *vga_draw_line32_be(VGACommonState *vga, uint8_t *d,
+                                uint32_t addr, int width, int hpel)
 {
     int w;
     uint32_t r, g, b;
@@ -428,4 +484,5 @@ static void vga_draw_line32_be(VGACommonState *vga, uint8_t *d,
         addr += 4;
         d += 4;
     } while (--w != 0);
+    return NULL;
 }
diff --git a/hw/display/vga.c b/hw/display/vga.c
index 886a4020e5..bc5b83421b 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -47,6 +47,16 @@ bool have_vga = true;
 /* 16 state changes per vertical frame @60 Hz */
 #define VGA_TEXT_CURSOR_PERIOD_MS       (1000 * 2 * 16 / 60)
 
+/* Address mask for non-VESA modes.  */
+#define VGA_VRAM_SIZE                   (256 * KiB)
+
+/* This value corresponds to a shift of zero pixels
+ * in 9-dot text mode.  In other modes, bit 3 is undefined;
+ * we just ignore it, so that 8 corresponds to zero pixels
+ * in all modes.
+ */
+#define VGA_HPEL_NEUTRAL		8
+
 /*
  * Video Graphics Array (VGA)
  *
@@ -90,58 +100,27 @@ const uint8_t gr_mask[16] = {
     0x00, /* 0x0f */
 };
 
-#define cbswap_32(__x) \
-((uint32_t)( \
-                (((uint32_t)(__x) & (uint32_t)0x000000ffUL) << 24) | \
-                (((uint32_t)(__x) & (uint32_t)0x0000ff00UL) <<  8) | \
-                (((uint32_t)(__x) & (uint32_t)0x00ff0000UL) >>  8) | \
-                (((uint32_t)(__x) & (uint32_t)0xff000000UL) >> 24) ))
-
-#if HOST_BIG_ENDIAN
-#define PAT(x) cbswap_32(x)
-#else
-#define PAT(x) (x)
-#endif
-
-#if HOST_BIG_ENDIAN
-#define BIG 1
-#else
-#define BIG 0
-#endif
-
-#if HOST_BIG_ENDIAN
-#define GET_PLANE(data, p) (((data) >> (24 - (p) * 8)) & 0xff)
-#else
-#define GET_PLANE(data, p) (((data) >> ((p) * 8)) & 0xff)
-#endif
+#define GET_PLANE(data, p) ((cpu_to_le32(data) >> ((p) * 8)) & 0xff)
 
 static const uint32_t mask16[16] = {
-    PAT(0x00000000),
-    PAT(0x000000ff),
-    PAT(0x0000ff00),
-    PAT(0x0000ffff),
-    PAT(0x00ff0000),
-    PAT(0x00ff00ff),
-    PAT(0x00ffff00),
-    PAT(0x00ffffff),
-    PAT(0xff000000),
-    PAT(0xff0000ff),
-    PAT(0xff00ff00),
-    PAT(0xff00ffff),
-    PAT(0xffff0000),
-    PAT(0xffff00ff),
-    PAT(0xffffff00),
-    PAT(0xffffffff),
+    const_le32(0x00000000),
+    const_le32(0x000000ff),
+    const_le32(0x0000ff00),
+    const_le32(0x0000ffff),
+    const_le32(0x00ff0000),
+    const_le32(0x00ff00ff),
+    const_le32(0x00ffff00),
+    const_le32(0x00ffffff),
+    const_le32(0xff000000),
+    const_le32(0xff0000ff),
+    const_le32(0xff00ff00),
+    const_le32(0xff00ffff),
+    const_le32(0xffff0000),
+    const_le32(0xffff00ff),
+    const_le32(0xffffff00),
+    const_le32(0xffffffff),
 };
 
-#undef PAT
-
-#if HOST_BIG_ENDIAN
-#define PAT(x) (x)
-#else
-#define PAT(x) cbswap_32(x)
-#endif
-
 static uint32_t expand4[256];
 static uint16_t expand2[256];
 static uint8_t expand4to8[16];
@@ -836,45 +815,62 @@ uint32_t vga_mem_readb(VGACommonState *s, hwaddr addr)
     }
 
     if (sr(s, VGA_SEQ_MEMORY_MODE) & VGA_SR04_CHN_4M) {
-        /* chain 4 mode : simplest access */
-        assert(addr < s->vram_size);
-        ret = s->vram_ptr[addr];
-    } else if (s->gr[VGA_GFX_MODE] & 0x10) {
+        /* chain4 mode */
+        plane = addr & 3;
+        addr &= ~3;
+    } else if (s->gr[VGA_GFX_MODE] & VGA_GR05_HOST_ODD_EVEN) {
         /* odd/even mode (aka text mode mapping) */
         plane = (s->gr[VGA_GFX_PLANE_READ] & 2) | (addr & 1);
-        addr = ((addr & ~1) << 1) | plane;
-        if (addr >= s->vram_size) {
-            return 0xff;
-        }
-        ret = s->vram_ptr[addr];
     } else {
         /* standard VGA latched access */
-        if (addr * sizeof(uint32_t) >= s->vram_size) {
-            return 0xff;
-        }
-        s->latch = ((uint32_t *)s->vram_ptr)[addr];
+        plane = s->gr[VGA_GFX_PLANE_READ];
+    }
 
-        if (!(s->gr[VGA_GFX_MODE] & 0x08)) {
-            /* read mode 0 */
-            plane = s->gr[VGA_GFX_PLANE_READ];
-            ret = GET_PLANE(s->latch, plane);
-        } else {
-            /* read mode 1 */
-            ret = (s->latch ^ mask16[s->gr[VGA_GFX_COMPARE_VALUE]]) &
-                mask16[s->gr[VGA_GFX_COMPARE_MASK]];
-            ret |= ret >> 16;
-            ret |= ret >> 8;
-            ret = (~ret) & 0xff;
-        }
+    if (s->gr[VGA_GFX_MISC] & VGA_GR06_CHAIN_ODD_EVEN) {
+        addr &= ~1;
     }
+
+    /* Doubleword/word mode.  See comment in vga_mem_writeb */
+    if (s->cr[VGA_CRTC_UNDERLINE] & VGA_CR14_DW) {
+        addr >>= 2;
+    } else if ((s->gr[VGA_GFX_MODE] & VGA_GR05_HOST_ODD_EVEN) &&
+               (s->cr[VGA_CRTC_MODE] & VGA_CR17_WORD_BYTE) == 0) {
+        addr >>= 1;
+    }
+
+    if (addr * sizeof(uint32_t) >= s->vram_size) {
+        return 0xff;
+    }
+
+    if (s->sr[VGA_SEQ_MEMORY_MODE] & VGA_SR04_CHN_4M) {
+        /* chain 4 mode: simplified access (but it should use the same
+         * algorithms as below, see e.g. vga_mem_writeb's plane mask check).
+         */
+        return s->vram_ptr[(addr << 2) | plane];
+    }
+
+    s->latch = ((uint32_t *)s->vram_ptr)[addr];
+    if (!(s->gr[VGA_GFX_MODE] & 0x08)) {
+        /* read mode 0 */
+        ret = GET_PLANE(s->latch, plane);
+    } else {
+        /* read mode 1 */
+        ret = (s->latch ^ mask16[s->gr[VGA_GFX_COMPARE_VALUE]]) &
+            mask16[s->gr[VGA_GFX_COMPARE_MASK]];
+        ret |= ret >> 16;
+        ret |= ret >> 8;
+        ret = (~ret) & 0xff;
+    }
+
     return ret;
 }
 
 /* called for accesses between 0xa0000 and 0xc0000 */
 void vga_mem_writeb(VGACommonState *s, hwaddr addr, uint32_t val)
 {
-    int memory_map_mode, plane, write_mode, b, func_select, mask;
+    int memory_map_mode, write_mode, b, func_select, mask;
     uint32_t write_mask, bit_mask, set_mask;
+    int plane = 0;
 
 #ifdef DEBUG_VGA_MEM
     printf("vga: [0x" HWADDR_FMT_plx "] = 0x%02x\n", addr, val);
@@ -903,117 +899,136 @@ void vga_mem_writeb(VGACommonState *s, hwaddr addr, uint32_t val)
         break;
     }
 
+    mask = sr(s, VGA_SEQ_PLANE_WRITE);
     if (sr(s, VGA_SEQ_MEMORY_MODE) & VGA_SR04_CHN_4M) {
         /* chain 4 mode : simplest access */
         plane = addr & 3;
-        mask = (1 << plane);
-        if (sr(s, VGA_SEQ_PLANE_WRITE) & mask) {
-            assert(addr < s->vram_size);
-            s->vram_ptr[addr] = val;
-#ifdef DEBUG_VGA_MEM
-            printf("vga: chain4: [0x" HWADDR_FMT_plx "]\n", addr);
-#endif
-            s->plane_updated |= mask; /* only used to detect font change */
-            memory_region_set_dirty(&s->vram, addr, 1);
+        mask &= (1 << plane);
+        addr &= ~3;
+    } else {
+        if ((sr(s, VGA_SEQ_MEMORY_MODE) & VGA_SR04_SEQ_MODE) == 0) {
+            mask &= (addr & 1) ? 0x0a : 0x05;
         }
-    } else if (s->gr[VGA_GFX_MODE] & 0x10) {
-        /* odd/even mode (aka text mode mapping) */
-        plane = (s->gr[VGA_GFX_PLANE_READ] & 2) | (addr & 1);
-        mask = (1 << plane);
-        if (sr(s, VGA_SEQ_PLANE_WRITE) & mask) {
-            addr = ((addr & ~1) << 1) | plane;
-            if (addr >= s->vram_size) {
-                return;
-            }
-            s->vram_ptr[addr] = val;
+        if (s->gr[VGA_GFX_MISC] & VGA_GR06_CHAIN_ODD_EVEN) {
+            addr &= ~1;
+        }
+    }
+
+    /* Doubleword/word mode.  These should be honored when displaying,
+     * not when reading/writing to memory!  For example, chain4 modes
+     * use double-word mode and, on real hardware, would fetch bytes
+     * 0,1,2,3, 16,17,18,19, 32,33,34,35, etc.  Text modes use word
+     * mode and, on real hardware, would fetch bytes 0,1, 8,9, etc.
+     *
+     * QEMU instead shifted addresses on memory accesses because it
+     * allows more optimizations (e.g. chain4_alias) and simplifies
+     * the draw_line handlers. Unfortunately, there is one case where
+     * the difference shows.  When fetching font data, accesses are
+     * always in consecutive bytes, even if the text/attribute pairs
+     * are done in word mode.  Hence, doing a right shift when operating
+     * on font data is wrong.  So check the odd/even mode bits together with
+     * word mode bit.  The odd/even read bit is 0 when reading font data,
+     * and the odd/even write bit is 1 when writing it.
+     */
+    if (s->cr[VGA_CRTC_UNDERLINE] & VGA_CR14_DW) {
+        addr >>= 2;
+    } else if ((sr(s, VGA_SEQ_MEMORY_MODE) & VGA_SR04_SEQ_MODE) == 0 &&
+               (s->cr[VGA_CRTC_MODE] & VGA_CR17_WORD_BYTE) == 0) {
+        addr >>= 1;
+    }
+
+    if (addr * sizeof(uint32_t) >= s->vram_size) {
+        return;
+    }
+
+    if (sr(s, VGA_SEQ_MEMORY_MODE) & VGA_SR04_CHN_4M) {
+        if (mask) {
+            s->vram_ptr[(addr << 2) | plane] = val;
 #ifdef DEBUG_VGA_MEM
-            printf("vga: odd/even: [0x" HWADDR_FMT_plx "]\n", addr);
+            printf("vga: chain4: [0x" HWADDR_FMT_plx "]\n", addr);
 #endif
             s->plane_updated |= mask; /* only used to detect font change */
             memory_region_set_dirty(&s->vram, addr, 1);
         }
-    } else {
-        /* standard VGA latched access */
-        write_mode = s->gr[VGA_GFX_MODE] & 3;
-        switch(write_mode) {
-        default:
-        case 0:
-            /* rotate */
-            b = s->gr[VGA_GFX_DATA_ROTATE] & 7;
-            val = ((val >> b) | (val << (8 - b))) & 0xff;
-            val |= val << 8;
-            val |= val << 16;
-
-            /* apply set/reset mask */
-            set_mask = mask16[s->gr[VGA_GFX_SR_ENABLE]];
-            val = (val & ~set_mask) |
-                (mask16[s->gr[VGA_GFX_SR_VALUE]] & set_mask);
-            bit_mask = s->gr[VGA_GFX_BIT_MASK];
-            break;
-        case 1:
-            val = s->latch;
-            goto do_write;
-        case 2:
-            val = mask16[val & 0x0f];
-            bit_mask = s->gr[VGA_GFX_BIT_MASK];
-            break;
-        case 3:
-            /* rotate */
-            b = s->gr[VGA_GFX_DATA_ROTATE] & 7;
-            val = (val >> b) | (val << (8 - b));
+        return;
+    }
 
-            bit_mask = s->gr[VGA_GFX_BIT_MASK] & val;
-            val = mask16[s->gr[VGA_GFX_SR_VALUE]];
-            break;
-        }
+    /* standard VGA latched access */
+    write_mode = s->gr[VGA_GFX_MODE] & 3;
+    switch(write_mode) {
+    default:
+    case 0:
+        /* rotate */
+        b = s->gr[VGA_GFX_DATA_ROTATE] & 7;
+        val = ((val >> b) | (val << (8 - b))) & 0xff;
+        val |= val << 8;
+        val |= val << 16;
+
+        /* apply set/reset mask */
+        set_mask = mask16[s->gr[VGA_GFX_SR_ENABLE]];
+        val = (val & ~set_mask) |
+            (mask16[s->gr[VGA_GFX_SR_VALUE]] & set_mask);
+        bit_mask = s->gr[VGA_GFX_BIT_MASK];
+        break;
+    case 1:
+        val = s->latch;
+        goto do_write;
+    case 2:
+        val = mask16[val & 0x0f];
+        bit_mask = s->gr[VGA_GFX_BIT_MASK];
+        break;
+    case 3:
+        /* rotate */
+        b = s->gr[VGA_GFX_DATA_ROTATE] & 7;
+        val = (val >> b) | (val << (8 - b));
 
-        /* apply logical operation */
-        func_select = s->gr[VGA_GFX_DATA_ROTATE] >> 3;
-        switch(func_select) {
-        case 0:
-        default:
-            /* nothing to do */
-            break;
-        case 1:
-            /* and */
-            val &= s->latch;
-            break;
-        case 2:
-            /* or */
-            val |= s->latch;
-            break;
-        case 3:
-            /* xor */
-            val ^= s->latch;
-            break;
-        }
+        bit_mask = s->gr[VGA_GFX_BIT_MASK] & val;
+        val = mask16[s->gr[VGA_GFX_SR_VALUE]];
+        break;
+    }
 
-        /* apply bit mask */
-        bit_mask |= bit_mask << 8;
-        bit_mask |= bit_mask << 16;
-        val = (val & bit_mask) | (s->latch & ~bit_mask);
-
-    do_write:
-        /* mask data according to sr[2] */
-        mask = sr(s, VGA_SEQ_PLANE_WRITE);
-        s->plane_updated |= mask; /* only used to detect font change */
-        write_mask = mask16[mask];
-        if (addr * sizeof(uint32_t) >= s->vram_size) {
-            return;
-        }
-        ((uint32_t *)s->vram_ptr)[addr] =
-            (((uint32_t *)s->vram_ptr)[addr] & ~write_mask) |
-            (val & write_mask);
+    /* apply logical operation */
+    func_select = s->gr[VGA_GFX_DATA_ROTATE] >> 3;
+    switch(func_select) {
+    case 0:
+    default:
+        /* nothing to do */
+        break;
+    case 1:
+        /* and */
+        val &= s->latch;
+        break;
+    case 2:
+        /* or */
+        val |= s->latch;
+        break;
+    case 3:
+        /* xor */
+        val ^= s->latch;
+        break;
+    }
+
+    /* apply bit mask */
+    bit_mask |= bit_mask << 8;
+    bit_mask |= bit_mask << 16;
+    val = (val & bit_mask) | (s->latch & ~bit_mask);
+
+do_write:
+    /* mask data according to sr[2] */
+    s->plane_updated |= mask; /* only used to detect font change */
+    write_mask = mask16[mask];
+    ((uint32_t *)s->vram_ptr)[addr] =
+        (((uint32_t *)s->vram_ptr)[addr] & ~write_mask) |
+        (val & write_mask);
 #ifdef DEBUG_VGA_MEM
-        printf("vga: latch: [0x" HWADDR_FMT_plx "] mask=0x%08x val=0x%08x\n",
-               addr * 4, write_mask, val);
+    printf("vga: latch: [0x" HWADDR_FMT_plx "] mask=0x%08x val=0x%08x\n",
+           addr * 4, write_mask, val);
 #endif
-        memory_region_set_dirty(&s->vram, addr << 2, sizeof(uint32_t));
-    }
+    memory_region_set_dirty(&s->vram, addr << 2, sizeof(uint32_t));
 }
 
-typedef void vga_draw_line_func(VGACommonState *s1, uint8_t *d,
-                                uint32_t srcaddr, int width);
+typedef void *vga_draw_line_func(VGACommonState *s1, uint8_t *d,
+                                 uint32_t srcaddr, int width, int hpel);
 
 #include "vga-access.h"
 #include "vga-helpers.h"
@@ -1073,52 +1088,45 @@ static int update_palette256(VGACommonState *s)
     return full_update;
 }
 
-static void vga_get_offsets(VGACommonState *s,
-                            uint32_t *pline_offset,
-                            uint32_t *pstart_addr,
-                            uint32_t *pline_compare)
+static void vga_get_params(VGACommonState *s,
+                           VGADisplayParams *params)
 {
-    uint32_t start_addr, line_offset, line_compare;
-
     if (vbe_enabled(s)) {
-        line_offset = s->vbe_line_offset;
-        start_addr = s->vbe_start_addr;
-        line_compare = 65535;
+        params->line_offset = s->vbe_line_offset;
+        params->start_addr = s->vbe_start_addr;
+        params->line_compare = 65535;
+        params->hpel = VGA_HPEL_NEUTRAL;
+        params->hpel_split = false;
     } else {
         /* compute line_offset in bytes */
-        line_offset = s->cr[VGA_CRTC_OFFSET];
-        line_offset <<= 3;
+        params->line_offset = s->cr[VGA_CRTC_OFFSET] << 3;
 
         /* starting address */
-        start_addr = s->cr[VGA_CRTC_START_LO] |
+        params->start_addr = s->cr[VGA_CRTC_START_LO] |
             (s->cr[VGA_CRTC_START_HI] << 8);
 
         /* line compare */
-        line_compare = s->cr[VGA_CRTC_LINE_COMPARE] |
+        params->line_compare = s->cr[VGA_CRTC_LINE_COMPARE] |
             ((s->cr[VGA_CRTC_OVERFLOW] & 0x10) << 4) |
             ((s->cr[VGA_CRTC_MAX_SCAN] & 0x40) << 3);
+
+        params->hpel = s->ar[VGA_ATC_PEL];
+        params->hpel_split = s->ar[VGA_ATC_MODE] & 0x20;
     }
-    *pline_offset = line_offset;
-    *pstart_addr = start_addr;
-    *pline_compare = line_compare;
 }
 
 /* update start_addr and line_offset. Return TRUE if modified */
 static int update_basic_params(VGACommonState *s)
 {
     int full_update;
-    uint32_t start_addr, line_offset, line_compare;
+    VGADisplayParams current;
 
     full_update = 0;
 
-    s->get_offsets(s, &line_offset, &start_addr, &line_compare);
+    s->get_params(s, &current);
 
-    if (line_offset != s->line_offset ||
-        start_addr != s->start_addr ||
-        line_compare != s->line_compare) {
-        s->line_offset = line_offset;
-        s->start_addr = start_addr;
-        s->line_compare = line_compare;
+    if (memcmp(&current, &s->params, sizeof(current))) {
+        s->params = current;
         full_update = 1;
     }
     return full_update;
@@ -1219,7 +1227,7 @@ static void vga_draw_text(VGACommonState *s, int full_update)
     }
     full_update |= update_basic_params(s);
 
-    line_offset = s->line_offset;
+    line_offset = s->params.line_offset;
 
     vga_get_text_resolution(s, &width, &height, &cw, &cheight);
     if ((height * width) <= 1) {
@@ -1258,7 +1266,7 @@ static void vga_draw_text(VGACommonState *s, int full_update)
     }
 
     cursor_offset = ((s->cr[VGA_CRTC_CURSOR_HI] << 8) |
-                     s->cr[VGA_CRTC_CURSOR_LO]) - s->start_addr;
+                     s->cr[VGA_CRTC_CURSOR_LO]) - s->params.start_addr;
     if (cursor_offset != s->cursor_offset ||
         s->cr[VGA_CRTC_CURSOR_START] != s->cursor_start ||
         s->cr[VGA_CRTC_CURSOR_END] != s->cursor_end) {
@@ -1272,7 +1280,7 @@ static void vga_draw_text(VGACommonState *s, int full_update)
         s->cursor_start = s->cr[VGA_CRTC_CURSOR_START];
         s->cursor_end = s->cr[VGA_CRTC_CURSOR_END];
     }
-    cursor_ptr = s->vram_ptr + (s->start_addr + cursor_offset) * 4;
+    cursor_ptr = s->vram_ptr + (s->params.start_addr + cursor_offset) * 4;
     if (now >= s->cursor_blink_time) {
         s->cursor_blink_time = now + VGA_TEXT_CURSOR_PERIOD_MS / 2;
         s->cursor_visible_phase = !s->cursor_visible_phase;
@@ -1282,7 +1290,7 @@ static void vga_draw_text(VGACommonState *s, int full_update)
     linesize = surface_stride(surface);
     ch_attr_ptr = s->last_ch_attr;
     line = 0;
-    offset = s->start_addr * 4;
+    offset = s->params.start_addr * 4;
     for(cy = 0; cy < height; cy++) {
         d1 = dest;
         src = s->vram_ptr + offset;
@@ -1362,7 +1370,7 @@ static void vga_draw_text(VGACommonState *s, int full_update)
         dest += linesize * cheight;
         line1 = line + cheight;
         offset += line_offset;
-        if (line < s->line_compare && line1 >= s->line_compare) {
+        if (line < s->params.line_compare && line1 >= s->params.line_compare) {
             offset = 0;
         }
         line = line1;
@@ -1475,6 +1483,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
     ram_addr_t page0, page1, region_start, region_end;
     DirtyBitmapSnapshot *snap = NULL;
     int disp_width, multi_scan, multi_run;
+    int hpel;
     uint8_t *d;
     uint32_t v, addr1, addr;
     vga_draw_line_func *vga_draw_line = NULL;
@@ -1492,10 +1501,10 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
     disp_width = width;
     depth = s->get_bpp(s);
 
-    region_start = (s->start_addr * 4);
-    region_end = region_start + (ram_addr_t)s->line_offset * height;
+    region_start = (s->params.start_addr * 4);
+    region_end = region_start + (ram_addr_t)s->params.line_offset * height;
     region_end += width * depth / 8; /* scanline length */
-    region_end -= s->line_offset;
+    region_end -= s->params.line_offset;
     if (region_end > s->vbe_size || depth == 0 || depth == 15) {
         /*
          * We land here on:
@@ -1560,7 +1569,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
         share_surface = false;
     }
 
-    if (s->line_offset != s->last_line_offset ||
+    if (s->params.line_offset != s->last_line_offset ||
         disp_width != s->last_width ||
         height != s->last_height ||
         s->last_depth != depth ||
@@ -1571,12 +1580,15 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
         s->last_scr_height = height;
         s->last_width = disp_width;
         s->last_height = height;
-        s->last_line_offset = s->line_offset;
+        s->last_line_offset = s->params.line_offset;
         s->last_depth = depth;
         s->last_byteswap = byteswap;
+        /* 16 extra pixels are needed for double-width planar modes.  */
+        s->panning_buf = g_realloc(s->panning_buf,
+                                   (disp_width + 16) * sizeof(uint32_t));
         full_update = 1;
     }
-    if (surface_data(surface) != s->vram_ptr + (s->start_addr * 4)
+    if (surface_data(surface) != s->vram_ptr + (s->params.start_addr * 4)
         && is_buffer_shared(surface)) {
         /* base address changed (page flip) -> shared display surfaces
          * must be updated with the new base address */
@@ -1586,8 +1598,8 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
     if (full_update) {
         if (share_surface) {
             surface = qemu_create_displaysurface_from(disp_width,
-                    height, format, s->line_offset,
-                    s->vram_ptr + (s->start_addr * 4));
+                    height, format, s->params.line_offset,
+                    s->vram_ptr + (s->params.start_addr * 4));
             dpy_gfx_replace_surface(s->con, surface);
         } else {
             qemu_console_resize(s->con, disp_width, height);
@@ -1651,17 +1663,21 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
 #if 0
     printf("w=%d h=%d v=%d line_offset=%d cr[0x09]=0x%02x cr[0x17]=0x%02x linecmp=%d sr[0x01]=0x%02x\n",
            width, height, v, line_offset, s->cr[9], s->cr[VGA_CRTC_MODE],
-           s->line_compare, sr(s, VGA_SEQ_CLOCK_MODE));
+           s->params.line_compare, sr(s, VGA_SEQ_CLOCK_MODE));
 #endif
-    addr1 = (s->start_addr * 4);
+    hpel = bits <= 8 ? s->params.hpel : 0;
+    addr1 = (s->params.start_addr * 4);
     bwidth = DIV_ROUND_UP(width * bits, 8);
+    if (hpel) {
+        bwidth += 4;
+    }
     y_start = -1;
     d = surface_data(surface);
     linesize = surface_stride(surface);
     y1 = 0;
 
     if (!full_update) {
-        if (s->line_compare < height) {
+        if (s->params.line_compare < height) {
             /* split screen mode */
             region_start = 0;
         }
@@ -1702,7 +1718,11 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
             if (y_start < 0)
                 y_start = y;
             if (!(is_buffer_shared(surface))) {
-                vga_draw_line(s, d, addr, width);
+                uint8_t *p;
+                p = vga_draw_line(s, d, addr, width, hpel);
+                if (p) {
+                    memcpy(d, p, disp_width * sizeof(uint32_t));
+                }
                 if (s->cursor_draw_line)
                     s->cursor_draw_line(s, d, y);
             }
@@ -1717,15 +1737,19 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
         if (!multi_run) {
             mask = (s->cr[VGA_CRTC_MODE] & 3) ^ 3;
             if ((y1 & mask) == mask)
-                addr1 += s->line_offset;
+                addr1 += s->params.line_offset;
             y1++;
             multi_run = multi_scan;
         } else {
             multi_run--;
         }
         /* line compare acts on the displayed lines */
-        if (y == s->line_compare)
+        if (y == s->params.line_compare) {
+            if (s->params.hpel_split) {
+                hpel = VGA_HPEL_NEUTRAL;
+            }
             addr1 = 0;
+        }
         d += linesize;
     }
     if (y_start >= 0) {
@@ -1841,9 +1865,7 @@ void vga_common_reset(VGACommonState *s)
     s->graphic_mode = -1; /* force full update */
     s->shift_control = 0;
     s->double_scan = 0;
-    s->line_offset = 0;
-    s->line_compare = 0;
-    s->start_addr = 0;
+    memset(&s->params, '\0', sizeof(s->params));
     s->plane_updated = 0;
     s->last_cw = 0;
     s->last_ch = 0;
@@ -1965,7 +1987,7 @@ static void vga_update_text(void *opaque, console_ch_t *chardata)
 
         /* Update "hardware" cursor */
         cursor_offset = ((s->cr[VGA_CRTC_CURSOR_HI] << 8) |
-                         s->cr[VGA_CRTC_CURSOR_LO]) - s->start_addr;
+                         s->cr[VGA_CRTC_CURSOR_LO]) - s->params.start_addr;
         if (cursor_offset != s->cursor_offset ||
             s->cr[VGA_CRTC_CURSOR_START] != s->cursor_start ||
             s->cr[VGA_CRTC_CURSOR_END] != s->cursor_end || full_update) {
@@ -1981,7 +2003,7 @@ static void vga_update_text(void *opaque, console_ch_t *chardata)
             s->cursor_end = s->cr[VGA_CRTC_CURSOR_END];
         }
 
-        src = (uint32_t *) s->vram_ptr + s->start_addr;
+        src = (uint32_t *) s->vram_ptr + s->params.start_addr;
         dst = chardata;
 
         if (full_update) {
@@ -2226,7 +2248,7 @@ bool vga_common_init(VGACommonState *s, Object *obj, Error **errp)
     xen_register_framebuffer(&s->vram);
     s->vram_ptr = memory_region_get_ram_ptr(&s->vram);
     s->get_bpp = vga_get_bpp;
-    s->get_offsets = vga_get_offsets;
+    s->get_params = vga_get_params;
     s->get_resolution = vga_get_resolution;
     s->hw_ops = &vga_ops;
     switch (vga_retrace_method) {
diff --git a/hw/display/vga_int.h b/hw/display/vga_int.h
index 7cf0d11201..876a1d3697 100644
--- a/hw/display/vga_int.h
+++ b/hw/display/vga_int.h
@@ -56,6 +56,14 @@ struct VGACommonState;
 typedef uint8_t (* vga_retrace_fn)(struct VGACommonState *s);
 typedef void (* vga_update_retrace_info_fn)(struct VGACommonState *s);
 
+typedef struct VGADisplayParams {
+    uint32_t line_offset;
+    uint32_t start_addr;
+    uint32_t line_compare;
+    uint8_t  hpel;
+    bool     hpel_split;
+} VGADisplayParams;
+
 typedef struct VGACommonState {
     MemoryRegion *legacy_address_space;
     uint8_t *vram_ptr;
@@ -90,10 +98,7 @@ typedef struct VGACommonState {
     uint8_t palette[768];
     int32_t bank_offset;
     int (*get_bpp)(struct VGACommonState *s);
-    void (*get_offsets)(struct VGACommonState *s,
-                        uint32_t *pline_offset,
-                        uint32_t *pstart_addr,
-                        uint32_t *pline_compare);
+    void (*get_params)(struct VGACommonState *s, VGADisplayParams *params);
     void (*get_resolution)(struct VGACommonState *s,
                         int *pwidth,
                         int *pheight);
@@ -108,12 +113,11 @@ typedef struct VGACommonState {
     /* display refresh support */
     QemuConsole *con;
     uint32_t font_offsets[2];
+    uint8_t *panning_buf;
     int graphic_mode;
     uint8_t shift_control;
     uint8_t double_scan;
-    uint32_t line_offset;
-    uint32_t line_compare;
-    uint32_t start_addr;
+    VGADisplayParams params;
     uint32_t plane_updated;
     uint32_t last_line_offset;
     uint8_t last_cw, last_ch;
diff --git a/hw/display/vga_regs.h b/hw/display/vga_regs.h
index 7fdba34b9b..40e673f164 100644
--- a/hw/display/vga_regs.h
+++ b/hw/display/vga_regs.h
@@ -100,7 +100,9 @@
 
 /* VGA CRT controller bit masks */
 #define VGA_CR11_LOCK_CR0_CR7   0x80 /* lock writes to CR0 - CR7 */
+#define VGA_CR14_DW             0x40
 #define VGA_CR17_H_V_SIGNALS_ENABLED 0x80
+#define VGA_CR17_WORD_BYTE      0x40
 
 /* VGA attribute controller register indices */
 #define VGA_ATC_PALETTE0        0x00
@@ -154,6 +156,8 @@
 #define VGA_GFX_BIT_MASK        0x08
 
 /* VGA graphics controller bit masks */
+#define VGA_GR05_HOST_ODD_EVEN  0x10
 #define VGA_GR06_GRAPHICS_MODE  0x01
+#define VGA_GR06_CHAIN_ODD_EVEN 0x02
 
 #endif /* HW_VGA_REGS_H */
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 0b3352f2a9..4aa86f563c 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1000,7 +1000,7 @@ vfio_device_feature_dma_logging_start_create(VFIOContainerBase *bcontainer,
         return NULL;
     }
 
-    control->ranges = (__u64)(uintptr_t)ranges;
+    control->ranges = (uintptr_t)ranges;
     if (tracking->max32) {
         ranges->iova = tracking->min32;
         ranges->length = (tracking->max32 - tracking->min32) + 1;
@@ -1126,7 +1126,7 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
     report->iova = iova;
     report->length = size;
     report->page_size = qemu_real_host_page_size();
-    report->bitmap = (__u64)(uintptr_t)bitmap;
+    report->bitmap = (uintptr_t)bitmap;
 
     feature->argsz = sizeof(buf);
     feature->flags = VFIO_DEVICE_FEATURE_GET |
diff --git a/include/block/aio.h b/include/block/aio.h
index c802a392e5..5d0a114988 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -55,7 +55,7 @@ typedef void IOHandler(void *opaque);
 
 struct ThreadPool;
 struct LinuxAioState;
-struct LuringState;
+typedef struct LuringState LuringState;
 
 /* Is polling disabled? */
 bool aio_poll_disabled(AioContext *ctx);
@@ -212,7 +212,7 @@ struct AioContext {
     struct LinuxAioState *linux_aio;
 #endif
 #ifdef CONFIG_LINUX_IO_URING
-    struct LuringState *linux_io_uring;
+    LuringState *linux_io_uring;
 
     /* State for file descriptor monitoring using Linux io_uring */
     struct io_uring fdmon_io_uring;
@@ -504,10 +504,10 @@ struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp);
 struct LinuxAioState *aio_get_linux_aio(AioContext *ctx);
 
 /* Setup the LuringState bound to this AioContext */
-struct LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp);
+LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp);
 
 /* Return the LuringState bound to this AioContext */
-struct LuringState *aio_get_linux_io_uring(AioContext *ctx);
+LuringState *aio_get_linux_io_uring(AioContext *ctx);
 /**
  * aio_timer_new_with_attrs:
  * @ctx: the aio context
diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
index 0f63c2800c..20e000b8ef 100644
--- a/include/block/raw-aio.h
+++ b/include/block/raw-aio.h
@@ -65,7 +65,6 @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
 #endif
 /* io_uring.c - Linux io_uring implementation */
 #ifdef CONFIG_LINUX_IO_URING
-typedef struct LuringState LuringState;
 LuringState *luring_init(Error **errp);
 void luring_cleanup(LuringState *s);
 
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 9a405bed89..c9692cc314 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -547,6 +547,14 @@ int madvise(char *, size_t, int);
 #  define QEMU_VMALLOC_ALIGN (256 * 4096)
 #elif defined(__linux__) && defined(__sparc__)
 #  define QEMU_VMALLOC_ALIGN MAX(qemu_real_host_page_size(), SHMLBA)
+#elif defined(__linux__) && defined(__loongarch__)
+   /*
+    * For transparent hugepage optimization, it has better be huge page
+    * aligned. LoongArch host system supports two kinds of pagesize: 4K
+    * and 16K, here calculate huge page size from host page size
+    */
+#  define QEMU_VMALLOC_ALIGN (qemu_real_host_page_size() * \
+                         qemu_real_host_page_size() / sizeof(long))
 #else
 #  define QEMU_VMALLOC_ALIGN qemu_real_host_page_size()
 #endif
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
index fd846394be..882e37e12c 100644
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@@ -120,6 +120,7 @@ struct KVMState
     uint32_t xen_caps;
     uint16_t xen_gnttab_max_frames;
     uint16_t xen_evtchn_max_pirq;
+    char *device;
 };
 
 void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
diff --git a/qemu-options.hx b/qemu-options.hx
index 1912b19cb8..ab5cd845bd 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -190,7 +190,8 @@ DEF("accel", HAS_ARG, QEMU_OPTION_accel,
     "                dirty-ring-size=n (KVM dirty ring GFN count, default 0)\n"
     "                eager-split-size=n (KVM Eager Page Split chunk size, default 0, disabled. ARM only)\n"
     "                notify-vmexit=run|internal-error|disable,notify-window=n (enable notify VM exit and set notify window, x86 only)\n"
-    "                thread=single|multi (enable multi-threaded TCG)\n", QEMU_ARCH_ALL)
+    "                thread=single|multi (enable multi-threaded TCG)\n"
+    "                device=path (KVM device path, default /dev/kvm)\n", QEMU_ARCH_ALL)
 SRST
 ``-accel name[,prop=value[,...]]``
     This is used to enable an accelerator. Depending on the target
@@ -271,6 +272,11 @@ SRST
         open up for a specified of time (i.e. notify-window).
         Default: notify-vmexit=run,notify-window=0.
 
+    ``device=path``
+        Sets the path to the KVM device node. Defaults to ``/dev/kvm``. This
+        option can be used to pass the KVM device to use via a file descriptor
+        by setting the value to ``/dev/fdset/NN``.
+
 ERST
 
 DEF("smp", HAS_ARG, QEMU_OPTION_smp,
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 9a71246682..173de91afe 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -167,7 +167,7 @@ sev_ioctl(int fd, int cmd, void *data, int *error)
 
     input.id = cmd;
     input.sev_fd = fd;
-    input.data = (__u64)(unsigned long)data;
+    input.data = (uintptr_t)data;
 
     r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &input);
 
@@ -240,7 +240,7 @@ sev_ram_block_added(RAMBlockNotifier *n, void *host, size_t size,
         return;
     }
 
-    range.addr = (__u64)(unsigned long)host;
+    range.addr = (uintptr_t)host;
     range.size = max_size;
 
     trace_kvm_memcrypt_register_region(host, max_size);
@@ -270,7 +270,7 @@ sev_ram_block_removed(RAMBlockNotifier *n, void *host, size_t size,
         return;
     }
 
-    range.addr = (__u64)(unsigned long)host;
+    range.addr = (uintptr_t)host;
     range.size = max_size;
 
     trace_kvm_memcrypt_unregister_region(host, max_size);
@@ -767,7 +767,7 @@ sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len)
         return 1;
     }
 
-    update.uaddr = (__u64)(unsigned long)addr;
+    update.uaddr = (uintptr_t)addr;
     update.len = len;
     trace_kvm_sev_launch_update_data(addr, len);
     ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA,
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
index 6e881e9e27..1d54164bdf 100644
--- a/target/i386/tcg/tcg-cpu.c
+++ b/target/i386/tcg/tcg-cpu.c
@@ -68,14 +68,26 @@ static void x86_restore_state_to_opc(CPUState *cs,
     X86CPU *cpu = X86_CPU(cs);
     CPUX86State *env = &cpu->env;
     int cc_op = data[1];
+    uint64_t new_pc;
 
     if (tb_cflags(tb) & CF_PCREL) {
-        env->eip = (env->eip & TARGET_PAGE_MASK) | data[0];
-    } else if (tb->flags & HF_CS64_MASK) {
-        env->eip = data[0];
+        /*
+         * data[0] in PC-relative TBs is also a linear address, i.e. an address with
+         * the CS base added, because it is not guaranteed that EIP bits 12 and higher
+         * stay the same across the translation block.  Add the CS base back before
+         * replacing the low bits, and subtract it below just like for !CF_PCREL.
+         */
+        uint64_t pc = env->eip + tb->cs_base;
+        new_pc = (pc & TARGET_PAGE_MASK) | data[0];
     } else {
-        env->eip = (uint32_t)(data[0] - tb->cs_base);
+        new_pc = data[0];
     }
+    if (tb->flags & HF_CS64_MASK) {
+        env->eip = new_pc;
+    } else {
+        env->eip = (uint32_t)(new_pc - tb->cs_base);
+    }
+
     if (cc_op != CC_OP_DYNAMIC) {
         env->cc_op = cc_op;
     }
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index e1eb82a5c6..e193c74472 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -567,9 +567,9 @@ static void gen_update_eip_next(DisasContext *s)
     if (tb_cflags(s->base.tb) & CF_PCREL) {
         tcg_gen_addi_tl(cpu_eip, cpu_eip, s->pc - s->pc_save);
     } else if (CODE64(s)) {
-        tcg_gen_movi_tl(cpu_eip, s->base.pc_next);
+        tcg_gen_movi_tl(cpu_eip, s->pc);
     } else {
-        tcg_gen_movi_tl(cpu_eip, (uint32_t)(s->base.pc_next - s->cs_base));
+        tcg_gen_movi_tl(cpu_eip, (uint32_t)(s->pc - s->cs_base));
     }
     s->pc_save = s->pc;
 }
@@ -2866,10 +2866,6 @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num)
         }
     }
     new_eip &= mask;
-    new_pc = new_eip + s->cs_base;
-    if (!CODE64(s)) {
-        new_pc = (uint32_t)new_pc;
-    }
 
     gen_update_cc_op(s);
     set_cc_op(s, CC_OP_DYNAMIC);
@@ -2885,6 +2881,8 @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num)
             tcg_gen_andi_tl(cpu_eip, cpu_eip, mask);
             use_goto_tb = false;
         }
+    } else if (!CODE64(s)) {
+        new_pc = (uint32_t)(new_eip + s->cs_base);
     }
 
     if (use_goto_tb && translator_use_goto_tb(&s->base, new_pc)) {
@@ -6998,7 +6996,6 @@ static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
 
     dc->prev_insn_end = tcg_last_op();
     if (tb_cflags(dcbase->tb) & CF_PCREL) {
-        pc_arg -= dc->cs_base;
         pc_arg &= ~TARGET_PAGE_MASK;
     }
     tcg_gen_insn_start(pc_arg, dc->cc_op);
diff --git a/tests/tcg/aarch64/system/vtimer.c b/tests/tcg/aarch64/system/vtimer.c
index 42f2f7796c..7d725eced3 100644
--- a/tests/tcg/aarch64/system/vtimer.c
+++ b/tests/tcg/aarch64/system/vtimer.c
@@ -6,7 +6,7 @@
  * SPDX-License-Identifier: GPL-2.0-or-later
  */
 
-#include <inttypes.h>
+#include <stdint.h>
 #include <minilib.h>
 
 /* grabbed from Linux */
diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c
index 16054c5ede..b0d68bdc44 100644
--- a/util/fdmon-io_uring.c
+++ b/util/fdmon-io_uring.c
@@ -180,7 +180,7 @@ static void add_poll_remove_sqe(AioContext *ctx, AioHandler *node)
     struct io_uring_sqe *sqe = get_sqe(ctx);
 
 #ifdef LIBURING_HAVE_DATA64
-    io_uring_prep_poll_remove(sqe, (__u64)(uintptr_t)node);
+    io_uring_prep_poll_remove(sqe, (uintptr_t)node);
 #else
     io_uring_prep_poll_remove(sqe, node);
 #endif