summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--Makefile8
-rw-r--r--Makefile.target6
-rw-r--r--accel/kvm/kvm-all.c39
-rw-r--r--accel/tcg/translate-all.c2
-rw-r--r--chardev/char-socket.c19
-rwxr-xr-xconfigure12
-rw-r--r--default-configs/ppc-linux-user.mak1
-rw-r--r--default-configs/ppc-softmmu.mak1
-rw-r--r--default-configs/ppc64-linux-user.mak1
-rw-r--r--default-configs/ppc64-softmmu.mak1
-rw-r--r--default-configs/ppc64abi32-linux-user.mak1
-rw-r--r--default-configs/ppc64le-linux-user.mak1
-rw-r--r--default-configs/ppcemb-softmmu.mak1
-rw-r--r--disas.c1
-rw-r--r--docs/devel/loads-stores.rst396
-rw-r--r--exec.c109
-rw-r--r--hw/acpi/tco.c11
-rw-r--r--hw/acpi/trace-events4
-rw-r--r--hw/core/qdev.c32
-rw-r--r--hw/display/cirrus_vga.c6
-rw-r--r--hw/display/vga.c33
-rw-r--r--hw/i386/pc.c7
-rw-r--r--hw/ide/core.c1
-rw-r--r--hw/ide/qdev.c1
-rw-r--r--hw/scsi/scsi-disk.c28
-rw-r--r--include/disas/bfd.h1
-rw-r--r--include/hw/ide/internal.h8
-rw-r--r--include/hw/qdev-core.h1
-rw-r--r--include/qemu/typedefs.h1
-rw-r--r--include/ui/console.h39
-rw-r--r--include/ui/egl-helpers.h7
-rw-r--r--include/ui/gtk.h2
-rw-r--r--include/ui/sdl2.h2
-rw-r--r--include/ui/shader.h14
-rw-r--r--include/ui/spice-display.h2
-rw-r--r--libdecnumber/Makefile.objs5
-rw-r--r--linux-user/arm/target_cpu.h4
-rw-r--r--linux-user/elfload.c2
-rw-r--r--linux-user/main.c45
-rw-r--r--linux-user/signal.c43
-rw-r--r--linux-user/strace.c4
-rw-r--r--linux-user/syscall.c3
-rw-r--r--linux-user/syscall_defs.h43
-rw-r--r--memory.c18
-rwxr-xr-xscripts/checkpatch.pl19
-rw-r--r--scsi/qemu-pr-helper.c17
-rw-r--r--target/i386/translate.c257
-rw-r--r--target/mips/mips-defs.h6
-rw-r--r--target/nios2/cpu.h6
-rw-r--r--target/nios2/translate.c1
-rw-r--r--target/ppc/Makefile.objs1
-rw-r--r--target/sh4/cpu.h6
-rw-r--r--ui/console-gl.c45
-rw-r--r--ui/console.c33
-rw-r--r--ui/egl-headless.c59
-rw-r--r--ui/egl-helpers.c73
-rw-r--r--ui/gtk-egl.c2
-rw-r--r--ui/gtk-gl-area.c2
-rw-r--r--ui/sdl2-gl.c4
-rw-r--r--ui/shader.c57
-rw-r--r--ui/shader/texture-blit-flip.vert10
-rw-r--r--ui/spice-display.c2
62 files changed, 1227 insertions, 339 deletions
diff --git a/Makefile b/Makefile
index 90f91e54eb..9372742f86 100644
--- a/Makefile
+++ b/Makefile
@@ -380,7 +380,7 @@ DTC_CPPFLAGS=-I$(BUILD_DIR)/dtc -I$(SRC_PATH)/dtc -I$(SRC_PATH)/dtc/libfdt
 subdir-dtc: .git-submodule-status dtc/libfdt dtc/tests
 	$(call quiet-command,$(MAKE) $(DTC_MAKE_ARGS) CPPFLAGS="$(DTC_CPPFLAGS)" CFLAGS="$(DTC_CFLAGS)" LDFLAGS="$(LDFLAGS)" ARFLAGS="$(ARFLAGS)" CC="$(CC)" AR="$(AR)" LD="$(LD)" $(SUBDIR_MAKEFLAGS) libfdt/libfdt.a,)
 
-dtc/%:
+dtc/%: .git-submodule-status
 	mkdir -p $@
 
 $(SUBDIR_RULES): libqemuutil.a $(common-obj-y) $(chardev-obj-y) \
@@ -724,8 +724,10 @@ ui/shader/%-frag.h: $(SRC_PATH)/ui/shader/%.frag $(SRC_PATH)/scripts/shaderinclu
 		perl $(SRC_PATH)/scripts/shaderinclude.pl $< > $@,\
 		"FRAG","$@")
 
-ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \
-	ui/shader/texture-blit-vert.h ui/shader/texture-blit-frag.h
+ui/shader.o: $(SRC_PATH)/ui/shader.c \
+	ui/shader/texture-blit-vert.h \
+	ui/shader/texture-blit-flip-vert.h \
+	ui/shader/texture-blit-frag.h
 
 # documentation
 MAKEINFO=makeinfo
diff --git a/Makefile.target b/Makefile.target
index a4b292d2e2..e4244c188a 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -102,12 +102,6 @@ obj-y += target/$(TARGET_BASE_ARCH)/
 obj-y += disas.o
 obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
 
-obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decContext.o
-obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decNumber.o
-obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/dpd/decimal32.o
-obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/dpd/decimal64.o
-obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/dpd/decimal128.o
-
 #########################################################
 # Linux user emulator target
 
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 90c88b517d..f290f487a5 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -197,26 +197,20 @@ static hwaddr kvm_align_section(MemoryRegionSection *section,
                                 hwaddr *start)
 {
     hwaddr size = int128_get64(section->size);
-    hwaddr delta;
-
-    *start = section->offset_within_address_space;
+    hwaddr delta, aligned;
 
     /* kvm works in page size chunks, but the function may be called
        with sub-page size and unaligned start address. Pad the start
        address to next and truncate size to previous page boundary. */
-    delta = qemu_real_host_page_size - (*start & ~qemu_real_host_page_mask);
-    delta &= ~qemu_real_host_page_mask;
-    *start += delta;
+    aligned = ROUND_UP(section->offset_within_address_space,
+                       qemu_real_host_page_size);
+    delta = aligned - section->offset_within_address_space;
+    *start = aligned;
     if (delta > size) {
         return 0;
     }
-    size -= delta;
-    size &= qemu_real_host_page_mask;
-    if (*start & ~qemu_real_host_page_mask) {
-        return 0;
-    }
 
-    return size;
+    return (size - delta) & qemu_real_host_page_mask;
 }
 
 int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
@@ -394,8 +388,8 @@ static int kvm_section_update_flags(KVMMemoryListener *kml,
 
     mem = kvm_lookup_matching_slot(kml, start_addr, size);
     if (!mem) {
-        fprintf(stderr, "%s: error finding slot\n", __func__);
-        abort();
+        /* We don't have a slot if we want to trap every access. */
+        return 0;
     }
 
     return kvm_slot_update_flags(kml, mem, section->mr);
@@ -470,8 +464,8 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml,
     if (size) {
         mem = kvm_lookup_matching_slot(kml, start_addr, size);
         if (!mem) {
-            fprintf(stderr, "%s: error finding slot\n", __func__);
-            abort();
+            /* We don't have a slot if we want to trap every access. */
+            return 0;
         }
 
         /* XXX bad kernel interface alert
@@ -717,11 +711,12 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
         return;
     }
 
+    /* use aligned delta to align the ram address */
     ram = memory_region_get_ram_ptr(mr) + section->offset_within_region +
-          (section->offset_within_address_space - start_addr);
+          (start_addr - section->offset_within_address_space);
 
-    mem = kvm_lookup_matching_slot(kml, start_addr, size);
     if (!add) {
+        mem = kvm_lookup_matching_slot(kml, start_addr, size);
         if (!mem) {
             return;
         }
@@ -733,19 +728,13 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
         mem->memory_size = 0;
         err = kvm_set_user_memory_region(kml, mem);
         if (err) {
-            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
+            fprintf(stderr, "%s: error unregistering slot: %s\n",
                     __func__, strerror(-err));
             abort();
         }
         return;
     }
 
-    if (mem) {
-        /* update the slot */
-        kvm_slot_update_flags(kml, mem, mr);
-        return;
-    }
-
     /* register the new slot */
     mem = kvm_alloc_slot(kml);
     mem->memory_size = size;
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index c5ce99d549..1b43deb0cd 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -2114,7 +2114,7 @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
        guest address space.  If this assert fires, it probably indicates
        a missing call to h2g_valid.  */
 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
-    assert(end < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
+    assert(end <= ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
 #endif
     assert(start < end);
     assert_memory_lock();
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index e65148fe97..53eda8ef00 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -332,10 +332,6 @@ static void tcp_chr_free_connection(Chardev *chr)
     SocketChardev *s = SOCKET_CHARDEV(chr);
     int i;
 
-    if (!s->connected) {
-        return;
-    }
-
     if (s->read_msgfds_num) {
         for (i = 0; i < s->read_msgfds_num; i++) {
             close(s->read_msgfds[i]);
@@ -394,22 +390,25 @@ static void update_disconnected_filename(SocketChardev *s)
                                          s->is_listen, s->is_telnet);
 }
 
+/* NB may be called even if tcp_chr_connect has not been
+ * reached, due to TLS or telnet initialization failure,
+ * so can *not* assume s->connected == true
+ */
 static void tcp_chr_disconnect(Chardev *chr)
 {
     SocketChardev *s = SOCKET_CHARDEV(chr);
-
-    if (!s->connected) {
-        return;
-    }
+    bool emit_close = s->connected;
 
     tcp_chr_free_connection(chr);
 
-    if (s->listen_ioc) {
+    if (s->listen_ioc && s->listen_tag == 0) {
         s->listen_tag = qio_channel_add_watch(
             QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL);
     }
     update_disconnected_filename(s);
-    qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+    if (emit_close) {
+        qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+    }
     if (s->reconnect_time) {
         qemu_chr_socket_restart_timer(chr);
     }
diff --git a/configure b/configure
index 663e9081ea..6f21aaf989 100755
--- a/configure
+++ b/configure
@@ -3335,9 +3335,17 @@ if test "$mpath" != "no" ; then
 #include <mpath_persist.h>
 unsigned mpath_mx_alloc_len = 1024;
 int logsink;
+static struct config *multipath_conf;
+extern struct udev *udev;
+extern struct config *get_multipath_config(void);
+extern void put_multipath_config(struct config *conf);
+struct udev *udev;
+struct config *get_multipath_config(void) { return multipath_conf; }
+void put_multipath_config(struct config *conf) { }
+
 int main(void) {
-    struct udev *udev = udev_new();
-    mpath_lib_init(udev);
+    udev = udev_new();
+    multipath_conf = mpath_lib_init();
     return 0;
 }
 EOF
diff --git a/default-configs/ppc-linux-user.mak b/default-configs/ppc-linux-user.mak
index 260ba41836..6273df2930 100644
--- a/default-configs/ppc-linux-user.mak
+++ b/default-configs/ppc-linux-user.mak
@@ -1,2 +1 @@
 # Default configuration for ppc-linux-user
-CONFIG_LIBDECNUMBER=y
diff --git a/default-configs/ppc-softmmu.mak b/default-configs/ppc-softmmu.mak
index d7a3755881..bb225c6e46 100644
--- a/default-configs/ppc-softmmu.mak
+++ b/default-configs/ppc-softmmu.mak
@@ -46,7 +46,6 @@ CONFIG_E500=y
 CONFIG_OPENPIC_KVM=$(call land,$(CONFIG_E500),$(CONFIG_KVM))
 CONFIG_PLATFORM_BUS=y
 CONFIG_ETSEC=y
-CONFIG_LIBDECNUMBER=y
 CONFIG_SM501=y
 # For PReP
 CONFIG_SERIAL_ISA=y
diff --git a/default-configs/ppc64-linux-user.mak b/default-configs/ppc64-linux-user.mak
index e731ce016e..422d3fbaeb 100644
--- a/default-configs/ppc64-linux-user.mak
+++ b/default-configs/ppc64-linux-user.mak
@@ -1,2 +1 @@
 # Default configuration for ppc64-linux-user
-CONFIG_LIBDECNUMBER=y
diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
index 9086475bf6..d1b3a6dd50 100644
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -51,7 +51,6 @@ CONFIG_E500=y
 CONFIG_OPENPIC_KVM=$(call land,$(CONFIG_E500),$(CONFIG_KVM))
 CONFIG_PLATFORM_BUS=y
 CONFIG_ETSEC=y
-CONFIG_LIBDECNUMBER=y
 CONFIG_SM501=y
 # For pSeries
 CONFIG_XICS=$(CONFIG_PSERIES)
diff --git a/default-configs/ppc64abi32-linux-user.mak b/default-configs/ppc64abi32-linux-user.mak
index c244d07d56..1c657ec9bb 100644
--- a/default-configs/ppc64abi32-linux-user.mak
+++ b/default-configs/ppc64abi32-linux-user.mak
@@ -1,2 +1 @@
 # Default configuration for ppc64abi32-linux-user
-CONFIG_LIBDECNUMBER=y
diff --git a/default-configs/ppc64le-linux-user.mak b/default-configs/ppc64le-linux-user.mak
index 4ba4eae6ce..63f4269023 100644
--- a/default-configs/ppc64le-linux-user.mak
+++ b/default-configs/ppc64le-linux-user.mak
@@ -1,2 +1 @@
 # Default configuration for ppc64le-linux-user
-CONFIG_LIBDECNUMBER=y
diff --git a/default-configs/ppcemb-softmmu.mak b/default-configs/ppcemb-softmmu.mak
index 635923a166..13917fb7a3 100644
--- a/default-configs/ppcemb-softmmu.mak
+++ b/default-configs/ppcemb-softmmu.mak
@@ -15,5 +15,4 @@ CONFIG_PTIMER=y
 CONFIG_I8259=y
 CONFIG_XILINX=y
 CONFIG_XILINX_ETHLITE=y
-CONFIG_LIBDECNUMBER=y
 CONFIG_SM501=y
diff --git a/disas.c b/disas.c
index d6a1eb9c8e..54eea3f9c9 100644
--- a/disas.c
+++ b/disas.c
@@ -190,7 +190,6 @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code,
 
     s.cpu = cpu;
     s.info.read_memory_func = target_read_memory;
-    s.info.read_memory_inner_func = NULL;
     s.info.buffer_vma = code;
     s.info.buffer_length = size;
     s.info.print_address_func = generic_print_address;
diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst
new file mode 100644
index 0000000000..6a990cc243
--- /dev/null
+++ b/docs/devel/loads-stores.rst
@@ -0,0 +1,396 @@
+..
+   Copyright (c) 2017 Linaro Limited
+   Written by Peter Maydell
+
+===================
+Load and Store APIs
+===================
+
+QEMU internally has multiple families of functions for performing
+loads and stores. This document attempts to enumerate them all
+and indicate when to use them. It does not provide detailed
+documentation of each API -- for that you should look at the
+documentation comments in the relevant header files.
+
+
+``ld*_p and st*_p``
+~~~~~~~~~~~~~~~~~~~
+
+These functions operate on a host pointer, and should be used
+when you already have a pointer into host memory (corresponding
+to guest ram or a local buffer). They deal with doing accesses
+with the desired endianness and with correctly handling
+potentially unaligned pointer values.
+
+Function names follow the pattern:
+
+load: ``ld{type}{sign}{size}_{endian}_p(ptr)``
+
+store: ``st{type}{size}_{endian}_p(ptr, val)``
+
+``type``
+ - (empty) : integer access
+ - ``f`` : float access
+
+``sign``
+ - (empty) : for 32 or 64 bit sizes (including floats and doubles)
+ - ``u`` : unsigned
+ - ``s`` : signed
+
+``size``
+ - ``b`` : 8 bits
+ - ``w`` : 16 bits
+ - ``l`` : 32 bits
+ - ``q`` : 64 bits
+
+``endian``
+ - ``he`` : host endian
+ - ``be`` : big endian
+ - ``le`` : little endian
+
+The ``_{endian}`` infix is omitted for target-endian accesses.
+
+The target endian accessors are only available to source
+files which are built per-target.
+
+Regexes for git grep
+ - ``\<ldf\?[us]\?[bwlq]\(_[hbl]e\)\?_p\>``
+ - ``\<stf\?[bwlq]\(_[hbl]e\)\?_p\>``
+
+``cpu_{ld,st}_*``
+~~~~~~~~~~~~~~~~~
+
+These functions operate on a guest virtual address. Be aware
+that these functions may cause a guest CPU exception to be
+taken (e.g. for an alignment fault or MMU fault) which will
+result in guest CPU state being updated and control longjumping
+out of the function call. They should therefore only be used
+in code that is implementing emulation of the target CPU.
+
+These functions may throw an exception (longjmp() back out
+to the top level TCG loop). This means they must only be used
+from helper functions where the translator has saved all
+necessary CPU state before generating the helper function call.
+It's usually better to use the ``_ra`` variants described below
+from helper functions, but these functions are the right choice
+for calls made from hooks like the CPU do_interrupt hook or
+when you know for certain that the translator had to save all
+the CPU state that ``cpu_restore_state()`` would restore anyway.
+
+Function names follow the pattern:
+
+load: ``cpu_ld{sign}{size}_{mmusuffix}(env, ptr)``
+
+store: ``cpu_st{size}_{mmusuffix}(env, ptr, val)``
+
+``sign``
+ - (empty) : for 32 or 64 bit sizes
+ - ``u`` : unsigned
+ - ``s`` : signed
+
+``size``
+ - ``b`` : 8 bits
+ - ``w`` : 16 bits
+ - ``l`` : 32 bits
+ - ``q`` : 64 bits
+
+``mmusuffix`` is one of the generic suffixes ``data`` or ``code``, or
+(for softmmu configs) a target-specific MMU mode suffix as defined
+in the target's ``cpu.h``.
+
+Regexes for git grep
+ - ``\<cpu_ld[us]\?[bwlq]_[a-zA-Z0-9]\+\>``
+ - ``\<cpu_st[bwlq]_[a-zA-Z0-9]\+\>``
+
+``cpu_{ld,st}_*_ra``
+~~~~~~~~~~~~~~~~~~~~
+
+These functions work like the ``cpu_{ld,st}_*`` functions except
+that they also take a ``retaddr`` argument. This extra argument
+allows for correct unwinding of any exception that is taken,
+and should generally be the result of GETPC() called directly
+from the top level HELPER(foo) function (i.e. the return address
+in the generated code).
+
+These are generally the preferred way to do accesses by guest
+virtual address from helper functions; see the documentation
+of the non-``_ra`` variants for when those would be better.
+
+Calling these functions with a ``retaddr`` argument of 0 is
+equivalent to calling the non-``_ra`` version of the function.
+
+Function names follow the pattern:
+
+load: ``cpu_ld{sign}{size}_{mmusuffix}_ra(env, ptr, retaddr)``
+
+store: ``cpu_st{sign}{size}_{mmusuffix}_ra(env, ptr, val, retaddr)``
+
+Regexes for git grep
+ - ``\<cpu_ld[us]\?[bwlq]_[a-zA-Z0-9]\+_ra\>``
+ - ``\<cpu_st[bwlq]_[a-zA-Z0-9]\+_ra\>``
+
+``helper_*_{ld,st}*mmu``
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+These functions are intended primarily to be called by the code
+generated by the TCG backend. They may also be called by target
+CPU helper function code. Like the ``cpu_{ld,st}_*_ra`` functions
+they perform accesses by guest virtual address; the difference is
+that these functions allow you to specify an ``opindex`` parameter
+which encodes (among other things) the mmu index to use for the
+access. This is necessary if your helper needs to make an access
+via a specific mmu index (for instance, an "always as non-privileged"
+access) rather than using the default mmu index for the current state
+of the guest CPU.
+
+The ``opindex`` parameter should be created by calling ``make_memop_idx()``.
+
+The ``retaddr`` parameter should be the result of GETPC() called directly
+from the top level HELPER(foo) function (or 0 if no guest CPU state
+unwinding is required).
+
+**TODO** The names of these functions are a bit odd for historical
+reasons because they were originally expected to be called only from
+within generated code. We should rename them to bring them
+more in line with the other memory access functions.
+
+load: ``helper_{endian}_ld{sign}{size}_mmu(env, addr, opindex, retaddr)``
+
+load (code): ``helper_{endian}_ld{sign}{size}_cmmu(env, addr, opindex, retaddr)``
+
+store: ``helper_{endian}_st{size}_mmu(env, addr, val, opindex, retaddr)``
+
+``sign``
+ - (empty) : for 32 or 64 bit sizes
+ - ``u`` : unsigned
+ - ``s`` : signed
+
+``size``
+ - ``b`` : 8 bits
+ - ``w`` : 16 bits
+ - ``l`` : 32 bits
+ - ``q`` : 64 bits
+
+``endian``
+ - ``le`` : little endian
+ - ``be`` : big endian
+ - ``ret`` : target endianness
+
+Regexes for git grep
+ - ``\<helper_\(le\|be\|ret\)_ld[us]\?[bwlq]_c\?mmu\>``
+ - ``\<helper_\(le\|be\|ret\)_st[bwlq]_mmu\>``
+
+``address_space_*``
+~~~~~~~~~~~~~~~~~~~
+
+These functions are the primary ones to use when emulating CPU
+or device memory accesses. They take an AddressSpace, which is the
+way QEMU defines the view of memory that a device or CPU has.
+(They generally correspond to being the "master" end of a hardware bus
+or bus fabric.)
+
+Each CPU has an AddressSpace. Some kinds of CPU have more than
+one AddressSpace (for instance ARM guest CPUs have an AddressSpace
+for the Secure world and one for NonSecure if they implement TrustZone).
+Devices which can do DMA-type operations should generally have an
+AddressSpace. There is also a "system address space" which typically
+has all the devices and memory that all CPUs can see. (Some older
+device models use the "system address space" rather than properly
+modelling that they have an AddressSpace of their own.)
+
+Functions are provided for doing byte-buffer reads and writes,
+and also for doing one-data-item loads and stores.
+
+In all cases the caller provides a MemTxAttrs to specify bus
+transaction attributes, and can check whether the memory transaction
+succeeded using a MemTxResult return code.
+
+``address_space_read(address_space, addr, attrs, buf, len)``
+
+``address_space_write(address_space, addr, attrs, buf, len)``
+
+``address_space_rw(address_space, addr, attrs, buf, len, is_write)``
+
+``address_space_ld{sign}{size}_{endian}(address_space, addr, attrs, txresult)``
+
+``address_space_st{size}_{endian}(address_space, addr, val, attrs, txresult)``
+
+``sign``
+ - (empty) : for 32 or 64 bit sizes
+ - ``u`` : unsigned
+
+(No signed load operations are provided.)
+
+``size``
+ - ``b`` : 8 bits
+ - ``w`` : 16 bits
+ - ``l`` : 32 bits
+ - ``q`` : 64 bits
+
+``endian``
+ - ``le`` : little endian
+ - ``be`` : big endian
+
+The ``_{endian}`` suffix is omitted for byte accesses.
+
+Regexes for git grep
+ - ``\<address_space_\(read\|write\|rw\)\>``
+ - ``\<address_space_ldu\?[bwql]\(_[lb]e\)\?\>``
+ - ``\<address_space_st[bwql]\(_[lb]e\)\?\>``
+
+``{ld,st}*_phys``
+~~~~~~~~~~~~~~~~~
+
+These are functions which are identical to
+``address_space_{ld,st}*``, except that they always pass
+``MEMTXATTRS_UNSPECIFIED`` for the transaction attributes, and ignore
+whether the transaction succeeded or failed.
+
+The fact that they ignore whether the transaction succeeded means
+they should not be used in new code, unless you know for certain
+that your code will only be used in a context where the CPU or
+device doing the access has no way to report such an error.
+
+``load: ld{sign}{size}_{endian}_phys``
+
+``store: st{size}_{endian}_phys``
+
+``sign``
+ - (empty) : for 32 or 64 bit sizes
+ - ``u`` : unsigned
+
+(No signed load operations are provided.)
+
+``size``
+ - ``b`` : 8 bits
+ - ``w`` : 16 bits
+ - ``l`` : 32 bits
+ - ``q`` : 64 bits
+
+``endian``
+ - ``le`` : little endian
+ - ``be`` : big endian
+
+The ``_{endian}_`` infix is omitted for byte accesses.
+
+Regexes for git grep
+ - ``\<ldu\?[bwlq]\(_[bl]e\)\?_phys\>``
+ - ``\<st[bwlq]\(_[bl]e\)\?_phys\>``
+
+``cpu_physical_memory_*``
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+These are convenience functions which are identical to
+``address_space_*`` but operate specifically on the system address space,
+always pass a ``MEMTXATTRS_UNSPECIFIED`` set of memory attributes and
+ignore whether the memory transaction succeeded or failed.
+For new code they are better avoided:
+
+* there is likely to be behaviour you need to model correctly for a
+  failed read or write operation
+* a device should usually perform operations on its own AddressSpace
+  rather than using the system address space
+
+``cpu_physical_memory_read``
+
+``cpu_physical_memory_write``
+
+``cpu_physical_memory_rw``
+
+Regexes for git grep
+ - ``\<cpu_physical_memory_\(read\|write\|rw\)\>``
+
+``cpu_physical_memory_write_rom``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This function performs a write by physical address like
+``address_space_write``, except that if the write is to a ROM then
+the ROM contents will be modified, even though a write by the guest
+CPU to the ROM would be ignored.
+
+Note that unlike ``cpu_physical_memory_write()`` this function takes
+an AddressSpace argument, but unlike ``address_space_write()`` this
+function does not take a ``MemTxAttrs`` or return a ``MemTxResult``.
+
+**TODO**: we should probably clean up this inconsistency and
+turn the function into ``address_space_write_rom`` with an API
+matching ``address_space_write``.
+
+``cpu_physical_memory_write_rom``
+
+
+``cpu_memory_rw_debug``
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Access CPU memory by virtual address for debug purposes.
+
+This function is intended for use by the GDB stub and similar code.
+It takes a virtual address, converts it to a physical address via
+an MMU lookup using the current settings of the specified CPU,
+and then performs the access (using ``address_space_rw`` for
+reads or ``cpu_physical_memory_write_rom`` for writes).
+This means that if the access is a write to a ROM then this
+function will modify the contents (whereas a normal guest CPU access
+would ignore the write attempt).
+
+``cpu_memory_rw_debug``
+
+``dma_memory_*``
+~~~~~~~~~~~~~~~~
+
+These behave like ``address_space_*``, except that they perform a DMA
+barrier operation first.
+
+**TODO**: We should provide guidance on when you need the DMA
+barrier operation and when it's OK to use ``address_space_*``, and
+make sure our existing code is doing things correctly.
+
+``dma_memory_read``
+
+``dma_memory_write``
+
+``dma_memory_rw``
+
+Regexes for git grep
+ - ``\<dma_memory_\(read\|write\|rw\)\>``
+
+``pci_dma_*`` and ``{ld,st}*_pci_dma``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+These functions are specifically for PCI device models which need to
+perform accesses where the PCI device is a bus master. You pass them a
+``PCIDevice *`` and they will do ``dma_memory_*`` operations on the
+correct address space for that device.
+
+``pci_dma_read``
+
+``pci_dma_write``
+
+``pci_dma_rw``
+
+``load: ld{sign}{size}_{endian}_pci_dma``
+
+``store: st{size}_{endian}_pci_dma``
+
+``sign``
+ - (empty) : for 32 or 64 bit sizes
+ - ``u`` : unsigned
+
+(No signed load operations are provided.)
+
+``size``
+ - ``b`` : 8 bits
+ - ``w`` : 16 bits
+ - ``l`` : 32 bits
+ - ``q`` : 64 bits
+
+``endian``
+ - ``le`` : little endian
+ - ``be`` : big endian
+
+The ``_{endian}_`` infix is omitted for byte accesses.
+
+Regexes for git grep
+ - ``\<pci_dma_\(read\|write\|rw\)\>``
+ - ``\<ldu\?[bwlq]\(_[bl]e\)\?_pci_dma\>``
+ - ``\<st[bwlq]\(_[bl]e\)\?_pci_dma\>``
diff --git a/exec.c b/exec.c
index 6378714a2b..db5ae23118 100644
--- a/exec.c
+++ b/exec.c
@@ -465,11 +465,29 @@ address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *x
     return section;
 }
 
-/* Called from RCU critical section */
+/**
+ * flatview_do_translate - translate an address in FlatView
+ *
+ * @fv: the flat view that we want to translate on
+ * @addr: the address to be translated in above address space
+ * @xlat: the translated address offset within memory region. It
+ *        cannot be @NULL.
+ * @plen_out: valid read/write length of the translated address. It
+ *            can be @NULL when we don't care about it.
+ * @page_mask_out: page mask for the translated address. This
+ *            should only be meaningful for IOMMU translated
+ *            addresses, since there may be huge pages that this bit
+ *            would tell. It can be @NULL if we don't care about it.
+ * @is_write: whether the translation operation is for write
+ * @is_mmio: whether this can be MMIO, set true if it can
+ *
+ * This function is called from RCU critical section
+ */
 static MemoryRegionSection flatview_do_translate(FlatView *fv,
                                                  hwaddr addr,
                                                  hwaddr *xlat,
-                                                 hwaddr *plen,
+                                                 hwaddr *plen_out,
+                                                 hwaddr *page_mask_out,
                                                  bool is_write,
                                                  bool is_mmio,
                                                  AddressSpace **target_as)
@@ -478,11 +496,17 @@ static MemoryRegionSection flatview_do_translate(FlatView *fv,
     MemoryRegionSection *section;
     IOMMUMemoryRegion *iommu_mr;
     IOMMUMemoryRegionClass *imrc;
+    hwaddr page_mask = (hwaddr)(-1);
+    hwaddr plen = (hwaddr)(-1);
+
+    if (plen_out) {
+        plen = *plen_out;
+    }
 
     for (;;) {
         section = address_space_translate_internal(
                 flatview_to_dispatch(fv), addr, &addr,
-                plen, is_mmio);
+                &plen, is_mmio);
 
         iommu_mr = memory_region_get_iommu(section->mr);
         if (!iommu_mr) {
@@ -494,7 +518,8 @@ static MemoryRegionSection flatview_do_translate(FlatView *fv,
                                 IOMMU_WO : IOMMU_RO);
         addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                 | (addr & iotlb.addr_mask));
-        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
+        page_mask &= iotlb.addr_mask;
+        plen = MIN(plen, (addr | iotlb.addr_mask) - addr + 1);
         if (!(iotlb.perm & (1 << is_write))) {
             goto translate_fail;
         }
@@ -505,6 +530,19 @@ static MemoryRegionSection flatview_do_translate(FlatView *fv,
 
     *xlat = addr;
 
+    if (page_mask == (hwaddr)(-1)) {
+        /* Not behind an IOMMU, use default page size. */
+        page_mask = ~TARGET_PAGE_MASK;
+    }
+
+    if (page_mask_out) {
+        *page_mask_out = page_mask;
+    }
+
+    if (plen_out) {
+        *plen_out = plen;
+    }
+
     return *section;
 
 translate_fail:
@@ -516,14 +554,14 @@ IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
                                             bool is_write)
 {
     MemoryRegionSection section;
-    hwaddr xlat, plen;
-
-    /* Try to get maximum page mask during translation. */
-    plen = (hwaddr)-1;
+    hwaddr xlat, page_mask;
 
-    /* This can never be MMIO. */
-    section = flatview_do_translate(address_space_to_flatview(as), addr,
-                                    &xlat, &plen, is_write, false, &as);
+    /*
+     * This can never be MMIO, and we don't really care about plen,
+     * but page mask.
+     */
+    section = flatview_do_translate(address_space_to_flatview(as), addr, &xlat,
+                                    NULL, &page_mask, is_write, false, &as);
 
     /* Illegal translation */
     if (section.mr == &io_mem_unassigned) {
@@ -534,22 +572,11 @@ IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
     xlat += section.offset_within_address_space -
         section.offset_within_region;
 
-    if (plen == (hwaddr)-1) {
-        /*
-         * We use default page size here. Logically it only happens
-         * for identity mappings.
-         */
-        plen = TARGET_PAGE_SIZE;
-    }
-
-    /* Convert to address mask */
-    plen -= 1;
-
     return (IOMMUTLBEntry) {
         .target_as = as,
-        .iova = addr & ~plen,
-        .translated_addr = xlat & ~plen,
-        .addr_mask = plen,
+        .iova = addr & ~page_mask,
+        .translated_addr = xlat & ~page_mask,
+        .addr_mask = page_mask,
         /* IOTLBs are for DMAs, and DMA only allows on RAMs. */
         .perm = IOMMU_RW,
     };
@@ -567,7 +594,8 @@ MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat,
     AddressSpace *as = NULL;
 
     /* This can be MMIO, so setup MMIO bit. */
-    section = flatview_do_translate(fv, addr, xlat, plen, is_write, true, &as);
+    section = flatview_do_translate(fv, addr, xlat, plen, NULL,
+                                    is_write, true, &as);
     mr = section.mr;
 
     if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
@@ -2348,6 +2376,9 @@ static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
     case 4:
         stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
         break;
+    case 8:
+        stq_p(qemu_map_ram_ptr(NULL, ram_addr), val);
+        break;
     default:
         abort();
     }
@@ -2378,6 +2409,16 @@ static const MemoryRegionOps notdirty_mem_ops = {
     .write = notdirty_mem_write,
     .valid.accepts = notdirty_mem_accepts,
     .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+        .unaligned = false,
+    },
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+        .unaligned = false,
+    },
 };
 
 /* Generate a debug exception if a watchpoint has been hit.  */
@@ -2462,6 +2503,9 @@ static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
     case 4:
         data = address_space_ldl(as, addr, attrs, &res);
         break;
+    case 8:
+        data = address_space_ldq(as, addr, attrs, &res);
+        break;
     default: abort();
     }
     *pdata = data;
@@ -2487,6 +2531,9 @@ static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
     case 4:
         address_space_stl(as, addr, val, attrs, &res);
         break;
+    case 8:
+        address_space_stq(as, addr, val, attrs, &res);
+        break;
     default: abort();
     }
     return res;
@@ -2496,6 +2543,16 @@ static const MemoryRegionOps watch_mem_ops = {
     .read_with_attrs = watch_mem_read,
     .write_with_attrs = watch_mem_write,
     .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+        .unaligned = false,
+    },
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+        .unaligned = false,
+    },
 };
 
 static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
diff --git a/hw/acpi/tco.c b/hw/acpi/tco.c
index 05b9d7ba36..a9143963ab 100644
--- a/hw/acpi/tco.c
+++ b/hw/acpi/tco.c
@@ -12,6 +12,7 @@
 #include "hw/i386/ich9.h"
 
 #include "hw/acpi/tco.h"
+#include "trace.h"
 
 //#define DEBUG
 
@@ -41,8 +42,11 @@ enum {
 
 static inline void tco_timer_reload(TCOIORegs *tr)
 {
-    tr->expire_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
-        ((int64_t)(tr->tco.tmr & TCO_TMR_MASK) * TCO_TICK_NSEC);
+    int ticks = tr->tco.tmr & TCO_TMR_MASK;
+    int64_t nsec = (int64_t)ticks * TCO_TICK_NSEC;
+
+    trace_tco_timer_reload(ticks, nsec / 1000000);
+    tr->expire_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + nsec;
     timer_mod(tr->tco_timer, tr->expire_time);
 }
 
@@ -59,6 +63,9 @@ static void tco_timer_expired(void *opaque)
     ICH9LPCState *lpc = container_of(pm, ICH9LPCState, pm);
     uint32_t gcs = pci_get_long(lpc->chip_config + ICH9_CC_GCS);
 
+    trace_tco_timer_expired(tr->timeouts_no,
+                            lpc->pin_strap.spkr_hi,
+                            !!(gcs & ICH9_CC_GCS_NO_REBOOT));
     tr->tco.rld = 0;
     tr->tco.sts1 |= TCO_TIMEOUT;
     if (++tr->timeouts_no == 2) {
diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events
index e3b41e9df4..df0024f8b2 100644
--- a/hw/acpi/trace-events
+++ b/hw/acpi/trace-events
@@ -30,3 +30,7 @@ cpuhp_acpi_ejecting_invalid_cpu(uint32_t idx) "0x%"PRIx32
 cpuhp_acpi_ejecting_cpu(uint32_t idx) "0x%"PRIx32
 cpuhp_acpi_write_ost_ev(uint32_t slot, uint32_t ev) "idx[0x%"PRIx32"] OST EVENT: 0x%"PRIx32
 cpuhp_acpi_write_ost_status(uint32_t slot, uint32_t st) "idx[0x%"PRIx32"] OST STATUS: 0x%"PRIx32
+
+# hw/acpi/tco.c
+tco_timer_reload(int ticks, int msec) "ticks=%d (%d ms)"
+tco_timer_expired(int timeouts_no, bool strap, bool no_reboot) "timeouts_no=%d no_reboot=%d/%d"
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 606ab53c42..11112951a5 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -928,6 +928,13 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
             goto post_realize_fail;
         }
 
+        /*
+         * always free/re-initialize here since the value cannot be cleaned up
+         * in device_unrealize due to its usage later on in the unplug path
+         */
+        g_free(dev->canonical_path);
+        dev->canonical_path = object_get_canonical_path(OBJECT(dev));
+
         if (qdev_get_vmsd(dev)) {
             if (vmstate_register_with_alias_id(dev, -1, qdev_get_vmsd(dev), dev,
                                                dev->instance_id_alias,
@@ -984,6 +991,8 @@ child_realize_fail:
     }
 
 post_realize_fail:
+    g_free(dev->canonical_path);
+    dev->canonical_path = NULL;
     if (dc->unrealize) {
         dc->unrealize(dev, NULL);
     }
@@ -1070,6 +1079,18 @@ static void device_finalize(Object *obj)
          * here
          */
     }
+
+    /* Only send event if the device had been completely realized */
+    if (dev->pending_deleted_event) {
+        g_assert(dev->canonical_path);
+
+        qapi_event_send_device_deleted(!!dev->id, dev->id, dev->canonical_path,
+                                       &error_abort);
+        g_free(dev->canonical_path);
+        dev->canonical_path = NULL;
+    }
+
+    qemu_opts_del(dev->opts);
 }
 
 static void device_class_base_init(ObjectClass *class, void *data)
@@ -1099,17 +1120,6 @@ static void device_unparent(Object *obj)
         object_unref(OBJECT(dev->parent_bus));
         dev->parent_bus = NULL;
     }
-
-    /* Only send event if the device had been completely realized */
-    if (dev->pending_deleted_event) {
-        gchar *path = object_get_canonical_path(OBJECT(dev));
-
-        qapi_event_send_device_deleted(!!dev->id, dev->id, path, &error_abort);
-        g_free(path);
-    }
-
-    qemu_opts_del(dev->opts);
-    dev->opts = NULL;
 }
 
 static void device_class_init(ObjectClass *class, void *data)
diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c
index b4d579857a..bc32bf1e39 100644
--- a/hw/display/cirrus_vga.c
+++ b/hw/display/cirrus_vga.c
@@ -2038,15 +2038,14 @@ static void cirrus_mem_writeb_mode4and5_8bpp(CirrusVGAState * s,
     unsigned val = mem_value;
     uint8_t *dst;
 
-    dst = s->vga.vram_ptr + (offset &= s->cirrus_addr_mask);
     for (x = 0; x < 8; x++) {
+        dst = s->vga.vram_ptr + ((offset + x) & s->cirrus_addr_mask);
 	if (val & 0x80) {
 	    *dst = s->cirrus_shadow_gr1;
 	} else if (mode == 5) {
 	    *dst = s->cirrus_shadow_gr0;
 	}
 	val <<= 1;
-	dst++;
     }
     memory_region_set_dirty(&s->vga.vram, offset, 8);
 }
@@ -2060,8 +2059,8 @@ static void cirrus_mem_writeb_mode4and5_16bpp(CirrusVGAState * s,
     unsigned val = mem_value;
     uint8_t *dst;
 
-    dst = s->vga.vram_ptr + (offset &= s->cirrus_addr_mask);
     for (x = 0; x < 8; x++) {
+        dst = s->vga.vram_ptr + ((offset + 2 * x) & s->cirrus_addr_mask & ~1);
 	if (val & 0x80) {
 	    *dst = s->cirrus_shadow_gr1;
 	    *(dst + 1) = s->vga.gr[0x11];
@@ -2070,7 +2069,6 @@ static void cirrus_mem_writeb_mode4and5_16bpp(CirrusVGAState * s,
 	    *(dst + 1) = s->vga.gr[0x10];
 	}
 	val <<= 1;
-	dst += 2;
     }
     memory_region_set_dirty(&s->vga.vram, offset, 16);
 }
diff --git a/hw/display/vga.c b/hw/display/vga.c
index ed24ef7076..1d19f6bc48 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -1464,14 +1464,14 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
 {
     DisplaySurface *surface = qemu_console_surface(s->con);
     int y1, y, update, linesize, y_start, double_scan, mask, depth;
-    int width, height, shift_control, line_offset, bwidth, bits;
-    ram_addr_t page0, page1;
+    int width, height, shift_control, bwidth, bits;
+    ram_addr_t page0, page1, region_start, region_end;
     DirtyBitmapSnapshot *snap = NULL;
     int disp_width, multi_scan, multi_run;
     uint8_t *d;
     uint32_t v, addr1, addr;
     vga_draw_line_func *vga_draw_line = NULL;
-    bool share_surface;
+    bool share_surface, force_shadow = false;
     pixman_format_code_t format;
 #ifdef HOST_WORDS_BIGENDIAN
     bool byteswap = !s->big_endian_fb;
@@ -1484,6 +1484,15 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
     s->get_resolution(s, &width, &height);
     disp_width = width;
 
+    region_start = (s->start_addr * 4);
+    region_end = region_start + (ram_addr_t)s->line_offset * height;
+    if (region_end > s->vbe_size) {
+        /* wraps around (can happen with cirrus vbe modes) */
+        region_start = 0;
+        region_end = s->vbe_size;
+        force_shadow = true;
+    }
+
     shift_control = (s->gr[VGA_GFX_MODE] >> 5) & 3;
     double_scan = (s->cr[VGA_CRTC_MAX_SCAN] >> 7);
     if (shift_control != 1) {
@@ -1523,7 +1532,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
     format = qemu_default_pixman_format(depth, !byteswap);
     if (format) {
         share_surface = dpy_gfx_check_format(s->con, format)
-            && !s->force_shadow;
+            && !s->force_shadow && !force_shadow;
     } else {
         share_surface = false;
     }
@@ -1614,7 +1623,6 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
         s->cursor_invalidate(s);
     }
 
-    line_offset = s->line_offset;
 #if 0
     printf("w=%d h=%d v=%d line_offset=%d cr[0x09]=0x%02x cr[0x17]=0x%02x linecmp=%d sr[0x01]=0x%02x\n",
            width, height, v, line_offset, s->cr[9], s->cr[VGA_CRTC_MODE],
@@ -1628,8 +1636,6 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
     y1 = 0;
 
     if (!full_update) {
-        ram_addr_t region_start = addr1;
-        ram_addr_t region_end = addr1 + line_offset * height;
         vga_sync_dirty_bitmap(s);
         if (s->line_compare < height) {
             /* split screen mode */
@@ -1652,10 +1658,17 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
             addr = (addr & ~0x8000) | ((y1 & 2) << 14);
         }
         update = full_update;
-        page0 = addr;
-        page1 = addr + bwidth - 1;
+        page0 = addr & s->vbe_size_mask;
+        page1 = (addr + bwidth - 1) & s->vbe_size_mask;
         if (full_update) {
             update = 1;
+        } else if (page1 < page0) {
+            /* scanline wraps from end of video memory to the start */
+            assert(force_shadow);
+            update = memory_region_snapshot_get_dirty(&s->vram, snap,
+                                                      page0, 0);
+            update |= memory_region_snapshot_get_dirty(&s->vram, snap,
+                                                       page1, 0);
         } else {
             update = memory_region_snapshot_get_dirty(&s->vram, snap,
                                                       page0, page1 - page0);
@@ -1681,7 +1694,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
         if (!multi_run) {
             mask = (s->cr[VGA_CRTC_MODE] & 3) ^ 3;
             if ((y1 & mask) == mask)
-                addr1 += line_offset;
+                addr1 += s->line_offset;
             y1++;
             multi_run = multi_scan;
         } else {
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 05985d4927..8e307f7aff 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1876,8 +1876,15 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
     CPUArchId *cpu_slot;
     X86CPUTopoInfo topo;
     X86CPU *cpu = X86_CPU(dev);
+    MachineState *ms = MACHINE(hotplug_dev);
     PCMachineState *pcms = PC_MACHINE(hotplug_dev);
 
+    if(!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) {
+        error_setg(errp, "Invalid CPU type, expected cpu type: '%s'",
+                   ms->cpu_type);
+        return;
+    }
+
     /* if APIC ID is not set, set it based on socket/core/thread properties */
     if (cpu->apic_id == UNASSIGNED_APIC_ID) {
         int max_socket = (max_cpus - 1) / smp_threads / smp_cores;
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 5f1cd3b91f..a04766aee7 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -208,6 +208,7 @@ static void ide_identify(IDEState *s)
     if (dev && dev->conf.discard_granularity) {
         put_le16(p + 169, 1); /* TRIM support */
     }
+    put_le16(p + 217, dev->rotation_rate); /* Nominal media rotation rate */
 
     ide_identify_size(s);
     s->identify_set = 1;
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index d60ac25be0..a5181b4448 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -299,6 +299,7 @@ static Property ide_hd_properties[] = {
     DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf),
     DEFINE_PROP_BIOS_CHS_TRANS("bios-chs-trans",
                 IDEDrive, dev.chs_trans, BIOS_ATA_TRANSLATION_AUTO),
+    DEFINE_PROP_UINT16("rotation_rate", IDEDrive, dev.rotation_rate, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 6e841fb5ff..12431177a7 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -104,6 +104,14 @@ typedef struct SCSIDiskState
     char *product;
     bool tray_open;
     bool tray_locked;
+    /*
+     * 0x0000        - rotation rate not reported
+     * 0x0001        - non-rotating medium (SSD)
+     * 0x0002-0x0400 - reserved
+     * 0x0401-0xffe  - rotations per minute
+     * 0xffff        - reserved
+     */
+    uint16_t rotation_rate;
 } SCSIDiskState;
 
 static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed);
@@ -605,6 +613,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
             outbuf[buflen++] = 0x83; // device identification
             if (s->qdev.type == TYPE_DISK) {
                 outbuf[buflen++] = 0xb0; // block limits
+                outbuf[buflen++] = 0xb1; /* block device characteristics */
                 outbuf[buflen++] = 0xb2; // thin provisioning
             }
             break;
@@ -747,6 +756,15 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
             outbuf[43] = max_io_sectors & 0xff;
             break;
         }
+        case 0xb1: /* block device characteristics */
+        {
+            buflen = 8;
+            outbuf[4] = (s->rotation_rate >> 8) & 0xff;
+            outbuf[5] = s->rotation_rate & 0xff;
+            outbuf[6] = 0;
+            outbuf[7] = 0;
+            break;
+        }
         case 0xb2: /* thin provisioning */
         {
             buflen = 8;
@@ -2329,6 +2347,14 @@ static void scsi_realize(SCSIDevice *dev, Error **errp)
 
     blkconf_serial(&s->qdev.conf, &s->serial);
     blkconf_blocksizes(&s->qdev.conf);
+
+    if (s->qdev.conf.logical_block_size >
+        s->qdev.conf.physical_block_size) {
+        error_setg(errp,
+                   "logical_block_size > physical_block_size not supported");
+        return;
+    }
+
     if (dev->type == TYPE_DISK) {
         blkconf_geometry(&dev->conf, NULL, 65535, 255, 255, &err);
         if (err) {
@@ -2911,6 +2937,7 @@ static Property scsi_hd_properties[] = {
                        DEFAULT_MAX_UNMAP_SIZE),
     DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
                        DEFAULT_MAX_IO_SIZE),
+    DEFINE_PROP_UINT16("rotation_rate", SCSIDiskState, rotation_rate, 0),
     DEFINE_BLOCK_CHS_PROPERTIES(SCSIDiskState, qdev.conf),
     DEFINE_PROP_END_OF_LIST(),
 };
@@ -2982,6 +3009,7 @@ static const TypeInfo scsi_cd_info = {
 static Property scsi_block_properties[] = {
     DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf),         \
     DEFINE_PROP_DRIVE("drive", SCSIDiskState, qdev.conf.blk),
+    DEFINE_PROP_UINT16("rotation_rate", SCSIDiskState, rotation_rate, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/include/disas/bfd.h b/include/disas/bfd.h
index b01e002b4c..d99da68267 100644
--- a/include/disas/bfd.h
+++ b/include/disas/bfd.h
@@ -479,6 +479,7 @@ int generic_symbol_at_address(bfd_vma, struct disassemble_info *);
   (INFO).buffer_vma = 0, \
   (INFO).buffer_length = 0, \
   (INFO).read_memory_func = buffer_read_memory, \
+  (INFO).read_memory_inner_func = NULL, \
   (INFO).memory_error_func = perror_memory, \
   (INFO).print_address_func = generic_print_address, \
   (INFO).print_insn = NULL, \
diff --git a/include/hw/ide/internal.h b/include/hw/ide/internal.h
index e641012b48..31851b44d1 100644
--- a/include/hw/ide/internal.h
+++ b/include/hw/ide/internal.h
@@ -508,6 +508,14 @@ struct IDEDevice {
     char *serial;
     char *model;
     uint64_t wwn;
+    /*
+     * 0x0000        - rotation rate not reported
+     * 0x0001        - non-rotating medium (SSD)
+     * 0x0002-0x0400 - reserved
+     * 0x0401-0xffe  - rotations per minute
+     * 0xffff        - reserved
+     */
+    uint16_t rotation_rate;
 };
 
 /* These are used for the error_status field of IDEBus */
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 089146197f..0a71bf83f0 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -153,6 +153,7 @@ struct DeviceState {
     /*< public >*/
 
     const char *id;
+    char *canonical_path;
     bool realized;
     bool pending_deleted_event;
     QemuOpts *opts;
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 980d2b330e..3dbc69b1e9 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -83,6 +83,7 @@ typedef struct PropertyInfo PropertyInfo;
 typedef struct PS2State PS2State;
 typedef struct QEMUBH QEMUBH;
 typedef struct QemuConsole QemuConsole;
+typedef struct QemuDmaBuf QemuDmaBuf;
 typedef struct QEMUFile QEMUFile;
 typedef struct QemuOpt QemuOpt;
 typedef struct QemuOpts QemuOpts;
diff --git a/include/ui/console.h b/include/ui/console.h
index 6966e4bd9d..580dfc57ee 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -5,12 +5,14 @@
 #include "qom/object.h"
 #include "qapi/qmp/qdict.h"
 #include "qemu/notify.h"
+#include "qemu/typedefs.h"
 #include "qapi-types.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 
 #ifdef CONFIG_OPENGL
 # include <epoxy/gl.h>
+# include "ui/shader.h"
 #endif
 
 /* keyboard/mouse support */
@@ -180,6 +182,15 @@ struct QEMUGLParams {
     int minor_ver;
 };
 
+struct QemuDmaBuf {
+    int       fd;
+    uint32_t  width;
+    uint32_t  height;
+    uint32_t  stride;
+    uint32_t  fourcc;
+    uint32_t  texture;
+};
+
 typedef struct DisplayChangeListenerOps {
     const char *dpy_name;
 
@@ -220,6 +231,13 @@ typedef struct DisplayChangeListenerOps {
                                    uint32_t backing_height,
                                    uint32_t x, uint32_t y,
                                    uint32_t w, uint32_t h);
+    void (*dpy_gl_scanout_dmabuf)(DisplayChangeListener *dcl,
+                                  QemuDmaBuf *dmabuf);
+    void (*dpy_gl_cursor_dmabuf)(DisplayChangeListener *dcl,
+                                 QemuDmaBuf *dmabuf,
+                                 uint32_t pos_x, uint32_t pos_y);
+    void (*dpy_gl_release_dmabuf)(DisplayChangeListener *dcl,
+                                  QemuDmaBuf *dmabuf);
     void (*dpy_gl_update)(DisplayChangeListener *dcl,
                           uint32_t x, uint32_t y, uint32_t w, uint32_t h);
 
@@ -288,6 +306,13 @@ void dpy_gl_scanout_texture(QemuConsole *con,
                             uint32_t backing_id, bool backing_y_0_top,
                             uint32_t backing_width, uint32_t backing_height,
                             uint32_t x, uint32_t y, uint32_t w, uint32_t h);
+void dpy_gl_scanout_dmabuf(QemuConsole *con,
+                           QemuDmaBuf *dmabuf);
+void dpy_gl_cursor_dmabuf(QemuConsole *con,
+                          QemuDmaBuf *dmabuf,
+                          uint32_t pos_x, uint32_t pos_y);
+void dpy_gl_release_dmabuf(QemuConsole *con,
+                           QemuDmaBuf *dmabuf);
 void dpy_gl_update(QemuConsole *con,
                    uint32_t x, uint32_t y, uint32_t w, uint32_t h);
 
@@ -298,6 +323,7 @@ int dpy_gl_ctx_make_current(QemuConsole *con, QEMUGLContext ctx);
 QEMUGLContext dpy_gl_ctx_get_current(QemuConsole *con);
 
 bool console_has_gl(QemuConsole *con);
+bool console_has_gl_dmabuf(QemuConsole *con);
 
 static inline int surface_stride(DisplaySurface *s)
 {
@@ -390,22 +416,19 @@ void qemu_console_resize(QemuConsole *con, int width, int height);
 DisplaySurface *qemu_console_surface(QemuConsole *con);
 
 /* console-gl.c */
-typedef struct ConsoleGLState ConsoleGLState;
 #ifdef CONFIG_OPENGL
-ConsoleGLState *console_gl_init_context(void);
-void console_gl_fini_context(ConsoleGLState *gls);
 bool console_gl_check_format(DisplayChangeListener *dcl,
                              pixman_format_code_t format);
-void surface_gl_create_texture(ConsoleGLState *gls,
+void surface_gl_create_texture(QemuGLShader *gls,
                                DisplaySurface *surface);
-void surface_gl_update_texture(ConsoleGLState *gls,
+void surface_gl_update_texture(QemuGLShader *gls,
                                DisplaySurface *surface,
                                int x, int y, int w, int h);
-void surface_gl_render_texture(ConsoleGLState *gls,
+void surface_gl_render_texture(QemuGLShader *gls,
                                DisplaySurface *surface);
-void surface_gl_destroy_texture(ConsoleGLState *gls,
+void surface_gl_destroy_texture(QemuGLShader *gls,
                                DisplaySurface *surface);
-void surface_gl_setup_viewport(ConsoleGLState *gls,
+void surface_gl_setup_viewport(QemuGLShader *gls,
                                DisplaySurface *surface,
                                int ww, int wh);
 #endif
diff --git a/include/ui/egl-helpers.h b/include/ui/egl-helpers.h
index 81cb255de0..747233ce58 100644
--- a/include/ui/egl-helpers.h
+++ b/include/ui/egl-helpers.h
@@ -24,6 +24,10 @@ void egl_fb_setup_new_tex(egl_fb *fb, int width, int height);
 void egl_fb_blit(egl_fb *dst, egl_fb *src, bool flip);
 void egl_fb_read(void *dst, egl_fb *src);
 
+void egl_texture_blit(QemuGLShader *gls, egl_fb *dst, egl_fb *src, bool flip);
+void egl_texture_blend(QemuGLShader *gls, egl_fb *dst, egl_fb *src, bool flip,
+                       int x, int y);
+
 #ifdef CONFIG_OPENGL_DMABUF
 
 extern int qemu_egl_rn_fd;
@@ -33,6 +37,9 @@ extern EGLContext qemu_egl_rn_ctx;
 int egl_rendernode_init(const char *rendernode);
 int egl_get_fd_for_texture(uint32_t tex_id, EGLint *stride, EGLint *fourcc);
 
+void egl_dmabuf_import_texture(QemuDmaBuf *dmabuf);
+void egl_dmabuf_release_texture(QemuDmaBuf *dmabuf);
+
 #endif
 
 EGLSurface qemu_egl_init_surface_x11(EGLContext ectx, Window win);
diff --git a/include/ui/gtk.h b/include/ui/gtk.h
index 2f7b720358..849c896eef 100644
--- a/include/ui/gtk.h
+++ b/include/ui/gtk.h
@@ -47,7 +47,7 @@ typedef struct VirtualGfxConsole {
     double scale_x;
     double scale_y;
 #if defined(CONFIG_OPENGL)
-    ConsoleGLState *gls;
+    QemuGLShader *gls;
     EGLContext ectx;
     EGLSurface esurface;
     int glupdates;
diff --git a/include/ui/sdl2.h b/include/ui/sdl2.h
index 454367ac84..b29cf803c9 100644
--- a/include/ui/sdl2.h
+++ b/include/ui/sdl2.h
@@ -26,7 +26,7 @@ struct sdl2_console {
     int idle_counter;
     SDL_GLContext winctx;
 #ifdef CONFIG_OPENGL
-    ConsoleGLState *gls;
+    QemuGLShader *gls;
     egl_fb guest_fb;
     egl_fb win_fb;
     bool y0_top;
diff --git a/include/ui/shader.h b/include/ui/shader.h
index f7d86188bf..4c5acb2ce8 100644
--- a/include/ui/shader.h
+++ b/include/ui/shader.h
@@ -3,13 +3,11 @@
 
 #include <epoxy/gl.h>
 
-GLuint qemu_gl_init_texture_blit(GLint texture_blit_prog);
-void qemu_gl_run_texture_blit(GLint texture_blit_prog,
-                              GLint texture_blit_vao);
-
-GLuint qemu_gl_create_compile_shader(GLenum type, const GLchar *src);
-GLuint qemu_gl_create_link_program(GLuint vert, GLuint frag);
-GLuint qemu_gl_create_compile_link_program(const GLchar *vert_src,
-                                           const GLchar *frag_src);
+typedef struct QemuGLShader QemuGLShader;
+
+void qemu_gl_run_texture_blit(QemuGLShader *gls, bool flip);
+
+QemuGLShader *qemu_gl_init_shader(void);
+void qemu_gl_fini_shader(QemuGLShader *gls);
 
 #endif /* QEMU_SHADER_H */
diff --git a/include/ui/spice-display.h b/include/ui/spice-display.h
index 4ba9444dba..aaf2019889 100644
--- a/include/ui/spice-display.h
+++ b/include/ui/spice-display.h
@@ -119,7 +119,7 @@ struct SimpleSpiceDisplay {
     /* opengl rendering */
     QEMUBH *gl_unblock_bh;
     QEMUTimer *gl_unblock_timer;
-    ConsoleGLState *gls;
+    QemuGLShader *gls;
     int gl_updates;
     bool have_scanout;
     bool have_surface;
diff --git a/libdecnumber/Makefile.objs b/libdecnumber/Makefile.objs
new file mode 100644
index 0000000000..d81db0443a
--- /dev/null
+++ b/libdecnumber/Makefile.objs
@@ -0,0 +1,5 @@
+obj-y += decContext.o
+obj-y += decNumber.o
+obj-y += dpd/decimal32.o
+obj-y += dpd/decimal64.o
+obj-y += dpd/decimal128.o
diff --git a/linux-user/arm/target_cpu.h b/linux-user/arm/target_cpu.h
index d888219150..c3eb4b243d 100644
--- a/linux-user/arm/target_cpu.h
+++ b/linux-user/arm/target_cpu.h
@@ -19,6 +19,10 @@
 #ifndef ARM_TARGET_CPU_H
 #define ARM_TARGET_CPU_H
 
+/* We need to be able to map the commpage.
+   See validate_guest_space in linux-user/elfload.c.  */
+#define MAX_RESERVED_VA  0xffff0000ul
+
 static inline void cpu_clone_regs(CPUARMState *env, target_ulong newsp)
 {
     if (newsp) {
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 79062882ba..3b857fbc9c 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -377,7 +377,7 @@ static int validate_guest_space(unsigned long guest_base,
      * then there is no way we can allocate it.
      */
     if (test_page_addr >= guest_base
-        && test_page_addr <= (guest_base + guest_size)) {
+        && test_page_addr < (guest_base + guest_size)) {
         return -1;
     }
 
diff --git a/linux-user/main.c b/linux-user/main.c
index 829f974662..dde04c769a 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -60,23 +60,38 @@ do {                                                                    \
     }                                                                   \
 } while (0)
 
-#if (TARGET_LONG_BITS == 32) && (HOST_LONG_BITS == 64)
 /*
  * When running 32-on-64 we should make sure we can fit all of the possible
  * guest address space into a contiguous chunk of virtual host memory.
  *
  * This way we will never overlap with our own libraries or binaries or stack
  * or anything else that QEMU maps.
+ *
+ * Many cpus reserve the high bit (or more than one for some 64-bit cpus)
+ * of the address for the kernel.  Some cpus rely on this and user space
+ * uses the high bit(s) for pointer tagging and the like.  For them, we
+ * must preserve the expected address space.
  */
-# if defined(TARGET_MIPS) || defined(TARGET_NIOS2)
-/*
- * MIPS only supports 31 bits of virtual address space for user space.
- * Nios2 also only supports 31 bits.
- */
-unsigned long reserved_va = 0x77000000;
+#ifndef MAX_RESERVED_VA
+# if HOST_LONG_BITS > TARGET_VIRT_ADDR_SPACE_BITS
+#  if TARGET_VIRT_ADDR_SPACE_BITS == 32 && \
+      (TARGET_LONG_BITS == 32 || defined(TARGET_ABI32))
+/* There are a number of places where we assign reserved_va to a variable
+   of type abi_ulong and expect it to fit.  Avoid the last page.  */
+#   define MAX_RESERVED_VA  (0xfffffffful & TARGET_PAGE_MASK)
+#  else
+#   define MAX_RESERVED_VA  (1ul << TARGET_VIRT_ADDR_SPACE_BITS)
+#  endif
 # else
-unsigned long reserved_va = 0xf7000000;
+#  define MAX_RESERVED_VA  0
 # endif
+#endif
+
+/* That said, reserving *too* much vm space via mmap can run into problems
+   with rlimits, oom due to page table creation, etc.  We will still try it,
+   if directed by the command-line option, but not by default.  */
+#if HOST_LONG_BITS == 64 && TARGET_VIRT_ADDR_SPACE_BITS <= 32
+unsigned long reserved_va = MAX_RESERVED_VA;
 #else
 unsigned long reserved_va;
 #endif
@@ -3854,6 +3869,11 @@ static void handle_arg_log(const char *arg)
     qemu_set_log(mask);
 }
 
+static void handle_arg_dfilter(const char *arg)
+{
+    qemu_set_dfilter_ranges(arg, NULL);
+}
+
 static void handle_arg_log_filename(const char *arg)
 {
     qemu_set_log_filename(arg, &error_fatal);
@@ -3978,11 +3998,8 @@ static void handle_arg_reserved_va(const char *arg)
         unsigned long unshifted = reserved_va;
         p++;
         reserved_va <<= shift;
-        if (((reserved_va >> shift) != unshifted)
-#if HOST_LONG_BITS > TARGET_VIRT_ADDR_SPACE_BITS
-            || (reserved_va > (1ul << TARGET_VIRT_ADDR_SPACE_BITS))
-#endif
-            ) {
+        if (reserved_va >> shift != unshifted
+            || (MAX_RESERVED_VA && reserved_va > MAX_RESERVED_VA)) {
             fprintf(stderr, "Reserved virtual address too big\n");
             exit(EXIT_FAILURE);
         }
@@ -4054,6 +4071,8 @@ static const struct qemu_argument arg_table[] = {
     {"d",          "QEMU_LOG",         true,  handle_arg_log,
      "item[,...]", "enable logging of specified items "
      "(use '-d help' for a list of items)"},
+    {"dfilter",    "QEMU_DFILTER",     true,  handle_arg_dfilter,
+     "range[,...]","filter logging based on address range"},
     {"D",          "QEMU_LOG_FILENAME", true, handle_arg_log_filename,
      "logfile",     "write logs to 'logfile' (default stderr)"},
     {"p",          "QEMU_PAGESIZE",    true,  handle_arg_pagesize,
diff --git a/linux-user/signal.c b/linux-user/signal.c
index cc0c3fcee9..7a238aaea1 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -5704,6 +5704,24 @@ give_sigsegv:
     force_sigsegv(sig);
 }
 
+static inline void target_rt_save_fpu_state(struct target_ucontext *uc,
+                                           CPUM68KState *env)
+{
+    int i;
+    target_fpregset_t *fpregs = &uc->tuc_mcontext.fpregs;
+
+    __put_user(env->fpcr, &fpregs->f_fpcntl[0]);
+    __put_user(env->fpsr, &fpregs->f_fpcntl[1]);
+    /* fpiar is not emulated */
+
+    for (i = 0; i < 8; i++) {
+        uint32_t high = env->fregs[i].d.high << 16;
+        __put_user(high, &fpregs->f_fpregs[i * 3]);
+        __put_user(env->fregs[i].d.low,
+                   (uint64_t *)&fpregs->f_fpregs[i * 3 + 1]);
+    }
+}
+
 static inline int target_rt_setup_ucontext(struct target_ucontext *uc,
                                            CPUM68KState *env)
 {
@@ -5730,9 +5748,32 @@ static inline int target_rt_setup_ucontext(struct target_ucontext *uc,
     __put_user(env->pc, &gregs[16]);
     __put_user(sr, &gregs[17]);
 
+    target_rt_save_fpu_state(uc, env);
+
     return 0;
 }
 
+static inline void target_rt_restore_fpu_state(CPUM68KState *env,
+                                               struct target_ucontext *uc)
+{
+    int i;
+    target_fpregset_t *fpregs = &uc->tuc_mcontext.fpregs;
+    uint32_t fpcr;
+
+    __get_user(fpcr, &fpregs->f_fpcntl[0]);
+    cpu_m68k_set_fpcr(env, fpcr);
+    __get_user(env->fpsr, &fpregs->f_fpcntl[1]);
+    /* fpiar is not emulated */
+
+    for (i = 0; i < 8; i++) {
+        uint32_t high;
+        __get_user(high, &fpregs->f_fpregs[i * 3]);
+        env->fregs[i].d.high = high >> 16;
+        __get_user(env->fregs[i].d.low,
+                   (uint64_t *)&fpregs->f_fpregs[i * 3 + 1]);
+    }
+}
+
 static inline int target_rt_restore_ucontext(CPUM68KState *env,
                                              struct target_ucontext *uc)
 {
@@ -5764,6 +5805,8 @@ static inline int target_rt_restore_ucontext(CPUM68KState *env,
     __get_user(temp, &gregs[17]);
     cpu_m68k_set_ccr(env, temp);
 
+    target_rt_restore_fpu_state(env, uc);
+
     return 0;
 
 badframe:
diff --git a/linux-user/strace.c b/linux-user/strace.c
index d821d165ff..bd897a3f20 100644
--- a/linux-user/strace.c
+++ b/linux-user/strace.c
@@ -838,6 +838,10 @@ UNUSED static struct flags open_flags[] = {
 #ifdef O_PATH
     FLAG_TARGET(O_PATH),
 #endif
+#ifdef O_TMPFILE
+    FLAG_TARGET(O_TMPFILE),
+    FLAG_TARGET(__O_TMPFILE),
+#endif
     FLAG_END,
 };
 
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 9d4cc4cf5d..9bf901fa11 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -343,6 +343,9 @@ static bitmask_transtbl fcntl_flags_tbl[] = {
 #if defined(O_PATH)
   { TARGET_O_PATH,      TARGET_O_PATH,      O_PATH,      O_PATH       },
 #endif
+#if defined(O_TMPFILE)
+  { TARGET_O_TMPFILE,   TARGET_O_TMPFILE,   O_TMPFILE,   O_TMPFILE    },
+#endif
   /* Don't terminate the list prematurely on 64-bit host+guest.  */
 #if TARGET_O_LARGEFILE != 0 || O_LARGEFILE != 0
   { TARGET_O_LARGEFILE, TARGET_O_LARGEFILE, O_LARGEFILE, O_LARGEFILE, },
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index b3d55e35ac..450960bb54 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -1108,8 +1108,8 @@ struct target_pollfd {
 /* Note that the ioctl numbers claim type "long" but the actual type
  * used by the kernel is "int".
  */
-#define TARGET_FS_IOC_GETFLAGS TARGET_IOR('f', 1, long)
-#define TARGET_FS_IOC_SETFLAGS TARGET_IOW('f', 2, long)
+#define TARGET_FS_IOC_GETFLAGS TARGET_IOR('f', 1, abi_long)
+#define TARGET_FS_IOC_SETFLAGS TARGET_IOW('f', 2, abi_long)
 
 #define TARGET_FS_IOC_FIEMAP TARGET_IOWR('f',11,struct fiemap)
 
@@ -2423,7 +2423,7 @@ struct target_statfs64 {
 #define TARGET_O_CLOEXEC     010000000
 #define TARGET___O_SYNC      000100000
 #define TARGET_O_PATH        020000000
-#elif defined(TARGET_ARM) || defined(TARGET_M68K)
+#elif defined(TARGET_ARM) || defined(TARGET_M68K) || defined(TARGET_AARCH64)
 #define TARGET_O_DIRECTORY      040000 /* must be a directory */
 #define TARGET_O_NOFOLLOW      0100000 /* don't follow links */
 #define TARGET_O_DIRECT        0200000 /* direct disk access hint */
@@ -2520,6 +2520,12 @@ struct target_statfs64 {
 #ifndef TARGET_O_PATH
 #define TARGET_O_PATH        010000000
 #endif
+#ifndef TARGET___O_TMPFILE
+#define TARGET___O_TMPFILE   020000000
+#endif
+#ifndef TARGET_O_TMPFILE
+#define TARGET_O_TMPFILE     (TARGET___O_TMPFILE | TARGET_O_DIRECTORY)
+#endif
 #ifndef TARGET_O_NDELAY
 #define TARGET_O_NDELAY  TARGET_O_NONBLOCK
 #endif
@@ -2713,9 +2719,34 @@ struct target_f_owner_ex {
 #define TARGET_VFAT_IOCTL_READDIR_BOTH    TARGET_IORU('r', 1)
 #define TARGET_VFAT_IOCTL_READDIR_SHORT   TARGET_IORU('r', 2)
 
-#define TARGET_MTIOCTOP        TARGET_IOW('m', 1, struct mtop)
-#define TARGET_MTIOCGET        TARGET_IOR('m', 2, struct mtget)
-#define TARGET_MTIOCPOS        TARGET_IOR('m', 3, struct mtpos)
+struct target_mtop {
+    abi_short mt_op;
+    abi_int mt_count;
+};
+
+#if defined(TARGET_SPARC) || defined(TARGET_MIPS)
+typedef abi_long target_kernel_daddr_t;
+#else
+typedef abi_int target_kernel_daddr_t;
+#endif
+
+struct target_mtget {
+    abi_long mt_type;
+    abi_long mt_resid;
+    abi_long mt_dsreg;
+    abi_long mt_gstat;
+    abi_long mt_erreg;
+    target_kernel_daddr_t mt_fileno;
+    target_kernel_daddr_t mt_blkno;
+};
+
+struct target_mtpos {
+    abi_long mt_blkno;
+};
+
+#define TARGET_MTIOCTOP        TARGET_IOW('m', 1, struct target_mtop)
+#define TARGET_MTIOCGET        TARGET_IOR('m', 2, struct target_mtget)
+#define TARGET_MTIOCPOS        TARGET_IOR('m', 3, struct target_mtpos)
 
 struct target_sysinfo {
     abi_long uptime;                /* Seconds since boot */
diff --git a/memory.c b/memory.c
index 5e6351a6c1..e26e5a3b1d 100644
--- a/memory.c
+++ b/memory.c
@@ -1892,7 +1892,7 @@ void memory_region_notify_one(IOMMUNotifier *notifier,
      * Skip the notification if the notification does not overlap
      * with registered range.
      */
-    if (notifier->start > entry->iova + entry->addr_mask + 1 ||
+    if (notifier->start > entry->iova + entry->addr_mask ||
         notifier->end < entry->iova) {
         return;
     }
@@ -2599,20 +2599,14 @@ static void listener_add_address_space(MemoryListener *listener,
 
     view = address_space_get_flatview(as);
     FOR_EACH_FLAT_RANGE(fr, view) {
-        MemoryRegionSection section = {
-            .mr = fr->mr,
-            .fv = view,
-            .offset_within_region = fr->offset_in_region,
-            .size = fr->addr.size,
-            .offset_within_address_space = int128_get64(fr->addr.start),
-            .readonly = fr->readonly,
-        };
-        if (fr->dirty_log_mask && listener->log_start) {
-            listener->log_start(listener, &section, 0, fr->dirty_log_mask);
-        }
+        MemoryRegionSection section = section_from_flat_range(fr, view);
+
         if (listener->region_add) {
             listener->region_add(listener, &section);
         }
+        if (fr->dirty_log_mask && listener->log_start) {
+            listener->log_start(listener, &section, 0, fr->dirty_log_mask);
+        }
     }
     if (listener->commit) {
         listener->commit(listener);
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 0c41f1212f..ce9f08eb9f 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -11,6 +11,8 @@ use warnings;
 my $P = $0;
 $P =~ s@.*/@@g;
 
+our $SrcFile    = qr{\.(?:h|c|cpp|s|S|pl|py|sh)$};
+
 my $V = '0.31';
 
 use Getopt::Long qw(:config no_auto_abbrev);
@@ -101,30 +103,29 @@ if ($#ARGV < 0) {
 }
 
 if (!defined $chk_branch && !defined $chk_patch && !defined $file) {
-	$chk_branch = $ARGV[0] =~ /\.\./ ? 1 : 0;
-	$chk_patch = $chk_branch ? 0 :
-		$ARGV[0] =~ /\.patch$/ || $ARGV[0] eq "-" ? 1 : 0;
-	$file = $chk_branch || $chk_patch ? 0 : 1;
+	$chk_branch = $ARGV[0] =~ /.\.\./ ? 1 : 0;
+	$file = $ARGV[0] =~ /$SrcFile/ ? 1 : 0;
+	$chk_patch = $chk_branch || $file ? 0 : 1;
 } elsif (!defined $chk_branch && !defined $chk_patch) {
 	if ($file) {
 		$chk_branch = $chk_patch = 0;
 	} else {
-		$chk_branch = $ARGV[0] =~ /\.\./ ? 1 : 0;
+		$chk_branch = $ARGV[0] =~ /.\.\./ ? 1 : 0;
 		$chk_patch = $chk_branch ? 0 : 1;
 	}
 } elsif (!defined $chk_branch && !defined $file) {
 	if ($chk_patch) {
 		$chk_branch = $file = 0;
 	} else {
-		$chk_branch = $ARGV[0] =~ /\.\./ ? 1 : 0;
+		$chk_branch = $ARGV[0] =~ /.\.\./ ? 1 : 0;
 		$file = $chk_branch ? 0 : 1;
 	}
 } elsif (!defined $chk_patch && !defined $file) {
 	if ($chk_branch) {
 		$chk_patch = $file = 0;
 	} else {
-		$chk_patch = $ARGV[0] =~ /\.patch$/ || $ARGV[0] eq "-" ? 1 : 0;
-		$file = $chk_patch ? 0 : 1;
+		$file = $ARGV[0] =~ /$SrcFile/ ? 1 : 0;
+		$chk_patch = $file ? 0 : 1;
 	}
 } elsif (!defined $chk_branch) {
 	$chk_branch = $chk_patch || $file ? 0 : 1;
@@ -1443,7 +1444,7 @@ sub process {
 		}
 
 # check we are in a valid source file if not then ignore this hunk
-		next if ($realfile !~ /\.(h|c|cpp|s|S|pl|py|sh)$/);
+		next if ($realfile !~ /$SrcFile/);
 
 #90 column limit
 		if ($line =~ /^\+/ &&
diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
index d58184833f..dd9785143b 100644
--- a/scsi/qemu-pr-helper.c
+++ b/scsi/qemu-pr-helper.c
@@ -276,15 +276,26 @@ static void dm_init(void)
 
 /* Variables required by libmultipath and libmpathpersist.  */
 QEMU_BUILD_BUG_ON(PR_HELPER_DATA_SIZE > MPATH_MAX_PARAM_LEN);
+static struct config *multipath_conf;
 unsigned mpath_mx_alloc_len = PR_HELPER_DATA_SIZE;
 int logsink;
+struct udev *udev;
 
-static void multipath_pr_init(void)
+extern struct config *get_multipath_config(void);
+struct config *get_multipath_config(void)
 {
-    static struct udev *udev;
+    return multipath_conf;
+}
 
+extern void put_multipath_config(struct config *conf);
+void put_multipath_config(struct config *conf)
+{
+}
+
+static void multipath_pr_init(void)
+{
     udev = udev_new();
-    mpath_lib_init(udev);
+    multipath_conf = mpath_lib_init();
 }
 
 static int is_mpath(int fd)
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 5d61fa96ad..5f24a2de3c 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -136,6 +136,7 @@ typedef struct DisasContext {
     int cpuid_ext3_features;
     int cpuid_7_0_ebx_features;
     int cpuid_xsave_features;
+    sigjmp_buf jmpbuf;
 } DisasContext;
 
 static void gen_eob(DisasContext *s);
@@ -1863,6 +1864,57 @@ static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
     }
 }
 
+#define X86_MAX_INSN_LENGTH 15
+
+static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
+{
+    uint64_t pc = s->pc;
+
+    s->pc += num_bytes;
+    if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
+        /* If the instruction's 16th byte is on a different page than the 1st, a
+         * page fault on the second page wins over the general protection fault
+         * caused by the instruction being too long.
+         * This can happen even if the operand is only one byte long!
+         */
+        if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
+            volatile uint8_t unused =
+                cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
+            (void) unused;
+        }
+        siglongjmp(s->jmpbuf, 1);
+    }
+
+    return pc;
+}
+
+static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
+{
+    return cpu_ldub_code(env, advance_pc(env, s, 1));
+}
+
+static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
+{
+    return cpu_ldsw_code(env, advance_pc(env, s, 2));
+}
+
+static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
+{
+    return cpu_lduw_code(env, advance_pc(env, s, 2));
+}
+
+static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
+{
+    return cpu_ldl_code(env, advance_pc(env, s, 4));
+}
+
+#ifdef TARGET_X86_64
+static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
+{
+    return cpu_ldq_code(env, advance_pc(env, s, 8));
+}
+#endif
+
 /* Decompose an address.  */
 
 typedef struct AddressParts {
@@ -1900,7 +1952,7 @@ static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
     case MO_32:
         havesib = 0;
         if (rm == 4) {
-            int code = cpu_ldub_code(env, s->pc++);
+            int code = x86_ldub_code(env, s);
             scale = (code >> 6) & 3;
             index = ((code >> 3) & 7) | REX_X(s);
             if (index == 4) {
@@ -1914,8 +1966,7 @@ static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
         case 0:
             if ((base & 7) == 5) {
                 base = -1;
-                disp = (int32_t)cpu_ldl_code(env, s->pc);
-                s->pc += 4;
+                disp = (int32_t)x86_ldl_code(env, s);
                 if (CODE64(s) && !havesib) {
                     base = -2;
                     disp += s->pc + s->rip_offset;
@@ -1923,12 +1974,11 @@ static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
             }
             break;
         case 1:
-            disp = (int8_t)cpu_ldub_code(env, s->pc++);
+            disp = (int8_t)x86_ldub_code(env, s);
             break;
         default:
         case 2:
-            disp = (int32_t)cpu_ldl_code(env, s->pc);
-            s->pc += 4;
+            disp = (int32_t)x86_ldl_code(env, s);
             break;
         }
 
@@ -1945,15 +1995,13 @@ static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
         if (mod == 0) {
             if (rm == 6) {
                 base = -1;
-                disp = cpu_lduw_code(env, s->pc);
-                s->pc += 2;
+                disp = x86_lduw_code(env, s);
                 break;
             }
         } else if (mod == 1) {
-            disp = (int8_t)cpu_ldub_code(env, s->pc++);
+            disp = (int8_t)x86_ldub_code(env, s);
         } else {
-            disp = (int16_t)cpu_lduw_code(env, s->pc);
-            s->pc += 2;
+            disp = (int16_t)x86_lduw_code(env, s);
         }
 
         switch (rm) {
@@ -2103,19 +2151,16 @@ static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot)
 
     switch (ot) {
     case MO_8:
-        ret = cpu_ldub_code(env, s->pc);
-        s->pc++;
+        ret = x86_ldub_code(env, s);
         break;
     case MO_16:
-        ret = cpu_lduw_code(env, s->pc);
-        s->pc += 2;
+        ret = x86_lduw_code(env, s);
         break;
     case MO_32:
 #ifdef TARGET_X86_64
     case MO_64:
 #endif
-        ret = cpu_ldl_code(env, s->pc);
-        s->pc += 4;
+        ret = x86_ldl_code(env, s);
         break;
     default:
         tcg_abort();
@@ -3041,7 +3086,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         gen_helper_enter_mmx(cpu_env);
     }
 
-    modrm = cpu_ldub_code(env, s->pc++);
+    modrm = x86_ldub_code(env, s);
     reg = ((modrm >> 3) & 7);
     if (is_xmm)
         reg |= rex_r;
@@ -3250,8 +3295,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 
                 if (b1 == 1 && reg != 0)
                     goto illegal_op;
-                field_length = cpu_ldub_code(env, s->pc++) & 0x3F;
-                bit_index = cpu_ldub_code(env, s->pc++) & 0x3F;
+                field_length = x86_ldub_code(env, s) & 0x3F;
+                bit_index = x86_ldub_code(env, s) & 0x3F;
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                     offsetof(CPUX86State,xmm_regs[reg]));
                 if (b1 == 1)
@@ -3380,7 +3425,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (b1 >= 2) {
 	        goto unknown_op;
             }
-            val = cpu_ldub_code(env, s->pc++);
+            val = x86_ldub_code(env, s);
             if (is_xmm) {
                 tcg_gen_movi_tl(cpu_T0, val);
                 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
@@ -3537,7 +3582,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x1c4:
             s->rip_offset = 1;
             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-            val = cpu_ldub_code(env, s->pc++);
+            val = x86_ldub_code(env, s);
             if (b1) {
                 val &= 7;
                 tcg_gen_st16_tl(cpu_T0, cpu_env,
@@ -3553,7 +3598,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (mod != 3)
                 goto illegal_op;
             ot = mo_64_32(s->dflag);
-            val = cpu_ldub_code(env, s->pc++);
+            val = x86_ldub_code(env, s);
             if (b1) {
                 val &= 7;
                 rm = (modrm & 7) | REX_B(s);
@@ -3616,7 +3661,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if ((b & 0xf0) == 0xf0) {
                 goto do_0f_38_fx;
             }
-            modrm = cpu_ldub_code(env, s->pc++);
+            modrm = x86_ldub_code(env, s);
             rm = modrm & 7;
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
@@ -3693,7 +3738,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         do_0f_38_fx:
             /* Various integer extensions at 0f 38 f[0-f].  */
             b = modrm | (b1 << 8);
-            modrm = cpu_ldub_code(env, s->pc++);
+            modrm = x86_ldub_code(env, s);
             reg = ((modrm >> 3) & 7) | rex_r;
 
             switch (b) {
@@ -4054,7 +4099,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x03a:
         case 0x13a:
             b = modrm;
-            modrm = cpu_ldub_code(env, s->pc++);
+            modrm = x86_ldub_code(env, s);
             rm = modrm & 7;
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
@@ -4077,7 +4122,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 if (mod != 3)
                     gen_lea_modrm(env, s, modrm);
                 reg = ((modrm >> 3) & 7) | rex_r;
-                val = cpu_ldub_code(env, s->pc++);
+                val = x86_ldub_code(env, s);
                 switch (b) {
                 case 0x14: /* pextrb */
                     tcg_gen_ld8u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
@@ -4225,7 +4270,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     gen_ldq_env_A0(s, op2_offset);
                 }
             }
-            val = cpu_ldub_code(env, s->pc++);
+            val = x86_ldub_code(env, s);
 
             if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
                 set_cc_op(s, CC_OP_EFLAGS);
@@ -4244,7 +4289,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x33a:
             /* Various integer extensions at 0f 3a f[0-f].  */
             b = modrm | (b1 << 8);
-            modrm = cpu_ldub_code(env, s->pc++);
+            modrm = x86_ldub_code(env, s);
             reg = ((modrm >> 3) & 7) | rex_r;
 
             switch (b) {
@@ -4256,7 +4301,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                b = cpu_ldub_code(env, s->pc++);
+                b = x86_ldub_code(env, s);
                 if (ot == MO_64) {
                     tcg_gen_rotri_tl(cpu_T0, cpu_T0, b & 63);
                 } else {
@@ -4351,7 +4396,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         }
         switch(b) {
         case 0x0f: /* 3DNow! data insns */
-            val = cpu_ldub_code(env, s->pc++);
+            val = x86_ldub_code(env, s);
             sse_fn_epp = sse_op_table5[val];
             if (!sse_fn_epp) {
                 goto unknown_op;
@@ -4365,7 +4410,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x70: /* pshufx insn */
         case 0xc6: /* pshufx insn */
-            val = cpu_ldub_code(env, s->pc++);
+            val = x86_ldub_code(env, s);
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
             /* XXX: introduce a new table? */
@@ -4374,7 +4419,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0xc2:
             /* compare insns */
-            val = cpu_ldub_code(env, s->pc++);
+            val = x86_ldub_code(env, s);
             if (val >= 8)
                 goto unknown_op;
             sse_fn_epp = sse_op_table4[val][b1];
@@ -4435,16 +4480,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     s->rip_offset = 0; /* for relative ip address */
     s->vex_l = 0;
     s->vex_v = 0;
- next_byte:
-    /* x86 has an upper limit of 15 bytes for an instruction. Since we
-     * do not want to decode and generate IR for an illegal
-     * instruction, the following check limits the instruction size to
-     * 25 bytes: 14 prefix + 1 opc + 6 (modrm+sib+ofs) + 4 imm */
-    if (s->pc - pc_start > 14) {
-        goto illegal_op;
+    if (sigsetjmp(s->jmpbuf, 0) != 0) {
+        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        return s->pc;
     }
-    b = cpu_ldub_code(env, s->pc);
-    s->pc++;
+
+ next_byte:
+    b = x86_ldub_code(env, s);
     /* Collect prefixes.  */
     switch (b) {
     case 0xf3:
@@ -4501,7 +4543,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             static const int pp_prefix[4] = {
                 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
             };
-            int vex3, vex2 = cpu_ldub_code(env, s->pc);
+            int vex3, vex2 = x86_ldub_code(env, s);
 
             if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
                 /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
@@ -4523,17 +4565,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             rex_r = (~vex2 >> 4) & 8;
             if (b == 0xc5) {
                 vex3 = vex2;
-                b = cpu_ldub_code(env, s->pc++);
+                b = x86_ldub_code(env, s);
             } else {
 #ifdef TARGET_X86_64
                 s->rex_x = (~vex2 >> 3) & 8;
                 s->rex_b = (~vex2 >> 2) & 8;
 #endif
-                vex3 = cpu_ldub_code(env, s->pc++);
+                vex3 = x86_ldub_code(env, s);
                 rex_w = (vex3 >> 7) & 1;
                 switch (vex2 & 0x1f) {
                 case 0x01: /* Implied 0f leading opcode bytes.  */
-                    b = cpu_ldub_code(env, s->pc++) | 0x100;
+                    b = x86_ldub_code(env, s) | 0x100;
                     break;
                 case 0x02: /* Implied 0f 38 leading opcode bytes.  */
                     b = 0x138;
@@ -4585,7 +4627,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x0f:
         /**************************/
         /* extended op code */
-        b = cpu_ldub_code(env, s->pc++) | 0x100;
+        b = x86_ldub_code(env, s) | 0x100;
         goto reswitch;
 
         /**************************/
@@ -4607,7 +4649,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 
             switch(f) {
             case 0: /* OP Ev, Gv */
-                modrm = cpu_ldub_code(env, s->pc++);
+                modrm = x86_ldub_code(env, s);
                 reg = ((modrm >> 3) & 7) | rex_r;
                 mod = (modrm >> 6) & 3;
                 rm = (modrm & 7) | REX_B(s);
@@ -4628,7 +4670,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op(s, op, ot, opreg);
                 break;
             case 1: /* OP Gv, Ev */
-                modrm = cpu_ldub_code(env, s->pc++);
+                modrm = x86_ldub_code(env, s);
                 mod = (modrm >> 6) & 3;
                 reg = ((modrm >> 3) & 7) | rex_r;
                 rm = (modrm & 7) | REX_B(s);
@@ -4662,7 +4704,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 
             ot = mo_b_d(b, dflag);
 
-            modrm = cpu_ldub_code(env, s->pc++);
+            modrm = x86_ldub_code(env, s);
             mod = (modrm >> 6) & 3;
             rm = (modrm & 7) | REX_B(s);
             op = (modrm >> 3) & 7;
@@ -4708,7 +4750,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0xf7:
         ot = mo_b_d(b, dflag);
 
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
         op = (modrm >> 3) & 7;
@@ -4940,7 +4982,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0xff: /* GRP5 */
         ot = mo_b_d(b, dflag);
 
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
         op = (modrm >> 3) & 7;
@@ -5048,7 +5090,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x85:
         ot = mo_b_d(b, dflag);
 
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
@@ -5120,7 +5162,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x69: /* imul Gv, Ev, I */
     case 0x6b:
         ot = dflag;
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
         if (b == 0x69)
             s->rip_offset = insn_const_size(ot);
@@ -5172,7 +5214,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x1c0:
     case 0x1c1: /* xadd Ev, Gv */
         ot = mo_b_d(b, dflag);
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         gen_op_mov_v_reg(ot, cpu_T0, reg);
@@ -5204,7 +5246,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             TCGv oldv, newv, cmpv;
 
             ot = mo_b_d(b, dflag);
-            modrm = cpu_ldub_code(env, s->pc++);
+            modrm = x86_ldub_code(env, s);
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
             oldv = tcg_temp_new();
@@ -5256,7 +5298,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         }
         break;
     case 0x1c7: /* cmpxchg8b */
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         mod = (modrm >> 6) & 3;
         if ((mod == 3) || ((modrm & 0x38) != 0x8))
             goto illegal_op;
@@ -5318,7 +5360,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_push_v(s, cpu_T0);
         break;
     case 0x8f: /* pop Ev */
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         mod = (modrm >> 6) & 3;
         ot = gen_pop_T0(s);
         if (mod == 3) {
@@ -5337,9 +5379,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0xc8: /* enter */
         {
             int level;
-            val = cpu_lduw_code(env, s->pc);
-            s->pc += 2;
-            level = cpu_ldub_code(env, s->pc++);
+            val = x86_lduw_code(env, s);
+            level = x86_ldub_code(env, s);
             gen_enter(s, val, level);
         }
         break;
@@ -5396,7 +5437,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x88:
     case 0x89: /* mov Gv, Ev */
         ot = mo_b_d(b, dflag);
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
 
         /* generate a generic store */
@@ -5405,7 +5446,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0xc6:
     case 0xc7: /* mov Ev, Iv */
         ot = mo_b_d(b, dflag);
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         mod = (modrm >> 6) & 3;
         if (mod != 3) {
             s->rip_offset = insn_const_size(ot);
@@ -5422,14 +5463,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x8a:
     case 0x8b: /* mov Ev, Gv */
         ot = mo_b_d(b, dflag);
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
         gen_op_mov_reg_v(ot, reg, cpu_T0);
         break;
     case 0x8e: /* mov seg, Gv */
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         reg = (modrm >> 3) & 7;
         if (reg >= 6 || reg == R_CS)
             goto illegal_op;
@@ -5447,7 +5488,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         }
         break;
     case 0x8c: /* mov Gv, seg */
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         reg = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         if (reg >= 6)
@@ -5472,7 +5513,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             /* s_ot is the sign+size of source */
             s_ot = b & 8 ? MO_SIGN | ot : ot;
 
-            modrm = cpu_ldub_code(env, s->pc++);
+            modrm = x86_ldub_code(env, s);
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
             rm = (modrm & 7) | REX_B(s);
@@ -5508,7 +5549,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         break;
 
     case 0x8d: /* lea */
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
@@ -5532,8 +5573,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             switch (s->aflag) {
 #ifdef TARGET_X86_64
             case MO_64:
-                offset_addr = cpu_ldq_code(env, s->pc);
-                s->pc += 8;
+                offset_addr = x86_ldq_code(env, s);
                 break;
 #endif
             default:
@@ -5570,8 +5610,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (dflag == MO_64) {
             uint64_t tmp;
             /* 64 bit case */
-            tmp = cpu_ldq_code(env, s->pc);
-            s->pc += 8;
+            tmp = x86_ldq_code(env, s);
             reg = (b & 7) | REX_B(s);
             tcg_gen_movi_tl(cpu_T0, tmp);
             gen_op_mov_reg_v(MO_64, reg, cpu_T0);
@@ -5595,7 +5634,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x86:
     case 0x87: /* xchg Ev, Gv */
         ot = mo_b_d(b, dflag);
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         if (mod == 3) {
@@ -5632,7 +5671,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         op = R_GS;
     do_lxx:
         ot = dflag != MO_16 ? MO_32 : MO_16;
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         if (mod == 3)
@@ -5660,7 +5699,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     grp2:
         {
             ot = mo_b_d(b, dflag);
-            modrm = cpu_ldub_code(env, s->pc++);
+            modrm = x86_ldub_code(env, s);
             mod = (modrm >> 6) & 3;
             op = (modrm >> 3) & 7;
 
@@ -5679,7 +5718,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_shift(s, op, ot, opreg, OR_ECX);
             } else {
                 if (shift == 2) {
-                    shift = cpu_ldub_code(env, s->pc++);
+                    shift = x86_ldub_code(env, s);
                 }
                 gen_shifti(s, op, ot, opreg, shift);
             }
@@ -5713,7 +5752,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         shift = 0;
     do_shiftd:
         ot = dflag;
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
         reg = ((modrm >> 3) & 7) | rex_r;
@@ -5726,7 +5765,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_op_mov_v_reg(ot, cpu_T1, reg);
 
         if (shift) {
-            TCGv imm = tcg_const_tl(cpu_ldub_code(env, s->pc++));
+            TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
             gen_shiftd_rm_T1(s, ot, opreg, op, imm);
             tcg_temp_free(imm);
         } else {
@@ -5743,7 +5782,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
             break;
         }
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         mod = (modrm >> 6) & 3;
         rm = modrm & 7;
         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
@@ -6328,7 +6367,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0xe4:
     case 0xe5:
         ot = mo_b_d32(b, dflag);
-        val = cpu_ldub_code(env, s->pc++);
+        val = x86_ldub_code(env, s);
         tcg_gen_movi_tl(cpu_T0, val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
@@ -6347,7 +6386,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0xe6:
     case 0xe7:
         ot = mo_b_d32(b, dflag);
-        val = cpu_ldub_code(env, s->pc++);
+        val = x86_ldub_code(env, s);
         tcg_gen_movi_tl(cpu_T0, val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
@@ -6407,8 +6446,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         /************************/
         /* control */
     case 0xc2: /* ret im */
-        val = cpu_ldsw_code(env, s->pc);
-        s->pc += 2;
+        val = x86_ldsw_code(env, s);
         ot = gen_pop_T0(s);
         gen_stack_update(s, val + (1 << ot));
         /* Note that gen_pop_T0 uses a zero-extending load.  */
@@ -6425,8 +6463,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_jr(s, cpu_T0);
         break;
     case 0xca: /* lret im */
-        val = cpu_ldsw_code(env, s->pc);
-        s->pc += 2;
+        val = x86_ldsw_code(env, s);
     do_lret:
         if (s->pe && !s->vm86) {
             gen_update_cc_op(s);
@@ -6563,7 +6600,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         break;
 
     case 0x190 ... 0x19f: /* setcc Gv */
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         gen_setcc1(s, b, cpu_T0);
         gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
         break;
@@ -6572,7 +6609,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             goto illegal_op;
         }
         ot = dflag;
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
         gen_cmovcc1(env, s, ot, b, modrm, reg);
         break;
@@ -6689,7 +6726,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         /* bit operations */
     case 0x1ba: /* bt/bts/btr/btc Gv, im */
         ot = dflag;
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         op = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
@@ -6703,7 +6740,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_op_mov_v_reg(ot, cpu_T0, rm);
         }
         /* load shift */
-        val = cpu_ldub_code(env, s->pc++);
+        val = x86_ldub_code(env, s);
         tcg_gen_movi_tl(cpu_T1, val);
         if (op < 4)
             goto unknown_op;
@@ -6722,7 +6759,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         op = 3;
     do_btx:
         ot = dflag;
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
@@ -6827,7 +6864,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x1bc: /* bsf / tzcnt */
     case 0x1bd: /* bsr / lzcnt */
         ot = dflag;
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
         gen_extu(ot, cpu_T0);
@@ -6907,7 +6944,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0xd4: /* aam */
         if (CODE64(s))
             goto illegal_op;
-        val = cpu_ldub_code(env, s->pc++);
+        val = x86_ldub_code(env, s);
         if (val == 0) {
             gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
         } else {
@@ -6918,7 +6955,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0xd5: /* aad */
         if (CODE64(s))
             goto illegal_op;
-        val = cpu_ldub_code(env, s->pc++);
+        val = x86_ldub_code(env, s);
         gen_helper_aad(cpu_env, tcg_const_i32(val));
         set_cc_op(s, CC_OP_LOGICB);
         break;
@@ -6952,7 +6989,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
         break;
     case 0xcd: /* int N */
-        val = cpu_ldub_code(env, s->pc++);
+        val = x86_ldub_code(env, s);
         if (s->vm86 && s->iopl != 3) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
@@ -7007,7 +7044,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (CODE64(s))
             goto illegal_op;
         ot = dflag;
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         reg = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         if (mod == 3)
@@ -7186,7 +7223,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         }
         break;
     case 0x100:
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         mod = (modrm >> 6) & 3;
         op = (modrm >> 3) & 7;
         switch(op) {
@@ -7251,7 +7288,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         break;
 
     case 0x101:
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         switch (modrm) {
         CASE_MODRM_MEM_OP(0): /* sgdt */
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
@@ -7596,7 +7633,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             /* d_ot is the size of destination */
             d_ot = dflag;
 
-            modrm = cpu_ldub_code(env, s->pc++);
+            modrm = x86_ldub_code(env, s);
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
             rm = (modrm & 7) | REX_B(s);
@@ -7625,7 +7662,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             t1 = tcg_temp_local_new();
             t2 = tcg_temp_local_new();
             ot = MO_16;
-            modrm = cpu_ldub_code(env, s->pc++);
+            modrm = x86_ldub_code(env, s);
             reg = (modrm >> 3) & 7;
             mod = (modrm >> 6) & 3;
             rm = modrm & 7;
@@ -7670,7 +7707,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             if (!s->pe || s->vm86)
                 goto illegal_op;
             ot = dflag != MO_16 ? MO_32 : MO_16;
-            modrm = cpu_ldub_code(env, s->pc++);
+            modrm = x86_ldub_code(env, s);
             reg = ((modrm >> 3) & 7) | rex_r;
             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
             t0 = tcg_temp_local_new();
@@ -7690,7 +7727,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         }
         break;
     case 0x118:
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         mod = (modrm >> 6) & 3;
         op = (modrm >> 3) & 7;
         switch(op) {
@@ -7709,7 +7746,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         }
         break;
     case 0x11a:
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         if (s->flags & HF_MPX_EN_MASK) {
             mod = (modrm >> 6) & 3;
             reg = ((modrm >> 3) & 7) | rex_r;
@@ -7799,7 +7836,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_nop_modrm(env, s, modrm);
         break;
     case 0x11b:
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         if (s->flags & HF_MPX_EN_MASK) {
             mod = (modrm >> 6) & 3;
             reg = ((modrm >> 3) & 7) | rex_r;
@@ -7901,7 +7938,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_nop_modrm(env, s, modrm);
         break;
     case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         gen_nop_modrm(env, s, modrm);
         break;
     case 0x120: /* mov reg, crN */
@@ -7909,7 +7946,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (s->cpl != 0) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            modrm = cpu_ldub_code(env, s->pc++);
+            modrm = x86_ldub_code(env, s);
             /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
              * AMD documentation (24594.pdf) and testing of
              * intel 386 and 486 processors all show that the mod bits
@@ -7966,7 +8003,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (s->cpl != 0) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            modrm = cpu_ldub_code(env, s->pc++);
+            modrm = x86_ldub_code(env, s);
             /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
              * AMD documentation (24594.pdf) and testing of
              * intel 386 and 486 processors all show that the mod bits
@@ -8012,7 +8049,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (!(s->cpuid_features & CPUID_SSE2))
             goto illegal_op;
         ot = mo_64_32(dflag);
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
@@ -8021,7 +8058,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
         break;
     case 0x1ae:
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         switch (modrm) {
         CASE_MODRM_MEM_OP(0): /* fxsave */
             if (!(s->cpuid_features & CPUID_FXSR)
@@ -8219,7 +8256,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         break;
 
     case 0x10d: /* 3DNow! prefetch(w) */
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
@@ -8241,7 +8278,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
             goto illegal_op;
 
-        modrm = cpu_ldub_code(env, s->pc++);
+        modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
 
         if (s->prefix & PREFIX_DATA) {
diff --git a/target/mips/mips-defs.h b/target/mips/mips-defs.h
index 047554ee45..d239069975 100644
--- a/target/mips/mips-defs.h
+++ b/target/mips/mips-defs.h
@@ -15,7 +15,11 @@
 #else
 #define TARGET_LONG_BITS 32
 #define TARGET_PHYS_ADDR_SPACE_BITS 40
-#define TARGET_VIRT_ADDR_SPACE_BITS 32
+# ifdef CONFIG_USER_ONLY
+#  define TARGET_VIRT_ADDR_SPACE_BITS 31
+# else
+#  define TARGET_VIRT_ADDR_SPACE_BITS 32
+#endif
 #endif
 
 /* Masks used to mark instructions to indicate which ISA level they
diff --git a/target/nios2/cpu.h b/target/nios2/cpu.h
index 50d803a217..9119eee587 100644
--- a/target/nios2/cpu.h
+++ b/target/nios2/cpu.h
@@ -226,7 +226,11 @@ qemu_irq *nios2_cpu_pic_init(Nios2CPU *cpu);
 void nios2_check_interrupts(CPUNios2State *env);
 
 #define TARGET_PHYS_ADDR_SPACE_BITS 32
-#define TARGET_VIRT_ADDR_SPACE_BITS 32
+#ifdef CONFIG_USER_ONLY
+# define TARGET_VIRT_ADDR_SPACE_BITS 31
+#else
+# define TARGET_VIRT_ADDR_SPACE_BITS 32
+#endif
 
 #define cpu_init(cpu_model) cpu_generic_init(TYPE_NIOS2_CPU, cpu_model)
 
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
index 6b0961837d..54fbe898df 100644
--- a/target/nios2/translate.c
+++ b/target/nios2/translate.c
@@ -948,6 +948,7 @@ void nios2_tcg_init(void)
     int i;
 
     cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
+    tcg_ctx.tcg_env = cpu_env;
 
     for (i = 0; i < NUM_CORE_REGS; i++) {
         cpu_R[i] = tcg_global_mem_new(cpu_env,
diff --git a/target/ppc/Makefile.objs b/target/ppc/Makefile.objs
index f92ba67ebd..e8fa18ce13 100644
--- a/target/ppc/Makefile.objs
+++ b/target/ppc/Makefile.objs
@@ -15,5 +15,6 @@ obj-y += int_helper.o
 obj-y += timebase_helper.o
 obj-y += misc_helper.o
 obj-y += mem_helper.o
+obj-y += ../../libdecnumber/
 obj-$(CONFIG_USER_ONLY) += user_only_helper.o
 obj-y += gdbstub.o
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index 79f85d3365..123f34783a 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -45,7 +45,11 @@
 #define TARGET_PAGE_BITS 12	/* 4k XXXXX */
 
 #define TARGET_PHYS_ADDR_SPACE_BITS 32
-#define TARGET_VIRT_ADDR_SPACE_BITS 32
+#ifdef CONFIG_USER_ONLY
+# define TARGET_VIRT_ADDR_SPACE_BITS 31
+#else
+# define TARGET_VIRT_ADDR_SPACE_BITS 32
+#endif
 
 #define SR_MD 30
 #define SR_RB 29
diff --git a/ui/console-gl.c b/ui/console-gl.c
index 5165e21646..5b77e7aa88 100644
--- a/ui/console-gl.c
+++ b/ui/console-gl.c
@@ -29,40 +29,8 @@
 #include "ui/console.h"
 #include "ui/shader.h"
 
-#include "shader/texture-blit-vert.h"
-#include "shader/texture-blit-frag.h"
-
-struct ConsoleGLState {
-    GLint texture_blit_prog;
-    GLint texture_blit_vao;
-};
-
 /* ---------------------------------------------------------------------- */
 
-ConsoleGLState *console_gl_init_context(void)
-{
-    ConsoleGLState *gls = g_new0(ConsoleGLState, 1);
-
-    gls->texture_blit_prog = qemu_gl_create_compile_link_program
-        (texture_blit_vert_src, texture_blit_frag_src);
-    if (!gls->texture_blit_prog) {
-        exit(1);
-    }
-
-    gls->texture_blit_vao =
-        qemu_gl_init_texture_blit(gls->texture_blit_prog);
-
-    return gls;
-}
-
-void console_gl_fini_context(ConsoleGLState *gls)
-{
-    if (!gls) {
-        return;
-    }
-    g_free(gls);
-}
-
 bool console_gl_check_format(DisplayChangeListener *dcl,
                              pixman_format_code_t format)
 {
@@ -76,7 +44,7 @@ bool console_gl_check_format(DisplayChangeListener *dcl,
     }
 }
 
-void surface_gl_create_texture(ConsoleGLState *gls,
+void surface_gl_create_texture(QemuGLShader *gls,
                                DisplaySurface *surface)
 {
     assert(gls);
@@ -116,7 +84,7 @@ void surface_gl_create_texture(ConsoleGLState *gls,
     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
 }
 
-void surface_gl_update_texture(ConsoleGLState *gls,
+void surface_gl_update_texture(QemuGLShader *gls,
                                DisplaySurface *surface,
                                int x, int y, int w, int h)
 {
@@ -133,7 +101,7 @@ void surface_gl_update_texture(ConsoleGLState *gls,
                     + surface_bytes_per_pixel(surface) * x);
 }
 
-void surface_gl_render_texture(ConsoleGLState *gls,
+void surface_gl_render_texture(QemuGLShader *gls,
                                DisplaySurface *surface)
 {
     assert(gls);
@@ -141,11 +109,10 @@ void surface_gl_render_texture(ConsoleGLState *gls,
     glClearColor(0.1f, 0.1f, 0.1f, 0.0f);
     glClear(GL_COLOR_BUFFER_BIT);
 
-    qemu_gl_run_texture_blit(gls->texture_blit_prog,
-                             gls->texture_blit_vao);
+    qemu_gl_run_texture_blit(gls, false);
 }
 
-void surface_gl_destroy_texture(ConsoleGLState *gls,
+void surface_gl_destroy_texture(QemuGLShader *gls,
                                 DisplaySurface *surface)
 {
     if (!surface || !surface->texture) {
@@ -155,7 +122,7 @@ void surface_gl_destroy_texture(ConsoleGLState *gls,
     surface->texture = 0;
 }
 
-void surface_gl_setup_viewport(ConsoleGLState *gls,
+void surface_gl_setup_viewport(QemuGLShader *gls,
                                DisplaySurface *surface,
                                int ww, int wh)
 {
diff --git a/ui/console.c b/ui/console.c
index b82c27960a..eca854cbd5 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -1404,6 +1404,11 @@ bool console_has_gl(QemuConsole *con)
     return con->gl != NULL;
 }
 
+bool console_has_gl_dmabuf(QemuConsole *con)
+{
+    return con->gl != NULL && con->gl->ops->dpy_gl_scanout_dmabuf != NULL;
+}
+
 void register_displaychangelistener(DisplayChangeListener *dcl)
 {
     static const char nodev[] =
@@ -1745,6 +1750,34 @@ void dpy_gl_scanout_texture(QemuConsole *con,
                                          x, y, width, height);
 }
 
+void dpy_gl_scanout_dmabuf(QemuConsole *con,
+                           QemuDmaBuf *dmabuf)
+{
+    assert(con->gl);
+    con->gl->ops->dpy_gl_scanout_dmabuf(con->gl, dmabuf);
+}
+
+void dpy_gl_cursor_dmabuf(QemuConsole *con,
+                          QemuDmaBuf *dmabuf,
+                          uint32_t pos_x, uint32_t pos_y)
+{
+    assert(con->gl);
+
+    if (con->gl->ops->dpy_gl_cursor_dmabuf) {
+        con->gl->ops->dpy_gl_cursor_dmabuf(con->gl, dmabuf, pos_x, pos_y);
+    }
+}
+
+void dpy_gl_release_dmabuf(QemuConsole *con,
+                          QemuDmaBuf *dmabuf)
+{
+    assert(con->gl);
+
+    if (con->gl->ops->dpy_gl_release_dmabuf) {
+        con->gl->ops->dpy_gl_release_dmabuf(con->gl, dmabuf);
+    }
+}
+
 void dpy_gl_update(QemuConsole *con,
                    uint32_t x, uint32_t y, uint32_t w, uint32_t h)
 {
diff --git a/ui/egl-headless.c b/ui/egl-headless.c
index 12ad64e995..5d50226869 100644
--- a/ui/egl-headless.c
+++ b/ui/egl-headless.c
@@ -4,13 +4,18 @@
 #include "ui/console.h"
 #include "ui/egl-helpers.h"
 #include "ui/egl-context.h"
+#include "ui/shader.h"
 
 typedef struct egl_dpy {
     DisplayChangeListener dcl;
     DisplaySurface *ds;
+    QemuGLShader *gls;
     egl_fb guest_fb;
+    egl_fb cursor_fb;
     egl_fb blit_fb;
     bool y_0_top;
+    uint32_t pos_x;
+    uint32_t pos_y;
 } egl_dpy;
 
 /* ------------------------------------------------------------------ */
@@ -65,6 +70,43 @@ static void egl_scanout_texture(DisplayChangeListener *dcl,
     }
 }
 
+static void egl_scanout_dmabuf(DisplayChangeListener *dcl,
+                               QemuDmaBuf *dmabuf)
+{
+    egl_dmabuf_import_texture(dmabuf);
+    if (!dmabuf->texture) {
+        return;
+    }
+
+    egl_scanout_texture(dcl, dmabuf->texture,
+                        false, dmabuf->width, dmabuf->height,
+                        0, 0, dmabuf->width, dmabuf->height);
+}
+
+static void egl_cursor_dmabuf(DisplayChangeListener *dcl,
+                              QemuDmaBuf *dmabuf,
+                              uint32_t pos_x, uint32_t pos_y)
+{
+    egl_dpy *edpy = container_of(dcl, egl_dpy, dcl);
+
+    edpy->pos_x = pos_x;
+    edpy->pos_y = pos_y;
+
+    egl_dmabuf_import_texture(dmabuf);
+    if (!dmabuf->texture) {
+        return;
+    }
+
+    egl_fb_setup_for_tex(&edpy->cursor_fb, dmabuf->width, dmabuf->height,
+                         dmabuf->texture, false);
+}
+
+static void egl_release_dmabuf(DisplayChangeListener *dcl,
+                               QemuDmaBuf *dmabuf)
+{
+    egl_dmabuf_release_texture(dmabuf);
+}
+
 static void egl_scanout_flush(DisplayChangeListener *dcl,
                               uint32_t x, uint32_t y,
                               uint32_t w, uint32_t h)
@@ -78,9 +120,18 @@ static void egl_scanout_flush(DisplayChangeListener *dcl,
     assert(surface_height(edpy->ds) == edpy->guest_fb.height);
     assert(surface_format(edpy->ds) == PIXMAN_x8r8g8b8);
 
-    egl_fb_blit(&edpy->blit_fb, &edpy->guest_fb, edpy->y_0_top);
-    egl_fb_read(surface_data(edpy->ds), &edpy->blit_fb);
+    if (edpy->cursor_fb.texture) {
+        /* have cursor -> render using textures */
+        egl_texture_blit(edpy->gls, &edpy->blit_fb, &edpy->guest_fb,
+                         !edpy->y_0_top);
+        egl_texture_blend(edpy->gls, &edpy->blit_fb, &edpy->cursor_fb,
+                          !edpy->y_0_top, edpy->pos_x, edpy->pos_y);
+    } else {
+        /* no cursor -> use simple framebuffer blit */
+        egl_fb_blit(&edpy->blit_fb, &edpy->guest_fb, edpy->y_0_top);
+    }
 
+    egl_fb_read(surface_data(edpy->ds), &edpy->blit_fb);
     dpy_gfx_update(edpy->dcl.con, x, y, w, h);
 }
 
@@ -97,6 +148,9 @@ static const DisplayChangeListenerOps egl_ops = {
 
     .dpy_gl_scanout_disable  = egl_scanout_disable,
     .dpy_gl_scanout_texture  = egl_scanout_texture,
+    .dpy_gl_scanout_dmabuf   = egl_scanout_dmabuf,
+    .dpy_gl_cursor_dmabuf    = egl_cursor_dmabuf,
+    .dpy_gl_release_dmabuf   = egl_release_dmabuf,
     .dpy_gl_update           = egl_scanout_flush,
 };
 
@@ -120,6 +174,7 @@ void egl_headless_init(void)
         edpy = g_new0(egl_dpy, 1);
         edpy->dcl.con = con;
         edpy->dcl.ops = &egl_ops;
+        edpy->gls = qemu_gl_init_shader();
         register_displaychangelistener(&edpy->dcl);
     }
 }
diff --git a/ui/egl-helpers.c b/ui/egl-helpers.c
index cde9965dea..5fa60ef4e8 100644
--- a/ui/egl-helpers.c
+++ b/ui/egl-helpers.c
@@ -19,6 +19,7 @@
 #include <dirent.h>
 
 #include "qemu/error-report.h"
+#include "ui/console.h"
 #include "ui/egl-helpers.h"
 
 EGLDisplay *qemu_egl_display;
@@ -110,6 +111,33 @@ void egl_fb_read(void *dst, egl_fb *src)
                  GL_BGRA, GL_UNSIGNED_BYTE, dst);
 }
 
+void egl_texture_blit(QemuGLShader *gls, egl_fb *dst, egl_fb *src, bool flip)
+{
+    glBindFramebuffer(GL_FRAMEBUFFER_EXT, dst->framebuffer);
+    glViewport(0, 0, dst->width, dst->height);
+    glEnable(GL_TEXTURE_2D);
+    glBindTexture(GL_TEXTURE_2D, src->texture);
+    qemu_gl_run_texture_blit(gls, flip);
+}
+
+void egl_texture_blend(QemuGLShader *gls, egl_fb *dst, egl_fb *src, bool flip,
+                       int x, int y)
+{
+    glBindFramebuffer(GL_FRAMEBUFFER_EXT, dst->framebuffer);
+    if (flip) {
+        glViewport(x, y, src->width, src->height);
+    } else {
+        glViewport(x, dst->height - src->height - y,
+                   src->width, src->height);
+    }
+    glEnable(GL_TEXTURE_2D);
+    glBindTexture(GL_TEXTURE_2D, src->texture);
+    glEnable(GL_BLEND);
+    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+    qemu_gl_run_texture_blit(gls, flip);
+    glDisable(GL_BLEND);
+}
+
 /* ---------------------------------------------------------------------- */
 
 #ifdef CONFIG_OPENGL_DMABUF
@@ -241,6 +269,51 @@ int egl_get_fd_for_texture(uint32_t tex_id, EGLint *stride, EGLint *fourcc)
     return fd;
 }
 
+void egl_dmabuf_import_texture(QemuDmaBuf *dmabuf)
+{
+    EGLImageKHR image = EGL_NO_IMAGE_KHR;
+    EGLint attrs[] = {
+        EGL_DMA_BUF_PLANE0_FD_EXT,      dmabuf->fd,
+        EGL_DMA_BUF_PLANE0_PITCH_EXT,   dmabuf->stride,
+        EGL_DMA_BUF_PLANE0_OFFSET_EXT,  0,
+        EGL_WIDTH,                      dmabuf->width,
+        EGL_HEIGHT,                     dmabuf->height,
+        EGL_LINUX_DRM_FOURCC_EXT,       dmabuf->fourcc,
+        EGL_NONE, /* end of list */
+    };
+
+    if (dmabuf->texture != 0) {
+        return;
+    }
+
+    image = eglCreateImageKHR(qemu_egl_display,
+                              EGL_NO_CONTEXT,
+                              EGL_LINUX_DMA_BUF_EXT,
+                              NULL, attrs);
+    if (image == EGL_NO_IMAGE_KHR) {
+        error_report("eglCreateImageKHR failed");
+        return;
+    }
+
+    glGenTextures(1, &dmabuf->texture);
+    glBindTexture(GL_TEXTURE_2D, dmabuf->texture);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+
+    glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, (GLeglImageOES)image);
+    eglDestroyImageKHR(qemu_egl_display, image);
+}
+
+void egl_dmabuf_release_texture(QemuDmaBuf *dmabuf)
+{
+    if (dmabuf->texture == 0) {
+        return;
+    }
+
+    glDeleteTextures(1, &dmabuf->texture);
+    dmabuf->texture = 0;
+}
+
 #endif /* CONFIG_OPENGL_DMABUF */
 
 /* ---------------------------------------------------------------------- */
diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c
index 0f0d35e041..eb86c26a1d 100644
--- a/ui/gtk-egl.c
+++ b/ui/gtk-egl.c
@@ -113,7 +113,7 @@ void gd_egl_refresh(DisplayChangeListener *dcl)
         if (!vc->gfx.esurface) {
             return;
         }
-        vc->gfx.gls = console_gl_init_context();
+        vc->gfx.gls = qemu_gl_init_shader();
         if (vc->gfx.ds) {
             surface_gl_create_texture(vc->gfx.gls, vc->gfx.ds);
         }
diff --git a/ui/gtk-gl-area.c b/ui/gtk-gl-area.c
index 7080f4e14f..147ad6f9b5 100644
--- a/ui/gtk-gl-area.c
+++ b/ui/gtk-gl-area.c
@@ -96,7 +96,7 @@ void gd_gl_area_refresh(DisplayChangeListener *dcl)
             return;
         }
         gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area));
-        vc->gfx.gls = console_gl_init_context();
+        vc->gfx.gls = qemu_gl_init_shader();
         if (vc->gfx.ds) {
             surface_gl_create_texture(vc->gfx.gls, vc->gfx.ds);
         }
diff --git a/ui/sdl2-gl.c b/ui/sdl2-gl.c
index 9110491ee5..5e1073a084 100644
--- a/ui/sdl2-gl.c
+++ b/ui/sdl2-gl.c
@@ -90,7 +90,7 @@ void sdl2_gl_switch(DisplayChangeListener *dcl,
     scon->surface = new_surface;
 
     if (!new_surface) {
-        console_gl_fini_context(scon->gls);
+        qemu_gl_fini_shader(scon->gls);
         scon->gls = NULL;
         sdl2_window_destroy(scon);
         return;
@@ -98,7 +98,7 @@ void sdl2_gl_switch(DisplayChangeListener *dcl,
 
     if (!scon->real_window) {
         sdl2_window_create(scon);
-        scon->gls = console_gl_init_context();
+        scon->gls = qemu_gl_init_shader();
     } else if (old_surface &&
                ((surface_width(old_surface)  != surface_width(new_surface)) ||
                 (surface_height(old_surface) != surface_height(new_surface)))) {
diff --git a/ui/shader.c b/ui/shader.c
index 1ffddbef3b..008458bf94 100644
--- a/ui/shader.c
+++ b/ui/shader.c
@@ -28,9 +28,19 @@
 #include "qemu-common.h"
 #include "ui/shader.h"
 
+#include "shader/texture-blit-vert.h"
+#include "shader/texture-blit-flip-vert.h"
+#include "shader/texture-blit-frag.h"
+
+struct QemuGLShader {
+    GLint texture_blit_prog;
+    GLint texture_blit_flip_prog;
+    GLint texture_blit_vao;
+};
+
 /* ---------------------------------------------------------------------- */
 
-GLuint qemu_gl_init_texture_blit(GLint texture_blit_prog)
+static GLuint qemu_gl_init_texture_blit(GLint texture_blit_prog)
 {
     static const GLfloat in_position[] = {
         -1, -1,
@@ -60,17 +70,18 @@ GLuint qemu_gl_init_texture_blit(GLint texture_blit_prog)
     return vao;
 }
 
-void qemu_gl_run_texture_blit(GLint texture_blit_prog,
-                              GLint texture_blit_vao)
+void qemu_gl_run_texture_blit(QemuGLShader *gls, bool flip)
 {
-    glUseProgram(texture_blit_prog);
-    glBindVertexArray(texture_blit_vao);
+    glUseProgram(flip
+                 ? gls->texture_blit_flip_prog
+                 : gls->texture_blit_prog);
+    glBindVertexArray(gls->texture_blit_vao);
     glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
 }
 
 /* ---------------------------------------------------------------------- */
 
-GLuint qemu_gl_create_compile_shader(GLenum type, const GLchar *src)
+static GLuint qemu_gl_create_compile_shader(GLenum type, const GLchar *src)
 {
     GLuint shader;
     GLint status, length;
@@ -94,7 +105,7 @@ GLuint qemu_gl_create_compile_shader(GLenum type, const GLchar *src)
     return shader;
 }
 
-GLuint qemu_gl_create_link_program(GLuint vert, GLuint frag)
+static GLuint qemu_gl_create_link_program(GLuint vert, GLuint frag)
 {
     GLuint program;
     GLint status, length;
@@ -117,8 +128,8 @@ GLuint qemu_gl_create_link_program(GLuint vert, GLuint frag)
     return program;
 }
 
-GLuint qemu_gl_create_compile_link_program(const GLchar *vert_src,
-                                           const GLchar *frag_src)
+static GLuint qemu_gl_create_compile_link_program(const GLchar *vert_src,
+                                                  const GLchar *frag_src)
 {
     GLuint vert_shader, frag_shader, program;
 
@@ -134,3 +145,31 @@ GLuint qemu_gl_create_compile_link_program(const GLchar *vert_src,
 
     return program;
 }
+
+/* ---------------------------------------------------------------------- */
+
+QemuGLShader *qemu_gl_init_shader(void)
+{
+    QemuGLShader *gls = g_new0(QemuGLShader, 1);
+
+    gls->texture_blit_prog = qemu_gl_create_compile_link_program
+        (texture_blit_vert_src, texture_blit_frag_src);
+    gls->texture_blit_flip_prog = qemu_gl_create_compile_link_program
+        (texture_blit_flip_vert_src, texture_blit_frag_src);
+    if (!gls->texture_blit_prog || !gls->texture_blit_flip_prog) {
+        exit(1);
+    }
+
+    gls->texture_blit_vao =
+        qemu_gl_init_texture_blit(gls->texture_blit_prog);
+
+    return gls;
+}
+
+void qemu_gl_fini_shader(QemuGLShader *gls)
+{
+    if (!gls) {
+        return;
+    }
+    g_free(gls);
+}
diff --git a/ui/shader/texture-blit-flip.vert b/ui/shader/texture-blit-flip.vert
new file mode 100644
index 0000000000..ba081fa5a6
--- /dev/null
+++ b/ui/shader/texture-blit-flip.vert
@@ -0,0 +1,10 @@
+
+#version 300 es
+
+in vec2  in_position;
+out vec2 ex_tex_coord;
+
+void main(void) {
+    gl_Position = vec4(in_position, 0.0, 1.0);
+    ex_tex_coord = vec2(1.0 + in_position.x, 1.0 + in_position.y) * 0.5;
+}
diff --git a/ui/spice-display.c b/ui/spice-display.c
index 0963c7825f..ad1ceafb3f 100644
--- a/ui/spice-display.c
+++ b/ui/spice-display.c
@@ -1019,7 +1019,7 @@ static void qemu_spice_display_init_one(QemuConsole *con)
         ssd->gl_unblock_bh = qemu_bh_new(qemu_spice_gl_unblock_bh, ssd);
         ssd->gl_unblock_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
                                              qemu_spice_gl_block_timer, ssd);
-        ssd->gls = console_gl_init_context();
+        ssd->gls = qemu_gl_init_shader();
         ssd->have_surface = false;
         ssd->have_scanout = false;
     }