summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS2
-rw-r--r--backends/cryptodev-builtin.c42
-rw-r--r--backends/cryptodev-hmp-cmds.c54
-rw-r--r--backends/cryptodev-lkcf.c19
-rw-r--r--backends/cryptodev-vhost-user.c13
-rw-r--r--backends/cryptodev-vhost.c4
-rw-r--r--backends/cryptodev.c433
-rw-r--r--backends/meson.build1
-rw-r--r--hmp-commands-info.hx14
-rw-r--r--hw/acpi/acpi-pci-hotplug-stub.c9
-rw-r--r--hw/acpi/ich9.c21
-rw-r--r--hw/acpi/pci-bridge.c14
-rw-r--r--hw/acpi/pcihp.c112
-rw-r--r--hw/acpi/piix4.c33
-rw-r--r--hw/cxl/cxl-component-utils.c20
-rw-r--r--hw/cxl/cxl-host.c31
-rw-r--r--hw/i386/acpi-build.c179
-rw-r--r--hw/isa/lpc_ich9.c1
-rw-r--r--hw/mem/cxl_type3.c294
-rw-r--r--hw/mem/cxl_type3_stubs.c17
-rw-r--r--hw/mem/meson.build2
-rw-r--r--hw/pci-bridge/cxl_root_port.c64
-rw-r--r--hw/pci-bridge/pci_expander_bridge.c44
-rw-r--r--hw/pci/pci-internal.h1
-rw-r--r--hw/pci/pci.c57
-rw-r--r--hw/pci/pcie_aer.c14
-rw-r--r--hw/pci/pcie_port.c46
-rw-r--r--hw/virtio/trace-events1
-rw-r--r--hw/virtio/vhost-shadow-virtqueue.c8
-rw-r--r--hw/virtio/vhost-user.c4
-rw-r--r--hw/virtio/vhost-vdpa.c126
-rw-r--r--hw/virtio/vhost.c3
-rw-r--r--hw/virtio/virtio-crypto.c48
-rw-r--r--hw/virtio/virtio.c11
-rw-r--r--include/hw/acpi/ich9.h1
-rw-r--r--include/hw/acpi/pcihp.h11
-rw-r--r--include/hw/acpi/piix4.h2
-rw-r--r--include/hw/cxl/cxl.h1
-rw-r--r--include/hw/cxl/cxl_component.h27
-rw-r--r--include/hw/cxl/cxl_device.h11
-rw-r--r--include/hw/hotplug.h2
-rw-r--r--include/hw/pci/pci_bridge.h1
-rw-r--r--include/hw/pci/pcie_aer.h1
-rw-r--r--include/hw/pci/pcie_port.h2
-rw-r--r--include/hw/pci/pcie_regs.h3
-rw-r--r--include/hw/qdev-core.h13
-rw-r--r--include/hw/virtio/vhost-backend.h4
-rw-r--r--include/hw/virtio/vhost-vdpa.h3
-rw-r--r--include/monitor/hmp.h1
-rw-r--r--include/sysemu/cryptodev.h113
-rw-r--r--net/vhost-vdpa.c198
-rw-r--r--qapi/cryptodev.json89
-rw-r--r--qapi/cxl.json128
-rw-r--r--qapi/meson.build2
-rw-r--r--qapi/qapi-schema.json2
-rw-r--r--qapi/qom.json8
-rw-r--r--qapi/stats.json10
-rw-r--r--qga/installer/qemu-ga.wxs15
-rw-r--r--qga/vss-win32/install.cpp9
-rw-r--r--qga/vss-win32/qga-vss.def2
-rw-r--r--qga/vss-win32/requester.cpp41
-rw-r--r--qga/vss-win32/vss-handles.h3
-rw-r--r--stats/stats-hmp-cmds.c5
-rw-r--r--stats/stats-qmp-cmds.c2
-rw-r--r--target/hexagon/README31
-rw-r--r--target/hexagon/attribs_def.h.inc1
-rw-r--r--target/hexagon/cpu.h5
-rwxr-xr-xtarget/hexagon/gen_analyze_funcs.py252
-rwxr-xr-xtarget/hexagon/gen_helper_funcs.py19
-rwxr-xr-xtarget/hexagon/gen_helper_protos.py12
-rw-r--r--target/hexagon/gen_tcg.h90
-rwxr-xr-xtarget/hexagon/gen_tcg_funcs.py152
-rw-r--r--target/hexagon/gen_tcg_hvx.h17
-rw-r--r--target/hexagon/genptr.c296
-rwxr-xr-xtarget/hexagon/hex_common.py10
-rw-r--r--target/hexagon/idef-parser/idef-parser.h1
-rw-r--r--target/hexagon/idef-parser/idef-parser.lex31
-rw-r--r--target/hexagon/idef-parser/idef-parser.y49
-rw-r--r--target/hexagon/idef-parser/macros.inc9
-rw-r--r--target/hexagon/idef-parser/parser-helpers.c19
-rw-r--r--target/hexagon/macros.h29
-rw-r--r--target/hexagon/meson.build11
-rw-r--r--target/hexagon/op_helper.c60
-rw-r--r--target/hexagon/op_helper.h3
-rw-r--r--target/hexagon/translate.c288
-rw-r--r--target/hexagon/translate.h86
-rw-r--r--tests/data/acpi/pc/DSDTbin6360 -> 6488 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.acpierstbin6283 -> 6411 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.acpihmatbin7685 -> 7813 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.bridgebin12487 -> 12615 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.cphpbin6824 -> 6952 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.dimmpxmbin8014 -> 8142 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.hpbridgebin6289 -> 6451 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.hpbrrootbin3081 -> 3343 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.ipmikcsbin6432 -> 6560 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.memhpbin7719 -> 7847 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.nohpetbin6218 -> 6346 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.numamembin6366 -> 6494 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.roothpbin9745 -> 9873 bytes
-rw-r--r--tests/data/acpi/q35/DSDTbin8252 -> 8361 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.acpierstbin8269 -> 8378 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.acpihmatbin9577 -> 9686 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.acpihmat-noinitiatorbin8531 -> 8640 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.applesmcbin8298 -> 8407 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.bridgebin11481 -> 11590 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.core-count2bin32392 -> 32501 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.cphpbin8716 -> 8825 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.cxlbin9564 -> 9673 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.dimmpxmbin9906 -> 10015 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.ipmibtbin8327 -> 8436 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.ipmismbusbin8340 -> 8449 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.ivrsbin8269 -> 8378 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.memhpbin9611 -> 9720 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.mmio64bin9382 -> 9491 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.multi-bridgebin12337 -> 12770 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.noacpihpbin0 -> 8248 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.nohpetbin8110 -> 8219 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.numamembin8258 -> 8367 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.pvpanic-isabin8353 -> 8462 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.tis.tpm12bin8858 -> 8967 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.tis.tpm2bin8884 -> 8993 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.viotbin9361 -> 9470 bytes
-rw-r--r--tests/data/acpi/q35/DSDT.xapicbin35615 -> 35724 bytes
-rw-r--r--tests/qtest/bios-tables-test.c125
-rw-r--r--tests/tcg/hexagon/Makefile.target13
-rw-r--r--tests/tcg/hexagon/fpstuff.c31
-rw-r--r--tests/tcg/hexagon/preg_alias.c10
-rw-r--r--tests/tcg/hexagon/scatter_gather.c513
128 files changed, 3310 insertions, 1305 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 564af9d633..95c957d587 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2902,9 +2902,11 @@ T: git https://gitlab.com/ehabkost/qemu.git machine-next
 
 Cryptodev Backends
 M: Gonglei <arei.gonglei@huawei.com>
+M: zhenwei pi <pizhenwei@bytedance.com>
 S: Maintained
 F: include/sysemu/cryptodev*.h
 F: backends/cryptodev*.c
+F: qapi/cryptodev.json
 
 Python library
 M: John Snow <jsnow@redhat.com>
diff --git a/backends/cryptodev-builtin.c b/backends/cryptodev-builtin.c
index cda6ca3b71..39d0455280 100644
--- a/backends/cryptodev-builtin.c
+++ b/backends/cryptodev-builtin.c
@@ -59,6 +59,19 @@ struct CryptoDevBackendBuiltin {
     CryptoDevBackendBuiltinSession *sessions[MAX_NUM_SESSIONS];
 };
 
+static void cryptodev_builtin_init_akcipher(CryptoDevBackend *backend)
+{
+    QCryptoAkCipherOptions opts;
+
+    opts.alg = QCRYPTO_AKCIPHER_ALG_RSA;
+    opts.u.rsa.padding_alg = QCRYPTO_RSA_PADDING_ALG_RAW;
+    if (qcrypto_akcipher_supports(&opts)) {
+        backend->conf.crypto_services |=
+                     (1u << QCRYPTODEV_BACKEND_SERVICE_AKCIPHER);
+        backend->conf.akcipher_algo = 1u << VIRTIO_CRYPTO_AKCIPHER_RSA;
+    }
+}
+
 static void cryptodev_builtin_init(
              CryptoDevBackend *backend, Error **errp)
 {
@@ -72,21 +85,18 @@ static void cryptodev_builtin_init(
         return;
     }
 
-    cc = cryptodev_backend_new_client(
-              "cryptodev-builtin", NULL);
+    cc = cryptodev_backend_new_client();
     cc->info_str = g_strdup_printf("cryptodev-builtin0");
     cc->queue_index = 0;
-    cc->type = CRYPTODEV_BACKEND_TYPE_BUILTIN;
+    cc->type = QCRYPTODEV_BACKEND_TYPE_BUILTIN;
     backend->conf.peers.ccs[0] = cc;
 
     backend->conf.crypto_services =
-                         1u << VIRTIO_CRYPTO_SERVICE_CIPHER |
-                         1u << VIRTIO_CRYPTO_SERVICE_HASH |
-                         1u << VIRTIO_CRYPTO_SERVICE_MAC |
-                         1u << VIRTIO_CRYPTO_SERVICE_AKCIPHER;
+                         1u << QCRYPTODEV_BACKEND_SERVICE_CIPHER |
+                         1u << QCRYPTODEV_BACKEND_SERVICE_HASH |
+                         1u << QCRYPTODEV_BACKEND_SERVICE_MAC;
     backend->conf.cipher_algo_l = 1u << VIRTIO_CRYPTO_CIPHER_AES_CBC;
     backend->conf.hash_algo = 1u << VIRTIO_CRYPTO_HASH_SHA1;
-    backend->conf.akcipher_algo = 1u << VIRTIO_CRYPTO_AKCIPHER_RSA;
     /*
      * Set the Maximum length of crypto request.
      * Why this value? Just avoid to overflow when
@@ -95,6 +105,7 @@ static void cryptodev_builtin_init(
     backend->conf.max_size = LONG_MAX - sizeof(CryptoDevBackendOpInfo);
     backend->conf.max_cipher_key_len = CRYPTODEV_BUITLIN_MAX_CIPHER_KEY_LEN;
     backend->conf.max_auth_key_len = CRYPTODEV_BUITLIN_MAX_AUTH_KEY_LEN;
+    cryptodev_builtin_init_akcipher(backend);
 
     cryptodev_backend_set_ready(backend, true);
 }
@@ -528,17 +539,14 @@ static int cryptodev_builtin_asym_operation(
 
 static int cryptodev_builtin_operation(
                  CryptoDevBackend *backend,
-                 CryptoDevBackendOpInfo *op_info,
-                 uint32_t queue_index,
-                 CryptoDevCompletionFunc cb,
-                 void *opaque)
+                 CryptoDevBackendOpInfo *op_info)
 {
     CryptoDevBackendBuiltin *builtin =
                       CRYPTODEV_BACKEND_BUILTIN(backend);
     CryptoDevBackendBuiltinSession *sess;
     CryptoDevBackendSymOpInfo *sym_op_info;
     CryptoDevBackendAsymOpInfo *asym_op_info;
-    enum CryptoDevBackendAlgType algtype = op_info->algtype;
+    QCryptodevBackendAlgType algtype = op_info->algtype;
     int status = -VIRTIO_CRYPTO_ERR;
     Error *local_error = NULL;
 
@@ -550,11 +558,11 @@ static int cryptodev_builtin_operation(
     }
 
     sess = builtin->sessions[op_info->session_id];
-    if (algtype == CRYPTODEV_BACKEND_ALG_SYM) {
+    if (algtype == QCRYPTODEV_BACKEND_ALG_SYM) {
         sym_op_info = op_info->u.sym_op_info;
         status = cryptodev_builtin_sym_operation(sess, sym_op_info,
                                                  &local_error);
-    } else if (algtype == CRYPTODEV_BACKEND_ALG_ASYM) {
+    } else if (algtype == QCRYPTODEV_BACKEND_ALG_ASYM) {
         asym_op_info = op_info->u.asym_op_info;
         status = cryptodev_builtin_asym_operation(sess, op_info->op_code,
                                                   asym_op_info, &local_error);
@@ -563,8 +571,8 @@ static int cryptodev_builtin_operation(
     if (local_error) {
         error_report_err(local_error);
     }
-    if (cb) {
-        cb(opaque, status);
+    if (op_info->cb) {
+        op_info->cb(op_info->opaque, status);
     }
     return 0;
 }
diff --git a/backends/cryptodev-hmp-cmds.c b/backends/cryptodev-hmp-cmds.c
new file mode 100644
index 0000000000..4f7220bb13
--- /dev/null
+++ b/backends/cryptodev-hmp-cmds.c
@@ -0,0 +1,54 @@
+/*
+ * HMP commands related to cryptodev
+ *
+ * Copyright (c) 2023 Bytedance.Inc
+ *
+ * Authors:
+ *    zhenwei pi<pizhenwei@bytedance.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "monitor/hmp.h"
+#include "monitor/monitor.h"
+#include "qapi/qapi-commands-cryptodev.h"
+#include "qapi/qmp/qdict.h"
+
+
+void hmp_info_cryptodev(Monitor *mon, const QDict *qdict)
+{
+    QCryptodevInfoList *il;
+    QCryptodevBackendServiceTypeList *sl;
+    QCryptodevBackendClientList *cl;
+
+    for (il = qmp_query_cryptodev(NULL); il; il = il->next) {
+        g_autofree char *services = NULL;
+        QCryptodevInfo *info = il->value;
+        char *tmp_services;
+
+        /* build a string like 'service=[akcipher|mac|hash|cipher]' */
+        for (sl = info->service; sl; sl = sl->next) {
+            const char *service = QCryptodevBackendServiceType_str(sl->value);
+
+            if (!services) {
+                services = g_strdup(service);
+            } else {
+                tmp_services = g_strjoin("|", services, service, NULL);
+                g_free(services);
+                services = tmp_services;
+            }
+        }
+        monitor_printf(mon, "%s: service=[%s]\n", info->id, services);
+
+        for (cl = info->client; cl; cl = cl->next) {
+            QCryptodevBackendClient *client = cl->value;
+            monitor_printf(mon, "    queue %" PRIu32 ": type=%s\n",
+                           client->queue,
+                           QCryptodevBackendType_str(client->type));
+        }
+    }
+
+    qapi_free_QCryptodevInfoList(il);
+}
diff --git a/backends/cryptodev-lkcf.c b/backends/cryptodev-lkcf.c
index 133bd706a4..45aba1ff67 100644
--- a/backends/cryptodev-lkcf.c
+++ b/backends/cryptodev-lkcf.c
@@ -223,14 +223,14 @@ static void cryptodev_lkcf_init(CryptoDevBackend *backend, Error **errp)
         return;
     }
 
-    cc = cryptodev_backend_new_client("cryptodev-lkcf", NULL);
+    cc = cryptodev_backend_new_client();
     cc->info_str = g_strdup_printf("cryptodev-lkcf0");
     cc->queue_index = 0;
-    cc->type = CRYPTODEV_BACKEND_TYPE_LKCF;
+    cc->type = QCRYPTODEV_BACKEND_TYPE_LKCF;
     backend->conf.peers.ccs[0] = cc;
 
     backend->conf.crypto_services =
-        1u << VIRTIO_CRYPTO_SERVICE_AKCIPHER;
+        1u << QCRYPTODEV_BACKEND_SERVICE_AKCIPHER;
     backend->conf.akcipher_algo = 1u << VIRTIO_CRYPTO_AKCIPHER_RSA;
     lkcf->running = true;
 
@@ -469,15 +469,12 @@ static void *cryptodev_lkcf_worker(void *arg)
 
 static int cryptodev_lkcf_operation(
     CryptoDevBackend *backend,
-    CryptoDevBackendOpInfo *op_info,
-    uint32_t queue_index,
-    CryptoDevCompletionFunc cb,
-    void *opaque)
+    CryptoDevBackendOpInfo *op_info)
 {
     CryptoDevBackendLKCF *lkcf =
         CRYPTODEV_BACKEND_LKCF(backend);
     CryptoDevBackendLKCFSession *sess;
-    enum CryptoDevBackendAlgType algtype = op_info->algtype;
+    QCryptodevBackendAlgType algtype = op_info->algtype;
     CryptoDevLKCFTask *task;
 
     if (op_info->session_id >= MAX_SESSIONS ||
@@ -488,15 +485,15 @@ static int cryptodev_lkcf_operation(
     }
 
     sess = lkcf->sess[op_info->session_id];
-    if (algtype != CRYPTODEV_BACKEND_ALG_ASYM) {
+    if (algtype != QCRYPTODEV_BACKEND_ALG_ASYM) {
         error_report("algtype not supported: %u", algtype);
         return -VIRTIO_CRYPTO_NOTSUPP;
     }
 
     task = g_new0(CryptoDevLKCFTask, 1);
     task->op_info = op_info;
-    task->cb = cb;
-    task->opaque = opaque;
+    task->cb = op_info->cb;
+    task->opaque = op_info->opaque;
     task->sess = sess;
     task->lkcf = lkcf;
     task->status = -VIRTIO_CRYPTO_ERR;
diff --git a/backends/cryptodev-vhost-user.c b/backends/cryptodev-vhost-user.c
index ab3028e045..b1d9eb735f 100644
--- a/backends/cryptodev-vhost-user.c
+++ b/backends/cryptodev-vhost-user.c
@@ -67,7 +67,7 @@ cryptodev_vhost_user_get_vhost(
 {
     CryptoDevBackendVhostUser *s =
                       CRYPTODEV_BACKEND_VHOST_USER(b);
-    assert(cc->type == CRYPTODEV_BACKEND_TYPE_VHOST_USER);
+    assert(cc->type == QCRYPTODEV_BACKEND_TYPE_VHOST_USER);
     assert(queue < MAX_CRYPTO_QUEUE_NUM);
 
     return s->vhost_crypto[queue];
@@ -198,12 +198,11 @@ static void cryptodev_vhost_user_init(
     s->opened = true;
 
     for (i = 0; i < queues; i++) {
-        cc = cryptodev_backend_new_client(
-                  "cryptodev-vhost-user", NULL);
+        cc = cryptodev_backend_new_client();
         cc->info_str = g_strdup_printf("cryptodev-vhost-user%zu to %s ",
                                        i, chr->label);
         cc->queue_index = i;
-        cc->type = CRYPTODEV_BACKEND_TYPE_VHOST_USER;
+        cc->type = QCRYPTODEV_BACKEND_TYPE_VHOST_USER;
 
         backend->conf.peers.ccs[i] = cc;
 
@@ -222,9 +221,9 @@ static void cryptodev_vhost_user_init(
                      cryptodev_vhost_user_event, NULL, s, NULL, true);
 
     backend->conf.crypto_services =
-                         1u << VIRTIO_CRYPTO_SERVICE_CIPHER |
-                         1u << VIRTIO_CRYPTO_SERVICE_HASH |
-                         1u << VIRTIO_CRYPTO_SERVICE_MAC;
+                         1u << QCRYPTODEV_BACKEND_SERVICE_CIPHER |
+                         1u << QCRYPTODEV_BACKEND_SERVICE_HASH |
+                         1u << QCRYPTODEV_BACKEND_SERVICE_MAC;
     backend->conf.cipher_algo_l = 1u << VIRTIO_CRYPTO_CIPHER_AES_CBC;
     backend->conf.hash_algo = 1u << VIRTIO_CRYPTO_HASH_SHA1;
 
diff --git a/backends/cryptodev-vhost.c b/backends/cryptodev-vhost.c
index 74ea0ad63d..93523732f3 100644
--- a/backends/cryptodev-vhost.c
+++ b/backends/cryptodev-vhost.c
@@ -127,7 +127,7 @@ cryptodev_get_vhost(CryptoDevBackendClient *cc,
 
     switch (cc->type) {
 #if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX)
-    case CRYPTODEV_BACKEND_TYPE_VHOST_USER:
+    case QCRYPTODEV_BACKEND_TYPE_VHOST_USER:
         vhost_crypto = cryptodev_vhost_user_get_vhost(cc, b, queue);
         break;
 #endif
@@ -195,7 +195,7 @@ int cryptodev_vhost_start(VirtIODevice *dev, int total_queues)
          * because vhost user doesn't interrupt masking/unmasking
          * properly.
          */
-        if (cc->type == CRYPTODEV_BACKEND_TYPE_VHOST_USER) {
+        if (cc->type == QCRYPTODEV_BACKEND_TYPE_VHOST_USER) {
             dev->use_guest_notifier_mask = false;
         }
      }
diff --git a/backends/cryptodev.c b/backends/cryptodev.c
index 54ee8c81f5..94ca393cee 100644
--- a/backends/cryptodev.c
+++ b/backends/cryptodev.c
@@ -23,29 +23,92 @@
 
 #include "qemu/osdep.h"
 #include "sysemu/cryptodev.h"
+#include "sysemu/stats.h"
 #include "qapi/error.h"
+#include "qapi/qapi-commands-cryptodev.h"
+#include "qapi/qapi-types-stats.h"
 #include "qapi/visitor.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
+#include "qemu/main-loop.h"
 #include "qom/object_interfaces.h"
 #include "hw/virtio/virtio-crypto.h"
 
+#define SYM_ENCRYPT_OPS_STR "sym-encrypt-ops"
+#define SYM_DECRYPT_OPS_STR "sym-decrypt-ops"
+#define SYM_ENCRYPT_BYTES_STR "sym-encrypt-bytes"
+#define SYM_DECRYPT_BYTES_STR "sym-decrypt-bytes"
+
+#define ASYM_ENCRYPT_OPS_STR "asym-encrypt-ops"
+#define ASYM_DECRYPT_OPS_STR "asym-decrypt-ops"
+#define ASYM_SIGN_OPS_STR "asym-sign-ops"
+#define ASYM_VERIFY_OPS_STR "asym-verify-ops"
+#define ASYM_ENCRYPT_BYTES_STR "asym-encrypt-bytes"
+#define ASYM_DECRYPT_BYTES_STR "asym-decrypt-bytes"
+#define ASYM_SIGN_BYTES_STR "asym-sign-bytes"
+#define ASYM_VERIFY_BYTES_STR "asym-verify-bytes"
+
+typedef struct StatsArgs {
+    union StatsResultsType {
+        StatsResultList **stats;
+        StatsSchemaList **schema;
+    } result;
+    strList *names;
+    Error **errp;
+} StatsArgs;
 
 static QTAILQ_HEAD(, CryptoDevBackendClient) crypto_clients;
 
+static int qmp_query_cryptodev_foreach(Object *obj, void *data)
+{
+    CryptoDevBackend *backend;
+    QCryptodevInfoList **infolist = data;
+    uint32_t services, i;
+
+    if (!object_dynamic_cast(obj, TYPE_CRYPTODEV_BACKEND)) {
+        return 0;
+    }
+
+    QCryptodevInfo *info = g_new0(QCryptodevInfo, 1);
+    info->id = g_strdup(object_get_canonical_path_component(obj));
+
+    backend = CRYPTODEV_BACKEND(obj);
+    services = backend->conf.crypto_services;
+    for (i = 0; i < QCRYPTODEV_BACKEND_SERVICE__MAX; i++) {
+        if (services & (1 << i)) {
+            QAPI_LIST_PREPEND(info->service, i);
+        }
+    }
+
+    for (i = 0; i < backend->conf.peers.queues; i++) {
+        CryptoDevBackendClient *cc = backend->conf.peers.ccs[i];
+        QCryptodevBackendClient *client = g_new0(QCryptodevBackendClient, 1);
+
+        client->queue = cc->queue_index;
+        client->type = cc->type;
+        QAPI_LIST_PREPEND(info->client, client);
+    }
+
+    QAPI_LIST_PREPEND(*infolist, info);
+
+    return 0;
+}
 
-CryptoDevBackendClient *
-cryptodev_backend_new_client(const char *model,
-                                    const char *name)
+QCryptodevInfoList *qmp_query_cryptodev(Error **errp)
+{
+    QCryptodevInfoList *list = NULL;
+    Object *objs = container_get(object_get_root(), "/objects");
+
+    object_child_foreach(objs, qmp_query_cryptodev_foreach, &list);
+
+    return list;
+}
+
+CryptoDevBackendClient *cryptodev_backend_new_client(void)
 {
     CryptoDevBackendClient *cc;
 
     cc = g_new0(CryptoDevBackendClient, 1);
-    cc->model = g_strdup(model);
-    if (name) {
-        cc->name = g_strdup(name);
-    }
-
     QTAILQ_INSERT_TAIL(&crypto_clients, cc, next);
 
     return cc;
@@ -55,8 +118,6 @@ void cryptodev_backend_free_client(
                   CryptoDevBackendClient *cc)
 {
     QTAILQ_REMOVE(&crypto_clients, cc, next);
-    g_free(cc->name);
-    g_free(cc->model);
     g_free(cc->info_str);
     g_free(cc);
 }
@@ -71,6 +132,9 @@ void cryptodev_backend_cleanup(
     if (bc->cleanup) {
         bc->cleanup(backend, errp);
     }
+
+    g_free(backend->sym_stat);
+    g_free(backend->asym_stat);
 }
 
 int cryptodev_backend_create_session(
@@ -107,38 +171,111 @@ int cryptodev_backend_close_session(
 
 static int cryptodev_backend_operation(
                  CryptoDevBackend *backend,
-                 CryptoDevBackendOpInfo *op_info,
-                 uint32_t queue_index,
-                 CryptoDevCompletionFunc cb,
-                 void *opaque)
+                 CryptoDevBackendOpInfo *op_info)
 {
     CryptoDevBackendClass *bc =
                       CRYPTODEV_BACKEND_GET_CLASS(backend);
 
     if (bc->do_op) {
-        return bc->do_op(backend, op_info, queue_index, cb, opaque);
+        return bc->do_op(backend, op_info);
     }
     return -VIRTIO_CRYPTO_NOTSUPP;
 }
 
+static int cryptodev_backend_account(CryptoDevBackend *backend,
+                 CryptoDevBackendOpInfo *op_info)
+{
+    enum QCryptodevBackendAlgType algtype = op_info->algtype;
+    int len;
+
+    if (algtype == QCRYPTODEV_BACKEND_ALG_ASYM) {
+        CryptoDevBackendAsymOpInfo *asym_op_info = op_info->u.asym_op_info;
+        len = asym_op_info->src_len;
+        switch (op_info->op_code) {
+        case VIRTIO_CRYPTO_AKCIPHER_ENCRYPT:
+            CryptodevAsymStatIncEncrypt(backend, len);
+            break;
+        case VIRTIO_CRYPTO_AKCIPHER_DECRYPT:
+            CryptodevAsymStatIncDecrypt(backend, len);
+            break;
+        case VIRTIO_CRYPTO_AKCIPHER_SIGN:
+            CryptodevAsymStatIncSign(backend, len);
+            break;
+        case VIRTIO_CRYPTO_AKCIPHER_VERIFY:
+            CryptodevAsymStatIncVerify(backend, len);
+            break;
+        default:
+            return -VIRTIO_CRYPTO_NOTSUPP;
+        }
+    } else if (algtype == QCRYPTODEV_BACKEND_ALG_SYM) {
+        CryptoDevBackendSymOpInfo *sym_op_info = op_info->u.sym_op_info;
+        len = sym_op_info->src_len;
+        switch (op_info->op_code) {
+        case VIRTIO_CRYPTO_CIPHER_ENCRYPT:
+            CryptodevSymStatIncEncrypt(backend, len);
+            break;
+        case VIRTIO_CRYPTO_CIPHER_DECRYPT:
+            CryptodevSymStatIncDecrypt(backend, len);
+            break;
+        default:
+            return -VIRTIO_CRYPTO_NOTSUPP;
+        }
+    } else {
+        error_report("Unsupported cryptodev alg type: %" PRIu32 "", algtype);
+        return -VIRTIO_CRYPTO_NOTSUPP;
+    }
+
+    return len;
+}
+
+static void cryptodev_backend_throttle_timer_cb(void *opaque)
+{
+    CryptoDevBackend *backend = (CryptoDevBackend *)opaque;
+    CryptoDevBackendOpInfo *op_info, *tmpop;
+    int ret;
+
+    QTAILQ_FOREACH_SAFE(op_info, &backend->opinfos, next, tmpop) {
+        QTAILQ_REMOVE(&backend->opinfos, op_info, next);
+        ret = cryptodev_backend_account(backend, op_info);
+        if (ret < 0) {
+            op_info->cb(op_info->opaque, ret);
+            continue;
+        }
+
+        throttle_account(&backend->ts, true, ret);
+        cryptodev_backend_operation(backend, op_info);
+        if (throttle_enabled(&backend->tc) &&
+            throttle_schedule_timer(&backend->ts, &backend->tt, true)) {
+            break;
+        }
+    }
+}
+
 int cryptodev_backend_crypto_operation(
                  CryptoDevBackend *backend,
-                 void *opaque1,
-                 uint32_t queue_index,
-                 CryptoDevCompletionFunc cb, void *opaque2)
+                 CryptoDevBackendOpInfo *op_info)
 {
-    VirtIOCryptoReq *req = opaque1;
-    CryptoDevBackendOpInfo *op_info = &req->op_info;
-    enum CryptoDevBackendAlgType algtype = req->flags;
+    int ret;
 
-    if ((algtype != CRYPTODEV_BACKEND_ALG_SYM)
-        && (algtype != CRYPTODEV_BACKEND_ALG_ASYM)) {
-        error_report("Unsupported cryptodev alg type: %" PRIu32 "", algtype);
-        return -VIRTIO_CRYPTO_NOTSUPP;
+    if (!throttle_enabled(&backend->tc)) {
+        goto do_account;
+    }
+
+    if (throttle_schedule_timer(&backend->ts, &backend->tt, true) ||
+        !QTAILQ_EMPTY(&backend->opinfos)) {
+        QTAILQ_INSERT_TAIL(&backend->opinfos, op_info, next);
+        return 0;
     }
 
-    return cryptodev_backend_operation(backend, op_info, queue_index,
-                                       cb, opaque2);
+do_account:
+    ret = cryptodev_backend_account(backend, op_info);
+    if (ret < 0) {
+        return ret;
+    }
+
+    throttle_account(&backend->ts, true, ret);
+
+    return cryptodev_backend_operation(backend, op_info);
 }
 
 static void
@@ -169,15 +306,111 @@ cryptodev_backend_set_queues(Object *obj, Visitor *v, const char *name,
     backend->conf.peers.queues = value;
 }
 
+static void cryptodev_backend_set_throttle(CryptoDevBackend *backend, int field,
+                                           uint64_t value, Error **errp)
+{
+    uint64_t orig = backend->tc.buckets[field].avg;
+    bool enabled = throttle_enabled(&backend->tc);
+
+    if (orig == value) {
+        return;
+    }
+
+    backend->tc.buckets[field].avg = value;
+    if (!throttle_enabled(&backend->tc)) {
+        throttle_timers_destroy(&backend->tt);
+        cryptodev_backend_throttle_timer_cb(backend); /* drain opinfos */
+        return;
+    }
+
+    if (!throttle_is_valid(&backend->tc, errp)) {
+        backend->tc.buckets[field].avg = orig; /* revert change */
+        return;
+    }
+
+    if (!enabled) {
+        throttle_init(&backend->ts);
+        throttle_timers_init(&backend->tt, qemu_get_aio_context(),
+                             QEMU_CLOCK_REALTIME,
+                             cryptodev_backend_throttle_timer_cb, /* FIXME */
+                             cryptodev_backend_throttle_timer_cb, backend);
+    }
+
+    throttle_config(&backend->ts, QEMU_CLOCK_REALTIME, &backend->tc);
+}
+
+static void cryptodev_backend_get_bps(Object *obj, Visitor *v,
+                                      const char *name, void *opaque,
+                                      Error **errp)
+{
+    CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj);
+    uint64_t value = backend->tc.buckets[THROTTLE_BPS_TOTAL].avg;
+
+    visit_type_uint64(v, name, &value, errp);
+}
+
+static void cryptodev_backend_set_bps(Object *obj, Visitor *v, const char *name,
+                                      void *opaque, Error **errp)
+{
+    CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj);
+    uint64_t value;
+
+    if (!visit_type_uint64(v, name, &value, errp)) {
+        return;
+    }
+
+    cryptodev_backend_set_throttle(backend, THROTTLE_BPS_TOTAL, value, errp);
+}
+
+static void cryptodev_backend_get_ops(Object *obj, Visitor *v, const char *name,
+                                      void *opaque, Error **errp)
+{
+    CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj);
+    uint64_t value = backend->tc.buckets[THROTTLE_OPS_TOTAL].avg;
+
+    visit_type_uint64(v, name, &value, errp);
+}
+
+static void cryptodev_backend_set_ops(Object *obj, Visitor *v,
+                                       const char *name, void *opaque,
+                                       Error **errp)
+{
+    CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj);
+    uint64_t value;
+
+    if (!visit_type_uint64(v, name, &value, errp)) {
+        return;
+    }
+
+    cryptodev_backend_set_throttle(backend, THROTTLE_OPS_TOTAL, value, errp);
+}
+
 static void
 cryptodev_backend_complete(UserCreatable *uc, Error **errp)
 {
     CryptoDevBackend *backend = CRYPTODEV_BACKEND(uc);
     CryptoDevBackendClass *bc = CRYPTODEV_BACKEND_GET_CLASS(uc);
+    uint32_t services;
+    uint64_t value;
+
+    QTAILQ_INIT(&backend->opinfos);
+    value = backend->tc.buckets[THROTTLE_OPS_TOTAL].avg;
+    cryptodev_backend_set_throttle(backend, THROTTLE_OPS_TOTAL, value, errp);
+    value = backend->tc.buckets[THROTTLE_BPS_TOTAL].avg;
+    cryptodev_backend_set_throttle(backend, THROTTLE_BPS_TOTAL, value, errp);
 
     if (bc->init) {
         bc->init(backend, errp);
     }
+
+    services = backend->conf.crypto_services;
+    if (services & (1 << QCRYPTODEV_BACKEND_SERVICE_CIPHER)) {
+        backend->sym_stat = g_new0(CryptodevBackendSymStat, 1);
+    }
+
+    if (services & (1 << QCRYPTODEV_BACKEND_SERVICE_AKCIPHER)) {
+        backend->asym_stat = g_new0(CryptodevBackendAsymStat, 1);
+    }
 }
 
 void cryptodev_backend_set_used(CryptoDevBackend *backend, bool used)
@@ -208,8 +441,12 @@ cryptodev_backend_can_be_deleted(UserCreatable *uc)
 
 static void cryptodev_backend_instance_init(Object *obj)
 {
+    CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj);
+
     /* Initialize devices' queues property to 1 */
     object_property_set_int(obj, "queues", 1, NULL);
+
+    throttle_config_init(&backend->tc);
 }
 
 static void cryptodev_backend_finalize(Object *obj)
@@ -217,6 +454,137 @@ static void cryptodev_backend_finalize(Object *obj)
     CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj);
 
     cryptodev_backend_cleanup(backend, NULL);
+    if (throttle_enabled(&backend->tc)) {
+        throttle_timers_destroy(&backend->tt);
+    }
+}
+
+static StatsList *cryptodev_backend_stats_add(const char *name, int64_t *val,
+                                              StatsList *stats_list)
+{
+    Stats *stats = g_new0(Stats, 1);
+
+    stats->name = g_strdup(name);
+    stats->value = g_new0(StatsValue, 1);
+    stats->value->type = QTYPE_QNUM;
+    stats->value->u.scalar = *val;
+
+    QAPI_LIST_PREPEND(stats_list, stats);
+    return stats_list;
+}
+
+static int cryptodev_backend_stats_query(Object *obj, void *data)
+{
+    StatsArgs *stats_args = data;
+    StatsResultList **stats_results = stats_args->result.stats;
+    StatsList *stats_list = NULL;
+    StatsResult *entry;
+    CryptoDevBackend *backend;
+    CryptodevBackendSymStat *sym_stat;
+    CryptodevBackendAsymStat *asym_stat;
+
+    if (!object_dynamic_cast(obj, TYPE_CRYPTODEV_BACKEND)) {
+        return 0;
+    }
+
+    backend = CRYPTODEV_BACKEND(obj);
+    sym_stat = backend->sym_stat;
+    if (sym_stat) {
+        stats_list = cryptodev_backend_stats_add(SYM_ENCRYPT_OPS_STR,
+                         &sym_stat->encrypt_ops, stats_list);
+        stats_list = cryptodev_backend_stats_add(SYM_DECRYPT_OPS_STR,
+                         &sym_stat->decrypt_ops, stats_list);
+        stats_list = cryptodev_backend_stats_add(SYM_ENCRYPT_BYTES_STR,
+                         &sym_stat->encrypt_bytes, stats_list);
+        stats_list = cryptodev_backend_stats_add(SYM_DECRYPT_BYTES_STR,
+                         &sym_stat->decrypt_bytes, stats_list);
+    }
+
+    asym_stat = backend->asym_stat;
+    if (asym_stat) {
+        stats_list = cryptodev_backend_stats_add(ASYM_ENCRYPT_OPS_STR,
+                         &asym_stat->encrypt_ops, stats_list);
+        stats_list = cryptodev_backend_stats_add(ASYM_DECRYPT_OPS_STR,
+                         &asym_stat->decrypt_ops, stats_list);
+        stats_list = cryptodev_backend_stats_add(ASYM_SIGN_OPS_STR,
+                         &asym_stat->sign_ops, stats_list);
+        stats_list = cryptodev_backend_stats_add(ASYM_VERIFY_OPS_STR,
+                         &asym_stat->verify_ops, stats_list);
+        stats_list = cryptodev_backend_stats_add(ASYM_ENCRYPT_BYTES_STR,
+                         &asym_stat->encrypt_bytes, stats_list);
+        stats_list = cryptodev_backend_stats_add(ASYM_DECRYPT_BYTES_STR,
+                         &asym_stat->decrypt_bytes, stats_list);
+        stats_list = cryptodev_backend_stats_add(ASYM_SIGN_BYTES_STR,
+                         &asym_stat->sign_bytes, stats_list);
+        stats_list = cryptodev_backend_stats_add(ASYM_VERIFY_BYTES_STR,
+                         &asym_stat->verify_bytes, stats_list);
+    }
+
+    entry = g_new0(StatsResult, 1);
+    entry->provider = STATS_PROVIDER_CRYPTODEV;
+    entry->qom_path = g_strdup(object_get_canonical_path(obj));
+    entry->stats = stats_list;
+    QAPI_LIST_PREPEND(*stats_results, entry);
+
+    return 0;
+}
+
+static void cryptodev_backend_stats_cb(StatsResultList **result,
+                                       StatsTarget target,
+                                       strList *names, strList *targets,
+                                       Error **errp)
+{
+    switch (target) {
+    case STATS_TARGET_CRYPTODEV:
+    {
+        Object *objs = container_get(object_get_root(), "/objects");
+        StatsArgs stats_args;
+        stats_args.result.stats = result;
+        stats_args.names = names;
+        stats_args.errp = errp;
+
+        object_child_foreach(objs, cryptodev_backend_stats_query, &stats_args);
+        break;
+    }
+    default:
+        break;
+    }
+}
+
+static StatsSchemaValueList *cryptodev_backend_schemas_add(const char *name,
+                                 StatsSchemaValueList *list)
+{
+    StatsSchemaValueList *schema_entry = g_new0(StatsSchemaValueList, 1);
+
+    schema_entry->value = g_new0(StatsSchemaValue, 1);
+    schema_entry->value->type = STATS_TYPE_CUMULATIVE;
+    schema_entry->value->name = g_strdup(name);
+    schema_entry->next = list;
+
+    return schema_entry;
+}
+
+static void cryptodev_backend_schemas_cb(StatsSchemaList **result,
+                                         Error **errp)
+{
+    StatsSchemaValueList *stats_list = NULL;
+    const char *sym_stats[] = { SYM_ENCRYPT_OPS_STR, SYM_DECRYPT_OPS_STR,
+                                SYM_ENCRYPT_BYTES_STR, SYM_DECRYPT_BYTES_STR };
+    const char *asym_stats[] = { ASYM_ENCRYPT_OPS_STR, ASYM_DECRYPT_OPS_STR,
+                                 ASYM_SIGN_OPS_STR, ASYM_VERIFY_OPS_STR,
+                                 ASYM_ENCRYPT_BYTES_STR, ASYM_DECRYPT_BYTES_STR,
+                                 ASYM_SIGN_BYTES_STR, ASYM_VERIFY_BYTES_STR };
+
+    for (int i = 0; i < ARRAY_SIZE(sym_stats); i++) {
+        stats_list = cryptodev_backend_schemas_add(sym_stats[i], stats_list);
+    }
+
+    for (int i = 0; i < ARRAY_SIZE(asym_stats); i++) {
+        stats_list = cryptodev_backend_schemas_add(asym_stats[i], stats_list);
+    }
+
+    add_stats_schema(result, STATS_PROVIDER_CRYPTODEV, STATS_TARGET_CRYPTODEV,
+                     stats_list);
 }
 
 static void
@@ -232,6 +600,17 @@ cryptodev_backend_class_init(ObjectClass *oc, void *data)
                               cryptodev_backend_get_queues,
                               cryptodev_backend_set_queues,
                               NULL, NULL);
+    object_class_property_add(oc, "throttle-bps", "uint64",
+                              cryptodev_backend_get_bps,
+                              cryptodev_backend_set_bps,
+                              NULL, NULL);
+    object_class_property_add(oc, "throttle-ops", "uint64",
+                              cryptodev_backend_get_ops,
+                              cryptodev_backend_set_ops,
+                              NULL, NULL);
+
+    add_stats_callbacks(STATS_PROVIDER_CRYPTODEV, cryptodev_backend_stats_cb,
+                        cryptodev_backend_schemas_cb);
 }
 
 static const TypeInfo cryptodev_backend_info = {
diff --git a/backends/meson.build b/backends/meson.build
index 954e658b25..b369e0a9d0 100644
--- a/backends/meson.build
+++ b/backends/meson.build
@@ -1,5 +1,6 @@
 softmmu_ss.add([files(
   'cryptodev-builtin.c',
+  'cryptodev-hmp-cmds.c',
   'cryptodev.c',
   'hostmem-ram.c',
   'hostmem.c',
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index 754b1e8408..47d63d26db 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -993,3 +993,17 @@ SRST
   ``info virtio-queue-element`` *path* *queue* [*index*]
     Display element of a given virtio queue
 ERST
+
+    {
+        .name       = "cryptodev",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show the crypto devices",
+        .cmd        = hmp_info_cryptodev,
+        .flags      = "p",
+    },
+
+SRST
+  ``info cryptodev``
+    Show the crypto devices.
+ERST
diff --git a/hw/acpi/acpi-pci-hotplug-stub.c b/hw/acpi/acpi-pci-hotplug-stub.c
index a43f6dafc9..dcee3ad7a1 100644
--- a/hw/acpi/acpi-pci-hotplug-stub.c
+++ b/hw/acpi/acpi-pci-hotplug-stub.c
@@ -5,8 +5,7 @@
 const VMStateDescription vmstate_acpi_pcihp_pci_status;
 
 void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
-                     MemoryRegion *address_space_io, bool bridges_enabled,
-                     uint16_t io_base)
+                     MemoryRegion *address_space_io, uint16_t io_base)
 {
     return;
 }
@@ -36,8 +35,12 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev,
     return;
 }
 
-void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off)
+void acpi_pcihp_reset(AcpiPciHpState *s)
 {
     return;
 }
 
+bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus)
+{
+    return true;
+}
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index d23bfcaa6b..25e2c7243e 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -218,7 +218,7 @@ static bool vmstate_test_use_pcihp(void *opaque)
 {
     ICH9LPCPMRegs *s = opaque;
 
-    return s->use_acpi_hotplug_bridge;
+    return s->acpi_pci_hotplug.use_acpi_hotplug_bridge;
 }
 
 static const VMStateDescription vmstate_pcihp_state = {
@@ -277,8 +277,8 @@ static void pm_reset(void *opaque)
     }
     pm->smi_en_wmask = ~0;
 
-    if (pm->use_acpi_hotplug_bridge) {
-        acpi_pcihp_reset(&pm->acpi_pci_hotplug, true);
+    if (pm->acpi_pci_hotplug.use_acpi_hotplug_bridge) {
+        acpi_pcihp_reset(&pm->acpi_pci_hotplug);
     }
 
     acpi_update_sci(&pm->acpi_regs, pm->irq);
@@ -316,12 +316,11 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, qemu_irq sci_irq)
         acpi_pm_tco_init(&pm->tco_regs, &pm->io);
     }
 
-    if (pm->use_acpi_hotplug_bridge) {
+    if (pm->acpi_pci_hotplug.use_acpi_hotplug_bridge) {
         acpi_pcihp_init(OBJECT(lpc_pci),
                         &pm->acpi_pci_hotplug,
                         pci_get_bus(lpc_pci),
                         pci_address_space_io(lpc_pci),
-                        true,
                         ACPI_PCIHP_ADDR_ICH9);
 
         qbus_set_hotplug_handler(BUS(pci_get_bus(lpc_pci)),
@@ -403,14 +402,14 @@ static bool ich9_pm_get_acpi_pci_hotplug(Object *obj, Error **errp)
 {
     ICH9LPCState *s = ICH9_LPC_DEVICE(obj);
 
-    return s->pm.use_acpi_hotplug_bridge;
+    return s->pm.acpi_pci_hotplug.use_acpi_hotplug_bridge;
 }
 
 static void ich9_pm_set_acpi_pci_hotplug(Object *obj, bool value, Error **errp)
 {
     ICH9LPCState *s = ICH9_LPC_DEVICE(obj);
 
-    s->pm.use_acpi_hotplug_bridge = value;
+    s->pm.acpi_pci_hotplug.use_acpi_hotplug_bridge = value;
 }
 
 static bool ich9_pm_get_keep_pci_slot_hpc(Object *obj, Error **errp)
@@ -435,7 +434,7 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm)
     pm->disable_s3 = 0;
     pm->disable_s4 = 0;
     pm->s4_val = 2;
-    pm->use_acpi_hotplug_bridge = true;
+    pm->acpi_pci_hotplug.use_acpi_hotplug_bridge = true;
     pm->keep_pci_slot_hpc = true;
     pm->enable_tco = true;
 
@@ -579,6 +578,12 @@ void ich9_pm_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
     }
 }
 
+bool ich9_pm_is_hotpluggable_bus(HotplugHandler *hotplug_dev, BusState *bus)
+{
+    ICH9LPCState *lpc = ICH9_LPC_DEVICE(hotplug_dev);
+    return acpi_pcihp_is_hotpluggbale_bus(&lpc->pm.acpi_pci_hotplug, bus);
+}
+
 void ich9_pm_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list)
 {
     ICH9LPCState *s = ICH9_LPC_DEVICE(adev);
diff --git a/hw/acpi/pci-bridge.c b/hw/acpi/pci-bridge.c
index 5f3ee5157f..7baa7034a1 100644
--- a/hw/acpi/pci-bridge.c
+++ b/hw/acpi/pci-bridge.c
@@ -21,7 +21,17 @@ void build_pci_bridge_aml(AcpiDevAmlIf *adev, Aml *scope)
 {
     PCIBridge *br = PCI_BRIDGE(adev);
 
-    if (object_property_find(OBJECT(&br->sec_bus), ACPI_PCIHP_PROP_BSEL)) {
-        build_append_pci_bus_devices(scope, pci_bridge_get_sec_bus(br));
+    if (!DEVICE(br)->hotplugged) {
+        PCIBus *sec_bus = pci_bridge_get_sec_bus(br);
+
+        build_append_pci_bus_devices(scope, sec_bus);
+
+        /*
+         * generate hotplug slots descriptors if
+         * bridge has ACPI PCI hotplug attached,
+         */
+        if (object_property_find(OBJECT(sec_bus), ACPI_PCIHP_PROP_BSEL)) {
+            build_append_pcihp_slots(scope, sec_bus);
+        }
     }
 }
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index 5dc7377411..dcfb779a7a 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -54,21 +54,6 @@ typedef struct AcpiPciHpFind {
     PCIBus *bus;
 } AcpiPciHpFind;
 
-static gint g_cmp_uint32(gconstpointer a, gconstpointer b, gpointer user_data)
-{
-    return a - b;
-}
-
-static GSequence *pci_acpi_index_list(void)
-{
-    static GSequence *used_acpi_index_list;
-
-    if (!used_acpi_index_list) {
-        used_acpi_index_list = g_sequence_new(NULL);
-    }
-    return used_acpi_index_list;
-}
-
 static int acpi_pcihp_get_bsel(PCIBus *bus)
 {
     Error *local_err = NULL;
@@ -136,20 +121,6 @@ static void acpi_set_pci_info(bool has_bridge_hotplug)
     }
 }
 
-static void acpi_pcihp_disable_root_bus(void)
-{
-    Object *host = acpi_get_i386_pci_host();
-    PCIBus *bus;
-
-    bus = PCI_HOST_BRIDGE(host)->bus;
-    if (bus && qbus_is_hotpluggable(BUS(bus))) {
-        /* setting the hotplug handler to NULL makes the bus non-hotpluggable */
-        qbus_set_hotplug_handler(BUS(bus), NULL);
-    }
-
-    return;
-}
-
 static void acpi_pcihp_test_hotplug_bus(PCIBus *bus, void *opaque)
 {
     AcpiPciHpFind *find = opaque;
@@ -291,17 +262,12 @@ static void acpi_pcihp_update(AcpiPciHpState *s)
     }
 }
 
-void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off)
+void acpi_pcihp_reset(AcpiPciHpState *s)
 {
-    if (acpihp_root_off) {
-        acpi_pcihp_disable_root_bus();
-    }
-    acpi_set_pci_info(!s->legacy_piix);
+    acpi_set_pci_info(s->use_acpi_hotplug_bridge);
     acpi_pcihp_update(s);
 }
 
-#define ONBOARD_INDEX_MAX (16 * 1024 - 1)
-
 void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev,
                                    DeviceState *dev, Error **errp)
 {
@@ -314,34 +280,6 @@ void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev,
                    ACPI_PCIHP_PROP_BSEL "' set");
         return;
     }
-
-    /*
-     * capped by systemd (see: udev-builtin-net_id.c)
-     * as it's the only known user honor it to avoid users
-     * misconfigure QEMU and then wonder why acpi-index doesn't work
-     */
-    if (pdev->acpi_index > ONBOARD_INDEX_MAX) {
-        error_setg(errp, "acpi-index should be less or equal to %u",
-                   ONBOARD_INDEX_MAX);
-        return;
-    }
-
-    /*
-     * make sure that acpi-index is unique across all present PCI devices
-     */
-    if (pdev->acpi_index) {
-        GSequence *used_indexes = pci_acpi_index_list();
-
-        if (g_sequence_lookup(used_indexes, GINT_TO_POINTER(pdev->acpi_index),
-                              g_cmp_uint32, NULL)) {
-            error_setg(errp, "a PCI device with acpi-index = %" PRIu32
-                       " already exist", pdev->acpi_index);
-            return;
-        }
-        g_sequence_insert_sorted(used_indexes,
-                                 GINT_TO_POINTER(pdev->acpi_index),
-                                 g_cmp_uint32, NULL);
-    }
 }
 
 void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
@@ -361,17 +299,10 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
          * Overwrite the default hotplug handler with the ACPI PCI one
          * for cold plugged bridges only.
          */
-        if (!s->legacy_piix &&
+        if (s->use_acpi_hotplug_bridge &&
             object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {
             PCIBus *sec = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
 
-            /* Remove all hot-plug handlers if hot-plug is disabled on slot */
-            if (object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT) &&
-                !PCIE_SLOT(pdev)->hotplug) {
-                qbus_set_hotplug_handler(BUS(sec), NULL);
-                return;
-            }
-
             qbus_set_hotplug_handler(BUS(sec), OBJECT(hotplug_dev));
             /* We don't have to overwrite any other hotplug handler yet */
             assert(QLIST_EMPTY(&sec->child));
@@ -401,17 +332,6 @@ void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
     trace_acpi_pci_unplug(PCI_SLOT(pdev->devfn),
                           acpi_pcihp_get_bsel(pci_get_bus(pdev)));
 
-    /*
-     * clean up acpi-index so it could reused by another device
-     */
-    if (pdev->acpi_index) {
-        GSequence *used_indexes = pci_acpi_index_list();
-
-        g_sequence_remove(g_sequence_lookup(used_indexes,
-                          GINT_TO_POINTER(pdev->acpi_index),
-                          g_cmp_uint32, NULL));
-    }
-
     qdev_unrealize(dev);
 }
 
@@ -441,6 +361,24 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev,
     acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS);
 }
 
+bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus)
+{
+    Object *o = OBJECT(bus->parent);
+
+    if (s->use_acpi_hotplug_bridge &&
+        object_dynamic_cast(o, TYPE_PCI_BRIDGE)) {
+        if (object_dynamic_cast(o, TYPE_PCIE_SLOT) && !PCIE_SLOT(o)->hotplug) {
+            return false;
+        }
+        return true;
+    }
+
+    if (s->use_acpi_root_pci_hotplug) {
+        return true;
+    }
+    return false;
+}
+
 static uint64_t pci_read(void *opaque, hwaddr addr, unsigned int size)
 {
     AcpiPciHpState *s = opaque;
@@ -454,7 +392,7 @@ static uint64_t pci_read(void *opaque, hwaddr addr, unsigned int size)
     switch (addr) {
     case PCI_UP_BASE:
         val = s->acpi_pcihp_pci_status[bsel].up;
-        if (!s->legacy_piix) {
+        if (s->use_acpi_hotplug_bridge) {
             s->acpi_pcihp_pci_status[bsel].up = 0;
         }
         trace_acpi_pci_up_read(val);
@@ -529,7 +467,8 @@ static void pci_write(void *opaque, hwaddr addr, uint64_t data,
         trace_acpi_pci_ej_write(addr, data);
         break;
     case PCI_SEL_BASE:
-        s->hotplug_select = s->legacy_piix ? ACPI_PCIHP_BSEL_DEFAULT : data;
+        s->hotplug_select = s->use_acpi_hotplug_bridge ? data :
+            ACPI_PCIHP_BSEL_DEFAULT;
         trace_acpi_pci_sel_write(addr, data);
     default:
         break;
@@ -547,14 +486,13 @@ static const MemoryRegionOps acpi_pcihp_io_ops = {
 };
 
 void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
-                     MemoryRegion *address_space_io, bool bridges_enabled,
+                     MemoryRegion *address_space_io,
                      uint16_t io_base)
 {
     s->io_len = ACPI_PCIHP_SIZE;
     s->io_base = io_base;
 
     s->root = root_bus;
-    s->legacy_piix = !bridges_enabled;
 
     memory_region_init_io(&s->io, owner, &acpi_pcihp_io_ops, s,
                           "acpi-pci-hotplug", s->io_len);
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index eac2125abd..63d2113b86 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -170,14 +170,14 @@ static const VMStateDescription vmstate_pci_status = {
 static bool vmstate_test_use_acpi_hotplug_bridge(void *opaque, int version_id)
 {
     PIIX4PMState *s = opaque;
-    return s->use_acpi_hotplug_bridge;
+    return s->acpi_pci_hotplug.use_acpi_hotplug_bridge;
 }
 
 static bool vmstate_test_no_use_acpi_hotplug_bridge(void *opaque,
                                                     int version_id)
 {
     PIIX4PMState *s = opaque;
-    return !s->use_acpi_hotplug_bridge;
+    return !s->acpi_pci_hotplug.use_acpi_hotplug_bridge;
 }
 
 static bool vmstate_test_use_memhp(void *opaque)
@@ -234,7 +234,8 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id)
 static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id)
 {
     PIIX4PMState *s = PIIX4_PM(opaque);
-    return s->use_acpi_hotplug_bridge && !s->not_migrate_acpi_index;
+    return s->acpi_pci_hotplug.use_acpi_hotplug_bridge &&
+           !s->not_migrate_acpi_index;
 }
 
 /* qemu-kvm 1.2 uses version 3 but advertised as 2
@@ -303,8 +304,9 @@ static void piix4_pm_reset(DeviceState *dev)
     acpi_update_sci(&s->ar, s->irq);
 
     pm_io_space_update(s);
-    if (s->use_acpi_hotplug_bridge || s->use_acpi_root_pci_hotplug) {
-        acpi_pcihp_reset(&s->acpi_pci_hotplug, !s->use_acpi_root_pci_hotplug);
+    if (s->acpi_pci_hotplug.use_acpi_hotplug_bridge ||
+        s->acpi_pci_hotplug.use_acpi_root_pci_hotplug) {
+        acpi_pcihp_reset(&s->acpi_pci_hotplug);
     }
 }
 
@@ -402,6 +404,13 @@ static void piix4_device_unplug_cb(HotplugHandler *hotplug_dev,
     }
 }
 
+static bool piix4_is_hotpluggable_bus(HotplugHandler *hotplug_dev,
+                                      BusState *bus)
+{
+    PIIX4PMState *s = PIIX4_PM(hotplug_dev);
+    return acpi_pcihp_is_hotpluggbale_bus(&s->acpi_pci_hotplug, bus);
+}
+
 static void piix4_pm_machine_ready(Notifier *n, void *opaque)
 {
     PIIX4PMState *s = container_of(n, PIIX4PMState, machine_ready);
@@ -487,12 +496,11 @@ static void piix4_pm_realize(PCIDevice *dev, Error **errp)
     qemu_add_machine_init_done_notifier(&s->machine_ready);
 
     if (xen_enabled()) {
-        s->use_acpi_hotplug_bridge = false;
+        s->acpi_pci_hotplug.use_acpi_hotplug_bridge = false;
     }
 
     piix4_acpi_system_hot_add_init(pci_address_space_io(dev),
                                    pci_get_bus(dev), s);
-    qbus_set_hotplug_handler(BUS(pci_get_bus(dev)), OBJECT(s));
 
     piix4_pm_add_properties(s);
 }
@@ -561,9 +569,11 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent,
                           "acpi-gpe0", GPE_LEN);
     memory_region_add_subregion(parent, GPE_BASE, &s->io_gpe);
 
-    if (s->use_acpi_hotplug_bridge || s->use_acpi_root_pci_hotplug) {
+    if (s->acpi_pci_hotplug.use_acpi_hotplug_bridge ||
+        s->acpi_pci_hotplug.use_acpi_root_pci_hotplug) {
         acpi_pcihp_init(OBJECT(s), &s->acpi_pci_hotplug, bus, parent,
-                        s->use_acpi_hotplug_bridge, ACPI_PCIHP_ADDR_PIIX4);
+                        ACPI_PCIHP_ADDR_PIIX4);
+        qbus_set_hotplug_handler(BUS(pci_get_bus(PCI_DEVICE(s))), OBJECT(s));
     }
 
     s->cpu_hotplug_legacy = true;
@@ -602,9 +612,9 @@ static Property piix4_pm_properties[] = {
     DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0),
     DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2),
     DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState,
-                     use_acpi_hotplug_bridge, true),
+                     acpi_pci_hotplug.use_acpi_hotplug_bridge, true),
     DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCI_ROOTHP, PIIX4PMState,
-                     use_acpi_root_pci_hotplug, true),
+                     acpi_pci_hotplug.use_acpi_root_pci_hotplug, true),
     DEFINE_PROP_BOOL("memory-hotplug-support", PIIX4PMState,
                      acpi_memory_hotplug.is_enabled, true),
     DEFINE_PROP_BOOL("smm-compat", PIIX4PMState, smm_compat, false),
@@ -641,6 +651,7 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data)
     hc->plug = piix4_device_plug_cb;
     hc->unplug_request = piix4_device_unplug_request_cb;
     hc->unplug = piix4_device_unplug_cb;
+    hc->is_hotpluggable_bus = piix4_is_hotpluggable_bus;
     adevc->ospm_status = piix4_ospm_status;
     adevc->send_event = piix4_send_gpe;
     adevc->madt_cpu = pc_madt_cpu_entry;
diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
index 3edd303a33..b665d4f565 100644
--- a/hw/cxl/cxl-component-utils.c
+++ b/hw/cxl/cxl-component-utils.c
@@ -141,17 +141,19 @@ static void ras_init_common(uint32_t *reg_state, uint32_t *write_msk)
      * Error status is RW1C but given bits are not yet set, it can
      * be handled as RO.
      */
-    reg_state[R_CXL_RAS_UNC_ERR_STATUS] = 0;
+    stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_STATUS, 0);
+    stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_STATUS, 0x1cfff);
     /* Bits 12-13 and 17-31 reserved in CXL 2.0 */
-    reg_state[R_CXL_RAS_UNC_ERR_MASK] = 0x1cfff;
-    write_msk[R_CXL_RAS_UNC_ERR_MASK] = 0x1cfff;
-    reg_state[R_CXL_RAS_UNC_ERR_SEVERITY] = 0x1cfff;
-    write_msk[R_CXL_RAS_UNC_ERR_SEVERITY] = 0x1cfff;
-    reg_state[R_CXL_RAS_COR_ERR_STATUS] = 0;
-    reg_state[R_CXL_RAS_COR_ERR_MASK] = 0x7f;
-    write_msk[R_CXL_RAS_COR_ERR_MASK] = 0x7f;
+    stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_MASK, 0x1cfff);
+    stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_MASK, 0x1cfff);
+    stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_SEVERITY, 0x1cfff);
+    stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_SEVERITY, 0x1cfff);
+    stl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS, 0);
+    stl_le_p(write_msk + R_CXL_RAS_COR_ERR_STATUS, 0x7f);
+    stl_le_p(reg_state + R_CXL_RAS_COR_ERR_MASK, 0x7f);
+    stl_le_p(write_msk + R_CXL_RAS_COR_ERR_MASK, 0x7f);
     /* CXL switches and devices must set */
-    reg_state[R_CXL_RAS_ERR_CAP_CTRL] = 0x00;
+    stl_le_p(reg_state + R_CXL_RAS_ERR_CAP_CTRL, 0x200);
 }
 
 static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c
index 3c1ec8732a..6e923ceeaf 100644
--- a/hw/cxl/cxl-host.c
+++ b/hw/cxl/cxl-host.c
@@ -146,21 +146,28 @@ static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr)
         return NULL;
     }
 
-    hb_cstate = cxl_get_hb_cstate(hb);
-    if (!hb_cstate) {
-        return NULL;
-    }
+    if (cxl_get_hb_passthrough(hb)) {
+        rp = pcie_find_port_first(hb->bus);
+        if (!rp) {
+            return NULL;
+        }
+    } else {
+        hb_cstate = cxl_get_hb_cstate(hb);
+        if (!hb_cstate) {
+            return NULL;
+        }
 
-    cache_mem = hb_cstate->crb.cache_mem_registers;
+        cache_mem = hb_cstate->crb.cache_mem_registers;
 
-    target_found = cxl_hdm_find_target(cache_mem, addr, &target);
-    if (!target_found) {
-        return NULL;
-    }
+        target_found = cxl_hdm_find_target(cache_mem, addr, &target);
+        if (!target_found) {
+            return NULL;
+        }
 
-    rp = pcie_find_port_by_pn(hb->bus, target);
-    if (!rp) {
-        return NULL;
+        rp = pcie_find_port_by_pn(hb->bus, target);
+        if (!rp) {
+            return NULL;
+        }
     }
 
     d = pci_bridge_get_sec_bus(PCI_BRIDGE(rp))->devices[0];
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index b19fb4259e..ec857a117e 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -373,6 +373,104 @@ Aml *aml_pci_device_dsm(void)
     return method;
 }
 
+static void build_append_pci_dsm_func0_common(Aml *ctx, Aml *retvar)
+{
+    Aml *UUID, *ifctx1;
+    uint8_t byte_list[1] = { 0 }; /* nothing supported yet */
+
+    aml_append(ctx, aml_store(aml_buffer(1, byte_list), retvar));
+    /*
+     * PCI Firmware Specification 3.1
+     * 4.6.  _DSM Definitions for PCI
+     */
+    UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D");
+    ifctx1 = aml_if(aml_lnot(aml_equal(aml_arg(0), UUID)));
+    {
+        /* call is for unsupported UUID, bail out */
+        aml_append(ifctx1, aml_return(retvar));
+    }
+    aml_append(ctx, ifctx1);
+
+    ifctx1 = aml_if(aml_lless(aml_arg(1), aml_int(2)));
+    {
+        /* call is for unsupported REV, bail out */
+        aml_append(ifctx1, aml_return(retvar));
+    }
+    aml_append(ctx, ifctx1);
+}
+
+static Aml *aml_pci_edsm(void)
+{
+    Aml *method, *ifctx;
+    Aml *zero = aml_int(0);
+    Aml *func = aml_arg(2);
+    Aml *ret = aml_local(0);
+    Aml *aidx = aml_local(1);
+    Aml *params = aml_arg(4);
+
+    method = aml_method("EDSM", 5, AML_SERIALIZED);
+
+    /* get supported functions */
+    ifctx = aml_if(aml_equal(func, zero));
+    {
+        /* 1: have supported functions */
+        /* 7: support for function 7 */
+        const uint8_t caps = 1 | BIT(7);
+        build_append_pci_dsm_func0_common(ifctx, ret);
+        aml_append(ifctx, aml_store(aml_int(caps), aml_index(ret, zero)));
+        aml_append(ifctx, aml_return(ret));
+    }
+    aml_append(method, ifctx);
+
+    /* handle specific functions requests */
+    /*
+     * PCI Firmware Specification 3.1
+     * 4.6.7. _DSM for Naming a PCI or PCI Express Device Under
+     *        Operating Systems
+     */
+    ifctx = aml_if(aml_equal(func, aml_int(7)));
+    {
+       Aml *pkg = aml_package(2);
+       aml_append(pkg, zero);
+       /* optional, if not impl. should return null string */
+       aml_append(pkg, aml_string("%s", ""));
+       aml_append(ifctx, aml_store(pkg, ret));
+
+       /*
+        * IASL is fine when initializing Package with computational data,
+        * however it makes guest unhappy /it fails to process such AML/.
+        * So use runtime assignment to set acpi-index after initializer
+        * to make OSPM happy.
+        */
+       aml_append(ifctx,
+           aml_store(aml_derefof(aml_index(params, aml_int(0))), aidx));
+       aml_append(ifctx, aml_store(aidx, aml_index(ret, zero)));
+       aml_append(ifctx, aml_return(ret));
+    }
+    aml_append(method, ifctx);
+
+    return method;
+}
+
+static Aml *aml_pci_static_endpoint_dsm(PCIDevice *pdev)
+{
+    Aml *method;
+
+    g_assert(pdev->acpi_index != 0);
+    method = aml_method("_DSM", 4, AML_SERIALIZED);
+    {
+        Aml *params = aml_local(0);
+        Aml *pkg = aml_package(1);
+        aml_append(pkg, aml_int(pdev->acpi_index));
+        aml_append(method, aml_store(pkg, params));
+        aml_append(method,
+            aml_return(aml_call5("EDSM", aml_arg(0), aml_arg(1),
+                                 aml_arg(2), aml_arg(3), params))
+        );
+    }
+    return method;
+}
+
 static void build_append_pcihp_notify_entry(Aml *method, int slot)
 {
     Aml *if_ctx;
@@ -396,12 +494,6 @@ static bool is_devfn_ignored_generic(const int devfn, const PCIBus *bus)
             if (DEVICE(pdev)->hotplugged) {
                 return true;
             }
-        } else if (!get_dev_aml_func(DEVICE(pdev))) {
-            /*
-             * Ignore all other devices on !0 functions unless they
-             * have AML description (i.e have get_dev_aml_func() != 0)
-             */
-            return true;
         }
     }
     return false;
@@ -428,12 +520,14 @@ static bool is_devfn_ignored_hotplug(const int devfn, const PCIBus *bus)
     return false;
 }
 
-static void build_append_pcihp_slots(Aml *parent_scope, PCIBus *bus,
-                                     QObject *bsel)
+void build_append_pcihp_slots(Aml *parent_scope, PCIBus *bus)
 {
     int devfn;
     Aml *dev, *notify_method = NULL, *method;
+    QObject *bsel = object_property_get_qobject(OBJECT(bus),
+                        ACPI_PCIHP_PROP_BSEL, NULL);
     uint64_t bsel_val = qnum_get_uint(qobject_to(QNum, bsel));
+    qobject_unref(bsel);
 
     aml_append(parent_scope, aml_name_decl("BSEL", aml_int(bsel_val)));
     notify_method = aml_method("DVNT", 2, AML_NOTSERIALIZED);
@@ -478,12 +572,9 @@ static void build_append_pcihp_slots(Aml *parent_scope, PCIBus *bus,
 
 void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus)
 {
-    QObject *bsel;
     int devfn;
     Aml *dev;
 
-    bsel = object_property_get_qobject(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, NULL);
-
     for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
         /* ACPI spec: 1.0b: Table 6-2 _ADR Object Bus Types, PCI type */
         int adr = PCI_SLOT(devfn) << 16 | PCI_FUNC(devfn);
@@ -498,16 +589,16 @@ void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus)
         aml_append(dev, aml_name_decl("_ADR", aml_int(adr)));
 
         call_dev_aml_func(DEVICE(bus->devices[devfn]), dev);
+        /* add _DSM if device has acpi-index set */
+        if (pdev->acpi_index &&
+            !object_property_get_bool(OBJECT(pdev), "hotpluggable",
+                                      &error_abort)) {
+            aml_append(dev, aml_pci_static_endpoint_dsm(pdev));
+        }
 
         /* device descriptor has been composed, add it into parent context */
         aml_append(parent_scope, dev);
     }
-
-    if (bsel) {
-        build_append_pcihp_slots(parent_scope, bus, bsel);
-    }
-
-    qobject_unref(bsel);
 }
 
 static bool build_append_notfication_callback(Aml *parent_scope,
@@ -517,16 +608,24 @@ static bool build_append_notfication_callback(Aml *parent_scope,
     PCIBus *sec;
     QObject *bsel;
     int nr_notifiers = 0;
+    GQueue *pcnt_bus_list = g_queue_new();
 
     QLIST_FOREACH(sec, &bus->child, sibling) {
         Aml *br_scope = aml_scope("S%.02X", sec->parent_dev->devfn);
-        if (pci_bus_is_root(sec) ||
-            !object_property_find(OBJECT(sec), ACPI_PCIHP_PROP_BSEL)) {
+        if (pci_bus_is_root(sec)) {
             continue;
         }
         nr_notifiers = nr_notifiers +
                        build_append_notfication_callback(br_scope, sec);
-        aml_append(parent_scope, br_scope);
+        /*
+         * add new child scope to parent
+         * and keep track of bus that have PCNT,
+         * bus list is used later to call children PCNTs from this level PCNT
+         */
+        if (nr_notifiers) {
+            g_queue_push_tail(pcnt_bus_list, sec);
+            aml_append(parent_scope, br_scope);
+        }
     }
 
     /*
@@ -550,30 +649,25 @@ static bool build_append_notfication_callback(Aml *parent_scope,
     }
 
     /* Notify about child bus events in any case */
-    QLIST_FOREACH(sec, &bus->child, sibling) {
-        if (pci_bus_is_root(sec) ||
-            !object_property_find(OBJECT(sec), ACPI_PCIHP_PROP_BSEL)) {
-            continue;
-        }
-
+    while ((sec = g_queue_pop_head(pcnt_bus_list))) {
         aml_append(method, aml_name("^S%.02X.PCNT", sec->parent_dev->devfn));
     }
 
     aml_append(parent_scope, method);
     qobject_unref(bsel);
+    g_queue_free(pcnt_bus_list);
     return !!nr_notifiers;
 }
 
 static Aml *aml_pci_pdsm(void)
 {
-    Aml *method, *UUID, *ifctx, *ifctx1;
+    Aml *method, *ifctx, *ifctx1;
     Aml *ret = aml_local(0);
     Aml *caps = aml_local(1);
     Aml *acpi_index = aml_local(2);
     Aml *zero = aml_int(0);
     Aml *one = aml_int(1);
     Aml *func = aml_arg(2);
-    Aml *rev = aml_arg(1);
     Aml *params = aml_arg(4);
     Aml *bnum = aml_derefof(aml_index(params, aml_int(0)));
     Aml *sunum = aml_derefof(aml_index(params, aml_int(1)));
@@ -583,29 +677,9 @@ static Aml *aml_pci_pdsm(void)
     /* get supported functions */
     ifctx = aml_if(aml_equal(func, zero));
     {
-        uint8_t byte_list[1] = { 0 }; /* nothing supported yet */
-        aml_append(ifctx, aml_store(aml_buffer(1, byte_list), ret));
-        aml_append(ifctx, aml_store(zero, caps));
-
-       /*
-        * PCI Firmware Specification 3.1
-        * 4.6.  _DSM Definitions for PCI
-        */
-        UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D");
-        ifctx1 = aml_if(aml_lnot(aml_equal(aml_arg(0), UUID)));
-        {
-            /* call is for unsupported UUID, bail out */
-            aml_append(ifctx1, aml_return(ret));
-        }
-        aml_append(ifctx, ifctx1);
-
-        ifctx1 = aml_if(aml_lless(rev, aml_int(2)));
-        {
-            /* call is for unsupported REV, bail out */
-            aml_append(ifctx1, aml_return(ret));
-        }
-        aml_append(ifctx, ifctx1);
+        build_append_pci_dsm_func0_common(ifctx, ret);
 
+        aml_append(ifctx, aml_store(zero, caps));
         aml_append(ifctx,
             aml_store(aml_call2("AIDX", bnum, sunum), acpi_index));
         /*
@@ -1388,6 +1462,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
         aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03")));
         aml_append(dev, aml_name_decl("_ADR", aml_int(0)));
         aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid)));
+        aml_append(dev, aml_pci_edsm());
         aml_append(sb_scope, dev);
         aml_append(dsdt, sb_scope);
 
@@ -1403,6 +1478,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
         aml_append(dev, aml_name_decl("_ADR", aml_int(0)));
         aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid)));
         aml_append(dev, build_q35_osc_method(!pm->pcihp_bridge_en));
+        aml_append(dev, aml_pci_edsm());
         aml_append(sb_scope, dev);
         if (mcfg_valid) {
             aml_append(sb_scope, build_q35_dram_controller(&mcfg));
@@ -1710,6 +1786,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
             Aml *scope = aml_scope("PCI0");
             /* Scan all PCI buses. Generate tables to support hotplug. */
             build_append_pci_bus_devices(scope, bus);
+            if (object_property_find(OBJECT(bus), ACPI_PCIHP_PROP_BSEL)) {
+                build_append_pcihp_slots(scope, bus);
+            }
             aml_append(sb_scope, scope);
         }
     }
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index d8303d0322..9714b0001e 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -865,6 +865,7 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data)
     hc->plug = ich9_pm_device_plug_cb;
     hc->unplug_request = ich9_pm_device_unplug_request_cb;
     hc->unplug = ich9_pm_device_unplug_cb;
+    hc->is_hotpluggable_bus = ich9_pm_is_hotpluggable_bus;
     adevc->ospm_status = ich9_pm_ospm_status;
     adevc->send_event = ich9_send_gpe;
     adevc->madt_cpu = pc_madt_cpu_entry;
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 217a5e639b..abe60b362c 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1,6 +1,7 @@
 #include "qemu/osdep.h"
 #include "qemu/units.h"
 #include "qemu/error-report.h"
+#include "qapi/qapi-commands-cxl.h"
 #include "hw/mem/memory-device.h"
 #include "hw/mem/pc-dimm.h"
 #include "hw/pci/pci.h"
@@ -250,6 +251,7 @@ static void ct3d_config_write(PCIDevice *pci_dev, uint32_t addr, uint32_t val,
 
     pcie_doe_write_config(&ct3d->doe_cdat, addr, val, size);
     pci_default_write_config(pci_dev, addr, val, size);
+    pcie_aer_write_config(pci_dev, addr, val, size);
 }
 
 /*
@@ -322,6 +324,66 @@ static void hdm_decoder_commit(CXLType3Dev *ct3d, int which)
     ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1);
 }
 
+static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err)
+{
+    switch (qmp_err) {
+    case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_PARITY:
+        return CXL_RAS_UNC_ERR_CACHE_DATA_PARITY;
+    case CXL_UNCOR_ERROR_TYPE_CACHE_ADDRESS_PARITY:
+        return CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY;
+    case CXL_UNCOR_ERROR_TYPE_CACHE_BE_PARITY:
+        return CXL_RAS_UNC_ERR_CACHE_BE_PARITY;
+    case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_ECC:
+        return CXL_RAS_UNC_ERR_CACHE_DATA_ECC;
+    case CXL_UNCOR_ERROR_TYPE_MEM_DATA_PARITY:
+        return CXL_RAS_UNC_ERR_MEM_DATA_PARITY;
+    case CXL_UNCOR_ERROR_TYPE_MEM_ADDRESS_PARITY:
+        return CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY;
+    case CXL_UNCOR_ERROR_TYPE_MEM_BE_PARITY:
+        return CXL_RAS_UNC_ERR_MEM_BE_PARITY;
+    case CXL_UNCOR_ERROR_TYPE_MEM_DATA_ECC:
+        return CXL_RAS_UNC_ERR_MEM_DATA_ECC;
+    case CXL_UNCOR_ERROR_TYPE_REINIT_THRESHOLD:
+        return CXL_RAS_UNC_ERR_REINIT_THRESHOLD;
+    case CXL_UNCOR_ERROR_TYPE_RSVD_ENCODING:
+        return CXL_RAS_UNC_ERR_RSVD_ENCODING;
+    case CXL_UNCOR_ERROR_TYPE_POISON_RECEIVED:
+        return CXL_RAS_UNC_ERR_POISON_RECEIVED;
+    case CXL_UNCOR_ERROR_TYPE_RECEIVER_OVERFLOW:
+        return CXL_RAS_UNC_ERR_RECEIVER_OVERFLOW;
+    case CXL_UNCOR_ERROR_TYPE_INTERNAL:
+        return CXL_RAS_UNC_ERR_INTERNAL;
+    case CXL_UNCOR_ERROR_TYPE_CXL_IDE_TX:
+        return CXL_RAS_UNC_ERR_CXL_IDE_TX;
+    case CXL_UNCOR_ERROR_TYPE_CXL_IDE_RX:
+        return CXL_RAS_UNC_ERR_CXL_IDE_RX;
+    default:
+        return -EINVAL;
+    }
+}
+
+static int ct3d_qmp_cor_err_to_cxl(CxlCorErrorType qmp_err)
+{
+    switch (qmp_err) {
+    case CXL_COR_ERROR_TYPE_CACHE_DATA_ECC:
+        return CXL_RAS_COR_ERR_CACHE_DATA_ECC;
+    case CXL_COR_ERROR_TYPE_MEM_DATA_ECC:
+        return CXL_RAS_COR_ERR_MEM_DATA_ECC;
+    case CXL_COR_ERROR_TYPE_CRC_THRESHOLD:
+        return CXL_RAS_COR_ERR_CRC_THRESHOLD;
+    case CXL_COR_ERROR_TYPE_RETRY_THRESHOLD:
+        return CXL_RAS_COR_ERR_RETRY_THRESHOLD;
+    case CXL_COR_ERROR_TYPE_CACHE_POISON_RECEIVED:
+        return CXL_RAS_COR_ERR_CACHE_POISON_RECEIVED;
+    case CXL_COR_ERROR_TYPE_MEM_POISON_RECEIVED:
+        return CXL_RAS_COR_ERR_MEM_POISON_RECEIVED;
+    case CXL_COR_ERROR_TYPE_PHYSICAL:
+        return CXL_RAS_COR_ERR_PHYSICAL;
+    default:
+        return -EINVAL;
+    }
+}
+
 static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
                            unsigned size)
 {
@@ -340,6 +402,83 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
         should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
         which_hdm = 0;
         break;
+    case A_CXL_RAS_UNC_ERR_STATUS:
+    {
+        uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
+        uint32_t fe = FIELD_EX32(capctrl, CXL_RAS_ERR_CAP_CTRL, FIRST_ERROR_POINTER);
+        CXLError *cxl_err;
+        uint32_t unc_err;
+
+        /*
+         * If single bit written that corresponds to the first error
+         * pointer being cleared, update the status and header log.
+         */
+        if (!QTAILQ_EMPTY(&ct3d->error_list)) {
+            if ((1 << fe) ^ value) {
+                CXLError *cxl_next;
+                /*
+                 * Software is using wrong flow for multiple header recording
+                 * Following behavior in PCIe r6.0 and assuming multiple
+                 * header support. Implementation defined choice to clear all
+                 * matching records if more than one bit set - which corresponds
+                 * closest to behavior of hardware not capable of multiple
+                 * header recording.
+                 */
+                QTAILQ_FOREACH_SAFE(cxl_err, &ct3d->error_list, node, cxl_next) {
+                    if ((1 << cxl_err->type) & value) {
+                        QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node);
+                        g_free(cxl_err);
+                    }
+                }
+            } else {
+                /* Done with previous FE, so drop from list */
+                cxl_err = QTAILQ_FIRST(&ct3d->error_list);
+                QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node);
+                g_free(cxl_err);
+            }
+
+            /*
+             * If there is another FE, then put that in place and update
+             * the header log
+             */
+            if (!QTAILQ_EMPTY(&ct3d->error_list)) {
+                uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0];
+                int i;
+
+                cxl_err = QTAILQ_FIRST(&ct3d->error_list);
+                for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) {
+                    stl_le_p(header_log + i, cxl_err->header[i]);
+                }
+                capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
+                                     FIRST_ERROR_POINTER, cxl_err->type);
+            } else {
+                /*
+                 * If no more errors, then follow recomendation of PCI spec
+                 * r6.0 6.2.4.2 to set the first error pointer to a status
+                 * bit that will never be used.
+                 */
+                capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
+                                     FIRST_ERROR_POINTER,
+                                     CXL_RAS_UNC_ERR_CXL_UNUSED);
+            }
+            stl_le_p((uint8_t *)cache_mem + A_CXL_RAS_ERR_CAP_CTRL, capctrl);
+        }
+        unc_err = 0;
+        QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) {
+            unc_err |= 1 << cxl_err->type;
+        }
+        stl_le_p((uint8_t *)cache_mem + offset, unc_err);
+
+        return;
+    }
+    case A_CXL_RAS_COR_ERR_STATUS:
+    {
+        uint32_t rw1c = value;
+        uint32_t temp = ldl_le_p((uint8_t *)cache_mem + offset);
+        temp &= ~rw1c;
+        stl_le_p((uint8_t *)cache_mem + offset, temp);
+        return;
+    }
     default:
         break;
     }
@@ -403,6 +542,8 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
     unsigned short msix_num = 1;
     int i, rc;
 
+    QTAILQ_INIT(&ct3d->error_list);
+
     if (!cxl_setup_memory(ct3d, errp)) {
         return;
     }
@@ -452,8 +593,19 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
     cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table;
     cxl_cstate->cdat.private = ct3d;
     cxl_doe_cdat_init(cxl_cstate, errp);
+
+    pcie_cap_deverr_init(pci_dev);
+    /* Leave a bit of room for expansion */
+    rc = pcie_aer_init(pci_dev, PCI_ERR_VER, 0x200, PCI_ERR_SIZEOF, NULL);
+    if (rc) {
+        goto err_release_cdat;
+    }
+
     return;
 
+err_release_cdat:
+    cxl_doe_cdat_release(cxl_cstate);
+    g_free(regs->special_ops);
 err_address_space_free:
     address_space_destroy(&ct3d->hostmem_as);
     return;
@@ -465,6 +617,7 @@ static void ct3_exit(PCIDevice *pci_dev)
     CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
     ComponentRegisters *regs = &cxl_cstate->crb;
 
+    pcie_aer_exit(pci_dev);
     cxl_doe_cdat_release(cxl_cstate);
     g_free(regs->special_ops);
     address_space_destroy(&ct3d->hostmem_as);
@@ -618,6 +771,147 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
      */
 }
 
+/* For uncorrectable errors include support for multiple header recording */
+void qmp_cxl_inject_uncorrectable_errors(const char *path,
+                                         CXLUncorErrorRecordList *errors,
+                                         Error **errp)
+{
+    Object *obj = object_resolve_path(path, NULL);
+    static PCIEAERErr err = {};
+    CXLType3Dev *ct3d;
+    CXLError *cxl_err;
+    uint32_t *reg_state;
+    uint32_t unc_err;
+    bool first;
+
+    if (!obj) {
+        error_setg(errp, "Unable to resolve path");
+        return;
+    }
+
+    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
+        error_setg(errp, "Path does not point to a CXL type 3 device");
+        return;
+    }
+
+    err.status = PCI_ERR_UNC_INTN;
+    err.source_id = pci_requester_id(PCI_DEVICE(obj));
+    err.flags = 0;
+
+    ct3d = CXL_TYPE3(obj);
+
+    first = QTAILQ_EMPTY(&ct3d->error_list);
+    reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
+    while (errors) {
+        uint32List *header = errors->value->header;
+        uint8_t header_count = 0;
+        int cxl_err_code;
+
+        cxl_err_code = ct3d_qmp_uncor_err_to_cxl(errors->value->type);
+        if (cxl_err_code < 0) {
+            error_setg(errp, "Unknown error code");
+            return;
+        }
+
+        /* If the error is masked, nothing to do here */
+        if (!((1 << cxl_err_code) &
+              ~ldl_le_p(reg_state + R_CXL_RAS_UNC_ERR_MASK))) {
+            errors = errors->next;
+            continue;
+        }
+
+        cxl_err = g_malloc0(sizeof(*cxl_err));
+        if (!cxl_err) {
+            return;
+        }
+
+        cxl_err->type = cxl_err_code;
+        while (header && header_count < 32) {
+            cxl_err->header[header_count++] = header->value;
+            header = header->next;
+        }
+        if (header_count > 32) {
+            error_setg(errp, "Header must be 32 DWORD or less");
+            return;
+        }
+        QTAILQ_INSERT_TAIL(&ct3d->error_list, cxl_err, node);
+
+        errors = errors->next;
+    }
+
+    if (first && !QTAILQ_EMPTY(&ct3d->error_list)) {
+        uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
+        uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
+        uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0];
+        int i;
+
+        cxl_err = QTAILQ_FIRST(&ct3d->error_list);
+        for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) {
+            stl_le_p(header_log + i, cxl_err->header[i]);
+        }
+
+        capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
+                             FIRST_ERROR_POINTER, cxl_err->type);
+        stl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL, capctrl);
+    }
+
+    unc_err = 0;
+    QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) {
+        unc_err |= (1 << cxl_err->type);
+    }
+    if (!unc_err) {
+        return;
+    }
+
+    stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_STATUS, unc_err);
+    pcie_aer_inject_error(PCI_DEVICE(obj), &err);
+
+    return;
+}
+
+void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
+                                      Error **errp)
+{
+    static PCIEAERErr err = {};
+    Object *obj = object_resolve_path(path, NULL);
+    CXLType3Dev *ct3d;
+    uint32_t *reg_state;
+    uint32_t cor_err;
+    int cxl_err_type;
+
+    if (!obj) {
+        error_setg(errp, "Unable to resolve path");
+        return;
+    }
+    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
+        error_setg(errp, "Path does not point to a CXL type 3 device");
+        return;
+    }
+
+    err.status = PCI_ERR_COR_INTERNAL;
+    err.source_id = pci_requester_id(PCI_DEVICE(obj));
+    err.flags = PCIE_AER_ERR_IS_CORRECTABLE;
+
+    ct3d = CXL_TYPE3(obj);
+    reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
+    cor_err = ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS);
+
+    cxl_err_type = ct3d_qmp_cor_err_to_cxl(type);
+    if (cxl_err_type < 0) {
+        error_setg(errp, "Invalid COR error");
+        return;
+    }
+    /* If the error is masked, nothting to do here */
+    if (!((1 << cxl_err_type) & ~ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_MASK))) {
+        return;
+    }
+
+    cor_err |= (1 << cxl_err_type);
+    stl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS, cor_err);
+
+    pcie_aer_inject_error(PCI_DEVICE(obj), &err);
+}
+
 static void ct3_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
new file mode 100644
index 0000000000..d574c58f9a
--- /dev/null
+++ b/hw/mem/cxl_type3_stubs.c
@@ -0,0 +1,17 @@
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-cxl.h"
+
+void qmp_cxl_inject_uncorrectable_errors(const char *path,
+                                         CXLUncorErrorRecordList *errors,
+                                         Error **errp)
+{
+    error_setg(errp, "CXL Type 3 support is not compiled in");
+}
+
+void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
+                                      Error **errp)
+{
+    error_setg(errp, "CXL Type 3 support is not compiled in");
+}
diff --git a/hw/mem/meson.build b/hw/mem/meson.build
index 609b2b36fc..56c2618b84 100644
--- a/hw/mem/meson.build
+++ b/hw/mem/meson.build
@@ -4,6 +4,8 @@ mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c'))
 mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c'))
 mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c'))
 mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c'))
+softmmu_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_false: files('cxl_type3_stubs.c'))
+softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('cxl_type3_stubs.c'))
 
 softmmu_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss)
 
diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c
index 6664783974..7dfd20aa67 100644
--- a/hw/pci-bridge/cxl_root_port.c
+++ b/hw/pci-bridge/cxl_root_port.c
@@ -22,6 +22,7 @@
 #include "qemu/range.h"
 #include "hw/pci/pci_bridge.h"
 #include "hw/pci/pcie_port.h"
+#include "hw/pci/msi.h"
 #include "hw/qdev-properties.h"
 #include "hw/sysbus.h"
 #include "qapi/error.h"
@@ -29,6 +30,10 @@
 
 #define CXL_ROOT_PORT_DID 0x7075
 
+#define CXL_RP_MSI_OFFSET               0x60
+#define CXL_RP_MSI_SUPPORTED_FLAGS      PCI_MSI_FLAGS_MASKBIT
+#define CXL_RP_MSI_NR_VECTOR            2
+
 /* Copied from the gen root port which we derive */
 #define GEN_PCIE_ROOT_PORT_AER_OFFSET 0x100
 #define GEN_PCIE_ROOT_PORT_ACS_OFFSET \
@@ -47,6 +52,49 @@ typedef struct CXLRootPort {
 #define TYPE_CXL_ROOT_PORT "cxl-rp"
 DECLARE_INSTANCE_CHECKER(CXLRootPort, CXL_ROOT_PORT, TYPE_CXL_ROOT_PORT)
 
+/*
+ * If two MSI vector are allocated, Advanced Error Interrupt Message Number
+ * is 1. otherwise 0.
+ * 17.12.5.10 RPERRSTS,  32:27 bit Advanced Error Interrupt Message Number.
+ */
+static uint8_t cxl_rp_aer_vector(const PCIDevice *d)
+{
+    switch (msi_nr_vectors_allocated(d)) {
+    case 1:
+        return 0;
+    case 2:
+        return 1;
+    case 4:
+    case 8:
+    case 16:
+    case 32:
+    default:
+        break;
+    }
+    abort();
+    return 0;
+}
+
+static int cxl_rp_interrupts_init(PCIDevice *d, Error **errp)
+{
+    int rc;
+
+    rc = msi_init(d, CXL_RP_MSI_OFFSET, CXL_RP_MSI_NR_VECTOR,
+                  CXL_RP_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_64BIT,
+                  CXL_RP_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT,
+                  errp);
+    if (rc < 0) {
+        assert(rc == -ENOTSUP);
+    }
+
+    return rc;
+}
+
+static void cxl_rp_interrupts_uninit(PCIDevice *d)
+{
+    msi_uninit(d);
+}
+
 static void latch_registers(CXLRootPort *crp)
 {
     uint32_t *reg_state = crp->cxl_cstate.crb.cache_mem_registers;
@@ -183,16 +231,29 @@ static void cxl_rp_dvsec_write_config(PCIDevice *dev, uint32_t addr,
     }
 }
 
+static void cxl_rp_aer_vector_update(PCIDevice *d)
+{
+    PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(d);
+
+    if (rpc->aer_vector) {
+        pcie_aer_root_set_vector(d, rpc->aer_vector(d));
+    }
+}
+
 static void cxl_rp_write_config(PCIDevice *d, uint32_t address, uint32_t val,
                                 int len)
 {
     uint16_t slt_ctl, slt_sta;
+    uint32_t root_cmd =
+        pci_get_long(d->config + d->exp.aer_cap + PCI_ERR_ROOT_COMMAND);
 
     pcie_cap_slot_get(d, &slt_ctl, &slt_sta);
     pci_bridge_write_config(d, address, val, len);
+    cxl_rp_aer_vector_update(d);
     pcie_cap_flr_write_config(d, address, val, len);
     pcie_cap_slot_write_config(d, slt_ctl, slt_sta, address, val, len);
     pcie_aer_write_config(d, address, val, len);
+    pcie_aer_root_write_config(d, address, val, len, root_cmd);
 
     cxl_rp_dvsec_write_config(d, address, val, len);
 }
@@ -217,6 +278,9 @@ static void cxl_root_port_class_init(ObjectClass *oc, void *data)
 
     rpc->aer_offset = GEN_PCIE_ROOT_PORT_AER_OFFSET;
     rpc->acs_offset = GEN_PCIE_ROOT_PORT_ACS_OFFSET;
+    rpc->aer_vector = cxl_rp_aer_vector;
+    rpc->interrupts_init = cxl_rp_interrupts_init;
+    rpc->interrupts_uninit = cxl_rp_interrupts_uninit;
 
     dc->hotpluggable = false;
 }
diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
index e752a21292..ead33f0c05 100644
--- a/hw/pci-bridge/pci_expander_bridge.c
+++ b/hw/pci-bridge/pci_expander_bridge.c
@@ -15,6 +15,7 @@
 #include "hw/pci/pci.h"
 #include "hw/pci/pci_bus.h"
 #include "hw/pci/pci_host.h"
+#include "hw/pci/pcie_port.h"
 #include "hw/qdev-properties.h"
 #include "hw/pci/pci_bridge.h"
 #include "hw/pci-bridge/pci_expander_bridge.h"
@@ -79,6 +80,13 @@ CXLComponentState *cxl_get_hb_cstate(PCIHostState *hb)
     return &host->cxl_cstate;
 }
 
+bool cxl_get_hb_passthrough(PCIHostState *hb)
+{
+    CXLHost *host = PXB_CXL_HOST(hb);
+
+    return host->passthrough;
+}
+
 static int pxb_bus_num(PCIBus *bus)
 {
     PXBDev *pxb = convert_to_pxb(bus->parent_dev);
@@ -289,15 +297,32 @@ static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin)
     return pin - PCI_SLOT(pxb->devfn);
 }
 
-static void pxb_dev_reset(DeviceState *dev)
+static void pxb_cxl_dev_reset(DeviceState *dev)
 {
     CXLHost *cxl = PXB_CXL_DEV(dev)->cxl.cxl_host_bridge;
     CXLComponentState *cxl_cstate = &cxl->cxl_cstate;
+    PCIHostState *hb = PCI_HOST_BRIDGE(cxl);
     uint32_t *reg_state = cxl_cstate->crb.cache_mem_registers;
     uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask;
+    int dsp_count = 0;
 
     cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT);
-    ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8);
+    /*
+     * The CXL specification allows for host bridges with no HDM decoders
+     * if they only have a single root port.
+     */
+    if (!PXB_DEV(dev)->hdm_for_passthrough) {
+        dsp_count = pcie_count_ds_ports(hb->bus);
+    }
+    /* Initial reset will have 0 dsp so wait until > 0 */
+    if (dsp_count == 1) {
+        cxl->passthrough = true;
+        /* Set Capability ID in header to NONE */
+        ARRAY_FIELD_DP32(reg_state, CXL_HDM_CAPABILITY_HEADER, ID, 0);
+    } else {
+        ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT,
+                         8);
+    }
 }
 
 static gint pxb_compare(gconstpointer a, gconstpointer b)
@@ -481,9 +506,18 @@ static void pxb_cxl_dev_realize(PCIDevice *dev, Error **errp)
     }
 
     pxb_dev_realize_common(dev, CXL, errp);
-    pxb_dev_reset(DEVICE(dev));
+    pxb_cxl_dev_reset(DEVICE(dev));
 }
 
+static Property pxb_cxl_dev_properties[] = {
+    /* Note: 0 is not a legal PXB bus number. */
+    DEFINE_PROP_UINT8("bus_nr", PXBDev, bus_nr, 0),
+    DEFINE_PROP_UINT16("numa_node", PXBDev, numa_node, NUMA_NODE_UNASSIGNED),
+    DEFINE_PROP_BOOL("bypass_iommu", PXBDev, bypass_iommu, false),
+    DEFINE_PROP_BOOL("hdm_for_passthrough", PXBDev, hdm_for_passthrough, false),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void pxb_cxl_dev_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc   = DEVICE_CLASS(klass);
@@ -497,12 +531,12 @@ static void pxb_cxl_dev_class_init(ObjectClass *klass, void *data)
      */
 
     dc->desc = "CXL Host Bridge";
-    device_class_set_props(dc, pxb_dev_properties);
+    device_class_set_props(dc, pxb_cxl_dev_properties);
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 
     /* Host bridges aren't hotpluggable. FIXME: spec reference */
     dc->hotpluggable = false;
-    dc->reset = pxb_dev_reset;
+    dc->reset = pxb_cxl_dev_reset;
 }
 
 static const TypeInfo pxb_cxl_dev_info = {
diff --git a/hw/pci/pci-internal.h b/hw/pci/pci-internal.h
index 2ea356bdf5..a7d6d8a732 100644
--- a/hw/pci/pci-internal.h
+++ b/hw/pci/pci-internal.h
@@ -20,6 +20,5 @@ void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent);
 
 int pcie_aer_parse_error_string(const char *error_name,
                                 uint32_t *status, bool *correctable);
-int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
 
 #endif
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 034fe49e9a..def5000e7b 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -95,6 +95,21 @@ static const VMStateDescription vmstate_pcibus = {
     }
 };
 
+static gint g_cmp_uint32(gconstpointer a, gconstpointer b, gpointer user_data)
+{
+    return a - b;
+}
+
+static GSequence *pci_acpi_index_list(void)
+{
+    static GSequence *used_acpi_index_list;
+
+    if (!used_acpi_index_list) {
+        used_acpi_index_list = g_sequence_new(NULL);
+    }
+    return used_acpi_index_list;
+}
+
 static void pci_init_bus_master(PCIDevice *pci_dev)
 {
     AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
@@ -1246,6 +1261,17 @@ static void pci_qdev_unrealize(DeviceState *dev)
     do_pci_unregister_device(pci_dev);
 
     pci_dev->msi_trigger = NULL;
+
+    /*
+     * clean up acpi-index so it could reused by another device
+     */
+    if (pci_dev->acpi_index) {
+        GSequence *used_indexes = pci_acpi_index_list();
+
+        g_sequence_remove(g_sequence_lookup(used_indexes,
+                          GINT_TO_POINTER(pci_dev->acpi_index),
+                          g_cmp_uint32, NULL));
+    }
 }
 
 void pci_register_bar(PCIDevice *pci_dev, int region_num,
@@ -2005,6 +2031,8 @@ PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn)
     return bus->devices[devfn];
 }
 
+#define ONBOARD_INDEX_MAX (16 * 1024 - 1)
+
 static void pci_qdev_realize(DeviceState *qdev, Error **errp)
 {
     PCIDevice *pci_dev = (PCIDevice *)qdev;
@@ -2014,6 +2042,35 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
     bool is_default_rom;
     uint16_t class_id;
 
+    /*
+     * capped by systemd (see: udev-builtin-net_id.c)
+     * as it's the only known user honor it to avoid users
+     * misconfigure QEMU and then wonder why acpi-index doesn't work
+     */
+    if (pci_dev->acpi_index > ONBOARD_INDEX_MAX) {
+        error_setg(errp, "acpi-index should be less or equal to %u",
+                   ONBOARD_INDEX_MAX);
+        return;
+    }
+
+    /*
+     * make sure that acpi-index is unique across all present PCI devices
+     */
+    if (pci_dev->acpi_index) {
+        GSequence *used_indexes = pci_acpi_index_list();
+
+        if (g_sequence_lookup(used_indexes,
+                              GINT_TO_POINTER(pci_dev->acpi_index),
+                              g_cmp_uint32, NULL)) {
+            error_setg(errp, "a PCI device with acpi-index = %" PRIu32
+                       " already exist", pci_dev->acpi_index);
+            return;
+        }
+        g_sequence_insert_sorted(used_indexes,
+                                 GINT_TO_POINTER(pci_dev->acpi_index),
+                                 g_cmp_uint32, NULL);
+    }
+
     if (pci_dev->romsize != -1 && !is_power_of_2(pci_dev->romsize)) {
         error_setg(errp, "ROM size %u is not a power of two", pci_dev->romsize);
         return;
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index 9a19be44ae..103667c368 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -112,6 +112,10 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,
 
     pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
                  PCI_ERR_UNC_SUPPORTED);
+    pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
+                 PCI_ERR_UNC_MASK_DEFAULT);
+    pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
+                 PCI_ERR_UNC_SUPPORTED);
 
     pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
                  PCI_ERR_UNC_SEVERITY_DEFAULT);
@@ -188,8 +192,16 @@ static void pcie_aer_update_uncor_status(PCIDevice *dev)
 static bool
 pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg)
 {
+    uint16_t devctl = pci_get_word(dev->config + dev->exp.exp_cap +
+                                   PCI_EXP_DEVCTL);
     if (!(pcie_aer_msg_is_uncor(msg) &&
-          (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR))) {
+          (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR)) &&
+        !((msg->severity == PCI_ERR_ROOT_CMD_NONFATAL_EN) &&
+          (devctl & PCI_EXP_DEVCTL_NFERE)) &&
+        !((msg->severity == PCI_ERR_ROOT_CMD_COR_EN) &&
+          (devctl & PCI_EXP_DEVCTL_CERE)) &&
+        !((msg->severity == PCI_ERR_ROOT_CMD_FATAL_EN) &&
+          (devctl & PCI_EXP_DEVCTL_FERE))) {
         return false;
     }
 
diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c
index 65a397ad23..20ff2b39e8 100644
--- a/hw/pci/pcie_port.c
+++ b/hw/pci/pcie_port.c
@@ -161,6 +161,51 @@ PCIDevice *pcie_find_port_by_pn(PCIBus *bus, uint8_t pn)
     return NULL;
 }
 
+/* Find first port in devfn number order */
+PCIDevice *pcie_find_port_first(PCIBus *bus)
+{
+    int devfn;
+
+    for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+        PCIDevice *d = bus->devices[devfn];
+
+        if (!d || !pci_is_express(d) || !d->exp.exp_cap) {
+            continue;
+        }
+
+        if (object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) {
+            return d;
+        }
+    }
+
+    return NULL;
+}
+
+int pcie_count_ds_ports(PCIBus *bus)
+{
+    int dsp_count = 0;
+    int devfn;
+
+    for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+        PCIDevice *d = bus->devices[devfn];
+
+        if (!d || !pci_is_express(d) || !d->exp.exp_cap) {
+            continue;
+        }
+        if (object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) {
+            dsp_count++;
+        }
+    }
+    return dsp_count;
+}
+
+static bool pcie_slot_is_hotpluggbale_bus(HotplugHandler *plug_handler,
+                                          BusState *bus)
+{
+    PCIESlot *s = PCIE_SLOT(bus->parent);
+    return s->hotplug;
+}
+
 static const TypeInfo pcie_port_type_info = {
     .name = TYPE_PCIE_PORT,
     .parent = TYPE_PCI_BRIDGE,
@@ -188,6 +233,7 @@ static void pcie_slot_class_init(ObjectClass *oc, void *data)
     hc->plug = pcie_cap_slot_plug_cb;
     hc->unplug = pcie_cap_slot_unplug_cb;
     hc->unplug_request = pcie_cap_slot_unplug_request_cb;
+    hc->is_hotpluggable_bus = pcie_slot_is_hotpluggbale_bus;
 }
 
 static const TypeInfo pcie_slot_type_info = {
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index a87c5f39a2..8f8d05cf9b 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -50,6 +50,7 @@ vhost_vdpa_set_vring_ready(void *dev) "dev: %p"
 vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s"
 vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32
 vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32
+vhost_vdpa_suspend(void *dev) "dev: %p"
 vhost_vdpa_dev_start(void *dev, bool started) "dev: %p started: %d"
 vhost_vdpa_set_log_base(void *dev, uint64_t base, unsigned long long size, int refcnt, int fd, void *log) "dev: %p base: 0x%"PRIx64" size: %llu refcnt: %d fd: %d log: %p"
 vhost_vdpa_set_vring_addr(void *dev, unsigned int index, unsigned int flags, uint64_t desc_user_addr, uint64_t used_user_addr, uint64_t avail_user_addr, uint64_t log_guest_addr) "dev: %p index: %u flags: 0x%x desc_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" log_guest_addr: 0x%"PRIx64
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 515ccf870d..8361e70d1b 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -694,13 +694,17 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
         g_autofree VirtQueueElement *elem = NULL;
         elem = g_steal_pointer(&svq->desc_state[i].elem);
         if (elem) {
-            virtqueue_detach_element(svq->vq, elem, 0);
+            /*
+             * TODO: This is ok for networking, but other kinds of devices
+             * might have problems with just unpop these.
+             */
+            virtqueue_unpop(svq->vq, elem, 0);
         }
     }
 
     next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
     if (next_avail_elem) {
-        virtqueue_detach_element(svq->vq, next_avail_elem, 0);
+        virtqueue_unpop(svq->vq, next_avail_elem, 0);
     }
     svq->vq = NULL;
     g_free(svq->desc_next);
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 8968541514..e5285df4ba 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -2031,8 +2031,8 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
         } else {
             if (virtio_has_feature(protocol_features,
                                    VHOST_USER_PROTOCOL_F_CONFIG)) {
-                warn_reportf_err(*errp, "vhost-user backend supports "
-                                 "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not.");
+                warn_report("vhost-user backend supports "
+                            "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not.");
                 protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
             }
         }
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index df3a1e92ac..bc6bad23d5 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -431,16 +431,6 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
     trace_vhost_vdpa_init(dev, opaque);
     int ret;
 
-    /*
-     * Similar to VFIO, we end up pinning all guest memory and have to
-     * disable discarding of RAM.
-     */
-    ret = ram_block_discard_disable(true);
-    if (ret) {
-        error_report("Cannot set discarding of RAM broken");
-        return ret;
-    }
-
     v = opaque;
     v->dev = dev;
     dev->opaque =  opaque ;
@@ -448,10 +438,36 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
     v->msg_type = VHOST_IOTLB_MSG_V2;
     vhost_vdpa_init_svq(dev, v);
 
+    error_propagate(&dev->migration_blocker, v->migration_blocker);
     if (!vhost_vdpa_first_dev(dev)) {
         return 0;
     }
 
+    /*
+     * If dev->shadow_vqs_enabled at initialization that means the device has
+     * been started with x-svq=on, so don't block migration
+     */
+    if (dev->migration_blocker == NULL && !v->shadow_vqs_enabled) {
+        /* We don't have dev->features yet */
+        uint64_t features;
+        ret = vhost_vdpa_get_dev_features(dev, &features);
+        if (unlikely(ret)) {
+            error_setg_errno(errp, -ret, "Could not get device features");
+            return ret;
+        }
+        vhost_svq_valid_features(features, &dev->migration_blocker);
+    }
+
+    /*
+     * Similar to VFIO, we end up pinning all guest memory and have to
+     * disable discarding of RAM.
+     */
+    ret = ram_block_discard_disable(true);
+    if (ret) {
+        error_report("Cannot set discarding of RAM broken");
+        return ret;
+    }
+
     vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                VIRTIO_CONFIG_S_DRIVER);
 
@@ -577,12 +593,15 @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev)
     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
     v = dev->opaque;
     trace_vhost_vdpa_cleanup(dev, v);
+    if (vhost_vdpa_first_dev(dev)) {
+        ram_block_discard_disable(false);
+    }
+
     vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
     memory_listener_unregister(&v->listener);
     vhost_vdpa_svq_cleanup(dev);
 
     dev->opaque = NULL;
-    ram_block_discard_disable(false);
 
     return 0;
 }
@@ -659,7 +678,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
     uint64_t features;
     uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
         0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
-        0x1ULL << VHOST_BACKEND_F_IOTLB_ASID;
+        0x1ULL << VHOST_BACKEND_F_IOTLB_ASID |
+        0x1ULL << VHOST_BACKEND_F_SUSPEND;
     int r;
 
     if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
@@ -691,11 +711,13 @@ static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
 
 static int vhost_vdpa_reset_device(struct vhost_dev *dev)
 {
+    struct vhost_vdpa *v = dev->opaque;
     int ret;
     uint8_t status = 0;
 
     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
     trace_vhost_vdpa_reset_device(dev, status);
+    v->suspended = false;
     return ret;
 }
 
@@ -1094,6 +1116,29 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev)
     }
 }
 
+static void vhost_vdpa_suspend(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    int r;
+
+    if (!vhost_vdpa_first_dev(dev)) {
+        return;
+    }
+
+    if (dev->backend_cap & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) {
+        trace_vhost_vdpa_suspend(dev);
+        r = ioctl(v->device_fd, VHOST_VDPA_SUSPEND);
+        if (unlikely(r)) {
+            error_report("Cannot suspend: %s(%d)", g_strerror(errno), errno);
+        } else {
+            v->suspended = true;
+            return;
+        }
+    }
+
+    vhost_vdpa_reset_device(dev);
+}
+
 static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
 {
     struct vhost_vdpa *v = dev->opaque;
@@ -1108,6 +1153,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
         }
         vhost_vdpa_set_vring_ready(dev);
     } else {
+        vhost_vdpa_suspend(dev);
         vhost_vdpa_svqs_stop(dev);
         vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
     }
@@ -1119,14 +1165,23 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
     if (started) {
         memory_listener_register(&v->listener, &address_space_memory);
         return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
-    } else {
-        vhost_vdpa_reset_device(dev);
-        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
-                                   VIRTIO_CONFIG_S_DRIVER);
-        memory_listener_unregister(&v->listener);
+    }
 
-        return 0;
+    return 0;
+}
+
+static void vhost_vdpa_reset_status(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v = dev->opaque;
+
+    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
+        return;
     }
+
+    vhost_vdpa_reset_device(dev);
+    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
+                               VIRTIO_CONFIG_S_DRIVER);
+    memory_listener_unregister(&v->listener);
 }
 
 static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
@@ -1169,18 +1224,7 @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                        struct vhost_vring_state *ring)
 {
     struct vhost_vdpa *v = dev->opaque;
-    VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
 
-    /*
-     * vhost-vdpa devices does not support in-flight requests. Set all of them
-     * as available.
-     *
-     * TODO: This is ok for networking, but other kinds of devices might
-     * have problems with these retransmissions.
-     */
-    while (virtqueue_rewind(vq, 1)) {
-        continue;
-    }
     if (v->shadow_vqs_enabled) {
         /*
          * Device vring base was set at device start. SVQ base is handled by
@@ -1203,6 +1247,14 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
         return 0;
     }
 
+    if (!v->suspended) {
+        /*
+         * Cannot trust in value returned by device, let vhost recover used
+         * idx from guest.
+         */
+        return -1;
+    }
+
     ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
     trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
     return ret;
@@ -1227,25 +1279,24 @@ static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                        struct vhost_vring_file *file)
 {
     struct vhost_vdpa *v = dev->opaque;
+    int vdpa_idx = file->index - dev->vq_index;
+    VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
 
+    /* Remember last call fd because we can switch to SVQ anytime. */
+    vhost_svq_set_svq_call_fd(svq, file->fd);
     if (v->shadow_vqs_enabled) {
-        int vdpa_idx = file->index - dev->vq_index;
-        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
-
-        vhost_svq_set_svq_call_fd(svq, file->fd);
         return 0;
-    } else {
-        return vhost_vdpa_set_vring_dev_call(dev, file);
     }
+
+    return vhost_vdpa_set_vring_dev_call(dev, file);
 }
 
 static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                      uint64_t *features)
 {
-    struct vhost_vdpa *v = dev->opaque;
     int ret = vhost_vdpa_get_dev_features(dev, features);
 
-    if (ret == 0 && v->shadow_vqs_enabled) {
+    if (ret == 0) {
         /* Add SVQ logging capabilities */
         *features |= BIT_ULL(VHOST_F_LOG_ALL);
     }
@@ -1313,4 +1364,5 @@ const VhostOps vdpa_ops = {
         .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
         .vhost_force_iommu = vhost_vdpa_force_iommu,
         .vhost_set_config_call = vhost_vdpa_set_config_call,
+        .vhost_reset_status = vhost_vdpa_reset_status,
 };
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index eb8c4c378c..a266396576 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -2049,6 +2049,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
                              hdev->vqs + i,
                              hdev->vq_index + i);
     }
+    if (hdev->vhost_ops->vhost_reset_status) {
+        hdev->vhost_ops->vhost_reset_status(hdev);
+    }
 
     if (vhost_dev_has_iommu(hdev)) {
         if (hdev->vhost_ops->vhost_set_iotlb_callback) {
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
index 516425e26a..802e1b9659 100644
--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
@@ -462,7 +462,7 @@ static void virtio_crypto_init_request(VirtIOCrypto *vcrypto, VirtQueue *vq,
     req->in_iov = NULL;
     req->in_num = 0;
     req->in_len = 0;
-    req->flags = CRYPTODEV_BACKEND_ALG__MAX;
+    req->flags = QCRYPTODEV_BACKEND_ALG__MAX;
     memset(&req->op_info, 0x00, sizeof(req->op_info));
 }
 
@@ -472,7 +472,7 @@ static void virtio_crypto_free_request(VirtIOCryptoReq *req)
         return;
     }
 
-    if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) {
+    if (req->flags == QCRYPTODEV_BACKEND_ALG_SYM) {
         size_t max_len;
         CryptoDevBackendSymOpInfo *op_info = req->op_info.u.sym_op_info;
 
@@ -485,7 +485,7 @@ static void virtio_crypto_free_request(VirtIOCryptoReq *req)
         /* Zeroize and free request data structure */
         memset(op_info, 0, sizeof(*op_info) + max_len);
         g_free(op_info);
-    } else if (req->flags == CRYPTODEV_BACKEND_ALG_ASYM) {
+    } else if (req->flags == QCRYPTODEV_BACKEND_ALG_ASYM) {
         CryptoDevBackendAsymOpInfo *op_info = req->op_info.u.asym_op_info;
         if (op_info) {
             g_free(op_info->src);
@@ -570,10 +570,10 @@ static void virtio_crypto_req_complete(void *opaque, int ret)
     VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto);
     uint8_t status = -ret;
 
-    if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) {
+    if (req->flags == QCRYPTODEV_BACKEND_ALG_SYM) {
         virtio_crypto_sym_input_data_helper(vdev, req, status,
                                             req->op_info.u.sym_op_info);
-    } else if (req->flags == CRYPTODEV_BACKEND_ALG_ASYM) {
+    } else if (req->flags == QCRYPTODEV_BACKEND_ALG_ASYM) {
         virtio_crypto_akcipher_input_data_helper(vdev, req, status,
                                              req->op_info.u.asym_op_info);
     }
@@ -871,11 +871,14 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request)
     opcode = ldl_le_p(&req.header.opcode);
     op_info->session_id = ldq_le_p(&req.header.session_id);
     op_info->op_code = opcode;
+    op_info->queue_index = queue_index;
+    op_info->cb = virtio_crypto_req_complete;
+    op_info->opaque = request;
 
     switch (opcode) {
     case VIRTIO_CRYPTO_CIPHER_ENCRYPT:
     case VIRTIO_CRYPTO_CIPHER_DECRYPT:
-        op_info->algtype = request->flags = CRYPTODEV_BACKEND_ALG_SYM;
+        op_info->algtype = request->flags = QCRYPTODEV_BACKEND_ALG_SYM;
         ret = virtio_crypto_handle_sym_req(vcrypto,
                          &req.u.sym_req, op_info,
                          out_iov, out_num);
@@ -885,7 +888,7 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request)
     case VIRTIO_CRYPTO_AKCIPHER_DECRYPT:
     case VIRTIO_CRYPTO_AKCIPHER_SIGN:
     case VIRTIO_CRYPTO_AKCIPHER_VERIFY:
-        op_info->algtype = request->flags = CRYPTODEV_BACKEND_ALG_ASYM;
+        op_info->algtype = request->flags = QCRYPTODEV_BACKEND_ALG_ASYM;
         ret = virtio_crypto_handle_asym_req(vcrypto,
                          &req.u.akcipher_req, op_info,
                          out_iov, out_num);
@@ -898,9 +901,7 @@ check_result:
             virtio_crypto_req_complete(request, -VIRTIO_CRYPTO_NOTSUPP);
         } else {
             ret = cryptodev_backend_crypto_operation(vcrypto->cryptodev,
-                                    request, queue_index,
-                                    virtio_crypto_req_complete,
-                                    request);
+                                    op_info);
             if (ret < 0) {
                 virtio_crypto_req_complete(request, ret);
             }
@@ -997,12 +998,35 @@ static void virtio_crypto_reset(VirtIODevice *vdev)
     }
 }
 
+static uint32_t virtio_crypto_init_services(uint32_t qservices)
+{
+    uint32_t vservices = 0;
+
+    if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_CIPHER)) {
+        vservices |= (1 << VIRTIO_CRYPTO_SERVICE_CIPHER);
+    }
+    if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_HASH)) {
+        vservices |= (1 << VIRTIO_CRYPTO_SERVICE_HASH);
+    }
+    if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_MAC)) {
+        vservices |= (1 << VIRTIO_CRYPTO_SERVICE_MAC);
+    }
+    if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_AEAD)) {
+        vservices |= (1 << VIRTIO_CRYPTO_SERVICE_AEAD);
+    }
+    if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_AKCIPHER)) {
+        vservices |= (1 << VIRTIO_CRYPTO_SERVICE_AKCIPHER);
+    }
+
+    return vservices;
+}
+
 static void virtio_crypto_init_config(VirtIODevice *vdev)
 {
     VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);
 
-    vcrypto->conf.crypto_services =
-                     vcrypto->conf.cryptodev->conf.crypto_services;
+    vcrypto->conf.crypto_services = virtio_crypto_init_services(
+                     vcrypto->conf.cryptodev->conf.crypto_services);
     vcrypto->conf.cipher_algo_l =
                      vcrypto->conf.cryptodev->conf.cipher_algo_l;
     vcrypto->conf.cipher_algo_h =
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index f35178f5fc..98c4819fcc 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1069,7 +1069,7 @@ static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
                             VRingMemoryRegionCaches *caches)
 {
     VirtIODevice *vdev = vq->vdev;
-    unsigned int max, idx;
+    unsigned int idx;
     unsigned int total_bufs, in_total, out_total;
     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
     int64_t len = 0;
@@ -1078,13 +1078,12 @@ static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
     idx = vq->last_avail_idx;
     total_bufs = in_total = out_total = 0;
 
-    max = vq->vring.num;
-
     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
         MemoryRegionCache *desc_cache = &caches->desc;
         unsigned int num_bufs;
         VRingDesc desc;
         unsigned int i;
+        unsigned int max = vq->vring.num;
 
         num_bufs = total_bufs;
 
@@ -1206,7 +1205,7 @@ static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
                                              VRingMemoryRegionCaches *caches)
 {
     VirtIODevice *vdev = vq->vdev;
-    unsigned int max, idx;
+    unsigned int idx;
     unsigned int total_bufs, in_total, out_total;
     MemoryRegionCache *desc_cache;
     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
@@ -1218,14 +1217,14 @@ static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
     wrap_counter = vq->last_avail_wrap_counter;
     total_bufs = in_total = out_total = 0;
 
-    max = vq->vring.num;
-
     for (;;) {
         unsigned int num_bufs = total_bufs;
         unsigned int i = idx;
         int rc;
+        unsigned int max = vq->vring.num;
 
         desc_cache = &caches->desc;
+
         vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
         if (!is_desc_avail(desc.flags, wrap_counter)) {
             break;
diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h
index 57a542c4b8..2faf7f0cae 100644
--- a/include/hw/acpi/ich9.h
+++ b/include/hw/acpi/ich9.h
@@ -87,6 +87,7 @@ void ich9_pm_device_unplug_request_cb(HotplugHandler *hotplug_dev,
                                       DeviceState *dev, Error **errp);
 void ich9_pm_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
                               Error **errp);
+bool ich9_pm_is_hotpluggable_bus(HotplugHandler *hotplug_dev, BusState *bus);
 
 void ich9_pm_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list);
 #endif /* HW_ACPI_ICH9_H */
diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h
index 7e268c2c9c..ef59810c17 100644
--- a/include/hw/acpi/pcihp.h
+++ b/include/hw/acpi/pcihp.h
@@ -49,15 +49,16 @@ typedef struct AcpiPciHpState {
     uint32_t acpi_index;
     PCIBus *root;
     MemoryRegion io;
-    bool legacy_piix;
     uint16_t io_base;
     uint16_t io_len;
+    bool use_acpi_hotplug_bridge;
+    bool use_acpi_root_pci_hotplug;
 } AcpiPciHpState;
 
 void acpi_pcihp_init(Object *owner, AcpiPciHpState *, PCIBus *root,
-                     MemoryRegion *address_space_io, bool bridges_enabled,
-                     uint16_t io_base);
+                     MemoryRegion *address_space_io, uint16_t io_base);
 
+bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus);
 void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev,
                                    DeviceState *dev, Error **errp);
 void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
@@ -69,7 +70,9 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev,
                                          Error **errp);
 
 /* Called on reset */
-void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off);
+void acpi_pcihp_reset(AcpiPciHpState *s);
+
+void build_append_pcihp_slots(Aml *parent_scope, PCIBus *bus);
 
 extern const VMStateDescription vmstate_acpi_pcihp_pci_status;
 
diff --git a/include/hw/acpi/piix4.h b/include/hw/acpi/piix4.h
index be1f8ea80e..eb1c122d80 100644
--- a/include/hw/acpi/piix4.h
+++ b/include/hw/acpi/piix4.h
@@ -57,8 +57,6 @@ struct PIIX4PMState {
     Notifier powerdown_notifier;
 
     AcpiPciHpState acpi_pci_hotplug;
-    bool use_acpi_hotplug_bridge;
-    bool use_acpi_root_pci_hotplug;
     bool not_migrate_acpi_index;
 
     uint8_t disable_s3;
diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h
index b161be59b7..b2cffbb364 100644
--- a/include/hw/cxl/cxl.h
+++ b/include/hw/cxl/cxl.h
@@ -49,6 +49,7 @@ struct CXLHost {
     PCIHostState parent_obj;
 
     CXLComponentState cxl_cstate;
+    bool passthrough;
 };
 
 #define TYPE_PXB_CXL_HOST "pxb-cxl-host"
diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h
index 692d7a5507..42c7e581a7 100644
--- a/include/hw/cxl/cxl_component.h
+++ b/include/hw/cxl/cxl_component.h
@@ -65,11 +65,37 @@ CXLx_CAPABILITY_HEADER(SNOOP, 0x14)
 #define CXL_RAS_REGISTERS_OFFSET 0x80
 #define CXL_RAS_REGISTERS_SIZE   0x58
 REG32(CXL_RAS_UNC_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET)
+#define CXL_RAS_UNC_ERR_CACHE_DATA_PARITY 0
+#define CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY 1
+#define CXL_RAS_UNC_ERR_CACHE_BE_PARITY 2
+#define CXL_RAS_UNC_ERR_CACHE_DATA_ECC 3
+#define CXL_RAS_UNC_ERR_MEM_DATA_PARITY 4
+#define CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY 5
+#define CXL_RAS_UNC_ERR_MEM_BE_PARITY 6
+#define CXL_RAS_UNC_ERR_MEM_DATA_ECC 7
+#define CXL_RAS_UNC_ERR_REINIT_THRESHOLD 8
+#define CXL_RAS_UNC_ERR_RSVD_ENCODING 9
+#define CXL_RAS_UNC_ERR_POISON_RECEIVED 10
+#define CXL_RAS_UNC_ERR_RECEIVER_OVERFLOW 11
+#define CXL_RAS_UNC_ERR_INTERNAL 14
+#define CXL_RAS_UNC_ERR_CXL_IDE_TX 15
+#define CXL_RAS_UNC_ERR_CXL_IDE_RX 16
+#define CXL_RAS_UNC_ERR_CXL_UNUSED 63 /* Magic value */
 REG32(CXL_RAS_UNC_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x4)
 REG32(CXL_RAS_UNC_ERR_SEVERITY, CXL_RAS_REGISTERS_OFFSET + 0x8)
 REG32(CXL_RAS_COR_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET + 0xc)
+#define CXL_RAS_COR_ERR_CACHE_DATA_ECC 0
+#define CXL_RAS_COR_ERR_MEM_DATA_ECC 1
+#define CXL_RAS_COR_ERR_CRC_THRESHOLD 2
+#define CXL_RAS_COR_ERR_RETRY_THRESHOLD 3
+#define CXL_RAS_COR_ERR_CACHE_POISON_RECEIVED 4
+#define CXL_RAS_COR_ERR_MEM_POISON_RECEIVED 5
+#define CXL_RAS_COR_ERR_PHYSICAL 6
 REG32(CXL_RAS_COR_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x10)
 REG32(CXL_RAS_ERR_CAP_CTRL, CXL_RAS_REGISTERS_OFFSET + 0x14)
+    FIELD(CXL_RAS_ERR_CAP_CTRL, FIRST_ERROR_POINTER, 0, 6)
+REG32(CXL_RAS_ERR_HEADER0, CXL_RAS_REGISTERS_OFFSET + 0x18)
+#define CXL_RAS_ERR_HEADER_NUM 32
 /* Offset 0x18 - 0x58 reserved for RAS logs */
 
 /* 8.2.5.10 - CXL Security Capability Structure */
@@ -221,6 +247,7 @@ static inline hwaddr cxl_decode_ig(int ig)
 }
 
 CXLComponentState *cxl_get_hb_cstate(PCIHostState *hb);
+bool cxl_get_hb_passthrough(PCIHostState *hb);
 
 void cxl_doe_cdat_init(CXLComponentState *cxl_cstate, Error **errp);
 void cxl_doe_cdat_release(CXLComponentState *cxl_cstate);
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 7e5ad65c1d..d589f78202 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -232,6 +232,14 @@ REG64(CXL_MEM_DEV_STS, 0)
     FIELD(CXL_MEM_DEV_STS, MBOX_READY, 4, 1)
     FIELD(CXL_MEM_DEV_STS, RESET_NEEDED, 5, 3)
 
+typedef struct CXLError {
+    QTAILQ_ENTRY(CXLError) node;
+    int type; /* Error code as per FE definition */
+    uint32_t header[32];
+} CXLError;
+
+typedef QTAILQ_HEAD(, CXLError) CXLErrorList;
+
 struct CXLType3Dev {
     /* Private */
     PCIDevice parent_obj;
@@ -248,6 +256,9 @@ struct CXLType3Dev {
 
     /* DOE */
     DOECap doe_cdat;
+
+    /* Error injection */
+    CXLErrorList error_list;
 };
 
 #define TYPE_CXL_TYPE3 "cxl-type3"
diff --git a/include/hw/hotplug.h b/include/hw/hotplug.h
index e15f59c8b3..a9840ed485 100644
--- a/include/hw/hotplug.h
+++ b/include/hw/hotplug.h
@@ -48,6 +48,7 @@ typedef void (*hotplug_fn)(HotplugHandler *plug_handler,
  * @unplug: unplug callback.
  *          Used for device removal with devices that implement
  *          asynchronous and synchronous (surprise) removal.
+ * @is_hotpluggable_bus: called to check if bus/its parent allow hotplug on bus
  */
 struct HotplugHandlerClass {
     /* <private> */
@@ -58,6 +59,7 @@ struct HotplugHandlerClass {
     hotplug_fn plug;
     hotplug_fn unplug_request;
     hotplug_fn unplug;
+    bool (*is_hotpluggable_bus)(HotplugHandler *plug_handler, BusState *bus);
 };
 
 /**
diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
index 63a7521567..81a058bb2c 100644
--- a/include/hw/pci/pci_bridge.h
+++ b/include/hw/pci/pci_bridge.h
@@ -92,6 +92,7 @@ struct PXBDev {
     uint8_t bus_nr;
     uint16_t numa_node;
     bool bypass_iommu;
+    bool hdm_for_passthrough;
     struct cxl_dev {
         CXLHost *cxl_host_bridge; /* Pointer to a CXLHost */
     } cxl;
diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
index 65e71d98fe..1234fdc4e2 100644
--- a/include/hw/pci/pcie_aer.h
+++ b/include/hw/pci/pcie_aer.h
@@ -100,4 +100,5 @@ void pcie_aer_root_write_config(PCIDevice *dev,
                                 uint32_t addr, uint32_t val, int len,
                                 uint32_t root_cmd_prev);
 
+int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
 #endif /* QEMU_PCIE_AER_H */
diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h
index 6c40e3733f..90e6cf45b8 100644
--- a/include/hw/pci/pcie_port.h
+++ b/include/hw/pci/pcie_port.h
@@ -41,6 +41,8 @@ struct PCIEPort {
 void pcie_port_init_reg(PCIDevice *d);
 
 PCIDevice *pcie_find_port_by_pn(PCIBus *bus, uint8_t pn);
+PCIDevice *pcie_find_port_first(PCIBus *bus);
+int pcie_count_ds_ports(PCIBus *bus);
 
 #define TYPE_PCIE_SLOT "pcie-slot"
 OBJECT_DECLARE_SIMPLE_TYPE(PCIESlot, PCIE_SLOT)
diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h
index 1fe0bdd25b..4972106c42 100644
--- a/include/hw/pci/pcie_regs.h
+++ b/include/hw/pci/pcie_regs.h
@@ -141,6 +141,9 @@ typedef enum PCIExpLinkWidth {
                                          PCI_ERR_UNC_ATOP_EBLOCKED |    \
                                          PCI_ERR_UNC_TLP_PRF_BLOCKED)
 
+#define PCI_ERR_UNC_MASK_DEFAULT        (PCI_ERR_UNC_INTN | \
+                                         PCI_ERR_UNC_TLP_PRF_BLOCKED)
+
 #define PCI_ERR_UNC_SEVERITY_DEFAULT    (PCI_ERR_UNC_DLP |              \
                                          PCI_ERR_UNC_SDN |              \
                                          PCI_ERR_UNC_FCP |              \
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index f5b3b2f89a..bd50ad5ee1 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -812,7 +812,18 @@ void qbus_set_bus_hotplug_handler(BusState *bus);
 
 static inline bool qbus_is_hotpluggable(BusState *bus)
 {
-   return bus->hotplug_handler;
+    HotplugHandler *plug_handler = bus->hotplug_handler;
+    bool ret = !!plug_handler;
+
+    if (plug_handler) {
+        HotplugHandlerClass *hdc;
+
+        hdc = HOTPLUG_HANDLER_GET_CLASS(plug_handler);
+        if (hdc->is_hotpluggable_bus) {
+            ret = hdc->is_hotpluggable_bus(plug_handler, bus);
+        }
+    }
+    return ret;
 }
 
 /**
diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
index c5ab49051e..ec3fbae58d 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -130,6 +130,9 @@ typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev);
 
 typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev,
                                        int fd);
+
+typedef void (*vhost_reset_status_op)(struct vhost_dev *dev);
+
 typedef struct VhostOps {
     VhostBackendType backend_type;
     vhost_backend_init vhost_backend_init;
@@ -177,6 +180,7 @@ typedef struct VhostOps {
     vhost_get_device_id_op vhost_get_device_id;
     vhost_force_iommu_op vhost_force_iommu;
     vhost_set_config_call_op vhost_set_config_call;
+    vhost_reset_status_op vhost_reset_status;
 } VhostOps;
 
 int vhost_backend_update_device_iotlb(struct vhost_dev *dev,
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index 7997f09a8d..c278a2a8de 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -42,12 +42,15 @@ typedef struct vhost_vdpa {
     bool shadow_vqs_enabled;
     /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */
     bool shadow_data;
+    /* Device suspended successfully */
+    bool suspended;
     /* IOVA mapping used by the Shadow Virtqueue */
     VhostIOVATree *iova_tree;
     GPtrArray *shadow_vqs;
     const VhostShadowVirtqueueOps *shadow_vq_ops;
     void *shadow_vq_ops_opaque;
     struct vhost_dev *dev;
+    Error *migration_blocker;
     VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
 
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index efae6b06bc..fdb69b7f9c 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -180,5 +180,6 @@ void hmp_ioport_read(Monitor *mon, const QDict *qdict);
 void hmp_ioport_write(Monitor *mon, const QDict *qdict);
 void hmp_boot_set(Monitor *mon, const QDict *qdict);
 void hmp_info_mtree(Monitor *mon, const QDict *qdict);
+void hmp_info_cryptodev(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/include/sysemu/cryptodev.h b/include/sysemu/cryptodev.h
index cf9b3f07fe..bc021ce847 100644
--- a/include/sysemu/cryptodev.h
+++ b/include/sysemu/cryptodev.h
@@ -24,7 +24,9 @@
 #define CRYPTODEV_H
 
 #include "qemu/queue.h"
+#include "qemu/throttle.h"
 #include "qom/object.h"
+#include "qapi/qapi-types-cryptodev.h"
 
 /**
  * CryptoDevBackend:
@@ -48,12 +50,6 @@ typedef struct CryptoDevBackendPeers CryptoDevBackendPeers;
 typedef struct CryptoDevBackendClient
                      CryptoDevBackendClient;
 
-enum CryptoDevBackendAlgType {
-    CRYPTODEV_BACKEND_ALG_SYM,
-    CRYPTODEV_BACKEND_ALG_ASYM,
-    CRYPTODEV_BACKEND_ALG__MAX,
-};
-
 /**
  * CryptoDevBackendSymSessionInfo:
  *
@@ -179,17 +175,22 @@ typedef struct CryptoDevBackendAsymOpInfo {
     uint8_t *dst;
 } CryptoDevBackendAsymOpInfo;
 
+typedef void (*CryptoDevCompletionFunc) (void *opaque, int ret);
+
 typedef struct CryptoDevBackendOpInfo {
-    enum CryptoDevBackendAlgType algtype;
+    QCryptodevBackendAlgType algtype;
     uint32_t op_code;
+    uint32_t queue_index;
+    CryptoDevCompletionFunc cb;
+    void *opaque; /* argument for cb */
     uint64_t session_id;
     union {
         CryptoDevBackendSymOpInfo *sym_op_info;
         CryptoDevBackendAsymOpInfo *asym_op_info;
     } u;
+    QTAILQ_ENTRY(CryptoDevBackendOpInfo) next;
 } CryptoDevBackendOpInfo;
 
-typedef void (*CryptoDevCompletionFunc) (void *opaque, int ret);
 struct CryptoDevBackendClass {
     ObjectClass parent_class;
 
@@ -209,24 +210,11 @@ struct CryptoDevBackendClass {
                          void *opaque);
 
     int (*do_op)(CryptoDevBackend *backend,
-                 CryptoDevBackendOpInfo *op_info,
-                 uint32_t queue_index,
-                 CryptoDevCompletionFunc cb,
-                 void *opaque);
+                 CryptoDevBackendOpInfo *op_info);
 };
 
-typedef enum CryptoDevBackendOptionsType {
-    CRYPTODEV_BACKEND_TYPE_NONE = 0,
-    CRYPTODEV_BACKEND_TYPE_BUILTIN = 1,
-    CRYPTODEV_BACKEND_TYPE_VHOST_USER = 2,
-    CRYPTODEV_BACKEND_TYPE_LKCF = 3,
-    CRYPTODEV_BACKEND_TYPE__MAX,
-} CryptoDevBackendOptionsType;
-
 struct CryptoDevBackendClient {
-    CryptoDevBackendOptionsType type;
-    char *model;
-    char *name;
+    QCryptodevBackendType type;
     char *info_str;
     unsigned int queue_index;
     int vring_enable;
@@ -260,6 +248,24 @@ struct CryptoDevBackendConf {
     uint64_t max_size;
 };
 
+typedef struct CryptodevBackendSymStat {
+    int64_t encrypt_ops;
+    int64_t decrypt_ops;
+    int64_t encrypt_bytes;
+    int64_t decrypt_bytes;
+} CryptodevBackendSymStat;
+
+typedef struct CryptodevBackendAsymStat {
+    int64_t encrypt_ops;
+    int64_t decrypt_ops;
+    int64_t sign_ops;
+    int64_t verify_ops;
+    int64_t encrypt_bytes;
+    int64_t decrypt_bytes;
+    int64_t sign_bytes;
+    int64_t verify_bytes;
+} CryptodevBackendAsymStat;
+
 struct CryptoDevBackend {
     Object parent_obj;
 
@@ -267,15 +273,48 @@ struct CryptoDevBackend {
     /* Tag the cryptodev backend is used by virtio-crypto or not */
     bool is_used;
     CryptoDevBackendConf conf;
+    CryptodevBackendSymStat *sym_stat;
+    CryptodevBackendAsymStat *asym_stat;
+
+    ThrottleState ts;
+    ThrottleTimers tt;
+    ThrottleConfig tc;
+    QTAILQ_HEAD(, CryptoDevBackendOpInfo) opinfos;
 };
 
+#define CryptodevSymStatInc(be, op, bytes) do { \
+   be->sym_stat->op##_bytes += (bytes); \
+   be->sym_stat->op##_ops += 1; \
+} while (/*CONSTCOND*/0)
+
+#define CryptodevSymStatIncEncrypt(be, bytes) \
+            CryptodevSymStatInc(be, encrypt, bytes)
+
+#define CryptodevSymStatIncDecrypt(be, bytes) \
+            CryptodevSymStatInc(be, decrypt, bytes)
+
+#define CryptodevAsymStatInc(be, op, bytes) do { \
+    be->asym_stat->op##_bytes += (bytes); \
+    be->asym_stat->op##_ops += 1; \
+} while (/*CONSTCOND*/0)
+
+#define CryptodevAsymStatIncEncrypt(be, bytes) \
+            CryptodevAsymStatInc(be, encrypt, bytes)
+
+#define CryptodevAsymStatIncDecrypt(be, bytes) \
+            CryptodevAsymStatInc(be, decrypt, bytes)
+
+#define CryptodevAsymStatIncSign(be, bytes) \
+            CryptodevAsymStatInc(be, sign, bytes)
+
+#define CryptodevAsymStatIncVerify(be, bytes) \
+            CryptodevAsymStatInc(be, verify, bytes)
+
+
 /**
  * cryptodev_backend_new_client:
- * @model: the cryptodev backend model
- * @name: the cryptodev backend name, can be NULL
  *
- * Creates a new cryptodev backend client object
- * with the @name in the model @model.
+ * Creates a new cryptodev backend client object.
  *
  * The returned object must be released with
  * cryptodev_backend_free_client() when no
@@ -283,9 +322,8 @@ struct CryptoDevBackend {
  *
  * Returns: a new cryptodev backend client object
  */
-CryptoDevBackendClient *
-cryptodev_backend_new_client(const char *model,
-                                    const char *name);
+CryptoDevBackendClient *cryptodev_backend_new_client(void);
+
 /**
  * cryptodev_backend_free_client:
  * @cc: the cryptodev backend client object
@@ -354,24 +392,17 @@ int cryptodev_backend_close_session(
 /**
  * cryptodev_backend_crypto_operation:
  * @backend: the cryptodev backend object
- * @opaque1: pointer to a VirtIOCryptoReq object
- * @queue_index: queue index of cryptodev backend client
- * @errp: pointer to a NULL-initialized error object
- * @cb: callbacks when operation is completed
- * @opaque2: parameter passed to cb
+ * @op_info: pointer to a CryptoDevBackendOpInfo object
  *
- * Do crypto operation, such as encryption and
- * decryption
+ * Do crypto operation, such as encryption, decryption, signature and
+ * verification
  *
  * Returns: 0 for success and cb will be called when creation is completed,
  * negative value for error, and cb will not be called.
  */
 int cryptodev_backend_crypto_operation(
                  CryptoDevBackend *backend,
-                 void *opaque1,
-                 uint32_t queue_index,
-                 CryptoDevCompletionFunc cb,
-                 void *opaque2);
+                 CryptoDevBackendOpInfo *op_info);
 
 /**
  * cryptodev_backend_set_used:
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index de5ed8ff22..99904a0da7 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -26,12 +26,15 @@
 #include <err.h>
 #include "standard-headers/linux/virtio_net.h"
 #include "monitor/monitor.h"
+#include "migration/migration.h"
+#include "migration/misc.h"
 #include "hw/virtio/vhost.h"
 
 /* Todo:need to add the multiqueue support here */
 typedef struct VhostVDPAState {
     NetClientState nc;
     struct vhost_vdpa vhost_vdpa;
+    Notifier migration_state;
     VHostNetState *vhost_net;
 
     /* Control commands shadow buffers */
@@ -98,6 +101,8 @@ static const uint64_t vdpa_svq_device_features =
     BIT_ULL(VIRTIO_NET_F_MQ) |
     BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
     BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
+    /* VHOST_F_LOG_ALL is exposed by SVQ */
+    BIT_ULL(VHOST_F_LOG_ALL) |
     BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
     BIT_ULL(VIRTIO_NET_F_STANDBY);
 
@@ -178,13 +183,9 @@ err_init:
 static void vhost_vdpa_cleanup(NetClientState *nc)
 {
     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
-    struct vhost_dev *dev = &s->vhost_net->dev;
 
     qemu_vfree(s->cvq_cmd_out_buffer);
     qemu_vfree(s->status);
-    if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
-        g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
-    }
     if (s->vhost_net) {
         vhost_net_cleanup(s->vhost_net);
         g_free(s->vhost_net);
@@ -234,10 +235,126 @@ static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
     return size;
 }
 
+/** From any vdpa net client, get the netclient of the first queue pair */
+static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
+{
+    NICState *nic = qemu_get_nic(s->nc.peer);
+    NetClientState *nc0 = qemu_get_peer(nic->ncs, 0);
+
+    return DO_UPCAST(VhostVDPAState, nc, nc0);
+}
+
+static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
+{
+    struct vhost_vdpa *v = &s->vhost_vdpa;
+    VirtIONet *n;
+    VirtIODevice *vdev;
+    int data_queue_pairs, cvq, r;
+
+    /* We are only called on the first data vqs and only if x-svq is not set */
+    if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
+        return;
+    }
+
+    vdev = v->dev->vdev;
+    n = VIRTIO_NET(vdev);
+    if (!n->vhost_started) {
+        return;
+    }
+
+    data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
+    cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
+                                  n->max_ncs - n->max_queue_pairs : 0;
+    /*
+     * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
+     * in the future and resume the device if read-only operations between
+     * suspend and reset goes wrong.
+     */
+    vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);
+
+    /* Start will check migration setup_or_active to configure or not SVQ */
+    r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
+    if (unlikely(r < 0)) {
+        error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
+    }
+}
+
+static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
+{
+    MigrationState *migration = data;
+    VhostVDPAState *s = container_of(notifier, VhostVDPAState,
+                                     migration_state);
+
+    if (migration_in_setup(migration)) {
+        vhost_vdpa_net_log_global_enable(s, true);
+    } else if (migration_has_failed(migration)) {
+        vhost_vdpa_net_log_global_enable(s, false);
+    }
+}
+
+static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
+{
+    struct vhost_vdpa *v = &s->vhost_vdpa;
+
+    add_migration_state_change_notifier(&s->migration_state);
+    if (v->shadow_vqs_enabled) {
+        v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
+                                           v->iova_range.last);
+    }
+}
+
+static int vhost_vdpa_net_data_start(NetClientState *nc)
+{
+    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+    struct vhost_vdpa *v = &s->vhost_vdpa;
+
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+
+    if (s->always_svq ||
+        migration_is_setup_or_active(migrate_get_current()->state)) {
+        v->shadow_vqs_enabled = true;
+        v->shadow_data = true;
+    } else {
+        v->shadow_vqs_enabled = false;
+        v->shadow_data = false;
+    }
+
+    if (v->index == 0) {
+        vhost_vdpa_net_data_start_first(s);
+        return 0;
+    }
+
+    if (v->shadow_vqs_enabled) {
+        VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s);
+        v->iova_tree = s0->vhost_vdpa.iova_tree;
+    }
+
+    return 0;
+}
+
+static void vhost_vdpa_net_client_stop(NetClientState *nc)
+{
+    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+    struct vhost_dev *dev;
+
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+
+    if (s->vhost_vdpa.index == 0) {
+        remove_migration_state_change_notifier(&s->migration_state);
+    }
+
+    dev = s->vhost_vdpa.dev;
+    if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
+        g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
+    }
+}
+
 static NetClientInfo net_vhost_vdpa_info = {
         .type = NET_CLIENT_DRIVER_VHOST_VDPA,
         .size = sizeof(VhostVDPAState),
         .receive = vhost_vdpa_receive,
+        .start = vhost_vdpa_net_data_start,
+        .stop = vhost_vdpa_net_client_stop,
         .cleanup = vhost_vdpa_cleanup,
         .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
         .has_ufo = vhost_vdpa_has_ufo,
@@ -351,7 +468,7 @@ dma_map_err:
 
 static int vhost_vdpa_net_cvq_start(NetClientState *nc)
 {
-    VhostVDPAState *s;
+    VhostVDPAState *s, *s0;
     struct vhost_vdpa *v;
     uint64_t backend_features;
     int64_t cvq_group;
@@ -362,11 +479,12 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
     s = DO_UPCAST(VhostVDPAState, nc, nc);
     v = &s->vhost_vdpa;
 
-    v->shadow_data = s->always_svq;
+    s0 = vhost_vdpa_net_first_nc_vdpa(s);
+    v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled;
     v->shadow_vqs_enabled = s->always_svq;
     s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
 
-    if (s->always_svq) {
+    if (s->vhost_vdpa.shadow_data) {
         /* SVQ is already configured for all virtqueues */
         goto out;
     }
@@ -415,8 +533,6 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
         return r;
     }
 
-    v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
-                                       v->iova_range.last);
     v->shadow_vqs_enabled = true;
     s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
 
@@ -425,6 +541,26 @@ out:
         return 0;
     }
 
+    if (s0->vhost_vdpa.iova_tree) {
+        /*
+         * SVQ is already configured for all virtqueues.  Reuse IOVA tree for
+         * simplicity, whether CVQ shares ASID with guest or not, because:
+         * - Memory listener need access to guest's memory addresses allocated
+         *   in the IOVA tree.
+         * - There should be plenty of IOVA address space for both ASID not to
+         *   worry about collisions between them.  Guest's translations are
+         *   still validated with virtio virtqueue_pop so there is no risk for
+         *   the guest to access memory that it shouldn't.
+         *
+         * To allocate a iova tree per ASID is doable but it complicates the
+         * code and it is not worth it for the moment.
+         */
+        v->iova_tree = s0->vhost_vdpa.iova_tree;
+    } else {
+        v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
+                                           v->iova_range.last);
+    }
+
     r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
                                vhost_vdpa_net_cvq_cmd_page_len(), false);
     if (unlikely(r < 0)) {
@@ -449,15 +585,9 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
     if (s->vhost_vdpa.shadow_vqs_enabled) {
         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
-        if (!s->always_svq) {
-            /*
-             * If only the CVQ is shadowed we can delete this safely.
-             * If all the VQs are shadows this will be needed by the time the
-             * device is started again to register SVQ vrings and similar.
-             */
-            g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
-        }
     }
+
+    vhost_vdpa_net_client_stop(nc);
 }
 
 static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
@@ -668,7 +798,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
                                        bool is_datapath,
                                        bool svq,
                                        struct vhost_vdpa_iova_range iova_range,
-                                       VhostIOVATree *iova_tree)
+                                       uint64_t features)
 {
     NetClientState *nc = NULL;
     VhostVDPAState *s;
@@ -687,11 +817,14 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
     s->vhost_vdpa.device_fd = vdpa_device_fd;
     s->vhost_vdpa.index = queue_pair_index;
     s->always_svq = svq;
+    s->migration_state.notify = vdpa_net_migration_state_notifier;
     s->vhost_vdpa.shadow_vqs_enabled = svq;
     s->vhost_vdpa.iova_range = iova_range;
     s->vhost_vdpa.shadow_data = svq;
-    s->vhost_vdpa.iova_tree = iova_tree;
-    if (!is_datapath) {
+    if (queue_pair_index == 0) {
+        vhost_vdpa_net_valid_svq_features(features,
+                                          &s->vhost_vdpa.migration_blocker);
+    } else if (!is_datapath) {
         s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
                                             vhost_vdpa_net_cvq_cmd_page_len());
         memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
@@ -701,6 +834,15 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
 
         s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
         s->vhost_vdpa.shadow_vq_ops_opaque = s;
+
+        /*
+         * TODO: We cannot migrate devices with CVQ as there is no way to set
+         * the device state (MAC, MQ, etc) before starting the datapath.
+         *
+         * Migration blocker ownership now belongs to s->vhost_vdpa.
+         */
+        error_setg(&s->vhost_vdpa.migration_blocker,
+                   "net vdpa cannot migrate with CVQ feature");
     }
     ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
     if (ret) {
@@ -760,7 +902,6 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
     uint64_t features;
     int vdpa_device_fd;
     g_autofree NetClientState **ncs = NULL;
-    g_autoptr(VhostIOVATree) iova_tree = NULL;
     struct vhost_vdpa_iova_range iova_range;
     NetClientState *nc;
     int queue_pairs, r, i = 0, has_cvq = 0;
@@ -812,12 +953,8 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
         goto err;
     }
 
-    if (opts->x_svq) {
-        if (!vhost_vdpa_net_valid_svq_features(features, errp)) {
-            goto err_svq;
-        }
-
-        iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last);
+    if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) {
+        goto err;
     }
 
     ncs = g_malloc0(sizeof(*ncs) * queue_pairs);
@@ -825,7 +962,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
     for (i = 0; i < queue_pairs; i++) {
         ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                      vdpa_device_fd, i, 2, true, opts->x_svq,
-                                     iova_range, iova_tree);
+                                     iova_range, features);
         if (!ncs[i])
             goto err;
     }
@@ -833,13 +970,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
     if (has_cvq) {
         nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                  vdpa_device_fd, i, 1, false,
-                                 opts->x_svq, iova_range, iova_tree);
+                                 opts->x_svq, iova_range, features);
         if (!nc)
             goto err;
     }
 
-    /* iova_tree ownership belongs to last NetClientState */
-    g_steal_pointer(&iova_tree);
     return 0;
 
 err:
@@ -849,7 +984,6 @@ err:
         }
     }
 
-err_svq:
     qemu_close(vdpa_device_fd);
 
     return -1;
diff --git a/qapi/cryptodev.json b/qapi/cryptodev.json
new file mode 100644
index 0000000000..f33f96a692
--- /dev/null
+++ b/qapi/cryptodev.json
@@ -0,0 +1,89 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+##
+# @QCryptodevBackendAlgType:
+#
+# The supported algorithm types of a crypto device.
+#
+# @sym: symmetric encryption
+# @asym: asymmetric Encryption
+#
+# Since: 8.0
+##
+{ 'enum': 'QCryptodevBackendAlgType',
+  'prefix': 'QCRYPTODEV_BACKEND_ALG',
+  'data': ['sym', 'asym']}
+
+##
+# @QCryptodevBackendServiceType:
+#
+# The supported service types of a crypto device.
+#
+# Since: 8.0
+##
+{ 'enum': 'QCryptodevBackendServiceType',
+  'prefix': 'QCRYPTODEV_BACKEND_SERVICE',
+  'data': ['cipher', 'hash', 'mac', 'aead', 'akcipher']}
+
+##
+# @QCryptodevBackendType:
+#
+# The crypto device backend type
+#
+# @builtin: the QEMU builtin support
+# @vhost-user: vhost-user
+# @lkcf: Linux kernel cryptographic framework
+#
+# Since: 8.0
+##
+{ 'enum': 'QCryptodevBackendType',
+  'prefix': 'QCRYPTODEV_BACKEND_TYPE',
+  'data': ['builtin', 'vhost-user', 'lkcf']}
+
+##
+# @QCryptodevBackendClient:
+#
+# Information about a queue of crypto device.
+#
+# @queue: the queue index of the crypto device
+#
+# @type: the type of the crypto device
+#
+# Since: 8.0
+##
+{ 'struct': 'QCryptodevBackendClient',
+  'data': { 'queue': 'uint32',
+            'type': 'QCryptodevBackendType' } }
+
+##
+# @QCryptodevInfo:
+#
+# Information about a crypto device.
+#
+# @id: the id of the crypto device
+#
+# @service: supported service types of a crypto device
+#
+# @client: the additional infomation of the crypto device
+#
+# Since: 8.0
+##
+{ 'struct': 'QCryptodevInfo',
+  'data': { 'id': 'str',
+            'service': ['QCryptodevBackendServiceType'],
+            'client': ['QCryptodevBackendClient'] } }
+
+##
+# @query-cryptodev:
+#
+# Returns information about current crypto devices.
+#
+# Returns: a list of @QCryptodevInfo
+#
+# Since: 8.0
+##
+{ 'command': 'query-cryptodev', 'returns': ['QCryptodevInfo']}
diff --git a/qapi/cxl.json b/qapi/cxl.json
new file mode 100644
index 0000000000..4be7d46041
--- /dev/null
+++ b/qapi/cxl.json
@@ -0,0 +1,128 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+
+##
+# = CXL devices
+##
+
+##
+# @CxlUncorErrorType:
+#
+# Type of uncorrectable CXL error to inject. These errors are reported via
+# an AER uncorrectable internal error with additional information logged at
+# the CXL device.
+#
+# @cache-data-parity: Data error such as data parity or data ECC error CXL.cache
+# @cache-address-parity: Address parity or other errors associated with the
+#                        address field on CXL.cache
+# @cache-be-parity: Byte enable parity or other byte enable errors on CXL.cache
+# @cache-data-ecc: ECC error on CXL.cache
+# @mem-data-parity: Data error such as data parity or data ECC error on CXL.mem
+# @mem-address-parity: Address parity or other errors associated with the
+#                      address field on CXL.mem
+# @mem-be-parity: Byte enable parity or other byte enable errors on CXL.mem.
+# @mem-data-ecc: Data ECC error on CXL.mem.
+# @reinit-threshold: REINIT threshold hit.
+# @rsvd-encoding: Received unrecognized encoding.
+# @poison-received: Received poison from the peer.
+# @receiver-overflow: Buffer overflows (first 3 bits of header log indicate which)
+# @internal: Component specific error
+# @cxl-ide-tx: Integrity and data encryption tx error.
+# @cxl-ide-rx: Integrity and data encryption rx error.
+#
+# Since: 8.0
+##
+
+{ 'enum': 'CxlUncorErrorType',
+  'data': ['cache-data-parity',
+           'cache-address-parity',
+           'cache-be-parity',
+           'cache-data-ecc',
+           'mem-data-parity',
+           'mem-address-parity',
+           'mem-be-parity',
+           'mem-data-ecc',
+           'reinit-threshold',
+           'rsvd-encoding',
+           'poison-received',
+           'receiver-overflow',
+           'internal',
+           'cxl-ide-tx',
+           'cxl-ide-rx'
+           ]
+ }
+
+##
+# @CXLUncorErrorRecord:
+#
+# Record of a single error including header log.
+#
+# @type: Type of error
+# @header: 16 DWORD of header.
+#
+# Since: 8.0
+##
+{ 'struct': 'CXLUncorErrorRecord',
+  'data': {
+      'type': 'CxlUncorErrorType',
+      'header': [ 'uint32' ]
+  }
+}
+
+##
+# @cxl-inject-uncorrectable-errors:
+#
+# Command to allow injection of multiple errors in one go. This allows testing
+# of multiple header log handling in the OS.
+#
+# @path: CXL Type 3 device canonical QOM path
+# @errors: Errors to inject
+#
+# Since: 8.0
+##
+{ 'command': 'cxl-inject-uncorrectable-errors',
+  'data': { 'path': 'str',
+             'errors': [ 'CXLUncorErrorRecord' ] }}
+
+##
+# @CxlCorErrorType:
+#
+# Type of CXL correctable error to inject
+#
+# @cache-data-ecc: Data ECC error on CXL.cache
+# @mem-data-ecc: Data ECC error on CXL.mem
+# @crc-threshold: Component specific and applicable to 68 byte Flit mode only.
+# @cache-poison-received: Received poison from a peer on CXL.cache.
+# @mem-poison-received: Received poison from a peer on CXL.mem
+# @physical: Received error indication from the physical layer.
+#
+# Since: 8.0
+##
+{ 'enum': 'CxlCorErrorType',
+  'data': ['cache-data-ecc',
+           'mem-data-ecc',
+           'crc-threshold',
+           'retry-threshold',
+           'cache-poison-received',
+           'mem-poison-received',
+           'physical']
+}
+
+##
+# @cxl-inject-correctable-error:
+#
+# Command to inject a single correctable error.  Multiple error injection
+# of this error type is not interesting as there is no associated header log.
+# These errors are reported via AER as a correctable internal error, with
+# additional detail available from the CXL device.
+#
+# @path: CXL Type 3 device canonical QOM path
+# @type: Type of error.
+#
+# Since: 8.0
+##
+{ 'command': 'cxl-inject-correctable-error',
+  'data': { 'path': 'str',
+            'type': 'CxlCorErrorType'
+  }
+}
diff --git a/qapi/meson.build b/qapi/meson.build
index fbdb442fdf..9fd480c4d8 100644
--- a/qapi/meson.build
+++ b/qapi/meson.build
@@ -31,6 +31,7 @@ qapi_all_modules = [
   'compat',
   'control',
   'crypto',
+  'cxl',
   'dump',
   'error',
   'introspect',
@@ -56,6 +57,7 @@ if have_system
   qapi_all_modules += [
     'acpi',
     'audio',
+    'cryptodev',
     'qdev',
     'pci',
     'rdma',
diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
index f000b90744..7c09af5cc8 100644
--- a/qapi/qapi-schema.json
+++ b/qapi/qapi-schema.json
@@ -95,3 +95,5 @@
 { 'include': 'pci.json' }
 { 'include': 'stats.json' }
 { 'include': 'virtio.json' }
+{ 'include': 'cryptodev.json' }
+{ 'include': 'cxl.json' }
diff --git a/qapi/qom.json b/qapi/qom.json
index 30e76653ad..a877b879b9 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -278,10 +278,16 @@
 #          cryptodev-backend and must be 1 for cryptodev-backend-builtin.
 #          (default: 1)
 #
+# @throttle-bps: limit total bytes per second (Since 8.0)
+#
+# @throttle-ops: limit total operations per second (Since 8.0)
+#
 # Since: 2.8
 ##
 { 'struct': 'CryptodevBackendProperties',
-  'data': { '*queues': 'uint32' } }
+  'data': { '*queues': 'uint32',
+            '*throttle-bps': 'uint64',
+            '*throttle-ops': 'uint64' } }
 
 ##
 # @CryptodevVhostUserProperties:
diff --git a/qapi/stats.json b/qapi/stats.json
index 57db5b1c74..1f5d3c59ab 100644
--- a/qapi/stats.json
+++ b/qapi/stats.json
@@ -50,10 +50,14 @@
 #
 # Enumeration of statistics providers.
 #
+# @kvm: since 7.1
+#
+# @cryptodev: since 8.0
+#
 # Since: 7.1
 ##
 { 'enum': 'StatsProvider',
-  'data': [ 'kvm' ] }
+  'data': [ 'kvm', 'cryptodev' ] }
 
 ##
 # @StatsTarget:
@@ -65,10 +69,12 @@
 #
 # @vcpu: statistics that apply to a single virtual CPU.
 #
+# @cryptodev: statistics that apply to a crypto device. since 8.0
+#
 # Since: 7.1
 ##
 { 'enum': 'StatsTarget',
-  'data': [ 'vm', 'vcpu' ] }
+  'data': [ 'vm', 'vcpu', 'cryptodev' ] }
 
 ##
 # @StatsRequest:
diff --git a/qga/installer/qemu-ga.wxs b/qga/installer/qemu-ga.wxs
index 51340f7ecc..df572adb4a 100644
--- a/qga/installer/qemu-ga.wxs
+++ b/qga/installer/qemu-ga.wxs
@@ -31,6 +31,7 @@
       />
     <Media Id="1" Cabinet="qemu_ga.$(var.QEMU_GA_VERSION).cab" EmbedCab="yes" />
     <Property Id="WHSLogo">1</Property>
+    <Property Id="ARPNOMODIFY" Value="yes" Secure="yes" />
     <MajorUpgrade
       DowngradeErrorMessage="Error: A newer version of QEMU guest agent is already installed."
       />
@@ -121,27 +122,31 @@
               <RegistryValue Type="integer" Name="TypesSupported" Value="7" />
               <RegistryValue Type="string" Name="EventMessageFile" Value="[qemu_ga_directory]qemu-ga.exe" />
             </RegistryKey>
+            <RegistryKey Root="HKLM"
+                         Key="System\CurrentControlSet\Services\QEMU Guest Agent VSS Provider">
+              <RegistryValue Type="integer" Name="VssOption" Value="1" />
+            </RegistryKey>
           </Component>
         </Directory>
       </Directory>
     </Directory>
 
-    <Property Id="cmd" Value="cmd.exe"/>
+    <Property Id="rundll" Value="rundll32.exe"/>
     <Property Id="REINSTALLMODE" Value="amus"/>
 
     <?ifdef var.InstallVss?>
     <CustomAction Id="RegisterCom"
-              ExeCommand='/c "[qemu_ga_directory]qemu-ga.exe" -s vss-install'
+              ExeCommand='"[qemu_ga_directory]qga-vss.dll",DLLCOMRegister'
               Execute="deferred"
-              Property="cmd"
+              Property="rundll"
               Impersonate="no"
               Return="check"
               >
     </CustomAction>
     <CustomAction Id="UnRegisterCom"
-              ExeCommand='/c "[qemu_ga_directory]qemu-ga.exe" -s vss-uninstall'
+              ExeCommand='"[qemu_ga_directory]qga-vss.dll",DLLCOMUnregister'
               Execute="deferred"
-              Property="cmd"
+              Property="rundll"
               Impersonate="no"
               Return="check"
               >
diff --git a/qga/vss-win32/install.cpp b/qga/vss-win32/install.cpp
index b57508fbe0..68662a6dfc 100644
--- a/qga/vss-win32/install.cpp
+++ b/qga/vss-win32/install.cpp
@@ -357,6 +357,15 @@ out:
     return hr;
 }
 
+STDAPI_(void) CALLBACK DLLCOMRegister(HWND, HINSTANCE, LPSTR, int)
+{
+    COMRegister();
+}
+
+STDAPI_(void) CALLBACK DLLCOMUnregister(HWND, HINSTANCE, LPSTR, int)
+{
+    COMUnregister();
+}
 
 static BOOL CreateRegistryKey(LPCTSTR key, LPCTSTR value, LPCTSTR data)
 {
diff --git a/qga/vss-win32/qga-vss.def b/qga/vss-win32/qga-vss.def
index 927782c31b..ee97a81427 100644
--- a/qga/vss-win32/qga-vss.def
+++ b/qga/vss-win32/qga-vss.def
@@ -1,6 +1,8 @@
 LIBRARY      "QGA-PROVIDER.DLL"
 
 EXPORTS
+	DLLCOMRegister
+	DLLCOMUnregister
 	COMRegister		PRIVATE
 	COMUnregister		PRIVATE
 	DllCanUnloadNow		PRIVATE
diff --git a/qga/vss-win32/requester.cpp b/qga/vss-win32/requester.cpp
index b371affeab..3e998af4a8 100644
--- a/qga/vss-win32/requester.cpp
+++ b/qga/vss-win32/requester.cpp
@@ -23,6 +23,8 @@
 /* Call QueryStatus every 10 ms while waiting for frozen event */
 #define VSS_TIMEOUT_EVENT_MSEC 10
 
+#define DEFAULT_VSS_BACKUP_TYPE VSS_BT_FULL
+
 #define err_set(e, err, fmt, ...)                                           \
     ((e)->error_setg_win32_wrapper((e)->errp, __FILE__, __LINE__, __func__, \
                                    err, fmt, ## __VA_ARGS__))
@@ -234,6 +236,42 @@ out:
     }
 }
 
+DWORD get_reg_dword_value(HKEY baseKey, LPCSTR subKey, LPCSTR valueName,
+                          DWORD defaultData)
+{
+    DWORD regGetValueError;
+    DWORD dwordData;
+    DWORD dataSize = sizeof(DWORD);
+
+    regGetValueError = RegGetValue(baseKey, subKey, valueName, RRF_RT_DWORD,
+                                   NULL, &dwordData, &dataSize);
+    if (regGetValueError  != ERROR_SUCCESS) {
+        return defaultData;
+    }
+    return dwordData;
+}
+
+bool is_valid_vss_backup_type(VSS_BACKUP_TYPE vssBT)
+{
+    return (vssBT > VSS_BT_UNDEFINED && vssBT < VSS_BT_OTHER);
+}
+
+VSS_BACKUP_TYPE get_vss_backup_type(
+    VSS_BACKUP_TYPE defaultVssBT = DEFAULT_VSS_BACKUP_TYPE)
+{
+    VSS_BACKUP_TYPE vssBackupType;
+
+    vssBackupType = static_cast<VSS_BACKUP_TYPE>(
+                            get_reg_dword_value(HKEY_LOCAL_MACHINE,
+                                                QGA_PROVIDER_REGISTRY_ADDRESS,
+                                                "VssOption",
+                                                defaultVssBT));
+    if (!is_valid_vss_backup_type(vssBackupType)) {
+        return defaultVssBT;
+    }
+    return vssBackupType;
+}
+
 void requester_freeze(int *num_vols, void *mountpoints, ErrorSet *errset)
 {
     COMPointer<IVssAsync> pAsync;
@@ -247,6 +285,7 @@ void requester_freeze(int *num_vols, void *mountpoints, ErrorSet *errset)
     DWORD wait_status;
     int num_fixed_drives = 0, i;
     int num_mount_points = 0;
+    VSS_BACKUP_TYPE vss_bt = get_vss_backup_type();
 
     if (vss_ctx.pVssbc) { /* already frozen */
         *num_vols = 0;
@@ -294,7 +333,7 @@ void requester_freeze(int *num_vols, void *mountpoints, ErrorSet *errset)
         goto out;
     }
 
-    hr = vss_ctx.pVssbc->SetBackupState(true, true, VSS_BT_FULL, false);
+    hr = vss_ctx.pVssbc->SetBackupState(true, true, vss_bt, false);
     if (FAILED(hr)) {
         err_set(errset, hr, "failed to set backup state");
         goto out;
diff --git a/qga/vss-win32/vss-handles.h b/qga/vss-win32/vss-handles.h
index 0f8a741ad2..1a7d842129 100644
--- a/qga/vss-win32/vss-handles.h
+++ b/qga/vss-win32/vss-handles.h
@@ -6,6 +6,9 @@
 #define QGA_PROVIDER_NAME "QEMU Guest Agent VSS Provider"
 #define QGA_PROVIDER_LNAME L(QGA_PROVIDER_NAME)
 #define QGA_PROVIDER_VERSION L(QEMU_VERSION)
+#define QGA_PROVIDER_REGISTRY_ADDRESS "SYSTEM\\CurrentControlSet"\
+                                      "\\Services"\
+                                      "\\" QGA_PROVIDER_NAME
 
 #define EVENT_NAME_FROZEN  "Global\\QGAVSSEvent-frozen"
 #define EVENT_NAME_THAW    "Global\\QGAVSSEvent-thaw"
diff --git a/stats/stats-hmp-cmds.c b/stats/stats-hmp-cmds.c
index 531e35d128..1f91bf8bd5 100644
--- a/stats/stats-hmp-cmds.c
+++ b/stats/stats-hmp-cmds.c
@@ -155,6 +155,8 @@ static StatsFilter *stats_filter(StatsTarget target, const char *names,
         filter->u.vcpu.vcpus = vcpu_list;
         break;
     }
+    case STATS_TARGET_CRYPTODEV:
+        break;
     default:
         break;
     }
@@ -226,6 +228,9 @@ void hmp_info_stats(Monitor *mon, const QDict *qdict)
         int cpu_index = monitor_get_cpu_index(mon);
         filter = stats_filter(target, names, cpu_index, provider);
         break;
+    case STATS_TARGET_CRYPTODEV:
+        filter = stats_filter(target, names, -1, provider);
+        break;
     default:
         abort();
     }
diff --git a/stats/stats-qmp-cmds.c b/stats/stats-qmp-cmds.c
index bc973747fb..e214b964fd 100644
--- a/stats/stats-qmp-cmds.c
+++ b/stats/stats-qmp-cmds.c
@@ -64,6 +64,8 @@ static bool invoke_stats_cb(StatsCallbacks *entry,
             targets = filter->u.vcpu.vcpus;
         }
         break;
+    case STATS_TARGET_CRYPTODEV:
+        break;
     default:
         abort();
     }
diff --git a/target/hexagon/README b/target/hexagon/README
index 251960b862..ebafc78b1c 100644
--- a/target/hexagon/README
+++ b/target/hexagon/README
@@ -52,6 +52,7 @@ header files in <BUILD_DIR>/target/hexagon
         gen_tcg_func_table.py           -> tcg_func_table_generated.c.inc
         gen_helper_funcs.py             -> helper_funcs_generated.c.inc
         gen_idef_parser_funcs.py        -> idef_parser_input.h
+        gen_analyze_funcs.py            -> analyze_funcs_generated.c.inc
 
 Qemu helper functions have 3 parts
     DEF_HELPER declaration indicates the signature of the helper
@@ -87,7 +88,6 @@ tcg_funcs_generated.c.inc
         TCGv RtV = hex_gpr[insn->regno[2]];
         gen_helper_A2_add(RdV, cpu_env, RsV, RtV);
         gen_log_reg_write(RdN, RdV);
-        ctx_log_reg_write(ctx, RdN);
     }
 
 helper_funcs_generated.c.inc
@@ -136,12 +136,9 @@ For HVX vectors, the generator behaves slightly differently.  The wide vectors
 won't fit in a TCGv or TCGv_i64, so we pass TCGv_ptr variables to pass the
 address to helper functions.  Here's an example for an HVX vector-add-word
 istruction.
-    static void generate_V6_vaddw(
-                    CPUHexagonState *env,
-                    DisasContext *ctx,
-                    Insn *insn,
-                    Packet *pkt)
+    static void generate_V6_vaddw(DisasContext *ctx)
     {
+        Insn *insn __attribute__((unused)) = ctx->insn;
         const int VdN = insn->regno[0];
         const intptr_t VdV_off =
             ctx_future_vreg_off(ctx, VdN, 1, true);
@@ -157,10 +154,7 @@ istruction.
         TCGv_ptr VvV = tcg_temp_new_ptr();
         tcg_gen_addi_ptr(VuV, cpu_env, VuV_off);
         tcg_gen_addi_ptr(VvV, cpu_env, VvV_off);
-        TCGv slot = tcg_constant_tl(insn->slot);
-        gen_helper_V6_vaddw(cpu_env, VdV, VuV, VvV, slot);
-        gen_log_vreg_write(ctx, VdV_off, VdN, EXT_DFL, insn->slot, false);
-        ctx_log_vreg_write(ctx, VdN, EXT_DFL, false);
+        gen_helper_V6_vaddw(cpu_env, VdV, VuV, VvV);
     }
 
 Notice that we also generate a variable named <operand>_off for each operand of
@@ -173,12 +167,9 @@ functions from tcg-op-gvec.h.  Here's the override for this instruction.
 Finally, we notice that the override doesn't use the TCGv_ptr variables, so
 we don't generate them when an override is present.  Here is what we generate
 when the override is present.
-    static void generate_V6_vaddw(
-                    CPUHexagonState *env,
-                    DisasContext *ctx,
-                    Insn *insn,
-                    Packet *pkt)
+    static void generate_V6_vaddw(DisasContext *ctx)
     {
+        Insn *insn __attribute__((unused)) = ctx->insn;
         const int VdN = insn->regno[0];
         const intptr_t VdV_off =
             ctx_future_vreg_off(ctx, VdN, 1, true);
@@ -189,10 +180,14 @@ when the override is present.
         const intptr_t VvV_off =
             vreg_src_off(ctx, VvN);
         fGEN_TCG_V6_vaddw({ fHIDE(int i;) fVFOREACH(32, i) { VdV.w[i] = VuV.w[i] + VvV.w[i] ; } });
-        gen_log_vreg_write(ctx, VdV_off, VdN, EXT_DFL, insn->slot, false);
-        ctx_log_vreg_write(ctx, VdN, EXT_DFL, false);
     }
 
+We also generate an analyze_<tag> function for each instruction.  Currently,
+these functions record the writes to registers by calling ctx_log_*.  During
+gen_start_packet, we invoke the analyze_<tag> function for each instruction in
+the packet, and we mark the implicit writes.  After the analysis is performed,
+we initialize hex_new_value for each of the predicated assignments.
+
 In addition to instruction semantics, we use a generator to create the decode
 tree.  This generation is also a two step process.  The first step is to run
 target/hexagon/gen_dectree_import.c to produce
@@ -277,10 +272,8 @@ For Hexagon Vector eXtensions (HVX), the following fields are used
     VRegs                       Vector registers
     future_VRegs                Registers to be stored during packet commit
     tmp_VRegs                   Temporary registers *not* stored during commit
-    VRegs_updated               Mask of predicated vector writes
     QRegs                       Q (vector predicate) registers
     future_QRegs                Registers to be stored during packet commit
-    QRegs_updated               Mask of predicated vector writes
 
 *** Debugging ***
 
diff --git a/target/hexagon/attribs_def.h.inc b/target/hexagon/attribs_def.h.inc
index 5d2a102c18..9874d1658f 100644
--- a/target/hexagon/attribs_def.h.inc
+++ b/target/hexagon/attribs_def.h.inc
@@ -44,6 +44,7 @@ DEF_ATTRIB(MEMSIZE_1B, "Memory width is 1 byte", "", "")
 DEF_ATTRIB(MEMSIZE_2B, "Memory width is 2 bytes", "", "")
 DEF_ATTRIB(MEMSIZE_4B, "Memory width is 4 bytes", "", "")
 DEF_ATTRIB(MEMSIZE_8B, "Memory width is 8 bytes", "", "")
+DEF_ATTRIB(SCALAR_LOAD, "Load is scalar", "", "")
 DEF_ATTRIB(SCALAR_STORE, "Store is scalar", "", "")
 DEF_ATTRIB(REGWRSIZE_1B, "Memory width is 1 byte", "", "")
 DEF_ATTRIB(REGWRSIZE_2B, "Memory width is 2 bytes", "", "")
diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index 34c0ae0a67..81b663ecfb 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -111,11 +111,8 @@ typedef struct CPUArchState {
     MMVector future_VRegs[VECTOR_TEMPS_MAX] QEMU_ALIGNED(16);
     MMVector tmp_VRegs[VECTOR_TEMPS_MAX] QEMU_ALIGNED(16);
 
-    VRegMask VRegs_updated;
-
     MMQReg QRegs[NUM_QREGS] QEMU_ALIGNED(16);
     MMQReg future_QRegs[NUM_QREGS] QEMU_ALIGNED(16);
-    QRegMask QRegs_updated;
 
     /* Temporaries used within instructions */
     MMVectorPair VuuV QEMU_ALIGNED(16);
diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py
new file mode 100755
index 0000000000..ebd3e7afb9
--- /dev/null
+++ b/target/hexagon/gen_analyze_funcs.py
@@ -0,0 +1,252 @@
+#!/usr/bin/env python3
+
+##
+##  Copyright(c) 2022-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+##
+##  This program is free software; you can redistribute it and/or modify
+##  it under the terms of the GNU General Public License as published by
+##  the Free Software Foundation; either version 2 of the License, or
+##  (at your option) any later version.
+##
+##  This program is distributed in the hope that it will be useful,
+##  but WITHOUT ANY WARRANTY; without even the implied warranty of
+##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+##  GNU General Public License for more details.
+##
+##  You should have received a copy of the GNU General Public License
+##  along with this program; if not, see <http://www.gnu.org/licenses/>.
+##
+
+import sys
+import re
+import string
+import hex_common
+
+##
+## Helpers for gen_analyze_func
+##
+def is_predicated(tag):
+    return 'A_CONDEXEC' in hex_common.attribdict[tag]
+
+def analyze_opn_old(f, tag, regtype, regid, regno):
+    regN = "%s%sN" % (regtype, regid)
+    predicated = "true" if is_predicated(tag) else "false"
+    if (regtype == "R"):
+        if (regid in {"ss", "tt"}):
+            f.write("//    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+        elif (regid in {"dd", "ee", "xx", "yy"}):
+            f.write("    const int %s = insn->regno[%d];\n" % (regN, regno))
+            f.write("    ctx_log_reg_write_pair(ctx, %s, %s);\n" % \
+                (regN, predicated))
+        elif (regid in {"s", "t", "u", "v"}):
+            f.write("//    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+        elif (regid in {"d", "e", "x", "y"}):
+            f.write("    const int %s = insn->regno[%d];\n" % (regN, regno))
+            f.write("    ctx_log_reg_write(ctx, %s, %s);\n" % \
+                (regN, predicated))
+        else:
+            print("Bad register parse: ", regtype, regid)
+    elif (regtype == "P"):
+        if (regid in {"s", "t", "u", "v"}):
+            f.write("//    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+        elif (regid in {"d", "e", "x"}):
+            f.write("    const int %s = insn->regno[%d];\n" % (regN, regno))
+            f.write("    ctx_log_pred_write(ctx, %s);\n" % (regN))
+        else:
+            print("Bad register parse: ", regtype, regid)
+    elif (regtype == "C"):
+        if (regid == "ss"):
+            f.write("//    const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \
+                (regN, regno))
+        elif (regid == "dd"):
+            f.write("    const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \
+                (regN, regno))
+            f.write("    ctx_log_reg_write_pair(ctx, %s, %s);\n" % \
+                (regN, predicated))
+        elif (regid == "s"):
+            f.write("//    const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \
+                (regN, regno))
+        elif (regid == "d"):
+            f.write("    const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \
+                (regN, regno))
+            f.write("    ctx_log_reg_write(ctx, %s, %s);\n" % \
+                (regN, predicated))
+        else:
+            print("Bad register parse: ", regtype, regid)
+    elif (regtype == "M"):
+        if (regid == "u"):
+            f.write("//    const int %s = insn->regno[%d];\n"% \
+                (regN, regno))
+        else:
+            print("Bad register parse: ", regtype, regid)
+    elif (regtype == "V"):
+        newv = "EXT_DFL"
+        if (hex_common.is_new_result(tag)):
+            newv = "EXT_NEW"
+        elif (hex_common.is_tmp_result(tag)):
+            newv = "EXT_TMP"
+        if (regid in {"dd", "xx"}):
+            f.write("    const int %s = insn->regno[%d];\n" %\
+                (regN, regno))
+            f.write("    ctx_log_vreg_write_pair(ctx, %s, %s, %s);\n" % \
+                (regN, newv, predicated))
+        elif (regid in {"uu", "vv"}):
+            f.write("//    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+        elif (regid in {"s", "u", "v", "w"}):
+            f.write("//    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+        elif (regid in {"d", "x", "y"}):
+            f.write("    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+            f.write("    ctx_log_vreg_write(ctx, %s, %s, %s);\n" % \
+                (regN, newv, predicated))
+        else:
+            print("Bad register parse: ", regtype, regid)
+    elif (regtype == "Q"):
+        if (regid in {"d", "e", "x"}):
+            f.write("    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+            f.write("    ctx_log_qreg_write(ctx, %s);\n" % (regN))
+        elif (regid in {"s", "t", "u", "v"}):
+            f.write("//    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+        else:
+            print("Bad register parse: ", regtype, regid)
+    elif (regtype == "G"):
+        if (regid in {"dd"}):
+            f.write("//    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+        elif (regid in {"d"}):
+            f.write("//    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+        elif (regid in {"ss"}):
+            f.write("//    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+        elif (regid in {"s"}):
+            f.write("//    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+        else:
+            print("Bad register parse: ", regtype, regid)
+    elif (regtype == "S"):
+        if (regid in {"dd"}):
+            f.write("//    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+        elif (regid in {"d"}):
+            f.write("//    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+        elif (regid in {"ss"}):
+            f.write("//    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+        elif (regid in {"s"}):
+            f.write("//    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+        else:
+            print("Bad register parse: ", regtype, regid)
+    else:
+        print("Bad register parse: ", regtype, regid)
+
+def analyze_opn_new(f, tag, regtype, regid, regno):
+    regN = "%s%sN" % (regtype, regid)
+    if (regtype == "N"):
+        if (regid in {"s", "t"}):
+            f.write("//    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+        else:
+            print("Bad register parse: ", regtype, regid)
+    elif (regtype == "P"):
+        if (regid in {"t", "u", "v"}):
+            f.write("//    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+        else:
+            print("Bad register parse: ", regtype, regid)
+    elif (regtype == "O"):
+        if (regid == "s"):
+            f.write("//    const int %s = insn->regno[%d];\n" % \
+                (regN, regno))
+        else:
+            print("Bad register parse: ", regtype, regid)
+    else:
+        print("Bad register parse: ", regtype, regid)
+
+def analyze_opn(f, tag, regtype, regid, toss, numregs, i):
+    if (hex_common.is_pair(regid)):
+        analyze_opn_old(f, tag, regtype, regid, i)
+    elif (hex_common.is_single(regid)):
+        if hex_common.is_old_val(regtype, regid, tag):
+            analyze_opn_old(f,tag, regtype, regid, i)
+        elif hex_common.is_new_val(regtype, regid, tag):
+            analyze_opn_new(f, tag, regtype, regid, i)
+        else:
+            print("Bad register parse: ", regtype, regid, toss, numregs)
+    else:
+        print("Bad register parse: ", regtype, regid, toss, numregs)
+
+##
+## Generate the code to analyze the instruction
+##     For A2_add: Rd32=add(Rs32,Rt32), { RdV=RsV+RtV;}
+##     We produce:
+##     static void analyze_A2_add(DisasContext *ctx)
+##     {
+##         Insn *insn G_GNUC_UNUSED = ctx->insn;
+##         const int RdN = insn->regno[0];
+##         ctx_log_reg_write(ctx, RdN, false);
+##     //    const int RsN = insn->regno[1];
+##     //    const int RtN = insn->regno[2];
+##     }
+##
+def gen_analyze_func(f, tag, regs, imms):
+    f.write("static void analyze_%s(DisasContext *ctx)\n" %tag)
+    f.write('{\n')
+
+    f.write("    Insn *insn G_GNUC_UNUSED = ctx->insn;\n")
+
+    i=0
+    ## Analyze all the registers
+    for regtype, regid, toss, numregs in regs:
+        analyze_opn(f, tag, regtype, regid, toss, numregs, i)
+        i += 1
+
+    has_generated_helper = (not hex_common.skip_qemu_helper(tag) and
+                            not hex_common.is_idef_parser_enabled(tag))
+    if (has_generated_helper and
+        'A_SCALAR_LOAD' in hex_common.attribdict[tag]):
+        f.write("    ctx->need_pkt_has_store_s1 = true;\n")
+
+    f.write("}\n\n")
+
+def main():
+    hex_common.read_semantics_file(sys.argv[1])
+    hex_common.read_attribs_file(sys.argv[2])
+    hex_common.read_overrides_file(sys.argv[3])
+    hex_common.read_overrides_file(sys.argv[4])
+    ## Whether or not idef-parser is enabled is
+    ## determined by the number of arguments to
+    ## this script:
+    ##
+    ##   5 args. -> not enabled,
+    ##   6 args. -> idef-parser enabled.
+    ##
+    ## The 6:th arg. then holds a list of the successfully
+    ## parsed instructions.
+    is_idef_parser_enabled = len(sys.argv) > 6
+    if is_idef_parser_enabled:
+        hex_common.read_idef_parser_enabled_file(sys.argv[5])
+    hex_common.calculate_attribs()
+    tagregs = hex_common.get_tagregs()
+    tagimms = hex_common.get_tagimms()
+
+    with open(sys.argv[-1], 'w') as f:
+        f.write("#ifndef HEXAGON_TCG_FUNCS_H\n")
+        f.write("#define HEXAGON_TCG_FUNCS_H\n\n")
+
+        for tag in hex_common.tags:
+            gen_analyze_func(f, tag, tagregs[tag], tagimms[tag])
+
+        f.write("#endif    /* HEXAGON_TCG_FUNCS_H */\n")
+
+if __name__ == "__main__":
+    main()
diff --git a/target/hexagon/gen_helper_funcs.py b/target/hexagon/gen_helper_funcs.py
index 19e9883f4c..7a224b66e6 100755
--- a/target/hexagon/gen_helper_funcs.py
+++ b/target/hexagon/gen_helper_funcs.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 ##
-##  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+##  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 ##
 ##  This program is free software; you can redistribute it and/or modify
 ##  it under the terms of the GNU General Public License as published by
@@ -226,6 +226,14 @@ def gen_helper_function(f, tag, tagregs, tagimms):
                     print("Bad register parse: ",regtype,regid,toss,numregs)
                 i += 1
 
+        ## For conditional instructions, we pass in the destination register
+        if 'A_CONDEXEC' in hex_common.attribdict[tag]:
+            for regtype, regid, toss, numregs in regs:
+                if (hex_common.is_writeonly(regid) and
+                    not hex_common.is_hvx_reg(regtype)):
+                    gen_helper_arg_opn(f, regtype, regid, i, tag)
+                    i += 1
+
         ## Arguments to the helper function are the source regs and immediates
         for regtype,regid,toss,numregs in regs:
             if (hex_common.is_read(regid)):
@@ -262,10 +270,11 @@ def gen_helper_function(f, tag, tagregs, tagimms):
         if hex_common.need_ea(tag): gen_decl_ea(f)
         ## Declare the return variable
         i=0
-        for regtype,regid,toss,numregs in regs:
-            if (hex_common.is_writeonly(regid)):
-                gen_helper_dest_decl_opn(f,regtype,regid,i)
-            i += 1
+        if 'A_CONDEXEC' not in hex_common.attribdict[tag]:
+            for regtype,regid,toss,numregs in regs:
+                if (hex_common.is_writeonly(regid)):
+                    gen_helper_dest_decl_opn(f,regtype,regid,i)
+                i += 1
 
         for regtype,regid,toss,numregs in regs:
             if (hex_common.is_read(regid)):
diff --git a/target/hexagon/gen_helper_protos.py b/target/hexagon/gen_helper_protos.py
index 674bf370fa..ddddc9e4f0 100755
--- a/target/hexagon/gen_helper_protos.py
+++ b/target/hexagon/gen_helper_protos.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 ##
-##  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+##  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 ##
 ##  This program is free software; you can redistribute it and/or modify
 ##  it under the terms of the GNU General Public License as published by
@@ -87,6 +87,7 @@ def gen_helper_prototype(f, tag, tagregs, tagimms):
             if hex_common.need_slot(tag): def_helper_size += 1
             if hex_common.need_PC(tag): def_helper_size += 1
             if hex_common.helper_needs_next_PC(tag): def_helper_size += 1
+            if hex_common.need_condexec_reg(tag, regs): def_helper_size += 1
             f.write('DEF_HELPER_%s(%s' % (def_helper_size, tag))
             ## The return type is void
             f.write(', void' )
@@ -96,6 +97,7 @@ def gen_helper_prototype(f, tag, tagregs, tagimms):
             if hex_common.need_part1(tag): def_helper_size += 1
             if hex_common.need_slot(tag): def_helper_size += 1
             if hex_common.need_PC(tag): def_helper_size += 1
+            if hex_common.need_condexec_reg(tag, regs): def_helper_size += 1
             if hex_common.helper_needs_next_PC(tag): def_helper_size += 1
             f.write('DEF_HELPER_%s(%s' % (def_helper_size, tag))
 
@@ -121,6 +123,14 @@ def gen_helper_prototype(f, tag, tagregs, tagimms):
                     gen_def_helper_opn(f, tag, regtype, regid, toss, numregs, i)
                     i += 1
 
+        ## For conditional instructions, we pass in the destination register
+        if 'A_CONDEXEC' in hex_common.attribdict[tag]:
+            for regtype, regid, toss, numregs in regs:
+                if (hex_common.is_writeonly(regid) and
+                    not hex_common.is_hvx_reg(regtype)):
+                    gen_def_helper_opn(f, tag, regtype, regid, toss, numregs, i)
+                    i += 1
+
         ## Generate the qemu type for each input operand (regs and immediates)
         for regtype,regid,toss,numregs in regs:
             if (hex_common.is_read(regid)):
diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index b2e7880b5c..bcf0cf466a 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -332,8 +332,6 @@
         tcg_gen_movi_tl(EA, 0); \
         PRED;  \
         CHECK_NOSHUF_PRED(GET_EA, SIZE, LSB); \
-        PRED_LOAD_CANCEL(LSB, EA); \
-        tcg_gen_movi_tl(RdV, 0); \
         tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, label); \
         fLOAD(1, SIZE, SIGN, EA, RdV); \
         gen_set_label(label); \
@@ -391,8 +389,6 @@
         tcg_gen_movi_tl(EA, 0); \
         PRED;  \
         CHECK_NOSHUF_PRED(GET_EA, 8, LSB); \
-        PRED_LOAD_CANCEL(LSB, EA); \
-        tcg_gen_movi_i64(RddV, 0); \
         tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, label); \
         fLOAD(1, 8, u, EA, RddV); \
         gen_set_label(label); \
@@ -419,16 +415,16 @@
 
 #define fGEN_TCG_STORE(SHORTCODE) \
     do { \
-        TCGv HALF = tcg_temp_new(); \
-        TCGv BYTE = tcg_temp_new(); \
+        TCGv HALF G_GNUC_UNUSED = tcg_temp_new(); \
+        TCGv BYTE G_GNUC_UNUSED = tcg_temp_new(); \
         SHORTCODE; \
     } while (0)
 
 #define fGEN_TCG_STORE_pcr(SHIFT, STORE) \
     do { \
         TCGv ireg = tcg_temp_new(); \
-        TCGv HALF = tcg_temp_new(); \
-        TCGv BYTE = tcg_temp_new(); \
+        TCGv HALF G_GNUC_UNUSED = tcg_temp_new(); \
+        TCGv BYTE G_GNUC_UNUSED = tcg_temp_new(); \
         tcg_gen_mov_tl(EA, RxV); \
         gen_read_ireg(ireg, MuV, SHIFT); \
         gen_helper_fcircadd(RxV, RxV, ireg, MuV, hex_gpr[HEX_REG_CS0 + MuN]); \
@@ -492,6 +488,59 @@
     fGEN_TCG_STORE_pcr(2, fSTORE(1, 4, EA, NtN))
 
 /*
+ * dealloc_return
+ * Assembler mapped to
+ * r31:30 = dealloc_return(r30):raw
+ */
+#define fGEN_TCG_L4_return(SHORTCODE) \
+    gen_return(ctx, RddV, RsV)
+
+/*
+ * sub-instruction version (no RddV, so handle it manually)
+ */
+#define fGEN_TCG_SL2_return(SHORTCODE) \
+    do { \
+        TCGv_i64 RddV = get_result_gpr_pair(ctx, HEX_REG_FP); \
+        gen_return(ctx, RddV, hex_gpr[HEX_REG_FP]); \
+        gen_log_reg_write_pair(HEX_REG_FP, RddV); \
+    } while (0)
+
+/*
+ * Conditional returns follow this naming convention
+ *     _t                 predicate true
+ *     _f                 predicate false
+ *     _tnew_pt           predicate.new true predict taken
+ *     _fnew_pt           predicate.new false predict taken
+ *     _tnew_pnt          predicate.new true predict not taken
+ *     _fnew_pnt          predicate.new false predict not taken
+ * Predictions are not modelled in QEMU
+ *
+ * Example:
+ *     if (p1) r31:30 = dealloc_return(r30):raw
+ */
+#define fGEN_TCG_L4_return_t(SHORTCODE) \
+    gen_cond_return(ctx, RddV, RsV, PvV, TCG_COND_EQ);
+#define fGEN_TCG_L4_return_f(SHORTCODE) \
+    gen_cond_return(ctx, RddV, RsV, PvV, TCG_COND_NE)
+#define fGEN_TCG_L4_return_tnew_pt(SHORTCODE) \
+    gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_EQ)
+#define fGEN_TCG_L4_return_fnew_pt(SHORTCODE) \
+    gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_NE)
+#define fGEN_TCG_L4_return_tnew_pnt(SHORTCODE) \
+    gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_EQ)
+#define fGEN_TCG_L4_return_fnew_pnt(SHORTCODE) \
+    gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_NE)
+
+#define fGEN_TCG_SL2_return_t(SHORTCODE) \
+    gen_cond_return_subinsn(ctx, TCG_COND_EQ, hex_pred[0])
+#define fGEN_TCG_SL2_return_f(SHORTCODE) \
+    gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_pred[0])
+#define fGEN_TCG_SL2_return_tnew(SHORTCODE) \
+    gen_cond_return_subinsn(ctx, TCG_COND_EQ, hex_new_pred_value[0])
+#define fGEN_TCG_SL2_return_fnew(SHORTCODE) \
+    gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_new_pred_value[0])
+
+/*
  * Mathematical operations with more than one definition require
  * special handling
  */
@@ -589,14 +638,24 @@
 
 #define fGEN_TCG_J2_call(SHORTCODE) \
     gen_call(ctx, riV)
+#define fGEN_TCG_J2_callr(SHORTCODE) \
+    gen_callr(ctx, RsV)
 
 #define fGEN_TCG_J2_callt(SHORTCODE) \
     gen_cond_call(ctx, PuV, TCG_COND_EQ, riV)
 #define fGEN_TCG_J2_callf(SHORTCODE) \
     gen_cond_call(ctx, PuV, TCG_COND_NE, riV)
+#define fGEN_TCG_J2_callrt(SHORTCODE) \
+    gen_cond_callr(ctx, TCG_COND_EQ, PuV, RsV)
+#define fGEN_TCG_J2_callrf(SHORTCODE) \
+    gen_cond_callr(ctx, TCG_COND_NE, PuV, RsV)
 
 #define fGEN_TCG_J2_endloop0(SHORTCODE) \
     gen_endloop0(ctx)
+#define fGEN_TCG_J2_endloop1(SHORTCODE) \
+    gen_endloop1(ctx)
+#define fGEN_TCG_J2_endloop01(SHORTCODE) \
+    gen_endloop01(ctx)
 
 /*
  * Compound compare and jump instructions
@@ -986,6 +1045,19 @@
 #define fGEN_TCG_S2_asl_r_r_sat(SHORTCODE) \
     gen_asl_r_r_sat(RdV, RsV, RtV)
 
+#define fGEN_TCG_SL2_jumpr31(SHORTCODE) \
+    gen_jumpr(ctx, hex_gpr[HEX_REG_LR])
+
+#define fGEN_TCG_SL2_jumpr31_t(SHORTCODE) \
+    gen_cond_jumpr31(ctx, TCG_COND_EQ, hex_pred[0])
+#define fGEN_TCG_SL2_jumpr31_f(SHORTCODE) \
+    gen_cond_jumpr31(ctx, TCG_COND_NE, hex_pred[0])
+
+#define fGEN_TCG_SL2_jumpr31_tnew(SHORTCODE) \
+    gen_cond_jumpr31(ctx, TCG_COND_EQ, hex_new_pred_value[0])
+#define fGEN_TCG_SL2_jumpr31_fnew(SHORTCODE) \
+    gen_cond_jumpr31(ctx, TCG_COND_NE, hex_new_pred_value[0])
+
 /* Floating point */
 #define fGEN_TCG_F2_conv_sf2df(SHORTCODE) \
     gen_helper_conv_sf2df(RddV, cpu_env, RsV)
diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py
index 02cb52c21e..fa93e185ce 100755
--- a/target/hexagon/gen_tcg_funcs.py
+++ b/target/hexagon/gen_tcg_funcs.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 ##
-##  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+##  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 ##
 ##  This program is free software; you can redistribute it and/or modify
 ##  it under the terms of the GNU General Public License as published by
@@ -30,37 +30,33 @@ def gen_decl_ea_tcg(f, tag):
 
 def genptr_decl_pair_writable(f, tag, regtype, regid, regno):
     regN="%s%sN" % (regtype,regid)
-    f.write("    TCGv_i64 %s%sV = tcg_temp_new_i64();\n" % \
-        (regtype, regid))
-    if (regtype == "C"):
+    if (regtype == "R"):
+        f.write("    const int %s = insn->regno[%d];\n" % (regN, regno))
+    elif (regtype == "C"):
         f.write("    const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \
             (regN, regno))
     else:
-        f.write("    const int %s = insn->regno[%d];\n" % (regN, regno))
-    if ('A_CONDEXEC' in hex_common.attribdict[tag]):
-        f.write("    if (!is_preloaded(ctx, %s)) {\n" % regN)
-        f.write("        tcg_gen_mov_tl(hex_new_value[%s], hex_gpr[%s]);\n" % \
-                             (regN, regN))
-        f.write("    }\n")
-        f.write("    if (!is_preloaded(ctx, %s + 1)) {\n" % regN)
-        f.write("        tcg_gen_mov_tl(hex_new_value[%s + 1], hex_gpr[%s + 1]);\n" % \
-                             (regN, regN))
-        f.write("    }\n")
+        print("Bad register parse: ", regtype, regid)
+    f.write("    TCGv_i64 %s%sV = get_result_gpr_pair(ctx, %s);\n" % \
+        (regtype, regid, regN))
 
 def genptr_decl_writable(f, tag, regtype, regid, regno):
     regN="%s%sN" % (regtype,regid)
-    f.write("    TCGv %s%sV = tcg_temp_new();\n" % \
-        (regtype, regid))
-    if (regtype == "C"):
+    if (regtype == "R"):
+        f.write("    const int %s = insn->regno[%d];\n" % (regN, regno))
+        f.write("    TCGv %s%sV = get_result_gpr(ctx, %s);\n" % \
+            (regtype, regid, regN))
+    elif (regtype == "C"):
         f.write("    const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \
             (regN, regno))
-    else:
+        f.write("    TCGv %s%sV = get_result_gpr(ctx, %s);\n" % \
+            (regtype, regid, regN))
+    elif (regtype == "P"):
         f.write("    const int %s = insn->regno[%d];\n" % (regN, regno))
-    if ('A_CONDEXEC' in hex_common.attribdict[tag]):
-        f.write("    if (!is_preloaded(ctx, %s)) {\n" % regN)
-        f.write("        tcg_gen_mov_tl(hex_new_value[%s], hex_gpr[%s]);\n" % \
-                             (regN, regN))
-        f.write("    }\n")
+        f.write("    TCGv %s%sV = tcg_temp_new();\n" % \
+            (regtype, regid))
+    else:
+        print("Bad register parse: ", regtype, regid)
 
 def genptr_decl(f, tag, regtype, regid, regno):
     regN="%s%sN" % (regtype,regid)
@@ -166,17 +162,6 @@ def genptr_decl(f, tag, regtype, regid, regno):
                 f.write("        ctx_future_vreg_off(ctx, %s%sN," % \
                     (regtype, regid))
                 f.write(" 1, true);\n");
-            if 'A_CONDEXEC' in hex_common.attribdict[tag]:
-                f.write("    if (!is_vreg_preloaded(ctx, %s)) {\n" % (regN))
-                f.write("        intptr_t src_off =")
-                f.write(" offsetof(CPUHexagonState, VRegs[%s%sN]);\n"% \
-                                     (regtype, regid))
-                f.write("        tcg_gen_gvec_mov(MO_64, %s%sV_off,\n" % \
-                                     (regtype, regid))
-                f.write("                         src_off,\n")
-                f.write("                         sizeof(MMVector),\n")
-                f.write("                         sizeof(MMVector));\n")
-                f.write("    }\n")
 
             if (not hex_common.skip_qemu_helper(tag)):
                 f.write("    TCGv_ptr %s%sV = tcg_temp_new_ptr();\n" % \
@@ -191,8 +176,7 @@ def genptr_decl(f, tag, regtype, regid, regno):
                 (regtype, regid, regno))
             f.write("    const intptr_t %s%sV_off =\n" % \
                 (regtype, regid))
-            f.write("        offsetof(CPUHexagonState,\n")
-            f.write("                 future_QRegs[%s%sN]);\n" % \
+            f.write("        get_result_qreg(ctx, %s%sN);\n" % \
                 (regtype, regid))
             if (not hex_common.skip_qemu_helper(tag)):
                 f.write("    TCGv_ptr %s%sV = tcg_temp_new_ptr();\n" % \
@@ -274,8 +258,12 @@ def genptr_src_read(f, tag, regtype, regid):
             f.write("                                 hex_gpr[%s%sN + 1]);\n" % \
                 (regtype, regid))
         elif (regid in {"x", "y"}):
-            f.write("    tcg_gen_mov_tl(%s%sV, hex_gpr[%s%sN]);\n" % \
-                (regtype,regid,regtype,regid))
+            ## For read/write registers, we need to get the original value into
+            ## the result TCGv.  For conditional instructions, this is done in
+            ## gen_start_packet.  For unconditional instructions, we do it here.
+            if ('A_CONDEXEC' not in hex_common.attribdict[tag]):
+                f.write("    tcg_gen_mov_tl(%s%sV, hex_gpr[%s%sN]);\n" % \
+                    (regtype, regid, regtype, regid))
         elif (regid not in {"s", "t", "u", "v"}):
             print("Bad register parse: ", regtype, regid)
     elif (regtype == "P"):
@@ -385,37 +373,22 @@ def gen_helper_call_imm(f,immlett):
     f.write(", tcgv_%s" % hex_common.imm_name(immlett))
 
 def genptr_dst_write_pair(f, tag, regtype, regid):
-    if ('A_CONDEXEC' in hex_common.attribdict[tag]):
-        f.write("    gen_log_predicated_reg_write_pair(%s%sN, %s%sV, insn->slot);\n" % \
-            (regtype, regid, regtype, regid))
-    else:
-        f.write("    gen_log_reg_write_pair(%s%sN, %s%sV);\n" % \
-            (regtype, regid, regtype, regid))
-    f.write("    ctx_log_reg_write_pair(ctx, %s%sN);\n" % \
-        (regtype, regid))
+    f.write("    gen_log_reg_write_pair(%s%sN, %s%sV);\n" % \
+        (regtype, regid, regtype, regid))
 
 def genptr_dst_write(f, tag, regtype, regid):
     if (regtype == "R"):
         if (regid in {"dd", "xx", "yy"}):
             genptr_dst_write_pair(f, tag, regtype, regid)
         elif (regid in {"d", "e", "x", "y"}):
-            if ('A_CONDEXEC' in hex_common.attribdict[tag]):
-                f.write("    gen_log_predicated_reg_write(%s%sN, %s%sV,\n" % \
-                    (regtype, regid, regtype, regid))
-                f.write("                                 insn->slot);\n")
-            else:
-                f.write("    gen_log_reg_write(%s%sN, %s%sV);\n" % \
-                    (regtype, regid, regtype, regid))
-            f.write("    ctx_log_reg_write(ctx, %s%sN);\n" % \
-                (regtype, regid))
+            f.write("    gen_log_reg_write(%s%sN, %s%sV);\n" % \
+                (regtype, regid, regtype, regid))
         else:
             print("Bad register parse: ", regtype, regid)
     elif (regtype == "P"):
         if (regid in {"d", "e", "x"}):
             f.write("    gen_log_pred_write(ctx, %s%sN, %s%sV);\n" % \
                 (regtype, regid, regtype, regid))
-            f.write("    ctx_log_pred_write(ctx, %s%sN);\n" % \
-                (regtype, regid))
         else:
             print("Bad register parse: ", regtype, regid)
     elif (regtype == "C"):
@@ -432,43 +405,18 @@ def genptr_dst_write(f, tag, regtype, regid):
 
 def genptr_dst_write_ext(f, tag, regtype, regid, newv="EXT_DFL"):
     if (regtype == "V"):
-        if (regid in {"dd", "xx", "yy"}):
-            if ('A_CONDEXEC' in hex_common.attribdict[tag]):
-                is_predicated = "true"
-            else:
-                is_predicated = "false"
+        if (regid in {"xx"}):
             f.write("    gen_log_vreg_write_pair(ctx, %s%sV_off, %s%sN, " % \
                 (regtype, regid, regtype, regid))
-            f.write("%s, insn->slot, %s);\n" % \
-                (newv, is_predicated))
-            f.write("    ctx_log_vreg_write_pair(ctx, %s%sN, %s,\n" % \
-                (regtype, regid, newv))
-            f.write("        %s);\n" % (is_predicated))
-        elif (regid in {"d", "x", "y"}):
-            if ('A_CONDEXEC' in hex_common.attribdict[tag]):
-                is_predicated = "true"
-            else:
-                is_predicated = "false"
-            f.write("    gen_log_vreg_write(ctx, %s%sV_off, %s%sN, %s, " % \
+            f.write("%s);\n" % \
+                (newv))
+        elif (regid in {"y"}):
+            f.write("    gen_log_vreg_write(ctx, %s%sV_off, %s%sN, %s);\n" % \
                 (regtype, regid, regtype, regid, newv))
-            f.write("insn->slot, %s);\n" % \
-                (is_predicated))
-            f.write("    ctx_log_vreg_write(ctx, %s%sN, %s, %s);\n" % \
-                (regtype, regid, newv, is_predicated))
-        else:
+        elif (regid not in {"dd", "d", "x"}):
             print("Bad register parse: ", regtype, regid)
     elif (regtype == "Q"):
-        if (regid in {"d", "e", "x"}):
-            if ('A_CONDEXEC' in hex_common.attribdict[tag]):
-                is_predicated = "true"
-            else:
-                is_predicated = "false"
-            f.write("    gen_log_qreg_write(%s%sV_off, %s%sN, %s, " % \
-                (regtype, regid, regtype, regid, newv))
-            f.write("insn->slot, %s);\n" % (is_predicated))
-            f.write("    ctx_log_qreg_write(ctx, %s%sN, %s);\n" % \
-                (regtype, regid, is_predicated))
-        else:
+        if (regid not in {"d", "e", "x"}):
             print("Bad register parse: ", regtype, regid)
     else:
         print("Bad register parse: ", regtype, regid)
@@ -500,15 +448,15 @@ def genptr_dst_write_opn(f,regtype, regid, tag):
 ##     For A2_add: Rd32=add(Rs32,Rt32), { RdV=RsV+RtV;}
 ##     We produce:
 ##    static void generate_A2_add(DisasContext *ctx)
-##       {
-##           TCGv RdV = tcg_temp_new();
-##           const int RdN = insn->regno[0];
-##           TCGv RsV = hex_gpr[insn->regno[1]];
-##           TCGv RtV = hex_gpr[insn->regno[2]];
-##           <GEN>
-##           gen_log_reg_write(RdN, RdV);
-##           ctx_log_reg_write(ctx, RdN);
-##       }
+##    {
+##        Insn *insn __attribute__((unused)) = ctx->insn;
+##        const int RdN = insn->regno[0];
+##        TCGv RdV = get_result_gpr(ctx, RdN);
+##        TCGv RsV = hex_gpr[insn->regno[1]];
+##        TCGv RtV = hex_gpr[insn->regno[2]];
+##        <GEN>
+##        gen_log_reg_write(RdN, RdV);
+##    }
 ##
 ##       where <GEN> depends on hex_common.skip_qemu_helper(tag)
 ##       if hex_common.skip_qemu_helper(tag) is True
@@ -592,6 +540,14 @@ def gen_tcg_func(f, tag, regs, imms):
         if (i > 0): f.write(", ")
         f.write("cpu_env")
         i=1
+        ## For conditional instructions, we pass in the destination register
+        if 'A_CONDEXEC' in hex_common.attribdict[tag]:
+            for regtype, regid, toss, numregs in regs:
+                if (hex_common.is_writeonly(regid) and
+                    not hex_common.is_hvx_reg(regtype)):
+                    gen_helper_call_opn(f, tag, regtype, regid, toss, \
+                                        numregs, i)
+                    i += 1
         for regtype,regid,toss,numregs in regs:
             if (hex_common.is_written(regid)):
                 if (not hex_common.is_hvx_reg(regtype)):
diff --git a/target/hexagon/gen_tcg_hvx.h b/target/hexagon/gen_tcg_hvx.h
index 94f272e286..d4aefe8e3f 100644
--- a/target/hexagon/gen_tcg_hvx.h
+++ b/target/hexagon/gen_tcg_hvx.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -133,16 +133,11 @@ static inline void assert_vhist_tmp(DisasContext *ctx)
     do { \
         TCGv lsb = tcg_temp_new(); \
         TCGLabel *false_label = gen_new_label(); \
-        TCGLabel *end_label = gen_new_label(); \
         tcg_gen_andi_tl(lsb, PsV, 1); \
         tcg_gen_brcondi_tl(TCG_COND_NE, lsb, PRED, false_label); \
         tcg_gen_gvec_mov(MO_64, VdV_off, VuV_off, \
                          sizeof(MMVector), sizeof(MMVector)); \
-        tcg_gen_br(end_label); \
         gen_set_label(false_label); \
-        tcg_gen_ori_tl(hex_slot_cancelled, hex_slot_cancelled, \
-                       1 << insn->slot); \
-        gen_set_label(end_label); \
     } while (0)
 
 
@@ -547,17 +542,12 @@ static inline void assert_vhist_tmp(DisasContext *ctx)
     do { \
         TCGv LSB = tcg_temp_new(); \
         TCGLabel *false_label = gen_new_label(); \
-        TCGLabel *end_label = gen_new_label(); \
         GET_EA; \
         PRED; \
         tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, false_label); \
         gen_vreg_load(ctx, DSTOFF, EA, true); \
         INC; \
-        tcg_gen_br(end_label); \
         gen_set_label(false_label); \
-        tcg_gen_ori_tl(hex_slot_cancelled, hex_slot_cancelled, \
-                       1 << insn->slot); \
-        gen_set_label(end_label); \
     } while (0)
 
 #define fGEN_TCG_PRED_VEC_LOAD_pred_pi \
@@ -717,17 +707,12 @@ static inline void assert_vhist_tmp(DisasContext *ctx)
     do { \
         TCGv LSB = tcg_temp_new(); \
         TCGLabel *false_label = gen_new_label(); \
-        TCGLabel *end_label = gen_new_label(); \
         GET_EA; \
         PRED; \
         tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, false_label); \
         gen_vreg_store(ctx, EA, SRCOFF, insn->slot, ALIGN); \
         INC; \
-        tcg_gen_br(end_label); \
         gen_set_label(false_label); \
-        tcg_gen_ori_tl(hex_slot_cancelled, hex_slot_cancelled, \
-                       1 << insn->slot); \
-        gen_set_label(end_label); \
     } while (0)
 
 #define fGEN_TCG_PRED_VEC_STORE_pred_pi(ALIGN) \
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index 86bd093ce8..bb274d4a71 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -68,26 +68,17 @@ static inline void gen_masked_reg_write(TCGv new_val, TCGv cur_val,
     }
 }
 
-static inline void gen_log_predicated_reg_write(int rnum, TCGv val,
-                                                uint32_t slot)
+static TCGv get_result_gpr(DisasContext *ctx, int rnum)
 {
-    TCGv zero = tcg_constant_tl(0);
-    TCGv slot_mask = tcg_temp_new();
+    return hex_new_value[rnum];
+}
 
-    tcg_gen_andi_tl(slot_mask, hex_slot_cancelled, 1 << slot);
-    tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum], slot_mask, zero,
-                           val, hex_new_value[rnum]);
-    if (HEX_DEBUG) {
-        /*
-         * Do this so HELPER(debug_commit_end) will know
-         *
-         * Note that slot_mask indicates the value is not written
-         * (i.e., slot was cancelled), so we create a true/false value before
-         * or'ing with hex_reg_written[rnum].
-         */
-        tcg_gen_setcond_tl(TCG_COND_EQ, slot_mask, slot_mask, zero);
-        tcg_gen_or_tl(hex_reg_written[rnum], hex_reg_written[rnum], slot_mask);
-    }
+static TCGv_i64 get_result_gpr_pair(DisasContext *ctx, int rnum)
+{
+    TCGv_i64 result = tcg_temp_new_i64();
+    tcg_gen_concat_i32_i64(result, hex_new_value[rnum],
+                                   hex_new_value[rnum + 1]);
+    return result;
 }
 
 void gen_log_reg_write(int rnum, TCGv val)
@@ -102,39 +93,6 @@ void gen_log_reg_write(int rnum, TCGv val)
     }
 }
 
-static void gen_log_predicated_reg_write_pair(int rnum, TCGv_i64 val,
-                                              uint32_t slot)
-{
-    TCGv val32 = tcg_temp_new();
-    TCGv zero = tcg_constant_tl(0);
-    TCGv slot_mask = tcg_temp_new();
-
-    tcg_gen_andi_tl(slot_mask, hex_slot_cancelled, 1 << slot);
-    /* Low word */
-    tcg_gen_extrl_i64_i32(val32, val);
-    tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum],
-                       slot_mask, zero,
-                       val32, hex_new_value[rnum]);
-    /* High word */
-    tcg_gen_extrh_i64_i32(val32, val);
-    tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum + 1],
-                       slot_mask, zero,
-                       val32, hex_new_value[rnum + 1]);
-    if (HEX_DEBUG) {
-        /*
-         * Do this so HELPER(debug_commit_end) will know
-         *
-         * Note that slot_mask indicates the value is not written
-         * (i.e., slot was cancelled), so we create a true/false value before
-         * or'ing with hex_reg_written[rnum].
-         */
-        tcg_gen_setcond_tl(TCG_COND_EQ, slot_mask, slot_mask, zero);
-        tcg_gen_or_tl(hex_reg_written[rnum], hex_reg_written[rnum], slot_mask);
-        tcg_gen_or_tl(hex_reg_written[rnum + 1], hex_reg_written[rnum + 1],
-                      slot_mask);
-    }
-}
-
 static void gen_log_reg_write_pair(int rnum, TCGv_i64 val)
 {
     const target_ulong reg_mask_low = reg_immut_masks[rnum];
@@ -180,6 +138,7 @@ void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val)
                        hex_new_pred_value[pnum], base_val);
     }
     tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pnum);
+    set_bit(pnum, ctx->pregs_written);
 }
 
 static inline void gen_read_p3_0(TCGv control_reg)
@@ -256,7 +215,6 @@ static void gen_write_p3_0(DisasContext *ctx, TCGv control_reg)
     for (int i = 0; i < NUM_PREGS; i++) {
         tcg_gen_extract_tl(hex_p8, control_reg, i * 8, 8);
         gen_log_pred_write(ctx, i, hex_p8);
-        ctx_log_pred_write(ctx, i);
     }
 }
 
@@ -274,7 +232,6 @@ static inline void gen_write_ctrl_reg(DisasContext *ctx, int reg_num,
         gen_write_p3_0(ctx, val);
     } else {
         gen_log_reg_write(reg_num, val);
-        ctx_log_reg_write(ctx, reg_num);
         if (reg_num == HEX_REG_QEMU_PKT_CNT) {
             ctx->num_packets = 0;
         }
@@ -291,15 +248,14 @@ static inline void gen_write_ctrl_reg_pair(DisasContext *ctx, int reg_num,
                                            TCGv_i64 val)
 {
     if (reg_num == HEX_REG_P3_0_ALIASED) {
+        TCGv result = get_result_gpr(ctx, reg_num + 1);
         TCGv val32 = tcg_temp_new();
         tcg_gen_extrl_i64_i32(val32, val);
         gen_write_p3_0(ctx, val32);
         tcg_gen_extrh_i64_i32(val32, val);
-        gen_log_reg_write(reg_num + 1, val32);
-        ctx_log_reg_write(ctx, reg_num + 1);
+        tcg_gen_mov_tl(result, val32);
     } else {
         gen_log_reg_write_pair(reg_num, val);
-        ctx_log_reg_write_pair(ctx, reg_num);
         if (reg_num == HEX_REG_QEMU_PKT_CNT) {
             ctx->num_packets = 0;
             ctx->num_insns = 0;
@@ -571,6 +527,13 @@ static void gen_cond_jumpr(DisasContext *ctx, TCGv dst_pc,
     gen_write_new_pc_addr(ctx, dst_pc, cond, pred);
 }
 
+static void gen_cond_jumpr31(DisasContext *ctx, TCGCond cond, TCGv pred)
+{
+    TCGv LSB = tcg_temp_new();
+    tcg_gen_andi_tl(LSB, pred, 1);
+    gen_cond_jumpr(ctx, hex_gpr[HEX_REG_LR], cond, LSB);
+}
+
 static void gen_cond_jump(DisasContext *ctx, TCGCond cond, TCGv pred,
                           int pc_off)
 {
@@ -669,27 +632,99 @@ static void gen_jumpr(DisasContext *ctx, TCGv new_pc)
 
 static void gen_call(DisasContext *ctx, int pc_off)
 {
-    TCGv next_PC =
-        tcg_constant_tl(ctx->pkt->pc + ctx->pkt->encod_pkt_size_in_bytes);
-    gen_log_reg_write(HEX_REG_LR, next_PC);
+    TCGv lr = get_result_gpr(ctx, HEX_REG_LR);
+    tcg_gen_movi_tl(lr, ctx->next_PC);
     gen_write_new_pc_pcrel(ctx, pc_off, TCG_COND_ALWAYS, NULL);
 }
 
+static void gen_callr(DisasContext *ctx, TCGv new_pc)
+{
+    TCGv lr = get_result_gpr(ctx, HEX_REG_LR);
+    tcg_gen_movi_tl(lr, ctx->next_PC);
+    gen_write_new_pc_addr(ctx, new_pc, TCG_COND_ALWAYS, NULL);
+}
+
 static void gen_cond_call(DisasContext *ctx, TCGv pred,
                           TCGCond cond, int pc_off)
 {
-    TCGv next_PC;
+    TCGv lr = get_result_gpr(ctx, HEX_REG_LR);
     TCGv lsb = tcg_temp_new();
     TCGLabel *skip = gen_new_label();
     tcg_gen_andi_tl(lsb, pred, 1);
     gen_write_new_pc_pcrel(ctx, pc_off, cond, lsb);
     tcg_gen_brcondi_tl(cond, lsb, 0, skip);
-    next_PC =
-        tcg_constant_tl(ctx->pkt->pc + ctx->pkt->encod_pkt_size_in_bytes);
-    gen_log_reg_write(HEX_REG_LR, next_PC);
+    tcg_gen_movi_tl(lr, ctx->next_PC);
     gen_set_label(skip);
 }
 
+static void gen_cond_callr(DisasContext *ctx,
+                           TCGCond cond, TCGv pred, TCGv new_pc)
+{
+    TCGv lsb = tcg_temp_new();
+    TCGLabel *skip = gen_new_label();
+    tcg_gen_andi_tl(lsb, pred, 1);
+    tcg_gen_brcondi_tl(cond, lsb, 0, skip);
+    gen_callr(ctx, new_pc);
+    gen_set_label(skip);
+}
+
+/* frame ^= (int64_t)FRAMEKEY << 32 */
+static void gen_frame_unscramble(TCGv_i64 frame)
+{
+    TCGv_i64 framekey = tcg_temp_new_i64();
+    tcg_gen_extu_i32_i64(framekey, hex_gpr[HEX_REG_FRAMEKEY]);
+    tcg_gen_shli_i64(framekey, framekey, 32);
+    tcg_gen_xor_i64(frame, frame, framekey);
+}
+
+static void gen_load_frame(DisasContext *ctx, TCGv_i64 frame, TCGv EA)
+{
+    Insn *insn = ctx->insn;  /* Needed for CHECK_NOSHUF */
+    CHECK_NOSHUF(EA, 8);
+    tcg_gen_qemu_ld64(frame, EA, ctx->mem_idx);
+}
+
+static void gen_return(DisasContext *ctx, TCGv_i64 dst, TCGv src)
+{
+    /*
+     * frame = *src
+     * dst = frame_unscramble(frame)
+     * SP = src + 8
+     * PC = dst.w[1]
+     */
+    TCGv_i64 frame = tcg_temp_new_i64();
+    TCGv r31 = tcg_temp_new();
+    TCGv r29 = get_result_gpr(ctx, HEX_REG_SP);
+
+    gen_load_frame(ctx, frame, src);
+    gen_frame_unscramble(frame);
+    tcg_gen_mov_i64(dst, frame);
+    tcg_gen_addi_tl(r29, src, 8);
+    tcg_gen_extrh_i64_i32(r31, dst);
+    gen_jumpr(ctx, r31);
+}
+
+/* if (pred) dst = dealloc_return(src):raw */
+static void gen_cond_return(DisasContext *ctx, TCGv_i64 dst, TCGv src,
+                            TCGv pred, TCGCond cond)
+{
+    TCGv LSB = tcg_temp_new();
+    TCGLabel *skip = gen_new_label();
+    tcg_gen_andi_tl(LSB, pred, 1);
+
+    tcg_gen_brcondi_tl(cond, LSB, 0, skip);
+    gen_return(ctx, dst, src);
+    gen_set_label(skip);
+}
+
+/* sub-instruction version (no RddV, so handle it manually) */
+static void gen_cond_return_subinsn(DisasContext *ctx, TCGCond cond, TCGv pred)
+{
+    TCGv_i64 RddV = get_result_gpr_pair(ctx, HEX_REG_FP);
+    gen_cond_return(ctx, RddV, hex_gpr[HEX_REG_FP], pred, cond);
+    gen_log_reg_write_pair(HEX_REG_FP, RddV);
+}
+
 static void gen_endloop0(DisasContext *ctx)
 {
     TCGv lpcfg = tcg_temp_new();
@@ -737,14 +772,95 @@ static void gen_endloop0(DisasContext *ctx)
         TCGLabel *label3 = gen_new_label();
         tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, label3);
         {
+            TCGv lc0 = get_result_gpr(ctx, HEX_REG_LC0);
             gen_jumpr(ctx, hex_gpr[HEX_REG_SA0]);
-            tcg_gen_subi_tl(hex_new_value[HEX_REG_LC0],
-                            hex_gpr[HEX_REG_LC0], 1);
+            tcg_gen_subi_tl(lc0, hex_gpr[HEX_REG_LC0], 1);
         }
         gen_set_label(label3);
     }
 }
 
+static void gen_endloop1(DisasContext *ctx)
+{
+    /*
+     *    if (hex_gpr[HEX_REG_LC1] > 1) {
+     *        PC = hex_gpr[HEX_REG_SA1];
+     *        hex_new_value[HEX_REG_LC1] = hex_gpr[HEX_REG_LC1] - 1;
+     *    }
+     */
+    TCGLabel *label = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC1], 1, label);
+    {
+        TCGv lc1 = get_result_gpr(ctx, HEX_REG_LC1);
+        gen_jumpr(ctx, hex_gpr[HEX_REG_SA1]);
+        tcg_gen_subi_tl(lc1, hex_gpr[HEX_REG_LC1], 1);
+    }
+    gen_set_label(label);
+}
+
+static void gen_endloop01(DisasContext *ctx)
+{
+    TCGv lpcfg = tcg_temp_new();
+    TCGLabel *label1 = gen_new_label();
+    TCGLabel *label2 = gen_new_label();
+    TCGLabel *label3 = gen_new_label();
+    TCGLabel *done = gen_new_label();
+
+    GET_USR_FIELD(USR_LPCFG, lpcfg);
+
+    /*
+     *    if (lpcfg == 1) {
+     *        hex_new_pred_value[3] = 0xff;
+     *        hex_pred_written |= 1 << 3;
+     *    }
+     */
+    tcg_gen_brcondi_tl(TCG_COND_NE, lpcfg, 1, label1);
+    {
+        tcg_gen_movi_tl(hex_new_pred_value[3], 0xff);
+        tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << 3);
+    }
+    gen_set_label(label1);
+
+    /*
+     *    if (lpcfg) {
+     *        SET_USR_FIELD(USR_LPCFG, lpcfg - 1);
+     *    }
+     */
+    tcg_gen_brcondi_tl(TCG_COND_EQ, lpcfg, 0, label2);
+    {
+        tcg_gen_subi_tl(lpcfg, lpcfg, 1);
+        SET_USR_FIELD(USR_LPCFG, lpcfg);
+    }
+    gen_set_label(label2);
+
+    /*
+     *    if (hex_gpr[HEX_REG_LC0] > 1) {
+     *        PC = hex_gpr[HEX_REG_SA0];
+     *        hex_new_value[HEX_REG_LC0] = hex_gpr[HEX_REG_LC0] - 1;
+     *    } else {
+     *        if (hex_gpr[HEX_REG_LC1] > 1) {
+     *            hex_next_pc = hex_gpr[HEX_REG_SA1];
+     *            hex_new_value[HEX_REG_LC1] = hex_gpr[HEX_REG_LC1] - 1;
+     *        }
+     *    }
+     */
+    tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, label3);
+    {
+        TCGv lc0 = get_result_gpr(ctx, HEX_REG_LC0);
+        gen_jumpr(ctx, hex_gpr[HEX_REG_SA0]);
+        tcg_gen_subi_tl(lc0, hex_gpr[HEX_REG_LC0], 1);
+        tcg_gen_br(done);
+    }
+    gen_set_label(label3);
+    tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC1], 1, done);
+    {
+        TCGv lc1 = get_result_gpr(ctx, HEX_REG_LC1);
+        gen_jumpr(ctx, hex_gpr[HEX_REG_SA1]);
+        tcg_gen_subi_tl(lc1, hex_gpr[HEX_REG_LC1], 1);
+    }
+    gen_set_label(done);
+}
+
 static void gen_cmp_jumpnv(DisasContext *ctx,
                            TCGCond cond, TCGv val, TCGv src, int pc_off)
 {
@@ -869,68 +985,32 @@ static intptr_t vreg_src_off(DisasContext *ctx, int num)
 }
 
 static void gen_log_vreg_write(DisasContext *ctx, intptr_t srcoff, int num,
-                               VRegWriteType type, int slot_num,
-                               bool is_predicated)
+                               VRegWriteType type)
 {
-    TCGLabel *label_end = NULL;
     intptr_t dstoff;
 
-    if (is_predicated) {
-        TCGv cancelled = tcg_temp_new();
-        label_end = gen_new_label();
-
-        /* Don't do anything if the slot was cancelled */
-        tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1);
-        tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end);
-    }
-
     if (type != EXT_TMP) {
         dstoff = ctx_future_vreg_off(ctx, num, 1, true);
         tcg_gen_gvec_mov(MO_64, dstoff, srcoff,
                          sizeof(MMVector), sizeof(MMVector));
-        tcg_gen_ori_tl(hex_VRegs_updated, hex_VRegs_updated, 1 << num);
     } else {
         dstoff = ctx_tmp_vreg_off(ctx, num, 1, false);
         tcg_gen_gvec_mov(MO_64, dstoff, srcoff,
                          sizeof(MMVector), sizeof(MMVector));
     }
-
-    if (is_predicated) {
-        gen_set_label(label_end);
-    }
 }
 
 static void gen_log_vreg_write_pair(DisasContext *ctx, intptr_t srcoff, int num,
-                                    VRegWriteType type, int slot_num,
-                                    bool is_predicated)
+                                    VRegWriteType type)
 {
-    gen_log_vreg_write(ctx, srcoff, num ^ 0, type, slot_num, is_predicated);
+    gen_log_vreg_write(ctx, srcoff, num ^ 0, type);
     srcoff += sizeof(MMVector);
-    gen_log_vreg_write(ctx, srcoff, num ^ 1, type, slot_num, is_predicated);
+    gen_log_vreg_write(ctx, srcoff, num ^ 1, type);
 }
 
-static void gen_log_qreg_write(intptr_t srcoff, int num, int vnew,
-                               int slot_num, bool is_predicated)
+static intptr_t get_result_qreg(DisasContext *ctx, int qnum)
 {
-    TCGLabel *label_end = NULL;
-    intptr_t dstoff;
-
-    if (is_predicated) {
-        TCGv cancelled = tcg_temp_new();
-        label_end = gen_new_label();
-
-        /* Don't do anything if the slot was cancelled */
-        tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1);
-        tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end);
-    }
-
-    dstoff = offsetof(CPUHexagonState, future_QRegs[num]);
-    tcg_gen_gvec_mov(MO_64, dstoff, srcoff, sizeof(MMQReg), sizeof(MMQReg));
-
-    if (is_predicated) {
-        tcg_gen_ori_tl(hex_QRegs_updated, hex_QRegs_updated, 1 << num);
-        gen_set_label(label_end);
-    }
+    return  offsetof(CPUHexagonState, future_QRegs[qnum]);
 }
 
 static void gen_vreg_load(DisasContext *ctx, intptr_t dstoff, TCGv src,
diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py
index a29f61bb4f..0200a66cb6 100755
--- a/target/hexagon/hex_common.py
+++ b/target/hexagon/hex_common.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 ##
-##  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+##  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 ##
 ##  This program is free software; you can redistribute it and/or modify
 ##  it under the terms of the GNU General Public License as published by
@@ -89,6 +89,7 @@ def calculate_attribs():
     add_qemu_macro_attrib('fWRITE_P3', 'A_WRITES_PRED_REG')
     add_qemu_macro_attrib('fSET_OVERFLOW', 'A_IMPLICIT_WRITES_USR')
     add_qemu_macro_attrib('fSET_LPCFG', 'A_IMPLICIT_WRITES_USR')
+    add_qemu_macro_attrib('fLOAD', 'A_SCALAR_LOAD')
     add_qemu_macro_attrib('fSTORE', 'A_SCALAR_STORE')
 
     # Recurse down macros, find attributes from sub-macros
@@ -236,6 +237,13 @@ def helper_needs_next_PC(tag):
 def need_pkt_has_multi_cof(tag):
     return 'A_COF' in attribdict[tag]
 
+def need_condexec_reg(tag, regs):
+    if 'A_CONDEXEC' in attribdict[tag]:
+        for regtype, regid, toss, numregs in regs:
+            if is_writeonly(regid) and not is_hvx_reg(regtype):
+                return True
+    return False
+
 def skip_qemu_helper(tag):
     return tag in overrides.keys()
 
diff --git a/target/hexagon/idef-parser/idef-parser.h b/target/hexagon/idef-parser/idef-parser.h
index 17d2ebfaf6..d23e71f13b 100644
--- a/target/hexagon/idef-parser/idef-parser.h
+++ b/target/hexagon/idef-parser/idef-parser.h
@@ -82,7 +82,6 @@ enum ImmUnionTag {
     VALUE,
     QEMU_TMP,
     IMM_PC,
-    IMM_NPC,
     IMM_CONSTEXT,
 };
 
diff --git a/target/hexagon/idef-parser/idef-parser.lex b/target/hexagon/idef-parser/idef-parser.lex
index ff87a02c3a..5eb8ac5a80 100644
--- a/target/hexagon/idef-parser/idef-parser.lex
+++ b/target/hexagon/idef-parser/idef-parser.lex
@@ -5,7 +5,7 @@
 
 %{
 /*
- *  Copyright(c) 2019-2022 rev.ng Labs Srl. All Rights Reserved.
+ *  Copyright(c) 2019-2023 rev.ng Labs Srl. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -140,8 +140,6 @@ STRING_LIT               \"(\\.|[^"\\])*\"
                            yylval->rvalue.is_dotnew = true;
                            yylval->rvalue.signedness = SIGNED;
                            return PRED; }
-"IV1DEAD()"              |
-"fPAUSE(uiV);"           { return ';'; }
 "+="                     { return INC; }
 "-="                     { return DEC; }
 "++"                     { return PLUSPLUS; }
@@ -159,9 +157,8 @@ STRING_LIT               \"(\\.|[^"\\])*\"
 "else"                   { return ELSE; }
 "for"                    { return FOR; }
 "fREAD_IREG"             { return ICIRC; }
-"fPART1"                 { return PART1; }
 "if"                     { return IF; }
-"fFRAME_SCRAMBLE"        { return FSCR; }
+"fFRAME_SCRAMBLE"        |
 "fFRAME_UNSCRAMBLE"      { return FSCR; }
 "fFRAMECHECK"            { return FCHK; }
 "Constant_extended"      { return CONSTEXT; }
@@ -312,14 +309,10 @@ STRING_LIT               \"(\\.|[^"\\])*\"
 "(unsigned int)"         { yylval->cast.bit_width = 32;
                            yylval->cast.signedness = UNSIGNED;
                            return CAST; }
-"fREAD_PC()"             |
-"PC"                     { return PC; }
-"fREAD_NPC()"            |
-"NPC"                    { return NPC; }
-"fGET_LPCFG"             |
+"fREAD_PC()"             { return PC; }
 "USR.LPCFG"              { return LPCFG; }
 "LOAD_CANCEL(EA)"        { return LOAD_CANCEL; }
-"STORE_CANCEL(EA)"       |
+"STORE_CANCEL(EA)"       { return STORE_CANCEL; }
 "CANCEL"                 { return CANCEL; }
 "N"{LOWER_ID}"N"         { yylval->rvalue.type = REGISTER_ARG;
                            yylval->rvalue.reg.type = DOTNEW;
@@ -360,14 +353,6 @@ STRING_LIT               \"(\\.|[^"\\])*\"
                            yylval->rvalue.bit_width = 32;
                            yylval->rvalue.signedness = UNSIGNED;
                            return REG; }
-"fREAD_LC"[01]           { yylval->rvalue.type = REGISTER;
-                           yylval->rvalue.reg.type = CONTROL;
-                           yylval->rvalue.reg.id = HEX_REG_LC0
-                                                 + (yytext[8] - '0') * 2;
-                           yylval->rvalue.reg.bit_width = 32;
-                           yylval->rvalue.bit_width = 32;
-                           yylval->rvalue.signedness = UNSIGNED;
-                           return REG; }
 "LC"[01]                 { yylval->rvalue.type = REGISTER;
                            yylval->rvalue.reg.type = CONTROL;
                            yylval->rvalue.reg.id = HEX_REG_LC0
@@ -376,14 +361,6 @@ STRING_LIT               \"(\\.|[^"\\])*\"
                            yylval->rvalue.bit_width = 32;
                            yylval->rvalue.signedness = UNSIGNED;
                            return REG; }
-"fREAD_SA"[01]           { yylval->rvalue.type = REGISTER;
-                           yylval->rvalue.reg.type = CONTROL;
-                           yylval->rvalue.reg.id = HEX_REG_SA0
-                                                 + (yytext[8] - '0') * 2;
-                           yylval->rvalue.reg.bit_width = 32;
-                           yylval->rvalue.bit_width = 32;
-                           yylval->rvalue.signedness = UNSIGNED;
-                           return REG; }
 "SA"[01]                 { yylval->rvalue.type = REGISTER;
                            yylval->rvalue.reg.type = CONTROL;
                            yylval->rvalue.reg.id = HEX_REG_SA0
diff --git a/target/hexagon/idef-parser/idef-parser.y b/target/hexagon/idef-parser/idef-parser.y
index c784726d41..7d05773b67 100644
--- a/target/hexagon/idef-parser/idef-parser.y
+++ b/target/hexagon/idef-parser/idef-parser.y
@@ -1,6 +1,6 @@
 %{
 /*
- *  Copyright(c) 2019-2022 rev.ng Labs Srl. All Rights Reserved.
+ *  Copyright(c) 2019-2023 rev.ng Labs Srl. All Rights Reserved.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -52,8 +52,8 @@
 %token IN INAME VAR
 %token ABS CROUND ROUND CIRCADD COUNTONES INC DEC ANDA ORA XORA PLUSPLUS ASL
 %token ASR LSR EQ NEQ LTE GTE MIN MAX ANDL FOR ICIRC IF MUN FSCR FCHK SXT
-%token ZXT CONSTEXT LOCNT BREV SIGN LOAD STORE PC NPC LPCFG
-%token LOAD_CANCEL CANCEL IDENTITY PART1 ROTL INSBITS SETBITS EXTRANGE
+%token ZXT CONSTEXT LOCNT BREV SIGN LOAD STORE PC LPCFG
+%token LOAD_CANCEL STORE_CANCEL CANCEL IDENTITY ROTL INSBITS SETBITS EXTRANGE
 %token CAST4_8U FAIL CARRY_FROM_ADD ADDSAT64 LSBNEW
 %token TYPE_SIZE_T TYPE_INT TYPE_SIGNED TYPE_UNSIGNED TYPE_LONG
 
@@ -336,15 +336,6 @@ assign_statement : lvalue '=' rvalue
                        OUT(c, &@1, &$1, " = ", &$3, ";\n");
                        $$ = $1;
                    }
-                 | PC '=' rvalue
-                   {
-                       @1.last_column = @3.last_column;
-                       yyassert(c, &@1, !is_inside_ternary(c),
-                                "Assignment side-effect not modeled!");
-                       $3 = gen_rvalue_truncate(c, &@1, &$3);
-                       $3 = rvalue_materialize(c, &@1, &$3);
-                       OUT(c, &@1, "gen_write_new_pc(", &$3, ");\n");
-                   }
                  | LOAD '(' IMM ',' IMM ',' SIGN ',' var ',' lvalue ')'
                    {
                        @1.last_column = @12.last_column;
@@ -412,7 +403,6 @@ control_statement : frame_check
                   | cancel_statement
                   | if_statement
                   | for_statement
-                  | fpart1_statement
                   ;
 
 frame_check : FCHK '(' rvalue ',' rvalue ')' ';'
@@ -422,10 +412,11 @@ cancel_statement : LOAD_CANCEL
                    {
                        gen_load_cancel(c, &@1);
                    }
-                 | CANCEL
+                 | STORE_CANCEL
                    {
                        gen_cancel(c, &@1);
                    }
+                 | CANCEL
                  ;
 
 if_statement : if_stmt
@@ -462,17 +453,6 @@ for_statement : FOR '(' IMM '=' IMM ';' IMM '<' IMM ';' IMM PLUSPLUS ')'
                 }
               ;
 
-fpart1_statement : PART1
-                   {
-                       OUT(c, &@1, "if (insn->part1) {\n");
-                   }
-                   '(' statements ')'
-                   {
-                       @1.last_column = @3.last_column;
-                       OUT(c, &@1, "return; }\n");
-                   }
-                 ;
-
 if_stmt : IF '(' rvalue ')'
           {
               @1.last_column = @3.last_column;
@@ -512,20 +492,6 @@ rvalue : FAIL
              rvalue.signedness = UNSIGNED;
              $$ = rvalue;
          }
-       | NPC
-         {
-             /*
-              * NPC is only read from CALLs, so we can hardcode it
-              * at translation time
-              */
-             HexValue rvalue;
-             memset(&rvalue, 0, sizeof(HexValue));
-             rvalue.type = IMMEDIATE;
-             rvalue.imm.type = IMM_NPC;
-             rvalue.bit_width = 32;
-             rvalue.signedness = UNSIGNED;
-             $$ = rvalue;
-         }
        | CONSTEXT
          {
              HexValue rvalue;
@@ -781,11 +747,6 @@ rvalue : FAIL
              /* Ones count */
              $$ = gen_ctpop_op(c, &@1, &$3);
          }
-       | LPCFG
-         {
-             $$ = gen_tmp(c, &@1, 32, UNSIGNED);
-             OUT(c, &@1, "GET_USR_FIELD(USR_LPCFG, ", &$$, ");\n");
-         }
        | EXTRACT '(' rvalue ',' rvalue ')'
          {
              @1.last_column = @6.last_column;
diff --git a/target/hexagon/idef-parser/macros.inc b/target/hexagon/idef-parser/macros.inc
index 6b697da87a..7478d4db17 100644
--- a/target/hexagon/idef-parser/macros.inc
+++ b/target/hexagon/idef-parser/macros.inc
@@ -97,16 +97,8 @@
 #define fWRITE_LR(A) (LR = A)
 #define fWRITE_FP(A) (FP = A)
 #define fWRITE_SP(A) (SP = A)
-/*
- * Note: There is a rule in the parser that matches `PC = ...` and emits
- * a call to `gen_write_new_pc`. We need to call `gen_write_new_pc` to
- * get the correct semantics when there are multiple stores in a packet.
- */
-#define fBRANCH(LOC, TYPE) (PC = LOC)
-#define fJUMPR(REGNO, TARGET, TYPE) (PC = TARGET)
 #define fWRITE_LOOP_REGS0(START, COUNT) SA0 = START; (LC0 = COUNT)
 #define fWRITE_LOOP_REGS1(START, COUNT) SA1 = START; (LC1 = COUNT)
-#define fWRITE_LC0(VAL) (LC0 = VAL)
 #define fWRITE_LC1(VAL) (LC1 = VAL)
 #define fSET_LPCFG(VAL) (USR.LPCFG = VAL)
 #define fWRITE_P0(VAL) P0 = VAL;
@@ -121,7 +113,6 @@
 #define fEA_GPI(IMM) (EA = fREAD_GP() + IMM)
 #define fPM_I(REG, IMM) (REG = REG + IMM)
 #define fPM_M(REG, MVAL) (REG = REG + MVAL)
-#define fWRITE_NPC(VAL) (PC = VAL)
 
 /* Unary operators */
 #define fROUND(A) (A + 0x8000)
diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c
index e1a55412c8..18cde6a1be 100644
--- a/target/hexagon/idef-parser/parser-helpers.c
+++ b/target/hexagon/idef-parser/parser-helpers.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2022 rev.ng Labs Srl. All Rights Reserved.
+ *  Copyright(c) 2019-2023 rev.ng Labs Srl. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -185,9 +185,6 @@ void imm_print(Context *c, YYLTYPE *locp, HexImm *imm)
     case IMM_PC:
         EMIT(c, "ctx->base.pc_next");
         break;
-    case IMM_NPC:
-        EMIT(c, "ctx->npc");
-        break;
     case IMM_CONSTEXT:
         EMIT(c, "insn->extension_valid");
         break;
@@ -1323,10 +1320,6 @@ void gen_write_reg(Context *c, YYLTYPE *locp, HexValue *reg, HexValue *value)
         locp,
         "gen_log_reg_write(", &reg->reg.id, ", ",
         &value_m, ");\n");
-    OUT(c,
-        locp,
-        "ctx_log_reg_write(ctx, ", &reg->reg.id,
-        ");\n");
 }
 
 void gen_assign(Context *c,
@@ -1675,9 +1668,7 @@ void gen_inst_init_args(Context *c, YYLTYPE *locp)
     for (unsigned i = 0; i < c->inst.init_list->len; i++) {
         HexValue *val = &g_array_index(c->inst.init_list, HexValue, i);
         if (val->type == REGISTER_ARG) {
-            char reg_id[5];
-            reg_compose(c, locp, &val->reg, reg_id);
-            EMIT_HEAD(c, "tcg_gen_movi_i%u(%s, 0);\n", val->bit_width, reg_id);
+            /* Nothing to do here */
         } else if (val->type == PREDICATE) {
             char suffix = val->is_dotnew ? 'N' : 'V';
             EMIT_HEAD(c, "tcg_gen_movi_i%u(P%c%c, 0);\n", val->bit_width,
@@ -1722,13 +1713,10 @@ void gen_pred_assign(Context *c, YYLTYPE *locp, HexValue *left_pred,
         *left_pred = gen_tmp(c, locp, 32, UNSIGNED);
     }
     /* Extract first 8 bits, and store new predicate value */
-    OUT(c, locp, "tcg_gen_mov_i32(", left_pred, ", ", &r, ");\n");
-    OUT(c, locp, "tcg_gen_andi_i32(", left_pred, ", ", left_pred,
-        ", 0xff);\n");
+    OUT(c, locp, "tcg_gen_andi_i32(", left_pred, ", ", &r, ", 0xff);\n");
     if (is_direct) {
         OUT(c, locp, "gen_log_pred_write(ctx, ", pred_id, ", ", left_pred,
             ");\n");
-        OUT(c, locp, "ctx_log_pred_write(ctx, ", pred_id, ");\n");
     }
 }
 
@@ -1739,7 +1727,6 @@ void gen_cancel(Context *c, YYLTYPE *locp)
 
 void gen_load_cancel(Context *c, YYLTYPE *locp)
 {
-    gen_cancel(c, locp);
     OUT(c, locp, "if (insn->slot == 0 && pkt->pkt_has_store_s1) {\n");
     OUT(c, locp, "ctx->s1_store_processed = false;\n");
     OUT(c, locp, "process_store(ctx, 1);\n");
diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
index 17facadaad..482a9c787f 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -205,26 +205,11 @@ static inline void gen_cancel(uint32_t slot)
 
 #define CANCEL gen_cancel(slot);
 #else
-#define CANCEL cancel_slot(env, slot)
+#define CANCEL do { } while (0)
 #endif
 
 #define LOAD_CANCEL(EA) do { CANCEL; } while (0)
 
-#ifdef QEMU_GENERATE
-static inline void gen_pred_cancel(TCGv pred, uint32_t slot_num)
- {
-    TCGv slot_mask = tcg_temp_new();
-    TCGv tmp = tcg_temp_new();
-    TCGv zero = tcg_constant_tl(0);
-    tcg_gen_ori_tl(slot_mask, hex_slot_cancelled, 1 << slot_num);
-    tcg_gen_andi_tl(tmp, pred, 1);
-    tcg_gen_movcond_tl(TCG_COND_EQ, hex_slot_cancelled, tmp, zero,
-                       slot_mask, hex_slot_cancelled);
-}
-#define PRED_LOAD_CANCEL(PRED, EA) \
-    gen_pred_cancel(PRED, insn->is_endloop ? 4 : insn->slot)
-#endif
-
 #define STORE_CANCEL(EA) { env->slot_cancelled |= (1 << slot); }
 
 #define fMAX(A, B) (((A) > (B)) ? (A) : (B))
@@ -415,16 +400,6 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift)
 #define fBRANCH(LOC, TYPE)          fWRITE_NPC(LOC)
 #define fJUMPR(REGNO, TARGET, TYPE) fBRANCH(TARGET, COF_TYPE_JUMPR)
 #define fHINTJR(TARGET) { /* Not modelled in qemu */}
-#define fCALL(A) \
-    do { \
-        fWRITE_LR(fREAD_NPC()); \
-        fBRANCH(A, COF_TYPE_CALL); \
-    } while (0)
-#define fCALLR(A) \
-    do { \
-        fWRITE_LR(fREAD_NPC()); \
-        fBRANCH(A, COF_TYPE_CALLR); \
-    } while (0)
 #define fWRITE_LOOP_REGS0(START, COUNT) \
     do { \
         WRITE_RREG(HEX_REG_LC0, COUNT);  \
diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build
index 42b03c81e6..da8e608d00 100644
--- a/target/hexagon/meson.build
+++ b/target/hexagon/meson.build
@@ -1,5 +1,5 @@
 ##
-##  Copyright(c) 2020-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+##  Copyright(c) 2020-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 ##
 ##  This program is free software; you can redistribute it and/or modify
 ##  it under the terms of the GNU General Public License as published by
@@ -276,4 +276,13 @@ tcg_funcs_generated = custom_target(
 )
 hexagon_ss.add(tcg_funcs_generated)
 
+analyze_funcs_generated = custom_target(
+    'analyze_funcs_generated.c.inc',
+    output: 'analyze_funcs_generated.c.inc',
+    depends: helper_dep,
+    depend_files: [hex_common_py, attribs_def, gen_tcg_h, gen_tcg_hvx_h],
+    command: [python, files('gen_analyze_funcs.py'), helper_in, '@OUTPUT@'],
+)
+hexagon_ss.add(analyze_funcs_generated)
+
 target_arch += {'hexagon': hexagon_ss}
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index 35449ef524..c9a156030e 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -30,6 +30,7 @@
 #include "mmvec/mmvec.h"
 #include "mmvec/macros.h"
 #include "op_helper.h"
+#include "translate.h"
 
 #define SF_BIAS        127
 #define SF_MANTBITS    23
@@ -105,30 +106,6 @@ void log_store64(CPUHexagonState *env, target_ulong addr,
     env->mem_log_stores[slot].data64 = val;
 }
 
-void write_new_pc(CPUHexagonState *env, bool pkt_has_multi_cof,
-                         target_ulong addr)
-{
-    HEX_DEBUG_LOG("write_new_pc(0x" TARGET_FMT_lx ")\n", addr);
-
-    if (pkt_has_multi_cof) {
-        /*
-         * If more than one branch is taken in a packet, only the first one
-         * is actually done.
-         */
-        if (env->branch_taken) {
-            HEX_DEBUG_LOG("INFO: multiple branches taken in same packet, "
-                          "ignoring the second one\n");
-        } else {
-            fCHECK_PCALIGN(addr);
-            env->gpr[HEX_REG_PC] = addr;
-            env->branch_taken = 1;
-        }
-    } else {
-        fCHECK_PCALIGN(addr);
-        env->gpr[HEX_REG_PC] = addr;
-    }
-}
-
 /* Handy place to set a breakpoint */
 void HELPER(debug_start_packet)(CPUHexagonState *env)
 {
@@ -439,9 +416,10 @@ int32_t HELPER(vacsh_pred)(CPUHexagonState *env,
     return PeV;
 }
 
-static void probe_store(CPUHexagonState *env, int slot, int mmu_idx)
+static void probe_store(CPUHexagonState *env, int slot, int mmu_idx,
+                        bool is_predicated)
 {
-    if (!(env->slot_cancelled & (1 << slot))) {
+    if (!is_predicated || !(env->slot_cancelled & (1 << slot))) {
         size1u_t width = env->mem_log_stores[slot].width;
         target_ulong va = env->mem_log_stores[slot].va;
         uintptr_t ra = GETPC();
@@ -461,9 +439,12 @@ void HELPER(probe_noshuf_load)(CPUHexagonState *env, target_ulong va,
 }
 
 /* Called during packet commit when there are two scalar stores */
-void HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState *env, int mmu_idx)
+void HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState *env, int args)
 {
-    probe_store(env, 0, mmu_idx);
+    int mmu_idx = FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX);
+    bool is_predicated =
+        FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED);
+    probe_store(env, 0, mmu_idx, is_predicated);
 }
 
 void HELPER(probe_hvx_stores)(CPUHexagonState *env, int mmu_idx)
@@ -510,15 +491,18 @@ void HELPER(probe_hvx_stores)(CPUHexagonState *env, int mmu_idx)
 void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask,
                                          int mmu_idx)
 {
-    bool has_st0        = (mask >> 0) & 1;
-    bool has_st1        = (mask >> 1) & 1;
-    bool has_hvx_stores = (mask >> 2) & 1;
+    bool has_st0 = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0);
+    bool has_st1 = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1);
+    bool has_hvx_stores =
+        FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_HVX_STORES);
+    bool s0_is_pred = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, S0_IS_PRED);
+    bool s1_is_pred = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, S1_IS_PRED);
 
     if (has_st0) {
-        probe_store(env, 0, mmu_idx);
+        probe_store(env, 0, mmu_idx, s0_is_pred);
     }
     if (has_st1) {
-        probe_store(env, 1, mmu_idx);
+        probe_store(env, 1, mmu_idx, s1_is_pred);
     }
     if (has_hvx_stores) {
         HELPER(probe_hvx_stores)(env, mmu_idx);
@@ -1193,7 +1177,7 @@ float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
 {
     float32 neg_RsV;
     arch_fpop_start(env);
-    neg_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
+    neg_RsV = float32_set_sign(RsV, float32_is_neg(RsV) ? 0 : 1);
     RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
     arch_fpop_end(env);
     return RxV;
@@ -1468,12 +1452,6 @@ void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV)
     }
 }
 
-void cancel_slot(CPUHexagonState *env, uint32_t slot)
-{
-    HEX_DEBUG_LOG("Slot %d cancelled\n", slot);
-    env->slot_cancelled |= (1 << slot);
-}
-
 /* These macros can be referenced in the generated helper functions */
 #define warn(...) /* Nothing */
 #define fatal(...) g_assert_not_reached();
diff --git a/target/hexagon/op_helper.h b/target/hexagon/op_helper.h
index 02347edee8..34b3a53975 100644
--- a/target/hexagon/op_helper.h
+++ b/target/hexagon/op_helper.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -19,7 +19,6 @@
 #define HEXAGON_OP_HELPER_H
 
 /* Misc functions */
-void cancel_slot(CPUHexagonState *env, uint32_t slot);
 void write_new_pc(CPUHexagonState *env, bool pkt_has_multi_cof, target_ulong addr);
 
 uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, target_ulong vaddr);
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 93fd1b55e3..665476ab48 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -29,6 +29,15 @@
 #include "translate.h"
 #include "printinsn.h"
 
+#include "analyze_funcs_generated.c.inc"
+
+typedef void (*AnalyzeInsn)(DisasContext *ctx);
+static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = {
+#define OPCODE(X)    [X] = analyze_##X
+#include "opcodes_def_generated.h.inc"
+#undef OPCODE
+};
+
 TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
 TCGv hex_pred[NUM_PREGS];
 TCGv hex_this_PC;
@@ -47,8 +56,6 @@ TCGv hex_dczero_addr;
 TCGv hex_llsc_addr;
 TCGv hex_llsc_val;
 TCGv_i64 hex_llsc_val_i64;
-TCGv hex_VRegs_updated;
-TCGv hex_QRegs_updated;
 TCGv hex_vstore_addr[VSTORES_MAX];
 TCGv hex_vstore_size[VSTORES_MAX];
 TCGv hex_vstore_pending[VSTORES_MAX];
@@ -239,7 +246,15 @@ static bool check_for_attrib(Packet *pkt, int attrib)
 
 static bool need_slot_cancelled(Packet *pkt)
 {
-    return check_for_attrib(pkt, A_CONDEXEC);
+    /* We only need slot_cancelled for conditional store instructions */
+    for (int i = 0; i < pkt->num_insns; i++) {
+        uint16_t opcode = pkt->insn[i].opcode;
+        if (GET_ATTRIB(opcode, A_CONDEXEC) &&
+            GET_ATTRIB(opcode, A_SCALAR_STORE)) {
+            return true;
+        }
+    }
+    return false;
 }
 
 static bool need_pred_written(Packet *pkt)
@@ -265,6 +280,77 @@ static bool need_next_PC(DisasContext *ctx)
     return false;
 }
 
+/*
+ * The opcode_analyze functions mark most of the writes in a packet
+ * However, there are some implicit writes marked as attributes
+ * of the applicable instructions.
+ */
+static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum)
+{
+    uint16_t opcode = ctx->insn->opcode;
+    if (GET_ATTRIB(opcode, attrib)) {
+        /*
+         * USR is used to set overflow and FP exceptions,
+         * so treat it as conditional
+         */
+        bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) ||
+                             rnum == HEX_REG_USR;
+
+        /* LC0/LC1 is conditionally written by endloop instructions */
+        if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) &&
+            (opcode == J2_endloop0 ||
+             opcode == J2_endloop1 ||
+             opcode == J2_endloop01)) {
+            is_predicated = true;
+        }
+
+        ctx_log_reg_write(ctx, rnum, is_predicated);
+    }
+}
+
+static void mark_implicit_reg_writes(DisasContext *ctx)
+{
+    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP,  HEX_REG_FP);
+    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SP,  HEX_REG_SP);
+    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LR,  HEX_REG_LR);
+    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC0, HEX_REG_LC0);
+    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0);
+    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1);
+    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1);
+    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR);
+    mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR);
+}
+
+static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum)
+{
+    if (GET_ATTRIB(ctx->insn->opcode, attrib)) {
+        ctx_log_pred_write(ctx, pnum);
+    }
+}
+
+static void mark_implicit_pred_writes(DisasContext *ctx)
+{
+    mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P0, 0);
+    mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P1, 1);
+    mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P2, 2);
+    mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3);
+}
+
+static void analyze_packet(DisasContext *ctx)
+{
+    Packet *pkt = ctx->pkt;
+    ctx->need_pkt_has_store_s1 = false;
+    for (int i = 0; i < pkt->num_insns; i++) {
+        Insn *insn = &pkt->insn[i];
+        ctx->insn = insn;
+        if (opcode_analyze[insn->opcode]) {
+            opcode_analyze[insn->opcode](ctx);
+        }
+        mark_implicit_reg_writes(ctx);
+        mark_implicit_pred_writes(ctx);
+    }
+}
+
 static void gen_start_packet(DisasContext *ctx)
 {
     Packet *pkt = ctx->pkt;
@@ -275,6 +361,7 @@ static void gen_start_packet(DisasContext *ctx)
     ctx->next_PC = next_PC;
     ctx->reg_log_idx = 0;
     bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
+    bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
     ctx->preg_log_idx = 0;
     bitmap_zero(ctx->pregs_written, NUM_PREGS);
     ctx->future_vregs_idx = 0;
@@ -283,14 +370,27 @@ static void gen_start_packet(DisasContext *ctx)
     bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
     bitmap_zero(ctx->vregs_updated, NUM_VREGS);
     bitmap_zero(ctx->vregs_select, NUM_VREGS);
+    bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS);
+    bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS);
     ctx->qreg_log_idx = 0;
     for (i = 0; i < STORES_MAX; i++) {
         ctx->store_width[i] = 0;
     }
-    tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1);
     ctx->s1_store_processed = false;
     ctx->pre_commit = true;
 
+    analyze_packet(ctx);
+
+    if (ctx->need_pkt_has_store_s1) {
+        tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1);
+    }
+
+    /*
+     * pregs_written is used both in the analyze phase as well as the code
+     * gen phase, so clear it again.
+     */
+    bitmap_zero(ctx->pregs_written, NUM_PREGS);
+
     if (HEX_DEBUG) {
         /* Handy place to set a breakpoint before the packet executes */
         gen_helper_debug_start_packet(cpu_env);
@@ -313,9 +413,42 @@ static void gen_start_packet(DisasContext *ctx)
         tcg_gen_movi_tl(hex_pred_written, 0);
     }
 
-    if (pkt->pkt_has_hvx) {
-        tcg_gen_movi_tl(hex_VRegs_updated, 0);
-        tcg_gen_movi_tl(hex_QRegs_updated, 0);
+    /* Preload the predicated registers into hex_new_value[i] */
+    if (!bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) {
+        int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
+        while (i < TOTAL_PER_THREAD_REGS) {
+            tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]);
+            i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS,
+                              i + 1);
+        }
+    }
+
+    /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */
+    if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) {
+        int i = find_first_bit(ctx->predicated_future_vregs, NUM_VREGS);
+        while (i < NUM_VREGS) {
+            const intptr_t VdV_off =
+                ctx_future_vreg_off(ctx, i, 1, true);
+            intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]);
+            tcg_gen_gvec_mov(MO_64, VdV_off,
+                             src_off,
+                             sizeof(MMVector),
+                             sizeof(MMVector));
+            i = find_next_bit(ctx->predicated_future_vregs, NUM_VREGS, i + 1);
+        }
+    }
+    if (!bitmap_empty(ctx->predicated_tmp_vregs, NUM_VREGS)) {
+        int i = find_first_bit(ctx->predicated_tmp_vregs, NUM_VREGS);
+        while (i < NUM_VREGS) {
+            const intptr_t VdV_off =
+                ctx_tmp_vreg_off(ctx, i, 1, true);
+            intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]);
+            tcg_gen_gvec_mov(MO_64, VdV_off,
+                             src_off,
+                             sizeof(MMVector),
+                             sizeof(MMVector));
+            i = find_next_bit(ctx->predicated_tmp_vregs, NUM_VREGS, i + 1);
+        }
     }
 }
 
@@ -336,66 +469,6 @@ bool is_gather_store_insn(DisasContext *ctx)
     return false;
 }
 
-/*
- * The LOG_*_WRITE macros mark most of the writes in a packet
- * However, there are some implicit writes marked as attributes
- * of the applicable instructions.
- */
-static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum)
-{
-    uint16_t opcode = ctx->insn->opcode;
-    if (GET_ATTRIB(opcode, attrib)) {
-        /*
-         * USR is used to set overflow and FP exceptions,
-         * so treat it as conditional
-         */
-        bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) ||
-                             rnum == HEX_REG_USR;
-
-        /* LC0/LC1 is conditionally written by endloop instructions */
-        if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) &&
-            (opcode == J2_endloop0 ||
-             opcode == J2_endloop1 ||
-             opcode == J2_endloop01)) {
-            is_predicated = true;
-        }
-
-        if (is_predicated && !is_preloaded(ctx, rnum)) {
-            tcg_gen_mov_tl(hex_new_value[rnum], hex_gpr[rnum]);
-        }
-
-        ctx_log_reg_write(ctx, rnum);
-    }
-}
-
-static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum)
-{
-    if (GET_ATTRIB(ctx->insn->opcode, attrib)) {
-        ctx_log_pred_write(ctx, pnum);
-    }
-}
-
-static void mark_implicit_reg_writes(DisasContext *ctx)
-{
-    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP,  HEX_REG_FP);
-    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SP,  HEX_REG_SP);
-    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LR,  HEX_REG_LR);
-    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC0, HEX_REG_LC0);
-    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0);
-    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1);
-    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1);
-    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR);
-    mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR);
-}
-
-static void mark_implicit_pred_writes(DisasContext *ctx)
-{
-    mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P0, 0);
-    mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P1, 1);
-    mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P2, 2);
-    mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3);
-}
-
 static void mark_store_width(DisasContext *ctx)
 {
     uint16_t opcode = ctx->insn->opcode;
@@ -423,9 +496,7 @@ static void mark_store_width(DisasContext *ctx)
 static void gen_insn(DisasContext *ctx)
 {
     if (ctx->insn->generate) {
-        mark_implicit_reg_writes(ctx);
         ctx->insn->generate(ctx);
-        mark_implicit_pred_writes(ctx);
         mark_store_width(ctx);
     } else {
         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE);
@@ -646,65 +717,31 @@ static void gen_commit_hvx(DisasContext *ctx)
     /*
      *    for (i = 0; i < ctx->vreg_log_idx; i++) {
      *        int rnum = ctx->vreg_log[i];
-     *        if (ctx->vreg_is_predicated[i]) {
-     *            if (env->VRegs_updated & (1 << rnum)) {
-     *                env->VRegs[rnum] = env->future_VRegs[rnum];
-     *            }
-     *        } else {
-     *            env->VRegs[rnum] = env->future_VRegs[rnum];
-     *        }
+     *        env->VRegs[rnum] = env->future_VRegs[rnum];
      *    }
      */
     for (i = 0; i < ctx->vreg_log_idx; i++) {
         int rnum = ctx->vreg_log[i];
-        bool is_predicated = ctx->vreg_is_predicated[i];
         intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]);
         intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false);
         size_t size = sizeof(MMVector);
 
-        if (is_predicated) {
-            TCGv cmp = tcg_temp_new();
-            TCGLabel *label_skip = gen_new_label();
-
-            tcg_gen_andi_tl(cmp, hex_VRegs_updated, 1 << rnum);
-            tcg_gen_brcondi_tl(TCG_COND_EQ, cmp, 0, label_skip);
-            tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
-            gen_set_label(label_skip);
-        } else {
-            tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
-        }
+        tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
     }
 
     /*
      *    for (i = 0; i < ctx->qreg_log_idx; i++) {
      *        int rnum = ctx->qreg_log[i];
-     *        if (ctx->qreg_is_predicated[i]) {
-     *            if (env->QRegs_updated) & (1 << rnum)) {
-     *                env->QRegs[rnum] = env->future_QRegs[rnum];
-     *            }
-     *        } else {
-     *            env->QRegs[rnum] = env->future_QRegs[rnum];
-     *        }
+     *        env->QRegs[rnum] = env->future_QRegs[rnum];
      *    }
      */
     for (i = 0; i < ctx->qreg_log_idx; i++) {
         int rnum = ctx->qreg_log[i];
-        bool is_predicated = ctx->qreg_is_predicated[i];
         intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]);
         intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]);
         size_t size = sizeof(MMQReg);
 
-        if (is_predicated) {
-            TCGv cmp = tcg_temp_new();
-            TCGLabel *label_skip = gen_new_label();
-
-            tcg_gen_andi_tl(cmp, hex_QRegs_updated, 1 << rnum);
-            tcg_gen_brcondi_tl(TCG_COND_EQ, cmp, 0, label_skip);
-            tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
-            gen_set_label(label_skip);
-        } else {
-            tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
-        }
+        tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
     }
 
     if (pkt_has_hvx_store(ctx->pkt)) {
@@ -775,13 +812,27 @@ static void gen_commit_packet(DisasContext *ctx)
             TCGv mask_tcgv;
 
             if (has_store_s0) {
-                mask |= (1 << 0);
+                mask =
+                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0, 1);
             }
             if (has_store_s1) {
-                mask |= (1 << 1);
+                mask =
+                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1, 1);
             }
             if (has_hvx_store) {
-                mask |= (1 << 2);
+                mask =
+                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
+                               HAS_HVX_STORES, 1);
+            }
+            if (has_store_s0 && slot_is_predicated(pkt, 0)) {
+                mask =
+                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
+                               S0_IS_PRED, 1);
+            }
+            if (has_store_s1 && slot_is_predicated(pkt, 1)) {
+                mask =
+                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
+                               S1_IS_PRED, 1);
             }
             mask_tcgv = tcg_constant_tl(mask);
             gen_helper_probe_pkt_scalar_hvx_stores(cpu_env, mask_tcgv, mem_idx);
@@ -791,8 +842,15 @@ static void gen_commit_packet(DisasContext *ctx)
          * process_store_log will execute the slot 1 store first,
          * so we only have to probe the store in slot 0
          */
-        TCGv mem_idx = tcg_constant_tl(ctx->mem_idx);
-        gen_helper_probe_pkt_scalar_store_s0(cpu_env, mem_idx);
+        int args = 0;
+        args =
+            FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, ctx->mem_idx);
+        if (slot_is_predicated(pkt, 0)) {
+            args =
+                FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 1);
+        }
+        TCGv args_tcgv = tcg_constant_tl(args);
+        gen_helper_probe_pkt_scalar_store_s0(cpu_env, args_tcgv);
     }
 
     process_store_log(ctx);
@@ -1029,10 +1087,6 @@ void hexagon_translate_init(void)
         offsetof(CPUHexagonState, llsc_val), "llsc_val");
     hex_llsc_val_i64 = tcg_global_mem_new_i64(cpu_env,
         offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64");
-    hex_VRegs_updated = tcg_global_mem_new(cpu_env,
-        offsetof(CPUHexagonState, VRegs_updated), "VRegs_updated");
-    hex_QRegs_updated = tcg_global_mem_new(cpu_env,
-        offsetof(CPUHexagonState, QRegs_updated), "QRegs_updated");
     for (i = 0; i < STORES_MAX; i++) {
         snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i);
         hex_store_addr[i] = tcg_global_mem_new(cpu_env,
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index d971f4f095..db832b0f88 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -38,6 +38,7 @@ typedef struct DisasContext {
     int reg_log[REG_WRITES_MAX];
     int reg_log_idx;
     DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS);
+    DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS);
     int preg_log[PRED_WRITES_MAX];
     int preg_log_idx;
     DECLARE_BITMAP(pregs_written, NUM_PREGS);
@@ -48,52 +49,54 @@ typedef struct DisasContext {
     int tmp_vregs_idx;
     int tmp_vregs_num[VECTOR_TEMPS_MAX];
     int vreg_log[NUM_VREGS];
-    bool vreg_is_predicated[NUM_VREGS];
     int vreg_log_idx;
     DECLARE_BITMAP(vregs_updated_tmp, NUM_VREGS);
     DECLARE_BITMAP(vregs_updated, NUM_VREGS);
     DECLARE_BITMAP(vregs_select, NUM_VREGS);
+    DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS);
+    DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS);
     int qreg_log[NUM_QREGS];
-    bool qreg_is_predicated[NUM_QREGS];
     int qreg_log_idx;
     bool pre_commit;
     TCGCond branch_cond;
     target_ulong branch_dest;
     bool is_tight_loop;
+    bool need_pkt_has_store_s1;
 } DisasContext;
 
-static inline void ctx_log_reg_write(DisasContext *ctx, int rnum)
-{
-    if (test_bit(rnum, ctx->regs_written)) {
-        HEX_DEBUG_LOG("WARNING: Multiple writes to r%d\n", rnum);
-    }
-    ctx->reg_log[ctx->reg_log_idx] = rnum;
-    ctx->reg_log_idx++;
-    set_bit(rnum, ctx->regs_written);
-}
-
-static inline void ctx_log_reg_write_pair(DisasContext *ctx, int rnum)
-{
-    ctx_log_reg_write(ctx, rnum);
-    ctx_log_reg_write(ctx, rnum + 1);
-}
-
 static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
 {
-    ctx->preg_log[ctx->preg_log_idx] = pnum;
-    ctx->preg_log_idx++;
-    set_bit(pnum, ctx->pregs_written);
+    if (!test_bit(pnum, ctx->pregs_written)) {
+        ctx->preg_log[ctx->preg_log_idx] = pnum;
+        ctx->preg_log_idx++;
+        set_bit(pnum, ctx->pregs_written);
+    }
 }
 
-static inline bool is_preloaded(DisasContext *ctx, int num)
+static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
+                                     bool is_predicated)
 {
-    return test_bit(num, ctx->regs_written);
+    if (rnum == HEX_REG_P3_0_ALIASED) {
+        for (int i = 0; i < NUM_PREGS; i++) {
+            ctx_log_pred_write(ctx, i);
+        }
+    } else {
+        if (!test_bit(rnum, ctx->regs_written)) {
+            ctx->reg_log[ctx->reg_log_idx] = rnum;
+            ctx->reg_log_idx++;
+            set_bit(rnum, ctx->regs_written);
+        }
+        if (is_predicated) {
+            set_bit(rnum, ctx->predicated_regs);
+        }
+    }
 }
 
-static inline bool is_vreg_preloaded(DisasContext *ctx, int num)
+static inline void ctx_log_reg_write_pair(DisasContext *ctx, int rnum,
+                                          bool is_predicated)
 {
-    return test_bit(num, ctx->vregs_updated) ||
-           test_bit(num, ctx->vregs_updated_tmp);
+    ctx_log_reg_write(ctx, rnum, is_predicated);
+    ctx_log_reg_write(ctx, rnum + 1, is_predicated);
 }
 
 intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
@@ -106,17 +109,25 @@ static inline void ctx_log_vreg_write(DisasContext *ctx,
                                       bool is_predicated)
 {
     if (type != EXT_TMP) {
-        ctx->vreg_log[ctx->vreg_log_idx] = rnum;
-        ctx->vreg_is_predicated[ctx->vreg_log_idx] = is_predicated;
-        ctx->vreg_log_idx++;
+        if (!test_bit(rnum, ctx->vregs_updated)) {
+            ctx->vreg_log[ctx->vreg_log_idx] = rnum;
+            ctx->vreg_log_idx++;
+            set_bit(rnum, ctx->vregs_updated);
+        }
 
         set_bit(rnum, ctx->vregs_updated);
+        if (is_predicated) {
+            set_bit(rnum, ctx->predicated_future_vregs);
+        }
     }
     if (type == EXT_NEW) {
         set_bit(rnum, ctx->vregs_select);
     }
     if (type == EXT_TMP) {
         set_bit(rnum, ctx->vregs_updated_tmp);
+        if (is_predicated) {
+            set_bit(rnum, ctx->predicated_tmp_vregs);
+        }
     }
 }
 
@@ -129,10 +140,9 @@ static inline void ctx_log_vreg_write_pair(DisasContext *ctx,
 }
 
 static inline void ctx_log_qreg_write(DisasContext *ctx,
-                                      int rnum, bool is_predicated)
+                                      int rnum)
 {
     ctx->qreg_log[ctx->qreg_log_idx] = rnum;
-    ctx->qreg_is_predicated[ctx->qreg_log_idx] = is_predicated;
     ctx->qreg_log_idx++;
 }
 
@@ -153,12 +163,20 @@ extern TCGv hex_dczero_addr;
 extern TCGv hex_llsc_addr;
 extern TCGv hex_llsc_val;
 extern TCGv_i64 hex_llsc_val_i64;
-extern TCGv hex_VRegs_updated;
-extern TCGv hex_QRegs_updated;
 extern TCGv hex_vstore_addr[VSTORES_MAX];
 extern TCGv hex_vstore_size[VSTORES_MAX];
 extern TCGv hex_vstore_pending[VSTORES_MAX];
 
 bool is_gather_store_insn(DisasContext *ctx);
 void process_store(DisasContext *ctx, int slot_num);
+
+FIELD(PROBE_PKT_SCALAR_STORE_S0, MMU_IDX,       0, 2)
+FIELD(PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 2, 1)
+
+FIELD(PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0,        0, 1)
+FIELD(PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1,        1, 1)
+FIELD(PROBE_PKT_SCALAR_HVX_STORES, HAS_HVX_STORES, 2, 1)
+FIELD(PROBE_PKT_SCALAR_HVX_STORES, S0_IS_PRED,     3, 1)
+FIELD(PROBE_PKT_SCALAR_HVX_STORES, S1_IS_PRED,     4, 1)
+
 #endif
diff --git a/tests/data/acpi/pc/DSDT b/tests/data/acpi/pc/DSDT
index 0b475fb5a9..32d255cfc0 100644
--- a/tests/data/acpi/pc/DSDT
+++ b/tests/data/acpi/pc/DSDT
Binary files differdiff --git a/tests/data/acpi/pc/DSDT.acpierst b/tests/data/acpi/pc/DSDT.acpierst
index 17ef7caeb6..33e872b2fa 100644
--- a/tests/data/acpi/pc/DSDT.acpierst
+++ b/tests/data/acpi/pc/DSDT.acpierst
Binary files differdiff --git a/tests/data/acpi/pc/DSDT.acpihmat b/tests/data/acpi/pc/DSDT.acpihmat
index 675b674eaa..cd84abc1b1 100644
--- a/tests/data/acpi/pc/DSDT.acpihmat
+++ b/tests/data/acpi/pc/DSDT.acpihmat
Binary files differdiff --git a/tests/data/acpi/pc/DSDT.bridge b/tests/data/acpi/pc/DSDT.bridge
index c1ce061366..69a73ea2a6 100644
--- a/tests/data/acpi/pc/DSDT.bridge
+++ b/tests/data/acpi/pc/DSDT.bridge
Binary files differdiff --git a/tests/data/acpi/pc/DSDT.cphp b/tests/data/acpi/pc/DSDT.cphp
index 754ab854dc..20379056b3 100644
--- a/tests/data/acpi/pc/DSDT.cphp
+++ b/tests/data/acpi/pc/DSDT.cphp
Binary files differdiff --git a/tests/data/acpi/pc/DSDT.dimmpxm b/tests/data/acpi/pc/DSDT.dimmpxm
index 170503336b..435496e836 100644
--- a/tests/data/acpi/pc/DSDT.dimmpxm
+++ b/tests/data/acpi/pc/DSDT.dimmpxm
Binary files differdiff --git a/tests/data/acpi/pc/DSDT.hpbridge b/tests/data/acpi/pc/DSDT.hpbridge
index 834c27002e..b6eafab250 100644
--- a/tests/data/acpi/pc/DSDT.hpbridge
+++ b/tests/data/acpi/pc/DSDT.hpbridge
Binary files differdiff --git a/tests/data/acpi/pc/DSDT.hpbrroot b/tests/data/acpi/pc/DSDT.hpbrroot
index a71ed4fbaa..a4073f36d6 100644
--- a/tests/data/acpi/pc/DSDT.hpbrroot
+++ b/tests/data/acpi/pc/DSDT.hpbrroot
Binary files differdiff --git a/tests/data/acpi/pc/DSDT.ipmikcs b/tests/data/acpi/pc/DSDT.ipmikcs
index dd71356027..06aa7bfdec 100644
--- a/tests/data/acpi/pc/DSDT.ipmikcs
+++ b/tests/data/acpi/pc/DSDT.ipmikcs
Binary files differdiff --git a/tests/data/acpi/pc/DSDT.memhp b/tests/data/acpi/pc/DSDT.memhp
index 2f895e9b38..10a0e44d61 100644
--- a/tests/data/acpi/pc/DSDT.memhp
+++ b/tests/data/acpi/pc/DSDT.memhp
Binary files differdiff --git a/tests/data/acpi/pc/DSDT.nohpet b/tests/data/acpi/pc/DSDT.nohpet
index c012b63ace..6905312d82 100644
--- a/tests/data/acpi/pc/DSDT.nohpet
+++ b/tests/data/acpi/pc/DSDT.nohpet
Binary files differdiff --git a/tests/data/acpi/pc/DSDT.numamem b/tests/data/acpi/pc/DSDT.numamem
index f2ef4b9729..59e31338ee 100644
--- a/tests/data/acpi/pc/DSDT.numamem
+++ b/tests/data/acpi/pc/DSDT.numamem
Binary files differdiff --git a/tests/data/acpi/pc/DSDT.roothp b/tests/data/acpi/pc/DSDT.roothp
index 657c8263f0..448d596cf4 100644
--- a/tests/data/acpi/pc/DSDT.roothp
+++ b/tests/data/acpi/pc/DSDT.roothp
Binary files differdiff --git a/tests/data/acpi/q35/DSDT b/tests/data/acpi/q35/DSDT
index d68c472b46..720e8cbbbb 100644
--- a/tests/data/acpi/q35/DSDT
+++ b/tests/data/acpi/q35/DSDT
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.acpierst b/tests/data/acpi/q35/DSDT.acpierst
index de7ae27125..f26b1f2a19 100644
--- a/tests/data/acpi/q35/DSDT.acpierst
+++ b/tests/data/acpi/q35/DSDT.acpierst
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.acpihmat b/tests/data/acpi/q35/DSDT.acpihmat
index 48e2862257..86771f1746 100644
--- a/tests/data/acpi/q35/DSDT.acpihmat
+++ b/tests/data/acpi/q35/DSDT.acpihmat
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.acpihmat-noinitiator b/tests/data/acpi/q35/DSDT.acpihmat-noinitiator
index 30a4aa2ec8..a894a2d16c 100644
--- a/tests/data/acpi/q35/DSDT.acpihmat-noinitiator
+++ b/tests/data/acpi/q35/DSDT.acpihmat-noinitiator
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.applesmc b/tests/data/acpi/q35/DSDT.applesmc
index 84e2b5cbc4..276ae1df51 100644
--- a/tests/data/acpi/q35/DSDT.applesmc
+++ b/tests/data/acpi/q35/DSDT.applesmc
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.bridge b/tests/data/acpi/q35/DSDT.bridge
index e411d40fd1..9f8a208aaa 100644
--- a/tests/data/acpi/q35/DSDT.bridge
+++ b/tests/data/acpi/q35/DSDT.bridge
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.core-count2 b/tests/data/acpi/q35/DSDT.core-count2
index 0603db8cc6..2ec11fe3c3 100644
--- a/tests/data/acpi/q35/DSDT.core-count2
+++ b/tests/data/acpi/q35/DSDT.core-count2
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.cphp b/tests/data/acpi/q35/DSDT.cphp
index beeb83c33b..612c85b1b4 100644
--- a/tests/data/acpi/q35/DSDT.cphp
+++ b/tests/data/acpi/q35/DSDT.cphp
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.cxl b/tests/data/acpi/q35/DSDT.cxl
index 4586b9a18b..f049f414f0 100644
--- a/tests/data/acpi/q35/DSDT.cxl
+++ b/tests/data/acpi/q35/DSDT.cxl
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.dimmpxm b/tests/data/acpi/q35/DSDT.dimmpxm
index 99a93e12a7..23dabeacb0 100644
--- a/tests/data/acpi/q35/DSDT.dimmpxm
+++ b/tests/data/acpi/q35/DSDT.dimmpxm
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.ipmibt b/tests/data/acpi/q35/DSDT.ipmibt
index 7f7601dbff..541bb70522 100644
--- a/tests/data/acpi/q35/DSDT.ipmibt
+++ b/tests/data/acpi/q35/DSDT.ipmibt
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.ipmismbus b/tests/data/acpi/q35/DSDT.ipmismbus
index 6c5d1afe44..e2d57a3318 100644
--- a/tests/data/acpi/q35/DSDT.ipmismbus
+++ b/tests/data/acpi/q35/DSDT.ipmismbus
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.ivrs b/tests/data/acpi/q35/DSDT.ivrs
index de7ae27125..f26b1f2a19 100644
--- a/tests/data/acpi/q35/DSDT.ivrs
+++ b/tests/data/acpi/q35/DSDT.ivrs
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.memhp b/tests/data/acpi/q35/DSDT.memhp
index 79bce5c8f0..809d7e2f0f 100644
--- a/tests/data/acpi/q35/DSDT.memhp
+++ b/tests/data/acpi/q35/DSDT.memhp
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.mmio64 b/tests/data/acpi/q35/DSDT.mmio64
index c249929add..ab3fe3c1b5 100644
--- a/tests/data/acpi/q35/DSDT.mmio64
+++ b/tests/data/acpi/q35/DSDT.mmio64
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.multi-bridge b/tests/data/acpi/q35/DSDT.multi-bridge
index 66b39be294..9ae8ee0b41 100644
--- a/tests/data/acpi/q35/DSDT.multi-bridge
+++ b/tests/data/acpi/q35/DSDT.multi-bridge
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.noacpihp b/tests/data/acpi/q35/DSDT.noacpihp
new file mode 100644
index 0000000000..6ab1f0e525
--- /dev/null
+++ b/tests/data/acpi/q35/DSDT.noacpihp
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.nohpet b/tests/data/acpi/q35/DSDT.nohpet
index 9ff9983a80..becb5f7cad 100644
--- a/tests/data/acpi/q35/DSDT.nohpet
+++ b/tests/data/acpi/q35/DSDT.nohpet
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.numamem b/tests/data/acpi/q35/DSDT.numamem
index 1e7c45ef3c..0cdec0b4c5 100644
--- a/tests/data/acpi/q35/DSDT.numamem
+++ b/tests/data/acpi/q35/DSDT.numamem
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.pvpanic-isa b/tests/data/acpi/q35/DSDT.pvpanic-isa
index ed47451c44..6a9904ec94 100644
--- a/tests/data/acpi/q35/DSDT.pvpanic-isa
+++ b/tests/data/acpi/q35/DSDT.pvpanic-isa
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.tis.tpm12 b/tests/data/acpi/q35/DSDT.tis.tpm12
index efc2efc19f..628bf628f6 100644
--- a/tests/data/acpi/q35/DSDT.tis.tpm12
+++ b/tests/data/acpi/q35/DSDT.tis.tpm12
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.tis.tpm2 b/tests/data/acpi/q35/DSDT.tis.tpm2
index 675339715f..35c6b08068 100644
--- a/tests/data/acpi/q35/DSDT.tis.tpm2
+++ b/tests/data/acpi/q35/DSDT.tis.tpm2
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.viot b/tests/data/acpi/q35/DSDT.viot
index eeb40b360f..3ad4d26b7f 100644
--- a/tests/data/acpi/q35/DSDT.viot
+++ b/tests/data/acpi/q35/DSDT.viot
Binary files differdiff --git a/tests/data/acpi/q35/DSDT.xapic b/tests/data/acpi/q35/DSDT.xapic
index 3aa86f0724..d4a34e2351 100644
--- a/tests/data/acpi/q35/DSDT.xapic
+++ b/tests/data/acpi/q35/DSDT.xapic
Binary files differdiff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
index d29a4e47af..76d5100911 100644
--- a/tests/qtest/bios-tables-test.c
+++ b/tests/qtest/bios-tables-test.c
@@ -949,9 +949,14 @@ static void test_acpi_piix4_no_acpi_pci_hotplug(void)
     data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types);
     test_acpi_one("-global PIIX4_PM.acpi-root-pci-hotplug=off "
                   "-global PIIX4_PM.acpi-pci-hotplug-with-bridge-support=off "
-                  "-device pci-bridge,chassis_nr=1 "
-                  "-device pci-testdev,bus=pci.0 "
-                  "-device pci-testdev,bus=pci.1", &data);
+                  "-device pci-bridge,chassis_nr=1,addr=4.0 "
+                  "-device pci-testdev,bus=pci.0,addr=5.0 "
+                  "-device pci-testdev,bus=pci.0,addr=6.0,acpi-index=101 "
+                  "-device pci-testdev,bus=pci.1,addr=1.0 "
+                  "-device pci-testdev,bus=pci.1,addr=2.0,acpi-index=201 "
+                  "-device pci-bridge,id=nhpbr,chassis_nr=2,shpc=off,addr=7.0 "
+                  "-device pci-testdev,bus=nhpbr,addr=1.0,acpi-index=301 "
+                  , &data);
     free_test_data(&data);
 }
 
@@ -1002,18 +1007,42 @@ static void test_acpi_q35_tcg_bridge(void)
     free_test_data(&data);
 }
 
+static void test_acpi_q35_tcg_no_acpi_hotplug(void)
+{
+    test_data data;
+
+    memset(&data, 0, sizeof(data));
+    data.machine = MACHINE_Q35;
+    data.variant = ".noacpihp";
+    data.required_struct_types = base_required_struct_types;
+    data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types);
+    test_acpi_one("-global ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off"
+        " -device pci-testdev,bus=pcie.0,acpi-index=101,addr=3.0"
+        " -device pci-bridge,chassis_nr=1,id=shpcbr,addr=4.0"
+        " -device pci-testdev,bus=shpcbr,addr=1.0,acpi-index=201"
+        " -device pci-bridge,chassis_nr=2,shpc=off,id=noshpcbr,addr=5.0"
+        " -device pci-testdev,bus=noshpcbr,addr=1.0,acpi-index=301"
+        " -device pcie-root-port,id=hprp,port=0x0,chassis=1,addr=6.0"
+        " -device pci-testdev,bus=hprp,acpi-index=401"
+        " -device pcie-root-port,id=nohprp,port=0x0,chassis=2,hotplug=off,"
+                                 "addr=7.0"
+        " -device pci-testdev,bus=nohprp,acpi-index=501"
+        " -device pcie-root-port,id=nohprpint,port=0x0,chassis=3,hotplug=off,"
+                                 "multifunction=on,addr=8.0"
+        " -device pci-testdev,bus=nohprpint,acpi-index=601,addr=8.1"
+        " -device pcie-root-port,id=hprp2,port=0x0,chassis=4,bus=nohprpint,"
+                                 "addr=9.0"
+        " -device pci-testdev,bus=hprp2,acpi-index=602"
+        , &data);
+    free_test_data(&data);
+}
+
 static void test_acpi_q35_multif_bridge(void)
 {
     test_data data = {
         .machine = MACHINE_Q35,
         .variant = ".multi-bridge",
     };
-
-    if (!qtest_has_device("pcie-root-port")) {
-        g_test_skip("Device pcie-root-port is not available");
-        goto out;
-    }
-
     test_vm_prepare("-S"
         " -device virtio-balloon,id=balloon0,addr=0x4.0x2"
         " -device pcie-root-port,id=rp0,multifunction=on,"
@@ -1025,9 +1054,14 @@ static void test_acpi_q35_multif_bridge(void)
         " -device pcie-root-port,id=rphptgt2,port=0x0,chassis=6,addr=2.2"
         " -device pcie-root-port,id=rphptgt3,port=0x0,chassis=7,addr=2.3"
         " -device pci-testdev,bus=pcie.0,addr=2.4"
+        " -device pci-testdev,bus=pcie.0,addr=2.5,acpi-index=102"
         " -device pci-testdev,bus=pcie.0,addr=5.0"
+        " -device pci-testdev,bus=pcie.0,addr=0xf.0,acpi-index=101"
         " -device pci-testdev,bus=rp0,addr=0.0"
-        " -device pci-testdev,bus=br1", &data);
+        " -device pci-testdev,bus=br1"
+        " -device pcie-root-port,id=rpnohp,chassis=8,addr=0xA.0,hotplug=off"
+        " -device pcie-root-port,id=rp3,chassis=9,bus=rpnohp"
+        , &data);
 
     /* hotplugged bridges section */
     qtest_qmp_device_add(data.qts, "pci-bridge", "hpbr1",
@@ -1049,7 +1083,6 @@ static void test_acpi_q35_multif_bridge(void)
     /* check that reboot/reset doesn't change any ACPI tables  */
     qtest_qmp_send(data.qts, "{'execute':'system_reset' }");
     process_acpi_tables(&data);
-out:
     free_test_data(&data);
 }
 
@@ -1403,11 +1436,6 @@ static void test_acpi_tcg_dimm_pxm(const char *machine)
 {
     test_data data;
 
-    if (!qtest_has_device("nvdimm")) {
-        g_test_skip("Device nvdimm is not available");
-        return;
-    }
-
     memset(&data, 0, sizeof(data));
     data.machine = machine;
     data.variant = ".dimmpxm";
@@ -1456,11 +1484,6 @@ static void test_acpi_virt_tcg_memhp(void)
         .scan_len = 256ULL * 1024 * 1024,
     };
 
-    if (!qtest_has_device("nvdimm")) {
-        g_test_skip("Device nvdimm is not available");
-        goto out;
-    }
-
     data.variant = ".memhp";
     test_acpi_one(" -machine nvdimm=on"
                   " -cpu cortex-a57"
@@ -1474,7 +1497,7 @@ static void test_acpi_virt_tcg_memhp(void)
                   " -device pc-dimm,id=dimm0,memdev=ram2,node=0"
                   " -device nvdimm,id=dimm1,memdev=nvm0,node=1",
                   &data);
-out:
+
     free_test_data(&data);
 
 }
@@ -1492,11 +1515,6 @@ static void test_acpi_microvm_tcg(void)
 {
     test_data data;
 
-    if (!qtest_has_device("virtio-blk-device")) {
-        g_test_skip("Device virtio-blk-device is not available");
-        return;
-    }
-
     test_acpi_microvm_prepare(&data);
     test_acpi_one(" -machine microvm,acpi=on,ioapic2=off,rtc=off",
                   &data);
@@ -1507,11 +1525,6 @@ static void test_acpi_microvm_usb_tcg(void)
 {
     test_data data;
 
-    if (!qtest_has_device("virtio-blk-device")) {
-        g_test_skip("Device virtio-blk-device is not available");
-        return;
-    }
-
     test_acpi_microvm_prepare(&data);
     data.variant = ".usb";
     test_acpi_one(" -machine microvm,acpi=on,ioapic2=off,usb=on,rtc=off",
@@ -1523,11 +1536,6 @@ static void test_acpi_microvm_rtc_tcg(void)
 {
     test_data data;
 
-    if (!qtest_has_device("virtio-blk-device")) {
-        g_test_skip("Device virtio-blk-device is not available");
-        return;
-    }
-
     test_acpi_microvm_prepare(&data);
     data.variant = ".rtc";
     test_acpi_one(" -machine microvm,acpi=on,ioapic2=off,rtc=on",
@@ -1539,11 +1547,6 @@ static void test_acpi_microvm_pcie_tcg(void)
 {
     test_data data;
 
-    if (!qtest_has_device("virtio-blk-device")) {
-        g_test_skip("Device virtio-blk-device is not available");
-        return;
-    }
-
     test_acpi_microvm_prepare(&data);
     data.variant = ".pcie";
     data.tcg_only = true; /* need constant host-phys-bits */
@@ -1556,11 +1559,6 @@ static void test_acpi_microvm_ioapic2_tcg(void)
 {
     test_data data;
 
-    if (!qtest_has_device("virtio-blk-device")) {
-        g_test_skip("Device virtio-blk-device is not available");
-        return;
-    }
-
     test_acpi_microvm_prepare(&data);
     data.variant = ".ioapic2";
     test_acpi_one(" -machine microvm,acpi=on,ioapic2=on,rtc=off",
@@ -1600,12 +1598,6 @@ static void test_acpi_virt_tcg_pxb(void)
         .ram_start = 0x40000000ULL,
         .scan_len = 128ULL * 1024 * 1024,
     };
-
-    if (!qtest_has_device("pcie-root-port")) {
-        g_test_skip("Device pcie-root-port is not available");
-        goto out;
-    }
-
     /*
      * While using -cdrom, the cdrom would auto plugged into pxb-pcie,
      * the reason is the bus of pxb-pcie is also root bus, it would lead
@@ -1624,7 +1616,7 @@ static void test_acpi_virt_tcg_pxb(void)
                   " -cpu cortex-a57"
                   " -device pxb-pcie,bus_nr=128",
                   &data);
-out:
+
     free_test_data(&data);
 }
 
@@ -1812,12 +1804,6 @@ static void test_acpi_microvm_acpi_erst(void)
     gchar *params;
     test_data data;
 
-    if (!qtest_has_device("virtio-blk-device")) {
-        g_test_skip("Device virtio-blk-device is not available");
-        g_free(tmp_path);
-        return;
-    }
-
     test_acpi_microvm_prepare(&data);
     data.variant = ".pcie";
     data.tcg_only = true; /* need constant host-phys-bits */
@@ -1878,11 +1864,6 @@ static void test_acpi_q35_viot(void)
         .variant = ".viot",
     };
 
-    if (!qtest_has_device("virtio-iommu")) {
-        g_test_skip("Device virtio-iommu is not available");
-        goto out;
-    }
-
     /*
      * To keep things interesting, two buses bypass the IOMMU.
      * VIOT should only describes the other two buses.
@@ -1893,7 +1874,6 @@ static void test_acpi_q35_viot(void)
                   "-device pxb-pcie,bus_nr=0x20,id=pcie.200,bus=pcie.0,bypass_iommu=on "
                   "-device pxb-pcie,bus_nr=0x30,id=pcie.300,bus=pcie.0",
                   &data);
-out:
     free_test_data(&data);
 }
 
@@ -1954,10 +1934,8 @@ static void test_acpi_virt_viot(void)
         .scan_len = 128ULL * 1024 * 1024,
     };
 
-    if (qtest_has_device("virtio-iommu")) {
-        test_acpi_one("-cpu cortex-a57 "
-                       "-device virtio-iommu-pci", &data);
-    }
+    test_acpi_one("-cpu cortex-a57 "
+                  "-device virtio-iommu-pci", &data);
     free_test_data(&data);
 }
 
@@ -2066,11 +2044,6 @@ static void test_acpi_microvm_oem_fields(void)
     test_data data;
     char *args;
 
-    if (!qtest_has_device("virtio-blk-device")) {
-        g_test_skip("Device virtio-blk-device is not available");
-        return;
-    }
-
     test_acpi_microvm_prepare(&data);
 
     args = test_acpi_create_args(&data,
@@ -2161,6 +2134,8 @@ int main(int argc, char *argv[])
                                test_acpi_q35_tcg_tpm12_tis);
             }
             qtest_add_func("acpi/q35/bridge", test_acpi_q35_tcg_bridge);
+            qtest_add_func("acpi/q35/no-acpi-hotplug",
+                           test_acpi_q35_tcg_no_acpi_hotplug);
             qtest_add_func("acpi/q35/multif-bridge",
                            test_acpi_q35_multif_bridge);
             qtest_add_func("acpi/q35/mmio64", test_acpi_q35_tcg_mmio64);
diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target
index 18e6a5969e..0d82dfa76e 100644
--- a/tests/tcg/hexagon/Makefile.target
+++ b/tests/tcg/hexagon/Makefile.target
@@ -1,5 +1,5 @@
 ##
-##  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+##  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 ##
 ##  This program is free software; you can redistribute it and/or modify
 ##  it under the terms of the GNU General Public License as published by
@@ -45,6 +45,10 @@ HEX_TESTS += fpstuff
 HEX_TESTS += overflow
 HEX_TESTS += signal_context
 HEX_TESTS += reg_mut
+HEX_TESTS += vector_add_int
+HEX_TESTS += scatter_gather
+HEX_TESTS += hvx_misc
+HEX_TESTS += hvx_histogram
 
 HEX_TESTS += test_abs
 HEX_TESTS += test_bitcnt
@@ -78,3 +82,10 @@ TESTS += $(HEX_TESTS)
 usr: usr.c
 	$(CC) $(CFLAGS) -mv67t -O2 -Wno-inline-asm -Wno-expansion-to-defined $< -o $@ $(LDFLAGS)
 
+scatter_gather: CFLAGS += -mhvx
+vector_add_int: CFLAGS += -mhvx -fvectorize
+hvx_misc: CFLAGS += -mhvx
+hvx_histogram: CFLAGS += -mhvx -Wno-gnu-folding-constant
+
+hvx_histogram: hvx_histogram.c hvx_histogram_row.S
+	$(CC) $(CFLAGS) $(CROSS_CC_GUEST_CFLAGS) $^ -o $@ $(LDFLAGS)
diff --git a/tests/tcg/hexagon/fpstuff.c b/tests/tcg/hexagon/fpstuff.c
index 56bf562a40..90ce9a6ef3 100644
--- a/tests/tcg/hexagon/fpstuff.c
+++ b/tests/tcg/hexagon/fpstuff.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2020-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2020-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -40,6 +40,7 @@ const int SF_HEX_NAN =                    0xffffffff;
 const int SF_small_neg =                  0xab98fba8;
 const int SF_denorm =                     0x00000001;
 const int SF_random =                     0x346001d6;
+const int SF_neg_zero =                   0x80000000;
 
 const long long DF_QNaN =                 0x7ff8000000000000ULL;
 const long long DF_SNaN =                 0x7ff7000000000000ULL;
@@ -536,6 +537,33 @@ static void check_sffixupd(void)
     check32(result, 0x146001d6);
 }
 
+static void check_sffms(void)
+{
+    int result;
+
+    /* Check that sffms properly deals with -0 */
+    result = SF_neg_zero;
+    asm ("%0 -= sfmpy(%1 , %2)\n\t"
+        : "+r"(result)
+        : "r"(SF_ZERO), "r"(SF_ZERO)
+        : "r12", "r8");
+    check32(result, SF_neg_zero);
+
+    result = SF_ZERO;
+    asm ("%0 -= sfmpy(%1 , %2)\n\t"
+        : "+r"(result)
+        : "r"(SF_neg_zero), "r"(SF_ZERO)
+        : "r12", "r8");
+    check32(result, SF_ZERO);
+
+    result = SF_ZERO;
+    asm ("%0 -= sfmpy(%1 , %2)\n\t"
+        : "+r"(result)
+        : "r"(SF_ZERO), "r"(SF_neg_zero)
+        : "r12", "r8");
+    check32(result, SF_ZERO);
+}
+
 static void check_float2int_convs()
 {
     int res32;
@@ -688,6 +716,7 @@ int main()
     check_invsqrta();
     check_sffixupn();
     check_sffixupd();
+    check_sffms();
     check_float2int_convs();
 
     puts(err ? "FAIL" : "PASS");
diff --git a/tests/tcg/hexagon/preg_alias.c b/tests/tcg/hexagon/preg_alias.c
index b44a8112b4..8798fbcaf3 100644
--- a/tests/tcg/hexagon/preg_alias.c
+++ b/tests/tcg/hexagon/preg_alias.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -65,7 +65,7 @@ static inline void creg_alias(int cval, PRegs *pregs)
       : "=r"(pregs->pregs.p0), "=r"(pregs->pregs.p1),
         "=r"(pregs->pregs.p2), "=r"(pregs->pregs.p3)
       : "r"(cval)
-      : "p0", "p1", "p2", "p3");
+      : "c4", "p0", "p1", "p2", "p3");
 }
 
 int err;
@@ -92,7 +92,7 @@ static inline void creg_alias_pair(unsigned int cval, PRegs *pregs)
        : "=r"(pregs->pregs.p0), "=r"(pregs->pregs.p1),
          "=r"(pregs->pregs.p2), "=r"(pregs->pregs.p3), "=r"(c5)
        : "r"(cval_pair)
-       : "p0", "p1", "p2", "p3");
+       : "c4", "c5", "p0", "p1", "p2", "p3");
 
   check(c5, 0xdeadbeef);
 }
@@ -117,7 +117,7 @@ static void test_packet(void)
          "}\n\t"
          : "+r"(result)
          : "r"(0xffffffff), "r"(0xff00ffff), "r"(0x837ed653)
-         : "p0", "p1", "p2", "p3");
+         : "c4", "p0", "p1", "p2", "p3");
     check(result, old_val);
 
     /* Test a predicated store */
@@ -129,7 +129,7 @@ static void test_packet(void)
          "}\n\t"
          :
          : "r"(0), "r"(0xffffffff), "r"(&result)
-         : "p0", "p1", "p2", "p3", "memory");
+         : "c4", "p0", "p1", "p2", "p3", "memory");
     check(result, 0x0);
 }
 
diff --git a/tests/tcg/hexagon/scatter_gather.c b/tests/tcg/hexagon/scatter_gather.c
index b93eb18133..bf8b5e0317 100644
--- a/tests/tcg/hexagon/scatter_gather.c
+++ b/tests/tcg/hexagon/scatter_gather.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -40,47 +40,6 @@ typedef long HVX_VectorPair   __attribute__((__vector_size__(256)))
 typedef long HVX_VectorPred   __attribute__((__vector_size__(128)))
                               __attribute__((aligned(128)));
 
-#define VSCATTER_16(BASE, RGN, OFF, VALS) \
-    __builtin_HEXAGON_V6_vscattermh_128B((int)BASE, RGN, OFF, VALS)
-#define VSCATTER_16_MASKED(MASK, BASE, RGN, OFF, VALS) \
-    __builtin_HEXAGON_V6_vscattermhq_128B(MASK, (int)BASE, RGN, OFF, VALS)
-#define VSCATTER_32(BASE, RGN, OFF, VALS) \
-    __builtin_HEXAGON_V6_vscattermw_128B((int)BASE, RGN, OFF, VALS)
-#define VSCATTER_32_MASKED(MASK, BASE, RGN, OFF, VALS) \
-    __builtin_HEXAGON_V6_vscattermwq_128B(MASK, (int)BASE, RGN, OFF, VALS)
-#define VSCATTER_16_32(BASE, RGN, OFF, VALS) \
-    __builtin_HEXAGON_V6_vscattermhw_128B((int)BASE, RGN, OFF, VALS)
-#define VSCATTER_16_32_MASKED(MASK, BASE, RGN, OFF, VALS) \
-    __builtin_HEXAGON_V6_vscattermhwq_128B(MASK, (int)BASE, RGN, OFF, VALS)
-#define VSCATTER_16_ACC(BASE, RGN, OFF, VALS) \
-    __builtin_HEXAGON_V6_vscattermh_add_128B((int)BASE, RGN, OFF, VALS)
-#define VSCATTER_32_ACC(BASE, RGN, OFF, VALS) \
-    __builtin_HEXAGON_V6_vscattermw_add_128B((int)BASE, RGN, OFF, VALS)
-#define VSCATTER_16_32_ACC(BASE, RGN, OFF, VALS) \
-    __builtin_HEXAGON_V6_vscattermhw_add_128B((int)BASE, RGN, OFF, VALS)
-
-#define VGATHER_16(DSTADDR, BASE, RGN, OFF) \
-    __builtin_HEXAGON_V6_vgathermh_128B(DSTADDR, (int)BASE, RGN, OFF)
-#define VGATHER_16_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \
-    __builtin_HEXAGON_V6_vgathermhq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF)
-#define VGATHER_32(DSTADDR, BASE, RGN, OFF) \
-    __builtin_HEXAGON_V6_vgathermw_128B(DSTADDR, (int)BASE, RGN, OFF)
-#define VGATHER_32_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \
-    __builtin_HEXAGON_V6_vgathermwq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF)
-#define VGATHER_16_32(DSTADDR, BASE, RGN, OFF) \
-    __builtin_HEXAGON_V6_vgathermhw_128B(DSTADDR, (int)BASE, RGN, OFF)
-#define VGATHER_16_32_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \
-    __builtin_HEXAGON_V6_vgathermhwq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF)
-
-#define VSHUFF_H(V) \
-    __builtin_HEXAGON_V6_vshuffh_128B(V)
-#define VSPLAT_H(X) \
-    __builtin_HEXAGON_V6_lvsplath_128B(X)
-#define VAND_VAL(PRED, VAL) \
-    __builtin_HEXAGON_V6_vandvrt_128B(PRED, VAL)
-#define VDEAL_H(V) \
-    __builtin_HEXAGON_V6_vdealh_128B(V)
-
 int err;
 
 /* define the number of rows/cols in a square matrix */
@@ -108,22 +67,22 @@ unsigned short vscatter16_32_ref[SCATTER_BUFFER_SIZE];
 unsigned short vgather16_32_ref[MATRIX_SIZE];
 
 /* declare the arrays of offsets */
-unsigned short half_offsets[MATRIX_SIZE];
-unsigned int   word_offsets[MATRIX_SIZE];
+unsigned short half_offsets[MATRIX_SIZE] __attribute__((aligned(128)));
+unsigned int   word_offsets[MATRIX_SIZE] __attribute__((aligned(128)));
 
 /* declare the arrays of values */
-unsigned short half_values[MATRIX_SIZE];
-unsigned short half_values_acc[MATRIX_SIZE];
-unsigned short half_values_masked[MATRIX_SIZE];
-unsigned int   word_values[MATRIX_SIZE];
-unsigned int   word_values_acc[MATRIX_SIZE];
-unsigned int   word_values_masked[MATRIX_SIZE];
+unsigned short half_values[MATRIX_SIZE] __attribute__((aligned(128)));
+unsigned short half_values_acc[MATRIX_SIZE] __attribute__((aligned(128)));
+unsigned short half_values_masked[MATRIX_SIZE] __attribute__((aligned(128)));
+unsigned int   word_values[MATRIX_SIZE] __attribute__((aligned(128)));
+unsigned int   word_values_acc[MATRIX_SIZE] __attribute__((aligned(128)));
+unsigned int   word_values_masked[MATRIX_SIZE] __attribute__((aligned(128)));
 
 /* declare the arrays of predicates */
-unsigned short half_predicates[MATRIX_SIZE];
-unsigned int   word_predicates[MATRIX_SIZE];
+unsigned short half_predicates[MATRIX_SIZE] __attribute__((aligned(128)));
+unsigned int   word_predicates[MATRIX_SIZE] __attribute__((aligned(128)));
 
-/* make this big enough for all the intrinsics */
+/* make this big enough for all the operations */
 const size_t region_len = sizeof(vtcm);
 
 /* optionally add sync instructions */
@@ -261,164 +220,201 @@ void create_offsets_values_preds_16_32(void)
     }
 }
 
-/* scatter the 16 bit elements using intrinsics */
+/* scatter the 16 bit elements using HVX */
 void vector_scatter_16(void)
 {
-    /* copy the offsets and values to vectors */
-    HVX_Vector offsets = *(HVX_Vector *)half_offsets;
-    HVX_Vector values = *(HVX_Vector *)half_values;
-
-    VSCATTER_16(&vtcm.vscatter16, region_len, offsets, values);
+    asm ("m0 = %1\n\t"
+         "v0 = vmem(%2 + #0)\n\t"
+         "v1 = vmem(%3 + #0)\n\t"
+         "vscatter(%0, m0, v0.h).h = v1\n\t"
+         : : "r"(vtcm.vscatter16), "r"(region_len),
+             "r"(half_offsets), "r"(half_values)
+         : "m0", "v0", "v1", "memory");
 
     sync_scatter(vtcm.vscatter16);
 }
 
-/* scatter-accumulate the 16 bit elements using intrinsics */
+/* scatter-accumulate the 16 bit elements using HVX */
 void vector_scatter_16_acc(void)
 {
-    /* copy the offsets and values to vectors */
-    HVX_Vector offsets = *(HVX_Vector *)half_offsets;
-    HVX_Vector values = *(HVX_Vector *)half_values_acc;
-
-    VSCATTER_16_ACC(&vtcm.vscatter16, region_len, offsets, values);
+    asm ("m0 = %1\n\t"
+         "v0 = vmem(%2 + #0)\n\t"
+         "v1 = vmem(%3 + #0)\n\t"
+         "vscatter(%0, m0, v0.h).h += v1\n\t"
+         : : "r"(vtcm.vscatter16), "r"(region_len),
+             "r"(half_offsets), "r"(half_values_acc)
+         : "m0", "v0", "v1", "memory");
 
     sync_scatter(vtcm.vscatter16);
 }
 
-/* scatter the 16 bit elements using intrinsics */
+/* masked scatter the 16 bit elements using HVX */
 void vector_scatter_16_masked(void)
 {
-    /* copy the offsets and values to vectors */
-    HVX_Vector offsets = *(HVX_Vector *)half_offsets;
-    HVX_Vector values = *(HVX_Vector *)half_values_masked;
-    HVX_Vector pred_reg = *(HVX_Vector *)half_predicates;
-    HVX_VectorPred preds = VAND_VAL(pred_reg, ~0);
-
-    VSCATTER_16_MASKED(preds, &vtcm.vscatter16, region_len, offsets, values);
+    asm ("r1 = #-1\n\t"
+         "v0 = vmem(%0 + #0)\n\t"
+         "q0 = vand(v0, r1)\n\t"
+         "m0 = %2\n\t"
+         "v0 = vmem(%3 + #0)\n\t"
+         "v1 = vmem(%4 + #0)\n\t"
+         "if (q0) vscatter(%1, m0, v0.h).h = v1\n\t"
+         : : "r"(half_predicates), "r"(vtcm.vscatter16), "r"(region_len),
+             "r"(half_offsets), "r"(half_values_masked)
+         : "r1", "q0", "m0", "q0", "v0", "v1", "memory");
 
     sync_scatter(vtcm.vscatter16);
 }
 
-/* scatter the 32 bit elements using intrinsics */
+/* scatter the 32 bit elements using HVX */
 void vector_scatter_32(void)
 {
-    /* copy the offsets and values to vectors */
-    HVX_Vector offsetslo = *(HVX_Vector *)word_offsets;
-    HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
-    HVX_Vector valueslo = *(HVX_Vector *)word_values;
-    HVX_Vector valueshi = *(HVX_Vector *)&word_values[MATRIX_SIZE / 2];
-
-    VSCATTER_32(&vtcm.vscatter32, region_len, offsetslo, valueslo);
-    VSCATTER_32(&vtcm.vscatter32, region_len, offsetshi, valueshi);
+    HVX_Vector *offsetslo = (HVX_Vector *)word_offsets;
+    HVX_Vector *offsetshi = (HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
+    HVX_Vector *valueslo = (HVX_Vector *)word_values;
+    HVX_Vector *valueshi = (HVX_Vector *)&word_values[MATRIX_SIZE / 2];
+
+    asm ("m0 = %1\n\t"
+         "v0 = vmem(%2 + #0)\n\t"
+         "v1 = vmem(%3 + #0)\n\t"
+         "vscatter(%0, m0, v0.w).w = v1\n\t"
+         : : "r"(vtcm.vscatter32), "r"(region_len),
+             "r"(offsetslo), "r"(valueslo)
+         : "m0", "v0", "v1", "memory");
+    asm ("m0 = %1\n\t"
+         "v0 = vmem(%2 + #0)\n\t"
+         "v1 = vmem(%3 + #0)\n\t"
+         "vscatter(%0, m0, v0.w).w = v1\n\t"
+         : : "r"(vtcm.vscatter32), "r"(region_len),
+             "r"(offsetshi), "r"(valueshi)
+         : "m0", "v0", "v1", "memory");
 
     sync_scatter(vtcm.vscatter32);
 }
 
-/* scatter-acc the 32 bit elements using intrinsics */
+/* scatter-accumulate the 32 bit elements using HVX */
 void vector_scatter_32_acc(void)
 {
-    /* copy the offsets and values to vectors */
-    HVX_Vector offsetslo = *(HVX_Vector *)word_offsets;
-    HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
-    HVX_Vector valueslo = *(HVX_Vector *)word_values_acc;
-    HVX_Vector valueshi = *(HVX_Vector *)&word_values_acc[MATRIX_SIZE / 2];
-
-    VSCATTER_32_ACC(&vtcm.vscatter32, region_len, offsetslo, valueslo);
-    VSCATTER_32_ACC(&vtcm.vscatter32, region_len, offsetshi, valueshi);
+    HVX_Vector *offsetslo = (HVX_Vector *)word_offsets;
+    HVX_Vector *offsetshi = (HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
+    HVX_Vector *valueslo = (HVX_Vector *)word_values_acc;
+    HVX_Vector *valueshi = (HVX_Vector *)&word_values_acc[MATRIX_SIZE / 2];
+
+    asm ("m0 = %1\n\t"
+         "v0 = vmem(%2 + #0)\n\t"
+         "v1 = vmem(%3 + #0)\n\t"
+         "vscatter(%0, m0, v0.w).w += v1\n\t"
+         : : "r"(vtcm.vscatter32), "r"(region_len),
+             "r"(offsetslo), "r"(valueslo)
+         : "m0", "v0", "v1", "memory");
+    asm ("m0 = %1\n\t"
+         "v0 = vmem(%2 + #0)\n\t"
+         "v1 = vmem(%3 + #0)\n\t"
+         "vscatter(%0, m0, v0.w).w += v1\n\t"
+         : : "r"(vtcm.vscatter32), "r"(region_len),
+             "r"(offsetshi), "r"(valueshi)
+         : "m0", "v0", "v1", "memory");
 
     sync_scatter(vtcm.vscatter32);
 }
 
-/* scatter the 32 bit elements using intrinsics */
+/* masked scatter the 32 bit elements using HVX */
 void vector_scatter_32_masked(void)
 {
-    /* copy the offsets and values to vectors */
-    HVX_Vector offsetslo = *(HVX_Vector *)word_offsets;
-    HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
-    HVX_Vector valueslo = *(HVX_Vector *)word_values_masked;
-    HVX_Vector valueshi = *(HVX_Vector *)&word_values_masked[MATRIX_SIZE / 2];
-    HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates;
-    HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2];
-    HVX_VectorPred predslo = VAND_VAL(pred_reglo, ~0);
-    HVX_VectorPred predshi = VAND_VAL(pred_reghi, ~0);
-
-    VSCATTER_32_MASKED(predslo, &vtcm.vscatter32, region_len, offsetslo,
-                       valueslo);
-    VSCATTER_32_MASKED(predshi, &vtcm.vscatter32, region_len, offsetshi,
-                       valueshi);
+    HVX_Vector *offsetslo = (HVX_Vector *)word_offsets;
+    HVX_Vector *offsetshi = (HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
+    HVX_Vector *valueslo = (HVX_Vector *)word_values_masked;
+    HVX_Vector *valueshi = (HVX_Vector *)&word_values_masked[MATRIX_SIZE / 2];
+    HVX_Vector *predslo = (HVX_Vector *)word_predicates;
+    HVX_Vector *predshi = (HVX_Vector *)&word_predicates[MATRIX_SIZE / 2];
+
+    asm ("r1 = #-1\n\t"
+         "v0 = vmem(%0 + #0)\n\t"
+         "q0 = vand(v0, r1)\n\t"
+         "m0 = %2\n\t"
+         "v0 = vmem(%3 + #0)\n\t"
+         "v1 = vmem(%4 + #0)\n\t"
+         "if (q0) vscatter(%1, m0, v0.w).w = v1\n\t"
+         : : "r"(predslo), "r"(vtcm.vscatter32), "r"(region_len),
+             "r"(offsetslo), "r"(valueslo)
+         : "r1", "q0", "m0", "q0", "v0", "v1", "memory");
+    asm ("r1 = #-1\n\t"
+         "v0 = vmem(%0 + #0)\n\t"
+         "q0 = vand(v0, r1)\n\t"
+         "m0 = %2\n\t"
+         "v0 = vmem(%3 + #0)\n\t"
+         "v1 = vmem(%4 + #0)\n\t"
+         "if (q0) vscatter(%1, m0, v0.w).w = v1\n\t"
+         : : "r"(predshi), "r"(vtcm.vscatter32), "r"(region_len),
+             "r"(offsetshi), "r"(valueshi)
+         : "r1", "q0", "m0", "q0", "v0", "v1", "memory");
 
-    sync_scatter(vtcm.vscatter16);
+    sync_scatter(vtcm.vscatter32);
 }
 
-/* scatter the 16 bit elements with 32 bit offsets using intrinsics */
+/* scatter the 16 bit elements with 32 bit offsets using HVX */
 void vector_scatter_16_32(void)
 {
-    HVX_VectorPair offsets;
-    HVX_Vector values;
-
-    /* get the word offsets in a vector pair */
-    offsets = *(HVX_VectorPair *)word_offsets;
-
-    /* these values need to be shuffled for the scatter */
-    values = *(HVX_Vector *)half_values;
-    values = VSHUFF_H(values);
-
-    VSCATTER_16_32(&vtcm.vscatter16_32, region_len, offsets, values);
+    asm ("m0 = %1\n\t"
+         "v0 = vmem(%2 + #0)\n\t"
+         "v1 = vmem(%2 + #1)\n\t"
+         "v2 = vmem(%3 + #0)\n\t"
+         "v2.h = vshuff(v2.h)\n\t"  /* shuffle the values for the scatter */
+         "vscatter(%0, m0, v1:0.w).h = v2\n\t"
+         : : "r"(vtcm.vscatter16_32), "r"(region_len),
+             "r"(word_offsets), "r"(half_values)
+         : "m0", "v0", "v1", "v2", "memory");
 
     sync_scatter(vtcm.vscatter16_32);
 }
 
-/* scatter-acc the 16 bit elements with 32 bit offsets using intrinsics */
+/* scatter-accumulate the 16 bit elements with 32 bit offsets using HVX */
 void vector_scatter_16_32_acc(void)
 {
-    HVX_VectorPair offsets;
-    HVX_Vector values;
-
-    /* get the word offsets in a vector pair */
-    offsets = *(HVX_VectorPair *)word_offsets;
-
-    /* these values need to be shuffled for the scatter */
-    values = *(HVX_Vector *)half_values_acc;
-    values = VSHUFF_H(values);
-
-    VSCATTER_16_32_ACC(&vtcm.vscatter16_32, region_len, offsets, values);
+    asm ("m0 = %1\n\t"
+         "v0 = vmem(%2 + #0)\n\t"
+         "v1 = vmem(%2 + #1)\n\t"
+         "v2 = vmem(%3 + #0)\n\t" \
+         "v2.h = vshuff(v2.h)\n\t"  /* shuffle the values for the scatter */
+         "vscatter(%0, m0, v1:0.w).h += v2\n\t"
+         : : "r"(vtcm.vscatter16_32), "r"(region_len),
+             "r"(word_offsets), "r"(half_values_acc)
+         : "m0", "v0", "v1", "v2", "memory");
 
     sync_scatter(vtcm.vscatter16_32);
 }
 
-/* masked scatter the 16 bit elements with 32 bit offsets using intrinsics */
+/* masked scatter the 16 bit elements with 32 bit offsets using HVX */
 void vector_scatter_16_32_masked(void)
 {
-    HVX_VectorPair offsets;
-    HVX_Vector values;
-    HVX_Vector pred_reg;
-
-    /* get the word offsets in a vector pair */
-    offsets = *(HVX_VectorPair *)word_offsets;
-
-    /* these values need to be shuffled for the scatter */
-    values = *(HVX_Vector *)half_values_masked;
-    values = VSHUFF_H(values);
-
-    pred_reg = *(HVX_Vector *)half_predicates;
-    pred_reg = VSHUFF_H(pred_reg);
-    HVX_VectorPred preds = VAND_VAL(pred_reg, ~0);
-
-    VSCATTER_16_32_MASKED(preds, &vtcm.vscatter16_32, region_len, offsets,
-                          values);
+    asm ("r1 = #-1\n\t"
+         "v0 = vmem(%0 + #0)\n\t"
+         "v0.h = vshuff(v0.h)\n\t"  /* shuffle the predicates */
+         "q0 = vand(v0, r1)\n\t"
+         "m0 = %2\n\t"
+         "v0 = vmem(%3 + #0)\n\t"
+         "v1 = vmem(%3 + #1)\n\t"
+         "v2 = vmem(%4 + #0)\n\t" \
+         "v2.h = vshuff(v2.h)\n\t"  /* shuffle the values for the scatter */
+         "if (q0) vscatter(%1, m0, v1:0.w).h = v2\n\t"
+         : : "r"(half_predicates), "r"(vtcm.vscatter16_32), "r"(region_len),
+             "r"(word_offsets), "r"(half_values_masked)
+         : "r1", "q0", "m0", "v0", "v1", "v2", "memory");
 
     sync_scatter(vtcm.vscatter16_32);
 }
 
-/* gather the elements from the scatter16 buffer */
+/* gather the elements from the scatter16 buffer using HVX */
 void vector_gather_16(void)
 {
-    HVX_Vector *vgather = (HVX_Vector *)&vtcm.vgather16;
-    HVX_Vector offsets = *(HVX_Vector *)half_offsets;
-
-    VGATHER_16(vgather, &vtcm.vscatter16, region_len, offsets);
-
-    sync_gather(vgather);
+    asm ("m0 = %1\n\t"
+         "v0 = vmem(%2 + #0)\n\t"
+         "{ vtmp.h = vgather(%0, m0, v0.h).h\n\t"
+         "  vmem(%3 + #0) = vtmp.new }\n\t"
+         : : "r"(vtcm.vscatter16), "r"(region_len),
+             "r"(half_offsets), "r"(vtcm.vgather16)
+         : "m0", "v0", "memory");
+
+    sync_gather(vtcm.vgather16);
 }
 
 static unsigned short gather_16_masked_init(void)
@@ -427,31 +423,51 @@ static unsigned short gather_16_masked_init(void)
     return letter | (letter << 8);
 }
 
+/* masked gather the elements from the scatter16 buffer using HVX */
 void vector_gather_16_masked(void)
 {
-    HVX_Vector *vgather = (HVX_Vector *)&vtcm.vgather16;
-    HVX_Vector offsets = *(HVX_Vector *)half_offsets;
-    HVX_Vector pred_reg = *(HVX_Vector *)half_predicates;
-    HVX_VectorPred preds = VAND_VAL(pred_reg, ~0);
-
-    *vgather = VSPLAT_H(gather_16_masked_init());
-    VGATHER_16_MASKED(vgather, preds, &vtcm.vscatter16, region_len, offsets);
-
-    sync_gather(vgather);
+    unsigned short init = gather_16_masked_init();
+
+    asm ("v0.h = vsplat(%5)\n\t"
+         "vmem(%4 + #0) = v0\n\t"  /* initialize the write area */
+         "r1 = #-1\n\t"
+         "v0 = vmem(%0 + #0)\n\t"
+         "q0 = vand(v0, r1)\n\t"
+         "m0 = %2\n\t"
+         "v0 = vmem(%3 + #0)\n\t"
+         "{ if (q0) vtmp.h = vgather(%1, m0, v0.h).h\n\t"
+         "  vmem(%4 + #0) = vtmp.new }\n\t"
+         : : "r"(half_predicates), "r"(vtcm.vscatter16), "r"(region_len),
+             "r"(half_offsets), "r"(vtcm.vgather16), "r"(init)
+         : "r1", "q0", "m0", "v0", "memory");
+
+    sync_gather(vtcm.vgather16);
 }
 
-/* gather the elements from the scatter32 buffer */
+/* gather the elements from the scatter32 buffer using HVX */
 void vector_gather_32(void)
 {
-    HVX_Vector *vgatherlo = (HVX_Vector *)&vtcm.vgather32;
-    HVX_Vector *vgatherhi =
-        (HVX_Vector *)((int)&vtcm.vgather32 + (MATRIX_SIZE * 2));
-    HVX_Vector offsetslo = *(HVX_Vector *)word_offsets;
-    HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
-
-    VGATHER_32(vgatherlo, &vtcm.vscatter32, region_len, offsetslo);
-    VGATHER_32(vgatherhi, &vtcm.vscatter32, region_len, offsetshi);
+    HVX_Vector *vgatherlo = (HVX_Vector *)vtcm.vgather32;
+    HVX_Vector *vgatherhi = (HVX_Vector *)&vtcm.vgather32[MATRIX_SIZE / 2];
+    HVX_Vector *offsetslo = (HVX_Vector *)word_offsets;
+    HVX_Vector *offsetshi = (HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
+
+    asm ("m0 = %1\n\t"
+         "v0 = vmem(%2 + #0)\n\t"
+         "{ vtmp.w = vgather(%0, m0, v0.w).w\n\t"
+         "  vmem(%3 + #0) = vtmp.new }\n\t"
+         : : "r"(vtcm.vscatter32), "r"(region_len),
+             "r"(offsetslo), "r"(vgatherlo)
+         : "m0", "v0", "memory");
+    asm ("m0 = %1\n\t"
+         "v0 = vmem(%2 + #0)\n\t"
+         "{ vtmp.w = vgather(%0, m0, v0.w).w\n\t"
+         "  vmem(%3 + #0) = vtmp.new }\n\t"
+         : : "r"(vtcm.vscatter32), "r"(region_len),
+             "r"(offsetshi), "r"(vgatherhi)
+         : "m0", "v0", "memory");
 
+    sync_gather(vgatherlo);
     sync_gather(vgatherhi);
 }
 
@@ -461,79 +477,88 @@ static unsigned int gather_32_masked_init(void)
     return letter | (letter << 8) | (letter << 16) | (letter << 24);
 }
 
+/* masked gather the elements from the scatter32 buffer using HVX */
 void vector_gather_32_masked(void)
 {
-    HVX_Vector *vgatherlo = (HVX_Vector *)&vtcm.vgather32;
-    HVX_Vector *vgatherhi =
-        (HVX_Vector *)((int)&vtcm.vgather32 + (MATRIX_SIZE * 2));
-    HVX_Vector offsetslo = *(HVX_Vector *)word_offsets;
-    HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
-    HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates;
-    HVX_VectorPred predslo = VAND_VAL(pred_reglo, ~0);
-    HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2];
-    HVX_VectorPred predshi = VAND_VAL(pred_reghi, ~0);
-
-    *vgatherlo = VSPLAT_H(gather_32_masked_init());
-    *vgatherhi = VSPLAT_H(gather_32_masked_init());
-    VGATHER_32_MASKED(vgatherlo, predslo, &vtcm.vscatter32, region_len,
-                      offsetslo);
-    VGATHER_32_MASKED(vgatherhi, predshi, &vtcm.vscatter32, region_len,
-                      offsetshi);
+    unsigned int init = gather_32_masked_init();
+    HVX_Vector *vgatherlo = (HVX_Vector *)vtcm.vgather32;
+    HVX_Vector *vgatherhi = (HVX_Vector *)&vtcm.vgather32[MATRIX_SIZE / 2];
+    HVX_Vector *offsetslo = (HVX_Vector *)word_offsets;
+    HVX_Vector *offsetshi = (HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
+    HVX_Vector *predslo = (HVX_Vector *)word_predicates;
+    HVX_Vector *predshi = (HVX_Vector *)&word_predicates[MATRIX_SIZE / 2];
+
+    asm ("v0.h = vsplat(%5)\n\t"
+         "vmem(%4 + #0) = v0\n\t"  /* initialize the write area */
+         "r1 = #-1\n\t"
+         "v0 = vmem(%0 + #0)\n\t"
+         "q0 = vand(v0, r1)\n\t"
+         "m0 = %2\n\t"
+         "v0 = vmem(%3 + #0)\n\t"
+         "{ if (q0) vtmp.w = vgather(%1, m0, v0.w).w\n\t"
+         "  vmem(%4 + #0) = vtmp.new }\n\t"
+         : : "r"(predslo), "r"(vtcm.vscatter32), "r"(region_len),
+             "r"(offsetslo), "r"(vgatherlo), "r"(init)
+         : "r1", "q0", "m0", "v0", "memory");
+    asm ("v0.h = vsplat(%5)\n\t"
+         "vmem(%4 + #0) = v0\n\t"  /* initialize the write area */
+         "r1 = #-1\n\t"
+         "v0 = vmem(%0 + #0)\n\t"
+         "q0 = vand(v0, r1)\n\t"
+         "m0 = %2\n\t"
+         "v0 = vmem(%3 + #0)\n\t"
+         "{ if (q0) vtmp.w = vgather(%1, m0, v0.w).w\n\t"
+         "  vmem(%4 + #0) = vtmp.new }\n\t"
+         : : "r"(predshi), "r"(vtcm.vscatter32), "r"(region_len),
+             "r"(offsetshi), "r"(vgatherhi), "r"(init)
+         : "r1", "q0", "m0", "v0", "memory");
 
     sync_gather(vgatherlo);
     sync_gather(vgatherhi);
 }
 
-/* gather the elements from the scatter16_32 buffer */
+/* gather the elements from the scatter16_32 buffer using HVX */
 void vector_gather_16_32(void)
 {
-    HVX_Vector *vgather;
-    HVX_VectorPair offsets;
-    HVX_Vector values;
-
-    /* get the vtcm address to gather from */
-    vgather = (HVX_Vector *)&vtcm.vgather16_32;
-
-    /* get the word offsets in a vector pair */
-    offsets = *(HVX_VectorPair *)word_offsets;
-
-    VGATHER_16_32(vgather, &vtcm.vscatter16_32, region_len, offsets);
-
-    /* deal the elements to get the order back */
-    values = *(HVX_Vector *)vgather;
-    values = VDEAL_H(values);
-
-    /* write it back to vtcm address */
-    *(HVX_Vector *)vgather = values;
+    asm ("m0 = %1\n\t"
+         "v0 = vmem(%2 + #0)\n\t"
+         "v1 = vmem(%2 + #1)\n\t"
+         "{ vtmp.h = vgather(%0, m0, v1:0.w).h\n\t"
+         "  vmem(%3 + #0) = vtmp.new }\n\t"
+         "v0 = vmem(%3 + #0)\n\t"
+         "v0.h = vdeal(v0.h)\n\t"  /* deal the elements to get the order back */
+         "vmem(%3 + #0) = v0\n\t"
+         : : "r"(vtcm.vscatter16_32), "r"(region_len),
+             "r"(word_offsets), "r"(vtcm.vgather16_32)
+         : "m0", "v0", "v1", "memory");
+
+    sync_gather(vtcm.vgather16_32);
 }
 
+/* masked gather the elements from the scatter16_32 buffer using HVX */
 void vector_gather_16_32_masked(void)
 {
-    HVX_Vector *vgather;
-    HVX_VectorPair offsets;
-    HVX_Vector pred_reg;
-    HVX_VectorPred preds;
-    HVX_Vector values;
-
-    /* get the vtcm address to gather from */
-    vgather = (HVX_Vector *)&vtcm.vgather16_32;
-
-    /* get the word offsets in a vector pair */
-    offsets = *(HVX_VectorPair *)word_offsets;
-    pred_reg = *(HVX_Vector *)half_predicates;
-    pred_reg = VSHUFF_H(pred_reg);
-    preds = VAND_VAL(pred_reg, ~0);
-
-   *vgather = VSPLAT_H(gather_16_masked_init());
-   VGATHER_16_32_MASKED(vgather, preds, &vtcm.vscatter16_32, region_len,
-                        offsets);
-
-    /* deal the elements to get the order back */
-    values = *(HVX_Vector *)vgather;
-    values = VDEAL_H(values);
-
-    /* write it back to vtcm address */
-    *(HVX_Vector *)vgather = values;
+    unsigned short init = gather_16_masked_init();
+
+    asm ("v0.h = vsplat(%5)\n\t"
+         "vmem(%4 + #0) = v0\n\t"  /* initialize the write area */
+         "r1 = #-1\n\t"
+         "v0 = vmem(%0 + #0)\n\t"
+         "v0.h = vshuff(v0.h)\n\t"  /* shuffle the predicates */
+         "q0 = vand(v0, r1)\n\t"
+         "m0 = %2\n\t"
+         "v0 = vmem(%3 + #0)\n\t"
+         "v1 = vmem(%3 + #1)\n\t"
+         "{ if (q0) vtmp.h = vgather(%1, m0, v1:0.w).h\n\t"
+         "  vmem(%4 + #0) = vtmp.new }\n\t"
+         "v0 = vmem(%4 + #0)\n\t"
+         "v0.h = vdeal(v0.h)\n\t"  /* deal the elements to get the order back */
+         "vmem(%4 + #0) = v0\n\t"
+         : : "r"(half_predicates), "r"(vtcm.vscatter16_32), "r"(region_len),
+             "r"(word_offsets), "r"(vtcm.vgather16_32), "r"(init)
+         : "r1", "q0", "m0", "v0", "v1", "memory");
+
+    sync_gather(vtcm.vgather16_32);
 }
 
 static void check_buffer(const char *name, void *c, void *r, size_t size)
@@ -579,6 +604,7 @@ void scalar_scatter_16_acc(unsigned short *vscatter16)
     }
 }
 
+/* scatter-accumulate the 16 bit elements using C */
 void check_scatter_16_acc()
 {
     memset(vscatter16_ref, FILL_CHAR,
@@ -589,7 +615,7 @@ void check_scatter_16_acc()
                  SCATTER_BUFFER_SIZE * sizeof(unsigned short));
 }
 
-/* scatter the 16 bit elements using C */
+/* masked scatter the 16 bit elements using C */
 void scalar_scatter_16_masked(unsigned short *vscatter16)
 {
     for (int i = 0; i < MATRIX_SIZE; i++) {
@@ -628,7 +654,7 @@ void check_scatter_32()
                  SCATTER_BUFFER_SIZE * sizeof(unsigned int));
 }
 
-/* scatter the 32 bit elements using C */
+/* scatter-accumulate the 32 bit elements using C */
 void scalar_scatter_32_acc(unsigned int *vscatter32)
 {
     for (int i = 0; i < MATRIX_SIZE; ++i) {
@@ -646,7 +672,7 @@ void check_scatter_32_acc()
                  SCATTER_BUFFER_SIZE * sizeof(unsigned int));
 }
 
-/* scatter the 32 bit elements using C */
+/* masked scatter the 32 bit elements using C */
 void scalar_scatter_32_masked(unsigned int *vscatter32)
 {
     for (int i = 0; i < MATRIX_SIZE; i++) {
@@ -667,7 +693,7 @@ void check_scatter_32_masked()
                   SCATTER_BUFFER_SIZE * sizeof(unsigned int));
 }
 
-/* scatter the 32 bit elements using C */
+/* scatter the 16 bit elements with 32 bit offsets using C */
 void scalar_scatter_16_32(unsigned short *vscatter16_32)
 {
     for (int i = 0; i < MATRIX_SIZE; ++i) {
@@ -684,7 +710,7 @@ void check_scatter_16_32()
                  SCATTER_BUFFER_SIZE * sizeof(unsigned short));
 }
 
-/* scatter the 32 bit elements using C */
+/* scatter-accumulate the 16 bit elements with 32 bit offsets using C */
 void scalar_scatter_16_32_acc(unsigned short *vscatter16_32)
 {
     for (int i = 0; i < MATRIX_SIZE; ++i) {
@@ -702,6 +728,7 @@ void check_scatter_16_32_acc()
                  SCATTER_BUFFER_SIZE * sizeof(unsigned short));
 }
 
+/* masked scatter the 16 bit elements with 32 bit offsets using C */
 void scalar_scatter_16_32_masked(unsigned short *vscatter16_32)
 {
     for (int i = 0; i < MATRIX_SIZE; i++) {
@@ -738,6 +765,7 @@ void check_gather_16()
                    MATRIX_SIZE * sizeof(unsigned short));
 }
 
+/* masked gather the elements from the scatter buffer using C */
 void scalar_gather_16_masked(unsigned short *vgather16)
 {
     for (int i = 0; i < MATRIX_SIZE; ++i) {
@@ -756,7 +784,7 @@ void check_gather_16_masked()
                  MATRIX_SIZE * sizeof(unsigned short));
 }
 
-/* gather the elements from the scatter buffer using C */
+/* gather the elements from the scatter32 buffer using C */
 void scalar_gather_32(unsigned int *vgather32)
 {
     for (int i = 0; i < MATRIX_SIZE; ++i) {
@@ -772,6 +800,7 @@ void check_gather_32(void)
                  MATRIX_SIZE * sizeof(unsigned int));
 }
 
+/* masked gather the elements from the scatter32 buffer using C */
 void scalar_gather_32_masked(unsigned int *vgather32)
 {
     for (int i = 0; i < MATRIX_SIZE; ++i) {
@@ -781,7 +810,6 @@ void scalar_gather_32_masked(unsigned int *vgather32)
     }
 }
 
-
 void check_gather_32_masked(void)
 {
     memset(vgather32_ref, gather_32_masked_init(),
@@ -791,7 +819,7 @@ void check_gather_32_masked(void)
                  vgather32_ref, MATRIX_SIZE * sizeof(unsigned int));
 }
 
-/* gather the elements from the scatter buffer using C */
+/* gather the elements from the scatter16_32 buffer using C */
 void scalar_gather_16_32(unsigned short *vgather16_32)
 {
     for (int i = 0; i < MATRIX_SIZE; ++i) {
@@ -807,6 +835,7 @@ void check_gather_16_32(void)
                  MATRIX_SIZE * sizeof(unsigned short));
 }
 
+/* masked gather the elements from the scatter16_32 buffer using C */
 void scalar_gather_16_32_masked(unsigned short *vgather16_32)
 {
     for (int i = 0; i < MATRIX_SIZE; ++i) {