From ec132efaa81f09861a3bd6afad94827e74543b3f Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Tue, 12 Mar 2019 19:21:03 +1100
Subject: spapr: Support NVIDIA V100 GPU with NVLink2

NVIDIA V100 GPUs have on-board RAM which is mapped into the host memory
space and accessible as normal RAM via an NVLink bus. The VFIO-PCI driver
implements special regions for such GPUs and emulates an NVLink bridge.
NVLink2-enabled POWER9 CPUs also provide address translation services
which includes an ATS shootdown (ATSD) register exported via the NVLink
bridge device.

This adds a quirk to VFIO to map the GPU memory and create an MR;
the new MR is stored in a PCI device as a QOM link. The sPAPR PCI uses
this to get the MR and map it to the system address space.
Another quirk does the same for ATSD.

This adds additional steps to sPAPR PHB setup:

1. Search for specific GPUs and NPUs, collect findings in
sPAPRPHBState::nvgpus, manage system address space mappings;

2. Add device-specific properties such as "ibm,npu", "ibm,gpu",
"memory-block", "link-speed" to advertise the NVLink2 function to
the guest;

3. Add "mmio-atsd" to vPHB to advertise the ATSD capability;

4. Add new memory blocks (with extra "linux,memory-usable" to prevent
the guest OS from accessing the new memory until it is onlined) and
npuphb# nodes representing an NPU unit for every vPHB as the GPU driver
uses it for link discovery.

This allocates space for GPU RAM and ATSD like we do for MMIOs by
adding 2 new parameters to the phb_placement() hook. Older machine types
set these to zero.

This puts new memory nodes in a separate NUMA node to as the GPU RAM
needs to be configured equally distant from any other node in the system.
Unlike the host setup which assigns numa ids from 255 downwards, this
adds new NUMA nodes after the user configures nodes or from 1 if none
were configured.

This adds requirement similar to EEH - one IOMMU group per vPHB.
The reason for this is that ATSD registers belong to a physical NPU
so they cannot invalidate translations on GPUs attached to another NPU.
It is guaranteed by the host platform as it does not mix NVLink bridges
or GPUs from different NPU in the same IOMMU group. If more than one
IOMMU group is detected on a vPHB, this disables ATSD support for that
vPHB and prints a warning.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
[aw: for vfio portions]
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Message-Id: <20190312082103.130561-1-aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 include/hw/pci-host/spapr.h | 45 +++++++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/spapr.h      |  5 ++++-
 2 files changed, 49 insertions(+), 1 deletion(-)

(limited to 'include/hw')

diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index b4aad26798..53519c835e 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -87,6 +87,9 @@ struct SpaprPhbState {
     uint32_t mig_liobn;
     hwaddr mig_mem_win_addr, mig_mem_win_size;
     hwaddr mig_io_win_addr, mig_io_win_size;
+    hwaddr nv2_gpa_win_addr;
+    hwaddr nv2_atsd_win_addr;
+    struct spapr_phb_pci_nvgpu_config *nvgpus;
 };
 
 #define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL
@@ -105,6 +108,22 @@ struct SpaprPhbState {
 
 #define SPAPR_PCI_MSI_WINDOW         0x40000000000ULL
 
+#define SPAPR_PCI_NV2RAM64_WIN_BASE  SPAPR_PCI_LIMIT
+#define SPAPR_PCI_NV2RAM64_WIN_SIZE  (2 * TiB) /* For up to 6 GPUs 256GB each */
+
+/* Max number of these GPUsper a physical box */
+#define NVGPU_MAX_NUM                6
+/* Max number of NVLinks per GPU in any physical box */
+#define NVGPU_MAX_LINKS              3
+
+/*
+ * GPU RAM starts at 64TiB so huge DMA window to cover it all ends at 128TiB
+ * which is enough. We do not need DMA for ATSD so we put them at 128TiB.
+ */
+#define SPAPR_PCI_NV2ATSD_WIN_BASE   (128 * TiB)
+#define SPAPR_PCI_NV2ATSD_WIN_SIZE   (NVGPU_MAX_NUM * NVGPU_MAX_LINKS * \
+                                      64 * KiB)
+
 static inline qemu_irq spapr_phb_lsi_qirq(struct SpaprPhbState *phb, int pin)
 {
     SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
@@ -135,6 +154,13 @@ int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state);
 int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option);
 int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb);
 void spapr_phb_vfio_reset(DeviceState *qdev);
+void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp);
+void spapr_phb_nvgpu_free(SpaprPhbState *sphb);
+void spapr_phb_nvgpu_populate_dt(SpaprPhbState *sphb, void *fdt, int bus_off,
+                                 Error **errp);
+void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt);
+void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
+                                        SpaprPhbState *sphb);
 #else
 static inline bool spapr_phb_eeh_available(SpaprPhbState *sphb)
 {
@@ -161,6 +187,25 @@ static inline int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
 static inline void spapr_phb_vfio_reset(DeviceState *qdev)
 {
 }
+static inline void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp)
+{
+}
+static inline void spapr_phb_nvgpu_free(SpaprPhbState *sphb)
+{
+}
+static inline void spapr_phb_nvgpu_populate_dt(SpaprPhbState *sphb, void *fdt,
+                                               int bus_off, Error **errp)
+{
+}
+static inline void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb,
+                                                   void *fdt)
+{
+}
+static inline void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt,
+                                                      int offset,
+                                                      SpaprPhbState *sphb)
+{
+}
 #endif
 
 void spapr_phb_dma_reset(SpaprPhbState *sphb);
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 5ea8081041..02b5ce7e40 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -123,7 +123,8 @@ struct SpaprMachineClass {
     void (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
                           uint64_t *buid, hwaddr *pio, 
                           hwaddr *mmio32, hwaddr *mmio64,
-                          unsigned n_dma, uint32_t *liobns, Error **errp);
+                          unsigned n_dma, uint32_t *liobns, hwaddr *nv2gpa,
+                          hwaddr *nv2atsd, Error **errp);
     SpaprResizeHpt resize_hpt_default;
     SpaprCapabilities default_caps;
     SpaprIrq *irq;
@@ -199,6 +200,8 @@ struct SpaprMachineState {
 
     bool cmd_line_caps[SPAPR_CAP_NUM];
     SpaprCapabilities def, eff, mig;
+
+    unsigned gpu_numa_id;
 };
 
 #define H_SUCCESS         0
-- 
cgit 1.4.1


From 64db6c70dc5d41cb7ad0c6c9d7a81a905a0d321b Mon Sep 17 00:00:00 2001
From: Cédric Le Goater <clg@kaod.org>
Date: Thu, 21 Mar 2019 15:49:07 +0100
Subject: spapr/rtas: modify spapr_rtas_register() to remove RTAS handlers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removing RTAS handlers will become necessary when the new pseries
machine supporting multiple interrupt mode is introduced.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-Id: <20190321144914.19934-9-clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr_rtas.c    | 2 +-
 include/hw/ppc/spapr.h | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/hw')

diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 24c45b12d4..ee24212765 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -404,7 +404,7 @@ void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn)
 
     token -= RTAS_TOKEN_BASE;
 
-    assert(!rtas_table[token].name);
+    assert(!name || !rtas_table[token].name);
 
     rtas_table[token].name = name;
     rtas_table[token].fn = fn;
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 02b5ce7e40..9331f5e0b9 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -675,6 +675,10 @@ typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, SpaprMachineState *sm,
                               uint32_t nargs, target_ulong args,
                               uint32_t nret, target_ulong rets);
 void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn);
+static inline void spapr_rtas_unregister(int token)
+{
+    spapr_rtas_register(token, NULL, NULL);
+}
 target_ulong spapr_rtas_call(PowerPCCPU *cpu, SpaprMachineState *sm,
                              uint32_t token, uint32_t nargs, target_ulong args,
                              uint32_t nret, target_ulong rets);
-- 
cgit 1.4.1


From e8ec4adfe2de8603e271e38b0d50d7d453c0c21b Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Fri, 5 Apr 2019 18:30:48 +0200
Subject: spapr: Drop duplicate PCI swizzle code

LSI mapping in spapr currently open-codes standard PCI swizzling. It thus
duplicates the code of pci_swizzle_map_irq_fn().

Expose the swizzling formula so that it can be used with a slot number
when building the device tree. Simply drop pci_spapr_map_irq() and call
pci_swizzle_map_irq_fn() instead.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <155448184841.8446.13959787238854054119.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/pci/pci.c         |  2 +-
 hw/ppc/spapr_pci.c   | 24 ++++--------------------
 include/hw/pci/pci.h |  4 ++++
 3 files changed, 9 insertions(+), 21 deletions(-)

(limited to 'include/hw')

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 1808b242dd..a78023f669 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1556,7 +1556,7 @@ void pci_device_set_intx_routing_notifier(PCIDevice *dev,
  */
 int pci_swizzle_map_irq_fn(PCIDevice *pci_dev, int pin)
 {
-    return (pin + PCI_SLOT(pci_dev->devfn)) % PCI_NUM_PINS;
+    return pci_swizzle(PCI_SLOT(pci_dev->devfn), pin);
 }
 
 /***********************************************************/
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index f005854dec..97961b0128 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -719,26 +719,10 @@ param_error_exit:
     rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 }
 
-static int pci_spapr_swizzle(int slot, int pin)
-{
-    return (slot + pin) % PCI_NUM_PINS;
-}
-
-static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num)
-{
-    /*
-     * Here we need to convert pci_dev + irq_num to some unique value
-     * which is less than number of IRQs on the specific bus (4).  We
-     * use standard PCI swizzling, that is (slot number + pin number)
-     * % 4.
-     */
-    return pci_spapr_swizzle(PCI_SLOT(pci_dev->devfn), irq_num);
-}
-
 static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
 {
     /*
-     * Here we use the number returned by pci_spapr_map_irq to find a
+     * Here we use the number returned by pci_swizzle_map_irq_fn to find a
      * corresponding qemu_irq.
      */
     SpaprPhbState *phb = opaque;
@@ -1766,7 +1750,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
                                 &sphb->iowindow);
 
     bus = pci_register_root_bus(dev, NULL,
-                                pci_spapr_set_irq, pci_spapr_map_irq, sphb,
+                                pci_spapr_set_irq, pci_swizzle_map_irq_fn, sphb,
                                 &sphb->memspace, &sphb->iospace,
                                 PCI_DEVFN(0, 0), PCI_NUM_PINS,
                                 TYPE_SPAPR_PHB_ROOT_BUS);
@@ -2259,14 +2243,14 @@ int spapr_populate_pci_dt(SpaprPhbState *phb, uint32_t intc_phandle, void *fdt,
     }
 
     /* Build the interrupt-map, this must matches what is done
-     * in pci_spapr_map_irq
+     * in pci_swizzle_map_irq_fn
      */
     _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
                      &interrupt_map_mask, sizeof(interrupt_map_mask)));
     for (i = 0; i < PCI_SLOT_MAX; i++) {
         for (j = 0; j < PCI_NUM_PINS; j++) {
             uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j];
-            int lsi_num = pci_spapr_swizzle(i, j);
+            int lsi_num = pci_swizzle(i, j);
 
             irqmap[0] = cpu_to_be32(b_ddddd(i)|b_fff(0));
             irqmap[1] = 0;
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 0abb06b357..fdd4c43d3a 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -413,6 +413,10 @@ void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
 void pci_bus_irqs_cleanup(PCIBus *bus);
 int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
 /* 0 <= pin <= 3 0 = INTA, 1 = INTB, 2 = INTC, 3 = INTD */
+static inline int pci_swizzle(int slot, int pin)
+{
+    return (slot + pin) % PCI_NUM_PINS;
+}
 int pci_swizzle_map_irq_fn(PCIDevice *pci_dev, int pin);
 PCIBus *pci_register_root_bus(DeviceState *parent, const char *name,
                               pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
-- 
cgit 1.4.1


From a2dd4e83e76ba9c0d432145059dd9e2b2a096e2b Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 11 Apr 2019 10:00:01 +0200
Subject: ppc/hash64: Rework R and C bit updates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With MT-TCG, we are now running translation in a racy way, thus
we need to mimic hardware when it comes to updating the R and
C bits, by doing byte stores.

The current "store_hpte" abstraction is ill suited for this, we
replace it with two separate callbacks for setting R and C.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-Id: <20190411080004.8690-4-clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr.c          | 41 +++++++++++++++++++++++---
 hw/ppc/spapr_hcall.c    | 13 +++++----
 include/hw/ppc/spapr.h  |  2 ++
 target/ppc/cpu.h        |  4 +--
 target/ppc/mmu-hash64.c | 76 +++++++++++++++++++++++++++++--------------------
 target/ppc/mmu-hash64.h |  2 --
 6 files changed, 93 insertions(+), 45 deletions(-)

(limited to 'include/hw')

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index b81e237635..c56939a43b 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1520,10 +1520,10 @@ static void spapr_unmap_hptes(PPCVirtualHypervisor *vhyp,
     /* Nothing to do for qemu managed HPT */
 }
 
-static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex,
-                             uint64_t pte0, uint64_t pte1)
+void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
+                      uint64_t pte0, uint64_t pte1)
 {
-    SpaprMachineState *spapr = SPAPR_MACHINE(vhyp);
+    SpaprMachineState *spapr = SPAPR_MACHINE(cpu->vhyp);
     hwaddr offset = ptex * HASH_PTE_SIZE_64;
 
     if (!spapr->htab) {
@@ -1551,6 +1551,38 @@ static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex,
     }
 }
 
+static void spapr_hpte_set_c(PPCVirtualHypervisor *vhyp, hwaddr ptex,
+                             uint64_t pte1)
+{
+    hwaddr offset = ptex * HASH_PTE_SIZE_64 + 15;
+    SpaprMachineState *spapr = SPAPR_MACHINE(vhyp);
+
+    if (!spapr->htab) {
+        /* There should always be a hash table when this is called */
+        error_report("spapr_hpte_set_c called with no hash table !");
+        return;
+    }
+
+    /* The HW performs a non-atomic byte update */
+    stb_p(spapr->htab + offset, (pte1 & 0xff) | 0x80);
+}
+
+static void spapr_hpte_set_r(PPCVirtualHypervisor *vhyp, hwaddr ptex,
+                             uint64_t pte1)
+{
+    hwaddr offset = ptex * HASH_PTE_SIZE_64 + 14;
+    SpaprMachineState *spapr = SPAPR_MACHINE(vhyp);
+
+    if (!spapr->htab) {
+        /* There should always be a hash table when this is called */
+        error_report("spapr_hpte_set_r called with no hash table !");
+        return;
+    }
+
+    /* The HW performs a non-atomic byte update */
+    stb_p(spapr->htab + offset, ((pte1 >> 8) & 0xff) | 0x01);
+}
+
 int spapr_hpt_shift_for_ramsize(uint64_t ramsize)
 {
     int shift;
@@ -4291,7 +4323,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     vhc->hpt_mask = spapr_hpt_mask;
     vhc->map_hptes = spapr_map_hptes;
     vhc->unmap_hptes = spapr_unmap_hptes;
-    vhc->store_hpte = spapr_store_hpte;
+    vhc->hpte_set_c = spapr_hpte_set_c;
+    vhc->hpte_set_r = spapr_hpte_set_r;
     vhc->get_pate = spapr_get_pate;
     vhc->encode_hpt_for_kvm_pr = spapr_encode_hpt_for_kvm_pr;
     xic->ics_get = spapr_ics_get;
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 3534079777..6c16d2b120 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -118,7 +118,7 @@ static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr,
         ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
     }
 
-    ppc_hash64_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel);
+    spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel);
 
     args[0] = ptex + slot;
     return H_SUCCESS;
@@ -131,7 +131,8 @@ typedef enum {
     REMOVE_HW = 3,
 } RemoveResult;
 
-static RemoveResult remove_hpte(PowerPCCPU *cpu, target_ulong ptex,
+static RemoveResult remove_hpte(PowerPCCPU *cpu
+                                , target_ulong ptex,
                                 target_ulong avpn,
                                 target_ulong flags,
                                 target_ulong *vp, target_ulong *rp)
@@ -155,7 +156,7 @@ static RemoveResult remove_hpte(PowerPCCPU *cpu, target_ulong ptex,
     }
     *vp = v;
     *rp = r;
-    ppc_hash64_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0);
+    spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0);
     ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
     return REMOVE_SUCCESS;
 }
@@ -289,13 +290,13 @@ static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr,
     r |= (flags << 55) & HPTE64_R_PP0;
     r |= (flags << 48) & HPTE64_R_KEY_HI;
     r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO);
-    ppc_hash64_store_hpte(cpu, ptex,
-                          (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
+    spapr_store_hpte(cpu, ptex,
+                     (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
     ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
     /* Flush the tlb */
     check_tlb_flush(env, true);
     /* Don't need a memory barrier, due to qemu's global lock */
-    ppc_hash64_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r);
+    spapr_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r);
     return H_SUCCESS;
 }
 
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 9331f5e0b9..7e32f309c2 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -784,6 +784,8 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
                           Error **errp);
 void spapr_clear_pending_events(SpaprMachineState *spapr);
 int spapr_max_server_number(SpaprMachineState *spapr);
+void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
+                      uint64_t pte0, uint64_t pte1);
 
 /* DRC callbacks. */
 void spapr_core_release(DeviceState *dev);
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 8a79db0c43..5e7cf54b2f 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1265,8 +1265,8 @@ struct PPCVirtualHypervisorClass {
     void (*unmap_hptes)(PPCVirtualHypervisor *vhyp,
                         const ppc_hash_pte64_t *hptes,
                         hwaddr ptex, int n);
-    void (*store_hpte)(PPCVirtualHypervisor *vhyp, hwaddr ptex,
-                       uint64_t pte0, uint64_t pte1);
+    void (*hpte_set_c)(PPCVirtualHypervisor *vhyp, hwaddr ptex, uint64_t pte1);
+    void (*hpte_set_r)(PPCVirtualHypervisor *vhyp, hwaddr ptex, uint64_t pte1);
     void (*get_pate)(PPCVirtualHypervisor *vhyp, ppc_v3_pate_t *entry);
     target_ulong (*encode_hpt_for_kvm_pr)(PPCVirtualHypervisor *vhyp);
 };
diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
index c74818b2e7..7899eb2918 100644
--- a/target/ppc/mmu-hash64.c
+++ b/target/ppc/mmu-hash64.c
@@ -725,6 +725,39 @@ static void ppc_hash64_set_dsi(CPUState *cs, uint64_t dar, uint64_t dsisr)
 }
 
 
+static void ppc_hash64_set_r(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1)
+{
+    hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + 16;
+
+    if (cpu->vhyp) {
+        PPCVirtualHypervisorClass *vhc =
+            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
+        vhc->hpte_set_r(cpu->vhyp, ptex, pte1);
+        return;
+    }
+    base = ppc_hash64_hpt_base(cpu);
+
+
+    /* The HW performs a non-atomic byte update */
+    stb_phys(CPU(cpu)->as, base + offset, ((pte1 >> 8) & 0xff) | 0x01);
+}
+
+static void ppc_hash64_set_c(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1)
+{
+    hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + 15;
+
+    if (cpu->vhyp) {
+        PPCVirtualHypervisorClass *vhc =
+            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
+        vhc->hpte_set_c(cpu->vhyp, ptex, pte1);
+        return;
+    }
+    base = ppc_hash64_hpt_base(cpu);
+
+    /* The HW performs a non-atomic byte update */
+    stb_phys(CPU(cpu)->as, base + offset, (pte1 & 0xff) | 0x80);
+}
+
 int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr,
                                 int rwx, int mmu_idx)
 {
@@ -735,7 +768,6 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr,
     hwaddr ptex;
     ppc_hash_pte64_t pte;
     int exec_prot, pp_prot, amr_prot, prot;
-    uint64_t new_pte1;
     const int need_prot[] = {PAGE_READ, PAGE_WRITE, PAGE_EXEC};
     hwaddr raddr;
 
@@ -883,19 +915,19 @@ skip_slb_search:
 
     /* 6. Update PTE referenced and changed bits if necessary */
 
-    new_pte1 = pte.pte1 | HPTE64_R_R; /* set referenced bit */
-    if (rwx == 1) {
-        new_pte1 |= HPTE64_R_C; /* set changed (dirty) bit */
-    } else {
-        /*
-         * Treat the page as read-only for now, so that a later write
-         * will pass through this function again to set the C bit
-         */
-        prot &= ~PAGE_WRITE;
+    if (!(pte.pte1 & HPTE64_R_R)) {
+        ppc_hash64_set_r(cpu, ptex, pte.pte1);
     }
-
-    if (new_pte1 != pte.pte1) {
-        ppc_hash64_store_hpte(cpu, ptex, pte.pte0, new_pte1);
+    if (!(pte.pte1 & HPTE64_R_C)) {
+        if (rwx == 1) {
+            ppc_hash64_set_c(cpu, ptex, pte.pte1);
+        } else {
+            /*
+             * Treat the page as read-only for now, so that a later write
+             * will pass through this function again to set the C bit
+             */
+            prot &= ~PAGE_WRITE;
+        }
     }
 
     /* 7. Determine the real address from the PTE */
@@ -954,24 +986,6 @@ hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr)
         & TARGET_PAGE_MASK;
 }
 
-void ppc_hash64_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
-                           uint64_t pte0, uint64_t pte1)
-{
-    hwaddr base;
-    hwaddr offset = ptex * HASH_PTE_SIZE_64;
-
-    if (cpu->vhyp) {
-        PPCVirtualHypervisorClass *vhc =
-            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
-        vhc->store_hpte(cpu->vhyp, ptex, pte0, pte1);
-        return;
-    }
-    base = ppc_hash64_hpt_base(cpu);
-
-    stq_phys(CPU(cpu)->as, base + offset, pte0);
-    stq_phys(CPU(cpu)->as, base + offset + HASH_PTE_SIZE_64 / 2, pte1);
-}
-
 void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu, target_ulong ptex,
                                target_ulong pte0, target_ulong pte1)
 {
diff --git a/target/ppc/mmu-hash64.h b/target/ppc/mmu-hash64.h
index 5be7ad86db..87729d48b3 100644
--- a/target/ppc/mmu-hash64.h
+++ b/target/ppc/mmu-hash64.h
@@ -10,8 +10,6 @@ int ppc_store_slb(PowerPCCPU *cpu, target_ulong slot,
 hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr);
 int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw,
                                 int mmu_idx);
-void ppc_hash64_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
-                           uint64_t pte0, uint64_t pte1);
 void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu,
                                target_ulong pte_index,
                                target_ulong pte0, target_ulong pte1);
-- 
cgit 1.4.1