5 files changed, 272 insertions, 0 deletions
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 9b157f0246..67e699cdc9 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -119,6 +119,8 @@ enum powerpc_mmu_t {
     POWERPC_MMU_620        = POWERPC_MMU_64 | 0x00000002,
     /* Architecture 2.06 variant                               */
     POWERPC_MMU_2_06       = POWERPC_MMU_64 | POWERPC_MMU_1TSEG | 0x00000003,
+    /* Architecture 2.06 "degraded" (no 1T segments)           */
+    POWERPC_MMU_2_06d      = POWERPC_MMU_64 | 0x00000003,
 #endif /* defined(TARGET_PPC64) */
 };
 
@@ -874,6 +876,29 @@ enum {
 #define DBELL_PIRTAG_MASK              0x3fff
 
 /*****************************************************************************/
+/* Segment page size information, used by recent hash MMUs
+ * The format of this structure mirrors kvm_ppc_smmu_info
+ */
+
+#define PPC_PAGE_SIZES_MAX_SZ   8
+
+struct ppc_one_page_size {
+    uint32_t page_shift;  /* Page shift (or 0) */
+    uint32_t pte_enc;     /* Encoding in the HPTE (>>12) */
+};
+
+struct ppc_one_seg_page_size {
+    uint32_t page_shift;  /* Base page shift of segment (or 0) */
+    uint32_t slb_enc;     /* SLB encoding for BookS */
+    struct ppc_one_page_size enc[PPC_PAGE_SIZES_MAX_SZ];
+};
+
+struct ppc_segment_page_sizes {
+    struct ppc_one_seg_page_size sps[PPC_PAGE_SIZES_MAX_SZ];
+};
+
+
+/*****************************************************************************/
 /* The whole PowerPC CPU context */
 #define NB_MMU_MODES 3
 
@@ -889,6 +914,9 @@ struct ppc_def_t {
     powerpc_input_t bus_model;
     uint32_t flags;
     int bfd_mach;
+#if defined(TARGET_PPC64)
+    const struct ppc_segment_page_sizes *sps;
+#endif
     void (*init_proc)(CPUPPCState *env);
     int  (*check_pow)(CPUPPCState *env);
 };
@@ -1012,6 +1040,9 @@ struct CPUPPCState {
     uint32_t flags;
     uint64_t insns_flags;
     uint64_t insns_flags2;
+#if defined(TARGET_PPC64)
+    struct ppc_segment_page_sizes sps;
+#endif
 
 #if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
     target_phys_addr_t vpa;
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index c09cc39c78..b6ef72d16b 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -18,6 +18,7 @@
 #include <sys/types.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
+#include <sys/vfs.h>
 
 #include <linux/kvm.h>
 
@@ -167,10 +168,217 @@ static int kvm_booke206_tlb_init(CPUPPCState *env)
     return 0;
 }
 
+
+#if defined(TARGET_PPC64)
+static void kvm_get_fallback_smmu_info(CPUPPCState *env,
+                                       struct kvm_ppc_smmu_info *info)
+{
+    memset(info, 0, sizeof(*info));
+
+    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
+     * need to "guess" what the supported page sizes are.
+     *
+     * For that to work we make a few assumptions:
+     *
+     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
+     *   KVM which only supports 4K and 16M pages, but supports them
+     *   regardless of the backing store characteritics. We also don't
+     *   support 1T segments.
+     *
+     *   This is safe as if HV KVM ever supports that capability or PR
+     *   KVM grows supports for more page/segment sizes, those versions
+     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
+     *   will not hit this fallback
+     *
+     * - Else we are running HV KVM. This means we only support page
+     *   sizes that fit in the backing store. Additionally we only
+     *   advertize 64K pages if the processor is ARCH 2.06 and we assume
+     *   P7 encodings for the SLB and hash table. Here too, we assume
+     *   support for any newer processor will mean a kernel that
+     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
+     *   this fallback.
+     */
+    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
+        /* No flags */
+        info->flags = 0;
+        info->slb_size = 64;
+
+        /* Standard 4k base page size segment */
+        info->sps[0].page_shift = 12;
+        info->sps[0].slb_enc = 0;
+        info->sps[0].enc[0].page_shift = 12;
+        info->sps[0].enc[0].pte_enc = 0;
+
+        /* Standard 16M large page size segment */
+        info->sps[1].page_shift = 24;
+        info->sps[1].slb_enc = SLB_VSID_L;
+        info->sps[1].enc[0].page_shift = 24;
+        info->sps[1].enc[0].pte_enc = 0;
+    } else {
+        int i = 0;
+
+        /* HV KVM has backing store size restrictions */
+        info->flags = KVM_PPC_PAGE_SIZES_REAL;
+
+        if (env->mmu_model & POWERPC_MMU_1TSEG) {
+            info->flags |= KVM_PPC_1T_SEGMENTS;
+        }
+
+        if (env->mmu_model == POWERPC_MMU_2_06) {
+            info->slb_size = 32;
+        } else {
+            info->slb_size = 64;
+        }
+
+        /* Standard 4k base page size segment */
+        info->sps[i].page_shift = 12;
+        info->sps[i].slb_enc = 0;
+        info->sps[i].enc[0].page_shift = 12;
+        info->sps[i].enc[0].pte_enc = 0;
+        i++;
+
+        /* 64K on MMU 2.06 */
+        if (env->mmu_model == POWERPC_MMU_2_06) {
+            info->sps[i].page_shift = 16;
+            info->sps[i].slb_enc = 0x110;
+            info->sps[i].enc[0].page_shift = 16;
+            info->sps[i].enc[0].pte_enc = 1;
+            i++;
+        }
+
+        /* Standard 16M large page size segment */
+        info->sps[i].page_shift = 24;
+        info->sps[i].slb_enc = SLB_VSID_L;
+        info->sps[i].enc[0].page_shift = 24;
+        info->sps[i].enc[0].pte_enc = 0;
+    }
+}
+
+static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
+{
+    int ret;
+
+    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
+        ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
+        if (ret == 0) {
+            return;
+        }
+    }
+
+    kvm_get_fallback_smmu_info(env, info);
+}
+
+static long getrampagesize(void)
+{
+    struct statfs fs;
+    int ret;
+
+    if (!mem_path) {
+        /* guest RAM is backed by normal anonymous pages */
+        return getpagesize();
+    }
+
+    do {
+        ret = statfs(mem_path, &fs);
+    } while (ret != 0 && errno == EINTR);
+
+    if (ret != 0) {
+        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
+                strerror(errno));
+        exit(1);
+    }
+
+#define HUGETLBFS_MAGIC       0x958458f6
+
+    if (fs.f_type != HUGETLBFS_MAGIC) {
+        /* Explicit mempath, but it's ordinary pages */
+        return getpagesize();
+    }
+
+    /* It's hugepage, return the huge page size */
+    return fs.f_bsize;
+}
+
+static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
+{
+    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
+        return true;
+    }
+
+    return (1ul << shift) <= rampgsize;
+}
+
+static void kvm_fixup_page_sizes(CPUPPCState *env)
+{
+    static struct kvm_ppc_smmu_info smmu_info;
+    static bool has_smmu_info;
+    long rampagesize;
+    int iq, ik, jq, jk;
+
+    /* We only handle page sizes for 64-bit server guests for now */
+    if (!(env->mmu_model & POWERPC_MMU_64)) {
+        return;
+    }
+
+    /* Collect MMU info from kernel if not already */
+    if (!has_smmu_info) {
+        kvm_get_smmu_info(env, &smmu_info);
+        has_smmu_info = true;
+    }
+
+    rampagesize = getrampagesize();
+
+    /* Convert to QEMU form */
+    memset(&env->sps, 0, sizeof(env->sps));
+
+    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
+        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
+        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
+
+        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
+                                 ksps->page_shift)) {
+            continue;
+        }
+        qsps->page_shift = ksps->page_shift;
+        qsps->slb_enc = ksps->slb_enc;
+        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
+            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
+                                     ksps->enc[jk].page_shift)) {
+                continue;
+            }
+            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
+            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
+            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
+                break;
+            }
+        }
+        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
+            break;
+        }
+    }
+    env->slb_nr = smmu_info.slb_size;
+    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
+        env->mmu_model |= POWERPC_MMU_1TSEG;
+    } else {
+        env->mmu_model &= ~POWERPC_MMU_1TSEG;
+    }
+}
+#else /* defined (TARGET_PPC64) */
+
+static inline void kvm_fixup_page_sizes(CPUPPCState *env)
+{
+}
+
+#endif /* !defined (TARGET_PPC64) */
+
 int kvm_arch_init_vcpu(CPUPPCState *cenv)
 {
     int ret;
 
+    /* Gather server mmu info from KVM and update the CPU state */
+    kvm_fixup_page_sizes(cenv);
+
+    /* Synchronize sregs with kvm */
     ret = kvm_arch_sync_sregs(cenv);
     if (ret) {
         return ret;
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index 34ecad3e26..e2f8703853 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -58,6 +58,11 @@ static inline int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_l
     return -1;
 }
 
+static inline int kvmppc_read_segment_page_sizes(uint32_t *prop, int maxcells)
+{
+    return -1;
+}
+
 static inline int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
 {
     return -1;
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index c4e79d9ca7..d2664acef0 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -1634,6 +1634,7 @@ void dump_mmu(FILE *f, fprintf_function cpu_fprintf, CPUPPCState *env)
 #if defined(TARGET_PPC64)
     case POWERPC_MMU_64B:
     case POWERPC_MMU_2_06:
+    case POWERPC_MMU_2_06d:
         mmubooks_dump_mmu(f, cpu_fprintf, env);
         break;
 #endif
@@ -1664,6 +1665,7 @@ static inline int check_physical(CPUPPCState *env, mmu_ctx_t *ctx,
     case POWERPC_MMU_620:
     case POWERPC_MMU_64B:
     case POWERPC_MMU_2_06:
+    case POWERPC_MMU_2_06d:
         /* Real address are 60 bits long */
         ctx->raddr &= 0x0FFFFFFFFFFFFFFFULL;
         ctx->prot |= PAGE_WRITE;
@@ -1745,6 +1747,7 @@ int get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, target_ulong eaddr,
         case POWERPC_MMU_620:
         case POWERPC_MMU_64B:
         case POWERPC_MMU_2_06:
+        case POWERPC_MMU_2_06d:
 #endif
             if (ret < 0) {
                 /* We didn't match any BAT entry or don't have BATs */
@@ -1886,6 +1889,7 @@ int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address, int rw,
                 case POWERPC_MMU_620:
                 case POWERPC_MMU_64B:
                 case POWERPC_MMU_2_06:
+                case POWERPC_MMU_2_06d:
 #endif
                     env->exception_index = POWERPC_EXCP_ISI;
                     env->error_code = 0x40000000;
@@ -1997,6 +2001,7 @@ int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address, int rw,
                 case POWERPC_MMU_620:
                 case POWERPC_MMU_64B:
                 case POWERPC_MMU_2_06:
+                case POWERPC_MMU_2_06d:
 #endif
                     env->exception_index = POWERPC_EXCP_DSI;
                     env->error_code = 0;
@@ -2326,6 +2331,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
     case POWERPC_MMU_620:
     case POWERPC_MMU_64B:
     case POWERPC_MMU_2_06:
+    case POWERPC_MMU_2_06d:
 #endif /* defined(TARGET_PPC64) */
         tlb_flush(env, 1);
         break;
@@ -2395,6 +2401,7 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
     case POWERPC_MMU_620:
     case POWERPC_MMU_64B:
     case POWERPC_MMU_2_06:
+    case POWERPC_MMU_2_06d:
         /* tlbie invalidate TLBs for all segments */
         /* XXX: given the fact that there are too many segments to invalidate,
          *      and we still don't have a tlb_flush_mask(env, n, mask) in QEMU,
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index f6ac589a53..57027a2883 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -9928,6 +9928,27 @@ int cpu_ppc_register_internal (CPUPPCState *env, const ppc_def_t *def)
     env->bfd_mach = def->bfd_mach;
     env->check_pow = def->check_pow;
 
+#if defined(TARGET_PPC64)
+    if (def->sps)
+        env->sps = *def->sps;
+    else if (env->mmu_model & POWERPC_MMU_64) {
+        /* Use default sets of page sizes */
+        static const struct ppc_segment_page_sizes defsps = {
+            .sps = {
+                { .page_shift = 12, /* 4K */
+                  .slb_enc = 0,
+                  .enc = { { .page_shift = 12, .pte_enc = 0 } }
+                },
+                { .page_shift = 24, /* 16M */
+                  .slb_enc = 0x100,
+                  .enc = { { .page_shift = 24, .pte_enc = 0 } }
+                },
+            },
+        };
+        env->sps = defsps;
+    }
+#endif /* defined(TARGET_PPC64) */
+
     if (kvm_enabled()) {
         if (kvmppc_fixup_cpu(env) != 0) {
             fprintf(stderr, "Unable to virtualize selected CPU with KVM\n");