From 86997772fa807f3961e5aeed97af7738adec1b43 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 22 Apr 2024 15:46:06 +0200 Subject: target/riscv/kvm: Fix exposure of Zkr The Zkr extension may only be exposed to KVM guests if the VMM implements the SEED CSR. Use the same implementation as TCG. Without this patch, running with a KVM which does not forward the SEED CSR access to QEMU will result in an ILL exception being injected into the guest (this results in Linux guests crashing on boot). And, when running with a KVM which does forward the access, QEMU will crash, since QEMU doesn't know what to do with the exit. Fixes: 3108e2f1c69d ("target/riscv/kvm: update KVM exts to Linux 6.8") Signed-off-by: Andrew Jones Reviewed-by: Daniel Henrique Barboza Cc: qemu-stable Message-ID: <20240422134605.534207-2-ajones@ventanamicro.com> Signed-off-by: Alistair Francis --- target/riscv/kvm/kvm-cpu.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'target/riscv/kvm/kvm-cpu.c') diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index eaa36121c7..b8136c7ef8 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -1418,6 +1418,28 @@ static int kvm_riscv_handle_sbi(CPUState *cs, struct kvm_run *run) return ret; } +static int kvm_riscv_handle_csr(CPUState *cs, struct kvm_run *run) +{ + target_ulong csr_num = run->riscv_csr.csr_num; + target_ulong new_value = run->riscv_csr.new_value; + target_ulong write_mask = run->riscv_csr.write_mask; + int ret = 0; + + switch (csr_num) { + case CSR_SEED: + run->riscv_csr.ret_value = riscv_new_csr_seed(new_value, write_mask); + break; + default: + qemu_log_mask(LOG_UNIMP, + "%s: un-handled CSR EXIT for CSR %lx\n", + __func__, csr_num); + ret = -1; + break; + } + + return ret; +} + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) { int ret = 0; @@ -1425,6 +1447,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) case KVM_EXIT_RISCV_SBI: ret = kvm_riscv_handle_sbi(cs, run); break; + case KVM_EXIT_RISCV_CSR: + ret = kvm_riscv_handle_csr(cs, run); + break; default: qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n", __func__, run->exit_reason); -- cgit 1.4.1 From a6b53378f537a51355a49826b7d119698c74ffba Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Thu, 25 Apr 2024 12:50:12 -0300 Subject: target/riscv/kvm: implement SBI debug console (DBCN) calls SBI defines a Debug Console extension "DBCN" that will, in time, replace the legacy console putchar and getchar SBI extensions. The appeal of the DBCN extension is that it allows multiple bytes to be read/written in the SBI console in a single SBI call. As far as KVM goes, the DBCN calls are forwarded by an in-kernel KVM module to userspace. But this will only happens if the KVM module actually supports this SBI extension and we activate it. We'll check for DBCN support during init time, checking if get-reg-list is advertising KVM_RISCV_SBI_EXT_DBCN. In that case, we'll enable it via kvm_set_one_reg() during kvm_arch_init_vcpu(). Finally, change kvm_riscv_handle_sbi() to handle the incoming calls for SBI_EXT_DBCN, reading and writing as required. A simple KVM guest with 'earlycon=sbi', running in an emulated RISC-V host, takes around 20 seconds to boot without using DBCN. With this patch we're taking around 14 seconds to boot due to the speed-up in the terminal output. There's no change in boot time if the guest isn't using earlycon. Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Message-ID: <20240425155012.581366-1-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis --- target/riscv/kvm/kvm-cpu.c | 111 +++++++++++++++++++++++++++++++++++++ target/riscv/sbi_ecall_interface.h | 17 ++++++ 2 files changed, 128 insertions(+) (limited to 'target/riscv/kvm/kvm-cpu.c') diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index b8136c7ef8..d2491d84e2 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -409,6 +409,12 @@ static KVMCPUConfig kvm_v_vlenb = { KVM_REG_RISCV_VECTOR_CSR_REG(vlenb) }; +static KVMCPUConfig kvm_sbi_dbcn = { + .name = "sbi_dbcn", + .kvm_reg_id = KVM_REG_RISCV | KVM_REG_SIZE_U64 | + KVM_REG_RISCV_SBI_EXT | KVM_RISCV_SBI_EXT_DBCN +}; + static void kvm_riscv_update_cpu_cfg_isa_ext(RISCVCPU *cpu, CPUState *cs) { CPURISCVState *env = &cpu->env; @@ -1037,6 +1043,20 @@ static int uint64_cmp(const void *a, const void *b) return 0; } +static void kvm_riscv_check_sbi_dbcn_support(RISCVCPU *cpu, + KVMScratchCPU *kvmcpu, + struct kvm_reg_list *reglist) +{ + struct kvm_reg_list *reg_search; + + reg_search = bsearch(&kvm_sbi_dbcn.kvm_reg_id, reglist->reg, reglist->n, + sizeof(uint64_t), uint64_cmp); + + if (reg_search) { + kvm_sbi_dbcn.supported = true; + } +} + static void kvm_riscv_read_vlenb(RISCVCPU *cpu, KVMScratchCPU *kvmcpu, struct kvm_reg_list *reglist) { @@ -1142,6 +1162,8 @@ static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) if (riscv_has_ext(&cpu->env, RVV)) { kvm_riscv_read_vlenb(cpu, kvmcpu, reglist); } + + kvm_riscv_check_sbi_dbcn_support(cpu, kvmcpu, reglist); } static void riscv_init_kvm_registers(Object *cpu_obj) @@ -1316,6 +1338,17 @@ static int kvm_vcpu_set_machine_ids(RISCVCPU *cpu, CPUState *cs) return ret; } +static int kvm_vcpu_enable_sbi_dbcn(RISCVCPU *cpu, CPUState *cs) +{ + target_ulong reg = 1; + + if (!kvm_sbi_dbcn.supported) { + return 0; + } + + return kvm_set_one_reg(cs, kvm_sbi_dbcn.kvm_reg_id, ®); +} + int kvm_arch_init_vcpu(CPUState *cs) { int ret = 0; @@ -1333,6 +1366,8 @@ int kvm_arch_init_vcpu(CPUState *cs) kvm_riscv_update_cpu_misa_ext(cpu, cs); kvm_riscv_update_cpu_cfg_isa_ext(cpu, cs); + ret = kvm_vcpu_enable_sbi_dbcn(cpu, cs); + return ret; } @@ -1390,6 +1425,79 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs) return true; } +static void kvm_riscv_handle_sbi_dbcn(CPUState *cs, struct kvm_run *run) +{ + g_autofree uint8_t *buf = NULL; + RISCVCPU *cpu = RISCV_CPU(cs); + target_ulong num_bytes; + uint64_t addr; + unsigned char ch; + int ret; + + switch (run->riscv_sbi.function_id) { + case SBI_EXT_DBCN_CONSOLE_READ: + case SBI_EXT_DBCN_CONSOLE_WRITE: + num_bytes = run->riscv_sbi.args[0]; + + if (num_bytes == 0) { + run->riscv_sbi.ret[0] = SBI_SUCCESS; + run->riscv_sbi.ret[1] = 0; + break; + } + + addr = run->riscv_sbi.args[1]; + + /* + * Handle the case where a 32 bit CPU is running in a + * 64 bit addressing env. + */ + if (riscv_cpu_mxl(&cpu->env) == MXL_RV32) { + addr |= (uint64_t)run->riscv_sbi.args[2] << 32; + } + + buf = g_malloc0(num_bytes); + + if (run->riscv_sbi.function_id == SBI_EXT_DBCN_CONSOLE_READ) { + ret = qemu_chr_fe_read_all(serial_hd(0)->be, buf, num_bytes); + if (ret < 0) { + error_report("SBI_EXT_DBCN_CONSOLE_READ: error when " + "reading chardev"); + exit(1); + } + + cpu_physical_memory_write(addr, buf, ret); + } else { + cpu_physical_memory_read(addr, buf, num_bytes); + + ret = qemu_chr_fe_write_all(serial_hd(0)->be, buf, num_bytes); + if (ret < 0) { + error_report("SBI_EXT_DBCN_CONSOLE_WRITE: error when " + "writing chardev"); + exit(1); + } + } + + run->riscv_sbi.ret[0] = SBI_SUCCESS; + run->riscv_sbi.ret[1] = ret; + break; + case SBI_EXT_DBCN_CONSOLE_WRITE_BYTE: + ch = run->riscv_sbi.args[0]; + ret = qemu_chr_fe_write(serial_hd(0)->be, &ch, sizeof(ch)); + + if (ret < 0) { + error_report("SBI_EXT_DBCN_CONSOLE_WRITE_BYTE: error when " + "writing chardev"); + exit(1); + } + + run->riscv_sbi.ret[0] = SBI_SUCCESS; + run->riscv_sbi.ret[1] = 0; + break; + default: + run->riscv_sbi.ret[0] = SBI_ERR_NOT_SUPPORTED; + } +} + static int kvm_riscv_handle_sbi(CPUState *cs, struct kvm_run *run) { int ret = 0; @@ -1408,6 +1516,9 @@ static int kvm_riscv_handle_sbi(CPUState *cs, struct kvm_run *run) } ret = 0; break; + case SBI_EXT_DBCN: + kvm_riscv_handle_sbi_dbcn(cs, run); + break; default: qemu_log_mask(LOG_UNIMP, "%s: un-handled SBI EXIT, specific reasons is %lu\n", diff --git a/target/riscv/sbi_ecall_interface.h b/target/riscv/sbi_ecall_interface.h index 43899d08f6..7dfe5f72c6 100644 --- a/target/riscv/sbi_ecall_interface.h +++ b/target/riscv/sbi_ecall_interface.h @@ -12,6 +12,17 @@ /* clang-format off */ +#define SBI_SUCCESS 0 +#define SBI_ERR_FAILED -1 +#define SBI_ERR_NOT_SUPPORTED -2 +#define SBI_ERR_INVALID_PARAM -3 +#define SBI_ERR_DENIED -4 +#define SBI_ERR_INVALID_ADDRESS -5 +#define SBI_ERR_ALREADY_AVAILABLE -6 +#define SBI_ERR_ALREADY_STARTED -7 +#define SBI_ERR_ALREADY_STOPPED -8 +#define SBI_ERR_NO_SHMEM -9 + /* SBI Extension IDs */ #define SBI_EXT_0_1_SET_TIMER 0x0 #define SBI_EXT_0_1_CONSOLE_PUTCHAR 0x1 @@ -27,6 +38,7 @@ #define SBI_EXT_IPI 0x735049 #define SBI_EXT_RFENCE 0x52464E43 #define SBI_EXT_HSM 0x48534D +#define SBI_EXT_DBCN 0x4442434E /* SBI function IDs for BASE extension */ #define SBI_EXT_BASE_GET_SPEC_VERSION 0x0 @@ -57,6 +69,11 @@ #define SBI_EXT_HSM_HART_STOP 0x1 #define SBI_EXT_HSM_HART_GET_STATUS 0x2 +/* SBI function IDs for DBCN extension */ +#define SBI_EXT_DBCN_CONSOLE_WRITE 0x0 +#define SBI_EXT_DBCN_CONSOLE_READ 0x1 +#define SBI_EXT_DBCN_CONSOLE_WRITE_BYTE 0x2 + #define SBI_HSM_HART_STATUS_STARTED 0x0 #define SBI_HSM_HART_STATUS_STOPPED 0x1 #define SBI_HSM_HART_STATUS_START_PENDING 0x2 -- cgit 1.4.1 From 1215d45b2aa97512a2867e401aa59f3d0c23cb23 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Mon, 22 Apr 2024 14:14:25 -0300 Subject: target/riscv/kvm: tolerate KVM disable ext errors Running a KVM guest using a 6.9-rc3 kernel, in a 6.8 host that has zkr enabled, will fail with a kernel oops SIGILL right at the start. The reason is that we can't expose zkr without implementing the SEED CSR. Disabling zkr in the guest would be a workaround, but if the KVM doesn't allow it we'll error out and never boot. In hindsight this is too strict. If we keep proceeding, despite not disabling the extension in the KVM vcpu, we'll not add the extension in the riscv,isa. The guest kernel will be unaware of the extension, i.e. it doesn't matter if the KVM vcpu has it enabled underneath or not. So it's ok to keep booting in this case. Change our current logic to not error out if we fail to disable an extension in kvm_set_one_reg(), but show a warning and keep booting. It is important to throw a warning because we must make the user aware that the extension is still available in the vcpu, meaning that an ill-behaved guest can ignore the riscv,isa settings and use the extension. The case we're handling happens with an EINVAL error code. If we fail to disable the extension in KVM for any other reason, error out. We'll also keep erroring out when we fail to enable an extension in KVM, since adding the extension in riscv,isa at this point will cause a guest malfunction because the extension isn't enabled in the vcpu. Suggested-by: Andrew Jones Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Cc: qemu-stable Message-ID: <20240422171425.333037-2-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis --- target/riscv/kvm/kvm-cpu.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'target/riscv/kvm/kvm-cpu.c') diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index d2491d84e2..473416649f 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -433,10 +433,14 @@ static void kvm_riscv_update_cpu_cfg_isa_ext(RISCVCPU *cpu, CPUState *cs) reg = kvm_cpu_cfg_get(cpu, multi_ext_cfg); ret = kvm_set_one_reg(cs, id, ®); if (ret != 0) { - error_report("Unable to %s extension %s in KVM, error %d", - reg ? "enable" : "disable", - multi_ext_cfg->name, ret); - exit(EXIT_FAILURE); + if (!reg && ret == -EINVAL) { + warn_report("KVM cannot disable extension %s", + multi_ext_cfg->name); + } else { + error_report("Unable to enable extension %s in KVM, error %d", + multi_ext_cfg->name, ret); + exit(EXIT_FAILURE); + } } } } -- cgit 1.4.1 From 190b867f28cb5781f3cd01a3deb371e4211595b1 Mon Sep 17 00:00:00 2001 From: Yong-Xuan Wang Date: Wed, 15 May 2024 17:11:28 +0800 Subject: target/riscv/kvm.c: Fix the hart bit setting of AIA In AIA spec, each hart (or each hart within a group) has a unique hart number to locate the memory pages of interrupt files in the address space. The number of bits required to represent any hart number is equal to ceil(log2(hmax + 1)), where hmax is the largest hart number among groups. However, if the largest hart number among groups is a power of 2, QEMU will pass an inaccurate hart-index-bit setting to Linux. For example, when the guest OS has 4 harts, only ceil(log2(3 + 1)) = 2 bits are sufficient to represent 4 harts, but we passes 3 to Linux. The code needs to be updated to ensure accurate hart-index-bit settings. Additionally, a Linux patch[1] is necessary to correctly recover the hart index when the guest OS has only 1 hart, where the hart-index-bit is 0. [1] https://lore.kernel.org/lkml/20240415064905.25184-1-yongxuan.wang@sifive.com/t/ Signed-off-by: Yong-Xuan Wang Reviewed-by: Andrew Jones Cc: qemu-stable Message-ID: <20240515091129.28116-1-yongxuan.wang@sifive.com> Signed-off-by: Alistair Francis --- target/riscv/kvm/kvm-cpu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'target/riscv/kvm/kvm-cpu.c') diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index 473416649f..235e2cdaca 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -1777,7 +1777,14 @@ void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift, } } - hart_bits = find_last_bit(&max_hart_per_socket, BITS_PER_LONG) + 1; + + if (max_hart_per_socket > 1) { + max_hart_per_socket--; + hart_bits = find_last_bit(&max_hart_per_socket, BITS_PER_LONG) + 1; + } else { + hart_bits = 0; + } + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, KVM_DEV_RISCV_AIA_CONFIG_HART_BITS, &hart_bits, true, NULL); -- cgit 1.4.1