30 files changed, 1388 insertions, 654 deletions
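The largest change below folds the per-size softmmu_template.h expansions into a single size- and endianness-parameterised load_helper()/store_helper() pair in accel/tcg/cputlb.c, with handle_bswap() selecting the byte swap at compile time. The stand-alone C sketch that follows only illustrates that pattern and is not code from the patch: the names (demo_bswap, demo_load, demo_load_le32/be32) are made up, and it assumes a little-endian host so that only big-endian accesses need swapping.

#include <stdint.h>
#include <string.h>

/* Swap only when the access endianness differs from the (assumed LE) host. */
static inline uint64_t demo_bswap(uint64_t val, int size, int big_endian)
{
    if (big_endian) {
        switch (size) {
        case 1: return val;
        case 2: return __builtin_bswap16(val);
        case 4: return __builtin_bswap32(val);
        case 8: return __builtin_bswap64(val);
        }
    }
    return val;
}

/* One helper parameterised by size/endianness, forced inline so the
 * switch and the swap fold away in each constant-argument wrapper. */
static inline uint64_t __attribute__((always_inline))
demo_load(const void *haddr, int size, int big_endian)
{
    uint64_t val = 0;

    memcpy(&val, haddr, size);          /* raw host-order bytes */
    return demo_bswap(val, size, big_endian);
}

/* Thin wrappers, analogous to helper_le_ldul_mmu()/helper_be_ldul_mmu():
 * each reduces to a single specialised 4-byte access. */
uint32_t demo_load_le32(const void *haddr) { return demo_load(haddr, 4, 0); }
uint32_t demo_load_be32(const void *haddr) { return demo_load(haddr, 4, 1); }

At -O2 both wrappers should compile down to a plain 32-bit load (plus one byte swap for the BE variant), which is the effect the patch relies on to keep the unified helpers as cheap as the old per-size templates.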
diff --git a/MAINTAINERS b/MAINTAINERS index 66ddbda9c9..a73a61a546 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1391,6 +1391,13 @@ F: include/hw/net/ F: tests/virtio-net-test.c T: git https://github.com/jasowang/qemu.git net +Parallel NOR Flash devices +M: Philippe Mathieu-Daudé <philmd@redhat.com> +T: git https://gitlab.com/philmd/qemu.git pflash-next +S: Maintained +F: hw/block/pflash_cfi*.c +F: include/hw/block/flash.h + SCSI M: Paolo Bonzini <pbonzini@redhat.com> R: Fam Zheng <fam@euphon.net> @@ -1478,6 +1485,8 @@ F: hw/*/*vhost* F: docs/interop/vhost-user.json F: docs/interop/vhost-user.txt F: contrib/vhost-user-*/ +F: backends/vhost-user.c +F: include/sysemu/vhost-user-backend.h virtio M: Michael S. Tsirkin <mst@redhat.com> @@ -1519,6 +1528,7 @@ L: qemu-s390x@nongnu.org virtio-input M: Gerd Hoffmann <kraxel@redhat.com> S: Maintained +F: hw/input/vhost-user-input.c F: hw/input/virtio-input*.c F: include/hw/virtio/virtio-input.h @@ -2404,12 +2414,13 @@ F: block/ssh.c CURL L: qemu-block@nongnu.org -S: Supported +S: Odd Fixes F: block/curl.c GLUSTER L: qemu-block@nongnu.org -S: Supported +L: integration@gluster.org +S: Odd Fixes F: block/gluster.c Null Block Driver diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index f2f618217d..a083324768 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -856,9 +856,8 @@ static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr) } static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, - int mmu_idx, - target_ulong addr, uintptr_t retaddr, - bool recheck, MMUAccessType access_type, int size) + int mmu_idx, target_ulong addr, uintptr_t retaddr, + MMUAccessType access_type, int size) { CPUState *cpu = ENV_GET_CPU(env); hwaddr mr_offset; @@ -868,30 +867,6 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, bool locked = false; MemTxResult r; - if (recheck) { - /* - * This is a TLB_RECHECK access, where the MMU protection - * covers a smaller range than a target page, and we must - * repeat the MMU check here. This tlb_fill() call might - * longjump out if this access should cause a guest exception. - */ - CPUTLBEntry *entry; - target_ulong tlb_addr; - - tlb_fill(cpu, addr, size, access_type, mmu_idx, retaddr); - - entry = tlb_entry(env, mmu_idx, addr); - tlb_addr = (access_type == MMU_DATA_LOAD ? - entry->addr_read : entry->addr_code); - if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { - /* RAM access */ - uintptr_t haddr = addr + entry->addend; - - return ldn_p((void *)haddr, size); - } - /* Fall through for handling IO accesses */ - } - section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; @@ -925,9 +900,8 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, } static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, - int mmu_idx, - uint64_t val, target_ulong addr, - uintptr_t retaddr, bool recheck, int size) + int mmu_idx, uint64_t val, target_ulong addr, + uintptr_t retaddr, int size) { CPUState *cpu = ENV_GET_CPU(env); hwaddr mr_offset; @@ -936,30 +910,6 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, bool locked = false; MemTxResult r; - if (recheck) { - /* - * This is a TLB_RECHECK access, where the MMU protection - * covers a smaller range than a target page, and we must - * repeat the MMU check here. This tlb_fill() call might - * longjump out if this access should cause a guest exception. 
- */ - CPUTLBEntry *entry; - target_ulong tlb_addr; - - tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr); - - entry = tlb_entry(env, mmu_idx, addr); - tlb_addr = tlb_addr_write(entry); - if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { - /* RAM access */ - uintptr_t haddr = addr + entry->addend; - - stn_p((void *)haddr, size, val); - return; - } - /* Fall through for handling IO accesses */ - } - section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; @@ -1168,26 +1118,481 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, } #ifdef TARGET_WORDS_BIGENDIAN -# define TGT_BE(X) (X) -# define TGT_LE(X) BSWAP(X) +#define NEED_BE_BSWAP 0 +#define NEED_LE_BSWAP 1 #else -# define TGT_BE(X) BSWAP(X) -# define TGT_LE(X) (X) +#define NEED_BE_BSWAP 1 +#define NEED_LE_BSWAP 0 #endif -#define MMUSUFFIX _mmu +/* + * Byte Swap Helper + * + * This should all dead code away depending on the build host and + * access type. + */ -#define DATA_SIZE 1 -#include "softmmu_template.h" +static inline uint64_t handle_bswap(uint64_t val, int size, bool big_endian) +{ + if ((big_endian && NEED_BE_BSWAP) || (!big_endian && NEED_LE_BSWAP)) { + switch (size) { + case 1: return val; + case 2: return bswap16(val); + case 4: return bswap32(val); + case 8: return bswap64(val); + default: + g_assert_not_reached(); + } + } else { + return val; + } +} -#define DATA_SIZE 2 -#include "softmmu_template.h" +/* + * Load Helpers + * + * We support two different access types. SOFTMMU_CODE_ACCESS is + * specifically for reading instructions from system memory. It is + * called by the translation loop and in some helpers where the code + * is disassembled. It shouldn't be called directly by guest code. + */ -#define DATA_SIZE 4 -#include "softmmu_template.h" +typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); -#define DATA_SIZE 8 -#include "softmmu_template.h" +static inline uint64_t __attribute__((always_inline)) +load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, + uintptr_t retaddr, size_t size, bool big_endian, bool code_read, + FullLoadHelper *full_load) +{ + uintptr_t mmu_idx = get_mmuidx(oi); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read; + const size_t tlb_off = code_read ? + offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read); + const MMUAccessType access_type = + code_read ? MMU_INST_FETCH : MMU_DATA_LOAD; + unsigned a_bits = get_alignment_bits(get_memop(oi)); + void *haddr; + uint64_t res; + + /* Handle CPU specific unaligned behaviour */ + if (addr & ((1 << a_bits) - 1)) { + cpu_unaligned_access(ENV_GET_CPU(env), addr, access_type, + mmu_idx, retaddr); + } + + /* If the TLB entry is for a different page, reload and try again. */ + if (!tlb_hit(tlb_addr, addr)) { + if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, + addr & TARGET_PAGE_MASK)) { + tlb_fill(ENV_GET_CPU(env), addr, size, + access_type, mmu_idx, retaddr); + index = tlb_index(env, mmu_idx, addr); + entry = tlb_entry(env, mmu_idx, addr); + } + tlb_addr = code_read ? entry->addr_code : entry->addr_read; + } + + /* Handle an IO access. 
*/ + if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { + if ((addr & (size - 1)) != 0) { + goto do_unaligned_access; + } + + if (tlb_addr & TLB_RECHECK) { + /* + * This is a TLB_RECHECK access, where the MMU protection + * covers a smaller range than a target page, and we must + * repeat the MMU check here. This tlb_fill() call might + * longjump out if this access should cause a guest exception. + */ + tlb_fill(ENV_GET_CPU(env), addr, size, + access_type, mmu_idx, retaddr); + index = tlb_index(env, mmu_idx, addr); + entry = tlb_entry(env, mmu_idx, addr); + + tlb_addr = code_read ? entry->addr_code : entry->addr_read; + tlb_addr &= ~TLB_RECHECK; + if (!(tlb_addr & ~TARGET_PAGE_MASK)) { + /* RAM access */ + goto do_aligned_access; + } + } + + res = io_readx(env, &env->iotlb[mmu_idx][index], mmu_idx, addr, + retaddr, access_type, size); + return handle_bswap(res, size, big_endian); + } + + /* Handle slow unaligned access (it spans two pages or IO). */ + if (size > 1 + && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 + >= TARGET_PAGE_SIZE)) { + target_ulong addr1, addr2; + tcg_target_ulong r1, r2; + unsigned shift; + do_unaligned_access: + addr1 = addr & ~(size - 1); + addr2 = addr1 + size; + r1 = full_load(env, addr1, oi, retaddr); + r2 = full_load(env, addr2, oi, retaddr); + shift = (addr & (size - 1)) * 8; + + if (big_endian) { + /* Big-endian combine. */ + res = (r1 << shift) | (r2 >> ((size * 8) - shift)); + } else { + /* Little-endian combine. */ + res = (r1 >> shift) | (r2 << ((size * 8) - shift)); + } + return res & MAKE_64BIT_MASK(0, size * 8); + } + + do_aligned_access: + haddr = (void *)((uintptr_t)addr + entry->addend); + switch (size) { + case 1: + res = ldub_p(haddr); + break; + case 2: + if (big_endian) { + res = lduw_be_p(haddr); + } else { + res = lduw_le_p(haddr); + } + break; + case 4: + if (big_endian) { + res = (uint32_t)ldl_be_p(haddr); + } else { + res = (uint32_t)ldl_le_p(haddr); + } + break; + case 8: + if (big_endian) { + res = ldq_be_p(haddr); + } else { + res = ldq_le_p(haddr); + } + break; + default: + g_assert_not_reached(); + } + + return res; +} + +/* + * For the benefit of TCG generated code, we want to avoid the + * complication of ABI-specific return type promotion and always + * return a value extended to the register size of the host. This is + * tcg_target_long, except in the case of a 32-bit host and 64-bit + * data, and for that we always have uint64_t. + * + * We don't bother with this widened value for SOFTMMU_CODE_ACCESS. 
+ */ + +static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 1, false, false, + full_ldub_mmu); +} + +tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_ldub_mmu(env, addr, oi, retaddr); +} + +static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 2, false, false, + full_le_lduw_mmu); +} + +tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_le_lduw_mmu(env, addr, oi, retaddr); +} + +static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 2, true, false, + full_be_lduw_mmu); +} + +tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_be_lduw_mmu(env, addr, oi, retaddr); +} + +static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 4, false, false, + full_le_ldul_mmu); +} + +tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_le_ldul_mmu(env, addr, oi, retaddr); +} + +static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 4, true, false, + full_be_ldul_mmu); +} + +tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_be_ldul_mmu(env, addr, oi, retaddr); +} + +uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 8, false, false, + helper_le_ldq_mmu); +} + +uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 8, true, false, + helper_be_ldq_mmu); +} + +/* + * Provide signed versions of the load routines as well. We can of course + * avoid this for 64-bit data, or for 32-bit data on 32-bit host. 
+ */ + + +tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr); +} + +tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr); +} + +tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr); +} + +tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr); +} + +tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr); +} + +/* + * Store Helpers + */ + +static inline void __attribute__((always_inline)) +store_helper(CPUArchState *env, target_ulong addr, uint64_t val, + TCGMemOpIdx oi, uintptr_t retaddr, size_t size, bool big_endian) +{ + uintptr_t mmu_idx = get_mmuidx(oi); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = tlb_addr_write(entry); + const size_t tlb_off = offsetof(CPUTLBEntry, addr_write); + unsigned a_bits = get_alignment_bits(get_memop(oi)); + void *haddr; + + /* Handle CPU specific unaligned behaviour */ + if (addr & ((1 << a_bits) - 1)) { + cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, + mmu_idx, retaddr); + } + + /* If the TLB entry is for a different page, reload and try again. */ + if (!tlb_hit(tlb_addr, addr)) { + if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, + addr & TARGET_PAGE_MASK)) { + tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE, + mmu_idx, retaddr); + index = tlb_index(env, mmu_idx, addr); + entry = tlb_entry(env, mmu_idx, addr); + } + tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; + } + + /* Handle an IO access. */ + if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { + if ((addr & (size - 1)) != 0) { + goto do_unaligned_access; + } + + if (tlb_addr & TLB_RECHECK) { + /* + * This is a TLB_RECHECK access, where the MMU protection + * covers a smaller range than a target page, and we must + * repeat the MMU check here. This tlb_fill() call might + * longjump out if this access should cause a guest exception. + */ + tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE, + mmu_idx, retaddr); + index = tlb_index(env, mmu_idx, addr); + entry = tlb_entry(env, mmu_idx, addr); + + tlb_addr = tlb_addr_write(entry); + tlb_addr &= ~TLB_RECHECK; + if (!(tlb_addr & ~TARGET_PAGE_MASK)) { + /* RAM access */ + goto do_aligned_access; + } + } + + io_writex(env, &env->iotlb[mmu_idx][index], mmu_idx, + handle_bswap(val, size, big_endian), + addr, retaddr, size); + return; + } + + /* Handle slow unaligned access (it spans two pages or IO). */ + if (size > 1 + && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 + >= TARGET_PAGE_SIZE)) { + int i; + uintptr_t index2; + CPUTLBEntry *entry2; + target_ulong page2, tlb_addr2; + do_unaligned_access: + /* + * Ensure the second page is in the TLB. Note that the first page + * is already guaranteed to be filled, and that the second page + * cannot evict the first. 
+ */ + page2 = (addr + size) & TARGET_PAGE_MASK; + index2 = tlb_index(env, mmu_idx, page2); + entry2 = tlb_entry(env, mmu_idx, page2); + tlb_addr2 = tlb_addr_write(entry2); + if (!tlb_hit_page(tlb_addr2, page2) + && !victim_tlb_hit(env, mmu_idx, index2, tlb_off, + page2 & TARGET_PAGE_MASK)) { + tlb_fill(ENV_GET_CPU(env), page2, size, MMU_DATA_STORE, + mmu_idx, retaddr); + } + + /* + * XXX: not efficient, but simple. + * This loop must go in the forward direction to avoid issues + * with self-modifying code in Windows 64-bit. + */ + for (i = 0; i < size; ++i) { + uint8_t val8; + if (big_endian) { + /* Big-endian extract. */ + val8 = val >> (((size - 1) * 8) - (i * 8)); + } else { + /* Little-endian extract. */ + val8 = val >> (i * 8); + } + helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr); + } + return; + } + + do_aligned_access: + haddr = (void *)((uintptr_t)addr + entry->addend); + switch (size) { + case 1: + stb_p(haddr, val); + break; + case 2: + if (big_endian) { + stw_be_p(haddr, val); + } else { + stw_le_p(haddr, val); + } + break; + case 4: + if (big_endian) { + stl_be_p(haddr, val); + } else { + stl_le_p(haddr, val); + } + break; + case 8: + if (big_endian) { + stq_be_p(haddr, val); + } else { + stq_le_p(haddr, val); + } + break; + default: + g_assert_not_reached(); + break; + } +} + +void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 1, false); +} + +void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 2, false); +} + +void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 2, true); +} + +void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 4, false); +} + +void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 4, true); +} + +void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 8, false); +} + +void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 8, true); +} /* First set of helpers allows passing in of OI and RETADDR. This makes them callable from other helpers. */ @@ -1248,20 +1653,81 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, /* Code access functions. 
*/ -#undef MMUSUFFIX -#define MMUSUFFIX _cmmu -#undef GETPC -#define GETPC() ((uintptr_t)0) -#define SOFTMMU_CODE_ACCESS +static uint64_t full_ldub_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 1, false, true, + full_ldub_cmmu); +} -#define DATA_SIZE 1 -#include "softmmu_template.h" +uint8_t helper_ret_ldb_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_ldub_cmmu(env, addr, oi, retaddr); +} -#define DATA_SIZE 2 -#include "softmmu_template.h" +static uint64_t full_le_lduw_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 2, false, true, + full_le_lduw_cmmu); +} -#define DATA_SIZE 4 -#include "softmmu_template.h" +uint16_t helper_le_ldw_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_le_lduw_cmmu(env, addr, oi, retaddr); +} -#define DATA_SIZE 8 -#include "softmmu_template.h" +static uint64_t full_be_lduw_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 2, true, true, + full_be_lduw_cmmu); +} + +uint16_t helper_be_ldw_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_be_lduw_cmmu(env, addr, oi, retaddr); +} + +static uint64_t full_le_ldul_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 4, false, true, + full_le_ldul_cmmu); +} + +uint32_t helper_le_ldl_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_le_ldul_cmmu(env, addr, oi, retaddr); +} + +static uint64_t full_be_ldul_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 4, true, true, + full_be_ldul_cmmu); +} + +uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_be_ldul_cmmu(env, addr, oi, retaddr); +} + +uint64_t helper_le_ldq_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 8, false, true, + helper_le_ldq_cmmu); +} + +uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 8, true, true, + helper_be_ldq_cmmu); +} diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h deleted file mode 100644 index e970a8b378..0000000000 --- a/accel/tcg/softmmu_template.h +++ /dev/null @@ -1,454 +0,0 @@ -/* - * Software MMU support - * - * Generate helpers used by TCG for qemu_ld/st ops and code load - * functions. - * - * Included from target op helpers and exec.c. - * - * Copyright (c) 2003 Fabrice Bellard - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ -#if DATA_SIZE == 8 -#define SUFFIX q -#define LSUFFIX q -#define SDATA_TYPE int64_t -#define DATA_TYPE uint64_t -#elif DATA_SIZE == 4 -#define SUFFIX l -#define LSUFFIX l -#define SDATA_TYPE int32_t -#define DATA_TYPE uint32_t -#elif DATA_SIZE == 2 -#define SUFFIX w -#define LSUFFIX uw -#define SDATA_TYPE int16_t -#define DATA_TYPE uint16_t -#elif DATA_SIZE == 1 -#define SUFFIX b -#define LSUFFIX ub -#define SDATA_TYPE int8_t -#define DATA_TYPE uint8_t -#else -#error unsupported data size -#endif - - -/* For the benefit of TCG generated code, we want to avoid the complication - of ABI-specific return type promotion and always return a value extended - to the register size of the host. This is tcg_target_long, except in the - case of a 32-bit host and 64-bit data, and for that we always have - uint64_t. Don't bother with this widened value for SOFTMMU_CODE_ACCESS. */ -#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE == 8 -# define WORD_TYPE DATA_TYPE -# define USUFFIX SUFFIX -#else -# define WORD_TYPE tcg_target_ulong -# define USUFFIX glue(u, SUFFIX) -# define SSUFFIX glue(s, SUFFIX) -#endif - -#ifdef SOFTMMU_CODE_ACCESS -#define READ_ACCESS_TYPE MMU_INST_FETCH -#define ADDR_READ addr_code -#else -#define READ_ACCESS_TYPE MMU_DATA_LOAD -#define ADDR_READ addr_read -#endif - -#if DATA_SIZE == 8 -# define BSWAP(X) bswap64(X) -#elif DATA_SIZE == 4 -# define BSWAP(X) bswap32(X) -#elif DATA_SIZE == 2 -# define BSWAP(X) bswap16(X) -#else -# define BSWAP(X) (X) -#endif - -#if DATA_SIZE == 1 -# define helper_le_ld_name glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX) -# define helper_be_ld_name helper_le_ld_name -# define helper_le_lds_name glue(glue(helper_ret_ld, SSUFFIX), MMUSUFFIX) -# define helper_be_lds_name helper_le_lds_name -# define helper_le_st_name glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX) -# define helper_be_st_name helper_le_st_name -#else -# define helper_le_ld_name glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX) -# define helper_be_ld_name glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX) -# define helper_le_lds_name glue(glue(helper_le_ld, SSUFFIX), MMUSUFFIX) -# define helper_be_lds_name glue(glue(helper_be_ld, SSUFFIX), MMUSUFFIX) -# define helper_le_st_name glue(glue(helper_le_st, SUFFIX), MMUSUFFIX) -# define helper_be_st_name glue(glue(helper_be_st, SUFFIX), MMUSUFFIX) -#endif - -#ifndef SOFTMMU_CODE_ACCESS -static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, - size_t mmu_idx, size_t index, - target_ulong addr, - uintptr_t retaddr, - bool recheck, - MMUAccessType access_type) -{ - CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index]; - return io_readx(env, iotlbentry, mmu_idx, addr, retaddr, recheck, - access_type, DATA_SIZE); -} -#endif - -WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - uintptr_t mmu_idx = get_mmuidx(oi); - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = entry->ADDR_READ; - unsigned a_bits = get_alignment_bits(get_memop(oi)); - uintptr_t haddr; - DATA_TYPE res; - - if (addr & ((1 << a_bits) - 1)) { - cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, - mmu_idx, retaddr); - } - - /* If the TLB entry is for a different page, reload and try again. 
*/ - if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(ADDR_READ, addr)) { - tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE, - mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - } - tlb_addr = entry->ADDR_READ; - } - - /* Handle an IO access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - if ((addr & (DATA_SIZE - 1)) != 0) { - goto do_unaligned_access; - } - - /* ??? Note that the io helpers always read data in the target - byte ordering. We should push the LE/BE request down into io. */ - res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr, - tlb_addr & TLB_RECHECK, - READ_ACCESS_TYPE); - res = TGT_LE(res); - return res; - } - - /* Handle slow unaligned access (it spans two pages or IO). */ - if (DATA_SIZE > 1 - && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 - >= TARGET_PAGE_SIZE)) { - target_ulong addr1, addr2; - DATA_TYPE res1, res2; - unsigned shift; - do_unaligned_access: - addr1 = addr & ~(DATA_SIZE - 1); - addr2 = addr1 + DATA_SIZE; - res1 = helper_le_ld_name(env, addr1, oi, retaddr); - res2 = helper_le_ld_name(env, addr2, oi, retaddr); - shift = (addr & (DATA_SIZE - 1)) * 8; - - /* Little-endian combine. */ - res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift)); - return res; - } - - haddr = addr + entry->addend; -#if DATA_SIZE == 1 - res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr); -#else - res = glue(glue(ld, LSUFFIX), _le_p)((uint8_t *)haddr); -#endif - return res; -} - -#if DATA_SIZE > 1 -WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - uintptr_t mmu_idx = get_mmuidx(oi); - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = entry->ADDR_READ; - unsigned a_bits = get_alignment_bits(get_memop(oi)); - uintptr_t haddr; - DATA_TYPE res; - - if (addr & ((1 << a_bits) - 1)) { - cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, - mmu_idx, retaddr); - } - - /* If the TLB entry is for a different page, reload and try again. */ - if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(ADDR_READ, addr)) { - tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE, - mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - } - tlb_addr = entry->ADDR_READ; - } - - /* Handle an IO access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - if ((addr & (DATA_SIZE - 1)) != 0) { - goto do_unaligned_access; - } - - /* ??? Note that the io helpers always read data in the target - byte ordering. We should push the LE/BE request down into io. */ - res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr, - tlb_addr & TLB_RECHECK, - READ_ACCESS_TYPE); - res = TGT_BE(res); - return res; - } - - /* Handle slow unaligned access (it spans two pages or IO). */ - if (DATA_SIZE > 1 - && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 - >= TARGET_PAGE_SIZE)) { - target_ulong addr1, addr2; - DATA_TYPE res1, res2; - unsigned shift; - do_unaligned_access: - addr1 = addr & ~(DATA_SIZE - 1); - addr2 = addr1 + DATA_SIZE; - res1 = helper_be_ld_name(env, addr1, oi, retaddr); - res2 = helper_be_ld_name(env, addr2, oi, retaddr); - shift = (addr & (DATA_SIZE - 1)) * 8; - - /* Big-endian combine. 
*/ - res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift)); - return res; - } - - haddr = addr + entry->addend; - res = glue(glue(ld, LSUFFIX), _be_p)((uint8_t *)haddr); - return res; -} -#endif /* DATA_SIZE > 1 */ - -#ifndef SOFTMMU_CODE_ACCESS - -/* Provide signed versions of the load routines as well. We can of course - avoid this for 64-bit data, or for 32-bit data on 32-bit host. */ -#if DATA_SIZE * 8 < TCG_TARGET_REG_BITS -WORD_TYPE helper_le_lds_name(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - return (SDATA_TYPE)helper_le_ld_name(env, addr, oi, retaddr); -} - -# if DATA_SIZE > 1 -WORD_TYPE helper_be_lds_name(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - return (SDATA_TYPE)helper_be_ld_name(env, addr, oi, retaddr); -} -# endif -#endif - -static inline void glue(io_write, SUFFIX)(CPUArchState *env, - size_t mmu_idx, size_t index, - DATA_TYPE val, - target_ulong addr, - uintptr_t retaddr, - bool recheck) -{ - CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index]; - return io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, - recheck, DATA_SIZE); -} - -void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - uintptr_t mmu_idx = get_mmuidx(oi); - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = tlb_addr_write(entry); - unsigned a_bits = get_alignment_bits(get_memop(oi)); - uintptr_t haddr; - - if (addr & ((1 << a_bits) - 1)) { - cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, - mmu_idx, retaddr); - } - - /* If the TLB entry is for a different page, reload and try again. */ - if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(addr_write, addr)) { - tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, - mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - } - tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; - } - - /* Handle an IO access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - if ((addr & (DATA_SIZE - 1)) != 0) { - goto do_unaligned_access; - } - - /* ??? Note that the io helpers always read data in the target - byte ordering. We should push the LE/BE request down into io. */ - val = TGT_LE(val); - glue(io_write, SUFFIX)(env, mmu_idx, index, val, addr, - retaddr, tlb_addr & TLB_RECHECK); - return; - } - - /* Handle slow unaligned access (it spans two pages or IO). */ - if (DATA_SIZE > 1 - && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 - >= TARGET_PAGE_SIZE)) { - int i; - target_ulong page2; - CPUTLBEntry *entry2; - do_unaligned_access: - /* Ensure the second page is in the TLB. Note that the first page - is already guaranteed to be filled, and that the second page - cannot evict the first. */ - page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; - entry2 = tlb_entry(env, mmu_idx, page2); - if (!tlb_hit_page(tlb_addr_write(entry2), page2) - && !VICTIM_TLB_HIT(addr_write, page2)) { - tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, - mmu_idx, retaddr); - } - - /* XXX: not efficient, but simple. */ - /* This loop must go in the forward direction to avoid issues - with self-modifying code in Windows 64-bit. */ - for (i = 0; i < DATA_SIZE; ++i) { - /* Little-endian extract. 
*/ - uint8_t val8 = val >> (i * 8); - glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8, - oi, retaddr); - } - return; - } - - haddr = addr + entry->addend; -#if DATA_SIZE == 1 - glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val); -#else - glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val); -#endif -} - -#if DATA_SIZE > 1 -void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - uintptr_t mmu_idx = get_mmuidx(oi); - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = tlb_addr_write(entry); - unsigned a_bits = get_alignment_bits(get_memop(oi)); - uintptr_t haddr; - - if (addr & ((1 << a_bits) - 1)) { - cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, - mmu_idx, retaddr); - } - - /* If the TLB entry is for a different page, reload and try again. */ - if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(addr_write, addr)) { - tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, - mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - } - tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; - } - - /* Handle an IO access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - if ((addr & (DATA_SIZE - 1)) != 0) { - goto do_unaligned_access; - } - - /* ??? Note that the io helpers always read data in the target - byte ordering. We should push the LE/BE request down into io. */ - val = TGT_BE(val); - glue(io_write, SUFFIX)(env, mmu_idx, index, val, addr, retaddr, - tlb_addr & TLB_RECHECK); - return; - } - - /* Handle slow unaligned access (it spans two pages or IO). */ - if (DATA_SIZE > 1 - && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 - >= TARGET_PAGE_SIZE)) { - int i; - target_ulong page2; - CPUTLBEntry *entry2; - do_unaligned_access: - /* Ensure the second page is in the TLB. Note that the first page - is already guaranteed to be filled, and that the second page - cannot evict the first. */ - page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; - entry2 = tlb_entry(env, mmu_idx, page2); - if (!tlb_hit_page(tlb_addr_write(entry2), page2) - && !VICTIM_TLB_HIT(addr_write, page2)) { - tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, - mmu_idx, retaddr); - } - - /* XXX: not efficient, but simple */ - /* This loop must go in the forward direction to avoid issues - with self-modifying code. */ - for (i = 0; i < DATA_SIZE; ++i) { - /* Big-endian extract. 
*/ - uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8)); - glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8, - oi, retaddr); - } - return; - } - - haddr = addr + entry->addend; - glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val); -} -#endif /* DATA_SIZE > 1 */ -#endif /* !defined(SOFTMMU_CODE_ACCESS) */ - -#undef READ_ACCESS_TYPE -#undef DATA_TYPE -#undef SUFFIX -#undef LSUFFIX -#undef DATA_SIZE -#undef ADDR_READ -#undef WORD_TYPE -#undef SDATA_TYPE -#undef USUFFIX -#undef SSUFFIX -#undef BSWAP -#undef helper_le_ld_name -#undef helper_be_ld_name -#undef helper_le_lds_name -#undef helper_be_lds_name -#undef helper_le_st_name -#undef helper_be_st_name diff --git a/backends/Makefile.objs b/backends/Makefile.objs index ff619d31b4..981e8e122f 100644 --- a/backends/Makefile.objs +++ b/backends/Makefile.objs @@ -14,4 +14,6 @@ common-obj-y += cryptodev-vhost.o common-obj-$(CONFIG_VHOST_CRYPTO) += cryptodev-vhost-user.o endif +common-obj-$(call land,$(CONFIG_VHOST_USER),$(CONFIG_VIRTIO)) += vhost-user.o + common-obj-$(CONFIG_LINUX) += hostmem-memfd.o diff --git a/backends/vhost-user.c b/backends/vhost-user.c new file mode 100644 index 0000000000..2b055544a7 --- /dev/null +++ b/backends/vhost-user.c @@ -0,0 +1,209 @@ +/* + * QEMU vhost-user backend + * + * Copyright (C) 2018 Red Hat Inc + * + * Authors: + * Marc-André Lureau <marcandre.lureau@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + + +#include "qemu/osdep.h" +#include "hw/qdev.h" +#include "qapi/error.h" +#include "qapi/qmp/qerror.h" +#include "qemu/error-report.h" +#include "qom/object_interfaces.h" +#include "sysemu/vhost-user-backend.h" +#include "sysemu/kvm.h" +#include "io/channel-command.h" +#include "hw/virtio/virtio-bus.h" + +static bool +ioeventfd_enabled(void) +{ + return kvm_enabled() && kvm_eventfds_enabled(); +} + +int +vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev, + unsigned nvqs, Error **errp) +{ + int ret; + + assert(!b->vdev && vdev); + + if (!ioeventfd_enabled()) { + error_setg(errp, "vhost initialization failed: requires kvm"); + return -1; + } + + if (!vhost_user_init(&b->vhost_user, &b->chr, errp)) { + return -1; + } + + b->vdev = vdev; + b->dev.nvqs = nvqs; + b->dev.vqs = g_new(struct vhost_virtqueue, nvqs); + + ret = vhost_dev_init(&b->dev, &b->vhost_user, VHOST_BACKEND_TYPE_USER, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "vhost initialization failed"); + return -1; + } + + return 0; +} + +void +vhost_user_backend_start(VhostUserBackend *b) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(b->vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret, i ; + + if (b->started) { + return; + } + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return; + } + + ret = vhost_dev_enable_notifiers(&b->dev, b->vdev); + if (ret < 0) { + return; + } + + ret = k->set_guest_notifiers(qbus->parent, b->dev.nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier"); + goto err_host_notifiers; + } + + b->dev.acked_features = b->vdev->guest_features; + ret = vhost_dev_start(&b->dev, b->vdev); + if (ret < 0) { + error_report("Error start vhost dev"); + goto err_guest_notifiers; + } + + /* guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. 
+ */ + for (i = 0; i < b->dev.nvqs; i++) { + vhost_virtqueue_mask(&b->dev, b->vdev, + b->dev.vq_index + i, false); + } + + b->started = true; + return; + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, b->dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&b->dev, b->vdev); +} + +void +vhost_user_backend_stop(VhostUserBackend *b) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(b->vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret = 0; + + if (!b->started) { + return; + } + + vhost_dev_stop(&b->dev, b->vdev); + + if (k->set_guest_notifiers) { + ret = k->set_guest_notifiers(qbus->parent, + b->dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + } + } + assert(ret >= 0); + + vhost_dev_disable_notifiers(&b->dev, b->vdev); + b->started = false; +} + +static void set_chardev(Object *obj, const char *value, Error **errp) +{ + VhostUserBackend *b = VHOST_USER_BACKEND(obj); + Chardev *chr; + + if (b->completed) { + error_setg(errp, QERR_PERMISSION_DENIED); + return; + } + + g_free(b->chr_name); + b->chr_name = g_strdup(value); + + chr = qemu_chr_find(b->chr_name); + if (chr == NULL) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "Chardev '%s' not found", b->chr_name); + return; + } + + if (!qemu_chr_fe_init(&b->chr, chr, errp)) { + return; + } + + b->completed = true; + /* could call vhost_dev_init() so early message can be exchanged */ +} + +static char *get_chardev(Object *obj, Error **errp) +{ + VhostUserBackend *b = VHOST_USER_BACKEND(obj); + Chardev *chr = qemu_chr_fe_get_driver(&b->chr); + + if (chr && chr->label) { + return g_strdup(chr->label); + } + + return NULL; +} + +static void vhost_user_backend_init(Object *obj) +{ + object_property_add_str(obj, "chardev", get_chardev, set_chardev, NULL); +} + +static void vhost_user_backend_finalize(Object *obj) +{ + VhostUserBackend *b = VHOST_USER_BACKEND(obj); + + g_free(b->dev.vqs); + g_free(b->chr_name); + + vhost_user_cleanup(&b->vhost_user); + qemu_chr_fe_deinit(&b->chr, true); +} + +static const TypeInfo vhost_user_backend_info = { + .name = TYPE_VHOST_USER_BACKEND, + .parent = TYPE_OBJECT, + .instance_size = sizeof(VhostUserBackend), + .instance_init = vhost_user_backend_init, + .instance_finalize = vhost_user_backend_finalize, + .class_size = sizeof(VhostUserBackendClass), +}; + +static void register_types(void) +{ + type_register_static(&vhost_user_backend_info); +} + +type_init(register_types); diff --git a/block.c b/block.c index 5c2c6aa761..6999aad446 100644 --- a/block.c +++ b/block.c @@ -4082,14 +4082,14 @@ static void bdrv_delete(BlockDriverState *bs) assert(bdrv_op_blocker_is_empty(bs)); assert(!bs->refcnt); - bdrv_close(bs); - /* remove from list, if necessary */ if (bs->node_name[0] != '\0') { QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); } QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list); + bdrv_close(bs); + g_free(bs); } diff --git a/block/io.c b/block/io.c index 0412a51314..aeebc9c23c 100644 --- a/block/io.c +++ b/block/io.c @@ -837,42 +837,6 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset, return rwco.ret; } -/* - * Process a synchronous request using coroutines - */ -static int bdrv_rw_co(BdrvChild *child, int64_t sector_num, uint8_t *buf, - int nb_sectors, bool is_write, BdrvRequestFlags flags) -{ - QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, - nb_sectors * BDRV_SECTOR_SIZE); - - if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { - return -EINVAL; - } - - return bdrv_prwv_co(child, 
sector_num << BDRV_SECTOR_BITS, - &qiov, is_write, flags); -} - -/* return < 0 if error. See bdrv_write() for the return codes */ -int bdrv_read(BdrvChild *child, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - return bdrv_rw_co(child, sector_num, buf, nb_sectors, false, 0); -} - -/* Return < 0 if error. Important errors are: - -EIO generic I/O error (may happen for all errors) - -ENOMEDIUM No media inserted. - -EINVAL Invalid sector number or nb_sectors - -EACCES Trying to write a read-only device -*/ -int bdrv_write(BdrvChild *child, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - return bdrv_rw_co(child, sector_num, (uint8_t *)buf, nb_sectors, true, 0); -} - int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int bytes, BdrvRequestFlags flags) { @@ -935,6 +899,7 @@ int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov) return qiov->size; } +/* See bdrv_pwrite() for the return codes */ int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes) { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); @@ -958,6 +923,12 @@ int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov) return qiov->size; } +/* Return no. of bytes on success or < 0 on error. Important errors are: + -EIO generic I/O error (may happen for all errors) + -ENOMEDIUM No media inserted. + -EINVAL Invalid offset or number of bytes + -EACCES Trying to write a read-only device +*/ int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes) { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index fa7ac1f7cb..7481903396 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -2429,8 +2429,8 @@ write_refblocks: on_disk_refblock = (void *)((char *) *refcount_table + refblock_index * s->cluster_size); - ret = bdrv_write(bs->file, refblock_offset / BDRV_SECTOR_SIZE, - on_disk_refblock, s->cluster_sectors); + ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock, + s->cluster_size); if (ret < 0) { fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); goto fail; diff --git a/block/qcow2.c b/block/qcow2.c index a520d116ef..8e024007db 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1259,7 +1259,6 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, s->cluster_bits = header.cluster_bits; s->cluster_size = 1 << s->cluster_bits; - s->cluster_sectors = 1 << (s->cluster_bits - BDRV_SECTOR_BITS); /* Initialise version 3 header fields */ if (header.version == 2) { diff --git a/block/qcow2.h b/block/qcow2.h index fdee297f33..e62508d1ce 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -266,7 +266,6 @@ typedef struct Qcow2BitmapHeaderExt { typedef struct BDRVQcow2State { int cluster_bits; int cluster_size; - int cluster_sectors; int l2_slice_size; int l2_bits; int l2_size; diff --git a/block/vdi.c b/block/vdi.c index e1c42ad732..d7ef6628e7 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -171,6 +171,8 @@ typedef struct { uint64_t unused2[7]; } QEMU_PACKED VdiHeader; +QEMU_BUILD_BUG_ON(sizeof(VdiHeader) != 512); + typedef struct { /* The block map entries are little endian (even in memory). 
*/ uint32_t *bmap; @@ -384,7 +386,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, logout("\n"); - ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1); + ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); if (ret < 0) { goto fail; } @@ -484,8 +486,8 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, - bmap_size); + ret = bdrv_pread(bs->file, header.offset_bmap, s->bmap, + bmap_size * SECTOR_SIZE); if (ret < 0) { goto fail_free_bmap; } @@ -704,7 +706,7 @@ nonallocating_write: assert(VDI_IS_ALLOCATED(bmap_first)); *header = s->header; vdi_header_to_le(header); - ret = bdrv_write(bs->file, 0, block, 1); + ret = bdrv_pwrite(bs->file, 0, block, sizeof(VdiHeader)); g_free(block); block = NULL; @@ -722,10 +724,11 @@ nonallocating_write: base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE; logout("will write %u block map sectors starting from entry %u\n", n_sectors, bmap_first); - ret = bdrv_write(bs->file, offset, base, n_sectors); + ret = bdrv_pwrite(bs->file, offset * SECTOR_SIZE, base, + n_sectors * SECTOR_SIZE); } - return ret; + return ret < 0 ? ret : 0; } static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, diff --git a/block/vvfat.c b/block/vvfat.c index 5f66787890..253cc716dd 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1494,8 +1494,8 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num, DLOG(fprintf(stderr, "sectors %" PRId64 "+%" PRId64 " allocated\n", sector_num, n >> BDRV_SECTOR_BITS)); - if (bdrv_read(s->qcow, sector_num, buf + i * 0x200, - n >> BDRV_SECTOR_BITS)) { + if (bdrv_pread(s->qcow, sector_num * BDRV_SECTOR_SIZE, + buf + i * 0x200, n) < 0) { return -1; } i += (n >> BDRV_SECTOR_BITS) - 1; @@ -1983,8 +1983,9 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s, if (res) { return -1; } - res = bdrv_write(s->qcow, offset, s->cluster_buffer, 1); - if (res) { + res = bdrv_pwrite(s->qcow, offset * BDRV_SECTOR_SIZE, + s->cluster_buffer, BDRV_SECTOR_SIZE); + if (res < 0) { return -2; } } @@ -3050,7 +3051,8 @@ DLOG(checkpoint()); * Use qcow backend. Commit later. 
*/ DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, nb_sectors)); - ret = bdrv_write(s->qcow, sector_num, buf, nb_sectors); + ret = bdrv_pwrite(s->qcow, sector_num * BDRV_SECTOR_SIZE, buf, + nb_sectors * BDRV_SECTOR_SIZE); if (ret < 0) { fprintf(stderr, "Error writing to qcow backend\n"); return ret; diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index e08d6c7b97..74d42177c5 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -542,7 +542,7 @@ static bool vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) { int i; - VhostUserMemory *memory = &vmsg->payload.memory; + VhostUserMemory m = vmsg->payload.memory, *memory = &m; dev->nregions = memory->nregions; DPRINT("Nregions: %d\n", memory->nregions); @@ -684,7 +684,7 @@ static bool vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) { int i; - VhostUserMemory *memory = &vmsg->payload.memory; + VhostUserMemory m = vmsg->payload.memory, *memory = &m; for (i = 0; i < dev->nregions; i++) { VuDevRegion *r = &dev->regions[i]; @@ -813,7 +813,7 @@ vu_set_vring_num_exec(VuDev *dev, VhostUserMsg *vmsg) static bool vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg) { - struct vhost_vring_addr *vra = &vmsg->payload.addr; + struct vhost_vring_addr addr = vmsg->payload.addr, *vra = &addr; unsigned int index = vra->index; VuVirtq *vq = &dev->vq[index]; @@ -1157,6 +1157,10 @@ vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg) features |= 1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT; } + if (dev->iface->get_config && dev->iface->set_config) { + features |= 1ULL << VHOST_USER_PROTOCOL_F_CONFIG; + } + if (dev->iface->get_protocol_features) { features |= dev->iface->get_protocol_features(dev); } diff --git a/hw/input/Kconfig b/hw/input/Kconfig index e2e66f0858..889363d8ae 100644 --- a/hw/input/Kconfig +++ b/hw/input/Kconfig @@ -27,7 +27,12 @@ config VIRTIO_INPUT config VIRTIO_INPUT_HOST bool default y - depends on VIRTIO && LINUX + depends on VIRTIO_INPUT && LINUX + +config VHOST_USER_INPUT + bool + default y + depends on VIRTIO_INPUT && VHOST_USER config TSC210X bool diff --git a/hw/input/Makefile.objs b/hw/input/Makefile.objs index c8b00f71ec..d1de307708 100644 --- a/hw/input/Makefile.objs +++ b/hw/input/Makefile.objs @@ -9,9 +9,8 @@ common-obj-$(CONFIG_TSC2005) += tsc2005.o common-obj-$(CONFIG_VIRTIO_INPUT) += virtio-input.o common-obj-$(CONFIG_VIRTIO_INPUT) += virtio-input-hid.o -ifeq ($(CONFIG_LINUX),y) -common-obj-$(CONFIG_VIRTIO_INPUT) += virtio-input-host.o -endif +common-obj-$(CONFIG_VIRTIO_INPUT_HOST) += virtio-input-host.o +common-obj-$(CONFIG_VHOST_USER_INPUT) += vhost-user-input.o obj-$(CONFIG_MILKYMIST) += milkymist-softusb.o obj-$(CONFIG_PXA2XX) += pxa2xx_keypad.o diff --git a/hw/input/vhost-user-input.c b/hw/input/vhost-user-input.c new file mode 100644 index 0000000000..6da497b1a8 --- /dev/null +++ b/hw/input/vhost-user-input.c @@ -0,0 +1,129 @@ +/* + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qemu-common.h" + +#include "hw/qdev.h" +#include "hw/virtio/virtio-input.h" + +static int vhost_input_config_change(struct vhost_dev *dev) +{ + error_report("vhost-user-input: unhandled backend config change"); + return -1; +} + +static const VhostDevConfigOps config_ops = { + .vhost_dev_config_notifier = vhost_input_config_change, +}; + +static void vhost_input_realize(DeviceState *dev, Error **errp) +{ + VHostUserInput *vhi = VHOST_USER_INPUT(dev); + VirtIOInput *vinput = VIRTIO_INPUT(dev); + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + + vhost_dev_set_config_notifier(&vhi->vhost->dev, &config_ops); + vinput->cfg_size = sizeof_field(virtio_input_config, u); + if (vhost_user_backend_dev_init(vhi->vhost, vdev, 2, errp) == -1) { + return; + } +} + +static void vhost_input_change_active(VirtIOInput *vinput) +{ + VHostUserInput *vhi = VHOST_USER_INPUT(vinput); + + if (vinput->active) { + vhost_user_backend_start(vhi->vhost); + } else { + vhost_user_backend_stop(vhi->vhost); + } +} + +static void vhost_input_get_config(VirtIODevice *vdev, uint8_t *config_data) +{ + VirtIOInput *vinput = VIRTIO_INPUT(vdev); + VHostUserInput *vhi = VHOST_USER_INPUT(vdev); + int ret; + + memset(config_data, 0, vinput->cfg_size); + + ret = vhost_dev_get_config(&vhi->vhost->dev, config_data, vinput->cfg_size); + if (ret) { + error_report("vhost-user-input: get device config space failed"); + return; + } +} + +static void vhost_input_set_config(VirtIODevice *vdev, + const uint8_t *config_data) +{ + VHostUserInput *vhi = VHOST_USER_INPUT(vdev); + int ret; + + ret = vhost_dev_set_config(&vhi->vhost->dev, config_data, + 0, sizeof(virtio_input_config), + VHOST_SET_CONFIG_TYPE_MASTER); + if (ret) { + error_report("vhost-user-input: set device config space failed"); + return; + } + + virtio_notify_config(vdev); +} + +static const VMStateDescription vmstate_vhost_input = { + .name = "vhost-user-input", + .unmigratable = 1, +}; + +static void vhost_input_class_init(ObjectClass *klass, void *data) +{ + VirtIOInputClass *vic = VIRTIO_INPUT_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_vhost_input; + vdc->get_config = vhost_input_get_config; + vdc->set_config = vhost_input_set_config; + vic->realize = vhost_input_realize; + vic->change_active = vhost_input_change_active; +} + +static void vhost_input_init(Object *obj) +{ + VHostUserInput *vhi = VHOST_USER_INPUT(obj); + + vhi->vhost = VHOST_USER_BACKEND(object_new(TYPE_VHOST_USER_BACKEND)); + object_property_add_alias(obj, "chardev", + OBJECT(vhi->vhost), "chardev", &error_abort); +} + +static void vhost_input_finalize(Object *obj) +{ + VHostUserInput *vhi = VHOST_USER_INPUT(obj); + + object_unref(OBJECT(vhi->vhost)); +} + +static const TypeInfo vhost_input_info = { + .name = TYPE_VHOST_USER_INPUT, + .parent = TYPE_VIRTIO_INPUT, + .instance_size = sizeof(VHostUserInput), + .instance_init = vhost_input_init, + .instance_finalize = vhost_input_finalize, + .class_init = vhost_input_class_init, +}; + +static void vhost_input_register_types(void) +{ + type_register_static(&vhost_input_info); +} + +type_init(vhost_input_register_types) diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs index f2ab667a21..5570ea8df8 100644 --- a/hw/virtio/Makefile.objs +++ b/hw/virtio/Makefile.objs @@ -17,6 +17,7 @@ obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock.o ifeq ($(CONFIG_VIRTIO_PCI),y) 
obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock-pci.o obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk-pci.o +obj-$(CONFIG_VHOST_USER_INPUT) += vhost-user-input-pci.o obj-$(CONFIG_VHOST_USER_SCSI) += vhost-user-scsi-pci.o obj-$(CONFIG_VHOST_SCSI) += vhost-scsi-pci.o obj-$(CONFIG_VIRTIO_INPUT_HOST) += virtio-input-host-pci.o diff --git a/hw/virtio/vhost-user-input-pci.c b/hw/virtio/vhost-user-input-pci.c new file mode 100644 index 0000000000..ae9cff9aed --- /dev/null +++ b/hw/virtio/vhost-user-input-pci.c @@ -0,0 +1,50 @@ +/* + * This work is licensed under the terms of the GNU LGPL, version 2 or + * later. See the COPYING.LIB file in the top-level directory. + */ + +#include "qemu/osdep.h" + +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-input.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "virtio-pci.h" + +typedef struct VHostUserInputPCI VHostUserInputPCI; + +#define TYPE_VHOST_USER_INPUT_PCI "vhost-user-input-pci" + +#define VHOST_USER_INPUT_PCI(obj) \ + OBJECT_CHECK(VHostUserInputPCI, (obj), TYPE_VHOST_USER_INPUT_PCI) + +struct VHostUserInputPCI { + VirtIOPCIProxy parent_obj; + VHostUserInput vhi; +}; + +static void vhost_user_input_pci_instance_init(Object *obj) +{ + VHostUserInputPCI *dev = VHOST_USER_INPUT_PCI(obj); + + virtio_instance_init_common(obj, &dev->vhi, sizeof(dev->vhi), + TYPE_VHOST_USER_INPUT); + + object_property_add_alias(obj, "chardev", + OBJECT(&dev->vhi), "chardev", + &error_abort); +} + +static const VirtioPCIDeviceTypeInfo vhost_user_input_pci_info = { + .generic_name = TYPE_VHOST_USER_INPUT_PCI, + .parent = TYPE_VIRTIO_INPUT_PCI, + .instance_size = sizeof(VHostUserInputPCI), + .instance_init = vhost_user_input_pci_instance_init, +}; + +static void vhost_user_input_pci_register(void) +{ + virtio_pci_types_register(&vhost_user_input_pci_info); +} + +type_init(vhost_user_input_pci_register) diff --git a/hw/virtio/virtio-input-host-pci.c b/hw/virtio/virtio-input-host-pci.c index 725a51ad30..124c4f3447 100644 --- a/hw/virtio/virtio-input-host-pci.c +++ b/hw/virtio/virtio-input-host-pci.c @@ -13,7 +13,7 @@ typedef struct VirtIOInputHostPCI VirtIOInputHostPCI; -#define TYPE_VIRTIO_INPUT_HOST_PCI "virtio-input-host-pci-base" +#define TYPE_VIRTIO_INPUT_HOST_PCI "virtio-input-host-pci" #define VIRTIO_INPUT_HOST_PCI(obj) \ OBJECT_CHECK(VirtIOInputHostPCI, (obj), TYPE_VIRTIO_INPUT_HOST_PCI) @@ -31,10 +31,7 @@ static void virtio_host_initfn(Object *obj) } static const VirtioPCIDeviceTypeInfo virtio_input_host_pci_info = { - .base_name = TYPE_VIRTIO_INPUT_HOST_PCI, - .generic_name = "virtio-input-host-pci", - .transitional_name = "virtio-input-host-pci-transitional", - .non_transitional_name = "virtio-input-host-pci-non-transitional", + .generic_name = TYPE_VIRTIO_INPUT_HOST_PCI, .parent = TYPE_VIRTIO_INPUT_PCI, .instance_size = sizeof(VirtIOInputHostPCI), .instance_init = virtio_host_initfn, diff --git a/include/block/block.h b/include/block/block.h index c7a26199aa..5e2b98b0ee 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -316,10 +316,6 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp); void bdrv_reopen_commit(BDRVReopenState *reopen_state); void bdrv_reopen_abort(BDRVReopenState *reopen_state); -int bdrv_read(BdrvChild *child, int64_t sector_num, - uint8_t *buf, int nb_sectors); -int bdrv_write(BdrvChild *child, int64_t sector_num, - const uint8_t *buf, int nb_sectors); int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int bytes, BdrvRequestFlags flags); int 
bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags); diff --git a/include/hw/virtio/virtio-input.h b/include/hw/virtio/virtio-input.h index 054c38836f..4fca03e796 100644 --- a/include/hw/virtio/virtio-input.h +++ b/include/hw/virtio/virtio-input.h @@ -2,6 +2,7 @@ #define QEMU_VIRTIO_INPUT_H #include "ui/input.h" +#include "sysemu/vhost-user-backend.h" /* ----------------------------------------------------------------- */ /* virtio input protocol */ @@ -42,11 +43,18 @@ typedef struct virtio_input_event virtio_input_event; #define VIRTIO_INPUT_HOST_GET_PARENT_CLASS(obj) \ OBJECT_GET_PARENT_CLASS(obj, TYPE_VIRTIO_INPUT_HOST) +#define TYPE_VHOST_USER_INPUT "vhost-user-input" +#define VHOST_USER_INPUT(obj) \ + OBJECT_CHECK(VHostUserInput, (obj), TYPE_VHOST_USER_INPUT) +#define VHOST_USER_INPUT_GET_PARENT_CLASS(obj) \ + OBJECT_GET_PARENT_CLASS(obj, TYPE_VHOST_USER_INPUT) + typedef struct VirtIOInput VirtIOInput; typedef struct VirtIOInputClass VirtIOInputClass; typedef struct VirtIOInputConfig VirtIOInputConfig; typedef struct VirtIOInputHID VirtIOInputHID; typedef struct VirtIOInputHost VirtIOInputHost; +typedef struct VHostUserInput VHostUserInput; struct VirtIOInputConfig { virtio_input_config config; @@ -98,6 +106,12 @@ struct VirtIOInputHost { int fd; }; +struct VHostUserInput { + VirtIOInput parent_obj; + + VhostUserBackend *vhost; +}; + void virtio_input_send(VirtIOInput *vinput, virtio_input_event *event); void virtio_input_init_config(VirtIOInput *vinput, virtio_input_config *config); diff --git a/include/sysemu/vhost-user-backend.h b/include/sysemu/vhost-user-backend.h new file mode 100644 index 0000000000..9abf8f06a1 --- /dev/null +++ b/include/sysemu/vhost-user-backend.h @@ -0,0 +1,57 @@ +/* + * QEMU vhost-user backend + * + * Copyright (C) 2018 Red Hat Inc + * + * Authors: + * Marc-André Lureau <marcandre.lureau@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ +#ifndef QEMU_VHOST_USER_BACKEND_H +#define QEMU_VHOST_USER_BACKEND_H + +#include "qom/object.h" +#include "exec/memory.h" +#include "qemu/option.h" +#include "qemu/bitmap.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" +#include "chardev/char-fe.h" +#include "io/channel.h" + +#define TYPE_VHOST_USER_BACKEND "vhost-user-backend" +#define VHOST_USER_BACKEND(obj) \ + OBJECT_CHECK(VhostUserBackend, (obj), TYPE_VHOST_USER_BACKEND) +#define VHOST_USER_BACKEND_GET_CLASS(obj) \ + OBJECT_GET_CLASS(VhostUserBackendClass, (obj), TYPE_VHOST_USER_BACKEND) +#define VHOST_USER_BACKEND_CLASS(klass) \ + OBJECT_CLASS_CHECK(VhostUserBackendClass, (klass), TYPE_VHOST_USER_BACKEND) + +typedef struct VhostUserBackend VhostUserBackend; +typedef struct VhostUserBackendClass VhostUserBackendClass; + +struct VhostUserBackendClass { + ObjectClass parent_class; +}; + +struct VhostUserBackend { + /* private */ + Object parent; + + char *chr_name; + CharBackend chr; + VhostUserState vhost_user; + struct vhost_dev dev; + VirtIODevice *vdev; + bool started; + bool completed; +}; + +int vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev, + unsigned nvqs, Error **errp); +void vhost_user_backend_start(VhostUserBackend *b); +void vhost_user_backend_stop(VhostUserBackend *b); + +#endif diff --git a/job.c b/job.c index da8e4b7bf2..2167d53717 100644 --- a/job.c +++ b/job.c @@ -432,7 +432,7 @@ void job_enter_cond(Job *job, bool(*fn)(Job *job)) timer_del(&job->sleep_timer); job->busy = true; job_unlock(); - aio_co_wake(job->co); + aio_co_enter(job->aio_context, job->co); } void job_enter(Job *job) diff --git a/qemu-img.c b/qemu-img.c index e6ad5978e0..28fba1e7a7 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -37,6 +37,7 @@ #include "qemu/option.h" #include "qemu/error-report.h" #include "qemu/log.h" +#include "qemu/units.h" #include "qom/object_interfaces.h" #include "sysemu/sysemu.h" #include "sysemu/block-backend.h" @@ -1216,7 +1217,7 @@ static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2, return res; } -#define IO_BUF_SIZE (2 * 1024 * 1024) +#define IO_BUF_SIZE (2 * MiB) /* * Check if passed sectors are empty (not allocated or contain only 0 bytes) @@ -2960,7 +2961,7 @@ static int img_map(int argc, char **argv) int64_t n; /* Probe up to 1 GiB at a time. 
*/ - n = MIN(1 << 30, length - offset); + n = MIN(1 * GiB, length - offset); ret = get_block_status(bs, offset, n, &next); if (ret < 0) { @@ -3311,26 +3312,30 @@ static int img_rebase(int argc, char **argv) char backing_name[PATH_MAX]; QDict *options = NULL; - if (bs->backing_format[0] != '\0') { - options = qdict_new(); - qdict_put_str(options, "driver", bs->backing_format); - } - - if (force_share) { - if (!options) { + if (bs->backing) { + if (bs->backing_format[0] != '\0') { options = qdict_new(); + qdict_put_str(options, "driver", bs->backing_format); } - qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); - } - bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); - blk_old_backing = blk_new_open(backing_name, NULL, - options, src_flags, &local_err); - if (!blk_old_backing) { - error_reportf_err(local_err, - "Could not open old backing file '%s': ", - backing_name); - ret = -1; - goto out; + + if (force_share) { + if (!options) { + options = qdict_new(); + } + qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); + } + bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); + blk_old_backing = blk_new_open(backing_name, NULL, + options, src_flags, &local_err); + if (!blk_old_backing) { + error_reportf_err(local_err, + "Could not open old backing file '%s': ", + backing_name); + ret = -1; + goto out; + } + } else { + blk_old_backing = NULL; } if (out_baseimg[0]) { @@ -3383,7 +3388,7 @@ static int img_rebase(int argc, char **argv) */ if (!unsafe) { int64_t size; - int64_t old_backing_size; + int64_t old_backing_size = 0; int64_t new_backing_size = 0; uint64_t offset; int64_t n; @@ -3399,15 +3404,18 @@ static int img_rebase(int argc, char **argv) ret = -1; goto out; } - old_backing_size = blk_getlength(blk_old_backing); - if (old_backing_size < 0) { - char backing_name[PATH_MAX]; + if (blk_old_backing) { + old_backing_size = blk_getlength(blk_old_backing); + if (old_backing_size < 0) { + char backing_name[PATH_MAX]; - bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); - error_report("Could not get size of '%s': %s", - backing_name, strerror(-old_backing_size)); - ret = -1; - goto out; + bdrv_get_backing_filename(bs, backing_name, + sizeof(backing_name)); + error_report("Could not get size of '%s': %s", + backing_name, strerror(-old_backing_size)); + ret = -1; + goto out; + } } if (blk_new_backing) { new_backing_size = blk_getlength(blk_new_backing); @@ -3424,6 +3432,8 @@ static int img_rebase(int argc, char **argv) } for (offset = 0; offset < size; offset += n) { + bool buf_old_is_zero = false; + /* How many bytes can we handle with the next read? 
*/ n = MIN(IO_BUF_SIZE, size - offset); @@ -3444,6 +3454,7 @@ static int img_rebase(int argc, char **argv) */ if (offset >= old_backing_size) { memset(buf_old, 0, n); + buf_old_is_zero = true; } else { if (offset + n > old_backing_size) { n = old_backing_size - offset; @@ -3479,8 +3490,12 @@ static int img_rebase(int argc, char **argv) if (compare_buffers(buf_old + written, buf_new + written, n - written, &pnum)) { - ret = blk_pwrite(blk, offset + written, - buf_old + written, pnum, 0); + if (buf_old_is_zero) { + ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0); + } else { + ret = blk_pwrite(blk, offset + written, + buf_old + written, pnum, 0); + } if (ret < 0) { error_report("Error while writing to COW image: %s", strerror(-ret)); diff --git a/tests/qemu-iotests/192 b/tests/qemu-iotests/192 index 158086f9d2..61a88ac88d 100755 --- a/tests/qemu-iotests/192 +++ b/tests/qemu-iotests/192 @@ -29,7 +29,9 @@ status=1 # failure is the default! _cleanup() { - _cleanup_test_img + _cleanup_qemu + _cleanup_test_img + rm -f "$TEST_DIR/nbd" } trap "_cleanup; exit \$status" 0 1 2 3 15 diff --git a/tests/qemu-iotests/252 b/tests/qemu-iotests/252 new file mode 100755 index 0000000000..f6c8f71444 --- /dev/null +++ b/tests/qemu-iotests/252 @@ -0,0 +1,124 @@ +#!/usr/bin/env bash +# +# Tests for rebasing COW images that require zero cluster support +# +# Copyright (C) 2019 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=mreitz@redhat.com + +seq=$(basename $0) +echo "QA output created by $seq" + +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img + rm -f "$TEST_IMG.base_new" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.pattern + +# Currently only qcow2 and qed support rebasing, and only qcow2 v3 has +# zero cluster support +_supported_fmt qcow2 +_unsupported_imgopts 'compat=0.10' +_supported_proto file +_supported_os Linux + +CLUSTER_SIZE=65536 + +echo +echo "=== Test rebase without input base ===" +echo + +# Cluster allocations to be tested: +# +# Backing (new) 11 -- 11 -- 11 -- +# COW image 22 22 11 11 -- -- +# +# Expected result: +# +# COW image 22 22 11 11 00 -- +# +# (Cluster 2 might be "--" after the rebase, too, but rebase just +# compares the new backing file to the old one and disregards the +# overlay. Therefore, it will never discard overlay clusters.) 
+ +_make_test_img $((6 * CLUSTER_SIZE)) +TEST_IMG="$TEST_IMG.base_new" _make_test_img $((6 * CLUSTER_SIZE)) + +echo + +$QEMU_IO "$TEST_IMG" \ + -c "write -P 0x22 $((0 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "write -P 0x11 $((2 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + | _filter_qemu_io + +$QEMU_IO "$TEST_IMG.base_new" \ + -c "write -P 0x11 $((0 * CLUSTER_SIZE)) $CLUSTER_SIZE" \ + -c "write -P 0x11 $((2 * CLUSTER_SIZE)) $CLUSTER_SIZE" \ + -c "write -P 0x11 $((4 * CLUSTER_SIZE)) $CLUSTER_SIZE" \ + | _filter_qemu_io + +echo + +# This should be a no-op +$QEMU_IMG rebase -b "" "$TEST_IMG" + +# Verify the data is correct +$QEMU_IO "$TEST_IMG" \ + -c "read -P 0x22 $((0 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "read -P 0x11 $((2 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "read -P 0x00 $((4 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + | _filter_qemu_io + +echo + +# Verify the allocation status (first four cluster should be allocated +# in TEST_IMG, clusters 4 and 5 should be unallocated (marked as zero +# clusters here because there is no backing file)) +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +echo + +$QEMU_IMG rebase -b "$TEST_IMG.base_new" "$TEST_IMG" + +# Verify the data is correct +$QEMU_IO "$TEST_IMG" \ + -c "read -P 0x22 $((0 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "read -P 0x11 $((2 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "read -P 0x00 $((4 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + | _filter_qemu_io + +echo + +# Verify the allocation status (first four cluster should be allocated +# in TEST_IMG, cluster 4 should be zero, and cluster 5 should be +# unallocated (signified by '"depth": 1')) +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/252.out b/tests/qemu-iotests/252.out new file mode 100644 index 0000000000..12dce889f8 --- /dev/null +++ b/tests/qemu-iotests/252.out @@ -0,0 +1,39 @@ +QA output created by 252 + +=== Test rebase without input base === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=393216 +Formatting 'TEST_DIR/t.IMGFMT.base_new', fmt=IMGFMT size=393216 + +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 131072/131072 bytes at offset 131072 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 131072 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 262144 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +read 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 131072 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 262144 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +[{ "start": 0, "length": 262144, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 262144, "length": 131072, "depth": 0, "zero": true, "data": false}] + +read 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 131072 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 262144 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +[{ "start": 0, "length": 262144, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, +{ 
"start": 262144, "length": 65536, "depth": 0, "zero": true, "data": false}, +{ "start": 327680, "length": 65536, "depth": 1, "zero": true, "data": false}] +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 7ac9a5ea4a..00e474ab0a 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -249,3 +249,4 @@ 247 rw auto quick 248 rw auto quick 249 rw auto quick +252 rw auto backing quick diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c index 97ac0b159d..036ed9a3b3 100644 --- a/tests/test-block-iothread.c +++ b/tests/test-block-iothread.c @@ -354,6 +354,111 @@ static void test_sync_op(const void *opaque) blk_unref(blk); } +typedef struct TestBlockJob { + BlockJob common; + bool should_complete; + int n; +} TestBlockJob; + +static int test_job_prepare(Job *job) +{ + g_assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + return 0; +} + +static int coroutine_fn test_job_run(Job *job, Error **errp) +{ + TestBlockJob *s = container_of(job, TestBlockJob, common.job); + + job_transition_to_ready(&s->common.job); + while (!s->should_complete) { + s->n++; + g_assert(qemu_get_current_aio_context() == job->aio_context); + + /* Avoid job_sleep_ns() because it marks the job as !busy. We want to + * emulate some actual activity (probably some I/O) here so that the + * drain involved in AioContext switches has to wait for this activity + * to stop. */ + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); + + job_pause_point(&s->common.job); + } + + g_assert(qemu_get_current_aio_context() == job->aio_context); + return 0; +} + +static void test_job_complete(Job *job, Error **errp) +{ + TestBlockJob *s = container_of(job, TestBlockJob, common.job); + s->should_complete = true; +} + +BlockJobDriver test_job_driver = { + .job_driver = { + .instance_size = sizeof(TestBlockJob), + .free = block_job_free, + .user_resume = block_job_user_resume, + .drain = block_job_drain, + .run = test_job_run, + .complete = test_job_complete, + .prepare = test_job_prepare, + }, +}; + +static void test_attach_blockjob(void) +{ + IOThread *iothread = iothread_new(); + AioContext *ctx = iothread_get_aio_context(iothread); + BlockBackend *blk; + BlockDriverState *bs; + TestBlockJob *tjob; + + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); + blk_insert_bs(blk, bs, &error_abort); + + tjob = block_job_create("job0", &test_job_driver, NULL, bs, + 0, BLK_PERM_ALL, + 0, 0, NULL, NULL, &error_abort); + job_start(&tjob->common.job); + + while (tjob->n == 0) { + aio_poll(qemu_get_aio_context(), false); + } + + blk_set_aio_context(blk, ctx); + + tjob->n = 0; + while (tjob->n == 0) { + aio_poll(qemu_get_aio_context(), false); + } + + aio_context_acquire(ctx); + blk_set_aio_context(blk, qemu_get_aio_context()); + aio_context_release(ctx); + + tjob->n = 0; + while (tjob->n == 0) { + aio_poll(qemu_get_aio_context(), false); + } + + blk_set_aio_context(blk, ctx); + + tjob->n = 0; + while (tjob->n == 0) { + aio_poll(qemu_get_aio_context(), false); + } + + aio_context_acquire(ctx); + job_complete_sync(&tjob->common.job, &error_abort); + blk_set_aio_context(blk, qemu_get_aio_context()); + aio_context_release(ctx); + + bdrv_unref(bs); + blk_unref(blk); +} + int main(int argc, char **argv) { int i; @@ -368,5 +473,7 @@ int main(int argc, char **argv) g_test_add_data_func(t->name, t, test_sync_op); } + g_test_add_func("/attach/blockjob", test_attach_blockjob); + return g_test_run(); } diff --git 
a/util/qemu-sockets.c b/util/qemu-sockets.c
index ba6335e71a..8850a280a8 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -970,26 +970,12 @@ static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp)
 /* compatibility wrapper */
 int unix_listen(const char *str, Error **errp)
 {
-    char *path, *optstr;
-    int sock, len;
     UnixSocketAddress *saddr;
+    int sock;
 
     saddr = g_new0(UnixSocketAddress, 1);
-
-    optstr = strchr(str, ',');
-    if (optstr) {
-        len = optstr - str;
-        if (len) {
-            path = g_malloc(len+1);
-            snprintf(path, len+1, "%.*s", len, str);
-            saddr->path = path;
-        }
-    } else {
-        saddr->path = g_strdup(str);
-    }
-
+    saddr->path = g_strdup(str);
     sock = unix_listen_saddr(saddr, errp);
-
     qapi_free_UnixSocketAddress(saddr);
     return sock;
 }
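
The new include/sysemu/vhost-user-backend.h above declares vhost_user_backend_dev_init(), vhost_user_backend_start() and vhost_user_backend_stop(), but none of the hunks in this section show a caller. The following is a minimal sketch of how a virtio device built on this backend might drive it; the function names, the queue count and the error-return convention are assumptions for illustration, not code from this series.

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/virtio/virtio.h"
#include "sysemu/vhost-user-backend.h"

/*
 * Illustrative only: wire a VhostUserBackend to a virtio device during
 * realize, then start/stop it as the guest toggles the device status.
 */
static void my_vhost_user_dev_realize(VirtIODevice *vdev,
                                      VhostUserBackend *vub,
                                      Error **errp)
{
    /* Opens the chardev-backed vhost-user connection and initializes the
     * embedded vhost_dev with the given number of virtqueues (2 is an
     * assumed value here, as is treating a negative return as failure). */
    if (vhost_user_backend_dev_init(vub, vdev, 2, errp) < 0) {
        return;
    }
}

static void my_vhost_user_dev_set_status(VhostUserBackend *vub, bool active)
{
    if (active) {
        vhost_user_backend_start(vub);  /* begin processing the rings */
    } else {
        vhost_user_backend_stop(vub);   /* quiesce before reset/unrealize */
    }
}

The "chardev" alias that vhost-user-input-pci.c adds on its embedded vhi object presumably ends up in the backend's chr_name, so a device of this kind would be set up along the lines of -chardev socket,id=vui0,path=... -device vhost-user-input-pci,chardev=vui0.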
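
The img_rebase() hunks above make two related changes: the old backing file may now be absent entirely (qemu-img rebase -b "", which the new iotest 252 exercises), and ranges known to read as zeroes from the old backing file are written back as explicit zero writes rather than as buffers full of zeroes. A distilled sketch of that write decision, under the assumption of a hypothetical helper name (the real logic stays inline in qemu-img.c):

#include "qemu/osdep.h"
#include "sysemu/block-backend.h"

/*
 * buf_old_is_zero is set when the compared range lies entirely beyond
 * the end of the old backing file (or there is no old backing file at
 * all), so buf_old was merely memset to zero.
 */
static int rebase_write_back(BlockBackend *blk, int64_t offset,
                             const uint8_t *buf_old, int bytes,
                             bool buf_old_is_zero)
{
    if (buf_old_is_zero) {
        /* Lets formats such as qcow2 v3 record zero clusters instead of
         * allocating data clusters that contain only zeroes. */
        return blk_pwrite_zeroes(blk, offset, bytes, 0);
    }
    return blk_pwrite(blk, offset, buf_old, bytes, 0);
}

This is what iotest 252 checks with qemu-img map: after the rebase, cluster 4 shows up as a zero cluster at depth 0 rather than as an allocated data cluster.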
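
Finally, the one-line job.c change and the new test_attach_blockjob case in tests/test-block-iothread.c belong together: the test starts a block job and then moves its BlockBackend between the main loop and an IOThread with blk_set_aio_context() while the job keeps running. aio_co_wake() resumes a coroutine in the AioContext it last ran in, which can be stale after such a move; aio_co_enter() takes the context explicitly, so job_enter_cond() now always resumes the job coroutine in job->aio_context. A trivial restatement of the rule, with an assumed wrapper name:

#include "qemu/osdep.h"
#include "qemu/job.h"
#include "block/aio.h"

/* Resume the job coroutine where the job currently lives, not where
 * the coroutine happened to run last. */
static void job_wake_co(Job *job)
{
    aio_co_enter(job->aio_context, job->co);
}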