Diffstat (limited to 'hw')
46 files changed, 2427 insertions, 564 deletions
diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 210d9e7705..d42ce6d8b8 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -32,10 +32,12 @@ #include "qemu/error-report.h" #include "qemu/option.h" #include <libgen.h> +#ifdef CONFIG_LINUX #include <linux/fs.h> #ifdef CONFIG_LINUX_MAGIC_H #include <linux/magic.h> #endif +#endif #include <sys/ioctl.h> #ifndef XFS_SUPER_MAGIC @@ -560,6 +562,15 @@ again: if (!entry) { return NULL; } +#ifdef CONFIG_DARWIN + int off; + off = telldir(fs->dir.stream); + /* If telldir fails, fail the entire readdir call */ + if (off < 0) { + return NULL; + } + entry->d_seekoff = off; +#endif if (ctx->export_flags & V9FS_SM_MAPPED) { entry->d_type = DT_UNKNOWN; @@ -671,7 +682,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path, if (fs_ctx->export_flags & V9FS_SM_MAPPED || fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - err = mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0); + err = qemu_mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0); if (err == -1) { goto out; } @@ -686,7 +697,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path, } } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || fs_ctx->export_flags & V9FS_SM_NONE) { - err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev); + err = qemu_mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev); if (err == -1) { goto out; } @@ -779,16 +790,20 @@ static int local_fstat(FsContext *fs_ctx, int fid_type, mode_t tmp_mode; dev_t tmp_dev; - if (fgetxattr(fd, "user.virtfs.uid", &tmp_uid, sizeof(uid_t)) > 0) { + if (qemu_fgetxattr(fd, "user.virtfs.uid", + &tmp_uid, sizeof(uid_t)) > 0) { stbuf->st_uid = le32_to_cpu(tmp_uid); } - if (fgetxattr(fd, "user.virtfs.gid", &tmp_gid, sizeof(gid_t)) > 0) { + if (qemu_fgetxattr(fd, "user.virtfs.gid", + &tmp_gid, sizeof(gid_t)) > 0) { stbuf->st_gid = le32_to_cpu(tmp_gid); } - if (fgetxattr(fd, "user.virtfs.mode", &tmp_mode, sizeof(mode_t)) > 0) { + if (qemu_fgetxattr(fd, "user.virtfs.mode", + &tmp_mode, sizeof(mode_t)) > 0) { stbuf->st_mode = le32_to_cpu(tmp_mode); } - if (fgetxattr(fd, "user.virtfs.rdev", &tmp_dev, sizeof(dev_t)) > 0) { + if (qemu_fgetxattr(fd, "user.virtfs.rdev", + &tmp_dev, sizeof(dev_t)) > 0) { stbuf->st_rdev = le64_to_cpu(tmp_dev); } } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { diff --git a/hw/9pfs/9p-proxy.c b/hw/9pfs/9p-proxy.c index 09bd9f1464..8b4b5cf7dc 100644 --- a/hw/9pfs/9p-proxy.c +++ b/hw/9pfs/9p-proxy.c @@ -123,10 +123,16 @@ static void prstatfs_to_statfs(struct statfs *stfs, ProxyStatFS *prstfs) stfs->f_bavail = prstfs->f_bavail; stfs->f_files = prstfs->f_files; stfs->f_ffree = prstfs->f_ffree; +#ifdef CONFIG_DARWIN + /* f_namelen and f_frsize do not exist on Darwin */ + stfs->f_fsid.val[0] = prstfs->f_fsid[0] & 0xFFFFFFFFU; + stfs->f_fsid.val[1] = prstfs->f_fsid[1] >> 32 & 0xFFFFFFFFU; +#else stfs->f_fsid.__val[0] = prstfs->f_fsid[0] & 0xFFFFFFFFU; stfs->f_fsid.__val[1] = prstfs->f_fsid[1] >> 32 & 0xFFFFFFFFU; stfs->f_namelen = prstfs->f_namelen; stfs->f_frsize = prstfs->f_frsize; +#endif } /* Converts proxy_stat structure to VFS stat structure */ @@ -143,12 +149,24 @@ static void prstat_to_stat(struct stat *stbuf, ProxyStat *prstat) stbuf->st_size = prstat->st_size; stbuf->st_blksize = prstat->st_blksize; stbuf->st_blocks = prstat->st_blocks; + stbuf->st_atime = prstat->st_atim_sec; + stbuf->st_mtime = prstat->st_mtim_sec; + stbuf->st_ctime = prstat->st_ctim_sec; +#ifdef CONFIG_DARWIN + stbuf->st_atimespec.tv_sec = prstat->st_atim_sec; + stbuf->st_mtimespec.tv_sec = prstat->st_mtim_sec; + 
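/*
 * Illustrative sketch (not part of the patch): the Darwin/Linux split above
 * recurs wherever nanosecond timestamps are read or written, because Darwin
 * names the struct stat fields st_atimespec/st_mtimespec/st_ctimespec while
 * Linux uses st_atim/st_mtim/st_ctim. A hypothetical helper along these lines
 * could centralize the #ifdef; qemu_stat_atim_nsec() is an assumed name, not
 * existing QEMU API.
 */
static inline long qemu_stat_atim_nsec(const struct stat *st)
{
#ifdef CONFIG_DARWIN
    return st->st_atimespec.tv_nsec;
#else
    return st->st_atim.tv_nsec;
#endif
}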
stbuf->st_ctimespec.tv_sec = prstat->st_ctim_sec; + stbuf->st_atimespec.tv_nsec = prstat->st_atim_nsec; + stbuf->st_mtimespec.tv_nsec = prstat->st_mtim_nsec; + stbuf->st_ctimespec.tv_nsec = prstat->st_ctim_nsec; +#else stbuf->st_atim.tv_sec = prstat->st_atim_sec; + stbuf->st_mtim.tv_sec = prstat->st_mtim_sec; + stbuf->st_ctim.tv_sec = prstat->st_ctim_sec; stbuf->st_atim.tv_nsec = prstat->st_atim_nsec; - stbuf->st_mtime = prstat->st_mtim_sec; stbuf->st_mtim.tv_nsec = prstat->st_mtim_nsec; - stbuf->st_ctime = prstat->st_ctim_sec; stbuf->st_ctim.tv_nsec = prstat->st_ctim_nsec; +#endif } /* @@ -688,7 +706,21 @@ static off_t proxy_telldir(FsContext *ctx, V9fsFidOpenState *fs) static struct dirent *proxy_readdir(FsContext *ctx, V9fsFidOpenState *fs) { - return readdir(fs->dir.stream); + struct dirent *entry; + entry = readdir(fs->dir.stream); +#ifdef CONFIG_DARWIN + if (!entry) { + return NULL; + } + int td; + td = telldir(fs->dir.stream); + /* If telldir fails, fail the entire readdir call */ + if (td < 0) { + return NULL; + } + entry->d_seekoff = td; +#endif + return entry; } static void proxy_seekdir(FsContext *ctx, V9fsFidOpenState *fs, off_t off) diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c index 7a7cd5c5ba..b3080e415b 100644 --- a/hw/9pfs/9p-synth.c +++ b/hw/9pfs/9p-synth.c @@ -234,7 +234,11 @@ static void synth_direntry(V9fsSynthNode *node, offsetof(struct dirent, d_name) + sz); memcpy(entry->d_name, node->name, sz); entry->d_ino = node->attr->inode; +#ifdef CONFIG_DARWIN + entry->d_seekoff = off + 1; +#else entry->d_off = off + 1; +#endif } static struct dirent *synth_get_dentry(V9fsSynthNode *dir, @@ -439,7 +443,9 @@ static int synth_statfs(FsContext *s, V9fsPath *fs_path, stbuf->f_bsize = 512; stbuf->f_blocks = 0; stbuf->f_files = synth_node_count; +#ifndef CONFIG_DARWIN stbuf->f_namelen = NAME_MAX; +#endif return 0; } diff --git a/hw/9pfs/9p-util-darwin.c b/hw/9pfs/9p-util-darwin.c new file mode 100644 index 0000000000..bec0253474 --- /dev/null +++ b/hw/9pfs/9p-util-darwin.c @@ -0,0 +1,97 @@ +/* + * 9p utilities (Darwin Implementation) + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
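/*
 * Illustrative sketch (not part of the patch): local_readdir() and
 * proxy_readdir() above apply the same Darwin-only fix-up, because d_seekoff
 * is not reliably filled in by every macOS file system. A shared helper along
 * these lines could express the pattern once; readdir_with_seekoff() is a
 * hypothetical name. The qemu_dirent_off() helper added in 9p-util.h then
 * reads d_seekoff on Darwin and d_off elsewhere.
 */
static struct dirent *readdir_with_seekoff(DIR *stream)
{
    struct dirent *entry = readdir(stream);

    if (!entry) {
        return NULL;
    }
#ifdef CONFIG_DARWIN
    long off = telldir(stream);
    /* If telldir() fails, fail the entire readdir() call */
    if (off < 0) {
        return NULL;
    }
    entry->d_seekoff = off;
#endif
    return entry;
}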
+ */ + +#include "qemu/osdep.h" +#include "qemu/xattr.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "9p-util.h" + +ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size) +{ + int ret; + int fd = openat_file(dirfd, filename, + O_RDONLY | O_PATH_9P_UTIL | O_NOFOLLOW, 0); + if (fd == -1) { + return -1; + } + ret = fgetxattr(fd, name, value, size, 0, 0); + close_preserve_errno(fd); + return ret; +} + +ssize_t flistxattrat_nofollow(int dirfd, const char *filename, + char *list, size_t size) +{ + int ret; + int fd = openat_file(dirfd, filename, + O_RDONLY | O_PATH_9P_UTIL | O_NOFOLLOW, 0); + if (fd == -1) { + return -1; + } + ret = flistxattr(fd, list, size, 0); + close_preserve_errno(fd); + return ret; +} + +ssize_t fremovexattrat_nofollow(int dirfd, const char *filename, + const char *name) +{ + int ret; + int fd = openat_file(dirfd, filename, O_PATH_9P_UTIL | O_NOFOLLOW, 0); + if (fd == -1) { + return -1; + } + ret = fremovexattr(fd, name, 0); + close_preserve_errno(fd); + return ret; +} + +int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size, int flags) +{ + int ret; + int fd = openat_file(dirfd, filename, O_PATH_9P_UTIL | O_NOFOLLOW, 0); + if (fd == -1) { + return -1; + } + ret = fsetxattr(fd, name, value, size, 0, flags); + close_preserve_errno(fd); + return ret; +} + +/* + * As long as mknodat is not available on macOS, this workaround + * using pthread_fchdir_np is needed. + * + * Radar filed with Apple for implementing mknodat: + * rdar://FB9862426 (https://openradar.appspot.com/FB9862426) + */ +#if defined CONFIG_PTHREAD_FCHDIR_NP + +int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev) +{ + int preserved_errno, err; + if (!pthread_fchdir_np) { + error_report_once("pthread_fchdir_np() not available on this version of macOS"); + return -ENOTSUP; + } + if (pthread_fchdir_np(dirfd) < 0) { + return -1; + } + err = mknod(filename, mode, dev); + preserved_errno = errno; + /* Stop using the thread-local cwd */ + pthread_fchdir_np(-1); + if (err < 0) { + errno = preserved_errno; + } + return err; +} + +#endif diff --git a/hw/9pfs/9p-util.c b/hw/9pfs/9p-util-linux.c index 3221d9b498..db451b0784 100644 --- a/hw/9pfs/9p-util.c +++ b/hw/9pfs/9p-util-linux.c @@ -1,5 +1,5 @@ /* - * 9p utilities + * 9p utilities (Linux Implementation) * * Copyright IBM, Corp. 2017 * @@ -61,4 +61,10 @@ int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name, ret = lsetxattr(proc_path, name, value, size, flags); g_free(proc_path); return ret; + +} + +int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev) +{ + return mknodat(dirfd, filename, mode, dev); } diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h index 546f46dc7d..97e681e167 100644 --- a/hw/9pfs/9p-util.h +++ b/hw/9pfs/9p-util.h @@ -19,6 +19,23 @@ #define O_PATH_9P_UTIL 0 #endif +#ifdef CONFIG_DARWIN +#define qemu_fgetxattr(...) fgetxattr(__VA_ARGS__, 0, 0) +#define qemu_lgetxattr(...) getxattr(__VA_ARGS__, 0, XATTR_NOFOLLOW) +#define qemu_llistxattr(...) listxattr(__VA_ARGS__, XATTR_NOFOLLOW) +#define qemu_lremovexattr(...) 
removexattr(__VA_ARGS__, XATTR_NOFOLLOW) +static inline int qemu_lsetxattr(const char *path, const char *name, + const void *value, size_t size, int flags) { + return setxattr(path, name, value, size, 0, flags | XATTR_NOFOLLOW); +} +#else +#define qemu_fgetxattr fgetxattr +#define qemu_lgetxattr lgetxattr +#define qemu_llistxattr llistxattr +#define qemu_lremovexattr lremovexattr +#define qemu_lsetxattr lsetxattr +#endif + static inline void close_preserve_errno(int fd) { int serrno = errno; @@ -37,10 +54,13 @@ static inline int openat_file(int dirfd, const char *name, int flags, { int fd, serrno, ret; +#ifndef CONFIG_DARWIN again: +#endif fd = openat(dirfd, name, flags | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK, mode); if (fd == -1) { +#ifndef CONFIG_DARWIN if (errno == EPERM && (flags & O_NOATIME)) { /* * The client passed O_NOATIME but we lack permissions to honor it. @@ -53,6 +73,7 @@ again: flags &= ~O_NOATIME; goto again; } +#endif return -1; } @@ -78,4 +99,61 @@ ssize_t flistxattrat_nofollow(int dirfd, const char *filename, ssize_t fremovexattrat_nofollow(int dirfd, const char *filename, const char *name); +/* + * Darwin has d_seekoff, which appears to function similarly to d_off. + * However, it does not appear to be supported on all file systems, + * so ensure it is manually injected earlier and call here when + * needed. + */ +static inline off_t qemu_dirent_off(struct dirent *dent) +{ +#ifdef CONFIG_DARWIN + return dent->d_seekoff; +#else + return dent->d_off; +#endif +} + +/** + * qemu_dirent_dup() - Duplicate directory entry @dent. + * + * @dent: original directory entry to be duplicated + * Return: duplicated directory entry which should be freed with g_free() + * + * It is highly recommended to use this function instead of open coding + * duplication of dirent objects, because the actual struct dirent + * size may be bigger or shorter than sizeof(struct dirent) and correct + * handling is platform specific (see gitlab issue #841). + */ +static inline struct dirent *qemu_dirent_dup(struct dirent *dent) +{ + size_t sz = 0; +#if defined _DIRENT_HAVE_D_RECLEN + /* Avoid use of strlen() if platform supports d_reclen. */ + sz = dent->d_reclen; +#endif + /* + * Test sz for zero even if d_reclen is available + * because some drivers may set d_reclen to zero. + */ + if (sz == 0) { + /* Fallback to the most portable way. */ + sz = offsetof(struct dirent, d_name) + + strlen(dent->d_name) + 1; + } + return g_memdup(dent, sz); +} + +/* + * As long as mknodat is not available on macOS, this workaround + * using pthread_fchdir_np is needed. qemu_mknodat is defined in + * os-posix.c. pthread_fchdir_np is weakly linked here as a guard + * in case it disappears in future macOS versions, because it is + * is a private API. 
+ */ +#if defined CONFIG_DARWIN && defined CONFIG_PTHREAD_FCHDIR_NP +int pthread_fchdir_np(int fd) __attribute__((weak_import)); +#endif +int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev); + #endif diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 15b3f4d385..a6d6b3f835 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -27,12 +27,17 @@ #include "virtio-9p.h" #include "fsdev/qemu-fsdev.h" #include "9p-xattr.h" +#include "9p-util.h" #include "coth.h" #include "trace.h" #include "migration/blocker.h" #include "qemu/xxhash.h" #include <math.h> +#ifdef CONFIG_LINUX #include <linux/limits.h> +#else +#include <limits.h> +#endif int open_fd_hw; int total_open_fd; @@ -133,11 +138,20 @@ static int dotl_to_open_flags(int flags) { P9_DOTL_NONBLOCK, O_NONBLOCK } , { P9_DOTL_DSYNC, O_DSYNC }, { P9_DOTL_FASYNC, FASYNC }, +#ifndef CONFIG_DARWIN + { P9_DOTL_NOATIME, O_NOATIME }, + /* + * On Darwin, we could map to F_NOCACHE, which is + * similar, but doesn't quite have the same + * semantics. However, we don't support O_DIRECT + * even on linux at the moment, so we just ignore + * it here. + */ { P9_DOTL_DIRECT, O_DIRECT }, +#endif { P9_DOTL_LARGEFILE, O_LARGEFILE }, { P9_DOTL_DIRECTORY, O_DIRECTORY }, { P9_DOTL_NOFOLLOW, O_NOFOLLOW }, - { P9_DOTL_NOATIME, O_NOATIME }, { P9_DOTL_SYNC, O_SYNC }, }; @@ -166,10 +180,12 @@ static int get_dotl_openflags(V9fsState *s, int oflags) */ flags = dotl_to_open_flags(oflags); flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT); +#ifndef CONFIG_DARWIN /* * Ignore direct disk access hint until the server supports it. */ flags &= ~O_DIRECT; +#endif return flags; } @@ -612,8 +628,8 @@ static inline uint64_t mirror64bit(uint64_t value) ((uint64_t)mirror8bit((value >> 56) & 0xff)); } -/** - * @brief Parameter k for the Exponential Golomb algorihm to be used. +/* + * Parameter k for the Exponential Golomb algorihm to be used. * * The smaller this value, the smaller the minimum bit count for the Exp. * Golomb generated affixes will be (at lowest index) however for the @@ -626,28 +642,30 @@ static inline uint64_t mirror64bit(uint64_t value) * should be small, for a large amount of devices k might be increased * instead. The default of k=0 should be fine for most users though. * - * @b IMPORTANT: In case this ever becomes a runtime parameter; the value of + * IMPORTANT: In case this ever becomes a runtime parameter; the value of * k should not change as long as guest is still running! Because that would * cause completely different inode numbers to be generated on guest. */ #define EXP_GOLOMB_K 0 /** - * @brief Exponential Golomb algorithm for arbitrary k (including k=0). + * expGolombEncode() - Exponential Golomb algorithm for arbitrary k + * (including k=0). + * + * @n: natural number (or index) of the prefix to be generated + * (1, 2, 3, ...) + * @k: parameter k of Exp. Golomb algorithm to be used + * (see comment on EXP_GOLOMB_K macro for details about k) + * Return: prefix for given @n and @k * - * The Exponential Golomb algorithm generates @b prefixes (@b not suffixes!) + * The Exponential Golomb algorithm generates prefixes (NOT suffixes!) * with growing length and with the mathematical property of being * "prefix-free". The latter means the generated prefixes can be prepended * in front of arbitrary numbers and the resulting concatenated numbers are * guaranteed to be always unique. * * This is a minor adjustment to the original Exp. Golomb algorithm in the - * sense that lowest allowed index (@param n) starts with 1, not with zero. 
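/*
 * Worked illustration (not from the patch) of the prefix-free property the
 * comment above relies on. For k = 0, an Exp. Golomb style code for index n
 * can be formed by prepending (bit-length - 1) zero bits to the binary
 * representation of n:
 *
 *   n = 1  ->  1
 *   n = 2  ->  010
 *   n = 3  ->  011
 *   n = 4  ->  00100
 *
 * No code word is a prefix of another, so a fixed-width value appended after
 * such a prefix still yields a globally unique number. affixForIndex()
 * additionally mirrors the bits (invertAffix()) to turn these prefixes into
 * suffixes. The exact bit layout produced by expGolombEncode()/VariLenAffix
 * may differ from this simplified illustration.
 */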
- * - * @param n - natural number (or index) of the prefix to be generated - * (1, 2, 3, ...) - * @param k - parameter k of Exp. Golomb algorithm to be used - * (see comment on EXP_GOLOMB_K macro for details about k) + * sense that lowest allowed index (@n) starts with 1, not with zero. */ static VariLenAffix expGolombEncode(uint64_t n, int k) { @@ -661,7 +679,9 @@ static VariLenAffix expGolombEncode(uint64_t n, int k) } /** - * @brief Converts a suffix into a prefix, or a prefix into a suffix. + * invertAffix() - Converts a suffix into a prefix, or a prefix into a suffix. + * @affix: either suffix or prefix to be inverted + * Return: inversion of passed @affix * * Simply mirror all bits of the affix value, for the purpose to preserve * respectively the mathematical "prefix-free" or "suffix-free" property @@ -685,16 +705,16 @@ static VariLenAffix invertAffix(const VariLenAffix *affix) } /** - * @brief Generates suffix numbers with "suffix-free" property. + * affixForIndex() - Generates suffix numbers with "suffix-free" property. + * @index: natural number (or index) of the suffix to be generated + * (1, 2, 3, ...) + * Return: Suffix suitable to assemble unique number. * * This is just a wrapper function on top of the Exp. Golomb algorithm. * * Since the Exp. Golomb algorithm generates prefixes, but we need suffixes, * this function converts the Exp. Golomb prefixes into appropriate suffixes * which are still suitable for generating unique numbers. - * - * @param n - natural number (or index) of the suffix to be generated - * (1, 2, 3, ...) */ static VariLenAffix affixForIndex(uint64_t index) { @@ -794,8 +814,8 @@ static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev) return val->prefix_bits; } -/** - * @brief Slow / full mapping host inode nr -> guest inode nr. +/* + * Slow / full mapping host inode nr -> guest inode nr. * * This function performs a slower and much more costly remapping of an * original file inode number on host to an appropriate different inode @@ -807,7 +827,7 @@ static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev) * qid_path_suffixmap() failed. In practice this slow / full mapping is not * expected ever to be used at all though. * - * @see qid_path_suffixmap() for details + * See qid_path_suffixmap() for details * */ static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf, @@ -848,8 +868,8 @@ static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf, return 0; } -/** - * @brief Quick mapping host inode nr -> guest inode nr. +/* + * Quick mapping host inode nr -> guest inode nr. * * This function performs quick remapping of an original file inode number * on host to an appropriate different inode number on guest. This remapping @@ -1265,12 +1285,15 @@ static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path, /** - * Convert host filesystem's block size into an appropriate block size for - * 9p client (guest OS side). The value returned suggests an "optimum" block - * size for 9p I/O, i.e. to maximize performance. + * blksize_to_iounit() - Block size exposed to 9p client. + * Return: block size * * @pdu: 9p client request * @blksize: host filesystem's block size + * + * Convert host filesystem's block size into an appropriate block size for + * 9p client (guest OS side). The value returned suggests an "optimum" block + * size for 9p I/O, i.e. to maximize performance. 
*/ static int32_t blksize_to_iounit(const V9fsPDU *pdu, int32_t blksize) { @@ -1309,11 +1332,17 @@ static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf, v9lstat->st_blksize = stat_to_iounit(pdu, stbuf); v9lstat->st_blocks = stbuf->st_blocks; v9lstat->st_atime_sec = stbuf->st_atime; - v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec; v9lstat->st_mtime_sec = stbuf->st_mtime; - v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec; v9lstat->st_ctime_sec = stbuf->st_ctime; +#ifdef CONFIG_DARWIN + v9lstat->st_atime_nsec = stbuf->st_atimespec.tv_nsec; + v9lstat->st_mtime_nsec = stbuf->st_mtimespec.tv_nsec; + v9lstat->st_ctime_nsec = stbuf->st_ctimespec.tv_nsec; +#else + v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec; + v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec; v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec; +#endif /* Currently we only support BASIC fields in stat */ v9lstat->st_result_mask = P9_STATS_BASIC; @@ -2271,7 +2300,7 @@ static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu, count += len; v9fs_stat_free(&v9stat); v9fs_path_free(&path); - saved_dir_pos = dent->d_off; + saved_dir_pos = qemu_dirent_off(dent); } v9fs_readdir_unlock(&fidp->fs.dir); @@ -2376,10 +2405,11 @@ out_nofid: } /** - * Returns size required in Rreaddir response for the passed dirent @p name. + * v9fs_readdir_response_size() - Returns size required in Rreaddir response + * for the passed dirent @name. * - * @param name - directory entry's name (i.e. file name, directory name) - * @returns required size in bytes + * @name: directory entry's name (i.e. file name, directory name) + * Return: required size in bytes */ size_t v9fs_readdir_response_size(V9fsString *name) { @@ -2410,6 +2440,7 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, V9fsString name; int len, err = 0; int32_t count = 0; + off_t off; struct dirent *dent; struct stat *st; struct V9fsDirEnt *entries = NULL; @@ -2470,12 +2501,13 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, qid.version = 0; } + off = qemu_dirent_off(dent); v9fs_string_init(&name); v9fs_string_sprintf(&name, "%s", dent->d_name); /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */ len = pdu_marshal(pdu, 11 + count, "Qqbs", - &qid, dent->d_off, + &qid, off, dent->d_type, &name); v9fs_string_free(&name); @@ -3515,9 +3547,15 @@ static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf) f_bavail = stbuf->f_bavail / bsize_factor; f_files = stbuf->f_files; f_ffree = stbuf->f_ffree; +#ifdef CONFIG_DARWIN + fsid_val = (unsigned int)stbuf->f_fsid.val[0] | + (unsigned long long)stbuf->f_fsid.val[1] << 32; + f_namelen = NAME_MAX; +#else fsid_val = (unsigned int) stbuf->f_fsid.__val[0] | (unsigned long long)stbuf->f_fsid.__val[1] << 32; f_namelen = stbuf->f_namelen; +#endif return pdu_marshal(pdu, offset, "ddqqqqqqd", f_type, f_bsize, f_blocks, f_bfree, @@ -3919,7 +3957,7 @@ static void coroutine_fn v9fs_xattrcreate(void *opaque) rflags |= XATTR_REPLACE; } - if (size > XATTR_SIZE_MAX) { + if (size > P9_XATTR_SIZE_MAX) { err = -E2BIG; goto out_nofid; } diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h index 1567b67841..af2635fae9 100644 --- a/hw/9pfs/9p.h +++ b/hw/9pfs/9p.h @@ -100,8 +100,8 @@ typedef enum P9ProtoVersion { V9FS_PROTO_2000L = 0x02, } P9ProtoVersion; -/** - * @brief Minimum message size supported by this 9pfs server. +/* + * Minimum message size supported by this 9pfs server. 
* * A client establishes a session by sending a Tversion request along with a * 'msize' parameter which suggests the server a maximum message size ever to be @@ -231,7 +231,7 @@ static inline void v9fs_readdir_init(P9ProtoVersion proto_version, V9fsDir *dir) } } -/** +/* * Type for 9p fs drivers' (a.k.a. 9p backends) result of readdir requests, * which is a chained list of directory entries. */ @@ -289,8 +289,8 @@ typedef enum AffixType_t { AffixType_Suffix, /* A.k.a. postfix. */ } AffixType_t; -/** - * @brief Unique affix of variable length. +/* + * Unique affix of variable length. * * An affix is (currently) either a suffix or a prefix, which is either * going to be prepended (prefix) or appended (suffix) with some other @@ -304,7 +304,7 @@ typedef struct VariLenAffix { AffixType_t type; /* Whether this affix is a suffix or a prefix. */ uint64_t value; /* Actual numerical value of this affix. */ /* - * Lenght of the affix, that is how many (of the lowest) bits of @c value + * Lenght of the affix, that is how many (of the lowest) bits of ``value`` * must be used for appending/prepending this affix to its final resulting, * unique number. */ @@ -479,4 +479,22 @@ struct V9fsTransport { void (*push_and_notify)(V9fsPDU *pdu); }; +#if defined(XATTR_SIZE_MAX) +/* Linux */ +#define P9_XATTR_SIZE_MAX XATTR_SIZE_MAX +#elif defined(CONFIG_DARWIN) +/* + * Darwin doesn't seem to define a maximum xattr size in its user + * space header, so manually configure it across platforms as 64k. + * + * Having no limit at all can lead to QEMU crashing during large g_malloc() + * calls. Because QEMU does not currently support macOS guests, the below + * preliminary solution only works due to its being a reflection of the limit of + * Linux guests. + */ +#define P9_XATTR_SIZE_MAX 65536 +#else +#error Missing definition for P9_XATTR_SIZE_MAX for this host system +#endif + #endif diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c index c0873bde16..75148bc985 100644 --- a/hw/9pfs/codir.c +++ b/hw/9pfs/codir.c @@ -22,6 +22,8 @@ #include "qemu/coroutine.h" #include "qemu/main-loop.h" #include "coth.h" +#include "9p-xattr.h" +#include "9p-util.h" /* * Intended to be called from bottom-half (e.g. background I/O thread) @@ -166,7 +168,7 @@ static int do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp, } size += len; - saved_dir_pos = dent->d_off; + saved_dir_pos = qemu_dirent_off(dent); } /* restore (last) saved position */ @@ -182,14 +184,25 @@ out: } /** - * @brief Reads multiple directory entries in one rush. + * v9fs_co_readdir_many() - Reads multiple directory entries in one rush. + * + * @pdu: the causing 9p (T_readdir) client request + * @fidp: already opened directory where readdir shall be performed on + * @entries: output for directory entries (must not be NULL) + * @offset: initial position inside the directory the function shall + * seek to before retrieving the directory entries + * @maxsize: maximum result message body size (in bytes) + * @dostat: whether a stat() should be performed and returned for + * each directory entry + * Return: resulting response message body size (in bytes) on success, + * negative error code otherwise * * Retrieves the requested (max. amount of) directory entries from the fs * driver. This function must only be called by the main IO thread (top half). * Internally this function call will be dispatched to a background IO thread * (bottom half) where it is eventually executed by the fs driver. 
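/*
 * Usage sketch (simplified, based on the v9fs_co_readdir_many() documentation
 * being rewritten further below): fetch one batch of directory entries and
 * always release it with v9fs_free_dirents(), on success and on error alike.
 * Error handling and marshalling are omitted; example_readdir_batch() is a
 * hypothetical caller, not code from the patch.
 */
static int coroutine_fn example_readdir_batch(V9fsPDU *pdu, V9fsFidState *fidp,
                                              off_t offset, int32_t maxsize)
{
    struct V9fsDirEnt *entries = NULL;
    int size = v9fs_co_readdir_many(pdu, fidp, &entries, offset,
                                    maxsize, false /* dostat */);

    /* ... walk the returned list and marshal each entry ... */

    v9fs_free_dirents(entries);   /* required even when size < 0 */
    return size;
}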
* - * @discussion Acquiring multiple directory entries in one rush from the fs + * Acquiring multiple directory entries in one rush from the fs * driver, instead of retrieving each directory entry individually, is very * beneficial from performance point of view. Because for every fs driver * request latency is added, which in practice could lead to overall @@ -197,20 +210,9 @@ out: * directory) if every directory entry was individually requested from fs * driver. * - * @note You must @b ALWAYS call @c v9fs_free_dirents(entries) after calling + * NOTE: You must ALWAYS call v9fs_free_dirents(entries) after calling * v9fs_co_readdir_many(), both on success and on error cases of this - * function, to avoid memory leaks once @p entries are no longer needed. - * - * @param pdu - the causing 9p (T_readdir) client request - * @param fidp - already opened directory where readdir shall be performed on - * @param entries - output for directory entries (must not be NULL) - * @param offset - initial position inside the directory the function shall - * seek to before retrieving the directory entries - * @param maxsize - maximum result message body size (in bytes) - * @param dostat - whether a stat() should be performed and returned for - * each directory entry - * @returns resulting response message body size (in bytes) on success, - * negative error code otherwise + * function, to avoid memory leaks once @entries are no longer needed. */ int coroutine_fn v9fs_co_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp, struct V9fsDirEnt **entries, diff --git a/hw/9pfs/coth.h b/hw/9pfs/coth.h index f83c7dda7b..1a1edbdc2a 100644 --- a/hw/9pfs/coth.h +++ b/hw/9pfs/coth.h @@ -19,7 +19,7 @@ #include "qemu/coroutine.h" #include "9p.h" -/** +/* * we want to use bottom half because we want to make sure the below * sequence of events. * @@ -29,7 +29,7 @@ * we cannot swap step 1 and 2, because that would imply worker thread * can enter coroutine while step1 is still running * - * @b PERFORMANCE @b CONSIDERATIONS: As a rule of thumb, keep in mind + * PERFORMANCE CONSIDERATIONS: As a rule of thumb, keep in mind * that hopping between threads adds @b latency! 
So when handling a * 9pfs request, avoid calling v9fs_co_run_in_worker() too often, because * this might otherwise sum up to a significant, huge overall latency for diff --git a/hw/9pfs/meson.build b/hw/9pfs/meson.build index 99be5d9119..12443b6ad5 100644 --- a/hw/9pfs/meson.build +++ b/hw/9pfs/meson.build @@ -4,7 +4,6 @@ fs_ss.add(files( '9p-posix-acl.c', '9p-proxy.c', '9p-synth.c', - '9p-util.c', '9p-xattr-user.c', '9p-xattr.c', '9p.c', @@ -14,6 +13,8 @@ fs_ss.add(files( 'coth.c', 'coxattr.c', )) +fs_ss.add(when: 'CONFIG_LINUX', if_true: files('9p-util-linux.c')) +fs_ss.add(when: 'CONFIG_DARWIN', if_true: files('9p-util-darwin.c')) fs_ss.add(when: 'CONFIG_XEN', if_true: files('xen-9p-backend.c')) softmmu_ss.add_all(when: 'CONFIG_FSDEV_9P', if_true: fs_ss) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 46bf7ceddf..46a42502bc 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -2102,6 +2102,10 @@ static void machvirt_init(MachineState *machine) object_property_set_bool(cpuobj, "pmu", false, NULL); } + if (vmc->no_tcg_lpa2 && object_property_find(cpuobj, "lpa2")) { + object_property_set_bool(cpuobj, "lpa2", false, NULL); + } + if (object_property_find(cpuobj, "reset-cbar")) { object_property_set_int(cpuobj, "reset-cbar", vms->memmap[VIRT_CPUPERIPHS].base, @@ -3020,8 +3024,11 @@ DEFINE_VIRT_MACHINE_AS_LATEST(7, 0) static void virt_machine_6_2_options(MachineClass *mc) { + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + virt_machine_7_0_options(mc); compat_props_add(mc->compat_props, hw_compat_6_2, hw_compat_6_2_len); + vmc->no_tcg_lpa2 = true; } DEFINE_VIRT_MACHINE(6, 2) diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c index 860787580a..2785b9e849 100644 --- a/hw/block/dataplane/xen-block.c +++ b/hw/block/dataplane/xen-block.c @@ -21,6 +21,7 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" +#include "qemu/memalign.h" #include "qapi/error.h" #include "hw/xen/xen_common.h" #include "hw/block/xen_blkif.h" diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 21d18ac2e3..347875a0cd 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -32,6 +32,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/timer.h" +#include "qemu/memalign.h" #include "hw/irq.h" #include "hw/isa/isa.h" #include "hw/qdev-properties.h" diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c index 81f9f971d8..74c7190302 100644 --- a/hw/block/pflash_cfi01.c +++ b/hw/block/pflash_cfi01.c @@ -1023,7 +1023,7 @@ static void postload_update_cb(void *opaque, bool running, RunState state) { PFlashCFI01 *pfl = opaque; - /* This is called after bdrv_invalidate_cache_all. */ + /* This is called after bdrv_activate_all. 
*/ qemu_del_vm_change_state_handler(pfl->vmstate); pfl->vmstate = NULL; diff --git a/hw/display/edid-generate.c b/hw/display/edid-generate.c index bccf32af69..2cb819675e 100644 --- a/hw/display/edid-generate.c +++ b/hw/display/edid-generate.c @@ -255,33 +255,31 @@ static void edid_desc_dummy(uint8_t *desc) edid_desc_type(desc, 0x10); } -static void edid_desc_timing(uint8_t *desc, uint32_t refresh_rate, +static void edid_desc_timing(uint8_t *desc, const Timings *timings, uint32_t xres, uint32_t yres, uint32_t xmm, uint32_t ymm) { - Timings timings; - generate_timings(&timings, refresh_rate, xres, yres); - stl_le_p(desc, timings.clock); + stw_le_p(desc, timings->clock); desc[2] = xres & 0xff; - desc[3] = timings.xblank & 0xff; + desc[3] = timings->xblank & 0xff; desc[4] = (((xres & 0xf00) >> 4) | - ((timings.xblank & 0xf00) >> 8)); + ((timings->xblank & 0xf00) >> 8)); desc[5] = yres & 0xff; - desc[6] = timings.yblank & 0xff; + desc[6] = timings->yblank & 0xff; desc[7] = (((yres & 0xf00) >> 4) | - ((timings.yblank & 0xf00) >> 8)); + ((timings->yblank & 0xf00) >> 8)); - desc[8] = timings.xfront & 0xff; - desc[9] = timings.xsync & 0xff; + desc[8] = timings->xfront & 0xff; + desc[9] = timings->xsync & 0xff; - desc[10] = (((timings.yfront & 0x00f) << 4) | - ((timings.ysync & 0x00f) << 0)); - desc[11] = (((timings.xfront & 0x300) >> 2) | - ((timings.xsync & 0x300) >> 4) | - ((timings.yfront & 0x030) >> 2) | - ((timings.ysync & 0x030) >> 4)); + desc[10] = (((timings->yfront & 0x00f) << 4) | + ((timings->ysync & 0x00f) << 0)); + desc[11] = (((timings->xfront & 0x300) >> 2) | + ((timings->xsync & 0x300) >> 4) | + ((timings->yfront & 0x030) >> 2) | + ((timings->ysync & 0x030) >> 4)); desc[12] = xmm & 0xff; desc[13] = ymm & 0xff; @@ -348,13 +346,10 @@ static void init_displayid(uint8_t *did) edid_checksum(did + 1, did[2] + 4); } -static void qemu_displayid_generate(uint8_t *did, uint32_t refresh_rate, +static void qemu_displayid_generate(uint8_t *did, const Timings *timings, uint32_t xres, uint32_t yres, uint32_t xmm, uint32_t ymm) { - Timings timings; - generate_timings(&timings, refresh_rate, xres, yres); - did[0] = 0x70; /* display id extension */ did[1] = 0x13; /* version 1.3 */ did[2] = 23; /* length */ @@ -364,21 +359,21 @@ static void qemu_displayid_generate(uint8_t *did, uint32_t refresh_rate, did[6] = 0x00; /* revision */ did[7] = 0x14; /* block length */ - did[8] = timings.clock & 0xff; - did[9] = (timings.clock & 0xff00) >> 8; - did[10] = (timings.clock & 0xff0000) >> 16; + did[8] = timings->clock & 0xff; + did[9] = (timings->clock & 0xff00) >> 8; + did[10] = (timings->clock & 0xff0000) >> 16; did[11] = 0x88; /* leave aspect ratio undefined */ stw_le_p(did + 12, 0xffff & (xres - 1)); - stw_le_p(did + 14, 0xffff & (timings.xblank - 1)); - stw_le_p(did + 16, 0xffff & (timings.xfront - 1)); - stw_le_p(did + 18, 0xffff & (timings.xsync - 1)); + stw_le_p(did + 14, 0xffff & (timings->xblank - 1)); + stw_le_p(did + 16, 0xffff & (timings->xfront - 1)); + stw_le_p(did + 18, 0xffff & (timings->xsync - 1)); stw_le_p(did + 20, 0xffff & (yres - 1)); - stw_le_p(did + 22, 0xffff & (timings.yblank - 1)); - stw_le_p(did + 24, 0xffff & (timings.yfront - 1)); - stw_le_p(did + 26, 0xffff & (timings.ysync - 1)); + stw_le_p(did + 22, 0xffff & (timings->yblank - 1)); + stw_le_p(did + 24, 0xffff & (timings->yfront - 1)); + stw_le_p(did + 26, 0xffff & (timings->ysync - 1)); edid_checksum(did + 1, did[2] + 4); } @@ -386,6 +381,7 @@ static void qemu_displayid_generate(uint8_t *did, uint32_t refresh_rate, void 
qemu_edid_generate(uint8_t *edid, size_t size, qemu_edid_info *info) { + Timings timings; uint8_t *desc = edid + 54; uint8_t *xtra3 = NULL; uint8_t *dta = NULL; @@ -409,9 +405,6 @@ void qemu_edid_generate(uint8_t *edid, size_t size, if (!info->prefy) { info->prefy = 800; } - if (info->prefx >= 4096 || info->prefy >= 4096) { - large_screen = 1; - } if (info->width_mm && info->height_mm) { width_mm = info->width_mm; height_mm = info->height_mm; @@ -421,6 +414,11 @@ void qemu_edid_generate(uint8_t *edid, size_t size, height_mm = qemu_edid_dpi_to_mm(dpi, info->prefy); } + generate_timings(&timings, refresh_rate, info->prefx, info->prefy); + if (info->prefx >= 4096 || info->prefy >= 4096 || timings.clock >= 65536) { + large_screen = 1; + } + /* =============== extensions =============== */ if (size >= 256) { @@ -501,7 +499,7 @@ void qemu_edid_generate(uint8_t *edid, size_t size, if (!large_screen) { /* The DTD section has only 12 bits to store the resolution */ - edid_desc_timing(desc, refresh_rate, info->prefx, info->prefy, + edid_desc_timing(desc, &timings, info->prefx, info->prefy, width_mm, height_mm); desc = edid_desc_next(edid, dta, desc); } @@ -536,7 +534,7 @@ void qemu_edid_generate(uint8_t *edid, size_t size, /* =============== display id extensions =============== */ if (did && large_screen) { - qemu_displayid_generate(did, refresh_rate, info->prefx, info->prefy, + qemu_displayid_generate(did, &timings, info->prefx, info->prefy, width_mm, height_mm); } diff --git a/hw/display/trace-events b/hw/display/trace-events index 4a687d1b8e..91efc88f04 100644 --- a/hw/display/trace-events +++ b/hw/display/trace-events @@ -21,6 +21,9 @@ vmware_palette_write(uint32_t index, uint32_t value) "index %d, value 0x%x" vmware_scratch_read(uint32_t index, uint32_t value) "index %d, value 0x%x" vmware_scratch_write(uint32_t index, uint32_t value) "index %d, value 0x%x" vmware_setmode(uint32_t w, uint32_t h, uint32_t bpp) "%dx%d @ %d bpp" +vmware_verify_rect_less_than_zero(const char *name, const char *param, int x) "%s: %s was < 0 (%d)" +vmware_verify_rect_greater_than_bound(const char *name, const char *param, int bound, int x) "%s: %s was > %d (%d)" +vmware_verify_rect_surface_bound_exceeded(const char *name, const char *component, int bound, const char *param1, int value1, const char *param2, int value2) "%s: %s > %d (%s: %d, %s: %d)" # virtio-gpu-base.c virtio_gpu_features(bool virgl) "virgl %d" diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c index e2969a6c81..0cc43a1f15 100644 --- a/hw/display/vmware_vga.c +++ b/hw/display/vmware_vga.c @@ -297,46 +297,52 @@ static inline bool vmsvga_verify_rect(DisplaySurface *surface, int x, int y, int w, int h) { if (x < 0) { - fprintf(stderr, "%s: x was < 0 (%d)\n", name, x); + trace_vmware_verify_rect_less_than_zero(name, "x", x); return false; } if (x > SVGA_MAX_WIDTH) { - fprintf(stderr, "%s: x was > %d (%d)\n", name, SVGA_MAX_WIDTH, x); + trace_vmware_verify_rect_greater_than_bound(name, "x", SVGA_MAX_WIDTH, + x); return false; } if (w < 0) { - fprintf(stderr, "%s: w was < 0 (%d)\n", name, w); + trace_vmware_verify_rect_less_than_zero(name, "w", w); return false; } if (w > SVGA_MAX_WIDTH) { - fprintf(stderr, "%s: w was > %d (%d)\n", name, SVGA_MAX_WIDTH, w); + trace_vmware_verify_rect_greater_than_bound(name, "w", SVGA_MAX_WIDTH, + w); return false; } if (x + w > surface_width(surface)) { - fprintf(stderr, "%s: width was > %d (x: %d, w: %d)\n", - name, surface_width(surface), x, w); + trace_vmware_verify_rect_surface_bound_exceeded(name, 
"width", + surface_width(surface), + "x", x, "w", w); return false; } if (y < 0) { - fprintf(stderr, "%s: y was < 0 (%d)\n", name, y); + trace_vmware_verify_rect_less_than_zero(name, "y", y); return false; } if (y > SVGA_MAX_HEIGHT) { - fprintf(stderr, "%s: y was > %d (%d)\n", name, SVGA_MAX_HEIGHT, y); + trace_vmware_verify_rect_greater_than_bound(name, "y", SVGA_MAX_HEIGHT, + y); return false; } if (h < 0) { - fprintf(stderr, "%s: h was < 0 (%d)\n", name, h); + trace_vmware_verify_rect_less_than_zero(name, "h", h); return false; } if (h > SVGA_MAX_HEIGHT) { - fprintf(stderr, "%s: h was > %d (%d)\n", name, SVGA_MAX_HEIGHT, h); + trace_vmware_verify_rect_greater_than_bound(name, "y", SVGA_MAX_HEIGHT, + y); return false; } if (y + h > surface_height(surface)) { - fprintf(stderr, "%s: update height > %d (y: %d, h: %d)\n", - name, surface_height(surface), y, h); + trace_vmware_verify_rect_surface_bound_exceeded(name, "height", + surface_height(surface), + "y", y, "h", h); return false; } diff --git a/hw/ide/core.c b/hw/ide/core.c index 33463d9b8f..3a5afff5d7 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -30,6 +30,7 @@ #include "qemu/main-loop.h" #include "qemu/timer.h" #include "qemu/hw-version.h" +#include "qemu/memalign.h" #include "sysemu/sysemu.h" #include "sysemu/blockdev.h" #include "sysemu/dma.h" @@ -434,12 +435,16 @@ static const AIOCBInfo trim_aiocb_info = { static void ide_trim_bh_cb(void *opaque) { TrimAIOCB *iocb = opaque; + BlockBackend *blk = iocb->s->blk; iocb->common.cb(iocb->common.opaque, iocb->ret); qemu_bh_delete(iocb->bh); iocb->bh = NULL; qemu_aio_unref(iocb); + + /* Paired with an increment in ide_issue_trim() */ + blk_dec_in_flight(blk); } static void ide_issue_trim_cb(void *opaque, int ret) @@ -509,6 +514,9 @@ BlockAIOCB *ide_issue_trim( IDEState *s = opaque; TrimAIOCB *iocb; + /* Paired with a decrement in ide_trim_bh_cb() */ + blk_inc_in_flight(s->blk); + iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque); iocb->s = s; iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb); diff --git a/hw/intc/Kconfig b/hw/intc/Kconfig index 528e77b4a6..ec8d4cec29 100644 --- a/hw/intc/Kconfig +++ b/hw/intc/Kconfig @@ -73,6 +73,9 @@ config RISCV_ACLINT config RISCV_APLIC bool +config RISCV_IMSIC + bool + config SIFIVE_PLIC bool diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c index 6d3c8ee231..0b8f79a122 100644 --- a/hw/intc/arm_gicv3.c +++ b/hw/intc/arm_gicv3.c @@ -369,11 +369,19 @@ static const MemoryRegionOps gic_ops[] = { .read_with_attrs = gicv3_dist_read, .write_with_attrs = gicv3_dist_write, .endianness = DEVICE_NATIVE_ENDIAN, + .valid.min_access_size = 1, + .valid.max_access_size = 8, + .impl.min_access_size = 1, + .impl.max_access_size = 8, }, { .read_with_attrs = gicv3_redist_read, .write_with_attrs = gicv3_redist_write, .endianness = DEVICE_NATIVE_ENDIAN, + .valid.min_access_size = 1, + .valid.max_access_size = 8, + .impl.min_access_size = 1, + .impl.max_access_size = 8, } }; diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c index d7e03d0cab..1a3d440a54 100644 --- a/hw/intc/arm_gicv3_cpuif.c +++ b/hw/intc/arm_gicv3_cpuif.c @@ -612,7 +612,8 @@ static uint64_t icv_hppir_read(CPUARMState *env, const ARMCPRegInfo *ri) } } - trace_gicv3_icv_hppir_read(grp, gicv3_redist_affid(cs), value); + trace_gicv3_icv_hppir_read(ri->crm == 8 ? 
0 : 1, + gicv3_redist_affid(cs), value); return value; } diff --git a/hw/intc/arm_gicv3_dist.c b/hw/intc/arm_gicv3_dist.c index 4164500ea9..28d913b211 100644 --- a/hw/intc/arm_gicv3_dist.c +++ b/hw/intc/arm_gicv3_dist.c @@ -838,7 +838,7 @@ MemTxResult gicv3_dist_read(void *opaque, hwaddr offset, uint64_t *data, if (!r) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest read at offset " TARGET_FMT_plx - "size %u\n", __func__, offset, size); + " size %u\n", __func__, offset, size); trace_gicv3_dist_badread(offset, size, attrs.secure); /* The spec requires that reserved registers are RAZ/WI; * so use MEMTX_ERROR returns from leaf functions as a way to @@ -879,7 +879,7 @@ MemTxResult gicv3_dist_write(void *opaque, hwaddr offset, uint64_t data, if (!r) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest write at offset " TARGET_FMT_plx - "size %u\n", __func__, offset, size); + " size %u\n", __func__, offset, size); trace_gicv3_dist_badwrite(offset, data, size, attrs.secure); /* The spec requires that reserved registers are RAZ/WI; * so use MEMTX_ERROR returns from leaf functions as a way to diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c index 4f598d3c14..b96b874afd 100644 --- a/hw/intc/arm_gicv3_its.c +++ b/hw/intc/arm_gicv3_its.c @@ -161,16 +161,22 @@ static MemTxResult get_cte(GICv3ITSState *s, uint16_t icid, CTEntry *cte) if (entry_addr == -1) { /* No L2 table entry, i.e. no valid CTE, or a memory error */ cte->valid = false; - return res; + goto out; } cteval = address_space_ldq_le(as, entry_addr, MEMTXATTRS_UNSPECIFIED, &res); if (res != MEMTX_OK) { - return res; + goto out; } cte->valid = FIELD_EX64(cteval, CTE, VALID); cte->rdbase = FIELD_EX64(cteval, CTE, RDBASE); - return MEMTX_OK; +out: + if (res != MEMTX_OK) { + trace_gicv3_its_cte_read_fault(icid); + } else { + trace_gicv3_its_cte_read(icid, cte->valid, cte->rdbase); + } + return res; } /* @@ -187,6 +193,10 @@ static bool update_ite(GICv3ITSState *s, uint32_t eventid, const DTEntry *dte, uint64_t itel = 0; uint32_t iteh = 0; + trace_gicv3_its_ite_write(dte->ittaddr, eventid, ite->valid, + ite->inttype, ite->intid, ite->icid, + ite->vpeid, ite->doorbell); + if (ite->valid) { itel = FIELD_DP64(itel, ITE_L, VALID, 1); itel = FIELD_DP64(itel, ITE_L, INTTYPE, ite->inttype); @@ -221,11 +231,13 @@ static MemTxResult get_ite(GICv3ITSState *s, uint32_t eventid, itel = address_space_ldq_le(as, iteaddr, MEMTXATTRS_UNSPECIFIED, &res); if (res != MEMTX_OK) { + trace_gicv3_its_ite_read_fault(dte->ittaddr, eventid); return res; } iteh = address_space_ldl_le(as, iteaddr + 8, MEMTXATTRS_UNSPECIFIED, &res); if (res != MEMTX_OK) { + trace_gicv3_its_ite_read_fault(dte->ittaddr, eventid); return res; } @@ -235,6 +247,9 @@ static MemTxResult get_ite(GICv3ITSState *s, uint32_t eventid, ite->icid = FIELD_EX64(itel, ITE_L, ICID); ite->vpeid = FIELD_EX64(itel, ITE_L, VPEID); ite->doorbell = FIELD_EX64(iteh, ITE_H, DOORBELL); + trace_gicv3_its_ite_read(dte->ittaddr, eventid, ite->valid, + ite->inttype, ite->intid, ite->icid, + ite->vpeid, ite->doorbell); return MEMTX_OK; } @@ -254,17 +269,23 @@ static MemTxResult get_dte(GICv3ITSState *s, uint32_t devid, DTEntry *dte) if (entry_addr == -1) { /* No L2 table entry, i.e. 
no valid DTE, or a memory error */ dte->valid = false; - return res; + goto out; } dteval = address_space_ldq_le(as, entry_addr, MEMTXATTRS_UNSPECIFIED, &res); if (res != MEMTX_OK) { - return res; + goto out; } dte->valid = FIELD_EX64(dteval, DTE, VALID); dte->size = FIELD_EX64(dteval, DTE, SIZE); /* DTE word field stores bits [51:8] of the ITT address */ dte->ittaddr = FIELD_EX64(dteval, DTE, ITTADDR) << ITTADDR_SHIFT; - return MEMTX_OK; +out: + if (res != MEMTX_OK) { + trace_gicv3_its_dte_read_fault(devid); + } else { + trace_gicv3_its_dte_read(devid, dte->valid, dte->size, dte->ittaddr); + } + return res; } /* @@ -366,6 +387,19 @@ static ItsCmdResult process_its_cmd(GICv3ITSState *s, const uint64_t *cmdpkt, devid = (cmdpkt[0] & DEVID_MASK) >> DEVID_SHIFT; eventid = cmdpkt[1] & EVENTID_MASK; + switch (cmd) { + case INTERRUPT: + trace_gicv3_its_cmd_int(devid, eventid); + break; + case CLEAR: + trace_gicv3_its_cmd_clear(devid, eventid); + break; + case DISCARD: + trace_gicv3_its_cmd_discard(devid, eventid); + break; + default: + g_assert_not_reached(); + } return do_process_its_cmd(s, devid, eventid, cmd); } @@ -382,15 +416,16 @@ static ItsCmdResult process_mapti(GICv3ITSState *s, const uint64_t *cmdpkt, devid = (cmdpkt[0] & DEVID_MASK) >> DEVID_SHIFT; eventid = cmdpkt[1] & EVENTID_MASK; + icid = cmdpkt[2] & ICID_MASK; if (ignore_pInt) { pIntid = eventid; + trace_gicv3_its_cmd_mapi(devid, eventid, icid); } else { pIntid = (cmdpkt[1] & pINTID_MASK) >> pINTID_SHIFT; + trace_gicv3_its_cmd_mapti(devid, eventid, icid, pIntid); } - icid = cmdpkt[2] & ICID_MASK; - if (devid >= s->dt.num_entries) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid command attributes: devid %d>=%d", @@ -451,6 +486,8 @@ static bool update_cte(GICv3ITSState *s, uint16_t icid, const CTEntry *cte) uint64_t cteval = 0; MemTxResult res = MEMTX_OK; + trace_gicv3_its_cte_write(icid, cte->valid, cte->rdbase); + if (cte->valid) { /* add mapping entry to collection table */ cteval = FIELD_DP64(cteval, CTE, VALID, 1); @@ -484,6 +521,7 @@ static ItsCmdResult process_mapc(GICv3ITSState *s, const uint64_t *cmdpkt) } else { cte.rdbase = 0; } + trace_gicv3_its_cmd_mapc(icid, cte.rdbase, cte.valid); if (icid >= s->ct.num_entries) { qemu_log_mask(LOG_GUEST_ERROR, "ITS MAPC: invalid ICID 0x%d", icid); @@ -509,6 +547,8 @@ static bool update_dte(GICv3ITSState *s, uint32_t devid, const DTEntry *dte) uint64_t dteval = 0; MemTxResult res = MEMTX_OK; + trace_gicv3_its_dte_write(devid, dte->valid, dte->size, dte->ittaddr); + if (dte->valid) { /* add mapping entry to device table */ dteval = FIELD_DP64(dteval, DTE, VALID, 1); @@ -539,6 +579,8 @@ static ItsCmdResult process_mapd(GICv3ITSState *s, const uint64_t *cmdpkt) dte.ittaddr = (cmdpkt[2] & ITTADDR_MASK) >> ITTADDR_SHIFT; dte.valid = cmdpkt[2] & CMD_FIELD_VALID_MASK; + trace_gicv3_its_cmd_mapd(devid, dte.size, dte.ittaddr, dte.valid); + if (devid >= s->dt.num_entries) { qemu_log_mask(LOG_GUEST_ERROR, "ITS MAPD: invalid device ID field 0x%x >= 0x%x\n", @@ -562,6 +604,8 @@ static ItsCmdResult process_movall(GICv3ITSState *s, const uint64_t *cmdpkt) rd1 = FIELD_EX64(cmdpkt[2], MOVALL_2, RDBASE1); rd2 = FIELD_EX64(cmdpkt[3], MOVALL_3, RDBASE2); + trace_gicv3_its_cmd_movall(rd1, rd2); + if (rd1 >= s->gicv3->num_cpu) { qemu_log_mask(LOG_GUEST_ERROR, "%s: RDBASE1 %" PRId64 @@ -601,6 +645,8 @@ static ItsCmdResult process_movi(GICv3ITSState *s, const uint64_t *cmdpkt) eventid = FIELD_EX64(cmdpkt[1], MOVI_1, EVENTID); new_icid = FIELD_EX64(cmdpkt[2], MOVI_2, ICID); + trace_gicv3_its_cmd_movi(devid, 
eventid, new_icid); + if (devid >= s->dt.num_entries) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid command attributes: devid %d>=%d", @@ -779,6 +825,7 @@ static void process_cmdq(GICv3ITSState *s) * is already consistent by the time SYNC command is executed. * Hence no further processing is required for SYNC command. */ + trace_gicv3_its_cmd_sync(); break; case GITS_CMD_MAPD: result = process_mapd(s, cmdpkt); @@ -803,6 +850,7 @@ static void process_cmdq(GICv3ITSState *s) * need to trigger lpi priority re-calculation to be in * sync with LPI config table or pending table changes. */ + trace_gicv3_its_cmd_inv(); for (i = 0; i < s->gicv3->num_cpu; i++) { gicv3_redist_update_lpi(&s->gicv3->cpu[i]); } @@ -814,6 +862,7 @@ static void process_cmdq(GICv3ITSState *s) result = process_movall(s, cmdpkt); break; default: + trace_gicv3_its_cmd_unknown(cmd); break; } if (result == CMD_CONTINUE) { @@ -1264,7 +1313,7 @@ static MemTxResult gicv3_its_read(void *opaque, hwaddr offset, uint64_t *data, if (!result) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest read at offset " TARGET_FMT_plx - "size %u\n", __func__, offset, size); + " size %u\n", __func__, offset, size); trace_gicv3_its_badread(offset, size); /* * The spec requires that reserved registers are RAZ/WI; @@ -1300,7 +1349,7 @@ static MemTxResult gicv3_its_write(void *opaque, hwaddr offset, uint64_t data, if (!result) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest write at offset " TARGET_FMT_plx - "size %u\n", __func__, offset, size); + " size %u\n", __func__, offset, size); trace_gicv3_its_badwrite(offset, data, size); /* * The spec requires that reserved registers are RAZ/WI; diff --git a/hw/intc/meson.build b/hw/intc/meson.build index d953197413..81ccdb0d78 100644 --- a/hw/intc/meson.build +++ b/hw/intc/meson.build @@ -51,6 +51,7 @@ specific_ss.add(when: 'CONFIG_S390_FLIC_KVM', if_true: files('s390_flic_kvm.c')) specific_ss.add(when: 'CONFIG_SH_INTC', if_true: files('sh_intc.c')) specific_ss.add(when: 'CONFIG_RISCV_ACLINT', if_true: files('riscv_aclint.c')) specific_ss.add(when: 'CONFIG_RISCV_APLIC', if_true: files('riscv_aplic.c')) +specific_ss.add(when: 'CONFIG_RISCV_IMSIC', if_true: files('riscv_imsic.c')) specific_ss.add(when: 'CONFIG_SIFIVE_PLIC', if_true: files('sifive_plic.c')) specific_ss.add(when: 'CONFIG_XICS', if_true: files('xics.c', 'xive2.c')) specific_ss.add(when: ['CONFIG_KVM', 'CONFIG_XICS'], diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c new file mode 100644 index 0000000000..8615e4cc1d --- /dev/null +++ b/hw/intc/riscv_imsic.c @@ -0,0 +1,448 @@ +/* + * RISC-V IMSIC (Incoming Message Signaled Interrupt Controller) + * + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "qemu/error-report.h" +#include "qemu/bswap.h" +#include "exec/address-spaces.h" +#include "hw/sysbus.h" +#include "hw/pci/msi.h" +#include "hw/boards.h" +#include "hw/qdev-properties.h" +#include "hw/intc/riscv_imsic.h" +#include "hw/irq.h" +#include "target/riscv/cpu.h" +#include "target/riscv/cpu_bits.h" +#include "sysemu/sysemu.h" +#include "migration/vmstate.h" + +#define IMSIC_MMIO_PAGE_LE 0x00 +#define IMSIC_MMIO_PAGE_BE 0x04 + +#define IMSIC_MIN_ID ((IMSIC_EIPx_BITS * 2) - 1) +#define IMSIC_MAX_ID (IMSIC_TOPEI_IID_MASK) + +#define IMSIC_EISTATE_PENDING (1U << 0) +#define IMSIC_EISTATE_ENABLED (1U << 1) +#define IMSIC_EISTATE_ENPEND (IMSIC_EISTATE_ENABLED | \ + IMSIC_EISTATE_PENDING) + +static uint32_t riscv_imsic_topei(RISCVIMSICState *imsic, uint32_t page) +{ + uint32_t i, max_irq, base; + + base = page * imsic->num_irqs; + max_irq = (imsic->eithreshold[page] && + (imsic->eithreshold[page] <= imsic->num_irqs)) ? + imsic->eithreshold[page] : imsic->num_irqs; + for (i = 1; i < max_irq; i++) { + if ((imsic->eistate[base + i] & IMSIC_EISTATE_ENPEND) == + IMSIC_EISTATE_ENPEND) { + return (i << IMSIC_TOPEI_IID_SHIFT) | i; + } + } + + return 0; +} + +static void riscv_imsic_update(RISCVIMSICState *imsic, uint32_t page) +{ + if (imsic->eidelivery[page] && riscv_imsic_topei(imsic, page)) { + qemu_irq_raise(imsic->external_irqs[page]); + } else { + qemu_irq_lower(imsic->external_irqs[page]); + } +} + +static int riscv_imsic_eidelivery_rmw(RISCVIMSICState *imsic, uint32_t page, + target_ulong *val, + target_ulong new_val, + target_ulong wr_mask) +{ + target_ulong old_val = imsic->eidelivery[page]; + + if (val) { + *val = old_val; + } + + wr_mask &= 0x1; + imsic->eidelivery[page] = (old_val & ~wr_mask) | (new_val & wr_mask); + + riscv_imsic_update(imsic, page); + return 0; +} + +static int riscv_imsic_eithreshold_rmw(RISCVIMSICState *imsic, uint32_t page, + target_ulong *val, + target_ulong new_val, + target_ulong wr_mask) +{ + target_ulong old_val = imsic->eithreshold[page]; + + if (val) { + *val = old_val; + } + + wr_mask &= IMSIC_MAX_ID; + imsic->eithreshold[page] = (old_val & ~wr_mask) | (new_val & wr_mask); + + riscv_imsic_update(imsic, page); + return 0; +} + +static int riscv_imsic_topei_rmw(RISCVIMSICState *imsic, uint32_t page, + target_ulong *val, target_ulong new_val, + target_ulong wr_mask) +{ + uint32_t base, topei = riscv_imsic_topei(imsic, page); + + /* Read pending and enabled interrupt with highest priority */ + if (val) { + *val = topei; + } + + /* Writes ignore value and clear top pending interrupt */ + if (topei && wr_mask) { + topei >>= IMSIC_TOPEI_IID_SHIFT; + base = page * imsic->num_irqs; + if (topei) { + imsic->eistate[base + topei] &= ~IMSIC_EISTATE_PENDING; + } + + riscv_imsic_update(imsic, page); + } + + return 0; +} + +static int riscv_imsic_eix_rmw(RISCVIMSICState *imsic, + uint32_t xlen, uint32_t page, + uint32_t num, bool pend, target_ulong *val, + target_ulong new_val, target_ulong wr_mask) +{ + uint32_t i, base; + target_ulong mask; + uint32_t state = (pend) ? IMSIC_EISTATE_PENDING : IMSIC_EISTATE_ENABLED; + + if (xlen != 32) { + if (num & 0x1) { + return -EINVAL; + } + num >>= 1; + } + if (num >= (imsic->num_irqs / xlen)) { + return -EINVAL; + } + + base = (page * imsic->num_irqs) + (num * xlen); + + if (val) { + *val = 0; + for (i = 0; i < xlen; i++) { + mask = (target_ulong)1 << i; + *val |= (imsic->eistate[base + i] & state) ? 
mask : 0; + } + } + + for (i = 0; i < xlen; i++) { + /* Bit0 of eip0 and eie0 are read-only zero */ + if (!num && !i) { + continue; + } + + mask = (target_ulong)1 << i; + if (wr_mask & mask) { + if (new_val & mask) { + imsic->eistate[base + i] |= state; + } else { + imsic->eistate[base + i] &= ~state; + } + } + } + + riscv_imsic_update(imsic, page); + return 0; +} + +static int riscv_imsic_rmw(void *arg, target_ulong reg, target_ulong *val, + target_ulong new_val, target_ulong wr_mask) +{ + RISCVIMSICState *imsic = arg; + uint32_t isel, priv, virt, vgein, xlen, page; + + priv = AIA_IREG_PRIV(reg); + virt = AIA_IREG_VIRT(reg); + isel = AIA_IREG_ISEL(reg); + vgein = AIA_IREG_VGEIN(reg); + xlen = AIA_IREG_XLEN(reg); + + if (imsic->mmode) { + if (priv == PRV_M && !virt) { + page = 0; + } else { + goto err; + } + } else { + if (priv == PRV_S) { + if (virt) { + if (vgein && vgein < imsic->num_pages) { + page = vgein; + } else { + goto err; + } + } else { + page = 0; + } + } else { + goto err; + } + } + + switch (isel) { + case ISELECT_IMSIC_EIDELIVERY: + return riscv_imsic_eidelivery_rmw(imsic, page, val, + new_val, wr_mask); + case ISELECT_IMSIC_EITHRESHOLD: + return riscv_imsic_eithreshold_rmw(imsic, page, val, + new_val, wr_mask); + case ISELECT_IMSIC_TOPEI: + return riscv_imsic_topei_rmw(imsic, page, val, new_val, wr_mask); + case ISELECT_IMSIC_EIP0 ... ISELECT_IMSIC_EIP63: + return riscv_imsic_eix_rmw(imsic, xlen, page, + isel - ISELECT_IMSIC_EIP0, + true, val, new_val, wr_mask); + case ISELECT_IMSIC_EIE0 ... ISELECT_IMSIC_EIE63: + return riscv_imsic_eix_rmw(imsic, xlen, page, + isel - ISELECT_IMSIC_EIE0, + false, val, new_val, wr_mask); + default: + break; + }; + +err: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Invalid register priv=%d virt=%d isel=%d vgein=%d\n", + __func__, priv, virt, isel, vgein); + return -EINVAL; +} + +static uint64_t riscv_imsic_read(void *opaque, hwaddr addr, unsigned size) +{ + RISCVIMSICState *imsic = opaque; + + /* Reads must be 4 byte words */ + if ((addr & 0x3) != 0) { + goto err; + } + + /* Reads cannot be out of range */ + if (addr > IMSIC_MMIO_SIZE(imsic->num_pages)) { + goto err; + } + + return 0; + +err: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Invalid register read 0x%" HWADDR_PRIx "\n", + __func__, addr); + return 0; +} + +static void riscv_imsic_write(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + RISCVIMSICState *imsic = opaque; + uint32_t page; + + /* Writes must be 4 byte words */ + if ((addr & 0x3) != 0) { + goto err; + } + + /* Writes cannot be out of range */ + if (addr > IMSIC_MMIO_SIZE(imsic->num_pages)) { + goto err; + } + + /* Writes only supported for MSI little-endian registers */ + page = addr >> IMSIC_MMIO_PAGE_SHIFT; + if ((addr & (IMSIC_MMIO_PAGE_SZ - 1)) == IMSIC_MMIO_PAGE_LE) { + if (value && (value < imsic->num_irqs)) { + imsic->eistate[(page * imsic->num_irqs) + value] |= + IMSIC_EISTATE_PENDING; + } + } + + /* Update CPU external interrupt status */ + riscv_imsic_update(imsic, page); + + return; + +err: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Invalid register write 0x%" HWADDR_PRIx "\n", + __func__, addr); +} + +static const MemoryRegionOps riscv_imsic_ops = { + .read = riscv_imsic_read, + .write = riscv_imsic_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4 + } +}; + +static void riscv_imsic_realize(DeviceState *dev, Error **errp) +{ + RISCVIMSICState *imsic = RISCV_IMSIC(dev); + RISCVCPU *rcpu = RISCV_CPU(qemu_get_cpu(imsic->hartid)); + CPUState *cpu = 
qemu_get_cpu(imsic->hartid); + CPURISCVState *env = cpu ? cpu->env_ptr : NULL; + + imsic->num_eistate = imsic->num_pages * imsic->num_irqs; + imsic->eidelivery = g_new0(uint32_t, imsic->num_pages); + imsic->eithreshold = g_new0(uint32_t, imsic->num_pages); + imsic->eistate = g_new0(uint32_t, imsic->num_eistate); + + memory_region_init_io(&imsic->mmio, OBJECT(dev), &riscv_imsic_ops, + imsic, TYPE_RISCV_IMSIC, + IMSIC_MMIO_SIZE(imsic->num_pages)); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &imsic->mmio); + + /* Claim the CPU interrupt to be triggered by this IMSIC */ + if (riscv_cpu_claim_interrupts(rcpu, + (imsic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) { + error_setg(errp, "%s already claimed", + (imsic->mmode) ? "MEIP" : "SEIP"); + return; + } + + /* Create output IRQ lines */ + imsic->external_irqs = g_malloc(sizeof(qemu_irq) * imsic->num_pages); + qdev_init_gpio_out(dev, imsic->external_irqs, imsic->num_pages); + + /* Force select AIA feature and setup CSR read-modify-write callback */ + if (env) { + riscv_set_feature(env, RISCV_FEATURE_AIA); + if (!imsic->mmode) { + riscv_cpu_set_geilen(env, imsic->num_pages - 1); + } + riscv_cpu_set_aia_ireg_rmw_fn(env, (imsic->mmode) ? PRV_M : PRV_S, + riscv_imsic_rmw, imsic); + } + + msi_nonbroken = true; +} + +static Property riscv_imsic_properties[] = { + DEFINE_PROP_BOOL("mmode", RISCVIMSICState, mmode, 0), + DEFINE_PROP_UINT32("hartid", RISCVIMSICState, hartid, 0), + DEFINE_PROP_UINT32("num-pages", RISCVIMSICState, num_pages, 0), + DEFINE_PROP_UINT32("num-irqs", RISCVIMSICState, num_irqs, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription vmstate_riscv_imsic = { + .name = "riscv_imsic", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_VARRAY_UINT32(eidelivery, RISCVIMSICState, + num_pages, 0, + vmstate_info_uint32, uint32_t), + VMSTATE_VARRAY_UINT32(eithreshold, RISCVIMSICState, + num_pages, 0, + vmstate_info_uint32, uint32_t), + VMSTATE_VARRAY_UINT32(eistate, RISCVIMSICState, + num_eistate, 0, + vmstate_info_uint32, uint32_t), + VMSTATE_END_OF_LIST() + } +}; + +static void riscv_imsic_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + device_class_set_props(dc, riscv_imsic_properties); + dc->realize = riscv_imsic_realize; + dc->vmsd = &vmstate_riscv_imsic; +} + +static const TypeInfo riscv_imsic_info = { + .name = TYPE_RISCV_IMSIC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(RISCVIMSICState), + .class_init = riscv_imsic_class_init, +}; + +static void riscv_imsic_register_types(void) +{ + type_register_static(&riscv_imsic_info); +} + +type_init(riscv_imsic_register_types) + +/* + * Create IMSIC device. 
+ */ +DeviceState *riscv_imsic_create(hwaddr addr, uint32_t hartid, bool mmode, + uint32_t num_pages, uint32_t num_ids) +{ + DeviceState *dev = qdev_new(TYPE_RISCV_IMSIC); + CPUState *cpu = qemu_get_cpu(hartid); + uint32_t i; + + assert(!(addr & (IMSIC_MMIO_PAGE_SZ - 1))); + if (mmode) { + assert(num_pages == 1); + } else { + assert(num_pages >= 1 && num_pages <= (IRQ_LOCAL_GUEST_MAX + 1)); + } + assert(IMSIC_MIN_ID <= num_ids); + assert(num_ids <= IMSIC_MAX_ID); + assert((num_ids & IMSIC_MIN_ID) == IMSIC_MIN_ID); + + qdev_prop_set_bit(dev, "mmode", mmode); + qdev_prop_set_uint32(dev, "hartid", hartid); + qdev_prop_set_uint32(dev, "num-pages", num_pages); + qdev_prop_set_uint32(dev, "num-irqs", num_ids + 1); + + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); + + for (i = 0; i < num_pages; i++) { + if (!i) { + qdev_connect_gpio_out_named(dev, NULL, i, + qdev_get_gpio_in(DEVICE(cpu), + (mmode) ? IRQ_M_EXT : IRQ_S_EXT)); + } else { + qdev_connect_gpio_out_named(dev, NULL, i, + qdev_get_gpio_in(DEVICE(cpu), + IRQ_LOCAL_MAX + i - 1)); + } + } + + return dev; +} diff --git a/hw/intc/trace-events b/hw/intc/trace-events index b28cda4e08..53414aa197 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -176,6 +176,27 @@ gicv3_its_write(uint64_t offset, uint64_t data, unsigned size) "GICv3 ITS write: gicv3_its_badwrite(uint64_t offset, uint64_t data, unsigned size) "GICv3 ITS write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u: error" gicv3_its_translation_write(uint64_t offset, uint64_t data, unsigned size, uint32_t requester_id) "GICv3 ITS TRANSLATER write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u requester_id 0x%x" gicv3_its_process_command(uint32_t rd_offset, uint8_t cmd) "GICv3 ITS: processing command at offset 0x%x: 0x%x" +gicv3_its_cmd_int(uint32_t devid, uint32_t eventid) "GICv3 ITS: command INT DeviceID 0x%x EventID 0x%x" +gicv3_its_cmd_clear(uint32_t devid, uint32_t eventid) "GICv3 ITS: command CLEAR DeviceID 0x%x EventID 0x%x" +gicv3_its_cmd_discard(uint32_t devid, uint32_t eventid) "GICv3 ITS: command DISCARD DeviceID 0x%x EventID 0x%x" +gicv3_its_cmd_sync(void) "GICv3 ITS: command SYNC" +gicv3_its_cmd_mapd(uint32_t devid, uint32_t size, uint64_t ittaddr, int valid) "GICv3 ITS: command MAPD DeviceID 0x%x Size 0x%x ITT_addr 0x%" PRIx64 " V %d" +gicv3_its_cmd_mapc(uint32_t icid, uint64_t rdbase, int valid) "GICv3 ITS: command MAPC ICID 0x%x RDbase 0x%" PRIx64 " V %d" +gicv3_its_cmd_mapi(uint32_t devid, uint32_t eventid, uint32_t icid) "GICv3 ITS: command MAPI DeviceID 0x%x EventID 0x%x ICID 0x%x" +gicv3_its_cmd_mapti(uint32_t devid, uint32_t eventid, uint32_t icid, uint32_t intid) "GICv3 ITS: command MAPTI DeviceID 0x%x EventID 0x%x ICID 0x%x pINTID 0x%x" +gicv3_its_cmd_inv(void) "GICv3 ITS: command INV or INVALL" +gicv3_its_cmd_movall(uint64_t rd1, uint64_t rd2) "GICv3 ITS: command MOVALL RDbase1 0x%" PRIx64 " RDbase2 0x%" PRIx64 +gicv3_its_cmd_movi(uint32_t devid, uint32_t eventid, uint32_t icid) "GICv3 ITS: command MOVI DeviceID 0x%x EventID 0x%x ICID 0x%x" +gicv3_its_cmd_unknown(unsigned cmd) "GICv3 ITS: unknown command 0x%x" +gicv3_its_cte_read(uint32_t icid, int valid, uint32_t rdbase) "GICv3 ITS: Collection Table read for ICID 0x%x: valid %d RDBase 0x%x" +gicv3_its_cte_write(uint32_t icid, int valid, uint32_t rdbase) "GICv3 ITS: Collection Table write for ICID 0x%x: valid %d RDBase 0x%x" +gicv3_its_cte_read_fault(uint32_t icid) "GICv3 ITS: Collection Table read for ICID 0x%x: faulted" 
+gicv3_its_ite_read(uint64_t ittaddr, uint32_t eventid, int valid, int inttype, uint32_t intid, uint32_t icid, uint32_t vpeid, uint32_t doorbell) "GICv3 ITS: Interrupt Table read for ITTaddr 0x%" PRIx64 " EventID 0x%x: valid %d inttype %d intid 0x%x ICID 0x%x vPEID 0x%x doorbell 0x%x" +gicv3_its_ite_read_fault(uint64_t ittaddr, uint32_t eventid) "GICv3 ITS: Interrupt Table read for ITTaddr 0x%" PRIx64 " EventID 0x%x: faulted" +gicv3_its_ite_write(uint64_t ittaddr, uint32_t eventid, int valid, int inttype, uint32_t intid, uint32_t icid, uint32_t vpeid, uint32_t doorbell) "GICv3 ITS: Interrupt Table write for ITTaddr 0x%" PRIx64 " EventID 0x%x: valid %d inttype %d intid 0x%x ICID 0x%x vPEID 0x%x doorbell 0x%x" +gicv3_its_dte_read(uint32_t devid, int valid, uint32_t size, uint64_t ittaddr) "GICv3 ITS: Device Table read for DeviceID 0x%x: valid %d size 0x%x ITTaddr 0x%" PRIx64 +gicv3_its_dte_write(uint32_t devid, int valid, uint32_t size, uint64_t ittaddr) "GICv3 ITS: Device Table write for DeviceID 0x%x: valid %d size 0x%x ITTaddr 0x%" PRIx64 +gicv3_its_dte_read_fault(uint32_t devid) "GICv3 ITS: Device Table read for DeviceID 0x%x: faulted" # armv7m_nvic.c nvic_recompute_state(int vectpending, int vectpending_prio, int exception_prio) "NVIC state recomputed: vectpending %d vectpending_prio %d exception_prio %d" diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 98aac98bef..03760ddeae 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -163,6 +163,7 @@ #include "migration/vmstate.h" #include "nvme.h" +#include "dif.h" #include "trace.h" #define NVME_MAX_IOQPAIRS 0xffff @@ -195,6 +196,7 @@ static const bool nvme_feature_support[NVME_FID_MAX] = { [NVME_WRITE_ATOMICITY] = true, [NVME_ASYNCHRONOUS_EVENT_CONF] = true, [NVME_TIMESTAMP] = true, + [NVME_HOST_BEHAVIOR_SUPPORT] = true, [NVME_COMMAND_SET_PROFILE] = true, }; @@ -205,6 +207,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = { [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE, [NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE, [NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE, + [NVME_HOST_BEHAVIOR_SUPPORT] = NVME_FEAT_CAP_CHANGE, [NVME_COMMAND_SET_PROFILE] = NVME_FEAT_CAP_CHANGE, }; @@ -1065,7 +1068,8 @@ static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) size_t len = nvme_l2b(ns, nlb); uint16_t status; - if (nvme_ns_ext(ns) && !(pi && pract && ns->lbaf.ms == 8)) { + if (nvme_ns_ext(ns) && + !(pi && pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) { NvmeSg sg; len += nvme_m2b(ns, nlb); @@ -1244,7 +1248,8 @@ uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len, bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps); bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT); - if (nvme_ns_ext(ns) && !(pi && pract && ns->lbaf.ms == 8)) { + if (nvme_ns_ext(ns) && + !(pi && pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) { return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbasz, ns->lbaf.ms, 0, dir); } @@ -2045,9 +2050,12 @@ static void nvme_verify_cb(void *opaque, int ret) uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); uint16_t apptag = le16_to_cpu(rw->apptag); uint16_t appmask = le16_to_cpu(rw->appmask); - uint32_t reftag = le32_to_cpu(rw->reftag); + uint64_t reftag = le32_to_cpu(rw->reftag); + uint64_t cdw3 = le32_to_cpu(rw->cdw3); uint16_t status; + reftag |= cdw3 << 32; + trace_pci_nvme_verify_cb(nvme_cid(req), prinfo, apptag, appmask, reftag); if (ret) { @@ -2136,7 +2144,8 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); 
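[Illustration, not part of the patch] The verify and compare callbacks in this hunk widen the initial reference tag to 64 bits by folding command dword 3 into the upper half (reftag |= cdw3 << 32). A minimal stand-alone sketch of that assembly, with an invented helper name purely for illustration:

    #include <stdint.h>

    /*
     * Assemble the 64-bit initial reference tag from the two 32-bit
     * command dwords, mirroring the pattern used in the callbacks:
     *
     *     reftag |= cdw3 << 32;
     *
     * The helper name is illustrative only, not taken from the patch.
     */
    static inline uint64_t reftag_from_cdws(uint32_t reftag_lo, uint32_t cdw3)
    {
        return ((uint64_t)cdw3 << 32) | reftag_lo;
    }

Since the 16-byte protection tuple introduced later in this series stores only a 48-bit storage reference tag, the upper bits of cdw3 beyond bit 15 do not end up on media for that format.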
uint16_t apptag = le16_to_cpu(rw->apptag); uint16_t appmask = le16_to_cpu(rw->appmask); - uint32_t reftag = le32_to_cpu(rw->reftag); + uint64_t reftag = le32_to_cpu(rw->reftag); + uint64_t cdw3 = le32_to_cpu(rw->cdw3); struct nvme_compare_ctx *ctx = req->opaque; g_autofree uint8_t *buf = NULL; BlockBackend *blk = ns->blkconf.blk; @@ -2144,6 +2153,8 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) BlockAcctStats *stats = blk_get_stats(blk); uint16_t status = NVME_SUCCESS; + reftag |= cdw3 << 32; + trace_pci_nvme_compare_mdata_cb(nvme_cid(req)); if (ret) { @@ -2181,7 +2192,7 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) * tuple. */ if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + pil = ns->lbaf.ms - nvme_pi_tuple_size(ns); } for (bufp = buf; mbufp < end; bufp += ns->lbaf.ms, mbufp += ns->lbaf.ms) { @@ -2522,7 +2533,8 @@ typedef struct NvmeCopyAIOCB { QEMUBH *bh; int ret; - NvmeCopySourceRange *ranges; + void *ranges; + unsigned int format; int nr; int idx; @@ -2533,7 +2545,7 @@ typedef struct NvmeCopyAIOCB { BlockAcctCookie write; } acct; - uint32_t reftag; + uint64_t reftag; uint64_t slba; NvmeZone *zone; @@ -2587,13 +2599,101 @@ static void nvme_copy_bh(void *opaque) static void nvme_copy_cb(void *opaque, int ret); +static void nvme_copy_source_range_parse_format0(void *ranges, int idx, + uint64_t *slba, uint32_t *nlb, + uint16_t *apptag, + uint16_t *appmask, + uint64_t *reftag) +{ + NvmeCopySourceRangeFormat0 *_ranges = ranges; + + if (slba) { + *slba = le64_to_cpu(_ranges[idx].slba); + } + + if (nlb) { + *nlb = le16_to_cpu(_ranges[idx].nlb) + 1; + } + + if (apptag) { + *apptag = le16_to_cpu(_ranges[idx].apptag); + } + + if (appmask) { + *appmask = le16_to_cpu(_ranges[idx].appmask); + } + + if (reftag) { + *reftag = le32_to_cpu(_ranges[idx].reftag); + } +} + +static void nvme_copy_source_range_parse_format1(void *ranges, int idx, + uint64_t *slba, uint32_t *nlb, + uint16_t *apptag, + uint16_t *appmask, + uint64_t *reftag) +{ + NvmeCopySourceRangeFormat1 *_ranges = ranges; + + if (slba) { + *slba = le64_to_cpu(_ranges[idx].slba); + } + + if (nlb) { + *nlb = le16_to_cpu(_ranges[idx].nlb) + 1; + } + + if (apptag) { + *apptag = le16_to_cpu(_ranges[idx].apptag); + } + + if (appmask) { + *appmask = le16_to_cpu(_ranges[idx].appmask); + } + + if (reftag) { + *reftag = 0; + + *reftag |= (uint64_t)_ranges[idx].sr[4] << 40; + *reftag |= (uint64_t)_ranges[idx].sr[5] << 32; + *reftag |= (uint64_t)_ranges[idx].sr[6] << 24; + *reftag |= (uint64_t)_ranges[idx].sr[7] << 16; + *reftag |= (uint64_t)_ranges[idx].sr[8] << 8; + *reftag |= (uint64_t)_ranges[idx].sr[9]; + } +} + +static void nvme_copy_source_range_parse(void *ranges, int idx, uint8_t format, + uint64_t *slba, uint32_t *nlb, + uint16_t *apptag, uint16_t *appmask, + uint64_t *reftag) +{ + switch (format) { + case NVME_COPY_FORMAT_0: + nvme_copy_source_range_parse_format0(ranges, idx, slba, nlb, apptag, + appmask, reftag); + break; + + case NVME_COPY_FORMAT_1: + nvme_copy_source_range_parse_format1(ranges, idx, slba, nlb, apptag, + appmask, reftag); + break; + + default: + abort(); + } +} + static void nvme_copy_out_completed_cb(void *opaque, int ret) { NvmeCopyAIOCB *iocb = opaque; NvmeRequest *req = iocb->req; NvmeNamespace *ns = req->ns; - NvmeCopySourceRange *range = &iocb->ranges[iocb->idx]; - uint32_t nlb = le32_to_cpu(range->nlb) + 1; + uint32_t nlb; + + nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, NULL, + &nlb, NULL, NULL, NULL); if (ret < 0) { 
iocb->ret = ret; @@ -2617,7 +2717,6 @@ static void nvme_copy_out_cb(void *opaque, int ret) NvmeCopyAIOCB *iocb = opaque; NvmeRequest *req = iocb->req; NvmeNamespace *ns = req->ns; - NvmeCopySourceRange *range; uint32_t nlb; size_t mlen; uint8_t *mbounce; @@ -2634,8 +2733,8 @@ static void nvme_copy_out_cb(void *opaque, int ret) return; } - range = &iocb->ranges[iocb->idx]; - nlb = le32_to_cpu(range->nlb) + 1; + nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, NULL, + &nlb, NULL, NULL, NULL); mlen = nvme_m2b(ns, nlb); mbounce = iocb->bounce + nvme_l2b(ns, nlb); @@ -2658,8 +2757,10 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret) NvmeCopyAIOCB *iocb = opaque; NvmeRequest *req = iocb->req; NvmeNamespace *ns = req->ns; - NvmeCopySourceRange *range; uint32_t nlb; + uint64_t slba; + uint16_t apptag, appmask; + uint64_t reftag; size_t len; uint16_t status; @@ -2670,8 +2771,8 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret) goto out; } - range = &iocb->ranges[iocb->idx]; - nlb = le32_to_cpu(range->nlb) + 1; + nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba, + &nlb, &apptag, &appmask, &reftag); len = nvme_l2b(ns, nlb); trace_pci_nvme_copy_out(iocb->slba, nlb); @@ -2682,11 +2783,6 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret) uint16_t prinfor = ((copy->control[0] >> 4) & 0xf); uint16_t prinfow = ((copy->control[2] >> 2) & 0xf); - uint16_t apptag = le16_to_cpu(range->apptag); - uint16_t appmask = le16_to_cpu(range->appmask); - uint32_t reftag = le32_to_cpu(range->reftag); - - uint64_t slba = le64_to_cpu(range->slba); size_t mlen = nvme_m2b(ns, nlb); uint8_t *mbounce = iocb->bounce + nvme_l2b(ns, nlb); @@ -2759,7 +2855,6 @@ static void nvme_copy_in_cb(void *opaque, int ret) NvmeCopyAIOCB *iocb = opaque; NvmeRequest *req = iocb->req; NvmeNamespace *ns = req->ns; - NvmeCopySourceRange *range; uint64_t slba; uint32_t nlb; @@ -2775,9 +2870,8 @@ static void nvme_copy_in_cb(void *opaque, int ret) return; } - range = &iocb->ranges[iocb->idx]; - slba = le64_to_cpu(range->slba); - nlb = le32_to_cpu(range->nlb) + 1; + nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba, + &nlb, NULL, NULL, NULL); qemu_iovec_reset(&iocb->iov); qemu_iovec_add(&iocb->iov, iocb->bounce + nvme_l2b(ns, nlb), @@ -2797,7 +2891,6 @@ static void nvme_copy_cb(void *opaque, int ret) NvmeCopyAIOCB *iocb = opaque; NvmeRequest *req = iocb->req; NvmeNamespace *ns = req->ns; - NvmeCopySourceRange *range; uint64_t slba; uint32_t nlb; size_t len; @@ -2814,9 +2907,8 @@ static void nvme_copy_cb(void *opaque, int ret) goto done; } - range = &iocb->ranges[iocb->idx]; - slba = le64_to_cpu(range->slba); - nlb = le32_to_cpu(range->nlb) + 1; + nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba, + &nlb, NULL, NULL, NULL); len = nvme_l2b(ns, nlb); trace_pci_nvme_copy_source_range(slba, nlb); @@ -2872,6 +2964,7 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) uint8_t format = copy->control[0] & 0xf; uint16_t prinfor = ((copy->control[0] >> 4) & 0xf); uint16_t prinfow = ((copy->control[2] >> 2) & 0xf); + size_t len = sizeof(NvmeCopySourceRangeFormat0); uint16_t status; @@ -2897,10 +2990,18 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) goto invalid; } - iocb->ranges = g_new(NvmeCopySourceRange, nr); + if (ns->pif && format != 0x1) { + status = NVME_INVALID_FORMAT | NVME_DNR; + goto invalid; + } - status = nvme_h2c(n, (uint8_t *)iocb->ranges, - sizeof(NvmeCopySourceRange) * nr, req); + if (ns->pif) { 
+ len = sizeof(NvmeCopySourceRangeFormat1); + } + + iocb->format = format; + iocb->ranges = g_malloc_n(nr, len); + status = nvme_h2c(n, (uint8_t *)iocb->ranges, len * nr, req); if (status) { goto invalid; } @@ -2926,6 +3027,7 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) iocb->nr = nr; iocb->idx = 0; iocb->reftag = le32_to_cpu(copy->reftag); + iocb->reftag |= (uint64_t)le32_to_cpu(copy->cdw3) << 32; iocb->bounce = g_malloc_n(le16_to_cpu(ns->id_ns.mssrl), ns->lbasz + ns->lbaf.ms); @@ -3164,7 +3266,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { bool pract = prinfo & NVME_PRINFO_PRACT; - if (pract && ns->lbaf.ms == 8) { + if (pract && ns->lbaf.ms == nvme_pi_tuple_size(ns)) { mapped_size = data_size; } } @@ -3241,7 +3343,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { bool pract = prinfo & NVME_PRINFO_PRACT; - if (pract && ns->lbaf.ms == 8) { + if (pract && ns->lbaf.ms == nvme_pi_tuple_size(ns)) { mapped_size -= nvme_m2b(ns, nlb); } } @@ -4712,7 +4814,8 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, } if (c->csi == NVME_CSI_NVM) { - return nvme_rpt_empty_id_struct(n, req); + return nvme_c2h(n, (uint8_t *)&ns->id_ns_nvm, sizeof(NvmeIdNsNvm), + req); } else if (c->csi == NVME_CSI_ZONED && ns->csi == NVME_CSI_ZONED) { return nvme_c2h(n, (uint8_t *)ns->id_ns_zoned, sizeof(NvmeIdNsZoned), req); @@ -5090,6 +5193,9 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) goto out; case NVME_TIMESTAMP: return nvme_get_feature_timestamp(n, req); + case NVME_HOST_BEHAVIOR_SUPPORT: + return nvme_c2h(n, (uint8_t *)&n->features.hbs, + sizeof(n->features.hbs), req); default: break; } @@ -5159,6 +5265,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) uint32_t nsid = le32_to_cpu(cmd->nsid); uint8_t fid = NVME_GETSETFEAT_FID(dw10); uint8_t save = NVME_SETFEAT_SAVE(dw10); + uint16_t status; int i; trace_pci_nvme_setfeat(nvme_cid(req), nsid, fid, save, dw11); @@ -5280,6 +5387,27 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) break; case NVME_TIMESTAMP: return nvme_set_feature_timestamp(n, req); + case NVME_HOST_BEHAVIOR_SUPPORT: + status = nvme_h2c(n, (uint8_t *)&n->features.hbs, + sizeof(n->features.hbs), req); + if (status) { + return status; + } + + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + + if (!ns) { + continue; + } + + ns->id_ns.nlbaf = ns->nlbaf - 1; + if (!n->features.hbs.lbafee) { + ns->id_ns.nlbaf = MIN(ns->id_ns.nlbaf, 15); + } + } + + return status; case NVME_COMMAND_SET_PROFILE: if (dw11 & 0x1ff) { trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff); @@ -5443,6 +5571,11 @@ typedef struct NvmeFormatAIOCB { uint32_t nsid; bool broadcast; int64_t offset; + + uint8_t lbaf; + uint8_t mset; + uint8_t pi; + uint8_t pil; } NvmeFormatAIOCB; static void nvme_format_bh(void *opaque); @@ -5462,18 +5595,16 @@ static const AIOCBInfo nvme_format_aiocb_info = { .get_aio_context = nvme_get_aio_context, }; -static void nvme_format_set(NvmeNamespace *ns, NvmeCmd *cmd) +static void nvme_format_set(NvmeNamespace *ns, uint8_t lbaf, uint8_t mset, + uint8_t pi, uint8_t pil) { - uint32_t dw10 = le32_to_cpu(cmd->cdw10); - uint8_t lbaf = dw10 & 0xf; - uint8_t pi = (dw10 >> 5) & 0x7; - uint8_t mset = (dw10 >> 4) & 0x1; - uint8_t pil = (dw10 >> 8) & 0x1; + uint8_t lbafl = lbaf & 0xf; + uint8_t lbafu = lbaf >> 4; trace_pci_nvme_format_set(ns->params.nsid, lbaf, mset, pi, pil); ns->id_ns.dps = 
(pil << 3) | pi; - ns->id_ns.flbas = lbaf | (mset << 4); + ns->id_ns.flbas = (lbafu << 5) | (mset << 4) | lbafl; nvme_ns_init_format(ns); } @@ -5481,7 +5612,6 @@ static void nvme_format_set(NvmeNamespace *ns, NvmeCmd *cmd) static void nvme_format_ns_cb(void *opaque, int ret) { NvmeFormatAIOCB *iocb = opaque; - NvmeRequest *req = iocb->req; NvmeNamespace *ns = iocb->ns; int bytes; @@ -5503,7 +5633,7 @@ static void nvme_format_ns_cb(void *opaque, int ret) return; } - nvme_format_set(ns, &req->cmd); + nvme_format_set(ns, iocb->lbaf, iocb->mset, iocb->pi, iocb->pil); ns->status = 0x0; iocb->ns = NULL; iocb->offset = 0; @@ -5523,7 +5653,7 @@ static uint16_t nvme_format_check(NvmeNamespace *ns, uint8_t lbaf, uint8_t pi) return NVME_INVALID_FORMAT | NVME_DNR; } - if (pi && (ns->id_ns.lbaf[lbaf].ms < sizeof(NvmeDifTuple))) { + if (pi && (ns->id_ns.lbaf[lbaf].ms < nvme_pi_tuple_size(ns))) { return NVME_INVALID_FORMAT | NVME_DNR; } @@ -5586,6 +5716,12 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) { NvmeFormatAIOCB *iocb; uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); + uint8_t lbaf = dw10 & 0xf; + uint8_t mset = (dw10 >> 4) & 0x1; + uint8_t pi = (dw10 >> 5) & 0x7; + uint8_t pil = (dw10 >> 8) & 0x1; + uint8_t lbafu = (dw10 >> 12) & 0x3; uint16_t status; iocb = qemu_aio_get(&nvme_format_aiocb_info, NULL, nvme_misc_cb, req); @@ -5595,9 +5731,17 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) iocb->ret = 0; iocb->ns = NULL; iocb->nsid = 0; + iocb->lbaf = lbaf; + iocb->mset = mset; + iocb->pi = pi; + iocb->pil = pil; iocb->broadcast = (nsid == NVME_NSID_BROADCAST); iocb->offset = 0; + if (n->features.hbs.lbafee) { + iocb->lbaf |= lbafu << 4; + } + if (!iocb->broadcast) { if (!nvme_nsid_valid(n, nsid)) { status = NVME_INVALID_NSID | NVME_DNR; @@ -6573,6 +6717,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->cntlid = cpu_to_le16(n->cntlid); id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR); + id->ctratt |= cpu_to_le32(NVME_CTRATT_ELBAS); id->rab = 6; @@ -6627,7 +6772,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) */ id->vwc = NVME_VWC_NSID_BROADCAST_SUPPORT | NVME_VWC_PRESENT; - id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0); + id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0 | NVME_OCFS_COPY_FORMAT_1); id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN | NVME_CTRL_SGLS_BITBUCKET); diff --git a/hw/nvme/dif.c b/hw/nvme/dif.c index 5dbd18b2a4..62d885f83e 100644 --- a/hw/nvme/dif.c +++ b/hw/nvme/dif.c @@ -13,13 +13,16 @@ #include "sysemu/block-backend.h" #include "nvme.h" +#include "dif.h" #include "trace.h" uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba, - uint32_t reftag) + uint64_t reftag) { + uint64_t mask = ns->pif ? 
0xffffffffffff : 0xffffffff; + if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) && - (prinfo & NVME_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) { + (prinfo & NVME_PRINFO_PRCHK_REF) && (slba & mask) != reftag) { return NVME_INVALID_PROT_INFO | NVME_DNR; } @@ -27,43 +30,58 @@ uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba, } /* from Linux kernel (crypto/crct10dif_common.c) */ -static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer, - size_t len) +static uint16_t crc16_t10dif(uint16_t crc, const unsigned char *buffer, + size_t len) { unsigned int i; for (i = 0; i < len; i++) { - crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; + crc = (crc << 8) ^ crc16_t10dif_table[((crc >> 8) ^ buffer[i]) & 0xff]; } return crc; } -void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint16_t apptag, - uint32_t *reftag) +/* from Linux kernel (lib/crc64.c) */ +static uint64_t crc64_nvme(uint64_t crc, const unsigned char *buffer, + size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + crc = (crc >> 8) ^ crc64_nvme_table[(crc & 0xff) ^ buffer[i]]; + } + + return crc ^ (uint64_t)~0; +} + +static void nvme_dif_pract_generate_dif_crc16(NvmeNamespace *ns, uint8_t *buf, + size_t len, uint8_t *mbuf, + size_t mlen, uint16_t apptag, + uint64_t *reftag) { uint8_t *end = buf + len; int16_t pil = 0; if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + pil = ns->lbaf.ms - nvme_pi_tuple_size(ns); } - trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil, - apptag, *reftag); + trace_pci_nvme_dif_pract_generate_dif_crc16(len, ns->lbasz, + ns->lbasz + pil, apptag, + *reftag); for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) { NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); - uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz); + uint16_t crc = crc16_t10dif(0x0, buf, ns->lbasz); if (pil) { - crc = crc_t10dif(crc, mbuf, pil); + crc = crc16_t10dif(crc, mbuf, pil); } - dif->guard = cpu_to_be16(crc); - dif->apptag = cpu_to_be16(apptag); - dif->reftag = cpu_to_be32(*reftag); + dif->g16.guard = cpu_to_be16(crc); + dif->g16.apptag = cpu_to_be16(apptag); + dif->g16.reftag = cpu_to_be32(*reftag); if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { (*reftag)++; @@ -71,57 +89,114 @@ void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, } } -static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, - uint8_t *buf, uint8_t *mbuf, size_t pil, - uint8_t prinfo, uint16_t apptag, - uint16_t appmask, uint32_t reftag) +static void nvme_dif_pract_generate_dif_crc64(NvmeNamespace *ns, uint8_t *buf, + size_t len, uint8_t *mbuf, + size_t mlen, uint16_t apptag, + uint64_t *reftag) +{ + uint8_t *end = buf + len; + int16_t pil = 0; + + if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { + pil = ns->lbaf.ms - 16; + } + + trace_pci_nvme_dif_pract_generate_dif_crc64(len, ns->lbasz, + ns->lbasz + pil, apptag, + *reftag); + + for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) { + NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); + uint64_t crc = crc64_nvme(~0ULL, buf, ns->lbasz); + + if (pil) { + crc = crc64_nvme(crc, mbuf, pil); + } + + dif->g64.guard = cpu_to_be64(crc); + dif->g64.apptag = cpu_to_be16(apptag); + + dif->g64.sr[0] = *reftag >> 40; + dif->g64.sr[1] = *reftag >> 32; + dif->g64.sr[2] = *reftag >> 24; + dif->g64.sr[3] = *reftag >> 16; + dif->g64.sr[4] = *reftag >> 8; + 
dif->g64.sr[5] = *reftag; + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { + (*reftag)++; + } + } +} + +void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint16_t apptag, + uint64_t *reftag) +{ + switch (ns->pif) { + case NVME_PI_GUARD_16: + return nvme_dif_pract_generate_dif_crc16(ns, buf, len, mbuf, mlen, + apptag, reftag); + case NVME_PI_GUARD_64: + return nvme_dif_pract_generate_dif_crc64(ns, buf, len, mbuf, mlen, + apptag, reftag); + } + + abort(); +} + +static uint16_t nvme_dif_prchk_crc16(NvmeNamespace *ns, NvmeDifTuple *dif, + uint8_t *buf, uint8_t *mbuf, size_t pil, + uint8_t prinfo, uint16_t apptag, + uint16_t appmask, uint64_t reftag) { switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { case NVME_ID_NS_DPS_TYPE_3: - if (be32_to_cpu(dif->reftag) != 0xffffffff) { + if (be32_to_cpu(dif->g16.reftag) != 0xffffffff) { break; } /* fallthrough */ case NVME_ID_NS_DPS_TYPE_1: case NVME_ID_NS_DPS_TYPE_2: - if (be16_to_cpu(dif->apptag) != 0xffff) { + if (be16_to_cpu(dif->g16.apptag) != 0xffff) { break; } - trace_pci_nvme_dif_prchk_disabled(be16_to_cpu(dif->apptag), - be32_to_cpu(dif->reftag)); + trace_pci_nvme_dif_prchk_disabled_crc16(be16_to_cpu(dif->g16.apptag), + be32_to_cpu(dif->g16.reftag)); return NVME_SUCCESS; } if (prinfo & NVME_PRINFO_PRCHK_GUARD) { - uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz); + uint16_t crc = crc16_t10dif(0x0, buf, ns->lbasz); if (pil) { - crc = crc_t10dif(crc, mbuf, pil); + crc = crc16_t10dif(crc, mbuf, pil); } - trace_pci_nvme_dif_prchk_guard(be16_to_cpu(dif->guard), crc); + trace_pci_nvme_dif_prchk_guard_crc16(be16_to_cpu(dif->g16.guard), crc); - if (be16_to_cpu(dif->guard) != crc) { + if (be16_to_cpu(dif->g16.guard) != crc) { return NVME_E2E_GUARD_ERROR; } } if (prinfo & NVME_PRINFO_PRCHK_APP) { - trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag, + trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->g16.apptag), apptag, appmask); - if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) { + if ((be16_to_cpu(dif->g16.apptag) & appmask) != (apptag & appmask)) { return NVME_E2E_APP_ERROR; } } if (prinfo & NVME_PRINFO_PRCHK_REF) { - trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag); + trace_pci_nvme_dif_prchk_reftag_crc16(be32_to_cpu(dif->g16.reftag), + reftag); - if (be32_to_cpu(dif->reftag) != reftag) { + if (be32_to_cpu(dif->g16.reftag) != reftag) { return NVME_E2E_REF_ERROR; } } @@ -129,12 +204,96 @@ static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, return NVME_SUCCESS; } +static uint16_t nvme_dif_prchk_crc64(NvmeNamespace *ns, NvmeDifTuple *dif, + uint8_t *buf, uint8_t *mbuf, size_t pil, + uint8_t prinfo, uint16_t apptag, + uint16_t appmask, uint64_t reftag) +{ + uint64_t r = 0; + + r |= (uint64_t)dif->g64.sr[0] << 40; + r |= (uint64_t)dif->g64.sr[1] << 32; + r |= (uint64_t)dif->g64.sr[2] << 24; + r |= (uint64_t)dif->g64.sr[3] << 16; + r |= (uint64_t)dif->g64.sr[4] << 8; + r |= (uint64_t)dif->g64.sr[5]; + + switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + case NVME_ID_NS_DPS_TYPE_3: + if (r != 0xffffffffffff) { + break; + } + + /* fallthrough */ + case NVME_ID_NS_DPS_TYPE_1: + case NVME_ID_NS_DPS_TYPE_2: + if (be16_to_cpu(dif->g64.apptag) != 0xffff) { + break; + } + + trace_pci_nvme_dif_prchk_disabled_crc64(be16_to_cpu(dif->g16.apptag), + r); + + return NVME_SUCCESS; + } + + if (prinfo & NVME_PRINFO_PRCHK_GUARD) { + uint64_t crc = crc64_nvme(~0ULL, buf, ns->lbasz); + + if (pil) { + crc = crc64_nvme(crc, mbuf, pil); + } + + 
trace_pci_nvme_dif_prchk_guard_crc64(be64_to_cpu(dif->g64.guard), crc); + + if (be64_to_cpu(dif->g64.guard) != crc) { + return NVME_E2E_GUARD_ERROR; + } + } + + if (prinfo & NVME_PRINFO_PRCHK_APP) { + trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->g64.apptag), apptag, + appmask); + + if ((be16_to_cpu(dif->g64.apptag) & appmask) != (apptag & appmask)) { + return NVME_E2E_APP_ERROR; + } + } + + if (prinfo & NVME_PRINFO_PRCHK_REF) { + trace_pci_nvme_dif_prchk_reftag_crc64(r, reftag); + + if (r != reftag) { + return NVME_E2E_REF_ERROR; + } + } + + return NVME_SUCCESS; +} + +static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, + uint8_t *buf, uint8_t *mbuf, size_t pil, + uint8_t prinfo, uint16_t apptag, + uint16_t appmask, uint64_t reftag) +{ + switch (ns->pif) { + case NVME_PI_GUARD_16: + return nvme_dif_prchk_crc16(ns, dif, buf, mbuf, pil, prinfo, apptag, + appmask, reftag); + case NVME_PI_GUARD_64: + return nvme_dif_prchk_crc64(ns, dif, buf, mbuf, pil, prinfo, apptag, + appmask, reftag); + } + + abort(); +} + uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, uint8_t *mbuf, size_t mlen, uint8_t prinfo, uint64_t slba, uint16_t apptag, - uint16_t appmask, uint32_t *reftag) + uint16_t appmask, uint64_t *reftag) { - uint8_t *end = buf + len; + uint8_t *bufp, *end = buf + len; int16_t pil = 0; uint16_t status; @@ -144,18 +303,34 @@ uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, } if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + pil = ns->lbaf.ms - nvme_pi_tuple_size(ns); } trace_pci_nvme_dif_check(prinfo, ns->lbasz + pil); - for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) { + for (bufp = buf; bufp < end; bufp += ns->lbasz, mbuf += ns->lbaf.ms) { NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); - - status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, prinfo, apptag, + status = nvme_dif_prchk(ns, dif, bufp, mbuf, pil, prinfo, apptag, appmask, *reftag); if (status) { - return status; + /* + * The first block of a 'raw' image is always allocated, so we + * cannot reliably know if the block is all zeroes or not. For + * CRC16 this works fine because the T10 CRC16 is 0x0 for all + * zeroes, but the Rocksoft CRC64 is not. Thus, if a guard error is + * detected for the first block, check if it is zeroed and manually + * set the protection information to all ones to disable protection + * information checking. 
+ */ + if (status == NVME_E2E_GUARD_ERROR && slba == 0x0 && bufp == buf) { + g_autofree uint8_t *zeroes = g_malloc0(ns->lbasz); + + if (memcmp(bufp, zeroes, ns->lbasz) == 0) { + memset(mbuf + pil, 0xff, nvme_pi_tuple_size(ns)); + } + } else { + return status; + } } if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { @@ -183,7 +358,7 @@ uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + pil = ns->lbaf.ms - nvme_pi_tuple_size(ns); } do { @@ -209,7 +384,7 @@ uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, end = mbufp + mlen; for (; mbufp < end; mbufp += ns->lbaf.ms) { - memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple)); + memset(mbufp + pil, 0xff, nvme_pi_tuple_size(ns)); } } @@ -251,9 +426,12 @@ static void nvme_dif_rw_check_cb(void *opaque, int ret) uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); uint16_t apptag = le16_to_cpu(rw->apptag); uint16_t appmask = le16_to_cpu(rw->appmask); - uint32_t reftag = le32_to_cpu(rw->reftag); + uint64_t reftag = le32_to_cpu(rw->reftag); + uint64_t cdw3 = le32_to_cpu(rw->cdw3); uint16_t status; + reftag |= cdw3 << 32; + trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), prinfo, apptag, appmask, reftag); @@ -283,7 +461,7 @@ static void nvme_dif_rw_check_cb(void *opaque, int ret) goto out; } - if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == 8) { + if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == nvme_pi_tuple_size(ns)) { goto out; } @@ -367,11 +545,14 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); uint16_t apptag = le16_to_cpu(rw->apptag); uint16_t appmask = le16_to_cpu(rw->appmask); - uint32_t reftag = le32_to_cpu(rw->reftag); + uint64_t reftag = le32_to_cpu(rw->reftag); + uint64_t cdw3 = le32_to_cpu(rw->cdw3); bool pract = !!(prinfo & NVME_PRINFO_PRACT); NvmeBounceContext *ctx; uint16_t status; + reftag |= cdw3 << 32; + trace_pci_nvme_dif_rw(pract, prinfo); ctx = g_new0(NvmeBounceContext, 1); @@ -387,7 +568,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) if (pract) { uint8_t *mbuf, *end; - int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + int16_t pil = ns->lbaf.ms - nvme_pi_tuple_size(ns); status = nvme_check_prinfo(ns, prinfo, slba, reftag); if (status) { @@ -411,8 +592,29 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) for (; mbuf < end; mbuf += ns->lbaf.ms) { NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); - dif->apptag = cpu_to_be16(apptag); - dif->reftag = cpu_to_be32(reftag); + switch (ns->pif) { + case NVME_PI_GUARD_16: + dif->g16.apptag = cpu_to_be16(apptag); + dif->g16.reftag = cpu_to_be32(reftag); + + break; + + case NVME_PI_GUARD_64: + dif->g64.guard = cpu_to_be64(0x6482d367eb22b64e); + dif->g64.apptag = cpu_to_be16(apptag); + + dif->g64.sr[0] = reftag >> 40; + dif->g64.sr[1] = reftag >> 32; + dif->g64.sr[2] = reftag >> 24; + dif->g64.sr[3] = reftag >> 16; + dif->g64.sr[4] = reftag >> 8; + dif->g64.sr[5] = reftag; + + break; + + default: + abort(); + } switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { case NVME_ID_NS_DPS_TYPE_1: @@ -427,7 +629,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) return NVME_NO_COMPLETE; } - if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == 8)) { + if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) { mapped_len += mlen; } @@ -461,7 +663,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) qemu_iovec_init(&ctx->mdata.iov, 1); 
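[Illustration, not part of the patch] The 16-byte (64-bit guard) tuple carries its reference tag as a 48-bit big-endian field, which the patch packs and unpacks byte by byte via dif->g64.sr[0..5] in nvme_dif_rw, nvme_dif_pract_generate_dif_crc64 and nvme_dif_prchk_crc64. A stand-alone sketch of that packing, assuming only standard C (helper names are invented for the sketch):

    #include <stdint.h>

    /* Pack the low 48 bits of the reference tag into the big-endian
     * storage reference tag bytes of the 64-bit guard tuple, and the
     * reverse, mirroring the dif->g64.sr[0..5] accesses in the patch. */
    static void sr48_pack(uint8_t sr[6], uint64_t reftag)
    {
        sr[0] = reftag >> 40;
        sr[1] = reftag >> 32;
        sr[2] = reftag >> 24;
        sr[3] = reftag >> 16;
        sr[4] = reftag >> 8;
        sr[5] = reftag;
    }

    static uint64_t sr48_unpack(const uint8_t sr[6])
    {
        return ((uint64_t)sr[0] << 40) | ((uint64_t)sr[1] << 32) |
               ((uint64_t)sr[2] << 24) | ((uint64_t)sr[3] << 16) |
               ((uint64_t)sr[4] << 8)  | (uint64_t)sr[5];
    }

Keeping the field as raw big-endian bytes rather than a 64-bit integer matches the on-disk tuple layout, which is why the patch avoids a cpu_to_be64()/be64_to_cpu() pair for this member.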
qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); - if (!(pract && ns->lbaf.ms == 8)) { + if (!(pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) { status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size, NVME_TX_DIRECTION_TO_DEVICE, req); if (status) { diff --git a/hw/nvme/dif.h b/hw/nvme/dif.h new file mode 100644 index 0000000000..f12e312250 --- /dev/null +++ b/hw/nvme/dif.h @@ -0,0 +1,191 @@ +#ifndef HW_NVME_DIF_H +#define HW_NVME_DIF_H + +/* from Linux kernel (crypto/crct10dif_common.c) */ +static const uint16_t crc16_t10dif_table[256] = { + 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, + 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, + 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, + 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, + 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, + 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, + 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, + 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, + 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, + 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, + 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, + 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, + 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, + 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, + 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, + 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, + 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, + 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, + 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, + 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, + 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, + 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, + 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, + 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, + 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, + 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, + 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, + 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, + 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, + 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, + 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, + 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 +}; + +#define CRC64_NVME_POLY 0x9A6C9329AC4BC9B5ULL + +static const uint64_t crc64_nvme_table[] = { + 0x0000000000000000ULL, 0x7F6EF0C830358979ULL, + 0xFEDDE190606B12F2ULL, 0x81B31158505E9B8BULL, + 0xC962E5739841B68FULL, 0xB60C15BBA8743FF6ULL, + 0x37BF04E3F82AA47DULL, 0x48D1F42BC81F2D04ULL, + 0xA61CECB46814FE75ULL, 0xD9721C7C5821770CULL, + 0x58C10D24087FEC87ULL, 0x27AFFDEC384A65FEULL, + 0x6F7E09C7F05548FAULL, 0x1010F90FC060C183ULL, + 0x91A3E857903E5A08ULL, 0xEECD189FA00BD371ULL, + 0x78E0FF3B88BE6F81ULL, 0x078E0FF3B88BE6F8ULL, + 0x863D1EABE8D57D73ULL, 0xF953EE63D8E0F40AULL, + 0xB1821A4810FFD90EULL, 0xCEECEA8020CA5077ULL, + 0x4F5FFBD87094CBFCULL, 0x30310B1040A14285ULL, + 0xDEFC138FE0AA91F4ULL, 0xA192E347D09F188DULL, + 0x2021F21F80C18306ULL, 0x5F4F02D7B0F40A7FULL, + 0x179EF6FC78EB277BULL, 0x68F0063448DEAE02ULL, + 0xE943176C18803589ULL, 0x962DE7A428B5BCF0ULL, + 
0xF1C1FE77117CDF02ULL, 0x8EAF0EBF2149567BULL, + 0x0F1C1FE77117CDF0ULL, 0x7072EF2F41224489ULL, + 0x38A31B04893D698DULL, 0x47CDEBCCB908E0F4ULL, + 0xC67EFA94E9567B7FULL, 0xB9100A5CD963F206ULL, + 0x57DD12C379682177ULL, 0x28B3E20B495DA80EULL, + 0xA900F35319033385ULL, 0xD66E039B2936BAFCULL, + 0x9EBFF7B0E12997F8ULL, 0xE1D10778D11C1E81ULL, + 0x606216208142850AULL, 0x1F0CE6E8B1770C73ULL, + 0x8921014C99C2B083ULL, 0xF64FF184A9F739FAULL, + 0x77FCE0DCF9A9A271ULL, 0x08921014C99C2B08ULL, + 0x4043E43F0183060CULL, 0x3F2D14F731B68F75ULL, + 0xBE9E05AF61E814FEULL, 0xC1F0F56751DD9D87ULL, + 0x2F3DEDF8F1D64EF6ULL, 0x50531D30C1E3C78FULL, + 0xD1E00C6891BD5C04ULL, 0xAE8EFCA0A188D57DULL, + 0xE65F088B6997F879ULL, 0x9931F84359A27100ULL, + 0x1882E91B09FCEA8BULL, 0x67EC19D339C963F2ULL, + 0xD75ADABD7A6E2D6FULL, 0xA8342A754A5BA416ULL, + 0x29873B2D1A053F9DULL, 0x56E9CBE52A30B6E4ULL, + 0x1E383FCEE22F9BE0ULL, 0x6156CF06D21A1299ULL, + 0xE0E5DE5E82448912ULL, 0x9F8B2E96B271006BULL, + 0x71463609127AD31AULL, 0x0E28C6C1224F5A63ULL, + 0x8F9BD7997211C1E8ULL, 0xF0F5275142244891ULL, + 0xB824D37A8A3B6595ULL, 0xC74A23B2BA0EECECULL, + 0x46F932EAEA507767ULL, 0x3997C222DA65FE1EULL, + 0xAFBA2586F2D042EEULL, 0xD0D4D54EC2E5CB97ULL, + 0x5167C41692BB501CULL, 0x2E0934DEA28ED965ULL, + 0x66D8C0F56A91F461ULL, 0x19B6303D5AA47D18ULL, + 0x980521650AFAE693ULL, 0xE76BD1AD3ACF6FEAULL, + 0x09A6C9329AC4BC9BULL, 0x76C839FAAAF135E2ULL, + 0xF77B28A2FAAFAE69ULL, 0x8815D86ACA9A2710ULL, + 0xC0C42C4102850A14ULL, 0xBFAADC8932B0836DULL, + 0x3E19CDD162EE18E6ULL, 0x41773D1952DB919FULL, + 0x269B24CA6B12F26DULL, 0x59F5D4025B277B14ULL, + 0xD846C55A0B79E09FULL, 0xA72835923B4C69E6ULL, + 0xEFF9C1B9F35344E2ULL, 0x90973171C366CD9BULL, + 0x1124202993385610ULL, 0x6E4AD0E1A30DDF69ULL, + 0x8087C87E03060C18ULL, 0xFFE938B633338561ULL, + 0x7E5A29EE636D1EEAULL, 0x0134D92653589793ULL, + 0x49E52D0D9B47BA97ULL, 0x368BDDC5AB7233EEULL, + 0xB738CC9DFB2CA865ULL, 0xC8563C55CB19211CULL, + 0x5E7BDBF1E3AC9DECULL, 0x21152B39D3991495ULL, + 0xA0A63A6183C78F1EULL, 0xDFC8CAA9B3F20667ULL, + 0x97193E827BED2B63ULL, 0xE877CE4A4BD8A21AULL, + 0x69C4DF121B863991ULL, 0x16AA2FDA2BB3B0E8ULL, + 0xF86737458BB86399ULL, 0x8709C78DBB8DEAE0ULL, + 0x06BAD6D5EBD3716BULL, 0x79D4261DDBE6F812ULL, + 0x3105D23613F9D516ULL, 0x4E6B22FE23CC5C6FULL, + 0xCFD833A67392C7E4ULL, 0xB0B6C36E43A74E9DULL, + 0x9A6C9329AC4BC9B5ULL, 0xE50263E19C7E40CCULL, + 0x64B172B9CC20DB47ULL, 0x1BDF8271FC15523EULL, + 0x530E765A340A7F3AULL, 0x2C608692043FF643ULL, + 0xADD397CA54616DC8ULL, 0xD2BD67026454E4B1ULL, + 0x3C707F9DC45F37C0ULL, 0x431E8F55F46ABEB9ULL, + 0xC2AD9E0DA4342532ULL, 0xBDC36EC59401AC4BULL, + 0xF5129AEE5C1E814FULL, 0x8A7C6A266C2B0836ULL, + 0x0BCF7B7E3C7593BDULL, 0x74A18BB60C401AC4ULL, + 0xE28C6C1224F5A634ULL, 0x9DE29CDA14C02F4DULL, + 0x1C518D82449EB4C6ULL, 0x633F7D4A74AB3DBFULL, + 0x2BEE8961BCB410BBULL, 0x548079A98C8199C2ULL, + 0xD53368F1DCDF0249ULL, 0xAA5D9839ECEA8B30ULL, + 0x449080A64CE15841ULL, 0x3BFE706E7CD4D138ULL, + 0xBA4D61362C8A4AB3ULL, 0xC52391FE1CBFC3CAULL, + 0x8DF265D5D4A0EECEULL, 0xF29C951DE49567B7ULL, + 0x732F8445B4CBFC3CULL, 0x0C41748D84FE7545ULL, + 0x6BAD6D5EBD3716B7ULL, 0x14C39D968D029FCEULL, + 0x95708CCEDD5C0445ULL, 0xEA1E7C06ED698D3CULL, + 0xA2CF882D2576A038ULL, 0xDDA178E515432941ULL, + 0x5C1269BD451DB2CAULL, 0x237C997575283BB3ULL, + 0xCDB181EAD523E8C2ULL, 0xB2DF7122E51661BBULL, + 0x336C607AB548FA30ULL, 0x4C0290B2857D7349ULL, + 0x04D364994D625E4DULL, 0x7BBD94517D57D734ULL, + 0xFA0E85092D094CBFULL, 0x856075C11D3CC5C6ULL, + 0x134D926535897936ULL, 0x6C2362AD05BCF04FULL, + 0xED9073F555E26BC4ULL, 0x92FE833D65D7E2BDULL, + 
0xDA2F7716ADC8CFB9ULL, 0xA54187DE9DFD46C0ULL, + 0x24F29686CDA3DD4BULL, 0x5B9C664EFD965432ULL, + 0xB5517ED15D9D8743ULL, 0xCA3F8E196DA80E3AULL, + 0x4B8C9F413DF695B1ULL, 0x34E26F890DC31CC8ULL, + 0x7C339BA2C5DC31CCULL, 0x035D6B6AF5E9B8B5ULL, + 0x82EE7A32A5B7233EULL, 0xFD808AFA9582AA47ULL, + 0x4D364994D625E4DAULL, 0x3258B95CE6106DA3ULL, + 0xB3EBA804B64EF628ULL, 0xCC8558CC867B7F51ULL, + 0x8454ACE74E645255ULL, 0xFB3A5C2F7E51DB2CULL, + 0x7A894D772E0F40A7ULL, 0x05E7BDBF1E3AC9DEULL, + 0xEB2AA520BE311AAFULL, 0x944455E88E0493D6ULL, + 0x15F744B0DE5A085DULL, 0x6A99B478EE6F8124ULL, + 0x224840532670AC20ULL, 0x5D26B09B16452559ULL, + 0xDC95A1C3461BBED2ULL, 0xA3FB510B762E37ABULL, + 0x35D6B6AF5E9B8B5BULL, 0x4AB846676EAE0222ULL, + 0xCB0B573F3EF099A9ULL, 0xB465A7F70EC510D0ULL, + 0xFCB453DCC6DA3DD4ULL, 0x83DAA314F6EFB4ADULL, + 0x0269B24CA6B12F26ULL, 0x7D0742849684A65FULL, + 0x93CA5A1B368F752EULL, 0xECA4AAD306BAFC57ULL, + 0x6D17BB8B56E467DCULL, 0x12794B4366D1EEA5ULL, + 0x5AA8BF68AECEC3A1ULL, 0x25C64FA09EFB4AD8ULL, + 0xA4755EF8CEA5D153ULL, 0xDB1BAE30FE90582AULL, + 0xBCF7B7E3C7593BD8ULL, 0xC399472BF76CB2A1ULL, + 0x422A5673A732292AULL, 0x3D44A6BB9707A053ULL, + 0x759552905F188D57ULL, 0x0AFBA2586F2D042EULL, + 0x8B48B3003F739FA5ULL, 0xF42643C80F4616DCULL, + 0x1AEB5B57AF4DC5ADULL, 0x6585AB9F9F784CD4ULL, + 0xE436BAC7CF26D75FULL, 0x9B584A0FFF135E26ULL, + 0xD389BE24370C7322ULL, 0xACE74EEC0739FA5BULL, + 0x2D545FB4576761D0ULL, 0x523AAF7C6752E8A9ULL, + 0xC41748D84FE75459ULL, 0xBB79B8107FD2DD20ULL, + 0x3ACAA9482F8C46ABULL, 0x45A459801FB9CFD2ULL, + 0x0D75ADABD7A6E2D6ULL, 0x721B5D63E7936BAFULL, + 0xF3A84C3BB7CDF024ULL, 0x8CC6BCF387F8795DULL, + 0x620BA46C27F3AA2CULL, 0x1D6554A417C62355ULL, + 0x9CD645FC4798B8DEULL, 0xE3B8B53477AD31A7ULL, + 0xAB69411FBFB21CA3ULL, 0xD407B1D78F8795DAULL, + 0x55B4A08FDFD90E51ULL, 0x2ADA5047EFEC8728ULL, +}; + +static inline size_t nvme_pi_tuple_size(NvmeNamespace *ns) +{ + return ns->pif ? 
16 : 8; +} + +uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba, + uint64_t reftag); +uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, + uint64_t slba); +void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint16_t apptag, + uint64_t *reftag); +uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint8_t prinfo, + uint64_t slba, uint16_t apptag, + uint16_t appmask, uint64_t *reftag); +uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req); + +#endif /* HW_NVME_DIF_H */ diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index ee673f1a5b..8a3613d9ab 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -58,6 +58,7 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) { static uint64_t ns_count; NvmeIdNs *id_ns = &ns->id_ns; + NvmeIdNsNvm *id_ns_nvm = &ns->id_ns_nvm; uint8_t ds; uint16_t ms; int i; @@ -101,6 +102,8 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) id_ns->dps |= NVME_ID_NS_DPS_FIRST_EIGHT; } + ns->pif = ns->params.pif; + static const NvmeLBAF lbaf[16] = { [0] = { .ds = 9 }, [1] = { .ds = 9, .ms = 8 }, @@ -112,10 +115,11 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) [7] = { .ds = 12, .ms = 64 }, }; + ns->nlbaf = 8; + memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf)); - id_ns->nlbaf = 7; - for (i = 0; i <= id_ns->nlbaf; i++) { + for (i = 0; i < ns->nlbaf; i++) { NvmeLBAF *lbaf = &id_ns->lbaf[i]; if (lbaf->ds == ds) { if (lbaf->ms == ms) { @@ -126,12 +130,16 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) } /* add non-standard lba format */ - id_ns->nlbaf++; - id_ns->lbaf[id_ns->nlbaf].ds = ds; - id_ns->lbaf[id_ns->nlbaf].ms = ms; - id_ns->flbas |= id_ns->nlbaf; + id_ns->lbaf[ns->nlbaf].ds = ds; + id_ns->lbaf[ns->nlbaf].ms = ms; + ns->nlbaf++; + + id_ns->flbas |= i; + lbaf_found: + id_ns_nvm->elbaf[i] = (ns->pif & 0x3) << 7; + id_ns->nlbaf = ns->nlbaf - 1; nvme_ns_init_format(ns); return 0; @@ -370,15 +378,36 @@ static void nvme_zoned_ns_shutdown(NvmeNamespace *ns) static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) { + unsigned int pi_size; + if (!ns->blkconf.blk) { error_setg(errp, "block backend not configured"); return -1; } - if (ns->params.pi && ns->params.ms < 8) { - error_setg(errp, "at least 8 bytes of metadata required to enable " - "protection information"); - return -1; + if (ns->params.pi) { + if (ns->params.pi > NVME_ID_NS_DPS_TYPE_3) { + error_setg(errp, "invalid 'pi' value"); + return -1; + } + + switch (ns->params.pif) { + case NVME_PI_GUARD_16: + pi_size = 8; + break; + case NVME_PI_GUARD_64: + pi_size = 16; + break; + default: + error_setg(errp, "invalid 'pif'"); + return -1; + } + + if (ns->params.ms < pi_size) { + error_setg(errp, "at least %u bytes of metadata required to " + "enable protection information", pi_size); + return -1; + } } if (ns->params.nsid > NVME_MAX_NAMESPACES) { @@ -590,6 +619,7 @@ static Property nvme_ns_props[] = { DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0), DEFINE_PROP_UINT8("pi", NvmeNamespace, params.pi, 0), DEFINE_PROP_UINT8("pil", NvmeNamespace, params.pil, 0), + DEFINE_PROP_UINT8("pif", NvmeNamespace, params.pif, 0), DEFINE_PROP_UINT16("mssrl", NvmeNamespace, params.mssrl, 128), DEFINE_PROP_UINT32("mcl", NvmeNamespace, params.mcl, 128), DEFINE_PROP_UINT8("msrc", NvmeNamespace, params.msrc, 127), diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 90c0bb7ce2..739c8b8f79 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -102,6 
+102,7 @@ typedef struct NvmeNamespaceParams { uint8_t mset; uint8_t pi; uint8_t pil; + uint8_t pif; uint16_t mssrl; uint32_t mcl; @@ -127,12 +128,15 @@ typedef struct NvmeNamespace { int64_t size; int64_t moff; NvmeIdNs id_ns; + NvmeIdNsNvm id_ns_nvm; NvmeLBAF lbaf; + unsigned int nlbaf; size_t lbasz; const uint32_t *iocs; uint8_t csi; uint16_t status; int attached; + uint8_t pif; struct { uint16_t zrwas; @@ -468,7 +472,9 @@ typedef struct NvmeCtrl { uint16_t temp_thresh_hi; uint16_t temp_thresh_low; }; - uint32_t async_config; + + uint32_t async_config; + NvmeHostBehaviorSupport hbs; } features; } NvmeCtrl; @@ -513,54 +519,4 @@ void nvme_rw_complete_cb(void *opaque, int ret); uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, NvmeCmd *cmd); -/* from Linux kernel (crypto/crct10dif_common.c) */ -static const uint16_t t10_dif_crc_table[256] = { - 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, - 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, - 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, - 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, - 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, - 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, - 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, - 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, - 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, - 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, - 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, - 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, - 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, - 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, - 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, - 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, - 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, - 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, - 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, - 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, - 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, - 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, - 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, - 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, - 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, - 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, - 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, - 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, - 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, - 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, - 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, - 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 -}; - -uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba, - uint32_t reftag); -uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, - uint64_t slba); -void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint16_t apptag, - uint32_t *reftag); -uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint8_t prinfo, - uint64_t slba, uint16_t apptag, - uint16_t appmask, uint32_t *reftag); -uint16_t nvme_dif_rw(NvmeCtrl *n, 
NvmeRequest *req); - - #endif /* HW_NVME_INTERNAL_H */ diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index 90730d802f..ff1b458969 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -20,12 +20,16 @@ pci_nvme_dif_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" pci_nvme_dif_rw_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" pci_nvme_dif_rw_mdata_out_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" pci_nvme_dif_rw_check_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_dif_pract_generate_dif(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint32_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_pract_generate_dif_crc16(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint32_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_pract_generate_dif_crc64(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint64_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx64"" pci_nvme_dif_check(uint8_t prinfo, uint16_t chksum_len) "prinfo 0x%"PRIx8" chksum_len %"PRIu16"" -pci_nvme_dif_prchk_disabled(uint16_t apptag, uint32_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_dif_prchk_guard(uint16_t guard, uint16_t crc) "guard 0x%"PRIx16" crc 0x%"PRIx16"" +pci_nvme_dif_prchk_disabled_crc16(uint16_t apptag, uint32_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_prchk_disabled_crc64(uint16_t apptag, uint64_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx64"" +pci_nvme_dif_prchk_guard_crc16(uint16_t guard, uint16_t crc) "guard 0x%"PRIx16" crc 0x%"PRIx16"" +pci_nvme_dif_prchk_guard_crc64(uint64_t guard, uint64_t crc) "guard 0x%"PRIx64" crc 0x%"PRIx64"" pci_nvme_dif_prchk_apptag(uint16_t apptag, uint16_t elbat, uint16_t elbatm) "apptag 0x%"PRIx16" elbat 0x%"PRIx16" elbatm 0x%"PRIx16"" -pci_nvme_dif_prchk_reftag(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32"" +pci_nvme_dif_prchk_reftag_crc16(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32"" +pci_nvme_dif_prchk_reftag_crc64(uint64_t reftag, uint64_t elbrt) "reftag 0x%"PRIx64" elbrt 0x%"PRIx64"" pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu16" format 0x%"PRIx8"" pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32"" pci_nvme_copy_out(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32"" diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index fbfdf47e26..18b43be7f6 100644 --- a/hw/nvram/spapr_nvram.c +++ b/hw/nvram/spapr_nvram.c @@ -219,7 +219,7 @@ static void postload_update_cb(void *opaque, bool running, RunState state) { SpaprNvram *nvram = opaque; - /* This is called after bdrv_invalidate_cache_all. */ + /* This is called after bdrv_activate_all. 
*/ qemu_del_vm_change_state_handler(nvram->vmstate); nvram->vmstate = NULL; diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 4cc204f90d..953fc65fa8 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -27,6 +27,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/datadir.h" +#include "qemu/memalign.h" #include "qapi/error.h" #include "qapi/qapi-events-machine.h" #include "qapi/qapi-events-qdev.h" diff --git a/hw/ppc/spapr_softmmu.c b/hw/ppc/spapr_softmmu.c index 4ee03c83e4..5170a33369 100644 --- a/hw/ppc/spapr_softmmu.c +++ b/hw/ppc/spapr_softmmu.c @@ -1,5 +1,6 @@ #include "qemu/osdep.h" #include "qemu/cutils.h" +#include "qemu/memalign.h" #include "cpu.h" #include "helper_regs.h" #include "hw/ppc/spapr.h" diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig index d2d869aaad..91bb9d21c4 100644 --- a/hw/riscv/Kconfig +++ b/hw/riscv/Kconfig @@ -42,6 +42,8 @@ config RISCV_VIRT select PFLASH_CFI01 select SERIAL select RISCV_ACLINT + select RISCV_APLIC + select RISCV_IMSIC select SIFIVE_PLIC select SIFIVE_TEST select VIRTIO_MMIO diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c index aec7cfa33f..833624d66c 100644 --- a/hw/riscv/opentitan.c +++ b/hw/riscv/opentitan.c @@ -34,13 +34,15 @@ static const MemMapEntry ibex_memmap[] = { [IBEX_DEV_FLASH] = { 0x20000000, 0x80000 }, [IBEX_DEV_UART] = { 0x40000000, 0x1000 }, [IBEX_DEV_GPIO] = { 0x40040000, 0x1000 }, - [IBEX_DEV_SPI] = { 0x40050000, 0x1000 }, + [IBEX_DEV_SPI_DEVICE] = { 0x40050000, 0x1000 }, [IBEX_DEV_I2C] = { 0x40080000, 0x1000 }, [IBEX_DEV_PATTGEN] = { 0x400e0000, 0x1000 }, [IBEX_DEV_TIMER] = { 0x40100000, 0x1000 }, [IBEX_DEV_SENSOR_CTRL] = { 0x40110000, 0x1000 }, [IBEX_DEV_OTP_CTRL] = { 0x40130000, 0x4000 }, [IBEX_DEV_USBDEV] = { 0x40150000, 0x1000 }, + [IBEX_DEV_SPI_HOST0] = { 0x40300000, 0x1000 }, + [IBEX_DEV_SPI_HOST1] = { 0x40310000, 0x1000 }, [IBEX_DEV_PWRMGR] = { 0x40400000, 0x1000 }, [IBEX_DEV_RSTMGR] = { 0x40410000, 0x1000 }, [IBEX_DEV_CLKMGR] = { 0x40420000, 0x1000 }, @@ -209,8 +211,12 @@ static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp) create_unimplemented_device("riscv.lowrisc.ibex.gpio", memmap[IBEX_DEV_GPIO].base, memmap[IBEX_DEV_GPIO].size); - create_unimplemented_device("riscv.lowrisc.ibex.spi", - memmap[IBEX_DEV_SPI].base, memmap[IBEX_DEV_SPI].size); + create_unimplemented_device("riscv.lowrisc.ibex.spi_device", + memmap[IBEX_DEV_SPI_DEVICE].base, memmap[IBEX_DEV_SPI_DEVICE].size); + create_unimplemented_device("riscv.lowrisc.ibex.spi_host0", + memmap[IBEX_DEV_SPI_HOST0].base, memmap[IBEX_DEV_SPI_HOST0].size); + create_unimplemented_device("riscv.lowrisc.ibex.spi_host1", + memmap[IBEX_DEV_SPI_HOST1].base, memmap[IBEX_DEV_SPI_HOST1].size); create_unimplemented_device("riscv.lowrisc.ibex.i2c", memmap[IBEX_DEV_I2C].base, memmap[IBEX_DEV_I2C].size); create_unimplemented_device("riscv.lowrisc.ibex.pattgen", diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index e3068d6126..da50cbed43 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -33,6 +33,8 @@ #include "hw/riscv/boot.h" #include "hw/riscv/numa.h" #include "hw/intc/riscv_aclint.h" +#include "hw/intc/riscv_aplic.h" +#include "hw/intc/riscv_imsic.h" #include "hw/intc/sifive_plic.h" #include "hw/misc/sifive_test.h" #include "chardev/char.h" @@ -43,6 +45,28 @@ #include "hw/pci-host/gpex.h" #include "hw/display/ramfb.h" +/* + * The virt machine physical address space used by some of the devices + * namely ACLINT, PLIC, APLIC, and IMSIC depend on number of Sockets, + * number of CPUs, and number of IMSIC guest files. 
+ * + * Various limits defined by VIRT_SOCKETS_MAX_BITS, VIRT_CPUS_MAX_BITS, + * and VIRT_IRQCHIP_MAX_GUESTS_BITS are tuned for maximum utilization + * of virt machine physical address space. + */ + +#define VIRT_IMSIC_GROUP_MAX_SIZE (1U << IMSIC_MMIO_GROUP_MIN_SHIFT) +#if VIRT_IMSIC_GROUP_MAX_SIZE < \ + IMSIC_GROUP_SIZE(VIRT_CPUS_MAX_BITS, VIRT_IRQCHIP_MAX_GUESTS_BITS) +#error "Can't accomodate single IMSIC group in address space" +#endif + +#define VIRT_IMSIC_MAX_SIZE (VIRT_SOCKETS_MAX * \ + VIRT_IMSIC_GROUP_MAX_SIZE) +#if 0x4000000 < VIRT_IMSIC_MAX_SIZE +#error "Can't accomodate all IMSIC groups in address space" +#endif + static const MemMapEntry virt_memmap[] = { [VIRT_DEBUG] = { 0x0, 0x100 }, [VIRT_MROM] = { 0x1000, 0xf000 }, @@ -52,10 +76,14 @@ static const MemMapEntry virt_memmap[] = { [VIRT_ACLINT_SSWI] = { 0x2F00000, 0x4000 }, [VIRT_PCIE_PIO] = { 0x3000000, 0x10000 }, [VIRT_PLIC] = { 0xc000000, VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) }, + [VIRT_APLIC_M] = { 0xc000000, APLIC_SIZE(VIRT_CPUS_MAX) }, + [VIRT_APLIC_S] = { 0xd000000, APLIC_SIZE(VIRT_CPUS_MAX) }, [VIRT_UART0] = { 0x10000000, 0x100 }, [VIRT_VIRTIO] = { 0x10001000, 0x1000 }, [VIRT_FW_CFG] = { 0x10100000, 0x18 }, [VIRT_FLASH] = { 0x20000000, 0x4000000 }, + [VIRT_IMSIC_M] = { 0x24000000, VIRT_IMSIC_MAX_SIZE }, + [VIRT_IMSIC_S] = { 0x28000000, VIRT_IMSIC_MAX_SIZE }, [VIRT_PCIE_ECAM] = { 0x30000000, 0x10000000 }, [VIRT_PCIE_MMIO] = { 0x40000000, 0x40000000 }, [VIRT_DRAM] = { 0x80000000, 0x0 }, @@ -133,12 +161,13 @@ static void virt_flash_map(RISCVVirtState *s, sysmem); } -static void create_pcie_irq_map(void *fdt, char *nodename, - uint32_t plic_phandle) +static void create_pcie_irq_map(RISCVVirtState *s, void *fdt, char *nodename, + uint32_t irqchip_phandle) { int pin, dev; - uint32_t - full_irq_map[GPEX_NUM_IRQS * GPEX_NUM_IRQS * FDT_INT_MAP_WIDTH] = {}; + uint32_t irq_map_stride = 0; + uint32_t full_irq_map[GPEX_NUM_IRQS * GPEX_NUM_IRQS * + FDT_MAX_INT_MAP_WIDTH] = {}; uint32_t *irq_map = full_irq_map; /* This code creates a standard swizzle of interrupts such that @@ -156,23 +185,31 @@ static void create_pcie_irq_map(void *fdt, char *nodename, int irq_nr = PCIE_IRQ + ((pin + PCI_SLOT(devfn)) % GPEX_NUM_IRQS); int i = 0; + /* Fill PCI address cells */ irq_map[i] = cpu_to_be32(devfn << 8); - i += FDT_PCI_ADDR_CELLS; - irq_map[i] = cpu_to_be32(pin + 1); + /* Fill PCI Interrupt cells */ + irq_map[i] = cpu_to_be32(pin + 1); i += FDT_PCI_INT_CELLS; - irq_map[i++] = cpu_to_be32(plic_phandle); - i += FDT_PLIC_ADDR_CELLS; - irq_map[i] = cpu_to_be32(irq_nr); + /* Fill interrupt controller phandle and cells */ + irq_map[i++] = cpu_to_be32(irqchip_phandle); + irq_map[i++] = cpu_to_be32(irq_nr); + if (s->aia_type != VIRT_AIA_TYPE_NONE) { + irq_map[i++] = cpu_to_be32(0x4); + } - irq_map += FDT_INT_MAP_WIDTH; + if (!irq_map_stride) { + irq_map_stride = i; + } + irq_map += irq_map_stride; } } - qemu_fdt_setprop(fdt, nodename, "interrupt-map", - full_irq_map, sizeof(full_irq_map)); + qemu_fdt_setprop(fdt, nodename, "interrupt-map", full_irq_map, + GPEX_NUM_IRQS * GPEX_NUM_IRQS * + irq_map_stride * sizeof(uint32_t)); qemu_fdt_setprop_cells(fdt, nodename, "interrupt-map-mask", 0x1800, 0, 0, 0x7); @@ -298,7 +335,7 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, { int cpu; char *name; - unsigned long addr; + unsigned long addr, size; uint32_t aclint_cells_size; uint32_t *aclint_mswi_cells; uint32_t *aclint_sswi_cells; @@ -319,29 +356,38 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, } aclint_cells_size = 
s->soc[socket].num_harts * sizeof(uint32_t) * 2; - addr = memmap[VIRT_CLINT].base + (memmap[VIRT_CLINT].size * socket); - name = g_strdup_printf("/soc/mswi@%lx", addr); - qemu_fdt_add_subnode(mc->fdt, name); - qemu_fdt_setprop_string(mc->fdt, name, "compatible", "riscv,aclint-mswi"); - qemu_fdt_setprop_cells(mc->fdt, name, "reg", - 0x0, addr, 0x0, RISCV_ACLINT_SWI_SIZE); - qemu_fdt_setprop(mc->fdt, name, "interrupts-extended", - aclint_mswi_cells, aclint_cells_size); - qemu_fdt_setprop(mc->fdt, name, "interrupt-controller", NULL, 0); - qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", 0); - riscv_socket_fdt_write_id(mc, mc->fdt, name, socket); - g_free(name); + if (s->aia_type != VIRT_AIA_TYPE_APLIC_IMSIC) { + addr = memmap[VIRT_CLINT].base + (memmap[VIRT_CLINT].size * socket); + name = g_strdup_printf("/soc/mswi@%lx", addr); + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", + "riscv,aclint-mswi"); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", + 0x0, addr, 0x0, RISCV_ACLINT_SWI_SIZE); + qemu_fdt_setprop(mc->fdt, name, "interrupts-extended", + aclint_mswi_cells, aclint_cells_size); + qemu_fdt_setprop(mc->fdt, name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", 0); + riscv_socket_fdt_write_id(mc, mc->fdt, name, socket); + g_free(name); + } - addr = memmap[VIRT_CLINT].base + RISCV_ACLINT_SWI_SIZE + - (memmap[VIRT_CLINT].size * socket); + if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { + addr = memmap[VIRT_CLINT].base + + (RISCV_ACLINT_DEFAULT_MTIMER_SIZE * socket); + size = RISCV_ACLINT_DEFAULT_MTIMER_SIZE; + } else { + addr = memmap[VIRT_CLINT].base + RISCV_ACLINT_SWI_SIZE + + (memmap[VIRT_CLINT].size * socket); + size = memmap[VIRT_CLINT].size - RISCV_ACLINT_SWI_SIZE; + } name = g_strdup_printf("/soc/mtimer@%lx", addr); qemu_fdt_add_subnode(mc->fdt, name); qemu_fdt_setprop_string(mc->fdt, name, "compatible", "riscv,aclint-mtimer"); qemu_fdt_setprop_cells(mc->fdt, name, "reg", 0x0, addr + RISCV_ACLINT_DEFAULT_MTIME, - 0x0, memmap[VIRT_CLINT].size - RISCV_ACLINT_SWI_SIZE - - RISCV_ACLINT_DEFAULT_MTIME, + 0x0, size - RISCV_ACLINT_DEFAULT_MTIME, 0x0, addr + RISCV_ACLINT_DEFAULT_MTIMECMP, 0x0, RISCV_ACLINT_DEFAULT_MTIME); qemu_fdt_setprop(mc->fdt, name, "interrupts-extended", @@ -349,19 +395,22 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, riscv_socket_fdt_write_id(mc, mc->fdt, name, socket); g_free(name); - addr = memmap[VIRT_ACLINT_SSWI].base + - (memmap[VIRT_ACLINT_SSWI].size * socket); - name = g_strdup_printf("/soc/sswi@%lx", addr); - qemu_fdt_add_subnode(mc->fdt, name); - qemu_fdt_setprop_string(mc->fdt, name, "compatible", "riscv,aclint-sswi"); - qemu_fdt_setprop_cells(mc->fdt, name, "reg", - 0x0, addr, 0x0, memmap[VIRT_ACLINT_SSWI].size); - qemu_fdt_setprop(mc->fdt, name, "interrupts-extended", - aclint_sswi_cells, aclint_cells_size); - qemu_fdt_setprop(mc->fdt, name, "interrupt-controller", NULL, 0); - qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", 0); - riscv_socket_fdt_write_id(mc, mc->fdt, name, socket); - g_free(name); + if (s->aia_type != VIRT_AIA_TYPE_APLIC_IMSIC) { + addr = memmap[VIRT_ACLINT_SSWI].base + + (memmap[VIRT_ACLINT_SSWI].size * socket); + name = g_strdup_printf("/soc/sswi@%lx", addr); + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", + "riscv,aclint-sswi"); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", + 0x0, addr, 0x0, memmap[VIRT_ACLINT_SSWI].size); + qemu_fdt_setprop(mc->fdt, name, 
"interrupts-extended", + aclint_sswi_cells, aclint_cells_size); + qemu_fdt_setprop(mc->fdt, name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", 0); + riscv_socket_fdt_write_id(mc, mc->fdt, name, socket); + g_free(name); + } g_free(aclint_mswi_cells); g_free(aclint_mtimer_cells); @@ -405,8 +454,6 @@ static void create_fdt_socket_plic(RISCVVirtState *s, plic_name = g_strdup_printf("/soc/plic@%lx", plic_addr); qemu_fdt_add_subnode(mc->fdt, plic_name); qemu_fdt_setprop_cell(mc->fdt, plic_name, - "#address-cells", FDT_PLIC_ADDR_CELLS); - qemu_fdt_setprop_cell(mc->fdt, plic_name, "#interrupt-cells", FDT_PLIC_INT_CELLS); qemu_fdt_setprop_string_array(mc->fdt, plic_name, "compatible", (char **)&plic_compat, @@ -425,17 +472,233 @@ static void create_fdt_socket_plic(RISCVVirtState *s, g_free(plic_cells); } +static uint32_t imsic_num_bits(uint32_t count) +{ + uint32_t ret = 0; + + while (BIT(ret) < count) { + ret++; + } + + return ret; +} + +static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, + uint32_t *phandle, uint32_t *intc_phandles, + uint32_t *msi_m_phandle, uint32_t *msi_s_phandle) +{ + int cpu, socket; + char *imsic_name; + MachineState *mc = MACHINE(s); + uint32_t imsic_max_hart_per_socket, imsic_guest_bits; + uint32_t *imsic_cells, *imsic_regs, imsic_addr, imsic_size; + + *msi_m_phandle = (*phandle)++; + *msi_s_phandle = (*phandle)++; + imsic_cells = g_new0(uint32_t, mc->smp.cpus * 2); + imsic_regs = g_new0(uint32_t, riscv_socket_count(mc) * 4); + + /* M-level IMSIC node */ + for (cpu = 0; cpu < mc->smp.cpus; cpu++) { + imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); + imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT); + } + imsic_max_hart_per_socket = 0; + for (socket = 0; socket < riscv_socket_count(mc); socket++) { + imsic_addr = memmap[VIRT_IMSIC_M].base + + socket * VIRT_IMSIC_GROUP_MAX_SIZE; + imsic_size = IMSIC_HART_SIZE(0) * s->soc[socket].num_harts; + imsic_regs[socket * 4 + 0] = 0; + imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr); + imsic_regs[socket * 4 + 2] = 0; + imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size); + if (imsic_max_hart_per_socket < s->soc[socket].num_harts) { + imsic_max_hart_per_socket = s->soc[socket].num_harts; + } + } + imsic_name = g_strdup_printf("/soc/imsics@%lx", + (unsigned long)memmap[VIRT_IMSIC_M].base); + qemu_fdt_add_subnode(mc->fdt, imsic_name); + qemu_fdt_setprop_string(mc->fdt, imsic_name, "compatible", + "riscv,imsics"); + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "#interrupt-cells", + FDT_IMSIC_INT_CELLS); + qemu_fdt_setprop(mc->fdt, imsic_name, "interrupt-controller", + NULL, 0); + qemu_fdt_setprop(mc->fdt, imsic_name, "msi-controller", + NULL, 0); + qemu_fdt_setprop(mc->fdt, imsic_name, "interrupts-extended", + imsic_cells, mc->smp.cpus * sizeof(uint32_t) * 2); + qemu_fdt_setprop(mc->fdt, imsic_name, "reg", imsic_regs, + riscv_socket_count(mc) * sizeof(uint32_t) * 4); + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,num-ids", + VIRT_IRQCHIP_NUM_MSIS); + qemu_fdt_setprop_cells(mc->fdt, imsic_name, "riscv,ipi-id", + VIRT_IRQCHIP_IPI_MSI); + if (riscv_socket_count(mc) > 1) { + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,hart-index-bits", + imsic_num_bits(imsic_max_hart_per_socket)); + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,group-index-bits", + imsic_num_bits(riscv_socket_count(mc))); + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,group-index-shift", + IMSIC_MMIO_GROUP_MIN_SHIFT); + } + qemu_fdt_setprop_cell(mc->fdt, imsic_name, 
"phandle", *msi_m_phandle); + g_free(imsic_name); + + /* S-level IMSIC node */ + for (cpu = 0; cpu < mc->smp.cpus; cpu++) { + imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); + imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT); + } + imsic_guest_bits = imsic_num_bits(s->aia_guests + 1); + imsic_max_hart_per_socket = 0; + for (socket = 0; socket < riscv_socket_count(mc); socket++) { + imsic_addr = memmap[VIRT_IMSIC_S].base + + socket * VIRT_IMSIC_GROUP_MAX_SIZE; + imsic_size = IMSIC_HART_SIZE(imsic_guest_bits) * + s->soc[socket].num_harts; + imsic_regs[socket * 4 + 0] = 0; + imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr); + imsic_regs[socket * 4 + 2] = 0; + imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size); + if (imsic_max_hart_per_socket < s->soc[socket].num_harts) { + imsic_max_hart_per_socket = s->soc[socket].num_harts; + } + } + imsic_name = g_strdup_printf("/soc/imsics@%lx", + (unsigned long)memmap[VIRT_IMSIC_S].base); + qemu_fdt_add_subnode(mc->fdt, imsic_name); + qemu_fdt_setprop_string(mc->fdt, imsic_name, "compatible", + "riscv,imsics"); + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "#interrupt-cells", + FDT_IMSIC_INT_CELLS); + qemu_fdt_setprop(mc->fdt, imsic_name, "interrupt-controller", + NULL, 0); + qemu_fdt_setprop(mc->fdt, imsic_name, "msi-controller", + NULL, 0); + qemu_fdt_setprop(mc->fdt, imsic_name, "interrupts-extended", + imsic_cells, mc->smp.cpus * sizeof(uint32_t) * 2); + qemu_fdt_setprop(mc->fdt, imsic_name, "reg", imsic_regs, + riscv_socket_count(mc) * sizeof(uint32_t) * 4); + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,num-ids", + VIRT_IRQCHIP_NUM_MSIS); + qemu_fdt_setprop_cells(mc->fdt, imsic_name, "riscv,ipi-id", + VIRT_IRQCHIP_IPI_MSI); + if (imsic_guest_bits) { + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,guest-index-bits", + imsic_guest_bits); + } + if (riscv_socket_count(mc) > 1) { + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,hart-index-bits", + imsic_num_bits(imsic_max_hart_per_socket)); + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,group-index-bits", + imsic_num_bits(riscv_socket_count(mc))); + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,group-index-shift", + IMSIC_MMIO_GROUP_MIN_SHIFT); + } + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "phandle", *msi_s_phandle); + g_free(imsic_name); + + g_free(imsic_regs); + g_free(imsic_cells); +} + +static void create_fdt_socket_aplic(RISCVVirtState *s, + const MemMapEntry *memmap, int socket, + uint32_t msi_m_phandle, + uint32_t msi_s_phandle, + uint32_t *phandle, + uint32_t *intc_phandles, + uint32_t *aplic_phandles) +{ + int cpu; + char *aplic_name; + uint32_t *aplic_cells; + unsigned long aplic_addr; + MachineState *mc = MACHINE(s); + uint32_t aplic_m_phandle, aplic_s_phandle; + + aplic_m_phandle = (*phandle)++; + aplic_s_phandle = (*phandle)++; + aplic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); + + /* M-level APLIC node */ + for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { + aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); + aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT); + } + aplic_addr = memmap[VIRT_APLIC_M].base + + (memmap[VIRT_APLIC_M].size * socket); + aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); + qemu_fdt_add_subnode(mc->fdt, aplic_name); + qemu_fdt_setprop_string(mc->fdt, aplic_name, "compatible", "riscv,aplic"); + qemu_fdt_setprop_cell(mc->fdt, aplic_name, + "#interrupt-cells", FDT_APLIC_INT_CELLS); + qemu_fdt_setprop(mc->fdt, aplic_name, "interrupt-controller", NULL, 0); + if (s->aia_type == 
VIRT_AIA_TYPE_APLIC) { + qemu_fdt_setprop(mc->fdt, aplic_name, "interrupts-extended", + aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2); + } else { + qemu_fdt_setprop_cell(mc->fdt, aplic_name, "msi-parent", + msi_m_phandle); + } + qemu_fdt_setprop_cells(mc->fdt, aplic_name, "reg", + 0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_M].size); + qemu_fdt_setprop_cell(mc->fdt, aplic_name, "riscv,num-sources", + VIRT_IRQCHIP_NUM_SOURCES); + qemu_fdt_setprop_cell(mc->fdt, aplic_name, "riscv,children", + aplic_s_phandle); + qemu_fdt_setprop_cells(mc->fdt, aplic_name, "riscv,delegate", + aplic_s_phandle, 0x1, VIRT_IRQCHIP_NUM_SOURCES); + riscv_socket_fdt_write_id(mc, mc->fdt, aplic_name, socket); + qemu_fdt_setprop_cell(mc->fdt, aplic_name, "phandle", aplic_m_phandle); + g_free(aplic_name); + + /* S-level APLIC node */ + for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { + aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); + aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT); + } + aplic_addr = memmap[VIRT_APLIC_S].base + + (memmap[VIRT_APLIC_S].size * socket); + aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); + qemu_fdt_add_subnode(mc->fdt, aplic_name); + qemu_fdt_setprop_string(mc->fdt, aplic_name, "compatible", "riscv,aplic"); + qemu_fdt_setprop_cell(mc->fdt, aplic_name, + "#interrupt-cells", FDT_APLIC_INT_CELLS); + qemu_fdt_setprop(mc->fdt, aplic_name, "interrupt-controller", NULL, 0); + if (s->aia_type == VIRT_AIA_TYPE_APLIC) { + qemu_fdt_setprop(mc->fdt, aplic_name, "interrupts-extended", + aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2); + } else { + qemu_fdt_setprop_cell(mc->fdt, aplic_name, "msi-parent", + msi_s_phandle); + } + qemu_fdt_setprop_cells(mc->fdt, aplic_name, "reg", + 0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_S].size); + qemu_fdt_setprop_cell(mc->fdt, aplic_name, "riscv,num-sources", + VIRT_IRQCHIP_NUM_SOURCES); + riscv_socket_fdt_write_id(mc, mc->fdt, aplic_name, socket); + qemu_fdt_setprop_cell(mc->fdt, aplic_name, "phandle", aplic_s_phandle); + g_free(aplic_name); + + g_free(aplic_cells); + aplic_phandles[socket] = aplic_s_phandle; +} + static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, bool is_32_bit, uint32_t *phandle, uint32_t *irq_mmio_phandle, uint32_t *irq_pcie_phandle, - uint32_t *irq_virtio_phandle) + uint32_t *irq_virtio_phandle, + uint32_t *msi_pcie_phandle) { - int socket; char *clust_name; - uint32_t *intc_phandles; + int socket, phandle_pos; MachineState *mc = MACHINE(s); - uint32_t xplic_phandles[MAX_NODES]; + uint32_t msi_m_phandle = 0, msi_s_phandle = 0; + uint32_t *intc_phandles, xplic_phandles[MAX_NODES]; qemu_fdt_add_subnode(mc->fdt, "/cpus"); qemu_fdt_setprop_cell(mc->fdt, "/cpus", "timebase-frequency", @@ -444,32 +707,55 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, qemu_fdt_setprop_cell(mc->fdt, "/cpus", "#address-cells", 0x1); qemu_fdt_add_subnode(mc->fdt, "/cpus/cpu-map"); + intc_phandles = g_new0(uint32_t, mc->smp.cpus); + + phandle_pos = mc->smp.cpus; for (socket = (riscv_socket_count(mc) - 1); socket >= 0; socket--) { + phandle_pos -= s->soc[socket].num_harts; + clust_name = g_strdup_printf("/cpus/cpu-map/cluster%d", socket); qemu_fdt_add_subnode(mc->fdt, clust_name); - intc_phandles = g_new0(uint32_t, s->soc[socket].num_harts); - create_fdt_socket_cpus(s, socket, clust_name, phandle, - is_32_bit, intc_phandles); + is_32_bit, &intc_phandles[phandle_pos]); create_fdt_socket_memory(s, memmap, socket); + g_free(clust_name); + if (!kvm_enabled()) { if 
(s->have_aclint) { - create_fdt_socket_aclint(s, memmap, socket, intc_phandles); + create_fdt_socket_aclint(s, memmap, socket, + &intc_phandles[phandle_pos]); } else { - create_fdt_socket_clint(s, memmap, socket, intc_phandles); + create_fdt_socket_clint(s, memmap, socket, + &intc_phandles[phandle_pos]); } } + } - create_fdt_socket_plic(s, memmap, socket, phandle, - intc_phandles, xplic_phandles); + if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { + create_fdt_imsic(s, memmap, phandle, intc_phandles, + &msi_m_phandle, &msi_s_phandle); + *msi_pcie_phandle = msi_s_phandle; + } - g_free(intc_phandles); - g_free(clust_name); + phandle_pos = mc->smp.cpus; + for (socket = (riscv_socket_count(mc) - 1); socket >= 0; socket--) { + phandle_pos -= s->soc[socket].num_harts; + + if (s->aia_type == VIRT_AIA_TYPE_NONE) { + create_fdt_socket_plic(s, memmap, socket, phandle, + &intc_phandles[phandle_pos], xplic_phandles); + } else { + create_fdt_socket_aplic(s, memmap, socket, + msi_m_phandle, msi_s_phandle, phandle, + &intc_phandles[phandle_pos], xplic_phandles); + } } + g_free(intc_phandles); + for (socket = 0; socket < riscv_socket_count(mc); socket++) { if (socket == 0) { *irq_mmio_phandle = xplic_phandles[socket]; @@ -505,13 +791,20 @@ static void create_fdt_virtio(RISCVVirtState *s, const MemMapEntry *memmap, 0x0, memmap[VIRT_VIRTIO].size); qemu_fdt_setprop_cell(mc->fdt, name, "interrupt-parent", irq_virtio_phandle); - qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", VIRTIO_IRQ + i); + if (s->aia_type == VIRT_AIA_TYPE_NONE) { + qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", + VIRTIO_IRQ + i); + } else { + qemu_fdt_setprop_cells(mc->fdt, name, "interrupts", + VIRTIO_IRQ + i, 0x4); + } g_free(name); } } static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap, - uint32_t irq_pcie_phandle) + uint32_t irq_pcie_phandle, + uint32_t msi_pcie_phandle) { char *name; MachineState *mc = MACHINE(s); @@ -531,6 +824,9 @@ static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap, qemu_fdt_setprop_cells(mc->fdt, name, "bus-range", 0, memmap[VIRT_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1); qemu_fdt_setprop(mc->fdt, name, "dma-coherent", NULL, 0); + if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { + qemu_fdt_setprop_cell(mc->fdt, name, "msi-parent", msi_pcie_phandle); + } qemu_fdt_setprop_cells(mc->fdt, name, "reg", 0, memmap[VIRT_PCIE_ECAM].base, 0, memmap[VIRT_PCIE_ECAM].size); qemu_fdt_setprop_sized_cells(mc->fdt, name, "ranges", @@ -543,7 +839,7 @@ static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap, 2, virt_high_pcie_memmap.base, 2, virt_high_pcie_memmap.base, 2, virt_high_pcie_memmap.size); - create_pcie_irq_map(mc->fdt, name, irq_pcie_phandle); + create_pcie_irq_map(s, mc->fdt, name, irq_pcie_phandle); g_free(name); } @@ -602,7 +898,11 @@ static void create_fdt_uart(RISCVVirtState *s, const MemMapEntry *memmap, 0x0, memmap[VIRT_UART0].size); qemu_fdt_setprop_cell(mc->fdt, name, "clock-frequency", 3686400); qemu_fdt_setprop_cell(mc->fdt, name, "interrupt-parent", irq_mmio_phandle); - qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", UART0_IRQ); + if (s->aia_type == VIRT_AIA_TYPE_NONE) { + qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", UART0_IRQ); + } else { + qemu_fdt_setprop_cells(mc->fdt, name, "interrupts", UART0_IRQ, 0x4); + } qemu_fdt_add_subnode(mc->fdt, "/chosen"); qemu_fdt_setprop_string(mc->fdt, "/chosen", "stdout-path", name); @@ -623,7 +923,11 @@ static void create_fdt_rtc(RISCVVirtState *s, const MemMapEntry *memmap, 0x0, 
memmap[VIRT_RTC].base, 0x0, memmap[VIRT_RTC].size); qemu_fdt_setprop_cell(mc->fdt, name, "interrupt-parent", irq_mmio_phandle); - qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", RTC_IRQ); + if (s->aia_type == VIRT_AIA_TYPE_NONE) { + qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", RTC_IRQ); + } else { + qemu_fdt_setprop_cells(mc->fdt, name, "interrupts", RTC_IRQ, 0x4); + } g_free(name); } @@ -648,7 +952,7 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, uint64_t mem_size, const char *cmdline, bool is_32_bit) { MachineState *mc = MACHINE(s); - uint32_t phandle = 1, irq_mmio_phandle = 1; + uint32_t phandle = 1, irq_mmio_phandle = 1, msi_pcie_phandle = 1; uint32_t irq_pcie_phandle = 1, irq_virtio_phandle = 1; if (mc->dtb) { @@ -678,11 +982,12 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, qemu_fdt_setprop_cell(mc->fdt, "/soc", "#address-cells", 0x2); create_fdt_sockets(s, memmap, is_32_bit, &phandle, - &irq_mmio_phandle, &irq_pcie_phandle, &irq_virtio_phandle); + &irq_mmio_phandle, &irq_pcie_phandle, &irq_virtio_phandle, + &msi_pcie_phandle); create_fdt_virtio(s, memmap, irq_virtio_phandle); - create_fdt_pcie(s, memmap, irq_pcie_phandle); + create_fdt_pcie(s, memmap, irq_pcie_phandle, msi_pcie_phandle); create_fdt_reset(s, memmap, &phandle); @@ -704,7 +1009,7 @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem, hwaddr high_mmio_base, hwaddr high_mmio_size, hwaddr pio_base, - DeviceState *plic) + DeviceState *irqchip) { DeviceState *dev; MemoryRegion *ecam_alias, *ecam_reg; @@ -738,7 +1043,7 @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem, sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, pio_base); for (i = 0; i < GPEX_NUM_IRQS; i++) { - irq = qdev_get_gpio_in(plic, PCIE_IRQ + i); + irq = qdev_get_gpio_in(irqchip, PCIE_IRQ + i); sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq); gpex_set_irq_num(GPEX_HOST(dev), i, PCIE_IRQ + i); @@ -769,18 +1074,100 @@ static FWCfgState *create_fw_cfg(const MachineState *mc) return fw_cfg; } +static DeviceState *virt_create_plic(const MemMapEntry *memmap, int socket, + int base_hartid, int hart_count) +{ + DeviceState *ret; + char *plic_hart_config; + + /* Per-socket PLIC hart topology configuration string */ + plic_hart_config = riscv_plic_hart_config_string(hart_count); + + /* Per-socket PLIC */ + ret = sifive_plic_create( + memmap[VIRT_PLIC].base + socket * memmap[VIRT_PLIC].size, + plic_hart_config, hart_count, base_hartid, + VIRT_IRQCHIP_NUM_SOURCES, + ((1U << VIRT_IRQCHIP_NUM_PRIO_BITS) - 1), + VIRT_PLIC_PRIORITY_BASE, + VIRT_PLIC_PENDING_BASE, + VIRT_PLIC_ENABLE_BASE, + VIRT_PLIC_ENABLE_STRIDE, + VIRT_PLIC_CONTEXT_BASE, + VIRT_PLIC_CONTEXT_STRIDE, + memmap[VIRT_PLIC].size); + + g_free(plic_hart_config); + + return ret; +} + +static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests, + const MemMapEntry *memmap, int socket, + int base_hartid, int hart_count) +{ + int i; + hwaddr addr; + uint32_t guest_bits; + DeviceState *aplic_m; + bool msimode = (aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) ? 
true : false; + + if (msimode) { + /* Per-socket M-level IMSICs */ + addr = memmap[VIRT_IMSIC_M].base + socket * VIRT_IMSIC_GROUP_MAX_SIZE; + for (i = 0; i < hart_count; i++) { + riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0), + base_hartid + i, true, 1, + VIRT_IRQCHIP_NUM_MSIS); + } + + /* Per-socket S-level IMSICs */ + guest_bits = imsic_num_bits(aia_guests + 1); + addr = memmap[VIRT_IMSIC_S].base + socket * VIRT_IMSIC_GROUP_MAX_SIZE; + for (i = 0; i < hart_count; i++) { + riscv_imsic_create(addr + i * IMSIC_HART_SIZE(guest_bits), + base_hartid + i, false, 1 + aia_guests, + VIRT_IRQCHIP_NUM_MSIS); + } + } + + /* Per-socket M-level APLIC */ + aplic_m = riscv_aplic_create( + memmap[VIRT_APLIC_M].base + socket * memmap[VIRT_APLIC_M].size, + memmap[VIRT_APLIC_M].size, + (msimode) ? 0 : base_hartid, + (msimode) ? 0 : hart_count, + VIRT_IRQCHIP_NUM_SOURCES, + VIRT_IRQCHIP_NUM_PRIO_BITS, + msimode, true, NULL); + + if (aplic_m) { + /* Per-socket S-level APLIC */ + riscv_aplic_create( + memmap[VIRT_APLIC_S].base + socket * memmap[VIRT_APLIC_S].size, + memmap[VIRT_APLIC_S].size, + (msimode) ? 0 : base_hartid, + (msimode) ? 0 : hart_count, + VIRT_IRQCHIP_NUM_SOURCES, + VIRT_IRQCHIP_NUM_PRIO_BITS, + msimode, false, aplic_m); + } + + return aplic_m; +} + static void virt_machine_init(MachineState *machine) { const MemMapEntry *memmap = virt_memmap; RISCVVirtState *s = RISCV_VIRT_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); MemoryRegion *mask_rom = g_new(MemoryRegion, 1); - char *plic_hart_config, *soc_name; + char *soc_name; target_ulong start_addr = memmap[VIRT_DRAM].base; target_ulong firmware_end_addr, kernel_start_addr; uint32_t fdt_load_addr; uint64_t kernel_entry; - DeviceState *mmio_plic, *virtio_plic, *pcie_plic; + DeviceState *mmio_irqchip, *virtio_irqchip, *pcie_irqchip; int i, base_hartid, hart_count; /* Check socket count limit */ @@ -791,7 +1178,7 @@ static void virt_machine_init(MachineState *machine) } /* Initialize sockets */ - mmio_plic = virtio_plic = pcie_plic = NULL; + mmio_irqchip = virtio_irqchip = pcie_irqchip = NULL; for (i = 0; i < riscv_socket_count(machine); i++) { if (!riscv_socket_check_hartids(machine, i)) { error_report("discontinuous hartids in socket%d", i); @@ -823,56 +1210,68 @@ static void virt_machine_init(MachineState *machine) sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_abort); if (!kvm_enabled()) { - /* Per-socket CLINT */ - riscv_aclint_swi_create( - memmap[VIRT_CLINT].base + i * memmap[VIRT_CLINT].size, - base_hartid, hart_count, false); - riscv_aclint_mtimer_create( - memmap[VIRT_CLINT].base + i * memmap[VIRT_CLINT].size + - RISCV_ACLINT_SWI_SIZE, - RISCV_ACLINT_DEFAULT_MTIMER_SIZE, base_hartid, hart_count, - RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, - RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true); - - /* Per-socket ACLINT SSWI */ if (s->have_aclint) { + if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { + /* Per-socket ACLINT MTIMER */ + riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base + + i * RISCV_ACLINT_DEFAULT_MTIMER_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, + base_hartid, hart_count, + RISCV_ACLINT_DEFAULT_MTIMECMP, + RISCV_ACLINT_DEFAULT_MTIME, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true); + } else { + /* Per-socket ACLINT MSWI, MTIMER, and SSWI */ + riscv_aclint_swi_create(memmap[VIRT_CLINT].base + + i * memmap[VIRT_CLINT].size, + base_hartid, hart_count, false); + riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base + + i * memmap[VIRT_CLINT].size + + RISCV_ACLINT_SWI_SIZE, + 
RISCV_ACLINT_DEFAULT_MTIMER_SIZE, + base_hartid, hart_count, + RISCV_ACLINT_DEFAULT_MTIMECMP, + RISCV_ACLINT_DEFAULT_MTIME, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true); + riscv_aclint_swi_create(memmap[VIRT_ACLINT_SSWI].base + + i * memmap[VIRT_ACLINT_SSWI].size, + base_hartid, hart_count, true); + } + } else { + /* Per-socket SiFive CLINT */ riscv_aclint_swi_create( - memmap[VIRT_ACLINT_SSWI].base + - i * memmap[VIRT_ACLINT_SSWI].size, - base_hartid, hart_count, true); + memmap[VIRT_CLINT].base + i * memmap[VIRT_CLINT].size, + base_hartid, hart_count, false); + riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base + + i * memmap[VIRT_CLINT].size + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, base_hartid, hart_count, + RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true); } } - /* Per-socket PLIC hart topology configuration string */ - plic_hart_config = riscv_plic_hart_config_string(hart_count); - - /* Per-socket PLIC */ - s->plic[i] = sifive_plic_create( - memmap[VIRT_PLIC].base + i * memmap[VIRT_PLIC].size, - plic_hart_config, hart_count, base_hartid, - VIRT_PLIC_NUM_SOURCES, - VIRT_PLIC_NUM_PRIORITIES, - VIRT_PLIC_PRIORITY_BASE, - VIRT_PLIC_PENDING_BASE, - VIRT_PLIC_ENABLE_BASE, - VIRT_PLIC_ENABLE_STRIDE, - VIRT_PLIC_CONTEXT_BASE, - VIRT_PLIC_CONTEXT_STRIDE, - memmap[VIRT_PLIC].size); - g_free(plic_hart_config); + /* Per-socket interrupt controller */ + if (s->aia_type == VIRT_AIA_TYPE_NONE) { + s->irqchip[i] = virt_create_plic(memmap, i, + base_hartid, hart_count); + } else { + s->irqchip[i] = virt_create_aia(s->aia_type, s->aia_guests, + memmap, i, base_hartid, + hart_count); + } - /* Try to use different PLIC instance based device type */ + /* Try to use different IRQCHIP instance based device type */ if (i == 0) { - mmio_plic = s->plic[i]; - virtio_plic = s->plic[i]; - pcie_plic = s->plic[i]; + mmio_irqchip = s->irqchip[i]; + virtio_irqchip = s->irqchip[i]; + pcie_irqchip = s->irqchip[i]; } if (i == 1) { - virtio_plic = s->plic[i]; - pcie_plic = s->plic[i]; + virtio_irqchip = s->irqchip[i]; + pcie_irqchip = s->irqchip[i]; } if (i == 2) { - pcie_plic = s->plic[i]; + pcie_irqchip = s->irqchip[i]; } } @@ -990,7 +1389,7 @@ static void virt_machine_init(MachineState *machine) for (i = 0; i < VIRTIO_COUNT; i++) { sysbus_create_simple("virtio-mmio", memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size, - qdev_get_gpio_in(DEVICE(virtio_plic), VIRTIO_IRQ + i)); + qdev_get_gpio_in(DEVICE(virtio_irqchip), VIRTIO_IRQ + i)); } gpex_pcie_init(system_memory, @@ -1001,14 +1400,14 @@ static void virt_machine_init(MachineState *machine) virt_high_pcie_memmap.base, virt_high_pcie_memmap.size, memmap[VIRT_PCIE_PIO].base, - DEVICE(pcie_plic)); + DEVICE(pcie_irqchip)); serial_mm_init(system_memory, memmap[VIRT_UART0].base, - 0, qdev_get_gpio_in(DEVICE(mmio_plic), UART0_IRQ), 399193, + 0, qdev_get_gpio_in(DEVICE(mmio_irqchip), UART0_IRQ), 399193, serial_hd(0), DEVICE_LITTLE_ENDIAN); sysbus_create_simple("goldfish_rtc", memmap[VIRT_RTC].base, - qdev_get_gpio_in(DEVICE(mmio_plic), RTC_IRQ)); + qdev_get_gpio_in(DEVICE(mmio_irqchip), RTC_IRQ)); virt_flash_create(s); @@ -1024,6 +1423,64 @@ static void virt_machine_instance_init(Object *obj) { } +static char *virt_get_aia_guests(Object *obj, Error **errp) +{ + RISCVVirtState *s = RISCV_VIRT_MACHINE(obj); + char val[32]; + + sprintf(val, "%d", s->aia_guests); + return g_strdup(val); +} + +static void virt_set_aia_guests(Object *obj, const char *val, Error **errp) +{ + RISCVVirtState *s = 
RISCV_VIRT_MACHINE(obj); + + s->aia_guests = atoi(val); + if (s->aia_guests < 0 || s->aia_guests > VIRT_IRQCHIP_MAX_GUESTS) { + error_setg(errp, "Invalid number of AIA IMSIC guests"); + error_append_hint(errp, "Valid values should be between 0 and %d.\n", + VIRT_IRQCHIP_MAX_GUESTS); + } +} + +static char *virt_get_aia(Object *obj, Error **errp) +{ + RISCVVirtState *s = RISCV_VIRT_MACHINE(obj); + const char *val; + + switch (s->aia_type) { + case VIRT_AIA_TYPE_APLIC: + val = "aplic"; + break; + case VIRT_AIA_TYPE_APLIC_IMSIC: + val = "aplic-imsic"; + break; + default: + val = "none"; + break; + }; + + return g_strdup(val); +} + +static void virt_set_aia(Object *obj, const char *val, Error **errp) +{ + RISCVVirtState *s = RISCV_VIRT_MACHINE(obj); + + if (!strcmp(val, "none")) { + s->aia_type = VIRT_AIA_TYPE_NONE; + } else if (!strcmp(val, "aplic")) { + s->aia_type = VIRT_AIA_TYPE_APLIC; + } else if (!strcmp(val, "aplic-imsic")) { + s->aia_type = VIRT_AIA_TYPE_APLIC_IMSIC; + } else { + error_setg(errp, "Invalid AIA interrupt controller type"); + error_append_hint(errp, "Valid values are none, aplic, and " + "aplic-imsic.\n"); + } +} + static bool virt_get_aclint(Object *obj, Error **errp) { MachineState *ms = MACHINE(obj); @@ -1042,6 +1499,7 @@ static void virt_set_aclint(Object *obj, bool value, Error **errp) static void virt_machine_class_init(ObjectClass *oc, void *data) { + char str[128]; MachineClass *mc = MACHINE_CLASS(oc); mc->desc = "RISC-V VirtIO board"; @@ -1062,6 +1520,20 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) object_class_property_set_description(oc, "aclint", "Set on/off to enable/disable " "emulating ACLINT devices"); + + object_class_property_add_str(oc, "aia", virt_get_aia, + virt_set_aia); + object_class_property_set_description(oc, "aia", + "Set type of AIA interrupt " + "controller. Valid values are " + "none, aplic, and aplic-imsic."); + + object_class_property_add_str(oc, "aia-guests", + virt_get_aia_guests, + virt_set_aia_guests); + sprintf(str, "Set number of guest MMIO pages for AIA IMSIC.
Valid value " + "should be between 0 and %d.", VIRT_IRQCHIP_MAX_GUESTS); + object_class_property_set_description(oc, "aia-guests", str); } static const TypeInfo virt_machine_typeinfo = { diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 3666b8d946..072686ed58 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -26,6 +26,7 @@ #include "qemu/main-loop.h" #include "qemu/module.h" #include "qemu/hw-version.h" +#include "qemu/memalign.h" #include "hw/scsi/scsi.h" #include "migration/qemu-file-types.h" #include "migration/vmstate.h" diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c index 6dbb9f41e4..c89ac53e65 100644 --- a/hw/tpm/tpm_ppi.c +++ b/hw/tpm/tpm_ppi.c @@ -12,7 +12,7 @@ */ #include "qemu/osdep.h" - +#include "qemu/memalign.h" #include "qapi/error.h" #include "sysemu/memory_mapping.h" #include "migration/vmstate.h" diff --git a/hw/usb/dev-mtp.c b/hw/usb/dev-mtp.c index 1e6ac76bef..e6b77a2a94 100644 --- a/hw/usb/dev-mtp.c +++ b/hw/usb/dev-mtp.c @@ -1607,7 +1607,7 @@ static void usb_mtp_write_data(MTPState *s, uint32_t handle) usb_mtp_object_lookup(s, s->dataset.parent_handle); char *path = NULL; uint64_t rc; - mode_t mask = 0644; + mode_t mask = 0755; int ret = 0; assert(d != NULL); @@ -1635,7 +1635,7 @@ static void usb_mtp_write_data(MTPState *s, uint32_t handle) } d->fd = open(path, O_CREAT | O_WRONLY | - O_CLOEXEC | O_NOFOLLOW, mask); + O_CLOEXEC | O_NOFOLLOW, mask & 0666); if (d->fd == -1) { ret = 1; goto done; diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c index a93d6b2e98..895b29fb86 100644 --- a/hw/usb/hcd-ohci.c +++ b/hw/usb/hcd-ohci.c @@ -58,8 +58,6 @@ struct ohci_hcca { #define ED_WBACK_OFFSET offsetof(struct ohci_ed, head) #define ED_WBACK_SIZE 4 -static void ohci_async_cancel_device(OHCIState *ohci, USBDevice *dev); - /* Bitfields for the first word of an Endpoint Desciptor. */ #define OHCI_ED_FA_SHIFT 0 #define OHCI_ED_FA_MASK (0x7f<<OHCI_ED_FA_SHIFT) @@ -261,92 +259,6 @@ static inline void ohci_set_interrupt(OHCIState *ohci, uint32_t intr) ohci_intr_update(ohci); } -/* Attach or detach a device on a root hub port. 
*/ -static void ohci_attach(USBPort *port1) -{ - OHCIState *s = port1->opaque; - OHCIPort *port = &s->rhport[port1->index]; - uint32_t old_state = port->ctrl; - - /* set connect status */ - port->ctrl |= OHCI_PORT_CCS | OHCI_PORT_CSC; - - /* update speed */ - if (port->port.dev->speed == USB_SPEED_LOW) { - port->ctrl |= OHCI_PORT_LSDA; - } else { - port->ctrl &= ~OHCI_PORT_LSDA; - } - - /* notify of remote-wakeup */ - if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND) { - ohci_set_interrupt(s, OHCI_INTR_RD); - } - - trace_usb_ohci_port_attach(port1->index); - - if (old_state != port->ctrl) { - ohci_set_interrupt(s, OHCI_INTR_RHSC); - } -} - -static void ohci_detach(USBPort *port1) -{ - OHCIState *s = port1->opaque; - OHCIPort *port = &s->rhport[port1->index]; - uint32_t old_state = port->ctrl; - - ohci_async_cancel_device(s, port1->dev); - - /* set connect status */ - if (port->ctrl & OHCI_PORT_CCS) { - port->ctrl &= ~OHCI_PORT_CCS; - port->ctrl |= OHCI_PORT_CSC; - } - /* disable port */ - if (port->ctrl & OHCI_PORT_PES) { - port->ctrl &= ~OHCI_PORT_PES; - port->ctrl |= OHCI_PORT_PESC; - } - trace_usb_ohci_port_detach(port1->index); - - if (old_state != port->ctrl) { - ohci_set_interrupt(s, OHCI_INTR_RHSC); - } -} - -static void ohci_wakeup(USBPort *port1) -{ - OHCIState *s = port1->opaque; - OHCIPort *port = &s->rhport[port1->index]; - uint32_t intr = 0; - if (port->ctrl & OHCI_PORT_PSS) { - trace_usb_ohci_port_wakeup(port1->index); - port->ctrl |= OHCI_PORT_PSSC; - port->ctrl &= ~OHCI_PORT_PSS; - intr = OHCI_INTR_RHSC; - } - /* Note that the controller can be suspended even if this port is not */ - if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND) { - trace_usb_ohci_remote_wakeup(s->name); - /* This is the one state transition the controller can do by itself */ - s->ctl &= ~OHCI_CTL_HCFS; - s->ctl |= OHCI_USB_RESUME; - /* In suspend mode only ResumeDetected is possible, not RHSC: - * see the OHCI spec 5.1.2.3. 
- */ - intr = OHCI_INTR_RD; - } - ohci_set_interrupt(s, intr); -} - -static void ohci_child_detach(USBPort *port1, USBDevice *child) -{ - OHCIState *s = port1->opaque; - - ohci_async_cancel_device(s, child); -} - static USBDevice *ohci_find_device(OHCIState *ohci, uint8_t addr) { USBDevice *dev; @@ -369,6 +281,10 @@ void ohci_stop_endpoints(OHCIState *ohci) USBDevice *dev; int i, j; + if (ohci->async_td) { + usb_cancel_packet(&ohci->usb_packet); + ohci->async_td = 0; + } for (i = 0; i < ohci->num_ports; i++) { dev = ohci->rhport[i].port.dev; if (dev && dev->attached) { @@ -398,10 +314,6 @@ static void ohci_roothub_reset(OHCIState *ohci) usb_port_reset(&port->port); } } - if (ohci->async_td) { - usb_cancel_packet(&ohci->usb_packet); - ohci->async_td = 0; - } ohci_stop_endpoints(ohci); } @@ -634,21 +546,9 @@ static int ohci_copy_iso_td(OHCIState *ohci, return 0; } -static void ohci_process_lists(OHCIState *ohci, int completion); - -static void ohci_async_complete_packet(USBPort *port, USBPacket *packet) -{ - OHCIState *ohci = container_of(packet, OHCIState, usb_packet); - - trace_usb_ohci_async_complete(); - ohci->async_complete = true; - ohci_process_lists(ohci, 1); -} - #define USUB(a, b) ((int16_t)((uint16_t)(a) - (uint16_t)(b))) -static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed, - int completion) +static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed) { int dir; size_t len = 0; @@ -658,6 +558,9 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed, int i; USBDevice *dev; USBEndpoint *ep; + USBPacket *pkt; + uint8_t buf[8192]; + bool int_req; struct ohci_iso_td iso_td; uint32_t addr; uint16_t starting_frame; @@ -792,40 +695,42 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed, } else { len = end_addr - start_addr + 1; } - if (len > sizeof(ohci->usb_buf)) { - len = sizeof(ohci->usb_buf); + if (len > sizeof(buf)) { + len = sizeof(buf); } if (len && dir != OHCI_TD_DIR_IN) { - if (ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, len, + if (ohci_copy_iso_td(ohci, start_addr, end_addr, buf, len, DMA_DIRECTION_TO_DEVICE)) { ohci_die(ohci); return 1; } } - if (!completion) { - bool int_req = relative_frame_number == frame_count && - OHCI_BM(iso_td.flags, TD_DI) == 0; - dev = ohci_find_device(ohci, OHCI_BM(ed->flags, ED_FA)); - if (dev == NULL) { - trace_usb_ohci_td_dev_error(); - return 1; - } - ep = usb_ep_get(dev, pid, OHCI_BM(ed->flags, ED_EN)); - usb_packet_setup(&ohci->usb_packet, pid, ep, 0, addr, false, int_req); - usb_packet_addbuf(&ohci->usb_packet, ohci->usb_buf, len); - usb_handle_packet(dev, &ohci->usb_packet); - if (ohci->usb_packet.status == USB_RET_ASYNC) { - usb_device_flush_ep_queue(dev, ep); - return 1; - } + dev = ohci_find_device(ohci, OHCI_BM(ed->flags, ED_FA)); + if (dev == NULL) { + trace_usb_ohci_td_dev_error(); + return 1; } - if (ohci->usb_packet.status == USB_RET_SUCCESS) { - ret = ohci->usb_packet.actual_length; + ep = usb_ep_get(dev, pid, OHCI_BM(ed->flags, ED_EN)); + pkt = g_new0(USBPacket, 1); + usb_packet_init(pkt); + int_req = relative_frame_number == frame_count && + OHCI_BM(iso_td.flags, TD_DI) == 0; + usb_packet_setup(pkt, pid, ep, 0, addr, false, int_req); + usb_packet_addbuf(pkt, buf, len); + usb_handle_packet(dev, pkt); + if (pkt->status == USB_RET_ASYNC) { + usb_device_flush_ep_queue(dev, ep); + g_free(pkt); + return 1; + } + if (pkt->status == USB_RET_SUCCESS) { + ret = pkt->actual_length; } else { - ret = ohci->usb_packet.status; + ret = pkt->status; } + g_free(pkt); 
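The rework above drops the shared ohci->usb_packet/usb_buf state from the isochronous path: each iso TD now gets its own heap-allocated USBPacket plus a local buffer, and asynchronous completions are no longer tracked here (hence the removal of the completion parameter throughout this file). Below is a condensed, assumption-laden sketch of that submit-and-free pattern; the helper name is made up for illustration and it uses only the calls already visible in the hunk.

    /* Sketch only: mirrors the per-TD packet handling added above. */
    #include "qemu/osdep.h"
    #include "hw/usb.h"

    static int iso_submit_once(USBDevice *dev, USBEndpoint *ep, int pid,
                               uint64_t addr, void *buf, size_t len, bool int_req)
    {
        USBPacket *pkt = g_new0(USBPacket, 1);
        int ret;

        usb_packet_init(pkt);
        usb_packet_setup(pkt, pid, ep, 0, addr, false, int_req);
        usb_packet_addbuf(pkt, buf, len);
        usb_handle_packet(dev, pkt);

        if (pkt->status == USB_RET_ASYNC) {
            /* The patch returns early here rather than waiting for completion. */
            usb_device_flush_ep_queue(dev, ep);
            ret = USB_RET_ASYNC;
        } else if (pkt->status == USB_RET_SUCCESS) {
            ret = pkt->actual_length;
        } else {
            ret = pkt->status;
        }
        g_free(pkt);
        return ret;
    }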
trace_usb_ohci_iso_td_so(start_offset, end_offset, start_addr, end_addr, str, len, ret); @@ -833,7 +738,7 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed, /* Writeback */ if (dir == OHCI_TD_DIR_IN && ret >= 0 && ret <= len) { /* IN transfer succeeded */ - if (ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, ret, + if (ohci_copy_iso_td(ohci, start_addr, end_addr, buf, ret, DMA_DIRECTION_FROM_DEVICE)) { ohci_die(ohci); return 1; @@ -1033,21 +938,21 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed) ohci->async_td = 0; ohci->async_complete = false; } else { + dev = ohci_find_device(ohci, OHCI_BM(ed->flags, ED_FA)); + if (dev == NULL) { + trace_usb_ohci_td_dev_error(); + return 1; + } + ep = usb_ep_get(dev, pid, OHCI_BM(ed->flags, ED_EN)); if (ohci->async_td) { /* ??? The hardware should allow one active packet per endpoint. We only allow one active packet per controller. This should be sufficient as long as devices respond in a timely manner. */ - trace_usb_ohci_td_too_many_pending(); + trace_usb_ohci_td_too_many_pending(ep->nr); return 1; } - dev = ohci_find_device(ohci, OHCI_BM(ed->flags, ED_FA)); - if (dev == NULL) { - trace_usb_ohci_td_dev_error(); - return 1; - } - ep = usb_ep_get(dev, pid, OHCI_BM(ed->flags, ED_EN)); usb_packet_setup(&ohci->usb_packet, pid, ep, 0, addr, !flag_r, OHCI_BM(td.flags, TD_DI) == 0); usb_packet_addbuf(&ohci->usb_packet, ohci->usb_buf, pktlen); @@ -1156,7 +1061,7 @@ exit_no_retire: } /* Service an endpoint list. Returns nonzero if active TD were found. */ -static int ohci_service_ed_list(OHCIState *ohci, uint32_t head, int completion) +static int ohci_service_ed_list(OHCIState *ohci, uint32_t head) { struct ohci_ed ed; uint32_t next_ed; @@ -1207,8 +1112,9 @@ static int ohci_service_ed_list(OHCIState *ohci, uint32_t head, int completion) break; } else { /* Handle isochronous endpoints */ - if (ohci_service_iso_td(ohci, &ed, completion)) + if (ohci_service_iso_td(ohci, &ed)) { break; + } } } @@ -1235,20 +1141,20 @@ static void ohci_sof(OHCIState *ohci) } /* Process Control and Bulk lists. */ -static void ohci_process_lists(OHCIState *ohci, int completion) +static void ohci_process_lists(OHCIState *ohci) { if ((ohci->ctl & OHCI_CTL_CLE) && (ohci->status & OHCI_STATUS_CLF)) { if (ohci->ctrl_cur && ohci->ctrl_cur != ohci->ctrl_head) { trace_usb_ohci_process_lists(ohci->ctrl_head, ohci->ctrl_cur); } - if (!ohci_service_ed_list(ohci, ohci->ctrl_head, completion)) { + if (!ohci_service_ed_list(ohci, ohci->ctrl_head)) { ohci->ctrl_cur = 0; ohci->status &= ~OHCI_STATUS_CLF; } } if ((ohci->ctl & OHCI_CTL_BLE) && (ohci->status & OHCI_STATUS_BLF)) { - if (!ohci_service_ed_list(ohci, ohci->bulk_head, completion)) { + if (!ohci_service_ed_list(ohci, ohci->bulk_head)) { ohci->bulk_cur = 0; ohci->status &= ~OHCI_STATUS_BLF; } @@ -1272,19 +1178,15 @@ static void ohci_frame_boundary(void *opaque) int n; n = ohci->frame_number & 0x1f; - ohci_service_ed_list(ohci, le32_to_cpu(hcca.intr[n]), 0); + ohci_service_ed_list(ohci, le32_to_cpu(hcca.intr[n])); } /* Cancel all pending packets if either of the lists has been disabled. 
*/ if (ohci->old_ctl & (~ohci->ctl) & (OHCI_CTL_BLE | OHCI_CTL_CLE)) { - if (ohci->async_td) { - usb_cancel_packet(&ohci->usb_packet); - ohci->async_td = 0; - } ohci_stop_endpoints(ohci); } ohci->old_ctl = ohci->ctl; - ohci_process_lists(ohci, 0); + ohci_process_lists(ohci); /* Stop if UnrecoverableError happened or ohci_sof will crash */ if (ohci->intr_status & OHCI_INTR_UE) { @@ -1793,8 +1695,45 @@ static void ohci_mem_write(void *opaque, } } -static void ohci_async_cancel_device(OHCIState *ohci, USBDevice *dev) +static const MemoryRegionOps ohci_mem_ops = { + .read = ohci_mem_read, + .write = ohci_mem_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +/* USBPortOps */ +static void ohci_attach(USBPort *port1) { + OHCIState *s = port1->opaque; + OHCIPort *port = &s->rhport[port1->index]; + uint32_t old_state = port->ctrl; + + /* set connect status */ + port->ctrl |= OHCI_PORT_CCS | OHCI_PORT_CSC; + + /* update speed */ + if (port->port.dev->speed == USB_SPEED_LOW) { + port->ctrl |= OHCI_PORT_LSDA; + } else { + port->ctrl &= ~OHCI_PORT_LSDA; + } + + /* notify of remote-wakeup */ + if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND) { + ohci_set_interrupt(s, OHCI_INTR_RD); + } + + trace_usb_ohci_port_attach(port1->index); + + if (old_state != port->ctrl) { + ohci_set_interrupt(s, OHCI_INTR_RHSC); + } +} + +static void ohci_child_detach(USBPort *port1, USBDevice *dev) +{ + OHCIState *ohci = port1->opaque; + if (ohci->async_td && usb_packet_is_inflight(&ohci->usb_packet) && ohci->usb_packet.ep->dev == dev) { @@ -1803,11 +1742,65 @@ static void ohci_async_cancel_device(OHCIState *ohci, USBDevice *dev) } } -static const MemoryRegionOps ohci_mem_ops = { - .read = ohci_mem_read, - .write = ohci_mem_write, - .endianness = DEVICE_LITTLE_ENDIAN, -}; +static void ohci_detach(USBPort *port1) +{ + OHCIState *s = port1->opaque; + OHCIPort *port = &s->rhport[port1->index]; + uint32_t old_state = port->ctrl; + + ohci_child_detach(port1, port1->dev); + + /* set connect status */ + if (port->ctrl & OHCI_PORT_CCS) { + port->ctrl &= ~OHCI_PORT_CCS; + port->ctrl |= OHCI_PORT_CSC; + } + /* disable port */ + if (port->ctrl & OHCI_PORT_PES) { + port->ctrl &= ~OHCI_PORT_PES; + port->ctrl |= OHCI_PORT_PESC; + } + trace_usb_ohci_port_detach(port1->index); + + if (old_state != port->ctrl) { + ohci_set_interrupt(s, OHCI_INTR_RHSC); + } +} + +static void ohci_wakeup(USBPort *port1) +{ + OHCIState *s = port1->opaque; + OHCIPort *port = &s->rhport[port1->index]; + uint32_t intr = 0; + if (port->ctrl & OHCI_PORT_PSS) { + trace_usb_ohci_port_wakeup(port1->index); + port->ctrl |= OHCI_PORT_PSSC; + port->ctrl &= ~OHCI_PORT_PSS; + intr = OHCI_INTR_RHSC; + } + /* Note that the controller can be suspended even if this port is not */ + if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND) { + trace_usb_ohci_remote_wakeup(s->name); + /* This is the one state transition the controller can do by itself */ + s->ctl &= ~OHCI_CTL_HCFS; + s->ctl |= OHCI_USB_RESUME; + /* + * In suspend mode only ResumeDetected is possible, not RHSC: + * see the OHCI spec 5.1.2.3. 
+ */ + intr = OHCI_INTR_RD; + } + ohci_set_interrupt(s, intr); +} + +static void ohci_async_complete_packet(USBPort *port, USBPacket *packet) +{ + OHCIState *ohci = container_of(packet, OHCIState, usb_packet); + + trace_usb_ohci_async_complete(); + ohci->async_complete = true; + ohci_process_lists(ohci); +} static USBPortOps ohci_port_ops = { .attach = ohci_attach, diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index 14bdb89676..0cd0a5e540 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -2523,7 +2523,7 @@ static void xhci_process_commands(XHCIState *xhci) case CR_VENDOR_NEC_FIRMWARE_REVISION: if (xhci->nec_quirks) { event.type = 48; /* NEC reply */ - event.length = 0x3025; + event.length = 0x3034; } else { event.ccode = CC_TRB_ERROR; } diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c index 5f0ef9cb3b..8692ea2561 100644 --- a/hw/usb/redirect.c +++ b/hw/usb/redirect.c @@ -1239,7 +1239,11 @@ static void usbredir_create_parser(USBRedirDevice *dev) DPRINTF("creating usbredirparser\n"); - dev->parser = qemu_oom_check(usbredirparser_create()); + dev->parser = usbredirparser_create(); + if (!dev->parser) { + error_report("usbredirparser_create() failed"); + exit(1); + } dev->parser->priv = dev; dev->parser->log_func = usbredir_log; dev->parser->read_func = usbredir_read; @@ -2239,7 +2243,10 @@ static int usbredir_put_parser(QEMUFile *f, void *priv, size_t unused, } usbredirparser_serialize(dev->parser, &data, &len); - qemu_oom_check(data); + if (!data) { + error_report("usbredirparser_serialize failed"); + exit(1); + } qemu_put_be32(f, len); qemu_put_buffer(f, data, len); @@ -2330,7 +2337,11 @@ static int usbredir_get_bufpq(QEMUFile *f, void *priv, size_t unused, bufp->len = qemu_get_be32(f); bufp->status = qemu_get_be32(f); bufp->offset = 0; - bufp->data = qemu_oom_check(malloc(bufp->len)); /* regular malloc! */ + bufp->data = malloc(bufp->len); /* regular malloc! */ + if (!bufp->data) { + error_report("usbredir_get_bufpq: out of memory"); + exit(1); + } bufp->free_on_destroy = bufp->data; qemu_get_buffer(f, bufp->data, bufp->len); QTAILQ_INSERT_TAIL(&endp->bufpq, bufp, next); diff --git a/hw/usb/trace-events b/hw/usb/trace-events index b8287b63f1..9773cb5330 100644 --- a/hw/usb/trace-events +++ b/hw/usb/trace-events @@ -51,7 +51,7 @@ usb_ohci_td_skip_async(void) "" usb_ohci_td_pkt_hdr(uint32_t addr, int64_t pktlen, int64_t len, const char *s, int flag_r, uint32_t cbp, uint32_t be) " TD @ 0x%.8x %" PRId64 " of %" PRId64 " bytes %s r=%d cbp=0x%.8x be=0x%.8x" usb_ohci_td_pkt_short(const char *dir, const char *buf) "%s data: %s" usb_ohci_td_pkt_full(const char *dir, const char *buf) "%s data: %s" -usb_ohci_td_too_many_pending(void) "" +usb_ohci_td_too_many_pending(int ep) "ep=%d" usb_ohci_td_packet_status(int status) "status=%d" usb_ohci_ed_read_error(uint32_t addr) "ED read error at 0x%x" usb_ohci_ed_pkt(uint32_t cur, int h, int c, uint32_t head, uint32_t tail, uint32_t next) "ED @ 0x%.8x h=%u c=%u\n head=0x%.8x tailp=0x%.8x next=0x%.8x" |
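Taken together, the hw/riscv/virt.c changes in this section expose two new machine properties: "aia" selects the interrupt-controller type ("none" keeps the existing PLIC, "aplic" instantiates wired APLICs only, "aplic-imsic" additionally creates per-hart IMSICs), and "aia-guests" sets the number of guest interrupt files per hart, from 0 up to VIRT_IRQCHIP_MAX_GUESTS. An illustrative invocation (the -kernel argument is a placeholder and the remaining options are just one plausible setup):

    qemu-system-riscv64 -M virt,aia=aplic-imsic,aia-guests=2 \
        -smp 4 -m 2G -nographic -kernel Image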